ignore frequent partial search terms

Adds word counts from a full planet to the word table. There is a
new configuration option, CONST_Max_Word_Frequency, which allows the
word count to be taken into account: the value that was set at import
time is used to determine whether a word is added to the search_name
table. The value at runtime determines whether a single term should be
used for partial search or simply be ignored.
This commit is contained in:
Sarah Hoffmann
2012-09-25 00:36:34 +02:00
parent f1063497ef
commit e73e67001e
5 changed files with 49667 additions and 12 deletions

View File

@@ -271,7 +271,7 @@
if ($aCMDResult['load-data'] || $aCMDResult['all'])
{
echo "Load Data\n";
echo "Drop old Data\n";
$bDidSomething = true;
$oDB =& getDB();
@@ -307,17 +307,18 @@
echo '.';
}
// used by getorcreate_word_id to ignore frequent partial words
// (Re)creates the SQL function get_maxwordfreq(). The current value of
// CONST_Max_Word_Frequency is concatenated into the function body, so the
// function returns whatever the constant held when this statement ran.
// On failure the PostgreSQL error text is passed to fail().
if (!pg_query($oDB->connection, 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS $$ SELECT '.CONST_Max_Word_Frequency.' as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE')) fail(pg_last_error($oDB->connection));
echo ".\n";
// pre-create the word list
// The whole step is skipped when $aCMDResult['disable-token-precalc'] is truthy
// (presumably a command-line switch - confirm against the option parser).
if (!$aCMDResult['disable-token-precalc'])
{
// Call make_keywords() once for every distinct value found in place.name (expanded with svals()).
if (!pg_query($oDB->connection, 'select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;')) fail(pg_last_error($oDB->connection));
echo '.';
// Call make_keywords() once for every distinct non-null place.postcode.
if (!pg_query($oDB->connection, 'select count(make_keywords(v)) from (select distinct postcode as v from place) as w where v is not null;')) fail(pg_last_error($oDB->connection));
echo '.';
// Call getorcreate_housenumber_id() once for every distinct non-null place.housenumber.
if (!pg_query($oDB->connection, 'select count(getorcreate_housenumber_id(v)) from (select distinct housenumber as v from place where housenumber is not null) as w;')) fail(pg_last_error($oDB->connection));
echo '.';
echo "Loading word list\n";
// NOTE(review): pgsqlRunScriptFile is defined elsewhere; presumably it executes
// the given SQL file (data/words.sql under CONST_BasePath) against the database - confirm.
pgsqlRunScriptFile(CONST_BasePath.'/data/words.sql');
}
echo "Load Data\n";
$aDBInstances = array();
for($i = 0; $i < $iInstances; $i++)
{