From c32fa2172d7d688e3b816957cd1e92f6ca129244 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 13 May 2012 09:15:56 +0200 Subject: [PATCH 1/4] pre-create word list to avoid duplicates due to parallel indexing --- sql/functions.sql | 26 +++++++++++++++++++++++++- utils/setup.php | 8 ++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/sql/functions.sql b/sql/functions.sql index ec25c7fa..b4657bf8 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -389,7 +389,7 @@ BEGIN result := '{}'::INTEGER[]; s := make_standard_name(src); - w := getorcreate_name_id(s); + w := getorcreate_name_id(s, src); IF NOT (ARRAY[w] <@ result) THEN result := result || w; @@ -407,6 +407,30 @@ BEGIN END LOOP; END IF; + words := regexp_split_to_array(src, E'[,;()]'); + IF array_upper(words, 1) != 1 THEN + FOR j IN 1..array_upper(words, 1) LOOP + s := make_standard_name(words[j]); + IF s != '' THEN + w := getorcreate_word_id(s); + IF NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END LOOP; + END IF; + + s := regexp_replace(src, '市$', ''); + IF s != src THEN + s := make_standard_name(s); + IF s != '' THEN + w := getorcreate_name_id(s, src); + IF NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END IF; + RETURN result; END; $$ diff --git a/utils/setup.php b/utils/setup.php index 29dd86ac..b40bad3a 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -264,6 +264,14 @@ if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection)); echo '.'; + // pre-create the word list + if (!pg_query($oDB->connection, 'select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;')) fail(pg_last_error($oDB->connection)); + echo '.'; + if (!pg_query($oDB->connection, 'select count(make_keywords(v)) from (select distinct postcode as v from place) as w where v is not null;')) fail(pg_last_error($oDB->connection)); + echo '.'; + if (!pg_query($oDB->connection, 'select count(getorcreate_housenumber_id(v)) from (select distinct housenumber as v from place where housenumber is not null) as w;')) fail(pg_last_error($oDB->connection)); + echo '.'; + $aDBInstances = array(); for($i = 0; $i < $iInstances; $i++) { From 6d5c101a9e5443825728f3c2d21f5f3611222cdb Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 13 May 2012 10:05:27 +0200 Subject: [PATCH 2/4] make sure nominatim.so is accessible for postgresql Even if the user has a very restrictive umask in place. --- module/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module/Makefile b/module/Makefile index 0fdb2daa..12d66b63 100644 --- a/module/Makefile +++ b/module/Makefile @@ -2,3 +2,5 @@ MODULES = nominatim PGXS := $(shell pg_config --pgxs) include $(PGXS) +all: + chmod 755 nominatim.so From fd448170c5b5b8870d5e54f0622a8f151dceca34 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 13 May 2012 10:50:08 +0200 Subject: [PATCH 3/4] remove intermediate ANALYSE while indexing Autovacuum does a much better job. --- utils/setup.php | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/utils/setup.php b/utils/setup.php index b40bad3a..4b60815c 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -431,11 +431,7 @@ $sOutputFile = ''; if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.$aCMDResult['index-output']; $sBaseCmd = CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$iInstances.$sOutputFile; - passthru($sBaseCmd.' -R 4'); - pgsqlRunScript('ANALYSE'); - passthru($sBaseCmd.' -r 5 -R 25'); - pgsqlRunScript('ANALYSE'); - passthru($sBaseCmd.' -r 26'); + passthru($sBaseCmd); } if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) From 7c4b68d2ea30a0615b7159c80b18132b09efd97b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 13 May 2012 11:38:50 +0200 Subject: [PATCH 4/4] move index over nameaddress_vector back to delayed index creation No longer needed for linking places. --- sql/indices.src.sql | 7 +++++-- sql/tables.sql | 1 - 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/indices.src.sql b/sql/indices.src.sql index d6a1f08f..70776f99 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -1,6 +1,10 @@ +-- Indices used only during search and update. +-- These indices are created only after the indexing process is done. + CREATE INDEX idx_word_word_id on word USING BTREE (word_id); -CREATE INDEX searchnameplacesearch_search_nameaddress_vector_idx ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off); CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid); CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id); @@ -21,6 +25,5 @@ CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST CREATE INDEX idx_search_name_country_nameaddress_vector ON search_name_country USING GIN (nameaddress_vector) WITH (fastupdate = off); -- start -CREATE INDEX idx_search_name_-partition-_nameaddress_vector ON search_name_-partition- USING GIN (nameaddress_vector) WITH (fastupdate = off); CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid); -- end diff --git a/sql/tables.sql b/sql/tables.sql index 48740f23..ac4d0956 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -138,7 +138,6 @@ SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); drop table IF EXISTS search_name; CREATE TABLE search_name () INHERITS (search_name_blank); CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id); -CREATE INDEX search_name_name_vector_idx ON search_name USING GIN (name_vector) WITH (fastupdate = off); drop table IF EXISTS place_addressline; CREATE TABLE place_addressline (