From 15dbb6383c7ee081457c39eee97c8c0c688f8ecb Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 27 Jun 2017 21:10:34 +0200 Subject: [PATCH 01/41] add new location_postcode table Artifical postcode centroids are now saved in there. --- sql/tables.sql | 13 +++++++++++-- utils/setup.php | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/sql/tables.sql b/sql/tables.sql index f3217d5a..07b0ada5 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -197,8 +197,17 @@ CREATE TRIGGER place_before_delete BEFORE DELETE ON place CREATE TRIGGER place_before_insert BEFORE INSERT ON place FOR EACH ROW EXECUTE PROCEDURE place_insert(); -DROP SEQUENCE IF EXISTS seq_postcodes; -CREATE SEQUENCE seq_postcodes start 1; +DROP TABLE IF EXISTS location_postcode; +CREATE TABLE location_postcode ( + place_id BIGINT, + parent_place_id BIGINT, + rank_address SMALLINT, + indexed_status SMALLINT, + indexed_date TIMESTAMP, + country_code varchar(2), + postcode TEXT, + geometry GEOMETRY(Geometry, 4326) + ); DROP TABLE IF EXISTS import_polygon_error; CREATE TABLE import_polygon_error ( diff --git a/utils/setup.php b/utils/setup.php index 7490d820..54ddf082 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -493,22 +493,34 @@ if ($aCMDResult['import-tiger-data']) { if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { $bDidSomething = true; $oDB =& getDB(); - if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection)); - $sSQL = "insert into placex (osm_type,osm_id,class,type,address,country_code,geometry) "; - $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',"; - $sSQL .= "hstore('postcode', pc),country_code,"; - $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,"; - $sSQL .= "address->'postcode' as pc,"; - $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "; - $sSQL .= "from placex where address ? 'postcode' group by country_code,pc) as x "; - $sSQL .= "where ST_Point(x,y) is not null"; - if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); + if (!pg_query($oDB->connection, 'TRUNCATE location_postcode')) { + fail(pg_last_error($oDB->connection)); + } + + $sSQL = "INSERT INTO location_postcode"; + $sSQL .= " (place_id, indexed_status, country_code, postcode, geometry) "; + $sSQL .= "SELECT nextval('seq_place'), 1, country_code,"; + $sSQL .= " lower(trim (both ' ' from address->'postcode')) as pc,"; + $sSQL .= " ST_Centroid(ST_Collect(ST_Centroid(geometry)))"; + $sSQL .= " FROM placex"; + $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'"; + $sSQL .= " AND geometry IS NOT null"; + $sSQL .= " GROUP BY country_code, pc"; + + if (!pg_query($oDB->connection, $sSQL)) { + fail(pg_last_error($oDB->connection)); + } if (CONST_Use_Extra_US_Postcodes) { - $sSQL = "insert into placex (osm_type,osm_id,class,type,address,country_code,geometry) "; - $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',"; - $sSQL .= "hstore('postcode', postcode),'us',"; - $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode"; + // only add postcodes that are not yet available in OSM + $sSQL = "INSERT INTO location_postcode"; + $sSQL .= " (place_id, indexed_status, country_code, postcode, geometry) "; + $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,"; + $sSQL .= " ST_SetSRID(ST_Point(x,y),4326)"; + $sSQL .= " FROM us_postcode WHERE postcode NOT IN"; + $sSQL .= " (SELECT postcode FROM location_postcode"; + $sSQL .= " WHERE country_code = 'us')"; + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } } From 80ef6cbaab0e503739d98c97cf2d6a80407bfb00 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 29 Jun 2017 21:39:00 +0200 Subject: [PATCH 02/41] add indexing of artificial postcodes --- sql/functions.sql | 141 ++++++++++++++++++++++++++++++-------------- sql/indices.src.sql | 4 ++ sql/tables.sql | 6 ++ utils/setup.php | 6 +- 4 files changed, 111 insertions(+), 46 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index 1cda2e98..5e16a175 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -236,6 +236,65 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION get_postcode_rank(country_code VARCHAR(2), postcode TEXT, + OUT rank_search SMALLINT, OUT rank_address SMALLINT) +AS $$ +DECLARE + part TEXT; +BEGIN + rank_search := 30; + rank_address := 30; + + IF country_code = 'gb' THEN + IF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN + rank_search := 25; + rank_address := 5; + ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN + rank_search := 23; + rank_address := 5; + ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN + rank_search := 21; + rank_address := 5; + END IF; + + ELSEIF country_code = 'sg' THEN + IF postcode ~ '^([0-9]{6})$' THEN + rank_search := 25; + rank_address := 11; + END IF; + + ELSEIF country_code = 'de' THEN + IF postcode ~ '^([0-9]{5})$' THEN + rank_search := 21; + rank_address := 11; + END IF; + + ELSE + -- Guess at the postcode format and coverage (!) + IF postcode ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local + rank_search := 21; + rank_address := 11; + ELSE + -- Does it look splitable into and area and local code? + part := substring(postcode from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); + + IF part IS NOT NULL THEN + rank_search := 25; + rank_address := 11; + ELSEIF postcode ~ '^[- :A-Z0-9]{6,}$' THEN + rank_search := 21; + rank_address := 11; + END IF; + END IF; + END IF; + +END; +$$ +LANGUAGE plpgsql IMMUTABLE; + + + CREATE OR REPLACE FUNCTION create_country(src HSTORE, lookup_country_code varchar(2)) RETURNS VOID AS $$ DECLARE @@ -725,51 +784,8 @@ BEGIN NEW.postcode := NEW.address->'postcode'; NEW.name := hstore('ref', NEW.postcode); - IF NEW.country_code = 'gb' THEN - - IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN - NEW.rank_search := 25; - NEW.rank_address := 5; - ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN - NEW.rank_search := 23; - NEW.rank_address := 5; - ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN - NEW.rank_search := 21; - NEW.rank_address := 5; - END IF; - - ELSEIF NEW.country_code = 'sg' THEN - - IF NEW.postcode ~ '^([0-9]{6})$' THEN - NEW.rank_search := 25; - NEW.rank_address := 11; - END IF; - - ELSEIF NEW.country_code = 'de' THEN - - IF NEW.postcode ~ '^([0-9]{5})$' THEN - NEW.rank_search := 21; - NEW.rank_address := 11; - END IF; - - ELSE - -- Guess at the postcode format and coverage (!) - IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local - NEW.rank_search := 21; - NEW.rank_address := 11; - ELSE - -- Does it look splitable into and area and local code? - postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); - - IF postcode IS NOT NULL THEN - NEW.rank_search := 25; - NEW.rank_address := 11; - ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN - NEW.rank_search := 21; - NEW.rank_address := 11; - END IF; - END IF; - END IF; + SELECT * FROM get_postcode_rank(NEW.country_code, NEW.postcode) + INTO NEW.rank_search, NEW.rank_address; ELSEIF NEW.class = 'place' THEN IF NEW.type in ('continent') THEN @@ -1094,7 +1110,42 @@ END; $$ LANGUAGE plpgsql; +-- Trigger for updates of location_postcode +-- +-- Computes the parent object the postcode most likely refers to. +-- This will be the place that determines the address displayed when +-- searching for this postcode. +CREATE OR REPLACE FUNCTION postcode_update() RETURNS +TRIGGER + AS $$ +DECLARE + partition SMALLINT; + location RECORD; +BEGIN + IF NEW.indexed_status != 0 OR OLD.indexed_status = 0 THEN + RETURN NEW; + END IF; + NEW.indexed_date = now(); + + partition := get_partition(NEW.country_code); + + SELECT * FROM get_postcode_rank(NEW.country_code, NEW.postcode) + INTO NEW.rank_search, NEW.rank_address; + + NEW.parent_place_id = 0; + FOR location IN + SELECT place_id + FROM getNearFeatures(partition, NEW.geometry, NEW.rank_search, '{}'::int[]) + WHERE NOT isguess ORDER BY rank_address DESC LIMIT 1 + LOOP + NEW.parent_place_id = location.place_id; + END LOOP; + + RETURN NEW; +END; +$$ +LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION placex_update() RETURNS TRIGGER diff --git a/sql/indices.src.sql b/sql/indices.src.sql index bbd5a76a..9de2c97f 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -26,3 +26,7 @@ CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,cl CREATE INDEX idx_gb_postcode_postcode ON gb_postcode USING BTREE (postcode) {ts:search-index}; + +CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index}; +CREATE INDEX idx_postcode_parent_id ON location_postcode USING BTREE (parent_place_id) {ts:search-index} where parent_place_id IS NOT NULL; +CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode, country_code) {ts:search-index}; diff --git a/sql/tables.sql b/sql/tables.sql index 07b0ada5..334fcbc0 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -197,10 +197,12 @@ CREATE TRIGGER place_before_delete BEFORE DELETE ON place CREATE TRIGGER place_before_insert BEFORE INSERT ON place FOR EACH ROW EXECUTE PROCEDURE place_insert(); +-- Table for synthetic postcodes. DROP TABLE IF EXISTS location_postcode; CREATE TABLE location_postcode ( place_id BIGINT, parent_place_id BIGINT, + rank_search SMALLINT, rank_address SMALLINT, indexed_status SMALLINT, indexed_date TIMESTAMP, @@ -208,6 +210,10 @@ CREATE TABLE location_postcode ( postcode TEXT, geometry GEOMETRY(Geometry, 4326) ); +CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry) {ts:address-index}; + +CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcode + FOR EACH ROW EXECUTE PROCEDURE postcode_update(); DROP TABLE IF EXISTS import_polygon_error; CREATE TABLE import_polygon_error ( diff --git a/utils/setup.php b/utils/setup.php index 54ddf082..a9597da8 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -500,7 +500,7 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { $sSQL = "INSERT INTO location_postcode"; $sSQL .= " (place_id, indexed_status, country_code, postcode, geometry) "; $sSQL .= "SELECT nextval('seq_place'), 1, country_code,"; - $sSQL .= " lower(trim (both ' ' from address->'postcode')) as pc,"; + $sSQL .= " upper(trim (both ' ' from address->'postcode')) as pc,"; $sSQL .= " ST_Centroid(ST_Collect(ST_Centroid(geometry)))"; $sSQL .= " FROM placex"; $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'"; @@ -523,6 +523,10 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } + + echo "Indexing postcodes....\n"; + $sSQL = 'UPDATE location_postcode SET indexed_status = 0'; + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } if ($aCMDResult['osmosis-init']) { From 71ddeb40a66e0fef307350cdcd7cb823c90eb132 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 29 Jun 2017 23:13:40 +0200 Subject: [PATCH 03/41] remove now useless getNearestPostcode function Most postcodes are not in the location-area tables anymore. --- sql/functions.sql | 4 +--- sql/partition-functions.src.sql | 23 ----------------------- 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index 5e16a175..fd71c6fd 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -2683,9 +2683,7 @@ BEGIN IF out_postcode IS NULL THEN SELECT postcode from placex where place_id = out_parent_place_id INTO out_postcode; END IF; - IF out_postcode IS NULL THEN - out_postcode := getNearestPostcode(out_partition, place_centroid); - END IF; + -- XXX look into postcode table newpoints := 0; insert into location_property_aux (place_id, partition, parent_place_id, housenumber, postcode, centroid) diff --git a/sql/partition-functions.src.sql b/sql/partition-functions.src.sql index 7d4f7cbe..f4df9135 100644 --- a/sql/partition-functions.src.sql +++ b/sql/partition-functions.src.sql @@ -173,29 +173,6 @@ $$ LANGUAGE plpgsql; -create or replace function getNearestPostcode(in_partition INTEGER, point GEOMETRY) - RETURNS TEXT AS $$ -DECLARE - out_postcode TEXT; -BEGIN - --- start - IF in_partition = -partition- THEN - SELECT postcode - FROM location_area_large_-partition- join placex using (place_id) - WHERE st_contains(location_area_large_-partition-.geometry, point) - AND class = 'place' and type = 'postcode' - ORDER BY st_distance(location_area_large_-partition-.centroid, point) ASC limit 1 - INTO out_postcode; - RETURN out_postcode; - END IF; --- end - - RAISE EXCEPTION 'Unknown partition %', in_partition; -END -$$ -LANGUAGE plpgsql; - create or replace function insertSearchName( in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_name_vector INTEGER[], in_nameaddress_vector INTEGER[], From bdec4e648823bf33dd3f898b6c202d6d9f1251e3 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 29 Jun 2017 23:34:09 +0200 Subject: [PATCH 04/41] replace AddGeometryColumn() functions Directly add the columns to the table definitions instead. --- sql/partition-tables.src.sql | 8 ++++---- sql/tables.sql | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sql/partition-tables.src.sql b/sql/partition-tables.src.sql index 6f799bcf..eb2ed77b 100644 --- a/sql/partition-tables.src.sql +++ b/sql/partition-tables.src.sql @@ -29,9 +29,9 @@ CREATE TABLE search_name_blank ( place_id BIGINT, search_rank smallint, address_rank smallint, - name_vector integer[] + name_vector integer[], + centroid GEOMETRY(Geometry, 4326) ); -SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); CREATE TABLE location_area_country () INHERITS (location_area_large) {ts:address-data}; @@ -54,9 +54,9 @@ CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- CREATE TABLE location_road_-partition- ( place_id BIGINT, partition SMALLINT, - country_code VARCHAR(2) + country_code VARCHAR(2), + geometry GEOMETRY(Geometry, 4326) ) {ts:address-data}; -SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) {ts:address-index}; CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) {ts:address-index}; diff --git a/sql/tables.sql b/sql/tables.sql index 334fcbc0..8d8ba9a7 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -61,10 +61,10 @@ CREATE TABLE location_area ( rank_search SMALLINT NOT NULL, rank_address SMALLINT NOT NULL, country_code VARCHAR(2), - isguess BOOL + isguess BOOL, + centroid GEOMETRY(Point, 4326), + geometry GEOMETRY(Geometry, 4326) ); -SELECT AddGeometryColumn('location_area', 'centroid', 4326, 'POINT', 2); -SELECT AddGeometryColumn('location_area', 'geometry', 4326, 'GEOMETRY', 2); CREATE TABLE location_area_large () INHERITS (location_area); @@ -74,9 +74,9 @@ CREATE TABLE location_property ( parent_place_id BIGINT, partition SMALLINT, housenumber TEXT, - postcode TEXT + postcode TEXT, + centroid GEOMETRY(Point, 4326) ); -SELECT AddGeometryColumn('location_property', 'centroid', 4326, 'POINT', 2); CREATE TABLE location_property_aux () INHERITS (location_property); CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id); @@ -125,9 +125,9 @@ CREATE TABLE search_name ( address_rank SMALLINT, name_vector integer[], nameaddress_vector integer[], - country_code varchar(2) + country_code varchar(2), + centroid GEOMETRY(Geometry, 4326) ) {ts:search-data}; -SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2); CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) {ts:search-index}; drop table IF EXISTS place_addressline; @@ -157,9 +157,9 @@ CREATE TABLE placex ( wikipedia TEXT, -- calculated wikipedia article name (language:title) country_code varchar(2), housenumber TEXT, - postcode TEXT + postcode TEXT, + centroid GEOMETRY(Geometry, 4326) ) {ts:search-data}; -SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {ts:search-index}; CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {ts:search-index}; CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {ts:address-index} WHERE linked_place_id IS NOT NULL; @@ -224,10 +224,10 @@ CREATE TABLE import_polygon_error ( name HSTORE, country_code varchar(2), updated timestamp, - errormessage text + errormessage text, + prevgeometry GEOMTRY(Geometry, 4326), + newgeometry GEOMTRY(Geometry, 4326) ); -SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2); -SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2); CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); GRANT SELECT ON import_polygon_error TO "{www-user}"; From 79aa74b7710f7381b71c89490fc7c9a34967e958 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 29 Jun 2017 23:39:31 +0200 Subject: [PATCH 05/41] remove unused loaddata file Load data is being done in the setup script. --- sql/loaddata.sql | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 sql/loaddata.sql diff --git a/sql/loaddata.sql b/sql/loaddata.sql deleted file mode 100644 index a3c8914a..00000000 --- a/sql/loaddata.sql +++ /dev/null @@ -1,16 +0,0 @@ -TRUNCATE placex; -TRUNCATE search_name; -TRUNCATE place_addressline; -TRUNCATE location_area; - -DROP SEQUENCE seq_place; -CREATE SEQUENCE seq_place start 100000; - -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'N'; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'W'; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'R'; - ---select count(*) from (select create_interpolation(osm_id, housenumber) from placex where indexed=false and class='place' and type='houses') as x; From 4e792546e81844a8b4ba1026358b2115452f42ab Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 1 Jul 2017 19:02:25 +0200 Subject: [PATCH 06/41] add postcode to location_area tables --- sql/functions.sql | 23 +++++++++++++---------- sql/partition-functions.src.sql | 8 ++++---- sql/tables.sql | 1 + 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index fd71c6fd..3e80c2df 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -574,36 +574,39 @@ CREATE OR REPLACE FUNCTION add_location( keywords INTEGER[], rank_search INTEGER, rank_address INTEGER, + in_postcode TEXT, geometry GEOMETRY ) RETURNS BOOLEAN AS $$ DECLARE locationid INTEGER; - isarea BOOLEAN; centroid GEOMETRY; diameter FLOAT; x BOOLEAN; splitGeom RECORD; secgeo GEOMETRY; + postcode TEXT; BEGIN IF rank_search > 25 THEN RAISE EXCEPTION 'Adding location with rank > 25 (% rank %)', place_id, rank_search; END IF; --- RAISE WARNING 'Adding location with rank > 25 (% rank %)', place_id, rank_search; - x := deleteLocationArea(partition, place_id, rank_search); - isarea := false; - IF (ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(geometry)) THEN + -- add postcode only if it contains a single entry, i.e. ignore postcode lists + IF in_postcode IS NULL OR in_postcode similar to '%(,|;)%' THEN + postcode := NULL; + ELSE + postcode := in_postcode; + END IF; - isArea := true; + IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') THEN centroid := ST_Centroid(geometry); FOR secgeo IN select split_geometry(geometry) AS geom LOOP - x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, centroid, secgeo); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, postcode, centroid, secgeo); END LOOP; ELSE @@ -628,7 +631,7 @@ BEGIN -- RAISE WARNING 'adding % diameter %', place_id, diameter; secgeo := ST_Buffer(geometry, diameter); - x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, ST_Centroid(geometry), secgeo); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, postcode, ST_Centroid(geometry), secgeo); END IF; @@ -1529,7 +1532,7 @@ BEGIN -- Just be happy with inheriting from parent road only IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.address->'postcode', NEW.geometry); --DEBUG: RAISE WARNING 'Place added to location table'; END IF; @@ -1901,7 +1904,7 @@ BEGIN IF NEW.name IS NOT NULL THEN IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.address->'postcode', NEW.geometry); --DEBUG: RAISE WARNING 'added to location (full)'; END IF; diff --git a/sql/partition-functions.src.sql b/sql/partition-functions.src.sql index f4df9135..d7153ca3 100644 --- a/sql/partition-functions.src.sql +++ b/sql/partition-functions.src.sql @@ -55,8 +55,8 @@ $$ LANGUAGE plpgsql; create or replace function insertLocationAreaLarge( - in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_keywords INTEGER[], - in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, + in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_keywords INTEGER[], + in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, postcode TEXT, in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ DECLARE BEGIN @@ -72,8 +72,8 @@ BEGIN -- start IF in_partition = -partition- THEN - INSERT INTO location_area_large_-partition- (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, centroid, geometry) - values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); + INSERT INTO location_area_large_-partition- (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, postcode, centroid, geometry) + values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, postcode, in_centroid, in_geometry); RETURN TRUE; END IF; -- end diff --git a/sql/tables.sql b/sql/tables.sql index 8d8ba9a7..98bf0c02 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -62,6 +62,7 @@ CREATE TABLE location_area ( rank_address SMALLINT NOT NULL, country_code VARCHAR(2), isguess BOOL, + postcode TEXT, centroid GEOMETRY(Point, 4326), geometry GEOMETRY(Geometry, 4326) ); From d59d57957c8c0cf2980fa1a12d0f3766b8655cab Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 1 Jul 2017 22:49:24 +0200 Subject: [PATCH 07/41] precompute postcodes Set postcode column to the best guess for the postcode for the place. --- sql/functions.sql | 78 +++++++++++++++++++++------------ sql/partition-functions.src.sql | 4 +- sql/partition-tables.src.sql | 1 + 3 files changed, 54 insertions(+), 29 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index 3e80c2df..44918774 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -293,6 +293,27 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; +-- Find the nearest artificial postcode for the given geometry. +-- TODO For areas there should not be more than two inside the geometry. +CREATE OR REPLACE FUNCTION get_nearest_postcode(country VARCHAR(2), geom GEOMETRY) RETURNS TEXT + AS $$ +DECLARE + item RECORD; +BEGIN + FOR item IN + SELECT postcode FROM location_postcode + WHERE ST_DWithin(geom, location_postcode.geometry, 0.05) + AND location_postcode.country_code = country + ORDER BY ST_Distance(geom, location_postcode.geometry) + LIMIT 1 + LOOP + RETURN item.postcode; + END LOOP; + + RETURN null; +END; +$$ +LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION create_country(src HSTORE, lookup_country_code varchar(2)) RETURNS VOID @@ -1212,11 +1233,6 @@ BEGIN --DEBUG: RAISE WARNING 'placex_update % % (%)',NEW.osm_type,NEW.osm_id,NEW.place_id; - IF NEW.class = 'place' AND NEW.type = 'postcodearea' THEN - -- Silently do nothing - RETURN NEW; - END IF; - NEW.indexed_date = now(); result := deleteSearchName(NEW.partition, NEW.place_id); @@ -1252,8 +1268,6 @@ BEGIN addr_street = NEW.address->'street'; addr_place = NEW.address->'place'; - - NEW.postcode = NEW.address->'postcode'; END IF; -- Speed up searches - just use the centroid of the feature @@ -1496,6 +1510,16 @@ BEGIN NEW.country_code := location.country_code; --DEBUG: RAISE WARNING 'Got parent details from search name'; + -- determine postcode + IF NEW.address is not null AND NEW.address ? 'postcode' THEN + NEW.postcode = NEW.address->'postcode'; + ELSE + SELECT postcode FROM placex WHERE place_id = parent_place_id INTO NEW.postcode; + END IF; + IF NEW.postcode is null THEN + NEW.postcode := get_nearest_postcode(NEW.country_code, place_centroid); + END IF; + -- Merge the postcode into the parent's address if necessary IF NEW.postcode IS NOT NULL THEN --DEBUG: RAISE WARNING 'Merging postcode into parent'; @@ -1719,6 +1743,7 @@ BEGIN isin := avals(NEW.address); IF array_upper(isin, 1) IS NOT NULL THEN FOR i IN 1..array_upper(isin, 1) LOOP + -- TODO further split terms with comma and semicolon address_street_word_id := get_name_id(make_standard_name(isin[i])); IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); @@ -1733,26 +1758,6 @@ BEGIN END LOOP; END IF; END IF; - --DEBUG: RAISE WARNING '"address:* tokens collected'; - IF NEW.postcode IS NOT NULL THEN - isin := regexp_split_to_array(NEW.postcode, E'[;,]'); - IF array_upper(isin, 1) IS NOT NULL THEN - FOR i IN 1..array_upper(isin, 1) LOOP - address_street_word_id := get_name_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); - isin_tokens := isin_tokens || address_street_word_id; - END IF; - - -- merge into address vector - address_street_word_id := get_word_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); - END IF; - END LOOP; - END IF; - END IF; - --DEBUG: RAISE WARNING 'postcode tokens collected'; -- %NOTIGERDATA% IF 0 THEN -- for the USA we have an additional address table. Merge in zip codes from there too @@ -1824,6 +1829,11 @@ BEGIN VALUES (NEW.place_id, location.place_id, true, location_isaddress, location.distance, location.rank_address); IF location_isaddress THEN + -- add postcode if we have one + -- (If multiple postcodes are available, we end up with the highest ranking one.) + IF location.postcode is not null THEN + NEW.postcode = location.postcode; + END IF; address_havelevel[location.rank_address] := true; IF NOT location.isguess THEN @@ -1858,6 +1868,11 @@ BEGIN nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); + IF NEW.postcode is null AND location.postcode is not null + AND NOT address_havelevel[location.rank_address] THEN + NEW.postcode := location.postcode; + END IF; + address_havelevel[location.rank_address] := true; IF location.rank_address > parent_place_id_rank THEN @@ -1900,6 +1915,15 @@ BEGIN END IF; --DEBUG: RAISE WARNING 'search terms for long ways added'; + IF NEW.address is not null AND NEW.address ? 'postcode' + AND NEW.address->'postcode' not similar to '%(,|;)%' THEN + NEW.postcode := NEW.address->'postcode'; + END IF; + + IF NEW.postcode is null AND NEW.rank_search > 8 THEN + NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry); + END IF; + -- if we have a name add this to the name search table IF NEW.name IS NOT NULL THEN diff --git a/sql/partition-functions.src.sql b/sql/partition-functions.src.sql index d7153ca3..110dd0cd 100644 --- a/sql/partition-functions.src.sql +++ b/sql/partition-functions.src.sql @@ -6,12 +6,12 @@ BEGIN -- start IF in_partition = -partition- THEN FOR r IN - SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, centroid FROM ( + SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, postcode, centroid FROM ( SELECT * FROM location_area_large_-partition- WHERE ST_Intersects(geometry, feature) and rank_search < maxrank UNION ALL SELECT * FROM location_area_country WHERE ST_Intersects(geometry, feature) and rank_search < maxrank ) as location_area - GROUP BY place_id, keywords, rank_address, rank_search, isguess, centroid + GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid ORDER BY rank_address, isin_tokens && keywords desc, isguess asc, ST_Distance(feature, centroid) * CASE diff --git a/sql/partition-tables.src.sql b/sql/partition-tables.src.sql index eb2ed77b..d8f02e10 100644 --- a/sql/partition-tables.src.sql +++ b/sql/partition-tables.src.sql @@ -21,6 +21,7 @@ create type nearfeaturecentr as ( rank_search smallint, distance float, isguess boolean, + postcode TEXT, centroid GEOMETRY ); From 16053e81bf9dacc9761de3c833edd0d1c5b20831 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 1 Jul 2017 23:16:01 +0200 Subject: [PATCH 08/41] show address tags and postcode in details --- lib/template/details-html.php | 2 ++ website/details.php | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/lib/template/details-html.php b/lib/template/details-html.php index 46b6a50c..76818395 100644 --- a/lib/template/details-html.php +++ b/lib/template/details-html.php @@ -124,6 +124,8 @@ kv('Wikipedia Calculated' , wikipediaLink($aPointDetails) ); } + kv('Computed Postcode', $aPointDetails['postcode']); + kv('Address Tags' , hash_to_subtable($aPointDetails['aAddressTags']) ); kv('Extra Tags' , hash_to_subtable($aPointDetails['aExtraTags']) ); ?> diff --git a/website/details.php b/website/details.php index 7f0a951f..5b52e2cc 100755 --- a/website/details.php +++ b/website/details.php @@ -130,6 +130,13 @@ if (PEAR::isError($aPointDetails['aNames'])) { // possible timeout $aPointDetails['aNames'] = []; } +// Address tags +$sSQL = "SELECT (each(address)).key as key,(each(address)).value FROM placex WHERE place_id = $iPlaceID ORDER BY key"; +$aPointDetails['aAddressTags'] = $oDB->getAssoc($sSQL); +if (PEAR::isError($aPointDetails['aAddressTags'])) { // possible timeout + $aPointDetails['aAddressTags'] = []; +} + // Extra tags $sSQL = "SELECT (each(extratags)).key,(each(extratags)).value FROM placex WHERE place_id = $iPlaceID ORDER BY (each(extratags)).key"; $aPointDetails['aExtraTags'] = $oDB->getAssoc($sSQL); From dc2911ae7257b0b1d95eec922e17361a458d213f Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 2 Jul 2017 11:32:32 +0200 Subject: [PATCH 09/41] normalize postcodes before adding to location tables --- sql/functions.sql | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index 44918774..1940ea75 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -617,10 +617,9 @@ BEGIN x := deleteLocationArea(partition, place_id, rank_search); -- add postcode only if it contains a single entry, i.e. ignore postcode lists - IF in_postcode IS NULL OR in_postcode similar to '%(,|;)%' THEN - postcode := NULL; - ELSE - postcode := in_postcode; + postcode := NULL; + IF in_postcode is not null AND in_postcode not similar to '%(,|;)%' THEN + postcode := upper(trim (both ' ' from in_postcode)); END IF; IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') THEN From d0cc4006d30dfbf7050ff6287dd16b3e4fec8f88 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 2 Jul 2017 11:37:37 +0200 Subject: [PATCH 10/41] remove unused get_address_postcode function --- sql/functions.sql | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index 1940ea75..e354d010 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -2324,46 +2324,6 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; - -CREATE OR REPLACE FUNCTION get_address_postcode(for_place_id BIGINT) RETURNS TEXT - AS $$ -DECLARE - result TEXT[]; - search TEXT[]; - for_postcode TEXT; - found INTEGER; - location RECORD; -BEGIN - - found := 1000; - search := ARRAY['ref']; - result := '{}'; - - select postcode from placex where place_id = for_place_id limit 1 into for_postcode; - - FOR location IN - select rank_address,name,distance,length(name::text) as namelength - from place_addressline join placex on (address_place_id = placex.place_id) - where place_addressline.place_id = for_place_id and rank_address in (5,11) - order by rank_address desc,rank_search desc,fromarea desc,distance asc,namelength desc - LOOP - IF array_upper(search, 1) IS NOT NULL AND array_upper(location.name, 1) IS NOT NULL THEN - FOR j IN 1..array_upper(search, 1) LOOP - FOR k IN 1..array_upper(location.name, 1) LOOP - IF (found > location.rank_address AND location.name[k].key = search[j] AND location.name[k].value != '') AND NOT result @> ARRAY[trim(location.name[k].value)] AND (for_postcode IS NULL OR location.name[k].value ilike for_postcode||'%') THEN - result[(100 - location.rank_address)] := trim(location.name[k].value); - found := location.rank_address; - END IF; - END LOOP; - END LOOP; - END IF; - END LOOP; - - RETURN array_to_string(result,', '); -END; -$$ -LANGUAGE plpgsql; - --housenumber only needed for tiger data CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT, housenumber INTEGER, languagepref TEXT[]) RETURNS TEXT AS $$ From 9a86c0cebcdf80e8d595dac56024cd502bba6665 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 2 Jul 2017 14:16:48 +0200 Subject: [PATCH 11/41] use only computed postcode when getting address Postcodes from address parts are now ignored as they have been already taken into account when computing the postcode. --- sql/functions.sql | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index e354d010..573e80e7 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -804,7 +804,6 @@ BEGIN RETURN NULL; END IF; - NEW.postcode := NEW.address->'postcode'; NEW.name := hstore('ref', NEW.postcode); SELECT * FROM get_postcode_rank(NEW.country_code, NEW.postcode) @@ -2430,9 +2429,8 @@ BEGIN found := 1000; hadcountry := false; FOR location IN - select placex.place_id, osm_type, osm_id, - CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name, - class, type, admin_level, true as fromarea, true as isaddress, + select placex.place_id, osm_type, osm_id, name, + class, type, admin_level, true as isaddress, CASE WHEN rank_address = 0 THEN 100 WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address, 0 as distance, country_code, postcode from placex @@ -2442,13 +2440,9 @@ BEGIN IF searchcountrycode IS NULL AND location.country_code IS NOT NULL THEN searchcountrycode := location.country_code; END IF; - IF searchpostcode IS NOT NULL and location.type = 'postcode' THEN + IF location.type in ('postcode', 'postal_code') THEN location.isaddress := FALSE; - END IF; - IF searchpostcode IS NULL and location.postcode IS NOT NULL THEN - searchpostcode := location.postcode; - END IF; - IF location.rank_address = 4 AND location.isaddress THEN + ELSEIF location.rank_address = 4 THEN hadcountry := true; END IF; IF location.rank_address < 4 AND NOT hadcountry THEN @@ -2459,15 +2453,14 @@ BEGIN END IF; END IF; countrylocation := ROW(location.place_id, location.osm_type, location.osm_id, location.name, location.class, - location.type, location.admin_level, location.fromarea, location.isaddress, location.rank_address, + location.type, location.admin_level, true, location.isaddress, location.rank_address, location.distance)::addressline; RETURN NEXT countrylocation; found := location.rank_address; END LOOP; FOR location IN - select placex.place_id, osm_type, osm_id, - CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name, + select placex.place_id, osm_type, osm_id, name, CASE WHEN extratags ? 'place' THEN 'place' ELSE class END as class, CASE WHEN extratags ? 'place' THEN extratags->'place' ELSE type END as type, admin_level, fromarea, isaddress, @@ -2484,12 +2477,9 @@ BEGIN IF searchcountrycode IS NULL AND location.country_code IS NOT NULL THEN searchcountrycode := location.country_code; END IF; - IF searchpostcode IS NOT NULL and location.type = 'postcode' THEN + IF location.type in ('postcode', 'postal_code') THEN location.isaddress := FALSE; END IF; - IF searchpostcode IS NULL and location.isaddress and location.type != 'postcode' and location.postcode IS NOT NULL and location.postcode not similar to '%(,|;)%' THEN - searchpostcode := location.postcode; - END IF; IF location.rank_address = 4 AND location.isaddress THEN hadcountry := true; END IF; @@ -2523,7 +2513,6 @@ BEGIN IF searchhousename IS NOT NULL THEN location := ROW(in_place_id, null, null, searchhousename, searchclass, searchtype, null, true, true, 29, 0)::addressline; --- location := ROW(in_place_id, null, null, searchhousename, 'place', 'house_name', null, true, true, 29, 0)::addressline; RETURN NEXT location; END IF; From 43869b9938a25f87481d6ecd0654906a9627e7aa Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 2 Jul 2017 16:28:02 +0200 Subject: [PATCH 12/41] make sure postcode gets recomputed on update --- sql/functions.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/functions.sql b/sql/functions.sql index 573e80e7..1c4fbb64 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -1272,6 +1272,7 @@ BEGIN -- cheaper but less acurate place_centroid := ST_PointOnSurface(NEW.geometry); NEW.centroid := null; + NEW.postcode := null; --DEBUG: RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(place_centroid); -- recalculate country and partition @@ -1512,7 +1513,7 @@ BEGIN IF NEW.address is not null AND NEW.address ? 'postcode' THEN NEW.postcode = NEW.address->'postcode'; ELSE - SELECT postcode FROM placex WHERE place_id = parent_place_id INTO NEW.postcode; + SELECT postcode FROM placex WHERE place_id = NEW.parent_place_id INTO NEW.postcode; END IF; IF NEW.postcode is null THEN NEW.postcode := get_nearest_postcode(NEW.country_code, place_centroid); From a2a1901b090defca71fddc6ac3d5165048bb4621 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 3 Jul 2017 22:13:01 +0200 Subject: [PATCH 13/41] add postcodes as special items in word table --- data/words.sql | 2 +- sql/functions.sql | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/data/words.sql b/data/words.sql index f927d000..b1aa6677 100644 --- a/data/words.sql +++ b/data/words.sql @@ -29787,7 +29787,7 @@ st 5557484 -- prefill word table select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null; -select count(make_keywords(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null; +select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode' and address->'postcode' not similar to '%(,|;)%') as w where v is not null; select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w; -- copy the word frequencies diff --git a/sql/functions.sql b/sql/functions.sql index 1c4fbb64..b5ac15a3 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -83,6 +83,26 @@ END; $$ LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + lookup_word TEXT; + return_word_id INTEGER; +BEGIN + lookup_word := upper(trim(postcode)); + lookup_token := ' ' || make_standard_name(lookup_word); + SELECT min(word_id) FROM word WHERE word_token = lookup_token and class='place' and type='postcode' into return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word, 'place', 'postcode', null, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT, lookup_country_code varchar(2)) RETURNS INTEGER AS $$ @@ -619,7 +639,7 @@ BEGIN -- add postcode only if it contains a single entry, i.e. ignore postcode lists postcode := NULL; IF in_postcode is not null AND in_postcode not similar to '%(,|;)%' THEN - postcode := upper(trim (both ' ' from in_postcode)); + postcode := upper(trim (in_postcode)); END IF; IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') THEN @@ -1266,6 +1286,10 @@ BEGIN addr_street = NEW.address->'street'; addr_place = NEW.address->'place'; + + IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(,|;)%' THEN + i := getorcreate_postcode_id(NEW.address->'postcode'); + END IF; END IF; -- Speed up searches - just use the centroid of the feature From 727bd73d0beb8839286bec0d1cadf9199e3f1802 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 3 Jul 2017 23:03:19 +0200 Subject: [PATCH 14/41] fix typo --- sql/tables.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/tables.sql b/sql/tables.sql index 98bf0c02..caaa62df 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -226,8 +226,8 @@ CREATE TABLE import_polygon_error ( country_code varchar(2), updated timestamp, errormessage text, - prevgeometry GEOMTRY(Geometry, 4326), - newgeometry GEOMTRY(Geometry, 4326) + prevgeometry GEOMETRY(Geometry, 4326), + newgeometry GEOMETRY(Geometry, 4326) ); CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); GRANT SELECT ON import_polygon_error TO "{www-user}"; From 872e73314e85af8ae92cf77a6a8bf5ea84829938 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 4 Jul 2017 00:02:11 +0200 Subject: [PATCH 15/41] move postcodes into special operation for Searches Introduces postcode field in searches and sorts out any marked postcodes. --- lib/Geocode.php | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 80449cb6..f07c5104 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -666,12 +666,12 @@ class Geocode Score how good the search is so they can be ordered */ - foreach ($aPhrases as $iPhrase => $sPhrase) { + foreach ($aPhrases as $iPhrase => $aPhrase) { $aNewPhraseSearches = array(); if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; else $sPhraseType = ''; - foreach ($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) { + foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) { // Too many permutations - too expensive if ($iWordSet > 120) break; @@ -710,30 +710,25 @@ class Geocode ); if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - } elseif ($sPhraseType == 'postalcode') { + } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { - // If we already have a name try putting the postcode first - if (sizeof($aSearch['aName'])) { + // If we have structured search or this is the first term, + // make the postcode the primary search element. + if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']) == 0) { $aNewSearch = $aSearch; + $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'] = array(); - $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_token']; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; } - if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } else { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } else { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; + // If we have a structured search or this is not the first term, + // add the postcode as an addendum. + if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName'])) { + $aSearch['sPostcode'] = $aSearchTerm['word_token']; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') { if ($aSearch['sHouseNumber'] === '') { @@ -985,6 +980,7 @@ class Geocode 'sClass' => '', 'sType' => '', 'sHouseNumber' => '', + 'sPostcode' => '', 'oNear' => $oNearPoint ) ); From 57dc0304b50e9e4519cbecabdd5e5c43c1130121 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 4 Jul 2017 23:25:48 +0200 Subject: [PATCH 16/41] add search for postcode Implements the 'postcode' operator. --- lib/Geocode.php | 43 ++++++++++++++++++++++++++++++++++++++++++- lib/lib.php | 3 ++- sql/functions.sql | 10 +++++++++- sql/tables.sql | 1 + 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index f07c5104..82c9b0b6 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -476,6 +476,35 @@ class Geocode if ($this->bIncludeNameDetails) $sSQL .= "name, "; $sSQL .= " extratags->'place' "; + // postcode table + $sSQL .= "UNION "; + $sSQL .= "SELECT"; + $sSQL .= " 'P' as osm_type,"; + $sSQL .= " (SELECT osm_id from placex p WHERE p.place_id = parent_place_id) as osm_id,"; + $sSQL .= " 'place' as class, 'postcode' as type,"; + $sSQL .= " null as admin_level, rank_search, rank_address,"; + $sSQL .= " place_id, parent_place_id, country_code,"; + $sSQL .= " get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,"; + $sSQL .= " postcode as placename,"; + $sSQL .= " postcode as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "null AS names,"; + $sSQL .= " ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,"; + $sSQL .= $sImportanceSQL."(0.75-(rank_search::float/40)) AS importance, "; + $sSQL .= " ("; + $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; + $sSQL .= " FROM "; + $sSQL .= " place_addressline s, "; + $sSQL .= " placex p"; + $sSQL .= " WHERE s.place_id = parent_place_id"; + $sSQL .= " AND p.place_id = s.address_place_id "; + $sSQL .= " AND s.isaddress"; + $sSQL .= " AND p.importance is not null"; + $sSQL .= " ) AS addressimportance, "; + $sSQL .= " null AS extra_place "; + $sSQL .= "FROM location_postcode"; + $sSQL .= " WHERE place_id in ($sPlaceIDs) "; + if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { // only Tiger housenumbers and interpolation lines need to be interpolated, because they are saved as lines // with start- and endnumber, the common osm housenumbers are usually saved as points @@ -719,7 +748,7 @@ class Geocode $aNewSearch = $aSearch; $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_token']; + $aNewSearch['aName'][$aSearchTerm['word_id']] = substr($aSearchTerm['word_token'], 1); if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; } @@ -1308,6 +1337,18 @@ class Geocode // If a coordinate is given, the search must either // be for a name or a special search. Ignore everythin else. $aPlaceIDs = array(); + } elseif ($aSearch['sOperator'] == 'postcode') { + $sSQL = "SELECT place_id FROM location_postcode "; + $sSQL .= "WHERE postcode = '".pg_escape_string(reset($aSearch['aName']))."'"; + if ($aSearch['sCountryCode']) { + $sSQL .= " AND country_code = '".$aSearch['sCountryCode']."'"; + } + if ($sCountryCodesSQL) { + $sSQL .= " AND country_code in ($sCountryCodesSQL)"; + } + $sSQL .= " LIMIT $this->iLimit"; + if (CONST_Debug) var_dump($sSQL); + $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); } else { $aPlaceIDs = array(); diff --git a/lib/lib.php b/lib/lib.php index cb599ae8..98b7d092 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -516,7 +516,7 @@ function _debugDumpGroupedSearches($aData, $aTokens) echo ""; echo ""; echo ""; - echo ""; + echo ""; echo ""; foreach ($aData as $iRank => $aRankedSet) { foreach ($aRankedSet as $aRow) { @@ -561,6 +561,7 @@ function _debugDumpGroupedSearches($aData, $aTokens) echo ""; echo ""; + echo ""; echo ""; echo ""; diff --git a/sql/functions.sql b/sql/functions.sql index b5ac15a3..bceb9ec4 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -2431,11 +2431,19 @@ BEGIN -- %NOAUXDATA% IF 0 THEN IF for_place_id IS NULL THEN select parent_place_id,'us', housenumber, 30, postcode, null, 'place', 'house' from location_property_aux - WHERE place_id = in_place_id + WHERE place_id = in_place_id INTO for_place_id,searchcountrycode, searchhousenumber, searchrankaddress, searchpostcode, searchhousename, searchclass, searchtype; END IF; -- %NOAUXDATA% END IF; + -- postcode table + IF for_place_id IS NULL THEN + select parent_place_id, country_code, rank_address, postcode, 'place', 'postcode' + FROM location_postcode + WHERE place_id = in_place_id + INTO for_place_id, searchcountrycode, searchrankaddress, searchpostcode, searchclass, searchtype; + END IF; + IF for_place_id IS NULL THEN select parent_place_id, country_code, housenumber, rank_search, postcode, name, class, type from placex WHERE place_id = in_place_id and rank_search > 27 diff --git a/sql/tables.sql b/sql/tables.sql index caaa62df..4a22a814 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -212,6 +212,7 @@ CREATE TABLE location_postcode ( geometry GEOMETRY(Geometry, 4326) ); CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry) {ts:address-index}; +GRANT SELECT ON location_postcode TO "{www-user}" ; CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcode FOR EACH ROW EXECUTE PROCEDURE postcode_update(); From ce76a2510159a503c0bae7c7c2ae92cb5e42d68d Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 4 Jul 2017 23:42:33 +0200 Subject: [PATCH 17/41] merging back postcodes is no longer necessary --- sql/functions.sql | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index bceb9ec4..dea2eda4 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -1543,27 +1543,6 @@ BEGIN NEW.postcode := get_nearest_postcode(NEW.country_code, place_centroid); END IF; - -- Merge the postcode into the parent's address if necessary - IF NEW.postcode IS NOT NULL THEN - --DEBUG: RAISE WARNING 'Merging postcode into parent'; - isin_tokens := '{}'::int[]; - address_street_word_id := getorcreate_word_id(make_standard_name(NEW.postcode)); - IF address_street_word_id is not null - and not ARRAY[address_street_word_id] <@ location.nameaddress_vector THEN - isin_tokens := isin_tokens || address_street_word_id; - END IF; - address_street_word_id := getorcreate_name_id(make_standard_name(NEW.postcode)); - IF address_street_word_id is not null - and not ARRAY[address_street_word_id] <@ location.nameaddress_vector THEN - isin_tokens := isin_tokens || address_street_word_id; - END IF; - IF isin_tokens != '{}'::int[] THEN - UPDATE search_name - SET nameaddress_vector = search_name.nameaddress_vector || isin_tokens - WHERE place_id = NEW.parent_place_id; - END IF; - END IF; - -- If there is no name it isn't searchable, don't bother to create a search record IF NEW.name is NULL THEN --DEBUG: RAISE WARNING 'Not a searchable place % %', NEW.osm_type, NEW.osm_id; From 563099f7fa0f03ce563020803b21bb8cc882e468 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 5 Jul 2017 21:15:50 +0200 Subject: [PATCH 18/41] take address part into account in postcode search --- lib/Geocode.php | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 82c9b0b6..eae3baf8 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -744,7 +744,7 @@ class Geocode if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { // If we have structured search or this is the first term, // make the postcode the primary search element. - if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']) == 0) { + if ($aSearchTerm['operator'] == '' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']) == 0)) { $aNewSearch = $aSearch; $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); @@ -1338,13 +1338,20 @@ class Geocode // be for a name or a special search. Ignore everythin else. $aPlaceIDs = array(); } elseif ($aSearch['sOperator'] == 'postcode') { - $sSQL = "SELECT place_id FROM location_postcode "; - $sSQL .= "WHERE postcode = '".pg_escape_string(reset($aSearch['aName']))."'"; + $sSQL = "SELECT p.place_id FROM location_postcode p "; + if (sizeof($aSearch['aAddress'])) { + $sSQL .= ", search_name s "; + $sSQL .= "WHERE s.place_id = p.parent_place_id "; + $sSQL .= "AND array_cat(s.nameaddress_vector, s.name_vector) @> ARRAY[".join($aSearch['aAddress'], ",")."] AND "; + } else { + $sSQL .= " WHERE "; + } + $sSQL .= "p.postcode = '".pg_escape_string(reset($aSearch['aName']))."'"; if ($aSearch['sCountryCode']) { - $sSQL .= " AND country_code = '".$aSearch['sCountryCode']."'"; + $sSQL .= " AND p.country_code = '".$aSearch['sCountryCode']."'"; } if ($sCountryCodesSQL) { - $sSQL .= " AND country_code in ($sCountryCodesSQL)"; + $sSQL .= " AND p.country_code in ($sCountryCodesSQL)"; } $sSQL .= " LIMIT $this->iLimit"; if (CONST_Debug) var_dump($sSQL); From 0ecb920866cea50294d1124b85f971626355d520 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 5 Jul 2017 21:34:00 +0200 Subject: [PATCH 19/41] immediately drop searches where requested country code does not match --- lib/Geocode.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index eae3baf8..7f1c4edc 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -1272,13 +1272,16 @@ class Geocode if (CONST_Debug) echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens); + if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) { + continue; + } + // No location term? if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) { if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) { // Just looking for a country by code - look it up if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) { $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4"; - if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; if ($bBoundingBoxSearch) $sSQL .= " AND _st_intersects($this->sViewboxSmallSQL, geometry)"; $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; @@ -1349,8 +1352,7 @@ class Geocode $sSQL .= "p.postcode = '".pg_escape_string(reset($aSearch['aName']))."'"; if ($aSearch['sCountryCode']) { $sSQL .= " AND p.country_code = '".$aSearch['sCountryCode']."'"; - } - if ($sCountryCodesSQL) { + } elseif ($sCountryCodesSQL) { $sSQL .= " AND p.country_code in ($sCountryCodesSQL)"; } $sSQL .= " LIMIT $this->iLimit"; From 3714b7ea7d5c269d21693564b81497ccb4056571 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 5 Jul 2017 23:35:22 +0200 Subject: [PATCH 20/41] take postcode into account for other searches Existence of postcode is still optional but if a matching result is found, then non-matching ones will be discarded. --- lib/Geocode.php | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 7f1c4edc..d77d9752 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -755,7 +755,7 @@ class Geocode // If we have a structured search or this is not the first term, // add the postcode as an addendum. if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName'])) { - $aSearch['sPostcode'] = $aSearchTerm['word_token']; + $aSearch['sPostcode'] = substr($aSearchTerm['word_token'], 1); if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } @@ -1423,6 +1423,8 @@ class Geocode $aTerms[] = $aSearch['oNear']->withinSQL('centroid'); $aOrder[] = $aSearch['oNear']->distanceSQL('centroid'); + } elseif ($aSearch['sPostcode']) { + $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."')"; } if (sizeof($this->aExcludePlaceIDs)) { $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")"; @@ -1711,6 +1713,21 @@ class Geocode var_Dump($aPlaceIDs); } + if ($aSearch['sPostcode']) { + $sSQL = 'SELECT place_id FROM placex'; + $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')'; + $sSQL .= " AND postcode = '".pg_escape_string($aSearch['sPostcode'])."'"; + if (CONST_Debug) var_dump($sSQL); + $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL)); + if ($aFilteredPlaceIDs) { + $aPlaceIDs = $aFilteredPlaceIDs; + if (CONST_Debug) { + echo "
Place IDs after postcode filtering: "; + var_Dump($aPlaceIDs); + } + } + } + foreach ($aPlaceIDs as $iPlaceID) { // array for placeID => -1 | Tiger housenumber $aResultPlaceIDs[$iPlaceID] = $searchedHousenumber; From 53f8459e974b025299a1329449a6483e443bdd27 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 6 Jul 2017 19:51:18 +0200 Subject: [PATCH 21/41] move postcode indexing to end of setup The search_name tables are needed for finding the parent, so the rest of the database must be indexed. --- utils/setup.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/setup.php b/utils/setup.php index a9597da8..94087323 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -523,10 +523,6 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } - - echo "Indexing postcodes....\n"; - $sSQL = 'UPDATE location_postcode SET indexed_status = 0'; - if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } if ($aCMDResult['osmosis-init']) { @@ -543,6 +539,10 @@ if ($aCMDResult['index'] || $aCMDResult['all']) { passthruCheckReturn($sBaseCmd.' -r 5 -R 25'); if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE'); passthruCheckReturn($sBaseCmd.' -r 26'); + + echo "Indexing postcodes....\n"; + $sSQL = 'UPDATE location_postcode SET indexed_status = 0'; + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) { From 413c69ddc93e75998e66bf13451bdbaaf9204a1a Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 6 Jul 2017 20:42:03 +0200 Subject: [PATCH 22/41] improve calculation of postcode for interpolations --- sql/functions.sql | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index dea2eda4..be836c92 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -1043,8 +1043,8 @@ DECLARE linegeo GEOMETRY; splitline GEOMETRY; sectiongeo GEOMETRY; + interpol_postcode TEXT; postcode TEXT; - seg_postcode TEXT; BEGIN -- deferred delete IF OLD.indexed_status = 100 THEN @@ -1063,9 +1063,11 @@ BEGIN NEW.address->'place', NEW.partition, place_centroid, NEW.linegeo); - - IF NEW.address is not NULL and NEW.address ? 'postcode' THEN - NEW.postcode = NEW.address->'postcode'; + IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN + interpol_postcode := NEW.address->'postcode'; + housenum := getorcreate_postcode_id(NEW.address->'postcode'); + ELSE + interpol_postcode := NULL; END IF; -- if the line was newly inserted, split the line as necessary @@ -1078,7 +1080,6 @@ BEGIN linegeo := NEW.linegeo; startnumber := NULL; - postcode := NEW.postcode; FOR nodeidpos in 1..array_upper(waynodes, 1) LOOP @@ -1111,15 +1112,24 @@ BEGIN sectiongeo := ST_Reverse(sectiongeo); END IF; - seg_postcode := coalesce(postcode, - prevnode.address->'postcode', - nextnode.address->'postcode'); + -- determine postcode + postcode := coalesce(interpol_postcode, + prevnode.address->'postcode', + nextnode.address->'postcode', + postcode); + + IF postcode is NULL THEN + SELECT placex.postcode FROM placex WHERE place_id = NEW.parent_place_id INTO postcode; + END IF; + IF postcode is NULL THEN + postcode := get_nearest_postcode(NEW.country_code, nextnode.geometry); + END IF; IF NEW.startnumber IS NULL THEN NEW.startnumber := startnumber; NEW.endnumber := endnumber; NEW.linegeo := sectiongeo; - NEW.postcode := seg_postcode; + NEW.postcode := postcode; ELSE insert into location_property_osmline (linegeo, partition, osm_id, parent_place_id, @@ -1128,7 +1138,7 @@ BEGIN geometry_sector, indexed_status) values (sectiongeo, NEW.partition, NEW.osm_id, NEW.parent_place_id, startnumber, endnumber, NEW.interpolationtype, - NEW.address, seg_postcode, + NEW.address, postcode, NEW.country_code, NEW.geometry_sector, 0); END IF; END IF; From 5b4bbab9be984706313d9273d029df42f5762b54 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 6 Jul 2017 22:48:09 +0200 Subject: [PATCH 23/41] include GB CodePoint data into location_postcode table --- lib/Geocode.php | 16 ++-------------- lib/lib.php | 19 ------------------- sql/indices.src.sql | 5 +---- utils/setup.php | 16 ++++++++++++++++ 4 files changed, 19 insertions(+), 37 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index d77d9752..0546983f 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -1134,21 +1134,9 @@ class Geocode } if (CONST_Debug) var_Dump($aPhrases, $aValidTokens); - // Try and calculate GB postcodes we might be missing + // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code foreach ($aTokens as $sToken) { - // Source of gb postcodes is now definitive - always use - if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) { - if (substr($aData[1], -2, 1) != ' ') { - $aData[0] = substr($aData[0], 0, strlen($aData[1])-1).' '.substr($aData[0], strlen($aData[1])-1); - $aData[1] = substr($aData[1], 0, -1).' '.substr($aData[1], -1, 1); - } - $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $this->oDB); - if ($aGBPostcodeLocation) { - $aValidTokens[$sToken] = $aGBPostcodeLocation; - } - } elseif (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { - // US ZIP+4 codes - if there is no token, - // merge in the 5-digit ZIP code + if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { if (isset($aValidTokens[$aData[1]])) { foreach ($aValidTokens[$aData[1]] as $aToken) { if (!$aToken['class']) { diff --git a/lib/lib.php b/lib/lib.php index 98b7d092..8f353a4d 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -116,25 +116,6 @@ function getTokensFromSets($aSets) } -function gbPostcodeCalculate($sPostcode, $sPostcodeSector, $sPostcodeEnd, &$oDB) -{ - // Try an exact match on the gb_postcode table - $sSQL = 'select \'AA\', ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from gb_postcode where postcode = \''.$sPostcode.'\''; - $aNearPostcodes = chksql($oDB->getAll($sSQL)); - - if (sizeof($aNearPostcodes)) { - $aPostcodes = array(); - foreach ($aNearPostcodes as $aPostcode) { - $aPostcodes[] = array('lat' => $aPostcode['lat'], 'lon' => $aPostcode['lon'], 'radius' => 0.005); - } - - return $aPostcodes; - } - - return false; -} - - function getClassTypes() { return array( diff --git a/sql/indices.src.sql b/sql/indices.src.sql index 9de2c97f..cf5c4bc1 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -25,8 +25,5 @@ DROP INDEX IF EXISTS place_id_idx; CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index}; -CREATE INDEX idx_gb_postcode_postcode ON gb_postcode USING BTREE (postcode) {ts:search-index}; - CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index}; -CREATE INDEX idx_postcode_parent_id ON location_postcode USING BTREE (parent_place_id) {ts:search-index} where parent_place_id IS NOT NULL; -CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode, country_code) {ts:search-index}; +CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode) {ts:search-index}; diff --git a/utils/setup.php b/utils/setup.php index 94087323..23861201 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -522,7 +522,22 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { $sSQL .= " WHERE country_code = 'us')"; if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); + + $sSQL = "SELECT count(getorcreate_postcode_id(postcode)) FROM us_postcode"; + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } + + // add missing postcodes for GB (if available) + $sSQL = "INSERT INTO location_postcode"; + $sSQL .= " (place_id, indexed_status, country_code, postcode, geometry) "; + $sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry"; + $sSQL .= " FROM gb_postcode WHERE postcode NOT IN"; + $sSQL .= " (SELECT postcode FROM location_postcode"; + $sSQL .= " WHERE country_code = 'gb')"; + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); + + $sSQL = "SELECT count(getorcreate_postcode_id(postcode)) FROM gb_postcode"; + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } if ($aCMDResult['osmosis-init']) { @@ -541,6 +556,7 @@ if ($aCMDResult['index'] || $aCMDResult['all']) { passthruCheckReturn($sBaseCmd.' -r 26'); echo "Indexing postcodes....\n"; + $oDB =& getDB(); $sSQL = 'UPDATE location_postcode SET indexed_status = 0'; if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } From 99e9abe8431a3118d53f3450fb4c60dbe3eb5d3b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 6 Jul 2017 23:06:13 +0200 Subject: [PATCH 24/41] require postcodes to match exactly in normalised form --- lib/Geocode.php | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 0546983f..a133d868 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -51,10 +51,22 @@ class Geocode protected $sQuery = false; protected $aStructuredQuery = false; + protected $oNormalizer = null; + public function __construct(&$oDB) { $this->oDB =& $oDB; + $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); + } + + private function normTerm($sTerm) + { + if ($this->oNormalizer === null) { + return null; + } + + return $this->oNormalizer->transliterate($sTerm); } public function setReverseInPlan($bReverse) @@ -741,21 +753,21 @@ class Geocode } } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { + if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { // If we have structured search or this is the first term, // make the postcode the primary search element. if ($aSearchTerm['operator'] == '' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']) == 0)) { $aNewSearch = $aSearch; $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'][$aSearchTerm['word_id']] = substr($aSearchTerm['word_token'], 1); + $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word']; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; } // If we have a structured search or this is not the first term, // add the postcode as an addendum. if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName'])) { - $aSearch['sPostcode'] = substr($aSearchTerm['word_token'], 1); + $aSearch['sPostcode'] = $aSearchTerm['word']; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } @@ -943,13 +955,7 @@ class Geocode { if (!$this->sQuery && !$this->aStructuredQuery) return array(); - $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); - if ($oNormalizer !== null) { - $sNormQuery = $oNormalizer->transliterate($this->sQuery); - } else { - $sNormQuery = null; - } - + $sNormQuery = $this->normTerm($this->sQuery); $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]"; $sCountryCodesSQL = false; if ($this->aCountryCodes) { From 5237f44c4a3361e321bea6bcd2a7e3390a3cbb9c Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 6 Jul 2017 23:11:17 +0200 Subject: [PATCH 25/41] remove lat/lon check for search terms Was only used with GB postcodes which were removed. --- lib/Geocode.php | 9 --------- 1 file changed, 9 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index a133d868..6f26b3cd 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -742,15 +742,6 @@ class Geocode } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - } elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) { - if ($aSearch['oNear'] === false) { - $aSearch['oNear'] = new NearPoint( - $aSearchTerm['lat'], - $aSearchTerm['lon'], - $aSearchTerm['radius'] - ); - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { From a44377c7b07c8398c09079b56a743029b57be5a2 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 6 Jul 2017 23:54:07 +0200 Subject: [PATCH 26/41] fix postcode-related tests --- sql/functions.sql | 5 +- test/bdd/db/import/search_name.feature | 14 ----- .../db/update/poi-inherited-postcode.feature | 57 ------------------- test/bdd/db/update/search_terms.feature | 21 ------- 4 files changed, 3 insertions(+), 94 deletions(-) delete mode 100644 test/bdd/db/update/poi-inherited-postcode.feature delete mode 100644 test/bdd/db/update/search_terms.feature diff --git a/sql/functions.sql b/sql/functions.sql index be836c92..c779e826 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -265,6 +265,7 @@ DECLARE BEGIN rank_search := 30; rank_address := 30; + postcode := upper(postcode); IF country_code = 'gb' THEN IF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN @@ -824,9 +825,9 @@ BEGIN RETURN NULL; END IF; - NEW.name := hstore('ref', NEW.postcode); + NEW.name := hstore('ref', NEW.address->'postcode'); - SELECT * FROM get_postcode_rank(NEW.country_code, NEW.postcode) + SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode') INTO NEW.rank_search, NEW.rank_address; ELSEIF NEW.class = 'place' THEN diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature index 98def330..86bdea9b 100644 --- a/test/bdd/db/import/search_name.feature +++ b/test/bdd/db/import/search_name.feature @@ -23,17 +23,3 @@ Feature: Creation of search terms Then search_name contains | object | name_vector | nameaddress_vector | | N1 | foo | the road | - - Scenario: Roads take over the postcode from attached houses - Given the scene roads-with-pois - And the places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St | :p-S1 | - And the places - | osm | class | type | name | geometry | - | W1 | highway | residential | North St | :w-north | - When importing - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - diff --git a/test/bdd/db/update/poi-inherited-postcode.feature b/test/bdd/db/update/poi-inherited-postcode.feature deleted file mode 100644 index 1b2065e6..00000000 --- a/test/bdd/db/update/poi-inherited-postcode.feature +++ /dev/null @@ -1,57 +0,0 @@ -@DB -Feature: Update of POI-inherited poscode - Test updates of postcodes on street which was inherited from a related POI - - Background: Street and house with postcode - Given the scene roads-with-pois - And the places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St |:p-S1 | - And the places - | osm | class | type | name | geometry | - | W1 | highway | residential | North St | :w-north | - When importing - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode remains when way type is changed - When updating places - | osm | class | type | name | geometry | - | W1 | highway | unclassified | North St | :w-north | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode remains when way name is changed - When updating places - | osm | class | type | name | geometry | - | W1 | highway | unclassified | South St | :w-north | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode remains when way geometry is changed - When updating places - | osm | class | type | name | geometry | - | W1 | highway | unclassified | South St | :w-south | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode is added when POI postcode changes - When updating places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 54321 | North St |:p-S1 | - Then search_name contains - | object | nameaddress_vector | - | W1 | 54321 | - - Scenario: POI-inherited postcode remains when POI geometry changes - When updating places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St |:p-S2 | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - diff --git a/test/bdd/db/update/search_terms.feature b/test/bdd/db/update/search_terms.feature deleted file mode 100644 index 07dbd451..00000000 --- a/test/bdd/db/update/search_terms.feature +++ /dev/null @@ -1,21 +0,0 @@ -@DB -Feature: Update of search terms - Tests that search_name table is updated correctly - - Scenario: POI-inherited postcode remains when another POI is deleted - Given the scene roads-with-pois - And the places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St |:p-S1 | - | N2 | place | house | 2 | | North St |:p-S2 | - And the places - | osm | class | type | name | geometry | - | W1 | highway | residential | North St | :w-north | - When importing - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - When marking for delete N2 - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | From 50c5abf6bb5c904ca00c2e515359c74237d31b64 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 9 Jul 2017 09:37:10 +0200 Subject: [PATCH 27/41] fix API tests wrt postcodes --- lib/Geocode.php | 17 ++++++++++++----- test/bdd/api/search/params.feature | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 6f26b3cd..ae6f9cfb 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -422,8 +422,15 @@ class Geocode $sPlaceIDs = join(',', array_keys($aPlaceIDs)); $sImportanceSQL = ''; - if ($this->sViewboxSmallSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; - if ($this->sViewboxLargeSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom = ''; + if ($this->sViewboxSmallSQL) { + $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, geometry) THEN 1 ELSE 0.75 END * "; + } + if ($this->sViewboxLargeSQL) { + $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, geometry) THEN 1 ELSE 0.75 END * "; + } $sSQL = "SELECT "; $sSQL .= " osm_type,"; @@ -502,7 +509,7 @@ class Geocode if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,"; if ($this->bIncludeNameDetails) $sSQL .= "null AS names,"; $sSQL .= " ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,"; - $sSQL .= $sImportanceSQL."(0.75-(rank_search::float/40)) AS importance, "; + $sSQL .= $sImportanceSQLGeom."(0.75-(rank_search::float/40)) AS importance, "; $sSQL .= " ("; $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; $sSQL .= " FROM "; @@ -747,7 +754,7 @@ class Geocode if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { // If we have structured search or this is the first term, // make the postcode the primary search element. - if ($aSearchTerm['operator'] == '' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']) == 0)) { + if ($aSearchTerm['operator'] == '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { $aNewSearch = $aSearch; $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); @@ -1698,7 +1705,7 @@ class Geocode var_Dump($aPlaceIDs); } - if ($aSearch['sPostcode']) { + if (sizeof($aPlaceIDs) && $aSearch['sPostcode']) { $sSQL = 'SELECT place_id FROM placex'; $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')'; $sSQL .= " AND postcode = '".pg_escape_string($aSearch['sPostcode'])."'"; diff --git a/test/bdd/api/search/params.feature b/test/bdd/api/search/params.feature index d1cff1cc..fba83eab 100644 --- a/test/bdd/api/search/params.feature +++ b/test/bdd/api/search/params.feature @@ -25,6 +25,7 @@ Feature: Search queries | type | value | | city | Montevideo | | state | Montevideo | + | postcode | 11000 | | country | Uruguay | | country_code | uy | From ec8af1dd401addae37250d2fe33c141995498b0b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 9 Jul 2017 10:09:37 +0200 Subject: [PATCH 28/41] fix more tests --- test/bdd/api/search/params.feature | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/bdd/api/search/params.feature b/test/bdd/api/search/params.feature index fba83eab..d21f317e 100644 --- a/test/bdd/api/search/params.feature +++ b/test/bdd/api/search/params.feature @@ -67,7 +67,7 @@ Feature: Search queries Then there are duplicates Scenario: Search with bounded viewbox in right area - When sending json search query "restaurant" with address + When sending json search query "bar" with address | bounded | viewbox | | 1 | -56.16786,-34.84061,-56.12525,-34.86526 | Then result addresses contain @@ -75,7 +75,7 @@ Feature: Search queries | Montevideo | Scenario: Search with bounded viewboxlbrt in right area - When sending json search query "restaurant" with address + When sending json search query "bar" with address | bounded | viewboxlbrt | | 1 | -56.16786,-34.86526,-56.12525,-34.84061 | Then result addresses contain From 5673c4cf919d36ea440cfc23c2e5ebb41d815b43 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 9 Jul 2017 17:29:48 +0200 Subject: [PATCH 29/41] special handling for estimated postcode in areas Don't add a postcode at all if multiple estimated postcodes fall into the area. --- sql/functions.sql | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index c779e826..8d477a64 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -321,6 +321,22 @@ CREATE OR REPLACE FUNCTION get_nearest_postcode(country VARCHAR(2), geom GEOMETR DECLARE item RECORD; BEGIN + -- If the geometry is an area then only one postcode must be within + -- that area, otherwise consider the area as not having a postcode. + IF ST_GeometryType(geom) in ('ST_Polygon','ST_MultiPolygon') THEN + FOR item IN + SELECT min(postcode) as postcode, count(*) as cnt FROM + (SELECT postcode FROM location_postcode + WHERE ST_Contains(geom, location_postcode.geometry) LIMIT 2) sub + LOOP + IF item.cnt > 1 THEN + RETURN null; + ELSEIF item.cnt = 1 THEN + RETURN item.postcode; + END IF; + END LOOP; + END IF; + FOR item IN SELECT postcode FROM location_postcode WHERE ST_DWithin(geom, location_postcode.geometry, 0.05) @@ -1545,13 +1561,15 @@ BEGIN --DEBUG: RAISE WARNING 'Got parent details from search name'; -- determine postcode - IF NEW.address is not null AND NEW.address ? 'postcode' THEN - NEW.postcode = NEW.address->'postcode'; - ELSE - SELECT postcode FROM placex WHERE place_id = NEW.parent_place_id INTO NEW.postcode; - END IF; - IF NEW.postcode is null THEN - NEW.postcode := get_nearest_postcode(NEW.country_code, place_centroid); + IF NEW.rank_search > 4 THEN + IF NEW.address is not null AND NEW.address ? 'postcode' THEN + NEW.postcode = NEW.address->'postcode'; + ELSE + SELECT postcode FROM placex WHERE place_id = NEW.parent_place_id INTO NEW.postcode; + END IF; + IF NEW.postcode is null THEN + NEW.postcode := get_nearest_postcode(NEW.country_code, place_centroid); + END IF; END IF; -- If there is no name it isn't searchable, don't bother to create a search record From ccae2c733b1dea9eea57b7eadf220cab63d2a349 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 16 Jul 2017 19:49:47 +0200 Subject: [PATCH 30/41] simplify search for artificial postcodes --- sql/functions.sql | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index 8d477a64..a7e09ab3 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -319,35 +319,31 @@ LANGUAGE plpgsql IMMUTABLE; CREATE OR REPLACE FUNCTION get_nearest_postcode(country VARCHAR(2), geom GEOMETRY) RETURNS TEXT AS $$ DECLARE - item RECORD; + outcode TEXT; + cnt INTEGER; BEGIN -- If the geometry is an area then only one postcode must be within -- that area, otherwise consider the area as not having a postcode. IF ST_GeometryType(geom) in ('ST_Polygon','ST_MultiPolygon') THEN - FOR item IN - SELECT min(postcode) as postcode, count(*) as cnt FROM + SELECT min(postcode), count(*) FROM (SELECT postcode FROM location_postcode WHERE ST_Contains(geom, location_postcode.geometry) LIMIT 2) sub - LOOP - IF item.cnt > 1 THEN - RETURN null; - ELSEIF item.cnt = 1 THEN - RETURN item.postcode; - END IF; - END LOOP; + INTO outcode, cnt; + + IF cnt = 1 THEN + RETURN outcode; + ELSE + RETURN null; + END IF; END IF; - FOR item IN - SELECT postcode FROM location_postcode - WHERE ST_DWithin(geom, location_postcode.geometry, 0.05) - AND location_postcode.country_code = country - ORDER BY ST_Distance(geom, location_postcode.geometry) - LIMIT 1 - LOOP - RETURN item.postcode; - END LOOP; + SELECT postcode FROM location_postcode + WHERE ST_DWithin(geom, location_postcode.geometry, 0.05) + AND location_postcode.country_code = country + ORDER BY ST_Distance(geom, location_postcode.geometry) LIMIT 1 + INTO outcode; - RETURN null; + RETURN outcode; END; $$ LANGUAGE plpgsql; From caf018538fab83e3b5a32bbb5e5c41ec7045ead2 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 16 Jul 2017 19:55:55 +0200 Subject: [PATCH 31/41] normalize all postcodes before use --- sql/functions.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/functions.sql b/sql/functions.sql index a7e09ab3..3c64cf15 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -1142,7 +1142,7 @@ BEGIN NEW.startnumber := startnumber; NEW.endnumber := endnumber; NEW.linegeo := sectiongeo; - NEW.postcode := postcode; + NEW.postcode := upper(trim(postcode)); ELSE insert into location_property_osmline (linegeo, partition, osm_id, parent_place_id, @@ -1559,7 +1559,7 @@ BEGIN -- determine postcode IF NEW.rank_search > 4 THEN IF NEW.address is not null AND NEW.address ? 'postcode' THEN - NEW.postcode = NEW.address->'postcode'; + NEW.postcode = upper(trim(NEW.address->'postcode')); ELSE SELECT postcode FROM placex WHERE place_id = NEW.parent_place_id INTO NEW.postcode; END IF; @@ -1583,7 +1583,7 @@ BEGIN -- Just be happy with inheriting from parent road only IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.address->'postcode', NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry); --DEBUG: RAISE WARNING 'Place added to location table'; END IF; @@ -1944,7 +1944,7 @@ BEGIN IF NEW.address is not null AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN - NEW.postcode := NEW.address->'postcode'; + NEW.postcode := upper(trim(NEW.address->'postcode')); END IF; IF NEW.postcode is null AND NEW.rank_search > 8 THEN @@ -1955,7 +1955,7 @@ BEGIN IF NEW.name IS NOT NULL THEN IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.address->'postcode', NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry); --DEBUG: RAISE WARNING 'added to location (full)'; END IF; From 5e54e78176d44308b741033f41efdef3d6e60da8 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 9 Jul 2017 21:33:40 +0200 Subject: [PATCH 32/41] add migration path for postcodes --- docs/Migration.md | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 docs/Migration.md diff --git a/docs/Migration.md b/docs/Migration.md new file mode 100644 index 00000000..62cbc4d9 --- /dev/null +++ b/docs/Migration.md @@ -0,0 +1,56 @@ +Database Migrations +=================== + +This page describes database migrations necessary to update existing databases +to newer versions of Nominatim. + +SQL statements should be executed from the postgres commandline. Execute +`psql nominiatim` to enter command line mode. + +3.0.0 +----- + +### Postcode Table + +A new separate table for artificially computed postcode centroids was introduced. +Migration to the new format is possible but **not recommended**. + + * create postcode table and indexes, running the following SQL statements: + + CREATE TABLE location_postcode + (place_id BIGINT, parent_place_id BIGINT, rank_search SMALLINT, + rank_address SMALLINT, indexed_status SMALLINT, indexed_date TIMESTAMP, + country_code varchar(2), postcode TEXT, + geometry GEOMETRY(Geometry, 4326)); + CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry); + CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id); + CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode); + GRANT SELECT ON location_postcode TO "www-data"; + + * add postcode column to location_area tables with SQL statement: + + ALTER TABLE location_area ADD COLUMN postcode TEXT; + + * reimport functions + + ./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions + + * create appropriate triggers with SQL: + + CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcode + FOR EACH ROW EXECUTE PROCEDURE postcode_update(); + + * populate postcode table (will take a while): + + ./utils/setup.php --calculate-postcodes --index --index-noanalyse + +This will create a working database. You may also delete the old artificial +postcodes now. Note that this may be expensive and is not absolutely necessary. +The following SQL statement will remove them: + + DELETE FROM place_addressline a USING placex p + WHERE a.address_place_id = p.place_id and p.osm_type = 'P'; + ALTER TABLE placex DISABLE TRIGGER USER; + DELETE FROM placex WHERE osm_type = 'P'; + ALTER TABLE placex ENABLE TRIGGER USER; + From 3c9af7f151f27c05bc5873d4671e887ad95d0181 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 16 Jul 2017 20:32:21 +0200 Subject: [PATCH 33/41] move adding postcodes to word table to calculation step --- data/words.sql | 1 - utils/setup.php | 18 +++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/data/words.sql b/data/words.sql index b1aa6677..85578f20 100644 --- a/data/words.sql +++ b/data/words.sql @@ -29787,7 +29787,6 @@ st 5557484 -- prefill word table select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null; -select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode' and address->'postcode' not similar to '%(,|;)%') as w where v is not null; select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w; -- copy the word frequencies diff --git a/utils/setup.php b/utils/setup.php index 23861201..5236a827 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -522,9 +522,6 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { $sSQL .= " WHERE country_code = 'us')"; if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); - - $sSQL = "SELECT count(getorcreate_postcode_id(postcode)) FROM us_postcode"; - if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } // add missing postcodes for GB (if available) @@ -536,8 +533,19 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { $sSQL .= " WHERE country_code = 'gb')"; if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); - $sSQL = "SELECT count(getorcreate_postcode_id(postcode)) FROM gb_postcode"; - if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); + if (!$aCMDResult['all']) { + $sSQL = "DELETE FROM word WHERE class='place' and type='postcode'"; + $sSQL .= "and word NOT IN (SELECT postcode FROM location_postcode)"; + if (!pg_query($oDB->connection, $sSQL)) { + fail(pg_last_error($oDB->connection)); + } + } + $sSQL = "SELECT count(getorcreate_postcode_id(v)) FROM "; + $sSQL .= "(SELECT distinct(postcode) FROM location_postcode) p"; + + if (!pg_query($oDB->connection, $sSQL)) { + fail(pg_last_error($oDB->connection)); + } } if ($aCMDResult['osmosis-init']) { From f9205caf220e790fb9ac0607228bfbdbf7ab7373 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 6 Aug 2017 18:41:30 +0200 Subject: [PATCH 34/41] adapt scene generation tool to newest libosmium --- test/scenes/bin/osm2wkt.cc | 76 ++++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/test/scenes/bin/osm2wkt.cc b/test/scenes/bin/osm2wkt.cc index d13a4632..22e74b4d 100644 --- a/test/scenes/bin/osm2wkt.cc +++ b/test/scenes/bin/osm2wkt.cc @@ -7,19 +7,49 @@ #include #include -#include -#include +#include + #include #include #include #include #include +#include #include +#include typedef osmium::index::map::SparseMemArray index_type; typedef osmium::handler::NodeLocationsForWays location_handler_type; +struct AbsoluteIdHandler : public osmium::handler::Handler { + + enum { BASE = 100000000 }; + + void node(osmium::Node& o) { + if (o.id() < 0) + o.set_id(BASE-o.id()); + } + + void way(osmium::Way& o) { + if (o.id() < 0) + o.set_id(BASE-o.id()); + + for (osmium::NodeRef &n: o.nodes()) + if (n.ref() < 0) + n.set_ref(BASE-n.ref()); + } + + void relation(osmium::Relation& o) { + if (o.id() < 0) + o.set_id(BASE-o.id()); + + for (auto &m : o.members()) + if (m.ref() < 0) + m.set_ref(BASE-m.ref()); + } +}; + class ExportToWKTHandler : public osmium::handler::Handler { @@ -33,7 +63,8 @@ public: } void way(const osmium::Way& way) { - if (!way.is_closed() || !way.tags().get_value_by_key("area")) + if (!way.nodes().empty() + && (!way.is_closed() || !way.tags().get_value_by_key("area"))) print_geometry(way.tags(), m_factory.create_linestring(way)); } @@ -48,7 +79,6 @@ public: } private: - void print_geometry(const osmium::TagList& tags, const std::string& wkt) { const char* scenario = tags.get_value_by_key("test:section"); const char* id = tags.get_value_by_key("test:id"); @@ -68,28 +98,42 @@ int main(int argc, char* argv[]) { exit(1); } - std::string input_filename {argv[1]}; + osmium::io::File input_file{argv[1]}; - osmium::area::ProblemReporterException problem_reporter; - osmium::area::Assembler::config_type assembler_config(&problem_reporter); - osmium::area::MultipolygonCollector collector(assembler_config); + // need to sort the data first and make ids absolute + std::cerr << "Read file...\n"; + osmium::io::Reader reader{input_file}; + std::vector changes; + osmium::ObjectPointerCollection objects; + AbsoluteIdHandler abshandler; + while (osmium::memory::Buffer buffer = reader.read()) { + osmium::apply(buffer, abshandler, objects); + changes.push_back(std::move(buffer)); + } + reader.close(); + + std::cerr << "Sort file...\n"; + objects.sort(osmium::object_order_type_id_version()); + + osmium::area::Assembler::config_type assembler_config; + osmium::area::MultipolygonManager mp_manager{assembler_config}; std::cerr << "Pass 1...\n"; - osmium::io::Reader reader1(input_filename, osmium::osm_entity_bits::relation); - collector.read_relations(reader1); - std::cerr << "Pass 1 done\n"; - index_type index_pos; index_type index_neg; location_handler_type location_handler(index_pos, index_neg); + ExportToWKTHandler export_handler; + osmium::apply(objects.begin(), objects.end(), location_handler, + export_handler, mp_manager); + mp_manager.prepare_for_lookup(); + std::cerr << "Pass 1 done\n"; + std::cerr << "Pass 2...\n"; - ExportToWKTHandler export_handler; - osmium::io::Reader reader2(input_filename); - osmium::apply(reader2, location_handler, export_handler, collector.handler([&export_handler](osmium::memory::Buffer&& buffer) { + osmium::apply(objects.cbegin(), objects.cend(), mp_manager.handler([&export_handler](osmium::memory::Buffer&& buffer) { osmium::apply(buffer, export_handler); })); - reader2.close(); + export_handler.close(); std::cerr << "Pass 2 done\n"; } From e55ac77c9441359a44e5c4d464353a7970a648c7 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 6 Aug 2017 18:42:04 +0200 Subject: [PATCH 35/41] add simple tests for postcode import --- test/bdd/db/import/postcodes.feature | 115 ++++++++++++++++++ test/bdd/steps/db_ops.py | 2 +- .../scenes/data/building-on-street-corner.wkt | 2 +- test/scenes/data/poly-area.wkt | 22 ++-- test/scenes/data/way-area-with-center.wkt | 8 +- utils/setup.php | 2 +- 6 files changed, 133 insertions(+), 18 deletions(-) create mode 100644 test/bdd/db/import/postcodes.feature diff --git a/test/bdd/db/import/postcodes.feature b/test/bdd/db/import/postcodes.feature new file mode 100644 index 00000000..be469fe6 --- /dev/null +++ b/test/bdd/db/import/postcodes.feature @@ -0,0 +1,115 @@ +@DB +Feature: Import of postcodes + Tests for postcode estimation + + Scenario: Postcodes on the object are prefered over those on the address + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | addr+postcode | geometry | + | W93 | highway | residential | 112 DE 344 | :w2N | + | W22 | building | yes | 112 DE 344N | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W22 | 112 DE 344N | + | W93 | 112 DE 344 | + | R4 | 112 DE 34 | + | R34 | 112 DE | + | R1 | 112 | + + Scenario: Postcodes from a road are inherited by an attached building + Given the scene admin-areas + And the named places + | osm | class | type | addr+postcode | geometry | + | W93 | highway | residential | 86034 | :w2N | + And the named places + | osm | class | type | geometry | + | W22 | building | yes | :building:w2N | + When importing + Then placex contains + | object | postcode | parent_place_id | + | W22 | 86034 | W93 | + + Scenario: Postcodes from the lowest admin area are inherited by ways + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + When importing + Then placex contains + | object | postcode | + | W93 | 112 DE 34 | + + Scenario: Postcodes from the lowest admin area with postcode are inherited by ways + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + And the named places + | osm | class | type | admin | geometry | + | R4 | boundary | administrative | 10 | :b2:N | + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + When importing + Then placex contains + | object | postcode | parent_place_id | + | W93 | 112 DE | R4 | + + Scenario: Postcodes from the lowest admin area are inherited by buildings + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | geometry | + | W22 | building | yes | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W22 | 112 DE 34 | + + Scenario: Roads get postcodes from nearby buildings without other info + Given the scene admin-areas + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + And the named places + | osm | class | type | addr+postcode | geometry | + | W22 | building | yes | 445023 | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W93 | 445023 | + + @wip + Scenario: Postcodes from admin boundaries are preferred over estimated postcodes + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + And the named places + | osm | class | type | addr+postcode | geometry | + | W22 | building | yes | 445023 | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W93 | 112 DE 34 | diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/db_ops.py index fa8cd0de..be2211fa 100644 --- a/test/bdd/steps/db_ops.py +++ b/test/bdd/steps/db_ops.py @@ -287,7 +287,7 @@ def import_and_index_data_from_place_table(context): WHERE class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'""") context.db.commit() - context.nominatim.run_setup_script('index', 'index-noanalyse') + context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse') @when("updating places") def update_place_table(context): diff --git a/test/scenes/data/building-on-street-corner.wkt b/test/scenes/data/building-on-street-corner.wkt index 4c007e9e..24da78f1 100644 --- a/test/scenes/data/building-on-street-corner.wkt +++ b/test/scenes/data/building-on-street-corner.wkt @@ -4,4 +4,4 @@ n-outer | POINT(1.0039478 2.0004676) n-edge-WE | POINT(1.0039599 2.0002345) w-WE | LINESTRING(1.0031759 2.0002316,1.0040361 2.0002211,1.0042735 2.0002264) w-NS | LINESTRING(1.0040414 2.0001051,1.0040361 2.0002211,1.0040364 2.0006377) -w-building | MULTIPOLYGON(((1.0040019 2.000324,1.0040016 2.0002344,1.0039599 2.0002345,1.0039037 2.0002347,1.0039043 2.0004389,1.0040023 2.0004386,1.0040019 2.000324))) +w-building | MULTIPOLYGON(((1.0039037 2.0002347,1.0039599 2.0002345,1.0040016 2.0002344,1.0040019 2.000324,1.0040023 2.0004386,1.0039043 2.0004389,1.0039037 2.0002347))) diff --git a/test/scenes/data/poly-area.wkt b/test/scenes/data/poly-area.wkt index a8fb045d..e65f2054 100644 --- a/test/scenes/data/poly-area.wkt +++ b/test/scenes/data/poly-area.wkt @@ -1,11 +1,11 @@ -0.0001 | MULTIPOLYGON(((0.001 0,0 0,0 0.1,0.001 0.1,0.001 0))) -0.0005 | MULTIPOLYGON(((0.005 0,0 0,0 0.1,0.005 0.1,0.005 0))) -0.001 | MULTIPOLYGON(((0.01 0,0 0,0 0.1,0.01 0.1,0.01 0))) -0.005 | MULTIPOLYGON(((0.05 0,0 0,0 0.1,0.05 0.1,0.05 0))) -0.01 | MULTIPOLYGON(((0.1 0,0 0,0 0.1,0.1 0.1,0.1 0))) -0.05 | MULTIPOLYGON(((0.5 0,0 0,0 0.1,0.5 0.1,0.5 0))) -0.1 | MULTIPOLYGON(((0.1 0,0 0,0 1,0.1 1,0.1 0))) -0.5 | MULTIPOLYGON(((0.5 0,0 0,0 1,0.5 1,0.5 0))) -1.0 | MULTIPOLYGON(((1 0,0 0,0 1,1 1,1 0))) -2.0 | MULTIPOLYGON(((2 0,0 0,0 1,2 1,2 0))) -5.0 | MULTIPOLYGON(((5 0,0 0,0 1,5 1,5 0))) +0.0001 | MULTIPOLYGON(((0 0,0.001 0,0.001 0.1,0 0.1,0 0))) +0.0005 | MULTIPOLYGON(((0 0,0.005 0,0.005 0.1,0 0.1,0 0))) +0.001 | MULTIPOLYGON(((0 0,0.01 0,0.01 0.1,0 0.1,0 0))) +0.005 | MULTIPOLYGON(((0 0,0.05 0,0.05 0.1,0 0.1,0 0))) +0.01 | MULTIPOLYGON(((0 0,0.1 0,0.1 0.1,0 0.1,0 0))) +0.05 | MULTIPOLYGON(((0 0,0.5 0,0.5 0.1,0 0.1,0 0))) +0.1 | MULTIPOLYGON(((0 0,0.1 0,0.1 1,0 1,0 0))) +0.5 | MULTIPOLYGON(((0 0,0.5 0,0.5 1,0 1,0 0))) +1.0 | MULTIPOLYGON(((0 0,1 0,1 1,0 1,0 0))) +2.0 | MULTIPOLYGON(((0 0,2 0,2 1,0 1,0 0))) +5.0 | MULTIPOLYGON(((0 0,5 0,5 1,0 1,0 0))) diff --git a/test/scenes/data/way-area-with-center.wkt b/test/scenes/data/way-area-with-center.wkt index 8a773633..ed34cd3b 100644 --- a/test/scenes/data/way-area-with-center.wkt +++ b/test/scenes/data/way-area-with-center.wkt @@ -1,5 +1,5 @@ -inner-C | POINT(0.0035625 -0.0066188) -outer-C | POINT(0.0041244 -0.0060007) -inner-N | POINT(0.0018846 -0.0023652) inner-S | POINT(0.0048516 -0.0095176) -area | MULTIPOLYGON(((0.0077125 -0.0066566,0.0065469 -0.0099414,0.0038979 -0.0109481,0.0026794 -0.0105772,0.0022025 -0.0099944,0.0026264 -0.0091997,0.0026264 -0.0080341,0.0019376 -0.0065507,0.0010369 -0.0072924,0.0005071 -0.0060738,0.0017787 -0.00565,0.0005071 -0.0042195,0.0005601 -0.0025771,0.0013019 -0.0015175,0.0050105 -0.0021533,0.006441 -0.0025771,0.0075006 -0.0040076,0.0033681 -0.0059149,0.0051694 -0.0076633,0.0061231 -0.0064977,0.0068648 -0.0049612,0.0077125 -0.0066566))) +inner-N | POINT(0.0018846 -0.0023652) +outer-C | POINT(0.0041244 -0.0060007) +inner-C | POINT(0.0035625 -0.0066188) +area | MULTIPOLYGON(((0.0005071 -0.0060738,0.0010369 -0.0072924,0.0019376 -0.0065507,0.0026264 -0.0080341,0.0026264 -0.0091997,0.0022025 -0.0099944,0.0026794 -0.0105772,0.0038979 -0.0109481,0.0065469 -0.0099414,0.0077125 -0.0066566,0.0068648 -0.0049612,0.0061231 -0.0064977,0.0051694 -0.0076633,0.0033681 -0.0059149,0.0075006 -0.0040076,0.006441 -0.0025771,0.0050105 -0.0021533,0.0013019 -0.0015175,0.0005601 -0.0025771,0.0005071 -0.0042195,0.0017787 -0.00565,0.0005071 -0.0060738))) diff --git a/utils/setup.php b/utils/setup.php index 5236a827..67c7fd7f 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -541,7 +541,7 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { } } $sSQL = "SELECT count(getorcreate_postcode_id(v)) FROM "; - $sSQL .= "(SELECT distinct(postcode) FROM location_postcode) p"; + $sSQL .= "(SELECT distinct(postcode) as v FROM location_postcode) p"; if (!pg_query($oDB->connection, $sSQL)) { fail(pg_last_error($oDB->connection)); From 67bb885900b61e2c128c64a4067db1d3a0e93625 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 18 Aug 2017 21:34:52 +0200 Subject: [PATCH 36/41] throw away searches with two postcodes --- lib/Geocode.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index ae6f9cfb..01abc43c 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -751,10 +751,11 @@ class Geocode } } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { + if ($aSearch['sPostcode'] === '' && + isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { // If we have structured search or this is the first term, // make the postcode the primary search element. - if ($aSearchTerm['operator'] == '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { + if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { $aNewSearch = $aSearch; $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); @@ -1207,7 +1208,6 @@ class Geocode ksort($aGroupedSearches); } - if (CONST_Debug) var_Dump($aGroupedSearches); if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) { $aCopyGroupedSearches = $aGroupedSearches; foreach ($aCopyGroupedSearches as $iGroup => $aSearches) { From 86a8900e21f86f01524284ecae76117ed5a91637 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 18 Aug 2017 23:07:53 +0200 Subject: [PATCH 37/41] fix subqueries when getting details for postcodes --- lib/Geocode.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 01abc43c..7d899b0e 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -499,7 +499,7 @@ class Geocode $sSQL .= "UNION "; $sSQL .= "SELECT"; $sSQL .= " 'P' as osm_type,"; - $sSQL .= " (SELECT osm_id from placex p WHERE p.place_id = parent_place_id) as osm_id,"; + $sSQL .= " (SELECT osm_id from placex p WHERE p.place_id = lp.parent_place_id) as osm_id,"; $sSQL .= " 'place' as class, 'postcode' as type,"; $sSQL .= " null as admin_level, rank_search, rank_address,"; $sSQL .= " place_id, parent_place_id, country_code,"; @@ -515,13 +515,13 @@ class Geocode $sSQL .= " FROM "; $sSQL .= " place_addressline s, "; $sSQL .= " placex p"; - $sSQL .= " WHERE s.place_id = parent_place_id"; + $sSQL .= " WHERE s.place_id = lp.parent_place_id"; $sSQL .= " AND p.place_id = s.address_place_id "; $sSQL .= " AND s.isaddress"; $sSQL .= " AND p.importance is not null"; $sSQL .= " ) AS addressimportance, "; $sSQL .= " null AS extra_place "; - $sSQL .= "FROM location_postcode"; + $sSQL .= "FROM location_postcode lp"; $sSQL .= " WHERE place_id in ($sPlaceIDs) "; if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { From f4a00eba26fff15305dc73da159cb81d1ac2a6df Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 19 Aug 2017 09:46:56 +0200 Subject: [PATCH 38/41] enable details view for artificial postcodes --- website/details.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/website/details.php b/website/details.php index 5b52e2cc..4d7c1efb 100755 --- a/website/details.php +++ b/website/details.php @@ -73,6 +73,9 @@ if (CONST_Use_US_Tiger_Data) { $iParentPlaceID = chksql($oDB->getOne('SELECT parent_place_id FROM location_property_osmline WHERE place_id = '.$iPlaceID)); if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; +// artificial postcodes +$iParentPlaceID = chksql($oDB->getOne('SELECT parent_place_id FROM location_postcode WHERE place_id = '.$iPlaceID)); +if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; if (CONST_Use_Aux_Location_data) { $iParentPlaceID = chksql($oDB->getOne('SELECT parent_place_id FROM location_property_aux WHERE place_id = '.$iPlaceID)); From 7ca521929725599b83a41ebd4edc20d3f190ab06 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 19 Aug 2017 19:36:34 +0200 Subject: [PATCH 39/41] fixup tests --- test/bdd/api/search/params.feature | 1 - test/bdd/db/import/placex.feature | 2 -- test/bdd/db/query/search_simple.feature | 4 +++- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/test/bdd/api/search/params.feature b/test/bdd/api/search/params.feature index d21f317e..bfc7cb15 100644 --- a/test/bdd/api/search/params.feature +++ b/test/bdd/api/search/params.feature @@ -25,7 +25,6 @@ Feature: Search queries | type | value | | city | Montevideo | | state | Montevideo | - | postcode | 11000 | | country | Uruguay | | country_code | uy | diff --git a/test/bdd/db/import/placex.feature b/test/bdd/db/import/placex.feature index f124e482..f3695642 100644 --- a/test/bdd/db/import/placex.feature +++ b/test/bdd/db/import/placex.feature @@ -79,13 +79,11 @@ Feature: Import into placex | osm | class | type | postcode | geometry | | N1 | place | postcode | EA452CD | country:gb | | N2 | place | postcode | E45 23 | country:gb | - | N3 | place | postcode | y45 | country:gb | When importing Then placex contains | object | country_code | rank_search | rank_address | | N1 | gb | 30 | 30 | | N2 | gb | 30 | 30 | - | N3 | gb | 30 | 30 | Scenario: search and address rank for DE postcodes correctly assigned Given the places diff --git a/test/bdd/db/query/search_simple.feature b/test/bdd/db/query/search_simple.feature index 417df769..409ed44f 100644 --- a/test/bdd/db/query/search_simple.feature +++ b/test/bdd/db/query/search_simple.feature @@ -25,7 +25,9 @@ Feature: Searching of simple objects | osm | class | type | postcode | geometry | | R1 | boundary | postal_code | 54321 | poly-area:1.0 | And searching for "12345" - Then exactly 0 results are returned + Then results contain + | osm_type | + | P | When searching for "54321" Then results contain | ID | osm_type | osm_id | From 9aeb111fba8f40bd19147488c401d5dbf511e8ab Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 20 Aug 2017 09:29:56 +0200 Subject: [PATCH 40/41] tests: add new admin scene --- test/scenes/data/admin-areas.wkt | 19 +++ test/scenes/data/admin.osm | 250 +++++++++++++++++++++++++++++++ 2 files changed, 269 insertions(+) create mode 100644 test/scenes/data/admin-areas.wkt create mode 100644 test/scenes/data/admin.osm diff --git a/test/scenes/data/admin-areas.wkt b/test/scenes/data/admin-areas.wkt new file mode 100644 index 00000000..af741d99 --- /dev/null +++ b/test/scenes/data/admin-areas.wkt @@ -0,0 +1,19 @@ +c1:N | POINT(73.8419358 60.0763887) +c1:E | POINT(73.8393798 60.0488584) +c0 | POINT(73.8679209 60.0588527) +c2:N | POINT(73.896249 60.0631047) +c2:S | POINT(73.8932671 60.0434346) +c2:E | POINT(73.9162704 60.0471569) +c1:W | POINT(73.8990179 60.055876) +c2:W | POINT(73.8568453 60.0597032) +w2N | LINESTRING(73.8836825 60.0612977,73.8880489 60.0598094,73.8953972 60.0601283,73.9033844 60.058959) +w1W:2W | LINESTRING(73.8523722 60.0497092,73.85791 60.0520485,73.8617439 60.0573645,73.8706896 60.0554508) +building:w2N | LINESTRING(73.8963618 60.0604955,73.8961463 60.0602249,73.8967091 60.0601132,73.8969246 60.0603838,73.8963618 60.0604955) +b0 | MULTIPOLYGON(((73.8012539 60.0573645,73.8225532 60.0371591,73.8493903 60.035457,73.8843212 60.0356698,73.9049815 60.0358825,73.9192521 60.0356698,73.9260679 60.0514105,73.9216633 60.0591056,73.9141402 60.0722448,73.8804873 60.070332,73.8719676 60.0917916,73.8255351 60.0875433,73.8084956 60.0758576,73.8012539 60.0573645))) +b1:N | MULTIPOLYGON(((73.8012539 60.0573645,73.8447045 60.0611915,73.8692843 60.0674706,73.8804873 60.070332,73.8719676 60.0917916,73.8255351 60.0875433,73.8084956 60.0758576,73.8012539 60.0573645))) +b2:S | MULTIPOLYGON(((73.8694117 60.0507725,73.8843212 60.0356698,73.9049815 60.0358825,73.9075368 60.0523758,73.8830432 60.0517295,73.8694117 60.0507725))) +b1:W | MULTIPOLYGON(((73.8012539 60.0573645,73.8225532 60.0371591,73.8493903 60.035457,73.8843212 60.0356698,73.8694117 60.0507725,73.8447045 60.0611915,73.8012539 60.0573645))) +b1:E | MULTIPOLYGON(((73.8447045 60.0611915,73.8694117 60.0507725,73.8843212 60.0356698,73.9049815 60.0358825,73.9192521 60.0356698,73.9260679 60.0514105,73.9216633 60.0591056,73.9141402 60.0722448,73.8804873 60.070332,73.8692843 60.0674706,73.8447045 60.0611915))) +b2:E | MULTIPOLYGON(((73.9049815 60.0358825,73.9192521 60.0356698,73.9260679 60.0514105,73.9216633 60.0591056,73.9075368 60.0523758,73.9049815 60.0358825))) +b2:N | MULTIPOLYGON(((73.8692843 60.0674706,73.8830432 60.0517295,73.9075368 60.0523758,73.9216633 60.0591056,73.9141402 60.0722448,73.8804873 60.070332,73.8692843 60.0674706))) +b2:W | MULTIPOLYGON(((73.8447045 60.0611915,73.8694117 60.0507725,73.8830432 60.0517295,73.8692843 60.0674706,73.8447045 60.0611915))) diff --git a/test/scenes/data/admin.osm b/test/scenes/data/admin.osm new file mode 100644 index 00000000..bfe4340e --- /dev/null +++ b/test/scenes/data/admin.osm @@ -0,0 +1,250 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 88610b1b7470bab17e41429d6ad24fec8c4b45c6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 21 Aug 2017 22:29:51 +0200 Subject: [PATCH 41/41] further restrict results for , Disallow postcode operator together with housenumbers and force results around a postcode when no address is given. --- lib/Geocode.php | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 7d899b0e..b8933704 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -751,7 +751,7 @@ class Geocode } } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if ($aSearch['sPostcode'] === '' && + if ($aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' && isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { // If we have structured search or this is the first term, // make the postcode the primary search element. @@ -765,13 +765,13 @@ class Geocode // If we have a structured search or this is not the first term, // add the postcode as an addendum. - if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName'])) { + if ($aSearch['sOperator'] !== 'postcode' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']))) { $aSearch['sPostcode'] = $aSearchTerm['word']; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') { - if ($aSearch['sHouseNumber'] === '') { + if ($aSearch['sHouseNumber'] === '' && $aSearch['sOperator'] !== 'postcode') { $aSearch['sHouseNumber'] = $sToken; // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty @@ -1416,7 +1416,11 @@ class Geocode $aOrder[] = $aSearch['oNear']->distanceSQL('centroid'); } elseif ($aSearch['sPostcode']) { - $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."')"; + if (!sizeof($aSearch['aAddress'])) { + $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))"; + } else { + $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."')"; + } } if (sizeof($this->aExcludePlaceIDs)) { $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")";
rankName TokensName NotAddress TokensAddress Notcountryoperatorclasstypehouse#operatorclasstypepostcodehouse#LatLonRadius
".$aRow['sClass']."".$aRow['sType']."".$aRow['sPostcode']."".$aRow['sHouseNumber']."".$aRow['fLat']."