From ff85da0a31874050c32712d9bb1d1a5592c50a81 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 30 Nov 2020 22:54:36 +0100 Subject: [PATCH 1/3] cleanup get_addressdata Save location data in a ROW instead of using separate varaibles for each value. --- sql/functions/address_lookup.sql | 176 +++++++++++++++---------------- 1 file changed, 83 insertions(+), 93 deletions(-) diff --git a/sql/functions/address_lookup.sql b/sql/functions/address_lookup.sql index 6ee1f048..b57ae040 100644 --- a/sql/functions/address_lookup.sql +++ b/sql/functions/address_lookup.sql @@ -87,92 +87,83 @@ CREATE OR REPLACE FUNCTION get_addressdata(in_place_id BIGINT, in_housenumber IN RETURNS setof addressline AS $$ DECLARE - for_place_id BIGINT; - result TEXT[]; - search TEXT[]; - current_rank_address INTEGER; + place RECORD; location RECORD; - countrylocation RECORD; - searchcountrycode varchar(2); - searchhousenumber TEXT; - searchhousename HSTORE; - searchpostcode TEXT; - postcode_isexact BOOL; - searchclass TEXT; - searchtype TEXT; - search_unlisted_place TEXT; - countryname HSTORE; + current_rank_address INTEGER; BEGIN -- The place in question might not have a direct entry in place_addressline. -- Look for the parent of such places then and save if in for_place_id. - postcode_isexact := false; - -- first query osmline (interpolation lines) IF in_housenumber >= 0 THEN - SELECT parent_place_id, country_code, in_housenumber::text, postcode, - null, 'place', 'house' + SELECT parent_place_id as place_id, country_code, + in_housenumber::text as housenumber, postcode, + 'place' as class, 'house' as type, + null as name, null::hstore as address + INTO place FROM location_property_osmline - WHERE place_id = in_place_id AND in_housenumber>=startnumber - AND in_housenumber <= endnumber - INTO for_place_id, searchcountrycode, searchhousenumber, - searchpostcode, searchhousename, searchclass, searchtype; + WHERE place_id = in_place_id + AND in_housenumber between startnumber and endnumber; END IF; --then query tiger data -- %NOTIGERDATA% IF 0 THEN - IF for_place_id IS NULL AND in_housenumber >= 0 THEN - SELECT parent_place_id, 'us', in_housenumber::text, postcode, null, - 'place', 'house' + IF place IS NULL AND in_housenumber >= 0 THEN + SELECT parent_place_id as place_id, 'us' as country_code, + in_housenumber::text as housenumber, postcode, + 'place' as class, 'house' as type, + null as name, null::hstore as address + INTO place FROM location_property_tiger - WHERE place_id = in_place_id AND in_housenumber >= startnumber - AND in_housenumber <= endnumber - INTO for_place_id, searchcountrycode, searchhousenumber, - searchpostcode, searchhousename, searchclass, searchtype; + WHERE place_id = in_place_id + AND in_housenumber between startnumber and endnumber; END IF; -- %NOTIGERDATA% END IF; -- %NOAUXDATA% IF 0 THEN - IF for_place_id IS NULL THEN - SELECT parent_place_id, 'us', housenumber, postcode, null, 'place', 'house' + IF place IS NULL THEN + SELECT parent_place_id as place_id, 'us' as country_code, + housenumber, postcode, + 'place' as class, 'house' as type, + null as name, null::hstore as address + INTO place FROM location_property_aux - WHERE place_id = in_place_id - INTO for_place_id,searchcountrycode, searchhousenumber, - searchpostcode, searchhousename, searchclass, searchtype; + WHERE place_id = in_place_id; END IF; -- %NOAUXDATA% END IF; -- postcode table - IF for_place_id IS NULL THEN - SELECT parent_place_id, country_code, postcode, 'place', 'postcode' + IF place IS NULL THEN + SELECT parent_place_id as place_id, country_code, + null as housenumber, postcode, + 'place' as class, 'postcode' as type, + null as name, null::hstore as address + INTO place FROM location_postcode - WHERE place_id = in_place_id - INTO for_place_id, searchcountrycode, searchpostcode, - searchclass, searchtype; + WHERE place_id = in_place_id; END IF; -- POI objects in the placex table - IF for_place_id IS NULL THEN - SELECT parent_place_id, country_code, housenumber, - postcode, address is not null and address ? 'postcode', - name, class, type, - address -> '_unlisted_place' as unlisted_place + IF place IS NULL THEN + SELECT parent_place_id as place_id, country_code, + housenumber, postcode, + class, type, + name, address + INTO place FROM placex - WHERE place_id = in_place_id and rank_search > 27 - INTO for_place_id, searchcountrycode, searchhousenumber, - searchpostcode, postcode_isexact, searchhousename, searchclass, - searchtype, search_unlisted_place; + WHERE place_id = in_place_id and rank_search > 27; END IF; -- If for_place_id is still NULL at this point then the object has its own -- entry in place_address line. However, still check if there is not linked -- place we should be using instead. - IF for_place_id IS NULL THEN - select coalesce(linked_place_id, place_id), country_code, + IF place IS NULL THEN + select coalesce(linked_place_id, place_id) as place_id, country_code, housenumber, postcode, - address is not null and address ? 'postcode', null - from placex where place_id = in_place_id - INTO for_place_id, searchcountrycode, searchhousenumber, searchpostcode, postcode_isexact, searchhousename; + class, type, + null as name, address + INTO place + FROM placex where place_id = in_place_id; END IF; --RAISE WARNING '% % % %',searchcountrycode, searchhousenumber, searchpostcode; @@ -183,28 +174,27 @@ BEGIN SELECT placex.place_id, osm_type, osm_id, name, coalesce(extratags->'linked_place', extratags->'place') as place_type, class, type, admin_level, - type not in ('postcode', 'postal_code') as isaddress, CASE WHEN rank_address = 0 THEN 100 WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address, - 0 as distance, country_code, postcode + country_code FROM placex - WHERE place_id = for_place_id + WHERE place_id = place.place_id LOOP --RAISE WARNING '%',location; - IF searchcountrycode IS NULL AND location.country_code IS NOT NULL THEN - searchcountrycode := location.country_code; - END IF; IF location.rank_address < 4 THEN -- no country locations for ranks higher than country - searchcountrycode := NULL; + place.country_code := NULL; + ELSEIF place.country_code IS NULL AND location.country_code IS NOT NULL THEN + place.country_code := location.country_code; END IF; - countrylocation := ROW(location.place_id, location.osm_type, location.osm_id, - location.name, location.class, location.type, - location.place_type, - location.admin_level, true, location.isaddress, - location.rank_address, location.distance)::addressline; - RETURN NEXT countrylocation; + + RETURN NEXT ROW(location.place_id, location.osm_type, location.osm_id, + location.name, location.class, location.type, + location.place_type, + location.admin_level, true, + location.type not in ('postcode', 'postal_code'), + location.rank_address, 0)::addressline; current_rank_address := location.rank_address; END LOOP; @@ -218,38 +208,39 @@ BEGIN CASE WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address, distance, country_code, postcode FROM place_addressline join placex on (address_place_id = placex.place_id) - WHERE place_addressline.place_id IN (for_place_id, in_place_id) + WHERE place_addressline.place_id IN (place.place_id, in_place_id) AND linked_place_id is null - AND (placex.country_code IS NULL OR searchcountrycode IS NULL - OR placex.country_code = searchcountrycode) + AND (placex.country_code IS NULL OR place.country_code IS NULL + OR placex.country_code = place.country_code) ORDER BY rank_address desc, (place_addressline.place_id = in_place_id) desc, isaddress desc, fromarea desc, distance asc, rank_search desc LOOP -- RAISE WARNING '%',location; - IF searchcountrycode IS NULL AND location.country_code IS NOT NULL THEN - searchcountrycode := location.country_code; + IF place.country_code IS NULL AND location.country_code IS NOT NULL THEN + place.country_code := location.country_code; END IF; IF location.type in ('postcode', 'postal_code') - AND searchpostcode is not null + AND place.postcode is not null THEN -- If the place had a postcode assigned, take this one only -- into consideration when it is an area and the place does not have -- a postcode itself. - IF location.fromarea AND not postcode_isexact AND location.isaddress THEN - searchpostcode := null; -- remove the less exact postcode + IF location.fromarea AND location.isaddress + AND (place.address is null or not place.address ? 'postcode') + THEN + place.postcode := null; -- remove the less exact postcode ELSE location.isaddress := false; END IF; END IF; - countrylocation := ROW(location.place_id, location.osm_type, location.osm_id, + RETURN NEXT ROW(location.place_id, location.osm_type, location.osm_id, location.name, location.class, location.type, location.place_type, location.admin_level, location.fromarea, location.isaddress and location.rank_address != current_rank_address, location.rank_address, location.distance)::addressline; - RETURN NEXT countrylocation; IF location.isaddress THEN current_rank_address := location.rank_address; @@ -258,42 +249,41 @@ BEGIN -- If no country was included yet, add the name information from country_name. IF current_rank_address > 4 THEN - SELECT name FROM country_name - WHERE country_code = searchcountrycode LIMIT 1 INTO countryname; + FOR location IN + SELECT name FROM country_name WHERE country_code = place.country_code LIMIT 1 + LOOP --RAISE WARNING '% % %',current_rank_address,searchcountrycode,countryname; - IF countryname IS NOT NULL THEN - location := ROW(null, null, null, countryname, 'place', 'country', NULL, + RETURN NEXT ROW(null, null, null, location.name, 'place', 'country', NULL, null, true, true, 4, 0)::addressline; - RETURN NEXT location; - END IF; + END LOOP; END IF; -- Finally add some artificial rows. - IF searchcountrycode IS NOT NULL THEN - location := ROW(null, null, null, hstore('ref', searchcountrycode), + IF place.country_code IS NOT NULL THEN + location := ROW(null, null, null, hstore('ref', place.country_code), 'place', 'country_code', null, null, true, false, 4, 0)::addressline; RETURN NEXT location; END IF; - IF searchhousename IS NOT NULL THEN - location := ROW(in_place_id, null, null, searchhousename, searchclass, - searchtype, null, null, true, true, 29, 0)::addressline; + IF place.name IS NOT NULL THEN + location := ROW(in_place_id, null, null, place.name, place.class, + place.type, null, null, true, true, 29, 0)::addressline; RETURN NEXT location; END IF; - IF searchhousenumber IS NOT NULL THEN - location := ROW(null, null, null, hstore('ref', searchhousenumber), + IF place.housenumber IS NOT NULL THEN + location := ROW(null, null, null, hstore('ref', place.housenumber), 'place', 'house_number', null, null, true, true, 28, 0)::addressline; RETURN NEXT location; END IF; - IF search_unlisted_place is not null THEN - RETURN NEXT ROW(null, null, null, hstore('name', search_unlisted_place), + IF place.address is not null and place.address ? '_unlisted_place' THEN + RETURN NEXT ROW(null, null, null, hstore('name', place.address->'_unlisted_place'), 'place', 'locality', null, null, true, true, 25, 0)::addressline; END IF; - IF searchpostcode IS NOT NULL THEN - location := ROW(null, null, null, hstore('ref', searchpostcode), 'place', + IF place.postcode is not null THEN + location := ROW(null, null, null, hstore('ref', place.postcode), 'place', 'postcode', null, null, false, true, 5, 0)::addressline; RETURN NEXT location; END IF; From 7295cad71555b8890979eaaee06c77d49a927f54 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 1 Dec 2020 11:58:25 +0100 Subject: [PATCH 2/3] compute address parts for rank 30 objects on the fly Rank 30 objects usually use the address parts of their parent. When the parent has address parts that are areas but not marked as isaddress, then the parent might go through multiple administrative areas. In that case recheck if the right area has been choosen for the object in question instead of relying on isaddress. Note that we really only have to do the recomputation in the case of 'isarea = True and isaddress = False' which hopefully keeps the number of additional geometric operations we have to do to a minimum. There is one more special case to be taken into account here: a street may go through two administrative areas and a house along that street is placed in one of the area while the addr:* tags says it belongs to the other. In that case we must not switch the isaddress to the one it is situated. To avoid that recheck the address names against the name of the ara. That is not perfect but should cover most cases. Fixes #328. --- sql/functions/address_lookup.sql | 45 +++++++++++++++++---------- test/bdd/db/import/addressing.feature | 32 +++++++++++++++++++ 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/sql/functions/address_lookup.sql b/sql/functions/address_lookup.sql index b57ae040..1194022a 100644 --- a/sql/functions/address_lookup.sql +++ b/sql/functions/address_lookup.sql @@ -90,6 +90,7 @@ DECLARE place RECORD; location RECORD; current_rank_address INTEGER; + location_isaddress BOOLEAN; BEGIN -- The place in question might not have a direct entry in place_addressline. -- Look for the parent of such places then and save if in for_place_id. @@ -99,7 +100,8 @@ BEGIN SELECT parent_place_id as place_id, country_code, in_housenumber::text as housenumber, postcode, 'place' as class, 'house' as type, - null as name, null::hstore as address + null as name, null::hstore as address, + centroid INTO place FROM location_property_osmline WHERE place_id = in_place_id @@ -112,7 +114,8 @@ BEGIN SELECT parent_place_id as place_id, 'us' as country_code, in_housenumber::text as housenumber, postcode, 'place' as class, 'house' as type, - null as name, null::hstore as address + null as name, null::hstore as address, + ST_Centroid(linegeo) as centroid INTO place FROM location_property_tiger WHERE place_id = in_place_id @@ -125,7 +128,8 @@ BEGIN SELECT parent_place_id as place_id, 'us' as country_code, housenumber, postcode, 'place' as class, 'house' as type, - null as name, null::hstore as address + null as name, null::hstore as address, + centroid INTO place FROM location_property_aux WHERE place_id = in_place_id; @@ -137,7 +141,8 @@ BEGIN SELECT parent_place_id as place_id, country_code, null as housenumber, postcode, 'place' as class, 'postcode' as type, - null as name, null::hstore as address + null as name, null::hstore as address, + null as centroid INTO place FROM location_postcode WHERE place_id = in_place_id; @@ -148,7 +153,8 @@ BEGIN SELECT parent_place_id as place_id, country_code, housenumber, postcode, class, type, - name, address + name, address, + centroid INTO place FROM placex WHERE place_id = in_place_id and rank_search > 27; @@ -161,7 +167,8 @@ BEGIN select coalesce(linked_place_id, place_id) as place_id, country_code, housenumber, postcode, class, type, - null as name, address + null as name, address, + null as centroid INTO place FROM placex where place_id = in_place_id; END IF; @@ -212,11 +219,17 @@ BEGIN AND linked_place_id is null AND (placex.country_code IS NULL OR place.country_code IS NULL OR placex.country_code = place.country_code) - ORDER BY rank_address desc, (place_addressline.place_id = in_place_id) desc, + ORDER BY rank_address desc, + (place_addressline.place_id = in_place_id) desc, + (fromarea and place.centroid is not null and not isaddress + and (place.address is null or avals(name) && avals(place.address)) + and ST_Contains(geometry, place.centroid)) desc, isaddress desc, fromarea desc, distance asc, rank_search desc LOOP -- RAISE WARNING '%',location; + location_isaddress := location.rank_address != current_rank_address; + IF place.country_code IS NULL AND location.country_code IS NOT NULL THEN place.country_code := location.country_code; END IF; @@ -231,20 +244,18 @@ BEGIN THEN place.postcode := null; -- remove the less exact postcode ELSE - location.isaddress := false; + location_isaddress := false; END IF; END IF; RETURN NEXT ROW(location.place_id, location.osm_type, location.osm_id, - location.name, location.class, location.type, - location.place_type, - location.admin_level, location.fromarea, - location.isaddress and location.rank_address != current_rank_address, - location.rank_address, - location.distance)::addressline; + location.name, location.class, location.type, + location.place_type, + location.admin_level, location.fromarea, + location_isaddress, + location.rank_address, + location.distance)::addressline; - IF location.isaddress THEN - current_rank_address := location.rank_address; - END IF; + current_rank_address := location.rank_address; END LOOP; -- If no country was included yet, add the name information from country_name. diff --git a/test/bdd/db/import/addressing.feature b/test/bdd/db/import/addressing.feature index 9050c19b..5eea3de1 100644 --- a/test/bdd/db/import/addressing.feature +++ b/test/bdd/db/import/addressing.feature @@ -397,3 +397,35 @@ Feature: Address computation Then results contain | osm_type | osm_id | name | | N | 1 | Bolder, Wonderway, Left | + + Scenario: POIs can correct address parts on the fly + Given the grid + | 1 | | | | 2 | | 5 | + | | | | 9 | | 8 | | + | 4 | | | | 3 | | 6 | + And the places + | osm | class | type | admin | name | geometry | + | R1 | boundary | administrative | 8 | Left | (1,2,3,4,1) | + | R2 | boundary | administrative | 8 | Right | (2,3,6,5,2) | + And the places + | osm | class | type | name | geometry | + | W1 | highway | primary | Wonderway | 2,3 | + | N1 | amenity | cafe | Bolder | 9 | + | N2 | amenity | cafe | Leftside | 8 | + When importing + Then place_addressline contains + | object | address | isaddress | + | W1 | R1 | False | + | W1 | R2 | True | + And place_addressline doesn't contain + | object | address | + | N1 | R1 | + | N2 | R2 | + When searching for "Bolder" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Bolder, Wonderway, Left | + When searching for "Leftside" + Then results contain + | osm_type | osm_id | name | + | N | 2 | Leftside, Wonderway, Right | From 63544db8f9c56f74d8b061ec5c83250076f86453 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 1 Dec 2020 14:54:42 +0100 Subject: [PATCH 3/3] null entries need to be typed --- sql/functions/address_lookup.sql | 22 +++++++++++----------- test/bdd/db/import/addressing.feature | 14 +++++++++----- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/sql/functions/address_lookup.sql b/sql/functions/address_lookup.sql index 1194022a..dfd5a953 100644 --- a/sql/functions/address_lookup.sql +++ b/sql/functions/address_lookup.sql @@ -93,15 +93,15 @@ DECLARE location_isaddress BOOLEAN; BEGIN -- The place in question might not have a direct entry in place_addressline. - -- Look for the parent of such places then and save if in for_place_id. + -- Look for the parent of such places then and save it in place. -- first query osmline (interpolation lines) IF in_housenumber >= 0 THEN SELECT parent_place_id as place_id, country_code, in_housenumber::text as housenumber, postcode, 'place' as class, 'house' as type, - null as name, null::hstore as address, - centroid + null::hstore as name, null::hstore as address, + ST_Centroid(linegeo) as centroid INTO place FROM location_property_osmline WHERE place_id = in_place_id @@ -114,7 +114,7 @@ BEGIN SELECT parent_place_id as place_id, 'us' as country_code, in_housenumber::text as housenumber, postcode, 'place' as class, 'house' as type, - null as name, null::hstore as address, + null::hstore as name, null::hstore as address, ST_Centroid(linegeo) as centroid INTO place FROM location_property_tiger @@ -128,7 +128,7 @@ BEGIN SELECT parent_place_id as place_id, 'us' as country_code, housenumber, postcode, 'place' as class, 'house' as type, - null as name, null::hstore as address, + null::hstore as name, null::hstore as address, centroid INTO place FROM location_property_aux @@ -139,10 +139,10 @@ BEGIN -- postcode table IF place IS NULL THEN SELECT parent_place_id as place_id, country_code, - null as housenumber, postcode, + null::text as housenumber, postcode, 'place' as class, 'postcode' as type, - null as name, null::hstore as address, - null as centroid + null::hstore as name, null::hstore as address, + null::geometry as centroid INTO place FROM location_postcode WHERE place_id = in_place_id; @@ -160,15 +160,15 @@ BEGIN WHERE place_id = in_place_id and rank_search > 27; END IF; - -- If for_place_id is still NULL at this point then the object has its own + -- If place is still NULL at this point then the object has its own -- entry in place_address line. However, still check if there is not linked -- place we should be using instead. IF place IS NULL THEN select coalesce(linked_place_id, place_id) as place_id, country_code, housenumber, postcode, class, type, - null as name, address, - null as centroid + null::hstore as name, address, + null::geometry as centroid INTO place FROM placex where place_id = in_place_id; END IF; diff --git a/test/bdd/db/import/addressing.feature b/test/bdd/db/import/addressing.feature index 5eea3de1..479ddd31 100644 --- a/test/bdd/db/import/addressing.feature +++ b/test/bdd/db/import/addressing.feature @@ -5,11 +5,11 @@ Feature: Address computation Scenario: place nodes are added to the address when they are close enough Given the 0.002 grid | 2 | | | | | | 1 | | 3 | - And the named places - | osm | class | type | geometry | - | N1 | place | square | 1 | - | N2 | place | hamlet | 2 | - | N3 | place | hamlet | 3 | + And the places + | osm | class | type | name | geometry | + | N1 | place | square | Square | 1 | + | N2 | place | hamlet | West Farm | 2 | + | N3 | place | hamlet | East Farm | 3 | When importing Then place_addressline contains | object | address | fromarea | @@ -17,6 +17,10 @@ Feature: Address computation Then place_addressline doesn't contain | object | address | | N1 | N2 | + When searching for "Square" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Square, East Farm | Scenario: given two place nodes, the closer one wins for the address Given the grid