precompute postcodes

Set postcode column to the best guess for the postcode for
the place.
This commit is contained in:
Sarah Hoffmann
2017-07-01 22:49:24 +02:00
parent 4e792546e8
commit d59d57957c
3 changed files with 54 additions and 29 deletions

View File

@@ -293,6 +293,27 @@ END;
$$ $$
LANGUAGE plpgsql IMMUTABLE; LANGUAGE plpgsql IMMUTABLE;
-- Find the nearest artificial postcode for the given geometry.
-- TODO For areas there should not be more than two inside the geometry.
CREATE OR REPLACE FUNCTION get_nearest_postcode(country VARCHAR(2), geom GEOMETRY) RETURNS TEXT
AS $$
DECLARE
item RECORD;
BEGIN
FOR item IN
SELECT postcode FROM location_postcode
WHERE ST_DWithin(geom, location_postcode.geometry, 0.05)
AND location_postcode.country_code = country
ORDER BY ST_Distance(geom, location_postcode.geometry)
LIMIT 1
LOOP
RETURN item.postcode;
END LOOP;
RETURN null;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION create_country(src HSTORE, lookup_country_code varchar(2)) RETURNS VOID CREATE OR REPLACE FUNCTION create_country(src HSTORE, lookup_country_code varchar(2)) RETURNS VOID
@@ -1212,11 +1233,6 @@ BEGIN
--DEBUG: RAISE WARNING 'placex_update % % (%)',NEW.osm_type,NEW.osm_id,NEW.place_id; --DEBUG: RAISE WARNING 'placex_update % % (%)',NEW.osm_type,NEW.osm_id,NEW.place_id;
IF NEW.class = 'place' AND NEW.type = 'postcodearea' THEN
-- Silently do nothing
RETURN NEW;
END IF;
NEW.indexed_date = now(); NEW.indexed_date = now();
result := deleteSearchName(NEW.partition, NEW.place_id); result := deleteSearchName(NEW.partition, NEW.place_id);
@@ -1252,8 +1268,6 @@ BEGIN
addr_street = NEW.address->'street'; addr_street = NEW.address->'street';
addr_place = NEW.address->'place'; addr_place = NEW.address->'place';
NEW.postcode = NEW.address->'postcode';
END IF; END IF;
-- Speed up searches - just use the centroid of the feature -- Speed up searches - just use the centroid of the feature
@@ -1496,6 +1510,16 @@ BEGIN
NEW.country_code := location.country_code; NEW.country_code := location.country_code;
--DEBUG: RAISE WARNING 'Got parent details from search name'; --DEBUG: RAISE WARNING 'Got parent details from search name';
-- determine postcode
IF NEW.address is not null AND NEW.address ? 'postcode' THEN
NEW.postcode = NEW.address->'postcode';
ELSE
SELECT postcode FROM placex WHERE place_id = parent_place_id INTO NEW.postcode;
END IF;
IF NEW.postcode is null THEN
NEW.postcode := get_nearest_postcode(NEW.country_code, place_centroid);
END IF;
-- Merge the postcode into the parent's address if necessary -- Merge the postcode into the parent's address if necessary
IF NEW.postcode IS NOT NULL THEN IF NEW.postcode IS NOT NULL THEN
--DEBUG: RAISE WARNING 'Merging postcode into parent'; --DEBUG: RAISE WARNING 'Merging postcode into parent';
@@ -1719,6 +1743,7 @@ BEGIN
isin := avals(NEW.address); isin := avals(NEW.address);
IF array_upper(isin, 1) IS NOT NULL THEN IF array_upper(isin, 1) IS NOT NULL THEN
FOR i IN 1..array_upper(isin, 1) LOOP FOR i IN 1..array_upper(isin, 1) LOOP
-- TODO further split terms with comma and semicolon
address_street_word_id := get_name_id(make_standard_name(isin[i])); address_street_word_id := get_name_id(make_standard_name(isin[i]));
IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN
nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]);
@@ -1733,26 +1758,6 @@ BEGIN
END LOOP; END LOOP;
END IF; END IF;
END IF; END IF;
--DEBUG: RAISE WARNING '"address:* tokens collected';
IF NEW.postcode IS NOT NULL THEN
isin := regexp_split_to_array(NEW.postcode, E'[;,]');
IF array_upper(isin, 1) IS NOT NULL THEN
FOR i IN 1..array_upper(isin, 1) LOOP
address_street_word_id := get_name_id(make_standard_name(isin[i]));
IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN
nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]);
isin_tokens := isin_tokens || address_street_word_id;
END IF;
-- merge into address vector
address_street_word_id := get_word_id(make_standard_name(isin[i]));
IF address_street_word_id IS NOT NULL THEN
nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]);
END IF;
END LOOP;
END IF;
END IF;
--DEBUG: RAISE WARNING 'postcode tokens collected';
-- %NOTIGERDATA% IF 0 THEN -- %NOTIGERDATA% IF 0 THEN
-- for the USA we have an additional address table. Merge in zip codes from there too -- for the USA we have an additional address table. Merge in zip codes from there too
@@ -1824,6 +1829,11 @@ BEGIN
VALUES (NEW.place_id, location.place_id, true, location_isaddress, location.distance, location.rank_address); VALUES (NEW.place_id, location.place_id, true, location_isaddress, location.distance, location.rank_address);
IF location_isaddress THEN IF location_isaddress THEN
-- add postcode if we have one
-- (If multiple postcodes are available, we end up with the highest ranking one.)
IF location.postcode is not null THEN
NEW.postcode = location.postcode;
END IF;
address_havelevel[location.rank_address] := true; address_havelevel[location.rank_address] := true;
IF NOT location.isguess THEN IF NOT location.isguess THEN
@@ -1858,6 +1868,11 @@ BEGIN
nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]);
INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address)
VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address);
IF NEW.postcode is null AND location.postcode is not null
AND NOT address_havelevel[location.rank_address] THEN
NEW.postcode := location.postcode;
END IF;
address_havelevel[location.rank_address] := true; address_havelevel[location.rank_address] := true;
IF location.rank_address > parent_place_id_rank THEN IF location.rank_address > parent_place_id_rank THEN
@@ -1900,6 +1915,15 @@ BEGIN
END IF; END IF;
--DEBUG: RAISE WARNING 'search terms for long ways added'; --DEBUG: RAISE WARNING 'search terms for long ways added';
IF NEW.address is not null AND NEW.address ? 'postcode'
AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
NEW.postcode := NEW.address->'postcode';
END IF;
IF NEW.postcode is null AND NEW.rank_search > 8 THEN
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
END IF;
-- if we have a name add this to the name search table -- if we have a name add this to the name search table
IF NEW.name IS NOT NULL THEN IF NEW.name IS NOT NULL THEN

View File

@@ -6,12 +6,12 @@ BEGIN
-- start -- start
IF in_partition = -partition- THEN IF in_partition = -partition- THEN
FOR r IN FOR r IN
SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, centroid FROM ( SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, postcode, centroid FROM (
SELECT * FROM location_area_large_-partition- WHERE ST_Intersects(geometry, feature) and rank_search < maxrank SELECT * FROM location_area_large_-partition- WHERE ST_Intersects(geometry, feature) and rank_search < maxrank
UNION ALL UNION ALL
SELECT * FROM location_area_country WHERE ST_Intersects(geometry, feature) and rank_search < maxrank SELECT * FROM location_area_country WHERE ST_Intersects(geometry, feature) and rank_search < maxrank
) as location_area ) as location_area
GROUP BY place_id, keywords, rank_address, rank_search, isguess, centroid GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
ORDER BY rank_address, isin_tokens && keywords desc, isguess asc, ORDER BY rank_address, isin_tokens && keywords desc, isguess asc,
ST_Distance(feature, centroid) * ST_Distance(feature, centroid) *
CASE CASE

View File

@@ -21,6 +21,7 @@ create type nearfeaturecentr as (
rank_search smallint, rank_search smallint,
distance float, distance float,
isguess boolean, isguess boolean,
postcode TEXT,
centroid GEOMETRY centroid GEOMETRY
); );