mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
overhaul address computation
This is a complete rewrite of the selection of address parts to be inserted into the place_addressline table. The new algorithm selects for each rank:

* the boundary that overlaps with the addressee and is contained in the already selected boundaries of lower rank, or, failing that,
* the place node closest to the addressee that is contained in the already selected boundaries and lies within the influence radius of the already selected place nodes of lower rank.

Place nodes that are not contained in the already selected boundaries of lower rank are discarded entirely. All other candidates are added as non-address parts.
This commit is contained in:
@@ -259,21 +259,16 @@ CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT,
|
|||||||
OUT nameaddress_vector INT[])
|
OUT nameaddress_vector INT[])
|
||||||
AS $$
|
AS $$
|
||||||
DECLARE
|
DECLARE
|
||||||
current_rank_address INTEGER := 0;
|
address_havelevel BOOLEAN[];
|
||||||
location_distance FLOAT := 0;
|
|
||||||
location_parent GEOMETRY := NULL;
|
|
||||||
parent_place_id_rank SMALLINT := 0;
|
|
||||||
|
|
||||||
location_isaddress BOOLEAN;
|
location_isaddress BOOLEAN;
|
||||||
|
current_boundary GEOMETRY := NULL;
|
||||||
address_havelevel BOOLEAN[];
|
current_node_area GEOMETRY := NULL;
|
||||||
location_keywords INT[];
|
|
||||||
|
|
||||||
location RECORD;
|
location RECORD;
|
||||||
addr_item RECORD;
|
addr_item RECORD;
|
||||||
|
|
||||||
isin_tokens INT[];
|
isin_tokens INT[];
|
||||||
isin TEXT[];
|
|
||||||
BEGIN
|
BEGIN
|
||||||
parent_place_id := 0;
|
parent_place_id := 0;
|
||||||
nameaddress_vector := '{}'::int[];
|
nameaddress_vector := '{}'::int[];
|
||||||
@@ -302,7 +297,7 @@ BEGIN
|
|||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
---- now compute the address terms
|
---- now compute the address terms
|
||||||
FOR i IN 1..28 LOOP
|
FOR i IN 1..maxrank LOOP
|
||||||
address_havelevel[i] := false;
|
address_havelevel[i] := false;
|
||||||
END LOOP;
|
END LOOP;
|
||||||
|
|
||||||
@@ -315,70 +310,58 @@ BEGIN
|
|||||||
WHEN rank_address = 16 AND rank_search = 18 THEN 0.5
|
WHEN rank_address = 16 AND rank_search = 18 THEN 0.5
|
||||||
ELSE 1 END ASC
|
ELSE 1 END ASC
|
||||||
LOOP
|
LOOP
|
||||||
IF location.rank_address != current_rank_address THEN
|
-- Ignore all place nodes that do not fit in a lower level boundary.
|
||||||
current_rank_address := location.rank_address;
|
CONTINUE WHEN location.isguess
|
||||||
IF location.isguess THEN
|
and current_boundary is not NULL
|
||||||
location_distance := location.distance * 1.5;
|
and not ST_Contains(current_boundary, location.centroid);
|
||||||
ELSE
|
|
||||||
IF location.rank_address <= 12 THEN
|
-- If this is the first item in the rank, then assume it is the address.
|
||||||
-- for county and above, if we have an area consider that exact
|
location_isaddress := not address_havelevel[location.rank_address];
|
||||||
-- (It would be nice to relax the constraint for places close to
|
|
||||||
-- the boundary but we'd need the exact geometry for that. Too
|
-- Further sanity checks to ensure that the address forms a sane hierarchy.
|
||||||
-- expensive.)
|
IF location_isaddress THEN
|
||||||
location_distance = 0;
|
IF location.isguess and current_node_area is not NULL THEN
|
||||||
ELSE
|
location_isaddress := ST_Contains(current_node_area, location.centroid);
|
||||||
-- Below county level remain slightly fuzzy.
|
END IF;
|
||||||
location_distance := location.distance * 0.5;
|
IF not location.isguess and current_boundary is not NULL
|
||||||
END IF;
|
and location.rank_address != 11 AND location.rank_address != 5 THEN
|
||||||
|
location_isaddress := ST_Contains(current_boundary, location.centroid);
|
||||||
END IF;
|
END IF;
|
||||||
ELSE
|
|
||||||
CONTINUE WHEN location.keywords <@ location_keywords;
|
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
IF location.distance < location_distance OR NOT location.isguess THEN
|
IF location_isaddress THEN
|
||||||
location_keywords := location.keywords;
|
address_havelevel[location.rank_address] := true;
|
||||||
|
parent_place_id := location.place_id;
|
||||||
|
|
||||||
location_isaddress := NOT address_havelevel[location.rank_address];
|
-- Set postcode if we have one.
|
||||||
--DEBUG: RAISE WARNING 'should be address: %, is guess: %, rank: %', location_isaddress, location.isguess, location.rank_address;
|
-- (Returned will be the highest ranking one.)
|
||||||
IF location_isaddress AND location.isguess AND location_parent IS NOT NULL THEN
|
IF location.postcode is not NULL THEN
|
||||||
location_isaddress := ST_Contains(location_parent, location.centroid);
|
postcode = location.postcode;
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
--DEBUG: RAISE WARNING '% isaddress: %', location.place_id, location_isaddress;
|
-- Recompute the areas we need for hierarchy sanity checks.
|
||||||
-- Add it to the list of search terms
|
IF location.rank_address != 11 AND location.rank_address != 5 THEN
|
||||||
IF NOT %REVERSE-ONLY% THEN
|
IF location.isguess THEN
|
||||||
nameaddress_vector := array_merge(nameaddress_vector,
|
current_node_area := place_node_fuzzy_area(location.centroid,
|
||||||
location.keywords::integer[]);
|
location.rank_search);
|
||||||
|
ELSE
|
||||||
|
current_node_area := NULL;
|
||||||
|
SELECT p.geometry FROM placex p
|
||||||
|
WHERE p.place_id = location.place_id INTO current_boundary;
|
||||||
|
END IF;
|
||||||
END IF;
|
END IF;
|
||||||
|
END IF;
|
||||||
|
|
||||||
INSERT INTO place_addressline (place_id, address_place_id, fromarea,
|
-- Add it to the list of search terms
|
||||||
|
IF NOT %REVERSE-ONLY% THEN
|
||||||
|
nameaddress_vector := array_merge(nameaddress_vector,
|
||||||
|
location.keywords::integer[]);
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
INSERT INTO place_addressline (place_id, address_place_id, fromarea,
|
||||||
isaddress, distance, cached_rank_address)
|
isaddress, distance, cached_rank_address)
|
||||||
VALUES (obj_place_id, location.place_id, true,
|
VALUES (obj_place_id, location.place_id, true,
|
||||||
location_isaddress, location.distance, location.rank_address);
|
location_isaddress, location.distance, location.rank_address);
|
||||||
|
|
||||||
IF location_isaddress THEN
|
|
||||||
-- add postcode if we have one
|
|
||||||
-- (If multiple postcodes are available, we end up with the highest ranking one.)
|
|
||||||
IF location.postcode is not null THEN
|
|
||||||
postcode = location.postcode;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
address_havelevel[location.rank_address] := true;
|
|
||||||
-- add a hack against postcode ranks
|
|
||||||
IF NOT location.isguess
|
|
||||||
AND location.rank_address != 11 AND location.rank_address != 5
|
|
||||||
THEN
|
|
||||||
SELECT p.geometry FROM placex p
|
|
||||||
WHERE p.place_id = location.place_id INTO location_parent;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
IF location.rank_address > parent_place_id_rank THEN
|
|
||||||
parent_place_id = location.place_id;
|
|
||||||
parent_place_id_rank = location.rank_address;
|
|
||||||
END IF;
|
|
||||||
END IF;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
END LOOP;
|
END LOOP;
|
||||||
END;
|
END;
|
||||||
$$
|
$$
|
||||||
|
|||||||
@@ -272,21 +272,27 @@ END;
|
|||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION near_feature_rank_distance(rank_search INTEGER)
|
-- Create a bounding box with an extent computed from the radius (in meters)
|
||||||
RETURNS FLOAT
|
-- which in turn is derived from the given search rank.
|
||||||
|
CREATE OR REPLACE FUNCTION place_node_fuzzy_area(geom GEOMETRY, rank_search INTEGER)
|
||||||
|
RETURNS GEOMETRY
|
||||||
AS $$
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
radius FLOAT := 500;
|
||||||
BEGIN
|
BEGIN
|
||||||
IF rank_search <= 16 THEN -- city
|
IF rank_search <= 16 THEN -- city
|
||||||
RETURN 15000;
|
radius := 15000;
|
||||||
ELSIF rank_search <= 18 THEN -- town
|
ELSIF rank_search <= 18 THEN -- town
|
||||||
RETURN 4000;
|
radius := 4000;
|
||||||
ELSIF rank_search <= 19 THEN -- village
|
ELSIF rank_search <= 19 THEN -- village
|
||||||
RETURN 2000;
|
radius := 2000;
|
||||||
ELSIF rank_search <= 20 THEN -- hamlet
|
ELSIF rank_search <= 20 THEN -- hamlet
|
||||||
RETURN 1000;
|
radius := 1000;
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
RETURN 500;
|
RETURN ST_Envelope(ST_Collect(
|
||||||
|
ST_Project(geom, radius, 0.785398)::geometry,
|
||||||
|
ST_Project(geom, radius, 3.9269908)::geometry));
|
||||||
END;
|
END;
|
||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql IMMUTABLE;
|
LANGUAGE plpgsql IMMUTABLE;
|
||||||
@@ -301,7 +307,6 @@ CREATE OR REPLACE FUNCTION add_location(place_id BIGINT, country_code varchar(2)
|
|||||||
DECLARE
|
DECLARE
|
||||||
locationid INTEGER;
|
locationid INTEGER;
|
||||||
centroid GEOMETRY;
|
centroid GEOMETRY;
|
||||||
radius FLOAT;
|
|
||||||
secgeo GEOMETRY;
|
secgeo GEOMETRY;
|
||||||
postcode TEXT;
|
postcode TEXT;
|
||||||
BEGIN
|
BEGIN
|
||||||
@@ -321,13 +326,7 @@ BEGIN
|
|||||||
END LOOP;
|
END LOOP;
|
||||||
|
|
||||||
ELSEIF ST_GeometryType(geometry) = 'ST_Point' THEN
|
ELSEIF ST_GeometryType(geometry) = 'ST_Point' THEN
|
||||||
radius := near_feature_rank_distance(rank_search);
|
secgeo := place_node_fuzzy_area(geometry, rank_search);
|
||||||
--DEBUG: RAISE WARNING 'adding % radius %', place_id, radius;
|
|
||||||
|
|
||||||
-- Create a bounding box with an extent computed from the radius (in meters).
|
|
||||||
secgeo := ST_Envelope(ST_Collect(
|
|
||||||
ST_Project(geometry, radius, 0.785398)::geometry,
|
|
||||||
ST_Project(geometry, radius, 3.9269908)::geometry));
|
|
||||||
PERFORM insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, postcode, geometry, secgeo);
|
PERFORM insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, postcode, geometry, secgeo);
|
||||||
|
|
||||||
END IF;
|
END IF;
|
||||||
|
|||||||
Reference in New Issue
Block a user