Merge pull request #2454 from lonvia/sort-out-token-assignment-in-sql

ICU tokenizer: switch match method to using partial terms
This commit is contained in:
Sarah Hoffmann
2021-09-28 09:45:15 +02:00
committed by GitHub
11 changed files with 236 additions and 189 deletions

View File

@@ -43,7 +43,7 @@ LANGUAGE plpgsql STABLE;
-- find the parent road of the cut road parts
CREATE OR REPLACE FUNCTION get_interpolation_parent(street INTEGER[], place INTEGER[],
CREATE OR REPLACE FUNCTION get_interpolation_parent(token_info JSONB,
partition SMALLINT,
centroid GEOMETRY, geom GEOMETRY)
RETURNS BIGINT
@@ -52,7 +52,7 @@ DECLARE
parent_place_id BIGINT;
location RECORD;
BEGIN
parent_place_id := find_parent_for_address(street, place, partition, centroid);
parent_place_id := find_parent_for_address(token_info, partition, centroid);
IF parent_place_id is null THEN
FOR location IN SELECT place_id FROM placex
@@ -155,9 +155,8 @@ BEGIN
NEW.interpolationtype = NEW.address->'interpolation';
place_centroid := ST_PointOnSurface(NEW.linegeo);
NEW.parent_place_id = get_interpolation_parent(token_addr_street_match_tokens(NEW.token_info),
token_addr_place_match_tokens(NEW.token_info),
NEW.partition, place_centroid, NEW.linegeo);
NEW.parent_place_id = get_interpolation_parent(NEW.token_info, NEW.partition,
place_centroid, NEW.linegeo);
interpol_postcode := token_normalized_postcode(NEW.address->'postcode');

View File

@@ -66,7 +66,7 @@ LANGUAGE plpgsql STABLE;
CREATE OR REPLACE FUNCTION get_address_place(in_partition SMALLINT, feature GEOMETRY,
from_rank SMALLINT, to_rank SMALLINT,
extent FLOAT, tokens INT[])
extent FLOAT, token_info JSONB, key TEXT)
RETURNS nearfeaturecentr
AS $$
DECLARE
@@ -80,7 +80,7 @@ BEGIN
FROM location_area_large_{{ partition }}
WHERE geometry && ST_Expand(feature, extent)
AND rank_address between from_rank and to_rank
AND tokens && keywords
AND token_matches_address(token_info, key, keywords)
GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
ORDER BY bool_or(ST_Intersects(geometry, feature)), distance LIMIT 1;
RETURN r;
@@ -148,18 +148,21 @@ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION getNearestNamedRoadPlaceId(in_partition INTEGER,
point GEOMETRY,
isin_token INTEGER[])
token_info JSONB)
RETURNS BIGINT
AS $$
DECLARE
parent BIGINT;
BEGIN
IF not token_has_addr_street(token_info) THEN
RETURN NULL;
END IF;
{% for partition in db.partitions %}
IF in_partition = {{ partition }} THEN
SELECT place_id FROM search_name_{{ partition }}
INTO parent
WHERE name_vector && isin_token
WHERE token_matches_street(token_info, name_vector)
AND centroid && ST_Expand(point, 0.015)
AND address_rank between 26 and 27
ORDER BY ST_Distance(centroid, point) ASC limit 1;
@@ -174,19 +177,22 @@ LANGUAGE plpgsql STABLE;
CREATE OR REPLACE FUNCTION getNearestNamedPlacePlaceId(in_partition INTEGER,
point GEOMETRY,
isin_token INTEGER[])
token_info JSONB)
RETURNS BIGINT
AS $$
DECLARE
parent BIGINT;
BEGIN
IF not token_has_addr_place(token_info) THEN
RETURN NULL;
END IF;
{% for partition in db.partitions %}
IF in_partition = {{ partition }} THEN
SELECT place_id
INTO parent
FROM search_name_{{ partition }}
WHERE name_vector && isin_token
WHERE token_matches_place(token_info, name_vector)
AND centroid && ST_Expand(point, 0.04)
AND address_rank between 16 and 25
ORDER BY ST_Distance(centroid, point) ASC limit 1;

View File

@@ -104,8 +104,7 @@ CREATE OR REPLACE FUNCTION find_parent_for_poi(poi_osm_type CHAR(1),
poi_osm_id BIGINT,
poi_partition SMALLINT,
bbox GEOMETRY,
addr_street INTEGER[],
addr_place INTEGER[],
token_info JSONB,
is_place_addr BOOLEAN)
RETURNS BIGINT
AS $$
@@ -119,8 +118,7 @@ BEGIN
parent_place_id := find_associated_street(poi_osm_type, poi_osm_id);
IF parent_place_id is null THEN
parent_place_id := find_parent_for_address(addr_street, addr_place,
poi_partition, bbox);
parent_place_id := find_parent_for_address(token_info, poi_partition, bbox);
END IF;
IF parent_place_id is null and poi_osm_type = 'N' THEN
@@ -333,13 +331,14 @@ BEGIN
WHERE s.place_id = parent_place_id;
FOR addr_item IN
SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
FROM token_get_address_tokens(token_info)
WHERE not search_tokens <@ parent_address_vector
SELECT (get_addr_tag_rank(key, country)).*, key,
token_get_address_search_tokens(token_info, key) as search_tokens
FROM token_get_address_keys(token_info) as key
WHERE not token_get_address_search_tokens(token_info, key) <@ parent_address_vector
LOOP
addr_place := get_address_place(in_partition, geometry,
addr_item.from_rank, addr_item.to_rank,
addr_item.extent, addr_item.match_tokens);
addr_item.extent, token_info, addr_item.key);
IF addr_place is null THEN
-- No place found in OSM that matches. Make it at least searchable.
@@ -447,14 +446,16 @@ BEGIN
FOR location IN
SELECT (get_address_place(partition, geometry, from_rank, to_rank,
extent, match_tokens)).*, search_tokens
FROM (SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
FROM token_get_address_tokens(token_info)) x
extent, token_info, key)).*, key
FROM (SELECT (get_addr_tag_rank(key, country)).*, key
FROM token_get_address_keys(token_info) as key) x
ORDER BY rank_address, distance, isguess desc
LOOP
IF location.place_id is null THEN
{% if not db.reverse_only %}
nameaddress_vector := array_merge(nameaddress_vector, location.search_tokens);
nameaddress_vector := array_merge(nameaddress_vector,
token_get_address_search_tokens(token_info,
location.key));
{% endif %}
ELSE
{% if not db.reverse_only %}
@@ -689,9 +690,6 @@ DECLARE
parent_address_level SMALLINT;
place_address_level SMALLINT;
addr_street INTEGER[];
addr_place INTEGER[];
max_rank SMALLINT;
name_vector INTEGER[];
@@ -860,8 +858,6 @@ BEGIN
END IF;
NEW.housenumber := token_normalized_housenumber(NEW.token_info);
addr_street := token_addr_street_match_tokens(NEW.token_info);
addr_place := token_addr_place_match_tokens(NEW.token_info);
NEW.postcode := null;
@@ -907,7 +903,7 @@ BEGIN
NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
NEW.partition,
ST_Envelope(NEW.geometry),
addr_street, addr_place,
NEW.token_info,
is_place_address);
-- If we found the road take a shortcut here.

View File

@@ -215,13 +215,12 @@ LANGUAGE plpgsql STABLE;
-- Find the parent of an address with addr:street/addr:place tag.
--
-- \param street Value of addr:street or NULL if tag is missing.
-- \param place Value of addr:place or NULL if tag is missing.
-- \param token_info Naming info with the address information.
-- \param partition Partition where to search the parent.
-- \param centroid Location of the address.
--
-- \return Place ID of the parent if one was found, NULL otherwise.
CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEGER[],
CREATE OR REPLACE FUNCTION find_parent_for_address(token_info JSONB,
partition SMALLINT,
centroid GEOMETRY)
RETURNS BIGINT
@@ -229,30 +228,22 @@ CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEG
DECLARE
parent_place_id BIGINT;
BEGIN
IF street is not null THEN
-- Check for addr:street attributes
-- Note that addr:street links can only be indexed, once the street itself is indexed
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, street);
IF parent_place_id is not null THEN
{% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %}
RETURN parent_place_id;
END IF;
-- Check for addr:street attributes
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, token_info);
IF parent_place_id is not null THEN
{% if debug %}RAISE WARNING 'Get parent from addr:street: %', parent_place_id;{% endif %}
RETURN parent_place_id;
END IF;
-- Check for addr:place attributes.
IF place is not null THEN
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, place);
IF parent_place_id is not null THEN
{% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %}
RETURN parent_place_id;
END IF;
END IF;
RETURN NULL;
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, token_info);
{% if debug %}RAISE WARNING 'Get parent from addr:place: %', parent_place_id;{% endif %}
RETURN parent_place_id;
END;
$$
LANGUAGE plpgsql STABLE;
CREATE OR REPLACE FUNCTION delete_location(OLD_place_id BIGINT)
RETURNS BOOLEAN
AS $$