mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
Merge pull request #2454 from lonvia/sort-out-token-assignment-in-sql
ICU tokenizer: switch match method to using partial terms
This commit is contained in:
@@ -43,7 +43,7 @@ LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
-- find the parent road of the cut road parts
|
||||
CREATE OR REPLACE FUNCTION get_interpolation_parent(street INTEGER[], place INTEGER[],
|
||||
CREATE OR REPLACE FUNCTION get_interpolation_parent(token_info JSONB,
|
||||
partition SMALLINT,
|
||||
centroid GEOMETRY, geom GEOMETRY)
|
||||
RETURNS BIGINT
|
||||
@@ -52,7 +52,7 @@ DECLARE
|
||||
parent_place_id BIGINT;
|
||||
location RECORD;
|
||||
BEGIN
|
||||
parent_place_id := find_parent_for_address(street, place, partition, centroid);
|
||||
parent_place_id := find_parent_for_address(token_info, partition, centroid);
|
||||
|
||||
IF parent_place_id is null THEN
|
||||
FOR location IN SELECT place_id FROM placex
|
||||
@@ -155,9 +155,8 @@ BEGIN
|
||||
NEW.interpolationtype = NEW.address->'interpolation';
|
||||
|
||||
place_centroid := ST_PointOnSurface(NEW.linegeo);
|
||||
NEW.parent_place_id = get_interpolation_parent(token_addr_street_match_tokens(NEW.token_info),
|
||||
token_addr_place_match_tokens(NEW.token_info),
|
||||
NEW.partition, place_centroid, NEW.linegeo);
|
||||
NEW.parent_place_id = get_interpolation_parent(NEW.token_info, NEW.partition,
|
||||
place_centroid, NEW.linegeo);
|
||||
|
||||
interpol_postcode := token_normalized_postcode(NEW.address->'postcode');
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ LANGUAGE plpgsql STABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_address_place(in_partition SMALLINT, feature GEOMETRY,
|
||||
from_rank SMALLINT, to_rank SMALLINT,
|
||||
extent FLOAT, tokens INT[])
|
||||
extent FLOAT, token_info JSONB, key TEXT)
|
||||
RETURNS nearfeaturecentr
|
||||
AS $$
|
||||
DECLARE
|
||||
@@ -80,7 +80,7 @@ BEGIN
|
||||
FROM location_area_large_{{ partition }}
|
||||
WHERE geometry && ST_Expand(feature, extent)
|
||||
AND rank_address between from_rank and to_rank
|
||||
AND tokens && keywords
|
||||
AND token_matches_address(token_info, key, keywords)
|
||||
GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
|
||||
ORDER BY bool_or(ST_Intersects(geometry, feature)), distance LIMIT 1;
|
||||
RETURN r;
|
||||
@@ -148,18 +148,21 @@ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION getNearestNamedRoadPlaceId(in_partition INTEGER,
|
||||
point GEOMETRY,
|
||||
isin_token INTEGER[])
|
||||
token_info JSONB)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
parent BIGINT;
|
||||
BEGIN
|
||||
IF not token_has_addr_street(token_info) THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
{% for partition in db.partitions %}
|
||||
IF in_partition = {{ partition }} THEN
|
||||
SELECT place_id FROM search_name_{{ partition }}
|
||||
INTO parent
|
||||
WHERE name_vector && isin_token
|
||||
WHERE token_matches_street(token_info, name_vector)
|
||||
AND centroid && ST_Expand(point, 0.015)
|
||||
AND address_rank between 26 and 27
|
||||
ORDER BY ST_Distance(centroid, point) ASC limit 1;
|
||||
@@ -174,19 +177,22 @@ LANGUAGE plpgsql STABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION getNearestNamedPlacePlaceId(in_partition INTEGER,
|
||||
point GEOMETRY,
|
||||
isin_token INTEGER[])
|
||||
token_info JSONB)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
parent BIGINT;
|
||||
BEGIN
|
||||
IF not token_has_addr_place(token_info) THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
{% for partition in db.partitions %}
|
||||
IF in_partition = {{ partition }} THEN
|
||||
SELECT place_id
|
||||
INTO parent
|
||||
FROM search_name_{{ partition }}
|
||||
WHERE name_vector && isin_token
|
||||
WHERE token_matches_place(token_info, name_vector)
|
||||
AND centroid && ST_Expand(point, 0.04)
|
||||
AND address_rank between 16 and 25
|
||||
ORDER BY ST_Distance(centroid, point) ASC limit 1;
|
||||
|
||||
@@ -104,8 +104,7 @@ CREATE OR REPLACE FUNCTION find_parent_for_poi(poi_osm_type CHAR(1),
|
||||
poi_osm_id BIGINT,
|
||||
poi_partition SMALLINT,
|
||||
bbox GEOMETRY,
|
||||
addr_street INTEGER[],
|
||||
addr_place INTEGER[],
|
||||
token_info JSONB,
|
||||
is_place_addr BOOLEAN)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
@@ -119,8 +118,7 @@ BEGIN
|
||||
parent_place_id := find_associated_street(poi_osm_type, poi_osm_id);
|
||||
|
||||
IF parent_place_id is null THEN
|
||||
parent_place_id := find_parent_for_address(addr_street, addr_place,
|
||||
poi_partition, bbox);
|
||||
parent_place_id := find_parent_for_address(token_info, poi_partition, bbox);
|
||||
END IF;
|
||||
|
||||
IF parent_place_id is null and poi_osm_type = 'N' THEN
|
||||
@@ -333,13 +331,14 @@ BEGIN
|
||||
WHERE s.place_id = parent_place_id;
|
||||
|
||||
FOR addr_item IN
|
||||
SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
|
||||
FROM token_get_address_tokens(token_info)
|
||||
WHERE not search_tokens <@ parent_address_vector
|
||||
SELECT (get_addr_tag_rank(key, country)).*, key,
|
||||
token_get_address_search_tokens(token_info, key) as search_tokens
|
||||
FROM token_get_address_keys(token_info) as key
|
||||
WHERE not token_get_address_search_tokens(token_info, key) <@ parent_address_vector
|
||||
LOOP
|
||||
addr_place := get_address_place(in_partition, geometry,
|
||||
addr_item.from_rank, addr_item.to_rank,
|
||||
addr_item.extent, addr_item.match_tokens);
|
||||
addr_item.extent, token_info, addr_item.key);
|
||||
|
||||
IF addr_place is null THEN
|
||||
-- No place found in OSM that matches. Make it at least searchable.
|
||||
@@ -447,14 +446,16 @@ BEGIN
|
||||
|
||||
FOR location IN
|
||||
SELECT (get_address_place(partition, geometry, from_rank, to_rank,
|
||||
extent, match_tokens)).*, search_tokens
|
||||
FROM (SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
|
||||
FROM token_get_address_tokens(token_info)) x
|
||||
extent, token_info, key)).*, key
|
||||
FROM (SELECT (get_addr_tag_rank(key, country)).*, key
|
||||
FROM token_get_address_keys(token_info) as key) x
|
||||
ORDER BY rank_address, distance, isguess desc
|
||||
LOOP
|
||||
IF location.place_id is null THEN
|
||||
{% if not db.reverse_only %}
|
||||
nameaddress_vector := array_merge(nameaddress_vector, location.search_tokens);
|
||||
nameaddress_vector := array_merge(nameaddress_vector,
|
||||
token_get_address_search_tokens(token_info,
|
||||
location.key));
|
||||
{% endif %}
|
||||
ELSE
|
||||
{% if not db.reverse_only %}
|
||||
@@ -689,9 +690,6 @@ DECLARE
|
||||
parent_address_level SMALLINT;
|
||||
place_address_level SMALLINT;
|
||||
|
||||
addr_street INTEGER[];
|
||||
addr_place INTEGER[];
|
||||
|
||||
max_rank SMALLINT;
|
||||
|
||||
name_vector INTEGER[];
|
||||
@@ -860,8 +858,6 @@ BEGIN
|
||||
END IF;
|
||||
|
||||
NEW.housenumber := token_normalized_housenumber(NEW.token_info);
|
||||
addr_street := token_addr_street_match_tokens(NEW.token_info);
|
||||
addr_place := token_addr_place_match_tokens(NEW.token_info);
|
||||
|
||||
NEW.postcode := null;
|
||||
|
||||
@@ -907,7 +903,7 @@ BEGIN
|
||||
NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
|
||||
NEW.partition,
|
||||
ST_Envelope(NEW.geometry),
|
||||
addr_street, addr_place,
|
||||
NEW.token_info,
|
||||
is_place_address);
|
||||
|
||||
-- If we found the road take a shortcut here.
|
||||
|
||||
@@ -215,13 +215,12 @@ LANGUAGE plpgsql STABLE;
|
||||
|
||||
-- Find the parent of an address with addr:street/addr:place tag.
|
||||
--
|
||||
-- \param street Value of addr:street or NULL if tag is missing.
|
||||
-- \param place Value of addr:place or NULL if tag is missing.
|
||||
-- \param token_info Naming info with the address information.
|
||||
-- \param partition Partition where to search the parent.
|
||||
-- \param centroid Location of the address.
|
||||
--
|
||||
-- \return Place ID of the parent if one was found, NULL otherwise.
|
||||
CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEGER[],
|
||||
CREATE OR REPLACE FUNCTION find_parent_for_address(token_info JSONB,
|
||||
partition SMALLINT,
|
||||
centroid GEOMETRY)
|
||||
RETURNS BIGINT
|
||||
@@ -229,30 +228,22 @@ CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEG
|
||||
DECLARE
|
||||
parent_place_id BIGINT;
|
||||
BEGIN
|
||||
IF street is not null THEN
|
||||
-- Check for addr:street attributes
|
||||
-- Note that addr:street links can only be indexed, once the street itself is indexed
|
||||
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, street);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
-- Check for addr:street attributes
|
||||
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, token_info);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent from addr:street: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
|
||||
-- Check for addr:place attributes.
|
||||
IF place is not null THEN
|
||||
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, place);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN NULL;
|
||||
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, token_info);
|
||||
{% if debug %}RAISE WARNING 'Get parent from addr:place: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION delete_location(OLD_place_id BIGINT)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
|
||||
@@ -14,7 +14,6 @@ DECLARE
|
||||
out_partition INTEGER;
|
||||
out_parent_place_id BIGINT;
|
||||
location RECORD;
|
||||
address_street_word_ids INTEGER[];
|
||||
|
||||
BEGIN
|
||||
|
||||
@@ -54,13 +53,9 @@ BEGIN
|
||||
|
||||
place_centroid := ST_Centroid(linegeo);
|
||||
out_partition := get_partition('us');
|
||||
out_parent_place_id := null;
|
||||
|
||||
address_street_word_ids := token_addr_street_match_tokens(token_info);
|
||||
IF address_street_word_ids IS NOT NULL THEN
|
||||
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
|
||||
address_street_word_ids);
|
||||
END IF;
|
||||
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
|
||||
token_info);
|
||||
|
||||
IF out_parent_place_id IS NULL THEN
|
||||
SELECT getNearestParallelRoadFeature(out_partition, linegeo)
|
||||
|
||||
@@ -34,40 +34,59 @@ AS $$
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[]
|
||||
SELECT info->>'street' is not null;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_place(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'place' is not null;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[] <@ street_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
CREATE OR REPLACE FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'place_match')::INTEGER[]
|
||||
SELECT (info->>'place')::INTEGER[] <@ place_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->>'place_search')::INTEGER[]
|
||||
SELECT (info->>'place')::INTEGER[]
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
DROP TYPE IF EXISTS token_addresstoken CASCADE;
|
||||
CREATE TYPE token_addresstoken AS (
|
||||
key TEXT,
|
||||
match_tokens INT[],
|
||||
search_tokens INT[]
|
||||
);
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
|
||||
RETURNS SETOF token_addresstoken
|
||||
CREATE OR REPLACE FUNCTION token_get_address_keys(info JSONB)
|
||||
RETURNS SETOF TEXT
|
||||
AS $$
|
||||
SELECT key, (value->>1)::int[] as match_tokens,
|
||||
(value->>0)::int[] as search_tokens
|
||||
FROM jsonb_each(info->'addr');
|
||||
SELECT * FROM jsonb_object_keys(info->'addr');
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_search_tokens(info JSONB, key TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->'addr'->>key)::INTEGER[];
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->'addr'->>key)::INTEGER[] <@ tokens;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
@@ -127,15 +146,34 @@ BEGIN
|
||||
VALUES (term_id, term, 'w', json_build_object('count', term_count));
|
||||
END IF;
|
||||
|
||||
IF term_count < {{ max_word_freq }} THEN
|
||||
partial_tokens := array_merge(partial_tokens, ARRAY[term_id]);
|
||||
END IF;
|
||||
partial_tokens := array_merge(partial_tokens, ARRAY[term_id]);
|
||||
END LOOP;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_partial_word(partial TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
token INTEGER;
|
||||
BEGIN
|
||||
SELECT min(word_id) INTO token
|
||||
FROM word WHERE word_token = partial and type = 'w';
|
||||
|
||||
IF token IS NULL THEN
|
||||
token := nextval('seq_word');
|
||||
INSERT INTO word (word_id, word_token, type, info)
|
||||
VALUES (token, partial, 'w', json_build_object('count', 0));
|
||||
END IF;
|
||||
|
||||
RETURN token;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
|
||||
@@ -34,17 +34,31 @@ AS $$
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[]
|
||||
SELECT info->>'street' is not null;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_place(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'place_match' is not null;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[] && street_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
CREATE OR REPLACE FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'place_match')::INTEGER[]
|
||||
SELECT (info->>'place_match')::INTEGER[] && place_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
@@ -55,19 +69,24 @@ AS $$
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
DROP TYPE IF EXISTS token_addresstoken CASCADE;
|
||||
CREATE TYPE token_addresstoken AS (
|
||||
key TEXT,
|
||||
match_tokens INT[],
|
||||
search_tokens INT[]
|
||||
);
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
|
||||
RETURNS SETOF token_addresstoken
|
||||
CREATE OR REPLACE FUNCTION token_get_address_keys(info JSONB)
|
||||
RETURNS SETOF TEXT
|
||||
AS $$
|
||||
SELECT key, (value->>1)::int[] as match_tokens,
|
||||
(value->>0)::int[] as search_tokens
|
||||
FROM jsonb_each(info->'addr');
|
||||
SELECT * FROM jsonb_object_keys(info->'addr');
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_search_tokens(info JSONB, key TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->'addr'->key->>0)::INTEGER[];
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->'addr'->key->>1)::INTEGER[] && tokens;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user