mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-15 02:47:59 +00:00
This adds precomputation of abbreviated terms for names and removes the abbreviation of terms in the query. Basic import works but still needs thorough testing as well as speed improvements during import. Adds a new dependency on the Python library datrie.
158 lines
4.4 KiB
PL/PgSQL
158 lines
4.4 KiB
PL/PgSQL
-- Get tokens used for searching the given place.
--
-- These are the tokens that will be saved in the search_name table.
-- The 'names' entry of the token info is read as text and converted to an
-- integer array. The result is NULL when info is NULL or has no such entry.
CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
  AS $$
  SELECT CAST(info ->> 'names' AS INTEGER[])
$$;
|
|
|
|
|
|
-- Get tokens for matching the place name against others.
--
-- This should usually be restricted to full name tokens.
-- The value is taken from the 'names' entry of the token info, read as
-- text and converted to an integer array. The result is NULL when info is
-- NULL or has no such entry.
CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
  AS $$
  SELECT CAST(info ->> 'names' AS INTEGER[])
$$;
|
|
|
|
|
|
-- Return the housenumber tokens applicable for the place.
--
-- The tokens are read from the 'hnr_tokens' entry of the token info and
-- converted from text to an integer array. The result is NULL when info
-- is NULL or has no such entry.
CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
  AS $$
  SELECT CAST(info ->> 'hnr_tokens' AS INTEGER[])
$$;
|
|
|
|
|
|
-- Return the housenumber in the form that it can be matched during search.
--
-- This is the text value of the 'hnr' entry of the token info, or NULL
-- when info is NULL or has no such entry.
CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
  RETURNS TEXT
  LANGUAGE SQL IMMUTABLE STRICT
  AS $$
  SELECT info ->> 'hnr'
$$;
|
|
|
|
|
|
-- Return the tokens stored under the 'street' entry of the token info,
-- converted from text to an integer array. The result is NULL when info
-- is NULL or has no such entry.
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
  AS $$
  SELECT CAST(info ->> 'street' AS INTEGER[])
$$;
|
|
|
|
|
|
-- Return the tokens stored under the 'place_match' entry of the token
-- info, converted from text to an integer array. The result is NULL when
-- info is NULL or has no such entry.
CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
  AS $$
  SELECT CAST(info ->> 'place_match' AS INTEGER[])
$$;
|
|
|
|
|
|
-- Return the tokens stored under the 'place_search' entry of the token
-- info, converted from text to an integer array. The result is NULL when
-- info is NULL or has no such entry.
CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
  AS $$
  SELECT CAST(info ->> 'place_search' AS INTEGER[])
$$;
|
|
|
|
|
|
-- Row type returned by token_get_address_tokens(): one entry of the
-- 'addr' object of the token info together with its two token lists.
--
-- The type is dropped first so that it can be recreated with a changed
-- layout; CASCADE also removes objects depending on the old definition.
DROP TYPE IF EXISTS token_addresstoken CASCADE;

CREATE TYPE token_addresstoken AS (
  key           TEXT,       -- key of the entry in the 'addr' object
  match_tokens  INTEGER[],  -- tokens for matching
  search_tokens INTEGER[]   -- tokens for searching
);
|
|
|
|
-- Expand the 'addr' object of the token info into one row per entry.
--
-- Each value is indexed as a two-element JSON array: element 1 is
-- converted into the match tokens and element 0 into the search tokens.
-- No rows are returned when info is NULL or has no 'addr' entry.
CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
  RETURNS SETOF token_addresstoken
  LANGUAGE SQL IMMUTABLE STRICT
  AS $$
  SELECT addr.key,
         CAST(addr.value ->> 1 AS INT[]) AS match_tokens,
         CAST(addr.value ->> 0 AS INT[]) AS search_tokens
    FROM jsonb_each(info -> 'addr') AS addr(key, value)
$$;
|
|
|
|
|
|
-- Return the postcode in its normalized form: surrounding blanks removed
-- and converted to upper case.
--
-- A postcode containing a comma or a semicolon yields NULL, as does a
-- NULL input.
CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
  RETURNS TEXT
  LANGUAGE SQL IMMUTABLE STRICT
  AS $$
  SELECT CASE
           -- The function is STRICT, so postcode is never NULL here and the
           -- negated pattern test is always either true or false.
           WHEN postcode NOT SIMILAR TO '%(,|;)%' THEN upper(trim(postcode))
         END
$$;
|
|
|
|
|
|
-- Return token info that should be saved permanently in the database.
--
-- This implementation keeps nothing: the result is always NULL.
CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
  RETURNS JSONB
  LANGUAGE SQL IMMUTABLE STRICT
  AS $$
  SELECT CAST(NULL AS JSONB)
$$;
|
|
|
|
--------------- private functions ----------------------------------------------
|
|
|
|
-- Look up or create the full-word token of a name together with the
-- partial-word tokens of its lookup terms.
--
-- Parameters:
--   norm_term     value looked up in and written to word.word
--   lookup_terms  terms under which the name is stored; each term becomes
--                 one full-word row (token prefixed with a blank) and is
--                 also split at blanks into partial terms
-- Output:
--   full_token      word id of the full-word rows of norm_term
--   partial_tokens  ids of the distinct partial terms whose
--                   search_name_count is below the max_word_freq value
--                   filled in when this template is rendered
--
-- Rows that do not exist yet are inserted into the word table with ids
-- taken from the sequence seq_word.
CREATE OR REPLACE FUNCTION getorcreate_full_word(norm_term TEXT, lookup_terms TEXT[],
                                                 OUT full_token INT,
                                                 OUT partial_tokens INT[])
  AS $$
DECLARE
  partial_terms TEXT[] = '{}'::TEXT[];
  term TEXT;
  term_id INTEGER;
  term_count INTEGER;
BEGIN
  SELECT min(word_id) INTO full_token
    FROM word WHERE word = norm_term and class is null and country_code is null;

  IF full_token IS NULL THEN
    -- All lookup terms of a new name share a single word id.
    full_token := nextval('seq_word');
    INSERT INTO word (word_id, word_token, word, search_name_count)
      SELECT full_token, ' ' || lookup_term, norm_term, 0 FROM unnest(lookup_terms) as lookup_term;
  END IF;

  -- Collect the distinct partial terms in order of first appearance.
  FOR term IN SELECT unnest(string_to_array(unnest(lookup_terms), ' ')) LOOP
    term := trim(term);
    -- Leading, trailing or repeated blanks in a lookup term make
    -- string_to_array() return empty strings. These are not words and
    -- must not end up as zero-length tokens in the word table.
    IF term <> '' AND NOT (ARRAY[term] <@ partial_terms) THEN
      partial_terms := partial_terms || term;
    END IF;
  END LOOP;

  partial_tokens := '{}'::INT[];
  FOR term IN SELECT unnest(partial_terms) LOOP
    SELECT min(word_id), max(search_name_count) INTO term_id, term_count
      FROM word WHERE word_token = term and class is null and country_code is null;

    IF term_id IS NULL THEN
      term_id := nextval('seq_word');
      term_count := 0;
      INSERT INTO word (word_id, word_token, search_name_count)
        VALUES (term_id, term, 0);
    END IF;

    -- Terms at or above the frequency limit are left out of the result.
    IF term_count < {{ max_word_freq }} THEN
      -- array_merge() is defined outside this file; presumably it adds
      -- the id to the list without duplicates -- verify.
      partial_tokens := array_merge(partial_tokens, ARRAY[term_id]);
    END IF;
  END LOOP;
END;
$$
LANGUAGE plpgsql;
|
|
|
|
|
|
-- Return the word id of the house number token for lookup_term, creating
-- the entry in the word table when it does not exist yet.
--
-- House number tokens are stored with a leading blank and are marked with
-- class 'place' and type 'house'. New ids come from the sequence seq_word.
CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT)
  RETURNS INTEGER
  AS $$
DECLARE
  hnr_token TEXT := ' ' || lookup_term;
  hnr_id INTEGER;
BEGIN
  SELECT min(w.word_id) INTO hnr_id
    FROM word w
   WHERE w.word_token = hnr_token
         AND w.class = 'place'
         AND w.type = 'house';

  -- Known house number: nothing to create.
  IF hnr_id IS NOT NULL THEN
    RETURN hnr_id;
  END IF;

  hnr_id := nextval('seq_word');
  INSERT INTO word (word_id, word_token, class, type, search_name_count)
    VALUES (hnr_id, hnr_token, 'place', 'house', 0);

  RETURN hnr_id;
END;
$$
LANGUAGE plpgsql;
|