move houseunumber handling to tokenizer

Normalization and token computation are now done in the tokenizer.
The tokenizer keeps a cache to the hundred most used house numbers
to keep the numbers of calls to the database low.
This commit is contained in:
Sarah Hoffmann
2021-04-25 11:47:29 +02:00
parent d711f5a81e
commit d8ed1bfc60
5 changed files with 101 additions and 34 deletions

View File

@@ -7,6 +7,7 @@ AS $$
SELECT (info->>'names')::INTEGER[]
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Get tokens for matching the place name against others.
--
-- This should usually be restricted to full name tokens.
@@ -17,6 +18,22 @@ AS $$
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Return the housenumber tokens applicable for the place.
CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
RETURNS INTEGER[]
AS $$
SELECT (info->>'hnr_tokens')::INTEGER[]
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Return the housenumber in the form that it can be matched during search.
CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
RETURNS TEXT
AS $$
SELECT info->>'hnr';
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Return token info that should be saved permanently in the database.
CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
RETURNS JSONB
@@ -75,26 +92,25 @@ END;
$$
LANGUAGE plpgsql;
-- Create housenumber tokens from an OSM addr:housenumber.
-- The housnumber is split at comma and semicolon as necessary.
-- The function returns the normalized form of the housenumber suitable
-- for comparison.
CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
RETURNS TEXT
CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[],
OUT tokens TEXT,
OUT normtext TEXT)
AS $$
DECLARE
normtext TEXT;
BEGIN
SELECT array_to_string(array_agg(trans), ';')
INTO normtext
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT
INTO normtext, tokens
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid
FROM (SELECT make_standard_name(h) as lookup_word
FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
return normtext;
FROM unnest(housenumbers) h) x) y;
END;
$$ LANGUAGE plpgsql STABLE STRICT;
CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
RETURNS INTEGER
AS $$