move postcode normalization into tokenizer

This commit is contained in:
Sarah Hoffmann
2021-04-25 18:26:36 +02:00
parent d8ed1bfc60
commit ffc2d82b0e
12 changed files with 181 additions and 74 deletions

View File

@@ -12,39 +12,47 @@ $$
LANGUAGE plpgsql IMMUTABLE;
-- Return the address to use for an interpolation line.
--
-- in_address: address tags of the interpolation way.
-- wayid:      id of the way in planet_osm_ways whose nodes are inspected.
--
-- When in_address has any key set other than exactly 'interpolation', it is
-- returned unchanged. Otherwise the nodes of the way are searched in placex
-- for one that has an address with a 'street' or 'place' key and an
-- indexed_status below 100. The first such address (no ordering is applied)
-- is merged with in_address and tagged with an empty '_inherited' key.
-- If no node qualifies, in_address is returned unchanged.
CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
  RETURNS HSTORE
AS $$
DECLARE
  way_node_ids BIGINT[];
  node_address HSTORE;
BEGIN
  IF akeys(in_address) != ARRAY['interpolation'] THEN
    RETURN in_address;
  END IF;

  SELECT nodes
    INTO way_node_ids
    FROM planet_osm_ways
   WHERE id = wayid;

  -- Without STRICT, SELECT INTO keeps the first row produced and sets FOUND.
  SELECT px.address
    INTO node_address
    FROM placex px
   WHERE px.osm_type = 'N'
     AND px.osm_id = ANY(way_node_ids)
     AND px.address IS NOT NULL
     AND (px.address ? 'street' OR px.address ? 'place')
     AND px.indexed_status < 100;

  IF FOUND THEN
    -- mark it as a derived address
    RETURN node_address || in_address || hstore('_inherited', '');
  END IF;

  RETURN in_address;
END;
$$
LANGUAGE plpgsql STABLE;
-- find the parent road of the cut road parts
CREATE OR REPLACE FUNCTION get_interpolation_parent(wayid BIGINT, street TEXT,
CREATE OR REPLACE FUNCTION get_interpolation_parent(street TEXT,
place TEXT, partition SMALLINT,
centroid GEOMETRY, geom GEOMETRY)
RETURNS BIGINT
AS $$
DECLARE
addr_street TEXT;
addr_place TEXT;
parent_place_id BIGINT;
waynodes BIGINT[];
location RECORD;
BEGIN
addr_street = street;
addr_place = place;
IF addr_street is null and addr_place is null THEN
select nodes from planet_osm_ways where id = wayid INTO waynodes;
FOR location IN SELECT placex.address from placex
where osm_type = 'N' and osm_id = ANY(waynodes)
and placex.address is not null
and (placex.address ? 'street' or placex.address ? 'place')
and indexed_status < 100
limit 1 LOOP
addr_street = location.address->'street';
addr_place = location.address->'place';
END LOOP;
END IF;
parent_place_id := find_parent_for_address(addr_street, addr_place,
partition, centroid);
parent_place_id := find_parent_for_address(street, place, partition, centroid);
IF parent_place_id is null THEN
FOR location IN SELECT place_id FROM placex
@@ -147,17 +155,20 @@ BEGIN
NEW.interpolationtype = NEW.address->'interpolation';
place_centroid := ST_PointOnSurface(NEW.linegeo);
NEW.parent_place_id = get_interpolation_parent(NEW.osm_id, NEW.address->'street',
NEW.parent_place_id = get_interpolation_parent(NEW.address->'street',
NEW.address->'place',
NEW.partition, place_centroid, NEW.linegeo);
IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
interpol_postcode := NEW.address->'postcode';
housenum := getorcreate_postcode_id(NEW.address->'postcode');
ELSE
interpol_postcode := NULL;
END IF;
IF NEW.address ? '_inherited' THEN
NEW.address := hstore('interpolation', NEW.interpolationtype);
END IF;
-- if the line was newly inserted, split the line as necessary
IF OLD.indexed_status = 1 THEN
select nodes from planet_osm_ways where id = NEW.osm_id INTO waynodes;

View File

@@ -817,10 +817,6 @@ BEGIN
IF NEW.address is not NULL THEN
addr_street := NEW.address->'street';
addr_place := NEW.address->'place';
IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN
i := getorcreate_postcode_id(NEW.address->'postcode');
END IF;
END IF;
NEW.postcode := null;

View File

@@ -34,6 +34,13 @@ AS $$
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Return the normalized form of a postcode: surrounding whitespace removed
-- and converted to upper case. A value containing ',' or ';' yields NULL.
-- The function is STRICT, so a NULL input yields NULL as well.
CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
  RETURNS TEXT
AS $$
  SELECT CASE
           WHEN postcode NOT SIMILAR TO '%(,|;)%' THEN upper(trim(postcode))
           ELSE NULL
         END;
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Return token info that should be saved permanently in the database.
CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
RETURNS JSONB
@@ -133,26 +140,26 @@ $$
LANGUAGE plpgsql;
-- NOTE(review): this hunk appears to show the removed lines of
-- getorcreate_postcode_id() and the added lines of create_postcode_id()
-- interleaved without +/- markers; the span is not executable as displayed.
--
-- create_postcode_id(postcode), as far as the added lines show:
--   * builds the lookup token as ' ' || make_standard_name(postcode);
--   * looks in table word for a row with that word_token, word = postcode,
--     class 'place' and type 'postcode';
--   * returns false as soon as one such row is found;
--   * otherwise inserts a new row using nextval('seq_word') as the id and
--     returns true.
-- The replaced getorcreate_postcode_id() applied upper(trim(...)) to its
-- argument and returned the word id; the new function uses the argument as
-- given -- presumably callers pass an already normalized postcode (verify
-- against the callers).
CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
RETURNS INTEGER
CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
RETURNS BOOLEAN
AS $$
DECLARE
r RECORD;
lookup_token TEXT;
lookup_word TEXT;
return_word_id INTEGER;
BEGIN
lookup_word := upper(trim(postcode));
lookup_token := ' ' || make_standard_name(lookup_word);
SELECT min(word_id) FROM word
WHERE word_token = lookup_token and word = lookup_word
lookup_token := ' ' || make_standard_name(postcode);
FOR r IN
SELECT word_id FROM word
WHERE word_token = lookup_token and word = postcode
and class='place' and type='postcode'
INTO return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
'place', 'postcode', null, 0);
END IF;
RETURN return_word_id;
LOOP
RETURN false;
END LOOP;
INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
'place', 'postcode', null, 0);
RETURN true;
END;
$$
LANGUAGE plpgsql;