export more data for the tokenizer name preparation

Adds class, type, country code and address rank to the information
exported by the indexing preparation function and removes the
special-case `country_feature` field. Whether a place represents a
country boundary is now computed by the tokenizer.
This commit is contained in:
Sarah Hoffmann
2021-09-29 11:54:14 +02:00
parent 231250f2eb
commit be65c8303f
7 changed files with 85 additions and 45 deletions

View File

@@ -1,30 +1,33 @@
-- Trigger functions for the placex table. -- Trigger functions for the placex table.
-- Information returned by update preparation.
DROP TYPE IF EXISTS prepare_update_info CASCADE;
CREATE TYPE prepare_update_info AS (
name HSTORE,
address HSTORE,
rank_address SMALLINT,
country_code TEXT,
class TEXT,
type TEXT,
linked_place_id BIGINT
);
-- Retrieve the data needed by the indexer for updating the place. -- Retrieve the data needed by the indexer for updating the place.
-- CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex)
-- Return parameters: RETURNS prepare_update_info
-- name list of names
-- address list of address tags, either from the object or a surrounding
-- building
-- country_feature If the place is a country feature, this contains the
-- country code, otherwise it is null.
CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
OUT name HSTORE,
OUT address HSTORE,
OUT country_feature VARCHAR,
OUT linked_place_id BIGINT)
AS $$ AS $$
DECLARE DECLARE
location RECORD; location RECORD;
result prepare_update_info;
BEGIN BEGIN
-- For POI nodes, check if the address should be derived from a surrounding -- For POI nodes, check if the address should be derived from a surrounding
-- building. -- building.
IF p.rank_search < 30 OR p.osm_type != 'N' OR p.address is not null THEN IF p.rank_search < 30 OR p.osm_type != 'N' OR p.address is not null THEN
address := p.address; result.address := p.address;
ELSE ELSE
-- The additional && condition works around the misguided query -- The additional && condition works around the misguided query
-- planner of postgis 3.0. -- planner of postgis 3.0.
SELECT placex.address || hstore('_inherited', '') INTO address SELECT placex.address || hstore('_inherited', '') INTO result.address
FROM placex FROM placex
WHERE ST_Covers(geometry, p.centroid) WHERE ST_Covers(geometry, p.centroid)
and geometry && p.centroid and geometry && p.centroid
@@ -34,27 +37,26 @@ BEGIN
LIMIT 1; LIMIT 1;
END IF; END IF;
address := address - '_unlisted_place'::TEXT; result.address := result.address - '_unlisted_place'::TEXT;
name := p.name; result.name := p.name;
result.class := p.class;
result.type := p.type;
result.country_code := p.country_code;
result.rank_address := p.rank_address;
-- Names of linked places need to be merged in, so search for a linkable -- Names of linked places need to be merged in, so search for a linkable
-- place already here. -- place already here.
SELECT * INTO location FROM find_linked_place(p); SELECT * INTO location FROM find_linked_place(p);
IF location.place_id is not NULL THEN IF location.place_id is not NULL THEN
linked_place_id := location.place_id; result.linked_place_id := location.place_id;
IF NOT location.name IS NULL THEN IF NOT location.name IS NULL THEN
name := location.name || name; result.name := location.name || result.name;
END IF; END IF;
END IF; END IF;
country_feature := CASE WHEN p.admin_level = 2 RETURN result;
and p.class = 'boundary' and p.type = 'administrative'
and p.osm_type = 'R'
THEN p.country_code
ELSE null
END;
END; END;
$$ $$
LANGUAGE plpgsql STABLE; LANGUAGE plpgsql STABLE;

View File

@@ -38,7 +38,31 @@ class PlaceInfo:
@property @property
def country_feature(self): def country_code(self):
""" Return the country code if the place is a valid country boundary. """ The country code of the country the place is in. Guaranteed
to be a two-letter lower-case string or None, if no country
could be found.
""" """
return self._info.get('country_feature') return self._info.get('country_code')
@property
def rank_address(self):
    """ The computed rank address before rank correction.

        The value is looked up under the 'rank_address' key of the
        place information; the lookup uses `.get()`, so a missing
        key yields None rather than raising (assuming `_info` is a
        dict-like mapping -- confirm against the constructor).
    """
    return self._info.get('rank_address')
def is_a(self, key, value):
    """ Check if the place's primary tag corresponds to the given
        key and value.

        `key` is compared against the 'class' entry and `value`
        against the 'type' entry of the place information. Both must
        match for the result to be true; a missing entry never matches
        a non-None argument.
    """
    return self._info.get('class') == key and self._info.get('type') == value
def is_country(self):
    """ Check if the place is a valid country boundary.

        A place qualifies when all of the following hold:
          * its address rank is 4,
          * its primary tag is boundary=administrative,
          * a country code is available for it.
    """
    # Conditions are evaluated left to right and short-circuit, so the
    # tag and country-code lookups are skipped once the rank mismatches.
    return (self.rank_address == 4
            and self.is_a('boundary', 'administrative')
            and self.country_code is not None)

View File

@@ -39,7 +39,7 @@ class AbstractPlacexRunner:
@staticmethod @staticmethod
def get_place_details(worker, ids): def get_place_details(worker, ids):
worker.perform("""SELECT place_id, (placex_prepare_update(placex)).* worker.perform("""SELECT place_id, (placex_indexing_prepare(placex)).*
FROM placex WHERE place_id IN %s""", FROM placex WHERE place_id IN %s""",
(tuple((p[0] for p in ids)), )) (tuple((p[0] for p in ids)), ))

View File

@@ -397,9 +397,8 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
token_info.add_names(fulls, partials) token_info.add_names(fulls, partials)
country_feature = place.country_feature if place.is_country():
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature): self.add_country_names(place.country_code, names)
self.add_country_names(country_feature.lower(), names)
address = place.address address = place.address
if address: if address:

View File

@@ -410,9 +410,8 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
if names: if names:
token_info.add_names(self.conn, names) token_info.add_names(self.conn, names)
country_feature = place.country_feature if place.is_country():
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature): self.add_country_names(place.country_code, names)
self.add_country_names(country_feature.lower(), names)
address = place.address address = place.address
if address: if address:

View File

@@ -29,6 +29,7 @@ class IndexerTestDB:
indexed_date TIMESTAMP, indexed_date TIMESTAMP,
partition SMALLINT, partition SMALLINT,
admin_level SMALLINT, admin_level SMALLINT,
country_code TEXT,
address HSTORE, address HSTORE,
token_info JSONB, token_info JSONB,
geometry_sector INTEGER)""") geometry_sector INTEGER)""")
@@ -54,15 +55,26 @@ class IndexerTestDB:
END IF; END IF;
RETURN NEW; RETURN NEW;
END; $$ LANGUAGE plpgsql;""") END; $$ LANGUAGE plpgsql;""")
cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex, cur.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE")
OUT name HSTORE, cur.execute("""CREATE TYPE prepare_update_info AS (
OUT address HSTORE, name HSTORE,
OUT country_feature VARCHAR, address HSTORE,
OUT linked_place_id BIGINT) rank_address SMALLINT,
country_code TEXT,
class TEXT,
type TEXT,
linked_place_id BIGINT
)""")
cur.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
OUT result prepare_update_info)
AS $$ AS $$
BEGIN BEGIN
address := p.address; result.address := p.address;
name := p.name; result.name := p.name;
result.class := p.class;
result.type := p.type;
result.country_code := p.country_code;
result.rank_address := p.rank_address;
END; END;
$$ LANGUAGE plpgsql STABLE; $$ LANGUAGE plpgsql STABLE;
""") """)

View File

@@ -323,10 +323,8 @@ class TestPlaceNames:
assert eval(info['names']) == set((t[2] for t in tokens)) assert eval(info['names']) == set((t[2] for t in tokens))
def process_named_place(self, names, country_feature=None): def process_named_place(self, names):
place = {'name': names} place = {'name': names}
if country_feature:
place['country_feature'] = country_feature
return self.analyzer.process_place(PlaceInfo(place)) return self.analyzer.process_place(PlaceInfo(place))
@@ -353,7 +351,13 @@ class TestPlaceNames:
def test_country_name(self, word_table): def test_country_name(self, word_table):
info = self.process_named_place({'name': 'Norge'}, country_feature='no') place = PlaceInfo({'name' : {'name': 'Norge'},
'country_code': 'no',
'rank_address': 4,
'class': 'boundary',
'type': 'administrative'})
info = self.analyzer.process_place(place)
self.expect_name_terms(info, '#norge', 'norge') self.expect_name_terms(info, '#norge', 'norge')
assert word_table.get_country() == {('no', 'NORGE')} assert word_table.get_country() == {('no', 'NORGE')}