forked from hans/Nominatim
export more data for the tokenizer name preparation
Adds the class, type, country code and address rank of a place to the exported information and removes the rather odd `country_feature` hack for countries. Whether a place represents a country boundary can now be computed by the tokenizer.
This commit is contained in:
@@ -1,30 +1,33 @@
|
|||||||
-- Trigger functions for the placex table.
|
-- Trigger functions for the placex table.
|
||||||
|
|
||||||
|
-- Information returned by update preparation.
|
||||||
|
DROP TYPE IF EXISTS prepare_update_info CASCADE;
|
||||||
|
CREATE TYPE prepare_update_info AS (
|
||||||
|
name HSTORE,
|
||||||
|
address HSTORE,
|
||||||
|
rank_address SMALLINT,
|
||||||
|
country_code TEXT,
|
||||||
|
class TEXT,
|
||||||
|
type TEXT,
|
||||||
|
linked_place_id BIGINT
|
||||||
|
);
|
||||||
|
|
||||||
-- Retrieve the data needed by the indexer for updating the place.
|
-- Retrieve the data needed by the indexer for updating the place.
|
||||||
--
|
CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex)
|
||||||
-- Return parameters:
|
RETURNS prepare_update_info
|
||||||
-- name list of names
|
|
||||||
-- address list of address tags, either from the object or a surrounding
|
|
||||||
-- building
|
|
||||||
-- country_feature If the place is a country feature, this contains the
|
|
||||||
-- country code, otherwise it is null.
|
|
||||||
CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
|
|
||||||
OUT name HSTORE,
|
|
||||||
OUT address HSTORE,
|
|
||||||
OUT country_feature VARCHAR,
|
|
||||||
OUT linked_place_id BIGINT)
|
|
||||||
AS $$
|
AS $$
|
||||||
DECLARE
|
DECLARE
|
||||||
location RECORD;
|
location RECORD;
|
||||||
|
result prepare_update_info;
|
||||||
BEGIN
|
BEGIN
|
||||||
-- For POI nodes, check if the address should be derived from a surrounding
|
-- For POI nodes, check if the address should be derived from a surrounding
|
||||||
-- building.
|
-- building.
|
||||||
IF p.rank_search < 30 OR p.osm_type != 'N' OR p.address is not null THEN
|
IF p.rank_search < 30 OR p.osm_type != 'N' OR p.address is not null THEN
|
||||||
address := p.address;
|
result.address := p.address;
|
||||||
ELSE
|
ELSE
|
||||||
-- The additional && condition works around the misguided query
|
-- The additional && condition works around the misguided query
|
||||||
-- planner of postgis 3.0.
|
-- planner of postgis 3.0.
|
||||||
SELECT placex.address || hstore('_inherited', '') INTO address
|
SELECT placex.address || hstore('_inherited', '') INTO result.address
|
||||||
FROM placex
|
FROM placex
|
||||||
WHERE ST_Covers(geometry, p.centroid)
|
WHERE ST_Covers(geometry, p.centroid)
|
||||||
and geometry && p.centroid
|
and geometry && p.centroid
|
||||||
@@ -34,27 +37,26 @@ BEGIN
|
|||||||
LIMIT 1;
|
LIMIT 1;
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
address := address - '_unlisted_place'::TEXT;
|
result.address := result.address - '_unlisted_place'::TEXT;
|
||||||
name := p.name;
|
result.name := p.name;
|
||||||
|
result.class := p.class;
|
||||||
|
result.type := p.type;
|
||||||
|
result.country_code := p.country_code;
|
||||||
|
result.rank_address := p.rank_address;
|
||||||
|
|
||||||
-- Names of linked places need to be merged in, so search for a linkable
|
-- Names of linked places need to be merged in, so search for a linkable
|
||||||
-- place already here.
|
-- place already here.
|
||||||
SELECT * INTO location FROM find_linked_place(p);
|
SELECT * INTO location FROM find_linked_place(p);
|
||||||
|
|
||||||
IF location.place_id is not NULL THEN
|
IF location.place_id is not NULL THEN
|
||||||
linked_place_id := location.place_id;
|
result.linked_place_id := location.place_id;
|
||||||
|
|
||||||
IF NOT location.name IS NULL THEN
|
IF NOT location.name IS NULL THEN
|
||||||
name := location.name || name;
|
result.name := location.name || result.name;
|
||||||
END IF;
|
END IF;
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
country_feature := CASE WHEN p.admin_level = 2
|
RETURN result;
|
||||||
and p.class = 'boundary' and p.type = 'administrative'
|
|
||||||
and p.osm_type = 'R'
|
|
||||||
THEN p.country_code
|
|
||||||
ELSE null
|
|
||||||
END;
|
|
||||||
END;
|
END;
|
||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql STABLE;
|
LANGUAGE plpgsql STABLE;
|
||||||
|
|||||||
@@ -38,7 +38,31 @@ class PlaceInfo:
|
|||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def country_feature(self):
|
def country_code(self):
|
||||||
""" Return the country code if the place is a valid country boundary.
|
""" The country code of the country the place is in. Guaranteed
|
||||||
|
to be a two-letter lower-case string or None, if no country
|
||||||
|
could be found.
|
||||||
"""
|
"""
|
||||||
return self._info.get('country_feature')
|
return self._info.get('country_code')
|
||||||
|
|
||||||
|
|
||||||
|
@property
|
||||||
|
def rank_address(self):
|
||||||
|
""" The computed rank address before rank correction.
|
||||||
|
"""
|
||||||
|
return self._info.get('rank_address')
|
||||||
|
|
||||||
|
|
||||||
|
def is_a(self, key, value):
|
||||||
|
""" Check if the place's primary tag corresponds to the given
|
||||||
|
key and value.
|
||||||
|
"""
|
||||||
|
return self._info.get('class') == key and self._info.get('type') == value
|
||||||
|
|
||||||
|
|
||||||
|
def is_country(self):
|
||||||
|
""" Check if the place is a valid country boundary.
|
||||||
|
"""
|
||||||
|
return self.rank_address == 4 \
|
||||||
|
and self.is_a('boundary', 'administrative') \
|
||||||
|
and self.country_code is not None
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ class AbstractPlacexRunner:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_place_details(worker, ids):
|
def get_place_details(worker, ids):
|
||||||
worker.perform("""SELECT place_id, (placex_prepare_update(placex)).*
|
worker.perform("""SELECT place_id, (placex_indexing_prepare(placex)).*
|
||||||
FROM placex WHERE place_id IN %s""",
|
FROM placex WHERE place_id IN %s""",
|
||||||
(tuple((p[0] for p in ids)), ))
|
(tuple((p[0] for p in ids)), ))
|
||||||
|
|
||||||
|
|||||||
@@ -397,9 +397,8 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
|
|
||||||
token_info.add_names(fulls, partials)
|
token_info.add_names(fulls, partials)
|
||||||
|
|
||||||
country_feature = place.country_feature
|
if place.is_country():
|
||||||
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
|
self.add_country_names(place.country_code, names)
|
||||||
self.add_country_names(country_feature.lower(), names)
|
|
||||||
|
|
||||||
address = place.address
|
address = place.address
|
||||||
if address:
|
if address:
|
||||||
|
|||||||
@@ -410,9 +410,8 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
|
|||||||
if names:
|
if names:
|
||||||
token_info.add_names(self.conn, names)
|
token_info.add_names(self.conn, names)
|
||||||
|
|
||||||
country_feature = place.country_feature
|
if place.is_country():
|
||||||
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
|
self.add_country_names(place.country_code, names)
|
||||||
self.add_country_names(country_feature.lower(), names)
|
|
||||||
|
|
||||||
address = place.address
|
address = place.address
|
||||||
if address:
|
if address:
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ class IndexerTestDB:
|
|||||||
indexed_date TIMESTAMP,
|
indexed_date TIMESTAMP,
|
||||||
partition SMALLINT,
|
partition SMALLINT,
|
||||||
admin_level SMALLINT,
|
admin_level SMALLINT,
|
||||||
|
country_code TEXT,
|
||||||
address HSTORE,
|
address HSTORE,
|
||||||
token_info JSONB,
|
token_info JSONB,
|
||||||
geometry_sector INTEGER)""")
|
geometry_sector INTEGER)""")
|
||||||
@@ -54,15 +55,26 @@ class IndexerTestDB:
|
|||||||
END IF;
|
END IF;
|
||||||
RETURN NEW;
|
RETURN NEW;
|
||||||
END; $$ LANGUAGE plpgsql;""")
|
END; $$ LANGUAGE plpgsql;""")
|
||||||
cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
|
cur.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE")
|
||||||
OUT name HSTORE,
|
cur.execute("""CREATE TYPE prepare_update_info AS (
|
||||||
OUT address HSTORE,
|
name HSTORE,
|
||||||
OUT country_feature VARCHAR,
|
address HSTORE,
|
||||||
OUT linked_place_id BIGINT)
|
rank_address SMALLINT,
|
||||||
|
country_code TEXT,
|
||||||
|
class TEXT,
|
||||||
|
type TEXT,
|
||||||
|
linked_place_id BIGINT
|
||||||
|
)""")
|
||||||
|
cur.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
|
||||||
|
OUT result prepare_update_info)
|
||||||
AS $$
|
AS $$
|
||||||
BEGIN
|
BEGIN
|
||||||
address := p.address;
|
result.address := p.address;
|
||||||
name := p.name;
|
result.name := p.name;
|
||||||
|
result.class := p.class;
|
||||||
|
result.type := p.type;
|
||||||
|
result.country_code := p.country_code;
|
||||||
|
result.rank_address := p.rank_address;
|
||||||
END;
|
END;
|
||||||
$$ LANGUAGE plpgsql STABLE;
|
$$ LANGUAGE plpgsql STABLE;
|
||||||
""")
|
""")
|
||||||
|
|||||||
@@ -323,10 +323,8 @@ class TestPlaceNames:
|
|||||||
assert eval(info['names']) == set((t[2] for t in tokens))
|
assert eval(info['names']) == set((t[2] for t in tokens))
|
||||||
|
|
||||||
|
|
||||||
def process_named_place(self, names, country_feature=None):
|
def process_named_place(self, names):
|
||||||
place = {'name': names}
|
place = {'name': names}
|
||||||
if country_feature:
|
|
||||||
place['country_feature'] = country_feature
|
|
||||||
|
|
||||||
return self.analyzer.process_place(PlaceInfo(place))
|
return self.analyzer.process_place(PlaceInfo(place))
|
||||||
|
|
||||||
@@ -353,7 +351,13 @@ class TestPlaceNames:
|
|||||||
|
|
||||||
|
|
||||||
def test_country_name(self, word_table):
|
def test_country_name(self, word_table):
|
||||||
info = self.process_named_place({'name': 'Norge'}, country_feature='no')
|
place = PlaceInfo({'name' : {'name': 'Norge'},
|
||||||
|
'country_code': 'no',
|
||||||
|
'rank_address': 4,
|
||||||
|
'class': 'boundary',
|
||||||
|
'type': 'administrative'})
|
||||||
|
|
||||||
|
info = self.analyzer.process_place(place)
|
||||||
|
|
||||||
self.expect_name_terms(info, '#norge', 'norge')
|
self.expect_name_terms(info, '#norge', 'norge')
|
||||||
assert word_table.get_country() == {('no', 'NORGE')}
|
assert word_table.get_country() == {('no', 'NORGE')}
|
||||||
|
|||||||
Reference in New Issue
Block a user