Export more data for the tokenizer name preparation

Adds class, type, country code and address rank to the exported information
and removes the rather odd `country_feature` hack for countries. Whether a place
represents a country boundary can now be computed by the tokenizer itself.
This commit is contained in:
Sarah Hoffmann
2021-09-29 11:54:14 +02:00
parent 231250f2eb
commit be65c8303f
7 changed files with 85 additions and 45 deletions

View File

@@ -29,6 +29,7 @@ class IndexerTestDB:
indexed_date TIMESTAMP,
partition SMALLINT,
admin_level SMALLINT,
country_code TEXT,
address HSTORE,
token_info JSONB,
geometry_sector INTEGER)""")
@@ -54,15 +55,26 @@ class IndexerTestDB:
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql;""")
cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
OUT name HSTORE,
OUT address HSTORE,
OUT country_feature VARCHAR,
OUT linked_place_id BIGINT)
cur.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE")
cur.execute("""CREATE TYPE prepare_update_info AS (
name HSTORE,
address HSTORE,
rank_address SMALLINT,
country_code TEXT,
class TEXT,
type TEXT,
linked_place_id BIGINT
)""")
cur.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
OUT result prepare_update_info)
AS $$
BEGIN
address := p.address;
name := p.name;
result.address := p.address;
result.name := p.name;
result.class := p.class;
result.type := p.type;
result.country_code := p.country_code;
result.rank_address := p.rank_address;
END;
$$ LANGUAGE plpgsql STABLE;
""")

View File

@@ -323,10 +323,8 @@ class TestPlaceNames:
assert eval(info['names']) == set((t[2] for t in tokens))
def process_named_place(self, names, country_feature=None):
def process_named_place(self, names):
place = {'name': names}
if country_feature:
place['country_feature'] = country_feature
return self.analyzer.process_place(PlaceInfo(place))
@@ -353,7 +351,13 @@ class TestPlaceNames:
def test_country_name(self, word_table):
info = self.process_named_place({'name': 'Norge'}, country_feature='no')
place = PlaceInfo({'name' : {'name': 'Norge'},
'country_code': 'no',
'rank_address': 4,
'class': 'boundary',
'type': 'administrative'})
info = self.analyzer.process_place(place)
self.expect_name_terms(info, '#norge', 'norge')
assert word_table.get_country() == {('no', 'NORGE')}