move postcode normalization into tokenizer

This commit is contained in:
Sarah Hoffmann
2021-04-25 18:26:36 +02:00
parent d8ed1bfc60
commit ffc2d82b0e
12 changed files with 181 additions and 74 deletions

View File

@@ -299,7 +299,7 @@ def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
@pytest.fixture
def tokenizer_mock(monkeypatch, property_table, temp_db_conn):
def tokenizer_mock(monkeypatch, property_table, temp_db_conn, dsn):
""" Sets up the configuration so that the test dummy tokenizer will be
loaded.
"""

View File

@@ -43,6 +43,9 @@ class DummyNameAnalyzer:
"""
pass
def add_postcodes_from_db(self):
pass
def process_place(self, place):
""" Determine tokenizer information about the given place.

View File

@@ -33,6 +33,9 @@ class IndexerTestDB:
geometry_sector INTEGER)""")
cur.execute("""CREATE TABLE location_property_osmline (
place_id BIGINT,
osm_id BIGINT,
address HSTORE,
token_info JSONB,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
geometry_sector INTEGER)""")
@@ -61,6 +64,14 @@ class IndexerTestDB:
END;
$$ LANGUAGE plpgsql STABLE;
""")
cur.execute("""CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
RETURNS HSTORE AS $$
BEGIN
RETURN in_address;
END;
$$ LANGUAGE plpgsql STABLE;
""")
for table in ('placex', 'location_property_osmline', 'location_postcode'):
cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
FOR EACH ROW EXECUTE PROCEDURE date_update()
@@ -91,9 +102,9 @@ class IndexerTestDB:
next_id = next(self.osmline_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO location_property_osmline
(place_id, indexed_status, geometry_sector)
VALUES (%s, 1, %s)""",
(next_id, sector))
(place_id, osm_id, indexed_status, geometry_sector)
VALUES (%s, %s, 1, %s)""",
(next_id, next_id, sector))
return next_id
def add_postcode(self, country, postcode):

View File

@@ -5,6 +5,11 @@ Tests for functions to maintain the artificial postcode table.
import pytest
from nominatim.tools import postcodes
import dummy_tokenizer
@pytest.fixture
def tokenizer():
return dummy_tokenizer.DummyTokenizer(None, None)
@pytest.fixture
def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
@@ -20,26 +25,26 @@ def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
postcode TEXT,
geometry GEOMETRY(Geometry, 4326)""")
temp_db_cursor.execute('CREATE SEQUENCE seq_place')
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
""")
def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path):
postcodes.import_postcodes(dsn, tmp_path)
def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
postcodes.import_postcodes(dsn, tmp_path, tokenizer)
assert temp_db_cursor.table_exists('gb_postcode')
assert temp_db_cursor.table_exists('us_postcode')
assert temp_db_cursor.table_rows('location_postcode') == 0
def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path):
def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
temp_db_cursor.execute("""
INSERT INTO placex (place_id, country_code, address, geometry)
VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)')
""")
postcodes.import_postcodes(dsn, tmp_path)
postcodes.import_postcodes(dsn, tmp_path, tokenizer)
rows = temp_db_cursor.row_set(""" SELECT postcode, country_code,
ST_X(geometry), ST_Y(geometry)