move postcode normalization into tokenizer

2026-02-15 02:47:59 +00:00 · 2021-04-25 18:26:36 +02:00
parent d8ed1bfc60
commit ffc2d82b0e
12 changed files with 181 additions and 74 deletions
--- a/test/python/conftest.py
+++ b/test/python/conftest.py
@@ -299,7 +299,7 @@ def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):


@pytest.fixture
-def tokenizer_mock(monkeypatch, property_table, temp_db_conn):
+def tokenizer_mock(monkeypatch, property_table, temp_db_conn, dsn):
    """ Sets up the configuration so that the test dummy tokenizer will be
        loaded.
    """
--- a/test/python/dummy_tokenizer.py
+++ b/test/python/dummy_tokenizer.py
@@ -43,6 +43,9 @@ class DummyNameAnalyzer:
        """
        pass

+    def add_postcodes_from_db(self):
+        pass
+
    def process_place(self, place):
        """ Determine tokenizer information about the given place.

--- a/test/python/test_indexing.py
+++ b/test/python/test_indexing.py
@@ -33,6 +33,9 @@ class IndexerTestDB:
                                                geometry_sector INTEGER)""")
            cur.execute("""CREATE TABLE location_property_osmline (
                               place_id BIGINT,
+                               osm_id BIGINT,
+                               address HSTORE,
+                               token_info JSONB,
                               indexed_status SMALLINT,
                               indexed_date TIMESTAMP,
                               geometry_sector INTEGER)""")
@@ -61,6 +64,14 @@ class IndexerTestDB:
                           END;
                           $$ LANGUAGE plpgsql STABLE;
                        """)
+            cur.execute("""CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
+                           RETURNS HSTORE AS $$
+                           BEGIN
+                             RETURN in_address;
+                           END;
+                           $$ LANGUAGE plpgsql STABLE;
+                        """)
+
            for table in ('placex', 'location_property_osmline', 'location_postcode'):
                cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
                               FOR EACH ROW EXECUTE PROCEDURE date_update()
@@ -91,9 +102,9 @@ class IndexerTestDB:
        next_id = next(self.osmline_id)
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO location_property_osmline
-                              (place_id, indexed_status, geometry_sector)
-                              VALUES (%s, 1, %s)""",
-                        (next_id, sector))
+                              (place_id, osm_id, indexed_status, geometry_sector)
+                              VALUES (%s, %s, 1, %s)""",
+                        (next_id, next_id, sector))
        return next_id

    def add_postcode(self, country, postcode):
--- a/test/python/test_tools_postcodes.py
+++ b/test/python/test_tools_postcodes.py
@@ -5,6 +5,11 @@ Tests for functions to maintain the artificial postcode table.
 import pytest

 from nominatim.tools import postcodes
+import dummy_tokenizer
+
+@pytest.fixture
+def tokenizer():
+    return dummy_tokenizer.DummyTokenizer(None, None)

@pytest.fixture
 def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
@@ -20,26 +25,26 @@ def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
                      postcode TEXT,
                      geometry GEOMETRY(Geometry, 4326)""")
    temp_db_cursor.execute('CREATE SEQUENCE seq_place')
-    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
-                              RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+                              RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
                           """)


-def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path):
-    postcodes.import_postcodes(dsn, tmp_path)
+def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
+    postcodes.import_postcodes(dsn, tmp_path, tokenizer)

    assert temp_db_cursor.table_exists('gb_postcode')
    assert temp_db_cursor.table_exists('us_postcode')
    assert temp_db_cursor.table_rows('location_postcode') == 0


-def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path):
+def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
    temp_db_cursor.execute("""
        INSERT INTO placex (place_id, country_code, address, geometry)
          VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)')
    """)

-    postcodes.import_postcodes(dsn, tmp_path)
+    postcodes.import_postcodes(dsn, tmp_path, tokenizer)

    rows = temp_db_cursor.row_set(""" SELECT postcode, country_code,
                                      ST_X(geometry), ST_Y(geometry)