Merge pull request #3986 from lonvia/rework-tiger-tests
Rework unit tests for import of tiger data
@@ -7,7 +7,7 @@
 
 DROP TABLE IF EXISTS search_name;
 
-{% if not db.reverse_only %}
+{% if not create_reverse_only %}
 
 CREATE TABLE search_name (
   place_id BIGINT NOT NULL,
@@ -152,12 +152,11 @@ def create_tables(conn: Connection, config: Configuration, reverse_only: bool =
     When `reverse_only` is True, then the main table for searching will
     be skipped and only reverse search is possible.
     """
-    sql = SQLPreprocessor(conn, config)
-    sql.env.globals['db']['reverse_only'] = reverse_only
+    SQLPreprocessor(conn, config).run_sql_file(conn, 'tables.sql',
+                                               create_reverse_only=reverse_only)
 
-    sql.run_sql_file(conn, 'tables.sql')
-
-    sql.run_sql_file(conn, 'grants.sql')
+    # reinitiate the preprocessor to get all the newly created tables
+    SQLPreprocessor(conn, config).run_sql_file(conn, 'grants.sql')
 
 
 def create_table_triggers(conn: Connection, config: Configuration) -> None:
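The two hunks above work together: instead of pushing a global flag into the template environment, create_tables now hands the reverse-only switch to run_sql_file as a keyword argument, and tables.sql reads it as the Jinja variable create_reverse_only. The sketch below illustrates that render-time mechanism in isolation, using jinja2 directly rather than Nominatim's SQLPreprocessor; the template string and the function name render_tables_sql are invented for the example.

# Illustration only: a render-time keyword argument decides whether the
# search_name block is emitted, mirroring the create_reverse_only switch above.
import jinja2

TABLES_SQL = """
DROP TABLE IF EXISTS search_name;
{% if not create_reverse_only %}
CREATE TABLE search_name (
  place_id BIGINT NOT NULL
);
{% endif %}
"""


def render_tables_sql(create_reverse_only: bool) -> str:
    # Render the SQL template with the given flag, as run_sql_file would.
    template = jinja2.Environment().from_string(TABLES_SQL)
    return template.render(create_reverse_only=create_reverse_only)


if __name__ == '__main__':
    # With the flag set, the CREATE TABLE block disappears from the output,
    # which is exactly the reverse-only import mode.
    print(render_tables_sql(create_reverse_only=False))
    print(render_tables_sql(create_reverse_only=True))

Passing the flag per call also avoids mutating shared template globals, so two imports with different settings cannot leak state into each other.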
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Test for tiger data function
@@ -13,59 +13,10 @@ from textwrap import dedent
 import pytest
 import pytest_asyncio  # noqa: F401
 
-from nominatim_db.db.connection import execute_scalar
-from nominatim_db.tools import tiger_data, freeze
+from nominatim_db.tools import tiger_data
 from nominatim_db.errors import UsageError
 
 
-class MockTigerTable:
-
-    def __init__(self, conn):
-        self.conn = conn
-        with conn.cursor() as cur:
-            cur.execute("""CREATE TABLE tiger (linegeo GEOMETRY,
-                                               start INTEGER,
-                                               stop INTEGER,
-                                               interpol TEXT,
-                                               token_info JSONB,
-                                               postcode TEXT)""")
-
-            # We need this table to determine if the database is frozen or not
-            cur.execute("CREATE TABLE place (number INTEGER)")
-            # We need this table to determine if the database is in reverse-only mode
-            cur.execute("CREATE TABLE search_name (place_id BIGINT)")
-
-    def count(self):
-        return execute_scalar(self.conn, "SELECT count(*) FROM tiger")
-
-    def row(self):
-        with self.conn.cursor() as cur:
-            cur.execute("SELECT * FROM tiger LIMIT 1")
-            return cur.fetchone()
-
-
-@pytest.fixture
-def tiger_table(def_config, temp_db_conn, sql_preprocessor,
-                temp_db_with_extensions, tmp_path):
-    def_config.lib_dir.sql = tmp_path / 'sql'
-    def_config.lib_dir.sql.mkdir()
-
-    (def_config.lib_dir.sql / 'tiger_import_start.sql').write_text(
-        """CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, start INTEGER,
-                                                        stop INTEGER, interpol TEXT,
-                                                        token_info JSONB, postcode TEXT)
-           RETURNS INTEGER AS $$
-            INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode)
-            RETURNING 1
-           $$ LANGUAGE SQL;""", encoding='utf-8')
-    (def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text(
-        """DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
-                                            in_endnumber INTEGER, interpolationtype TEXT,
-                                            token_info JSONB, in_postcode TEXT);""", encoding='utf-8')
-
-    return MockTigerTable(temp_db_conn)
-
-
 @pytest.fixture
 def csv_factory(tmp_path):
     def _mk_file(fname, hnr_from=1, hnr_to=9, interpol='odd', street='Main St',
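With the MockTigerTable stand-in and its hand-written import functions removed, the reworked tests in the next hunk load the project's real table definitions through a load_sql fixture and assert on the actual location_property_tiger table via temp_db_cursor.table_rows() and row_set(). Those helpers are assumed to come from the test suite's conftest; the snippet below is only a minimal stand-in with the same behaviour, sketched against psycopg 3, not the project's actual conftest code.

# Hedged sketch: a cursor subclass offering the two convenience helpers the
# tests rely on. Class and method names mirror the calls in the tests but are
# otherwise invented for this illustration.
import psycopg


class CountingCursor(psycopg.Cursor):
    """Cursor with helpers similar to those used by the reworked tests."""

    def table_rows(self, table: str) -> int:
        # Number of rows currently in the given table.
        self.execute(f"SELECT count(*) FROM {table}")
        return self.fetchone()[0]

    def row_set(self, sql: str) -> set:
        # Run a query and return the result as a set of tuples, which makes
        # order-independent assertions straightforward.
        self.execute(sql)
        return {tuple(row) for row in self.fetchall()}

With psycopg 3, a test connection can hand out this cursor class by setting its cursor_factory attribute, so every cursor created from it carries the helpers.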
@@ -80,107 +31,110 @@ def csv_factory(tmp_path):
     return _mk_file
 
 
-@pytest.mark.parametrize("threads", (1, 5))
-@pytest.mark.asyncio
-async def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock, threads):
-    await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
-                                    def_config, threads, tokenizer_mock())
-
-    assert tiger_table.count() == 6213
-
-
-@pytest.mark.parametrize("threads", (1, 5))
-@pytest.mark.asyncio
-async def test_add_tiger_data_database_frozen(def_config, src_dir, temp_db_conn, tiger_table,
-                                              tokenizer_mock, threads):
-    freeze.drop_update_tables(temp_db_conn)
-
-    await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
-                                    def_config, threads, tokenizer_mock())
-
-    assert tiger_table.count() == 6213
-
-
-@pytest.mark.asyncio
-async def test_add_tiger_data_reverse_only(def_config, src_dir, temp_db_conn, tiger_table,
-                                           tokenizer_mock):
-    with temp_db_conn.cursor() as cur:
-        cur.execute("DROP TABLE search_name")
-    temp_db_conn.commit()
-
-    with pytest.raises(UsageError,
-                       match="Cannot perform tiger import: required tables are missing. "
-                             "See https://github.com/osm-search/Nominatim/issues/2463 for details."):
-        await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
-                                        def_config, 1, tokenizer_mock())
-
-    assert tiger_table.count() == 0
-
-
-@pytest.mark.asyncio
-async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
-                                       tmp_path):
-    await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
-
-    assert tiger_table.count() == 0
-
-
-@pytest.mark.asyncio
-async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
-                                       tmp_path):
-    sqlfile = tmp_path / '1010.csv'
-    sqlfile.write_text("""Random text""", encoding='utf-8')
-
-    await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
-
-    assert tiger_table.count() == 0
-
-
-@pytest.mark.asyncio
-async def test_add_tiger_data_hnr_nan(def_config, tiger_table, tokenizer_mock,
-                                      csv_factory, tmp_path):
-    csv_factory('file1', hnr_from=99)
-    csv_factory('file2', hnr_from='L12')
-    csv_factory('file3', hnr_to='12.4')
-
-    await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
-
-    assert tiger_table.count() == 1
-    assert tiger_table.row().start == 99
-
-
-@pytest.mark.parametrize("threads", (1, 5))
-@pytest.mark.asyncio
-async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
-                                      tmp_path, src_dir, threads):
-    tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
-    tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
-    tar.close()
-
-    await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
-                                    tokenizer_mock())
-
-    assert tiger_table.count() == 6213
-
-
-@pytest.mark.asyncio
-async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
-                                          tmp_path):
-    tarfile = tmp_path / 'sample.tar.gz'
-    tarfile.write_text("""Random text""", encoding='utf-8')
-
-    with pytest.raises(UsageError):
-        await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
-
-
-@pytest.mark.asyncio
-async def test_add_tiger_data_empty_tarfile(def_config, tiger_table, tokenizer_mock,
-                                            tmp_path):
-    tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
-    tar.add(__file__)
-    tar.close()
-
-    await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
-                                    tokenizer_mock())
-
-    assert tiger_table.count() == 0
+class TestTiger:
+
+    @pytest.fixture(autouse=True)
+    def setup(self, temp_db_conn, placex_row, load_sql):
+        load_sql('tables/search_name.sql', create_reverse_only=False)
+        load_sql('tables/tiger.sql')
+
+        # fake parent roads
+        for x in range(-870, -863):
+            for y in range(323, 328):
+                placex_row(rank_search=26, rank_address=26,
+                           geom=f"LINESTRING({x/10 - 0.1} {y/10}, {x/10 + 0.1} {y/10})")
+
+        temp_db_conn.execute("""
+            CREATE OR REPLACE FUNCTION get_partition(cc VARCHAR(10)) RETURNS INTEGER AS $$
+              SELECT 0;
+            $$ LANGUAGE sql;
+            CREATE OR REPLACE FUNCTION token_matches_street(i JSONB, s INT[]) RETURNS BOOLEAN AS $$
+              SELECT false
+            $$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
+        """)
+
+    @pytest.mark.parametrize("threads", (1, 5))
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_database_frozen(self, def_config, src_dir, temp_db_cursor,
+                                                  tokenizer_mock, threads):
+        await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
+                                        def_config, threads, tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 6209
+
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_reverse_only(self, def_config, src_dir, temp_db_cursor,
+                                               tokenizer_mock):
+        temp_db_cursor.execute("DROP TABLE search_name")
+
+        with pytest.raises(UsageError,
+                           match="Cannot perform tiger import: required tables are missing. "
+                                 "See https://github.com/osm-search/Nominatim/issues/2463 for details."):
+            await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
+                                            def_config, 1, tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0
+
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_no_files(self, def_config, temp_db_cursor, tokenizer_mock,
+                                           tmp_path):
+        await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0
+
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_bad_file(self, def_config, temp_db_cursor, tokenizer_mock,
+                                           tmp_path):
+        sqlfile = tmp_path / '1010.csv'
+        sqlfile.write_text('Random text', encoding='utf-8')
+
+        await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0
+
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_hnr_nan(self, def_config, temp_db_cursor, tokenizer_mock,
+                                          csv_factory, tmp_path):
+        csv_factory('file1', hnr_to=99)
+        csv_factory('file2', hnr_from='L12')
+        csv_factory('file3', hnr_to='12.4')
+
+        await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
+
+        rows = temp_db_cursor.row_set("""
+            SELECT startnumber, endnumber FROM location_property_tiger""")
+
+        assert rows == {(1, 99)}
+
+    @pytest.mark.parametrize("threads", (1, 5))
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_tarfile(self, def_config, temp_db_cursor, tokenizer_mock,
+                                          tmp_path, src_dir, threads):
+        tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
+        tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
+        tar.close()
+
+        await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
+                                        tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 6209
+
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_bad_tarfile(self, def_config, tokenizer_mock, tmp_path):
+        tarfile = tmp_path / 'sample.tar.gz'
+        tarfile.write_text("""Random text""", encoding='utf-8')
+
+        with pytest.raises(UsageError):
+            await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
+
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_empty_tarfile(self, def_config, temp_db_cursor, tokenizer_mock,
+                                                tmp_path):
+        tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
+        tar.add(__file__)
+        tar.close()
+
+        await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
+                                        tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0
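A note on the hnr_nan test in the hunk above: the three generated CSV files contain house-number values '99', 'L12' and '12.4', and the test expects only the well-formed integer range (1, 99) to reach location_property_tiger. The snippet below is a stand-alone illustration of that expected filtering behaviour, not the actual logic in nominatim_db.tools.tiger_data; every name in it is invented for the example.

# Illustration only: drop interpolation rows whose house numbers are not
# plain integers, the behaviour the test asserts on.
from typing import Iterable, List, Optional, Tuple


def parse_house_numbers(start: str, stop: str) -> Optional[Tuple[int, int]]:
    """Return (start, stop) as integers, or None if either value is not numeric."""
    try:
        return int(start), int(stop)
    except ValueError:
        return None


def usable_rows(rows: Iterable[Tuple[str, str]]) -> List[Tuple[int, int]]:
    """Keep only rows with a well-formed integer house-number range."""
    return [hnr for hnr in (parse_house_numbers(f, t) for f, t in rows) if hnr is not None]


if __name__ == '__main__':
    # Mirrors the three CSV files created in the test: only the first survives.
    print(usable_rows([('1', '99'), ('L12', '9'), ('1', '12.4')]))   # [(1, 99)]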