diff --git a/lib-sql/tables/search_name.sql b/lib-sql/tables/search_name.sql
index a5b802fa..5ea3d5f8 100644
--- a/lib-sql/tables/search_name.sql
+++ b/lib-sql/tables/search_name.sql
@@ -7,7 +7,7 @@
 DROP TABLE IF EXISTS search_name;
-{% if not db.reverse_only %}
+{% if not create_reverse_only %}
 CREATE TABLE search_name (
   place_id BIGINT NOT NULL,
diff --git a/src/nominatim_db/tools/database_import.py b/src/nominatim_db/tools/database_import.py
index 18a7c321..9af74bf6 100644
--- a/src/nominatim_db/tools/database_import.py
+++ b/src/nominatim_db/tools/database_import.py
@@ -152,12 +152,11 @@ def create_tables(conn: Connection, config: Configuration, reverse_only: bool =
         When `reverse_only` is True, then the main table for searching will
         be skipped and only reverse search is possible.
     """
-    sql = SQLPreprocessor(conn, config)
-    sql.env.globals['db']['reverse_only'] = reverse_only
+    SQLPreprocessor(conn, config).run_sql_file(conn, 'tables.sql',
+                                               create_reverse_only=reverse_only)
 
-    sql.run_sql_file(conn, 'tables.sql')
-
-    sql.run_sql_file(conn, 'grants.sql')
+    # reinitiate the preprocessor to get all the newly created tables
+    SQLPreprocessor(conn, config).run_sql_file(conn, 'grants.sql')
 
 
 def create_table_triggers(conn: Connection, config: Configuration) -> None:
diff --git a/test/python/tools/test_tiger_data.py b/test/python/tools/test_tiger_data.py
index 65c4e929..250eb173 100644
--- a/test/python/tools/test_tiger_data.py
+++ b/test/python/tools/test_tiger_data.py
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Test for tiger data function
@@ -13,59 +13,10 @@ from textwrap import dedent
 import pytest
 import pytest_asyncio  # noqa: F401
 
-from nominatim_db.db.connection import execute_scalar
-from nominatim_db.tools import tiger_data, freeze
+from nominatim_db.tools import tiger_data
 from nominatim_db.errors import UsageError
 
 
-class MockTigerTable:
-
-    def __init__(self, conn):
-        self.conn = conn
-        with conn.cursor() as cur:
-            cur.execute("""CREATE TABLE tiger (linegeo GEOMETRY,
-                                               start INTEGER,
-                                               stop INTEGER,
-                                               interpol TEXT,
-                                               token_info JSONB,
-                                               postcode TEXT)""")
-
-            # We need this table to determine if the database is frozen or not
-            cur.execute("CREATE TABLE place (number INTEGER)")
-            # We need this table to determine if the database is in reverse-only mode
-            cur.execute("CREATE TABLE search_name (place_id BIGINT)")
-
-    def count(self):
-        return execute_scalar(self.conn, "SELECT count(*) FROM tiger")
-
-    def row(self):
-        with self.conn.cursor() as cur:
-            cur.execute("SELECT * FROM tiger LIMIT 1")
-            return cur.fetchone()
-
-
-@pytest.fixture
-def tiger_table(def_config, temp_db_conn, sql_preprocessor,
-                temp_db_with_extensions, tmp_path):
-    def_config.lib_dir.sql = tmp_path / 'sql'
-    def_config.lib_dir.sql.mkdir()
-
-    (def_config.lib_dir.sql / 'tiger_import_start.sql').write_text(
-        """CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, start INTEGER,
-                                                        stop INTEGER, interpol TEXT,
-                                                        token_info JSONB, postcode TEXT)
-           RETURNS INTEGER AS $$
-           INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode)
-           RETURNING 1
-           $$ LANGUAGE SQL;""", encoding='utf-8')
-    (def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text(
-        """DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
-                                            in_endnumber INTEGER, interpolationtype TEXT,
-                                            token_info JSONB, in_postcode TEXT);""", encoding='utf-8')
-
-    return MockTigerTable(temp_db_conn)
-
-
 @pytest.fixture
 def csv_factory(tmp_path):
     def _mk_file(fname, hnr_from=1, hnr_to=9, interpol='odd', street='Main St',
@@ -80,107 +31,110 @@ csv_factory(tmp_path):
     return _mk_file
 
 
-@pytest.mark.parametrize("threads", (1, 5))
-@pytest.mark.asyncio
-async def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock, threads):
-    await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
-                                    def_config, threads, tokenizer_mock())
+class TestTiger:
 
-    assert tiger_table.count() == 6213
+    @pytest.fixture(autouse=True)
+    def setup(self, temp_db_conn, placex_row, load_sql):
+        load_sql('tables/search_name.sql', create_reverse_only=False)
+        load_sql('tables/tiger.sql')
+        # fake parent roads
+        for x in range(-870, -863):
+            for y in range(323, 328):
+                placex_row(rank_search=26, rank_address=26,
+                           geom=f"LINESTRING({x/10 - 0.1} {y/10}, {x/10 + 0.1} {y/10})")
 
-@pytest.mark.parametrize("threads", (1, 5))
-@pytest.mark.asyncio
-async def test_add_tiger_data_database_frozen(def_config, src_dir, temp_db_conn, tiger_table,
-                                              tokenizer_mock, threads):
-    freeze.drop_update_tables(temp_db_conn)
+        temp_db_conn.execute("""
+            CREATE OR REPLACE FUNCTION get_partition(cc VARCHAR(10)) RETURNS INTEGER AS $$
+              SELECT 0;
+            $$ LANGUAGE sql;
+            CREATE OR REPLACE FUNCTION token_matches_street(i JSONB, s INT[]) RETURNS BOOLEAN AS $$
+              SELECT false
+            $$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
+        """)
 
-    await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
-                                    def_config, threads, tokenizer_mock())
-
-    assert tiger_table.count() == 6213
-
-
-@pytest.mark.asyncio
-async def test_add_tiger_data_reverse_only(def_config, src_dir, temp_db_conn, tiger_table,
-                                           tokenizer_mock):
-    with temp_db_conn.cursor() as cur:
-        cur.execute("DROP TABLE search_name")
-    temp_db_conn.commit()
-
-    with pytest.raises(UsageError,
-                       match="Cannot perform tiger import: required tables are missing. "
-                             "See https://github.com/osm-search/Nominatim/issues/2463 for details."):
+    @pytest.mark.parametrize("threads", (1, 5))
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_database_frozen(self, def_config, src_dir, temp_db_cursor,
+                                                  tokenizer_mock, threads):
         await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
-                                        def_config, 1, tokenizer_mock())
+                                        def_config, threads, tokenizer_mock())
 
-    assert tiger_table.count() == 0
+        assert temp_db_cursor.table_rows('location_property_tiger') == 6209
 
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_reverse_only(self, def_config, src_dir, temp_db_cursor,
+                                               tokenizer_mock):
+        temp_db_cursor.execute("DROP TABLE search_name")
 
-@pytest.mark.asyncio
-async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
-                                       tmp_path):
-    await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
+        with pytest.raises(UsageError,
+                           match="Cannot perform tiger import: required tables are missing. "
+                                 "See https://github.com/osm-search/Nominatim/issues/2463 for details."):
+            await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
+                                            def_config, 1, tokenizer_mock())
 
-    assert tiger_table.count() == 0
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0
 
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_no_files(self, def_config, temp_db_cursor, tokenizer_mock,
+                                           tmp_path):
+        await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
 
-@pytest.mark.asyncio
-async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
-                                       tmp_path):
-    sqlfile = tmp_path / '1010.csv'
-    sqlfile.write_text("""Random text""", encoding='utf-8')
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0
 
-    await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_bad_file(self, def_config, temp_db_cursor, tokenizer_mock,
+                                           tmp_path):
+        sqlfile = tmp_path / '1010.csv'
+        sqlfile.write_text('Random text', encoding='utf-8')
 
-    assert tiger_table.count() == 0
+        await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
 
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0
 
-@pytest.mark.asyncio
-async def test_add_tiger_data_hnr_nan(def_config, tiger_table, tokenizer_mock,
-                                      csv_factory, tmp_path):
-    csv_factory('file1', hnr_from=99)
-    csv_factory('file2', hnr_from='L12')
-    csv_factory('file3', hnr_to='12.4')
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_hnr_nan(self, def_config, temp_db_cursor, tokenizer_mock,
+                                          csv_factory, tmp_path):
+        csv_factory('file1', hnr_to=99)
+        csv_factory('file2', hnr_from='L12')
+        csv_factory('file3', hnr_to='12.4')
 
-    await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
+        await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
 
-    assert tiger_table.count() == 1
-    assert tiger_table.row().start == 99
+        rows = temp_db_cursor.row_set("""
+            SELECT startnumber, endnumber FROM location_property_tiger""")
+        assert rows == {(1, 99)}
 
-@pytest.mark.parametrize("threads", (1, 5))
-@pytest.mark.asyncio
-async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
-                                      tmp_path, src_dir, threads):
-    tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
-    tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
-    tar.close()
+    @pytest.mark.parametrize("threads", (1, 5))
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_tarfile(self, def_config, temp_db_cursor, tokenizer_mock,
+                                          tmp_path, src_dir, threads):
+        tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
+        tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
+        tar.close()
 
-    await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
-                                    tokenizer_mock())
+        await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
+                                        tokenizer_mock())
 
-    assert tiger_table.count() == 6213
+        assert temp_db_cursor.table_rows('location_property_tiger') == 6209
 
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_bad_tarfile(self, def_config, tokenizer_mock, tmp_path):
+        tarfile = tmp_path / 'sample.tar.gz'
+        tarfile.write_text("""Random text""", encoding='utf-8')
 
-@pytest.mark.asyncio
-async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
-                                          tmp_path):
-    tarfile = tmp_path / 'sample.tar.gz'
-    tarfile.write_text("""Random text""", encoding='utf-8')
+        with pytest.raises(UsageError):
+            await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
 
-    with pytest.raises(UsageError):
-        await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_empty_tarfile(self, def_config, temp_db_cursor, tokenizer_mock,
+                                                tmp_path):
+        tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
+        tar.add(__file__)
+        tar.close()
+        await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
+                                        tokenizer_mock())
 
-@pytest.mark.asyncio
-async def test_add_tiger_data_empty_tarfile(def_config, tiger_table, tokenizer_mock,
-                                            tmp_path):
-    tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
-    tar.add(__file__)
-    tar.close()
-
-    await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
-                                    tokenizer_mock())
-
-    assert tiger_table.count() == 0
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0