From 954771a42df8b4d03ecb9a7ab5c5475c853511ff Mon Sep 17 00:00:00 2001 From: Ayush Dhar Dubey Date: Thu, 11 Dec 2025 20:40:57 +0530 Subject: [PATCH 1/3] Add fallback search mechanism for dropped databases lookup --- lib-sql/tiger_import_start.sql | 151 +++++++++++++++++++++++++++++---- 1 file changed, 133 insertions(+), 18 deletions(-) diff --git a/lib-sql/tiger_import_start.sql b/lib-sql/tiger_import_start.sql index 0ff53436..d0bbdf63 100644 --- a/lib-sql/tiger_import_start.sql +++ b/lib-sql/tiger_import_start.sql @@ -15,6 +15,99 @@ CREATE TABLE location_property_tiger_import ( step SMALLINT, postcode TEXT); + +-- Lookup functions for tiger import when update +-- informations are dropped (see gh-issue #2463) +CREATE OR REPLACE FUNCTION getNearestNamedRoadPlaceIdSlow(in_centroid GEOMETRY, + in_token_info JSONB) + RETURNS BIGINT + AS $$ +DECLARE + out_place_id BIGINT; + +BEGIN + SELECT place_id INTO out_place_id + FROM search_name + WHERE + -- finds rows where name_vector shares elements with search tokens. + token_matches_street(in_token_info, name_vector) + -- limits search area + AND centroid && ST_Expand(in_centroid, 0.015) + AND address_rank BETWEEN 26 AND 27 + ORDER BY ST_Distance(centroid, in_centroid) ASC + LIMIT 1; + + RETURN out_place_id; +END +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getNearestParallelRoadFeatureSlow(line GEOMETRY) + RETURNS BIGINT + AS $$ +DECLARE + r RECORD; + search_diameter FLOAT; + p1 GEOMETRY; + p2 GEOMETRY; + p3 GEOMETRY; + +BEGIN + IF ST_GeometryType(line) not in ('ST_LineString') THEN + RETURN NULL; + END IF; + + p1 := ST_LineInterpolatePoint(line,0); + p2 := ST_LineInterpolatePoint(line,0.5); + p3 := ST_LineInterpolatePoint(line,1); + + search_diameter := 0.0005; + WHILE search_diameter < 0.01 LOOP + FOR r IN + SELECT place_id FROM placex + WHERE ST_DWithin(line, geometry, search_diameter) + AND rank_address BETWEEN 26 AND 27 + ORDER BY (ST_distance(geometry, p1)+ + ST_distance(geometry, p2)+ + ST_distance(geometry, p3)) ASC limit 1 + LOOP + RETURN r.place_id; + END LOOP; + search_diameter := search_diameter * 2; + END LOOP; + RETURN NULL; +END +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getNearestRoadPlaceIdSlow(point GEOMETRY) + RETURNS BIGINT + AS $$ +DECLARE + r RECORD; + search_diameter FLOAT; +BEGIN + search_diameter := 0.00005; + WHILE search_diameter < 0.1 LOOP + FOR r IN + SELECT place_id FROM placex + WHERE ST_DWithin(geometry, point, search_diameter) + AND rank_address BETWEEN 26 AND 27 + ORDER BY ST_Distance(geometry, point) ASC limit 1 + LOOP + RETURN r.place_id; + END LOOP; + search_diameter := search_diameter * 2; + END LOOP; + RETURN NULL; +END +$$ +LANGUAGE plpgsql; + + +-- Tiger import function CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, in_startnumber INTEGER, in_endnumber INTEGER, interpolationtype TEXT, token_info JSONB, in_postcode TEXT) RETURNS INTEGER @@ -71,28 +164,50 @@ BEGIN place_centroid := ST_Centroid(linegeo); out_partition := get_partition('us'); - out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid, + -- HYBRID LOOKUP LOGIC (see gh-issue #2463) + -- if partition tables exist, use them for fast spatial lookups + {% if 'location_road_0' in db.tables %} + out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid, token_info); - IF out_parent_place_id IS NULL THEN - SELECT getNearestParallelRoadFeature(out_partition, linegeo) - INTO out_parent_place_id; + IF out_parent_place_id IS NULL THEN + SELECT getNearestParallelRoadFeature(out_partition, linegeo) + INTO out_parent_place_id; + END IF; + + IF out_parent_place_id IS NULL THEN + SELECT getNearestRoadPlaceId(out_partition, place_centroid) + INTO out_parent_place_id; + END IF; + + -- When updatable information has been dropped: + -- Partition tables no longer exist, but search_name still persists. + {% elif 'search_name' in db.tables %} + -- Fallback: Look up in 'search_name' table + -- though spatial lookups here can be slower. + out_parent_place_id := getNearestNamedRoadPlaceIdSlow(place_centroid, token_info); + + IF out_parent_place_id IS NULL THEN + out_parent_place_id := getNearestParallelRoadFeatureSlow(linegeo); + END IF; + + IF out_parent_place_id IS NULL THEN + out_parent_place_id := getNearestRoadPlaceIdSlow(place_centroid); + END IF; + {% endif %} + + -- If parent was found, insert street(line) into import table + IF out_parent_place_id IS NOT NULL THEN + INSERT INTO location_property_tiger_import (linegeo, place_id, partition, + parent_place_id, startnumber, endnumber, + step, postcode) + VALUES (linegeo, nextval('seq_place'), out_partition, + out_parent_place_id, startnumber, endnumber, + stepsize, in_postcode); + + RETURN 1; END IF; - IF out_parent_place_id IS NULL THEN - SELECT getNearestRoadPlaceId(out_partition, place_centroid) - INTO out_parent_place_id; - END IF; - ---insert street(line) into import table -insert into location_property_tiger_import (linegeo, place_id, partition, - parent_place_id, startnumber, endnumber, - step, postcode) -values (linegeo, nextval('seq_place'), out_partition, - out_parent_place_id, startnumber, endnumber, - stepsize, in_postcode); - - RETURN 1; END; $$ LANGUAGE plpgsql; From 2698382552608103958eb0a1b5ce40e23a1c43a8 Mon Sep 17 00:00:00 2001 From: Ayush Dhar Dubey Date: Fri, 9 Jan 2026 16:51:53 +0530 Subject: [PATCH 2/3] permit import of tiger after freeze --- src/nominatim_db/clicmd/add_data.py | 6 +++--- src/nominatim_db/tools/tiger_data.py | 14 ++++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/nominatim_db/clicmd/add_data.py b/src/nominatim_db/clicmd/add_data.py index 580740f1..9d80a514 100644 --- a/src/nominatim_db/clicmd/add_data.py +++ b/src/nominatim_db/clicmd/add_data.py @@ -65,14 +65,14 @@ class UpdateAddData: def run(self, args: NominatimArgs) -> int: from ..tools import add_osm_data + if args.tiger_data: + return asyncio.run(self._add_tiger_data(args)) + with connect(args.config.get_libpq_dsn()) as conn: if is_frozen(conn): print('Database is marked frozen. New data can\'t be added.') return 1 - if args.tiger_data: - return asyncio.run(self._add_tiger_data(args)) - osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1) if args.file or args.diff: return add_osm_data.add_data_from_file(args.config.get_libpq_dsn(), diff --git a/src/nominatim_db/tools/tiger_data.py b/src/nominatim_db/tools/tiger_data.py index 7b865570..b2f63839 100644 --- a/src/nominatim_db/tools/tiger_data.py +++ b/src/nominatim_db/tools/tiger_data.py @@ -17,13 +17,12 @@ import tarfile from psycopg.types.json import Json from ..config import Configuration -from ..db.connection import connect +from ..db.connection import connect, table_exists from ..db.sql_preprocessor import SQLPreprocessor from ..errors import UsageError from ..db.query_pool import QueryPool from ..data.place_info import PlaceInfo from ..tokenizer.base import AbstractTokenizer -from . import freeze LOG = logging.getLogger() @@ -90,16 +89,19 @@ async def add_tiger_data(data_dir: str, config: Configuration, threads: int, """ dsn = config.get_libpq_dsn() - with connect(dsn) as conn: - if freeze.is_frozen(conn): - raise UsageError("Tiger cannot be imported when database frozen (Github issue #3048)") - with TigerInput(data_dir) as tar: if not tar: return 1 with connect(dsn) as conn: sql = SQLPreprocessor(conn, config) + + if not table_exists(conn, 'search_name'): + raise UsageError( + "Cannot perform tiger import: required tables are missing. " + "See https://github.com/osm-search/Nominatim/issues/2463 for details." + ) + sql.run_sql_file(conn, 'tiger_import_start.sql') # Reading files and then for each file line handling From eefd0efa59356cf635e9b1ee4c0536cc5d57d7c4 Mon Sep 17 00:00:00 2001 From: Ayush Dhar Dubey Date: Fri, 9 Jan 2026 16:52:15 +0530 Subject: [PATCH 3/3] update test frozen db: new tiger import mechanism --- lib-sql/tiger_import_start.sql | 1 + test/python/tools/test_tiger_data.py | 27 ++++++++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/lib-sql/tiger_import_start.sql b/lib-sql/tiger_import_start.sql index d0bbdf63..40a2f525 100644 --- a/lib-sql/tiger_import_start.sql +++ b/lib-sql/tiger_import_start.sql @@ -207,6 +207,7 @@ BEGIN RETURN 1; END IF; + RETURN 0; END; $$ diff --git a/test/python/tools/test_tiger_data.py b/test/python/tools/test_tiger_data.py index f7dfe32e..9623e21b 100644 --- a/test/python/tools/test_tiger_data.py +++ b/test/python/tools/test_tiger_data.py @@ -32,6 +32,8 @@ class MockTigerTable: # We need this table to determine if the database is frozen or not cur.execute("CREATE TABLE place (number INTEGER)") + # We need this table to determine if the database is in reverse-only mode + cur.execute("CREATE TABLE search_name (place_id BIGINT)") def count(self): return execute_scalar(self.conn, "SELECT count(*) FROM tiger") @@ -87,15 +89,30 @@ async def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock, assert tiger_table.count() == 6213 +@pytest.mark.parametrize("threads", (1, 5)) @pytest.mark.asyncio -async def test_add_tiger_data_database_frozen(def_config, temp_db_conn, tiger_table, tokenizer_mock, - tmp_path): +async def test_add_tiger_data_database_frozen(def_config, src_dir, temp_db_conn, tiger_table, + tokenizer_mock, threads): freeze.drop_update_tables(temp_db_conn) - with pytest.raises(UsageError) as excinfo: - await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock()) + await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'), + def_config, threads, tokenizer_mock()) - assert "database frozen" in str(excinfo.value) + assert tiger_table.count() == 6213 + + +@pytest.mark.asyncio +async def test_add_tiger_data_reverse_only(def_config, src_dir, temp_db_conn, tiger_table, + tokenizer_mock): + with temp_db_conn.cursor() as cur: + cur.execute("DROP TABLE search_name") + temp_db_conn.commit() + + with pytest.raises(UsageError, + match="Cannot perform tiger import: required tables are missing. " + "See https://github.com/osm-search/Nominatim/issues/2463 for details."): + await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'), + def_config, 1, tokenizer_mock()) assert tiger_table.count() == 0