Mirror of https://github.com/osm-search/Nominatim.git, synced 2026-02-14 01:47:57 +00:00
Merge pull request #3906 from AyushDharDubey/fix/issue_2463-Use-search_name-table-for-TIGER-data-imports-on-'dropped'-databases
Use `search_name` as fallback for TIGER imports when update tables are dropped
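The core of the change: when the partition tables that support updates have been dropped, the parent street for a TIGER line can still be located through the `search_name` table. As a standalone illustration of that fallback lookup, here is a minimal Python/psycopg sketch (the connection string, the WKT centroid and the token_info shape are assumptions; the actual implementation is the plpgsql function getNearestNamedRoadPlaceIdSlow() added in the diff below):

import psycopg
from psycopg.types.json import Json

# Sketch only, not part of the PR. Mirrors the fallback query against search_name:
# match the street name tokens, restrict to a small bounding box around the
# centroid, and take the closest street-level row (address_rank 26-27).
FALLBACK_SQL = """
    SELECT place_id
      FROM search_name
     WHERE token_matches_street(%(tokens)s, name_vector)
       AND centroid && ST_Expand(%(centroid)s::geometry, 0.015)
       AND address_rank BETWEEN 26 AND 27
     ORDER BY ST_Distance(centroid, %(centroid)s::geometry)
     LIMIT 1
"""

def nearest_named_road_slow(conn: psycopg.Connection, centroid_wkt: str,
                            token_info: dict) -> int | None:
    """Return the place_id of the closest matching street, or None."""
    row = conn.execute(FALLBACK_SQL,
                       {'centroid': centroid_wkt, 'tokens': Json(token_info)}).fetchone()
    return row[0] if row else None

if __name__ == '__main__':
    with psycopg.connect("dbname=nominatim") as conn:          # dsn is an assumption
        # token_info shape is illustrative; the real blob comes from the tokenizer
        pid = nearest_named_road_slow(conn, 'SRID=4326;POINT(-71.06 42.36)',
                                      {'street': ['main', 'street']})
        print(pid)

The bounding-box filter (ST_Expand by 0.015 degrees) keeps the scan over search_name local, which is what makes this path acceptable, if slower than the partitioned lookup.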
@@ -15,6 +15,99 @@ CREATE TABLE location_property_tiger_import (
   step SMALLINT,
   postcode TEXT);
+
+
+-- Lookup functions for tiger import when update
+-- informations are dropped (see gh-issue #2463)
+CREATE OR REPLACE FUNCTION getNearestNamedRoadPlaceIdSlow(in_centroid GEOMETRY,
+                                                          in_token_info JSONB)
+  RETURNS BIGINT
+  AS $$
+DECLARE
+  out_place_id BIGINT;
+
+BEGIN
+  SELECT place_id INTO out_place_id
+    FROM search_name
+    WHERE
+      -- finds rows where name_vector shares elements with search tokens.
+      token_matches_street(in_token_info, name_vector)
+      -- limits search area
+      AND centroid && ST_Expand(in_centroid, 0.015)
+      AND address_rank BETWEEN 26 AND 27
+    ORDER BY ST_Distance(centroid, in_centroid) ASC
+    LIMIT 1;
+
+  RETURN out_place_id;
+END
+$$
+LANGUAGE plpgsql;
+
+
+CREATE OR REPLACE FUNCTION getNearestParallelRoadFeatureSlow(line GEOMETRY)
+  RETURNS BIGINT
+  AS $$
+DECLARE
+  r RECORD;
+  search_diameter FLOAT;
+  p1 GEOMETRY;
+  p2 GEOMETRY;
+  p3 GEOMETRY;
+
+BEGIN
+  IF ST_GeometryType(line) not in ('ST_LineString') THEN
+    RETURN NULL;
+  END IF;
+
+  p1 := ST_LineInterpolatePoint(line,0);
+  p2 := ST_LineInterpolatePoint(line,0.5);
+  p3 := ST_LineInterpolatePoint(line,1);
+
+  search_diameter := 0.0005;
+  WHILE search_diameter < 0.01 LOOP
+    FOR r IN
+      SELECT place_id FROM placex
+        WHERE ST_DWithin(line, geometry, search_diameter)
+          AND rank_address BETWEEN 26 AND 27
+        ORDER BY (ST_distance(geometry, p1)+
+                  ST_distance(geometry, p2)+
+                  ST_distance(geometry, p3)) ASC limit 1
+    LOOP
+      RETURN r.place_id;
+    END LOOP;
+    search_diameter := search_diameter * 2;
+  END LOOP;
+  RETURN NULL;
+END
+$$
+LANGUAGE plpgsql;
+
+
+CREATE OR REPLACE FUNCTION getNearestRoadPlaceIdSlow(point GEOMETRY)
+  RETURNS BIGINT
+  AS $$
+DECLARE
+  r RECORD;
+  search_diameter FLOAT;
+BEGIN
+  search_diameter := 0.00005;
+  WHILE search_diameter < 0.1 LOOP
+    FOR r IN
+      SELECT place_id FROM placex
+        WHERE ST_DWithin(geometry, point, search_diameter)
+          AND rank_address BETWEEN 26 AND 27
+        ORDER BY ST_Distance(geometry, point) ASC limit 1
+    LOOP
+      RETURN r.place_id;
+    END LOOP;
+    search_diameter := search_diameter * 2;
+  END LOOP;
+  RETURN NULL;
+END
+$$
+LANGUAGE plpgsql;
+
+
 -- Tiger import function
 CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, in_startnumber INTEGER,
                                              in_endnumber INTEGER, interpolationtype TEXT,
                                              token_info JSONB, in_postcode TEXT) RETURNS INTEGER
@@ -71,6 +164,9 @@ BEGIN
   place_centroid := ST_Centroid(linegeo);
   out_partition := get_partition('us');
 
+  -- HYBRID LOOKUP LOGIC (see gh-issue #2463)
+  -- if partition tables exist, use them for fast spatial lookups
+  {% if 'location_road_0' in db.tables %}
   out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
                                                     token_info);
 
@@ -84,15 +180,35 @@ BEGIN
       INTO out_parent_place_id;
   END IF;
 
-  --insert street(line) into import table
-  insert into location_property_tiger_import (linegeo, place_id, partition,
-                                              parent_place_id, startnumber, endnumber,
-                                              step, postcode)
-    values (linegeo, nextval('seq_place'), out_partition,
-            out_parent_place_id, startnumber, endnumber,
-            stepsize, in_postcode);
-
-  RETURN 1;
+  -- When updatable information has been dropped:
+  -- Partition tables no longer exist, but search_name still persists.
+  {% elif 'search_name' in db.tables %}
+  -- Fallback: Look up in 'search_name' table
+  -- though spatial lookups here can be slower.
+  out_parent_place_id := getNearestNamedRoadPlaceIdSlow(place_centroid, token_info);
+
+  IF out_parent_place_id IS NULL THEN
+    out_parent_place_id := getNearestParallelRoadFeatureSlow(linegeo);
+  END IF;
+
+  IF out_parent_place_id IS NULL THEN
+    out_parent_place_id := getNearestRoadPlaceIdSlow(place_centroid);
+  END IF;
+  {% endif %}
+
+  -- If parent was found, insert street(line) into import table
+  IF out_parent_place_id IS NOT NULL THEN
+    INSERT INTO location_property_tiger_import (linegeo, place_id, partition,
+                                                parent_place_id, startnumber, endnumber,
+                                                step, postcode)
+      VALUES (linegeo, nextval('seq_place'), out_partition,
+              out_parent_place_id, startnumber, endnumber,
+              stepsize, in_postcode);
+
+    RETURN 1;
+  END IF;
+  RETURN 0;
+
 END;
 $$
 LANGUAGE plpgsql;
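Which lookup path ends up in the installed tiger_line_import() function is decided when tiger_import_start.sql is rendered against the target database: the file is a template and db.tables reflects the tables that actually exist. A minimal sketch of that dispatch with plain jinja2 (the real rendering goes through Nominatim's SQLPreprocessor; the FakeDb object is a stand-in):

# Sketch only: shows how the {% if %}/{% elif %} branches in the SQL template
# select the fast partition lookup or the slow search_name fallback,
# depending on which tables are present in the target database.
import jinja2

SQL_TEMPLATE = """\
{% if 'location_road_0' in db.tables %}
  out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid, token_info);
{% elif 'search_name' in db.tables %}
  out_parent_place_id := getNearestNamedRoadPlaceIdSlow(place_centroid, token_info);
{% endif %}
"""

class FakeDb:
    """Stand-in for the preprocessor's database description (assumption)."""
    def __init__(self, tables):
        self.tables = tables

template = jinja2.Template(SQL_TEMPLATE)
# Full database: partition tables exist, the fast lookup is compiled in.
print(template.render(db=FakeDb({'location_road_0', 'search_name'})))
# 'Dropped' database: only search_name is left, the slow fallback is compiled in.
print(template.render(db=FakeDb({'search_name'})))

The branch is chosen once per import run, when the SQL file is installed, so the fallback adds no per-row cost to tiger_line_import().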
@@ -65,14 +65,14 @@ class UpdateAddData:
     def run(self, args: NominatimArgs) -> int:
         from ..tools import add_osm_data
 
+        if args.tiger_data:
+            return asyncio.run(self._add_tiger_data(args))
+
         with connect(args.config.get_libpq_dsn()) as conn:
             if is_frozen(conn):
                 print('Database is marked frozen. New data can\'t be added.')
                 return 1
 
-        if args.tiger_data:
-            return asyncio.run(self._add_tiger_data(args))
-
         osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
         if args.file or args.diff:
             return add_osm_data.add_data_from_file(args.config.get_libpq_dsn(),
@@ -17,13 +17,12 @@ import tarfile
 from psycopg.types.json import Json
 
 from ..config import Configuration
-from ..db.connection import connect
+from ..db.connection import connect, table_exists
 from ..db.sql_preprocessor import SQLPreprocessor
 from ..errors import UsageError
 from ..db.query_pool import QueryPool
 from ..data.place_info import PlaceInfo
 from ..tokenizer.base import AbstractTokenizer
-from . import freeze
 
 LOG = logging.getLogger()
 
@@ -90,16 +89,19 @@ async def add_tiger_data(data_dir: str, config: Configuration, threads: int,
     """
     dsn = config.get_libpq_dsn()
 
-    with connect(dsn) as conn:
-        if freeze.is_frozen(conn):
-            raise UsageError("Tiger cannot be imported when database frozen (Github issue #3048)")
-
     with TigerInput(data_dir) as tar:
         if not tar:
            return 1
 
         with connect(dsn) as conn:
             sql = SQLPreprocessor(conn, config)
+
+            if not table_exists(conn, 'search_name'):
+                raise UsageError(
+                    "Cannot perform tiger import: required tables are missing. "
+                    "See https://github.com/osm-search/Nominatim/issues/2463 for details."
+                )
+
             sql.run_sql_file(conn, 'tiger_import_start.sql')
 
         # Reading files and then for each file line handling
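With the frozen-database guard removed from add_tiger_data(), the import now only refuses to start when even search_name is missing. A rough standalone equivalent of that pre-flight check (the PR uses the table_exists() helper imported above; this sketch queries pg_tables directly and the dsn is an assumption):

import psycopg

# Sketch of the pre-import guard, roughly equivalent to the table_exists()
# check added in the diff above; the PR raises UsageError with the same message.
def has_table(conn: psycopg.Connection, name: str) -> bool:
    row = conn.execute(
        "SELECT 1 FROM pg_tables WHERE schemaname = current_schema() AND tablename = %s",
        (name,)).fetchone()
    return row is not None

with psycopg.connect("dbname=nominatim") as conn:               # dsn is an assumption
    if not has_table(conn, 'search_name'):
        raise RuntimeError(
            "Cannot perform tiger import: required tables are missing. "
            "See https://github.com/osm-search/Nominatim/issues/2463 for details.")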
@@ -32,6 +32,8 @@ class MockTigerTable:
 
             # We need this table to determine if the database is frozen or not
             cur.execute("CREATE TABLE place (number INTEGER)")
+            # We need this table to determine if the database is in reverse-only mode
+            cur.execute("CREATE TABLE search_name (place_id BIGINT)")
 
     def count(self):
         return execute_scalar(self.conn, "SELECT count(*) FROM tiger")
@@ -87,15 +89,30 @@ async def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock,
     assert tiger_table.count() == 6213
 
 
+@pytest.mark.parametrize("threads", (1, 5))
 @pytest.mark.asyncio
-async def test_add_tiger_data_database_frozen(def_config, temp_db_conn, tiger_table, tokenizer_mock,
-                                              tmp_path):
+async def test_add_tiger_data_database_frozen(def_config, src_dir, temp_db_conn, tiger_table,
+                                              tokenizer_mock, threads):
     freeze.drop_update_tables(temp_db_conn)
 
-    with pytest.raises(UsageError) as excinfo:
-        await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
+    await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
+                                    def_config, threads, tokenizer_mock())
 
-    assert "database frozen" in str(excinfo.value)
+    assert tiger_table.count() == 6213
+
+
+@pytest.mark.asyncio
+async def test_add_tiger_data_reverse_only(def_config, src_dir, temp_db_conn, tiger_table,
+                                           tokenizer_mock):
+    with temp_db_conn.cursor() as cur:
+        cur.execute("DROP TABLE search_name")
+    temp_db_conn.commit()
+
+    with pytest.raises(UsageError,
+                       match="Cannot perform tiger import: required tables are missing. "
+                             "See https://github.com/osm-search/Nominatim/issues/2463 for details."):
+        await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
+                                        def_config, 1, tokenizer_mock())
 
     assert tiger_table.count() == 0
 