mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 01:47:57 +00:00
Merge pull request #3906 from AyushDharDubey/fix/issue_2463-Use-search_name-table-for-TIGER-data-imports-on-'dropped'-databases
Use `search_name` as fallback for TIGER imports when update tables are dropped
This commit is contained in:
@@ -15,6 +15,99 @@ CREATE TABLE location_property_tiger_import (
|
|||||||
step SMALLINT,
|
step SMALLINT,
|
||||||
postcode TEXT);
|
postcode TEXT);
|
||||||
|
|
||||||
|
|
||||||
|
-- Lookup functions for tiger import when update
|
||||||
|
-- informations are dropped (see gh-issue #2463)
|
||||||
|
CREATE OR REPLACE FUNCTION getNearestNamedRoadPlaceIdSlow(in_centroid GEOMETRY,
|
||||||
|
in_token_info JSONB)
|
||||||
|
RETURNS BIGINT
|
||||||
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
out_place_id BIGINT;
|
||||||
|
|
||||||
|
BEGIN
|
||||||
|
SELECT place_id INTO out_place_id
|
||||||
|
FROM search_name
|
||||||
|
WHERE
|
||||||
|
-- finds rows where name_vector shares elements with search tokens.
|
||||||
|
token_matches_street(in_token_info, name_vector)
|
||||||
|
-- limits search area
|
||||||
|
AND centroid && ST_Expand(in_centroid, 0.015)
|
||||||
|
AND address_rank BETWEEN 26 AND 27
|
||||||
|
ORDER BY ST_Distance(centroid, in_centroid) ASC
|
||||||
|
LIMIT 1;
|
||||||
|
|
||||||
|
RETURN out_place_id;
|
||||||
|
END
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION getNearestParallelRoadFeatureSlow(line GEOMETRY)
|
||||||
|
RETURNS BIGINT
|
||||||
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
r RECORD;
|
||||||
|
search_diameter FLOAT;
|
||||||
|
p1 GEOMETRY;
|
||||||
|
p2 GEOMETRY;
|
||||||
|
p3 GEOMETRY;
|
||||||
|
|
||||||
|
BEGIN
|
||||||
|
IF ST_GeometryType(line) not in ('ST_LineString') THEN
|
||||||
|
RETURN NULL;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
p1 := ST_LineInterpolatePoint(line,0);
|
||||||
|
p2 := ST_LineInterpolatePoint(line,0.5);
|
||||||
|
p3 := ST_LineInterpolatePoint(line,1);
|
||||||
|
|
||||||
|
search_diameter := 0.0005;
|
||||||
|
WHILE search_diameter < 0.01 LOOP
|
||||||
|
FOR r IN
|
||||||
|
SELECT place_id FROM placex
|
||||||
|
WHERE ST_DWithin(line, geometry, search_diameter)
|
||||||
|
AND rank_address BETWEEN 26 AND 27
|
||||||
|
ORDER BY (ST_distance(geometry, p1)+
|
||||||
|
ST_distance(geometry, p2)+
|
||||||
|
ST_distance(geometry, p3)) ASC limit 1
|
||||||
|
LOOP
|
||||||
|
RETURN r.place_id;
|
||||||
|
END LOOP;
|
||||||
|
search_diameter := search_diameter * 2;
|
||||||
|
END LOOP;
|
||||||
|
RETURN NULL;
|
||||||
|
END
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION getNearestRoadPlaceIdSlow(point GEOMETRY)
|
||||||
|
RETURNS BIGINT
|
||||||
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
r RECORD;
|
||||||
|
search_diameter FLOAT;
|
||||||
|
BEGIN
|
||||||
|
search_diameter := 0.00005;
|
||||||
|
WHILE search_diameter < 0.1 LOOP
|
||||||
|
FOR r IN
|
||||||
|
SELECT place_id FROM placex
|
||||||
|
WHERE ST_DWithin(geometry, point, search_diameter)
|
||||||
|
AND rank_address BETWEEN 26 AND 27
|
||||||
|
ORDER BY ST_Distance(geometry, point) ASC limit 1
|
||||||
|
LOOP
|
||||||
|
RETURN r.place_id;
|
||||||
|
END LOOP;
|
||||||
|
search_diameter := search_diameter * 2;
|
||||||
|
END LOOP;
|
||||||
|
RETURN NULL;
|
||||||
|
END
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
|
||||||
|
-- Tiger import function
|
||||||
CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, in_startnumber INTEGER,
|
CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, in_startnumber INTEGER,
|
||||||
in_endnumber INTEGER, interpolationtype TEXT,
|
in_endnumber INTEGER, interpolationtype TEXT,
|
||||||
token_info JSONB, in_postcode TEXT) RETURNS INTEGER
|
token_info JSONB, in_postcode TEXT) RETURNS INTEGER
|
||||||
@@ -71,28 +164,51 @@ BEGIN
|
|||||||
place_centroid := ST_Centroid(linegeo);
|
place_centroid := ST_Centroid(linegeo);
|
||||||
out_partition := get_partition('us');
|
out_partition := get_partition('us');
|
||||||
|
|
||||||
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
|
-- HYBRID LOOKUP LOGIC (see gh-issue #2463)
|
||||||
|
-- if partition tables exist, use them for fast spatial lookups
|
||||||
|
{% if 'location_road_0' in db.tables %}
|
||||||
|
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
|
||||||
token_info);
|
token_info);
|
||||||
|
|
||||||
IF out_parent_place_id IS NULL THEN
|
IF out_parent_place_id IS NULL THEN
|
||||||
SELECT getNearestParallelRoadFeature(out_partition, linegeo)
|
SELECT getNearestParallelRoadFeature(out_partition, linegeo)
|
||||||
INTO out_parent_place_id;
|
INTO out_parent_place_id;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
IF out_parent_place_id IS NULL THEN
|
||||||
|
SELECT getNearestRoadPlaceId(out_partition, place_centroid)
|
||||||
|
INTO out_parent_place_id;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
-- When updatable information has been dropped:
|
||||||
|
-- Partition tables no longer exist, but search_name still persists.
|
||||||
|
{% elif 'search_name' in db.tables %}
|
||||||
|
-- Fallback: Look up in 'search_name' table
|
||||||
|
-- though spatial lookups here can be slower.
|
||||||
|
out_parent_place_id := getNearestNamedRoadPlaceIdSlow(place_centroid, token_info);
|
||||||
|
|
||||||
|
IF out_parent_place_id IS NULL THEN
|
||||||
|
out_parent_place_id := getNearestParallelRoadFeatureSlow(linegeo);
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
IF out_parent_place_id IS NULL THEN
|
||||||
|
out_parent_place_id := getNearestRoadPlaceIdSlow(place_centroid);
|
||||||
|
END IF;
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
-- If parent was found, insert street(line) into import table
|
||||||
|
IF out_parent_place_id IS NOT NULL THEN
|
||||||
|
INSERT INTO location_property_tiger_import (linegeo, place_id, partition,
|
||||||
|
parent_place_id, startnumber, endnumber,
|
||||||
|
step, postcode)
|
||||||
|
VALUES (linegeo, nextval('seq_place'), out_partition,
|
||||||
|
out_parent_place_id, startnumber, endnumber,
|
||||||
|
stepsize, in_postcode);
|
||||||
|
|
||||||
|
RETURN 1;
|
||||||
END IF;
|
END IF;
|
||||||
|
RETURN 0;
|
||||||
|
|
||||||
IF out_parent_place_id IS NULL THEN
|
|
||||||
SELECT getNearestRoadPlaceId(out_partition, place_centroid)
|
|
||||||
INTO out_parent_place_id;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
--insert street(line) into import table
|
|
||||||
insert into location_property_tiger_import (linegeo, place_id, partition,
|
|
||||||
parent_place_id, startnumber, endnumber,
|
|
||||||
step, postcode)
|
|
||||||
values (linegeo, nextval('seq_place'), out_partition,
|
|
||||||
out_parent_place_id, startnumber, endnumber,
|
|
||||||
stepsize, in_postcode);
|
|
||||||
|
|
||||||
RETURN 1;
|
|
||||||
END;
|
END;
|
||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|||||||
@@ -65,14 +65,14 @@ class UpdateAddData:
|
|||||||
def run(self, args: NominatimArgs) -> int:
|
def run(self, args: NominatimArgs) -> int:
|
||||||
from ..tools import add_osm_data
|
from ..tools import add_osm_data
|
||||||
|
|
||||||
|
if args.tiger_data:
|
||||||
|
return asyncio.run(self._add_tiger_data(args))
|
||||||
|
|
||||||
with connect(args.config.get_libpq_dsn()) as conn:
|
with connect(args.config.get_libpq_dsn()) as conn:
|
||||||
if is_frozen(conn):
|
if is_frozen(conn):
|
||||||
print('Database is marked frozen. New data can\'t be added.')
|
print('Database is marked frozen. New data can\'t be added.')
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if args.tiger_data:
|
|
||||||
return asyncio.run(self._add_tiger_data(args))
|
|
||||||
|
|
||||||
osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
|
osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
|
||||||
if args.file or args.diff:
|
if args.file or args.diff:
|
||||||
return add_osm_data.add_data_from_file(args.config.get_libpq_dsn(),
|
return add_osm_data.add_data_from_file(args.config.get_libpq_dsn(),
|
||||||
|
|||||||
@@ -17,13 +17,12 @@ import tarfile
|
|||||||
from psycopg.types.json import Json
|
from psycopg.types.json import Json
|
||||||
|
|
||||||
from ..config import Configuration
|
from ..config import Configuration
|
||||||
from ..db.connection import connect
|
from ..db.connection import connect, table_exists
|
||||||
from ..db.sql_preprocessor import SQLPreprocessor
|
from ..db.sql_preprocessor import SQLPreprocessor
|
||||||
from ..errors import UsageError
|
from ..errors import UsageError
|
||||||
from ..db.query_pool import QueryPool
|
from ..db.query_pool import QueryPool
|
||||||
from ..data.place_info import PlaceInfo
|
from ..data.place_info import PlaceInfo
|
||||||
from ..tokenizer.base import AbstractTokenizer
|
from ..tokenizer.base import AbstractTokenizer
|
||||||
from . import freeze
|
|
||||||
|
|
||||||
LOG = logging.getLogger()
|
LOG = logging.getLogger()
|
||||||
|
|
||||||
@@ -90,16 +89,19 @@ async def add_tiger_data(data_dir: str, config: Configuration, threads: int,
|
|||||||
"""
|
"""
|
||||||
dsn = config.get_libpq_dsn()
|
dsn = config.get_libpq_dsn()
|
||||||
|
|
||||||
with connect(dsn) as conn:
|
|
||||||
if freeze.is_frozen(conn):
|
|
||||||
raise UsageError("Tiger cannot be imported when database frozen (Github issue #3048)")
|
|
||||||
|
|
||||||
with TigerInput(data_dir) as tar:
|
with TigerInput(data_dir) as tar:
|
||||||
if not tar:
|
if not tar:
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
with connect(dsn) as conn:
|
with connect(dsn) as conn:
|
||||||
sql = SQLPreprocessor(conn, config)
|
sql = SQLPreprocessor(conn, config)
|
||||||
|
|
||||||
|
if not table_exists(conn, 'search_name'):
|
||||||
|
raise UsageError(
|
||||||
|
"Cannot perform tiger import: required tables are missing. "
|
||||||
|
"See https://github.com/osm-search/Nominatim/issues/2463 for details."
|
||||||
|
)
|
||||||
|
|
||||||
sql.run_sql_file(conn, 'tiger_import_start.sql')
|
sql.run_sql_file(conn, 'tiger_import_start.sql')
|
||||||
|
|
||||||
# Reading files and then for each file line handling
|
# Reading files and then for each file line handling
|
||||||
|
|||||||
@@ -32,6 +32,8 @@ class MockTigerTable:
|
|||||||
|
|
||||||
# We need this table to determine if the database is frozen or not
|
# We need this table to determine if the database is frozen or not
|
||||||
cur.execute("CREATE TABLE place (number INTEGER)")
|
cur.execute("CREATE TABLE place (number INTEGER)")
|
||||||
|
# We need this table to determine if the database is in reverse-only mode
|
||||||
|
cur.execute("CREATE TABLE search_name (place_id BIGINT)")
|
||||||
|
|
||||||
def count(self):
|
def count(self):
|
||||||
return execute_scalar(self.conn, "SELECT count(*) FROM tiger")
|
return execute_scalar(self.conn, "SELECT count(*) FROM tiger")
|
||||||
@@ -87,15 +89,30 @@ async def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock,
|
|||||||
assert tiger_table.count() == 6213
|
assert tiger_table.count() == 6213
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("threads", (1, 5))
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_add_tiger_data_database_frozen(def_config, temp_db_conn, tiger_table, tokenizer_mock,
|
async def test_add_tiger_data_database_frozen(def_config, src_dir, temp_db_conn, tiger_table,
|
||||||
tmp_path):
|
tokenizer_mock, threads):
|
||||||
freeze.drop_update_tables(temp_db_conn)
|
freeze.drop_update_tables(temp_db_conn)
|
||||||
|
|
||||||
with pytest.raises(UsageError) as excinfo:
|
await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
|
||||||
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
|
def_config, threads, tokenizer_mock())
|
||||||
|
|
||||||
assert "database frozen" in str(excinfo.value)
|
assert tiger_table.count() == 6213
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_add_tiger_data_reverse_only(def_config, src_dir, temp_db_conn, tiger_table,
|
||||||
|
tokenizer_mock):
|
||||||
|
with temp_db_conn.cursor() as cur:
|
||||||
|
cur.execute("DROP TABLE search_name")
|
||||||
|
temp_db_conn.commit()
|
||||||
|
|
||||||
|
with pytest.raises(UsageError,
|
||||||
|
match="Cannot perform tiger import: required tables are missing. "
|
||||||
|
"See https://github.com/osm-search/Nominatim/issues/2463 for details."):
|
||||||
|
await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
|
||||||
|
def_config, 1, tokenizer_mock())
|
||||||
|
|
||||||
assert tiger_table.count() == 0
|
assert tiger_table.count() == 0
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user