Merge branch 'clean-deleted-relations' of https://github.com/lujoh/Nominatim into lujoh-clean-deleted-relations

This commit is contained in:
Sarah Hoffmann
2023-10-23 10:47:31 +02:00
8 changed files with 176 additions and 62 deletions

View File

@@ -60,16 +60,13 @@ to finish the recomputation.
## Removing large deleted objects

Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`

Nominatim refuses to delete very large areas because often these deletions are
accidental and are reverted within hours. Instead the deletions are logged in
the `import_polygon_delete` table and left to the administrator to clean up.

To run this command you will need to pass a PostgreSQL time interval. For example, to
delete any objects that have been deleted more than a month ago you would run:
`nominatim admin --clean-deleted '1 month'`

View File

@@ -363,57 +363,3 @@ BEGIN
RETURN NULL; RETURN NULL;
END; END;
$$ LANGUAGE plpgsql; $$ LANGUAGE plpgsql;
-- Process the queued deletions in place_to_be_deleted:
-- "deferred" (very large) objects are only logged in import_polygon_delete
-- for manual clean-up; all other objects are removed from the place table
-- and marked for deletion (indexed_status = 100) in placex/osmline.
CREATE OR REPLACE FUNCTION flush_deleted_places()
  RETURNS INTEGER
  AS $$
BEGIN
  -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
  INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
    SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;

  -- delete from place table
  -- The before-delete trigger is disabled while doing so, so the deletes
  -- are not queued into place_to_be_deleted again.
  ALTER TABLE place DISABLE TRIGGER place_before_delete;
  DELETE FROM place USING place_to_be_deleted
    WHERE place.osm_type = place_to_be_deleted.osm_type
          and place.osm_id = place_to_be_deleted.osm_id
          and place.class = place_to_be_deleted.class
          and place.type = place_to_be_deleted.type
          and not deferred;
  ALTER TABLE place ENABLE TRIGGER place_before_delete;

  -- Mark for delete in the placex table
  -- NOTE(review): one statement per osm_type ('N'/'W'/'R'), presumably so the
  -- planner can use (osm_type, osm_id) indexes with a constant type - confirm.
  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
    WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
          and placex.osm_id = place_to_be_deleted.osm_id
          and placex.class = place_to_be_deleted.class
          and placex.type = place_to_be_deleted.type
          and not deferred;
  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
    WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
          and placex.osm_id = place_to_be_deleted.osm_id
          and placex.class = place_to_be_deleted.class
          and placex.type = place_to_be_deleted.type
          and not deferred;
  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
    WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
          and placex.osm_id = place_to_be_deleted.osm_id
          and placex.class = place_to_be_deleted.class
          and placex.type = place_to_be_deleted.type
          and not deferred;

  -- Mark for delete in interpolations
  -- (interpolation lines live in their own table keyed only by way id)
  UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
    WHERE place_to_be_deleted.osm_type = 'W'
          and place_to_be_deleted.class = 'place'
          and place_to_be_deleted.type = 'houses'
          and location_property_osmline.osm_id = place_to_be_deleted.osm_id
          and not deferred;

  -- Clear todo list.
  TRUNCATE TABLE place_to_be_deleted;

  RETURN NULL;
END;
$$ LANGUAGE plpgsql;

View File

@@ -487,3 +487,56 @@ BEGIN
END; END;
$$ $$
LANGUAGE plpgsql; LANGUAGE plpgsql;
-- Process the queued deletions in place_to_be_deleted:
-- "deferred" (very large) objects are only logged in import_polygon_delete
-- for manual clean-up; all other objects are removed from the place table
-- and marked for deletion (indexed_status = 100) in placex/osmline.
CREATE OR REPLACE FUNCTION flush_deleted_places()
  RETURNS INTEGER
  AS $$
BEGIN
  -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
  INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
    SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;

  -- delete from place table
  -- The before-delete trigger is disabled while doing so, so the deletes
  -- are not queued into place_to_be_deleted again.
  ALTER TABLE place DISABLE TRIGGER place_before_delete;
  DELETE FROM place USING place_to_be_deleted
    WHERE place.osm_type = place_to_be_deleted.osm_type
          and place.osm_id = place_to_be_deleted.osm_id
          and place.class = place_to_be_deleted.class
          and place.type = place_to_be_deleted.type
          and not deferred;
  ALTER TABLE place ENABLE TRIGGER place_before_delete;

  -- Mark for delete in the placex table
  -- NOTE(review): one statement per osm_type ('N'/'W'/'R'), presumably so the
  -- planner can use (osm_type, osm_id) indexes with a constant type - confirm.
  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
    WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
          and placex.osm_id = place_to_be_deleted.osm_id
          and placex.class = place_to_be_deleted.class
          and placex.type = place_to_be_deleted.type
          and not deferred;
  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
    WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
          and placex.osm_id = place_to_be_deleted.osm_id
          and placex.class = place_to_be_deleted.class
          and placex.type = place_to_be_deleted.type
          and not deferred;
  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
    WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
          and placex.osm_id = place_to_be_deleted.osm_id
          and placex.class = place_to_be_deleted.class
          and placex.type = place_to_be_deleted.type
          and not deferred;

  -- Mark for delete in interpolations
  -- (interpolation lines live in their own table keyed only by way id)
  UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
    WHERE place_to_be_deleted.osm_type = 'W'
          and place_to_be_deleted.class = 'place'
          and place_to_be_deleted.type = 'houses'
          and location_property_osmline.osm_id = place_to_be_deleted.osm_id
          and not deferred;

  -- Clear todo list.
  TRUNCATE TABLE place_to_be_deleted;

  RETURN NULL;
END;
$$ LANGUAGE plpgsql;

View File

@@ -29,6 +29,7 @@ class AdminFuncs:
""" """
def add_args(self, parser: argparse.ArgumentParser) -> None: def add_args(self, parser: argparse.ArgumentParser) -> None:
self.parser = parser
group = parser.add_argument_group('Admin tasks') group = parser.add_argument_group('Admin tasks')
objs = group.add_mutually_exclusive_group(required=True) objs = group.add_mutually_exclusive_group(required=True)
objs.add_argument('--warm', action='store_true', objs.add_argument('--warm', action='store_true',
@@ -41,6 +42,8 @@ class AdminFuncs:
help='Print performance analysis of the indexing process') help='Print performance analysis of the indexing process')
objs.add_argument('--collect-os-info', action="store_true", objs.add_argument('--collect-os-info', action="store_true",
help="Generate a report about the host system information") help="Generate a report about the host system information")
objs.add_argument('--clean-deleted', action='store', metavar='AGE',
help='Clean up deleted relations')
group = parser.add_argument_group('Arguments for cache warming') group = parser.add_argument_group('Arguments for cache warming')
group.add_argument('--search-only', action='store_const', dest='target', group.add_argument('--search-only', action='store_const', dest='target',
const='search', const='search',
@@ -54,8 +57,11 @@ class AdminFuncs:
help='Analyse indexing of the given OSM object') help='Analyse indexing of the given OSM object')
mgroup.add_argument('--place-id', type=int, mgroup.add_argument('--place-id', type=int,
help='Analyse indexing of the given Nominatim object') help='Analyse indexing of the given Nominatim object')
group = parser.add_argument_group('Arguments for cleaning deleted')
def run(self, args: NominatimArgs) -> int: def run(self, args: NominatimArgs) -> int:
# pylint: disable=too-many-return-statements
if args.warm: if args.warm:
return self._warm(args) return self._warm(args)
@@ -81,6 +87,12 @@ class AdminFuncs:
collect_os_info.report_system_information(args.config) collect_os_info.report_system_information(args.config)
return 0 return 0
if args.clean_deleted:
LOG.warning('Cleaning up deleted relations')
from ..tools import admin
admin.clean_deleted_relations(args.config, age=args.clean_deleted)
return 0
return 1 return 1

View File

@@ -72,6 +72,7 @@ class NominatimArgs:
check_database: bool check_database: bool
migrate: bool migrate: bool
collect_os_info: bool collect_os_info: bool
clean_deleted: str
analyse_indexing: bool analyse_indexing: bool
target: Optional[str] target: Optional[str]
osm_id: Optional[str] osm_id: Optional[str]

View File

@@ -11,6 +11,7 @@ from typing import Optional, Tuple, Any, cast
import logging import logging
from psycopg2.extras import Json, register_hstore from psycopg2.extras import Json, register_hstore
from psycopg2 import DataError
from nominatim.config import Configuration from nominatim.config import Configuration
from nominatim.db.connection import connect, Cursor from nominatim.db.connection import connect, Cursor
@@ -87,3 +88,20 @@ def analyse_indexing(config: Configuration, osm_id: Optional[str] = None,
for msg in conn.notices: for msg in conn.notices:
print(msg) print(msg)
def clean_deleted_relations(config: Configuration, age: str) -> None:
    """ Force-delete logged deletions from `import_polygon_delete` whose
        placex entry was last indexed longer ago than the given age.

        `age` must be a string PostgreSQL accepts as an interval,
        e.g. '1 month'. Raises a UsageError for unparsable intervals.
    """
    delete_sql = """SELECT place_force_delete(p.place_id)
                    FROM import_polygon_delete d, placex p
                    WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
                    AND age(p.indexed_date) > %s::interval"""

    with connect(config.get_libpq_dsn()) as conn:
        with conn.cursor() as cursor:
            try:
                cursor.execute(delete_sql, (age, ))
            except DataError as exc:
                # PostgreSQL could not parse the interval string.
                raise UsageError('Invalid PostgreSQL time interval format') from exc
        conn.commit()

View File

@@ -33,6 +33,17 @@ def test_admin_migrate(cli_call, mock_func_factory):
assert mock.called == 1 assert mock.called == 1
def test_admin_clean_deleted_relations(cli_call, mock_func_factory):
    """ `nominatim admin --clean-deleted <age>` must dispatch to
        tools.admin.clean_deleted_relations exactly once and exit with 0.
    """
    cleanup_mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')

    assert cli_call('admin', '--clean-deleted', '1 month') == 0
    assert cleanup_mock.called == 1
def test_admin_clean_deleted_relations_no_age(cli_call, mock_func_factory):
    """ `--clean-deleted` without the mandatory age argument must fail
        and must not invoke the clean-up function.
    """
    mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')

    assert cli_call('admin', '--clean-deleted') == 1
    # The original test never checked the mock; the error path must not
    # reach the clean-up function at all.
    assert mock.called == 0
class TestCliAdminWithDb: class TestCliAdminWithDb:
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)

View File

@@ -12,6 +12,7 @@ import pytest
from nominatim.errors import UsageError from nominatim.errors import UsageError
from nominatim.tools import admin from nominatim.tools import admin
from nominatim.tokenizer import factory from nominatim.tokenizer import factory
from nominatim.db.sql_preprocessor import SQLPreprocessor
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table): def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
@@ -70,3 +71,78 @@ def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
VALUES(9988, 'N', 10000)""") VALUES(9988, 'N', 10000)""")
admin.analyse_indexing(project_env, osm_id='N10000') admin.analyse_indexing(project_env, osm_id='N10000')
class TestAdminCleanDeleted:
    """ Tests for admin.clean_deleted_relations() run against a database
        that has the real place_force_delete() SQL machinery installed.
    """

    @pytest.fixture(autouse=True)
    def setup_polygon_delete(self, project_env, table_factory, place_table, osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir):
        """ Set up place_force_delete function and related tables
        """
        self.project_env = project_env
        self.temp_db_cursor = temp_db_cursor
        # Log of deferred deletions: two nodes and one relation.
        table_factory('import_polygon_delete',
                      """osm_id BIGINT,
                         osm_type CHAR(1),
                         class TEXT NOT NULL,
                         type TEXT NOT NULL""",
                      ((100, 'N', 'boundary', 'administrative'),
                       (145, 'N', 'boundary', 'administrative'),
                       (175, 'R', 'landcover', 'grass')))
        # Matching placex rows, indexed 1 resp. 3 months ago, so the
        # age-based cut-off of clean_deleted_relations can be exercised.
        temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_id, osm_type, class, type, indexed_date, indexed_status)
                                  VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1),
                                  (2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 month', 1),
                                  (3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""")
        # set up tables and triggers for utils function
        table_factory('place_to_be_deleted',
                      """osm_id BIGINT,
                         osm_type CHAR(1),
                         class TEXT NOT NULL,
                         type TEXT NOT NULL,
                         deferred BOOLEAN""")
        table_factory('country_name', 'partition INT')
        table_factory('import_polygon_error', """osm_id BIGINT,
                                                 osm_type CHAR(1),
                                                 class TEXT NOT NULL,
                                                 type TEXT NOT NULL""")
        # Stub out the delete trigger on place so DELETEs succeed.
        temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_delete()
                                  RETURNS TRIGGER AS $$
                                  BEGIN RETURN NULL; END;
                                  $$ LANGUAGE plpgsql;""")
        temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
                                  FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
        # Load the real utils.sql from the source tree to install
        # place_force_delete(); restore the configured SQL dir afterwards.
        orig_sql = def_config.lib_dir.sql
        def_config.lib_dir.sql = src_dir / 'lib-sql'
        sqlproc = SQLPreprocessor(temp_db_conn, def_config)
        sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
        def_config.lib_dir.sql = orig_sql


    def test_admin_clean_deleted_no_records(self):
        # Nothing is older than a year: no placex row is touched,
        # all three log entries remain.
        admin.clean_deleted_relations(self.project_env, age='1 year')
        assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
                                                                                                                  (145, 'N', 'boundary', 'administrative', 1),
                                                                                                                  (175, 'R', 'landcover', 'grass', 1)}
        assert self.temp_db_cursor.table_rows('import_polygon_delete') == 3


    @pytest.mark.parametrize('test_age', ['T week', '1 welk', 'P1E'])
    def test_admin_clean_deleted_bad_age(self, test_age):
        # Strings PostgreSQL cannot parse as an interval must raise UsageError.
        with pytest.raises(UsageError):
            admin.clean_deleted_relations(self.project_env, age = test_age)


    def test_admin_clean_deleted_partial(self):
        # Only the two 3-month-old objects fall under the 2-month cut-off:
        # they are marked deleted (indexed_status 100) and their log entries
        # removed; the 1-month-old node stays untouched.
        admin.clean_deleted_relations(self.project_env, age = '2 months')
        assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
                                                                                                                  (145, 'N', 'boundary', 'administrative', 100),
                                                                                                                  (175, 'R', 'landcover', 'grass', 100)}
        assert self.temp_db_cursor.table_rows('import_polygon_delete') == 1

    @pytest.mark.parametrize('test_age', ['1 week', 'P3D', '5 hours'])
    def test_admin_clean_deleted(self, test_age):
        # All objects are older than these ages: everything is marked
        # deleted and the log is fully emptied.
        admin.clean_deleted_relations(self.project_env, age = test_age)
        assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 100),
                                                                                                                  (145, 'N', 'boundary', 'administrative', 100),
                                                                                                                  (175, 'R', 'landcover', 'grass', 100)}
        assert self.temp_db_cursor.table_rows('import_polygon_delete') == 0