mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Merge branch 'clean-deleted-relations' of https://github.com/lujoh/Nominatim into lujoh-clean-deleted-relations
This commit is contained in:
@@ -60,16 +60,13 @@ to finish the recomputation.
|
|||||||
|
|
||||||
## Removing large deleted objects
|
## Removing large deleted objects
|
||||||
|
|
||||||
|
Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
|
||||||
|
|
||||||
Nominatim refuses to delete very large areas because often these deletions are
|
Nominatim refuses to delete very large areas because often these deletions are
|
||||||
accidental and are reverted within hours. Instead the deletions are logged in
|
accidental and are reverted within hours. Instead the deletions are logged in
|
||||||
the `import_polygon_delete` table and left to the administrator to clean up.
|
the `import_polygon_delete` table and left to the administrator to clean up.
|
||||||
|
|
||||||
There is currently no command to do that. You can use the following SQL
|
To run this command you will need to pass a PostgreSQL time interval. For example to
|
||||||
query to force a deletion on all objects that have been deleted more than
|
delete any objects that have been deleted more than a month ago you would run:
|
||||||
a certain timespan ago (here: 1 month):
|
`nominatim admin --clean-deleted '1 month'`
|
||||||
|
|
||||||
```sql
|
|
||||||
SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
|
|
||||||
WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
|
|
||||||
and age(p.indexed_date) > '1 month'::interval
|
|
||||||
```
|
|
||||||
|
|||||||
@@ -363,57 +363,3 @@ BEGIN
|
|||||||
RETURN NULL;
|
RETURN NULL;
|
||||||
END;
|
END;
|
||||||
$$ LANGUAGE plpgsql;
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION flush_deleted_places()
|
|
||||||
RETURNS INTEGER
|
|
||||||
AS $$
|
|
||||||
BEGIN
|
|
||||||
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
|
|
||||||
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
|
|
||||||
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
|
|
||||||
|
|
||||||
-- delete from place table
|
|
||||||
ALTER TABLE place DISABLE TRIGGER place_before_delete;
|
|
||||||
DELETE FROM place USING place_to_be_deleted
|
|
||||||
WHERE place.osm_type = place_to_be_deleted.osm_type
|
|
||||||
and place.osm_id = place_to_be_deleted.osm_id
|
|
||||||
and place.class = place_to_be_deleted.class
|
|
||||||
and place.type = place_to_be_deleted.type
|
|
||||||
and not deferred;
|
|
||||||
ALTER TABLE place ENABLE TRIGGER place_before_delete;
|
|
||||||
|
|
||||||
-- Mark for delete in the placex table
|
|
||||||
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
|
||||||
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
|
|
||||||
and placex.osm_id = place_to_be_deleted.osm_id
|
|
||||||
and placex.class = place_to_be_deleted.class
|
|
||||||
and placex.type = place_to_be_deleted.type
|
|
||||||
and not deferred;
|
|
||||||
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
|
||||||
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
|
|
||||||
and placex.osm_id = place_to_be_deleted.osm_id
|
|
||||||
and placex.class = place_to_be_deleted.class
|
|
||||||
and placex.type = place_to_be_deleted.type
|
|
||||||
and not deferred;
|
|
||||||
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
|
||||||
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
|
|
||||||
and placex.osm_id = place_to_be_deleted.osm_id
|
|
||||||
and placex.class = place_to_be_deleted.class
|
|
||||||
and placex.type = place_to_be_deleted.type
|
|
||||||
and not deferred;
|
|
||||||
|
|
||||||
-- Mark for delete in interpolations
|
|
||||||
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
|
|
||||||
WHERE place_to_be_deleted.osm_type = 'W'
|
|
||||||
and place_to_be_deleted.class = 'place'
|
|
||||||
and place_to_be_deleted.type = 'houses'
|
|
||||||
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
|
|
||||||
and not deferred;
|
|
||||||
|
|
||||||
-- Clear todo list.
|
|
||||||
TRUNCATE TABLE place_to_be_deleted;
|
|
||||||
|
|
||||||
RETURN NULL;
|
|
||||||
END;
|
|
||||||
$$ LANGUAGE plpgsql;
|
|
||||||
|
|
||||||
|
|||||||
@@ -487,3 +487,56 @@ BEGIN
|
|||||||
END;
|
END;
|
||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION flush_deleted_places()
|
||||||
|
RETURNS INTEGER
|
||||||
|
AS $$
|
||||||
|
BEGIN
|
||||||
|
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
|
||||||
|
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
|
||||||
|
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
|
||||||
|
|
||||||
|
-- delete from place table
|
||||||
|
ALTER TABLE place DISABLE TRIGGER place_before_delete;
|
||||||
|
DELETE FROM place USING place_to_be_deleted
|
||||||
|
WHERE place.osm_type = place_to_be_deleted.osm_type
|
||||||
|
and place.osm_id = place_to_be_deleted.osm_id
|
||||||
|
and place.class = place_to_be_deleted.class
|
||||||
|
and place.type = place_to_be_deleted.type
|
||||||
|
and not deferred;
|
||||||
|
ALTER TABLE place ENABLE TRIGGER place_before_delete;
|
||||||
|
|
||||||
|
-- Mark for delete in the placex table
|
||||||
|
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
||||||
|
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
|
||||||
|
and placex.osm_id = place_to_be_deleted.osm_id
|
||||||
|
and placex.class = place_to_be_deleted.class
|
||||||
|
and placex.type = place_to_be_deleted.type
|
||||||
|
and not deferred;
|
||||||
|
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
||||||
|
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
|
||||||
|
and placex.osm_id = place_to_be_deleted.osm_id
|
||||||
|
and placex.class = place_to_be_deleted.class
|
||||||
|
and placex.type = place_to_be_deleted.type
|
||||||
|
and not deferred;
|
||||||
|
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
||||||
|
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
|
||||||
|
and placex.osm_id = place_to_be_deleted.osm_id
|
||||||
|
and placex.class = place_to_be_deleted.class
|
||||||
|
and placex.type = place_to_be_deleted.type
|
||||||
|
and not deferred;
|
||||||
|
|
||||||
|
-- Mark for delete in interpolations
|
||||||
|
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
|
||||||
|
WHERE place_to_be_deleted.osm_type = 'W'
|
||||||
|
and place_to_be_deleted.class = 'place'
|
||||||
|
and place_to_be_deleted.type = 'houses'
|
||||||
|
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
|
||||||
|
and not deferred;
|
||||||
|
|
||||||
|
-- Clear todo list.
|
||||||
|
TRUNCATE TABLE place_to_be_deleted;
|
||||||
|
|
||||||
|
RETURN NULL;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ class AdminFuncs:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def add_args(self, parser: argparse.ArgumentParser) -> None:
|
def add_args(self, parser: argparse.ArgumentParser) -> None:
|
||||||
|
self.parser = parser
|
||||||
group = parser.add_argument_group('Admin tasks')
|
group = parser.add_argument_group('Admin tasks')
|
||||||
objs = group.add_mutually_exclusive_group(required=True)
|
objs = group.add_mutually_exclusive_group(required=True)
|
||||||
objs.add_argument('--warm', action='store_true',
|
objs.add_argument('--warm', action='store_true',
|
||||||
@@ -41,6 +42,8 @@ class AdminFuncs:
|
|||||||
help='Print performance analysis of the indexing process')
|
help='Print performance analysis of the indexing process')
|
||||||
objs.add_argument('--collect-os-info', action="store_true",
|
objs.add_argument('--collect-os-info', action="store_true",
|
||||||
help="Generate a report about the host system information")
|
help="Generate a report about the host system information")
|
||||||
|
objs.add_argument('--clean-deleted', action='store', metavar='AGE',
|
||||||
|
help='Clean up deleted relations')
|
||||||
group = parser.add_argument_group('Arguments for cache warming')
|
group = parser.add_argument_group('Arguments for cache warming')
|
||||||
group.add_argument('--search-only', action='store_const', dest='target',
|
group.add_argument('--search-only', action='store_const', dest='target',
|
||||||
const='search',
|
const='search',
|
||||||
@@ -54,8 +57,11 @@ class AdminFuncs:
|
|||||||
help='Analyse indexing of the given OSM object')
|
help='Analyse indexing of the given OSM object')
|
||||||
mgroup.add_argument('--place-id', type=int,
|
mgroup.add_argument('--place-id', type=int,
|
||||||
help='Analyse indexing of the given Nominatim object')
|
help='Analyse indexing of the given Nominatim object')
|
||||||
|
group = parser.add_argument_group('Arguments for cleaning deleted')
|
||||||
|
|
||||||
|
|
||||||
def run(self, args: NominatimArgs) -> int:
|
def run(self, args: NominatimArgs) -> int:
|
||||||
|
# pylint: disable=too-many-return-statements
|
||||||
if args.warm:
|
if args.warm:
|
||||||
return self._warm(args)
|
return self._warm(args)
|
||||||
|
|
||||||
@@ -81,6 +87,12 @@ class AdminFuncs:
|
|||||||
collect_os_info.report_system_information(args.config)
|
collect_os_info.report_system_information(args.config)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
if args.clean_deleted:
|
||||||
|
LOG.warning('Cleaning up deleted relations')
|
||||||
|
from ..tools import admin
|
||||||
|
admin.clean_deleted_relations(args.config, age=args.clean_deleted)
|
||||||
|
return 0
|
||||||
|
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -72,6 +72,7 @@ class NominatimArgs:
|
|||||||
check_database: bool
|
check_database: bool
|
||||||
migrate: bool
|
migrate: bool
|
||||||
collect_os_info: bool
|
collect_os_info: bool
|
||||||
|
clean_deleted: str
|
||||||
analyse_indexing: bool
|
analyse_indexing: bool
|
||||||
target: Optional[str]
|
target: Optional[str]
|
||||||
osm_id: Optional[str]
|
osm_id: Optional[str]
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ from typing import Optional, Tuple, Any, cast
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
from psycopg2.extras import Json, register_hstore
|
from psycopg2.extras import Json, register_hstore
|
||||||
|
from psycopg2 import DataError
|
||||||
|
|
||||||
from nominatim.config import Configuration
|
from nominatim.config import Configuration
|
||||||
from nominatim.db.connection import connect, Cursor
|
from nominatim.db.connection import connect, Cursor
|
||||||
@@ -87,3 +88,20 @@ def analyse_indexing(config: Configuration, osm_id: Optional[str] = None,
|
|||||||
|
|
||||||
for msg in conn.notices:
|
for msg in conn.notices:
|
||||||
print(msg)
|
print(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def clean_deleted_relations(config: Configuration, age: str) -> None:
|
||||||
|
""" Clean deleted relations older than a given age
|
||||||
|
"""
|
||||||
|
with connect(config.get_libpq_dsn()) as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
try:
|
||||||
|
cur.execute("""SELECT place_force_delete(p.place_id)
|
||||||
|
FROM import_polygon_delete d, placex p
|
||||||
|
WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
|
||||||
|
AND age(p.indexed_date) > %s::interval""",
|
||||||
|
(age, ))
|
||||||
|
except DataError as exc:
|
||||||
|
raise UsageError('Invalid PostgreSQL time interval format') from exc
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
@@ -33,6 +33,17 @@ def test_admin_migrate(cli_call, mock_func_factory):
|
|||||||
assert mock.called == 1
|
assert mock.called == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_admin_clean_deleted_relations(cli_call, mock_func_factory):
|
||||||
|
mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
|
||||||
|
|
||||||
|
assert cli_call('admin', '--clean-deleted', '1 month') == 0
|
||||||
|
assert mock.called == 1
|
||||||
|
|
||||||
|
def test_admin_clean_deleted_relations_no_age(cli_call, mock_func_factory):
|
||||||
|
mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
|
||||||
|
|
||||||
|
assert cli_call('admin', '--clean-deleted') == 1
|
||||||
|
|
||||||
class TestCliAdminWithDb:
|
class TestCliAdminWithDb:
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import pytest
|
|||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
from nominatim.tools import admin
|
from nominatim.tools import admin
|
||||||
from nominatim.tokenizer import factory
|
from nominatim.tokenizer import factory
|
||||||
|
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
|
def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
|
||||||
@@ -70,3 +71,78 @@ def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
|
|||||||
VALUES(9988, 'N', 10000)""")
|
VALUES(9988, 'N', 10000)""")
|
||||||
|
|
||||||
admin.analyse_indexing(project_env, osm_id='N10000')
|
admin.analyse_indexing(project_env, osm_id='N10000')
|
||||||
|
|
||||||
|
|
||||||
|
class TestAdminCleanDeleted:
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def setup_polygon_delete(self, project_env, table_factory, place_table, osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir):
|
||||||
|
""" Set up place_force_delete function and related tables
|
||||||
|
"""
|
||||||
|
self.project_env = project_env
|
||||||
|
self.temp_db_cursor = temp_db_cursor
|
||||||
|
table_factory('import_polygon_delete',
|
||||||
|
"""osm_id BIGINT,
|
||||||
|
osm_type CHAR(1),
|
||||||
|
class TEXT NOT NULL,
|
||||||
|
type TEXT NOT NULL""",
|
||||||
|
((100, 'N', 'boundary', 'administrative'),
|
||||||
|
(145, 'N', 'boundary', 'administrative'),
|
||||||
|
(175, 'R', 'landcover', 'grass')))
|
||||||
|
temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_id, osm_type, class, type, indexed_date, indexed_status)
|
||||||
|
VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1),
|
||||||
|
(2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 month', 1),
|
||||||
|
(3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""")
|
||||||
|
# set up tables and triggers for utils function
|
||||||
|
table_factory('place_to_be_deleted',
|
||||||
|
"""osm_id BIGINT,
|
||||||
|
osm_type CHAR(1),
|
||||||
|
class TEXT NOT NULL,
|
||||||
|
type TEXT NOT NULL,
|
||||||
|
deferred BOOLEAN""")
|
||||||
|
table_factory('country_name', 'partition INT')
|
||||||
|
table_factory('import_polygon_error', """osm_id BIGINT,
|
||||||
|
osm_type CHAR(1),
|
||||||
|
class TEXT NOT NULL,
|
||||||
|
type TEXT NOT NULL""")
|
||||||
|
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_delete()
|
||||||
|
RETURNS TRIGGER AS $$
|
||||||
|
BEGIN RETURN NULL; END;
|
||||||
|
$$ LANGUAGE plpgsql;""")
|
||||||
|
temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
|
||||||
|
FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
|
||||||
|
orig_sql = def_config.lib_dir.sql
|
||||||
|
def_config.lib_dir.sql = src_dir / 'lib-sql'
|
||||||
|
sqlproc = SQLPreprocessor(temp_db_conn, def_config)
|
||||||
|
sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
|
||||||
|
def_config.lib_dir.sql = orig_sql
|
||||||
|
|
||||||
|
|
||||||
|
def test_admin_clean_deleted_no_records(self):
|
||||||
|
admin.clean_deleted_relations(self.project_env, age='1 year')
|
||||||
|
assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
|
||||||
|
(145, 'N', 'boundary', 'administrative', 1),
|
||||||
|
(175, 'R', 'landcover', 'grass', 1)}
|
||||||
|
assert self.temp_db_cursor.table_rows('import_polygon_delete') == 3
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('test_age', ['T week', '1 welk', 'P1E'])
|
||||||
|
def test_admin_clean_deleted_bad_age(self, test_age):
|
||||||
|
with pytest.raises(UsageError):
|
||||||
|
admin.clean_deleted_relations(self.project_env, age = test_age)
|
||||||
|
|
||||||
|
|
||||||
|
def test_admin_clean_deleted_partial(self):
|
||||||
|
admin.clean_deleted_relations(self.project_env, age = '2 months')
|
||||||
|
assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
|
||||||
|
(145, 'N', 'boundary', 'administrative', 100),
|
||||||
|
(175, 'R', 'landcover', 'grass', 100)}
|
||||||
|
assert self.temp_db_cursor.table_rows('import_polygon_delete') == 1
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('test_age', ['1 week', 'P3D', '5 hours'])
|
||||||
|
def test_admin_clean_deleted(self, test_age):
|
||||||
|
admin.clean_deleted_relations(self.project_env, age = test_age)
|
||||||
|
assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 100),
|
||||||
|
(145, 'N', 'boundary', 'administrative', 100),
|
||||||
|
(175, 'R', 'landcover', 'grass', 100)}
|
||||||
|
assert self.temp_db_cursor.table_rows('import_polygon_delete') == 0
|
||||||
|
|||||||
Reference in New Issue
Block a user