Merge pull request #2666 from lonvia/admin-command-for-forced-indexing

Admin command for forced indexing
This commit is contained in:
Sarah Hoffmann
2022-04-14 21:44:08 +02:00
committed by GitHub
7 changed files with 156 additions and 11 deletions

View File

@@ -34,6 +34,30 @@ to rerun the statistics computation when adding larger amounts of new data,
for example, when adding an additional country via `nominatim add-data`.
## Forcing recomputation of places and areas
Command: `nominatim refresh --data-object [NWR]<id> --data-area [NWR]<id>`
When running replication updates, Nominatim tries to recompute the search
and address information for all places that are affected by a change. But it
needs to restrict the total number of changes to make sure it can keep up
with the minutely updates. Therefore it will refrain from propagating changes
that affect a lot of objects.
The administrator may force an update of places in the database.
`nominatim refresh --data-object` invalidates a single OSM object.
`nominatim refresh --data-area` invalidates an OSM object and all dependent
objects. That are usually the places that inside its area or around the
center of the object. Both commands expect the OSM object as an argument
of the form OSM type + OSM id. The type must be `N` (node), `W` (way) or
`R` (relation).
After invalidating the object, indexing must be run again. If continuous
update are running in the background, the objects will be recomputed together
with the next round of updates. Otherwise you need to run `nominatim index`
to finish the recomputation.
## Removing large deleted objects
Nominatim refuses to delete very large areas because often these deletions are

View File

@@ -70,7 +70,10 @@ class CommandlineParser:
appropriate subcommand.
"""
args = NominatimArgs()
self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args)
try:
self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args)
except SystemExit:
return 1
if args.subcommand is None:
self.parser.print_help()

View File

@@ -7,6 +7,7 @@
"""
Implementation of 'refresh' subcommand.
"""
from argparse import ArgumentTypeError
import logging
from pathlib import Path
@@ -19,6 +20,16 @@ from nominatim.db.connection import connect
LOG = logging.getLogger()
def _parse_osm_object(obj):
""" Parse the given argument into a tuple of OSM type and ID.
Raises an ArgumentError if the format is not recognized.
"""
if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdigit():
raise ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")
return (obj[0].upper(), int(obj[1:]))
class UpdateRefresh:
"""\
Recompute auxiliary data used by the indexing process.
@@ -53,6 +64,15 @@ class UpdateRefresh:
help='Recompute place importances (expensive!)')
group.add_argument('--website', action='store_true',
help='Refresh the directory that serves the scripts for the web API')
group.add_argument('--data-object', action='append',
type=_parse_osm_object, metavar='OBJECT',
help='Mark the given OSM object as requiring an update'
' (format: [NWR]<id>)')
group.add_argument('--data-area', action='append',
type=_parse_osm_object, metavar='OBJECT',
help='Mark the area around the given OSM object as requiring an update'
' (format: [NWR]<id>)')
group = parser.add_argument_group('Arguments for function refresh')
group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
help='Do not enable code for propagating updates')
@@ -60,7 +80,7 @@ class UpdateRefresh:
help='Enable debug warning statements in functions')
def run(self, args):
def run(self, args): #pylint: disable=too-many-branches
from ..tools import refresh, postcodes
from ..indexer.indexer import Indexer
@@ -124,6 +144,14 @@ class UpdateRefresh:
with connect(args.config.get_libpq_dsn()) as conn:
refresh.setup_website(webdir, args.config, conn)
if args.data_object or args.data_area:
with connect(args.config.get_libpq_dsn()) as conn:
for obj in args.data_object or []:
refresh.invalidate_osm_object(*obj, conn, recursive=False)
for obj in args.data_area or []:
refresh.invalidate_osm_object(*obj, conn, recursive=True)
conn.commit()
return 0

View File

@@ -19,6 +19,7 @@ from nominatim.version import NOMINATIM_VERSION
LOG = logging.getLogger()
OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'}
def _add_address_level_rows_from_entry(rows, entry):
""" Converts a single entry from the JSON format for address rank
@@ -210,3 +211,28 @@ def setup_website(basedir, config, conn):
(basedir / script).write_text(template.format('reverse-only-search.php'), 'utf-8')
else:
(basedir / script).write_text(template.format(script), 'utf-8')
def invalidate_osm_object(osm_type, osm_id, conn, recursive=True):
""" Mark the given OSM object for reindexing. When 'recursive' is set
to True (the default), then all dependent objects are marked for
reindexing as well.
'osm_type' must be on of 'N' (node), 'W' (way) or 'R' (relation).
If the given object does not exist, then nothing happens.
"""
assert osm_type in ('N', 'R', 'W')
LOG.warning("Invalidating OSM %s %s%s.",
OSM_TYPE[osm_type], osm_id,
' and its dependent places' if recursive else '')
with conn.cursor() as cur:
if recursive:
sql = """SELECT place_force_update(place_id)
FROM placex WHERE osm_type = %s and osm_id = %s"""
else:
sql = """UPDATE placex SET indexed_status = 2
WHERE osm_type = %s and osm_id = %s"""
cur.execute(sql, (osm_type, osm_id))

View File

@@ -14,15 +14,14 @@ import nominatim.clicmd.api
@pytest.mark.parametrize("endpoint", (('search', 'reverse', 'lookup', 'details', 'status')))
def test_no_api_without_phpcgi(src_dir, endpoint):
with pytest.raises(SystemExit):
nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
phplib_dir=str(src_dir / 'lib-php'),
data_dir=str(src_dir / 'data'),
phpcgi_path=None,
sqllib_dir=str(src_dir / 'lib-sql'),
config_dir=str(src_dir / 'settings'),
cli_args=[endpoint])
assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
phplib_dir=str(src_dir / 'lib-php'),
data_dir=str(src_dir / 'data'),
phpcgi_path=None,
sqllib_dir=str(src_dir / 'lib-sql'),
config_dir=str(src_dir / 'settings'),
cli_args=[endpoint]) == 1
@pytest.mark.parametrize("params", [('search', '--query', 'new'),

View File

@@ -82,3 +82,24 @@ class TestRefresh:
assert self.call_nominatim('refresh', '--importance', '--wiki-data') == 0
assert calls == ['import', 'update']
@pytest.mark.parametrize('params', [('--data-object', 'w234'),
('--data-object', 'N23', '--data-object', 'N24'),
('--data-area', 'R7723'),
('--data-area', 'r7723', '--data-area', 'r2'),
('--data-area', 'R9284425', '--data-object', 'n1234567894567')])
def test_refresh_objects(self, params, mock_func_factory):
func_mock = mock_func_factory(nominatim.tools.refresh, 'invalidate_osm_object')
assert self.call_nominatim('refresh', *params) == 0
assert func_mock.called == len(params)/2
@pytest.mark.parametrize('func', ('--data-object', '--data-area'))
@pytest.mark.parametrize('param', ('234', 'a55', 'R 453', 'Rel'))
def test_refresh_objects_bad_param(self, func, param, mock_func_factory):
func_mock = mock_func_factory(nominatim.tools.refresh, 'invalidate_osm_object')
self.call_nominatim('refresh', func, param) == 1
assert func_mock.called == 0

View File

@@ -39,3 +39,47 @@ def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db
AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""")
refresh.recompute_importance(temp_db_conn)
@pytest.mark.parametrize('osm_type', ('N', 'W', 'R'))
def test_invalidate_osm_object_simple(placex_table, osm_type, temp_db_conn, temp_db_cursor):
placex_table.add(osm_type=osm_type, osm_id=57283)
refresh.invalidate_osm_object(osm_type, 57283, temp_db_conn, recursive=False)
temp_db_conn.commit()
assert 2 == temp_db_cursor.scalar("""SELECT indexed_status FROM placex
WHERE osm_type = %s and osm_id = %s""",
(osm_type, 57283))
def test_invalidate_osm_object_nonexisting_simple(placex_table, temp_db_conn, temp_db_cursor):
placex_table.add(osm_type='W', osm_id=57283)
refresh.invalidate_osm_object('N', 57283, temp_db_conn, recursive=False)
temp_db_conn.commit()
assert 0 == temp_db_cursor.scalar("""SELECT count(*) FROM placex
WHERE indexed_status > 0""")
@pytest.mark.parametrize('osm_type', ('N', 'W', 'R'))
def test_invalidate_osm_object_recursive(placex_table, osm_type, temp_db_conn, temp_db_cursor):
placex_table.add(osm_type=osm_type, osm_id=57283)
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_force_update(placeid BIGINT)
RETURNS BOOLEAN AS $$
BEGIN
UPDATE placex SET indexed_status = 522
WHERE place_id = placeid;
RETURN TRUE;
END;
$$
LANGUAGE plpgsql;""")
refresh.invalidate_osm_object(osm_type, 57283, temp_db_conn)
temp_db_conn.commit()
assert 522 == temp_db_cursor.scalar("""SELECT indexed_status FROM placex
WHERE osm_type = %s and osm_id = %s""",
(osm_type, 57283))