mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 18:37:58 +00:00
Merge pull request #2666 from lonvia/admin-command-for-forced-indexing
Admin command for forced indexing
This commit is contained in:
@@ -34,6 +34,30 @@ to rerun the statistics computation when adding larger amounts of new data,
|
||||
for example, when adding an additional country via `nominatim add-data`.
|
||||
|
||||
|
||||
## Forcing recomputation of places and areas
|
||||
|
||||
Command: `nominatim refresh --data-object [NWR]<id> --data-area [NWR]<id>`
|
||||
|
||||
When running replication updates, Nominatim tries to recompute the search
|
||||
and address information for all places that are affected by a change. But it
|
||||
needs to restrict the total number of changes to make sure it can keep up
|
||||
with the minutely updates. Therefore it will refrain from propagating changes
|
||||
that affect a lot of objects.
|
||||
|
||||
The administrator may force an update of places in the database.
|
||||
`nominatim refresh --data-object` invalidates a single OSM object.
|
||||
`nominatim refresh --data-area` invalidates an OSM object and all dependent
|
||||
objects. That are usually the places that inside its area or around the
|
||||
center of the object. Both commands expect the OSM object as an argument
|
||||
of the form OSM type + OSM id. The type must be `N` (node), `W` (way) or
|
||||
`R` (relation).
|
||||
|
||||
After invalidating the object, indexing must be run again. If continuous
|
||||
update are running in the background, the objects will be recomputed together
|
||||
with the next round of updates. Otherwise you need to run `nominatim index`
|
||||
to finish the recomputation.
|
||||
|
||||
|
||||
## Removing large deleted objects
|
||||
|
||||
Nominatim refuses to delete very large areas because often these deletions are
|
||||
|
||||
@@ -70,7 +70,10 @@ class CommandlineParser:
|
||||
appropriate subcommand.
|
||||
"""
|
||||
args = NominatimArgs()
|
||||
self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args)
|
||||
try:
|
||||
self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args)
|
||||
except SystemExit:
|
||||
return 1
|
||||
|
||||
if args.subcommand is None:
|
||||
self.parser.print_help()
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
"""
|
||||
Implementation of 'refresh' subcommand.
|
||||
"""
|
||||
from argparse import ArgumentTypeError
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
@@ -19,6 +20,16 @@ from nominatim.db.connection import connect
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def _parse_osm_object(obj):
|
||||
""" Parse the given argument into a tuple of OSM type and ID.
|
||||
Raises an ArgumentError if the format is not recognized.
|
||||
"""
|
||||
if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdigit():
|
||||
raise ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")
|
||||
|
||||
return (obj[0].upper(), int(obj[1:]))
|
||||
|
||||
|
||||
class UpdateRefresh:
|
||||
"""\
|
||||
Recompute auxiliary data used by the indexing process.
|
||||
@@ -53,6 +64,15 @@ class UpdateRefresh:
|
||||
help='Recompute place importances (expensive!)')
|
||||
group.add_argument('--website', action='store_true',
|
||||
help='Refresh the directory that serves the scripts for the web API')
|
||||
group.add_argument('--data-object', action='append',
|
||||
type=_parse_osm_object, metavar='OBJECT',
|
||||
help='Mark the given OSM object as requiring an update'
|
||||
' (format: [NWR]<id>)')
|
||||
group.add_argument('--data-area', action='append',
|
||||
type=_parse_osm_object, metavar='OBJECT',
|
||||
help='Mark the area around the given OSM object as requiring an update'
|
||||
' (format: [NWR]<id>)')
|
||||
|
||||
group = parser.add_argument_group('Arguments for function refresh')
|
||||
group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
|
||||
help='Do not enable code for propagating updates')
|
||||
@@ -60,7 +80,7 @@ class UpdateRefresh:
|
||||
help='Enable debug warning statements in functions')
|
||||
|
||||
|
||||
def run(self, args):
|
||||
def run(self, args): #pylint: disable=too-many-branches
|
||||
from ..tools import refresh, postcodes
|
||||
from ..indexer.indexer import Indexer
|
||||
|
||||
@@ -124,6 +144,14 @@ class UpdateRefresh:
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
refresh.setup_website(webdir, args.config, conn)
|
||||
|
||||
if args.data_object or args.data_area:
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
for obj in args.data_object or []:
|
||||
refresh.invalidate_osm_object(*obj, conn, recursive=False)
|
||||
for obj in args.data_area or []:
|
||||
refresh.invalidate_osm_object(*obj, conn, recursive=True)
|
||||
conn.commit()
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ from nominatim.version import NOMINATIM_VERSION
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'}
|
||||
|
||||
def _add_address_level_rows_from_entry(rows, entry):
|
||||
""" Converts a single entry from the JSON format for address rank
|
||||
@@ -210,3 +211,28 @@ def setup_website(basedir, config, conn):
|
||||
(basedir / script).write_text(template.format('reverse-only-search.php'), 'utf-8')
|
||||
else:
|
||||
(basedir / script).write_text(template.format(script), 'utf-8')
|
||||
|
||||
|
||||
def invalidate_osm_object(osm_type, osm_id, conn, recursive=True):
|
||||
""" Mark the given OSM object for reindexing. When 'recursive' is set
|
||||
to True (the default), then all dependent objects are marked for
|
||||
reindexing as well.
|
||||
|
||||
'osm_type' must be on of 'N' (node), 'W' (way) or 'R' (relation).
|
||||
If the given object does not exist, then nothing happens.
|
||||
"""
|
||||
assert osm_type in ('N', 'R', 'W')
|
||||
|
||||
LOG.warning("Invalidating OSM %s %s%s.",
|
||||
OSM_TYPE[osm_type], osm_id,
|
||||
' and its dependent places' if recursive else '')
|
||||
|
||||
with conn.cursor() as cur:
|
||||
if recursive:
|
||||
sql = """SELECT place_force_update(place_id)
|
||||
FROM placex WHERE osm_type = %s and osm_id = %s"""
|
||||
else:
|
||||
sql = """UPDATE placex SET indexed_status = 2
|
||||
WHERE osm_type = %s and osm_id = %s"""
|
||||
|
||||
cur.execute(sql, (osm_type, osm_id))
|
||||
|
||||
@@ -14,15 +14,14 @@ import nominatim.clicmd.api
|
||||
|
||||
@pytest.mark.parametrize("endpoint", (('search', 'reverse', 'lookup', 'details', 'status')))
|
||||
def test_no_api_without_phpcgi(src_dir, endpoint):
|
||||
with pytest.raises(SystemExit):
|
||||
nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
|
||||
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
|
||||
phplib_dir=str(src_dir / 'lib-php'),
|
||||
data_dir=str(src_dir / 'data'),
|
||||
phpcgi_path=None,
|
||||
sqllib_dir=str(src_dir / 'lib-sql'),
|
||||
config_dir=str(src_dir / 'settings'),
|
||||
cli_args=[endpoint])
|
||||
assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
|
||||
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
|
||||
phplib_dir=str(src_dir / 'lib-php'),
|
||||
data_dir=str(src_dir / 'data'),
|
||||
phpcgi_path=None,
|
||||
sqllib_dir=str(src_dir / 'lib-sql'),
|
||||
config_dir=str(src_dir / 'settings'),
|
||||
cli_args=[endpoint]) == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("params", [('search', '--query', 'new'),
|
||||
|
||||
@@ -82,3 +82,24 @@ class TestRefresh:
|
||||
assert self.call_nominatim('refresh', '--importance', '--wiki-data') == 0
|
||||
|
||||
assert calls == ['import', 'update']
|
||||
|
||||
@pytest.mark.parametrize('params', [('--data-object', 'w234'),
|
||||
('--data-object', 'N23', '--data-object', 'N24'),
|
||||
('--data-area', 'R7723'),
|
||||
('--data-area', 'r7723', '--data-area', 'r2'),
|
||||
('--data-area', 'R9284425', '--data-object', 'n1234567894567')])
|
||||
def test_refresh_objects(self, params, mock_func_factory):
|
||||
func_mock = mock_func_factory(nominatim.tools.refresh, 'invalidate_osm_object')
|
||||
|
||||
assert self.call_nominatim('refresh', *params) == 0
|
||||
|
||||
assert func_mock.called == len(params)/2
|
||||
|
||||
|
||||
@pytest.mark.parametrize('func', ('--data-object', '--data-area'))
|
||||
@pytest.mark.parametrize('param', ('234', 'a55', 'R 453', 'Rel'))
|
||||
def test_refresh_objects_bad_param(self, func, param, mock_func_factory):
|
||||
func_mock = mock_func_factory(nominatim.tools.refresh, 'invalidate_osm_object')
|
||||
|
||||
self.call_nominatim('refresh', func, param) == 1
|
||||
assert func_mock.called == 0
|
||||
|
||||
@@ -39,3 +39,47 @@ def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db
|
||||
AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""")
|
||||
|
||||
refresh.recompute_importance(temp_db_conn)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('osm_type', ('N', 'W', 'R'))
|
||||
def test_invalidate_osm_object_simple(placex_table, osm_type, temp_db_conn, temp_db_cursor):
|
||||
placex_table.add(osm_type=osm_type, osm_id=57283)
|
||||
|
||||
refresh.invalidate_osm_object(osm_type, 57283, temp_db_conn, recursive=False)
|
||||
temp_db_conn.commit()
|
||||
|
||||
assert 2 == temp_db_cursor.scalar("""SELECT indexed_status FROM placex
|
||||
WHERE osm_type = %s and osm_id = %s""",
|
||||
(osm_type, 57283))
|
||||
|
||||
|
||||
def test_invalidate_osm_object_nonexisting_simple(placex_table, temp_db_conn, temp_db_cursor):
|
||||
placex_table.add(osm_type='W', osm_id=57283)
|
||||
|
||||
refresh.invalidate_osm_object('N', 57283, temp_db_conn, recursive=False)
|
||||
temp_db_conn.commit()
|
||||
|
||||
assert 0 == temp_db_cursor.scalar("""SELECT count(*) FROM placex
|
||||
WHERE indexed_status > 0""")
|
||||
|
||||
|
||||
@pytest.mark.parametrize('osm_type', ('N', 'W', 'R'))
|
||||
def test_invalidate_osm_object_recursive(placex_table, osm_type, temp_db_conn, temp_db_cursor):
|
||||
placex_table.add(osm_type=osm_type, osm_id=57283)
|
||||
|
||||
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_force_update(placeid BIGINT)
|
||||
RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
UPDATE placex SET indexed_status = 522
|
||||
WHERE place_id = placeid;
|
||||
RETURN TRUE;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;""")
|
||||
|
||||
refresh.invalidate_osm_object(osm_type, 57283, temp_db_conn)
|
||||
temp_db_conn.commit()
|
||||
|
||||
assert 522 == temp_db_cursor.scalar("""SELECT indexed_status FROM placex
|
||||
WHERE osm_type = %s and osm_id = %s""",
|
||||
(osm_type, 57283))
|
||||
|
||||
Reference in New Issue
Block a user