mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-07 18:44:07 +00:00
Merge pull request #3971 from jayaddison/issue-3969/indexer-avoid-addressrank-loop
Indexer: allow 'has_pending' to consider address-rank subsets
This commit is contained in:
@@ -64,4 +64,4 @@ class UpdateIndex:
|
|||||||
if not args.boundaries_only:
|
if not args.boundaries_only:
|
||||||
await indexer.index_by_rank(args.minrank, args.maxrank)
|
await indexer.index_by_rank(args.minrank, args.maxrank)
|
||||||
await indexer.index_postcodes()
|
await indexer.index_postcodes()
|
||||||
has_pending = indexer.has_pending()
|
has_pending = indexer.has_pending(args.minrank, args.maxrank)
|
||||||
|
|||||||
@@ -31,14 +31,19 @@ class Indexer:
|
|||||||
self.tokenizer = tokenizer
|
self.tokenizer = tokenizer
|
||||||
self.num_threads = num_threads
|
self.num_threads = num_threads
|
||||||
|
|
||||||
def has_pending(self, minrank: int = 0, maxrank: int = 30) -> bool:
    """ Check if any data still needs indexing.

        Only rows of the placex table whose rank_address lies between
        `minrank` and `maxrank` (both bounds inclusive) are taken into
        account. Calling the function without arguments uses the
        defaults 0 and 30 (presumably the complete address-rank range;
        confirm against the rank definitions used by the indexer).

        Returns True when at least one such row has an
        indexed_status greater than 0.

        This function must only be used after the import has finished.
        Otherwise it will be very expensive.
    """
    with connect(self.dsn) as conn:
        with conn.cursor() as cur:
            # LIMIT 1: only the existence of a pending row matters,
            # so the query can stop at the first match.
            cur.execute(""" SELECT 'a'
                              FROM placex
                             WHERE rank_address BETWEEN %s AND %s
                               AND indexed_status > 0
                             LIMIT 1""",
                        (minrank, maxrank))
            return cur.rowcount > 0
|
||||||
|
|
||||||
async def index_full(self, analyse: bool = True) -> None:
|
async def index_full(self, analyse: bool = True) -> None:
|
||||||
|
|||||||
42
test/python/cli/test_cmd_index.py
Normal file
42
test/python/cli/test_cmd_index.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
#
|
||||||
|
# This file is part of Nominatim. (https://nominatim.org)
|
||||||
|
#
|
||||||
|
# Copyright (C) 2025 by the Nominatim developer community.
|
||||||
|
# For a full list of authors see the git log.
|
||||||
|
"""
|
||||||
|
Tests for index command of the command-line interface wrapper.
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import nominatim_db.indexer.indexer
|
||||||
|
|
||||||
|
|
||||||
|
class TestCliIndexWithDb:
    """ Tests for the 'index' command that use rows in the placex table.
    """

    @pytest.fixture(autouse=True)
    def setup_cli_call(self, cli_call, cli_tokenizer_mock):
        """ Make the CLI entry point and the tokenizer mock available
            as attributes of the test instance.
        """
        self.call_nominatim = cli_call
        self.tokenizer_mock = cli_tokenizer_mock

    def test_index_empty_subset(self, monkeypatch, async_mock_func_factory, placex_row):
        """ Index a rank range (5 to 10) that contains no pending rows
            while pending rows exist at other ranks (1 and 20).

            The command must succeed, run each indexing step exactly once
            and call Indexer.has_pending only a single time.
        """
        indexer_cls = nominatim_db.indexer.indexer.Indexer

        # Pending rows exist, but both lie outside of the requested rank range.
        placex_row(rank_address=1, indexed_status=1)
        placex_row(rank_address=20, indexed_status=1)

        mocks = [
            async_mock_func_factory(indexer_cls, step)
            for step in ('index_boundaries', 'index_by_rank', 'index_postcodes')
        ]

        def _reject_repeat_call(*args, **kwargs):
            # Raise explicitly instead of 'assert False': a bare assert is
            # removed when Python runs with -O and the check would vanish.
            raise AssertionError("Did not expect multiple Indexer.has_pending invocations")

        # First call goes to the real implementation (captured before it is
        # patched), any further call fails the test.
        has_pending_calls = [indexer_cls.has_pending, _reject_repeat_call]
        monkeypatch.setattr(indexer_cls, 'has_pending',
                            lambda *args, **kwargs: has_pending_calls.pop(0)(*args, **kwargs))

        assert self.call_nominatim('index', '--minrank', '5', '--maxrank', '10') == 0

        for mock in mocks:
            assert mock.called == 1, f"Mock '{mock.func_name}' not called"
|
||||||
Reference in New Issue
Block a user