Merge pull request #3971 from jayaddison/issue-3969/indexer-avoid-addressrank-loop

Indexer: allow 'has_pending' to consider address-rank subsets
This commit is contained in:
Sarah Hoffmann
2026-03-02 22:02:06 +01:00
committed by GitHub
3 changed files with 50 additions and 3 deletions

View File

@@ -64,4 +64,4 @@ class UpdateIndex:
if not args.boundaries_only:
await indexer.index_by_rank(args.minrank, args.maxrank)
await indexer.index_postcodes()
has_pending = indexer.has_pending()
has_pending = indexer.has_pending(args.minrank, args.maxrank)

View File

@@ -31,14 +31,19 @@ class Indexer:
self.tokenizer = tokenizer
self.num_threads = num_threads
def has_pending(self, minrank: int = 0, maxrank: int = 30) -> bool:
    """ Check if any data still needs indexing.

        Only rows of the placex table whose rank_address lies between
        `minrank` and `maxrank` (both bounds inclusive) are taken into
        account. When called without arguments, the bounds default to
        0 and 30.

        Returns True when at least one such row has an indexed_status
        greater than zero and False otherwise.

        This function must only be used after the import has finished.
        Otherwise it will be very expensive.
    """
    with connect(self.dsn) as conn:
        with conn.cursor() as cur:
            # LIMIT 1: only the existence of a pending row matters,
            # not how many of them there are.
            cur.execute(""" SELECT 'a'
                              FROM placex
                             WHERE rank_address BETWEEN %s AND %s
                               AND indexed_status > 0
                             LIMIT 1""",
                        (minrank, maxrank))
            return cur.rowcount > 0
async def index_full(self, analyse: bool = True) -> None:

View File

@@ -0,0 +1,42 @@
# SPDX-License-Identifier: GPL-2.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for the index command of the command-line interface wrapper.
"""
import pytest
import nominatim_db.indexer.indexer
class TestCliIndexWithDb:
    """ Tests that run the 'index' command through the command-line
        wrapper, using the database-backed test fixtures.
    """

    @pytest.fixture(autouse=True)
    def setup_cli_call(self, cli_call, cli_tokenizer_mock):
        """ Make the CLI entry point and the tokenizer mock available
            to every test of this class.
        """
        self.call_nominatim = cli_call
        self.tokenizer_mock = cli_tokenizer_mock

    def test_index_empty_subset(self, monkeypatch, async_mock_func_factory, placex_row):
        """ Index a rank range (5 to 10) that contains no pending rows
            while pending rows exist at other ranks (1 and 20).

            The command must return 0, call each indexing step exactly
            once and query Indexer.has_pending() no more than once.
        """
        indexer_cls = nominatim_db.indexer.indexer.Indexer

        # Pending rows that lie outside of the rank range requested below.
        placex_row(rank_address=1, indexed_status=1)
        placex_row(rank_address=20, indexed_status=1)

        mocks = [
            async_mock_func_factory(indexer_cls, 'index_boundaries'),
            async_mock_func_factory(indexer_cls, 'index_by_rank'),
            async_mock_func_factory(indexer_cls, 'index_postcodes'),
        ]

        def _reject_repeat_call(*args, **kwargs):
            # Raise explicitly instead of using 'assert False', which is
            # stripped when Python runs with -O.
            raise AssertionError("Did not expect multiple Indexer.has_pending invocations")

        # The first invocation is forwarded to the real implementation
        # (captured here, before it gets patched); a second one fails
        # the test.
        has_pending_calls = [indexer_cls.has_pending, _reject_repeat_call]
        monkeypatch.setattr(indexer_cls, 'has_pending',
                            lambda *args, **kwargs: has_pending_calls.pop(0)(*args, **kwargs))

        assert self.call_nominatim('index', '--minrank', '5', '--maxrank', '10') == 0

        for mock in mocks:
            assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)