Merge pull request #3971 from jayaddison/issue-3969/indexer-avoid-addressrank-loop

Indexer: allow 'has_pending' to consider address-rank subsets
This commit is contained in:
Sarah Hoffmann
2026-03-02 22:02:06 +01:00
committed by GitHub
3 changed files with 50 additions and 3 deletions

View File

@@ -64,4 +64,4 @@ class UpdateIndex:
if not args.boundaries_only: if not args.boundaries_only:
await indexer.index_by_rank(args.minrank, args.maxrank) await indexer.index_by_rank(args.minrank, args.maxrank)
await indexer.index_postcodes() await indexer.index_postcodes()
has_pending = indexer.has_pending() has_pending = indexer.has_pending(args.minrank, args.maxrank)

View File

@@ -31,14 +31,19 @@ class Indexer:
self.tokenizer = tokenizer self.tokenizer = tokenizer
self.num_threads = num_threads self.num_threads = num_threads
def has_pending(self, minrank: int = 0, maxrank: int = 30) -> bool:
    """ Check if any data still needs indexing.

        Only rows of the placex table whose rank_address lies between
        `minrank` and `maxrank` (both bounds inclusive) are considered.
        With the default bounds of 0 and 30 no narrower subset is selected.

        Returns True when at least one row in that rank range has an
        indexed_status greater than 0.

        This function must only be used after the import has finished.
        Otherwise it will be very expensive.
    """
    with connect(self.dsn) as conn:
        with conn.cursor() as cur:
            # LIMIT 1: only the existence of a pending row matters,
            # not how many there are.
            cur.execute(""" SELECT 'a'
                              FROM placex
                             WHERE rank_address BETWEEN %s AND %s
                               AND indexed_status > 0
                             LIMIT 1""",
                        (minrank, maxrank))
            return cur.rowcount > 0
async def index_full(self, analyse: bool = True) -> None: async def index_full(self, analyse: bool = True) -> None:

View File

@@ -0,0 +1,42 @@
# SPDX-License-Identifier: GPL-2.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for index command of the command-line interface wrapper.
"""
import pytest
import nominatim_db.indexer.indexer
class TestCliIndexWithDb:
    """ Tests of the 'index' command that run against a test database.
    """

    @pytest.fixture(autouse=True)
    def setup_cli_call(self, cli_call, cli_tokenizer_mock):
        """ Make the CLI entry point and the tokenizer mock available
            to every test of this class.
        """
        self.call_nominatim = cli_call
        self.tokenizer_mock = cli_tokenizer_mock

    def test_index_empty_subset(self, monkeypatch, async_mock_func_factory, placex_row):
        """ Indexing a rank range that contains no pending rows must
            succeed and must check for pending data no more than once,
            even when rows outside of that range are still unindexed.
        """
        indexer_cls = nominatim_db.indexer.indexer.Indexer

        # Pending rows exist only outside of the requested range 5-10.
        placex_row(rank_address=1, indexed_status=1)
        placex_row(rank_address=20, indexed_status=1)

        # Replace the actual indexing steps, so that only has_pending()
        # runs against the database.
        mocks = [
            async_mock_func_factory(indexer_cls, 'index_boundaries'),
            async_mock_func_factory(indexer_cls, 'index_by_rank'),
            async_mock_func_factory(indexer_cls, 'index_postcodes'),
        ]

        original_has_pending = indexer_cls.has_pending
        seen_calls = []

        def _has_pending_once(*args, **kwargs):
            # The first call is forwarded to the real implementation,
            # any further call fails the test. pytest.fail() is used
            # instead of 'assert False' because it also works when
            # asserts are disabled (python -O).
            if seen_calls:
                pytest.fail("Did not expect multiple Indexer.has_pending invocations")
            seen_calls.append((args, kwargs))
            return original_has_pending(*args, **kwargs)

        monkeypatch.setattr(indexer_cls, 'has_pending', _has_pending_once)

        assert self.call_nominatim('index', '--minrank', '5', '--maxrank', '10') == 0

        for mock in mocks:
            assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)