diff --git a/src/nominatim_db/clicmd/index.py b/src/nominatim_db/clicmd/index.py
index f1890a65..2786185f 100644
--- a/src/nominatim_db/clicmd/index.py
+++ b/src/nominatim_db/clicmd/index.py
@@ -64,4 +64,4 @@ class UpdateIndex:
             if not args.boundaries_only:
                 await indexer.index_by_rank(args.minrank, args.maxrank)
                 await indexer.index_postcodes()
-            has_pending = indexer.has_pending()
+            has_pending = indexer.has_pending(args.minrank, args.maxrank)
diff --git a/src/nominatim_db/indexer/indexer.py b/src/nominatim_db/indexer/indexer.py
index 48195328..4e7c831b 100644
--- a/src/nominatim_db/indexer/indexer.py
+++ b/src/nominatim_db/indexer/indexer.py
@@ -31,14 +31,19 @@ class Indexer:
         self.tokenizer = tokenizer
         self.num_threads = num_threads
 
-    def has_pending(self) -> bool:
+    def has_pending(self, minrank: int = 0, maxrank: int = 30) -> bool:
         """ Check if any data still needs indexing.
             This function must only be used after the import has finished.
             Otherwise it will be very expensive.
         """
         with connect(self.dsn) as conn:
             with conn.cursor() as cur:
-                cur.execute("SELECT 'a' FROM placex WHERE indexed_status > 0 LIMIT 1")
+                cur.execute(""" SELECT 'a'
+                                FROM placex
+                                WHERE rank_address BETWEEN %s AND %s
+                                AND indexed_status > 0
+                                LIMIT 1""",
+                            (minrank, maxrank))
                 return cur.rowcount > 0
 
     async def index_full(self, analyse: bool = True) -> None:
diff --git a/test/python/cli/test_cmd_index.py b/test/python/cli/test_cmd_index.py
new file mode 100644
index 00000000..01dc80b7
--- /dev/null
+++ b/test/python/cli/test_cmd_index.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2025 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for index command of the command-line interface wrapper.
+"""
+import pytest
+
+import nominatim_db.indexer.indexer
+
+
+class TestCliIndexWithDb:
+
+    @pytest.fixture(autouse=True)
+    def setup_cli_call(self, cli_call, cli_tokenizer_mock):
+        self.call_nominatim = cli_call
+        self.tokenizer_mock = cli_tokenizer_mock
+
+    def test_index_empty_subset(self, monkeypatch, async_mock_func_factory, placex_row):
+        placex_row(rank_address=1, indexed_status=1)
+        placex_row(rank_address=20, indexed_status=1)
+
+        mocks = [
+            async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_boundaries'),
+            async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_by_rank'),
+            async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_postcodes'),
+        ]
+
+        def _reject_repeat_call(*args, **kwargs):
+            assert False, "Did not expect multiple Indexer.has_pending invocations"
+
+        has_pending_calls = [nominatim_db.indexer.indexer.Indexer.has_pending, _reject_repeat_call]
+        monkeypatch.setattr(nominatim_db.indexer.indexer.Indexer, 'has_pending',
+                            lambda *args, **kwargs: has_pending_calls.pop(0)(*args, **kwargs))
+
+        assert self.call_nominatim('index', '--minrank', '5', '--maxrank', '10') == 0
+
+        for mock in mocks:
+            assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)