Merge pull request #3971 from jayaddison/issue-3969/indexer-avoid-addressrank-loop

Indexer: allow 'has_pending' to consider address-rank subsets
This commit is contained in:
Sarah Hoffmann
2026-03-02 22:02:06 +01:00
committed by GitHub
3 changed files with 50 additions and 3 deletions

View File

@@ -64,4 +64,4 @@ class UpdateIndex:
if not args.boundaries_only:
await indexer.index_by_rank(args.minrank, args.maxrank)
await indexer.index_postcodes()
has_pending = indexer.has_pending()
has_pending = indexer.has_pending(args.minrank, args.maxrank)

View File

@@ -31,14 +31,19 @@ class Indexer:
self.tokenizer = tokenizer
self.num_threads = num_threads
# NOTE(review): this span looks like a rendered diff whose +/- markers were
# lost, so pre-change and post-change lines appear side by side -- confirm
# against the actual commit before treating it as runnable code.
def has_pending(self) -> bool:  # presumably the pre-change signature (no rank arguments)
def has_pending(self, minrank: int = 0, maxrank: int = 30) -> bool:
""" Check if any data still needs indexing.
This function must only be used after the import has finished.
Otherwise it will be very expensive.

In the rank-aware form, only rows of placex whose rank_address lies
between minrank and maxrank (both bounds inclusive) are considered.
Returns True when at least one such row has an indexed_status
greater than zero.
"""
with connect(self.dsn) as conn:
with conn.cursor() as cur:
cur.execute("SELECT 'a' FROM placex WHERE indexed_status > 0 LIMIT 1")  # presumably the pre-change query: no rank filter
# Rank-restricted probe: the two %s placeholders are bound to
# (minrank, maxrank) by the parameter tuple passed after the SQL text.
cur.execute(""" SELECT 'a'
FROM placex
WHERE rank_address BETWEEN %s AND %s
AND indexed_status > 0
LIMIT 1""",
(minrank, maxrank))
# LIMIT 1 caps the result at a single row, so this only asks whether
# the probe matched anything; the row itself is never fetched.
return cur.rowcount > 0
async def index_full(self, analyse: bool = True) -> None: