Indexer: allow has_pending to consider address-rank subsets

This commit is contained in:
James Addison
2026-02-11 14:21:17 +00:00
parent d1b0bcaea7
commit 8c3c1f0a15
3 changed files with 50 additions and 3 deletions

View File

@@ -64,4 +64,4 @@ class UpdateIndex:
if not args.boundaries_only:
await indexer.index_by_rank(args.minrank, args.maxrank)
await indexer.index_postcodes()
has_pending = indexer.has_pending()
has_pending = indexer.has_pending(args.minrank, args.maxrank)

View File

@@ -31,14 +31,19 @@ class Indexer:
self.tokenizer = tokenizer
self.num_threads = num_threads
# NOTE(review): this span is diff context with the +/- markers and indentation
# stripped. The two `def` lines below appear to be the removed and the added
# signature of the same method; the second one (with rank bounds) is presumably
# the live one -- confirm against the actual file.
def has_pending(self) -> bool:
def has_pending(self, minrank: int = 0, maxrank: int = 30) -> bool:
""" Check if any data still needs indexing.
This function must only be used after the import has finished.
Otherwise it will be very expensive.
"""
with connect(self.dsn) as conn:
with conn.cursor() as cur:
# Presumably the removed query: probes for any placex row with
# indexed_status > 0, without looking at its rank.
cur.execute("SELECT 'a' FROM placex WHERE indexed_status > 0 LIMIT 1")
# Presumably the added query: the same probe, restricted to rows whose
# rank_address lies between minrank and maxrank (SQL BETWEEN includes both
# end points). The bounds are handed to execute() as query parameters
# instead of being formatted into the SQL text.
cur.execute(""" SELECT 'a'
FROM placex
WHERE rank_address BETWEEN %s AND %s
AND indexed_status > 0
LIMIT 1""",
(minrank, maxrank))
# LIMIT 1 caps the result at a single row, so a positive rowcount means at
# least one matching row exists. Assumes the driver reports the number of
# rows produced by a SELECT in cur.rowcount -- TODO confirm.
return cur.rowcount > 0
async def index_full(self, analyse: bool = True) -> None: