change indexing order for interpolations

Interpolations are now indexed after rank 30 objects. The housenumber
nodes no longer need information from the interpolations while the
interpolations can make use of precomputed postcodes.
This commit is contained in:
Sarah Hoffmann
2022-05-31 14:16:06 +02:00
parent 4b0d9f71e8
commit cbb4749996
2 changed files with 6 additions and 18 deletions

View File

@@ -160,15 +160,12 @@ class Indexer:
minrank, maxrank, self.num_threads)
with self.tokenizer.name_analyzer() as analyzer:
for rank in range(max(1, minrank), maxrank):
self._index(runners.RankRunner(rank, analyzer))
for rank in range(max(1, minrank), maxrank + 1):
self._index(runners.RankRunner(rank, analyzer), 20 if rank == 30 else 1)
if maxrank == 30:
self._index(runners.RankRunner(0, analyzer))
self._index(runners.InterpolationRunner(analyzer), 20)
self._index(runners.RankRunner(30, analyzer), 20)
else:
self._index(runners.RankRunner(maxrank, analyzer))
def index_postcodes(self):

View File

@@ -177,25 +177,16 @@ def test_index_all_by_rank(test_db, threads, test_tokenizer):
SELECT count(*) FROM placex p WHERE rank_address > 0
AND indexed_date >= (SELECT min(indexed_date) FROM placex o
WHERE p.rank_address < o.rank_address)""") == 0
# placex rank < 30 objects come before interpolations
# placex address ranked objects come before interpolations
assert test_db.scalar(
"""SELECT count(*) FROM placex WHERE rank_address < 30
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM location_property_osmline)""") == 0
# placex rank = 30 objects come after interpolations
# rank 0 comes after all other placex objects
assert test_db.scalar(
"""SELECT count(*) FROM placex WHERE rank_address = 30
AND indexed_date <
(SELECT max(indexed_date) FROM location_property_osmline)""") == 0
# rank 0 comes after rank 29 and before rank 30
assert test_db.scalar(
"""SELECT count(*) FROM placex WHERE rank_address < 30
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0
assert test_db.scalar(
"""SELECT count(*) FROM placex WHERE rank_address = 30
AND indexed_date <
(SELECT max(indexed_date) FROM placex WHERE rank_address = 0)""") == 0
@pytest.mark.parametrize("threads", [1, 15])