fix order of address rankings preferring longest words

This commit is contained in:
Sarah Hoffmann
2025-07-09 23:22:20 +02:00
parent e4b671f8b1
commit 71025f3f43

View File

@@ -301,14 +301,14 @@ class SearchBuilder:
ranks: List[dbf.RankedTokens] = [] ranks: List[dbf.RankedTokens] = []
while todo: while todo:
neglen, pos, rank = heapq.heappop(todo) _, pos, rank = heapq.heappop(todo)
# partial node # partial node
partial = self.query.nodes[pos].partial partial = self.query.nodes[pos].partial
if partial is not None: if partial is not None:
if pos + 1 < trange.end: if pos + 1 < trange.end:
penalty = rank.penalty + partial.penalty \ penalty = rank.penalty + partial.penalty \
+ self.query.nodes[pos + 1].word_break_penalty + self.query.nodes[pos + 1].word_break_penalty
heapq.heappush(todo, (neglen - 1, pos + 1, heapq.heappush(todo, (-(pos + 1), pos + 1,
dbf.RankedTokens(penalty, rank.tokens))) dbf.RankedTokens(penalty, rank.tokens)))
else: else:
ranks.append(dbf.RankedTokens(rank.penalty + partial.penalty, ranks.append(dbf.RankedTokens(rank.penalty + partial.penalty,
@@ -321,7 +321,7 @@ class SearchBuilder:
+ self.query.get_in_word_penalty( + self.query.get_in_word_penalty(
qmod.TokenRange(pos, tlist.end)) qmod.TokenRange(pos, tlist.end))
for t in tlist.tokens: for t in tlist.tokens:
heapq.heappush(todo, (neglen - 1, tlist.end, heapq.heappush(todo, (-tlist.end, tlist.end,
rank.with_token(t, chgpenalty))) rank.with_token(t, chgpenalty)))
elif tlist.end == trange.end: elif tlist.end == trange.end:
ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens) ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)