replace use of range when computing word list

This commit is contained in:
Sarah Hoffmann
2025-04-11 09:59:04 +02:00
parent 3980791cfd
commit e0e067b1d6

View File

@@ -354,16 +354,18 @@ class QueryStruct:
 words: Dict[str, List[TokenRange]] = defaultdict(list)
-for first in range(start, endpos - 1):
-    word = self.nodes[first + 1].term_lookup
+for first, first_node in enumerate(self.nodes[start + 1:endpos], start):
+    word = first_node.term_lookup
     penalty = base_penalty
     words[word].append(TokenRange(first, first + 1, penalty=penalty))
-    if self.nodes[first + 1].btype != BREAK_PHRASE:
-        for last in range(first + 2, min(first + 20, endpos)):
-            word = ' '.join((word, self.nodes[last].term_lookup))
-            penalty += self.nodes[last - 1].penalty
+    if first_node.btype != BREAK_PHRASE:
+        penalty += first_node.penalty
+        max_last = min(first + 20, endpos)
+        for last, last_node in enumerate(self.nodes[first + 2:max_last], first + 2):
+            word = ' '.join((word, last_node.term_lookup))
             words[word].append(TokenRange(first, last, penalty=penalty))
-            if self.nodes[last].btype == BREAK_PHRASE:
+            if last_node.btype == BREAK_PHRASE:
                 break
+            penalty += last_node.penalty
 return words