move partial token into a separate field in the query struct

There is exactly one token to be expected and the token is usually
present.
This commit is contained in:
Sarah Hoffmann
2025-04-11 08:57:34 +02:00
parent 1db717b886
commit 497e27bb9a
6 changed files with 78 additions and 51 deletions

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Datastructures for a tokenized query.
@@ -192,6 +192,14 @@ class QueryNode:
"""
starting: List[TokenList] = dataclasses.field(default_factory=list)
""" List of all full tokens starting at this node.
"""
partial: Optional[Token] = None
""" Base token going to the next node.
May be None when the query has parts for which no words are known.
Note that the query may still be parsable when there are other
types of tokens spanning over the gap.
"""
def adjust_break(self, btype: BreakType, penalty: float) -> None:
""" Change the break type and penalty for this node.
@@ -269,33 +277,37 @@ class QueryStruct:
be added to, then the token is silently dropped.
"""
snode = self.nodes[trange.start]
full_phrase = snode.btype in (BREAK_START, BREAK_PHRASE)\
and self.nodes[trange.end].btype in (BREAK_PHRASE, BREAK_END)
if _phrase_compatible_with(snode.ptype, ttype, full_phrase):
tlist = snode.get_tokens(trange.end, ttype)
if tlist is None:
snode.starting.append(TokenList(trange.end, ttype, [token]))
else:
tlist.append(token)
if ttype == TOKEN_PARTIAL:
assert snode.partial is None
if _phrase_compatible_with(snode.ptype, TOKEN_PARTIAL, False):
snode.partial = token
else:
full_phrase = snode.btype in (BREAK_START, BREAK_PHRASE)\
and self.nodes[trange.end].btype in (BREAK_PHRASE, BREAK_END)
if _phrase_compatible_with(snode.ptype, ttype, full_phrase):
tlist = snode.get_tokens(trange.end, ttype)
if tlist is None:
snode.starting.append(TokenList(trange.end, ttype, [token]))
else:
tlist.append(token)
def get_tokens(self, trange: TokenRange, ttype: TokenType) -> List[Token]:
""" Get the list of tokens of a given type, spanning the given
nodes. The nodes must exist. If no tokens exist, an
empty list is returned.
Cannot be used to get the partial token.
"""
assert ttype != TOKEN_PARTIAL
return self.nodes[trange.start].get_tokens(trange.end, ttype) or []
def get_partials_list(self, trange: TokenRange) -> List[Token]:
""" Create a list of partial tokens between the given nodes.
The list is composed of the first token of type PARTIAL
going to the subsequent node. Such PARTIAL tokens are
assumed to exist.
"""
return [next(iter(self.get_tokens(TokenRange(i, i+1), TOKEN_PARTIAL)))
for i in range(trange.start, trange.end)]
return list(filter(None, (self.nodes[i].partial for i in range(trange.start, trange.end))))
def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
""" Iterator over all token lists in the query.
""" Iterator over all token lists except partial tokens in the query.
"""
for i, node in enumerate(self.nodes):
for tlist in node.starting:
@@ -308,6 +320,8 @@ class QueryStruct:
debugging.
"""
for node in self.nodes:
if node.partial is not None and node.partial.token == token:
return f"[P]{node.partial.lookup_word}"
for tlist in node.starting:
for t in tlist.tokens:
if t.token == token: