Merge pull request #3719 from lonvia/query-direction
Estimate query direction
@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Conversion from token assignment to an abstract DB search.
@@ -146,7 +146,7 @@ class SearchBuilder:
if address:
sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
[t.token for r in address
for t in self.query.get_partials_list(r)],
for t in self.query.iter_partials(r)],
lookups.Restrict)]
yield dbs.PostcodeSearch(penalty, sdata)

@@ -159,7 +159,7 @@ class SearchBuilder:
expected_count = sum(t.count for t in hnrs)

partials = {t.token: t.addr_count for trange in address
for t in self.query.get_partials_list(trange)}
for t in self.query.iter_partials(trange)}

if not partials:
# can happen when none of the partials is indexed
@@ -203,9 +203,9 @@ class SearchBuilder:
are and tries to find a lookup that optimizes index use.
"""
penalty = 0.0 # extra penalty
name_partials = {t.token: t for t in self.query.get_partials_list(name)}
name_partials = {t.token: t for t in self.query.iter_partials(name)}

addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
addr_partials = [t for r in address for t in self.query.iter_partials(r)]
addr_tokens = list({t.token for t in addr_partials})

exp_count = min(t.count for t in name_partials.values()) / (3**(len(name_partials) - 1))
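For context (not part of the commit; reduced stand-in classes): the switch from get_partials_list() to iter_partials() stops looking tokens up slot by slot and instead walks the precomputed QueryNode.partial fields, silently skipping nodes that have no indexed partial. A minimal sketch of the new access pattern:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Token:                      # reduced stand-in for the real Token class
    token: int
    count: int = 1
    addr_count: int = 1

@dataclass
class QueryNode:                  # reduced stand-in for the real QueryNode class
    partial: Optional[Token] = None

nodes = [QueryNode(Token(1)), QueryNode(Token(2)), QueryNode(None)]

# equivalent of iter_partials(TokenRange(0, 3)): nodes without a partial are skipped
partials = [n.partial for n in nodes[0:3] if n.partial is not None]
assert [t.token for t in partials] == [1, 2]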
@@ -282,8 +282,7 @@ class SearchBuilder:
ranks = [dbf.RankedTokens(t.penalty, [t.token]) for t in name_fulls]
ranks.sort(key=lambda r: r.penalty)
# Fallback, sum of penalty for partials
name_partials = self.query.get_partials_list(trange)
default = sum(t.penalty for t in name_partials) + 0.2
default = sum(t.penalty for t in self.query.iter_partials(trange)) + 0.2
return dbf.FieldRanking(db_field, default, ranks)

def get_addr_ranking(self, trange: qmod.TokenRange) -> dbf.FieldRanking:
@@ -296,35 +295,35 @@ class SearchBuilder:

while todo:
neglen, pos, rank = heapq.heappop(todo)
# partial node
partial = self.query.nodes[pos].partial
if partial is not None:
if pos + 1 < trange.end:
penalty = rank.penalty + partial.penalty \
+ PENALTY_WORDCHANGE[self.query.nodes[pos + 1].btype]
heapq.heappush(todo, (neglen - 1, pos + 1,
dbf.RankedTokens(penalty, rank.tokens)))
else:
ranks.append(dbf.RankedTokens(rank.penalty + partial.penalty,
rank.tokens))
# full words
for tlist in self.query.nodes[pos].starting:
if tlist.ttype in (qmod.TOKEN_PARTIAL, qmod.TOKEN_WORD):
if tlist.ttype == qmod.TOKEN_WORD:
if tlist.end < trange.end:
chgpenalty = PENALTY_WORDCHANGE[self.query.nodes[tlist.end].btype]
if tlist.ttype == qmod.TOKEN_PARTIAL:
penalty = rank.penalty + chgpenalty \
+ max(t.penalty for t in tlist.tokens)
for t in tlist.tokens:
heapq.heappush(todo, (neglen - 1, tlist.end,
dbf.RankedTokens(penalty, rank.tokens)))
else:
for t in tlist.tokens:
heapq.heappush(todo, (neglen - 1, tlist.end,
rank.with_token(t, chgpenalty)))
rank.with_token(t, chgpenalty)))
elif tlist.end == trange.end:
if tlist.ttype == qmod.TOKEN_PARTIAL:
ranks.append(dbf.RankedTokens(rank.penalty
+ max(t.penalty for t in tlist.tokens),
rank.tokens))
else:
ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)
if len(ranks) >= 10:
# Too many variants, bail out and only add
# Worst-case Fallback: sum of penalty of partials
name_partials = self.query.get_partials_list(trange)
default = sum(t.penalty for t in name_partials) + 0.2
ranks.append(dbf.RankedTokens(rank.penalty + default, []))
# Bail out of outer loop
todo.clear()
break
ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)

if len(ranks) >= 10:
# Too many variants, bail out and only add
# Worst-case Fallback: sum of penalty of partials
default = sum(t.penalty for t in self.query.iter_partials(trange)) + 0.2
ranks.append(dbf.RankedTokens(rank.penalty + default, []))
# Bail out of outer loop
break

ranks.sort(key=lambda r: len(r.tokens))
default = ranks[0].penalty + 0.3
@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Public interface to the search code.
@@ -50,6 +50,9 @@ class ForwardGeocoder:
self.query_analyzer = await make_query_analyzer(self.conn)

query = await self.query_analyzer.analyze_query(phrases)
query.compute_direction_penalty()
log().var_dump('Query direction penalty',
lambda: f"[{'LR' if query.dir_penalty < 0 else 'RL'}] {query.dir_penalty}")

searches: List[AbstractSearch] = []
if query.num_token_slots() > 0:
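As a reading aid (not part of the diff): dir_penalty comes out negative when the name/address ratios fall off towards the end of the query, i.e. when a left-to-right reading (name first, address later) looks plausible, which is why the log line above prints 'LR' for negative values. The token_assignment changes further down apply it roughly as in this hedged sketch:

def direction_penalty(dir_penalty: float, direction: int) -> float:
    # hypothetical helper mirroring the checks added in token_assignment.py:
    # penalise an enforced left-to-right reading when the query looks right-to-left
    if direction == 1 and dir_penalty > 0:
        return dir_penalty
    # penalise an enforced right-to-left reading when the query looks left-to-right
    if direction == -1 and dir_penalty < 0:
        return -dir_penalty
    return 0.0

assert direction_penalty(-0.2, 1) == 0.0    # reading agrees with the query, no extra cost
assert direction_penalty(-0.2, -1) == 0.2   # reading disagrees, add |dir_penalty|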
@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of query analysis for the ICU tokenizer.
@@ -267,32 +267,47 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
def rerank_tokens(self, query: qmod.QueryStruct) -> None:
""" Add penalties to tokens that depend on presence of other token.
"""
for i, node, tlist in query.iter_token_lists():
if tlist.ttype == qmod.TOKEN_POSTCODE:
tlen = len(cast(ICUToken, tlist.tokens[0]).word_token)
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TOKEN_POSTCODE \
and (repl.ttype != qmod.TOKEN_HOUSENUMBER or tlen > 4):
repl.add_penalty(0.39)
elif (tlist.ttype == qmod.TOKEN_HOUSENUMBER
and len(tlist.tokens[0].lookup_word) <= 3):
if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TOKEN_HOUSENUMBER:
repl.add_penalty(0.5 - tlist.tokens[0].penalty)
elif tlist.ttype not in (qmod.TOKEN_COUNTRY, qmod.TOKEN_PARTIAL):
norm = ' '.join(n.term_normalized for n in query.nodes[i + 1:tlist.end + 1]
if n.btype != qmod.BREAK_TOKEN)
if not norm:
# Can happen when the token only covers a partial term
norm = query.nodes[i + 1].term_normalized
for token in tlist.tokens:
cast(ICUToken, token).rematch(norm)
for start, end, tlist in query.iter_tokens_by_edge():
if len(tlist) > 1:
# If it looks like a Postcode, give preference.
if qmod.TOKEN_POSTCODE in tlist:
for ttype, tokens in tlist.items():
if ttype != qmod.TOKEN_POSTCODE and \
(ttype != qmod.TOKEN_HOUSENUMBER or
start + 1 > end or
len(query.nodes[end].term_lookup) > 4):
for token in tokens:
token.penalty += 0.39

# If it looks like a simple housenumber, prefer that.
if qmod.TOKEN_HOUSENUMBER in tlist:
hnr_lookup = tlist[qmod.TOKEN_HOUSENUMBER][0].lookup_word
if len(hnr_lookup) <= 3 and any(c.isdigit() for c in hnr_lookup):
penalty = 0.5 - tlist[qmod.TOKEN_HOUSENUMBER][0].penalty
for ttype, tokens in tlist.items():
if ttype != qmod.TOKEN_HOUSENUMBER:
for token in tokens:
token.penalty += penalty

# rerank tokens against the normalized form
norm = ' '.join(n.term_normalized for n in query.nodes[start + 1:end + 1]
if n.btype != qmod.BREAK_TOKEN)
if not norm:
# Can happen when the token only covers a partial term
norm = query.nodes[start + 1].term_normalized
for ttype, tokens in tlist.items():
if ttype != qmod.TOKEN_COUNTRY:
for token in tokens:
cast(ICUToken, token).rematch(norm)

def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
yield ['type', 'from', 'to', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
for i, node in enumerate(query.nodes):
if node.partial is not None:
t = cast(ICUToken, node.partial)
yield [qmod.TOKEN_PARTIAL, str(i), str(i + 1), t.token,
t.word_token, t.lookup_word, t.penalty, t.count, t.info]
for tlist in node.starting:
for token in tlist.tokens:
t = cast(ICUToken, token)
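A toy illustration (invented numbers, plain dicts instead of the real token classes) of the reranking rule above: when an edge can be read as a postcode, every competing non-postcode reading of that edge gets 0.39 added, and housenumber readings are exempt only when the term is short:

# penalties per token type for one edge, before reranking
edge = {'POSTCODE': [0.0], 'WORD': [0.1], 'HOUSENUMBER': [0.05]}
term_lookup_len = 5                       # length of the transliterated term, hypothetical

if 'POSTCODE' in edge:
    for ttype, penalties in edge.items():
        if ttype != 'POSTCODE' and (ttype != 'HOUSENUMBER' or term_lookup_len > 4):
            edge[ttype] = [p + 0.39 for p in penalties]

assert edge['POSTCODE'] == [0.0]
assert [round(p, 2) for p in edge['WORD']] == [0.49]
assert [round(p, 2) for p in edge['HOUSENUMBER']] == [0.44]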
@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Datastructures for a tokenized query.
@@ -12,6 +12,17 @@ from abc import ABC, abstractmethod
from collections import defaultdict
import dataclasses

# Precomputed denominator for the computation of the linear regression slope
# used to determine the query direction.
# The x value for the regression computation will be the position of the
# token in the query. Thus we know the x values will be [0, query length).
# As the denominator only depends on the x values, we can pre-compute here
# the denominator to use for a given query length.
# Note that query length of two or less is special cased and will not use
# the values from this array. Thus it is not a problem that they are 0.
LINFAC = [i * (sum(si * si for si in range(i)) - (i - 1) * i * (i - 1) / 4)
for i in range(50)]

BreakType = str
""" Type of break between tokens.
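Side note (illustration only, not part of the diff): LINFAC[n] is the textbook least-squares denominator n*Σx² − (Σx)² for x = 0 … n−1, so the expression in compute_direction_penalty() below is exactly the slope of a line fitted through the per-token name/address ratios. A small self-contained check with made-up ratios:

LINFAC = [i * (sum(si * si for si in range(i)) - (i - 1) * i * (i - 1) / 4)
          for i in range(50)]

def ols_slope(ys):
    # ordinary least-squares slope through the points (0, y0), (1, y1), ...
    n = len(ys)
    sx = n * (n - 1) / 2
    sxx = sum(i * i for i in range(n))
    sxy = sum(i * y for i, y in enumerate(ys))
    return (n * sxy - sx * sum(ys)) / (n * sxx - sx * sx)

ratios = [0.9, 0.7, 0.4, 0.2]          # hypothetical name_address_ratio() per node
n = len(ratios)
dir_penalty = (n * sum(i * r for i, r in enumerate(ratios))
               - sum(ratios) * n * (n - 1) / 2) / LINFAC[n]

assert abs(dir_penalty - ols_slope(ratios)) < 1e-12
assert dir_penalty < 0                 # ratios fall off, so the query reads left-to-right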
@@ -183,15 +194,32 @@ class QueryNode:
""" Penalty for the break at this node.
"""
term_lookup: str
""" Transliterated term following this node.
""" Transliterated term ending at this node.
"""
term_normalized: str
""" Normalised form of term following this node.
""" Normalised form of term ending at this node.
When the token resulted from a split during transliteration,
then this string contains the complete source term.
"""

starting: List[TokenList] = dataclasses.field(default_factory=list)
""" List of all full tokens starting at this node.
"""
partial: Optional[Token] = None
""" Base token going to the next node.
May be None when the query has parts for which no words are known.
Note that the query may still be parsable when there are other
types of tokens spanning over the gap.
"""

def name_address_ratio(self) -> float:
""" Return the probability that the partial token belonging to
this node forms part of a name (as opposed to part of the address).
"""
if self.partial is None:
return 0.5

return self.partial.count / (self.partial.count + self.partial.addr_count)

def adjust_break(self, btype: BreakType, penalty: float) -> None:
""" Change the break type and penalty for this node.
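For intuition (numbers invented): name_address_ratio() is simply the share of name occurrences among all occurrences of the partial, with 0.5 as the neutral fallback when the node carries no indexed partial.

count, addr_count = 900, 100            # hypothetical word frequencies
ratio = count / (count + addr_count)
assert ratio == 0.9                     # strongly name-like; values near 0 are address-like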
@@ -234,12 +262,20 @@ class QueryStruct:
need to be direct neighbours. Thus the query is represented as a
directed acyclic graph.

A query also has a direction penalty 'dir_penalty'. This describes
the likelihood that the query should be read from left-to-right or
vice versa. A negative 'dir_penalty' should be read as a penalty on
right-to-left reading, while a positive value represents a penalty
for left-to-right reading. The default value is 0, which is equivalent
to having no information about the reading.

When created, a query contains a single node: the start of the
query. Further nodes can be added by appending to 'nodes'.
"""

def __init__(self, source: List[Phrase]) -> None:
self.source = source
self.dir_penalty = 0.0
self.nodes: List[QueryNode] = \
[QueryNode(BREAK_START, source[0].ptype if source else PHRASE_ANY,
0.0, '', '')]
@@ -269,37 +305,63 @@ class QueryStruct:
be added to, then the token is silently dropped.
"""
snode = self.nodes[trange.start]
full_phrase = snode.btype in (BREAK_START, BREAK_PHRASE)\
and self.nodes[trange.end].btype in (BREAK_PHRASE, BREAK_END)
if _phrase_compatible_with(snode.ptype, ttype, full_phrase):
tlist = snode.get_tokens(trange.end, ttype)
if tlist is None:
snode.starting.append(TokenList(trange.end, ttype, [token]))
else:
tlist.append(token)
if ttype == TOKEN_PARTIAL:
assert snode.partial is None
if _phrase_compatible_with(snode.ptype, TOKEN_PARTIAL, False):
snode.partial = token
else:
full_phrase = snode.btype in (BREAK_START, BREAK_PHRASE)\
and self.nodes[trange.end].btype in (BREAK_PHRASE, BREAK_END)
if _phrase_compatible_with(snode.ptype, ttype, full_phrase):
tlist = snode.get_tokens(trange.end, ttype)
if tlist is None:
snode.starting.append(TokenList(trange.end, ttype, [token]))
else:
tlist.append(token)

def compute_direction_penalty(self) -> None:
""" Recompute the direction probability from the partial tokens
of each node.
"""
n = len(self.nodes) - 1
if n == 1 or n >= 50:
self.dir_penalty = 0
elif n == 2:
self.dir_penalty = (self.nodes[1].name_address_ratio()
- self.nodes[0].name_address_ratio()) / 3
else:
ratios = [n.name_address_ratio() for n in self.nodes[:-1]]
self.dir_penalty = (n * sum(i * r for i, r in enumerate(ratios))
- sum(ratios) * n * (n - 1) / 2) / LINFAC[n]

def get_tokens(self, trange: TokenRange, ttype: TokenType) -> List[Token]:
""" Get the list of tokens of a given type, spanning the given
nodes. The nodes must exist. If no tokens exist, an
empty list is returned.

Cannot be used to get the partial token.
"""
assert ttype != TOKEN_PARTIAL
return self.nodes[trange.start].get_tokens(trange.end, ttype) or []

def get_partials_list(self, trange: TokenRange) -> List[Token]:
""" Create a list of partial tokens between the given nodes.
The list is composed of the first token of type PARTIAL
going to the subsequent node. Such PARTIAL tokens are
assumed to exist.
def iter_partials(self, trange: TokenRange) -> Iterator[Token]:
""" Iterate over the partial tokens between the given nodes.
Missing partials are ignored.
"""
return [next(iter(self.get_tokens(TokenRange(i, i+1), TOKEN_PARTIAL)))
for i in range(trange.start, trange.end)]
return (n.partial for n in self.nodes[trange.start:trange.end] if n.partial is not None)

def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
""" Iterator over all token lists in the query.
def iter_tokens_by_edge(self) -> Iterator[Tuple[int, int, Dict[TokenType, List[Token]]]]:
""" Iterator over all tokens except partial ones grouped by edge.

Returns the start and end node indexes and a dictionary
of lists of tokens by token type.
"""
for i, node in enumerate(self.nodes):
by_end: Dict[int, Dict[TokenType, List[Token]]] = defaultdict(dict)
for tlist in node.starting:
yield i, node, tlist
by_end[tlist.end][tlist.ttype] = tlist.tokens
for end, endlist in by_end.items():
yield i, end, endlist
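A minimal sketch (simplified tuples instead of TokenList objects) of the grouping done by iter_tokens_by_edge(): token lists starting at one node are bucketed by their end node, so callers such as rerank_tokens() see all competing readings of an edge together:

from collections import defaultdict

# (end, ttype, tokens) stand-ins for the TokenList objects starting at node 1
starting = [(2, 'WORD', ['w1']), (2, 'POSTCODE', ['p1']), (3, 'WORD', ['w2'])]

by_end = defaultdict(dict)
for end, ttype, tokens in starting:
    by_end[end][ttype] = tokens

assert by_end[2] == {'WORD': ['w1'], 'POSTCODE': ['p1']}   # edge 1 -> 2
assert by_end[3] == {'WORD': ['w2']}                       # edge 1 -> 3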
def find_lookup_word_by_id(self, token: int) -> str:
""" Find the first token with the given token ID and return
@@ -308,6 +370,8 @@ class QueryStruct:
debugging.
"""
for node in self.nodes:
if node.partial is not None and node.partial.token == token:
return f"[P]{node.partial.lookup_word}"
for tlist in node.starting:
for t in tlist.tokens:
if t.token == token:
@@ -339,16 +403,18 @@ class QueryStruct:

words: Dict[str, List[TokenRange]] = defaultdict(list)

for first in range(start, endpos - 1):
word = self.nodes[first + 1].term_lookup
for first, first_node in enumerate(self.nodes[start + 1:endpos], start):
word = first_node.term_lookup
penalty = base_penalty
words[word].append(TokenRange(first, first + 1, penalty=penalty))
if self.nodes[first + 1].btype != BREAK_PHRASE:
for last in range(first + 2, min(first + 20, endpos)):
word = ' '.join((word, self.nodes[last].term_lookup))
penalty += self.nodes[last - 1].penalty
if first_node.btype != BREAK_PHRASE:
penalty += first_node.penalty
max_last = min(first + 20, endpos)
for last, last_node in enumerate(self.nodes[first + 2:max_last], first + 2):
word = ' '.join((word, last_node.term_lookup))
words[word].append(TokenRange(first, last, penalty=penalty))
if self.nodes[last].btype == BREAK_PHRASE:
if last_node.btype == BREAK_PHRASE:
break
penalty += last_node.penalty

return words
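A standalone toy version (made-up terms, penalties omitted) of the rewritten word-extraction loop above: every sub-span starting at a node is joined into a lookup word until a phrase break is hit:

terms = ['north', 'main', 'street']               # lookup terms of consecutive query nodes
words = set()

for first in range(len(terms)):
    word = terms[first]
    words.add(word)
    for last in range(first + 1, len(terms)):     # the real code also caps spans at 20 terms
        word = ' '.join((word, terms[last]))
        words.add(word)

assert words == {'north', 'north main', 'north main street',
                 'main', 'main street', 'street'}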
@@ -286,8 +286,12 @@ class _TokenSequence:
log().var_dump('skip forward', (base.postcode, first))
return

penalty = self.penalty
if self.direction == 1 and query.dir_penalty > 0:
penalty += query.dir_penalty

log().comment('first word = name')
yield dataclasses.replace(base, penalty=self.penalty,
yield dataclasses.replace(base, penalty=penalty,
name=first, address=base.address[1:])

# To paraphrase:
@@ -300,14 +304,15 @@ class _TokenSequence:
or (query.nodes[first.start].ptype != qmod.PHRASE_ANY):
return

penalty = self.penalty

# Penalty for:
# * <name>, <street>, <housenumber> , ...
# * queries that are comma-separated
if (base.housenumber and base.housenumber > first) or len(query.source) > 1:
penalty += 0.25

if self.direction == 0 and query.dir_penalty > 0:
penalty += query.dir_penalty

for i in range(first.start + 1, first.end):
name, addr = first.split(i)
log().comment(f'split first word = name ({i - first.start})')
@@ -326,9 +331,13 @@ class _TokenSequence:
log().var_dump('skip backward', (base.postcode, last))
return

penalty = self.penalty
if self.direction == -1 and query.dir_penalty < 0:
penalty -= query.dir_penalty

if self.direction == -1 or len(base.address) > 1 or base.postcode:
log().comment('last word = name')
yield dataclasses.replace(base, penalty=self.penalty,
yield dataclasses.replace(base, penalty=penalty,
name=last, address=base.address[:-1])

# To paraphrase:
@@ -341,12 +350,14 @@ class _TokenSequence:
or (query.nodes[last.start].ptype != qmod.PHRASE_ANY):
return

penalty = self.penalty
if base.housenumber and base.housenumber < last:
penalty += 0.4
if len(query.source) > 1:
penalty += 0.25

if self.direction == 0 and query.dir_penalty < 0:
penalty -= query.dir_penalty

for i in range(last.start + 1, last.end):
addr, name = last.split(i)
log().comment(f'split last word = name ({i - last.start})')
@@ -379,11 +390,11 @@ class _TokenSequence:
if base.postcode and base.postcode.start == 0:
self.penalty += 0.1

# Right-to-left reading of the address
# Left-to-right reading of the address
if self.direction != -1:
yield from self._get_assignments_address_forward(base, query)

# Left-to-right reading of the address
# Right-to-left reading of the address
if self.direction != 1:
yield from self._get_assignments_address_backward(base, query)
@@ -409,11 +420,22 @@ def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment
node = query.nodes[state.end_pos]

for tlist in node.starting:
newstate = state.advance(tlist.ttype, tlist.end, node.btype)
if newstate is not None:
if newstate.end_pos == query.num_token_slots():
if newstate.recheck_sequence():
log().var_dump('Assignment', newstate)
yield from newstate.get_assignments(query)
elif not newstate.is_final():
todo.append(newstate)
yield from _append_state_to_todo(
query, todo,
state.advance(tlist.ttype, tlist.end, node.btype))

if node.partial is not None:
yield from _append_state_to_todo(
query, todo,
state.advance(qmod.TOKEN_PARTIAL, state.end_pos + 1, node.btype))

def _append_state_to_todo(query: qmod.QueryStruct, todo: List[_TokenSequence],
newstate: Optional[_TokenSequence]) -> Iterator[TokenAssignment]:
if newstate is not None:
if newstate.end_pos == query.num_token_slots():
if newstate.recheck_sequence():
log().var_dump('Assignment', newstate)
yield from newstate.get_assignments(query)
elif not newstate.is_final():
todo.append(newstate)

@@ -44,7 +44,6 @@ def test_phrase_incompatible(ptype):

def test_query_node_empty(qnode):
assert not qnode.has_tokens(3, query.TOKEN_PARTIAL)
assert qnode.get_tokens(3, query.TOKEN_WORD) is None

@@ -57,7 +56,6 @@ def test_query_node_with_content(qnode):
assert qnode.has_tokens(2, query.TOKEN_PARTIAL)
assert qnode.has_tokens(2, query.TOKEN_WORD)

assert qnode.get_tokens(3, query.TOKEN_PARTIAL) is None
assert qnode.get_tokens(2, query.TOKEN_COUNTRY) is None
assert len(qnode.get_tokens(2, query.TOKEN_PARTIAL)) == 2
assert len(qnode.get_tokens(2, query.TOKEN_WORD)) == 1
@@ -84,7 +82,7 @@ def test_query_struct_with_tokens():
assert q.get_tokens(query.TokenRange(0, 2), query.TOKEN_WORD) == []
assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_WORD)) == 2

partials = q.get_partials_list(query.TokenRange(0, 2))
partials = list(q.iter_partials(query.TokenRange(0, 2)))

assert len(partials) == 2
assert [t.token for t in partials] == [1, 2]
@@ -101,7 +99,6 @@ def test_query_struct_incompatible_token():
q.add_token(query.TokenRange(0, 1), query.TOKEN_PARTIAL, mktoken(1))
q.add_token(query.TokenRange(1, 2), query.TOKEN_COUNTRY, mktoken(100))

assert q.get_tokens(query.TokenRange(0, 1), query.TOKEN_PARTIAL) == []
assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_COUNTRY)) == 1

@@ -113,7 +110,7 @@ def test_query_struct_amenity_single_word():
q.add_token(query.TokenRange(0, 1), query.TOKEN_NEAR_ITEM, mktoken(2))
q.add_token(query.TokenRange(0, 1), query.TOKEN_QUALIFIER, mktoken(3))

assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_PARTIAL)) == 1
assert q.nodes[0].partial.token == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_NEAR_ITEM)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_QUALIFIER)) == 0

@@ -128,10 +125,10 @@ def test_query_struct_amenity_two_words():
q.add_token(query.TokenRange(*trange), query.TOKEN_NEAR_ITEM, mktoken(2))
q.add_token(query.TokenRange(*trange), query.TOKEN_QUALIFIER, mktoken(3))

assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_PARTIAL)) == 1
assert q.nodes[0].partial.token == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_NEAR_ITEM)) == 0
assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_QUALIFIER)) == 1

assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_PARTIAL)) == 1
assert q.nodes[1].partial.token == 1
assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_NEAR_ITEM)) == 0
assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_QUALIFIER)) == 1

@@ -69,8 +69,8 @@ async def test_single_phrase_with_unknown_terms(conn):
assert query.source[0].text == 'foo bar'

assert query.num_token_slots() == 2
assert len(query.nodes[0].starting) == 1
assert not query.nodes[1].starting
assert query.nodes[0].partial.token == 1
assert query.nodes[1].partial is None

@pytest.mark.asyncio
@@ -103,8 +103,8 @@ async def test_splitting_in_transliteration(conn):

@pytest.mark.asyncio
@pytest.mark.parametrize('term,order', [('23456', ['P', 'H', 'W', 'w']),
('3', ['H', 'W', 'w'])])
@pytest.mark.parametrize('term,order', [('23456', ['P', 'H', 'W']),
('3', ['H', 'W'])])
async def test_penalty_postcodes_and_housenumbers(conn, term, order):
ana = await tok.create_query_analyzer(conn)