mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
query analyzer for ICU tokenizer
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
"""
|
||||
Datastructures for a tokenized query.
|
||||
"""
|
||||
from typing import List, Tuple, Optional, NamedTuple
|
||||
from typing import List, Tuple, Optional, NamedTuple, Iterator
|
||||
from abc import ABC, abstractmethod
|
||||
import dataclasses
|
||||
import enum
|
||||
@@ -124,6 +124,13 @@ class TokenList:
|
||||
tokens: List[Token]
|
||||
|
||||
|
||||
def add_penalty(self, penalty: float) -> None:
    """ Increase the penalty of every token held in this list
        by the given amount. The tokens are modified in place.
    """
    for entry in self.tokens:
        entry.penalty += penalty
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class QueryNode:
|
||||
""" A node of the query representing a break between terms.
|
||||
@@ -226,6 +233,14 @@ class QueryStruct:
|
||||
for i in range(trange.start, trange.end)]
|
||||
|
||||
|
||||
def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
    """ Walk through every token list of the query.

        Yields tuples of (index of the node, the node itself, token list)
        for each token list found in the `starting` attribute of the
        query's nodes, going through the nodes in order.
    """
    for idx, qnode in enumerate(self.nodes):
        yield from ((idx, qnode, token_list) for token_list in qnode.starting)
|
||||
|
||||
|
||||
def find_lookup_word_by_id(self, token: int) -> str:
|
||||
""" Find the first token with the given token ID and return
|
||||
its lookup word. Returns 'None' if no such token exists.
|
||||
|
||||
Reference in New Issue
Block a user