add address counts to tokens

This commit is contained in:
Sarah Hoffmann
2024-03-15 10:54:13 +01:00
parent bb5de9b955
commit 07b7fd1dbb
10 changed files with 32 additions and 21 deletions

View File

@@ -97,6 +97,7 @@ class ICUToken(qmod.Token):
""" Create a ICUToken from the row of the word table.
"""
count = 1 if row.info is None else row.info.get('count', 1)
addr_count = 1 if row.info is None else row.info.get('addr_count', 1)
penalty = 0.0
if row.type == 'w':
@@ -123,7 +124,8 @@ class ICUToken(qmod.Token):
return ICUToken(penalty=penalty, token=row.word_id, count=count,
lookup_word=lookup_word, is_indexed=True,
word_token=row.word_token, info=row.info)
word_token=row.word_token, info=row.info,
addr_count=addr_count)
@@ -257,7 +259,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
if len(part.token) <= 4 and part[0].isdigit()\
and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
ICUToken(0.5, 0, 1, part.token, True, part.token, None))
ICUToken(0.5, 0, 1, 1, part.token, True, part.token, None))
def rerank_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:

View File

@@ -210,6 +210,7 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
return LegacyToken(penalty=penalty, token=row.word_id,
count=row.search_name_count or 1,
addr_count=1, # not supported
lookup_word=lookup_word,
word_token=row.word_token.strip(),
category=(rowclass, row.type) if rowclass is not None else None,
@@ -226,7 +227,7 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
if len(part) <= 4 and part.isdigit()\
and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
LegacyToken(penalty=0.5, token=0, count=1,
LegacyToken(penalty=0.5, token=0, count=1, addr_count=1,
lookup_word=part, word_token=part,
category=None, country=None,
operator=None, is_indexed=True))

View File

@@ -99,10 +99,10 @@ class Token(ABC):
penalty: float
token: int
count: int
addr_count: int
lookup_word: str
is_indexed: bool
addr_count: int = 1
@abstractmethod
def get_category(self) -> Tuple[str, str]: