replace TokenType enum with simple char constants

This commit is contained in:
Sarah Hoffmann
2025-02-21 10:23:41 +01:00
parent 4577669213
commit 31412e0674
8 changed files with 281 additions and 281 deletions

View File

@@ -97,7 +97,7 @@ class SearchBuilder:
builder = self.build_poi_search(sdata) builder = self.build_poi_search(sdata)
elif assignment.housenumber: elif assignment.housenumber:
hnr_tokens = self.query.get_tokens(assignment.housenumber, hnr_tokens = self.query.get_tokens(assignment.housenumber,
qmod.TokenType.HOUSENUMBER) qmod.TOKEN_HOUSENUMBER)
builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address) builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
else: else:
builder = self.build_special_search(sdata, assignment.address, builder = self.build_special_search(sdata, assignment.address,
@@ -173,7 +173,7 @@ class SearchBuilder:
list(partials), lookups.LookupAll)) list(partials), lookups.LookupAll))
else: else:
addr_fulls = [t.token for t addr_fulls = [t.token for t
in self.query.get_tokens(address[0], qmod.TokenType.WORD)] in self.query.get_tokens(address[0], qmod.TOKEN_WORD)]
if len(addr_fulls) > 5: if len(addr_fulls) > 5:
return return
sdata.lookups.append( sdata.lookups.append(
@@ -216,7 +216,7 @@ class SearchBuilder:
addr_count = min(t.addr_count for t in addr_partials) if addr_partials else 30000 addr_count = min(t.addr_count for t in addr_partials) if addr_partials else 30000
# Partial term too frequent. Try looking up by rare full names first. # Partial term too frequent. Try looking up by rare full names first.
name_fulls = self.query.get_tokens(name, qmod.TokenType.WORD) name_fulls = self.query.get_tokens(name, qmod.TOKEN_WORD)
if name_fulls: if name_fulls:
fulls_count = sum(t.count for t in name_fulls) fulls_count = sum(t.count for t in name_fulls)
@@ -285,7 +285,7 @@ class SearchBuilder:
db_field: str = 'name_vector') -> dbf.FieldRanking: db_field: str = 'name_vector') -> dbf.FieldRanking:
""" Create a ranking expression for a name term in the given range. """ Create a ranking expression for a name term in the given range.
""" """
name_fulls = self.query.get_tokens(trange, qmod.TokenType.WORD) name_fulls = self.query.get_tokens(trange, qmod.TOKEN_WORD)
ranks = [dbf.RankedTokens(t.penalty, [t.token]) for t in name_fulls] ranks = [dbf.RankedTokens(t.penalty, [t.token]) for t in name_fulls]
ranks.sort(key=lambda r: r.penalty) ranks.sort(key=lambda r: r.penalty)
# Fallback, sum of penalty for partials # Fallback, sum of penalty for partials
@@ -304,10 +304,10 @@ class SearchBuilder:
while todo: while todo:
neglen, pos, rank = heapq.heappop(todo) neglen, pos, rank = heapq.heappop(todo)
for tlist in self.query.nodes[pos].starting: for tlist in self.query.nodes[pos].starting:
if tlist.ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.WORD): if tlist.ttype in (qmod.TOKEN_PARTIAL, qmod.TOKEN_WORD):
if tlist.end < trange.end: if tlist.end < trange.end:
chgpenalty = PENALTY_WORDCHANGE[self.query.nodes[tlist.end].btype] chgpenalty = PENALTY_WORDCHANGE[self.query.nodes[tlist.end].btype]
if tlist.ttype == qmod.TokenType.PARTIAL: if tlist.ttype == qmod.TOKEN_PARTIAL:
penalty = rank.penalty + chgpenalty \ penalty = rank.penalty + chgpenalty \
+ max(t.penalty for t in tlist.tokens) + max(t.penalty for t in tlist.tokens)
heapq.heappush(todo, (neglen - 1, tlist.end, heapq.heappush(todo, (neglen - 1, tlist.end,
@@ -317,7 +317,7 @@ class SearchBuilder:
heapq.heappush(todo, (neglen - 1, tlist.end, heapq.heappush(todo, (neglen - 1, tlist.end,
rank.with_token(t, chgpenalty))) rank.with_token(t, chgpenalty)))
elif tlist.end == trange.end: elif tlist.end == trange.end:
if tlist.ttype == qmod.TokenType.PARTIAL: if tlist.ttype == qmod.TOKEN_PARTIAL:
ranks.append(dbf.RankedTokens(rank.penalty ranks.append(dbf.RankedTokens(rank.penalty
+ max(t.penalty for t in tlist.tokens), + max(t.penalty for t in tlist.tokens),
rank.tokens)) rank.tokens))
@@ -357,11 +357,11 @@ class SearchBuilder:
if assignment.housenumber: if assignment.housenumber:
sdata.set_strings('housenumbers', sdata.set_strings('housenumbers',
self.query.get_tokens(assignment.housenumber, self.query.get_tokens(assignment.housenumber,
qmod.TokenType.HOUSENUMBER)) qmod.TOKEN_HOUSENUMBER))
if assignment.postcode: if assignment.postcode:
sdata.set_strings('postcodes', sdata.set_strings('postcodes',
self.query.get_tokens(assignment.postcode, self.query.get_tokens(assignment.postcode,
qmod.TokenType.POSTCODE)) qmod.TOKEN_POSTCODE))
if assignment.qualifier: if assignment.qualifier:
tokens = self.get_qualifier_tokens(assignment.qualifier) tokens = self.get_qualifier_tokens(assignment.qualifier)
if not tokens: if not tokens:
@@ -391,7 +391,7 @@ class SearchBuilder:
optionally filtered by the country list from the details optionally filtered by the country list from the details
parameters. parameters.
""" """
tokens = self.query.get_tokens(trange, qmod.TokenType.COUNTRY) tokens = self.query.get_tokens(trange, qmod.TOKEN_COUNTRY)
if self.details.countries: if self.details.countries:
tokens = [t for t in tokens if t.lookup_word in self.details.countries] tokens = [t for t in tokens if t.lookup_word in self.details.countries]
@@ -402,7 +402,7 @@ class SearchBuilder:
optionally filtered by the qualifier list from the details optionally filtered by the qualifier list from the details
parameters. parameters.
""" """
tokens = self.query.get_tokens(trange, qmod.TokenType.QUALIFIER) tokens = self.query.get_tokens(trange, qmod.TOKEN_QUALIFIER)
if self.details.categories: if self.details.categories:
tokens = [t for t in tokens if t.get_category() in self.details.categories] tokens = [t for t in tokens if t.get_category() in self.details.categories]
@@ -415,7 +415,7 @@ class SearchBuilder:
""" """
if assignment.near_item: if assignment.near_item:
tokens: Dict[Tuple[str, str], float] = {} tokens: Dict[Tuple[str, str], float] = {}
for t in self.query.get_tokens(assignment.near_item, qmod.TokenType.NEAR_ITEM): for t in self.query.get_tokens(assignment.near_item, qmod.TOKEN_NEAR_ITEM):
cat = t.get_category() cat = t.get_category()
# The category of a near search will be that of near_item. # The category of a near search will be that of near_item.
# Thus, if search is restricted to a category parameter, # Thus, if search is restricted to a category parameter,

View File

@@ -29,11 +29,11 @@ from .query_analyzer_factory import AbstractQueryAnalyzer
DB_TO_TOKEN_TYPE = { DB_TO_TOKEN_TYPE = {
'W': qmod.TokenType.WORD, 'W': qmod.TOKEN_WORD,
'w': qmod.TokenType.PARTIAL, 'w': qmod.TOKEN_PARTIAL,
'H': qmod.TokenType.HOUSENUMBER, 'H': qmod.TOKEN_HOUSENUMBER,
'P': qmod.TokenType.POSTCODE, 'P': qmod.TOKEN_POSTCODE,
'C': qmod.TokenType.COUNTRY 'C': qmod.TOKEN_COUNTRY
} }
PENALTY_IN_TOKEN_BREAK = { PENALTY_IN_TOKEN_BREAK = {
@@ -225,12 +225,12 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
if row.type == 'S': if row.type == 'S':
if row.info['op'] in ('in', 'near'): if row.info['op'] in ('in', 'near'):
if trange.start == 0: if trange.start == 0:
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token) query.add_token(trange, qmod.TOKEN_NEAR_ITEM, token)
else: else:
if trange.start == 0 and trange.end == query.num_token_slots(): if trange.start == 0 and trange.end == query.num_token_slots():
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token) query.add_token(trange, qmod.TOKEN_NEAR_ITEM, token)
else: else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token) query.add_token(trange, qmod.TOKEN_QUALIFIER, token)
else: else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token) query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
@@ -297,8 +297,8 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
""" """
for part, node, i in zip(parts, query.nodes, range(1000)): for part, node, i in zip(parts, query.nodes, range(1000)):
if len(part.token) <= 4 and part.token.isdigit()\ if len(part.token) <= 4 and part.token.isdigit()\
and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER): and not node.has_tokens(i+1, qmod.TOKEN_HOUSENUMBER):
query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER, query.add_token(qmod.TokenRange(i, i+1), qmod.TOKEN_HOUSENUMBER,
ICUToken(penalty=0.5, token=0, ICUToken(penalty=0.5, token=0,
count=1, addr_count=1, lookup_word=part.token, count=1, addr_count=1, lookup_word=part.token,
word_token=part.token, info=None)) word_token=part.token, info=None))
@@ -307,19 +307,19 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
""" Add penalties to tokens that depend on the presence of other tokens. """ Add penalties to tokens that depend on the presence of other tokens.
""" """
for i, node, tlist in query.iter_token_lists(): for i, node, tlist in query.iter_token_lists():
if tlist.ttype == qmod.TokenType.POSTCODE: if tlist.ttype == qmod.TOKEN_POSTCODE:
for repl in node.starting: for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \ if repl.end == tlist.end and repl.ttype != qmod.TOKEN_POSTCODE \
and (repl.ttype != qmod.TokenType.HOUSENUMBER and (repl.ttype != qmod.TOKEN_HOUSENUMBER
or len(tlist.tokens[0].lookup_word) > 4): or len(tlist.tokens[0].lookup_word) > 4):
repl.add_penalty(0.39) repl.add_penalty(0.39)
elif (tlist.ttype == qmod.TokenType.HOUSENUMBER elif (tlist.ttype == qmod.TOKEN_HOUSENUMBER
and len(tlist.tokens[0].lookup_word) <= 3): and len(tlist.tokens[0].lookup_word) <= 3):
if any(c.isdigit() for c in tlist.tokens[0].lookup_word): if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
for repl in node.starting: for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER: if repl.end == tlist.end and repl.ttype != qmod.TOKEN_HOUSENUMBER:
repl.add_penalty(0.5 - tlist.tokens[0].penalty) repl.add_penalty(0.5 - tlist.tokens[0].penalty)
elif tlist.ttype not in (qmod.TokenType.COUNTRY, qmod.TokenType.PARTIAL): elif tlist.ttype not in (qmod.TOKEN_COUNTRY, qmod.TOKEN_PARTIAL):
norm = parts[i].normalized norm = parts[i].normalized
for j in range(i + 1, tlist.end): for j in range(i + 1, tlist.end):
if node.btype != qmod.BREAK_TOKEN: if node.btype != qmod.BREAK_TOKEN:
@@ -341,7 +341,7 @@ def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
for tlist in node.starting: for tlist in node.starting:
for token in tlist.tokens: for token in tlist.tokens:
t = cast(ICUToken, token) t = cast(ICUToken, token)
yield [tlist.ttype.name, t.token, t.word_token or '', yield [tlist.ttype, t.token, t.word_token or '',
t.lookup_word or '', t.penalty, t.count, t.info] t.lookup_word or '', t.penalty, t.count, t.info]

View File

@@ -38,23 +38,23 @@ BREAK_TOKEN = '`'
""" """
class TokenType(enum.Enum): TokenType = str
""" Type of token. """ Type of token.
""" """
WORD = enum.auto() TOKEN_WORD = 'W'
""" Full name of a place. """ """ Full name of a place. """
PARTIAL = enum.auto() TOKEN_PARTIAL = 'w'
""" Word term without breaks, does not necessarily represent a full name. """ """ Word term without breaks, does not necessarily represent a full name. """
HOUSENUMBER = enum.auto() TOKEN_HOUSENUMBER = 'H'
""" Housenumber term. """ """ Housenumber term. """
POSTCODE = enum.auto() TOKEN_POSTCODE = 'P'
""" Postal code term. """ """ Postal code term. """
COUNTRY = enum.auto() TOKEN_COUNTRY = 'C'
""" Country name or reference. """ """ Country name or reference. """
QUALIFIER = enum.auto() TOKEN_QUALIFIER = 'Q'
""" Special term used together with name (e.g. _Hotel_ Bellevue). """ """ Special term used together with name (e.g. _Hotel_ Bellevue). """
NEAR_ITEM = enum.auto() TOKEN_NEAR_ITEM = 'N'
""" Special term used as searchable object (e.g. supermarket in ...). """ """ Special term used as searchable object (e.g. supermarket in ...). """
class PhraseType(enum.Enum): class PhraseType(enum.Enum):
@@ -82,19 +82,19 @@ class PhraseType(enum.Enum):
""" Check if the given token type can be used with the phrase type. """ Check if the given token type can be used with the phrase type.
""" """
if self == PhraseType.NONE: if self == PhraseType.NONE:
return not is_full_phrase or ttype != TokenType.QUALIFIER return not is_full_phrase or ttype != TOKEN_QUALIFIER
if self == PhraseType.AMENITY: if self == PhraseType.AMENITY:
return ttype in (TokenType.WORD, TokenType.PARTIAL)\ return ttype in (TOKEN_WORD, TOKEN_PARTIAL)\
or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\ or (is_full_phrase and ttype == TOKEN_NEAR_ITEM)\
or (not is_full_phrase and ttype == TokenType.QUALIFIER) or (not is_full_phrase and ttype == TOKEN_QUALIFIER)
if self == PhraseType.STREET: if self == PhraseType.STREET:
return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER) return ttype in (TOKEN_WORD, TOKEN_PARTIAL, TOKEN_HOUSENUMBER)
if self == PhraseType.POSTCODE: if self == PhraseType.POSTCODE:
return ttype == TokenType.POSTCODE return ttype == TOKEN_POSTCODE
if self == PhraseType.COUNTRY: if self == PhraseType.COUNTRY:
return ttype == TokenType.COUNTRY return ttype == TOKEN_COUNTRY
return ttype in (TokenType.WORD, TokenType.PARTIAL) return ttype in (TOKEN_WORD, TOKEN_PARTIAL)
@dataclasses.dataclass @dataclasses.dataclass
@@ -265,7 +265,7 @@ class QueryStruct:
going to the subsequent node. Such PARTIAL tokens are going to the subsequent node. Such PARTIAL tokens are
assumed to exist. assumed to exist.
""" """
return [next(iter(self.get_tokens(TokenRange(i, i+1), TokenType.PARTIAL))) return [next(iter(self.get_tokens(TokenRange(i, i+1), TOKEN_PARTIAL)))
for i in range(trange.start, trange.end)] for i in range(trange.start, trange.end)]
def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]: def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
@@ -285,5 +285,5 @@ class QueryStruct:
for tlist in node.starting: for tlist in node.starting:
for t in tlist.tokens: for t in tlist.tokens:
if t.token == token: if t.token == token:
return f"[{tlist.ttype.name[0]}]{t.lookup_word}" return f"[{tlist.ttype}]{t.lookup_word}"
return 'None' return 'None'

View File

@@ -56,17 +56,17 @@ class TokenAssignment:
""" """
out = TokenAssignment() out = TokenAssignment()
for token in ranges: for token in ranges:
if token.ttype == qmod.TokenType.PARTIAL: if token.ttype == qmod.TOKEN_PARTIAL:
out.address.append(token.trange) out.address.append(token.trange)
elif token.ttype == qmod.TokenType.HOUSENUMBER: elif token.ttype == qmod.TOKEN_HOUSENUMBER:
out.housenumber = token.trange out.housenumber = token.trange
elif token.ttype == qmod.TokenType.POSTCODE: elif token.ttype == qmod.TOKEN_POSTCODE:
out.postcode = token.trange out.postcode = token.trange
elif token.ttype == qmod.TokenType.COUNTRY: elif token.ttype == qmod.TOKEN_COUNTRY:
out.country = token.trange out.country = token.trange
elif token.ttype == qmod.TokenType.NEAR_ITEM: elif token.ttype == qmod.TOKEN_NEAR_ITEM:
out.near_item = token.trange out.near_item = token.trange
elif token.ttype == qmod.TokenType.QUALIFIER: elif token.ttype == qmod.TOKEN_QUALIFIER:
out.qualifier = token.trange out.qualifier = token.trange
return out return out
@@ -84,7 +84,7 @@ class _TokenSequence:
self.penalty = penalty self.penalty = penalty
def __str__(self) -> str: def __str__(self) -> str:
seq = ''.join(f'[{r.trange.start} - {r.trange.end}: {r.ttype.name}]' for r in self.seq) seq = ''.join(f'[{r.trange.start} - {r.trange.end}: {r.ttype}]' for r in self.seq)
return f'{seq} (dir: {self.direction}, penalty: {self.penalty})' return f'{seq} (dir: {self.direction}, penalty: {self.penalty})'
@property @property
@@ -105,7 +105,7 @@ class _TokenSequence:
""" """
# Country and category must be the final term for left-to-right # Country and category must be the final term for left-to-right
return len(self.seq) > 1 and \ return len(self.seq) > 1 and \
self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM) self.seq[-1].ttype in (qmod.TOKEN_COUNTRY, qmod.TOKEN_NEAR_ITEM)
def appendable(self, ttype: qmod.TokenType) -> Optional[int]: def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
""" Check if the given token type is appendable to the existing sequence. """ Check if the given token type is appendable to the existing sequence.
@@ -114,23 +114,23 @@ class _TokenSequence:
new direction of the sequence after adding such a type. The new direction of the sequence after adding such a type. The
token is not added. token is not added.
""" """
if ttype == qmod.TokenType.WORD: if ttype == qmod.TOKEN_WORD:
return None return None
if not self.seq: if not self.seq:
# Append unconditionally to the empty list # Append unconditionally to the empty list
if ttype == qmod.TokenType.COUNTRY: if ttype == qmod.TOKEN_COUNTRY:
return -1 return -1
if ttype in (qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER): if ttype in (qmod.TOKEN_HOUSENUMBER, qmod.TOKEN_QUALIFIER):
return 1 return 1
return self.direction return self.direction
# Name tokens are always acceptable and don't change direction # Name tokens are always acceptable and don't change direction
if ttype == qmod.TokenType.PARTIAL: if ttype == qmod.TOKEN_PARTIAL:
# qualifiers cannot appear in the middle of the query. They need # qualifiers cannot appear in the middle of the query. They need
# to be near the next phrase. # to be near the next phrase.
if self.direction == -1 \ if self.direction == -1 \
and any(t.ttype == qmod.TokenType.QUALIFIER for t in self.seq[:-1]): and any(t.ttype == qmod.TOKEN_QUALIFIER for t in self.seq[:-1]):
return None return None
return self.direction return self.direction
@@ -138,54 +138,54 @@ class _TokenSequence:
if self.has_types(ttype): if self.has_types(ttype):
return None return None
if ttype == qmod.TokenType.HOUSENUMBER: if ttype == qmod.TOKEN_HOUSENUMBER:
if self.direction == 1: if self.direction == 1:
if len(self.seq) == 1 and self.seq[0].ttype == qmod.TokenType.QUALIFIER: if len(self.seq) == 1 and self.seq[0].ttype == qmod.TOKEN_QUALIFIER:
return None return None
if len(self.seq) > 2 \ if len(self.seq) > 2 \
or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY): or self.has_types(qmod.TOKEN_POSTCODE, qmod.TOKEN_COUNTRY):
return None # direction left-to-right: housenumber must come before anything return None # direction left-to-right: housenumber must come before anything
elif (self.direction == -1 elif (self.direction == -1
or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY)): or self.has_types(qmod.TOKEN_POSTCODE, qmod.TOKEN_COUNTRY)):
return -1 # force direction right-to-left if after other terms return -1 # force direction right-to-left if after other terms
return self.direction return self.direction
if ttype == qmod.TokenType.POSTCODE: if ttype == qmod.TOKEN_POSTCODE:
if self.direction == -1: if self.direction == -1:
if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER): if self.has_types(qmod.TOKEN_HOUSENUMBER, qmod.TOKEN_QUALIFIER):
return None return None
return -1 return -1
if self.direction == 1: if self.direction == 1:
return None if self.has_types(qmod.TokenType.COUNTRY) else 1 return None if self.has_types(qmod.TOKEN_COUNTRY) else 1
if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER): if self.has_types(qmod.TOKEN_HOUSENUMBER, qmod.TOKEN_QUALIFIER):
return 1 return 1
return self.direction return self.direction
if ttype == qmod.TokenType.COUNTRY: if ttype == qmod.TOKEN_COUNTRY:
return None if self.direction == -1 else 1 return None if self.direction == -1 else 1
if ttype == qmod.TokenType.NEAR_ITEM: if ttype == qmod.TOKEN_NEAR_ITEM:
return self.direction return self.direction
if ttype == qmod.TokenType.QUALIFIER: if ttype == qmod.TOKEN_QUALIFIER:
if self.direction == 1: if self.direction == 1:
if (len(self.seq) == 1 if (len(self.seq) == 1
and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \ and self.seq[0].ttype in (qmod.TOKEN_PARTIAL, qmod.TOKEN_NEAR_ITEM)) \
or (len(self.seq) == 2 or (len(self.seq) == 2
and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM and self.seq[0].ttype == qmod.TOKEN_NEAR_ITEM
and self.seq[1].ttype == qmod.TokenType.PARTIAL): and self.seq[1].ttype == qmod.TOKEN_PARTIAL):
return 1 return 1
return None return None
if self.direction == -1: if self.direction == -1:
return -1 return -1
tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TOKEN_NEAR_ITEM else self.seq
if len(tempseq) == 0: if len(tempseq) == 0:
return 1 return 1
if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER: if len(tempseq) == 1 and self.seq[0].ttype == qmod.TOKEN_HOUSENUMBER:
return None return None
if len(tempseq) > 1 or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY): if len(tempseq) > 1 or self.has_types(qmod.TOKEN_POSTCODE, qmod.TOKEN_COUNTRY):
return -1 return -1
return 0 return 0
@@ -240,18 +240,18 @@ class _TokenSequence:
# housenumbers may not be further than 2 words from the beginning. # housenumbers may not be further than 2 words from the beginning.
# If there are two words in front, give it a penalty. # If there are two words in front, give it a penalty.
hnrpos = next((i for i, tr in enumerate(self.seq) hnrpos = next((i for i, tr in enumerate(self.seq)
if tr.ttype == qmod.TokenType.HOUSENUMBER), if tr.ttype == qmod.TOKEN_HOUSENUMBER),
None) None)
if hnrpos is not None: if hnrpos is not None:
if self.direction != -1: if self.direction != -1:
priors = sum(1 for t in self.seq[:hnrpos] if t.ttype == qmod.TokenType.PARTIAL) priors = sum(1 for t in self.seq[:hnrpos] if t.ttype == qmod.TOKEN_PARTIAL)
if not self._adapt_penalty_from_priors(priors, -1): if not self._adapt_penalty_from_priors(priors, -1):
return False return False
if self.direction != 1: if self.direction != 1:
priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL) priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TOKEN_PARTIAL)
if not self._adapt_penalty_from_priors(priors, 1): if not self._adapt_penalty_from_priors(priors, 1):
return False return False
if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq): if any(t.ttype == qmod.TOKEN_NEAR_ITEM for t in self.seq):
self.penalty += 1.0 self.penalty += 1.0
return True return True

View File

@@ -22,42 +22,42 @@ def mktoken(tid: int):
lookup_word='foo') lookup_word='foo')
@pytest.mark.parametrize('ptype,ttype', [('NONE', 'WORD'), @pytest.mark.parametrize('ptype,ttype', [('NONE', 'W'),
('AMENITY', 'QUALIFIER'), ('AMENITY', 'Q'),
('STREET', 'PARTIAL'), ('STREET', 'w'),
('CITY', 'WORD'), ('CITY', 'W'),
('COUNTRY', 'COUNTRY'), ('COUNTRY', 'C'),
('POSTCODE', 'POSTCODE')]) ('POSTCODE', 'P')])
def test_phrase_compatible(ptype, ttype): def test_phrase_compatible(ptype, ttype):
assert query.PhraseType[ptype].compatible_with(query.TokenType[ttype], False) assert query.PhraseType[ptype].compatible_with(ttype, False)
@pytest.mark.parametrize('ptype', ['COUNTRY', 'POSTCODE']) @pytest.mark.parametrize('ptype', ['COUNTRY', 'POSTCODE'])
def test_phrase_incompatible(ptype): def test_phrase_incompatible(ptype):
assert not query.PhraseType[ptype].compatible_with(query.TokenType.PARTIAL, True) assert not query.PhraseType[ptype].compatible_with(query.TOKEN_PARTIAL, True)
def test_query_node_empty(): def test_query_node_empty():
qn = query.QueryNode(query.BREAK_PHRASE, query.PhraseType.NONE) qn = query.QueryNode(query.BREAK_PHRASE, query.PhraseType.NONE)
assert not qn.has_tokens(3, query.TokenType.PARTIAL) assert not qn.has_tokens(3, query.TOKEN_PARTIAL)
assert qn.get_tokens(3, query.TokenType.WORD) is None assert qn.get_tokens(3, query.TOKEN_WORD) is None
def test_query_node_with_content(): def test_query_node_with_content():
qn = query.QueryNode(query.BREAK_PHRASE, query.PhraseType.NONE) qn = query.QueryNode(query.BREAK_PHRASE, query.PhraseType.NONE)
qn.starting.append(query.TokenList(2, query.TokenType.PARTIAL, [mktoken(100), mktoken(101)])) qn.starting.append(query.TokenList(2, query.TOKEN_PARTIAL, [mktoken(100), mktoken(101)]))
qn.starting.append(query.TokenList(2, query.TokenType.WORD, [mktoken(1000)])) qn.starting.append(query.TokenList(2, query.TOKEN_WORD, [mktoken(1000)]))
assert not qn.has_tokens(3, query.TokenType.PARTIAL) assert not qn.has_tokens(3, query.TOKEN_PARTIAL)
assert not qn.has_tokens(2, query.TokenType.COUNTRY) assert not qn.has_tokens(2, query.TOKEN_COUNTRY)
assert qn.has_tokens(2, query.TokenType.PARTIAL) assert qn.has_tokens(2, query.TOKEN_PARTIAL)
assert qn.has_tokens(2, query.TokenType.WORD) assert qn.has_tokens(2, query.TOKEN_WORD)
assert qn.get_tokens(3, query.TokenType.PARTIAL) is None assert qn.get_tokens(3, query.TOKEN_PARTIAL) is None
assert qn.get_tokens(2, query.TokenType.COUNTRY) is None assert qn.get_tokens(2, query.TOKEN_COUNTRY) is None
assert len(qn.get_tokens(2, query.TokenType.PARTIAL)) == 2 assert len(qn.get_tokens(2, query.TOKEN_PARTIAL)) == 2
assert len(qn.get_tokens(2, query.TokenType.WORD)) == 1 assert len(qn.get_tokens(2, query.TOKEN_WORD)) == 1
def test_query_struct_empty(): def test_query_struct_empty():
@@ -73,13 +73,13 @@ def test_query_struct_with_tokens():
assert q.num_token_slots() == 2 assert q.num_token_slots() == 2
q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(0, 1), query.TOKEN_PARTIAL, mktoken(1))
q.add_token(query.TokenRange(1, 2), query.TokenType.PARTIAL, mktoken(2)) q.add_token(query.TokenRange(1, 2), query.TOKEN_PARTIAL, mktoken(2))
q.add_token(query.TokenRange(1, 2), query.TokenType.WORD, mktoken(99)) q.add_token(query.TokenRange(1, 2), query.TOKEN_WORD, mktoken(99))
q.add_token(query.TokenRange(1, 2), query.TokenType.WORD, mktoken(98)) q.add_token(query.TokenRange(1, 2), query.TOKEN_WORD, mktoken(98))
assert q.get_tokens(query.TokenRange(0, 2), query.TokenType.WORD) == [] assert q.get_tokens(query.TokenRange(0, 2), query.TOKEN_WORD) == []
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.WORD)) == 2 assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_WORD)) == 2
partials = q.get_partials_list(query.TokenRange(0, 2)) partials = q.get_partials_list(query.TokenRange(0, 2))
@@ -95,24 +95,24 @@ def test_query_struct_incompatible_token():
q.add_node(query.BREAK_WORD, query.PhraseType.COUNTRY) q.add_node(query.BREAK_WORD, query.PhraseType.COUNTRY)
q.add_node(query.BREAK_END, query.PhraseType.NONE) q.add_node(query.BREAK_END, query.PhraseType.NONE)
q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(0, 1), query.TOKEN_PARTIAL, mktoken(1))
q.add_token(query.TokenRange(1, 2), query.TokenType.COUNTRY, mktoken(100)) q.add_token(query.TokenRange(1, 2), query.TOKEN_COUNTRY, mktoken(100))
assert q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL) == [] assert q.get_tokens(query.TokenRange(0, 1), query.TOKEN_PARTIAL) == []
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.COUNTRY)) == 1 assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_COUNTRY)) == 1
def test_query_struct_amenity_single_word(): def test_query_struct_amenity_single_word():
q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')]) q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')])
q.add_node(query.BREAK_END, query.PhraseType.NONE) q.add_node(query.BREAK_END, query.PhraseType.NONE)
q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(0, 1), query.TOKEN_PARTIAL, mktoken(1))
q.add_token(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM, mktoken(2)) q.add_token(query.TokenRange(0, 1), query.TOKEN_NEAR_ITEM, mktoken(2))
q.add_token(query.TokenRange(0, 1), query.TokenType.QUALIFIER, mktoken(3)) q.add_token(query.TokenRange(0, 1), query.TOKEN_QUALIFIER, mktoken(3))
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1 assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_PARTIAL)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM)) == 1 assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_NEAR_ITEM)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 0 assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_QUALIFIER)) == 0
def test_query_struct_amenity_two_words(): def test_query_struct_amenity_two_words():
@@ -121,15 +121,15 @@ def test_query_struct_amenity_two_words():
q.add_node(query.BREAK_END, query.PhraseType.NONE) q.add_node(query.BREAK_END, query.PhraseType.NONE)
for trange in [(0, 1), (1, 2)]: for trange in [(0, 1), (1, 2)]:
q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(*trange), query.TOKEN_PARTIAL, mktoken(1))
q.add_token(query.TokenRange(*trange), query.TokenType.NEAR_ITEM, mktoken(2)) q.add_token(query.TokenRange(*trange), query.TOKEN_NEAR_ITEM, mktoken(2))
q.add_token(query.TokenRange(*trange), query.TokenType.QUALIFIER, mktoken(3)) q.add_token(query.TokenRange(*trange), query.TOKEN_QUALIFIER, mktoken(3))
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1 assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_PARTIAL)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM)) == 0 assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_NEAR_ITEM)) == 0
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 1 assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_QUALIFIER)) == 1
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.PARTIAL)) == 1 assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_PARTIAL)) == 1
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.NEAR_ITEM)) == 0 assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_NEAR_ITEM)) == 0
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.QUALIFIER)) == 1 assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_QUALIFIER)) == 1

View File

@@ -9,7 +9,7 @@ Tests for creating abstract searches from token assignments.
""" """
import pytest import pytest
from nominatim_api.search.query import Token, TokenRange, PhraseType, TokenType, QueryStruct, Phrase from nominatim_api.search.query import Token, TokenRange, PhraseType, QueryStruct, Phrase
import nominatim_api.search.query as qmod import nominatim_api.search.query as qmod
from nominatim_api.search.db_search_builder import SearchBuilder from nominatim_api.search.db_search_builder import SearchBuilder
from nominatim_api.search.token_assignment import TokenAssignment from nominatim_api.search.token_assignment import TokenAssignment
@@ -32,7 +32,7 @@ def make_query(*args):
for end, ttype, tinfo in tlist: for end, ttype, tinfo in tlist:
for tid, word in tinfo: for tid, word in tinfo:
q.add_token(TokenRange(start, end), ttype, q.add_token(TokenRange(start, end), ttype,
MyToken(penalty=0.5 if ttype == TokenType.PARTIAL else 0.0, MyToken(penalty=0.5 if ttype == qmod.TOKEN_PARTIAL else 0.0,
token=tid, count=1, addr_count=1, token=tid, count=1, addr_count=1,
lookup_word=word)) lookup_word=word))
@@ -41,7 +41,7 @@ def make_query(*args):
def test_country_search(): def test_country_search():
q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])]) q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))
@@ -55,7 +55,7 @@ def test_country_search():
def test_country_search_with_country_restriction(): def test_country_search_with_country_restriction():
q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])]) q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'en,fr'})) builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'en,fr'}))
searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))
@@ -69,7 +69,7 @@ def test_country_search_with_country_restriction():
def test_country_search_with_conflicting_country_restriction(): def test_country_search_with_conflicting_country_restriction():
q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])]) q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'})) builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'}))
searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))
@@ -78,7 +78,7 @@ def test_country_search_with_conflicting_country_restriction():
def test_postcode_search_simple(): def test_postcode_search_simple():
q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])]) q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])])
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1))))
@@ -94,8 +94,8 @@ def test_postcode_search_simple():
def test_postcode_with_country(): def test_postcode_with_country():
q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])], q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
[(2, TokenType.COUNTRY, [(1, 'xx')])]) [(2, qmod.TOKEN_COUNTRY, [(1, 'xx')])])
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1), searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
@@ -112,8 +112,8 @@ def test_postcode_with_country():
def test_postcode_with_address(): def test_postcode_with_address():
q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])], q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
[(2, TokenType.PARTIAL, [(100, 'word')])]) [(2, qmod.TOKEN_PARTIAL, [(100, 'word')])])
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1), searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
@@ -130,9 +130,9 @@ def test_postcode_with_address():
def test_postcode_with_address_with_full_word(): def test_postcode_with_address_with_full_word():
q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])], q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
[(2, TokenType.PARTIAL, [(100, 'word')]), [(2, qmod.TOKEN_PARTIAL, [(100, 'word')]),
(2, TokenType.WORD, [(1, 'full')])]) (2, qmod.TOKEN_WORD, [(1, 'full')])])
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1), searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
@@ -151,7 +151,7 @@ def test_postcode_with_address_with_full_word():
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True}, @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True},
{'near': '10,10'}]) {'near': '10,10'}])
def test_near_item_only(kwargs): def test_near_item_only(kwargs):
q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])]) q = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(2, 'foo')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs)) builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))
@@ -167,7 +167,7 @@ def test_near_item_only(kwargs):
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'}, @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
{}]) {}])
def test_near_item_skipped(kwargs): def test_near_item_skipped(kwargs):
q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])]) q = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(2, 'foo')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs)) builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))
@@ -176,8 +176,8 @@ def test_near_item_skipped(kwargs):
def test_name_only_search(): def test_name_only_search():
q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(1, TokenType.WORD, [(100, 'a')])]) (1, qmod.TOKEN_WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))
@@ -195,9 +195,9 @@ def test_name_only_search():
def test_name_with_qualifier(): def test_name_with_qualifier():
q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(1, TokenType.WORD, [(100, 'a')])], (1, qmod.TOKEN_WORD, [(100, 'a')])],
[(2, TokenType.QUALIFIER, [(55, 'hotel')])]) [(2, qmod.TOKEN_QUALIFIER, [(55, 'hotel')])])
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1), searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
@@ -216,9 +216,9 @@ def test_name_with_qualifier():
def test_name_with_housenumber_search(): def test_name_with_housenumber_search():
q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(1, TokenType.WORD, [(100, 'a')])], (1, qmod.TOKEN_WORD, [(100, 'a')])],
[(2, TokenType.HOUSENUMBER, [(66, '66')])]) [(2, qmod.TOKEN_HOUSENUMBER, [(66, '66')])])
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1), searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
@@ -236,12 +236,12 @@ def test_name_with_housenumber_search():
def test_name_and_address(): def test_name_and_address():
q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(1, TokenType.WORD, [(100, 'a')])], (1, qmod.TOKEN_WORD, [(100, 'a')])],
[(2, TokenType.PARTIAL, [(2, 'b')]), [(2, qmod.TOKEN_PARTIAL, [(2, 'b')]),
(2, TokenType.WORD, [(101, 'b')])], (2, qmod.TOKEN_WORD, [(101, 'b')])],
[(3, TokenType.PARTIAL, [(3, 'c')]), [(3, qmod.TOKEN_PARTIAL, [(3, 'c')]),
(3, TokenType.WORD, [(102, 'c')])] (3, qmod.TOKEN_WORD, [(102, 'c')])]
) )
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
@@ -261,13 +261,13 @@ def test_name_and_address():
def test_name_and_complex_address(): def test_name_and_complex_address():
q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(1, TokenType.WORD, [(100, 'a')])], (1, qmod.TOKEN_WORD, [(100, 'a')])],
[(2, TokenType.PARTIAL, [(2, 'b')]), [(2, qmod.TOKEN_PARTIAL, [(2, 'b')]),
(3, TokenType.WORD, [(101, 'bc')])], (3, qmod.TOKEN_WORD, [(101, 'bc')])],
[(3, TokenType.PARTIAL, [(3, 'c')])], [(3, qmod.TOKEN_PARTIAL, [(3, 'c')])],
[(4, TokenType.PARTIAL, [(4, 'd')]), [(4, qmod.TOKEN_PARTIAL, [(4, 'd')]),
(4, TokenType.WORD, [(103, 'd')])] (4, qmod.TOKEN_WORD, [(103, 'd')])]
) )
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
@@ -287,9 +287,9 @@ def test_name_and_complex_address():
def test_name_only_near_search(): def test_name_only_near_search():
q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])], q = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]), [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])]) (2, qmod.TOKEN_WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2), searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
@@ -303,8 +303,8 @@ def test_name_only_near_search():
def test_name_only_search_with_category(): def test_name_only_search_with_category():
q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(1, TokenType.WORD, [(100, 'a')])]) (1, qmod.TOKEN_WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})) builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))
@@ -317,9 +317,9 @@ def test_name_only_search_with_category():
def test_name_with_near_item_search_with_category_mismatch(): def test_name_with_near_item_search_with_category_mismatch():
q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])], q = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]), [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])]) (2, qmod.TOKEN_WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})) builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2), searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
@@ -329,9 +329,9 @@ def test_name_with_near_item_search_with_category_mismatch():
def test_name_with_near_item_search_with_category_match(): def test_name_with_near_item_search_with_category_match():
q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])], q = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]), [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])]) (2, qmod.TOKEN_WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'), builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
('this', 'that')]})) ('this', 'that')]}))
@@ -346,9 +346,9 @@ def test_name_with_near_item_search_with_category_match():
def test_name_with_qualifier_search_with_category_mismatch(): def test_name_with_qualifier_search_with_category_mismatch():
q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])], q = make_query([(1, qmod.TOKEN_QUALIFIER, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]), [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])]) (2, qmod.TOKEN_WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})) builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2), searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
@@ -358,9 +358,9 @@ def test_name_with_qualifier_search_with_category_mismatch():
def test_name_with_qualifier_search_with_category_match(): def test_name_with_qualifier_search_with_category_match():
q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])], q = make_query([(1, qmod.TOKEN_QUALIFIER, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]), [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])]) (2, qmod.TOKEN_WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'), builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
('this', 'that')]})) ('this', 'that')]}))
@@ -375,8 +375,8 @@ def test_name_with_qualifier_search_with_category_match():
def test_name_only_search_with_countries(): def test_name_only_search_with_countries():
q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
(1, TokenType.WORD, [(100, 'a')])]) (1, qmod.TOKEN_WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'de,en'})) builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'de,en'}))
searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))
@@ -397,14 +397,14 @@ def make_counted_searches(name_part, name_full, address_part, address_full,
q.add_node(qmod.BREAK_WORD, PhraseType.NONE) q.add_node(qmod.BREAK_WORD, PhraseType.NONE)
q.add_node(qmod.BREAK_END, PhraseType.NONE) q.add_node(qmod.BREAK_END, PhraseType.NONE)
q.add_token(TokenRange(0, 1), TokenType.PARTIAL, q.add_token(TokenRange(0, 1), qmod.TOKEN_PARTIAL,
MyToken(0.5, 1, name_part, 1, 'name_part')) MyToken(0.5, 1, name_part, 1, 'name_part'))
q.add_token(TokenRange(0, 1), TokenType.WORD, q.add_token(TokenRange(0, 1), qmod.TOKEN_WORD,
MyToken(0, 101, name_full, 1, 'name_full')) MyToken(0, 101, name_full, 1, 'name_full'))
for i in range(num_address_parts): for i in range(num_address_parts):
q.add_token(TokenRange(i + 1, i + 2), TokenType.PARTIAL, q.add_token(TokenRange(i + 1, i + 2), qmod.TOKEN_PARTIAL,
MyToken(0.5, 2, address_part, 1, 'address_part')) MyToken(0.5, 2, address_part, 1, 'address_part'))
q.add_token(TokenRange(i + 1, i + 2), TokenType.WORD, q.add_token(TokenRange(i + 1, i + 2), qmod.TOKEN_WORD,
MyToken(0, 102, address_full, 1, 'address_full')) MyToken(0, 102, address_full, 1, 'address_full'))
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())

View File

@@ -11,7 +11,7 @@ import pytest
import pytest_asyncio import pytest_asyncio
from nominatim_api import NominatimAPIAsync from nominatim_api import NominatimAPIAsync
from nominatim_api.search.query import Phrase, PhraseType, TokenType from nominatim_api.search.query import Phrase, PhraseType
import nominatim_api.search.query as qmod import nominatim_api.search.query as qmod
import nominatim_api.search.icu_tokenizer as tok import nominatim_api.search.icu_tokenizer as tok
from nominatim_api.logging import set_log_output, get_and_disable from nominatim_api.logging import set_log_output, get_and_disable
@@ -101,8 +101,8 @@ async def test_splitting_in_transliteration(conn):
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.parametrize('term,order', [('23456', ['POSTCODE', 'HOUSENUMBER', 'WORD', 'PARTIAL']), @pytest.mark.parametrize('term,order', [('23456', ['P', 'H', 'W', 'w']),
('3', ['HOUSENUMBER', 'POSTCODE', 'WORD', 'PARTIAL']) ('3', ['H', 'P', 'W', 'w'])
]) ])
async def test_penalty_postcodes_and_housenumbers(conn, term, order): async def test_penalty_postcodes_and_housenumbers(conn, term, order):
ana = await tok.create_query_analyzer(conn) ana = await tok.create_query_analyzer(conn)
@@ -116,7 +116,7 @@ async def test_penalty_postcodes_and_housenumbers(conn, term, order):
assert query.num_token_slots() == 1 assert query.num_token_slots() == 1
torder = [(tl.tokens[0].penalty, tl.ttype.name) for tl in query.nodes[0].starting] torder = [(tl.tokens[0].penalty, tl.ttype) for tl in query.nodes[0].starting]
torder.sort() torder.sort()
assert [t[1] for t in torder] == order assert [t[1] for t in torder] == order
@@ -132,7 +132,7 @@ async def test_category_words_only_at_beginning(conn):
assert query.num_token_slots() == 3 assert query.num_token_slots() == 3
assert len(query.nodes[0].starting) == 1 assert len(query.nodes[0].starting) == 1
assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM assert query.nodes[0].starting[0].ttype == qmod.TOKEN_NEAR_ITEM
assert not query.nodes[2].starting assert not query.nodes[2].starting
@@ -146,7 +146,7 @@ async def test_freestanding_qualifier_words_become_category(conn):
assert query.num_token_slots() == 1 assert query.num_token_slots() == 1
assert len(query.nodes[0].starting) == 1 assert len(query.nodes[0].starting) == 1
assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM assert query.nodes[0].starting[0].ttype == qmod.TOKEN_NEAR_ITEM
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -159,9 +159,9 @@ async def test_qualifier_words(conn):
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo')) query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5 assert query.num_token_slots() == 5
assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[0].starting) == {qmod.TOKEN_QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[2].starting) == {qmod.TOKEN_QUALIFIER}
assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[4].starting) == {qmod.TOKEN_QUALIFIER}
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -173,10 +173,10 @@ async def test_add_unknown_housenumbers(conn):
query = await ana.analyze_query(make_phrase('466 23 99834 34a')) query = await ana.analyze_query(make_phrase('466 23 99834 34a'))
assert query.num_token_slots() == 4 assert query.num_token_slots() == 4
assert query.nodes[0].starting[0].ttype == TokenType.HOUSENUMBER assert query.nodes[0].starting[0].ttype == qmod.TOKEN_HOUSENUMBER
assert len(query.nodes[0].starting[0].tokens) == 1 assert len(query.nodes[0].starting[0].tokens) == 1
assert query.nodes[0].starting[0].tokens[0].token == 0 assert query.nodes[0].starting[0].tokens[0].token == 0
assert query.nodes[1].starting[0].ttype == TokenType.HOUSENUMBER assert query.nodes[1].starting[0].ttype == qmod.TOKEN_HOUSENUMBER
assert len(query.nodes[1].starting[0].tokens) == 1 assert len(query.nodes[1].starting[0].tokens) == 1
assert query.nodes[1].starting[0].tokens[0].token == 1 assert query.nodes[1].starting[0].tokens[0].token == 1
assert not query.nodes[2].starting assert not query.nodes[2].starting

View File

@@ -9,7 +9,7 @@ Test for creation of token assignments from tokenized queries.
""" """
import pytest import pytest
from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, TokenType, TokenRange, Token from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, TokenRange, Token
import nominatim_api.search.query as qmod import nominatim_api.search.query as qmod
from nominatim_api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE from nominatim_api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE
@@ -52,9 +52,9 @@ def test_query_with_missing_tokens():
def test_one_word_query(): def test_one_word_query():
q = make_query((qmod.BREAK_START, PhraseType.NONE, q = make_query((qmod.BREAK_START, PhraseType.NONE,
[(1, TokenType.PARTIAL), [(1, qmod.TOKEN_PARTIAL),
(1, TokenType.WORD), (1, qmod.TOKEN_WORD),
(1, TokenType.HOUSENUMBER)])) (1, qmod.TOKEN_HOUSENUMBER)]))
res = list(yield_token_assignments(q)) res = list(yield_token_assignments(q))
assert res == [TokenAssignment(name=TokenRange(0, 1))] assert res == [TokenAssignment(name=TokenRange(0, 1))]
@@ -62,7 +62,7 @@ def test_one_word_query():
def test_single_postcode(): def test_single_postcode():
q = make_query((qmod.BREAK_START, PhraseType.NONE, q = make_query((qmod.BREAK_START, PhraseType.NONE,
[(1, TokenType.POSTCODE)])) [(1, qmod.TOKEN_POSTCODE)]))
res = list(yield_token_assignments(q)) res = list(yield_token_assignments(q))
assert res == [TokenAssignment(postcode=TokenRange(0, 1))] assert res == [TokenAssignment(postcode=TokenRange(0, 1))]
@@ -70,7 +70,7 @@ def test_single_postcode():
def test_single_country_name(): def test_single_country_name():
q = make_query((qmod.BREAK_START, PhraseType.NONE, q = make_query((qmod.BREAK_START, PhraseType.NONE,
[(1, TokenType.COUNTRY)])) [(1, qmod.TOKEN_COUNTRY)]))
res = list(yield_token_assignments(q)) res = list(yield_token_assignments(q))
assert res == [TokenAssignment(country=TokenRange(0, 1))] assert res == [TokenAssignment(country=TokenRange(0, 1))]
@@ -78,8 +78,8 @@ def test_single_country_name():
def test_single_word_poi_search(): def test_single_word_poi_search():
q = make_query((qmod.BREAK_START, PhraseType.NONE, q = make_query((qmod.BREAK_START, PhraseType.NONE,
[(1, TokenType.NEAR_ITEM), [(1, qmod.TOKEN_NEAR_ITEM),
(1, TokenType.QUALIFIER)])) (1, qmod.TOKEN_QUALIFIER)]))
res = list(yield_token_assignments(q)) res = list(yield_token_assignments(q))
assert res == [TokenAssignment(near_item=TokenRange(0, 1))] assert res == [TokenAssignment(near_item=TokenRange(0, 1))]
@@ -87,9 +87,9 @@ def test_single_word_poi_search():
@pytest.mark.parametrize('btype', [qmod.BREAK_WORD, qmod.BREAK_PART, qmod.BREAK_TOKEN]) @pytest.mark.parametrize('btype', [qmod.BREAK_WORD, qmod.BREAK_PART, qmod.BREAK_TOKEN])
def test_multiple_simple_words(btype): def test_multiple_simple_words(btype):
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(btype, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (btype, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]),
(btype, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (btype, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]))
penalty = PENALTY_TOKENCHANGE[btype] penalty = PENALTY_TOKENCHANGE[btype]
@@ -107,8 +107,8 @@ def test_multiple_simple_words(btype):
def test_multiple_words_respect_phrase_break(): def test_multiple_words_respect_phrase_break():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(name=TokenRange(0, 1), TokenAssignment(name=TokenRange(0, 1),
@@ -118,8 +118,8 @@ def test_multiple_words_respect_phrase_break():
def test_housenumber_and_street(): def test_housenumber_and_street():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_HOUSENUMBER)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(name=TokenRange(1, 2), TokenAssignment(name=TokenRange(1, 2),
@@ -129,8 +129,8 @@ def test_housenumber_and_street():
def test_housenumber_and_street_backwards(): def test_housenumber_and_street_backwards():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_HOUSENUMBER)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(name=TokenRange(0, 1), TokenAssignment(name=TokenRange(0, 1),
@@ -140,10 +140,10 @@ def test_housenumber_and_street_backwards():
def test_housenumber_and_postcode(): def test_housenumber_and_postcode():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_HOUSENUMBER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)])) (qmod.BREAK_WORD, PhraseType.NONE, [(4, qmod.TOKEN_POSTCODE)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=pytest.approx(0.3), TokenAssignment(penalty=pytest.approx(0.3),
@@ -157,10 +157,10 @@ def test_housenumber_and_postcode():
postcode=TokenRange(3, 4))) postcode=TokenRange(3, 4)))
def test_postcode_and_housenumber(): def test_postcode_and_housenumber():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.POSTCODE)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_POSTCODE)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)])) (qmod.BREAK_WORD, PhraseType.NONE, [(4, qmod.TOKEN_HOUSENUMBER)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=pytest.approx(0.3), TokenAssignment(penalty=pytest.approx(0.3),
@@ -175,38 +175,38 @@ def test_postcode_and_housenumber():
def test_country_housenumber_postcode(): def test_country_housenumber_postcode():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_COUNTRY)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.HOUSENUMBER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_HOUSENUMBER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)])) (qmod.BREAK_WORD, PhraseType.NONE, [(4, qmod.TOKEN_POSTCODE)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY, @pytest.mark.parametrize('ttype', [qmod.TOKEN_POSTCODE, qmod.TOKEN_COUNTRY,
TokenType.NEAR_ITEM, TokenType.QUALIFIER]) qmod.TOKEN_NEAR_ITEM, qmod.TOKEN_QUALIFIER])
def test_housenumber_with_only_special_terms(ttype): def test_housenumber_with_only_special_terms(ttype):
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_HOUSENUMBER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, ttype)])) (qmod.BREAK_WORD, PhraseType.NONE, [(2, ttype)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.HOUSENUMBER, TokenType.COUNTRY]) @pytest.mark.parametrize('ttype', [qmod.TOKEN_POSTCODE, qmod.TOKEN_HOUSENUMBER, qmod.TOKEN_COUNTRY])
def test_multiple_special_tokens(ttype): def test_multiple_special_tokens(ttype):
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, ttype)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, ttype)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(3, ttype)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, ttype)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
def test_housenumber_many_phrases(): def test_housenumber_many_phrases():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(3, TokenType.PARTIAL)]), (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]), (qmod.BREAK_PHRASE, PhraseType.NONE, [(4, qmod.TOKEN_HOUSENUMBER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(5, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, TokenAssignment(penalty=0.1,
@@ -221,8 +221,8 @@ def test_housenumber_many_phrases():
def test_country_at_beginning(): def test_country_at_beginning():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_COUNTRY)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(1, 2), TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
@@ -230,8 +230,8 @@ def test_country_at_beginning():
def test_country_at_end(): def test_country_at_end():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)])) (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_COUNTRY)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(0, 1), TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
@@ -239,16 +239,16 @@ def test_country_at_end():
def test_country_in_middle(): def test_country_in_middle():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_COUNTRY)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
def test_postcode_with_designation(): def test_postcode_with_designation():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.POSTCODE)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_POSTCODE)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(1, 2), TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
@@ -258,8 +258,8 @@ def test_postcode_with_designation():
def test_postcode_with_designation_backwards(): def test_postcode_with_designation_backwards():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.POSTCODE)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_POSTCODE)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(name=TokenRange(0, 1), TokenAssignment(name=TokenRange(0, 1),
@@ -269,8 +269,8 @@ def test_postcode_with_designation_backwards():
def test_near_item_at_beginning(): def test_near_item_at_beginning():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_NEAR_ITEM)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(1, 2), TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
@@ -278,8 +278,8 @@ def test_near_item_at_beginning():
def test_near_item_at_end(): def test_near_item_at_end():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)])) (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_NEAR_ITEM)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(0, 1), TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
@@ -287,17 +287,17 @@ def test_near_item_at_end():
def test_near_item_in_middle(): def test_near_item_in_middle():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_NEAR_ITEM)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
def test_qualifier_at_beginning(): def test_qualifier_at_beginning():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_QUALIFIER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
@@ -309,11 +309,11 @@ def test_qualifier_at_beginning():
def test_qualifier_after_name(): def test_qualifier_after_name():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_QUALIFIER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(4, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(5, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
@@ -326,27 +326,27 @@ def test_qualifier_after_name():
def test_qualifier_before_housenumber(): def test_qualifier_before_housenumber():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_QUALIFIER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_HOUSENUMBER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
def test_qualifier_after_housenumber(): def test_qualifier_after_housenumber():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_HOUSENUMBER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.QUALIFIER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_QUALIFIER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
def test_qualifier_in_middle_of_phrase(): def test_qualifier_in_middle_of_phrase():
q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_QUALIFIER)]),
(qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(4, qmod.TOKEN_PARTIAL)]),
(qmod.BREAK_PHRASE, PhraseType.NONE, [(5, TokenType.PARTIAL)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(5, qmod.TOKEN_PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))