Merge pull request #3262 from lonvia/fix-category-search

Fix use of category parameter for search() function
This commit is contained in:
Sarah Hoffmann
2023-11-28 20:02:36 +01:00
committed by GitHub
12 changed files with 174 additions and 73 deletions

View File

@@ -89,12 +89,14 @@ class SearchBuilder:
if sdata is None: if sdata is None:
return return
categories = self.get_search_categories(assignment) near_items = self.get_near_items(assignment)
if near_items is not None and not near_items:
return # impossible combination of near items and category parameter
if assignment.name is None: if assignment.name is None:
if categories and not sdata.postcodes: if near_items and not sdata.postcodes:
sdata.qualifiers = categories sdata.qualifiers = near_items
categories = None near_items = None
builder = self.build_poi_search(sdata) builder = self.build_poi_search(sdata)
elif assignment.housenumber: elif assignment.housenumber:
hnr_tokens = self.query.get_tokens(assignment.housenumber, hnr_tokens = self.query.get_tokens(assignment.housenumber,
@@ -102,16 +104,16 @@ class SearchBuilder:
builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address) builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
else: else:
builder = self.build_special_search(sdata, assignment.address, builder = self.build_special_search(sdata, assignment.address,
bool(categories)) bool(near_items))
else: else:
builder = self.build_name_search(sdata, assignment.name, assignment.address, builder = self.build_name_search(sdata, assignment.name, assignment.address,
bool(categories)) bool(near_items))
if categories: if near_items:
penalty = min(categories.penalties) penalty = min(near_items.penalties)
categories.penalties = [p - penalty for p in categories.penalties] near_items.penalties = [p - penalty for p in near_items.penalties]
for search in builder: for search in builder:
yield dbs.NearSearch(penalty + assignment.penalty, categories, search) yield dbs.NearSearch(penalty + assignment.penalty, near_items, search)
else: else:
for search in builder: for search in builder:
search.penalty += assignment.penalty search.penalty += assignment.penalty
@@ -321,8 +323,15 @@ class SearchBuilder:
self.query.get_tokens(assignment.postcode, self.query.get_tokens(assignment.postcode,
TokenType.POSTCODE)) TokenType.POSTCODE))
if assignment.qualifier: if assignment.qualifier:
sdata.set_qualifiers(self.query.get_tokens(assignment.qualifier, tokens = self.query.get_tokens(assignment.qualifier, TokenType.QUALIFIER)
TokenType.QUALIFIER)) if self.details.categories:
tokens = [t for t in tokens if t.get_category() in self.details.categories]
if not tokens:
return None
sdata.set_qualifiers(tokens)
elif self.details.categories:
sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
[0.0] * len(self.details.categories))
if assignment.address: if assignment.address:
sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address]) sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
@@ -332,25 +341,23 @@ class SearchBuilder:
return sdata return sdata
def get_search_categories(self, def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]: """ Collect tokens for near items search or use the categories
""" Collect tokens for category search or use the categories
requested per parameter. requested per parameter.
Returns None if no category search is requested. Returns None if no category search is requested.
""" """
if assignment.category: if assignment.near_item:
tokens: Dict[Tuple[str, str], float] = {} tokens: Dict[Tuple[str, str], float] = {}
for t in self.query.get_tokens(assignment.category, TokenType.CATEGORY): for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
cat = t.get_category() cat = t.get_category()
# The category of a near search will be that of near_item.
# Thus, if search is restricted to a category parameter,
# the two sets must intersect.
if (not self.details.categories or cat in self.details.categories)\ if (not self.details.categories or cat in self.details.categories)\
and t.penalty < tokens.get(cat, 1000.0): and t.penalty < tokens.get(cat, 1000.0):
tokens[cat] = t.penalty tokens[cat] = t.penalty
return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values())) return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))
if self.details.categories:
return dbf.WeightedCategories(self.details.categories,
[0.0] * len(self.details.categories))
return None return None

View File

@@ -766,9 +766,6 @@ class PlaceSearch(AbstractSearch):
assert result assert result
result.bbox = Bbox.from_wkb(row.bbox) result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = row.accuracy result.accuracy = row.accuracy
if not details.excluded or not result.place_id in details.excluded:
results.append(result)
if self.housenumbers and row.rank_address < 30: if self.housenumbers and row.rank_address < 30:
if row.placex_hnr: if row.placex_hnr:
subs = _get_placex_housenumbers(conn, row.placex_hnr, details) subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
@@ -788,6 +785,14 @@ class PlaceSearch(AbstractSearch):
sub.accuracy += 0.6 sub.accuracy += 0.6
results.append(sub) results.append(sub)
result.accuracy += 1.0 # penalty for missing housenumber # Only add the street as a result, if it meets all other
# filter conditions.
if (not details.excluded or result.place_id not in details.excluded)\
and (not self.qualifiers or result.category in self.qualifiers.values)\
and result.rank_address >= details.min_rank:
result.accuracy += 1.0 # penalty for missing housenumber
results.append(result)
else:
results.append(result)
return results return results

View File

@@ -184,13 +184,13 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
if row.type == 'S': if row.type == 'S':
if row.info['op'] in ('in', 'near'): if row.info['op'] in ('in', 'near'):
if trange.start == 0: if trange.start == 0:
query.add_token(trange, qmod.TokenType.CATEGORY, token) query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else: else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token) query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots(): if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token) token = copy(token)
token.penalty += 0.1 * (query.num_token_slots()) token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.CATEGORY, token) query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else: else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token) query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)

View File

@@ -107,15 +107,15 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
for row in await self.lookup_in_db(lookup_words): for row in await self.lookup_in_db(lookup_words):
for trange in words[row.word_token.strip()]: for trange in words[row.word_token.strip()]:
token, ttype = self.make_token(row) token, ttype = self.make_token(row)
if ttype == qmod.TokenType.CATEGORY: if ttype == qmod.TokenType.NEAR_ITEM:
if trange.start == 0: if trange.start == 0:
query.add_token(trange, qmod.TokenType.CATEGORY, token) query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype == qmod.TokenType.QUALIFIER: elif ttype == qmod.TokenType.QUALIFIER:
query.add_token(trange, qmod.TokenType.QUALIFIER, token) query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots(): if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token) token = copy(token)
token.penalty += 0.1 * (query.num_token_slots()) token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.CATEGORY, token) query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end: elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
query.add_token(trange, ttype, token) query.add_token(trange, ttype, token)
@@ -195,7 +195,7 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
ttype = qmod.TokenType.POSTCODE ttype = qmod.TokenType.POSTCODE
lookup_word = row.word_token[1:] lookup_word = row.word_token[1:]
else: else:
ttype = qmod.TokenType.CATEGORY if row.operator in ('in', 'near')\ ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
else qmod.TokenType.QUALIFIER else qmod.TokenType.QUALIFIER
lookup_word = row.word lookup_word = row.word
elif row.word_token.startswith(' '): elif row.word_token.startswith(' '):

View File

@@ -46,7 +46,7 @@ class TokenType(enum.Enum):
""" Country name or reference. """ """ Country name or reference. """
QUALIFIER = enum.auto() QUALIFIER = enum.auto()
""" Special term used together with name (e.g. _Hotel_ Bellevue). """ """ Special term used together with name (e.g. _Hotel_ Bellevue). """
CATEGORY = enum.auto() NEAR_ITEM = enum.auto()
""" Special term used as searchable object (e.g. supermarket in ...). """ """ Special term used as searchable object (e.g. supermarket in ...). """
@@ -78,7 +78,7 @@ class PhraseType(enum.Enum):
return not is_full_phrase or ttype != TokenType.QUALIFIER return not is_full_phrase or ttype != TokenType.QUALIFIER
if self == PhraseType.AMENITY: if self == PhraseType.AMENITY:
return ttype in (TokenType.WORD, TokenType.PARTIAL)\ return ttype in (TokenType.WORD, TokenType.PARTIAL)\
or (is_full_phrase and ttype == TokenType.CATEGORY)\ or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\
or (not is_full_phrase and ttype == TokenType.QUALIFIER) or (not is_full_phrase and ttype == TokenType.QUALIFIER)
if self == PhraseType.STREET: if self == PhraseType.STREET:
return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER) return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)

View File

@@ -46,7 +46,7 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes
housenumber: Optional[qmod.TokenRange] = None housenumber: Optional[qmod.TokenRange] = None
postcode: Optional[qmod.TokenRange] = None postcode: Optional[qmod.TokenRange] = None
country: Optional[qmod.TokenRange] = None country: Optional[qmod.TokenRange] = None
category: Optional[qmod.TokenRange] = None near_item: Optional[qmod.TokenRange] = None
qualifier: Optional[qmod.TokenRange] = None qualifier: Optional[qmod.TokenRange] = None
@@ -64,8 +64,8 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes
out.postcode = token.trange out.postcode = token.trange
elif token.ttype == qmod.TokenType.COUNTRY: elif token.ttype == qmod.TokenType.COUNTRY:
out.country = token.trange out.country = token.trange
elif token.ttype == qmod.TokenType.CATEGORY: elif token.ttype == qmod.TokenType.NEAR_ITEM:
out.category = token.trange out.near_item = token.trange
elif token.ttype == qmod.TokenType.QUALIFIER: elif token.ttype == qmod.TokenType.QUALIFIER:
out.qualifier = token.trange out.qualifier = token.trange
return out return out
@@ -109,7 +109,7 @@ class _TokenSequence:
""" """
# Country and category must be the final term for left-to-right # Country and category must be the final term for left-to-right
return len(self.seq) > 1 and \ return len(self.seq) > 1 and \
self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.CATEGORY) self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)
def appendable(self, ttype: qmod.TokenType) -> Optional[int]: def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
@@ -165,22 +165,22 @@ class _TokenSequence:
if ttype == qmod.TokenType.COUNTRY: if ttype == qmod.TokenType.COUNTRY:
return None if self.direction == -1 else 1 return None if self.direction == -1 else 1
if ttype == qmod.TokenType.CATEGORY: if ttype == qmod.TokenType.NEAR_ITEM:
return self.direction return self.direction
if ttype == qmod.TokenType.QUALIFIER: if ttype == qmod.TokenType.QUALIFIER:
if self.direction == 1: if self.direction == 1:
if (len(self.seq) == 1 if (len(self.seq) == 1
and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.CATEGORY)) \ and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
or (len(self.seq) == 2 or (len(self.seq) == 2
and self.seq[0].ttype == qmod.TokenType.CATEGORY and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
and self.seq[1].ttype == qmod.TokenType.PARTIAL): and self.seq[1].ttype == qmod.TokenType.PARTIAL):
return 1 return 1
return None return None
if self.direction == -1: if self.direction == -1:
return -1 return -1
tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.CATEGORY else self.seq tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
if len(tempseq) == 0: if len(tempseq) == 0:
return 1 return 1
if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER: if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
@@ -253,7 +253,7 @@ class _TokenSequence:
priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL) priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
if not self._adapt_penalty_from_priors(priors, 1): if not self._adapt_penalty_from_priors(priors, 1):
return False return False
if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq): if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
self.penalty += 1.0 self.penalty += 1.0
return True return True
@@ -368,7 +368,7 @@ class _TokenSequence:
# Postcode or country-only search # Postcode or country-only search
if not base.address: if not base.address:
if not base.housenumber and (base.postcode or base.country or base.category): if not base.housenumber and (base.postcode or base.country or base.near_item):
log().comment('postcode/country search') log().comment('postcode/country search')
yield dataclasses.replace(base, penalty=self.penalty) yield dataclasses.replace(base, penalty=self.penalty)
else: else:

View File

@@ -106,11 +106,11 @@ def test_query_struct_amenity_single_word():
q.add_node(query.BreakType.END, query.PhraseType.NONE) q.add_node(query.BreakType.END, query.PhraseType.NONE)
q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1))
q.add_token(query.TokenRange(0, 1), query.TokenType.CATEGORY, mktoken(2)) q.add_token(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM, mktoken(2))
q.add_token(query.TokenRange(0, 1), query.TokenType.QUALIFIER, mktoken(3)) q.add_token(query.TokenRange(0, 1), query.TokenType.QUALIFIER, mktoken(3))
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1 assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.CATEGORY)) == 1 assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 0 assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 0
@@ -121,14 +121,14 @@ def test_query_struct_amenity_two_words():
for trange in [(0, 1), (1, 2)]: for trange in [(0, 1), (1, 2)]:
q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1))
q.add_token(query.TokenRange(*trange), query.TokenType.CATEGORY, mktoken(2)) q.add_token(query.TokenRange(*trange), query.TokenType.NEAR_ITEM, mktoken(2))
q.add_token(query.TokenRange(*trange), query.TokenType.QUALIFIER, mktoken(3)) q.add_token(query.TokenRange(*trange), query.TokenType.QUALIFIER, mktoken(3))
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1 assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.CATEGORY)) == 0 assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM)) == 0
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 1 assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 1
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.PARTIAL)) == 1 assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.PARTIAL)) == 1
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.CATEGORY)) == 0 assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.NEAR_ITEM)) == 0
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.QUALIFIER)) == 1 assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.QUALIFIER)) == 1

View File

@@ -147,11 +147,11 @@ def test_postcode_with_address_with_full_word():
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True}, @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True},
{'near': '10,10'}]) {'near': '10,10'}])
def test_category_only(kwargs): def test_near_item_only(kwargs):
q = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])]) q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs)) builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
searches = list(builder.build(TokenAssignment(category=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))
assert len(searches) == 1 assert len(searches) == 1
@@ -163,11 +163,11 @@ def test_category_only(kwargs):
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'}, @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
{}]) {}])
def test_category_skipped(kwargs): def test_near_item_skipped(kwargs):
q = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])]) q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs)) builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
searches = list(builder.build(TokenAssignment(category=TokenRange(0, 1)))) searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))
assert len(searches) == 0 assert len(searches) == 0
@@ -284,13 +284,13 @@ def test_name_and_complex_address():
def test_name_only_near_search(): def test_name_only_near_search():
q = make_query([(1, TokenType.CATEGORY, [(88, 'g')])], q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]), [(2, TokenType.PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])]) (2, TokenType.WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails()) builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2), searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
category=TokenRange(0, 1)))) near_item=TokenRange(0, 1))))
assert len(searches) == 1 assert len(searches) == 1
search = searches[0] search = searches[0]
@@ -309,10 +309,68 @@ def test_name_only_search_with_category():
assert len(searches) == 1 assert len(searches) == 1
search = searches[0] search = searches[0]
assert isinstance(search, dbs.PlaceSearch)
assert search.qualifiers.values == [('foo', 'bar')]
def test_name_with_near_item_search_with_category_mismatch():
q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
near_item=TokenRange(0, 1))))
assert len(searches) == 0
def test_name_with_near_item_search_with_category_match():
q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
('this', 'that')]}))
searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
near_item=TokenRange(0, 1))))
assert len(searches) == 1
search = searches[0]
assert isinstance(search, dbs.NearSearch) assert isinstance(search, dbs.NearSearch)
assert isinstance(search.search, dbs.PlaceSearch) assert isinstance(search.search, dbs.PlaceSearch)
def test_name_with_qualifier_search_with_category_mismatch():
q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
qualifier=TokenRange(0, 1))))
assert len(searches) == 0
def test_name_with_qualifier_search_with_category_match():
q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
('this', 'that')]}))
searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
qualifier=TokenRange(0, 1))))
assert len(searches) == 1
search = searches[0]
assert isinstance(search, dbs.PlaceSearch)
assert search.qualifiers.values == [('this', 'that')]
def test_name_only_search_with_countries(): def test_name_only_search_with_countries():
q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
(1, TokenType.WORD, [(100, 'a')])]) (1, TokenType.WORD, [(100, 'a')])])

View File

@@ -134,7 +134,7 @@ async def test_category_words_only_at_beginning(conn):
assert query.num_token_slots() == 3 assert query.num_token_slots() == 3
assert len(query.nodes[0].starting) == 1 assert len(query.nodes[0].starting) == 1
assert query.nodes[0].starting[0].ttype == TokenType.CATEGORY assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM
assert not query.nodes[2].starting assert not query.nodes[2].starting
@@ -148,9 +148,9 @@ async def test_qualifier_words(conn):
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo')) query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5 assert query.num_token_slots() == 5
assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
@pytest.mark.asyncio @pytest.mark.asyncio

View File

@@ -212,7 +212,7 @@ async def test_category_words_only_at_beginning(conn):
assert query.num_token_slots() == 3 assert query.num_token_slots() == 3
assert len(query.nodes[0].starting) == 1 assert len(query.nodes[0].starting) == 1
assert query.nodes[0].starting[0].ttype == TokenType.CATEGORY assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM
assert not query.nodes[2].starting assert not query.nodes[2].starting
@@ -226,9 +226,9 @@ async def test_qualifier_words(conn):
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo')) query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5 assert query.num_token_slots() == 5
assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
@pytest.mark.asyncio @pytest.mark.asyncio

View File

@@ -281,6 +281,37 @@ class TestStreetWithHousenumber:
assert [r.place_id for r in results] == [2, 92, 2000] assert [r.place_id for r in results] == [2, 92, 2000]
def test_lookup_only_house_qualifier(self, apiobj):
lookup = FieldLookup('name_vector', [1,2], 'lookup_all')
ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
results = run_search(apiobj, 0.1, [lookup], [ranking], hnrs=['22'],
quals=[('place', 'house')])
assert [r.place_id for r in results] == [2, 92]
def test_lookup_only_street_qualifier(self, apiobj):
lookup = FieldLookup('name_vector', [1,2], 'lookup_all')
ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
results = run_search(apiobj, 0.1, [lookup], [ranking], hnrs=['22'],
quals=[('highway', 'residential')])
assert [r.place_id for r in results] == [1000, 2000]
@pytest.mark.parametrize('rank,found', [(26, True), (27, False), (30, False)])
def test_lookup_min_rank(self, apiobj, rank, found):
lookup = FieldLookup('name_vector', [1,2], 'lookup_all')
ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
results = run_search(apiobj, 0.1, [lookup], [ranking], hnrs=['22'],
details=SearchDetails(min_rank=rank))
assert [r.place_id for r in results] == ([2, 92, 1000, 2000] if found else [2, 92])
@pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON, @pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
napi.GeometryFormat.KML, napi.GeometryFormat.KML,
napi.GeometryFormat.SVG, napi.GeometryFormat.SVG,

View File

@@ -76,11 +76,11 @@ def test_single_country_name():
def test_single_word_poi_search(): def test_single_word_poi_search():
q = make_query((BreakType.START, PhraseType.NONE, q = make_query((BreakType.START, PhraseType.NONE,
[(1, TokenType.CATEGORY), [(1, TokenType.NEAR_ITEM),
(1, TokenType.QUALIFIER)])) (1, TokenType.QUALIFIER)]))
res = list(yield_token_assignments(q)) res = list(yield_token_assignments(q))
assert res == [TokenAssignment(category=TokenRange(0, 1))] assert res == [TokenAssignment(near_item=TokenRange(0, 1))]
@pytest.mark.parametrize('btype', [BreakType.WORD, BreakType.PART, BreakType.TOKEN]) @pytest.mark.parametrize('btype', [BreakType.WORD, BreakType.PART, BreakType.TOKEN])
@@ -182,7 +182,7 @@ def test_country_housenumber_postcode():
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY, @pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY,
TokenType.CATEGORY, TokenType.QUALIFIER]) TokenType.NEAR_ITEM, TokenType.QUALIFIER])
def test_housenumber_with_only_special_terms(ttype): def test_housenumber_with_only_special_terms(ttype):
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
(BreakType.WORD, PhraseType.NONE, [(2, ttype)])) (BreakType.WORD, PhraseType.NONE, [(2, ttype)]))
@@ -266,27 +266,27 @@ def test_postcode_with_designation_backwards():
address=[TokenRange(0, 1)])) address=[TokenRange(0, 1)]))
def test_category_at_beginning(): def test_near_item_at_beginning():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.CATEGORY)]), q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(1, 2), TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
category=TokenRange(0, 1))) near_item=TokenRange(0, 1)))
def test_category_at_end(): def test_near_item_at_end():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.CATEGORY)])) (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(0, 1), TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
category=TokenRange(1, 2))) near_item=TokenRange(1, 2)))
def test_category_in_middle(): def test_near_item_in_middle():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.CATEGORY)]), (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))