mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 18:37:58 +00:00
451 lines
16 KiB
Python
451 lines
16 KiB
Python
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
#
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
#
|
|
# Copyright (C) 2023 by the Nominatim developer community.
|
|
# For a full list of authors see the git log.
|
|
"""
|
|
Tests for creating abstract searches from token assignments.
|
|
"""
|
|
import pytest
|
|
|
|
from nominatim.api.search.query import Token, TokenRange, BreakType, PhraseType, TokenType, QueryStruct, Phrase
|
|
from nominatim.api.search.db_search_builder import SearchBuilder
|
|
from nominatim.api.search.token_assignment import TokenAssignment
|
|
from nominatim.api.types import SearchDetails
|
|
import nominatim.api.search.db_searches as dbs
|
|
|
|
class MyToken(Token):
    """Token stub for the tests in this module reporting a fixed category."""

    def get_category(self):
        """Return the constant category pair ``('this', 'that')``."""
        category = ('this', 'that')
        return category
|
|
|
|
|
|
def make_query(*args):
    """Assemble a QueryStruct from a compact token description.

    Each positional argument is a list of ``(end, ttype, tinfo)`` entries.
    The position of the argument is used as the start of the token range,
    ``end`` as its end, and ``tinfo`` is an iterable of ``(tid, word)``
    pairs, each of which becomes one MyToken added to the query.

    One WORD-break node is added per unit of the largest ``end`` value,
    followed by a single END-break node.
    """
    query = QueryStruct([Phrase(PhraseType.NONE, '')])

    highest_end = max(entry[0] for token_list in args for entry in token_list)
    for _ in range(highest_end):
        query.add_node(BreakType.WORD, PhraseType.NONE)
    query.add_node(BreakType.END, PhraseType.NONE)

    for start, token_list in enumerate(args):
        for end, ttype, tinfo in token_list:
            # First MyToken argument (presumably the token penalty - confirm
            # against Token): partial tokens get 0.5, all others 0.0.
            token_penalty = 0.5 if ttype == TokenType.PARTIAL else 0.0
            for tid, word in tinfo:
                query.add_token(TokenRange(start, end), ttype,
                                MyToken(token_penalty, tid, 1, word, True))

    return query
|
|
|
|
|
|
def test_country_search():
    """A country-only assignment gives one CountrySearch with all token words."""
    query = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1

    country_search = results[0]

    assert isinstance(country_search, dbs.CountrySearch)
    assert set(country_search.countries.values) == {'de', 'en'}
|
|
|
|
|
|
def test_country_search_with_country_restriction():
    """Only country tokens that are also in the ``countries`` parameter remain."""
    query = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'en,fr'})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1

    country_search = results[0]

    assert isinstance(country_search, dbs.CountrySearch)
    assert set(country_search.countries.values) == {'en'}
|
|
|
|
|
|
def test_country_search_with_conflicting_country_restriction():
    """No search is built when no country token matches the ``countries`` parameter."""
    query = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'fr'})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert results == []
|
|
|
|
|
|
def test_postcode_search_simple():
    """A postcode-only assignment gives a bare PostcodeSearch for that postcode."""
    query = make_query([(1, TokenType.POSTCODE, [(34, '2367')])])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    postcode_search = results[0]

    assert isinstance(postcode_search, dbs.PostcodeSearch)
    assert postcode_search.postcodes.values == ['2367']
    # Nothing but the postcode itself may constrain the search.
    assert not postcode_search.countries.values
    assert not postcode_search.lookups
    assert not postcode_search.rankings
|
|
|
|
|
|
def test_postcode_with_country():
    """Postcode plus country gives a PostcodeSearch restricted to that country."""
    query = make_query(
        [(1, TokenType.POSTCODE, [(34, '2367')])],
        [(2, TokenType.COUNTRY, [(1, 'xx')])],
    )
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1),
                                 country=TokenRange(1, 2))

    results = list(builder.build(assignment))

    assert len(results) == 1
    postcode_search = results[0]

    assert isinstance(postcode_search, dbs.PostcodeSearch)
    assert postcode_search.postcodes.values == ['2367']
    assert postcode_search.countries.values == ['xx']
    assert not postcode_search.lookups
    assert not postcode_search.rankings
|
|
|
|
|
|
def test_postcode_with_address():
    """Postcode plus a partial-only address term adds lookups but no rankings."""
    query = make_query(
        [(1, TokenType.POSTCODE, [(34, '2367')])],
        [(2, TokenType.PARTIAL, [(100, 'word')])],
    )
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1),
                                 address=[TokenRange(1, 2)])

    results = list(builder.build(assignment))

    assert len(results) == 1
    postcode_search = results[0]

    assert isinstance(postcode_search, dbs.PostcodeSearch)
    assert postcode_search.postcodes.values == ['2367']
    assert not postcode_search.countries
    assert postcode_search.lookups
    assert not postcode_search.rankings
|
|
|
|
|
|
def test_postcode_with_address_with_full_word():
    """An address term that also has a full-word token contributes one ranking."""
    address_tokens = [
        (2, TokenType.PARTIAL, [(100, 'word')]),
        (2, TokenType.WORD, [(1, 'full')]),
    ]
    query = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                       address_tokens)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1),
                                 address=[TokenRange(1, 2)])

    results = list(builder.build(assignment))

    assert len(results) == 1
    postcode_search = results[0]

    assert isinstance(postcode_search, dbs.PostcodeSearch)
    assert postcode_search.postcodes.values == ['2367']
    assert not postcode_search.countries
    assert postcode_search.lookups
    assert len(postcode_search.rankings) == 1
|
|
|
|
|
|
@pytest.mark.parametrize('kwargs', [
    {'viewbox': '0,0,1,1', 'bounded_viewbox': True},
    {'near': '10,10'},
])
def test_near_item_only(kwargs):
    """A lone near-item token with a bounded viewbox or near point gives a PoiSearch."""
    query = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
    details = SearchDetails.from_kwargs(kwargs)
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(near_item=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1

    poi_search = results[0]

    assert isinstance(poi_search, dbs.PoiSearch)
    # ('this', 'that') is the category MyToken.get_category() returns.
    assert poi_search.qualifiers.values == [('this', 'that')]
|
|
|
|
|
|
@pytest.mark.parametrize('kwargs', [
    {'viewbox': '0,0,1,1'},
    {},
])
def test_near_item_skipped(kwargs):
    """A lone near-item token with an unbounded viewbox or no details gives no search."""
    query = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
    details = SearchDetails.from_kwargs(kwargs)
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(near_item=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert results == []
|
|
|
|
|
|
def test_name_only_search():
    """A name-only assignment gives one unrestricted PlaceSearch."""
    name_tokens = [
        (1, TokenType.PARTIAL, [(1, 'a')]),
        (1, TokenType.WORD, [(100, 'a')]),
    ]
    query = make_query(name_tokens)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert not place_search.countries.values
    assert not place_search.housenumbers.values
    assert not place_search.qualifiers.values
    assert len(place_search.lookups) == 1
    assert len(place_search.rankings) == 1
|
|
|
|
|
|
def test_name_with_qualifier():
    """Name plus qualifier gives a PlaceSearch carrying the qualifier category."""
    name_tokens = [
        (1, TokenType.PARTIAL, [(1, 'a')]),
        (1, TokenType.WORD, [(100, 'a')]),
    ]
    qualifier_tokens = [(2, TokenType.QUALIFIER, [(55, 'hotel')])]
    query = make_query(name_tokens, qualifier_tokens)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 qualifier=TokenRange(1, 2))

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert not place_search.countries.values
    assert not place_search.housenumbers.values
    # ('this', 'that') is the category MyToken.get_category() returns.
    assert place_search.qualifiers.values == [('this', 'that')]
    assert len(place_search.lookups) == 1
    assert len(place_search.rankings) == 1
|
|
|
|
|
|
def test_name_with_housenumber_search():
    """Name plus housenumber gives a PlaceSearch carrying the housenumber."""
    name_tokens = [
        (1, TokenType.PARTIAL, [(1, 'a')]),
        (1, TokenType.WORD, [(100, 'a')]),
    ]
    housenumber_tokens = [(2, TokenType.HOUSENUMBER, [(66, '66')])]
    query = make_query(name_tokens, housenumber_tokens)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 housenumber=TokenRange(1, 2))

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert not place_search.countries.values
    assert place_search.housenumbers.values == ['66']
    assert len(place_search.lookups) == 1
    assert len(place_search.rankings) == 1
|
|
|
|
|
|
def test_name_and_address():
    """A name with two single-word address terms gives two lookups and three rankings."""
    name_tokens = [
        (1, TokenType.PARTIAL, [(1, 'a')]),
        (1, TokenType.WORD, [(100, 'a')]),
    ]
    first_address_tokens = [
        (2, TokenType.PARTIAL, [(2, 'b')]),
        (2, TokenType.WORD, [(101, 'b')]),
    ]
    second_address_tokens = [
        (3, TokenType.PARTIAL, [(3, 'c')]),
        (3, TokenType.WORD, [(102, 'c')]),
    ]
    query = make_query(name_tokens, first_address_tokens, second_address_tokens)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 2), TokenRange(2, 3)])

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert not place_search.countries.values
    assert not place_search.housenumbers.values
    assert len(place_search.lookups) == 2
    assert len(place_search.rankings) == 3
|
|
|
|
|
|
def test_name_and_complex_address():
    """Address ranges with overlapping multi-node tokens give two lookups and two rankings."""
    name_tokens = [
        (1, TokenType.PARTIAL, [(1, 'a')]),
        (1, TokenType.WORD, [(100, 'a')]),
    ]
    # The full word 'bc' starts at node 1 but ends at node 3, crossing the
    # boundary between the two address ranges used below.
    tokens_from_node_1 = [
        (2, TokenType.PARTIAL, [(2, 'b')]),
        (3, TokenType.WORD, [(101, 'bc')]),
    ]
    tokens_from_node_2 = [(3, TokenType.PARTIAL, [(3, 'c')])]
    tokens_from_node_3 = [
        (4, TokenType.PARTIAL, [(4, 'd')]),
        (4, TokenType.WORD, [(103, 'd')]),
    ]
    query = make_query(name_tokens, tokens_from_node_1,
                       tokens_from_node_2, tokens_from_node_3)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 2), TokenRange(2, 4)])

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert not place_search.countries.values
    assert not place_search.housenumbers.values
    assert len(place_search.lookups) == 2
    assert len(place_search.rankings) == 2
|
|
|
|
|
|
def test_name_only_near_search():
    """Near-item plus name gives a NearSearch that wraps a PlaceSearch."""
    near_tokens = [(1, TokenType.NEAR_ITEM, [(88, 'g')])]
    name_tokens = [
        (2, TokenType.PARTIAL, [(1, 'a')]),
        (2, TokenType.WORD, [(100, 'a')]),
    ]
    query = make_query(near_tokens, name_tokens)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    near_search = results[0]

    assert isinstance(near_search, dbs.NearSearch)
    assert isinstance(near_search.search, dbs.PlaceSearch)
|
|
|
|
|
|
def test_name_only_search_with_category():
    """The ``categories`` parameter becomes the qualifiers of a name-only PlaceSearch."""
    name_tokens = [
        (1, TokenType.PARTIAL, [(1, 'a')]),
        (1, TokenType.WORD, [(100, 'a')]),
    ]
    query = make_query(name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert place_search.qualifiers.values == [('foo', 'bar')]
|
|
|
|
|
|
def test_name_with_near_item_search_with_category_mismatch():
    """Near-item plus name gives no search when ``categories`` is only ('foo', 'bar')."""
    near_tokens = [(1, TokenType.NEAR_ITEM, [(88, 'g')])]
    name_tokens = [
        (2, TokenType.PARTIAL, [(1, 'a')]),
        (2, TokenType.WORD, [(100, 'a')]),
    ]
    query = make_query(near_tokens, name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert results == []
|
|
|
|
|
|
def test_name_with_near_item_search_with_category_match():
    """Near-item plus name gives a NearSearch when ``categories`` includes ('this', 'that')."""
    near_tokens = [(1, TokenType.NEAR_ITEM, [(88, 'g')])]
    name_tokens = [
        (2, TokenType.PARTIAL, [(1, 'a')]),
        (2, TokenType.WORD, [(100, 'a')]),
    ]
    query = make_query(near_tokens, name_tokens)
    details = SearchDetails.from_kwargs(
        {'categories': [('foo', 'bar'), ('this', 'that')]})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    near_search = results[0]

    assert isinstance(near_search, dbs.NearSearch)
    assert isinstance(near_search.search, dbs.PlaceSearch)
|
|
|
|
|
|
def test_name_with_qualifier_search_with_category_mismatch():
    """Qualifier plus name gives no search when ``categories`` is only ('foo', 'bar')."""
    qualifier_tokens = [(1, TokenType.QUALIFIER, [(88, 'g')])]
    name_tokens = [
        (2, TokenType.PARTIAL, [(1, 'a')]),
        (2, TokenType.WORD, [(100, 'a')]),
    ]
    query = make_query(qualifier_tokens, name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert results == []
|
|
|
|
|
|
def test_name_with_qualifier_search_with_category_match():
    """Qualifier plus name keeps only the ('this', 'that') qualifier out of ``categories``."""
    qualifier_tokens = [(1, TokenType.QUALIFIER, [(88, 'g')])]
    name_tokens = [
        (2, TokenType.PARTIAL, [(1, 'a')]),
        (2, TokenType.WORD, [(100, 'a')]),
    ]
    query = make_query(qualifier_tokens, name_tokens)
    details = SearchDetails.from_kwargs(
        {'categories': [('foo', 'bar'), ('this', 'that')]})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert place_search.qualifiers.values == [('this', 'that')]
|
|
|
|
|
|
def test_name_only_search_with_countries():
    """The ``countries`` parameter is carried over into a name-only PlaceSearch."""
    name_tokens = [
        (1, TokenType.PARTIAL, [(1, 'a')]),
        (1, TokenType.WORD, [(100, 'a')]),
    ]
    query = make_query(name_tokens)
    details = SearchDetails.from_kwargs({'countries': 'de,en'})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert set(place_search.countries.values) == {'de', 'en'}
    assert not place_search.housenumbers.values
|
|
|
|
|
|
def make_counted_searches(name_part, name_full, address_part, address_full,
                          num_address_parts=1):
    """Build the searches for one name term followed by address terms.

    The query gets a partial and a full-word token for the name on range
    (0, 1) and, for each of the `num_address_parts` address positions, a
    partial and a full-word token on the following single-node range.
    The four leading parameters are handed to MyToken as its third
    positional argument (presumably the token's count - confirm against
    Token).

    Returns the list of searches the SearchBuilder produces when the name
    is assigned range (0, 1) and the address one range covering all
    address positions.
    """
    query = QueryStruct([Phrase(PhraseType.NONE, '')])
    for _ in range(num_address_parts + 1):
        query.add_node(BreakType.WORD, PhraseType.NONE)
    query.add_node(BreakType.END, PhraseType.NONE)

    # Name tokens.
    query.add_token(TokenRange(0, 1), TokenType.PARTIAL,
                    MyToken(0.5, 1, name_part, 'name_part', True))
    query.add_token(TokenRange(0, 1), TokenType.WORD,
                    MyToken(0, 101, name_full, 'name_full', True))

    # Address tokens, one partial/full pair per address position.
    for start in range(1, num_address_parts + 1):
        query.add_token(TokenRange(start, start + 1), TokenType.PARTIAL,
                        MyToken(0.5, 2, address_part, 'address_part', True))
        query.add_token(TokenRange(start, start + 1), TokenType.WORD,
                        MyToken(0, 102, address_full, 'address_full', True))

    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(
        name=TokenRange(0, 1),
        address=[TokenRange(1, 1 + num_address_parts)])

    return list(builder.build(assignment))
|
|
|
|
|
|
def test_infrequent_partials_in_name():
    """With all values 1, one search looks up the name and restricts by address."""
    results = make_counted_searches(1, 1, 1, 1)

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert len(place_search.lookups) == 2
    assert len(place_search.rankings) == 2

    lookup_kinds = {(lookup.column, lookup.lookup_type.__name__)
                    for lookup in place_search.lookups}
    assert lookup_kinds == {('name_vector', 'LookupAll'),
                            ('nameaddress_vector', 'Restrict')}
|
|
|
|
|
|
def test_frequent_partials_in_name_and_address():
    """With partial values of 9999, two PlaceSearch variants are produced."""
    results = make_counted_searches(9999, 1, 9999, 1)

    assert len(results) == 2

    assert all(isinstance(found, dbs.PlaceSearch) for found in results)
    # Order by penalty so the two variants can be told apart.
    lower_penalty, higher_penalty = sorted(results,
                                           key=lambda found: found.penalty)

    lower_kinds = {(lookup.column, lookup.lookup_type.__name__)
                   for lookup in lower_penalty.lookups}
    assert lower_kinds == {('name_vector', 'LookupAny'),
                           ('nameaddress_vector', 'Restrict')}

    higher_kinds = {(lookup.column, lookup.lookup_type.__name__)
                    for lookup in higher_penalty.lookups}
    assert higher_kinds == {('nameaddress_vector', 'LookupAll'),
                            ('name_vector', 'LookupAll')}
|
|
|
|
|
|
def test_too_frequent_partials_in_name_and_address():
    """With partial values of 20000 and 10000, only one PlaceSearch is produced."""
    results = make_counted_searches(20000, 1, 10000, 1)

    assert len(results) == 1

    assert all(isinstance(found, dbs.PlaceSearch) for found in results)
    by_penalty = sorted(results, key=lambda found: found.penalty)

    lookup_kinds = {(lookup.column, lookup.lookup_type.__name__)
                    for lookup in by_penalty[0].lookups}
    assert lookup_kinds == {('name_vector', 'LookupAny'),
                            ('nameaddress_vector', 'Restrict')}
|