mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Do not look up by address vector when only a few tokens are available
Names of countries and states are exceedingly rare in the word count but very frequent in the address. A short name therefore risks producing too many results.
This commit is contained in:
@@ -212,7 +212,7 @@ class SearchBuilder:
|
|||||||
|
|
||||||
exp_count = min(exp_count, min(t.count for t in addr_partials)) \
|
exp_count = min(exp_count, min(t.count for t in addr_partials)) \
|
||||||
if addr_partials else exp_count
|
if addr_partials else exp_count
|
||||||
if exp_count < 1000 and partials_indexed:
|
if exp_count < 1000 and len(addr_tokens) > 3 and partials_indexed:
|
||||||
# Lookup by address partials and restrict results through name terms.
|
# Lookup by address partials and restrict results through name terms.
|
||||||
# Give this a small penalty because lookups in the address index are
|
# Give this a small penalty because lookups in the address index are
|
||||||
# more expensive
|
# more expensive
|
||||||
|
|||||||
@@ -332,9 +332,10 @@ def test_name_only_search_with_countries():
|
|||||||
assert not search.housenumbers.values
|
assert not search.housenumbers.values
|
||||||
|
|
||||||
|
|
||||||
def make_counted_searches(name_part, name_full, address_part, address_full):
|
def make_counted_searches(name_part, name_full, address_part, address_full,
|
||||||
|
num_address_parts=1):
|
||||||
q = QueryStruct([Phrase(PhraseType.NONE, '')])
|
q = QueryStruct([Phrase(PhraseType.NONE, '')])
|
||||||
for i in range(2):
|
for i in range(1 + num_address_parts):
|
||||||
q.add_node(BreakType.WORD, PhraseType.NONE)
|
q.add_node(BreakType.WORD, PhraseType.NONE)
|
||||||
q.add_node(BreakType.END, PhraseType.NONE)
|
q.add_node(BreakType.END, PhraseType.NONE)
|
||||||
|
|
||||||
@@ -342,15 +343,16 @@ def make_counted_searches(name_part, name_full, address_part, address_full):
|
|||||||
MyToken(0.5, 1, name_part, 'name_part', True))
|
MyToken(0.5, 1, name_part, 'name_part', True))
|
||||||
q.add_token(TokenRange(0, 1), TokenType.WORD,
|
q.add_token(TokenRange(0, 1), TokenType.WORD,
|
||||||
MyToken(0, 101, name_full, 'name_full', True))
|
MyToken(0, 101, name_full, 'name_full', True))
|
||||||
q.add_token(TokenRange(1, 2), TokenType.PARTIAL,
|
for i in range(num_address_parts):
|
||||||
MyToken(0.5, 2, address_part, 'address_part', True))
|
q.add_token(TokenRange(i + 1, i + 2), TokenType.PARTIAL,
|
||||||
q.add_token(TokenRange(1, 2), TokenType.WORD,
|
MyToken(0.5, 2, address_part, 'address_part', True))
|
||||||
MyToken(0, 102, address_full, 'address_full', True))
|
q.add_token(TokenRange(i + 1, i + 2), TokenType.WORD,
|
||||||
|
MyToken(0, 102, address_full, 'address_full', True))
|
||||||
|
|
||||||
builder = SearchBuilder(q, SearchDetails())
|
builder = SearchBuilder(q, SearchDetails())
|
||||||
|
|
||||||
return list(builder.build(TokenAssignment(name=TokenRange(0, 1),
|
return list(builder.build(TokenAssignment(name=TokenRange(0, 1),
|
||||||
address=[TokenRange(1, 2)])))
|
address=[TokenRange(1, 1 + num_address_parts)])))
|
||||||
|
|
||||||
|
|
||||||
def test_infrequent_partials_in_name():
|
def test_infrequent_partials_in_name():
|
||||||
@@ -368,7 +370,7 @@ def test_infrequent_partials_in_name():
|
|||||||
|
|
||||||
|
|
||||||
def test_frequent_partials_in_name_but_not_in_address():
|
def test_frequent_partials_in_name_but_not_in_address():
|
||||||
searches = make_counted_searches(10000, 1, 1, 1)
|
searches = make_counted_searches(10000, 1, 1, 1, num_address_parts=4)
|
||||||
|
|
||||||
assert len(searches) == 1
|
assert len(searches) == 1
|
||||||
search = searches[0]
|
search = searches[0]
|
||||||
|
|||||||
Reference in New Issue
Block a user