forked from hans/Nominatim
simplify yield_lookups() function
Move creation of field lookups in separate functions to make the code more readable.
This commit is contained in:
@@ -15,7 +15,6 @@ from nominatim.api.search.query import QueryStruct, Token, TokenType, TokenRange
|
||||
from nominatim.api.search.token_assignment import TokenAssignment
|
||||
import nominatim.api.search.db_search_fields as dbf
|
||||
import nominatim.api.search.db_searches as dbs
|
||||
from nominatim.api.logging import log
|
||||
|
||||
|
||||
def wrap_near_search(categories: List[Tuple[str, str]],
|
||||
@@ -188,34 +187,29 @@ class SearchBuilder:
|
||||
be searched for. This takes into account how frequent the terms
|
||||
are and tries to find a lookup that optimizes index use.
|
||||
"""
|
||||
penalty = 0.0 # extra penalty currently unused
|
||||
|
||||
penalty = 0.0 # extra penalty
|
||||
name_partials = self.query.get_partials_list(name)
|
||||
exp_name_count = min(t.count for t in name_partials)
|
||||
addr_partials = []
|
||||
for trange in address:
|
||||
addr_partials.extend(self.query.get_partials_list(trange))
|
||||
name_tokens = [t.token for t in name_partials]
|
||||
|
||||
addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
|
||||
addr_tokens = [t.token for t in addr_partials]
|
||||
|
||||
partials_indexed = all(t.is_indexed for t in name_partials) \
|
||||
and all(t.is_indexed for t in addr_partials)
|
||||
exp_count = min(t.count for t in name_partials)
|
||||
|
||||
if (len(name_partials) > 3 or exp_name_count < 1000) and partials_indexed:
|
||||
# Lookup by name partials, use address partials to restrict results.
|
||||
lookup = [dbf.FieldLookup('name_vector',
|
||||
[t.token for t in name_partials], 'lookup_all')]
|
||||
if addr_tokens:
|
||||
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
|
||||
yield penalty, exp_name_count, lookup
|
||||
if (len(name_partials) > 3 or exp_count < 1000) and partials_indexed:
|
||||
yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
|
||||
return
|
||||
|
||||
exp_addr_count = min(t.count for t in addr_partials) if addr_partials else exp_name_count
|
||||
if exp_addr_count < 1000 and partials_indexed:
|
||||
exp_count = min(exp_count, min(t.count for t in addr_partials)) \
|
||||
if addr_partials else exp_count
|
||||
if exp_count < 1000 and partials_indexed:
|
||||
# Lookup by address partials and restrict results through name terms.
|
||||
# Give this a small penalty because lookups in the address index are
|
||||
# more expensive
|
||||
yield penalty + exp_addr_count/5000, exp_addr_count,\
|
||||
[dbf.FieldLookup('name_vector', [t.token for t in name_partials], 'restrict'),
|
||||
dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
|
||||
yield penalty + exp_count/5000, exp_count,\
|
||||
dbf.lookup_by_addr(name_tokens, addr_tokens)
|
||||
return
|
||||
|
||||
# Partial term to frequent. Try looking up by rare full names first.
|
||||
@@ -228,20 +222,17 @@ class SearchBuilder:
|
||||
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
|
||||
if rare_names:
|
||||
# Any of the full names applies with all of the partials from the address
|
||||
lookup = [dbf.FieldLookup('name_vector', [t.token for t in rare_names], 'lookup_any')]
|
||||
if addr_tokens:
|
||||
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
|
||||
yield penalty, sum(t.count for t in rare_names), lookup
|
||||
yield penalty, sum(t.count for t in rare_names),\
|
||||
dbf.lookup_by_any_name([t.token for t in rare_names], addr_tokens)
|
||||
|
||||
# To catch remaining results, lookup by name and address
|
||||
# We only do this if there is a reasonable number of results expected.
|
||||
if min(exp_name_count, exp_addr_count) < 10000:
|
||||
if exp_count < 10000:
|
||||
if all(t.is_indexed for t in name_partials):
|
||||
lookup = [dbf.FieldLookup('name_vector',
|
||||
[t.token for t in name_partials], 'lookup_all')]
|
||||
lookup = [dbf.FieldLookup('name_vector', name_tokens, 'lookup_all')]
|
||||
else:
|
||||
# we don't have the partials, try with the non-rare names
|
||||
non_rare_names = [t.token for t in name_fulls if t.count >= 1000]
|
||||
non_rare_names = [t.token for t in name_fulls if t.count >= 10000]
|
||||
if not non_rare_names:
|
||||
return
|
||||
lookup = [dbf.FieldLookup('name_vector', non_rare_names, 'lookup_any')]
|
||||
@@ -252,7 +243,7 @@ class SearchBuilder:
|
||||
# if there already was a search for all full tokens,
|
||||
# avoid this if anything has been found
|
||||
penalty += 0.25
|
||||
yield penalty, min(exp_name_count, exp_addr_count), lookup
|
||||
yield penalty, exp_count, lookup
|
||||
|
||||
|
||||
def get_name_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
|
||||
|
||||
@@ -211,3 +211,34 @@ class SearchData:
|
||||
self.rankings.append(ranking)
|
||||
else:
|
||||
self.penalty += ranking.default
|
||||
|
||||
|
||||
def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
|
||||
""" Create a lookup list where name tokens are looked up via index
|
||||
and potential address tokens are used to restrict the search further.
|
||||
"""
|
||||
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_all')]
|
||||
if addr_tokens:
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
|
||||
|
||||
return lookup
|
||||
|
||||
|
||||
def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
|
||||
""" Create a lookup list where name tokens are looked up via index
|
||||
and only one of the name tokens must be present.
|
||||
Potential address tokens are used to restrict the search further.
|
||||
"""
|
||||
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_any')]
|
||||
if addr_tokens:
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
|
||||
|
||||
return lookup
|
||||
|
||||
|
||||
def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
|
||||
""" Create a lookup list where address tokens are looked up via index
|
||||
and the name tokens are only used to restrict the search further.
|
||||
"""
|
||||
return [FieldLookup('name_vector', name_tokens, 'restrict'),
|
||||
FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
|
||||
|
||||
Reference in New Issue
Block a user