mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-25 18:48:15 +00:00
simplify handling of SQL lookup code for search_name
Use function classes which can be instantiated directly.
This commit is contained in:
@@ -15,6 +15,7 @@ from nominatim.api.search.query import QueryStruct, Token, TokenType, TokenRange
|
||||
from nominatim.api.search.token_assignment import TokenAssignment
|
||||
import nominatim.api.search.db_search_fields as dbf
|
||||
import nominatim.api.search.db_searches as dbs
|
||||
import nominatim.api.search.db_search_lookups as lookups
|
||||
|
||||
|
||||
def wrap_near_search(categories: List[Tuple[str, str]],
|
||||
@@ -152,7 +153,7 @@ class SearchBuilder:
|
||||
sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for r in address
|
||||
for t in self.query.get_partials_list(r)],
|
||||
'restrict')]
|
||||
lookups.Restrict)]
|
||||
penalty += 0.2
|
||||
yield dbs.PostcodeSearch(penalty, sdata)
|
||||
|
||||
@@ -162,7 +163,7 @@ class SearchBuilder:
|
||||
""" Build a simple address search for special entries where the
|
||||
housenumber is the main name token.
|
||||
"""
|
||||
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any')]
|
||||
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], lookups.LookupAny)]
|
||||
expected_count = sum(t.count for t in hnrs)
|
||||
|
||||
partials = [t for trange in address
|
||||
@@ -170,16 +171,16 @@ class SearchBuilder:
|
||||
|
||||
if expected_count < 8000:
|
||||
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for t in partials], 'restrict'))
|
||||
[t.token for t in partials], lookups.Restrict))
|
||||
elif len(partials) != 1 or partials[0].count < 10000:
|
||||
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for t in partials], 'lookup_all'))
|
||||
[t.token for t in partials], lookups.LookupAll))
|
||||
else:
|
||||
sdata.lookups.append(
|
||||
dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for t
|
||||
in self.query.get_tokens(address[0], TokenType.WORD)],
|
||||
'lookup_any'))
|
||||
lookups.LookupAny))
|
||||
|
||||
sdata.housenumbers = dbf.WeightedStrings([], [])
|
||||
yield dbs.PlaceSearch(0.05, sdata, expected_count)
|
||||
@@ -232,16 +233,16 @@ class SearchBuilder:
|
||||
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
|
||||
# Any of the full names applies with all of the partials from the address
|
||||
yield penalty, fulls_count / (2**len(addr_partials)),\
|
||||
dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens,
|
||||
'restrict' if fulls_count < 10000 else 'lookup_all')
|
||||
dbf.lookup_by_any_name([t.token for t in name_fulls],
|
||||
addr_tokens, fulls_count > 10000)
|
||||
|
||||
# To catch remaining results, lookup by name and address
|
||||
# We only do this if there is a reasonable number of results expected.
|
||||
exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
|
||||
if exp_count < 10000 and all(t.is_indexed for t in name_partials):
|
||||
lookup = [dbf.FieldLookup('name_vector', name_tokens, 'lookup_all')]
|
||||
lookup = [dbf.FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
|
||||
if addr_tokens:
|
||||
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all'))
|
||||
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll))
|
||||
penalty += 0.35 * max(0, 5 - len(name_partials) - len(addr_tokens))
|
||||
yield penalty, exp_count, lookup
|
||||
|
||||
|
||||
@@ -7,15 +7,17 @@
|
||||
"""
|
||||
Data structures for more complex fields in abstract search descriptions.
|
||||
"""
|
||||
from typing import List, Tuple, Iterator, cast, Dict
|
||||
from typing import List, Tuple, Iterator, Dict, Type
|
||||
import dataclasses
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim.typing import SaFromClause, SaColumn, SaExpression
|
||||
from nominatim.api.search.query import Token
|
||||
import nominatim.api.search.db_search_lookups as lookups
|
||||
from nominatim.utils.json_writer import JsonWriter
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class WeightedStrings:
|
||||
""" A list of strings together with a penalty.
|
||||
@@ -152,18 +154,12 @@ class FieldLookup:
|
||||
"""
|
||||
column: str
|
||||
tokens: List[int]
|
||||
lookup_type: str
|
||||
lookup_type: Type[lookups.LookupType]
|
||||
|
||||
def sql_condition(self, table: SaFromClause) -> SaColumn:
|
||||
""" Create an SQL expression for the given match condition.
|
||||
"""
|
||||
col = table.c[self.column]
|
||||
if self.lookup_type == 'lookup_all':
|
||||
return col.contains(self.tokens)
|
||||
if self.lookup_type == 'lookup_any':
|
||||
return cast(SaColumn, col.overlaps(self.tokens))
|
||||
|
||||
return sa.func.coalesce(sa.null(), col).contains(self.tokens) # pylint: disable=not-callable
|
||||
return self.lookup_type(table, self.column, self.tokens)
|
||||
|
||||
|
||||
class SearchData:
|
||||
@@ -229,22 +225,23 @@ def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[Fiel
|
||||
""" Create a lookup list where name tokens are looked up via index
|
||||
and potential address tokens are used to restrict the search further.
|
||||
"""
|
||||
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_all')]
|
||||
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
|
||||
if addr_tokens:
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookups.Restrict))
|
||||
|
||||
return lookup
|
||||
|
||||
|
||||
def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int],
|
||||
lookup_type: str) -> List[FieldLookup]:
|
||||
use_index_for_addr: bool) -> List[FieldLookup]:
|
||||
""" Create a lookup list where name tokens are looked up via index
|
||||
and only one of the name tokens must be present.
|
||||
Potential address tokens are used to restrict the search further.
|
||||
"""
|
||||
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_any')]
|
||||
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAny)]
|
||||
if addr_tokens:
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookup_type))
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens,
|
||||
lookups.LookupAll if use_index_for_addr else lookups.Restrict))
|
||||
|
||||
return lookup
|
||||
|
||||
@@ -253,5 +250,5 @@ def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[Field
|
||||
""" Create a lookup list where address tokens are looked up via index
|
||||
and the name tokens are only used to restrict the search further.
|
||||
"""
|
||||
return [FieldLookup('name_vector', name_tokens, 'restrict'),
|
||||
FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
|
||||
return [FieldLookup('name_vector', name_tokens, lookups.Restrict),
|
||||
FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll)]
|
||||
|
||||
78
nominatim/api/search/db_search_lookups.py
Normal file
78
nominatim/api/search/db_search_lookups.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of lookup functions for the search_name table.
|
||||
"""
|
||||
from typing import List, Any
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.compiler import compiles
|
||||
|
||||
from nominatim.typing import SaFromClause
|
||||
from nominatim.db.sqlalchemy_types import IntArray
|
||||
|
||||
# pylint: disable=consider-using-f-string
|
||||
|
||||
LookupType = sa.sql.expression.FunctionElement[Any]
|
||||
|
||||
class LookupAll(LookupType):
    """ SQL function element that matches the rows of the search_name
        table whose column contains every token of a given list.
        The lookup is meant to be answered with the help of an index.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        # The clause order (column first, token array second) is what
        # the compile hook for this element unpacks.
        target_column = getattr(table.c, column)
        token_array = sa.type_coerce(tokens, IntArray)
        super().__init__(target_column, token_array)
|
||||
|
||||
|
||||
@compiles(LookupAll) # type: ignore[no-untyped-call, misc]
def _default_lookup_all(element: LookupAll,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default rendering of LookupAll: '(<column> @> <tokens>)'.
    """
    col, tokens = list(element.clauses)
    # Compile the column before the token array, i.e. in the same order
    # in which the two appear in the generated SQL.
    lhs = compiler.process(col, **kw)
    rhs = compiler.process(tokens, **kw)
    return f"({lhs} @> {rhs})"
|
||||
|
||||
|
||||
|
||||
class LookupAny(LookupType):
    """ SQL function element that matches the rows whose column
        contains at least one token of a given list.
        The lookup is meant to be answered with the help of an index.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        # The clause order (column first, token array second) is what
        # the compile hook for this element unpacks.
        target_column = getattr(table.c, column)
        token_array = sa.type_coerce(tokens, IntArray)
        super().__init__(target_column, token_array)
|
||||
|
||||
|
||||
@compiles(LookupAny) # type: ignore[no-untyped-call, misc]
def _default_lookup_any(element: LookupAny,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default rendering of LookupAny: '(<column> && <tokens>)'.
    """
    col, tokens = list(element.clauses)
    # Compile the column before the token array, i.e. in the same order
    # in which the two appear in the generated SQL.
    lhs = compiler.process(col, **kw)
    rhs = compiler.process(tokens, **kw)
    return f"({lhs} && {rhs})"
|
||||
|
||||
|
||||
|
||||
class Restrict(LookupType):
    """ SQL function element that matches the rows whose column
        contains every token of a given list.
        In contrast to LookupAll, the search is not supposed to
        use an index.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        # The clause order (column first, token array second) is what
        # the compile hook for this element unpacks.
        target_column = getattr(table.c, column)
        token_array = sa.type_coerce(tokens, IntArray)
        super().__init__(target_column, token_array)
|
||||
|
||||
|
||||
@compiles(Restrict) # type: ignore[no-untyped-call, misc]
def _default_restrict(element: Restrict,
                      compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default rendering of Restrict:
        '(coalesce(null, <column>) @> <tokens>)'.
    """
    col, tokens = list(element.clauses)
    # NOTE(review): wrapping the column in coalesce() is presumably what
    # keeps the database from using the index here - confirm.
    lhs = compiler.process(col, **kw)
    rhs = compiler.process(tokens, **kw)
    return f"(coalesce(null, {lhs}) @> {rhs})"
|
||||
@@ -563,7 +563,6 @@ class PostcodeSearch(AbstractSearch):
|
||||
|
||||
if self.lookups:
|
||||
assert len(self.lookups) == 1
|
||||
assert self.lookups[0].lookup_type == 'restrict'
|
||||
tsearch = conn.t.search_name
|
||||
sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
|
||||
.where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
|
||||
|
||||
Reference in New Issue
Block a user