Merge pull request #3273 from lonvia/search-with-sqlite

Add forward search capability for SQLite databases
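The frontend now accepts an SQLite DSN of the form `sqlite:dbname=<file>` and turns it into a `sqlite+aiosqlite` SQLAlchemy URL (see the `NominatimAPIAsync` changes below). A minimal sketch of that conversion, written outside the Nominatim classes and with a made-up database path, could look like this:

```python
import sqlalchemy as sa

def make_sqlite_url(dsn: str) -> sa.engine.URL:
    """Turn a DSN like 'sqlite:dbname=/srv/nominatim/db.sqlite' into an
    async SQLAlchemy URL, mirroring the logic added in this commit.
    The path and the parsing helper are illustrative, not part of the commit."""
    assert dsn.startswith('sqlite:')
    params = dict(p.split('=', 1) for p in dsn[len('sqlite:'):].split(';') if p)
    return sa.engine.URL.create('sqlite+aiosqlite', database=params.get('dbname'))

print(make_sqlite_url('sqlite:dbname=/srv/nominatim/db.sqlite'))
# sqlite+aiosqlite:////srv/nominatim/db.sqlite
```

The commit also refuses to open a database file that does not exist unless `NOMINATIM_DATABASE_RW` is set, so a typo in the path fails early instead of silently creating an empty database.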
@@ -19,6 +19,7 @@ import sqlalchemy.ext.asyncio as sa_asyncio
from nominatim.errors import UsageError
from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.db.async_core_library import PGCORE_LIB, PGCORE_ERROR
import nominatim.db.sqlite_functions
from nominatim.config import Configuration
from nominatim.api.connection import SearchConnection
from nominatim.api.status import get_status, StatusResult
@@ -84,6 +85,14 @@ class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
|
||||
extra_args: Dict[str, Any] = {'future': True,
|
||||
'echo': self.config.get_bool('DEBUG_SQL')}
|
||||
|
||||
if self.config.get_int('API_POOL_SIZE') == 0:
|
||||
extra_args['poolclass'] = sa.pool.NullPool
|
||||
else:
|
||||
extra_args['poolclass'] = sa.pool.QueuePool
|
||||
extra_args['max_overflow'] = 0
|
||||
extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')
|
||||
|
||||
|
||||
is_sqlite = self.config.DATABASE_DSN.startswith('sqlite:')
|
||||
|
||||
if is_sqlite:
|
||||
@@ -92,6 +101,10 @@ class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
|
||||
dburl = sa.engine.URL.create('sqlite+aiosqlite',
|
||||
database=params.get('dbname'))
|
||||
|
||||
if not ('NOMINATIM_DATABASE_RW' in self.config.environ
|
||||
and self.config.get_bool('DATABASE_RW')) \
|
||||
and not Path(params.get('dbname', '')).is_file():
|
||||
raise UsageError(f"SQlite database '{params.get('dbname')}' does not exist.")
|
||||
else:
|
||||
dsn = self.config.get_database_params()
|
||||
query = {k: v for k, v in dsn.items()
|
||||
@@ -105,39 +118,40 @@ class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
|
||||
host=dsn.get('host'),
|
||||
port=int(dsn['port']) if 'port' in dsn else None,
|
||||
query=query)
|
||||
extra_args['max_overflow'] = 0
|
||||
extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')
|
||||
|
||||
engine = sa_asyncio.create_async_engine(dburl, **extra_args)
|
||||
|
||||
try:
|
||||
async with engine.begin() as conn:
|
||||
result = await conn.scalar(sa.text('SHOW server_version_num'))
|
||||
server_version = int(result)
|
||||
except (PGCORE_ERROR, sa.exc.OperationalError):
|
||||
if is_sqlite:
|
||||
server_version = 0
|
||||
|
||||
if server_version >= 110000 and not is_sqlite:
|
||||
@sa.event.listens_for(engine.sync_engine, "connect")
|
||||
def _on_connect(dbapi_con: Any, _: Any) -> None:
|
||||
cursor = dbapi_con.cursor()
|
||||
cursor.execute("SET jit_above_cost TO '-1'")
|
||||
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
|
||||
# Make sure that all connections get the new settings
|
||||
await self.close()
|
||||
|
||||
if is_sqlite:
|
||||
@sa.event.listens_for(engine.sync_engine, "connect")
|
||||
def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
|
||||
dbapi_con.run_async(lambda conn: conn.enable_load_extension(True))
|
||||
nominatim.db.sqlite_functions.install_custom_functions(dbapi_con)
|
||||
cursor = dbapi_con.cursor()
|
||||
cursor.execute("SELECT load_extension('mod_spatialite')")
|
||||
cursor.execute('SELECT SetDecimalPrecision(7)')
|
||||
dbapi_con.run_async(lambda conn: conn.enable_load_extension(False))
|
||||
else:
|
||||
try:
|
||||
async with engine.begin() as conn:
|
||||
result = await conn.scalar(sa.text('SHOW server_version_num'))
|
||||
server_version = int(result)
|
||||
except (PGCORE_ERROR, sa.exc.OperationalError):
|
||||
server_version = 0
|
||||
|
||||
if server_version >= 110000:
|
||||
@sa.event.listens_for(engine.sync_engine, "connect")
|
||||
def _on_connect(dbapi_con: Any, _: Any) -> None:
|
||||
cursor = dbapi_con.cursor()
|
||||
cursor.execute("SET jit_above_cost TO '-1'")
|
||||
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
|
||||
# Make sure that all connections get the new settings
|
||||
await engine.dispose()
|
||||
|
||||
self._property_cache['DB:server_version'] = server_version
|
||||
|
||||
self._tables = SearchTables(sa.MetaData(), engine.name) # pylint: disable=no-member
|
||||
self._tables = SearchTables(sa.MetaData()) # pylint: disable=no-member
|
||||
self._engine = engine
|
||||
|
||||
|
||||
|
||||
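The `_on_sqlite_connect` hook registered above enables extension loading, installs Nominatim's custom SQL functions and loads SpatiaLite on every new connection. The same steps can be tried in isolation with the standard `sqlite3` module; this is only a sketch and assumes a Python build that permits loadable extensions and a locally installed `mod_spatialite`:

```python
import sqlite3

# Standalone check that SpatiaLite can be loaded the way the connect hook
# above does it. ':memory:' and 'mod_spatialite' are assumptions about the
# local setup; enable_load_extension() is missing on Python builds compiled
# without extension support.
con = sqlite3.connect(':memory:')
con.enable_load_extension(True)
con.execute("SELECT load_extension('mod_spatialite')")
con.enable_load_extension(False)

print(con.execute('SELECT spatialite_version()').fetchone()[0])
```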
@@ -90,26 +90,42 @@ class BaseLogger:
|
||||
params = dict(compiled.params)
|
||||
if isinstance(extra_params, Mapping):
|
||||
for k, v in extra_params.items():
|
||||
params[k] = str(v)
|
||||
if hasattr(v, 'to_wkt'):
|
||||
params[k] = v.to_wkt()
|
||||
elif isinstance(v, (int, float)):
|
||||
params[k] = v
|
||||
else:
|
||||
params[k] = str(v)
|
||||
elif isinstance(extra_params, Sequence) and extra_params:
|
||||
for k in extra_params[0]:
|
||||
params[k] = f':{k}'
|
||||
|
||||
sqlstr = str(compiled)
|
||||
|
||||
if sa.__version__.startswith('1'):
|
||||
try:
|
||||
sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
|
||||
return sqlstr % tuple((repr(params.get(name, None))
|
||||
for name in compiled.positiontup)) # type: ignore
|
||||
except TypeError:
|
||||
return sqlstr
|
||||
if conn.dialect.name == 'postgresql':
|
||||
if sa.__version__.startswith('1'):
|
||||
try:
|
||||
sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
|
||||
return sqlstr % tuple((repr(params.get(name, None))
|
||||
for name in compiled.positiontup)) # type: ignore
|
||||
except TypeError:
|
||||
return sqlstr
|
||||
|
||||
# Fixes an odd issue with Python 3.7 where percentages are not
|
||||
# quoted correctly.
|
||||
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
|
||||
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
|
||||
return sqlstr % params
|
||||
# Fixes an odd issue with Python 3.7 where percentages are not
|
||||
# quoted correctly.
|
||||
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
|
||||
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
|
||||
return sqlstr % params
|
||||
|
||||
assert conn.dialect.name == 'sqlite'
|
||||
|
||||
# params in positional order
|
||||
pparams = (repr(params.get(name, None)) for name in compiled.positiontup) # type: ignore
|
||||
|
||||
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', '?', sqlstr)
|
||||
sqlstr = re.sub(r"\?", lambda m: next(pparams), sqlstr)
|
||||
|
||||
return sqlstr
|
||||
|
||||
class HTMLLogger(BaseLogger):
|
||||
""" Logger that formats messages in HTML.
|
||||
|
||||
@@ -180,7 +180,7 @@ class ReverseGeocoder:
|
||||
diststr = sa.text(f"{distance}")
|
||||
|
||||
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
|
||||
.where(t.c.geometry.ST_DWithin(WKT_PARAM, diststr))
|
||||
.where(t.c.geometry.within_distance(WKT_PARAM, diststr))
|
||||
.where(t.c.indexed_status == 0)
|
||||
.where(t.c.linked_place_id == None)
|
||||
.where(sa.or_(sa.not_(t.c.geometry.is_area()),
|
||||
@@ -219,7 +219,7 @@ class ReverseGeocoder:
|
||||
t = self.conn.t.placex
|
||||
|
||||
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
|
||||
.where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))
|
||||
.where(t.c.geometry.within_distance(WKT_PARAM, 0.001))
|
||||
.where(t.c.parent_place_id == parent_place_id)
|
||||
.where(sa.func.IsAddressPoint(t))
|
||||
.where(t.c.indexed_status == 0)
|
||||
@@ -241,7 +241,7 @@ class ReverseGeocoder:
|
||||
sa.select(t,
|
||||
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
|
||||
_locate_interpolation(t))
|
||||
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))
|
||||
.where(t.c.linegeo.within_distance(WKT_PARAM, distance))
|
||||
.where(t.c.startnumber != None)
|
||||
.order_by('distance')
|
||||
.limit(1))
|
||||
@@ -275,7 +275,7 @@ class ReverseGeocoder:
|
||||
inner = sa.select(t,
|
||||
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
|
||||
_locate_interpolation(t))\
|
||||
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
|
||||
.where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
|
||||
.where(t.c.parent_place_id == parent_place_id)\
|
||||
.order_by('distance')\
|
||||
.limit(1)\
|
||||
|
||||
@@ -15,6 +15,7 @@ from nominatim.api.search.query import QueryStruct, Token, TokenType, TokenRange
|
||||
from nominatim.api.search.token_assignment import TokenAssignment
|
||||
import nominatim.api.search.db_search_fields as dbf
|
||||
import nominatim.api.search.db_searches as dbs
|
||||
import nominatim.api.search.db_search_lookups as lookups
|
||||
|
||||
|
||||
def wrap_near_search(categories: List[Tuple[str, str]],
|
||||
@@ -152,7 +153,7 @@ class SearchBuilder:
|
||||
sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for r in address
|
||||
for t in self.query.get_partials_list(r)],
|
||||
'restrict')]
|
||||
lookups.Restrict)]
|
||||
penalty += 0.2
|
||||
yield dbs.PostcodeSearch(penalty, sdata)
|
||||
|
||||
@@ -162,7 +163,7 @@ class SearchBuilder:
|
||||
""" Build a simple address search for special entries where the
|
||||
housenumber is the main name token.
|
||||
"""
|
||||
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any')]
|
||||
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], lookups.LookupAny)]
|
||||
expected_count = sum(t.count for t in hnrs)
|
||||
|
||||
partials = [t for trange in address
|
||||
@@ -170,16 +171,16 @@ class SearchBuilder:
|
||||
|
||||
if expected_count < 8000:
|
||||
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for t in partials], 'restrict'))
|
||||
[t.token for t in partials], lookups.Restrict))
|
||||
elif len(partials) != 1 or partials[0].count < 10000:
|
||||
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for t in partials], 'lookup_all'))
|
||||
[t.token for t in partials], lookups.LookupAll))
|
||||
else:
|
||||
sdata.lookups.append(
|
||||
dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for t
|
||||
in self.query.get_tokens(address[0], TokenType.WORD)],
|
||||
'lookup_any'))
|
||||
lookups.LookupAny))
|
||||
|
||||
sdata.housenumbers = dbf.WeightedStrings([], [])
|
||||
yield dbs.PlaceSearch(0.05, sdata, expected_count)
|
||||
@@ -232,16 +233,16 @@ class SearchBuilder:
|
||||
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
|
||||
# Any of the full names applies with all of the partials from the address
|
||||
yield penalty, fulls_count / (2**len(addr_partials)),\
|
||||
dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens,
|
||||
'restrict' if fulls_count < 10000 else 'lookup_all')
|
||||
dbf.lookup_by_any_name([t.token for t in name_fulls],
|
||||
addr_tokens, fulls_count > 10000)
|
||||
|
||||
# To catch remaining results, lookup by name and address
|
||||
# We only do this if there is a reasonable number of results expected.
|
||||
exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
|
||||
if exp_count < 10000 and all(t.is_indexed for t in name_partials):
|
||||
lookup = [dbf.FieldLookup('name_vector', name_tokens, 'lookup_all')]
|
||||
lookup = [dbf.FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
|
||||
if addr_tokens:
|
||||
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all'))
|
||||
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll))
|
||||
penalty += 0.35 * max(0, 5 - len(name_partials) - len(addr_tokens))
|
||||
yield penalty, exp_count, lookup
|
||||
|
||||
|
||||
@@ -7,14 +7,16 @@
|
||||
"""
|
||||
Data structures for more complex fields in abstract search descriptions.
|
||||
"""
|
||||
from typing import List, Tuple, Iterator, cast, Dict
|
||||
from typing import List, Tuple, Iterator, Dict, Type
|
||||
import dataclasses
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import ARRAY
|
||||
|
||||
from nominatim.typing import SaFromClause, SaColumn, SaExpression
|
||||
from nominatim.api.search.query import Token
|
||||
import nominatim.api.search.db_search_lookups as lookups
|
||||
from nominatim.utils.json_writer import JsonWriter
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class WeightedStrings:
|
||||
@@ -129,11 +131,17 @@ class FieldRanking:
|
||||
"""
|
||||
assert self.rankings
|
||||
|
||||
return sa.func.weigh_search(table.c[self.column],
|
||||
[f"{{{','.join((str(s) for s in r.tokens))}}}"
|
||||
for r in self.rankings],
|
||||
[r.penalty for r in self.rankings],
|
||||
self.default)
|
||||
rout = JsonWriter().start_array()
|
||||
for rank in self.rankings:
|
||||
rout.start_array().value(rank.penalty).next()
|
||||
rout.start_array()
|
||||
for token in rank.tokens:
|
||||
rout.value(token).next()
|
||||
rout.end_array()
|
||||
rout.end_array().next()
|
||||
rout.end_array()
|
||||
|
||||
return sa.func.weigh_search(table.c[self.column], rout(), self.default)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
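The ranking change above sends the rankings to the `weigh_search()` SQL function as a single JSON argument instead of separate array parameters, so the same call works on PostgreSQL and SQLite. The payload built by the `JsonWriter` calls is a nested array of `[penalty, [token, ...]]` pairs; a plain-Python equivalent with invented penalties and token ids:

```python
import json

# Hypothetical rankings: each entry pairs a penalty with the token ids
# that must all be present for that penalty to apply.
rankings = [(0.0, [101, 102]), (0.3, [101]), (0.5, [205, 206, 207])]

payload = json.dumps([[penalty, tokens] for penalty, tokens in rankings])
print(payload)  # [[0.0, [101, 102]], [0.3, [101]], [0.5, [205, 206, 207]]]
# This JSON string is what is passed as the rankings argument of
# weigh_search(column, rankings, default) in the rewritten code above.
```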
@@ -146,19 +154,12 @@ class FieldLookup:
|
||||
"""
|
||||
column: str
|
||||
tokens: List[int]
|
||||
lookup_type: str
|
||||
lookup_type: Type[lookups.LookupType]
|
||||
|
||||
def sql_condition(self, table: SaFromClause) -> SaColumn:
|
||||
""" Create an SQL expression for the given match condition.
|
||||
"""
|
||||
col = table.c[self.column]
|
||||
if self.lookup_type == 'lookup_all':
|
||||
return col.contains(self.tokens)
|
||||
if self.lookup_type == 'lookup_any':
|
||||
return cast(SaColumn, col.overlap(self.tokens))
|
||||
|
||||
return sa.func.array_cat(col, sa.text('ARRAY[]::integer[]'),
|
||||
type_=ARRAY(sa.Integer())).contains(self.tokens)
|
||||
return self.lookup_type(table, self.column, self.tokens)
|
||||
|
||||
|
||||
class SearchData:
|
||||
@@ -224,22 +225,23 @@ def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[Fiel
|
||||
""" Create a lookup list where name tokens are looked up via index
|
||||
and potential address tokens are used to restrict the search further.
|
||||
"""
|
||||
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_all')]
|
||||
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
|
||||
if addr_tokens:
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookups.Restrict))
|
||||
|
||||
return lookup
|
||||
|
||||
|
||||
def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int],
|
||||
lookup_type: str) -> List[FieldLookup]:
|
||||
use_index_for_addr: bool) -> List[FieldLookup]:
|
||||
""" Create a lookup list where name tokens are looked up via index
|
||||
and only one of the name tokens must be present.
|
||||
Potential address tokens are used to restrict the search further.
|
||||
"""
|
||||
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_any')]
|
||||
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAny)]
|
||||
if addr_tokens:
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookup_type))
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens,
|
||||
lookups.LookupAll if use_index_for_addr else lookups.Restrict))
|
||||
|
||||
return lookup
|
||||
|
||||
@@ -248,5 +250,5 @@ def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[Field
|
||||
""" Create a lookup list where address tokens are looked up via index
|
||||
and the name tokens are only used to restrict the search further.
|
||||
"""
|
||||
return [FieldLookup('name_vector', name_tokens, 'restrict'),
|
||||
FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
|
||||
return [FieldLookup('name_vector', name_tokens, lookups.Restrict),
|
||||
FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll)]
|
||||
|
||||
nominatim/api/search/db_search_lookups.py (new file, 114 lines)
@@ -0,0 +1,114 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of lookup functions for the search_name table.
"""
from typing import List, Any

import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles

from nominatim.typing import SaFromClause
from nominatim.db.sqlalchemy_types import IntArray

# pylint: disable=consider-using-f-string

LookupType = sa.sql.expression.FunctionElement[Any]

class LookupAll(LookupType):
    """ Find all entries in search_name table that contain all of
        a given list of tokens using an index for the search.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        super().__init__(table.c.place_id, getattr(table.c, column), column,
                         sa.type_coerce(tokens, IntArray))


@compiles(LookupAll) # type: ignore[no-untyped-call, misc]
def _default_lookup_all(element: LookupAll,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
    _, col, _, tokens = list(element.clauses)
    return "(%s @> %s)" % (compiler.process(col, **kw),
                           compiler.process(tokens, **kw))


@compiles(LookupAll, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_all(element: LookupAll,
                       compiler: 'sa.Compiled', **kw: Any) -> str:
    place, col, colname, tokens = list(element.clauses)
    return "(%s IN (SELECT CAST(value as bigint) FROM"\
           " (SELECT array_intersect_fuzzy(places) as p FROM"\
           " (SELECT places FROM reverse_search_name"\
           " WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
           " AND column = %s"\
           " ORDER BY length(places)) as x) as u,"\
           " json_each('[' || u.p || ']'))"\
           " AND array_contains(%s, %s))"\
           % (compiler.process(place, **kw),
              compiler.process(tokens, **kw),
              compiler.process(colname, **kw),
              compiler.process(col, **kw),
              compiler.process(tokens, **kw)
             )


class LookupAny(LookupType):
    """ Find all entries that contain at least one of the given tokens.
        Use an index for the search.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        super().__init__(table.c.place_id, getattr(table.c, column), column,
                         sa.type_coerce(tokens, IntArray))

@compiles(LookupAny) # type: ignore[no-untyped-call, misc]
def _default_lookup_any(element: LookupAny,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
    _, col, _, tokens = list(element.clauses)
    return "(%s && %s)" % (compiler.process(col, **kw),
                           compiler.process(tokens, **kw))

@compiles(LookupAny, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_any(element: LookupAny,
                       compiler: 'sa.Compiled', **kw: Any) -> str:
    place, _, colname, tokens = list(element.clauses)
    return "%s IN (SELECT CAST(value as bigint) FROM"\
           " (SELECT array_union(places) as p FROM reverse_search_name"\
           " WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
           " AND column = %s) as u,"\
           " json_each('[' || u.p || ']'))" % (compiler.process(place, **kw),
                                               compiler.process(tokens, **kw),
                                               compiler.process(colname, **kw))


class Restrict(LookupType):
    """ Find all entries that contain all of the given tokens.
        Do not use an index for the search.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        super().__init__(getattr(table.c, column),
                         sa.type_coerce(tokens, IntArray))


@compiles(Restrict) # type: ignore[no-untyped-call, misc]
def _default_restrict(element: Restrict,
                      compiler: 'sa.Compiled', **kw: Any) -> str:
    arg1, arg2 = list(element.clauses)
    return "(coalesce(null, %s) @> %s)" % (compiler.process(arg1, **kw),
                                           compiler.process(arg2, **kw))

@compiles(Restrict, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_restrict(element: Restrict,
                     compiler: 'sa.Compiled', **kw: Any) -> str:
    return "array_contains(%s)" % compiler.process(element.clauses, **kw)
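These lookup elements use SQLAlchemy's compiler extension to emit different SQL per dialect: the default rendering uses the indexable `@>`/`&&` array operators, while the `'sqlite'` variants rewrite the lookup as a sub-select over `reverse_search_name`. A self-contained sketch of that dispatch mechanism with a toy element (not the Nominatim classes) shows how the registrations are picked up:

```python
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.dialects import postgresql, sqlite


class ArrayContainsDemo(sa.sql.expression.FunctionElement):
    """Toy element: 'does this column contain these tokens?'"""
    inherit_cache = True


@compiles(ArrayContainsDemo)
def _pg_contains(element, compiler, **kw):
    # Default rendering (used by PostgreSQL): the indexable @> operator.
    col, tokens = list(element.clauses)
    return "(%s @> %s)" % (compiler.process(col, **kw), compiler.process(tokens, **kw))


@compiles(ArrayContainsDemo, 'sqlite')
def _sqlite_contains(element, compiler, **kw):
    # SQLite rendering: fall back to a custom SQL function.
    return "array_contains(%s)" % compiler.process(element.clauses, **kw)


expr = ArrayContainsDemo(sa.column('name_vector'), sa.literal_column("'{1,2,3}'"))
print(expr.compile(dialect=postgresql.dialect()))  # (name_vector @> '{1,2,3}')
print(expr.compile(dialect=sqlite.dialect()))      # array_contains(name_vector, '{1,2,3}')
```

The calling code builds the element once; which SQL gets produced is decided purely by the dialect of the engine executing the statement.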
@@ -11,7 +11,6 @@ from typing import List, Tuple, AsyncIterator, Dict, Any, Callable
|
||||
import abc
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import ARRAY, array_agg
|
||||
|
||||
from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
|
||||
SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
|
||||
@@ -19,7 +18,7 @@ from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
|
||||
import nominatim.api.results as nres
|
||||
from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
|
||||
from nominatim.db.sqlalchemy_types import Geometry
|
||||
from nominatim.db.sqlalchemy_types import Geometry, IntArray
|
||||
|
||||
#pylint: disable=singleton-comparison,not-callable
|
||||
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
|
||||
@@ -55,12 +54,29 @@ NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
|
||||
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
|
||||
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
|
||||
|
||||
def _within_near(t: SaFromClause) -> Callable[[], SaExpression]:
|
||||
return lambda: t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)
|
||||
|
||||
def filter_by_area(sql: SaSelect, t: SaFromClause,
|
||||
details: SearchDetails, avoid_index: bool = False) -> SaSelect:
|
||||
""" Apply SQL statements for filtering by viewbox and near point,
|
||||
if applicable.
|
||||
"""
|
||||
if details.near is not None and details.near_radius is not None:
|
||||
if details.near_radius < 0.1 and not avoid_index:
|
||||
sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM))
|
||||
else:
|
||||
sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM)
|
||||
if details.viewbox is not None and details.bounded_viewbox:
|
||||
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM,
|
||||
use_index=not avoid_index and
|
||||
details.viewbox.area < 0.2))
|
||||
|
||||
return sql
|
||||
|
||||
|
||||
def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
|
||||
return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
|
||||
|
||||
|
||||
def _select_placex(t: SaFromClause) -> SaSelect:
|
||||
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
|
||||
t.c.class_, t.c.type,
|
||||
@@ -93,7 +109,7 @@ def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDet
|
||||
|
||||
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
|
||||
numerals: List[int], details: SearchDetails) -> SaScalarSelect:
|
||||
all_ids = array_agg(table.c.place_id) # type: ignore[no-untyped-call]
|
||||
all_ids = sa.func.ArrayAgg(table.c.place_id)
|
||||
sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)
|
||||
|
||||
if len(numerals) == 1:
|
||||
@@ -117,9 +133,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
|
||||
orexpr.append(no_index(table.c.rank_address).between(1, 30))
|
||||
elif layers & DataLayer.ADDRESS:
|
||||
orexpr.append(no_index(table.c.rank_address).between(1, 29))
|
||||
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
|
||||
sa.or_(table.c.housenumber != None,
|
||||
table.c.address.has_key('addr:housename'))))
|
||||
orexpr.append(sa.func.IsAddressPoint(table))
|
||||
elif layers & DataLayer.POI:
|
||||
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
|
||||
table.c.class_.not_in(('place', 'building'))))
|
||||
@@ -171,12 +185,21 @@ async def _get_placex_housenumbers(conn: SearchConnection,
|
||||
yield result
|
||||
|
||||
|
||||
def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
|
||||
""" Create a subselect that returns the given list of integers
|
||||
as rows in the column 'nr'.
|
||||
"""
|
||||
vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\
|
||||
.table_valued(sa.column('value', type_=sa.JSON)) # type: ignore[no-untyped-call]
|
||||
return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery()
|
||||
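`_int_list_to_subquery` replaces the `sa.values()` construct used before with a JSON-based table function, so a Python list of housenumbers can be expanded into rows on both engines. On SQLite the generated SQL boils down to `json_each()` over a JSON array literal, which can be tried directly (assuming the bundled SQLite has the JSON1 functions, the norm in current Python builds):

```python
import sqlite3

con = sqlite3.connect(':memory:')
# Expand the JSON array '[1,3,5]' into one row per element and cast to int,
# which is what the subquery built by _int_list_to_subquery does.
rows = con.execute(
    "SELECT CAST(value AS INTEGER) AS nr FROM json_each('[1,3,5]')").fetchall()
print(rows)  # [(1,), (3,), (5,)]
```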
|
||||
|
||||
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
|
||||
numerals: List[int],
|
||||
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
|
||||
t = conn.t.osmline
|
||||
values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
|
||||
.data([(n,) for n in numerals])
|
||||
|
||||
values = _int_list_to_subquery(numerals)
|
||||
sql = sa.select(t.c.place_id, t.c.osm_id,
|
||||
t.c.parent_place_id, t.c.address,
|
||||
values.c.nr.label('housenumber'),
|
||||
@@ -199,8 +222,7 @@ async def _get_tiger(conn: SearchConnection, place_ids: List[int],
|
||||
numerals: List[int], osm_id: int,
|
||||
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
|
||||
t = conn.t.tiger
|
||||
values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
|
||||
.data([(n,) for n in numerals])
|
||||
values = _int_list_to_subquery(numerals)
|
||||
sql = sa.select(t.c.place_id, t.c.parent_place_id,
|
||||
sa.literal('W').label('osm_type'),
|
||||
sa.literal(osm_id).label('osm_id'),
|
||||
@@ -295,7 +317,7 @@ class NearSearch(AbstractSearch):
|
||||
|
||||
if table is None:
|
||||
# No classtype table available, do a simplified lookup in placex.
|
||||
table = conn.t.placex.alias('inner')
|
||||
table = conn.t.placex
|
||||
sql = sa.select(table.c.place_id,
|
||||
sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
|
||||
.label('dist'))\
|
||||
@@ -366,7 +388,7 @@ class PoiSearch(AbstractSearch):
|
||||
.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
|
||||
.label('importance'))\
|
||||
.where(t.c.linked_place_id == None) \
|
||||
.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
|
||||
.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
|
||||
.order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
|
||||
.limit(LIMIT_PARAM)
|
||||
|
||||
@@ -403,8 +425,8 @@ class PoiSearch(AbstractSearch):
|
||||
|
||||
if details.near and details.near_radius is not None:
|
||||
sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
|
||||
.where(table.c.centroid.ST_DWithin(NEAR_PARAM,
|
||||
NEAR_RADIUS_PARAM))
|
||||
.where(table.c.centroid.within_distance(NEAR_PARAM,
|
||||
NEAR_RADIUS_PARAM))
|
||||
|
||||
if self.countries:
|
||||
sql = sql.where(t.c.country_code.in_(self.countries.values))
|
||||
@@ -449,11 +471,7 @@ class CountrySearch(AbstractSearch):
|
||||
if details.excluded:
|
||||
sql = sql.where(_exclude_places(t))
|
||||
|
||||
if details.viewbox is not None and details.bounded_viewbox:
|
||||
sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM))
|
||||
|
||||
if details.near is not None and details.near_radius is not None:
|
||||
sql = sql.where(_within_near(t))
|
||||
sql = filter_by_area(sql, t, details)
|
||||
|
||||
results = nres.SearchResults()
|
||||
for row in await conn.execute(sql, _details_to_bind_params(details)):
|
||||
@@ -486,18 +504,12 @@ class CountrySearch(AbstractSearch):
|
||||
.where(tgrid.c.country_code.in_(self.countries.values))\
|
||||
.group_by(tgrid.c.country_code)
|
||||
|
||||
if details.viewbox is not None and details.bounded_viewbox:
|
||||
sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM))
|
||||
if details.near is not None and details.near_radius is not None:
|
||||
sql = sql.where(_within_near(tgrid))
|
||||
sql = filter_by_area(sql, tgrid, details, avoid_index=True)
|
||||
|
||||
sub = sql.subquery('grid')
|
||||
|
||||
sql = sa.select(t.c.country_code,
|
||||
(t.c.name
|
||||
+ sa.func.coalesce(t.c.derived_name,
|
||||
sa.cast('', type_=conn.t.types.Composite))
|
||||
).label('name'),
|
||||
t.c.name.merge(t.c.derived_name).label('name'),
|
||||
sub.c.centroid, sub.c.bbox)\
|
||||
.join(sub, t.c.country_code == sub.c.country_code)
|
||||
|
||||
@@ -545,19 +557,16 @@ class PostcodeSearch(AbstractSearch):
|
||||
|
||||
penalty: SaExpression = sa.literal(self.penalty)
|
||||
|
||||
if details.viewbox is not None:
|
||||
if details.bounded_viewbox:
|
||||
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
|
||||
else:
|
||||
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
|
||||
(t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
|
||||
else_=1.0)
|
||||
if details.viewbox is not None and not details.bounded_viewbox:
|
||||
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
|
||||
(t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
|
||||
else_=1.0)
|
||||
|
||||
if details.near is not None:
|
||||
if details.near_radius is not None:
|
||||
sql = sql.where(_within_near(t))
|
||||
sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
|
||||
|
||||
sql = filter_by_area(sql, t, details)
|
||||
|
||||
if self.countries:
|
||||
sql = sql.where(t.c.country_code.in_(self.countries.values))
|
||||
|
||||
@@ -566,13 +575,11 @@ class PostcodeSearch(AbstractSearch):
|
||||
|
||||
if self.lookups:
|
||||
assert len(self.lookups) == 1
|
||||
assert self.lookups[0].lookup_type == 'restrict'
|
||||
tsearch = conn.t.search_name
|
||||
sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
|
||||
.where(sa.func.array_cat(tsearch.c.name_vector,
|
||||
tsearch.c.nameaddress_vector,
|
||||
type_=ARRAY(sa.Integer))
|
||||
.contains(self.lookups[0].tokens))
|
||||
.where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
|
||||
.contains(sa.type_coerce(self.lookups[0].tokens,
|
||||
IntArray)))
|
||||
|
||||
for ranking in self.rankings:
|
||||
penalty += ranking.sql_penalty(conn.t.search_name)
|
||||
@@ -637,11 +644,11 @@ class PlaceSearch(AbstractSearch):
|
||||
sql = sql.where(tsearch.c.address_rank > 9)
|
||||
tpc = conn.t.postcode
|
||||
pcs = self.postcodes.values
|
||||
if self.expected_count > 1000:
|
||||
if self.expected_count > 5000:
|
||||
# Many results expected. Restrict by postcode.
|
||||
sql = sql.where(sa.select(tpc.c.postcode)
|
||||
.where(tpc.c.postcode.in_(pcs))
|
||||
.where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
|
||||
.where(tsearch.c.centroid.within_distance(tpc.c.geometry, 0.12))
|
||||
.exists())
|
||||
|
||||
# Less results, only have a preference for close postcodes
|
||||
@@ -653,27 +660,26 @@ class PlaceSearch(AbstractSearch):
|
||||
|
||||
if details.viewbox is not None:
|
||||
if details.bounded_viewbox:
|
||||
if details.viewbox.area < 0.2:
|
||||
sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
|
||||
else:
|
||||
sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
|
||||
sql = sql.where(tsearch.c.centroid
|
||||
.intersects(VIEWBOX_PARAM,
|
||||
use_index=details.viewbox.area < 0.2))
|
||||
elif self.expected_count >= 10000:
|
||||
if details.viewbox.area < 0.5:
|
||||
sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM))
|
||||
else:
|
||||
sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM))
|
||||
sql = sql.where(tsearch.c.centroid
|
||||
.intersects(VIEWBOX2_PARAM,
|
||||
use_index=details.viewbox.area < 0.5))
|
||||
else:
|
||||
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
|
||||
(t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
|
||||
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
|
||||
(t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
|
||||
else_=1.0)
|
||||
|
||||
if details.near is not None:
|
||||
if details.near_radius is not None:
|
||||
if details.near_radius < 0.1:
|
||||
sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
|
||||
sql = sql.where(tsearch.c.centroid.within_distance(NEAR_PARAM,
|
||||
NEAR_RADIUS_PARAM))
|
||||
else:
|
||||
sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
|
||||
NEAR_RADIUS_PARAM))
|
||||
sql = sql.where(tsearch.c.centroid
|
||||
.ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)
|
||||
sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
|
||||
.label('importance'))
|
||||
sql = sql.order_by(sa.desc(sa.text('importance')))
|
||||
@@ -692,10 +698,10 @@ class PlaceSearch(AbstractSearch):
|
||||
sql = sql.order_by(sa.text('accuracy'))
|
||||
|
||||
if self.housenumbers:
|
||||
hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
|
||||
hnr_list = '|'.join(self.housenumbers.values)
|
||||
sql = sql.where(tsearch.c.address_rank.between(16, 30))\
|
||||
.where(sa.or_(tsearch.c.address_rank < 30,
|
||||
t.c.housenumber.op('~*')(hnr_regexp)))
|
||||
sa.func.RegexpWord(hnr_list, t.c.housenumber)))
|
||||
|
||||
# Cross check for housenumbers, need to do that on a rather large
|
||||
# set. Worst case there are 40.000 main streets in OSM.
|
||||
@@ -703,10 +709,10 @@ class PlaceSearch(AbstractSearch):
|
||||
|
||||
# Housenumbers from placex
|
||||
thnr = conn.t.placex.alias('hnr')
|
||||
pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call]
|
||||
pid_list = sa.func.ArrayAgg(thnr.c.place_id)
|
||||
place_sql = sa.select(pid_list)\
|
||||
.where(thnr.c.parent_place_id == inner.c.place_id)\
|
||||
.where(thnr.c.housenumber.op('~*')(hnr_regexp))\
|
||||
.where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
|
||||
.where(thnr.c.linked_place_id == None)\
|
||||
.where(thnr.c.indexed_status == 0)
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.logging import log
|
||||
from nominatim.api.search import query as qmod
|
||||
from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
|
||||
from nominatim.db.sqlalchemy_types import Json
|
||||
|
||||
|
||||
DB_TO_TOKEN_TYPE = {
|
||||
@@ -159,7 +160,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
sa.Column('word_token', sa.Text, nullable=False),
|
||||
sa.Column('type', sa.Text, nullable=False),
|
||||
sa.Column('word', sa.Text),
|
||||
sa.Column('info', self.conn.t.types.Json))
|
||||
sa.Column('info', Json))
|
||||
|
||||
|
||||
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
|
||||
|
||||
@@ -76,7 +76,7 @@ class ConvertDB:
|
||||
group.add_argument('--reverse', action=WithAction, dest_set=self.options, default=True,
|
||||
help='Enable/disable support for reverse and lookup API'
|
||||
' (default: enabled)')
|
||||
group.add_argument('--search', action=WithAction, dest_set=self.options, default=False,
|
||||
group.add_argument('--search', action=WithAction, dest_set=self.options, default=True,
|
||||
help='Enable/disable support for search API (default: disabled)')
|
||||
group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
|
||||
help='Enable/disable support for details API (default: enabled)')
|
||||
|
||||
@@ -29,7 +29,7 @@ class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
|
||||
|
||||
|
||||
@compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
|
||||
def _default_intersects(element: SaColumn,
|
||||
def _default_intersects(element: PlacexGeometryReverseLookuppolygon,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return ("(ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
|
||||
" AND placex.rank_address between 4 and 25"
|
||||
@@ -40,7 +40,7 @@ def _default_intersects(element: SaColumn,
|
||||
|
||||
|
||||
@compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_intersects(element: SaColumn,
|
||||
def _sqlite_intersects(element: PlacexGeometryReverseLookuppolygon,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return ("(ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')"
|
||||
" AND placex.rank_address between 4 and 25"
|
||||
@@ -61,7 +61,7 @@ class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
|
||||
|
||||
|
||||
@compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
|
||||
def default_reverse_place_diameter(element: SaColumn,
|
||||
def default_reverse_place_diameter(element: IntersectsReverseDistance,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
table = element.tablename
|
||||
return f"({table}.rank_address between 4 and 25"\
|
||||
@@ -74,7 +74,7 @@ def default_reverse_place_diameter(element: SaColumn,
|
||||
|
||||
|
||||
@compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_reverse_place_diameter(element: SaColumn,
|
||||
def sqlite_reverse_place_diameter(element: IntersectsReverseDistance,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
geom1, rank, geom2 = list(element.clauses)
|
||||
table = element.tablename
|
||||
@@ -102,7 +102,7 @@ class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
|
||||
|
||||
|
||||
@compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
|
||||
def default_is_below_reverse_distance(element: SaColumn,
|
||||
def default_is_below_reverse_distance(element: IsBelowReverseDistance,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
dist, rank = list(element.clauses)
|
||||
return "%s < reverse_place_diameter(%s)" % (compiler.process(dist, **kw),
|
||||
@@ -110,25 +110,13 @@ def default_is_below_reverse_distance(element: SaColumn,
|
||||
|
||||
|
||||
@compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_is_below_reverse_distance(element: SaColumn,
|
||||
def sqlite_is_below_reverse_distance(element: IsBelowReverseDistance,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
dist, rank = list(element.clauses)
|
||||
return "%s < 14.0 * exp(-0.2 * %s) - 0.03" % (compiler.process(dist, **kw),
|
||||
compiler.process(rank, **kw))
|
||||
|
||||
|
||||
def select_index_placex_geometry_reverse_lookupplacenode(table: str) -> 'sa.TextClause':
|
||||
""" Create an expression with the necessary conditions over a placex
|
||||
table that the index 'idx_placex_geometry_reverse_lookupPlaceNode'
|
||||
can be used.
|
||||
"""
|
||||
return sa.text(f"{table}.rank_address between 4 and 25"
|
||||
f" AND {table}.type != 'postcode'"
|
||||
f" AND {table}.name is not null"
|
||||
f" AND {table}.linked_place_id is null"
|
||||
f" AND {table}.osm_type = 'N'")
|
||||
|
||||
|
||||
class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
|
||||
name = 'IsAddressPoint'
|
||||
inherit_cache = True
|
||||
@@ -139,7 +127,7 @@ class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
|
||||
|
||||
|
||||
@compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
|
||||
def default_is_address_point(element: SaColumn,
|
||||
def default_is_address_point(element: IsAddressPoint,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
rank, hnr, name = list(element.clauses)
|
||||
return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
|
||||
@@ -149,7 +137,7 @@ def default_is_address_point(element: SaColumn,
|
||||
|
||||
|
||||
@compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_is_address_point(element: SaColumn,
|
||||
def sqlite_is_address_point(element: IsAddressPoint,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
rank, hnr, name = list(element.clauses)
|
||||
return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
|
||||
@@ -166,7 +154,7 @@ class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
|
||||
inherit_cache = True
|
||||
|
||||
@compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
|
||||
def compile_crosscheck_names(element: SaColumn,
|
||||
def compile_crosscheck_names(element: CrosscheckNames,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "coalesce(avals(%s) && ARRAY(SELECT * FROM json_array_elements_text(%s)), false)" % (
|
||||
@@ -174,7 +162,7 @@ def compile_crosscheck_names(element: SaColumn,
|
||||
|
||||
|
||||
@compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def compile_sqlite_crosscheck_names(element: SaColumn,
|
||||
def compile_sqlite_crosscheck_names(element: CrosscheckNames,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "EXISTS(SELECT *"\
|
||||
@@ -191,15 +179,16 @@ class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
|
||||
|
||||
|
||||
@compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
|
||||
def default_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
def default_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "json_array_elements(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
@compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
def sqlite_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "json_each(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
|
||||
class Greatest(sa.sql.functions.GenericFunction[Any]):
|
||||
""" Function to compute maximum of all its input parameters.
|
||||
"""
|
||||
@@ -208,5 +197,25 @@ class Greatest(sa.sql.functions.GenericFunction[Any]):
|
||||
|
||||
|
||||
@compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_greatest(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
def sqlite_greatest(element: Greatest, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "max(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
|
||||
class RegexpWord(sa.sql.functions.GenericFunction[Any]):
|
||||
""" Check if a full word is in a given string.
|
||||
"""
|
||||
name = 'RegexpWord'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(RegexpWord, 'postgresql') # type: ignore[no-untyped-call, misc]
|
||||
def postgres_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "%s ~* ('\\m(' || %s || ')\\M')::text" % (compiler.process(arg2, **kw), compiler.process(arg1, **kw))
|
||||
|
||||
|
||||
@compiles(RegexpWord, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "regexp('\\b(' || %s || ')\\b', %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
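`RegexpWord` replaces the housenumber regex that was previously built inline with the `~*` operator; it compiles to PostgreSQL's `\m...\M` word boundaries and to `\b...\b` for the `regexp()` function registered on SQLite. The matching behaviour both spellings aim for can be illustrated with Python's `re` module (the housenumber values are invented):

```python
import re

# '4' or '7a' must match as a whole word inside the housenumber string,
# so '2-4' matches but '40' and '14a' do not.
pattern = re.compile(r'\b(4|7a)\b', re.IGNORECASE)

for hnr in ['4', '2-4', '40', '14a', '7A']:
    print(hnr, bool(pattern.search(hnr)))
# 4 True / 2-4 True / 40 False / 14a False / 7A True
```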
@@ -7,37 +7,10 @@
|
||||
"""
|
||||
SQLAlchemy definitions for all tables used by the frontend.
|
||||
"""
|
||||
from typing import Any
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB, array
|
||||
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
|
||||
|
||||
import nominatim.db.sqlalchemy_functions #pylint: disable=unused-import
|
||||
from nominatim.db.sqlalchemy_types import Geometry
|
||||
|
||||
class PostgresTypes:
|
||||
""" Type definitions for complex types as used in Postgres variants.
|
||||
"""
|
||||
Composite = HSTORE
|
||||
Json = JSONB
|
||||
IntArray = ARRAY(sa.Integer()) #pylint: disable=invalid-name
|
||||
to_array = array
|
||||
|
||||
|
||||
class SqliteTypes:
|
||||
""" Type definitions for complex types as used in Postgres variants.
|
||||
"""
|
||||
Composite = sqlite_json
|
||||
Json = sqlite_json
|
||||
IntArray = sqlite_json
|
||||
|
||||
@staticmethod
|
||||
def to_array(arr: Any) -> Any:
|
||||
""" Sqlite has no special conversion for arrays.
|
||||
"""
|
||||
return arr
|
||||
|
||||
from nominatim.db.sqlalchemy_types import Geometry, KeyValueStore, IntArray
|
||||
|
||||
#pylint: disable=too-many-instance-attributes
|
||||
class SearchTables:
|
||||
@@ -47,14 +20,7 @@ class SearchTables:
|
||||
Any data used for updates only will not be visible.
|
||||
"""
|
||||
|
||||
def __init__(self, meta: sa.MetaData, engine_name: str) -> None:
|
||||
if engine_name == 'postgresql':
|
||||
self.types: Any = PostgresTypes
|
||||
elif engine_name == 'sqlite':
|
||||
self.types = SqliteTypes
|
||||
else:
|
||||
raise ValueError("Only 'postgresql' and 'sqlite' engines are supported.")
|
||||
|
||||
def __init__(self, meta: sa.MetaData) -> None:
|
||||
self.meta = meta
|
||||
|
||||
self.import_status = sa.Table('import_status', meta,
|
||||
@@ -80,9 +46,9 @@ class SearchTables:
|
||||
sa.Column('class', sa.Text, nullable=False, key='class_'),
|
||||
sa.Column('type', sa.Text, nullable=False),
|
||||
sa.Column('admin_level', sa.SmallInteger),
|
||||
sa.Column('name', self.types.Composite),
|
||||
sa.Column('address', self.types.Composite),
|
||||
sa.Column('extratags', self.types.Composite),
|
||||
sa.Column('name', KeyValueStore),
|
||||
sa.Column('address', KeyValueStore),
|
||||
sa.Column('extratags', KeyValueStore),
|
||||
sa.Column('geometry', Geometry, nullable=False),
|
||||
sa.Column('wikipedia', sa.Text),
|
||||
sa.Column('country_code', sa.String(2)),
|
||||
@@ -118,14 +84,14 @@ class SearchTables:
|
||||
sa.Column('step', sa.SmallInteger),
|
||||
sa.Column('indexed_status', sa.SmallInteger),
|
||||
sa.Column('linegeo', Geometry),
|
||||
sa.Column('address', self.types.Composite),
|
||||
sa.Column('address', KeyValueStore),
|
||||
sa.Column('postcode', sa.Text),
|
||||
sa.Column('country_code', sa.String(2)))
|
||||
|
||||
self.country_name = sa.Table('country_name', meta,
|
||||
sa.Column('country_code', sa.String(2)),
|
||||
sa.Column('name', self.types.Composite),
|
||||
sa.Column('derived_name', self.types.Composite),
|
||||
sa.Column('name', KeyValueStore),
|
||||
sa.Column('derived_name', KeyValueStore),
|
||||
sa.Column('partition', sa.Integer))
|
||||
|
||||
self.country_grid = sa.Table('country_osm_grid', meta,
|
||||
@@ -139,8 +105,8 @@ class SearchTables:
|
||||
sa.Column('importance', sa.Float),
|
||||
sa.Column('search_rank', sa.SmallInteger),
|
||||
sa.Column('address_rank', sa.SmallInteger),
|
||||
sa.Column('name_vector', self.types.IntArray),
|
||||
sa.Column('nameaddress_vector', self.types.IntArray),
|
||||
sa.Column('name_vector', IntArray),
|
||||
sa.Column('nameaddress_vector', IntArray),
|
||||
sa.Column('country_code', sa.String(2)),
|
||||
sa.Column('centroid', Geometry))
|
||||
|
||||
|
||||
nominatim/db/sqlalchemy_types/__init__.py (new file, 17 lines)
@@ -0,0 +1,17 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module with custom types for SQLAlchemy
"""

# See also https://github.com/PyCQA/pylint/issues/6006
# pylint: disable=useless-import-alias

from .geometry import (Geometry as Geometry)
from .int_array import (IntArray as IntArray)
from .key_value import (KeyValueStore as KeyValueStore)
from .json import (Json as Json)
@@ -28,7 +28,7 @@ class Geometry_DistanceSpheroid(sa.sql.expression.FunctionElement[float]):
|
||||
|
||||
|
||||
@compiles(Geometry_DistanceSpheroid) # type: ignore[no-untyped-call, misc]
|
||||
def _default_distance_spheroid(element: SaColumn,
|
||||
def _default_distance_spheroid(element: Geometry_DistanceSpheroid,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_DistanceSpheroid(%s,"\
|
||||
" 'SPHEROID[\"WGS 84\",6378137,298.257223563, AUTHORITY[\"EPSG\",\"7030\"]]')"\
|
||||
@@ -36,7 +36,7 @@ def _default_distance_spheroid(element: SaColumn,
|
||||
|
||||
|
||||
@compiles(Geometry_DistanceSpheroid, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _spatialite_distance_spheroid(element: SaColumn,
|
||||
def _spatialite_distance_spheroid(element: Geometry_DistanceSpheroid,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "COALESCE(Distance(%s, true), 0.0)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
@@ -49,14 +49,14 @@ class Geometry_IsLineLike(sa.sql.expression.FunctionElement[Any]):
|
||||
|
||||
|
||||
@compiles(Geometry_IsLineLike) # type: ignore[no-untyped-call, misc]
|
||||
def _default_is_line_like(element: SaColumn,
|
||||
def _default_is_line_like(element: Geometry_IsLineLike,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_GeometryType(%s) IN ('ST_LineString', 'ST_MultiLineString')" % \
|
||||
compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
@compiles(Geometry_IsLineLike, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_is_line_like(element: SaColumn,
|
||||
def _sqlite_is_line_like(element: Geometry_IsLineLike,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_GeometryType(%s) IN ('LINESTRING', 'MULTILINESTRING')" % \
|
||||
compiler.process(element.clauses, **kw)
|
||||
@@ -70,14 +70,14 @@ class Geometry_IsAreaLike(sa.sql.expression.FunctionElement[Any]):
|
||||
|
||||
|
||||
@compiles(Geometry_IsAreaLike) # type: ignore[no-untyped-call, misc]
|
||||
def _default_is_area_like(element: SaColumn,
|
||||
def _default_is_area_like(element: Geometry_IsAreaLike,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_GeometryType(%s) IN ('ST_Polygon', 'ST_MultiPolygon')" % \
|
||||
compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
@compiles(Geometry_IsAreaLike, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_is_area_like(element: SaColumn,
|
||||
def _sqlite_is_area_like(element: Geometry_IsAreaLike,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_GeometryType(%s) IN ('POLYGON', 'MULTIPOLYGON')" % \
|
||||
compiler.process(element.clauses, **kw)
|
||||
@@ -91,14 +91,14 @@ class Geometry_IntersectsBbox(sa.sql.expression.FunctionElement[Any]):
|
||||
|
||||
|
||||
@compiles(Geometry_IntersectsBbox) # type: ignore[no-untyped-call, misc]
|
||||
def _default_intersects(element: SaColumn,
|
||||
def _default_intersects(element: Geometry_IntersectsBbox,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
|
||||
@compiles(Geometry_IntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_intersects(element: SaColumn,
|
||||
def _sqlite_intersects(element: Geometry_IntersectsBbox,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "MbrIntersects(%s) = 1" % compiler.process(element.clauses, **kw)
|
||||
|
||||
@@ -114,14 +114,14 @@ class Geometry_ColumnIntersectsBbox(sa.sql.expression.FunctionElement[Any]):
|
||||
|
||||
|
||||
@compiles(Geometry_ColumnIntersectsBbox) # type: ignore[no-untyped-call, misc]
|
||||
def default_intersects_column(element: SaColumn,
|
||||
def default_intersects_column(element: Geometry_ColumnIntersectsBbox,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
|
||||
@compiles(Geometry_ColumnIntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def spatialite_intersects_column(element: SaColumn,
|
||||
def spatialite_intersects_column(element: Geometry_ColumnIntersectsBbox,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "MbrIntersects(%s, %s) = 1 and "\
|
||||
@@ -145,12 +145,12 @@ class Geometry_ColumnDWithin(sa.sql.expression.FunctionElement[Any]):
|
||||
|
||||
|
||||
@compiles(Geometry_ColumnDWithin) # type: ignore[no-untyped-call, misc]
|
||||
def default_dwithin_column(element: SaColumn,
|
||||
def default_dwithin_column(element: Geometry_ColumnDWithin,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_DWithin(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
@compiles(Geometry_ColumnDWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def spatialite_dwithin_column(element: SaColumn,
|
||||
def spatialite_dwithin_column(element: Geometry_ColumnDWithin,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
geom1, geom2, dist = list(element.clauses)
|
||||
return "ST_Distance(%s, %s) < %s and "\
|
||||
@@ -165,7 +165,6 @@ def spatialite_dwithin_column(element: SaColumn,
|
||||
compiler.process(dist, **kw))
|
||||
|
||||
|
||||
|
||||
class Geometry(types.UserDefinedType): # type: ignore[type-arg]
|
||||
""" Simplified type decorator for PostGIS geometry. This type
|
||||
only supports geometries in 4326 projection.
|
||||
@@ -206,7 +205,10 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
|
||||
|
||||
class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
|
||||
|
||||
def intersects(self, other: SaColumn) -> 'sa.Operators':
|
||||
def intersects(self, other: SaColumn, use_index: bool = True) -> 'sa.Operators':
|
||||
if not use_index:
|
||||
return Geometry_IntersectsBbox(sa.func.coalesce(sa.null(), self.expr), other)
|
||||
|
||||
if isinstance(self.expr, sa.Column):
|
||||
return Geometry_ColumnIntersectsBbox(self.expr, other)
|
||||
|
||||
@@ -221,20 +223,11 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
|
||||
return Geometry_IsAreaLike(self)
|
||||
|
||||
|
||||
def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
|
||||
def within_distance(self, other: SaColumn, distance: SaColumn) -> SaColumn:
|
||||
if isinstance(self.expr, sa.Column):
|
||||
return Geometry_ColumnDWithin(self.expr, other, distance)
|
||||
|
||||
return sa.func.ST_DWithin(self.expr, other, distance)
|
||||
|
||||
|
||||
def ST_DWithin_no_index(self, other: SaColumn, distance: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_DWithin(sa.func.coalesce(sa.null(), self),
|
||||
other, distance)
|
||||
|
||||
|
||||
def ST_Intersects_no_index(self, other: SaColumn) -> 'sa.Operators':
|
||||
return Geometry_IntersectsBbox(sa.func.coalesce(sa.null(), self), other)
|
||||
return self.ST_Distance(other) < distance
|
||||
|
||||
|
||||
def ST_Distance(self, other: SaColumn) -> SaColumn:
|
||||
@@ -313,18 +306,3 @@ def _add_function_alias(func: str, ftype: type, alias: str) -> None:
|
||||
|
||||
for alias in SQLITE_FUNCTION_ALIAS:
|
||||
_add_function_alias(*alias)
|
||||
|
||||
|
||||
class ST_DWithin(sa.sql.functions.GenericFunction[Any]):
|
||||
name = 'ST_DWithin'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(ST_DWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def default_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
geom1, geom2, dist = list(element.clauses)
|
||||
return "(MbrIntersects(%s, ST_Expand(%s, %s)) = 1 AND ST_Distance(%s, %s) <= %s)" % (
|
||||
compiler.process(geom1, **kw), compiler.process(geom2, **kw),
|
||||
compiler.process(dist, **kw),
|
||||
compiler.process(geom1, **kw), compiler.process(geom2, **kw),
|
||||
compiler.process(dist, **kw))
|
||||
nominatim/db/sqlalchemy_types/int_array.py (new file, 123 lines)
@@ -0,0 +1,123 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom type for an array of integers.
"""
from typing import Any, List, cast, Optional

import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.dialects.postgresql import ARRAY

from nominatim.typing import SaDialect, SaColumn

# pylint: disable=all

class IntList(sa.types.TypeDecorator[Any]):
    """ A list of integers saved as a text of comma-separated numbers.
    """
    impl = sa.types.Unicode
    cache_ok = True

    def process_bind_param(self, value: Optional[Any], dialect: 'sa.Dialect') -> Optional[str]:
        if value is None:
            return None

        assert isinstance(value, list)
        return ','.join(map(str, value))

    def process_result_value(self, value: Optional[Any],
                             dialect: SaDialect) -> Optional[List[int]]:
        return [int(v) for v in value.split(',')] if value is not None else None

    def copy(self, **kw: Any) -> 'IntList':
        return IntList(self.impl.length)


class IntArray(sa.types.TypeDecorator[Any]):
    """ Dialect-independent list of integers.
    """
    impl = IntList
    cache_ok = True

    def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
        if dialect.name == 'postgresql':
            return ARRAY(sa.Integer()) #pylint: disable=invalid-name

        return IntList()

class comparator_factory(sa.types.UserDefinedType.Comparator): # type: ignore[type-arg]
|
||||
|
||||
def __add__(self, other: SaColumn) -> 'sa.ColumnOperators':
|
||||
""" Concate the array with the given array. If one of the
|
||||
operants is null, the value of the other will be returned.
|
||||
"""
|
||||
return ArrayCat(self.expr, other)
|
||||
|
||||
|
||||
def contains(self, other: SaColumn, **kwargs: Any) -> 'sa.ColumnOperators':
|
||||
""" Return true if the array contains all the value of the argument
|
||||
array.
|
||||
"""
|
||||
return ArrayContains(self.expr, other)
|
||||
|
||||
|
||||
|
||||
class ArrayAgg(sa.sql.functions.GenericFunction[Any]):
|
||||
""" Aggregate function to collect elements in an array.
|
||||
"""
|
||||
type = IntArray()
|
||||
identifier = 'ArrayAgg'
|
||||
name = 'array_agg'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(ArrayAgg, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_array_agg(element: ArrayAgg, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "group_concat(%s, ',')" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
|
||||
class ArrayContains(sa.sql.expression.FunctionElement[Any]):
|
||||
""" Function to check if an array is fully contained in another.
|
||||
"""
|
||||
name = 'ArrayContains'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(ArrayContains) # type: ignore[no-untyped-call, misc]
|
||||
def generic_array_contains(element: ArrayContains, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "(%s @> %s)" % (compiler.process(arg1, **kw),
|
||||
compiler.process(arg2, **kw))
|
||||
|
||||
|
||||
@compiles(ArrayContains, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_array_contains(element: ArrayContains, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "array_contains(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
|
||||
class ArrayCat(sa.sql.expression.FunctionElement[Any]):
|
||||
""" Function to check if an array is fully contained in another.
|
||||
"""
|
||||
type = IntArray()
|
||||
identifier = 'ArrayCat'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(ArrayCat) # type: ignore[no-untyped-call, misc]
|
||||
def generic_array_cat(element: ArrayCat, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "array_cat(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
@compiles(ArrayCat, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_array_cat(element: ArrayCat, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "(%s || ',' || %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
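A quick sanity-check sketch (not part of the PR) of the two pieces above: the string round-trip done by IntList and the dialect-specific SQL that ArrayContains compiles to. It assumes the nominatim package from this branch is importable; column and parameter names are placeholders.

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql, sqlite

from nominatim.db.sqlalchemy_types.int_array import IntList, ArrayContains

ints = IntList()
assert ints.process_bind_param([12, 7, 103], None) == '12,7,103'
assert ints.process_result_value('12,7,103', None) == [12, 7, 103]

expr = ArrayContains(sa.column('name_vector'), sa.bindparam('words'))
print(expr.compile(dialect=postgresql.dialect()))  # roughly: (name_vector @> %(words)s)
print(expr.compile(dialect=sqlite.dialect()))      # roughly: array_contains(name_vector, ?)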
30  nominatim/db/sqlalchemy_types/json.py  Normal file
@@ -0,0 +1,30 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Common json type for different dialects.
"""
from typing import Any

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.sqlite import JSON as sqlite_json

from nominatim.typing import SaDialect

# pylint: disable=all

class Json(sa.types.TypeDecorator[Any]):
    """ Dialect-independent type for JSON.
    """
    impl = sa.types.JSON
    cache_ok = True

    def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
        if dialect.name == 'postgresql':
            return JSONB(none_as_null=True) # type: ignore[no-untyped-call]

        return sqlite_json(none_as_null=True)
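A short sketch (not part of the PR) showing the effect of load_dialect_impl above: the same Json column renders as JSONB when DDL is compiled for PostgreSQL and as plain JSON for SQLite. It assumes the module is importable under the path shown in the diff; the table name is made up.

import sqlalchemy as sa
from sqlalchemy.schema import CreateTable
from sqlalchemy.dialects import postgresql, sqlite

from nominatim.db.sqlalchemy_types.json import Json

table = sa.Table('example', sa.MetaData(), sa.Column('address', Json))
print(CreateTable(table).compile(dialect=postgresql.dialect()))  # address JSONB
print(CreateTable(table).compile(dialect=sqlite.dialect()))      # address JSON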
62  nominatim/db/sqlalchemy_types/key_value.py  Normal file
@@ -0,0 +1,62 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
A custom type that implements a simple key-value store of strings.
"""
from typing import Any

import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.dialects.postgresql import HSTORE
from sqlalchemy.dialects.sqlite import JSON as sqlite_json

from nominatim.typing import SaDialect, SaColumn

# pylint: disable=all

class KeyValueStore(sa.types.TypeDecorator[Any]):
    """ Dialect-independent type of a simple key-value store of strings.
    """
    impl = HSTORE
    cache_ok = True

    def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
        if dialect.name == 'postgresql':
            return HSTORE() # type: ignore[no-untyped-call]

        return sqlite_json(none_as_null=True)


    class comparator_factory(sa.types.UserDefinedType.Comparator): # type: ignore[type-arg]

        def merge(self, other: SaColumn) -> 'sa.Operators':
            """ Merge the values from the given KeyValueStore into this
                one, overwriting values where necessary. When the argument
                is null, nothing happens.
            """
            return KeyValueConcat(self.expr, other)


class KeyValueConcat(sa.sql.expression.FunctionElement[Any]):
    """ Return the merged key-value store from the input parameters.
    """
    type = KeyValueStore()
    name = 'JsonConcat'
    inherit_cache = True

@compiles(KeyValueConcat) # type: ignore[no-untyped-call, misc]
def default_json_concat(element: KeyValueConcat, compiler: 'sa.Compiled', **kw: Any) -> str:
    arg1, arg2 = list(element.clauses)
    return "(%s || coalesce(%s, ''::hstore))" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))

@compiles(KeyValueConcat, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_json_concat(element: KeyValueConcat, compiler: 'sa.Compiled', **kw: Any) -> str:
    arg1, arg2 = list(element.clauses)
    return "json_patch(%s, coalesce(%s, '{}'))" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
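A sketch (not part of the PR) of how the merge operation renders per dialect: an hstore concatenation on PostgreSQL and json_patch() on SQLite, both of which let keys from the second argument overwrite keys in the first. It assumes the module from this branch is importable; the column names are placeholders.

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql, sqlite

from nominatim.db.sqlalchemy_types.key_value import KeyValueConcat

expr = KeyValueConcat(sa.column('name'), sa.column('extratags'))
print(expr.compile(dialect=postgresql.dialect()))  # (name || coalesce(extratags, ''::hstore))
print(expr.compile(dialect=sqlite.dialect()))      # json_patch(name, coalesce(extratags, '{}'))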
122  nominatim/db/sqlite_functions.py  Normal file
@@ -0,0 +1,122 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom functions for SQLite.
"""
from typing import cast, Optional, Set, Any
import json

# pylint: disable=protected-access

def weigh_search(search_vector: Optional[str], rankings: str, default: float) -> float:
    """ Custom weight function for search results.
    """
    if search_vector is not None:
        svec = [int(x) for x in search_vector.split(',')]
        for rank in json.loads(rankings):
            if all(r in svec for r in rank[1]):
                return cast(float, rank[0])

    return default


class ArrayIntersectFuzzy:
    """ Compute the array of common elements of all input integer arrays.
        Very large input parameters may be ignored to speed up
        computation. Therefore, the result is a superset of common elements.

        Input and output arrays are given as comma-separated lists.
    """
    def __init__(self) -> None:
        self.first = ''
        self.values: Optional[Set[int]] = None

    def step(self, value: Optional[str]) -> None:
        """ Add the next array to the intersection.
        """
        if value is not None:
            if not self.first:
                self.first = value
            elif len(value) < 10000000:
                if self.values is None:
                    self.values = {int(x) for x in self.first.split(',')}
                self.values.intersection_update((int(x) for x in value.split(',')))

    def finalize(self) -> str:
        """ Return the final result.
        """
        if self.values is not None:
            return ','.join(map(str, self.values))

        return self.first


class ArrayUnion:
    """ Compute the set of all elements of the input integer arrays.

        Input and output arrays are given as strings of comma-separated lists.
    """
    def __init__(self) -> None:
        self.values: Optional[Set[str]] = None

    def step(self, value: Optional[str]) -> None:
        """ Add the next array to the union.
        """
        if value is not None:
            if self.values is None:
                self.values = set(value.split(','))
            else:
                self.values.update(value.split(','))

    def finalize(self) -> str:
        """ Return the final result.
        """
        return '' if self.values is None else ','.join(self.values)


def array_contains(container: Optional[str], containee: Optional[str]) -> Optional[bool]:
    """ Check if the array 'containee' is completely contained in array 'container'.
    """
    if container is None or containee is None:
        return None

    vset = container.split(',')
    return all(v in vset for v in containee.split(','))


def array_pair_contains(container1: Optional[str], container2: Optional[str],
                        containee: Optional[str]) -> Optional[bool]:
    """ Check if the array 'containee' is completely contained in the union of
        array 'container1' and array 'container2'.
    """
    if container1 is None or container2 is None or containee is None:
        return None

    vset = container1.split(',') + container2.split(',')
    return all(v in vset for v in containee.split(','))


def install_custom_functions(conn: Any) -> None:
    """ Install helper functions for Nominatim into the given SQLite
        database connection.
    """
    conn.create_function('weigh_search', 3, weigh_search, deterministic=True)
    conn.create_function('array_contains', 2, array_contains, deterministic=True)
    conn.create_function('array_pair_contains', 3, array_pair_contains, deterministic=True)
    _create_aggregate(conn, 'array_intersect_fuzzy', 1, ArrayIntersectFuzzy)
    _create_aggregate(conn, 'array_union', 1, ArrayUnion)


async def _make_aggregate(aioconn: Any, *args: Any) -> None:
    await aioconn._execute(aioconn._conn.create_aggregate, *args)


def _create_aggregate(conn: Any, name: str, nargs: int, aggregate: Any) -> None:
    try:
        conn.await_(_make_aggregate(conn._connection, name, nargs, aggregate))
    except Exception as error: # pylint: disable=broad-exception-caught
        conn._handle_exception(error)
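A minimal sketch (not part of the PR) of what these helpers do once registered. It uses the standard-library sqlite3 module directly, whereas install_custom_functions() above targets the aiosqlite connection wrapper; the table and data are made up.

import sqlite3

from nominatim.db.sqlite_functions import ArrayUnion, array_contains

conn = sqlite3.connect(':memory:')
conn.create_function('array_contains', 2, array_contains, deterministic=True)
conn.create_aggregate('array_union', 1, ArrayUnion)

# '2,4' is fully contained in '1,2,3,4' -> 1 (SQLite's true)
print(conn.execute("SELECT array_contains('1,2,3,4', '2,4')").fetchone()[0])

conn.execute("CREATE TABLE t (places TEXT)")
conn.executemany("INSERT INTO t VALUES (?)", [('1,2',), ('2,3',), (None,)])
# Union over all rows, NULLs ignored; element order is undefined, e.g. '1,2,3'
print(conn.execute("SELECT array_union(places) FROM t").fetchone()[0])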
@@ -14,7 +14,8 @@ from pathlib import Path
import sqlalchemy as sa

from nominatim.typing import SaSelect
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.db.sqlalchemy_types import Geometry, IntArray
from nominatim.api.search.query_analyzer_factory import make_query_analyzer
import nominatim.api as napi

LOG = logging.getLogger()
@@ -27,11 +28,15 @@ async def convert(project_dir: Path, outfile: Path, options: Set[str]) -> None:

    try:
        outapi = napi.NominatimAPIAsync(project_dir,
                                        {'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={outfile}"})
                                        {'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={outfile}",
                                         'NOMINATIM_DATABASE_RW': '1'})

        async with api.begin() as src, outapi.begin() as dest:
            writer = SqliteWriter(src, dest, options)
            await writer.write()
        try:
            async with api.begin() as src, outapi.begin() as dest:
                writer = SqliteWriter(src, dest, options)
                await writer.write()
        finally:
            await outapi.close()
    finally:
        await api.close()
@@ -51,18 +56,24 @@ class SqliteWriter:
        """ Create the database structure and copy the data from
            the source database to the destination.
        """
        LOG.warning('Setting up spatialite')
        await self.dest.execute(sa.select(sa.func.InitSpatialMetaData(True, 'WGS84')))

        await self.create_tables()
        await self.copy_data()
        if 'search' in self.options:
            await self.create_word_table()
        await self.create_indexes()


    async def create_tables(self) -> None:
        """ Set up the database tables.
        """
        LOG.warning('Setting up tables')
        if 'search' not in self.options:
            self.dest.t.meta.remove(self.dest.t.search_name)
        else:
            await self.create_class_tables()

        await self.dest.connection.run_sync(self.dest.t.meta.create_all)
@@ -75,6 +86,41 @@ class SqliteWriter:
                col.type.subtype.upper(), 'XY')))


    async def create_class_tables(self) -> None:
        """ Set up the tables that serve class/type-specific geometries.
        """
        sql = sa.text("""SELECT tablename FROM pg_tables
                         WHERE tablename LIKE 'place_classtype_%'""")
        for res in await self.src.execute(sql):
            for db in (self.src, self.dest):
                sa.Table(res[0], db.t.meta,
                         sa.Column('place_id', sa.BigInteger),
                         sa.Column('centroid', Geometry))


    async def create_word_table(self) -> None:
        """ Create the word table.
            This table needs the property information to determine the
            correct format. It therefore needs to be created after all other
            data has been copied.
        """
        await make_query_analyzer(self.src)
        await make_query_analyzer(self.dest)
        src = self.src.t.meta.tables['word']
        dest = self.dest.t.meta.tables['word']

        await self.dest.connection.run_sync(dest.create)

        LOG.warning("Copying word table")
        async_result = await self.src.connection.stream(sa.select(src))

        async for partition in async_result.partitions(10000):
            data = [{k: getattr(r, k) for k in r._fields} for r in partition]
            await self.dest.execute(dest.insert(), data)

        await self.dest.connection.run_sync(sa.Index('idx_word_woken', dest.c.word_token).create)


    async def copy_data(self) -> None:
        """ Copy data for all registered tables.
        """
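A condensed sketch (not part of the PR) of the batched copy pattern used in create_word_table() and copy_data(): stream rows from the source connection and insert them in fixed-size partitions so memory use stays bounded. The function name, table objects, and batch size are placeholders.

import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection

async def copy_table(src_conn: AsyncConnection, dest_conn: AsyncConnection,
                     src_table: sa.Table, dest_table: sa.Table,
                     batch_size: int = 10000) -> None:
    # Server-side streaming of the source rows, inserted batch by batch.
    result = await src_conn.stream(sa.select(src_table))
    async for partition in result.partitions(batch_size):
        rows = [{k: getattr(r, k) for k in r._fields} for r in partition]
        await dest_conn.execute(dest_table.insert(), rows)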
@@ -87,6 +133,14 @@ class SqliteWriter:
                    for r in partition]
            await self.dest.execute(table.insert(), data)

        # Set up a minimal copy of pg_tables used to look up the class tables later.
        pg_tables = sa.Table('pg_tables', self.dest.t.meta,
                             sa.Column('schemaname', sa.Text, default='public'),
                             sa.Column('tablename', sa.Text))
        await self.dest.connection.run_sync(pg_tables.create)
        data = [{'tablename': t} for t in self.dest.t.meta.tables]
        await self.dest.execute(pg_tables.insert().values(data))


    async def create_indexes(self) -> None:
        """ Add indexes necessary for the frontend.
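A sketch (not part of the PR) of why the minimal pg_tables copy helps: the class-table lookup query from create_class_tables() then works unchanged against the SQLite file. Emulated here with the standard sqlite3 module; the table names are invented examples.

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("CREATE TABLE pg_tables (schemaname TEXT DEFAULT 'public', tablename TEXT)")
conn.executemany("INSERT INTO pg_tables (tablename) VALUES (?)",
                 [('placex',), ('place_classtype_amenity_restaurant',)])
print(conn.execute("""SELECT tablename FROM pg_tables
                      WHERE tablename LIKE 'place_classtype_%'""").fetchall())
# [('place_classtype_amenity_restaurant',)]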
@@ -116,6 +170,22 @@ class SqliteWriter:
        await self.create_index('placex', 'parent_place_id')
        await self.create_index('placex', 'rank_address')
        await self.create_index('addressline', 'place_id')
        await self.create_index('postcode', 'place_id')
        await self.create_index('osmline', 'place_id')
        await self.create_index('tiger', 'place_id')

        if 'search' in self.options:
            await self.create_spatial_index('postcode', 'geometry')
            await self.create_spatial_index('search_name', 'centroid')
            await self.create_index('search_name', 'place_id')
            await self.create_index('osmline', 'parent_place_id')
            await self.create_index('tiger', 'parent_place_id')
            await self.create_search_index()

            for t in self.dest.t.meta.tables:
                if t.startswith('place_classtype_'):
                    await self.dest.execute(sa.select(
                        sa.func.CreateSpatialIndex(t, 'centroid')))


    async def create_spatial_index(self, table: str, column: str) -> None:
@@ -133,6 +203,36 @@ class SqliteWriter:
            sa.Index(f"idx_{table}_{column}", getattr(table.c, column)).create)


    async def create_search_index(self) -> None:
        """ Create the tables and indexes needed for word lookup.
        """
        LOG.warning("Creating reverse search table")
        rsn = sa.Table('reverse_search_name', self.dest.t.meta,
                       sa.Column('word', sa.Integer()),
                       sa.Column('column', sa.Text()),
                       sa.Column('places', IntArray))
        await self.dest.connection.run_sync(rsn.create)

        tsrc = self.src.t.search_name
        for column in ('name_vector', 'nameaddress_vector'):
            sql = sa.select(sa.func.unnest(getattr(tsrc.c, column)).label('word'),
                            sa.func.ArrayAgg(tsrc.c.place_id).label('places'))\
                    .group_by('word')

            async_result = await self.src.connection.stream(sql)
            async for partition in async_result.partitions(100):
                data = []
                for row in partition:
                    row.places.sort()
                    data.append({'word': row.word,
                                 'column': column,
                                 'places': row.places})
                await self.dest.execute(rsn.insert(), data)

        await self.dest.connection.run_sync(
            sa.Index('idx_reverse_search_name_word', rsn.c.word).create)


    def select_from(self, table: str) -> SaSelect:
        """ Create the SQL statement to select the source columns and rows.
        """
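A standalone sketch (not part of the PR) of the word-to-places aggregation streamed by create_search_index(), using the plain array_agg function rather than the typed ArrayAgg wrapper above. The table definition is a stand-in for the real search_name table.

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

sn = sa.table('search_name', sa.column('place_id'), sa.column('name_vector'))
sql = sa.select(sa.func.unnest(sn.c.name_vector).label('word'),
                sa.func.array_agg(sn.c.place_id).label('places'))\
        .group_by('word')
print(sql.compile(dialect=postgresql.dialect()))
# roughly: SELECT unnest(search_name.name_vector) AS word,
#                 array_agg(search_name.place_id) AS places
#          FROM search_name GROUP BY word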
@@ -72,3 +72,4 @@ SaLabel: TypeAlias = 'sa.Label[Any]'
SaFromClause: TypeAlias = 'sa.FromClause'
SaSelectable: TypeAlias = 'sa.Selectable'
SaBind: TypeAlias = 'sa.BindParameter[Any]'
SaDialect: TypeAlias = 'sa.Dialect'
@@ -76,8 +76,8 @@ class JsonWriter:
    def end_array(self) -> 'JsonWriter':
        """ Write the closing bracket of a JSON array.
        """
        assert self.pending in (',', '[', '')
        if self.pending == '[':
        assert self.pending in (',', '[', ']', ')', '')
        if self.pending not in (',', ''):
            self.data.write(self.pending)
        self.pending = ']'
        return self