split code into submodules

This commit is contained in:
Sarah Hoffmann
2024-05-16 11:55:17 +02:00
parent 0fb4fe8e4d
commit 6e89310a92
137 changed files with 757 additions and 716 deletions

View File

@@ -0,0 +1,221 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom functions and expressions for SQLAlchemy.
"""
from __future__ import annotations
from typing import Any
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from nominatim_core.typing import SaColumn
# pylint: disable=all
class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
    """ Condition that lets the query planner use the partial index
        'idx_placex_geometry_reverse_lookupPolygon'.

        The rendered SQL has to be constant, so that the planner picks
        the conditions up correctly in prepared statements.
    """
    inherit_cache = True
    name = 'PlacexGeometryReverseLookuppolygon'
@compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
def _default_intersects(element: PlacexGeometryReverseLookuppolygon,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render the polygon reverse-lookup condition for the default dialect.

        The result is a fixed SQL string on table 'placex'; neither the
        element nor the compiler is consulted.
    """
    conditions = (
        "ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')",
        "placex.rank_address between 4 and 25",
        "placex.type != 'postcode'",
        "placex.name is not null",
        "placex.indexed_status = 0",
        "placex.linked_place_id is null",
    )
    return "(" + " AND ".join(conditions) + ")"
@compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_intersects(element: PlacexGeometryReverseLookuppolygon,
                       compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render the polygon reverse-lookup condition for SQLite.

        Same fixed condition as the default variant, except that the
        geometry type names are spelled 'POLYGON' and 'MULTIPOLYGON'.
    """
    conditions = (
        "ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')",
        "placex.rank_address between 4 and 25",
        "placex.type != 'postcode'",
        "placex.name is not null",
        "placex.indexed_status = 0",
        "placex.linked_place_id is null",
    )
    return "(" + " AND ".join(conditions) + ")"
class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
    """ SQL function over the 'geometry' and 'rank_search' columns of a
        table and a geometry to compare against.

        The name of the table is remembered in 'tablename', so that the
        compile functions can refer to further columns of the same table.
    """
    inherit_cache = True
    name = 'IntersectsReverseDistance'

    def __init__(self, table: sa.Table, geom: SaColumn) -> None:
        columns = table.c
        super().__init__(columns.geometry, columns.rank_search, geom)
        self.tablename = table.name
@compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
def default_reverse_place_diameter(element: IntersectsReverseDistance,
                                   compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render IntersectsReverseDistance for the default dialect.

        Emits the fixed filters on the element's table, followed by an
        '&&' test between the first clause buffered by
        reverse_place_diameter() of the second clause and the third clause.
    """
    table = element.tablename
    filters = (
        f"{table}.rank_address between 4 and 25",
        f"{table}.type != 'postcode'",
        f"{table}.name is not null",
        f"{table}.linked_place_id is null",
        f"{table}.osm_type = 'N'",
    )
    # Clauses are rendered in their stored order; the template below
    # expects exactly three of them.
    rendered = tuple(compiler.process(clause, **kw) for clause in element.clauses)
    overlap = " AND ST_Buffer(%s, reverse_place_diameter(%s)) && %s)" % rendered
    return "(" + " AND ".join(filters) + overlap
@compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_reverse_place_diameter(element: IntersectsReverseDistance,
                                  compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render IntersectsReverseDistance for SQLite.

        Besides the fixed filters on the element's table, the condition
        uses MbrIntersects() on an expanded comparison geometry and a
        lookup of 'placex_place_node_areas' through SpatialIndex.
    """
    geom1, rank, geom2 = list(element.clauses)
    table = element.tablename
    # NOTE(review): '14.0 * exp(-0.2 * rank) - 0.03' presumably mirrors
    # reverse_place_diameter() of the default dialect - confirm.
    template = "".join((
        f"({table}.rank_address between 4 and 25",
        f" AND {table}.type != 'postcode'",
        f" AND {table}.name is not null",
        f" AND {table}.linked_place_id is null",
        f" AND {table}.osm_type = 'N'",
        " AND MbrIntersects(%s, ST_Expand(%s, 14.0 * exp(-0.2 * %s) - 0.03))",
        f" AND {table}.place_id IN",
        " (SELECT place_id FROM placex_place_node_areas",
        " WHERE ROWID IN (SELECT ROWID FROM SpatialIndex",
        " WHERE f_table_name = 'placex_place_node_areas'",
        " AND search_frame = %s)))",
    ))
    # The comparison geometry appears twice in the SQL and is therefore
    # rendered twice, in the order of the placeholders.
    area_sql = compiler.process(geom1, **kw)
    expand_sql = compiler.process(geom2, **kw)
    rank_sql = compiler.process(rank, **kw)
    frame_sql = compiler.process(geom2, **kw)
    return template % (area_sql, expand_sql, rank_sql, frame_sql)
class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
    """ SQL function with two arguments, a distance and a rank, that
        checks if the distance is below the limit derived from the rank.
    """
    inherit_cache = True
    name = 'IsBelowReverseDistance'
@compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
def default_is_below_reverse_distance(element: IsBelowReverseDistance,
                                      compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render IsBelowReverseDistance for the default dialect as a
        comparison of the distance with reverse_place_diameter(rank).
    """
    dist, rank = element.clauses
    dist_sql = compiler.process(dist, **kw)
    rank_sql = compiler.process(rank, **kw)
    return f"{dist_sql} < reverse_place_diameter({rank_sql})"
@compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_is_below_reverse_distance(element: IsBelowReverseDistance,
                                     compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render IsBelowReverseDistance for SQLite with the limit written
        out as an expression of the rank.
    """
    dist, rank = element.clauses
    dist_sql = compiler.process(dist, **kw)
    rank_sql = compiler.process(rank, **kw)
    return f"{dist_sql} < 14.0 * exp(-0.2 * {rank_sql}) - 0.03"
class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
    """ SQL function over the 'rank_address', 'housenumber' and 'name'
        columns of the given table.
    """
    inherit_cache = True
    name = 'IsAddressPoint'

    def __init__(self, table: sa.Table) -> None:
        columns = table.c
        super().__init__(columns.rank_address, columns.housenumber, columns.name)
@compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
def default_is_address_point(element: IsAddressPoint,
                             compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render IsAddressPoint for the default dialect: the rank must be 30
        and either the house number is set or the name column has the
        key 'addr:housename'.
    """
    rank, hnr, name = element.clauses
    rank_sql = compiler.process(rank, **kw)
    hnr_sql = compiler.process(hnr, **kw)
    name_sql = compiler.process(name, **kw)
    return f"({rank_sql} = 30 AND ({hnr_sql} IS NOT NULL OR {name_sql} ? 'addr:housename'))"
@compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_is_address_point(element: IsAddressPoint,
                            compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render IsAddressPoint for SQLite: the rank must be 30 and either
        the house number or the JSON entry 'addr:housename' of the name
        column must be non-null.
    """
    rank, hnr, name = element.clauses
    rank_sql = compiler.process(rank, **kw)
    hnr_sql = compiler.process(hnr, **kw)
    name_sql = compiler.process(name, **kw)
    housename_sql = f"json_extract({name_sql}, '$.addr:housename')"
    return f"({rank_sql} = 30 AND coalesce({hnr_sql}, {housename_sql}) IS NOT NULL)"
class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
    """ Check if any of the names from the JSON array in parameter 2
        is contained in the list of names given in parameter 1.
    """
    inherit_cache = True
    name = 'CrosscheckNames'
@compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
def compile_crosscheck_names(element: CrosscheckNames,
                             compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render CrosscheckNames for the default dialect.

        The values of the first argument are tested with '&&' against the
        elements of the JSON array; a NULL result is turned into false.
    """
    names, candidates = element.clauses
    names_sql = compiler.process(names, **kw)
    candidates_sql = compiler.process(candidates, **kw)
    return (f"coalesce(avals({names_sql}) && "
            f"ARRAY(SELECT * FROM json_array_elements_text({candidates_sql})), false)")
@compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
def compile_sqlite_crosscheck_names(element: CrosscheckNames,
                                    compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render CrosscheckNames for SQLite as an EXISTS query that joins
        the json_each() expansions of both arguments on equal values.
    """
    names, candidates = element.clauses
    names_sql = compiler.process(names, **kw)
    candidates_sql = compiler.process(candidates, **kw)
    return (f"EXISTS(SELECT * FROM json_each({names_sql}) as name,"
            f" json_each({candidates_sql}) as match_name"
            " WHERE name.value = match_name.value)")
class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
    """ Expand a JSON array into the set of its elements.
    """
    inherit_cache = True
    name = 'JsonArrayEach'
@compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
def default_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render JsonArrayEach for the default dialect using
        json_array_elements().
    """
    arguments = compiler.process(element.clauses, **kw)
    return f"json_array_elements({arguments})"
@compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render JsonArrayEach for SQLite using json_each().
    """
    arguments = compiler.process(element.clauses, **kw)
    return f"json_each({arguments})"
class Greatest(sa.sql.functions.GenericFunction[Any]):
    """ Function that yields the maximum of all its input parameters.
    """
    inherit_cache = True
    name = 'greatest'
@compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_greatest(element: Greatest, compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render Greatest for SQLite, where the function is called max().
    """
    arguments = compiler.process(element.clauses, **kw)
    return f"max({arguments})"
class RegexpWord(sa.sql.functions.GenericFunction[Any]):
    """ Check if a given string contains a full word.

        The first argument is the word, the second the string to search.
    """
    inherit_cache = True
    name = 'RegexpWord'
@compiles(RegexpWord, 'postgresql') # type: ignore[no-untyped-call, misc]
def postgres_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render RegexpWord for PostgreSQL as a '~*' match of the string
        against the word wrapped in '\\m(' and ')\\M'.
    """
    word, text = element.clauses
    # The searched string comes first in the SQL and is rendered first.
    text_sql = compiler.process(text, **kw)
    word_sql = compiler.process(word, **kw)
    return f"{text_sql} ~* ('\\m(' || {word_sql} || ')\\M')::text"
@compiles(RegexpWord, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Render RegexpWord for SQLite as a call to regexp() with the word
        wrapped in '\\b(' and ')\\b' as pattern.
    """
    word, text = element.clauses
    word_sql = compiler.process(word, **kw)
    text_sql = compiler.process(text, **kw)
    return f"regexp('\\b(' || {word_sql} || ')\\b', {text_sql})"

View File

@@ -0,0 +1,122 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom functions for SQLite.
"""
from typing import cast, Optional, Set, Any
import json
# pylint: disable=protected-access
def weigh_search(search_vector: Optional[str], rankings: str, default: float) -> float:
    """ Custom weight function for search results.

        'search_vector' is a comma-separated list of integers, where the
        empty string stands for a list without elements. 'rankings' is a
        JSON array of entries, each holding a weight at position 0 and a
        list of integers at position 1.

        Returns the weight of the first entry whose integers are all
        contained in the search vector. When there is no such entry or
        the search vector is None, 'default' is returned.
    """
    if search_vector is None:
        return default

    # Skip empty items, so that an empty vector does not trip up int().
    # A set keeps the membership tests below cheap.
    present = {int(item) for item in search_vector.split(',') if item}

    for rank in json.loads(rankings):
        if all(required in present for required in rank[1]):
            return cast(float, rank[0])

    return default
class ArrayIntersectFuzzy:
    """ Compute the array of common elements of all input integer arrays.
        Very large input parameters may be ignored to speed up
        computation. Therefore, the result is a superset of common elements.

        Input and output arrays are given as comma-separated lists.
        The empty string stands for an array without elements.
    """
    # Inputs with a string representation of at least this many characters
    # are left out of the intersection.
    _MAX_LENGTH = 10000000

    def __init__(self) -> None:
        # First non-empty input, kept as is, so that it can be returned
        # without parsing when it remains the only input.
        self.first = ''
        # Running intersection, None until a second input is processed.
        self.values: Optional[Set[int]] = None

    @staticmethod
    def _parse(value: str) -> Set[int]:
        """ Convert a comma-separated list into a set of integers.
            Empty items are skipped, so '' yields the empty set.
        """
        return {int(item) for item in value.split(',') if item}

    def step(self, value: Optional[str]) -> None:
        """ Add the next array to the intersection.

            None is ignored. An empty array that arrives before any
            non-empty one is skipped as well, which is covered by the
            superset guarantee.
        """
        if value is None:
            return

        if not self.first:
            self.first = value
        elif len(value) < self._MAX_LENGTH:
            if self.values is None:
                self.values = self._parse(self.first)
            self.values.intersection_update(self._parse(value))

    def finalize(self) -> str:
        """ Return the final result.
        """
        if self.values is None:
            return self.first

        return ','.join(map(str, self.values))
class ArrayUnion:
    """ Compute the set of all elements of the input integer arrays.

        Input and output arrays are given as strings of comma-separated lists.
        The empty string stands for an array without elements.
    """

    def __init__(self) -> None:
        # Elements seen so far, None as long as no array was added.
        self.values: Optional[Set[str]] = None

    def step(self, value: Optional[str]) -> None:
        """ Add the next array to the union. None is ignored.
        """
        if value is None:
            return

        if self.values is None:
            self.values = set()
        # Skip empty items, so that an empty array does not add a bogus
        # '' element to the result.
        self.values.update(item for item in value.split(',') if item)

    def finalize(self) -> str:
        """ Return the final result.
        """
        if self.values is None:
            return ''

        return ','.join(self.values)
def array_contains(container: Optional[str], containee: Optional[str]) -> Optional[bool]:
    """ Is the array 'containee' completely contained in array 'container'.

        Both arrays are given as comma-separated lists, where the empty
        string stands for an array without elements. An empty 'containee'
        is contained in every array. Returns None when one of the
        parameters is None.
    """
    if container is None or containee is None:
        return None

    available = set(container.split(','))
    # Empty items are skipped, so that an empty containee always matches.
    return all(item in available for item in containee.split(',') if item)
def array_pair_contains(container1: Optional[str], container2: Optional[str],
                        containee: Optional[str]) -> Optional[bool]:
    """ Is the array 'containee' completely contained in the union of
        array 'container1' and array 'container2'.

        All arrays are given as comma-separated lists, where the empty
        string stands for an array without elements. An empty 'containee'
        is contained in every union. Returns None when one of the
        parameters is None.
    """
    if container1 is None or container2 is None or containee is None:
        return None

    available = set(container1.split(','))
    available.update(container2.split(','))
    # Empty items are skipped, so that an empty containee always matches.
    return all(item in available for item in containee.split(',') if item)
def install_custom_functions(conn: Any) -> None:
    """ Install helper functions for Nominatim into the given SQLite
        database connection.

        The scalar functions are registered as deterministic functions,
        the aggregates as functions with a single parameter.
    """
    scalar_functions = (
        ('weigh_search', 3, weigh_search),
        ('array_contains', 2, array_contains),
        ('array_pair_contains', 3, array_pair_contains),
    )
    for sql_name, nargs, func in scalar_functions:
        conn.create_function(sql_name, nargs, func, deterministic=True)

    aggregates = (
        ('array_intersect_fuzzy', ArrayIntersectFuzzy),
        ('array_union', ArrayUnion),
    )
    for sql_name, aggregate in aggregates:
        _create_aggregate(conn, sql_name, 1, aggregate)
async def _make_aggregate(aioconn: Any, *args: Any) -> None:
    """ Call create_aggregate() of the connection wrapped by 'aioconn'
        with the given arguments, going through aioconn._execute().
    """
    register = aioconn._conn.create_aggregate
    await aioconn._execute(register, *args)
def _create_aggregate(conn: Any, name: str, nargs: int, aggregate: Any) -> None:
    """ Register the aggregate class under the given name and parameter
        count on the connection behind 'conn'.

        The asynchronous registration is run through conn.await_(). Any
        exception is passed on to conn._handle_exception().
    """
    try:
        run_sync = conn.await_
        run_sync(_make_aggregate(conn._connection, name, nargs, aggregate))
    except Exception as exc: # pylint: disable=broad-exception-caught
        conn._handle_exception(exc)