make SQL statements in reverse geocoding into lambda functions

Further improves internal caching of statements by SQLAlchemy.
This commit is contained in:
Sarah Hoffmann
2023-07-12 11:46:07 +02:00
parent 35fd74af6d
commit f264eaeda2
2 changed files with 115 additions and 90 deletions

View File

@@ -11,7 +11,8 @@ from typing import Optional, List, Callable, Type, Tuple, Dict, Any
import sqlalchemy as sa import sqlalchemy as sa
from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow, SaBind from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
SaBind, SaLambdaSelect
from nominatim.api.connection import SearchConnection from nominatim.api.connection import SearchConnection
import nominatim.api.results as nres import nominatim.api.results as nres
from nominatim.api.logging import log from nominatim.api.logging import log
@@ -135,10 +136,7 @@ class ReverseGeocoder:
return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL) return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
def _add_geometry_columns(self, sql: SaSelect, col: SaColumn) -> SaSelect: def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
if not self.has_geometries():
return sql
out = [] out = []
if self.params.geometry_simplification > 0.0: if self.params.geometry_simplification > 0.0:
@@ -182,18 +180,19 @@ class ReverseGeocoder:
# PostgreSQL must not get the distance as a parameter because # PostgreSQL must not get the distance as a parameter because
# there is a danger it won't be able to properly estimate index use # there is a danger it won't be able to properly estimate index use
# when used with prepared statements # when used with prepared statements
dist_param = sa.text(f"{distance}") diststr = sa.text(f"{distance}")
sql = _select_from_placex(t)\ sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
.where(t.c.geometry.ST_DWithin(WKT_PARAM, dist_param))\ .where(t.c.geometry.ST_DWithin(WKT_PARAM, diststr))
.where(t.c.indexed_status == 0)\ .where(t.c.indexed_status == 0)
.where(t.c.linked_place_id == None)\ .where(t.c.linked_place_id == None)
.where(sa.or_(sa.not_(t.c.geometry.is_area()), .where(sa.or_(sa.not_(t.c.geometry.is_area()),
t.c.centroid.ST_Distance(WKT_PARAM) < dist_param))\ t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
.order_by('distance')\ .order_by('distance')
.limit(1) .limit(1))
sql = self._add_geometry_columns(sql, t.c.geometry) if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry)
restrict: List[SaColumn] = [] restrict: List[SaColumn] = []
@@ -221,16 +220,17 @@ class ReverseGeocoder:
async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]: async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.placex t = self.conn.t.placex
sql = _select_from_placex(t)\ sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
.where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))\ .where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))
.where(t.c.parent_place_id == parent_place_id)\ .where(t.c.parent_place_id == parent_place_id)
.where(_is_address_point(t))\ .where(_is_address_point(t))
.where(t.c.indexed_status == 0)\ .where(t.c.indexed_status == 0)
.where(t.c.linked_place_id == None)\ .where(t.c.linked_place_id == None)
.order_by('distance')\ .order_by('distance')
.limit(1) .limit(1))
sql = self._add_geometry_columns(sql, t.c.geometry) if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry)
return (await self.conn.execute(sql, self.bind_params)).one_or_none() return (await self.conn.execute(sql, self.bind_params)).one_or_none()
@@ -239,25 +239,29 @@ class ReverseGeocoder:
distance: float) -> Optional[SaRow]: distance: float) -> Optional[SaRow]:
t = self.conn.t.osmline t = self.conn.t.osmline
sql = sa.select(t, sql: Any = sa.lambda_stmt(lambda:
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'), sa.select(t,
_locate_interpolation(t))\ t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))\ _locate_interpolation(t))
.where(t.c.startnumber != None)\ .where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))
.order_by('distance')\ .where(t.c.startnumber != None)
.limit(1) .order_by('distance')
.limit(1))
if parent_place_id is not None: if parent_place_id is not None:
sql = sql.where(t.c.parent_place_id == parent_place_id) sql += lambda s: s.where(t.c.parent_place_id == parent_place_id)
inner = sql.subquery('ipol') def _wrap_query(base_sql: SaLambdaSelect) -> SaSelect:
inner = base_sql.subquery('ipol')
sql = sa.select(inner.c.place_id, inner.c.osm_id, return sa.select(inner.c.place_id, inner.c.osm_id,
inner.c.parent_place_id, inner.c.address, inner.c.parent_place_id, inner.c.address,
_interpolated_housenumber(inner), _interpolated_housenumber(inner),
_interpolated_position(inner), _interpolated_position(inner),
inner.c.postcode, inner.c.country_code, inner.c.postcode, inner.c.country_code,
inner.c.distance) inner.c.distance)
sql += _wrap_query
if self.has_geometries(): if self.has_geometries():
sub = sql.subquery('geom') sub = sql.subquery('geom')
@@ -271,23 +275,26 @@ class ReverseGeocoder:
parent_id: int) -> Optional[SaRow]: parent_id: int) -> Optional[SaRow]:
t = self.conn.t.tiger t = self.conn.t.tiger
inner = sa.select(t, def _base_query() -> SaSelect:
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'), inner = sa.select(t,
_locate_interpolation(t))\ t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\ _locate_interpolation(t))\
.where(t.c.parent_place_id == parent_place_id)\ .where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
.order_by('distance')\ .where(t.c.parent_place_id == parent_place_id)\
.limit(1)\ .order_by('distance')\
.subquery('tiger') .limit(1)\
.subquery('tiger')
sql = sa.select(inner.c.place_id, return sa.select(inner.c.place_id,
inner.c.parent_place_id, inner.c.parent_place_id,
sa.literal(parent_type).label('osm_type'), sa.sql.expression.label('osm_type', parent_type),
sa.literal(parent_id).label('osm_id'), sa.sql.expression.label('osm_id', parent_id),
_interpolated_housenumber(inner), _interpolated_housenumber(inner),
_interpolated_position(inner), _interpolated_position(inner),
inner.c.postcode, inner.c.postcode,
inner.c.distance) inner.c.distance)
sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
if self.has_geometries(): if self.has_geometries():
sub = sql.subquery('geom') sub = sql.subquery('geom')
@@ -356,31 +363,40 @@ class ReverseGeocoder:
log().comment('Reverse lookup by larger address area features') log().comment('Reverse lookup by larger address area features')
t = self.conn.t.placex t = self.conn.t.placex
# The inner SQL brings results in the right order, so that def _base_query() -> SaSelect:
# later only a minimum of results needs to be checked with ST_Contains. # The inner SQL brings results in the right order, so that
inner = sa.select(t, sa.literal(0.0).label('distance'))\ # later only a minimum of results needs to be checked with ST_Contains.
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\ inner = sa.select(t, sa.literal(0.0).label('distance'))\
.where(t.c.geometry.intersects(WKT_PARAM))\ .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
.where(snfn.select_index_placex_geometry_reverse_lookuppolygon('placex'))\ .where(t.c.geometry.intersects(WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\ .where(snfn.select_index_placex_geometry_reverse_lookuppolygon('placex'))\
.limit(50)\ .order_by(sa.desc(t.c.rank_search))\
.subquery('area') .limit(50)\
.subquery('area')
sql = _select_from_placex(inner, False)\ return _select_from_placex(inner, False)\
.where(inner.c.geometry.ST_Contains(WKT_PARAM))\ .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
.order_by(sa.desc(inner.c.rank_search))\ .order_by(sa.desc(inner.c.rank_search))\
.limit(1) .limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry) sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
if self.has_geometries():
sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (area)', address_row) log().var_dump('Result (area)', address_row)
if address_row is not None and address_row.rank_search < self.max_rank: if address_row is not None and address_row.rank_search < self.max_rank:
log().comment('Search for better matching place nodes inside the area') log().comment('Search for better matching place nodes inside the area')
inner = sa.select(t,
address_rank = address_row.rank_search
address_id = address_row.place_id
def _place_inside_area_query() -> SaSelect:
inner = \
sa.select(t,
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\ t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_search > address_row.rank_search)\ .where(t.c.rank_search > address_rank)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\ .where(t.c.rank_search <= MAX_RANK_PARAM)\
.where(t.c.indexed_status == 0)\ .where(t.c.indexed_status == 0)\
.where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\ .where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
@@ -391,15 +407,17 @@ class ReverseGeocoder:
.limit(50)\ .limit(50)\
.subquery('places') .subquery('places')
touter = self.conn.t.placex.alias('outer') touter = t.alias('outer')
sql = _select_from_placex(inner, False)\ return _select_from_placex(inner, False)\
.join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\ .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
.where(touter.c.place_id == address_row.place_id)\ .where(touter.c.place_id == address_id)\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\ .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1) .limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry) sql = sa.lambda_stmt(_place_inside_area_query)
if self.has_geometries():
sql = self._add_geometry_columns(sql, sa.literal_column('places.geometry'))
place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (place node)', place_address_row) log().var_dump('Result (place node)', place_address_row)
@@ -433,7 +451,8 @@ class ReverseGeocoder:
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1) .limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry) if self.has_geometries():
sql = self._add_geometry_columns(sql, inner.c.geometry)
row = (await self.conn.execute(sql, self.bind_params)).one_or_none() row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (non-address feature)', row) log().var_dump('Result (non-address feature)', row)
@@ -464,7 +483,7 @@ class ReverseGeocoder:
""" """
log().section('Reverse lookup by country code') log().section('Reverse lookup by country code')
t = self.conn.t.country_grid t = self.conn.t.country_grid
sql = sa.select(t.c.country_code).distinct()\ sql: SaLambdaSelect = sa.select(t.c.country_code).distinct()\
.where(t.c.geometry.ST_Contains(WKT_PARAM)) .where(t.c.geometry.ST_Contains(WKT_PARAM))
ccodes = tuple((r[0] for r in await self.conn.execute(sql, self.bind_params))) ccodes = tuple((r[0] for r in await self.conn.execute(sql, self.bind_params)))
@@ -477,7 +496,9 @@ class ReverseGeocoder:
if self.max_rank > 4: if self.max_rank > 4:
log().comment('Search for place nodes in country') log().comment('Search for place nodes in country')
inner = sa.select(t, def _base_query() -> SaSelect:
inner = \
sa.select(t,
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\ t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_search > 4)\ .where(t.c.rank_search > 4)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\ .where(t.c.rank_search <= MAX_RANK_PARAM)\
@@ -489,14 +510,16 @@ class ReverseGeocoder:
.intersects(WKT_PARAM))\ .intersects(WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\ .order_by(sa.desc(t.c.rank_search))\
.limit(50)\ .limit(50)\
.subquery() .subquery('area')
sql = _select_from_placex(inner, False)\ return _select_from_placex(inner, False)\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\ .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1) .limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry) sql = sa.lambda_stmt(_base_query)
if self.has_geometries():
sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (addressable place node)', address_row) log().var_dump('Result (addressable place node)', address_row)
@@ -505,15 +528,16 @@ class ReverseGeocoder:
if address_row is None: if address_row is None:
# Still nothing, then return a country with the appropriate country code. # Still nothing, then return a country with the appropriate country code.
sql = _select_from_placex(t)\ sql = sa.lambda_stmt(lambda: _select_from_placex(t)\
.where(t.c.country_code.in_(ccodes))\ .where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)\ .where(t.c.rank_address == 4)\
.where(t.c.rank_search == 4)\ .where(t.c.rank_search == 4)\
.where(t.c.linked_place_id == None)\ .where(t.c.linked_place_id == None)\
.order_by('distance')\ .order_by('distance')\
.limit(1) .limit(1))
sql = self._add_geometry_columns(sql, t.c.geometry) if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry)
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()

View File

@@ -62,6 +62,7 @@ if TYPE_CHECKING:
else: else:
TypeAlias = str TypeAlias = str
SaLambdaSelect: TypeAlias = 'Union[sa.Select[Any], sa.StatementLambdaElement]'
SaSelect: TypeAlias = 'sa.Select[Any]' SaSelect: TypeAlias = 'sa.Select[Any]'
SaScalarSelect: TypeAlias = 'sa.ScalarSelect[Any]' SaScalarSelect: TypeAlias = 'sa.ScalarSelect[Any]'
SaRow: TypeAlias = 'sa.Row[Any]' SaRow: TypeAlias = 'sa.Row[Any]'