Merge pull request #3101 from lonvia/custom-geometry-type

Improve use of SQLAlchemy statement cache with search queries
This commit is contained in:
Sarah Hoffmann
2023-07-03 11:03:26 +02:00
committed by GitHub
22 changed files with 415 additions and 251 deletions

View File

@@ -27,10 +27,10 @@ runs:
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson
if [ "$FLAVOUR" == "oldstuff" ]; then
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4 GeoAlchemy2==0.10.0 datrie asyncpg
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4 datrie asyncpg
else
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
pip3 install sqlalchemy GeoAlchemy2 psycopg
pip3 install sqlalchemy psycopg
fi
shell: bash
env:

View File

@@ -49,7 +49,6 @@ For running Nominatim:
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
* [SQLAlchemy](https://www.sqlalchemy.org/) (1.4+ with greenlet support)
* [GeoAlchemy2](https://geoalchemy-2.readthedocs.io/) (0.10+)
* [asyncpg](https://magicstack.github.io/asyncpg) (0.8+)
* [PyICU](https://pypi.org/project/PyICU/)
* [PyYaml](https://pyyaml.org/) (5.1+)

View File

@@ -284,3 +284,26 @@ BEGIN
END;
$$
LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],
term_vectors TEXT[],
weight_vectors FLOAT[],
def_weight FLOAT)
RETURNS FLOAT
AS $$
DECLARE
pos INT := 1;
terms TEXT;
BEGIN
FOREACH terms IN ARRAY term_vectors
LOOP
IF search_vector @> terms::INTEGER[] THEN
RETURN weight_vectors[pos];
END IF;
pos := pos + 1;
END LOOP;
RETURN def_weight;
END;
$$
LANGUAGE plpgsql IMMUTABLE;

View File

@@ -10,11 +10,11 @@ Extended SQLAlchemy connection class that also includes access to the schema.
from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set
import sqlalchemy as sa
from geoalchemy2 import Geometry
from sqlalchemy.ext.asyncio import AsyncConnection
from nominatim.typing import SaFromClause
from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.api.logging import log
class SearchConnection:
@@ -38,7 +38,7 @@ class SearchConnection:
) -> Any:
""" Execute a 'scalar()' query on the connection.
"""
log().sql(self.connection, sql)
log().sql(self.connection, sql, params)
return await self.connection.scalar(sql, params)
@@ -47,7 +47,7 @@ class SearchConnection:
) -> 'sa.Result[Any]':
""" Execute a 'execute()' query on the connection.
"""
log().sql(self.connection, sql)
log().sql(self.connection, sql, params)
return await self.connection.execute(sql, params)
@@ -112,4 +112,4 @@ class SearchConnection:
return sa.Table(tablename, self.t.meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('centroid', Geometry(srid=4326, spatial_index=False)))
sa.Column('centroid', Geometry))

View File

@@ -66,7 +66,8 @@ class NominatimAPIAsync:
username=dsn.get('user'), password=dsn.get('password'),
host=dsn.get('host'), port=int(dsn['port']) if 'port' in dsn else None,
query=query)
engine = sa_asyncio.create_async_engine(dburl, future=True)
engine = sa_asyncio.create_async_engine(dburl, future=True,
echo=self.config.get_bool('DEBUG_SQL'))
try:
async with engine.begin() as conn:

View File

@@ -7,11 +7,12 @@
"""
Functions for specialised logging with HTML output.
"""
from typing import Any, Iterator, Optional, List, Tuple, cast
from typing import Any, Iterator, Optional, List, Tuple, cast, Union, Mapping, Sequence
from contextvars import ContextVar
import datetime as dt
import textwrap
import io
import re
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
@@ -74,22 +75,39 @@ class BaseLogger:
"""
def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None:
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
""" Print the SQL for the given statement.
"""
def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> str:
def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable',
extra_params: Union[Mapping[str, Any],
Sequence[Mapping[str, Any]], None]) -> str:
""" Return the comiled version of the statement.
"""
try:
return str(cast('sa.ClauseElement', statement)
.compile(conn.sync_engine, compile_kwargs={"literal_binds": True}))
except sa.exc.CompileError:
pass
except NotImplementedError:
pass
compiled = cast('sa.ClauseElement', statement).compile(conn.sync_engine)
return str(cast('sa.ClauseElement', statement).compile(conn.sync_engine))
params = dict(compiled.params)
if isinstance(extra_params, Mapping):
for k, v in extra_params.items():
params[k] = str(v)
elif isinstance(extra_params, Sequence) and extra_params:
for k in extra_params[0]:
params[k] = f':{k}'
sqlstr = str(compiled)
if sa.__version__.startswith('1'):
try:
return sqlstr % tuple((repr(params.get(name, None))
for name in compiled.positiontup)) # type: ignore
except TypeError:
return sqlstr
# Fixes an odd issue with Python 3.7 where percentages are not
# quoted correctly.
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
return sqlstr % params
class HTMLLogger(BaseLogger):
@@ -183,9 +201,10 @@ class HTMLLogger(BaseLogger):
self._write(f'</dl><b>TOTAL:</b> {total}</p>')
def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None:
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
self._timestamp()
sqlstr = self.format_sql(conn, statement)
sqlstr = self.format_sql(conn, statement, params)
if CODE_HIGHLIGHT:
sqlstr = highlight(sqlstr, PostgresLexer(),
HtmlFormatter(nowrap=True, lineseparator='<br />'))
@@ -276,8 +295,9 @@ class TextLogger(BaseLogger):
self._write(f'TOTAL: {total}\n\n')
def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None:
sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement), width=78))
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78))
self._write(f"| {sqlstr}\n\n")

View File

@@ -263,7 +263,7 @@ def create_from_placex_row(row: Optional[SaRow],
rank_search=row.rank_search,
importance=row.importance,
country_code=row.country_code,
centroid=Point.from_wkb(row.centroid.data),
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
@@ -288,7 +288,7 @@ def create_from_osmline_row(row: Optional[SaRow],
address=row.address,
postcode=row.postcode,
country_code=row.country_code,
centroid=Point.from_wkb(row.centroid.data),
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
if hnr is None:
@@ -321,7 +321,7 @@ def create_from_tiger_row(row: Optional[SaRow],
category=('place', 'houses' if hnr is None else 'house'),
postcode=row.postcode,
country_code='us',
centroid=Point.from_wkb(row.centroid.data),
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
if hnr is None:
@@ -350,7 +350,7 @@ def create_from_postcode_row(row: Optional[SaRow],
rank_search=row.rank_search,
rank_address=row.rank_address,
country_code=row.country_code,
centroid=Point.from_wkb(row.centroid.data),
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
@@ -365,7 +365,7 @@ def create_from_country_row(row: Optional[SaRow],
return class_type(source_table=SourceTable.COUNTRY,
category=('place', 'country'),
centroid=Point.from_wkb(row.centroid.data),
centroid=Point.from_wkb(row.centroid),
names=row.name,
rank_address=4, rank_search=4,
country_code=row.country_code)

View File

@@ -7,16 +7,16 @@
"""
Implementation of reverse geocoding.
"""
from typing import Optional, List, Callable, Type, Tuple
from typing import Optional, List, Callable, Type, Tuple, Dict, Any
import sqlalchemy as sa
from geoalchemy2 import WKTElement
from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow
from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow, SaBind
from nominatim.api.connection import SearchConnection
import nominatim.api.results as nres
from nominatim.api.logging import log
from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
from nominatim.db.sqlalchemy_types import Geometry
# In SQLAlchemy expression which compare with NULL need to be expressed with
# the equal sign.
@@ -24,20 +24,20 @@ from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFor
RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]]
def _select_from_placex(t: SaFromClause, wkt: Optional[str] = None) -> SaSelect:
WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
""" Create a select statement with the columns relevant for reverse
results.
"""
if wkt is None:
if not use_wkt:
distance = t.c.distance
centroid = t.c.centroid
else:
distance = t.c.geometry.ST_Distance(wkt)
centroid = sa.case(
(t.c.geometry.ST_GeometryType().in_(('ST_LineString',
'ST_MultiLineString')),
t.c.geometry.ST_ClosestPoint(wkt)),
else_=t.c.centroid).label('centroid')
distance = t.c.geometry.ST_Distance(WKT_PARAM)
centroid = sa.case((t.c.geometry.is_line_like(), t.c.geometry.ST_ClosestPoint(WKT_PARAM)),
else_=t.c.centroid).label('centroid')
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
@@ -66,11 +66,11 @@ def _interpolated_position(table: SaFromClause) -> SaLabel:
else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid')
def _locate_interpolation(table: SaFromClause, wkt: WKTElement) -> SaLabel:
def _locate_interpolation(table: SaFromClause) -> SaLabel:
""" Given a position, locate the closest point on the line.
"""
return sa.case((table.c.linegeo.ST_GeometryType() == 'ST_LineString',
sa.func.ST_LineLocatePoint(table.c.linegeo, wkt)),
return sa.case((table.c.linegeo.is_line_like(),
table.c.linegeo.ST_LineLocatePoint(WKT_PARAM)),
else_=0).label('position')
@@ -79,9 +79,11 @@ def _is_address_point(table: SaFromClause) -> SaColumn:
sa.or_(table.c.housenumber != None,
table.c.name.has_key('housename')))
def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
return min(rows, key=lambda row: 1000 if row is None else row.distance)
class ReverseGeocoder:
""" Class implementing the logic for looking up a place from a
coordinate.
@@ -91,6 +93,8 @@ class ReverseGeocoder:
self.conn = conn
self.params = params
self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}
@property
def max_rank(self) -> int:
@@ -122,6 +126,7 @@ class ReverseGeocoder:
"""
return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
def _add_geometry_columns(self, sql: SaSelect, col: SaColumn) -> SaSelect:
if not self.has_geometries():
return sql
@@ -129,16 +134,16 @@ class ReverseGeocoder:
out = []
if self.params.geometry_simplification > 0.0:
col = col.ST_SimplifyPreserveTopology(self.params.geometry_simplification)
col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
if self.params.geometry_output & GeometryFormat.GEOJSON:
out.append(col.ST_AsGeoJSON().label('geometry_geojson'))
out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
if self.params.geometry_output & GeometryFormat.TEXT:
out.append(col.ST_AsText().label('geometry_text'))
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if self.params.geometry_output & GeometryFormat.KML:
out.append(col.ST_AsKML().label('geometry_kml'))
out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
if self.params.geometry_output & GeometryFormat.SVG:
out.append(col.ST_AsSVG().label('geometry_svg'))
out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
return sql.add_columns(*out)
@@ -160,20 +165,18 @@ class ReverseGeocoder:
return table.c.class_.in_(tuple(include))
async def _find_closest_street_or_poi(self, wkt: WKTElement,
distance: float) -> Optional[SaRow]:
async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
""" Look up the closest rank 26+ place in the database, which
is closer than the given distance.
"""
t = self.conn.t.placex
sql = _select_from_placex(t, wkt)\
.where(t.c.geometry.ST_DWithin(wkt, distance))\
sql = _select_from_placex(t)\
.where(t.c.geometry.ST_DWithin(WKT_PARAM, distance))\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.where(sa.or_(t.c.geometry.ST_GeometryType()
.not_in(('ST_Polygon', 'ST_MultiPolygon')),
t.c.centroid.ST_Distance(wkt) < distance))\
.where(sa.or_(sa.not_(t.c.geometry.is_area()),
t.c.centroid.ST_Distance(WKT_PARAM) < distance))\
.order_by('distance')\
.limit(1)
@@ -189,24 +192,25 @@ class ReverseGeocoder:
if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
restrict.append(sa.and_(t.c.rank_search == 30,
t.c.class_.not_in(('place', 'building')),
t.c.geometry.ST_GeometryType() != 'ST_LineString'))
sa.not_(t.c.geometry.is_line_like())))
if self.has_feature_layers():
restrict.append(sa.and_(t.c.rank_search.between(26, self.max_rank),
restrict.append(sa.and_(t.c.rank_search.between(26, MAX_RANK_PARAM),
t.c.rank_address == 0,
self._filter_by_layer(t)))
if not restrict:
return None
return (await self.conn.execute(sql.where(sa.or_(*restrict)))).one_or_none()
sql = sql.where(sa.or_(*restrict))
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def _find_housenumber_for_street(self, parent_place_id: int,
wkt: WKTElement) -> Optional[SaRow]:
async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.placex
sql = _select_from_placex(t, wkt)\
.where(t.c.geometry.ST_DWithin(wkt, 0.001))\
sql = _select_from_placex(t)\
.where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.where(_is_address_point(t))\
.where(t.c.indexed_status == 0)\
@@ -216,18 +220,17 @@ class ReverseGeocoder:
sql = self._add_geometry_columns(sql, t.c.geometry)
return (await self.conn.execute(sql)).one_or_none()
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
wkt: WKTElement,
distance: float) -> Optional[SaRow]:
t = self.conn.t.osmline
sql = sa.select(t,
t.c.linegeo.ST_Distance(wkt).label('distance'),
_locate_interpolation(t, wkt))\
.where(t.c.linegeo.ST_DWithin(wkt, distance))\
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))\
.where(t.c.startnumber != None)\
.order_by('distance')\
.limit(1)
@@ -248,18 +251,18 @@ class ReverseGeocoder:
sub = sql.subquery('geom')
sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
return (await self.conn.execute(sql)).one_or_none()
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def _find_tiger_number_for_street(self, parent_place_id: int,
parent_type: str, parent_id: int,
wkt: WKTElement) -> Optional[SaRow]:
parent_type: str,
parent_id: int) -> Optional[SaRow]:
t = self.conn.t.tiger
inner = sa.select(t,
t.c.linegeo.ST_Distance(wkt).label('distance'),
_locate_interpolation(t, wkt))\
.where(t.c.linegeo.ST_DWithin(wkt, 0.001))\
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.order_by('distance')\
.limit(1)\
@@ -278,18 +281,17 @@ class ReverseGeocoder:
sub = sql.subquery('geom')
sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
return (await self.conn.execute(sql)).one_or_none()
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def lookup_street_poi(self,
wkt: WKTElement) -> Tuple[Optional[SaRow], RowFunc]:
async def lookup_street_poi(self) -> Tuple[Optional[SaRow], RowFunc]:
""" Find a street or POI/address for the given WKT point.
"""
log().section('Reverse lookup on street/address level')
distance = 0.006
parent_place_id = None
row = await self._find_closest_street_or_poi(wkt, distance)
row = await self._find_closest_street_or_poi(distance)
row_func: RowFunc = nres.create_from_placex_row
log().var_dump('Result (street/building)', row)
@@ -302,7 +304,7 @@ class ReverseGeocoder:
distance = 0.001
parent_place_id = row.place_id
log().comment('Find housenumber for street')
addr_row = await self._find_housenumber_for_street(parent_place_id, wkt)
addr_row = await self._find_housenumber_for_street(parent_place_id)
log().var_dump('Result (street housenumber)', addr_row)
if addr_row is not None:
@@ -313,8 +315,7 @@ class ReverseGeocoder:
log().comment('Find TIGER housenumber for street')
addr_row = await self._find_tiger_number_for_street(parent_place_id,
row.osm_type,
row.osm_id,
wkt)
row.osm_id)
log().var_dump('Result (street Tiger housenumber)', addr_row)
if addr_row is not None:
@@ -328,7 +329,7 @@ class ReverseGeocoder:
if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
log().comment('Find interpolation for street')
addr_row = await self._find_interpolation_for_street(parent_place_id,
wkt, distance)
distance)
log().var_dump('Result (street interpolation)', addr_row)
if addr_row is not None:
row = addr_row
@@ -337,7 +338,7 @@ class ReverseGeocoder:
return row, row_func
async def _lookup_area_address(self, wkt: WKTElement) -> Optional[SaRow]:
async def _lookup_area_address(self) -> Optional[SaRow]:
""" Lookup large addressable areas for the given WKT point.
"""
log().comment('Reverse lookup by larger address area features')
@@ -346,10 +347,10 @@ class ReverseGeocoder:
# The inner SQL brings results in the right order, so that
# later only a minimum of results needs to be checked with ST_Contains.
inner = sa.select(t, sa.literal(0.0).label('distance'))\
.where(t.c.rank_search.between(5, self.max_rank))\
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
.where(t.c.rank_address.between(5, 25))\
.where(t.c.geometry.ST_GeometryType().in_(('ST_Polygon', 'ST_MultiPolygon')))\
.where(t.c.geometry.intersects(wkt))\
.where(t.c.geometry.is_area())\
.where(t.c.geometry.intersects(WKT_PARAM))\
.where(t.c.name != None)\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
@@ -358,23 +359,23 @@ class ReverseGeocoder:
.limit(50)\
.subquery('area')
sql = _select_from_placex(inner)\
.where(inner.c.geometry.ST_Contains(wkt))\
sql = _select_from_placex(inner, False)\
.where(inner.c.geometry.ST_Contains(WKT_PARAM))\
.order_by(sa.desc(inner.c.rank_search))\
.limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry)
address_row = (await self.conn.execute(sql)).one_or_none()
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (area)', address_row)
if address_row is not None and address_row.rank_search < self.max_rank:
log().comment('Search for better matching place nodes inside the area')
inner = sa.select(t,
t.c.geometry.ST_Distance(wkt).label('distance'))\
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.osm_type == 'N')\
.where(t.c.rank_search > address_row.rank_search)\
.where(t.c.rank_search <= self.max_rank)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
.where(t.c.rank_address.between(5, 25))\
.where(t.c.name != None)\
.where(t.c.indexed_status == 0)\
@@ -382,13 +383,13 @@ class ReverseGeocoder:
.where(t.c.type != 'postcode')\
.where(t.c.geometry
.ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
.intersects(wkt))\
.intersects(WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('places')
touter = self.conn.t.placex.alias('outer')
sql = _select_from_placex(inner)\
sql = _select_from_placex(inner, False)\
.join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
.where(touter.c.place_id == address_row.place_id)\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
@@ -397,7 +398,7 @@ class ReverseGeocoder:
sql = self._add_geometry_columns(sql, inner.c.geometry)
place_address_row = (await self.conn.execute(sql)).one_or_none()
place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (place node)', place_address_row)
if place_address_row is not None:
@@ -406,65 +407,64 @@ class ReverseGeocoder:
return address_row
async def _lookup_area_others(self, wkt: WKTElement) -> Optional[SaRow]:
async def _lookup_area_others(self) -> Optional[SaRow]:
t = self.conn.t.placex
inner = sa.select(t, t.c.geometry.ST_Distance(wkt).label('distance'))\
inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_address == 0)\
.where(t.c.rank_search.between(5, self.max_rank))\
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
.where(t.c.name != None)\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.where(self._filter_by_layer(t))\
.where(t.c.geometry
.ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
.intersects(wkt))\
.intersects(WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery()
sql = _select_from_placex(inner)\
.where(sa.or_(inner.c.geometry.ST_GeometryType()
.not_in(('ST_Polygon', 'ST_MultiPolygon')),
inner.c.geometry.ST_Contains(wkt)))\
sql = _select_from_placex(inner, False)\
.where(sa.or_(sa.not_(inner.c.geometry.is_area()),
inner.c.geometry.ST_Contains(WKT_PARAM)))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry)
row = (await self.conn.execute(sql)).one_or_none()
row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (non-address feature)', row)
return row
async def lookup_area(self, wkt: WKTElement) -> Optional[SaRow]:
""" Lookup large areas for the given WKT point.
async def lookup_area(self) -> Optional[SaRow]:
""" Lookup large areas for the current search.
"""
log().section('Reverse lookup by larger area features')
if self.layer_enabled(DataLayer.ADDRESS):
address_row = await self._lookup_area_address(wkt)
address_row = await self._lookup_area_address()
else:
address_row = None
if self.has_feature_layers():
other_row = await self._lookup_area_others(wkt)
other_row = await self._lookup_area_others()
else:
other_row = None
return _get_closest(address_row, other_row)
async def lookup_country(self, wkt: WKTElement) -> Optional[SaRow]:
""" Lookup the country for the given WKT point.
async def lookup_country(self) -> Optional[SaRow]:
""" Lookup the country for the current search.
"""
log().section('Reverse lookup by country code')
t = self.conn.t.country_grid
sql = sa.select(t.c.country_code).distinct()\
.where(t.c.geometry.ST_Contains(wkt))
.where(t.c.geometry.ST_Contains(WKT_PARAM))
ccodes = tuple((r[0] for r in await self.conn.execute(sql)))
ccodes = tuple((r[0] for r in await self.conn.execute(sql, self.bind_params)))
log().var_dump('Country codes', ccodes)
if not ccodes:
@@ -475,10 +475,10 @@ class ReverseGeocoder:
log().comment('Search for place nodes in country')
inner = sa.select(t,
t.c.geometry.ST_Distance(wkt).label('distance'))\
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.osm_type == 'N')\
.where(t.c.rank_search > 4)\
.where(t.c.rank_search <= self.max_rank)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
.where(t.c.rank_address.between(5, 25))\
.where(t.c.name != None)\
.where(t.c.indexed_status == 0)\
@@ -487,26 +487,26 @@ class ReverseGeocoder:
.where(t.c.country_code.in_(ccodes))\
.where(t.c.geometry
.ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
.intersects(wkt))\
.intersects(WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery()
sql = _select_from_placex(inner)\
sql = _select_from_placex(inner, False)\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry)
address_row = (await self.conn.execute(sql)).one_or_none()
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (addressable place node)', address_row)
else:
address_row = None
if address_row is None:
# Still nothing, then return a country with the appropriate country code.
sql = _select_from_placex(t, wkt)\
sql = _select_from_placex(t)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)\
.where(t.c.rank_search == 4)\
@@ -516,7 +516,7 @@ class ReverseGeocoder:
sql = self._add_geometry_columns(sql, t.c.geometry)
address_row = (await self.conn.execute(sql)).one_or_none()
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
return address_row
@@ -528,26 +528,26 @@ class ReverseGeocoder:
log().function('reverse_lookup', coord=coord, params=self.params)
wkt = WKTElement(f'POINT({coord[0]} {coord[1]})', srid=4326)
self.bind_params['wkt'] = f'POINT({coord[0]} {coord[1]})'
row: Optional[SaRow] = None
row_func: RowFunc = nres.create_from_placex_row
if self.max_rank >= 26:
row, tmp_row_func = await self.lookup_street_poi(wkt)
row, tmp_row_func = await self.lookup_street_poi()
if row is not None:
row_func = tmp_row_func
if row is None and self.max_rank > 4:
row = await self.lookup_area(wkt)
row = await self.lookup_area()
if row is None and self.layer_enabled(DataLayer.ADDRESS):
row = await self.lookup_country(wkt)
row = await self.lookup_country()
result = row_func(row, nres.ReverseResult)
if result is not None:
assert row is not None
result.distance = row.distance
if hasattr(row, 'bbox'):
result.bbox = Bbox.from_wkb(row.bbox.data)
result.bbox = Bbox.from_wkb(row.bbox)
await nres.add_result_details(self.conn, [result], self.params)
return result

View File

@@ -129,10 +129,11 @@ class FieldRanking:
"""
assert self.rankings
col = table.c[self.column]
return sa.case(*((col.contains(r.tokens),r.penalty) for r in self.rankings),
else_=self.default)
return sa.func.weigh_search(table.c[self.column],
[f"{{{','.join((str(s) for s in r.tokens))}}}"
for r in self.rankings],
[r.penalty for r in self.rankings],
self.default)
@dataclasses.dataclass

View File

@@ -7,22 +7,48 @@
"""
Implementation of the acutal database accesses for forward search.
"""
from typing import List, Tuple, AsyncIterator
from typing import List, Tuple, AsyncIterator, Dict, Any
import abc
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import ARRAY, array_agg
from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
SaExpression, SaSelect, SaRow
SaExpression, SaSelect, SaRow, SaBind
from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
import nominatim.api.results as nres
from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
from nominatim.db.sqlalchemy_types import Geometry
#pylint: disable=singleton-comparison,not-callable
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
""" Create a dictionary from search parameters that can be used
as bind parameter for SQL execute.
"""
return {'limit': details.max_results,
'min_rank': details.min_rank,
'max_rank': details.max_rank,
'viewbox': details.viewbox,
'viewbox2': details.viewbox_x2,
'near': details.near,
'near_radius': details.near_radius,
'excluded': details.excluded,
'countries': details.countries}
LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
EXCLUDED_PARAM: SaBind = sa.bindparam('excluded')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
def _select_placex(t: SaFromClause) -> SaSelect:
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type,
@@ -41,16 +67,16 @@ def _add_geometry_columns(sql: SaSelect, col: SaColumn, details: SearchDetails)
out = []
if details.geometry_simplification > 0.0:
col = col.ST_SimplifyPreserveTopology(details.geometry_simplification)
col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
if details.geometry_output & GeometryFormat.GEOJSON:
out.append(col.ST_AsGeoJSON().label('geometry_geojson'))
out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
if details.geometry_output & GeometryFormat.TEXT:
out.append(col.ST_AsText().label('geometry_text'))
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if details.geometry_output & GeometryFormat.KML:
out.append(col.ST_AsKML().label('geometry_kml'))
out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
if details.geometry_output & GeometryFormat.SVG:
out.append(col.ST_AsSVG().label('geometry_svg'))
out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
return sql.add_columns(*out)
@@ -70,7 +96,7 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
for n in numerals)))
if details.excluded:
sql = sql.where(table.c.place_id.not_in(details.excluded))
sql = sql.where(table.c.place_id.not_in(EXCLUDED_PARAM))
return sql.scalar_subquery()
@@ -129,7 +155,7 @@ async def _get_placex_housenumbers(conn: SearchConnection,
for row in await conn.execute(sql):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.bbox = Bbox.from_wkb(row.bbox.data)
result.bbox = Bbox.from_wkb(row.bbox)
yield result
@@ -259,28 +285,25 @@ class NearSearch(AbstractSearch):
sql = sql.join(table, t.c.place_id == table.c.place_id)\
.join(tgeom,
sa.case((sa.and_(tgeom.c.rank_address < 9,
tgeom.c.geometry.ST_GeometryType().in_(
('ST_Polygon', 'ST_MultiPolygon'))),
tgeom.c.geometry.is_area()),
tgeom.c.geometry.ST_Contains(table.c.centroid)),
else_ = tgeom.c.centroid.ST_DWithin(table.c.centroid, 0.05)))\
.order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
if details.countries:
sql = sql.where(t.c.country_code.in_(details.countries))
if details.min_rank > 0:
sql = sql.where(t.c.rank_address >= details.min_rank)
if details.max_rank < 30:
sql = sql.where(t.c.rank_address <= details.max_rank)
sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
if details.excluded:
sql = sql.where(t.c.place_id.not_in(details.excluded))
sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
if details.layers is not None:
sql = sql.where(_filter_by_layer(t, details.layers))
for row in await conn.execute(sql.limit(details.max_results)):
sql = sql.limit(LIMIT_PARAM)
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + penalty
result.bbox = Bbox.from_wkb(row.bbox.data)
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
@@ -298,6 +321,7 @@ class PoiSearch(AbstractSearch):
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
bind_params = _details_to_bind_params(details)
t = conn.t.placex
rows: List[SaRow] = []
@@ -306,15 +330,14 @@ class PoiSearch(AbstractSearch):
# simply search in placex table
sql = _select_placex(t) \
.where(t.c.linked_place_id == None) \
.where(t.c.geometry.ST_DWithin(details.near.sql_value(),
details.near_radius)) \
.order_by(t.c.centroid.ST_Distance(details.near.sql_value()))
.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
.order_by(t.c.centroid.ST_Distance(NEAR_PARAM))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value()))
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
classtype = self.categories.values
if len(classtype) == 1:
@@ -324,7 +347,8 @@ class PoiSearch(AbstractSearch):
sql = sql.where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
for cls, typ in classtype)))
rows.extend(await conn.execute(sql.limit(details.max_results)))
sql = sql.limit(LIMIT_PARAM)
rows.extend(await conn.execute(sql, bind_params))
else:
# use the class type tables
for category in self.categories.values:
@@ -336,24 +360,25 @@ class PoiSearch(AbstractSearch):
.where(t.c.type == category[1])
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(table.c.centroid.intersects(details.viewbox.sql_value()))
sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))
if details.near:
sql = sql.order_by(table.c.centroid.ST_Distance(details.near.sql_value()))\
.where(table.c.centroid.ST_DWithin(details.near.sql_value(),
details.near_radius or 0.5))
if details.near and details.near_radius is not None:
sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
.where(table.c.centroid.ST_DWithin(NEAR_PARAM,
NEAR_RADIUS_PARAM))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
rows.extend(await conn.execute(sql.limit(details.max_results)))
sql = sql.limit(LIMIT_PARAM)
rows.extend(await conn.execute(sql, bind_params))
results = nres.SearchResults()
for row in rows:
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type))
result.bbox = Bbox.from_wkb(row.bbox.data)
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
return results
@@ -380,17 +405,16 @@ class CountrySearch(AbstractSearch):
sql = _add_geometry_columns(sql, t.c.geometry, details)
if details.excluded:
sql = sql.where(t.c.place_id.not_in(details.excluded))
sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value()))
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
if details.near is not None and details.near_radius is not None:
sql = sql.where(t.c.geometry.ST_DWithin(details.near.sql_value(),
details.near_radius))
sql = sql.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
results = nres.SearchResults()
for row in await conn.execute(sql):
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
@@ -419,10 +443,9 @@ class CountrySearch(AbstractSearch):
.group_by(tgrid.c.country_code)
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(tgrid.c.geometry.intersects(details.viewbox.sql_value()))
sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM))
if details.near is not None and details.near_radius is not None:
sql = sql.where(tgrid.c.geometry.ST_DWithin(details.near.sql_value(),
details.near_radius))
sql = sql.where(tgrid.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
sub = sql.subquery('grid')
@@ -435,7 +458,7 @@ class CountrySearch(AbstractSearch):
.join(sub, t.c.country_code == sub.c.country_code)
results = nres.SearchResults()
for row in await conn.execute(sql):
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_country_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
@@ -474,23 +497,22 @@ class PostcodeSearch(AbstractSearch):
if details.viewbox is not None:
if details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value()))
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
else:
penalty += sa.case((t.c.geometry.intersects(details.viewbox.sql_value()), 0.0),
(t.c.geometry.intersects(details.viewbox_x2.sql_value()), 1.0),
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
else_=2.0)
if details.near is not None:
if details.near_radius is not None:
sql = sql.where(t.c.geometry.ST_DWithin(details.near.sql_value(),
details.near_radius))
sql = sql.order_by(t.c.geometry.ST_Distance(details.near.sql_value()))
sql = sql.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
if details.excluded:
sql = sql.where(t.c.place_id.not_in(details.excluded))
sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
if self.lookups:
assert len(self.lookups) == 1
@@ -509,10 +531,10 @@ class PostcodeSearch(AbstractSearch):
sql = sql.add_columns(penalty.label('accuracy'))
sql = sql.order_by('accuracy')
sql = sql.order_by('accuracy').limit(LIMIT_PARAM)
results = nres.SearchResults()
for row in await conn.execute(sql.limit(details.max_results)):
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_postcode_row(row, nres.SearchResult)
assert result
result.accuracy = row.accuracy
@@ -542,7 +564,6 @@ class PlaceSearch(AbstractSearch):
"""
t = conn.t.placex.alias('p')
tsearch = conn.t.search_name.alias('s')
limit = details.max_results
sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type,
@@ -587,17 +608,16 @@ class PlaceSearch(AbstractSearch):
if details.viewbox is not None:
if details.bounded_viewbox:
sql = sql.where(tsearch.c.centroid.intersects(details.viewbox.sql_value()))
sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
else:
penalty += sa.case((t.c.geometry.intersects(details.viewbox.sql_value()), 0.0),
(t.c.geometry.intersects(details.viewbox_x2.sql_value()), 1.0),
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
else_=2.0)
if details.near is not None:
if details.near_radius is not None:
sql = sql.where(tsearch.c.centroid.ST_DWithin(details.near.sql_value(),
details.near_radius))
sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(details.near.sql_value())
sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM)
.label('importance'))
sql = sql.order_by(sa.desc(sa.text('importance')))
else:
@@ -613,7 +633,7 @@ class PlaceSearch(AbstractSearch):
hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
sql = sql.where(tsearch.c.address_rank.between(16, 30))\
.where(sa.or_(tsearch.c.address_rank < 30,
t.c.housenumber.regexp_match(hnr_regexp, flags='i')))
t.c.housenumber.op('~*')(hnr_regexp)))
# Cross check for housenumbers, need to do that on a rather large
# set. Worst case there are 40.000 main streets in OSM.
@@ -624,12 +644,12 @@ class PlaceSearch(AbstractSearch):
pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call]
place_sql = sa.select(pid_list)\
.where(thnr.c.parent_place_id == inner.c.place_id)\
.where(thnr.c.housenumber.regexp_match(hnr_regexp, flags='i'))\
.where(thnr.c.housenumber.op('~*')(hnr_regexp))\
.where(thnr.c.linked_place_id == None)\
.where(thnr.c.indexed_status == 0)
if details.excluded:
place_sql = place_sql.where(thnr.c.place_id.not_in(details.excluded))
place_sql = place_sql.where(thnr.c.place_id.not_in(EXCLUDED_PARAM))
if self.qualifiers:
place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
@@ -665,22 +685,23 @@ class PlaceSearch(AbstractSearch):
if self.qualifiers:
sql = sql.where(self.qualifiers.sql_restrict(t))
if details.excluded:
sql = sql.where(tsearch.c.place_id.not_in(details.excluded))
sql = sql.where(tsearch.c.place_id.not_in(EXCLUDED_PARAM))
if details.min_rank > 0:
sql = sql.where(sa.or_(tsearch.c.address_rank >= details.min_rank,
tsearch.c.search_rank >= details.min_rank))
sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
tsearch.c.search_rank >= MIN_RANK_PARAM))
if details.max_rank < 30:
sql = sql.where(sa.or_(tsearch.c.address_rank <= details.max_rank,
tsearch.c.search_rank <= details.max_rank))
sql = sql.where(sa.or_(tsearch.c.address_rank <= MAX_RANK_PARAM,
tsearch.c.search_rank <= MAX_RANK_PARAM))
if details.layers is not None:
sql = sql.where(_filter_by_layer(t, details.layers))
sql = sql.limit(LIMIT_PARAM)
results = nres.SearchResults()
for row in await conn.execute(sql.limit(limit)):
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.bbox = Bbox.from_wkb(row.bbox.data)
result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = row.accuracy
if not details.excluded or not result.place_id in details.excluded:
results.append(result)

View File

@@ -14,9 +14,7 @@ import dataclasses
import enum
import math
from struct import unpack
from geoalchemy2 import WKTElement
import geoalchemy2.functions
from binascii import unhexlify
from nominatim.errors import UsageError
@@ -73,11 +71,13 @@ class Point(NamedTuple):
@staticmethod
def from_wkb(wkb: bytes) -> 'Point':
def from_wkb(wkb: Union[str, bytes]) -> 'Point':
""" Create a point from EWKB as returned from the database.
"""
if isinstance(wkb, str):
wkb = unhexlify(wkb)
if len(wkb) != 25:
raise ValueError("Point wkb has unexpected length")
raise ValueError(f"Point wkb has unexpected length {len(wkb)}")
if wkb[0] == 0:
gtype, srid, x, y = unpack('>iidd', wkb[1:])
elif wkb[0] == 1:
@@ -122,10 +122,10 @@ class Point(NamedTuple):
return Point(x, y)
def sql_value(self) -> WKTElement:
""" Create an SQL expression for the point.
def to_wkt(self) -> str:
""" Return the WKT representation of the point.
"""
return WKTElement(f'POINT({self.x} {self.y})', srid=4326)
return f'POINT({self.x} {self.y})'
@@ -179,12 +179,6 @@ class Bbox:
return (self.coords[2] - self.coords[0]) * (self.coords[3] - self.coords[1])
def sql_value(self) -> Any:
""" Create an SQL expression for the box.
"""
return geoalchemy2.functions.ST_MakeEnvelope(*self.coords, 4326)
def contains(self, pt: Point) -> bool:
""" Check if the point is inside or on the boundary of the box.
"""
@@ -192,14 +186,25 @@ class Bbox:
and self.coords[2] >= pt[0] and self.coords[3] >= pt[1]
def to_wkt(self) -> str:
""" Return the WKT representation of the Bbox. This
is a simple polygon with four points.
"""
return 'POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))'\
.format(*self.coords) # pylint: disable=consider-using-f-string
@staticmethod
def from_wkb(wkb: Optional[bytes]) -> 'Optional[Bbox]':
def from_wkb(wkb: Union[None, str, bytes]) -> 'Optional[Bbox]':
""" Create a Bbox from a bounding box polygon as returned by
the database. Return s None if the input value is None.
"""
if wkb is None:
return None
if isinstance(wkb, str):
wkb = unhexlify(wkb)
if len(wkb) != 97:
raise ValueError("WKB must be a bounding box polygon")
if wkb.startswith(WKB_BBOX_HEADER_LE):
@@ -439,6 +444,7 @@ class SearchDetails(LookupDetails):
""" Restrict search to places with one of the given class/type categories.
An empty list (the default) will disable this filter.
"""
viewbox_x2: Optional[Bbox] = None
def __post_init__(self) -> None:
if self.viewbox is not None:

View File

@@ -10,10 +10,11 @@ SQLAlchemy definitions for all tables used by the frontend.
from typing import Any
import sqlalchemy as sa
from geoalchemy2 import Geometry
from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
from nominatim.db.sqlalchemy_types import Geometry
class PostgresTypes:
""" Type definitions for complex types as used in Postgres variants.
"""
@@ -72,12 +73,12 @@ class SearchTables:
sa.Column('name', self.types.Composite),
sa.Column('address', self.types.Composite),
sa.Column('extratags', self.types.Composite),
sa.Column('geometry', Geometry(srid=4326), nullable=False),
sa.Column('geometry', Geometry, nullable=False),
sa.Column('wikipedia', sa.Text),
sa.Column('country_code', sa.String(2)),
sa.Column('housenumber', sa.Text),
sa.Column('postcode', sa.Text),
sa.Column('centroid', Geometry(srid=4326, spatial_index=False)))
sa.Column('centroid', Geometry))
self.addressline = sa.Table('place_addressline', meta,
sa.Column('place_id', sa.BigInteger, index=True),
@@ -96,7 +97,7 @@ class SearchTables:
sa.Column('indexed_date', sa.DateTime),
sa.Column('country_code', sa.String(2)),
sa.Column('postcode', sa.Text, index=True),
sa.Column('geometry', Geometry(srid=4326)))
sa.Column('geometry', Geometry))
self.osmline = sa.Table('location_property_osmline', meta,
sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
@@ -108,7 +109,7 @@ class SearchTables:
sa.Column('step', sa.SmallInteger),
sa.Column('partition', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('linegeo', Geometry(srid=4326)),
sa.Column('linegeo', Geometry),
sa.Column('address', self.types.Composite),
sa.Column('postcode', sa.Text),
sa.Column('country_code', sa.String(2)))
@@ -123,7 +124,7 @@ class SearchTables:
self.country_grid = sa.Table('country_osm_grid', meta,
sa.Column('country_code', sa.String(2)),
sa.Column('area', sa.Float),
sa.Column('geometry', Geometry(srid=4326)))
sa.Column('geometry', Geometry))
# The following tables are not necessarily present.
self.search_name = sa.Table('search_name', meta,
@@ -134,7 +135,7 @@ class SearchTables:
sa.Column('name_vector', self.types.IntArray, index=True),
sa.Column('nameaddress_vector', self.types.IntArray, index=True),
sa.Column('country_code', sa.String(2)),
sa.Column('centroid', Geometry(srid=4326)))
sa.Column('centroid', Geometry))
self.tiger = sa.Table('location_property_tiger', meta,
sa.Column('place_id', sa.BigInteger),
@@ -143,5 +144,5 @@ class SearchTables:
sa.Column('endnumber', sa.Integer),
sa.Column('step', sa.SmallInteger),
sa.Column('partition', sa.SmallInteger),
sa.Column('linegeo', Geometry(srid=4326, spatial_index=False)),
sa.Column('linegeo', Geometry),
sa.Column('postcode', sa.Text))

View File

@@ -0,0 +1,105 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom types for SQLAlchemy.
"""
from typing import Callable, Any, cast
import sys
import sqlalchemy as sa
from sqlalchemy import types
from nominatim.typing import SaColumn, SaBind
#pylint: disable=all
class Geometry(types.UserDefinedType): # type: ignore[type-arg]
""" Simplified type decorator for PostGIS geometry. This type
only supports geometries in 4326 projection.
"""
cache_ok = True
def __init__(self, subtype: str = 'Geometry'):
self.subtype = subtype
def get_col_spec(self) -> str:
return f'GEOMETRY({self.subtype}, 4326)'
def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:
def process(value: Any) -> str:
if isinstance(value, str):
return 'SRID=4326;' + value
return 'SRID=4326;' + cast(str, value.to_wkt())
return process
def result_processor(self, dialect: 'sa.Dialect', coltype: object) -> Callable[[Any], str]:
def process(value: Any) -> str:
assert isinstance(value, str)
return value
return process
def bind_expression(self, bindvalue: SaBind) -> SaColumn:
return sa.func.ST_GeomFromText(bindvalue, type_=self)
class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
def intersects(self, other: SaColumn) -> 'sa.Operators':
return self.op('&&')(other)
def is_line_like(self) -> SaColumn:
return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_LineString',
'ST_MultiLineString'))
def is_area(self) -> SaColumn:
return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_Polygon',
'ST_MultiPolygon'))
def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
return sa.func.ST_DWithin(self, other, distance, type_=sa.Float)
def ST_Distance(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Distance(self, other, type_=sa.Float)
def ST_Contains(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Contains(self, other, type_=sa.Float)
def ST_ClosestPoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_ClosestPoint(self, other, type_=Geometry)
def ST_Buffer(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Buffer(self, other, type_=Geometry)
def ST_Expand(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Expand(self, other, type_=Geometry)
def ST_Collect(self) -> SaColumn:
return sa.func.ST_Collect(self, type_=Geometry)
def ST_Centroid(self) -> SaColumn:
return sa.func.ST_Centroid(self, type_=Geometry)
def ST_LineInterpolatePoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_LineInterpolatePoint(self, other, type_=Geometry)
def ST_LineLocatePoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_LineLocatePoint(self, other, type_=sa.Float)

View File

@@ -46,7 +46,6 @@ def migrate(config: Configuration, paths: Any) -> int:
db_version = _guess_version(conn)
has_run_migration = False
for version, func in _MIGRATION_FUNCTIONS:
if db_version < version or \
(db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)):
@@ -55,13 +54,11 @@ def migrate(config: Configuration, paths: Any) -> int:
kwargs = dict(conn=conn, config=config, paths=paths)
func(**kwargs)
conn.commit()
has_run_migration = True
if has_run_migration:
LOG.warning('Updating SQL functions.')
refresh.create_functions(conn, config)
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
tokenizer.update_sql_functions(config)
LOG.warning('Updating SQL functions.')
refresh.create_functions(conn, config)
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
tokenizer.update_sql_functions(config)
properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))

View File

@@ -70,3 +70,4 @@ SaExpression: TypeAlias = 'sa.ColumnElement[bool]'
SaLabel: TypeAlias = 'sa.Label[Any]'
SaFromClause: TypeAlias = 'sa.FromClause'
SaSelectable: TypeAlias = 'sa.Selectable'
SaBind: TypeAlias = 'sa.BindParameter[Any]'

View File

@@ -34,7 +34,7 @@ class NominatimVersion(NamedTuple):
return f"{self.major}.{self.minor}.{self.patch_level}-{self.db_patch_level}"
NOMINATIM_VERSION = NominatimVersion(4, 2, 99, 1)
NOMINATIM_VERSION = NominatimVersion(4, 2, 99, 2)
POSTGRESQL_REQUIRED_VERSION = (9, 6)
POSTGIS_REQUIRED_VERSION = (2, 2)

View File

@@ -222,3 +222,7 @@ NOMINATIM_LOG_DB=no
# Enable logging of requests into a file.
# To enable logging set this setting to the file to log to.
NOMINATIM_LOG_FILE=
# Echo raw SQL from SQLAlchemy statements.
# Works only in command line/library use.
NOMINATIM_DEBUG_SQL=no

View File

@@ -66,11 +66,11 @@ class APITester:
'rank_search': kw.get('rank_search', 30),
'rank_address': kw.get('rank_address', 30),
'importance': kw.get('importance'),
'centroid': 'SRID=4326;POINT(%f %f)' % centroid,
'centroid': 'POINT(%f %f)' % centroid,
'indexed_status': kw.get('indexed_status', 0),
'indexed_date': kw.get('indexed_date',
dt.datetime(2022, 12, 7, 14, 14, 46, 0)),
'geometry': 'SRID=4326;' + geometry})
'geometry': geometry})
def add_address_placex(self, object_id, **kw):
@@ -97,7 +97,7 @@ class APITester:
'address': kw.get('address'),
'postcode': kw.get('postcode'),
'country_code': kw.get('country_code'),
'linegeo': 'SRID=4326;' + kw.get('geometry', 'LINESTRING(1.1 -0.2, 1.09 -0.22)')})
'linegeo': kw.get('geometry', 'LINESTRING(1.1 -0.2, 1.09 -0.22)')})
def add_tiger(self, **kw):
@@ -108,7 +108,7 @@ class APITester:
'endnumber': kw.get('endnumber', 6),
'step': kw.get('step', 2),
'postcode': kw.get('postcode'),
'linegeo': 'SRID=4326;' + kw.get('geometry', 'LINESTRING(1.1 -0.2, 1.09 -0.22)')})
'linegeo': kw.get('geometry', 'LINESTRING(1.1 -0.2, 1.09 -0.22)')})
def add_postcode(self, **kw):
@@ -121,14 +121,14 @@ class APITester:
'rank_address': kw.get('rank_address', 22),
'indexed_date': kw.get('indexed_date',
dt.datetime(2022, 12, 7, 14, 14, 46, 0)),
'geometry': 'SRID=4326;' + kw.get('geometry', 'POINT(23 34)')})
'geometry': kw.get('geometry', 'POINT(23 34)')})
def add_country(self, country_code, geometry):
self.add_data('country_grid',
{'country_code': country_code,
'area': 0.1,
'geometry': 'SRID=4326;' + geometry})
'geometry': geometry})
def add_country_name(self, country_code, names, partition=0):
@@ -148,7 +148,7 @@ class APITester:
'name_vector': kw.get('names', []),
'nameaddress_vector': kw.get('address', []),
'country_code': kw.get('country_code', 'xx'),
'centroid': 'SRID=4326;POINT(%f %f)' % centroid})
'centroid': 'POINT(%f %f)' % centroid})
def add_class_type_table(self, cls, typ):

View File

@@ -8,6 +8,7 @@
Tests for result datatype helper functions.
"""
import struct
from binascii import hexlify
import pytest
import pytest_asyncio
@@ -17,10 +18,8 @@ import sqlalchemy as sa
from nominatim.api import SourceTable, DetailedResult, Point
import nominatim.api.results as nresults
class FakeCentroid:
def __init__(self, x, y):
self.data = struct.pack("=biidd", 1, 0x20000001, 4326,
x, y)
def mkpoint(x, y):
return hexlify(struct.pack("=biidd", 1, 0x20000001, 4326, x, y)).decode('utf-8')
class FakeRow:
def __init__(self, **kwargs):
@@ -60,7 +59,7 @@ def test_create_row_none(func):
def test_create_row_with_housenumber(func):
row = FakeRow(place_id=2345, osm_type='W', osm_id=111, housenumber=4,
address=None, postcode='99900', country_code='xd',
centroid=FakeCentroid(0, 0))
centroid=mkpoint(0, 0))
res = func(row, DetailedResult)
@@ -75,7 +74,7 @@ def test_create_row_without_housenumber(func):
row = FakeRow(place_id=2345, osm_type='W', osm_id=111,
startnumber=1, endnumber=11, step=2,
address=None, postcode='99900', country_code='xd',
centroid=FakeCentroid(0, 0))
centroid=mkpoint(0, 0))
res = func(row, DetailedResult)

View File

@@ -71,20 +71,6 @@ def test_already_at_version(def_config, property_table):
assert migration.migrate(def_config, {}) == 0
def test_no_migrations_necessary(def_config, temp_db_cursor, property_table,
monkeypatch):
oldversion = [x for x in nominatim.version.NOMINATIM_VERSION]
oldversion[0] -= 1
property_table.set('database_version',
'{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(oldversion))
oldversion[0] = 0
monkeypatch.setattr(migration, '_MIGRATION_FUNCTIONS',
[(tuple(oldversion), lambda **attr: True)])
assert migration.migrate(def_config, {}) == 0
def test_run_single_migration(def_config, temp_db_cursor, property_table,
monkeypatch, postprocess_mock):
oldversion = [x for x in nominatim.version.NOMINATIM_VERSION]

View File

@@ -33,7 +33,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
# Some of the Python packages that come with Ubuntu 20.04 are too old, so
# install the latest version from pip:
pip3 install --user sqlalchemy GeoAlchemy2 asyncpg
pip3 install --user sqlalchemy asyncpg
#

View File

@@ -29,7 +29,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
php-cli php-pgsql php-intl libicu-dev python3-dotenv \
python3-psycopg2 python3-psutil python3-jinja2 \
python3-icu python3-datrie python3-sqlalchemy \
python3-geoalchemy2 python3-asyncpg git
python3-asyncpg git
#
# System Configuration