python: implement reverse lookup function

The implementation follows for most part the PHP code but introduces an
additional layer parameter with which the kind of places to be returned
can be restricted. This replaces the hard-coded exclusion lists.
This commit is contained in:
Sarah Hoffmann
2023-03-23 22:38:37 +01:00
parent 41da298b18
commit 2f54732500
10 changed files with 424 additions and 118 deletions

View File

@@ -21,7 +21,7 @@ from nominatim.config import Configuration
from nominatim.api.connection import SearchConnection
from nominatim.api.status import get_status, StatusResult
from nominatim.api.lookup import get_place_by_id
from nominatim.api.reverse import reverse_lookup
from nominatim.api.reverse import ReverseGeocoder
from nominatim.api.types import PlaceRef, LookupDetails, AnyPoint, DataLayer
from nominatim.api.results import DetailedResult, ReverseResult
@@ -156,8 +156,9 @@ class NominatimAPIAsync:
max_rank = max(0, min(max_rank or 30, 30))
async with self.begin() as conn:
return await reverse_lookup(conn, coord, max_rank, layer,
details or LookupDetails())
geocoder = ReverseGeocoder(conn, max_rank, layer,
details or LookupDetails())
return await geocoder.lookup(coord)
class NominatimAPI:

View File

@@ -60,6 +60,17 @@ class BaseLogger:
""" Print the SQL for the given statement.
"""
def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> str:
""" Return the comiled version of the statement.
"""
try:
return str(cast('sa.ClauseElement', statement)
.compile(conn.sync_engine, compile_kwargs={"literal_binds": True}))
except sa.exc.CompileError:
pass
return str(cast('sa.ClauseElement', statement).compile(conn.sync_engine))
class HTMLLogger(BaseLogger):
""" Logger that formats messages in HTML.
@@ -92,8 +103,7 @@ class HTMLLogger(BaseLogger):
def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None:
sqlstr = str(cast('sa.ClauseElement', statement)
.compile(conn.sync_engine, compile_kwargs={"literal_binds": True}))
sqlstr = self.format_sql(conn, statement)
if CODE_HIGHLIGHT:
sqlstr = highlight(sqlstr, PostgresLexer(),
HtmlFormatter(nowrap=True, lineseparator='<br />'))
@@ -147,9 +157,7 @@ class TextLogger(BaseLogger):
def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None:
sqlstr = str(cast('sa.ClauseElement', statement)
.compile(conn.sync_engine, compile_kwargs={"literal_binds": True}))
sqlstr = '\n| '.join(textwrap.wrap(sqlstr, width=78))
sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement), width=78))
self._write(f"| {sqlstr}\n\n")

View File

@@ -172,6 +172,7 @@ async def get_place_by_id(conn: SearchConnection, place: ntyp.PlaceRef,
assert result is not None
result.parent_place_id = row.parent_place_id
result.linked_place_id = getattr(row, 'linked_place_id', None)
result.admin_level = getattr(row, 'admin_level', 15)
indexed_date = getattr(row, 'indexed_date', None)
if indexed_date is not None:
result.indexed_date = indexed_date.replace(tzinfo=dt.timezone.utc)

View File

@@ -46,8 +46,6 @@ class AddressLine:
names: Dict[str, str]
extratags: Optional[Dict[str, str]]
local_name: Optional[str] = None
admin_level: Optional[int]
fromarea: bool
isaddress: bool
@@ -81,7 +79,6 @@ class BaseResult:
place_id : Optional[int] = None
osm_object: Optional[Tuple[str, int]] = None
admin_level: int = 15
names: Optional[Dict[str, str]] = None
address: Optional[Dict[str, str]] = None
@@ -135,6 +132,7 @@ class DetailedResult(BaseResult):
"""
parent_place_id: Optional[int] = None
linked_place_id: Optional[int] = None
admin_level: int = 15
indexed_date: Optional[dt.datetime] = None
@@ -164,7 +162,6 @@ def create_from_placex_row(row: Optional[SaRow],
place_id=row.place_id,
osm_object=(row.osm_type, row.osm_id),
category=(row.class_, row.type),
admin_level=row.admin_level,
names=row.name,
address=row.address,
extratags=row.extratags,

View File

@@ -7,19 +7,22 @@
"""
Implementation of reverse geocoding.
"""
from typing import Optional
from typing import Optional, List
import sqlalchemy as sa
from geoalchemy2 import WKTElement
from geoalchemy2.types import Geometry
from nominatim.typing import SaColumn, SaSelect, SaTable, SaLabel, SaClause
from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow
from nominatim.api.connection import SearchConnection
import nominatim.api.results as nres
from nominatim.api.logging import log
from nominatim.api.types import AnyPoint, DataLayer, LookupDetails, GeometryFormat
def _select_from_placex(t: SaTable, wkt: Optional[str] = None) -> SaSelect:
# In SQLAlchemy expression which compare with NULL need to be expressed with
# the equal sign.
# pylint: disable=singleton-comparison
def _select_from_placex(t: SaFromClause, wkt: Optional[str] = None) -> SaSelect:
""" Create a select statement with the columns relevant for reverse
results.
"""
@@ -39,19 +42,20 @@ def _select_from_placex(t: SaTable, wkt: Optional[str] = None) -> SaSelect:
t.c.geometry.ST_Expand(0).label('bbox'))
def _interpolated_housenumber(table: SaTable) -> SaLabel:
# Entries with startnumber = endnumber are legacy from version < 4.1
def _interpolated_housenumber(table: SaFromClause) -> SaLabel:
return sa.cast(table.c.startnumber
+ sa.func.round(((table.c.endnumber - table.c.startnumber) * table.c.position)
/ table.c.step) * table.c.step,
sa.Integer).label('housenumber')
def _is_address_point(table: SaTable) -> SaClause:
def _is_address_point(table: SaFromClause) -> SaColumn:
return sa.and_(table.c.rank_address == 30,
sa.or_(table.c.housenumber != None,
table.c.name.has_key('housename')))
def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
return min(rows, key=lambda row: 1000 if row is None else row.distance)
class ReverseGeocoder:
""" Class implementing the logic for looking up a place from a
@@ -87,25 +91,27 @@ class ReverseGeocoder:
return sql.add_columns(*out)
def _filter_by_layer(self, table: SaTable) -> SaColumn:
def _filter_by_layer(self, table: SaFromClause) -> SaColumn:
if self.layer & DataLayer.MANMADE:
exclude = []
if not (self.layer & DataLayer.RAILWAY):
if not self.layer & DataLayer.RAILWAY:
exclude.append('railway')
if not (self.layer & DataLayer.NATURAL):
if not self.layer & DataLayer.NATURAL:
exclude.extend(('natural', 'water', 'waterway'))
return table.c.class_.not_in(tuple(exclude))
include = []
if self.layer & DataLayer.RAILWAY:
include.append('railway')
if not (self.layer & DataLayer.NATURAL):
if self.layer & DataLayer.NATURAL:
include.extend(('natural', 'water', 'waterway'))
return table.c.class_.in_(tuple(include))
async def _find_closest_street_or_poi(self, wkt: WKTElement) -> SaRow:
""" Look up the clostest rank 26+ place in the database.
async def _find_closest_street_or_poi(self, wkt: WKTElement,
distance: float) -> Optional[SaRow]:
""" Look up the closest rank 26+ place in the database, which
is closer than the given distance.
"""
t = self.conn.t.placex
@@ -113,38 +119,39 @@ class ReverseGeocoder:
.where(t.c.geometry.ST_DWithin(wkt, distance))\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.where(sa.or_(t.c.geometry.ST_GeometryType().not_in(('ST_Polygon', 'ST_MultiPolygon')),
.where(sa.or_(t.c.geometry.ST_GeometryType()
.not_in(('ST_Polygon', 'ST_MultiPolygon')),
t.c.centroid.ST_Distance(wkt) < distance))\
.order_by('distance')\
.limit(1)
sql = self._add_geometry_columns(sql, t.c.geometry)
restrict = []
restrict: List[SaColumn] = []
if self.layer & DataLayer.ADDRESS:
restrict.append(sa.and_(t.c.rank_address >= 26,
t.c.rank_address <= self.max_rank))
t.c.rank_address <= min(29, self.max_rank)))
if self.max_rank == 30:
restrict.append(_is_address_point(t))
if self.layer & DataLayer.POI and max_rank == 30:
if self.layer & DataLayer.POI and self.max_rank == 30:
restrict.append(sa.and_(t.c.rank_search == 30,
t.c.class_.not_in(('place', 'building')),
t.c.geometry.ST_GeometryType() != 'ST_LineString'))
if self.layer & (DataLayer.RAILWAY | DataLayer.MANMADE | DataLayer.NATURAL):
restrict.append(sa.and_(t.c.rank_search >= 26,
tc.rank_search <= self.max_rank,
restrict.append(sa.and_(t.c.rank_search.between(26, self.max_rank),
t.c.rank_address == 0,
self._filter_by_layer(t)))
if restrict:
sql = sql.where(sa.or_(*restrict))
if not restrict:
return None
return (await self.conn.execute(sql)).one_or_none()
return (await self.conn.execute(sql.where(sa.or_(*restrict)))).one_or_none()
async def _find_housenumber_for_street(self, parent_place_id: int,
wkt: WKTElement) -> Optional[SaRow]:
t = conn.t.placex
t = self.conn.t.placex
sql = _select_from_placex(t, wkt)\
.where(t.c.geometry.ST_DWithin(wkt, 0.001))\
@@ -161,20 +168,21 @@ class ReverseGeocoder:
async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
wkt: WKTElement) -> Optional[SaRow]:
wkt: WKTElement,
distance: float) -> Optional[SaRow]:
t = self.conn.t.osmline
inner = sa.select(t,
t.c.linegeo.ST_Distance(wkt).label('distance'),
t.c.linegeo.ST_LineLocatePoint(wkt).label('position'))\
.where(t.c.linegeo.ST_DWithin(wkt, distance))\
.order_by('distance')\
.limit(1)
sql = sa.select(t,
t.c.linegeo.ST_Distance(wkt).label('distance'),
t.c.linegeo.ST_LineLocatePoint(wkt).label('position'))\
.where(t.c.linegeo.ST_DWithin(wkt, distance))\
.order_by('distance')\
.limit(1)
if parent_place_id is not None:
inner = inner.where(t.c.parent_place_id == parent_place_id)
sql = sql.where(t.c.parent_place_id == parent_place_id)
inner = inner.subquery()
inner = sql.subquery()
sql = sa.select(inner.c.place_id, inner.c.osm_id,
inner.c.parent_place_id, inner.c.address,
@@ -214,7 +222,7 @@ class ReverseGeocoder:
sub = sql.subquery()
sql = self._add_geometry_columns(sql, sub.c.centroid)
return (await conn.execute(sql)).one_or_none()
return (await self.conn.execute(sql)).one_or_none()
async def lookup_street_poi(self, wkt: WKTElement) -> Optional[nres.ReverseResult]:
@@ -225,7 +233,7 @@ class ReverseGeocoder:
distance = 0.006
parent_place_id = None
row = await self._find_closest_street_or_poi(wkt)
row = await self._find_closest_street_or_poi(wkt, distance)
log().var_dump('Result (street/building)', row)
# If the closest result was a street, but an address was requested,
@@ -249,7 +257,7 @@ class ReverseGeocoder:
log().var_dump('Result (street Tiger housenumber)', addr_row)
if addr_row is not None:
result = nres.create_from_tiger_row(addr_row)
result = nres.create_from_tiger_row(addr_row, nres.ReverseResult)
else:
distance = row.distance
@@ -257,12 +265,13 @@ class ReverseGeocoder:
# or belongs to a close street found.
if self.max_rank > 27 and self.layer & DataLayer.ADDRESS:
log().comment('Find interpolation for street')
addr_row = await self._find_interpolation_for_street(parent_place_id, wkt)
addr_row = await self._find_interpolation_for_street(parent_place_id,
wkt, distance)
log().var_dump('Result (street interpolation)', addr_row)
if addr_row is not None:
result = nres.create_from_osmline_row(addr_row)
result = nres.create_from_osmline_row(addr_row, nres.ReverseResult)
return result or nres.create_from_placex_row(row)
return result or nres.create_from_placex_row(row, nres.ReverseResult)
async def _lookup_area_address(self, wkt: WKTElement) -> Optional[SaRow]:
@@ -296,13 +305,13 @@ class ReverseGeocoder:
address_row = (await self.conn.execute(sql)).one_or_none()
log().var_dump('Result (area)', address_row)
if address_row is not None and address_row.rank_search < max_rank:
if address_row is not None and address_row.rank_search < self.max_rank:
log().comment('Search for better matching place nodes inside the area')
inner = sa.select(t,
t.c.geometry.ST_Distance(wkt).label('distance'))\
.where(t.c.osm_type == 'N')\
.where(t.c.rank_search > address_row.rank_search)\
.where(t.c.rank_search <= max_rank)\
.where(t.c.rank_search <= self.max_rank)\
.where(t.c.rank_address.between(5, 25))\
.where(t.c.name != None)\
.where(t.c.indexed_status == 0)\
@@ -315,10 +324,10 @@ class ReverseGeocoder:
.limit(50)\
.subquery()
touter = conn.t.placex.alias('outer')
touter = self.conn.t.placex.alias('outer')
sql = _select_from_placex(inner)\
.join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
.where(touter.c.place_id == address_row.place_id)\
.where(touter.c.geometry.ST_Contains(inner.c.geometry))\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
@@ -335,7 +344,7 @@ class ReverseGeocoder:
async def _lookup_area_others(self, wkt: WKTElement) -> Optional[SaRow]:
t = conn.t.placex
t = self.conn.t.placex
inner = sa.select(t, t.c.geometry.ST_Distance(wkt).label('distance'))\
.where(t.c.rank_address == 0)\
@@ -344,13 +353,16 @@ class ReverseGeocoder:
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.where(self._filter_by_layer(t))\
.where(sa.func.reverse_buffered_extent(t.c.geometry, type_=Geometry)
.where(t.c.geometry
.ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
.intersects(wkt))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)
.limit(50)\
.subquery()
sql = _select_from_placex(inner)\
.where(sa._or(inner.c.geometry.ST_GeometryType().not_in(('ST_Polygon', 'ST_MultiPolygon')),
.where(sa.or_(inner.c.geometry.ST_GeometryType()
.not_in(('ST_Polygon', 'ST_MultiPolygon')),
inner.c.geometry.ST_Contains(wkt)))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
@@ -367,25 +379,18 @@ class ReverseGeocoder:
""" Lookup large areas for the given WKT point.
"""
log().section('Reverse lookup by larger area features')
t = self.conn.t.placex
if self.layer & DataLayer.ADDRESS:
address_row = await self._lookup_area_address(wkt)
address_distance = address_row.distance
else:
address_row = None
address_distance = 1000
if self.layer & (~DataLayer.ADDRESS & ~DataLayer.POI):
other_row = await self._lookup_area_others(wkt)
other_distance = other_row.distance
else:
other_row = None
other_distance = 1000
result = address_row if address_distance <= other_distance else other_row
return nres.create_from_placex_row(result)
return nres.create_from_placex_row(_get_closest(address_row, other_row), nres.ReverseResult)
async def lookup_country(self, wkt: WKTElement) -> Optional[nres.ReverseResult]:
@@ -402,10 +407,10 @@ class ReverseGeocoder:
if not ccodes:
return None
if self.layer & DataLayer.ADDRESS and self.max_rank > 4:
t = self.conn.t.placex
if self.max_rank > 4:
log().comment('Search for place nodes in country')
t = conn.t.placex
inner = sa.select(t,
t.c.geometry.ST_Distance(wkt).label('distance'))\
.where(t.c.osm_type == 'N')\
@@ -436,40 +441,8 @@ class ReverseGeocoder:
else:
address_row = None
if layer & (~DataLayer.ADDRESS & ~DataLayer.POI) and self.max_rank > 4:
log().comment('Search for non-address features inside country')
t = conn.t.placex
inner = sa.select(t, t.c.geometry.ST_Distance(wkt).label('distance'))\
.where(t.c.rank_address == 0)\
.where(t.c.rank_search.between(5, self.max_rank))\
.where(t.c.name != None)\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.where(self._filter_by_layer(t))\
.where(t.c.country_code.in_(ccode))\
.where(sa.func.reverse_buffered_extent(t.c.geometry, type_=Geometry)
.intersects(wkt))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery()
sql = _select_from_placex(inner)\
.where(sa._or(inner.c.geometry.ST_GeometryType().not_in(('ST_Polygon', 'ST_MultiPolygon')),
inner.c.geometry.ST_Contains(wkt)))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry)
other_row = (await self.conn.execute(sql)).one_or_none()
log().var_dump('Result (non-address feature)', other_row)
else:
other_row = None
if layer & DataLayer.ADDRESS and address_row is None and other_row is None:
if address_row is None:
# Still nothing, then return a country with the appropriate country code.
t = conn.t.placex
sql = _select_from_placex(t, wkt)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)\
@@ -477,11 +450,11 @@ class ReverseGeocoder:
.where(t.c.linked_place_id == None)\
.order_by('distance')
sql = self._add_geometry_columns(sql, inner.c.geometry)
sql = self._add_geometry_columns(sql, t.c.geometry)
address_row = (await self.conn.execute(sql)).one_or_none()
return nres.create_from_placex_row(_get_closest_row(address_row, other_row))
return nres.create_from_placex_row(address_row, nres.ReverseResult)
async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]:
@@ -495,13 +468,13 @@ class ReverseGeocoder:
wkt = WKTElement(f'POINT({coord[0]} {coord[1]})', srid=4326)
result: Optional[ReverseResult] = None
result: Optional[nres.ReverseResult] = None
if max_rank >= 26:
if self.max_rank >= 26:
result = await self.lookup_street_poi(wkt)
if result is None and max_rank > 4:
if result is None and self.max_rank > 4:
result = await self.lookup_area(wkt)
if result is None:
if result is None and self.layer & DataLayer.ADDRESS:
result = await self.lookup_country(wkt)
if result is not None:
await nres.add_result_details(self.conn, result, self.details)

View File

@@ -85,6 +85,8 @@ class Point(NamedTuple):
AnyPoint = Union[Point, Tuple[float, float]]
WKB_BBOX_HEADER_LE = b'\x01\x03\x00\x00\x20\xE6\x10\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00'
WKB_BBOX_HEADER_BE = b'\x00\x20\x00\x00\x03\x00\x00\x10\xe6\x00\x00\x00\x01\x00\x00\x00\x05'
class Bbox:
""" A bounding box in WSG84 projection.
@@ -134,9 +136,9 @@ class Bbox:
if len(wkb) != 97:
raise ValueError("WKB must be a bounding box polygon")
if wkb.startswith(b'\x01\x03\x00\x00\x20\xE6\x10\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00'):
if wkb.startswith(WKB_BBOX_HEADER_LE):
x1, y1, _, _, x2, y2 = unpack('<dddddd', wkb[17:65])
elif wkb.startswith(b'\x00\x20\x00\x00\x03\x00\x00\x10\xe6\x00\x00\x00\x01\x00\x00\x00\x05'):
elif wkb.startswith(WKB_BBOX_HEADER_BE):
x1, y1, _, _, x2, y2 = unpack('>dddddd', wkb[17:65])
else:
raise ValueError("WKB has wrong header")
@@ -144,6 +146,7 @@ class Bbox:
return Bbox(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
@staticmethod
def from_point(pt: Point, buffer: float) -> 'Bbox':
""" Return a Bbox around the point with the buffer added to all sides.
"""