add lookup() call to the library API

Currently only looks places up in placex.
This commit is contained in:
Sarah Hoffmann
2023-02-01 09:56:33 +01:00
parent 4573389da7
commit df65c10360
8 changed files with 541 additions and 21 deletions

View File

@@ -15,4 +15,4 @@ ignored-classes=NominatimArgs,closing
# typed Python is enabled. See also https://github.com/PyCQA/pylint/issues/5273
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager
good-names=i,x,y,m,fd,db,cc
good-names=i,x,y,m,t,fd,db,cc

View File

@@ -14,6 +14,10 @@ import from this file, not from the source files directly.
# See also https://github.com/PyCQA/pylint/issues/6006
# pylint: disable=useless-import-alias
from nominatim.api.core import (NominatimAPI as NominatimAPI,
NominatimAPIAsync as NominatimAPIAsync)
from nominatim.api.status import (StatusResult as StatusResult)
from .core import (NominatimAPI as NominatimAPI,
NominatimAPIAsync as NominatimAPIAsync)
from .status import (StatusResult as StatusResult)
from .types import (PlaceID as PlaceID,
OsmID as OsmID,
PlaceRef as PlaceRef,
LookupDetails as LookupDetails)

View File

@@ -18,8 +18,12 @@ import asyncpg
from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.config import Configuration
from nominatim.api.status import get_status, StatusResult
from nominatim.api.connection import SearchConnection
from nominatim.api.status import get_status, StatusResult
from nominatim.api.lookup import get_place_by_id
from nominatim.api.types import PlaceRef, LookupDetails
from nominatim.api.results import SearchResult
class NominatimAPIAsync:
""" API loader, asynchronous version.
@@ -122,6 +126,16 @@ class NominatimAPIAsync:
return status
async def lookup(self, place: PlaceRef,
                 details: LookupDetails) -> Optional[SearchResult]:
    """ Look up a single place and return it with the requested details.

        A connection is opened through `self.begin()` and the actual work
        is delegated to `get_place_by_id()`. The result is None when there
        is no entry under the given reference.
    """
    async with self.begin() as conn:
        found = await get_place_by_id(conn, place, details)
        return found
class NominatimAPI:
""" API loader, synchronous version.
"""
@@ -145,3 +159,10 @@ class NominatimAPI:
""" Return the status of the database.
"""
return self._loop.run_until_complete(self._async_api.status())
def lookup(self, place: PlaceRef,
           details: LookupDetails) -> Optional[SearchResult]:
    """ Look up a single place and return it with the requested details.

        Synchronous wrapper: the asynchronous lookup() is run to completion
        on the event loop of this object and its result is handed back.
    """
    pending = self._async_api.lookup(place, details)
    return self._loop.run_until_complete(pending)

81
nominatim/api/lookup.py Normal file
View File

@@ -0,0 +1,81 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of place lookup by ID.
"""
from typing import Optional
import sqlalchemy as sa
from nominatim.typing import SaColumn, SaLabel, SaRow
from nominatim.api.connection import SearchConnection
import nominatim.api.types as ntyp
import nominatim.api.results as nres
def _select_column_geometry(column: SaColumn,
                            geometry_output: ntyp.GeometryFormat) -> SaLabel:
    """ Create the appropriate column expression for selecting a
        geometry for the details response.

        When GeoJSON output is requested, the geometry is returned as a
        GeoJSON string labelled 'geometry_geojson'. Geometries with more
        than 5000 points are simplified before conversion. In all other
        cases only the geometry type is selected, labelled 'geometry_type'.
    """
    if geometry_output & ntyp.GeometryFormat.GEOJSON:
        # The column name has to be interpolated in all three places.
        # A bare '{0}' inside an f-string renders as the literal number 0.
        col = column.name
        return sa.literal_column(f"""
                  ST_AsGeoJSON(CASE WHEN ST_NPoints({col}) > 5000
                               THEN ST_SimplifyPreserveTopology({col}, 0.0001)
                               ELSE {col} END)
                  """).label('geometry_geojson')

    return sa.func.ST_GeometryType(column).label('geometry_type')
async def find_in_placex(conn: SearchConnection, place: ntyp.PlaceRef,
                         details: ntyp.LookupDetails) -> Optional[SaRow]:
    """ Fetch the base information for a place from the placex table.

        The place may be referenced by its place ID or by OSM type and ID.
        Returns the matching row or None when nothing matches or when the
        kind of reference is not supported.
    """
    placex = conn.t.placex

    columns = [placex.c.place_id, placex.c.osm_type, placex.c.osm_id, placex.c.name,
               placex.c.class_, placex.c.type, placex.c.admin_level,
               placex.c.address, placex.c.extratags,
               placex.c.housenumber, placex.c.postcode, placex.c.country_code,
               placex.c.importance, placex.c.wikipedia, placex.c.indexed_date,
               placex.c.parent_place_id, placex.c.rank_address, placex.c.rank_search,
               placex.c.linked_place_id,
               sa.func.ST_X(placex.c.centroid).label('x'),
               sa.func.ST_Y(placex.c.centroid).label('y'),
               _select_column_geometry(placex.c.geometry, details.geometry_output)]
    query = sa.select(*columns)

    if isinstance(place, ntyp.PlaceID):
        query = query.where(placex.c.place_id == place.place_id)
    elif isinstance(place, ntyp.OsmID):
        query = query.where(placex.c.osm_type == place.osm_type)\
                     .where(placex.c.osm_id == place.osm_id)
        # Without an explicit class, take the first match in class order.
        if not place.osm_class:
            query = query.order_by(placex.c.class_)
        else:
            query = query.where(placex.c.class_ == place.osm_class)
        query = query.limit(1)
    else:
        return None

    return (await conn.execute(query)).one_or_none()
async def get_place_by_id(conn: SearchConnection, place: ntyp.PlaceRef,
                          details: ntyp.LookupDetails) -> Optional[nres.SearchResult]:
    """ Retrieve a place with additional details from the database.

        The place is searched in the placex table. When it is found, the
        additional information requested through 'details' is added to the
        result. Returns None when there is no entry for the given reference.

        Raises a ValueError when a geometry output format other than
        GeoJSON is requested.
    """
    if details.geometry_output and details.geometry_output != ntyp.GeometryFormat.GEOJSON:
        raise ValueError("lookup only supports geojson polygon output.")

    row = await find_in_placex(conn, place, details)
    if row is None:
        # Nothing found under this ID.
        return None

    result = nres.create_from_placex_row(row=row)
    await nres.add_result_details(conn, result, details)
    return result

295
nominatim/api/results.py Normal file
View File

@@ -0,0 +1,295 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Dataclasses for search results and helper functions to fill them.
Data classes are part of the public API while the functions are for
internal use only. That's why they are implemented as free-standing functions
instead of member functions.
"""
from typing import Optional, Tuple, Dict, Sequence, Any
import enum
import dataclasses
import datetime as dt
import sqlalchemy as sa
from nominatim.typing import SaSelect, SaRow
from nominatim.api.types import Point, LookupDetails
from nominatim.api.connection import SearchConnection
# This file defines complex result data classes.
# pylint: disable=too-many-instance-attributes
class SourceTable(enum.Enum):
    """ Enumeration of kinds of results.

        The member stored in the 'source_table' field of a SearchResult
        tells from which kind of source the result was created.
    """
    PLACEX = 1
    OSMLINE = 2
    TIGER = 3
    POSTCODE = 4
    COUNTRY = 5
@dataclasses.dataclass
class AddressLine:
    """ Detailed information about a related place.

        Instances are created from database rows by the helper functions
        in this file that complete the address, linked and parented rows
        of a search result.
    """
    place_id: Optional[int]
    # Pair of OSM type and OSM ID.
    osm_object: Optional[Tuple[str, int]]
    # Pair of class and type of the place.
    category: Tuple[str, str]
    names: Dict[str, str]
    extratags: Optional[Dict[str, str]]
    admin_level: int
    # For rows selected from placex: true when the geometry is a
    # polygon or multipolygon.
    fromarea: bool
    isaddress: bool
    rank_address: int
    distance: float

# Type of the lists of address lines that are attached to a search result.
AddressLines = Sequence[AddressLine]
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a search term.

        The fields correspond to the columns of the same name
        in the word table.
    """
    word_id: int
    word_token: str
    word: Optional[str] = None

# Type of the lists of keywords that are attached to a search result.
WordInfos = Sequence[WordInfo]
@dataclasses.dataclass
class SearchResult:
    """ Data class collecting all available information about a search result.

        Only the source table, the category and the centroid are mandatory.
        All other fields have defaults. The lists of related rows and
        keywords remain None until they are filled in by one of the
        complete_*() functions of this file.
    """
    source_table: SourceTable
    # Pair of class and type of the place.
    category: Tuple[str, str]
    centroid: Point

    place_id : Optional[int] = None
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    # Pair of OSM type and OSM ID.
    osm_object: Optional[Tuple[str, int]] = None
    admin_level: int = 15

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    indexed_date: Optional[dt.datetime] = None

    # Optional details, see add_result_details().
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    # Geometry output keyed by the name of the output kind.
    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Get the latitude (or y) of the center point of the place.
        """
        return self.centroid[1]

    @property
    def lon(self) -> float:
        """ Get the longitude (or x) of the center point of the place.
        """
        return self.centroid[0]

    def calculated_importance(self) -> float:
        """ Get a valid importance value. This is either the stored importance
            of the value or an artificial value computed from the place's
            search rank.

            Note that a stored importance of 0 is handled like a missing
            importance.
        """
        return self.importance or (0.7500001 - (self.rank_search/40.0))

    # pylint: disable=consider-using-f-string
    def centroid_as_geojson(self) -> str:
        """ Get the centroid in GeoJSON format.

            The coordinates are written in the order of the centroid
            tuple, i.e. x (longitude) first, then y (latitude).
        """
        return '{"type": "Point","coordinates": [%f, %f]}' % self.centroid
def create_from_placex_row(row: SaRow) -> SearchResult:
    """ Build a SearchResult from a row selected from the placex table.

        Plain columns are copied over as they are. Every column whose name
        starts with 'geometry_' ends up in the geometry dictionary of the
        result under its name without that prefix.
    """
    # Columns that go into the field of the same name without conversion.
    copied = ('place_id', 'parent_place_id', 'linked_place_id', 'admin_level',
              'address', 'extratags', 'housenumber', 'postcode', 'wikipedia',
              'rank_address', 'rank_search', 'importance', 'country_code',
              'indexed_date')
    values = {name: getattr(row, name) for name in copied}

    result = SearchResult(source_table=SourceTable.PLACEX,
                          osm_object=(row.osm_type, row.osm_id),
                          category=(row.class_, row.type),
                          names=row.name,
                          centroid=Point(row.x, row.y),
                          **values)

    prefix = 'geometry_'
    geometry = {}
    for key, value in row._mapping.items(): # pylint: disable=W0212
        if key.startswith(prefix):
            geometry[key[len(prefix):]] = value
    result.geometry = geometry

    return result
async def add_result_details(conn: SearchConnection, result: SearchResult,
                             details: LookupDetails) -> None:
    """ Add to the result all extra information that has been
        switched on in 'details'.
    """
    # Flag in 'details' and the function that retrieves the data for it.
    # The order of execution is the order of this table.
    steps = (('address_details', complete_address_details),
             ('linked_places', complete_linked_places),
             ('parented_places', complete_parented_places),
             ('keywords', complete_keywords))

    for flag, complete in steps:
        if getattr(details, flag):
            await complete(conn, result)
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The row must provide the columns place_id, osm_type, osm_id, class,
        type, name, admin_level, fromarea, rank_address and distance.
        The columns extratags, place_type and isaddress are optional.
        A non-empty place_type is added to the extra tags of the address
        line. A missing isaddress column is taken as True.
    """
    extratags: Optional[Dict[str, str]] = getattr(row, 'extratags', {})

    # Look the column up by attribute. The 'in' operator is not reliable
    # here because tuple-like result rows compare against the values of
    # the row, not against its column names.
    place_type = getattr(row, 'place_type', None)
    if place_type:
        # Build a new dictionary: the column may be NULL and the
        # dictionary owned by the row must not be modified.
        extratags = {**(extratags or {}), 'place_type': place_type}

    return AddressLine(place_id=row.place_id,
                       osm_object=(row.osm_type, row.osm_id),
                       category=(getattr(row, 'class'), row.type),
                       names=row.name,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
async def complete_address_details(conn: SearchConnection, result: SearchResult) -> None:
    """ Fill the address rows of the result with the places that
        make up its address.

        The rows come from the database function get_addressdata() and are
        sorted by descending address rank.
    """
    # Only results from the TIGER and OSMLINE sources hand a house number
    # to the database function. All others use -1.
    housenumber = -1
    uses_housenumber = result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE)
    if uses_housenumber and result.housenumber is not None:
        housenumber = int(result.housenumber)
    elif uses_housenumber and result.extratags is not None \
         and 'startnumber' in result.extratags:
        # details requests do not come with a specific house number
        housenumber = int(result.extratags['startnumber'])

    addressdata = sa.func.get_addressdata(result.place_id, housenumber)\
                    .table_valued( # type: ignore[no-untyped-call]
                        sa.column('place_id', type_=sa.Integer),
                        'osm_type',
                        sa.column('osm_id', type_=sa.BigInteger),
                        sa.column('name', type_=conn.t.types.Composite),
                        'class', 'type', 'place_type',
                        sa.column('admin_level', type_=sa.Integer),
                        sa.column('fromarea', type_=sa.Boolean),
                        sa.column('isaddress', type_=sa.Boolean),
                        sa.column('rank_address', type_=sa.SmallInteger),
                        sa.column('distance', type_=sa.Float))

    ordering = (sa.column('rank_address').desc(), sa.column('isaddress').desc())
    query = sa.select(addressdata).order_by(*ordering)

    result.address_rows = []
    rows = await conn.execute(query)
    result.address_rows.extend(_result_row_to_address_row(row) for row in rows)
# pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create the basic SELECT on the placex table that returns the
        columns needed for creating an AddressLine.

        'fromarea' is true for polygon and multipolygon geometries.
        'distance' is the spheroid distance between the geometry of the
        place and the given centroid. Callers add their own WHERE clauses.
    """
    t = conn.t.placex
    # The centroid coordinates are formatted into the SQL with '%f'.
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level,
                     sa.literal_column("""ST_GeometryType(geometry) in
('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
""" % centroid).label('distance'))
async def complete_linked_places(conn: SearchConnection, result: SearchResult) -> None:
    """ Fill the linked rows of the result with the places that link to it.

        The list stays empty for results that do not come from placex.
    """
    result.linked_rows = []

    if result.source_table == SourceTable.PLACEX:
        query = _placex_select_address_row(conn, result.centroid)
        query = query.where(conn.t.placex.c.linked_place_id == result.place_id)

        rows = await conn.execute(query)
        result.linked_rows.extend(_result_row_to_address_row(row) for row in rows)
async def complete_keywords(conn: SearchConnection, result: SearchResult) -> None:
    """ Fill the name and address keywords of the result with the
        search terms saved for the place.

        The word IDs are read from the search_name table and then
        resolved through the word table.
    """
    search_name = conn.t.search_name
    vectors = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                .where(search_name.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []

    for name_tokens, address_tokens in await conn.execute(vectors):
        word = conn.t.word
        word_query = sa.select(word.c.word_id, word.c.word_token, word.c.word)

        # Name terms are resolved first, then address terms.
        for tokens, keywords in ((name_tokens, result.name_keywords),
                                 (address_tokens, result.address_keywords)):
            matches = await conn.execute(
                word_query.where(word.c.word_id == sa.any_(tokens)))
            keywords.extend(WordInfo(*row) for row in matches)
async def complete_parented_places(conn: SearchConnection, result: SearchResult) -> None:
    """ Fill the parented rows of the result with the places for which
        the result provides the address.

        Only places with a search rank of 30 are taken into account.
        The list stays empty for results that do not come from placex.
    """
    result.parented_rows = []

    if result.source_table == SourceTable.PLACEX:
        query = _placex_select_address_row(conn, result.centroid)
        query = query.where(conn.t.placex.c.parent_place_id == result.place_id)
        query = query.where(conn.t.placex.c.rank_search == 30)

        rows = await conn.execute(query)
        result.parented_rows.extend(_result_row_to_address_row(row) for row in rows)

91
nominatim/api/types.py Normal file
View File

@@ -0,0 +1,91 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Complex datatypes used by the Nominatim API.
"""
from typing import Optional, Union, NamedTuple
import dataclasses
import enum
@dataclasses.dataclass
class PlaceID:
    """ Reference an object by Nominatim's internal ID.
    """
    # The value of the place_id column of the place.
    place_id: int
@dataclasses.dataclass
class OsmID:
    """ Reference a place by its OSM type and OSM ID. The main category
        (class) may be given in addition to narrow down the reference.
    """
    osm_type: str
    osm_id: int
    osm_class: Optional[str] = None

    def __post_init__(self) -> None:
        # Only the type letters 'N', 'W' and 'R' are accepted.
        type_is_valid = self.osm_type in ('N', 'W', 'R')
        if type_is_valid:
            return

        raise ValueError(f"Illegal OSM type '{self.osm_type}'. Must be one of N, W, R.")
# Any of the supported kinds of reference to a place.
PlaceRef = Union[PlaceID, OsmID]
class Point(NamedTuple):
    """ A geographic point in WGS84 projection, saved
        as a tuple of x and y coordinate.
    """
    x: float
    y: float

    @property
    def lat(self) -> float:
        """ The latitude of the point, which is its y coordinate.
        """
        return self[1]

    @property
    def lon(self) -> float:
        """ The longitude of the point, which is its x coordinate.
        """
        return self[0]
class GeometryFormat(enum.Flag):
    """ Geometry output formats supported by Nominatim.

        The formats are flags and may be combined.
    """
    NONE = 0
    GEOJSON = 1 << 0
    KML = 1 << 1
    SVG = 1 << 2
    TEXT = 1 << 3
@dataclasses.dataclass
class LookupDetails:
    """ Set of parameters that control how much detail is
        returned together with a search result.

        By default no additional information is requested.
    """
    geometry_output: GeometryFormat = GeometryFormat.NONE
    """ Return the full geometry of the place with the result. More than
        one format may be selected. Be aware that geometries can
        become quite large.
    """
    address_details: bool = False
    """ Return detailed information about the places that
        make up the address of the result.
    """
    linked_places: bool = False
    """ Return detailed information about the places that
        link to the result.
    """
    parented_places: bool = False
    """ Return detailed information about all places that have this
        place as a parent, i.e. all places for which it provides
        the address details. Only POI places can have parents.
    """
    keywords: bool = False
    """ Return information about the search terms used for this place.
    """

View File

@@ -14,6 +14,22 @@ from geoalchemy2 import Geometry
from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
class PostgresTypes:
    """ Type definitions for complex types as used in Postgres variants.

        SearchTables uses these types when the engine is 'postgresql'.
    """
    Composite = HSTORE
    Json = JSONB
    IntArray = ARRAY(sa.Integer()) #pylint: disable=invalid-name
class SqliteTypes:
    """ Type definitions for complex types as used in SQLite variants.

        All complex types are mapped to the JSON type of SQLite.
        SearchTables uses these types when the engine is 'sqlite'.
    """
    Composite = sqlite_json
    Json = sqlite_json
    IntArray = sqlite_json
#pylint: disable=too-many-instance-attributes
class SearchTables:
""" Data class that holds the tables of the Nominatim database.
@@ -21,13 +37,9 @@ class SearchTables:
def __init__(self, meta: sa.MetaData, engine_name: str) -> None:
if engine_name == 'postgresql':
Composite: Any = HSTORE
Json: Any = JSONB
IntArray: Any = ARRAY(sa.Integer()) #pylint: disable=invalid-name
self.types: Any = PostgresTypes
elif engine_name == 'sqlite':
Composite = sqlite_json
Json = sqlite_json
IntArray = sqlite_json
self.types = SqliteTypes
else:
raise ValueError("Only 'postgresql' and 'sqlite' engines are supported.")
@@ -57,9 +69,9 @@ class SearchTables:
sa.Column('class', sa.Text, nullable=False, key='class_'),
sa.Column('type', sa.Text, nullable=False),
sa.Column('admin_level', sa.SmallInteger),
sa.Column('name', Composite),
sa.Column('address', Composite),
sa.Column('extratags', Composite),
sa.Column('name', self.types.Composite),
sa.Column('address', self.types.Composite),
sa.Column('extratags', self.types.Composite),
sa.Column('geometry', Geometry(srid=4326), nullable=False),
sa.Column('wikipedia', sa.Text),
sa.Column('country_code', sa.String(2)),
@@ -97,7 +109,7 @@ class SearchTables:
sa.Column('partition', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('linegeo', Geometry(srid=4326)),
sa.Column('address', Composite),
sa.Column('address', self.types.Composite),
sa.Column('postcode', sa.Text),
sa.Column('country_code', sa.String(2)))
@@ -106,12 +118,12 @@ class SearchTables:
sa.Column('word_token', sa.Text, nullable=False),
sa.Column('type', sa.Text, nullable=False),
sa.Column('word', sa.Text),
sa.Column('info', Json))
sa.Column('info', self.types.Json))
self.country_name = sa.Table('country_name', meta,
sa.Column('country_code', sa.String(2)),
sa.Column('name', Composite),
sa.Column('derived_name', Composite),
sa.Column('name', self.types.Composite),
sa.Column('derived_name', self.types.Composite),
sa.Column('country_default_language_code', sa.Text),
sa.Column('partition', sa.Integer))
@@ -126,8 +138,8 @@ class SearchTables:
sa.Column('importance', sa.Float),
sa.Column('search_rank', sa.SmallInteger),
sa.Column('address_rank', sa.SmallInteger),
sa.Column('name_vector', IntArray, index=True),
sa.Column('nameaddress_vector', IntArray, index=True),
sa.Column('name_vector', self.types.IntArray, index=True),
sa.Column('nameaddress_vector', self.types.IntArray, index=True),
sa.Column('country_code', sa.String(2)),
sa.Column('centroid', Geometry(srid=4326)))

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Type definitions for typing annotations.
@@ -50,3 +50,19 @@ else:
Protocol = object
Final = 'Final'
TypedDict = dict
# SQLAlchemy introduced generic types in version 2.0 making typing
# incompatible with older versions. Add wrappers here so we don't have
# to litter the code with bare-string types.
if TYPE_CHECKING:
import sqlalchemy as sa
from typing_extensions import (TypeAlias as TypeAlias)
else:
TypeAlias = str
SaSelect: TypeAlias = 'sa.Select[Any]'
SaRow: TypeAlias = 'sa.Row[Any]'
SaColumn: TypeAlias = 'sa.Column[Any]'
SaLabel: TypeAlias = 'sa.Label[Any]'