mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
split code into submodules
This commit is contained in:
38
src/nominatim_api/__init__.py
Normal file
38
src/nominatim_api/__init__.py
Normal file
@@ -0,0 +1,38 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
The public interface of the Nominatim library.
|
||||
|
||||
Classes and functions defined in this file are considered stable. Always
|
||||
import from this file, not from the source files directly.
|
||||
"""
|
||||
|
||||
# See also https://github.com/PyCQA/pylint/issues/6006
|
||||
# pylint: disable=useless-import-alias
|
||||
|
||||
from .core import (NominatimAPI as NominatimAPI,
|
||||
NominatimAPIAsync as NominatimAPIAsync)
|
||||
from .connection import (SearchConnection as SearchConnection)
|
||||
from .status import (StatusResult as StatusResult)
|
||||
from .types import (PlaceID as PlaceID,
|
||||
OsmID as OsmID,
|
||||
PlaceRef as PlaceRef,
|
||||
Point as Point,
|
||||
Bbox as Bbox,
|
||||
GeometryFormat as GeometryFormat,
|
||||
DataLayer as DataLayer)
|
||||
from .results import (SourceTable as SourceTable,
|
||||
AddressLine as AddressLine,
|
||||
AddressLines as AddressLines,
|
||||
WordInfo as WordInfo,
|
||||
WordInfos as WordInfos,
|
||||
DetailedResult as DetailedResult,
|
||||
ReverseResult as ReverseResult,
|
||||
ReverseResults as ReverseResults,
|
||||
SearchResult as SearchResult,
|
||||
SearchResults as SearchResults)
|
||||
from .localization import (Locales as Locales)
|
||||
149
src/nominatim_api/connection.py
Normal file
149
src/nominatim_api/connection.py
Normal file
@@ -0,0 +1,149 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Extended SQLAlchemy connection class that also includes access to the schema.
|
||||
"""
|
||||
from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \
|
||||
Awaitable, Callable, TypeVar
|
||||
import asyncio
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.asyncio import AsyncConnection
|
||||
|
||||
from nominatim_core.typing import SaFromClause
|
||||
from nominatim_core.db.sqlalchemy_schema import SearchTables
|
||||
from nominatim_core.db.sqlalchemy_types import Geometry
|
||||
from .logging import log
|
||||
|
||||
T = TypeVar('T')
|
||||
|
||||
class SearchConnection:
    """ An extended SQLAlchemy connection class, that also contains
        the table definitions. The underlying asynchronous SQLAlchemy
        connection can be accessed with the 'connection' property.
        The 't' property is the collection of Nominatim tables.

        Queries issued through `scalar()` and `execute()` are logged and
        run under the timeout set with `set_query_timeout()`.
    """

    def __init__(self, conn: AsyncConnection,
                 tables: SearchTables,
                 properties: Dict[str, Any]) -> None:
        """ Wrap the given connection.

            Parameters:
              conn: Asynchronous SQLAlchemy connection the queries run on.
              tables: Table definitions, available as attribute 't'.
              properties: Dictionary used as cache for properties and
                other values. It is stored by reference, not copied, so
                entries added later are visible to the owner of the
                dictionary.
        """
        self.connection = conn
        self.t = tables # pylint: disable=invalid-name
        self._property_cache = properties
        # Names of the existing place_classtype_* tables, filled lazily
        # by get_class_table().
        self._classtables: Optional[Set[str]] = None
        # Handed to asyncio.wait_for() as timeout; None means no timeout.
        self.query_timeout: Optional[int] = None


    def set_query_timeout(self, timeout: Optional[int]) -> None:
        """ Set the timeout after which a query over this connection
            is cancelled.
        """
        self.query_timeout = timeout


    async def scalar(self, sql: sa.sql.base.Executable,
                     params: Union[Mapping[str, Any], None] = None
                    ) -> Any:
        """ Execute a 'scalar()' query on the connection.

            The statement is logged first and then run under the
            current query timeout.
        """
        log().sql(self.connection, sql, params)
        return await asyncio.wait_for(self.connection.scalar(sql, params), self.query_timeout)


    async def execute(self, sql: 'sa.Executable',
                      params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None] = None
                     ) -> 'sa.Result[Any]':
        """ Execute a 'execute()' query on the connection.

            The statement is logged first and then run under the
            current query timeout.
        """
        log().sql(self.connection, sql, params)
        return await asyncio.wait_for(self.connection.execute(sql, params), self.query_timeout)


    async def get_property(self, name: str, cached: bool = True) -> str:
        """ Get a property from Nominatim's property table.

            Property values are normally cached so that they are only
            retrieved from the database when they are queried for the
            first time with this function. Set 'cached' to False to force
            reading the property from the database.

            Raises a ValueError if the property does not exist.
        """
        lookup_name = f'DBPROP:{name}'

        if cached and lookup_name in self._property_cache:
            return cast(str, self._property_cache[lookup_name])

        sql = sa.select(self.t.properties.c.value)\
            .where(self.t.properties.c.property == name)
        # Go through self.scalar() so that the lookup is logged and honours
        # the query timeout like every other query on this connection.
        value = await self.scalar(sql)

        if value is None:
            raise ValueError(f"Property '{name}' not found in database.")

        # The cache is refreshed even when 'cached' is False.
        self._property_cache[lookup_name] = cast(str, value)

        return cast(str, value)


    async def get_db_property(self, name: str) -> Any:
        """ Get a setting from the database. At the moment, only
            'server_version', the version of the database software, can
            be retrieved with this function.

            The value is read from the cache entry 'DB:server_version',
            no query is sent to the database.

            Raises a ValueError if the property does not exist.
        """
        if name != 'server_version':
            raise ValueError(f"DB setting '{name}' not found in database.")

        return self._property_cache['DB:server_version']


    async def get_cached_value(self, group: str, name: str,
                               factory: Callable[[], Awaitable[T]]) -> T:
        """ Access the cache for this Nominatim instance.
            Each cache value needs to belong to a group and have a name.
            This function is for internal API use only.

            `factory` is an async callback function that produces
            the value if it is not already cached.

            Returns the cached value or the result of factory (also caching
            the result).
        """
        full_name = f'{group}:{name}'

        if full_name in self._property_cache:
            return cast(T, self._property_cache[full_name])

        value = await factory()
        self._property_cache[full_name] = value

        return value


    async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]:
        """ Lookup up if there is a classtype table for the given category
            and return a SQLAlchemy table for it, if it exists.

            Returns None when the database has no such table.
        """
        if self._classtables is None:
            # Query the list of existing classtype tables only once
            # per connection object.
            res = await self.execute(sa.text("""SELECT tablename FROM pg_tables
                                                WHERE tablename LIKE 'place_classtype_%'
                                             """))
            self._classtables = {r[0] for r in res}

        tablename = f"place_classtype_{cls}_{typ}"

        if tablename not in self._classtables:
            return None

        # Reuse a table definition that is already registered with the
        # metadata instead of defining the table a second time.
        if tablename in self.t.meta.tables:
            return self.t.meta.tables[tablename]

        return sa.Table(tablename, self.t.meta,
                        sa.Column('place_id', sa.BigInteger),
                        sa.Column('centroid', Geometry))
|
||||
974
src/nominatim_api/core.py
Normal file
974
src/nominatim_api/core.py
Normal file
@@ -0,0 +1,974 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of classes for API access via libraries.
|
||||
"""
|
||||
from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple
|
||||
import asyncio
|
||||
import sys
|
||||
import contextlib
|
||||
from pathlib import Path
|
||||
|
||||
import sqlalchemy as sa
|
||||
import sqlalchemy.ext.asyncio as sa_asyncio
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.db.sqlalchemy_schema import SearchTables
|
||||
from nominatim_core.db.async_core_library import PGCORE_LIB, PGCORE_ERROR
|
||||
from nominatim_core.config import Configuration
|
||||
from .sql import sqlite_functions, sqlalchemy_functions #pylint: disable=unused-import
|
||||
from .connection import SearchConnection
|
||||
from .status import get_status, StatusResult
|
||||
from .lookup import get_detailed_place, get_simple_place
|
||||
from .reverse import ReverseGeocoder
|
||||
from .search import ForwardGeocoder, Phrase, PhraseType, make_query_analyzer
|
||||
from . import types as ntyp
|
||||
from .results import DetailedResult, ReverseResult, SearchResults
|
||||
|
||||
|
||||
class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
    """ The main frontend to the Nominatim database implements the
        functions for lookup, forward and reverse geocoding using
        asynchronous functions.

        This class shares most of the functions with its synchronous
        version. There are some additional functions or parameters,
        which are documented below.
    """
    def __init__(self, project_dir: Path,
                 environ: Optional[Mapping[str, str]] = None,
                 loop: Optional[asyncio.AbstractEventLoop] = None) -> None:
        """ Initiate a new frontend object with asynchronous API functions.

            Parameters:
              project_dir: Path to the
                  [project directory](../admin/Import.md#creating-the-project-directory)
                  of the local Nominatim installation.
              environ: Mapping of [configuration parameters](../customize/Settings.md).
                  When set, replaces any configuration via environment variables.
                  Settings in this mapping also have precedence over any
                  parameters found in the `.env` file of the project directory.
              loop: The asyncio event loop that will be used when calling
                  functions. Only needed, when a custom event loop is used
                  and the Python version is 3.9 or earlier.
        """
        self.config = Configuration(project_dir, environ)
        # An unset/empty QUERY_TIMEOUT disables the query timeout.
        self.query_timeout = self.config.get_int('QUERY_TIMEOUT') \
                             if self.config.QUERY_TIMEOUT else None
        self.reverse_restrict_to_country_area = self.config.get_bool('SEARCH_WITHIN_COUNTRIES')
        self.server_version = 0

        # The 'loop' parameter of asyncio.Lock is only passed on Python
        # versions before 3.10.
        if sys.version_info >= (3, 10):
            self._engine_lock = asyncio.Lock()
        else:
            self._engine_lock = asyncio.Lock(loop=loop) # pylint: disable=unexpected-keyword-arg
        self._engine: Optional[sa_asyncio.AsyncEngine] = None
        self._tables: Optional[SearchTables] = None
        # This dictionary is handed by reference to every SearchConnection
        # created in begin().
        self._property_cache: Dict[str, Any] = {'DB:server_version': 0}


    async def setup_database(self) -> None:
        """ Set up the SQL engine and connections.

            This function will be implicitly called when the database is
            accessed for the first time. You may also call it explicitly to
            avoid that the first call is delayed by the setup.

            Raises a UsageError when an SQLite database is configured
            whose file does not exist, unless NOMINATIM_DATABASE_RW is
            present in the configuration environment and enabled.
        """
        async with self._engine_lock:
            # Somebody else may have finished the setup while we were
            # waiting for the lock.
            if self._engine:
                return

            extra_args: Dict[str, Any] = {'future': True,
                                          'echo': self.config.get_bool('DEBUG_SQL')}

            pool_size = self.config.get_int('API_POOL_SIZE')
            if pool_size == 0:
                extra_args['poolclass'] = sa.pool.NullPool
            else:
                extra_args['poolclass'] = sa.pool.AsyncAdaptedQueuePool
                extra_args['max_overflow'] = 0
                extra_args['pool_size'] = pool_size


            is_sqlite = self.config.DATABASE_DSN.startswith('sqlite:')

            if is_sqlite:
                # The part after 'sqlite:' is a ';'-separated list of
                # key=value pairs. Segments without '=' (e.g. caused by a
                # trailing ';') are skipped instead of making dict() fail
                # with an obscure ValueError.
                params = dict(p.split('=', 1)
                              for p in self.config.DATABASE_DSN[7:].split(';')
                              if '=' in p)
                dburl = sa.engine.URL.create('sqlite+aiosqlite',
                                             database=params.get('dbname'))

                if not ('NOMINATIM_DATABASE_RW' in self.config.environ
                        and self.config.get_bool('DATABASE_RW')) \
                   and not Path(params.get('dbname', '')).is_file():
                    raise UsageError(f"SQlite database '{params.get('dbname')}' does not exist.")
            else:
                dsn = self.config.get_database_params()
                # Everything that is not a dedicated URL component is
                # handed on as query parameter of the URL.
                query = {k: v for k, v in dsn.items()
                         if k not in ('user', 'password', 'dbname', 'host', 'port')}

                dburl = sa.engine.URL.create(
                           f'postgresql+{PGCORE_LIB}',
                           database=dsn.get('dbname'),
                           username=dsn.get('user'),
                           password=dsn.get('password'),
                           host=dsn.get('host'),
                           port=int(dsn['port']) if 'port' in dsn else None,
                           query=query)

            engine = sa_asyncio.create_async_engine(dburl, **extra_args)

            if is_sqlite:
                server_version = 0

                # Executed for every new database connection.
                @sa.event.listens_for(engine.sync_engine, "connect")
                def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
                    # Extension loading is enabled only while
                    # mod_spatialite is being loaded.
                    dbapi_con.run_async(lambda conn: conn.enable_load_extension(True))
                    sqlite_functions.install_custom_functions(dbapi_con)
                    cursor = dbapi_con.cursor()
                    cursor.execute("SELECT load_extension('mod_spatialite')")
                    cursor.execute('SELECT SetDecimalPrecision(7)')
                    dbapi_con.run_async(lambda conn: conn.enable_load_extension(False))
            else:
                try:
                    async with engine.begin() as conn:
                        result = await conn.scalar(sa.text('SHOW server_version_num'))
                        server_version = int(result)
                        if server_version >= 110000:
                            await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
                            await conn.execute(sa.text(
                                    "SET max_parallel_workers_per_gather TO '0'"))
                except (PGCORE_ERROR, sa.exc.OperationalError):
                    # The version could not be determined. Continue with
                    # version 0; the engine is installed nevertheless.
                    server_version = 0

                if server_version >= 110000:
                    # Apply the same settings to every connection that is
                    # opened later on.
                    @sa.event.listens_for(engine.sync_engine, "connect")
                    def _on_connect(dbapi_con: Any, _: Any) -> None:
                        cursor = dbapi_con.cursor()
                        cursor.execute("SET jit_above_cost TO '-1'")
                        cursor.execute("SET max_parallel_workers_per_gather TO '0'")

            self._property_cache['DB:server_version'] = server_version

            self._tables = SearchTables(sa.MetaData()) # pylint: disable=no-member
            self._engine = engine


    async def close(self) -> None:
        """ Close all active connections to the database. The NominatimAPIAsync
            object remains usable after closing. If a new API function is
            called, new connections are created.
        """
        if self._engine is not None:
            await self._engine.dispose()


    @contextlib.asynccontextmanager
    async def begin(self) -> AsyncIterator[SearchConnection]:
        """ Create a new connection with automatic transaction handling.

            This function may be used to get low-level access to the database.
            Refer to the documentation of SQLAlchemy for details how to use
            the connection object.
        """
        if self._engine is None:
            await self.setup_database()

        assert self._engine is not None
        assert self._tables is not None

        async with self._engine.begin() as conn:
            yield SearchConnection(conn, self._tables, self._property_cache)


    async def status(self) -> StatusResult:
        """ Return the status of the database.

            When the database cannot be accessed, a result with status
            code 700 is returned instead of raising the database error.
        """
        try:
            async with self.begin() as conn:
                conn.set_query_timeout(self.query_timeout)
                status = await get_status(conn)
        except (PGCORE_ERROR, sa.exc.OperationalError):
            return StatusResult(700, 'Database connection failed')

        return status


    async def details(self, place: ntyp.PlaceRef, **params: Any) -> Optional[DetailedResult]:
        """ Get detailed information about a place in the database.

            Returns None if there is no entry under the given ID.
        """
        details = ntyp.LookupDetails.from_kwargs(params)
        async with self.begin() as conn:
            conn.set_query_timeout(self.query_timeout)
            if details.keywords:
                await make_query_analyzer(conn)
            return await get_detailed_place(conn, place, details)


    async def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults:
        """ Get simple information about a list of places.

            Returns a list of place information for all IDs that were found.
        """
        details = ntyp.LookupDetails.from_kwargs(params)
        async with self.begin() as conn:
            conn.set_query_timeout(self.query_timeout)
            if details.keywords:
                await make_query_analyzer(conn)
            # Places are looked up one after another; places that were
            # not found are dropped from the result.
            return SearchResults(filter(None,
                                        [await get_simple_place(conn, p, details) for p in places]))


    async def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
        """ Find a place by its coordinates. Also known as reverse geocoding.

            Returns the closest result that can be found or None if
            no place matches the given criteria.
        """
        # The following negation handles NaN correctly. Don't change.
        if not abs(coord[0]) <= 180 or not abs(coord[1]) <= 90:
            # There are no results to be expected outside valid coordinates.
            return None

        details = ntyp.ReverseDetails.from_kwargs(params)
        async with self.begin() as conn:
            conn.set_query_timeout(self.query_timeout)
            if details.keywords:
                await make_query_analyzer(conn)
            geocoder = ReverseGeocoder(conn, details,
                                       self.reverse_restrict_to_country_area)
            return await geocoder.lookup(coord)


    async def search(self, query: str, **params: Any) -> SearchResults:
        """ Find a place by free-text search. Also known as forward geocoding.

            The query is split at commas into phrases. Raises a UsageError
            when the query is empty or consists only of whitespace.
        """
        query = query.strip()
        if not query:
            raise UsageError('Nothing to search for.')

        async with self.begin() as conn:
            conn.set_query_timeout(self.query_timeout)
            geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params),
                                       self.config.get_int('REQUEST_TIMEOUT') \
                                         if self.config.REQUEST_TIMEOUT else None)
            phrases = [Phrase(PhraseType.NONE, p.strip()) for p in query.split(',')]
            return await geocoder.lookup(phrases)


    # pylint: disable=too-many-arguments,too-many-branches
    async def search_address(self, amenity: Optional[str] = None,
                             street: Optional[str] = None,
                             city: Optional[str] = None,
                             county: Optional[str] = None,
                             state: Optional[str] = None,
                             country: Optional[str] = None,
                             postalcode: Optional[str] = None,
                             **params: Any) -> SearchResults:
        """ Find an address using structured search.

            Raises a UsageError when none of the address parts is given.
        """
        async with self.begin() as conn:
            conn.set_query_timeout(self.query_timeout)
            details = ntyp.SearchDetails.from_kwargs(params)

            phrases: List[Phrase] = []

            if amenity:
                phrases.append(Phrase(PhraseType.AMENITY, amenity))
            if street:
                phrases.append(Phrase(PhraseType.STREET, street))
            if city:
                phrases.append(Phrase(PhraseType.CITY, city))
            if county:
                phrases.append(Phrase(PhraseType.COUNTY, county))
            if state:
                phrases.append(Phrase(PhraseType.STATE, state))
            if postalcode:
                phrases.append(Phrase(PhraseType.POSTCODE, postalcode))
            if country:
                phrases.append(Phrase(PhraseType.COUNTRY, country))

            if not phrases:
                raise UsageError('Nothing to search for.')

            # The rank restriction is chosen by the first address part
            # given in the order checked below.
            if amenity or street:
                details.restrict_min_max_rank(26, 30)
            elif city:
                details.restrict_min_max_rank(13, 25)
            elif county:
                details.restrict_min_max_rank(10, 12)
            elif state:
                details.restrict_min_max_rank(5, 9)
            elif postalcode:
                details.restrict_min_max_rank(5, 11)
            else:
                details.restrict_min_max_rank(4, 4)

            # Only set default layers when the caller has not chosen any.
            if 'layers' not in params:
                details.layers = ntyp.DataLayer.ADDRESS
                if amenity:
                    details.layers |= ntyp.DataLayer.POI

            geocoder = ForwardGeocoder(conn, details,
                                       self.config.get_int('REQUEST_TIMEOUT') \
                                         if self.config.REQUEST_TIMEOUT else None)
            return await geocoder.lookup(phrases)


    async def search_category(self, categories: List[Tuple[str, str]],
                              near_query: Optional[str] = None,
                              **params: Any) -> SearchResults:
        """ Find an object of a certain category near another place.
            The near place may either be given as an unstructured search
            query in itself or as coordinates.

            Returns an empty result list when no categories are given.
        """
        if not categories:
            return SearchResults()

        details = ntyp.SearchDetails.from_kwargs(params)
        async with self.begin() as conn:
            conn.set_query_timeout(self.query_timeout)
            if near_query:
                phrases = [Phrase(PhraseType.NONE, p) for p in near_query.split(',')]
            else:
                phrases = []
                if details.keywords:
                    await make_query_analyzer(conn)

            geocoder = ForwardGeocoder(conn, details,
                                       self.config.get_int('REQUEST_TIMEOUT') \
                                         if self.config.REQUEST_TIMEOUT else None)
            return await geocoder.lookup_pois(categories, phrases)
|
||||
|
||||
|
||||
|
||||
class NominatimAPI:
|
||||
""" This class provides a thin synchronous wrapper around the asynchronous
|
||||
Nominatim functions. It creates its own event loop and runs each
|
||||
synchronous function call to completion using that loop.
|
||||
"""
|
||||
|
||||
def __init__(self, project_dir: Path,
             environ: Optional[Mapping[str, str]] = None) -> None:
    """ Create a frontend object that offers the API functions as
        synchronous calls.

        A private asyncio event loop is created for this object and
        handed to the wrapped asynchronous API object.

        Parameters:
          project_dir: Path to the
              [project directory](../admin/Import.md#creating-the-project-directory)
              of the local Nominatim installation.
          environ: Mapping of [configuration parameters](../customize/Settings.md).
              When set, replaces any configuration via environment variables.
              Settings in this mapping also have precedence over any
              parameters found in the `.env` file of the project directory.
    """
    worker_loop = asyncio.new_event_loop()
    self._loop = worker_loop
    self._async_api = NominatimAPIAsync(project_dir, environ, loop=worker_loop)
|
||||
|
||||
|
||||
def close(self) -> None:
    """ Close all active connections to the database.

        This function also closes the asynchronous worker loop making
        the NominatimAPI object unusable. The loop is closed even when
        closing the database connections fails; the error is still
        passed on to the caller.
    """
    try:
        self._loop.run_until_complete(self._async_api.close())
    finally:
        # Never leave the private event loop open, otherwise its
        # resources leak when disposing of the connections raises.
        self._loop.close()
|
||||
|
||||
|
||||
@property
def config(self) -> Configuration:
    """ The [configuration](#Configuration) object used by the API,
        as held by the wrapped asynchronous API object. The property
        has no setter.
    """
    async_api = self._async_api
    return async_api.config
|
||||
|
||||
def status(self) -> StatusResult:
    """ Report the status of the database. The result is a dataclass
        object with the fields described below.

        Returns:
          status(int): A status code as described on the status page.
          message(str): Either 'OK' or a human-readable message of the
              problem encountered.
          software_version(tuple): A tuple with the version of the
              Nominatim library consisting of (major, minor, patch, db-patch)
              version.
          database_version(tuple): A tuple with the version of the library
              which was used for the import or last migration.
              Also consists of (major, minor, patch, db-patch).
          data_updated(datetime): Timestamp with the age of the data.
    """
    # Run the asynchronous implementation to completion on the
    # private event loop.
    pending = self._async_api.status()
    return self._loop.run_until_complete(pending)
|
||||
|
||||
|
||||
def details(self, place: ntyp.PlaceRef, **params: Any) -> Optional[DetailedResult]:
    """ Look up detailed information about a single place in the database.

        The result is a dataclass object with the fields described below
        or `None` if the place could not be found in the database.

        Parameters:
          place: Description of the place to look up. See
                 [Place identification](Input-Parameter-Types.md#place-identification)
                 for the various ways to reference a place.

        Other parameters:
          geometry_output (enum): Add the full geometry of the place to the result.
            Multiple formats may be selected. Note that geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Simplification factor to use on
            the geometries before returning them. The factor expresses
            the tolerance in degrees from which the geometry may differ.
            Topology is preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the places
            that make up the address of the requested object. (Default: False)
          linked_places (bool): Add detailed information about the places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about all places
            for which the requested object is a parent, i.e. all places for
            which the object provides the address details.
            Only POI places can have parents. (Default: False)
          keywords (bool): Add detailed information about the search terms
            used for this place.

        Returns:
          source_table (enum): Data source of the place. See below for possible values.
          category (tuple): A tuple of two strings with the primary OSM tag
              and value.
          centroid (Point): Point position of the place.
          place_id (Optional[int]): Internal ID of the place. This ID may differ
              for the same place between different installations.
          parent_place_id (Optional[int]): Internal ID of the parent of this
              place. Only meaningful for POI-like objects (places with a
              rank_address of 30).
          linked_place_id (Optional[int]): Internal ID of the place this object
              links to. When this ID is set then there is no guarantee that
              the rest of the result information is complete.
          admin_level (int): Value of the `admin_level` OSM tag. Only meaningful
              for administrative boundary objects.
          indexed_date (datetime): Timestamp when the place was last updated.
          osm_object (Optional[tuple]): OSM type and ID of the place, if available.
          names (Optional[dict]): Dictionary of names of the place. Keys are
              usually the corresponding OSM tag keys.
          address (Optional[dict]): Dictionary of address parts directly
              attributed to the place. Keys are usually the corresponding
              OSM tag keys with the `addr:` prefix removed.
          extratags (Optional[dict]): Dictionary of additional attributes for
              the place. Usually OSM tag keys and values.
          housenumber (Optional[str]): House number of the place, normalised
              for lookup. To get the house number in its original spelling,
              use `address['housenumber']`.
          postcode (Optional[str]): Computed postcode for the place. To get
              directly attributed postcodes, use `address['postcode']` instead.
          wikipedia (Optional[str]): Reference to a wikipedia site for the place.
              The string has the format <language code>:<wikipedia title>.
          rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
          rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
          importance (Optional[float]): Relative importance of the place. This is
              a measure of how likely the place will be searched for.
          country_code (Optional[str]): Country the feature is in as
              ISO 3166-1 alpha-2 country code.
          address_rows (Optional[AddressLines]): List of places that make up the
              computed address. `None` when `address_details` parameter was False.
          linked_rows (Optional[AddressLines]): List of places that link to the object.
              `None` when `linked_places` parameter was False.
          parented_rows (Optional[AddressLines]): List of direct children of the place.
              `None` when `parented_places` parameter was False.
          name_keywords (Optional[WordInfos]): List of search words for the name of
               the place. `None` when `keywords` parameter is set to False.
          address_keywords (Optional[WordInfos]): List of search words for the address
               of the place. `None` when `keywords` parameter is set to False.
          geometry (dict): Dictionary containing the full geometry of the place
               in the formats requested in the `geometry_output` parameter.
    """
    # Run the asynchronous implementation to completion on the
    # private event loop.
    pending = self._async_api.details(place, **params)
    return self._loop.run_until_complete(pending)
|
||||
|
||||
|
||||
def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults:
    """ Look up simple information about a list of places.

        Returns a list of place information for all IDs that were found.
        Each result is a dataclass with the fields detailed below.

        Parameters:
          places: List of descriptions of the place to look up. See
                  [Place identification](Input-Parameter-Types.md#place-identification)
                  for the various ways to reference a place.

        Other parameters:
          geometry_output (enum): Add the full geometry of the place to the result.
            Multiple formats may be selected. Note that geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Simplification factor to use on
            the geometries before returning them. The factor expresses
            the tolerance in degrees from which the geometry may differ.
            Topology is preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the places
            that make up the address of the requested object. (Default: False)
          linked_places (bool): Add detailed information about the places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about all places
            for which the requested object is a parent, i.e. all places for
            which the object provides the address details.
            Only POI places can have parents. (Default: False)
          keywords (bool): Add detailed information about the search terms
            used for this place.

        Returns:
          source_table (enum): Data source of the place. See below for possible values.
          category (tuple): A tuple of two strings with the primary OSM tag
              and value.
          centroid (Point): Point position of the place.
          place_id (Optional[int]): Internal ID of the place. This ID may differ
              for the same place between different installations.
          osm_object (Optional[tuple]): OSM type and ID of the place, if available.
          names (Optional[dict]): Dictionary of names of the place. Keys are
              usually the corresponding OSM tag keys.
          address (Optional[dict]): Dictionary of address parts directly
              attributed to the place. Keys are usually the corresponding
              OSM tag keys with the `addr:` prefix removed.
          extratags (Optional[dict]): Dictionary of additional attributes for
              the place. Usually OSM tag keys and values.
          housenumber (Optional[str]): House number of the place, normalised
              for lookup. To get the house number in its original spelling,
              use `address['housenumber']`.
          postcode (Optional[str]): Computed postcode for the place. To get
              directly attributed postcodes, use `address['postcode']` instead.
          wikipedia (Optional[str]): Reference to a wikipedia site for the place.
              The string has the format <language code>:<wikipedia title>.
          rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
          rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
          importance (Optional[float]): Relative importance of the place. This is
              a measure of how likely the place will be searched for.
          country_code (Optional[str]): Country the feature is in as
              ISO 3166-1 alpha-2 country code.
          address_rows (Optional[AddressLines]): List of places that make up the
              computed address. `None` when `address_details` parameter was False.
          linked_rows (Optional[AddressLines]): List of places that link to the object.
              `None` when `linked_places` parameter was False.
          parented_rows (Optional[AddressLines]): List of direct children of the place.
              `None` when `parented_places` parameter was False.
          name_keywords (Optional[WordInfos]): List of search words for the name of
               the place. `None` when `keywords` parameter is set to False.
          address_keywords (Optional[WordInfos]): List of search words for the address
               of the place. `None` when `keywords` parameter is set to False.
          bbox (Bbox): Bounding box of the full geometry of the place.
               If the place is a single point, then the size of the bounding
               box is guessed according to the type of place.
          geometry (dict): Dictionary containing the full geometry of the place
               in the formats requested in the `geometry_output` parameter.
    """
    # Run the asynchronous implementation to completion on the
    # private event loop.
    pending = self._async_api.lookup(places, **params)
    return self._loop.run_until_complete(pending)
|
||||
|
||||
|
||||
def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
    """ Find a place by its coordinates. Also known as reverse geocoding.

        Returns the closest result that can be found or `None` if
        no place matches the given criteria. The result is a dataclass
        with the fields as detailed below.

        Parameters:
          coord: Coordinate to look up the place for, given as a Point
                 or a tuple (x, y). Must be in WGS84 projection.

        Other parameters:
          max_rank (int): Highest address rank to return. Can be used to
            restrict search to streets or settlements.
          layers (enum): Defines the kind of data to take into account.
            See description of layers below. (Default: addresses and POIs)
          geometry_output (enum): Add the full geometry of the place to the result.
            Multiple formats may be selected. Note that geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Simplification factor to use on
            the geometries before returning them. The factor expresses
            the tolerance in degrees from which the geometry may differ.
            Topology is preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the places
            that make up the address of the requested object. (Default: False)
          linked_places (bool): Add detailed information about the places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about all places
            for which the requested object is a parent, i.e. all places for
            which the object provides the address details.
            Only POI places can have parents. (Default: False)
          keywords (bool): Add detailed information about the search terms
            used for this place.

        Returns:
          source_table (enum): Data source of the place. See below for possible values.
          category (tuple): A tuple of two strings with the primary OSM tag
              and value.
          centroid (Point): Point position of the place.
          place_id (Optional[int]): Internal ID of the place. This ID may differ
              for the same place between different installations.
          osm_object (Optional[tuple]): OSM type and ID of the place, if available.
          names (Optional[dict]): Dictionary of names of the place. Keys are
              usually the corresponding OSM tag keys.
          address (Optional[dict]): Dictionary of address parts directly
              attributed to the place. Keys are usually the corresponding
              OSM tag keys with the `addr:` prefix removed.
          extratags (Optional[dict]): Dictionary of additional attributes for
              the place. Usually OSM tag keys and values.
          housenumber (Optional[str]): House number of the place, normalised
              for lookup. To get the house number in its original spelling,
              use `address['housenumber']`.
          postcode (Optional[str]): Computed postcode for the place. To get
              directly attributed postcodes, use `address['postcode']` instead.
          wikipedia (Optional[str]): Reference to a wikipedia site for the place.
              The string has the format <language code>:<wikipedia title>.
          rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
          rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
          importance (Optional[float]): Relative importance of the place. This is a
              measure of how likely the place will be searched for.
          country_code (Optional[str]): Country the feature is in as
              ISO 3166-1 alpha-2 country code.
          address_rows (Optional[AddressLines]): List of places that make up the
              computed address. `None` when `address_details` parameter was False.
          linked_rows (Optional[AddressLines]): List of places that link to the object.
              `None` when `linked_places` parameter was False.
          parented_rows (Optional[AddressLines]): List of direct children of the place.
              `None` when `parented_places` parameter was False.
          name_keywords (Optional[WordInfos]): List of search words for the name of
              the place. `None` when `keywords` parameter is set to False.
          address_keywords (Optional[WordInfos]): List of search words for the
              address of the place. `None` when `keywords` parameter is set to False.
          bbox (Bbox): Bounding box of the full geometry of the place.
              If the place is a single point, then the size of the bounding
              box is guessed according to the type of place.
          geometry (dict): Dictionary containing the full geometry of the place
              in the formats requested in the `geometry_output` parameter.
          distance (Optional[float]): Distance in degree from the input point.
    """
    # Drive the coroutine of the asynchronous API to completion on the
    # event loop held by this object.
    run_sync = self._loop.run_until_complete
    return run_sync(self._async_api.reverse(coord, **params))
|
||||
|
||||
|
||||
def search(self, query: str, **params: Any) -> SearchResults:
    """ Find a place by free-text search. Also known as forward geocoding.

        Parameters:
          query: Free-form text query searching for a place.

        Other parameters:
          max_results (int): Maximum number of results to return. The
            actual number of results may be less. (Default: 10)
          min_rank (int): Lowest permissible rank for the result.
            For addressable places this is the minimum
            [address rank](../customize/Ranking.md#address-rank). For all
            other places the [search rank](../customize/Ranking.md#search-rank)
            is used.
          max_rank (int): Highest permissible rank for the result. See min_rank above.
          layers (enum): Defines the kind of data to take into account.
            See [layers section](Input-Parameter-Types.md#layers) for details.
            (Default: addresses and POIs)
          countries (list[str]): Restrict search to countries with the given
            ISO 3166-1 alpha-2 country code. An empty list (the default)
            disables this filter.
          excluded (list[int]): A list of internal IDs of places to exclude
            from the search.
          viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
          bounded_viewbox (bool): Consider the bounding box given in `viewbox`
            as a filter and return only results within the bounding box.
          near (Optional[Point]): Focus search around the given point and
            return results ordered by distance to the given point.
          near_radius (Optional[float]): Restrict results to results within
            the given distance in degrees of `near` point. Ignored, when
            `near` is not set.
          categories (list[tuple]): Restrict search to places of the given
            categories. The category is the main OSM tag assigned to each
            place. An empty list (the default) disables this filter.
          geometry_output (enum): Add the full geometry of the place to the result.
            Multiple formats may be selected. Note that geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Simplification factor to use on
            the geometries before returning them. The factor expresses
            the tolerance in degrees from which the geometry may differ.
            Topology is preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the places
            that make up the address of the requested object. (Default: False)
          linked_places (bool): Add detailed information about the places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about all places
            for which the requested object is a parent, i.e. all places for
            which the object provides the address details.
            Only POI places can have parents. (Default: False)
          keywords (bool): Add detailed information about the search terms
            used for this place.

        Returns:
          source_table (enum): Data source of the place. See below for possible values.
          category (tuple): A tuple of two strings with the primary OSM tag
              and value.
          centroid (Point): Point position of the place.
          place_id (Optional[int]): Internal ID of the place. This ID may differ
              for the same place between different installations.
          osm_object (Optional[tuple]): OSM type and ID of the place, if available.
          names (Optional[dict]): Dictionary of names of the place. Keys are
              usually the corresponding OSM tag keys.
          address (Optional[dict]): Dictionary of address parts directly
              attributed to the place. Keys are usually the corresponding
              OSM tag keys with the `addr:` prefix removed.
          extratags (Optional[dict]): Dictionary of additional attributes for
              the place. Usually OSM tag keys and values.
          housenumber (Optional[str]): House number of the place, normalised
              for lookup. To get the house number in its original spelling,
              use `address['housenumber']`.
          postcode (Optional[str]): Computed postcode for the place. To get
              directly attributed postcodes, use `address['postcode']` instead.
          wikipedia (Optional[str]): Reference to a wikipedia site for the place.
              The string has the format <language code>:<wikipedia title>.
          rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
          rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
          importance (Optional[float]): Relative importance of the place. This is a
              measure of how likely the place will be searched for.
          country_code (Optional[str]): Country the feature is in as
              ISO 3166-1 alpha-2 country code.
          address_rows (Optional[AddressLines]): List of places that make up the
              computed address. `None` when `address_details` parameter was False.
          linked_rows (Optional[AddressLines]): List of places that link to the object.
              `None` when `linked_places` parameter was False.
          parented_rows (Optional[AddressLines]): List of direct children of the place.
              `None` when `parented_places` parameter was False.
          name_keywords (Optional[WordInfos]): List of search words for the name of
              the place. `None` when `keywords` parameter is set to False.
          address_keywords (Optional[WordInfos]): List of search words for the
              address of the place. `None` when `keywords` parameter is set to False.
          bbox (Bbox): Bounding box of the full geometry of the place.
              If the place is a single point, then the size of the bounding
              box is guessed according to the type of place.
          geometry (dict): Dictionary containing the full geometry of the place
              in the formats requested in the `geometry_output` parameter.
    """
    # Drive the coroutine of the asynchronous API to completion on the
    # event loop held by this object.
    run_sync = self._loop.run_until_complete
    return run_sync(self._async_api.search(query, **params))
|
||||
|
||||
|
||||
# pylint: disable=too-many-arguments
|
||||
def search_address(self, amenity: Optional[str] = None,
                   street: Optional[str] = None,
                   city: Optional[str] = None,
                   county: Optional[str] = None,
                   state: Optional[str] = None,
                   country: Optional[str] = None,
                   postalcode: Optional[str] = None,
                   **params: Any) -> SearchResults:
    """ Find an address using structured search.

        Parameters:
          amenity: Name of a POI.
          street: Street and optionally housenumber of the address. If the address
            does not have a street, then the place the housenumber refers to.
          city: Postal city of the address.
          county: County equivalent of the address. Does not exist in all
            jurisdictions.
          state: State or province of the address.
          country: Country with its full name or its ISO 3166-1 alpha-2 country code.
            Do not use together with the country_code filter.
          postalcode: Post code or ZIP for the place.

        Other parameters:
          max_results (int): Maximum number of results to return. The
            actual number of results may be less. (Default: 10)
          min_rank (int): Lowest permissible rank for the result.
            For addressable places this is the minimum
            [address rank](../customize/Ranking.md#address-rank). For all
            other places the [search rank](../customize/Ranking.md#search-rank)
            is used.
          max_rank (int): Highest permissible rank for the result. See min_rank above.
          layers (enum): Defines the kind of data to take into account.
            See [layers section](Input-Parameter-Types.md#layers) for details.
            (Default: addresses and POIs)
          countries (list[str]): Restrict search to countries with the given
            ISO 3166-1 alpha-2 country code. An empty list (the default)
            disables this filter. Do not use, when the country parameter
            is used.
          excluded (list[int]): A list of internal IDs of places to exclude
            from the search.
          viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
          bounded_viewbox (bool): Consider the bounding box given in `viewbox`
            as a filter and return only results within the bounding box.
          near (Optional[Point]): Focus search around the given point and
            return results ordered by distance to the given point.
          near_radius (Optional[float]): Restrict results to results within
            the given distance in degrees of `near` point. Ignored, when
            `near` is not set.
          categories (list[tuple]): Restrict search to places of the given
            categories. The category is the main OSM tag assigned to each
            place. An empty list (the default) disables this filter.
          geometry_output (enum): Add the full geometry of the place to the result.
            Multiple formats may be selected. Note that geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Simplification factor to use on
            the geometries before returning them. The factor expresses
            the tolerance in degrees from which the geometry may differ.
            Topology is preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the places
            that make up the address of the requested object. (Default: False)
          linked_places (bool): Add detailed information about the places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about all places
            for which the requested object is a parent, i.e. all places for
            which the object provides the address details.
            Only POI places can have parents. (Default: False)
          keywords (bool): Add detailed information about the search terms
            used for this place.

        Returns:
          source_table (enum): Data source of the place. See below for possible values.
          category (tuple): A tuple of two strings with the primary OSM tag
              and value.
          centroid (Point): Point position of the place.
          place_id (Optional[int]): Internal ID of the place. This ID may differ
              for the same place between different installations.
          osm_object (Optional[tuple]): OSM type and ID of the place, if available.
          names (Optional[dict]): Dictionary of names of the place. Keys are
              usually the corresponding OSM tag keys.
          address (Optional[dict]): Dictionary of address parts directly
              attributed to the place. Keys are usually the corresponding
              OSM tag keys with the `addr:` prefix removed.
          extratags (Optional[dict]): Dictionary of additional attributes for
              the place. Usually OSM tag keys and values.
          housenumber (Optional[str]): House number of the place, normalised
              for lookup. To get the house number in its original spelling,
              use `address['housenumber']`.
          postcode (Optional[str]): Computed postcode for the place. To get
              directly attributed postcodes, use `address['postcode']` instead.
          wikipedia (Optional[str]): Reference to a wikipedia site for the place.
              The string has the format <language code>:<wikipedia title>.
          rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
          rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
          importance (Optional[float]): Relative importance of the place. This is a
              measure of how likely the place will be searched for.
          country_code (Optional[str]): Country the feature is in as
              ISO 3166-1 alpha-2 country code.
          address_rows (Optional[AddressLines]): List of places that make up the
              computed address. `None` when `address_details` parameter was False.
          linked_rows (Optional[AddressLines]): List of places that link to the object.
              `None` when `linked_places` parameter was False.
          parented_rows (Optional[AddressLines]): List of direct children of the place.
              `None` when `parented_places` parameter was False.
          name_keywords (Optional[WordInfos]): List of search words for the name of
              the place. `None` when `keywords` parameter is set to False.
          address_keywords (Optional[WordInfos]): List of search words for the
              address of the place. `None` when `keywords` parameter is set to False.
          bbox (Bbox): Bounding box of the full geometry of the place.
              If the place is a single point, then the size of the bounding
              box is guessed according to the type of place.
          geometry (dict): Dictionary containing the full geometry of the place
              in the formats requested in the `geometry_output` parameter.
    """
    # Drive the coroutine of the asynchronous API to completion on the
    # event loop held by this object. The address parts are handed on
    # positionally, in the order of this function's signature.
    run_sync = self._loop.run_until_complete
    return run_sync(
        self._async_api.search_address(amenity, street, city, county,
                                       state, country, postalcode, **params))
|
||||
|
||||
|
||||
def search_category(self, categories: List[Tuple[str, str]],
                    near_query: Optional[str] = None,
                    **params: Any) -> SearchResults:
    """ Find an object of a certain category near another place.

        The near place may either be given as an unstructured search
        query in itself or as a geographic area through the
        viewbox or near parameters.

        Parameters:
          categories: Restrict search to places of the given
            categories. The category is the main OSM tag assigned to each
            place.
          near_query: Optional free-text query to define the area to
            restrict search to.

        Other parameters:
          max_results (int): Maximum number of results to return. The
            actual number of results may be less. (Default: 10)
          min_rank (int): Lowest permissible rank for the result.
            For addressable places this is the minimum
            [address rank](../customize/Ranking.md#address-rank). For all
            other places the [search rank](../customize/Ranking.md#search-rank)
            is used.
          max_rank (int): Highest permissible rank for the result. See min_rank above.
          layers (enum): Defines the kind of data to take into account.
            See [layers section](Input-Parameter-Types.md#layers) for details.
            (Default: addresses and POIs)
          countries (list[str]): Restrict search to countries with the given
            ISO 3166-1 alpha-2 country code. An empty list (the default)
            disables this filter.
          excluded (list[int]): A list of internal IDs of places to exclude
            from the search.
          viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
          bounded_viewbox (bool): Consider the bounding box given in `viewbox`
            as a filter and return only results within the bounding box.
          near (Optional[Point]): Focus search around the given point and
            return results ordered by distance to the given point.
          near_radius (Optional[float]): Restrict results to results within
            the given distance in degrees of `near` point. Ignored, when
            `near` is not set.
          geometry_output (enum): Add the full geometry of the place to the result.
            Multiple formats may be selected. Note that geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Simplification factor to use on
            the geometries before returning them. The factor expresses
            the tolerance in degrees from which the geometry may differ.
            Topology is preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the places
            that make up the address of the requested object. (Default: False)
          linked_places (bool): Add detailed information about the places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about all places
            for which the requested object is a parent, i.e. all places for
            which the object provides the address details.
            Only POI places can have parents. (Default: False)
          keywords (bool): Add detailed information about the search terms
            used for this place.

        Returns:
          source_table (enum): Data source of the place. See below for possible values.
          category (tuple): A tuple of two strings with the primary OSM tag
              and value.
          centroid (Point): Point position of the place.
          place_id (Optional[int]): Internal ID of the place. This ID may differ
              for the same place between different installations.
          osm_object (Optional[tuple]): OSM type and ID of the place, if available.
          names (Optional[dict]): Dictionary of names of the place. Keys are
              usually the corresponding OSM tag keys.
          address (Optional[dict]): Dictionary of address parts directly
              attributed to the place. Keys are usually the corresponding
              OSM tag keys with the `addr:` prefix removed.
          extratags (Optional[dict]): Dictionary of additional attributes for
              the place. Usually OSM tag keys and values.
          housenumber (Optional[str]): House number of the place, normalised
              for lookup. To get the house number in its original spelling,
              use `address['housenumber']`.
          postcode (Optional[str]): Computed postcode for the place. To get
              directly attributed postcodes, use `address['postcode']` instead.
          wikipedia (Optional[str]): Reference to a wikipedia site for the place.
              The string has the format <language code>:<wikipedia title>.
          rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
          rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
          importance (Optional[float]): Relative importance of the place. This is a
              measure of how likely the place will be searched for.
          country_code (Optional[str]): Country the feature is in as
              ISO 3166-1 alpha-2 country code.
          address_rows (Optional[AddressLines]): List of places that make up the
              computed address. `None` when `address_details` parameter was False.
          linked_rows (Optional[AddressLines]): List of places that link to the object.
              `None` when `linked_places` parameter was False.
          parented_rows (Optional[AddressLines]): List of direct children of the place.
              `None` when `parented_places` parameter was False.
          name_keywords (Optional[WordInfos]): List of search words for the name of
              the place. `None` when `keywords` parameter is set to False.
          address_keywords (Optional[WordInfos]): List of search words for the
              address of the place. `None` when `keywords` parameter is set to False.
          bbox (Bbox): Bounding box of the full geometry of the place.
              If the place is a single point, then the size of the bounding
              box is guessed according to the type of place.
          geometry (dict): Dictionary containing the full geometry of the place
              in the formats requested in the `geometry_output` parameter.
    """
    # Drive the coroutine of the asynchronous API to completion on the
    # event loop held by this object.
    run_sync = self._loop.run_until_complete
    return run_sync(
        self._async_api.search_category(categories, near_query, **params))
|
||||
97
src/nominatim_api/localization.py
Normal file
97
src/nominatim_api/localization.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Helper functions for localizing names of results.
|
||||
"""
|
||||
from typing import Mapping, List, Optional
|
||||
|
||||
import re
|
||||
|
||||
class Locales:
    """ Helper class for localization of names.

        It takes a list of language prefixes in their order of preferred
        usage. From these an ordered list of name tags (`name_tags`) is
        built, which `display_name()` uses to pick the best name variant.
    """

    def __init__(self, langs: Optional[List[str]] = None):
        """ Create a localization helper for the given languages.

            `langs` lists the language prefixes, most preferred first.
            `None` is treated like an empty list.
        """
        self.languages = langs or []
        self.name_tags: List[str] = []

        # Build the list of supported tags. It is currently hard-coded.
        # Earlier entries take precedence in display_name().
        self._add_lang_tags('name')
        self._add_tags('name', 'brand')
        self._add_lang_tags('official_name', 'short_name')
        self._add_tags('official_name', 'short_name', 'ref')


    def __bool__(self) -> bool:
        """ A Locales object is truthy when at least one language is set.
        """
        return len(self.languages) > 0


    def _add_tags(self, *tags: str) -> None:
        """ Append each tag and its `_place_`-prefixed variant to the
            list of name tags.
        """
        for tag in tags:
            self.name_tags.append(tag)
            self.name_tags.append(f"_place_{tag}")


    def _add_lang_tags(self, *tags: str) -> None:
        """ Append for each tag the language-specific variants
            (`<tag>:<lang>` and `_place_<tag>:<lang>`) for all configured
            languages, in the order of the languages.
        """
        for tag in tags:
            for lang in self.languages:
                self.name_tags.append(f"{tag}:{lang}")
                self.name_tags.append(f"_place_{tag}:{lang}")


    def display_name(self, names: Optional[Mapping[str, str]]) -> str:
        """ Return the best matching name from a dictionary of names
            containing different name variants.

            If 'names' is null or empty, an empty string is returned. If no
            appropriate localization is found, the first name is returned.
        """
        if not names:
            return ''

        # With a single entry there is nothing to choose from.
        if len(names) > 1:
            for tag in self.name_tags:
                if tag in names:
                    return names[tag]

        # Nothing? Return any of the other names as a default.
        return next(iter(names.values()))


    @staticmethod
    def from_accept_languages(langstr: str) -> 'Locales':
        """ Create a localization object from a language list in the
            format of HTTP accept-languages header.

            The functions tries to be forgiving of format errors by first splitting
            the string into comma-separated parts and then parsing each
            description separately. Badly formatted parts are then ignored.

            Languages are ordered by descending weight (`q` value, 1.0 when
            omitted); parts of equal weight keep their order of appearance.
            For a language with a region variant (e.g. `en-US`) the plain
            language is added right behind it unless it was requested
            explicitly. Every language appears only once in the result.
        """
        # Subtags may contain digits (e.g. 'es-419'), the first character
        # may not. Compiled once outside of the loop.
        lang_re = re.compile(
            r'\s*([a-z_-][a-z0-9_-]*)(?:;\s*q\s*=\s*([01](?:\.\d+)?))?\s*',
            flags=re.I)

        # split string into languages
        candidates = []
        for desc in langstr.split(','):
            m = lang_re.fullmatch(desc)
            if m:
                candidates.append((m[1], float(m[2] or 1.0)))

        # sort the results by the weight of each language (preserving order).
        candidates.sort(reverse=True, key=lambda e: e[1])

        # If a language has a region variant, also add the language without
        # variant but only if it isn't already in the list to not mess up the weight.
        explicit = {lid for lid, _ in candidates}
        languages: List[str] = []
        for lid, _ in candidates:
            if lid in languages:
                # Same language requested twice, the first mention wins.
                continue
            languages.append(lid)
            base, sep, _ = lid.partition('-')
            # Skip the base when it was already added for a sibling
            # variant (e.g. 'en-US,en-GB' must yield 'en' only once).
            if sep and base not in explicit and base not in languages:
                languages.append(base)

        return Locales(languages)
|
||||
433
src/nominatim_api/logging.py
Normal file
433
src/nominatim_api/logging.py
Normal file
@@ -0,0 +1,433 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Functions for specialised logging with HTML output.
|
||||
"""
|
||||
from typing import Any, Iterator, Optional, List, Tuple, cast, Union, Mapping, Sequence
|
||||
from contextvars import ContextVar
|
||||
import datetime as dt
|
||||
import textwrap
|
||||
import io
|
||||
import re
|
||||
import html
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.asyncio import AsyncConnection
|
||||
|
||||
try:
|
||||
from pygments import highlight
|
||||
from pygments.lexers import PythonLexer, PostgresLexer
|
||||
from pygments.formatters import HtmlFormatter
|
||||
CODE_HIGHLIGHT = True
|
||||
except ModuleNotFoundError:
|
||||
CODE_HIGHLIGHT = False
|
||||
|
||||
|
||||
def _debug_name(res: Any) -> str:
|
||||
if res.names:
|
||||
return cast(str, res.names.get('name', next(iter(res.names.values()))))
|
||||
|
||||
return f"Hnr {res.housenumber}" if res.housenumber is not None else '[NONE]'
|
||||
|
||||
|
||||
class BaseLogger:
    """ Interface for logging function.

        The base implementation does nothing. Overwrite the functions
        in derived classes which implement logging functionality.

        The only function with a real implementation is `format_sql()`,
        a helper that renders a statement with its parameters as text.
    """
    def get_buffer(self) -> str:
        """ Return the current content of the log buffer.

            The base implementation always returns an empty string.
        """
        return ''

    def function(self, func: str, **kwargs: Any) -> None:
        """ Start a new debug chapter for the given function and its parameters.

            Does nothing in the base implementation.
        """


    def section(self, heading: str) -> None:
        """ Start a new section with the given title.

            Does nothing in the base implementation.
        """


    def comment(self, text: str) -> None:
        """ Add a simple comment to the debug output.

            Does nothing in the base implementation.
        """


    def var_dump(self, heading: str, var: Any) -> None:
        """ Print the content of the variable to the debug output prefixed by
            the given heading.

            Does nothing in the base implementation.
        """


    def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
        """ Print the table generated by the generator function.

            Does nothing in the base implementation.
        """


    def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
        """ Print a list of search results generated by the generator function.

            Does nothing in the base implementation.
        """


    def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
            params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
        """ Print the SQL for the given statement.

            Does nothing in the base implementation.
        """

    def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable',
                   extra_params: Union[Mapping[str, Any],
                                 Sequence[Mapping[str, Any]], None]) -> str:
        """ Return the compiled version of the statement.

            The statement is compiled against the engine of the connection
            and the parameter values are written into the resulting SQL
            string. Only the dialects 'postgresql' and 'sqlite' are handled.

            Parameters:
              conn: Connection whose engine and dialect are used for compiling.
              statement: The statement to render.
              extra_params: Additional parameters for the statement. When
                a mapping is given, its values are used in addition to the
                parameters of the compiled statement. When a non-empty
                sequence of mappings is given, the keys of its first
                element are rendered as `:<key>` placeholders.

            Returns:
              The SQL string with the parameters filled in.
        """
        compiled = cast('sa.ClauseElement', statement).compile(conn.sync_engine)

        # Start out with the parameters bound in the statement itself,
        # then add the extra parameters in a printable form.
        params = dict(compiled.params)
        if isinstance(extra_params, Mapping):
            for k, v in extra_params.items():
                if hasattr(v, 'to_wkt'):
                    # presumably a geometry type that can print itself as
                    # WKT -- the defining class is not visible from here
                    params[k] = v.to_wkt()
                elif isinstance(v, (int, float)):
                    params[k] = v
                else:
                    params[k] = str(v)
        elif isinstance(extra_params, Sequence) and extra_params:
            # Multiple parameter sets: there is no single value to show,
            # so print a placeholder named after the key instead.
            for k in extra_params[0]:
                params[k] = f':{k}'

        sqlstr = str(compiled)

        if conn.dialect.name == 'postgresql':
            if sa.__version__.startswith('1'):
                # SQLAlchemy 1.x: fill in the parameters by position.
                try:
                    # NOTE(review): '__[POSTCOMPILE_...]' looks like
                    # SQLAlchemy's marker for parameters that are expanded
                    # at execution time -- confirm against its documentation.
                    sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
                    return sqlstr % tuple((repr(params.get(name, None))
                                          for name in compiled.positiontup)) # type: ignore
                except TypeError:
                    # Formatting failed, fall back to the SQL without values.
                    return sqlstr

            # Fixes an odd issue with Python 3.7 where percentages are not
            # quoted correctly.
            # Every '%' that does not start a named '%(...)' placeholder
            # is doubled so that the %-formatting below leaves it alone.
            sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
            sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
            return sqlstr % params

        assert conn.dialect.name == 'sqlite'

        # params in positional order
        pparams = (repr(params.get(name, None)) for name in compiled.positiontup) # type: ignore

        # Turn the remaining markers into '?' placeholders, then replace
        # each '?' in order of appearance with the next parameter value.
        sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', '?', sqlstr)
        sqlstr = re.sub(r"\?", lambda m: next(pparams), sqlstr)

        return sqlstr
|
||||
|
||||
class HTMLLogger(BaseLogger):
    """ Logger that formats messages in HTML.

        Values that come from the data being logged (table cells as well
        as names, categories, country codes and OSM ids of results) are
        HTML-escaped before they are written. Headings and comment texts
        are written verbatim.
    """
    def __init__(self) -> None:
        self.buffer = io.StringIO()


    def _timestamp(self) -> None:
        """ Write a paragraph with the current time.
        """
        self._write(f'<p class="timestamp">[{dt.datetime.now()}]</p>')


    def get_buffer(self) -> str:
        """ Return the collected output wrapped into a complete HTML page.
        """
        return HTML_HEADER + self.buffer.getvalue() + HTML_FOOTER


    def function(self, func: str, **kwargs: Any) -> None:
        """ Start a new debug chapter for the given function and list
            its parameters.
        """
        self._timestamp()
        self._write(f"<h1>Debug output for {func}()</h1>\n<p>Parameters:<dl>")
        for name, value in kwargs.items():
            self._write(f'<dt>{name}</dt><dd>{self._python_var(value)}</dd>')
        self._write('</dl></p>')


    def section(self, heading: str) -> None:
        """ Start a new section with the given title.
        """
        self._timestamp()
        self._write(f"<h2>{heading}</h2>")


    def comment(self, text: str) -> None:
        """ Add a simple comment to the debug output.
        """
        self._timestamp()
        self._write(f"<p>{text}</p>")


    def var_dump(self, heading: str, var: Any) -> None:
        """ Print the content of the variable prefixed by the given
            heading. A callable is called and its result is printed.
        """
        self._timestamp()
        if callable(var):
            var = var()

        self._write(f'<h5>{heading}</h5>{self._python_var(var)}')


    def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
        """ Print the table generated by the generator function.

            The first row is used as the table header. Rows that are
            None are skipped. All cell contents are HTML-escaped.
        """
        self._timestamp()
        head = next(rows)
        assert head
        self._write(f'<table><thead><tr><th colspan="{len(head)}">{heading}</th></tr><tr>')
        for cell in head:
            self._write(f'<th>{html.escape(str(cell))}</th>')
        self._write('</tr></thead><tbody>')
        for row in rows:
            if row is not None:
                self._write('<tr>')
                for cell in row:
                    # Cells may contain data from the query or database.
                    self._write(f'<td>{html.escape(str(cell))}</td>')
                self._write('</tr>')
        self._write('</tbody></table>')


    def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
        """ Print a list of search results generated by the generator function.

            Textual fields of the results are HTML-escaped.
        """
        self._timestamp()
        def format_osm(osm_object: Optional[Tuple[str, int]]) -> str:
            if not osm_object:
                return '-'

            t, i = osm_object
            label = html.escape(f'{t}{i}')
            if t == 'N':
                fullt = 'node'
            elif t == 'W':
                fullt = 'way'
            elif t == 'R':
                fullt = 'relation'
            else:
                return label

            ident = html.escape(str(i))
            return f'<a href="https://www.openstreetmap.org/{fullt}/{ident}">{label}</a>'

        self._write(f'<h5>{heading}</h5><p><dl>')
        total = 0
        for rank, res in results:
            name = html.escape(str(_debug_name(res)))
            category = html.escape(','.join(res.category))
            country = html.escape(str(res.country_code))
            self._write(f'<dt>[{rank:.3f}]</dt>  <dd>{res.source_table.name}(')
            self._write(f"{name}, type=({category}), ")
            self._write(f"rank={res.rank_address}, ")
            self._write(f"osm={format_osm(res.osm_object)}, ")
            self._write(f'cc={country}, ')
            self._write(f'importance={res.importance or float("nan"):.5f})</dd>')
            total += 1
        self._write(f'</dl><b>TOTAL:</b> {total}</p>')


    def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
            params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
        """ Print the SQL for the given statement, syntax-highlighted
            when code highlighting is available and HTML-escaped otherwise.
        """
        self._timestamp()
        sqlstr = self.format_sql(conn, statement, params)
        if CODE_HIGHLIGHT:
            sqlstr = highlight(sqlstr, PostgresLexer(),
                               HtmlFormatter(nowrap=True, lineseparator='<br />'))
            self._write(f'<div class="highlight"><code class="lang-sql">{sqlstr}</code></div>')
        else:
            self._write(f'<code class="lang-sql">{html.escape(sqlstr)}</code>')


    def _python_var(self, var: Any) -> str:
        """ Return the string representation of the variable as HTML code.
        """
        if CODE_HIGHLIGHT:
            fmt = highlight(str(var), PythonLexer(), HtmlFormatter(nowrap=True))
            return f'<div class="highlight"><code class="lang-python">{fmt}</code></div>'

        return f'<code class="lang-python">{html.escape(str(var))}</code>'


    def _write(self, text: str) -> None:
        """ Add the raw text to the debug output.
        """
        self.buffer.write(text)
|
||||
|
||||
|
||||
class TextLogger(BaseLogger):
    """ Logger that produces plain-text output suitable for the console.
    """
    def __init__(self) -> None:
        self.buffer = io.StringIO()


    def _timestamp(self) -> None:
        """ Write a line with the current time.
        """
        now = dt.datetime.now()
        self._write(f'[{now}]\n')


    def get_buffer(self) -> str:
        """ Return the text collected so far.
        """
        return self.buffer.getvalue()


    def function(self, func: str, **kwargs: Any) -> None:
        """ Write a chapter heading for the function followed by
            one line per parameter.
        """
        self._write(f"#### Debug output for {func}()\n\nParameters:\n")
        for name, value in kwargs.items():
            self._write(f'  {name}: {self._python_var(value)}\n')
        self._write('\n')


    def section(self, heading: str) -> None:
        """ Write a timestamped section heading.
        """
        self._timestamp()
        self._write(f"\n# {heading}\n\n")


    def comment(self, text: str) -> None:
        """ Write the comment on a line of its own.
        """
        self._write(f"{text}\n")


    def var_dump(self, heading: str, var: Any) -> None:
        """ Write the heading followed by the value of the variable.
            A callable is called and its result is written instead.
        """
        value = var() if callable(var) else var

        self._write(f'{heading}:\n  {self._python_var(value)}\n\n')


    def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
        """ Write the rows as an ASCII table. The first row is the
            header, empty rows are rendered as horizontal rules.
        """
        self._write(f'{heading}:\n')
        table = [[self._python_var(cell) for cell in row] if row else None
                 for row in rows]
        header = table[0]
        assert header is not None

        # Each column is as wide as its longest cell.
        widths = [max(len(line[col]) for line in table if line)
                  for col in range(len(header))]
        ruler = '-' * (sum(widths) + 3 * len(header) + 1) + '\n'
        line_format = '| ' + ' | '.join(f'{{:<{width}}}' for width in widths) + ' |\n'

        self._write(ruler)
        self._write(line_format.format(*header))
        self._write(ruler)
        for line in table[1:]:
            self._write(line_format.format(*line) if line else ruler)
        # Close the table unless the last row already was a rule.
        if table[-1]:
            self._write(ruler)


    def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
        """ Write one line per search result and the total number
            of results.
        """
        self._timestamp()
        self._write(f'{heading}:\n')
        count = 0
        for rank, res in results:
            self._write(f'[{rank:.3f}]  {res.source_table.name}(')
            self._write(f"{_debug_name(res)}, type=({','.join(res.category)}), ")
            self._write(f"rank={res.rank_address}, ")
            self._write(f"osm={''.join(map(str, res.osm_object or []))}, ")
            self._write(f'cc={res.country_code}, ')
            self._write(f'importance={res.importance or -1:.5f})\n')
            count += 1
        self._write(f'TOTAL: {count}\n\n')


    def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
            params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
        """ Write the SQL of the statement, wrapped to 78 characters
            with every line prefixed by '| '.
        """
        self._timestamp()
        wrapped = textwrap.wrap(self.format_sql(conn, statement, params), width=78)
        sqlstr = '\n| '.join(wrapped)
        self._write(f"| {sqlstr}\n\n")


    def _python_var(self, var: Any) -> str:
        """ Return the string representation of the variable.
        """
        return str(var)


    def _write(self, text: str) -> None:
        """ Add the raw text to the debug output.
        """
        self.buffer.write(text)
|
||||
|
||||
|
||||
# Context variable holding the logger for the current context.
# Defaults to a BaseLogger instance, which does not record anything.
logger: ContextVar[BaseLogger] = ContextVar('logger', default=BaseLogger())
|
||||
|
||||
|
||||
def set_log_output(fmt: str) -> None:
    """ Install the debug logger for the given output format.

        Supported formats are 'html' and 'text'. Any other value
        installs the base logger, which does not collect anything.
    """
    new_logger: BaseLogger
    if fmt == 'html':
        new_logger = HTMLLogger()
    elif fmt == 'text':
        new_logger = TextLogger()
    else:
        new_logger = BaseLogger()

    logger.set(new_logger)
|
||||
|
||||
|
||||
def log() -> BaseLogger:
    """ Get the logger that is active in the current context.
    """
    current = logger.get()
    return current
|
||||
|
||||
|
||||
def get_and_disable() -> str:
    """ Fetch the debug output collected so far and switch logging off
        by installing a fresh base logger.
    """
    collected = logger.get().get_buffer()
    logger.set(BaseLogger())

    return collected
|
||||
|
||||
|
||||
HTML_HEADER: str = """<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Nominatim - Debug</title>
|
||||
<style>
|
||||
""" + \
|
||||
(HtmlFormatter(nobackground=True).get_style_defs('.highlight') if CODE_HIGHLIGHT else '') +\
|
||||
"""
|
||||
h2 { font-size: x-large }
|
||||
|
||||
dl {
|
||||
padding-left: 10pt;
|
||||
font-family: monospace
|
||||
}
|
||||
|
||||
dt {
|
||||
float: left;
|
||||
font-weight: bold;
|
||||
margin-right: 0.5em
|
||||
}
|
||||
|
||||
dt::after { content: ": "; }
|
||||
|
||||
dd::after {
|
||||
clear: left;
|
||||
display: block
|
||||
}
|
||||
|
||||
.lang-sql {
|
||||
color: #555;
|
||||
font-size: small
|
||||
}
|
||||
|
||||
h5 {
|
||||
border: solid lightgrey 0.1pt;
|
||||
margin-bottom: 0;
|
||||
background-color: #f7f7f7
|
||||
}
|
||||
|
||||
h5 + .highlight {
|
||||
padding: 3pt;
|
||||
border: solid lightgrey 0.1pt
|
||||
}
|
||||
|
||||
table, th, tbody {
|
||||
border: thin solid;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
td {
|
||||
border-right: thin solid;
|
||||
padding-left: 3pt;
|
||||
padding-right: 3pt;
|
||||
}
|
||||
|
||||
.timestamp {
|
||||
font-size: 0.8em;
|
||||
color: darkblue;
|
||||
width: calc(100% - 5pt);
|
||||
text-align: right;
|
||||
position: absolute;
|
||||
left: 0;
|
||||
margin-top: -5px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
"""
|
||||
|
||||
HTML_FOOTER: str = "</body></html>"
|
||||
250
src/nominatim_api/lookup.py
Normal file
250
src/nominatim_api/lookup.py
Normal file
@@ -0,0 +1,250 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of place lookup by ID.
|
||||
"""
|
||||
from typing import Optional, Callable, Tuple, Type
|
||||
import datetime as dt
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim_core.typing import SaColumn, SaRow, SaSelect
|
||||
from .connection import SearchConnection
|
||||
from .logging import log
|
||||
from . import types as ntyp
|
||||
from . import results as nres
|
||||
|
||||
RowFunc = Callable[[Optional[SaRow], Type[nres.BaseResultT]], Optional[nres.BaseResultT]]
|
||||
|
||||
GeomFunc = Callable[[SaSelect, SaColumn], SaSelect]
|
||||
|
||||
|
||||
async def find_in_placex(conn: SearchConnection, place: ntyp.PlaceRef,
                         add_geometries: GeomFunc) -> Optional[SaRow]:
    """ Look up the given place in the placex table and return
        the row with its base information.

        Places may be referenced by place ID or by OSM ID. When an
        OSM ID comes without a class, the row with the lowest class
        is returned. Other kinds of references yield None.
    """
    log().section("Find in placex table")
    placex = conn.t.placex
    columns = placex.c
    query = sa.select(columns.place_id, columns.osm_type, columns.osm_id, columns.name,
                      columns.class_, columns.type, columns.admin_level,
                      columns.address, columns.extratags,
                      columns.housenumber, columns.postcode, columns.country_code,
                      columns.importance, columns.wikipedia, columns.indexed_date,
                      columns.parent_place_id, columns.rank_address, columns.rank_search,
                      columns.linked_place_id,
                      columns.geometry.ST_Expand(0).label('bbox'),
                      columns.centroid)

    if isinstance(place, ntyp.PlaceID):
        query = query.where(columns.place_id == place.place_id)
    elif isinstance(place, ntyp.OsmID):
        query = query.where(columns.osm_type == place.osm_type)
        query = query.where(columns.osm_id == place.osm_id)
        if place.osm_class:
            query = query.where(columns.class_ == place.osm_class)
        else:
            query = query.order_by(columns.class_)
        query = query.limit(1)
    else:
        return None

    result = await conn.execute(add_geometries(query, columns.geometry))

    return result.one_or_none()
|
||||
|
||||
|
||||
async def find_in_osmline(conn: SearchConnection, place: ntyp.PlaceRef,
                          add_geometries: GeomFunc) -> Optional[SaRow]:
    """ Search for the given place in the osmline table and return the
        base information.

        Places may be referenced by place ID or by the OSM ID of a way.
        Any other kind of reference yields None.
    """
    log().section("Find in interpolation table")
    t = conn.t.osmline
    sql = sa.select(t.c.place_id, t.c.osm_id, t.c.parent_place_id,
                    t.c.indexed_date, t.c.startnumber, t.c.endnumber,
                    t.c.step, t.c.address, t.c.postcode, t.c.country_code,
                    t.c.linegeo.ST_Centroid().label('centroid'))

    if isinstance(place, ntyp.PlaceID):
        sql = sql.where(t.c.place_id == place.place_id)
    elif isinstance(place, ntyp.OsmID) and place.osm_type == 'W':
        # There may be multiple interpolations for a single way.
        # If 'class' contains a number, return the one that belongs to that number.
        sql = sql.where(t.c.osm_id == place.osm_id).limit(1)
        # isdecimal() instead of isdigit(): the latter also accepts
        # characters like superscript digits, which int() rejects
        # with a ValueError.
        if place.osm_class and place.osm_class.isdecimal():
            housenumber = int(place.osm_class)
            # Sort by distance of the number to the interpolation range.
            sql = sql.order_by(sa.func.greatest(0,
                                                housenumber - t.c.endnumber,
                                                t.c.startnumber - housenumber))
    else:
        return None

    return (await conn.execute(add_geometries(sql, t.c.linegeo))).one_or_none()
|
||||
|
||||
|
||||
async def find_in_tiger(conn: SearchConnection, place: ntyp.PlaceRef,
                        add_geometries: GeomFunc) -> Optional[SaRow]:
    """ Look up the given place in the table of Tiger addresses and
        return the row with its base information. The OSM type and ID
        are taken from the parent row in placex, if there is one.

        Only lookup by place ID is supported.
    """
    if not isinstance(place, ntyp.PlaceID):
        return None

    log().section("Find in TIGER table")
    tiger = conn.t.tiger
    parent = conn.t.placex
    query = sa.select(tiger.c.place_id, tiger.c.parent_place_id,
                      parent.c.osm_type, parent.c.osm_id,
                      tiger.c.startnumber, tiger.c.endnumber, tiger.c.step,
                      tiger.c.postcode,
                      tiger.c.linegeo.ST_Centroid().label('centroid'))
    query = query.where(tiger.c.place_id == place.place_id)
    # Outer join: the Tiger row is returned even without a parent.
    query = query.join(parent, tiger.c.parent_place_id == parent.c.place_id,
                       isouter=True)

    result = await conn.execute(add_geometries(query, tiger.c.linegeo))

    return result.one_or_none()
|
||||
|
||||
|
||||
async def find_in_postcode(conn: SearchConnection, place: ntyp.PlaceRef,
                           add_geometries: GeomFunc) -> Optional[SaRow]:
    """ Look up the given place in the postcode table and return
        the row with its base information.

        Only lookup by place ID is supported.
    """
    if not isinstance(place, ntyp.PlaceID):
        return None

    log().section("Find in postcode table")
    postcodes = conn.t.postcode
    query = sa.select(postcodes.c.place_id, postcodes.c.parent_place_id,
                      postcodes.c.rank_search, postcodes.c.rank_address,
                      postcodes.c.indexed_date, postcodes.c.postcode,
                      postcodes.c.country_code,
                      postcodes.c.geometry.label('centroid'))
    query = query.where(postcodes.c.place_id == place.place_id)

    result = await conn.execute(add_geometries(query, postcodes.c.geometry))

    return result.one_or_none()
|
||||
|
||||
|
||||
async def find_in_all_tables(conn: SearchConnection, place: ntyp.PlaceRef,
                             add_geometries: GeomFunc
                            ) -> Tuple[Optional[SaRow], RowFunc[nres.BaseResultT]]:
    """ Look up the given place in the data tables one after another
        (placex, osmline, postcode, tiger) and return the first row found
        together with the function that creates a result from it.

        When the place is in none of the tables, None is returned
        together with the result function for Tiger rows.
    """
    lookups = (
        (find_in_placex, 'Result (placex)', nres.create_from_placex_row),
        (find_in_osmline, 'Result (osmline)', nres.create_from_osmline_row),
        (find_in_postcode, 'Result (postcode)', nres.create_from_postcode_row),
    )

    for finder, label, row_func in lookups:
        row = await finder(conn, place, add_geometries)
        log().var_dump(label, row)
        if row is not None:
            return row, row_func

    # The last table in line is returned whether a row was found or not.
    row = await find_in_tiger(conn, place, add_geometries)
    log().var_dump('Result (tiger)', row)

    return row, nres.create_from_tiger_row
|
||||
|
||||
|
||||
async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
                             details: ntyp.LookupDetails) -> Optional[nres.DetailedResult]:
    """ Retrieve a place with additional details from the database.

        Returns None when the place cannot be found in any table.
        Raises a ValueError when a geometry output format other than
        GeoJSON is requested.
    """
    log().function('get_detailed_place', place=place, details=details)

    if details.geometry_output and details.geometry_output != ntyp.GeometryFormat.GEOJSON:
        raise ValueError("lookup only supports geojson polygon output.")

    if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
        def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
            # Geometries with more than 5000 points are simplified
            # before they are converted to GeoJSON.
            return sql.add_columns(sa.func.ST_AsGeoJSON(
                                    sa.case((sa.func.ST_NPoints(column) > 5000,
                                             sa.func.ST_SimplifyPreserveTopology(column, 0.0001)),
                                            else_=column), 7).label('geometry_geojson'))
    else:
        def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
            # Without geometry output only the geometry type is fetched.
            return sql.add_columns(sa.func.ST_GeometryType(column).label('geometry_type'))

    row_func: RowFunc[nres.DetailedResult]
    row, row_func = await find_in_all_tables(conn, place, _add_geometry)

    if row is None:
        return None

    result = row_func(row, nres.DetailedResult)
    assert result is not None

    # add missing details
    if 'type' in result.geometry:
        # Translate known type names, keep unknown ones as they are.
        result.geometry['type'] = GEOMETRY_TYPE_MAP.get(result.geometry['type'],
                                                        result.geometry['type'])
    indexed_date = getattr(row, 'indexed_date', None)
    if indexed_date is not None:
        # Mark the date as UTC.
        result.indexed_date = indexed_date.replace(tzinfo=dt.timezone.utc)

    await nres.add_result_details(conn, [result], details)

    return result
|
||||
|
||||
|
||||
async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
                           details: ntyp.LookupDetails) -> Optional[nres.SearchResult]:
    """ Fetch a place from the database and return it as a simple
        search result. Returns None when the place cannot be found.
    """
    log().function('get_simple_place', place=place, details=details)

    def _add_geometry(sql: SaSelect, col: SaColumn) -> SaSelect:
        requested = details.geometry_output
        if not requested:
            return sql

        if details.geometry_simplification > 0.0:
            col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)

        # One output column for each of the requested formats.
        converters = (
            (ntyp.GeometryFormat.GEOJSON,
             lambda c: sa.func.ST_AsGeoJSON(c, 7).label('geometry_geojson')),
            (ntyp.GeometryFormat.TEXT,
             lambda c: sa.func.ST_AsText(c).label('geometry_text')),
            (ntyp.GeometryFormat.KML,
             lambda c: sa.func.ST_AsKML(c, 7).label('geometry_kml')),
            (ntyp.GeometryFormat.SVG,
             lambda c: sa.func.ST_AsSVG(c, 0, 7).label('geometry_svg')),
        )
        extra_columns = [convert(col) for flag, convert in converters
                         if requested & flag]

        return sql.add_columns(*extra_columns)


    row_func: RowFunc[nres.SearchResult]
    row, row_func = await find_in_all_tables(conn, place, _add_geometry)

    if row is None:
        return None

    result = row_func(row, nres.SearchResult)
    assert result is not None

    # Add the details that the row function does not fill in.
    if hasattr(row, 'bbox'):
        result.bbox = ntyp.Bbox.from_wkb(row.bbox)

    await nres.add_result_details(conn, [result], details)

    return result
|
||||
|
||||
|
||||
GEOMETRY_TYPE_MAP = {
|
||||
'POINT': 'ST_Point',
|
||||
'MULTIPOINT': 'ST_MultiPoint',
|
||||
'LINESTRING': 'ST_LineString',
|
||||
'MULTILINESTRING': 'ST_MultiLineString',
|
||||
'POLYGON': 'ST_Polygon',
|
||||
'MULTIPOLYGON': 'ST_MultiPolygon',
|
||||
'GEOMETRYCOLLECTION': 'ST_GeometryCollection'
|
||||
}
|
||||
0
src/nominatim_api/py.typed
Normal file
0
src/nominatim_api/py.typed
Normal file
56
src/nominatim_api/result_formatting.py
Normal file
56
src/nominatim_api/result_formatting.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Helper classes and functions for formatting results into API responses.
|
||||
"""
|
||||
from typing import Type, TypeVar, Dict, List, Callable, Any, Mapping
|
||||
from collections import defaultdict
|
||||
|
||||
T = TypeVar('T') # pylint: disable=invalid-name
FormatFunc = Callable[[T, Mapping[str, Any]], str]


class FormatDispatcher:
    """ Helper class to conveniently create formatting functions in
        a module using decorators.
    """

    def __init__(self) -> None:
        self.format_functions: Dict[Type[Any], Dict[str, FormatFunc[Any]]] = defaultdict(dict)


    def format_func(self, result_class: Type[T],
                    fmt: str) -> Callable[[FormatFunc[T]], FormatFunc[T]]:
        """ Decorator for a function that formats a given type of result into the
            selected format. The decorated function is returned unchanged.
        """
        def decorator(func: FormatFunc[T]) -> FormatFunc[T]:
            self.format_functions[result_class][fmt] = func
            return func

        return decorator


    def list_formats(self, result_type: Type[Any]) -> List[str]:
        """ Return a list of formats supported by this formatter.
            The list is empty for result types without registered formats.
        """
        # Use get() so that a query does not add an empty entry for
        # the type to the defaultdict.
        return list(self.format_functions.get(result_type, {}))


    def supports_format(self, result_type: Type[Any], fmt: str) -> bool:
        """ Check if the given format is supported by this formatter.
        """
        return fmt in self.format_functions.get(result_type, {})


    def format_result(self, result: Any, fmt: str, options: Mapping[str, Any]) -> str:
        """ Convert the given result into a string using the given format.

            The format is expected to be in the list returned by
            `list_formats()`. A KeyError is raised when no function is
            registered for the type of the result and the format.
        """
        return self.format_functions.get(type(result), {})[fmt](result, options)
|
||||
752
src/nominatim_api/results.py
Normal file
752
src/nominatim_api/results.py
Normal file
@@ -0,0 +1,752 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Dataclasses for search results and helper functions to fill them.
|
||||
|
||||
Data classes are part of the public API while the functions are for
|
||||
internal use only. That's why they are implemented as free-standing functions
|
||||
instead of member functions.
|
||||
"""
|
||||
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast, Callable
|
||||
import enum
|
||||
import dataclasses
|
||||
import datetime as dt
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim_core.typing import SaSelect, SaRow
|
||||
from nominatim_core.db.sqlalchemy_types import Geometry
|
||||
from .types import Point, Bbox, LookupDetails
|
||||
from .connection import SearchConnection
|
||||
from .logging import log
|
||||
from .localization import Locales
|
||||
|
||||
# This file defines complex result data classes.
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
|
||||
def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
|
||||
""" Mix-in names from linked places, so that they show up
|
||||
as standard names where necessary.
|
||||
"""
|
||||
if not names:
|
||||
return None
|
||||
|
||||
out = {}
|
||||
for k, v in names.items():
|
||||
if k.startswith('_place_'):
|
||||
outkey = k[7:]
|
||||
out[k if outkey in names else outkey] = v
|
||||
else:
|
||||
out[k] = v
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class SourceTable(enum.Enum):
    """ Enumeration of the tables a result may originate from.
    """
    PLACEX = 1
    """ Main source of results is the placex table. It usually
        contains OSM data.
    """
    OSMLINE = 2
    """ Address interpolations from OSM data come from the osmline table.
        Interpolation addresses are always approximate. The OSM id in the
        result refers to the OSM way with the interpolation line object.
    """
    TIGER = 3
    """ US addresses from TIGER address data, which is imported on the side,
        see [Installing TIGER data](../customize/Tiger.md).
        TIGER addresses are also interpolations. The addresses always refer
        to a street from OSM data. The OSM id in the result refers to
        that street.
    """
    POSTCODE = 4
    """ Artificial centroids for postcodes from the postcode table. They are
        computed from the postcodes available with address points. Results
        are always approximate.
    """
    COUNTRY = 5
    """ Fallback from the country table for cases where country data
        is missing in the OSM data.
    """
|
||||
|
||||
|
||||
@dataclasses.dataclass
class AddressLine:
    """ Information about a place related to a result and about its role
        as part of the address of the result. Most fields are optional.
        Their presence depends on the kind and function of the address part.
    """
    category: Tuple[str, str]
    """ Key-value pair that describes the main category of the place.
    """
    names: Dict[str, str]
    """ Every name available for the place. Includes references,
        alternative names and translations.
    """
    fromarea: bool
    """ True when the exact area of the place is known. Without area
        information, Nominatim has to make an educated guess if an address
        belongs to one place or another.
    """
    isaddress: bool
    """ True when the place should be considered for the final address
        display. Nominatim will sometimes include more than one candidate
        for the address in the list when it cannot reliably determine where
        the place belongs. It will consider names of all candidates when
        searching but when displaying the result, only the most likely
        candidate should be shown.
    """
    rank_address: int
    """ The [address rank](../customize/Ranking.md#address-rank) of the place.
    """
    distance: float
    """ How far the result place and this address part are apart, in degrees.
    """
    place_id: Optional[int] = None
    """ The internal ID of the place.
    """
    osm_object: Optional[Tuple[str, int]] = None
    """ The OSM type and ID of the place, if such an object exists.
    """
    extratags: Optional[Dict[str, str]] = None
    """ Extra information available about the place as a dictionary.
        It usually contains OSM tag key-value pairs.
    """

    admin_level: Optional[int] = None
    """ Administrative level of a boundary as tagged in the input data.
        This field is only meaningful for places of the category
        (boundary, administrative).
    """

    local_name: Optional[str] = None
    """ Place holder for the localized name of this address part. See
        [Localization](#localization) below.
    """
|
||||
|
||||
|
||||
class AddressLines(List[AddressLine]):
    """ A list of address lines, ordered by their rank in descending order.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Fill in the local name of the address parts for the given
            locales and return the list of local names. A name that
            equals the name directly before it is left out of the list.

            Only address parts that are marked as isaddress and have
            names are localized and returned.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)
            # Collapse repetitions of the same name in a row.
            if labels and labels[-1] == part.local_name:
                continue
            labels.append(part.local_name)

        return labels
|
||||
|
||||
|
||||
|
||||
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a single entry in the list
        of search terms.
    """
    word_id: int
    """ The internal identifier of the word.
    """
    word_token: str
    """ The word in its normalised and transliterated form.
        This form is used for searching.
    """
    word: Optional[str] = None
    """ The untransliterated form of the word, if available.
    """
|
||||
|
||||
|
||||
WordInfos = Sequence[WordInfo]
|
||||
|
||||
|
||||
@dataclasses.dataclass
class BaseResult:
    """ Data class with the information that all types of
        search results have in common.
    """
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15

    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ The latitude (or y) of the center point of the place.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ The longitude (or x) of the center point of the place.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that is always valid: the stored
            importance when it is set and non-zero, otherwise an
            artificial value computed from the search rank of the place.
        """
        if self.importance:
            return self.importance

        return 0.40001 - (self.rank_search/75.0)


    def localize(self, locales: Locales) -> None:
        """ Set locale_name and display_name of the place for the given
            locales. The display name is built from the address
            information, if available, and is the locale name otherwise.
        """
        self.locale_name = locales.display_name(self.names)
        self.display_name = ', '.join(self.address_rows.localize(locales)) \
                            if self.address_rows else self.locale_name
|
||||
|
||||
|
||||
|
||||
# Type variable for functions that work on any subclass of BaseResult
# and preserve the concrete result type.
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
|
||||
|
||||
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database
        added.

        Extends BaseResult with the optional timestamp 'indexed_date'.
    """
    indexed_date: Optional[dt.datetime] = None
|
||||
|
||||
|
||||
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.

        Extends BaseResult with an optional distance and an optional
        bounding box.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
|
||||
|
||||
|
||||
class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        May be empty when no result was found.

        This is a plain list subclass that adds no behaviour of its own.
    """
|
||||
|
||||
|
||||
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ Result of a forward geocoding request.

        Extends BaseResult with an optional bounding box and the
        accuracy of the match.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0

    @property
    def ranking(self) -> float:
        """ Combined measure of accuracy and importance: the accuracy
            (1 when unset) reduced by the calculated importance.
        """
        base = 1 if self.accuracy is None else self.accuracy
        return base - self.calculated_importance()
|
||||
|
||||
|
||||
class SearchResults(List[SearchResult]):
    """ Sequence of forward lookup results ordered by relevance.
        May be empty when no result was found.

        This is a plain list subclass that adds no behaviour of its own.
    """
|
||||
|
||||
|
||||
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all columns of the row whose name starts with 'geometry_'
        and return them in a dictionary keyed by the column name without
        that prefix.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}
    for column, value in row._mapping.items():  # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
|
||||
|
||||
|
||||
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the placex
        table. Returns None when no row is given.

        The columns 'linked_place_id' and 'admin_level' are optional.
        When the row does not have them, None and 15 are used instead.
        Columns prefixed with 'geometry_' end up in the geometry field.
    """
    if row is not None:
        return class_type(
            source_table=SourceTable.PLACEX,
            place_id=row.place_id,
            osm_object=(row.osm_type, row.osm_id),
            category=(row.class_, row.type),
            parent_place_id=row.parent_place_id,
            linked_place_id=getattr(row, 'linked_place_id', None),
            admin_level=getattr(row, 'admin_level', 15),
            names=_mingle_name_tags(row.name),
            address=row.address,
            extratags=row.extratags,
            housenumber=row.housenumber,
            postcode=row.postcode,
            wikipedia=row.wikipedia,
            rank_address=row.rank_address,
            rank_search=row.rank_search,
            importance=row.importance,
            country_code=row.country_code,
            centroid=Point.from_wkb(row.centroid),
            geometry=_filter_geometries(row),
        )

    return None
|
||||
|
||||
|
||||
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the address
        interpolation table osmline. Returns None when no row is given.

        A row with a housenumber yields a single house with that
        housenumber. A row without one yields a result of type 'houses'
        which carries start number, end number and step of the
        interpolation as strings in the extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_single_house = housenumber is not None

    result = class_type(
        source_table=SourceTable.OSMLINE,
        place_id=row.place_id,
        parent_place_id=row.parent_place_id,
        osm_object=('W', row.osm_id),
        category=('place', 'house' if is_single_house else 'houses'),
        address=row.address,
        postcode=row.postcode,
        country_code=row.country_code,
        centroid=Point.from_wkb(row.centroid),
        geometry=_filter_geometries(row),
    )

    if is_single_house:
        result.housenumber = str(housenumber)
    else:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}

    return result
|
||||
|
||||
|
||||
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT],
                          osm_type: Optional[str] = None,
                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the Tiger
        interpolation table. Returns None when no row is given.

        'osm_type' and 'osm_id' take precedence over the respective
        columns of the row, which are only read when the parameter is
        unset. The country code is always 'us'.

        A row with a housenumber yields a single house with that
        housenumber. A row without one yields a result of type 'houses'
        which carries start number, end number and step of the
        interpolation as strings in the extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_single_house = housenumber is not None

    result = class_type(
        source_table=SourceTable.TIGER,
        place_id=row.place_id,
        parent_place_id=row.parent_place_id,
        osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
        category=('place', 'house' if is_single_house else 'houses'),
        postcode=row.postcode,
        country_code='us',
        centroid=Point.from_wkb(row.centroid),
        geometry=_filter_geometries(row),
    )

    if is_single_house:
        result.housenumber = str(housenumber)
    else:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}

    return result
|
||||
|
||||
|
||||
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the postcode
        table. Returns None when no row is given.

        The result gets the category place/postcode and the postcode
        itself as its only name under the key 'ref'.
    """
    if row is not None:
        return class_type(
            source_table=SourceTable.POSTCODE,
            place_id=row.place_id,
            parent_place_id=row.parent_place_id,
            category=('place', 'postcode'),
            names={'ref': row.postcode},
            rank_search=row.rank_search,
            rank_address=row.rank_address,
            country_code=row.country_code,
            centroid=Point.from_wkb(row.centroid),
            geometry=_filter_geometries(row),
        )

    return None
|
||||
|
||||
|
||||
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the fallback
        country tables. Returns None when no row is given.

        The result gets the category place/country and a fixed address
        and search rank of 4.
    """
    if row is not None:
        return class_type(
            source_table=SourceTable.COUNTRY,
            category=('place', 'country'),
            centroid=Point.from_wkb(row.centroid),
            names=row.name,
            rank_address=4,
            rank_search=4,
            country_code=row.country_code,
            geometry=_filter_geometries(row),
        )

    return None
|
||||
|
||||
|
||||
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Add the additional information requested in 'details' to each
        of the results and localize the results afterwards. Does
        nothing when the list of results is empty.
    """
    if not results:
        return

    log().section('Query details for result')

    # Address details are computed for all results in one go.
    if details.address_details:
        log().comment('Query address details')
        await complete_address_details(conn, results)

    # The remaining details are looked up result by result.
    per_result_steps = (
        (details.linked_places, 'Query linked places', complete_linked_places),
        (details.parented_places, 'Query parent places', complete_parented_places),
        (details.keywords, 'Query keywords', complete_keywords),
    )
    for requested, message, complete_func in per_result_steps:
        if requested:
            log().comment(message)
            for entry in results:
                await complete_func(conn, entry)

    for entry in results:
        entry.localize(details.locales)
|
||||
|
||||
|
||||
def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
    """ Convert a row from a database query into an AddressLine.

        When 'isaddress' is not given, the value is taken from the
        column of the same name and defaults to True when the row has
        no such column. A 'linked_place' extratag is copied into the
        'place' extratag. A housenumber is added to the names.
    """
    tags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
    try:
        tags['place'] = tags['linked_place']
    except KeyError:
        pass

    names = _mingle_name_tags(row.name) or {}
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        names['housenumber'] = housenumber

    if isaddress is None:
        isaddress = getattr(row, 'isaddress', True)

    if row.osm_type is None:
        osm_object = None
    else:
        osm_object = (row.osm_type, row.osm_id)

    return AddressLine(place_id=row.place_id,
                       osm_object=osm_object,
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=tags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=isaddress,
                       rank_address=row.rank_address,
                       distance=row.distance)
|
||||
|
||||
|
||||
def _get_address_lookup_id(result: BaseResultT) -> int:
    """ Return the id of the place whose address lines should be used
        for the result.

        Results from placex with a search rank of at most 27 use their
        linked place. All other results use their parent place. When
        the preferred place is not set, the place id of the result
        itself is returned.
    """
    assert result.place_id

    is_placex = result.source_table == SourceTable.PLACEX
    if is_placex and result.rank_search <= 27:
        preferred = result.linked_place_id
    else:
        preferred = result.parent_place_id

    return preferred or result.place_id
|
||||
|
||||
|
||||
async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
    """ Append the synthetic address lines for postcode, country and
        country code to the address rows of the result.

        The postcode line is skipped when the result is a postcode
        itself and when the postcode contains a comma or a semicolon.
        Country names come from the country_name table and are cached
        on the connection by country code.
    """
    assert result.address_rows is not None

    is_postcode_place = result.category[0] in ('boundary', 'place')\
                        and result.category[1] in ('postal_code', 'postcode')
    if not is_postcode_place:
        postcode = result.postcode
        if not postcode and result.address:
            postcode = result.address.get('postcode')

        if postcode and not (',' in postcode or ';' in postcode):
            postcode_line = AddressLine(category=('place', 'postcode'),
                                        names={'ref': postcode},
                                        fromarea=False, isaddress=True,
                                        rank_address=5, distance=0.0)
            result.address_rows.append(postcode_line)

    if not result.country_code:
        return

    async def _get_country_names() -> Optional[Dict[str, str]]:
        table = conn.t.country_name
        query = sa.select(table.c.name, table.c.derived_name)\
                  .where(table.c.country_code == result.country_code)
        # Only the first row is of interest.
        for country_row in await conn.execute(query):
            names = cast(Dict[str, str], country_row[0])
            if country_row[1]:
                names.update(cast(Dict[str, str], country_row[1]))
            return names
        return None

    country_names = await conn.get_cached_value('COUNTRY_NAME',
                                                result.country_code,
                                                _get_country_names)
    if country_names:
        country_line = AddressLine(category=('place', 'country'),
                                   names=country_names,
                                   fromarea=False, isaddress=True,
                                   rank_address=4, distance=0.0)
        result.address_rows.append(country_line)

    code_line = AddressLine(category=('place', 'country_code'),
                            names={'ref': result.country_code},
                            extratags={},
                            fromarea=True, isaddress=False,
                            rank_address=4, distance=0.0)
    result.address_rows.append(code_line)
|
||||
|
||||
|
||||
def _setup_address_details(result: BaseResultT) -> None:
    """ Initialise the address rows of the result with the lines that
        can be derived from the result itself: the place itself (when
        it has names), its housenumber and an unlisted place from the
        address tags.
    """
    result.address_rows = AddressLines()

    if result.names:
        own_line = AddressLine(place_id=result.place_id,
                               osm_object=result.osm_object,
                               category=result.category,
                               names=result.names,
                               extratags=result.extratags or {},
                               admin_level=result.admin_level,
                               fromarea=True, isaddress=True,
                               rank_address=result.rank_address,
                               distance=0.0)
        result.address_rows.append(own_line)

    # For placex results with address tags, the housenumber comes from
    # the first non-empty housenumber-like address tag.
    housenumber = None
    if result.source_table == SourceTable.PLACEX and result.address:
        number_tags = ('housenumber', 'streetnumber', 'conscriptionnumber')
        housenumber = next(filter(None, map(result.address.get, number_tags)), None)
    elif result.housenumber:
        housenumber = result.housenumber

    if housenumber:
        number_line = AddressLine(category=('place', 'house_number'),
                                  names={'ref': housenumber},
                                  fromarea=True, isaddress=True,
                                  rank_address=28, distance=0)
        result.address_rows.append(number_line)

    if result.address and '_unlisted_place' in result.address:
        unlisted_line = AddressLine(category=('place', 'locality'),
                                    names={'name': result.address['_unlisted_place']},
                                    fromarea=False, isaddress=True,
                                    rank_address=25, distance=0)
        result.address_rows.append(unlisted_line)
|
||||
|
||||
|
||||
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address rows of every result are first initialised from the
        result itself. Then the address lines of all results that have a
        place id are fetched with a single query, the synthetic entries
        (postcode, country) are appended and finally the rows of each
        result are sorted.

        Note that the function returns right after the initialisation
        when none of the results has a place id.
    """
    for result in results:
        _setup_address_details(result)

    ### Lookup entries from the addressline table

    # One lookup entry per result with a place id:
    #  pid   - place id of the result
    #  lid   - id of the place whose address lines are used
    #  names - values of the address tags of the result
    #  c     - centroid of the result as EWKT (empty string without centroid)
    lookup_ids = [{'pid': r.place_id,
                   'lid': _get_address_lookup_id(r),
                   'names': list(r.address.values()) if r.address else [],
                   'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else '' }
                  for r in results if r.place_id]

    if not lookup_ids:
        return

    # The lookup entries are handed to the database as a single JSON value.
    # NOTE(review): JsonArrayEach is not defined in this part of the file,
    # presumably it yields one row per element of the JSON array.
    ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
             .table_valued(sa.column('value', type_=sa.JSON))

    t = conn.t.placex
    taddr = conn.t.addressline

    # Address lines are collected for the place itself (pid) as well as for
    # the lookup place (lid). A rank_address of 11 is reported as 5.
    # The processing loop below relies on the rows being grouped by
    # 'src_place_id' and ordered by descending rank_address within a group.
    sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                    t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                    t.c.class_, t.c.type, t.c.extratags,
                    t.c.admin_level, taddr.c.fromarea,
                    sa.case((t.c.rank_address == 11, 5),
                            else_=t.c.rank_address).label('rank_address'),
                    taddr.c.distance, t.c.country_code, t.c.postcode)\
            .join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
                                taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
            .join(t, taddr.c.address_place_id == t.c.place_id)\
            .order_by('src_place_id')\
            .order_by(sa.column('rank_address').desc())\
            .order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
            .order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
                              (taddr.c.isaddress, 0),
                              (sa.and_(taddr.c.fromarea,
                                       t.c.geometry.ST_Contains(
                                           sa.func.ST_GeomFromEWKT(
                                               ltab.c.value['c'].as_string()))), 1),
                              else_=-1).desc())\
            .order_by(taddr.c.fromarea.desc())\
            .order_by(taddr.c.distance.desc())\
            .order_by(t.c.rank_search.desc())


    current_result = None
    current_rank_address = -1
    for row in await conn.execute(sql):
        # A change of the source place id starts the rows of the next result.
        if current_result is None or row.src_place_id != current_result.place_id:
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            current_rank_address = -1

        # Only the first row of each address rank is part of the address.
        location_isaddress = row.rank_address != current_rank_address

        # Results without a country code inherit it from their address lines.
        if current_result.country_code is None and row.country_code:
            current_result.country_code = row.country_code

        if row.type in ('postcode', 'postal_code') and location_isaddress:
            # A postcode place only becomes part of the address when it comes
            # from an area and the result has no postcode address tag. In that
            # case the postcode of the result itself is dropped.
            if not row.fromarea or \
               (current_result.address and 'postcode' in current_result.address):
                location_isaddress = False
            else:
                current_result.postcode = None

        assert current_result.address_rows is not None
        current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
        current_rank_address = row.rank_address

    for result in results:
        await _finalize_entry(conn, result)


    ### Finally add the record for the parent entry where necessary.

    # This concerns all results where the address lines were taken from
    # a different place (lid) than the result itself (pid).
    parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
    if parent_lookup_ids:
        ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
                 .table_valued(sa.column('value', type_=sa.JSON))
        sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                        t.c.class_, t.c.type, t.c.extratags,
                        t.c.admin_level,
                        t.c.rank_address)\
                .where(t.c.place_id == ltab.c.value['lid'].as_integer())

        for row in await conn.execute(sql):
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            assert current_result.address_rows is not None

            current_result.address_rows.append(AddressLine(
                    place_id=row.place_id,
                    osm_object=(row.osm_type, row.osm_id),
                    category=(row.class_, row.type),
                    names=row.name, extratags=row.extratags or {},
                    admin_level=row.admin_level,
                    fromarea=True, isaddress=True,
                    rank_address=row.rank_address, distance=0.0))

    ### Now sort everything
    # Sort order: the line for the result itself first, then by descending
    # address rank and within the same rank non-address lines before
    # address lines.
    def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
        return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)

    for result in results:
        assert result.address_rows is not None
        result.address_rows.sort(key=mk_sort_key(result.place_id))
|
||||
|
||||
|
||||
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select statement over placex with the columns needed
        to build an address line. The distance column holds the
        spheroid distance between the geometry of the place and the
        given centroid.
    """
    placex = conn.t.placex
    geometry = placex.c.geometry

    centroid_param = sa.bindparam('centroid', value=centroid, type_=Geometry)

    columns = (
        placex.c.place_id, placex.c.osm_type, placex.c.osm_id, placex.c.name,
        placex.c.class_.label('class'), placex.c.type,
        placex.c.admin_level, placex.c.housenumber,
        geometry.is_area().label('fromarea'),
        placex.c.rank_address,
        geometry.distance_spheroid(centroid_param).label('distance'),
    )

    return sa.select(*columns)
|
||||
|
||||
|
||||
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the linked rows of the result with the places from placex
        that link to it. For results from other tables the list of
        linked rows stays empty.
    """
    linked = AddressLines()
    result.linked_rows = linked

    if result.source_table == SourceTable.PLACEX:
        query = _placex_select_address_row(conn, result.centroid)\
                  .where(conn.t.placex.c.linked_place_id == result.place_id)

        for linked_row in await conn.execute(query):
            linked.append(_result_row_to_address_row(linked_row))
|
||||
|
||||
|
||||
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the name and address keywords of the result with the
        entries of the word table that the search_name entry of the
        place refers to.

        Requires that the query analyzer was initialised to get access to
        the word table.
    """
    search_name = conn.t.search_name
    token_query = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                    .where(search_name.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []

    word = conn.t.meta.tables['word']
    word_query = sa.select(word.c.word_id, word.c.word_token, word.c.word)

    for name_tokens, address_tokens in await conn.execute(token_query):
        # Names are resolved first, then the address terms.
        for tokens, keywords in ((name_tokens, result.name_keywords),
                                 (address_tokens, result.address_keywords)):
            for word_row in await conn.execute(word_query.where(word.c.word_id.in_(tokens))):
                keywords.append(WordInfo(*word_row))
|
||||
|
||||
|
||||
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the parented rows of the result with the places from
        placex with search rank 30 that have the result as their
        parent. For results from other tables the list of parented
        rows stays empty.
    """
    parented = AddressLines()
    result.parented_rows = parented

    if result.source_table == SourceTable.PLACEX:
        placex = conn.t.placex
        query = _placex_select_address_row(conn, result.centroid)\
                  .where(placex.c.parent_place_id == result.place_id)\
                  .where(placex.c.rank_search == 30)

        for child_row in await conn.execute(query):
            parented.append(_result_row_to_address_row(child_row))
|
||||
603
src/nominatim_api/reverse.py
Normal file
603
src/nominatim_api/reverse.py
Normal file
@@ -0,0 +1,603 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of reverse geocoding.
|
||||
"""
|
||||
from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union
|
||||
import functools
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim_core.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
|
||||
SaBind, SaLambdaSelect
|
||||
from nominatim_core.db.sqlalchemy_types import Geometry
|
||||
from .connection import SearchConnection
|
||||
from . import results as nres
|
||||
from .logging import log
|
||||
from .types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
|
||||
|
||||
# In SQLAlchemy, expressions that compare with NULL need to be expressed with
|
||||
# the equal sign.
|
||||
# pylint: disable=singleton-comparison
|
||||
|
||||
# Signature of a function that converts a database row into a reverse result.
RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]]

# Bind parameters used by the SQL statements in this module. Their values
# are supplied when a statement is executed.
WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
|
||||
|
||||
def no_index(expr: SaColumn) -> SaColumn:
    """ Hide the given expression from the query planner by wrapping
        it in a COALESCE with NULL, so that it will not be used for an
        index lookup.
    """
    wrapped = sa.func.coalesce(sa.null(), expr)  # pylint: disable=not-callable
    return wrapped
|
||||
|
||||
|
||||
def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
    """ Build a select over 't' with all columns needed to create a
        reverse result.

        With 'use_wkt' set, distance and centroid are computed against
        the WKT bind parameter. For line-like geometries the centroid is
        then the point on the geometry closest to the search point.
        Without it, 't' must already provide a distance and a centroid
        column.
    """
    if use_wkt:
        distance = t.c.geometry.ST_Distance(WKT_PARAM)
        closest_point = t.c.geometry.ST_ClosestPoint(WKT_PARAM)
        centroid = sa.case((t.c.geometry.is_line_like(), closest_point),
                           else_=t.c.centroid).label('centroid')
    else:
        distance = t.c.distance
        centroid = t.c.centroid

    columns = (
        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
        t.c.class_, t.c.type,
        t.c.address, t.c.extratags,
        t.c.housenumber, t.c.postcode, t.c.country_code,
        t.c.importance, t.c.wikipedia,
        t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
        centroid,
        t.c.linked_place_id, t.c.admin_level,
        distance.label('distance'),
        t.c.geometry.ST_Expand(0).label('bbox'),
    )

    return sa.select(*columns)
|
||||
|
||||
|
||||
def _interpolated_housenumber(table: SaFromClause) -> SaLabel:
    """ Create the expression for the housenumber at the position on
        the interpolation line. The offset from the start number is
        rounded to a multiple of the step and the result is cast to
        an integer.
    """
    number_range = table.c.endnumber - table.c.startnumber
    steps = sa.func.round((number_range * table.c.position) / table.c.step)
    housenumber = table.c.startnumber + steps * table.c.step

    return sa.cast(housenumber, sa.Integer).label('housenumber')
|
||||
|
||||
|
||||
def _interpolated_position(table: SaFromClause) -> SaLabel:
    """ Create the expression for the centroid of the interpolated
        housenumber. The position on the line is rounded to the
        nearest multiple of the relative step width. When start and
        end number are equal, the centroid of the line is used.
    """
    number_range = table.c.endnumber - table.c.startnumber
    step_fraction = sa.cast(table.c.step, sa.Float) / number_range
    snapped_position = sa.func.round(table.c.position / step_fraction) * step_fraction

    single_number = table.c.endnumber == table.c.startnumber
    on_line = table.c.linegeo.ST_LineInterpolatePoint(snapped_position)

    return sa.case((single_number, table.c.linegeo.ST_Centroid()),
                   else_=on_line).label('centroid')
|
||||
|
||||
|
||||
def _locate_interpolation(table: SaFromClause) -> SaLabel:
    """ Create the expression that locates the search point on the
        interpolation line. Geometries that are not line-like get the
        position 0.
    """
    line = table.c.linegeo
    located = line.ST_LineLocatePoint(WKT_PARAM)

    return sa.case((line.is_line_like(), located), else_=0).label('position')
|
||||
|
||||
|
||||
def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
    """ Return the row with the smallest distance. Missing rows (None)
        are treated as if they had a distance of 1000. On a tie the
        row given first wins.
    """
    def _sort_distance(candidate: Optional[SaRow]) -> Any:
        if candidate is None:
            return 1000
        return candidate.distance

    return min(rows, key=_sort_distance)
|
||||
|
||||
|
||||
class ReverseGeocoder:
|
||||
""" Class implementing the logic for looking up a place from a
|
||||
coordinate.
|
||||
"""
|
||||
|
||||
def __init__(self, conn: SearchConnection, params: ReverseDetails,
             restrict_to_country_areas: bool = False) -> None:
    """ Set up the geocoder for the given connection and lookup
        parameters. The maximum rank from the parameters is made
        available as a bind parameter for the SQL statements.
    """
    self.conn = conn
    self.params = params
    self.restrict_to_country_areas = restrict_to_country_areas

    bind_params: Dict[str, Any] = {}
    bind_params['max_rank'] = params.max_rank
    self.bind_params = bind_params
|
||||
|
||||
|
||||
@property
def max_rank(self) -> int:
    """ The maximum rank as configured in the lookup parameters.
    """
    params = self.params
    return params.max_rank
|
||||
|
||||
|
||||
def has_geometries(self) -> bool:
    """ Return true when at least one geometry output format was
        requested.
    """
    requested_formats = self.params.geometry_output
    return bool(requested_formats)
|
||||
|
||||
|
||||
def layer_enabled(self, *layer: DataLayer) -> bool:
    """ Check if at least one of the given layer types is part of
        the requested layers.
    """
    for candidate in layer:
        if self.params.layers & candidate:
            return True

    return False
|
||||
|
||||
|
||||
def layer_disabled(self, *layer: DataLayer) -> bool:
    """ Check that not a single one of the given layer types is part
        of the requested layers.
    """
    for candidate in layer:
        if self.params.layers & candidate:
            return False

    return True
|
||||
|
||||
|
||||
def has_feature_layers(self) -> bool:
    """ Return true when the railway, manmade or natural layer is
        requested, i.e. any layer other than ADDRESS or POI.
    """
    feature_layers = (DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
    return self.layer_enabled(*feature_layers)
|
||||
|
||||
|
||||
def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
    """ Add one output column to the statement for each requested
        geometry format. The geometry in 'col' is simplified first when
        a simplification tolerance greater than 0 is configured.
    """
    tolerance = self.params.geometry_simplification
    if tolerance > 0.0:
        col = sa.func.ST_SimplifyPreserveTopology(col, tolerance)

    requested = self.params.geometry_output
    formats = (
        (GeometryFormat.GEOJSON, 'geometry_geojson', lambda: sa.func.ST_AsGeoJSON(col, 7)),
        (GeometryFormat.TEXT, 'geometry_text', lambda: sa.func.ST_AsText(col)),
        (GeometryFormat.KML, 'geometry_kml', lambda: sa.func.ST_AsKML(col, 7)),
        (GeometryFormat.SVG, 'geometry_svg', lambda: sa.func.ST_AsSVG(col, 0, 7)),
    )

    columns = [convert().label(label)
               for fmt, label, convert in formats if requested & fmt]

    return sql.add_columns(*columns)
|
||||
|
||||
|
||||
def _filter_by_layer(self, table: SaFromClause) -> SaColumn:
    """ Create the condition that restricts the class of a place to
        the requested feature layers.

        With the manmade layer enabled, the classes of the disabled
        railway and natural layers are excluded. Without it, only the
        classes of the enabled railway and natural layers are included.
    """
    railway_classes = ('railway',)
    natural_classes = ('natural', 'water', 'waterway')

    classes: Tuple[str, ...] = ()

    if self.layer_enabled(DataLayer.MANMADE):
        if self.layer_disabled(DataLayer.RAILWAY):
            classes += railway_classes
        if self.layer_disabled(DataLayer.NATURAL):
            classes += natural_classes
        return table.c.class_.not_in(classes)

    if self.layer_enabled(DataLayer.RAILWAY):
        classes += railway_classes
    if self.layer_enabled(DataLayer.NATURAL):
        classes += natural_classes
    return table.c.class_.in_(classes)
|
||||
|
||||
|
||||
async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
    """ Look up the closest rank 26+ place in the database, which
        is closer than the given distance.

        Which places qualify depends on the enabled layers and the
        maximum rank. Returns None when nothing was found or when the
        layer configuration yields no restriction at all.
    """
    t = self.conn.t.placex

    # PostgreSQL must not get the distance as a parameter because
    # there is a danger it won't be able to properly estimate index use
    # when used with prepared statements
    diststr = sa.text(f"{distance}")

    # Areas are only taken into account when their centroid is within
    # the distance as well. The two closest candidates are fetched,
    # see the selection of the final row below.
    sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
            .where(t.c.geometry.within_distance(WKT_PARAM, diststr))
            .where(t.c.indexed_status == 0)
            .where(t.c.linked_place_id == None)
            .where(sa.or_(sa.not_(t.c.geometry.is_area()),
                          t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
            .order_by('distance')
            .limit(2))

    if self.has_geometries():
        sql = self._add_geometry_columns(sql, t.c.geometry)

    # Alternative conditions, one set per enabled layer. A place must
    # fulfil at least one of them.
    restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = []

    if self.layer_enabled(DataLayer.ADDRESS):
        # Address layer: address ranks 26 up to the maximum rank (at most 29).
        # When rank 30 is requested, the SQL function IsAddressPoint decides
        # in addition.
        max_rank = min(29, self.max_rank)
        restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
        if self.max_rank == 30:
            restrict.append(lambda: sa.func.IsAddressPoint(t))
    if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
        # POI layer: rank 30 objects that are not line-like and have a class
        # other than 'place' or 'building'.
        restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
                                        t.c.class_.not_in(('place', 'building')),
                                        sa.not_(t.c.geometry.is_line_like())))
    if self.has_feature_layers():
        # Feature layers: objects with an address rank of 0 and a class that
        # matches the enabled layers.
        restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
                                no_index(t.c.rank_address) == 0,
                                self._filter_by_layer(t)))

    if not restrict:
        return None

    sql = sql.where(sa.or_(*restrict))

    # If the closest object is inside an area, then check if there is a
    # POI node nearby and return that.
    prev_row = None
    for row in await self.conn.execute(sql, self.bind_params):
        if prev_row is None:
            # The closest row is final unless it has a search rank above 27,
            # is not a node and has a distance of 0.
            if row.rank_search <= 27 or row.osm_type == 'N' or row.distance > 0:
                return row
            prev_row = row
        else:
            # The second row is only preferred when it is a node with
            # a search rank above 27 at a distance below 0.0001.
            if row.rank_search > 27 and row.osm_type == 'N'\
               and row.distance < 0.0001:
                return row

    return prev_row
|
||||
|
||||
|
||||
async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
    """ Look up the closest address point in placex that has the given
        place as its parent and lies within a distance of 0.001 of the
        search point. Returns None when there is no such place.
    """
    t = self.conn.t.placex

    def _base_query() -> SaSelect:
        # Only fully indexed places that are not linked to another
        # place are taken into account.
        return _select_from_placex(t)\
            .where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\
            .where(t.c.parent_place_id == parent_place_id)\
            .where(sa.func.IsAddressPoint(t))\
            .where(t.c.indexed_status == 0)\
            .where(t.c.linked_place_id == None)\
            .order_by('distance')\
            .limit(1)

    sql: SaLambdaSelect
    if self.has_geometries():
        # The geometry output columns are added to the plain statement.
        sql = self._add_geometry_columns(_base_query(), t.c.geometry)
    else:
        sql = sa.lambda_stmt(_base_query)

    return (await self.conn.execute(sql, self.bind_params)).one_or_none()
|
||||
|
||||
|
||||
async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
                                         distance: float) -> Optional[SaRow]:
    """ Look up the closest address interpolation within the given
        distance and compute the housenumber and position on it that
        is closest to the search point. When a parent place is given,
        only interpolations of that parent are taken into account.
        Returns None when no interpolation was found.
    """
    osmline = self.conn.t.osmline

    closest_line = sa.select(osmline,
                             osmline.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
                             _locate_interpolation(osmline))
    closest_line = closest_line.where(osmline.c.linegeo.within_distance(WKT_PARAM, distance))
    closest_line = closest_line.where(osmline.c.startnumber != None)
    closest_line = closest_line.order_by('distance').limit(1)

    if parent_place_id is not None:
        closest_line = closest_line.where(osmline.c.parent_place_id == parent_place_id)

    ipol = closest_line.subquery('ipol')

    # Housenumber and centroid are computed for the selected line only.
    query = sa.select(ipol.c.place_id, ipol.c.osm_id,
                      ipol.c.parent_place_id, ipol.c.address,
                      _interpolated_housenumber(ipol),
                      _interpolated_position(ipol),
                      ipol.c.postcode, ipol.c.country_code,
                      ipol.c.distance)

    if self.has_geometries():
        # The output geometry is the computed centroid.
        geom = query.subquery('geom')
        query = self._add_geometry_columns(sa.select(geom), geom.c.centroid)

    cursor = await self.conn.execute(query, self.bind_params)
    return cursor.one_or_none()
|
||||
|
||||
|
||||
async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]:
    """ Look up the closest Tiger interpolation line that has the
        given place as its parent and lies within a distance of 0.001
        of the search point. Housenumber and position on the line
        closest to the search point are computed for it. Returns None
        when there is no such line.
    """
    t = self.conn.t.tiger

    def _base_query() -> SaSelect:
        # Select the closest line first, then compute housenumber and
        # centroid for this line only.
        inner = sa.select(t,
                          t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
                          _locate_interpolation(t))\
                  .where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
                  .where(t.c.parent_place_id == parent_place_id)\
                  .order_by('distance')\
                  .limit(1)\
                  .subquery('tiger')

        return sa.select(inner.c.place_id,
                         inner.c.parent_place_id,
                         _interpolated_housenumber(inner),
                         _interpolated_position(inner),
                         inner.c.postcode,
                         inner.c.distance)

    sql: SaLambdaSelect
    if self.has_geometries():
        # The output geometry is the computed centroid.
        sub = _base_query().subquery('geom')
        sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
    else:
        sql = sa.lambda_stmt(_base_query)

    return (await self.conn.execute(sql, self.bind_params)).one_or_none()
|
||||
|
||||
|
||||
async def lookup_street_poi(self) -> Tuple[Optional[SaRow], RowFunc]:
    """ Find a street or POI/address for the given WKT point.

        Returns a tuple of the row found (or None) and the function
        that must be used to convert this row into a result object.
        The conversion function depends on where the row comes from:
        placex, the tiger table or the interpolation (osmline) table.
    """
    log().section('Reverse lookup on street/address level')
    # Initial search distance. It is narrowed down below once a result
    # has been found.
    distance = 0.006
    parent_place_id = None

    row = await self._find_closest_street_or_poi(distance)
    row_func: RowFunc = nres.create_from_placex_row
    log().var_dump('Result (street/building)', row)

    # If the closest result was a street, but an address was requested,
    # check for a housenumber nearby which is part of the street.
    if row is not None:
        if self.max_rank > 27 \
           and self.layer_enabled(DataLayer.ADDRESS) \
           and row.rank_address <= 27:
            distance = 0.001
            parent_place_id = row.place_id
            log().comment('Find housenumber for street')
            addr_row = await self._find_housenumber_for_street(parent_place_id)
            log().var_dump('Result (street housenumber)', addr_row)

            if addr_row is not None:
                row = addr_row
                row_func = nres.create_from_placex_row
                distance = addr_row.distance
            elif row.country_code == 'us' and parent_place_id is not None:
                # No housenumber in placex: try the tiger table, which
                # is only consulted for streets with country code 'us'.
                log().comment('Find TIGER housenumber for street')
                addr_row = await self._find_tiger_number_for_street(parent_place_id)
                log().var_dump('Result (street Tiger housenumber)', addr_row)

                if addr_row is not None:
                    # The OSM type and ID are taken over from the street
                    # row; they must be bound before 'row' is replaced.
                    row_func = cast(RowFunc,
                                    functools.partial(nres.create_from_tiger_row,
                                                      osm_type=row.osm_type,
                                                      osm_id=row.osm_id))
                    row = addr_row
        else:
            # Any interpolation found below must be closer than this result.
            distance = row.distance

    # Check for an interpolation that is either closer than our result
    # or belongs to a close street found.
    if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
        log().comment('Find interpolation for street')
        addr_row = await self._find_interpolation_for_street(parent_place_id,
                                                             distance)
        log().var_dump('Result (street interpolation)', addr_row)
        if addr_row is not None:
            row = addr_row
            row_func = nres.create_from_osmline_row

    return row, row_func
|
||||
|
||||
|
||||
async def _lookup_area_address(self) -> Optional[SaRow]:
    """ Lookup large addressable areas for the given WKT point.

        First searches for the area with the highest search rank that
        contains the point. If the rank of that area is below the
        requested maximum rank, a second query looks for a place with
        a higher search rank located inside the area. Returns the
        place from the second query if there is one, otherwise the
        area itself, or None when no area was found.
    """
    log().comment('Reverse lookup by larger address area features')
    t = self.conn.t.placex

    def _base_query() -> SaSelect:
        # The inner SQL brings results in the right order, so that
        # later only a minimum of results needs to be checked with ST_Contains.
        inner = sa.select(t, sa.literal(0.0).label('distance'))\
                  .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
                  .where(t.c.geometry.intersects(WKT_PARAM))\
                  .where(sa.func.PlacexGeometryReverseLookuppolygon())\
                  .order_by(sa.desc(t.c.rank_search))\
                  .limit(50)\
                  .subquery('area')

        return _select_from_placex(inner, False)\
                  .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
                  .order_by(sa.desc(inner.c.rank_search))\
                  .limit(1)

    sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
    if self.has_geometries():
        # The literal column name refers to the subquery alias 'area'
        # used inside _base_query().
        sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))

    address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
    log().var_dump('Result (area)', address_row)

    if address_row is not None and address_row.rank_search < self.max_rank:
        log().comment('Search for better matching place nodes inside the area')

        # Copied into plain locals so that the query function below
        # closes over simple values instead of the result row.
        address_rank = address_row.rank_search
        address_id = address_row.place_id

        def _place_inside_area_query() -> SaSelect:
            inner = \
                sa.select(t,
                          t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
                  .where(t.c.rank_search > address_rank)\
                  .where(t.c.rank_search <= MAX_RANK_PARAM)\
                  .where(t.c.indexed_status == 0)\
                  .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
                  .order_by(sa.desc(t.c.rank_search))\
                  .limit(50)\
                  .subquery('places')

            # Join against the area found above to keep only candidates
            # whose geometry is contained in it.
            touter = t.alias('outer')
            return _select_from_placex(inner, False)\
                .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
                .where(touter.c.place_id == address_id)\
                .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
                .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
                .limit(1)

        if self.has_geometries():
            sql = self._add_geometry_columns(_place_inside_area_query(),
                                             sa.literal_column('places.geometry'))
        else:
            sql = sa.lambda_stmt(_place_inside_area_query)

        place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
        log().var_dump('Result (place node)', place_address_row)

        if place_address_row is not None:
            return place_address_row

    return address_row
|
||||
|
||||
|
||||
async def _lookup_area_others(self) -> Optional[SaRow]:
    """ Lookup named features without an address rank (rank_address 0)
        around the given WKT point.

        Candidates are restricted to the enabled layers and to
        geometries intersecting the search point expanded by 0.007.
        Area geometries must additionally contain the point. Returns
        the candidate with the highest search rank, then the smallest
        distance, or None if nothing matches.
    """
    placex = self.conn.t.placex
    dist = placex.c.geometry.ST_Distance(WKT_PARAM).label('distance')
    search_area = sa.func.ST_Expand(WKT_PARAM, 0.007)

    candidates = (
        sa.select(placex, dist)
          .where(placex.c.rank_address == 0)
          .where(placex.c.rank_search.between(5, MAX_RANK_PARAM))
          .where(placex.c.name != None)
          .where(placex.c.indexed_status == 0)
          .where(placex.c.linked_place_id == None)
          .where(self._filter_by_layer(placex))
          .where(placex.c.geometry.intersects(search_area))
          .order_by(sa.desc(placex.c.rank_search))
          .order_by('distance')
          .limit(50)
          .subquery()
    )

    # Non-area geometries always qualify, areas only when they
    # really contain the search point.
    point_matches = sa.or_(sa.not_(candidates.c.geometry.is_area()),
                           candidates.c.geometry.ST_Contains(WKT_PARAM))

    query = (
        _select_from_placex(candidates, False)
        .where(point_matches)
        .order_by(sa.desc(candidates.c.rank_search), candidates.c.distance)
        .limit(1)
    )

    if self.has_geometries():
        query = self._add_geometry_columns(query, candidates.c.geometry)

    found = (await self.conn.execute(query, self.bind_params)).one_or_none()
    log().var_dump('Result (non-address feature)', found)

    return found
|
||||
|
||||
|
||||
async def lookup_area(self) -> Optional[SaRow]:
    """ Lookup large areas for the current search.

        Address areas are only searched when the address layer is
        enabled, other features only when feature layers are
        requested. The two candidates are passed to _get_closest()
        to select the result.
    """
    log().section('Reverse lookup by larger area features')

    address_row = (await self._lookup_area_address()
                   if self.layer_enabled(DataLayer.ADDRESS) else None)

    other_row = (await self._lookup_area_others()
                 if self.has_feature_layers() else None)

    return _get_closest(address_row, other_row)
|
||||
|
||||
|
||||
async def lookup_country_codes(self) -> List[str]:
    """ Lookup the country codes for the current search.

        Returns the distinct country codes of all entries in the
        country grid table whose geometry contains the search point.
    """
    log().section('Reverse lookup by country code')
    grid = self.conn.t.country_grid
    query = (sa.select(grid.c.country_code)
               .distinct()
               .where(grid.c.geometry.ST_Contains(WKT_PARAM)))

    records = await self.conn.execute(query, self.bind_params)

    ccodes = []
    for record in records:
        ccodes.append(cast(str, record[0]))

    log().var_dump('Country codes', ccodes)
    return ccodes
|
||||
|
||||
|
||||
async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]:
    """ Lookup the country for the current search.

        'ccodes' is the list of country codes to restrict the search
        to. When it is empty, the country codes for the search point
        are looked up first. Returns None when no country code can be
        determined. Otherwise a place with a search rank above 4 in
        one of the countries is preferred (only when max_rank allows
        it), falling back to the country entry itself.
    """
    if not ccodes:
        ccodes = await self.lookup_country_codes()

    if not ccodes:
        return None

    t = self.conn.t.placex
    if self.max_rank > 4:
        log().comment('Search for place nodes in country')

        def _base_query() -> SaSelect:
            # Preselect up to 50 candidates by search rank. The
            # distance check is only applied to this reduced set.
            inner = \
                sa.select(t,
                          t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
                  .where(t.c.rank_search > 4)\
                  .where(t.c.rank_search <= MAX_RANK_PARAM)\
                  .where(t.c.indexed_status == 0)\
                  .where(t.c.country_code.in_(ccodes))\
                  .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
                  .order_by(sa.desc(t.c.rank_search))\
                  .limit(50)\
                  .subquery('area')

            return _select_from_placex(inner, False)\
                .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
                .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
                .limit(1)

        sql: SaLambdaSelect
        if self.has_geometries():
            # The literal column name refers to the subquery alias 'area'
            # used inside _base_query().
            sql = self._add_geometry_columns(_base_query(),
                                             sa.literal_column('area.geometry'))
        else:
            sql = sa.lambda_stmt(_base_query)

        address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
        log().var_dump('Result (addressable place node)', address_row)
    else:
        address_row = None

    if address_row is None:
        # Still nothing, then return a country with the appropriate country code.
        def _country_base_query() -> SaSelect:
            return _select_from_placex(t)\
                     .where(t.c.country_code.in_(ccodes))\
                     .where(t.c.rank_address == 4)\
                     .where(t.c.rank_search == 4)\
                     .where(t.c.linked_place_id == None)\
                     .order_by('distance')\
                     .limit(1)

        if self.has_geometries():
            sql = self._add_geometry_columns(_country_base_query(), t.c.geometry)
        else:
            sql = sa.lambda_stmt(_country_base_query)

        address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()

    return address_row
|
||||
|
||||
|
||||
async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]:
    """ Look up a single coordinate. Returns the place information,
        if a place was found near the coordinates or None otherwise.

        The search goes from fine to coarse: streets and POIs first
        (only when max_rank is at least 26), then larger areas and
        finally the country.
    """
    log().function('reverse_lookup', coord=coord, params=self.params)

    self.bind_params['wkt'] = f'POINT({coord[0]} {coord[1]})'

    found: Optional[SaRow] = None
    make_result: RowFunc = nres.create_from_placex_row

    if self.max_rank >= 26:
        found, street_row_func = await self.lookup_street_poi()
        if found is not None:
            make_result = street_row_func

    if found is None:
        ccodes: List[str] = []
        if self.restrict_to_country_areas:
            ccodes = await self.lookup_country_codes()
            if not ccodes:
                # The point is outside of any known country.
                return None

        if self.max_rank > 4:
            found = await self.lookup_area()
        if found is None and self.layer_enabled(DataLayer.ADDRESS):
            found = await self.lookup_country(ccodes)

    result = make_result(found, nres.ReverseResult)
    if result is None:
        return None

    assert found is not None
    result.distance = found.distance
    if hasattr(found, 'bbox'):
        result.bbox = Bbox.from_wkb(found.bbox)
    await nres.add_result_details(self.conn, [result], self.params)

    return result
|
||||
15
src/nominatim_api/search/__init__.py
Normal file
15
src/nominatim_api/search/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Module for forward search.
|
||||
"""
|
||||
# pylint: disable=useless-import-alias
|
||||
|
||||
from .geocoder import (ForwardGeocoder as ForwardGeocoder)
|
||||
from .query import (Phrase as Phrase,
|
||||
PhraseType as PhraseType)
|
||||
from .query_analyzer_factory import (make_query_analyzer as make_query_analyzer)
|
||||
459
src/nominatim_api/search/db_search_builder.py
Normal file
459
src/nominatim_api/search/db_search_builder.py
Normal file
@@ -0,0 +1,459 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Conversion from token assignment to an abstract DB search.
|
||||
"""
|
||||
from typing import Optional, List, Tuple, Iterator, Dict
|
||||
import heapq
|
||||
|
||||
from ..types import SearchDetails, DataLayer
|
||||
from .query import QueryStruct, Token, TokenType, TokenRange, BreakType
|
||||
from .token_assignment import TokenAssignment
|
||||
from . import db_search_fields as dbf
|
||||
from . import db_searches as dbs
|
||||
from . import db_search_lookups as lookups
|
||||
|
||||
|
||||
def wrap_near_search(categories: List[Tuple[str, str]],
                     search: dbs.AbstractSearch) -> dbs.NearSearch:
    """ Wrap the given search into a near search for places of the
        given categories.

        All categories get a penalty of 0.0 and the new search takes
        over the penalty of the wrapped search.
    """
    no_penalties = [0.0] * len(categories)
    weighted = dbf.WeightedCategories(categories, no_penalties)

    return dbs.NearSearch(penalty=search.penalty,
                          categories=weighted,
                          search=search)
|
||||
|
||||
|
||||
def build_poi_search(category: List[Tuple[str, str]],
                     countries: Optional[List[str]]) -> dbs.PoiSearch:
    """ Create a new search for places of the given categories,
        optionally constrained to the given countries.

        Categories and countries are all added with a penalty of 0.0.
    """
    ccs = (dbf.WeightedStrings(countries, [0.0] * len(countries))
           if countries else dbf.WeightedStrings([], []))

    class _PoiData(dbf.SearchData):
        # Search data with only the fields set that a POI search needs.
        penalty = 0.0
        qualifiers = dbf.WeightedCategories(category, [0.0] * len(category))
        countries = ccs

    return dbs.PoiSearch(_PoiData())
|
||||
|
||||
|
||||
class SearchBuilder:
    """ Build the abstract search queries from token assignments.
    """

    def __init__(self, query: QueryStruct, details: SearchDetails) -> None:
        """ Create a builder for the given tokenized query and the
            search parameters requested by the user.
        """
        self.query = query
        self.details = details


    @property
    def configured_for_country(self) -> bool:
        """ Return true if the search details are configured to
            allow countries in the result.
        """
        return self.details.min_rank <= 4 and self.details.max_rank >= 4 \
               and self.details.layer_enabled(DataLayer.ADDRESS)


    @property
    def configured_for_postcode(self) -> bool:
        """ Return true if the search details are configured to
            allow postcodes in the result.
        """
        return self.details.min_rank <= 5 and self.details.max_rank >= 11\
               and self.details.layer_enabled(DataLayer.ADDRESS)


    @property
    def configured_for_housenumbers(self) -> bool:
        """ Return true if the search details are configured to
            allow addresses in the result.
        """
        return self.details.max_rank >= 30 \
               and self.details.layer_enabled(DataLayer.ADDRESS)


    def build(self, assignment: TokenAssignment) -> Iterator[dbs.AbstractSearch]:
        """ Yield all possible abstract searches for the given token assignment.
        """
        sdata = self.get_search_data(assignment)
        if sdata is None:
            return

        near_items = self.get_near_items(assignment)
        if near_items is not None and not near_items:
            return # impossible combination of near items and category parameter

        if assignment.name is None:
            if near_items and not sdata.postcodes:
                # Without a name the near items become the main search
                # term, so no wrapping near search is needed.
                sdata.qualifiers = near_items
                near_items = None
                builder = self.build_poi_search(sdata)
            elif assignment.housenumber:
                hnr_tokens = self.query.get_tokens(assignment.housenumber,
                                                   TokenType.HOUSENUMBER)
                builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
            else:
                builder = self.build_special_search(sdata, assignment.address,
                                                    bool(near_items))
        else:
            builder = self.build_name_search(sdata, assignment.name, assignment.address,
                                             bool(near_items))

        if near_items:
            # Move the smallest category penalty to the near search,
            # so that the penalties of the categories become relative.
            penalty = min(near_items.penalties)
            near_items.penalties = [p - penalty for p in near_items.penalties]
            for search in builder:
                # The penalty of the inner search is carried by the
                # wrapping near search.
                search_penalty = search.penalty
                search.penalty = 0.0
                yield dbs.NearSearch(penalty + assignment.penalty + search_penalty,
                                     near_items, search)
        else:
            for search in builder:
                search.penalty += assignment.penalty
                yield search


    def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]:
        """ Build abstract search query for a simple category search.
            This kind of search requires an additional geographic constraint.
        """
        if not sdata.housenumbers \
           and ((self.details.viewbox and self.details.bounded_viewbox) or self.details.near):
            yield dbs.PoiSearch(sdata)


    def build_special_search(self, sdata: dbf.SearchData,
                             address: List[TokenRange],
                             is_category: bool) -> Iterator[dbs.AbstractSearch]:
        """ Build abstract search queries for searches that do not involve
            a named place.
        """
        if sdata.qualifiers:
            # No special searches over qualifiers supported.
            return

        if sdata.countries and not address and not sdata.postcodes \
           and self.configured_for_country:
            yield dbs.CountrySearch(sdata)

        if sdata.postcodes and (is_category or self.configured_for_postcode):
            penalty = 0.0 if sdata.countries else 0.1
            if address:
                sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
                                                 [t.token for r in address
                                                  for t in self.query.get_partials_list(r)],
                                                 lookups.Restrict)]
                penalty += 0.2
            yield dbs.PostcodeSearch(penalty, sdata)


    def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
                                 address: List[TokenRange]) -> Iterator[dbs.AbstractSearch]:
        """ Build a simple address search for special entries where the
            housenumber is the main name token.
        """
        sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], lookups.LookupAny)]
        expected_count = sum(t.count for t in hnrs)

        partials = {t.token: t.addr_count for trange in address
                       for t in self.query.get_partials_list(trange)}

        if expected_count < 8000:
            # The housenumber lookup is selective enough, the address
            # terms are only used to filter the results.
            sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
                                                 list(partials), lookups.Restrict))
        elif len(partials) != 1 or next(iter(partials.values())) < 10000:
            sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
                                                 list(partials), lookups.LookupAll))
        else:
            # A single, very frequent address partial: fall back to the
            # full words of the first address part.
            addr_fulls = [t.token for t
                          in self.query.get_tokens(address[0], TokenType.WORD)]
            if len(addr_fulls) > 5:
                return
            sdata.lookups.append(
                dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny))

        # The housenumber is searched as a name here, so it must not be
        # applied a second time as a housenumber filter.
        sdata.housenumbers = dbf.WeightedStrings([], [])
        yield dbs.PlaceSearch(0.05, sdata, expected_count)


    def build_name_search(self, sdata: dbf.SearchData,
                          name: TokenRange, address: List[TokenRange],
                          is_category: bool) -> Iterator[dbs.AbstractSearch]:
        """ Build abstract search queries for simple name or address searches.
        """
        if is_category or not sdata.housenumbers or self.configured_for_housenumbers:
            ranking = self.get_name_ranking(name)
            name_penalty = ranking.normalize_penalty()
            if ranking.rankings:
                sdata.rankings.append(ranking)
            for penalty, count, lookup in self.yield_lookups(name, address):
                sdata.lookups = lookup
                yield dbs.PlaceSearch(penalty + name_penalty, sdata, count)


    def yield_lookups(self, name: TokenRange, address: List[TokenRange])\
                          -> Iterator[Tuple[float, int, List[dbf.FieldLookup]]]:
        """ Yield all variants how the given name and address should best
            be searched for. This takes into account how frequent the terms
            are and tries to find a lookup that optimizes index use.

            Each item is a tuple of an extra penalty, the expected number
            of results and the list of field lookups to use.
        """
        penalty = 0.0 # extra penalty
        name_partials = {t.token: t for t in self.query.get_partials_list(name)}

        addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
        addr_tokens = list({t.token for t in addr_partials})

        partials_indexed = all(t.is_indexed for t in name_partials.values()) \
                           and all(t.is_indexed for t in addr_partials)
        exp_count = min(t.count for t in name_partials.values()) / (2**(len(name_partials) - 1))

        if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
            yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
            return

        addr_count = min(t.addr_count for t in addr_partials) if addr_partials else 30000
        # Partial term too frequent. Try looking up by rare full names first.
        name_fulls = self.query.get_tokens(name, TokenType.WORD)
        if name_fulls:
            fulls_count = sum(t.count for t in name_fulls)
            # Unindexed address partials are dropped from the lookups
            # below, so account for them with an extra penalty. This used
            # to be guarded by 'if partials_indexed', which made the sum
            # always zero. The sum is zero anyway when all address
            # partials are indexed, so no guard is needed.
            penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)

            if fulls_count < 50000 or addr_count < 30000:
                yield penalty, fulls_count / (2**len(addr_tokens)), \
                    self.get_full_name_ranking(name_fulls, addr_partials,
                                               fulls_count > 30000 / max(1, len(addr_tokens)))

        # To catch remaining results, lookup by name and address
        # We only do this if there is a reasonable number of results expected.
        exp_count = exp_count / (2**len(addr_tokens)) if addr_tokens else exp_count
        if exp_count < 10000 and addr_count < 20000\
           and all(t.is_indexed for t in name_partials.values()):
            penalty += 0.35 * max(1 if name_fulls else 0.1,
                                  5 - len(name_partials) - len(addr_tokens))
            yield penalty, exp_count,\
                  self.get_name_address_ranking(list(name_partials.keys()), addr_partials)


    @staticmethod
    def _split_addr_tokens(addr_partials: List[Token]) -> Tuple[List[int], List[int]]:
        """ Split the indexed address partials into tokens that should
            only restrict results (address count above 20000) and tokens
            that are used for the lookup. Unindexed partials are dropped.
        """
        restrict_tokens = []
        lookup_tokens = []
        for t in addr_partials:
            if t.is_indexed:
                if t.addr_count > 20000:
                    restrict_tokens.append(t.token)
                else:
                    lookup_tokens.append(t.token)

        return restrict_tokens, lookup_tokens


    def get_name_address_ranking(self, name_tokens: List[int],
                                 addr_partials: List[Token]) -> List[dbf.FieldLookup]:
        """ Create a ranking expression looking up by name and address.
        """
        lookup = [dbf.FieldLookup('name_vector', name_tokens, lookups.LookupAll)]

        addr_restrict_tokens, addr_lookup_tokens = self._split_addr_tokens(addr_partials)

        if addr_restrict_tokens:
            lookup.append(dbf.FieldLookup('nameaddress_vector',
                                          addr_restrict_tokens, lookups.Restrict))
        if addr_lookup_tokens:
            lookup.append(dbf.FieldLookup('nameaddress_vector',
                                          addr_lookup_tokens, lookups.LookupAll))

        return lookup


    def get_full_name_ranking(self, name_fulls: List[Token], addr_partials: List[Token],
                              use_lookup: bool) -> List[dbf.FieldLookup]:
        """ Create a ranking expression with full name terms and
            additional address lookup. When 'use_lookup' is true, then
            address lookups will use the index, when the occurrences are not
            too many.
        """
        # At this point drop unindexed partials from the address.
        # This might yield wrong results, nothing we can do about that.
        if use_lookup:
            addr_restrict_tokens, addr_lookup_tokens = self._split_addr_tokens(addr_partials)
        else:
            addr_restrict_tokens = [t.token for t in addr_partials if t.is_indexed]
            addr_lookup_tokens = []

        return dbf.lookup_by_any_name([t.token for t in name_fulls],
                                      addr_restrict_tokens, addr_lookup_tokens)


    def get_name_ranking(self, trange: TokenRange,
                         db_field: str = 'name_vector') -> dbf.FieldRanking:
        """ Create a ranking expression for a name term in the given range.
        """
        name_fulls = self.query.get_tokens(trange, TokenType.WORD)
        ranks = [dbf.RankedTokens(t.penalty, [t.token]) for t in name_fulls]
        ranks.sort(key=lambda r: r.penalty)
        # Fallback, sum of penalty for partials
        name_partials = self.query.get_partials_list(trange)
        default = sum(t.penalty for t in name_partials) + 0.2
        return dbf.FieldRanking(db_field, default, ranks)


    def get_addr_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
        """ Create a list of ranking expressions for an address term
            for the given ranges.
        """
        # Queue of partially built token sequences. Each entry consists
        # of the negated number of steps taken, the query node reached
        # and the tokens and penalty collected so far.
        todo: List[Tuple[int, int, dbf.RankedTokens]] = []
        heapq.heappush(todo, (0, trange.start, dbf.RankedTokens(0.0, [])))
        ranks: List[dbf.RankedTokens] = []

        while todo: # pylint: disable=too-many-nested-blocks
            neglen, pos, rank = heapq.heappop(todo)
            for tlist in self.query.nodes[pos].starting:
                if tlist.ttype in (TokenType.PARTIAL, TokenType.WORD):
                    if tlist.end < trange.end:
                        # Not at the end of the range yet: extend the
                        # sequence and queue it again.
                        chgpenalty = PENALTY_WORDCHANGE[self.query.nodes[tlist.end].btype]
                        if tlist.ttype == TokenType.PARTIAL:
                            # Partials only contribute their penalty,
                            # not their tokens.
                            penalty = rank.penalty + chgpenalty \
                                      + max(t.penalty for t in tlist.tokens)
                            heapq.heappush(todo, (neglen - 1, tlist.end,
                                                  dbf.RankedTokens(penalty, rank.tokens)))
                        else:
                            for t in tlist.tokens:
                                heapq.heappush(todo, (neglen - 1, tlist.end,
                                                      rank.with_token(t, chgpenalty)))
                    elif tlist.end == trange.end:
                        # Sequence is complete.
                        if tlist.ttype == TokenType.PARTIAL:
                            ranks.append(dbf.RankedTokens(rank.penalty
                                                          + max(t.penalty for t in tlist.tokens),
                                                          rank.tokens))
                        else:
                            ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)
                        if len(ranks) >= 10:
                            # Too many variants, bail out and only add
                            # Worst-case Fallback: sum of penalty of partials
                            name_partials = self.query.get_partials_list(trange)
                            default = sum(t.penalty for t in name_partials) + 0.2
                            ranks.append(dbf.RankedTokens(rank.penalty + default, []))
                            # Bail out of outer loop
                            todo.clear()
                            break

        # The entry with the fewest tokens supplies the default penalty
        # and is removed from the list of explicit rankings.
        ranks.sort(key=lambda r: len(r.tokens))
        default = ranks[0].penalty + 0.3
        del ranks[0]
        ranks.sort(key=lambda r: r.penalty)

        return dbf.FieldRanking('nameaddress_vector', default, ranks)


    def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchData]:
        """ Collect the tokens for the non-name search fields in the
            assignment.

            Returns None when the country or qualifier tokens of the
            assignment are all excluded by the search parameters.
        """
        sdata = dbf.SearchData()
        sdata.penalty = assignment.penalty
        if assignment.country:
            tokens = self.get_country_tokens(assignment.country)
            if not tokens:
                return None
            sdata.set_strings('countries', tokens)
        elif self.details.countries:
            sdata.countries = dbf.WeightedStrings(self.details.countries,
                                                  [0.0] * len(self.details.countries))
        if assignment.housenumber:
            sdata.set_strings('housenumbers',
                              self.query.get_tokens(assignment.housenumber,
                                                    TokenType.HOUSENUMBER))
        if assignment.postcode:
            sdata.set_strings('postcodes',
                              self.query.get_tokens(assignment.postcode,
                                                    TokenType.POSTCODE))
        if assignment.qualifier:
            tokens = self.get_qualifier_tokens(assignment.qualifier)
            if not tokens:
                return None
            sdata.set_qualifiers(tokens)
        elif self.details.categories:
            sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
                                                      [0.0] * len(self.details.categories))

        if assignment.address:
            if not assignment.name and assignment.housenumber:
                # housenumber search: the first item needs to be handled like
                # a name in ranking or penalties are not comparable with
                # normal searches.
                sdata.set_ranking([self.get_name_ranking(assignment.address[0],
                                                         db_field='nameaddress_vector')]
                                  + [self.get_addr_ranking(r) for r in assignment.address[1:]])
            else:
                sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
        else:
            sdata.rankings = []

        return sdata


    def get_country_tokens(self, trange: TokenRange) -> List[Token]:
        """ Return the list of country tokens for the given range,
            optionally filtered by the country list from the details
            parameters.
        """
        tokens = self.query.get_tokens(trange, TokenType.COUNTRY)
        if self.details.countries:
            tokens = [t for t in tokens if t.lookup_word in self.details.countries]

        return tokens


    def get_qualifier_tokens(self, trange: TokenRange) -> List[Token]:
        """ Return the list of qualifier tokens for the given range,
            optionally filtered by the qualifier list from the details
            parameters.
        """
        tokens = self.query.get_tokens(trange, TokenType.QUALIFIER)
        if self.details.categories:
            tokens = [t for t in tokens if t.get_category() in self.details.categories]

        return tokens


    def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
        """ Collect tokens for near items search or use the categories
            requested per parameter.
            Returns None if no category search is requested.
        """
        if assignment.near_item:
            tokens: Dict[Tuple[str, str], float] = {}
            for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
                cat = t.get_category()
                # The category of a near search will be that of near_item.
                # Thus, if search is restricted to a category parameter,
                # the two sets must intersect.
                # For each category only the smallest penalty is kept.
                if (not self.details.categories or cat in self.details.categories)\
                   and t.penalty < tokens.get(cat, 1000.0):
                    tokens[cat] = t.penalty
            return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))

        return None
|
||||
|
||||
|
||||
# Penalty added in SearchBuilder.get_addr_ranking() when a sequence of
# address tokens continues across a break of the given type.
PENALTY_WORDCHANGE = {
    BreakType.START: 0.0,
    BreakType.END: 0.0,
    BreakType.PHRASE: 0.0,
    BreakType.WORD: 0.1,
    BreakType.PART: 0.2,
    BreakType.TOKEN: 0.4
}
|
||||
254
src/nominatim_api/search/db_search_fields.py
Normal file
254
src/nominatim_api/search/db_search_fields.py
Normal file
@@ -0,0 +1,254 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Data structures for more complex fields in abstract search descriptions.
|
||||
"""
|
||||
from typing import List, Tuple, Iterator, Dict, Type
|
||||
import dataclasses
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim_core.typing import SaFromClause, SaColumn, SaExpression
|
||||
from .query import Token
|
||||
from . import db_search_lookups as lookups
|
||||
from nominatim_core.utils.json_writer import JsonWriter
|
||||
|
||||
|
||||
@dataclasses.dataclass
class WeightedStrings:
    """ A list of strings where each string has its own penalty.
        Values and penalties are kept in two parallel lists.
    """
    values: List[str]
    penalties: List[float]

    def __bool__(self) -> bool:
        # The object is truthy as soon as it holds at least one value.
        return len(self.values) > 0


    def __iter__(self) -> Iterator[Tuple[str, float]]:
        # Iterate over (value, penalty) pairs.
        return zip(self.values, self.penalties)


    def get_penalty(self, value: str, default: float = 1000.0) -> float:
        """ Return the penalty that belongs to the given value or
            'default' when the value is not in the list.
        """
        if value in self.values:
            return self.penalties[self.values.index(value)]

        return default
|
||||
|
||||
|
||||
@dataclasses.dataclass
class WeightedCategories:
    """ A list of class/type tuples where each tuple has its own penalty.
        Values and penalties are kept in two parallel lists.
    """
    values: List[Tuple[str, str]]
    penalties: List[float]

    def __bool__(self) -> bool:
        # The object is truthy as soon as it holds at least one category.
        return len(self.values) > 0


    def __iter__(self) -> Iterator[Tuple[Tuple[str, str], float]]:
        # Iterate over (category, penalty) pairs.
        return zip(self.values, self.penalties)


    def get_penalty(self, value: Tuple[str, str], default: float = 1000.0) -> float:
        """ Return the penalty that belongs to the given category or
            'default' when the category is not in the list.
        """
        if value in self.values:
            return self.penalties[self.values.index(value)]

        return default


    def sql_restrict(self, table: SaFromClause) -> SaExpression:
        """ Return an SQLAlchemy expression that restricts the
            class and type columns of the given table to the values
            in the list.
            Must not be used with an empty list.
        """
        assert self.values

        matches = [sa.and_(table.c.class_ == cls, table.c.type == typ)
                   for cls, typ in self.values]

        # A single category does not need to be wrapped in an OR.
        if len(matches) == 1:
            return matches[0]

        return sa.or_(*matches)
|
||||
|
||||
|
||||
@dataclasses.dataclass(order=True)
class RankedTokens:
    """ A sequence of token ids and the penalty for using that sequence.
        Instances order by penalty first, then by token list.
    """
    penalty: float
    tokens: List[int]

    def with_token(self, t: Token, transition_penalty: float) -> 'RankedTokens':
        """ Return a new RankedTokens object that has the id of `t`
            appended to the token list. The new penalty is the current
            penalty plus the penalty of the token plus the given
            transition penalty. The object itself is left untouched.
        """
        new_penalty = self.penalty + t.penalty + transition_penalty
        new_tokens = [*self.tokens, t.token]

        return RankedTokens(new_penalty, new_tokens)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class FieldRanking:
    """ Rankings for one database column. According to the original
        description they are applied one after another until one
        matches and that ranking then determines the penalty. When none
        matches, the default penalty is used.
    """
    column: str
    default: float
    rankings: List[RankedTokens]

    def normalize_penalty(self) -> float:
        """ Shift the default penalty and the penalties of all rankings
            so that the smallest of them becomes 0. Returns the amount
            that was subtracted (the unchanged minimum when it was not
            positive).
        """
        ranked = [r.penalty for r in self.rankings]
        min_penalty = min(self.default, min(ranked)) if ranked else self.default

        # Nothing to shift when the minimum is not positive.
        if not min_penalty > 0.0:
            return min_penalty

        self.default -= min_penalty
        for entry in self.rankings:
            entry.penalty -= min_penalty

        return min_penalty

    def sql_penalty(self, table: SaFromClause) -> SaColumn:
        """ Create the SQL expression computing the penalty for the
            rankings: a call to the SQL function `weigh_search` with the
            ranked column, the serialised rankings and the default penalty.
            Must not be used when there are no rankings.
        """
        assert self.rankings

        # Serialise the rankings as nested arrays: one array per ranking
        # holding the penalty followed by the array of its tokens.
        writer = JsonWriter().start_array()
        for entry in self.rankings:
            writer.start_array().value(entry.penalty).next()
            writer.start_array()
            for token_id in entry.tokens:
                writer.value(token_id).next()
            writer.end_array()
            writer.end_array().next()
        writer.end_array()

        serialised = writer()

        return sa.func.weigh_search(table.c[self.column], serialised, self.default)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class FieldLookup:
    """ Tokens to search for in one database column.

        `column` names the database column and `lookup_type` is the
        lookup class that implements the operator to apply. The
        original description distinguishes three kinds: 'lookup_all'
        requires all tokens to match, 'lookup_any' requires one of the
        tokens to match and 'restrict' requires all tokens to match
        but avoids the use of indexes.
    """
    column: str
    tokens: List[int]
    lookup_type: Type[lookups.LookupType]

    def sql_condition(self, table: SaFromClause) -> SaColumn:
        """ Instantiate the lookup type for the given table, yielding
            the SQL expression of the match condition.
        """
        make_condition = self.lookup_type

        return make_condition(table, self.column, self.tokens)
|
||||
|
||||
|
||||
class SearchData:
    """ Search fields derived from query and token assignment
        to be used with the SQL queries.

        `penalty` and `rankings` have no default and must be assigned
        before they are read (`rankings` through set_ranking()).
    """
    penalty: float

    lookups: List[FieldLookup]
    rankings: List[FieldRanking]

    housenumbers: WeightedStrings
    postcodes: WeightedStrings
    countries: WeightedStrings

    qualifiers: WeightedCategories

    def __init__(self) -> None:
        # Every instance gets its own empty containers. Keeping them as
        # class-level defaults would share one list/object between all
        # instances, so an in-place change on one SearchData would leak
        # into every other one.
        self.lookups = []

        self.housenumbers = WeightedStrings([], [])
        self.postcodes = WeightedStrings([], [])
        self.countries = WeightedStrings([], [])

        self.qualifiers = WeightedCategories([], [])

    def set_strings(self, field: str, tokens: List[Token]) -> None:
        """ Set one of the WeightedStrings properties from the given
            token list. The smallest token penalty is moved into the
            global penalty, so that the minimum penalty in the
            resulting list is 0.

            `field` is the name of the attribute to set. Nothing is
            changed when `tokens` is empty.
        """
        if tokens:
            min_penalty = min(t.penalty for t in tokens)
            self.penalty += min_penalty
            wstrs = WeightedStrings([t.lookup_word for t in tokens],
                                    [t.penalty - min_penalty for t in tokens])

            setattr(self, field, wstrs)

    def set_qualifiers(self, tokens: List[Token]) -> None:
        """ Set the qualifier field from the given tokens.

            Each category is kept once with the smallest penalty of
            its tokens. The smallest token penalty overall is added to
            the global penalty. Nothing is changed when `tokens` is empty.
        """
        if tokens:
            categories: Dict[Tuple[str, str], float] = {}
            min_penalty = 1000.0
            for t in tokens:
                min_penalty = min(min_penalty, t.penalty)
                cat = t.get_category()
                if t.penalty < categories.get(cat, 1000.0):
                    categories[cat] = t.penalty
            # NOTE(review): unlike set_strings(), the category penalties
            # are not reduced by min_penalty here - confirm this is intended.
            self.penalty += min_penalty
            self.qualifiers = WeightedCategories(list(categories.keys()),
                                                 list(categories.values()))

    def set_ranking(self, rankings: List[FieldRanking]) -> None:
        """ Set the list of rankings and normalize the ranking.

            Rankings with entries are normalized and kept, with the
            subtracted amount added to the global penalty. Rankings
            without entries are dropped and only contribute their
            default penalty to the global penalty.
        """
        self.rankings = []
        for ranking in rankings:
            if ranking.rankings:
                self.penalty += ranking.normalize_penalty()
                self.rankings.append(ranking)
            else:
                self.penalty += ranking.default
|
||||
|
||||
|
||||
def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
    """ Build the lookup list for a search driven by the name tokens:
        all name tokens are looked up via index in 'name_vector'. When
        address tokens are given, they are added as a restriction on
        'nameaddress_vector'.
    """
    by_name = FieldLookup('name_vector', name_tokens, lookups.LookupAll)

    if not addr_tokens:
        return [by_name]

    return [by_name,
            FieldLookup('nameaddress_vector', addr_tokens, lookups.Restrict)]
|
||||
|
||||
|
||||
def lookup_by_any_name(name_tokens: List[int], addr_restrict_tokens: List[int],
                       addr_lookup_tokens: List[int]) -> List[FieldLookup]:
    """ Build the lookup list for a search where any one of the name
        tokens may match: the name tokens are looked up via index in
        'name_vector'. Address tokens narrow the search further on
        'nameaddress_vector', `addr_restrict_tokens` as a restriction
        and `addr_lookup_tokens` as an index lookup of all tokens.
        Empty address token lists are skipped.
    """
    result = [FieldLookup('name_vector', name_tokens, lookups.LookupAny)]

    # The restriction is added before the lookup, as in the original order.
    address_parts = ((addr_restrict_tokens, lookups.Restrict),
                     (addr_lookup_tokens, lookups.LookupAll))
    for addr_tokens, lookup_type in address_parts:
        if addr_tokens:
            result.append(FieldLookup('nameaddress_vector', addr_tokens, lookup_type))

    return result
|
||||
|
||||
|
||||
def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
    """ Build the lookup list for a search driven by the address tokens:
        all address tokens are looked up via index in 'nameaddress_vector'
        while the name tokens only restrict 'name_vector'.
    """
    name_restriction = FieldLookup('name_vector', name_tokens, lookups.Restrict)
    address_lookup = FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll)

    return [name_restriction, address_lookup]
|
||||
114
src/nominatim_api/search/db_search_lookups.py
Normal file
114
src/nominatim_api/search/db_search_lookups.py
Normal file
@@ -0,0 +1,114 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of lookup functions for the search_name table.
|
||||
"""
|
||||
from typing import List, Any
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.compiler import compiles
|
||||
|
||||
from nominatim_core.typing import SaFromClause
|
||||
from nominatim_core.db.sqlalchemy_types import IntArray
|
||||
|
||||
# pylint: disable=consider-using-f-string
|
||||
|
||||
# Common base type of the lookup classes defined in this module.
LookupType = sa.sql.expression.FunctionElement[Any]
|
||||
|
||||
class LookupAll(LookupType):
    """ Lookup of the entries in the search_name table that contain
        every one of the given tokens. An index is used for the search.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        # Clause order matters: the compile functions unpack the clauses
        # as (place_id, column, column name, tokens).
        token_array = sa.type_coerce(tokens, IntArray)
        search_column = getattr(table.c, column)

        super().__init__(table.c.place_id, search_column, column, token_array)
|
||||
|
||||
|
||||
@compiles(LookupAll) # type: ignore[no-untyped-call, misc]
def _default_lookup_all(element: LookupAll,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default SQL for LookupAll: the column must contain (`@>`)
        the token array.
    """
    _, col, _, tokens = list(element.clauses)

    # Compile left operand first, then right, to keep bind order stable.
    lhs = compiler.process(col, **kw)
    rhs = compiler.process(tokens, **kw)

    return "(%s @> %s)" % (lhs, rhs)
|
||||
|
||||
|
||||
@compiles(LookupAll, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_all(element: LookupAll,
                       compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite SQL for LookupAll: candidate place ids are taken from the
        reverse_search_name table through array_intersect_fuzzy() and
        then checked with array_contains() against the column.
    """
    place, col, colname, tokens = list(element.clauses)

    template = ("(%s IN (SELECT CAST(value as bigint) FROM"
                " (SELECT array_intersect_fuzzy(places) as p FROM"
                " (SELECT places FROM reverse_search_name"
                " WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"
                " AND column = %s"
                " ORDER BY length(places)) as x) as u,"
                " json_each('[' || u.p || ']'))"
                " AND array_contains(%s, %s))")

    # The fragments are compiled in the order of their placeholders.
    # The tokens appear twice in the statement and are compiled once
    # for each occurrence.
    place_sql = compiler.process(place, **kw)
    word_tokens_sql = compiler.process(tokens, **kw)
    colname_sql = compiler.process(colname, **kw)
    col_sql = compiler.process(col, **kw)
    contained_tokens_sql = compiler.process(tokens, **kw)

    return template % (place_sql, word_tokens_sql, colname_sql,
                       col_sql, contained_tokens_sql)
|
||||
|
||||
|
||||
|
||||
class LookupAny(LookupType):
    """ Lookup of the entries that contain one or more of the given
        tokens. An index is used for the search.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        # Clause order matters: the compile functions unpack the clauses
        # as (place_id, column, column name, tokens).
        token_array = sa.type_coerce(tokens, IntArray)
        search_column = getattr(table.c, column)

        super().__init__(table.c.place_id, search_column, column, token_array)
|
||||
|
||||
@compiles(LookupAny) # type: ignore[no-untyped-call, misc]
def _default_lookup_any(element: LookupAny,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default SQL for LookupAny: the column must overlap (`&&`)
        with the token array.
    """
    _, col, _, tokens = list(element.clauses)

    # Compile left operand first, then right, to keep bind order stable.
    lhs = compiler.process(col, **kw)
    rhs = compiler.process(tokens, **kw)

    return "(%s && %s)" % (lhs, rhs)
|
||||
|
||||
@compiles(LookupAny, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_any(element: LookupAny,
                       compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite SQL for LookupAny: the place id must be in the union
        (array_union()) of the place lists that reverse_search_name
        holds for the tokens in the given column.
    """
    place, _, colname, tokens = list(element.clauses)

    template = ("%s IN (SELECT CAST(value as bigint) FROM"
                " (SELECT array_union(places) as p FROM reverse_search_name"
                " WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"
                " AND column = %s) as u,"
                " json_each('[' || u.p || ']'))")

    # The fragments are compiled in the order of their placeholders.
    place_sql = compiler.process(place, **kw)
    tokens_sql = compiler.process(tokens, **kw)
    colname_sql = compiler.process(colname, **kw)

    return template % (place_sql, tokens_sql, colname_sql)
|
||||
|
||||
|
||||
|
||||
class Restrict(LookupType):
    """ Lookup of the entries that contain every one of the given
        tokens, formulated so that no index is used for the search.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        # Only two clauses here: the column and the token array.
        token_array = sa.type_coerce(tokens, IntArray)
        search_column = getattr(table.c, column)

        super().__init__(search_column, token_array)
|
||||
|
||||
|
||||
@compiles(Restrict) # type: ignore[no-untyped-call, misc]
def _default_restrict(element: Restrict,
                      compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default SQL for Restrict: a containment check (`@>`) where the
        column is wrapped into coalesce(null, ...).
    """
    column, tokens = list(element.clauses)

    # Compile left operand first, then right, to keep bind order stable.
    lhs = compiler.process(column, **kw)
    rhs = compiler.process(tokens, **kw)

    return "(coalesce(null, %s) @> %s)" % (lhs, rhs)
|
||||
|
||||
@compiles(Restrict, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_restrict(element: Restrict,
                     compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite SQL for Restrict: hand the compiled clause list to the
        array_contains() function.
    """
    arguments = compiler.process(element.clauses, **kw)

    return "array_contains(%s)" % arguments
|
||||
874
src/nominatim_api/search/db_searches.py
Normal file
874
src/nominatim_api/search/db_searches.py
Normal file
@@ -0,0 +1,874 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the actual database accesses for forward search.
|
||||
"""
|
||||
from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
|
||||
import abc
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim_core.typing import SaFromClause, SaScalarSelect, SaColumn, \
|
||||
SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
|
||||
from nominatim_core.db.sqlalchemy_types import Geometry, IntArray
|
||||
from ..connection import SearchConnection
|
||||
from ..types import SearchDetails, DataLayer, GeometryFormat, Bbox
|
||||
from .. import results as nres
|
||||
from .db_search_fields import SearchData, WeightedCategories
|
||||
|
||||
#pylint: disable=singleton-comparison,not-callable
|
||||
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
|
||||
|
||||
def no_index(expr: SaColumn) -> SaColumn:
    """ Return `expr` wrapped into `coalesce(NULL, expr)`. The wrapper is
        meant to keep the query planner from using the expression for
        an index lookup.
    """
    null_value = sa.null()

    return sa.func.coalesce(null_value, expr) # pylint: disable=not-callable
|
||||
|
||||
|
||||
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
    """ Collect the search parameters into a dictionary that can be
        handed to SQL execute as bind parameters.
    """
    params: Dict[str, Any] = {
        'limit': details.max_results,
        'min_rank': details.min_rank,
        'max_rank': details.max_rank,
        'viewbox': details.viewbox,
        'viewbox2': details.viewbox_x2,
        'near': details.near,
        'near_radius': details.near_radius,
        'excluded': details.excluded,
        'countries': details.countries,
    }

    return params
|
||||
|
||||
|
||||
# Bind parameters shared by the queries in this module. The names
# match the keys produced by _details_to_bind_params().
LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
# Parameters that carry a geometry are typed accordingly.
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
|
||||
|
||||
|
||||
def filter_by_area(sql: SaSelect, t: SaFromClause,
                   details: SearchDetails, avoid_index: bool = False) -> SaSelect:
    """ Add the conditions for the near point and the bounded viewbox
        to the query, as far as they are set in the search details.
        With `avoid_index` the index-using variants of the conditions
        are not chosen.
    """
    has_near = details.near is not None and details.near_radius is not None
    if has_near:
        # The within_distance() variant is only used for small radii.
        if not avoid_index and details.near_radius < 0.1:
            near_cond = t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)
        else:
            near_cond = t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM
        sql = sql.where(near_cond)

    if details.viewbox is not None and details.bounded_viewbox:
        # The index is only requested for small viewboxes.
        use_index = not avoid_index and details.viewbox.area < 0.2
        sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM, use_index=use_index))

    return sql
|
||||
|
||||
|
||||
def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
    """ Return a callable producing the condition that the place_id of
        the table is not in the list bound to the 'excluded' parameter.
    """
    return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
|
||||
|
||||
|
||||
def _select_placex(t: SaFromClause) -> SaSelect:
    """ Create the basic select over the given placex table with the
        standard result columns. The geometry is added through
        ST_Expand(0) under the label 'bbox'.
    """
    columns = [
        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
        t.c.class_, t.c.type,
        t.c.address, t.c.extratags,
        t.c.housenumber, t.c.postcode, t.c.country_code,
        t.c.wikipedia,
        t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
        t.c.linked_place_id, t.c.admin_level,
        t.c.centroid,
        t.c.geometry.ST_Expand(0).label('bbox'),
    ]

    return sa.select(*columns)
|
||||
|
||||
|
||||
def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
    """ Add one output column to the query for each geometry format
        requested in the search details. The geometry is simplified
        first when a simplification factor greater than 0 is set.
    """
    geom = col
    if details.geometry_simplification > 0.0:
        geom = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)

    requested = details.geometry_output

    # Format flag and the function building the matching output column.
    formats = (
        (GeometryFormat.GEOJSON,
         lambda: sa.func.ST_AsGeoJSON(geom, 7).label('geometry_geojson')),
        (GeometryFormat.TEXT,
         lambda: sa.func.ST_AsText(geom).label('geometry_text')),
        (GeometryFormat.KML,
         lambda: sa.func.ST_AsKML(geom, 7).label('geometry_kml')),
        (GeometryFormat.SVG,
         lambda: sa.func.ST_AsSVG(geom, 0, 7).label('geometry_svg')),
    )
    extra_columns = [make_column() for flag, make_column in formats if requested & flag]

    return sql.add_columns(*extra_columns)
|
||||
|
||||
|
||||
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
                                 numerals: List[int], details: SearchDetails) -> SaScalarSelect:
    """ Create a scalar subquery that aggregates the place ids of
        all rows in `table` that have the row of `inner` as parent and
        whose number range and step match one of the given numerals.
        Excluded places are filtered out when the search details
        carry an exclusion list.
    """
    place_ids = sa.func.ArrayAgg(table.c.place_id)
    sql = sa.select(place_ids).where(table.c.parent_place_id == inner.c.place_id)

    if len(numerals) == 1:
        # A single number: both conditions are added directly to the query.
        number = numerals[0]
        sql = sql.where(sa.between(number, table.c.startnumber, table.c.endnumber))
        sql = sql.where((number - table.c.startnumber) % table.c.step == 0)
    else:
        # Several numbers: any one of them may match.
        alternatives = [sa.and_(sa.between(n, table.c.startnumber, table.c.endnumber),
                                (n - table.c.startnumber) % table.c.step == 0)
                        for n in numerals]
        sql = sql.where(sa.or_(*alternatives))

    if details.excluded:
        sql = sql.where(_exclude_places(table))

    return sql.scalar_subquery()
|
||||
|
||||
|
||||
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
    """ Create the condition that restricts the rows of the given table
        to the requested data layers. The conditions of the individual
        layers are combined with OR.
    """
    conditions: List[SaExpression] = []
    rank_address = no_index(table.c.rank_address)

    # Layers that are selected through the address rank.
    if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
        conditions.append(rank_address.between(1, 30))
    elif layers & DataLayer.ADDRESS:
        conditions.append(rank_address.between(1, 29))
        conditions.append(sa.func.IsAddressPoint(table))
    elif layers & DataLayer.POI:
        conditions.append(sa.and_(rank_address == 30,
                                  table.c.class_.not_in(('place', 'building'))))

    # Layers of objects with address rank 0, selected through the class.
    natural_classes = ['natural', 'water', 'waterway']
    with_railway = bool(layers & DataLayer.RAILWAY)
    with_natural = bool(layers & DataLayer.NATURAL)

    if layers & DataLayer.MANMADE:
        # Everything except the classes of the layers not requested.
        excluded = ([] if with_railway else ['railway']) \
                   + ([] if with_natural else natural_classes)
        class_condition = table.c.class_.not_in(tuple(excluded))
    else:
        # Only the classes of the layers explicitly requested.
        included = (['railway'] if with_railway else []) \
                   + (natural_classes if with_natural else [])
        class_condition = table.c.class_.in_(tuple(included))

    conditions.append(sa.and_(class_condition,
                              no_index(table.c.rank_address) == 0))

    if len(conditions) == 1:
        return conditions[0]

    return sa.or_(*conditions)
|
||||
|
||||
|
||||
def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
    """ Create the expression for the position of number `nr` on the
        line geometry of the table row, labelled 'centroid'. Rows whose
        start and end number are the same use the centroid of the line.
    """
    offset = sa.cast(nr - table.c.startnumber, sa.Float)
    span = table.c.endnumber - table.c.startnumber
    fraction = offset / span

    single_number = table.c.endnumber == table.c.startnumber
    position = sa.case((single_number, table.c.linegeo.ST_Centroid()),
                       else_=table.c.linegeo.ST_LineInterpolatePoint(fraction))

    return position.label('centroid')
|
||||
|
||||
|
||||
async def _get_placex_housenumbers(conn: SearchConnection,
                                   place_ids: List[int],
                                   details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Load the placex rows with the given place ids and yield them as
        search results with their bounding box set. Geometry output
        columns are added when requested in the search details.
    """
    t = conn.t.placex
    sql = _select_placex(t).add_columns(t.c.importance)
    sql = sql.where(t.c.place_id.in_(place_ids))

    if details.geometry_output:
        sql = _add_geometry_columns(sql, t.c.geometry, details)

    rows = await conn.execute(sql)
    for row in rows:
        result = nres.create_from_placex_row(row, nres.SearchResult)
        assert result
        result.bbox = Bbox.from_wkb(row.bbox)
        yield result
|
||||
|
||||
|
||||
def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
    """ Turn the given list of integers into a subselect that has
        one row per list entry in the column 'nr'.
    """
    # The list is handed in as JSON and unpacked into rows.
    json_rows = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\
                  .table_valued(sa.column('value', type_=sa.JSON))

    # Each JSON value goes through text to become an integer.
    as_text = sa.cast(json_rows.c.value, sa.Text)
    as_number = sa.cast(as_text, sa.Integer)

    return sa.select(as_number.label('nr')).subquery()
|
||||
|
||||
|
||||
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
                       numerals: List[int],
                       details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Load the osmline rows with the given place ids and yield one
        search result for each given number that lies in the number
        range of a row. The position of the number on the line is
        returned as centroid.
    """
    t = conn.t.osmline

    values = _int_list_to_subquery(numerals)
    columns = [t.c.place_id, t.c.osm_id,
               t.c.parent_place_id, t.c.address,
               values.c.nr.label('housenumber'),
               _interpolated_position(t, values.c.nr),
               t.c.postcode, t.c.country_code]
    in_range = values.c.nr.between(t.c.startnumber, t.c.endnumber)

    sql = sa.select(*columns)\
            .where(t.c.place_id.in_(place_ids))\
            .join(values, in_range)

    if details.geometry_output:
        # Geometry output is derived from the interpolated point.
        sub = sql.subquery()
        sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)

    rows = await conn.execute(sql)
    for row in rows:
        result = nres.create_from_osmline_row(row, nres.SearchResult)
        assert result
        yield result
|
||||
|
||||
|
||||
async def _get_tiger(conn: SearchConnection, place_ids: List[int],
                     numerals: List[int], osm_id: int,
                     details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Load the tiger rows with the given place ids and yield one
        search result for each given number that lies in the number
        range of a row. The results carry the OSM type 'W' and the
        given `osm_id`. The position of the number on the line is
        returned as centroid.
    """
    t = conn.t.tiger

    values = _int_list_to_subquery(numerals)
    columns = [t.c.place_id, t.c.parent_place_id,
               sa.literal('W').label('osm_type'),
               sa.literal(osm_id).label('osm_id'),
               values.c.nr.label('housenumber'),
               _interpolated_position(t, values.c.nr),
               t.c.postcode]
    in_range = values.c.nr.between(t.c.startnumber, t.c.endnumber)

    sql = sa.select(*columns)\
            .where(t.c.place_id.in_(place_ids))\
            .join(values, in_range)

    if details.geometry_output:
        # Geometry output is derived from the interpolated point.
        sub = sql.subquery()
        sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)

    rows = await conn.execute(sql)
    for row in rows:
        result = nres.create_from_tiger_row(row, nres.SearchResult)
        assert result
        yield result
|
||||
|
||||
|
||||
class AbstractSearch(abc.ABC):
    """ Base class that encapsulates a single lookup in the database.
        Each search carries a penalty and a class-wide search priority.
    """
    SEARCH_PRIO: int = 2

    def __init__(self, penalty: float) -> None:
        self.penalty = penalty

    @abc.abstractmethod
    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the search against the database and return the results
            found. Must be implemented by the subclasses.
        """
|
||||
|
||||
|
||||
class NearSearch(AbstractSearch):
    """ Search for places of given categories in the vicinity of the
        results of another search.
    """
    def __init__(self, penalty: float, categories: WeightedCategories,
                 search: AbstractSearch) -> None:
        super().__init__(penalty)
        self.search = search
        self.categories = categories

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the inner search, reduce its results to the suitable
            base places and look up the categories around them.
        """
        results = nres.SearchResults()
        base = await self.search.lookup(conn, details)

        if not base:
            return results

        # The best result determines which other results may still
        # serve as a base for the category lookup.
        base.sort(key=lambda r: (r.accuracy, r.rank_search))
        best = base[0]
        max_accuracy = best.accuracy + 0.5

        if best.rank_address == 0:
            min_rank, max_rank = 0, 0
        elif best.rank_address < 26:
            min_rank, max_rank = 1, min(25, best.rank_address + 4)
        else:
            min_rank, max_rank = 26, 30

        base = nres.SearchResults(r for r in base
                                  if r.source_table == nres.SourceTable.PLACEX
                                  and r.accuracy <= max_accuracy
                                  and r.bbox and r.bbox.area < 20
                                  and min_rank <= r.rank_address <= max_rank)

        if not base:
            return results

        # Only the first five base places are used.
        baseids = [b.place_id for b in base[:5] if b.place_id]

        for category, penalty in self.categories:
            await self.lookup_category(results, conn, baseids, category, penalty, details)
            # Stop as soon as enough results have been collected.
            if len(results) >= details.max_results:
                break

        return results

    async def lookup_category(self, results: nres.SearchResults,
                              conn: SearchConnection, ids: List[int],
                              category: Tuple[str, str], penalty: float,
                              details: SearchDetails) -> None:
        """ Search for places of the given category in the vicinity of
            the places with the given ids. The places found are
            appended to 'results'.
        """
        class_table = await conn.get_class_table(*category)

        tgeom = conn.t.placex.alias('pgeom')

        if class_table is None:
            # No classtype table available, do a simplified lookup in placex.
            table = conn.t.placex
            near_base = table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01))
        else:
            # Use classtype table. We can afford to use a larger
            # radius for the lookup.
            table = class_table
            base_is_area = sa.and_(tgeom.c.rank_address > 9,
                                   tgeom.c.geometry.is_area())
            near_base = table.c.centroid.ST_CoveredBy(
                sa.case((base_is_area, tgeom.c.geometry),
                        else_=tgeom.c.centroid.ST_Expand(0.05)))

        # Distance to the closest of the base places.
        distance = sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
        sql = sa.select(table.c.place_id, distance.label('dist'))\
                .join(tgeom, near_base)

        if class_table is None:
            # placex contains all categories, so filter for the right one.
            sql = sql.where(table.c.class_ == category[0])\
                     .where(table.c.type == category[1])

        inner = sql.where(tgeom.c.place_id.in_(ids))\
                   .group_by(table.c.place_id).subquery()

        # Get the full place information, closest places first.
        t = conn.t.placex
        sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
                               .join(inner, inner.c.place_id == t.c.place_id)\
                               .order_by(inner.c.dist)

        sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
        if details.countries:
            sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
        if details.excluded:
            sql = sql.where(_exclude_places(t))
        if details.layers is not None:
            sql = sql.where(_filter_by_layer(t, details.layers))

        sql = sql.limit(LIMIT_PARAM)

        rows = await conn.execute(sql, _details_to_bind_params(details))
        for row in rows:
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + penalty
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)
|
||||
|
||||
|
||||
|
||||
class PoiSearch(AbstractSearch):
    """ Search for places of given categories within a geographic area.
    """
    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.qualifiers = sdata.qualifiers
        self.countries = sdata.countries

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the category search against the database. Near
            searches with a small radius query the placex table
            directly, all other searches use the class type tables.
        """
        bind_params = _details_to_bind_params(details)
        t = conn.t.placex

        rows: List[SaRow] = []

        if details.near and details.near_radius is not None and details.near_radius < 0.2:
            # simply search in placex table
            def _base_query() -> SaSelect:
                return _select_placex(t) \
                           .add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
                                        .label('importance'))\
                           .where(t.c.linked_place_id == None) \
                           .where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
                           .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
                           .limit(LIMIT_PARAM)

            classtype = self.qualifiers.values
            if len(classtype) == 1:
                # A single category is compared directly.
                cclass, ctype = classtype[0]
                sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
                                                 .where(t.c.class_ == cclass)
                                                 .where(t.c.type == ctype))
            else:
                # Several categories: any one of them may match.
                any_category = sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
                                        for cls, typ in classtype))
                sql = _base_query().where(any_category)

            if self.countries:
                sql = sql.where(t.c.country_code.in_(self.countries.values))

            if details.viewbox is not None and details.bounded_viewbox:
                sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))

            rows.extend(await conn.execute(sql, bind_params))
        else:
            # use the class type tables
            for category in self.qualifiers.values:
                table = await conn.get_class_table(*category)
                # Categories without a class type table are skipped.
                if table is None:
                    continue

                sql = _select_placex(t)\
                        .add_columns(t.c.importance)\
                        .join(table, t.c.place_id == table.c.place_id)\
                        .where(t.c.class_ == category[0])\
                        .where(t.c.type == category[1])

                if details.viewbox is not None and details.bounded_viewbox:
                    sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))

                if details.near and details.near_radius is not None:
                    sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
                             .where(table.c.centroid.within_distance(NEAR_PARAM,
                                                                     NEAR_RADIUS_PARAM))

                if self.countries:
                    sql = sql.where(t.c.country_code.in_(self.countries.values))

                sql = sql.limit(LIMIT_PARAM)
                rows.extend(await conn.execute(sql, bind_params))

        results = nres.SearchResults()
        for row in rows:
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            category_penalty = self.qualifiers.get_penalty((row.class_, row.type))
            result.accuracy = self.penalty + category_penalty
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)

        return results
|
||||
|
||||
|
||||
class CountrySearch(AbstractSearch):
    """ Search for a country, given by name or by country code.
    """
    SEARCH_PRIO = 0

    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.countries = sdata.countries

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Look up the countries in placex and fall back to the
            country tables when nothing is found there. When there
            are results, the rank range in `details` is adjusted.
        """
        t = conn.t.placex

        ccodes = self.countries.values
        sql = _select_placex(t).add_columns(t.c.importance)
        sql = sql.where(t.c.country_code.in_(ccodes))\
                 .where(t.c.rank_address == 4)

        if details.geometry_output:
            sql = _add_geometry_columns(sql, t.c.geometry, details)

        if details.excluded:
            sql = sql.where(_exclude_places(t))

        sql = filter_by_area(sql, t, details)

        results = nres.SearchResults()
        rows = await conn.execute(sql, _details_to_bind_params(details))
        for row in rows:
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            country_penalty = self.countries.get_penalty(row.country_code, 5.0)
            result.accuracy = self.penalty + country_penalty
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)

        if not results:
            results = await self.lookup_in_country_table(conn, details)

        if results:
            # min_rank must be computed before max_rank is overwritten.
            details.min_rank = min(5, details.max_rank)
            details.max_rank = min(25, details.max_rank)

        return results

    async def lookup_in_country_table(self, conn: SearchConnection,
                                      details: SearchDetails) -> nres.SearchResults:
        """ Search for the countries in the fallback country tables
            country_name and country_grid.
        """
        # Avoid the fallback search when this is a more search. Country results
        # usually are in the first batch of results and it is not possible
        # to exclude these fallbacks.
        if details.excluded:
            return nres.SearchResults()

        t = conn.t.country_name
        tgrid = conn.t.country_grid

        # Centroid and bounding box are computed per country from
        # the geometries in the grid table.
        grid_centroid = tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
        grid_bbox = tgrid.c.geometry.ST_Collect().ST_Expand(0)
        sql = sa.select(tgrid.c.country_code,
                        grid_centroid.label('centroid'),
                        grid_bbox.label('bbox'))\
                .where(tgrid.c.country_code.in_(self.countries.values))\
                .group_by(tgrid.c.country_code)

        sql = filter_by_area(sql, tgrid, details, avoid_index=True)

        sub = sql.subquery('grid')

        # Add the name information from the country_name table.
        sql = sa.select(t.c.country_code,
                        t.c.name.merge(t.c.derived_name).label('name'),
                        sub.c.centroid, sub.c.bbox)\
                .join(sub, t.c.country_code == sub.c.country_code)

        if details.geometry_output:
            sql = _add_geometry_columns(sql, sub.c.centroid, details)

        results = nres.SearchResults()
        rows = await conn.execute(sql, _details_to_bind_params(details))
        for row in rows:
            result = nres.create_from_country_row(row, nres.SearchResult)
            assert result
            result.bbox = Bbox.from_wkb(row.bbox)
            country_penalty = self.countries.get_penalty(row.country_code, 5.0)
            result.accuracy = self.penalty + country_penalty
            results.append(result)

        return results
|
||||
|
||||
|
||||
|
||||
class PostcodeSearch(AbstractSearch):
    """ Search for places by their postcode.
    """
    def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the postcode search against the database.

            Candidates come from the postcode table. For each candidate
            a postal code boundary relation from placex is returned
            instead when one exists for the same country and postcode.
        """
        tpc = conn.t.postcode

        query = sa.select(tpc.c.place_id, tpc.c.parent_place_id,
                          tpc.c.rank_search, tpc.c.rank_address,
                          tpc.c.postcode, tpc.c.country_code,
                          tpc.c.geometry.label('centroid'))
        query = query.where(tpc.c.postcode.in_(self.postcodes.values))

        if details.geometry_output:
            query = _add_geometry_columns(query, tpc.c.geometry, details)

        accuracy: SaExpression = sa.literal(self.penalty)

        # An unbounded viewbox only influences the ranking.
        if details.viewbox is not None and not details.bounded_viewbox:
            accuracy += sa.case((tpc.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
                                (tpc.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
                                else_=1.0)

        if details.near is not None:
            query = query.order_by(tpc.c.geometry.ST_Distance(NEAR_PARAM))

        query = filter_by_area(query, tpc, details)

        if self.countries:
            query = query.where(tpc.c.country_code.in_(self.countries.values))

        if details.excluded:
            query = query.where(_exclude_places(tpc))

        if self.lookups:
            assert len(self.lookups) == 1
            names = conn.t.search_name
            wanted = sa.type_coerce(self.lookups[0].tokens, IntArray)
            query = query.where(names.c.place_id == tpc.c.parent_place_id)\
                         .where((names.c.name_vector + names.c.nameaddress_vector)
                                .contains(wanted))

        for ranking in self.rankings:
            accuracy += ranking.sql_penalty(conn.t.search_name)
        accuracy += sa.case(*((tpc.c.postcode == v, p) for v, p in self.postcodes),
                            else_=1.0)

        query = query.add_columns(accuracy.label('accuracy'))
        query = query.order_by('accuracy').limit(LIMIT_PARAM)

        found = nres.SearchResults()
        for pcrow in await conn.execute(query, _details_to_bind_params(details)):
            placex = conn.t.placex
            boundary_sql = _select_placex(placex).add_columns(placex.c.importance)\
                             .where(sa.text("""class = 'boundary'
                                               AND type = 'postal_code'
                                               AND osm_type = 'R'"""))\
                             .where(placex.c.country_code == pcrow.country_code)\
                             .where(placex.c.postcode == pcrow.postcode)\
                             .limit(1)

            if details.geometry_output:
                boundary_sql = _add_geometry_columns(boundary_sql,
                                                     placex.c.geometry, details)

            boundary_rows = await conn.execute(boundary_sql,
                                               _details_to_bind_params(details))
            boundary = next(iter(boundary_rows), None)
            if boundary is None:
                result = nres.create_from_postcode_row(pcrow, nres.SearchResult)
            else:
                result = nres.create_from_placex_row(boundary, nres.SearchResult)

            assert result
            if result.place_id in details.excluded:
                continue

            result.accuracy = pcrow.accuracy
            found.append(result)

        return found
|
||||
|
||||
|
||||
|
||||
class PlaceSearch(AbstractSearch):
    """ Generic search for an address or named place.

        The search first preselects candidates from the search_name table
        (see `_inner_search_name_cte()`) and then joins them with placex
        to create the final results. When housenumbers are part of the
        search, matching address points are looked up below the found
        places.
    """
    SEARCH_PRIO = 1

    def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
        """ Create the search from the search data `sdata`.

            `extra_penalty` is added to the penalty of the search data.
            `expected_count` is compared against fixed thresholds to
            decide if additional prefiltering is applied.
        """
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.housenumbers = sdata.housenumbers
        self.qualifiers = sdata.qualifiers
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings
        self.expected_count = expected_count


    def _inner_search_name_cte(self, conn: SearchConnection,
                               details: SearchDetails) -> 'sa.CTE':
        """ Create a subquery that preselects the rows in the search_name
            table.

            The returned CTE is named 'searches' and has the columns
            place_id, search_rank, address_rank, country_code, centroid,
            importance and penalty.
        """
        t = conn.t.search_name

        # The penalty is the base penalty of the search plus the
        # penalties of all rankings.
        penalty: SaExpression = sa.literal(self.penalty)
        for ranking in self.rankings:
            penalty += ranking.sql_penalty(t)

        # Rows without a positive importance get a replacement value that
        # is computed from their search rank.
        sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank,
                        t.c.country_code, t.c.centroid,
                        t.c.name_vector, t.c.nameaddress_vector,
                        sa.case((t.c.importance > 0, t.c.importance),
                                else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
                          .label('importance'),
                        penalty.label('penalty'))

        for lookup in self.lookups:
            sql = sql.where(lookup.sql_condition(t))

        if self.countries:
            sql = sql.where(t.c.country_code.in_(self.countries.values))

        if self.postcodes:
            # if a postcode is given, don't search for state or country level objects
            sql = sql.where(t.c.address_rank > 9)
            if self.expected_count > 10000:
                # Many results expected. Restrict by postcode.
                tpc = conn.t.postcode
                sql = sql.where(sa.select(tpc.c.postcode)
                                  .where(tpc.c.postcode.in_(self.postcodes.values))
                                  .where(t.c.centroid.within_distance(tpc.c.geometry, 0.4))
                                  .exists())

        if details.viewbox is not None:
            if details.bounded_viewbox:
                # Bounded viewbox: results must lie inside the viewbox.
                sql = sql.where(t.c.centroid
                                   .intersects(VIEWBOX_PARAM,
                                               use_index=details.viewbox.area < 0.2))
            elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
                # Unbounded viewbox with many expected results and no other
                # restricting terms: prefilter with the VIEWBOX2 parameter.
                sql = sql.where(t.c.centroid
                                   .intersects(VIEWBOX2_PARAM,
                                               use_index=details.viewbox.area < 0.5))

        if details.near is not None and details.near_radius is not None:
            # Two different SQL conditions are used depending on the
            # size of the search radius.
            if details.near_radius < 0.1:
                sql = sql.where(t.c.centroid.within_distance(NEAR_PARAM,
                                                             NEAR_RADIUS_PARAM))
            else:
                sql = sql.where(t.c.centroid
                                 .ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)

        if self.housenumbers:
            sql = sql.where(t.c.address_rank.between(16, 30))
        else:
            # Exclusion and rank filters are only applied here when no
            # housenumber is searched for.
            if details.excluded:
                sql = sql.where(_exclude_places(t))
            if details.min_rank > 0:
                sql = sql.where(sa.or_(t.c.address_rank >= MIN_RANK_PARAM,
                                       t.c.search_rank >= MIN_RANK_PARAM))
            if details.max_rank < 30:
                sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM,
                                       t.c.search_rank <= MAX_RANK_PARAM))

        # Never hand more than 10000 candidates to the outer query.
        inner = sql.limit(10000).order_by(sa.desc(sa.text('importance'))).subquery()

        sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
                        inner.c.country_code, inner.c.centroid, inner.c.importance,
                        inner.c.penalty)

        # If the query is not an address search or has a geographic preference,
        # preselect most important items to restrict the number of places
        # that need to be looked up in placex.
        if not self.housenumbers\
           and (details.viewbox is None or details.bounded_viewbox)\
           and (details.near is None or details.near_radius is not None)\
           and not self.qualifiers:
            # 'min_penalty' is the smallest value of penalty - importance
            # over all candidates.
            sql = sql.add_columns(sa.func.first_value(inner.c.penalty - inner.c.importance)
                                    .over(order_by=inner.c.penalty - inner.c.importance)
                                    .label('min_penalty'))

            inner = sql.subquery()

            # Keep only candidates that are less than 0.5 worse than the best.
            sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
                            inner.c.country_code, inner.c.centroid, inner.c.importance,
                            inner.c.penalty)\
                    .where(inner.c.penalty - inner.c.importance < inner.c.min_penalty + 0.5)

        return sql.cte('searches')


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.

            For housenumber searches, the address points found below a
            place are added as results. The place itself is only added
            (with an extra penalty) when it passes the exclusion,
            qualifier and minimum rank filters.
        """
        t = conn.t.placex
        tsearch = self._inner_search_name_cte(conn, details)

        sql = _select_placex(t).join(tsearch, t.c.place_id == tsearch.c.place_id)

        if details.geometry_output:
            sql = _add_geometry_columns(sql, t.c.geometry, details)

        penalty: SaExpression = tsearch.c.penalty

        if self.postcodes:
            tpc = conn.t.postcode
            pcs = self.postcodes.values

            # Places with a different postcode are penalised with the
            # distance to the closest searched postcode, or with 2.0 when
            # no such distance can be computed.
            pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(t.c.centroid)))\
                      .where(tpc.c.postcode.in_(pcs))\
                      .scalar_subquery()
            penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
                               else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))

        if details.viewbox is not None and not details.bounded_viewbox:
            # An unbounded viewbox only influences the ranking.
            penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
                               (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
                               else_=1.0)

        if details.near is not None:
            # Report the negative distance as importance, so that ordering
            # by descending importance sorts the closest places first.
            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
                                  .label('importance'))
            sql = sql.order_by(sa.desc(sa.text('importance')))
        else:
            sql = sql.order_by(penalty - tsearch.c.importance)
            sql = sql.add_columns(tsearch.c.importance)


        sql = sql.add_columns(penalty.label('accuracy'))\
                 .order_by(sa.text('accuracy'))

        if self.housenumbers:
            # Alternation of all searched housenumbers, handed to the
            # SQL function RegexpWord.
            hnr_list = '|'.join(self.housenumbers.values)
            inner = sql.where(sa.or_(tsearch.c.address_rank < 30,
                                     sa.func.RegexpWord(hnr_list, t.c.housenumber)))\
                       .subquery()

            # Housenumbers from placex
            thnr = conn.t.placex.alias('hnr')
            pid_list = sa.func.ArrayAgg(thnr.c.place_id)
            place_sql = sa.select(pid_list)\
                          .where(thnr.c.parent_place_id == inner.c.place_id)\
                          .where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
                          .where(thnr.c.linked_place_id == None)\
                          .where(thnr.c.indexed_status == 0)

            if details.excluded:
                place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
            if self.qualifiers:
                place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))

            # Only purely numeric housenumbers with less than 8 digits are
            # looked up in the interpolation tables.
            numerals = [int(n) for n in self.housenumbers.values
                        if n.isdigit() and len(n) < 8]
            interpol_sql: SaColumn
            tiger_sql: SaColumn
            if numerals and \
               (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
                # Housenumbers from interpolations
                interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
                                                            numerals, details)
                # Housenumbers from Tiger
                tiger_sql = sa.case((inner.c.country_code == 'us',
                                     _make_interpolation_subquery(conn.t.tiger, inner,
                                                                  numerals, details)
                                    ), else_=None)
            else:
                interpol_sql = sa.null()
                tiger_sql = sa.null()

            unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
                               interpol_sql.label('interpol_hnr'),
                               tiger_sql.label('tiger_hnr')).subquery('unsort')
            # Rows with placex housenumbers come first, then rows with
            # interpolations, then Tiger, then rows without any match.
            sql = sa.select(unsort)\
                    .order_by(sa.case((unsort.c.placex_hnr != None, 1),
                                      (unsort.c.interpol_hnr != None, 2),
                                      (unsort.c.tiger_hnr != None, 3),
                                      else_=4),
                              unsort.c.accuracy)
        else:
            sql = sql.where(t.c.linked_place_id == None)\
                     .where(t.c.indexed_status == 0)
            if self.qualifiers:
                sql = sql.where(self.qualifiers.sql_restrict(t))
            if details.layers is not None:
                sql = sql.where(_filter_by_layer(t, details.layers))

        sql = sql.limit(LIMIT_PARAM)

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.bbox = Bbox.from_wkb(row.bbox)
            result.accuracy = row.accuracy
            if self.housenumbers and row.rank_address < 30:
                # The row is a parent place. Use the first available
                # source of housenumbers: placex, interpolations, Tiger.
                if row.placex_hnr:
                    subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
                elif row.interpol_hnr:
                    subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
                elif row.tiger_hnr:
                    subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
                else:
                    subs = None

                if subs is not None:
                    async for sub in subs:
                        assert sub.housenumber
                        sub.accuracy = result.accuracy
                        # Penalise address points where none of the
                        # ';'-separated housenumbers matches exactly.
                        if not any(nr in self.housenumbers.values
                                   for nr in sub.housenumber.split(';')):
                            sub.accuracy += 0.6
                        results.append(sub)

                # Only add the street as a result, if it meets all other
                # filter conditions.
                if (not details.excluded or result.place_id not in details.excluded)\
                   and (not self.qualifiers or result.category in self.qualifiers.values)\
                   and result.rank_address >= details.min_rank:
                    result.accuracy += 1.0 # penalty for missing housenumber
                    results.append(result)
            else:
                results.append(result)

        return results
|
||||
274
src/nominatim_api/search/geocoder.py
Normal file
274
src/nominatim_api/search/geocoder.py
Normal file
@@ -0,0 +1,274 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Public interface to the search code.
|
||||
"""
|
||||
from typing import List, Any, Optional, Iterator, Tuple, Dict
|
||||
import itertools
|
||||
import re
|
||||
import datetime as dt
|
||||
import difflib
|
||||
|
||||
from ..connection import SearchConnection
|
||||
from ..types import SearchDetails
|
||||
from ..results import SearchResult, SearchResults, add_result_details
|
||||
from ..logging import log
|
||||
from .token_assignment import yield_token_assignments
|
||||
from .db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
|
||||
from .db_searches import AbstractSearch
|
||||
from .query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
|
||||
from .query import Phrase, QueryStruct
|
||||
|
||||
class ForwardGeocoder:
    """ Main class responsible for place search.

        The geocoder analyses the query phrases, builds a list of abstract
        searches from them, executes the searches against the database
        and finally filters, reranks and cuts the collected results.
    """

    def __init__(self, conn: SearchConnection,
                 params: SearchDetails, timeout: Optional[int]) -> None:
        """ Create a geocoder that searches on connection `conn` with the
            search parameters `params`.

            `timeout` is the time budget in seconds for executing the
            database searches. With `None` or 0 a budget of 1000000
            seconds is used.
        """
        self.conn = conn
        self.params = params
        self.timeout = dt.timedelta(seconds=timeout or 1000000)
        # The analyzer is created on first use in build_searches().
        self.query_analyzer: Optional[AbstractQueryAnalyzer] = None


    @property
    def limit(self) -> int:
        """ Return the configured maximum number of search results.
        """
        return self.params.max_results


    async def build_searches(self,
                             phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
        """ Analyse the query and return the tokenized query and list of
            possible searches over it.

            The searches are sorted by penalty and search priority. The
            list is empty when the query has no token slots.
        """
        if self.query_analyzer is None:
            self.query_analyzer = await make_query_analyzer(self.conn)

        query = await self.query_analyzer.analyze_query(phrases)

        searches: List[AbstractSearch] = []
        if query.num_token_slots() > 0:
            # 2. Compute all possible search interpretations
            log().section('Compute abstract searches')
            search_builder = SearchBuilder(query, self.params)
            num_searches = 0
            for assignment in yield_token_assignments(query):
                searches.extend(search_builder.build(assignment))
                # Only dump the searches that this assignment has added.
                if num_searches < len(searches):
                    log().table_dump('Searches for assignment',
                                     _dump_searches(searches, query, num_searches))
                num_searches = len(searches)
            searches.sort(key=lambda s: (s.penalty, s.SEARCH_PRIO))

        return query, searches


    async def execute_searches(self, query: QueryStruct,
                               searches: List[AbstractSearch]) -> SearchResults:
        """ Run the abstract searches against the database until a result
            is found.

            `searches` is expected to be sorted by penalty. Searches are
            executed in order until their penalty gets too high compared
            to the results found so far, or until the timeout has passed.
            The timeout is only checked after a search has finished.
            Results that are found multiple times are returned once with
            the best accuracy. An empty list of searches yields an empty
            result.
        """
        log().section('Execute database searches')

        # Without searches there is nothing to execute. This also guards
        # the access to the first search below.
        if not searches:
            return SearchResults()

        results: Dict[Any, SearchResult] = {}

        end_time = dt.datetime.now() + self.timeout

        min_ranking = searches[0].penalty + 2.0
        prev_penalty = 0.0
        for i, search in enumerate(searches):
            # Searches with the same penalty as the previous one are always
            # executed. Otherwise stop when the penalty exceeds the current
            # threshold or when more than 21 searches have been run.
            if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
                break
            log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
            log().var_dump('Params', self.params)
            lookup_results = await search.lookup(self.conn, self.params)
            for result in lookup_results:
                rhash = (result.source_table, result.place_id,
                         result.housenumber, result.country_code)
                prevresult = results.get(rhash)
                if prevresult:
                    prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
                else:
                    results[rhash] = result
                # Good results lower the penalty threshold for later searches.
                min_ranking = min(min_ranking, result.accuracy * 1.2, 2.0)
            log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
            prev_penalty = search.penalty
            if dt.datetime.now() >= end_time:
                break

        return SearchResults(results.values())


    def pre_filter_results(self, results: SearchResults) -> SearchResults:
        """ Remove results that are significantly worse than the
            best match.

            Only results with a ranking less than 0.5 above the best
            ranking are kept.
        """
        if results:
            max_ranking = min(r.ranking for r in results) + 0.5
            results = SearchResults(r for r in results if r.ranking < max_ranking)

        return results


    def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
        """ Remove badly matching results, sort by ranking and
            limit to the configured number of results.

            The list handed in is sorted in place.
        """
        if results:
            results.sort(key=lambda r: r.ranking)
            min_rank = results[0].rank_search
            min_ranking = results[0].ranking
            # Results with a higher search rank than the best result need
            # a better ranking to be kept.
            results = SearchResults(r for r in results
                                    if r.ranking + 0.03 * (r.rank_search - min_rank)
                                       < min_ranking + 0.5)

            results = SearchResults(results[:self.limit])

        return results


    def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
        """ Adjust the accuracy of the localized result according to how well
            they match the original query.

            The accuracy of the results is changed in place. Results
            without a display name and results with a negative importance
            are left untouched.
        """
        assert self.query_analyzer is not None
        qwords = [word for phrase in query.source
                  for word in re.split('[, ]+', phrase.text) if word]
        if not qwords:
            return

        # The total length of the query words is the same for all results.
        # It is never 0 because qwords only contains non-empty words.
        query_len = sum(len(w) for w in qwords)

        for result in results:
            # Negative importance indicates ordering by distance, which is
            # more important than word matching.
            if not result.display_name\
               or (result.importance is not None and result.importance < 0):
                continue
            distance = 0.0
            norm = self.query_analyzer.normalize_text(' '.join((result.display_name,
                                                                result.country_code or '')))
            words = set((w for w in norm.split(' ') if w))
            if not words:
                continue
            for qword in qwords:
                # Similarity to the best matching word of the result.
                wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
                if wdist < 0.5:
                    # Too different: count the word as completely unmatched.
                    distance += len(qword)
                else:
                    distance += (1.0 - wdist) * len(qword)
            # Compensate for the fact that country names do not get a
            # match penalty yet by the tokenizer.
            # Temporary hack that needs to be removed!
            if result.rank_address == 4:
                distance *= 2
            result.accuracy += distance * 0.4 / query_len


    async def lookup_pois(self, categories: List[Tuple[str, str]],
                          phrases: List[Phrase]) -> SearchResults:
        """ Look up places by category. If phrase is given, a place search
            over the phrase will be executed first and places close to the
            results returned.
        """
        log().function('forward_lookup_pois', categories=categories, params=self.params)

        if phrases:
            query, searches = await self.build_searches(phrases)

            if query:
                # Only the 50 best searches are considered.
                searches = [wrap_near_search(categories, s) for s in searches[:50]]
                results = await self.execute_searches(query, searches)
                results = self.pre_filter_results(results)
                await add_result_details(self.conn, results, self.params)
                log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
                results = self.sort_and_cut_results(results)
            else:
                results = SearchResults()
        else:
            search = build_poi_search(categories, self.params.countries)
            results = await search.lookup(self.conn, self.params)
            await add_result_details(self.conn, results, self.params)

        log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results


    async def lookup(self, phrases: List[Phrase]) -> SearchResults:
        """ Look up a single free-text query.

            Returns an empty result when the search parameters can never
            yield a result or when no searches can be built for the
            phrases.
        """
        log().function('forward_lookup', phrases=phrases, params=self.params)
        results = SearchResults()

        if self.params.is_impossible():
            return results

        query, searches = await self.build_searches(phrases)

        if searches:
            # Execute SQL until an appropriate result is found.
            # Only the 50 best searches are considered.
            results = await self.execute_searches(query, searches[:50])
            results = self.pre_filter_results(results)
            await add_result_details(self.conn, results, self.params)
            log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
            self.rerank_by_query(query, results)
            log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
            results = self.sort_and_cut_results(results)
            log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results
|
||||
|
||||
|
||||
# pylint: disable=invalid-name,too-many-locals
|
||||
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
|
||||
start: int = 0) -> Iterator[Optional[List[Any]]]:
|
||||
yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
|
||||
'Qualifier', 'Catgeory', 'Rankings']
|
||||
|
||||
def tk(tl: List[int]) -> str:
|
||||
tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
|
||||
|
||||
return f"[{','.join(tstr)}]"
|
||||
|
||||
def fmt_ranking(f: Any) -> str:
|
||||
if not f:
|
||||
return ''
|
||||
ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
|
||||
if len(ranks) > 100:
|
||||
ranks = ranks[:100] + '...'
|
||||
return f"{f.column}({ranks},def={f.default:.3g})"
|
||||
|
||||
def fmt_lookup(l: Any) -> str:
|
||||
if not l:
|
||||
return ''
|
||||
|
||||
return f"{l.lookup_type}({l.column}{tk(l.tokens)})"
|
||||
|
||||
|
||||
def fmt_cstr(c: Any) -> str:
|
||||
if not c:
|
||||
return ''
|
||||
|
||||
return f'{c[0]}^{c[1]}'
|
||||
|
||||
for search in searches[start:]:
|
||||
fields = ('lookups', 'rankings', 'countries', 'housenumbers',
|
||||
'postcodes', 'qualifiers')
|
||||
if hasattr(search, 'search'):
|
||||
iters = itertools.zip_longest([f"{search.penalty:.3g}"],
|
||||
*(getattr(search.search, attr, []) for attr in fields),
|
||||
getattr(search, 'categories', []),
|
||||
fillvalue='')
|
||||
else:
|
||||
iters = itertools.zip_longest([f"{search.penalty:.3g}"],
|
||||
*(getattr(search, attr, []) for attr in fields),
|
||||
[],
|
||||
fillvalue='')
|
||||
for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
|
||||
yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
|
||||
fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
|
||||
yield None
|
||||
314
src/nominatim_api/search/icu_tokenizer.py
Normal file
314
src/nominatim_api/search/icu_tokenizer.py
Normal file
@@ -0,0 +1,314 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of query analysis for the ICU tokenizer.
|
||||
"""
|
||||
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
|
||||
from collections import defaultdict
|
||||
import dataclasses
|
||||
import difflib
|
||||
|
||||
from icu import Transliterator
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim_core.typing import SaRow
|
||||
from nominatim_core.db.sqlalchemy_types import Json
|
||||
from ..connection import SearchConnection
|
||||
from ..logging import log
|
||||
from ..search import query as qmod
|
||||
from ..search.query_analyzer_factory import AbstractQueryAnalyzer
|
||||
|
||||
|
||||
# Maps the type of a row in the word table to the token type used in
# the query structure. The row type 'S' is not listed here.
DB_TO_TOKEN_TYPE = {
    'W': qmod.TokenType.WORD,
    'w': qmod.TokenType.PARTIAL,
    'H': qmod.TokenType.HOUSENUMBER,
    'P': qmod.TokenType.POSTCODE,
    'C': qmod.TokenType.COUNTRY
}
|
||||
|
||||
|
||||
class QueryPart(NamedTuple):
    """ A single term of the query in its normalized and its
        transliterated form.

        Transliteration may split a word into multiple terms. In that
        case every term carries the complete word as it was before
        transliteration as normalized string. The word number counts the
        words before transliteration, so terms with the same word number
        are partial transliterations of the same word.
    """
    # transliterated form of the term
    token: str
    # normalized form of the full word the term comes from
    normalized: str
    # number of the originating word before transliteration
    word_number: int
|
||||
|
||||
|
||||
# Sequence of query parts.
QueryParts = List[QueryPart]
# Maps a word string to the list of token ranges associated with it.
WordDict = Dict[str, List[qmod.TokenRange]]
|
||||
|
||||
def yield_words(terms: List[QueryPart], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]:
    """ Enumerate the words that can be built from consecutive terms
        at or after position `start`.

        Each word is yielded together with the range of terms it covers.
        A word consists of at most 20 terms, joined by single spaces.
    """
    count = len(terms)
    for begin in range(start, count):
        tokens = [terms[begin].token]
        yield tokens[0], qmod.TokenRange(begin, begin + 1)

        stop = min(begin + 20, count)
        for end in range(begin + 1, stop):
            tokens.append(terms[end].token)
            yield ' '.join(tokens), qmod.TokenRange(begin, end + 1)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class ICUToken(qmod.Token):
    """ Token with the additional information the ICU tokenizer provides.
    """
    word_token: str
    info: Optional[Dict[str, Any]]

    def get_category(self) -> Tuple[str, str]:
        assert self.info
        info = self.info
        return info.get('class', ''), info.get('type', '')


    def rematch(self, norm: str) -> None:
        """ Compare the lookup word of the token with the normalized
            string `norm` and increase the token penalty according to
            the differences found.
        """
        lookup = self.lookup_word
        if not lookup:
            return

        matcher = difflib.SequenceMatcher(a=lookup, b=norm)
        distance = 0
        for tag, afrom, ato, bfrom, bto in matcher.get_opcodes():
            alen = ato - afrom
            blen = bto - bfrom
            # Additions and removals at either end of the lookup word
            # count only once, independent of their length.
            at_edge = afrom == 0 or ato == len(lookup)
            if tag == 'replace':
                distance += max(alen, blen)
            elif tag in ('delete', 'insert') and at_edge:
                distance += 1
            elif tag != 'equal':
                distance += abs(alen - blen)

        self.penalty += (distance/len(lookup))


    @staticmethod
    def from_db_row(row: SaRow) -> 'ICUToken':
        """ Build an ICUToken from a row of the word table.
        """
        info = {} if row.info is None else row.info
        wtype = row.type
        wtoken = row.word_token

        # The initial penalty depends on the type of the word.
        penalty = 0.0
        if wtype == 'w':
            penalty = 0.3
        elif wtype == 'W':
            if len(wtoken) == 1 and wtoken == row.word:
                penalty = 0.2 if row.word.isdigit() else 0.3
        elif wtype == 'H':
            penalty = sum(0.1 for c in wtoken if c != ' ' and not c.isdigit())
            if not any(c.isdigit() for c in wtoken):
                penalty += 0.2 * (len(wtoken) - 1)
        elif wtype == 'C':
            if len(wtoken) == 1:
                penalty = 0.3

        # Use the part of the lookup word before the first '@'. Fall back
        # to the word token when there is no lookup word.
        lookup_word = info.get('lookup', row.word)
        if lookup_word:
            lookup_word = lookup_word.split('@', 1)[0]
        else:
            lookup_word = wtoken

        return ICUToken(penalty=penalty, token=row.word_id,
                        count=max(1, info.get('count', 1)),
                        lookup_word=lookup_word, is_indexed=True,
                        word_token=wtoken, info=row.info,
                        addr_count=max(1, info.get('addr_count', 1)))
|
||||
|
||||
|
||||
|
||||
class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
""" Converter for query strings into a tokenized query
|
||||
using the tokens created by a ICU tokenizer.
|
||||
"""
|
||||
|
||||
def __init__(self, conn: SearchConnection) -> None:
    """ Create an analyzer that works on the given search connection.
    """
    self.conn = conn
|
||||
|
||||
|
||||
async def setup(self) -> None:
    """ Prepare the data structures the analysis relies on: the
        normalizer, the transliterator and the definition of the
        word table.
    """
    def _rules_loader(prop: str, rule_id: str) -> Any:
        # Return a coroutine function that creates a Transliterator from
        # the rules saved in the given database property.
        async def _load() -> Any:
            rules = await self.conn.get_property(prop)
            return Transliterator.createFromRules(rule_id, rules)

        return _load

    self.normalizer = await self.conn.get_cached_value(
        'ICUTOK', 'normalizer',
        _rules_loader('tokenizer_import_normalisation', "normalization"))

    self.transliterator = await self.conn.get_cached_value(
        'ICUTOK', 'transliterator',
        _rules_loader('tokenizer_import_transliteration', "transliteration"))

    # Register the word table unless it is already known.
    if 'word' in self.conn.t.meta.tables:
        return

    sa.Table('word', self.conn.t.meta,
             sa.Column('word_id', sa.Integer),
             sa.Column('word_token', sa.Text, nullable=False),
             sa.Column('type', sa.Text, nullable=False),
             sa.Column('word', sa.Text),
             sa.Column('info', Json))
|
||||
|
||||
|
||||
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
|
||||
""" Analyze the given list of phrases and return the
|
||||
tokenized query.
|
||||
"""
|
||||
log().section('Analyze query (using ICU tokenizer)')
|
||||
normalized = list(filter(lambda p: p.text,
|
||||
(qmod.Phrase(p.ptype, self.normalize_text(p.text))
|
||||
for p in phrases)))
|
||||
query = qmod.QueryStruct(normalized)
|
||||
log().var_dump('Normalized query', query.source)
|
||||
if not query.source:
|
||||
return query
|
||||
|
||||
parts, words = self.split_query(query)
|
||||
log().var_dump('Transliterated query', lambda: _dump_transliterated(query, parts))
|
||||
|
||||
for row in await self.lookup_in_db(list(words.keys())):
|
||||
for trange in words[row.word_token]:
|
||||
token = ICUToken.from_db_row(row)
|
||||
if row.type == 'S':
|
||||
if row.info['op'] in ('in', 'near'):
|
||||
if trange.start == 0:
|
||||
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
|
||||
else:
|
||||
if trange.start == 0 and trange.end == query.num_token_slots():
|
||||
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
|
||||
else:
|
||||
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
|
||||
else:
|
||||
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
|
||||
|
||||
self.add_extra_tokens(query, parts)
|
||||
self.rerank_tokens(query, parts)
|
||||
|
||||
log().table_dump('Word tokens', _dump_word_tokens(query))
|
||||
|
||||
return query
|
||||
|
||||
|
||||
def normalize_text(self, text: str) -> str:
|
||||
""" Bring the given text into a normalized form. That is the
|
||||
standardized form search will work with. All information removed
|
||||
at this stage is inevitably lost.
|
||||
"""
|
||||
return cast(str, self.normalizer.transliterate(text))
|
||||
|
||||
|
||||
def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
|
||||
""" Transliterate the phrases and split them into tokens.
|
||||
|
||||
Returns the list of transliterated tokens together with their
|
||||
normalized form and a dictionary of words for lookup together
|
||||
with their position.
|
||||
"""
|
||||
parts: QueryParts = []
|
||||
phrase_start = 0
|
||||
words = defaultdict(list)
|
||||
wordnr = 0
|
||||
for phrase in query.source:
|
||||
query.nodes[-1].ptype = phrase.ptype
|
||||
for word in phrase.text.split(' '):
|
||||
trans = self.transliterator.transliterate(word)
|
||||
if trans:
|
||||
for term in trans.split(' '):
|
||||
if term:
|
||||
parts.append(QueryPart(term, word, wordnr))
|
||||
query.add_node(qmod.BreakType.TOKEN, phrase.ptype)
|
||||
query.nodes[-1].btype = qmod.BreakType.WORD
|
||||
wordnr += 1
|
||||
query.nodes[-1].btype = qmod.BreakType.PHRASE
|
||||
|
||||
for word, wrange in yield_words(parts, phrase_start):
|
||||
words[word].append(wrange)
|
||||
|
||||
phrase_start = len(parts)
|
||||
query.nodes[-1].btype = qmod.BreakType.END
|
||||
|
||||
return parts, words
|
||||
|
||||
|
||||
async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]':
|
||||
""" Return the token information from the database for the
|
||||
given word tokens.
|
||||
"""
|
||||
t = self.conn.t.meta.tables['word']
|
||||
return await self.conn.execute(t.select().where(t.c.word_token.in_(words)))
|
||||
|
||||
|
||||
def add_extra_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:
|
||||
""" Add tokens to query that are not saved in the database.
|
||||
"""
|
||||
for part, node, i in zip(parts, query.nodes, range(1000)):
|
||||
if len(part.token) <= 4 and part[0].isdigit()\
|
||||
and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
|
||||
query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
|
||||
ICUToken(0.5, 0, 1, 1, part.token, True, part.token, None))
|
||||
|
||||
|
||||
def rerank_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:
|
||||
""" Add penalties to tokens that depend on presence of other token.
|
||||
"""
|
||||
for i, node, tlist in query.iter_token_lists():
|
||||
if tlist.ttype == qmod.TokenType.POSTCODE:
|
||||
for repl in node.starting:
|
||||
if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \
|
||||
and (repl.ttype != qmod.TokenType.HOUSENUMBER
|
||||
or len(tlist.tokens[0].lookup_word) > 4):
|
||||
repl.add_penalty(0.39)
|
||||
elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
|
||||
and len(tlist.tokens[0].lookup_word) <= 3:
|
||||
if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
|
||||
for repl in node.starting:
|
||||
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
|
||||
repl.add_penalty(0.5 - tlist.tokens[0].penalty)
|
||||
elif tlist.ttype not in (qmod.TokenType.COUNTRY, qmod.TokenType.PARTIAL):
|
||||
norm = parts[i].normalized
|
||||
for j in range(i + 1, tlist.end):
|
||||
if parts[j - 1].word_number != parts[j].word_number:
|
||||
norm += ' ' + parts[j].normalized
|
||||
for token in tlist.tokens:
|
||||
cast(ICUToken, token).rematch(norm)
|
||||
|
||||
|
||||
def _dump_transliterated(query: qmod.QueryStruct, parts: QueryParts) -> str:
|
||||
out = query.nodes[0].btype.value
|
||||
for node, part in zip(query.nodes[1:], parts):
|
||||
out += part.token + node.btype.value
|
||||
return out
|
||||
|
||||
|
||||
def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
    """ Yield a table of all tokens in the query for debug output.
        The first row contains the column headers.
    """
    yield ['type', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
    token_lists = (tlist for node in query.nodes for tlist in node.starting)
    for tlist in token_lists:
        type_name = tlist.ttype.name
        for token in tlist.tokens:
            icu_token = cast(ICUToken, token)
            yield [type_name, icu_token.token, icu_token.word_token or '',
                   icu_token.lookup_word or '', icu_token.penalty,
                   icu_token.count, icu_token.info]
|
||||
|
||||
|
||||
async def create_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
    """ Return a ready-to-use query analyzer for a database that
        uses the ICU tokenizer. The analyzer is already set up.
    """
    analyzer = ICUQueryAnalyzer(conn)
    await analyzer.setup()

    return analyzer
|
||||
272
src/nominatim_api/search/legacy_tokenizer.py
Normal file
272
src/nominatim_api/search/legacy_tokenizer.py
Normal file
@@ -0,0 +1,272 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of query analysis for the legacy tokenizer.
|
||||
"""
|
||||
from typing import Tuple, Dict, List, Optional, Iterator, Any, cast
|
||||
from copy import copy
|
||||
from collections import defaultdict
|
||||
import dataclasses
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim_core.typing import SaRow
|
||||
from ..connection import SearchConnection
|
||||
from ..logging import log
|
||||
from . import query as qmod
|
||||
from .query_analyzer_factory import AbstractQueryAnalyzer
|
||||
|
||||
def yield_words(terms: List[str], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]:
    """ Yield every run of consecutive terms that begins at or after
        position 'start', joined with single spaces, together with
        the token range it covers. Runs are at most 20 terms long.
    """
    total = len(terms)
    for first in range(start, total):
        longest_end = min(first + 20, total)
        for end in range(first + 1, longest_end + 1):
            yield ' '.join(terms[first:end]), qmod.TokenRange(first, end)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class LegacyToken(qmod.Token):
    """ Token type used by the query analyzer for the legacy tokenizer.
    """
    word_token: str
    category: Optional[Tuple[str, str]]
    country: Optional[str]
    operator: Optional[str]

    @property
    def info(self) -> Dict[str, Any]:
        """ Extra properties of the token collected in a dictionary.
            Meant for debug output only.
        """
        return dict(category=self.category,
                    country=self.country,
                    operator=self.operator)


    def get_category(self) -> Tuple[str, str]:
        """ Return the category of the token. The token must have one.
        """
        category = self.category
        assert category
        return category
|
||||
|
||||
|
||||
class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
    """ Converter for query strings into a tokenized query
        using the tokens created by a legacy tokenizer.
    """

    def __init__(self, conn: SearchConnection) -> None:
        self.conn = conn

    async def setup(self) -> None:
        """ Set up static data structures needed for the analysis:
            the maximum word frequency configured for the database
            and the layout of the legacy 'word' table.
        """
        self.max_word_freq = int(await self.conn.get_property('tokenizer_maxwordfreq'))
        # Register the table layout only once per metadata object.
        if 'word' not in self.conn.t.meta.tables:
            sa.Table('word', self.conn.t.meta,
                     sa.Column('word_id', sa.Integer),
                     sa.Column('word_token', sa.Text, nullable=False),
                     sa.Column('word', sa.Text),
                     sa.Column('class', sa.Text),
                     sa.Column('type', sa.Text),
                     sa.Column('country_code', sa.Text),
                     sa.Column('search_name_count', sa.Integer),
                     sa.Column('operator', sa.Text))


    async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
        """ Analyze the given list of phrases and return the
            tokenized query.

            Phrases are normalized with the database function
            make_standard_name(). Phrases that are empty after
            normalization are dropped.
        """
        log().section('Analyze query (using Legacy tokenizer)')

        normalized = []
        if phrases:
            # All phrases are normalized in a single query. Only the
            # first result row is used.
            for row in await self.conn.execute(sa.select(*(sa.func.make_standard_name(p.text)
                                                           for p in phrases))):
                normalized = [qmod.Phrase(p.ptype, r) for r, p in zip(row, phrases) if r]
                break

        query = qmod.QueryStruct(normalized)
        log().var_dump('Normalized query', query.source)
        if not query.source:
            return query

        parts, words = self.split_query(query)
        lookup_words = list(words.keys())
        log().var_dump('Split query', parts)
        log().var_dump('Extracted words', lookup_words)

        for row in await self.lookup_in_db(lookup_words):
            # Word tokens may come with a leading blank in the database,
            # the lookup dictionary is keyed on the stripped form.
            for trange in words[row.word_token.strip()]:
                token, ttype = self.make_token(row)
                if ttype == qmod.TokenType.NEAR_ITEM:
                    # Near items are only accepted at the start of the query.
                    if trange.start == 0:
                        query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                elif ttype == qmod.TokenType.QUALIFIER:
                    query.add_token(trange, qmod.TokenType.QUALIFIER, token)
                    if trange.start == 0 or trange.end == query.num_token_slots():
                        # At the edges of the query the term is added a
                        # second time as near item. Work on a copy so that
                        # the extra penalty does not leak into the qualifier.
                        token = copy(token)
                        token.penalty += 0.1 * (query.num_token_slots())
                        query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
                    # Partial tokens are only used when they span a single term.
                    query.add_token(trange, ttype, token)

        self.add_extra_tokens(query, parts)
        self.rerank_tokens(query)

        log().table_dump('Word tokens', _dump_word_tokens(query))

        return query


    def normalize_text(self, text: str) -> str:
        """ Bring the given text into a normalized form.

            This only removes case, so some difference with the normalization
            in the phrase remains.
        """
        return text.lower()


    def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
                                                            Dict[str, List[qmod.TokenRange]]]:
        """ Split the phrases of the query into terms and append a
            node to the query for each term.

            Returns the list of terms and a dictionary of words for
            lookup together with the token ranges they cover.
        """
        parts: List[str] = []
        phrase_start = 0
        words = defaultdict(list)
        for phrase in query.source:
            query.nodes[-1].ptype = phrase.ptype
            # The phrase is split on single blanks, so each non-empty
            # piece is exactly one term that ends with a word break.
            for term in phrase.text.split(' '):
                if term:
                    parts.append(term)
                    query.add_node(qmod.BreakType.WORD, phrase.ptype)
            query.nodes[-1].btype = qmod.BreakType.PHRASE
            for word, wrange in yield_words(parts, phrase_start):
                words[word].append(wrange)
            phrase_start = len(parts)
        query.nodes[-1].btype = qmod.BreakType.END

        return parts, words


    async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]':
        """ Return the token information from the database for the
            given word tokens. Each word is looked up as given and
            with a leading blank.
        """
        t = self.conn.t.meta.tables['word']

        sql = t.select().where(t.c.word_token.in_(words + [' ' + w for w in words]))

        return await self.conn.execute(sql)


    def make_token(self, row: SaRow) -> Tuple[LegacyToken, qmod.TokenType]:
        """ Create a LegacyToken from the row of the word table.
            Also determines the type of token.
        """
        penalty = 0.0
        is_indexed = True

        # 'class' is a keyword and cannot be accessed as row.class.
        rowclass = getattr(row, 'class')

        if row.country_code is not None:
            ttype = qmod.TokenType.COUNTRY
            lookup_word = row.country_code
        elif rowclass is not None:
            if rowclass == 'place' and row.type == 'house':
                ttype = qmod.TokenType.HOUSENUMBER
                lookup_word = row.word_token[1:]
            elif rowclass == 'place' and row.type == 'postcode':
                ttype = qmod.TokenType.POSTCODE
                lookup_word = row.word_token[1:]
            else:
                ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
                    else qmod.TokenType.QUALIFIER
                lookup_word = row.word
        elif row.word_token.startswith(' '):
            ttype = qmod.TokenType.WORD
            lookup_word = row.word or row.word_token[1:]
        else:
            ttype = qmod.TokenType.PARTIAL
            lookup_word = row.word_token
            penalty = 0.21
            # search_name_count may be NULL (see the handling of 'count'
            # below). Treat a missing count as 0 instead of failing on
            # the comparison.
            if (row.search_name_count or 0) > self.max_word_freq:
                is_indexed = False

        return LegacyToken(penalty=penalty, token=row.word_id,
                           count=max(1, row.search_name_count or 1),
                           addr_count=1,  # not supported
                           lookup_word=lookup_word,
                           word_token=row.word_token.strip(),
                           category=(rowclass, row.type) if rowclass is not None else None,
                           country=row.country_code,
                           operator=row.operator,
                           is_indexed=is_indexed),\
            ttype


    def add_extra_tokens(self, query: qmod.QueryStruct, parts: List[str]) -> None:
        """ Add tokens to query that are not saved in the database:
            a house number token for each numeric term of at most four
            characters that has no house number token yet.
        """
        for part, node, i in zip(parts, query.nodes, range(1000)):
            if len(part) <= 4 and part.isdigit()\
               and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
                query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
                                LegacyToken(penalty=0.5, token=0, count=1, addr_count=1,
                                            lookup_word=part, word_token=part,
                                            category=None, country=None,
                                            operator=None, is_indexed=True))


    def rerank_tokens(self, query: qmod.QueryStruct) -> None:
        """ Add penalties to tokens that depend on presence of other token.
        """
        for _, node, tlist in query.iter_token_lists():
            if tlist.ttype == qmod.TokenType.POSTCODE:
                # Penalise alternatives to a postcode over the same range.
                # House numbers are only penalised for longer postcodes.
                for repl in node.starting:
                    if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \
                       and (repl.ttype != qmod.TokenType.HOUSENUMBER
                            or len(tlist.tokens[0].lookup_word) > 4):
                        repl.add_penalty(0.39)
            elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
                    and len(tlist.tokens[0].lookup_word) <= 3:
                # Short house numbers containing a digit push back all
                # other readings of the same range.
                if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
                    for repl in node.starting:
                        if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
                            repl.add_penalty(0.5 - tlist.tokens[0].penalty)
|
||||
|
||||
|
||||
|
||||
def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
    """ Yield a table of all tokens in the query for debug output.
        The first row contains the column headers.
    """
    yield ['type', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
    token_lists = (tlist for node in query.nodes for tlist in node.starting)
    for tlist in token_lists:
        type_name = tlist.ttype.name
        for token in tlist.tokens:
            legacy_token = cast(LegacyToken, token)
            yield [type_name, legacy_token.token, legacy_token.word_token or '',
                   legacy_token.lookup_word or '', legacy_token.penalty,
                   legacy_token.count, legacy_token.info]
|
||||
|
||||
|
||||
async def create_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
    """ Create and set up a new query analyzer for a database based
        on the legacy tokenizer.
    """
    analyzer = LegacyQueryAnalyzer(conn)
    await analyzer.setup()

    return analyzer
|
||||
297
src/nominatim_api/search/query.py
Normal file
297
src/nominatim_api/search/query.py
Normal file
@@ -0,0 +1,297 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Datastructures for a tokenized query.
|
||||
"""
|
||||
from typing import List, Tuple, Optional, Iterator
|
||||
from abc import ABC, abstractmethod
|
||||
import dataclasses
|
||||
import enum
|
||||
|
||||
class BreakType(enum.Enum):
    """ Kinds of breaks that can appear between two tokens.
        The value is the character used to represent the break.
    """
    START = '<'
    """ The query begins here. """
    END = '>'
    """ The query ends here. """
    PHRASE = ','
    """ Boundary between two phrases. """
    WORD = ' '
    """ Boundary between two words. """
    PART = '-'
    """ Boundary within a word, e.g. at a hyphen or an apostrophe. """
    TOKEN = '`'
    """ Boundary introduced by the tokenization itself.
        Languages that write words without spaces may produce these.
    """
|
||||
|
||||
|
||||
class TokenType(enum.Enum):
    """ Kinds of tokens that can be assigned to a part of the query.
    """
    WORD = enum.auto()
    """ Complete name of a place. """
    PARTIAL = enum.auto()
    """ Single word term without breaks. Need not be a full name. """
    HOUSENUMBER = enum.auto()
    """ Term for a house number. """
    POSTCODE = enum.auto()
    """ Term for a postal code. """
    COUNTRY = enum.auto()
    """ Name of or reference to a country. """
    QUALIFIER = enum.auto()
    """ Special term that goes together with a name (e.g. _Hotel_ Bellevue). """
    NEAR_ITEM = enum.auto()
    """ Special term used as the searched object (e.g. supermarket in ...). """
|
||||
|
||||
|
||||
class PhraseType(enum.Enum):
    """ Designation that a phrase of the query may carry.
    """
    NONE = 0
    """ Undesignated phrase (i.e. it comes from a free-form query). """
    AMENITY = enum.auto()
    """ Name or type of a POI. """
    STREET = enum.auto()
    """ Street name, optionally with a housenumber. """
    CITY = enum.auto()
    """ The postal city. """
    COUNTY = enum.auto()
    """ The equivalent of a county. """
    STATE = enum.auto()
    """ A state or province. """
    POSTCODE = enum.auto()
    """ A postal code. """
    COUNTRY = enum.auto()
    """ The name or code of a country. """

    def compatible_with(self, ttype: TokenType,
                        is_full_phrase: bool) -> bool:
        """ Return true if a token of the given type may be used in a
            phrase of this type. 'is_full_phrase' states whether the
            token covers the complete phrase.
        """
        name_types = (TokenType.WORD, TokenType.PARTIAL)

        if self is PhraseType.NONE:
            # Anything goes, except a qualifier that makes up the
            # entire phrase.
            return not (is_full_phrase and ttype == TokenType.QUALIFIER)

        if self is PhraseType.AMENITY:
            if ttype in name_types:
                return True
            # Special terms count as near item when they cover the full
            # phrase and as qualifier otherwise.
            special = TokenType.NEAR_ITEM if is_full_phrase else TokenType.QUALIFIER
            return ttype == special

        if self is PhraseType.STREET:
            return ttype in name_types or ttype == TokenType.HOUSENUMBER

        if self is PhraseType.POSTCODE:
            return ttype == TokenType.POSTCODE

        if self is PhraseType.COUNTRY:
            return ttype == TokenType.COUNTRY

        # All remaining phrase types only take names.
        return ttype in name_types
|
||||
|
||||
|
||||
@dataclasses.dataclass
class Token(ABC):
    """ Abstract base class for all tokens.
        Each query analyzer has to provide its own concrete token class.
    """

    penalty: float
    token: int
    count: int
    addr_count: int
    lookup_word: str
    is_indexed: bool


    @abstractmethod
    def get_category(self) -> Tuple[str, str]:
        """ Return the category restriction that applies to
            qualifier terms and category objects.
        """
|
||||
|
||||
@dataclasses.dataclass
class TokenRange:
    """ Start and end index of the query nodes that a token spans.
    """
    start: int
    end: int

    # Ranges are only ordered when they do not overlap: a range is
    # smaller than another when it ends at or before the other's start.
    # The non-strict comparisons are deliberately left unimplemented.

    def __lt__(self, other: 'TokenRange') -> bool:
        return other.start >= self.end


    def __le__(self, other: 'TokenRange') -> bool:
        return NotImplemented


    def __gt__(self, other: 'TokenRange') -> bool:
        return other.end <= self.start


    def __ge__(self, other: 'TokenRange') -> bool:
        return NotImplemented


    def replace_start(self, new_start: int) -> 'TokenRange':
        """ Return a copy of the range that begins at 'new_start'.
        """
        return TokenRange(start=new_start, end=self.end)


    def replace_end(self, new_end: int) -> 'TokenRange':
        """ Return a copy of the range that finishes at 'new_end'.
        """
        return TokenRange(start=self.start, end=new_end)


    def split(self, index: int) -> Tuple['TokenRange', 'TokenRange']:
        """ Cut the range in two at the given index, which must lie
            within the range. Returns the front and the back part.
        """
        front = self.replace_end(index)
        back = self.replace_start(index)
        return front, back
|
||||
|
||||
|
||||
@dataclasses.dataclass
class TokenList:
    """ All tokens of one type that lead from a breakpoint to the
        breakpoint with index 'end'.
    """
    end: int
    ttype: TokenType
    tokens: List[Token]


    def add_penalty(self, penalty: float) -> None:
        """ Increase the penalty of every token in the list by the
            given amount.
        """
        for entry in self.tokens:
            entry.penalty = entry.penalty + penalty
|
||||
|
||||
|
||||
@dataclasses.dataclass
class QueryNode:
    """ A break between two terms of the query together with the
        token lists that start at this break.
    """
    btype: BreakType
    ptype: PhraseType
    starting: List[TokenList] = dataclasses.field(default_factory=list)

    def has_tokens(self, end: int, *ttypes: TokenType) -> bool:
        """ Return true if a token list of one of the given types
            starts here and ends at node 'end'.
        """
        for tlist in self.starting:
            if tlist.end == end and tlist.ttype in ttypes:
                return True
        return False


    def get_tokens(self, end: int, ttype: TokenType) -> Optional[List[Token]]:
        """ Return the tokens of the given type that start at this node
            and end at node 'end', or 'None' when there are none.
        """
        matches = (tlist.tokens for tlist in self.starting
                   if tlist.end == end and tlist.ttype == ttype)
        return next(matches, None)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class Phrase:
    """ One normalized part of the query. A phrase can carry a type,
        in which case it stands for a specific part of the address.
    """
    ptype: PhraseType
    text: str
|
||||
|
||||
|
||||
class QueryStruct:
    """ A tokenized search query and the normalized source phrases
        it was created from.

        The query consists of a list of nodes that stand for the breaks
        between words. Tokens lead from one node to another node, which
        need not be the next one. The query thus forms a directed
        acyclic graph.

        A new query has exactly one node, the start of the query.
        More nodes are added by appending to 'nodes'.
    """

    def __init__(self, source: List[Phrase]) -> None:
        self.source = source
        # The start node takes over the type of the first phrase, if any.
        first_ptype = source[0].ptype if source else PhraseType.NONE
        self.nodes: List[QueryNode] = [QueryNode(BreakType.START, first_ptype)]


    def num_token_slots(self) -> int:
        """ Return the number of steps between nodes in the query.
        """
        return len(self.nodes) - 1


    def add_node(self, btype: BreakType, ptype: PhraseType) -> None:
        """ Add a break node of the given break type at the end of the
            query. 'ptype' is the phrase type that applies to tokens
            starting at the new node.
        """
        node = QueryNode(btype, ptype)
        self.nodes.append(node)


    def add_token(self, trange: TokenRange, ttype: TokenType, token: Token) -> None:
        """ Insert 'token' with type 'ttype' into the query so that it
            spans the nodes given by 'trange'. Both nodes must exist
            and are expected to belong to the same phrase.

            A token whose type is not compatible with the phrase type
            of the start node is dropped without notice.
        """
        snode = self.nodes[trange.start]
        # The token covers a full phrase when it starts and ends on a
        # phrase boundary.
        full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\
            and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END)
        if not snode.ptype.compatible_with(ttype, full_phrase):
            return

        existing = snode.get_tokens(trange.end, ttype)
        if existing is not None:
            existing.append(token)
        else:
            snode.starting.append(TokenList(trange.end, ttype, [token]))


    def get_tokens(self, trange: TokenRange, ttype: TokenType) -> List[Token]:
        """ Return the tokens of type 'ttype' that span the given
            range. The nodes must exist. The result is an empty list
            when there are no such tokens.
        """
        start_node = self.nodes[trange.start]
        return start_node.get_tokens(trange.end, ttype) or []


    def get_partials_list(self, trange: TokenRange) -> List[Token]:
        """ Return the first PARTIAL token of each single step
            within the given range. Such tokens are assumed to exist
            for every step.
        """
        partials = []
        for i in range(trange.start, trange.end):
            step_tokens = self.get_tokens(TokenRange(i, i+1), TokenType.PARTIAL)
            partials.append(next(iter(step_tokens)))
        return partials


    def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
        """ Iterate over all token lists of the query. Yields the index
            of the start node, the start node and the token list.
        """
        for i, node in enumerate(self.nodes):
            yield from ((i, node, tlist) for tlist in node.starting)


    def find_lookup_word_by_id(self, token: int) -> str:
        """ Return the lookup word of the first token with the given
            token ID, prefixed with the initial of its token type in
            brackets. Returns the string 'None' if there is no such
            token. The function is very slow and must only be used
            for debugging.
        """
        for node in self.nodes:
            for tlist in node.starting:
                for candidate in tlist.tokens:
                    if candidate.token != token:
                        continue
                    return f"[{tlist.ttype.name[0]}]{candidate.lookup_word}"
        return 'None'
|
||||
54
src/nominatim_api/search/query_analyzer_factory.py
Normal file
54
src/nominatim_api/search/query_analyzer_factory.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Factory for creating a query analyzer for the configured tokenizer.
|
||||
"""
|
||||
from typing import List, cast, TYPE_CHECKING
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
import importlib
|
||||
|
||||
from ..logging import log
|
||||
from ..connection import SearchConnection
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .query import Phrase, QueryStruct
|
||||
|
||||
class AbstractQueryAnalyzer(ABC):
    """ Interface for classes that analyse incoming queries.

        A query analyzer always belongs to the tokenizer that was
        used on import.
    """

    @abstractmethod
    async def analyze_query(self, phrases: List['Phrase']) -> 'QueryStruct':
        """ Tokenize the given phrases and return the resulting query.
        """


    @abstractmethod
    def normalize_text(self, text: str) -> str:
        """ Return the normalized form of the given text. Search
            operates on this standardized form. Whatever is removed
            here is inevitably lost.
        """
|
||||
|
||||
|
||||
|
||||
async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
    """ Create a query analyzer for the tokenizer used by the database.

        The name of the tokenizer is read from the 'tokenizer' property
        of the database. The analyzer is created by the function
        create_query_analyzer() of the module '<name>_tokenizer' that
        lives next to this file.

        Raises a RuntimeError when no such module file exists.
    """
    name = await conn.get_property('tokenizer')

    src_file = Path(__file__).parent / f'{name}_tokenizer.py'
    if not src_file.is_file():
        log().comment(f"No tokenizer named '{name}' available. Database not set up properly.")
        raise RuntimeError('Tokenizer not found')

    # Import relative to the package of this module, so that the module
    # that gets loaded is the very file checked above, no matter under
    # which name the package is installed.
    module = importlib.import_module(f'.{name}_tokenizer', package=__package__)

    return cast(AbstractQueryAnalyzer, await module.create_query_analyzer(conn))
|
||||
422
src/nominatim_api/search/token_assignment.py
Normal file
422
src/nominatim_api/search/token_assignment.py
Normal file
@@ -0,0 +1,422 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Create query interpretations where each vertex in the query is assigned
|
||||
a specific function (expressed as a token type).
|
||||
"""
|
||||
from typing import Optional, List, Iterator
|
||||
import dataclasses
|
||||
|
||||
from ..logging import log
|
||||
from . import query as qmod
|
||||
|
||||
# pylint: disable=too-many-return-statements,too-many-branches
|
||||
|
||||
@dataclasses.dataclass
class TypedRange:
    """ Pairs a token range with the type of tokens it is used for.
    """
    ttype: qmod.TokenType
    trange: qmod.TokenRange
|
||||
|
||||
|
||||
# Penalty value for each type of break between tokens.
# NOTE(review): judging by the name, this is the penalty for changing the
# token type at a break of the given kind - confirm against the users of
# this table, which are not part of this section.
PENALTY_TOKENCHANGE = {
    qmod.BreakType.START: 0.0,
    qmod.BreakType.END: 0.0,
    qmod.BreakType.PHRASE: 0.0,
    qmod.BreakType.WORD: 0.1,
    qmod.BreakType.PART: 0.2,
    qmod.BreakType.TOKEN: 0.4
}

# Shorthand for a list of typed ranges.
TypedRangeSeq = List[TypedRange]
|
||||
|
||||
@dataclasses.dataclass
class TokenAssignment: # pylint: disable=too-many-instance-attributes
    """ One possible way of assigning token types to the tokens
        of a tokenized query.
    """
    penalty: float = 0.0
    name: Optional[qmod.TokenRange] = None
    address: List[qmod.TokenRange] = dataclasses.field(default_factory=list)
    housenumber: Optional[qmod.TokenRange] = None
    postcode: Optional[qmod.TokenRange] = None
    country: Optional[qmod.TokenRange] = None
    near_item: Optional[qmod.TokenRange] = None
    qualifier: Optional[qmod.TokenRange] = None


    @staticmethod
    def from_ranges(ranges: TypedRangeSeq) -> 'TokenAssignment':
        """ Build a token assignment from a sequence of typed ranges.

            PARTIAL ranges are collected in the address list. For the
            other types handled here the last range of a type wins.
            Ranges of any other type are ignored.
        """
        out = TokenAssignment()
        for typed in ranges:
            ttype, trange = typed.ttype, typed.trange
            if ttype == qmod.TokenType.PARTIAL:
                out.address.append(trange)
                continue
            # Types that fill exactly one attribute of the assignment.
            single_valued = ((qmod.TokenType.HOUSENUMBER, 'housenumber'),
                             (qmod.TokenType.POSTCODE, 'postcode'),
                             (qmod.TokenType.COUNTRY, 'country'),
                             (qmod.TokenType.NEAR_ITEM, 'near_item'),
                             (qmod.TokenType.QUALIFIER, 'qualifier'))
            for candidate, attr in single_valued:
                if ttype == candidate:
                    setattr(out, attr, trange)
                    break
        return out
|
||||
|
||||
|
||||
class _TokenSequence:
    """ Working state used to put together the token assignments.

        Represents an intermediate state while traversing the tokenized
        query.

        `direction` encodes the reading direction of the sequence:
        1 is left-to-right, -1 is right-to-left and 0 means that the
        direction has not been fixed (both readings are tried when
        creating assignments).
    """
    def __init__(self, seq: TypedRangeSeq,
                 direction: int = 0, penalty: float = 0.0) -> None:
        """ Create a state from the typed ranges collected so far,
            the current reading direction and the accumulated penalty.
        """
        self.seq = seq
        self.direction = direction
        self.penalty = penalty


    def __str__(self) -> str:
        """ Return a compact debug representation listing all ranges
            with their type, followed by direction and penalty.
        """
        seq = ''.join(f'[{r.trange.start} - {r.trange.end}: {r.ttype.name}]' for r in self.seq)
        return f'{seq} (dir: {self.direction}, penalty: {self.penalty})'


    @property
    def end_pos(self) -> int:
        """ Return the index of the global end of the current sequence.
            An empty sequence ends at position 0.
        """
        return self.seq[-1].trange.end if self.seq else 0


    def has_types(self, *ttypes: qmod.TokenType) -> bool:
        """ Check if the current sequence contains any typed ranges of
            the given types.
        """
        return any(s.ttype in ttypes for s in self.seq)


    def is_final(self) -> bool:
        """ Return true when the sequence cannot be extended by any
            form of token anymore.
        """
        # Country and category must be the final term for left-to-right
        return len(self.seq) > 1 and \
               self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)


    def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
        """ Check if the given token type is appendable to the existing sequence.

            Returns None if the token type is not appendable, otherwise the
            new direction of the sequence after adding such a type. The
            token is not added.
        """
        # WORD tokens never become part of an assignment.
        if ttype == qmod.TokenType.WORD:
            return None

        if not self.seq:
            # Append unconditionally to the empty list
            if ttype == qmod.TokenType.COUNTRY:
                return -1
            if ttype in (qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
                return 1
            return self.direction

        # Name tokens are always acceptable and don't change direction
        if ttype == qmod.TokenType.PARTIAL:
            # qualifiers cannot appear in the middle of the query. They need
            # to be near the next phrase.
            if self.direction == -1 \
               and any(t.ttype == qmod.TokenType.QUALIFIER for t in self.seq[:-1]):
                return None
            return self.direction

        # Other tokens may only appear once
        if self.has_types(ttype):
            return None

        if ttype == qmod.TokenType.HOUSENUMBER:
            if self.direction == 1:
                if len(self.seq) == 1 and self.seq[0].ttype == qmod.TokenType.QUALIFIER:
                    return None
                if len(self.seq) > 2 \
                   or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
                    return None # direction left-to-right: housenumber must come before anything
            elif self.direction == -1 \
                 or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
                return -1 # force direction right-to-left if after other terms

            return self.direction

        if ttype == qmod.TokenType.POSTCODE:
            if self.direction == -1:
                if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
                    return None
                return -1
            if self.direction == 1:
                return None if self.has_types(qmod.TokenType.COUNTRY) else 1
            if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
                return 1
            return self.direction

        if ttype == qmod.TokenType.COUNTRY:
            return None if self.direction == -1 else 1

        if ttype == qmod.TokenType.NEAR_ITEM:
            return self.direction

        if ttype == qmod.TokenType.QUALIFIER:
            if self.direction == 1:
                if (len(self.seq) == 1
                    and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
                   or (len(self.seq) == 2
                       and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
                       and self.seq[1].ttype == qmod.TokenType.PARTIAL):
                    return 1
                return None
            if self.direction == -1:
                return -1

            # From here on the direction is still undecided (0).
            # A leading NEAR_ITEM is ignored for the length checks below.
            tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
            if len(tempseq) == 0:
                return 1
            # NOTE(review): this tests self.seq[0], not tempseq[0]. When the
            # sequence starts with a NEAR_ITEM, a directly following
            # HOUSENUMBER is therefore not caught here - confirm whether
            # tempseq[0] was intended.
            if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
                return None
            if len(tempseq) > 1 or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
                return -1
            return 0

        return None


    def advance(self, ttype: qmod.TokenType, end_pos: int,
                btype: qmod.BreakType) -> Optional['_TokenSequence']:
        """ Return a new token sequence state with the given token type
            extended.

            Returns None when the type cannot be appended. The current
            state is not modified. `end_pos` is the end of the new token
            and `btype` the type of the break in front of it.
        """
        newdir = self.appendable(ttype)
        if newdir is None:
            return None

        if not self.seq:
            newseq = [TypedRange(ttype, qmod.TokenRange(0, end_pos))]
            new_penalty = 0.0
        else:
            last = self.seq[-1]
            if btype != qmod.BreakType.PHRASE and last.ttype == ttype:
                # extend the existing range
                newseq = self.seq[:-1] + [TypedRange(ttype, last.trange.replace_end(end_pos))]
                new_penalty = 0.0
            else:
                # start a new range
                newseq = list(self.seq) + [TypedRange(ttype,
                                                      qmod.TokenRange(last.trange.end, end_pos))]
                # a change of type costs a penalty depending on the break type
                new_penalty = PENALTY_TOKENCHANGE[btype]

        return _TokenSequence(newseq, newdir, self.penalty + new_penalty)


    def _adapt_penalty_from_priors(self, priors: int, new_dir: int) -> bool:
        """ Adjust direction or penalty given the number of PARTIAL
            ranges (`priors`) counted by the caller on one side of
            the housenumber.

            Fewer than 2 priors change nothing. With 2 or more priors
            and an undecided direction, the direction is set to `new_dir`.
            With a fixed direction, exactly 2 priors add a penalty of 0.8
            and more than 2 make the sequence invalid (returns False).
        """
        if priors >= 2:
            if self.direction == 0:
                self.direction = new_dir
            else:
                if priors == 2:
                    self.penalty += 0.8
                else:
                    return False

        return True


    def recheck_sequence(self) -> bool:
        """ Check that the sequence is a fully valid token assignment
            and adapt direction and penalties further if necessary.

            This function catches some impossible assignments that need
            forward context and can therefore not be excluded when building
            the assignment.

            Returns False when the sequence must be discarded.
        """
        # housenumbers may not be further than 2 words from the beginning.
        # If there are two words in front, give it a penalty.
        hnrpos = next((i for i, tr in enumerate(self.seq)
                       if tr.ttype == qmod.TokenType.HOUSENUMBER),
                      None)
        if hnrpos is not None:
            if self.direction != -1:
                # count the name ranges in front of the housenumber
                priors = sum(1 for t in self.seq[:hnrpos] if t.ttype == qmod.TokenType.PARTIAL)
                if not self._adapt_penalty_from_priors(priors, -1):
                    return False
            if self.direction != 1:
                # count the name ranges after the housenumber
                priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
                if not self._adapt_penalty_from_priors(priors, 1):
                    return False
            # a housenumber together with a near item gets an extra penalty
            if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
                self.penalty += 1.0

        return True


    def _get_assignments_postcode(self, base: TokenAssignment,
                                  query_len: int) -> Iterator[TokenAssignment]:
        """ Yield possible assignments of Postcode searches with an
            address component.

            Only yields when the postcode sits at the very start or the
            very end (`query_len`) of the query and the direction permits
            it. As a side effect, the direction of this sequence is fixed.
        """
        assert base.postcode is not None

        if (base.postcode.start == 0 and self.direction != -1)\
           or (base.postcode.end == query_len and self.direction != 1):
            log().comment('postcode search')
            # <address>,<postcode> should give preference to address search
            if base.postcode.start == 0:
                penalty = self.penalty
                self.direction = -1 # name searches are only possible backwards
            else:
                penalty = self.penalty + 0.1
                self.direction = 1 # name searches are only possible forwards
            yield dataclasses.replace(base, penalty=penalty)


    def _get_assignments_address_forward(self, base: TokenAssignment,
                                         query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
        """ Yield possible assignments of address searches with
            left-to-right reading.

            First yields the variant where the whole first address range
            is the name. Unless excluded, then yields one variant for
            each possible split of the first range into name and address.
        """
        first = base.address[0]

        log().comment('first word = name')
        yield dataclasses.replace(base, penalty=self.penalty,
                                  name=first, address=base.address[1:])

        # To paraphrase:
        #  * if another name term comes after the first one and before the
        #    housenumber
        #  * a qualifier comes after the name
        #  * the containing phrase is strictly typed
        if (base.housenumber and first.end < base.housenumber.start)\
           or (base.qualifier and base.qualifier > first)\
           or (query.nodes[first.start].ptype != qmod.PhraseType.NONE):
            return

        penalty = self.penalty

        # Penalty for:
        #  * <name>, <street>, <housenumber> , ...
        #  * queries that are comma-separated
        if (base.housenumber and base.housenumber > first) or len(query.source) > 1:
            penalty += 0.25

        for i in range(first.start + 1, first.end):
            name, addr = first.split(i)
            log().comment(f'split first word = name ({i - first.start})')
            yield dataclasses.replace(base, name=name, address=[addr] + base.address[1:],
                                      penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])


    def _get_assignments_address_backward(self, base: TokenAssignment,
                                          query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
        """ Yield possible assignments of address searches with
            right-to-left reading.

            Yields the variant where the whole last address range is the
            name (only when the direction is fixed to right-to-left or
            there is more than one address range). Unless excluded, then
            yields one variant for each possible split of the last range
            into address and name.
        """
        last = base.address[-1]

        if self.direction == -1 or len(base.address) > 1:
            log().comment('last word = name')
            yield dataclasses.replace(base, penalty=self.penalty,
                                      name=last, address=base.address[:-1])

        # To paraphrase:
        #  * if another name term comes before the last one and after the
        #    housenumber
        #  * a qualifier comes before the name
        #  * the containing phrase is strictly typed
        if (base.housenumber and last.start > base.housenumber.end)\
           or (base.qualifier and base.qualifier < last)\
           or (query.nodes[last.start].ptype != qmod.PhraseType.NONE):
            return

        penalty = self.penalty
        if base.housenumber and base.housenumber < last:
            penalty += 0.4
        if len(query.source) > 1:
            penalty += 0.25

        for i in range(last.start + 1, last.end):
            addr, name = last.split(i)
            log().comment(f'split last word = name ({i - last.start})')
            yield dataclasses.replace(base, name=name, address=base.address[:-1] + [addr],
                                      penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])


    def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
        """ Yield possible assignments for the current sequence.

            This function splits up general name assignments into name
            and address and yields all possible variants of that.

            Nothing is yielded when the address ranges cover more than
            50 tokens in total. The function may change the penalty and
            direction of this sequence while running.
        """
        base = TokenAssignment.from_ranges(self.seq)

        num_addr_tokens = sum(t.end - t.start for t in base.address)
        if num_addr_tokens > 50:
            return

        # Postcode search (postcode-only search is covered in next case)
        if base.postcode is not None and base.address:
            yield from self._get_assignments_postcode(base, query.num_token_slots())

        # Postcode or country-only search
        if not base.address:
            if not base.housenumber and (base.postcode or base.country or base.near_item):
                log().comment('postcode/country search')
                yield dataclasses.replace(base, penalty=self.penalty)
        else:
            # <postcode>,<address> should give preference to postcode search
            if base.postcode and base.postcode.start == 0:
                self.penalty += 0.1

            # Left-to-right reading of the address
            if self.direction != -1:
                yield from self._get_assignments_address_forward(base, query)

            # Right-to-left reading of the address
            if self.direction != 1:
                yield from self._get_assignments_address_backward(base, query)

            # variant for special housenumber searches
            if base.housenumber and not base.qualifier:
                yield dataclasses.replace(base, penalty=self.penalty)
|
||||
|
||||
|
||||
def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
    """ Enumerate the possible assignments of word types to the
        positions of the tokenized query.

        Candidates are derived from the concrete tokens that start at
        each node of the query.

        Penalties for switching from one word type to another are part
        of the result. Penalties for transitions inside a single type
        are not.
    """
    # A typed first phrase forces left-to-right reading from the start.
    initial_direction = 0 if query.source[0].ptype == qmod.PhraseType.NONE else 1
    pending = [_TokenSequence([], direction=initial_direction)]

    while pending:
        current = pending.pop()
        node = query.nodes[current.end_pos]

        for tlist in node.starting:
            candidate = current.advance(tlist.ttype, tlist.end, node.btype)
            if candidate is None:
                continue

            if candidate.end_pos != query.num_token_slots():
                # Not at the end of the query yet: keep extending unless
                # the sequence has been closed off.
                if not candidate.is_final():
                    pending.append(candidate)
                continue

            if candidate.recheck_sequence():
                log().var_dump('Assignment', candidate)
                yield from candidate.get_assignments(query)
|
||||
0
src/nominatim_api/server/__init__.py
Normal file
0
src/nominatim_api/server/__init__.py
Normal file
0
src/nominatim_api/server/falcon/__init__.py
Normal file
0
src/nominatim_api/server/falcon/__init__.py
Normal file
194
src/nominatim_api/server/falcon/server.py
Normal file
194
src/nominatim_api/server/falcon/server.py
Normal file
@@ -0,0 +1,194 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Server implementation using the falcon webserver framework.
|
||||
"""
|
||||
from typing import Optional, Mapping, cast, Any, List
|
||||
from pathlib import Path
|
||||
import datetime as dt
|
||||
import asyncio
|
||||
|
||||
from falcon.asgi import App, Request, Response
|
||||
|
||||
from nominatim_core.config import Configuration
|
||||
from ...core import NominatimAPIAsync
|
||||
from ... import v1 as api_impl
|
||||
from ... import logging as loglib
|
||||
|
||||
class HTTPNominatimError(Exception):
    """ Exception carrying everything needed to build an HTTP error
        response: the message text, the status code and the content type.
    """
    def __init__(self, msg: str, status: int, content_type: str) -> None:
        self.msg, self.status, self.content_type = msg, status, content_type
|
||||
|
||||
|
||||
async def nominatim_error_handler(req: Request, resp: Response, #pylint: disable=unused-argument
                                  exception: HTTPNominatimError,
                                  _: Any) -> None:
    """ Error handler that copies status, message and content type
        from the raised exception into the response.
    """
    resp.content_type = exception.content_type
    resp.text = exception.msg
    resp.status = exception.status
|
||||
|
||||
|
||||
async def timeout_error_handler(req: Request, resp: Response, #pylint: disable=unused-argument
                                exception: TimeoutError, #pylint: disable=unused-argument
                                _: Any) -> None:
    """ Error handler for query timeouts. Always answers with status 503.

        When debug log output has been collected, it is returned as HTML.
        Otherwise a short plain-text message is sent.
    """
    resp.status = 503

    loglib.log().comment('Aborted: Query took too long to process.')
    logdata = loglib.get_and_disable()

    if not logdata:
        resp.text = "Query took too long to process."
        resp.content_type = 'text/plain; charset=utf-8'
        return

    resp.text = logdata
    resp.content_type = 'text/html; charset=utf-8'
|
||||
|
||||
|
||||
class ParamWrapper(api_impl.ASGIAdaptor):
    """ Adaptor that exposes a Falcon request/response pair through
        the server glue interface.
    """

    def __init__(self, req: Request, resp: Response,
                 config: Configuration) -> None:
        self._config = config
        self.response = resp
        self.request = req


    def get(self, name: str, default: Optional[str] = None) -> Optional[str]:
        value = self.request.get_param(name, default=default)
        return cast(Optional[str], value)


    def get_header(self, name: str, default: Optional[str] = None) -> Optional[str]:
        value = self.request.get_header(name, default=default)
        return cast(Optional[str], value)


    def error(self, msg: str, status: int = 400) -> HTTPNominatimError:
        return HTTPNominatimError(msg, status, self.content_type)


    def create_response(self, status: int, output: str, num_results: int) -> None:
        response = self.response
        # remember the result count for the file logging middleware
        response.context.num_results = num_results
        response.status = status
        response.text = output
        response.content_type = self.content_type


    def base_uri(self) -> str:
        prefix = self.request.forwarded_prefix
        return cast (str, prefix)

    def config(self) -> Configuration:
        return self._config
|
||||
|
||||
|
||||
class EndpointWrapper:
    """ Turns a server glue endpoint function into a Falcon resource
        that answers GET requests.
    """

    def __init__(self, name: str, func: api_impl.EndpointFunc, api: NominatimAPIAsync) -> None:
        self.api = api
        self.func = func
        self.name = name


    async def on_get(self, req: Request, resp: Response) -> None:
        """ Handle a GET request by running the wrapped endpoint function.
        """
        adaptor = ParamWrapper(req, resp, self.api.config)
        await self.func(self.api, adaptor)
|
||||
|
||||
|
||||
class FileLoggingMiddleware:
    """ Falcon middleware that appends one line per selected request
        to a log file.
    """

    def __init__(self, file_name: str):
        # Opened in line-buffered append mode and kept open.
        self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732


    async def process_request(self, req: Request, _: Response) -> None:
        """ Record the start time of the request in its context.
        """
        req.context.start = dt.datetime.now(tz=dt.timezone.utc)


    async def process_response(self, req: Request, resp: Response,
                               resource: Optional[EndpointWrapper],
                               req_succeeded: bool) -> None:
        """ Write the log line once the request has been handled.
            Only successful requests (status 200) to the reverse, search,
            lookup and details endpoints are logged.
        """
        if not req_succeeded:
            return
        if resource is None:
            return
        if resp.status != 200:
            return
        if resource.name not in ('reverse', 'search', 'lookup', 'details'):
            return

        finish = dt.datetime.now(tz=dt.timezone.utc)
        duration = (finish - req.context.start).total_seconds()
        params = req.scope['query_string'].decode('utf8')
        naive_start = req.context.start.replace(tzinfo=None)
        start = naive_start.isoformat(sep=' ', timespec='milliseconds')
        num_results = getattr(resp.context, 'num_results', 0)

        self.fd.write(f'[{start}] {duration:.4f} {num_results} {resource.name} "{params}"\n')
|
||||
|
||||
|
||||
class APIShutdown:
    """ Middleware that closes the API object when the application
        shuts down.
    """

    def __init__(self, api: NominatimAPIAsync) -> None:
        self.api = api

    async def process_shutdown(self, *_args: Any) -> None:
        """ Handle the ASGI lifespan shutdown event by closing the API.
        """
        api = self.api
        await api.close()
|
||||
|
||||
|
||||
def get_application(project_dir: Path,
                    environ: Optional[Mapping[str, str]] = None) -> App:
    """ Build the Falcon ASGI application for Nominatim.

        Sets up the API object, the middleware (shutdown handling and,
        if LOG_FILE is configured, request logging), the error handlers
        and one route per API endpoint. With SERVE_LEGACY_URLS enabled,
        each endpoint is additionally served under its '.php' name.
    """
    api = NominatimAPIAsync(project_dir, environ)

    middleware: List[object] = [APIShutdown(api)]
    log_file = api.config.LOG_FILE
    if log_file:
        middleware.append(FileLoggingMiddleware(log_file))

    app = App(cors_enable=api.config.get_bool('CORS_NOACCESSCONTROL'),
              middleware=middleware)

    app.add_error_handler(HTTPNominatimError, nominatim_error_handler)
    # asyncio.TimeoutError is different from TimeoutError in Python <= 3.10
    for timeout_class in (TimeoutError, asyncio.TimeoutError):
        app.add_error_handler(timeout_class, timeout_error_handler)

    legacy_urls = api.config.get_bool('SERVE_LEGACY_URLS')
    suffixes = ('', '.php') if legacy_urls else ('',)
    for name, func in api_impl.ROUTES:
        endpoint = EndpointWrapper(name, func, api)
        for suffix in suffixes:
            app.add_route(f"/{name}{suffix}", endpoint)

    return app
|
||||
|
||||
|
||||
def run_wsgi() -> App:
    """ Entry point for uvicorn.

        The current working directory is used as project directory,
        so uvicorn must be started from there.
    """
    project_dir = Path('.')
    return get_application(project_dir)
|
||||
0
src/nominatim_api/server/starlette/__init__.py
Normal file
0
src/nominatim_api/server/starlette/__init__.py
Normal file
174
src/nominatim_api/server/starlette/server.py
Normal file
174
src/nominatim_api/server/starlette/server.py
Normal file
@@ -0,0 +1,174 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Server implementation using the starlette webserver framework.
|
||||
"""
|
||||
from typing import Any, Optional, Mapping, Callable, cast, Coroutine, Dict, Awaitable
|
||||
from pathlib import Path
|
||||
import datetime as dt
|
||||
import asyncio
|
||||
|
||||
from starlette.applications import Starlette
|
||||
from starlette.routing import Route
|
||||
from starlette.exceptions import HTTPException
|
||||
from starlette.responses import Response, PlainTextResponse, HTMLResponse
|
||||
from starlette.requests import Request
|
||||
from starlette.middleware import Middleware
|
||||
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
|
||||
from starlette.middleware.cors import CORSMiddleware
|
||||
|
||||
from nominatim_core.config import Configuration
|
||||
from ...core import NominatimAPIAsync
|
||||
from ... import v1 as api_impl
|
||||
from ... import logging as loglib
|
||||
|
||||
class ParamWrapper(api_impl.ASGIAdaptor):
    """ Adaptor that exposes a Starlette request through the server
        glue interface.
    """

    def __init__(self, request: Request) -> None:
        self.request = request


    def get(self, name: str, default: Optional[str] = None) -> Optional[str]:
        params = self.request.query_params
        return params.get(name, default=default)


    def get_header(self, name: str, default: Optional[str] = None) -> Optional[str]:
        headers = self.request.headers
        return headers.get(name, default)


    def error(self, msg: str, status: int = 400) -> HTTPException:
        headers = {'content-type': self.content_type}
        return HTTPException(status, detail=msg, headers=headers)


    def create_response(self, status: int, output: str, num_results: int) -> Response:
        # remember the result count for the file logging middleware
        self.request.state.num_results = num_results
        return Response(output, status_code=status, media_type=self.content_type)


    def base_uri(self) -> str:
        url = self.request.url
        scheme, host, port = url.scheme, url.hostname, url.port
        root = self.request.scope['root_path']

        # The port is left out when it is unknown or the default
        # port of the scheme.
        is_default_port = (scheme, port) in (('http', 80), ('https', 443))
        netloc = host if port is None or is_default_port else f"{host}:{port}"

        return f"{scheme}://{netloc}{root}"


    def config(self) -> Configuration:
        api = self.request.app.state.API
        return cast(Configuration, api.config)
|
||||
|
||||
|
||||
def _wrap_endpoint(func: api_impl.EndpointFunc)\
        -> Callable[[Request], Coroutine[Any, Any, Response]]:
    """ Wrap a server glue endpoint function into a Starlette request
        handler. The handler calls the function with the application's
        API object and an adaptor around the request.
    """
    async def _callback(request: Request) -> Response:
        api = request.app.state.API
        result = await func(api, ParamWrapper(request))
        return cast(Response, result)

    return _callback
|
||||
|
||||
|
||||
class FileLoggingMiddleware(BaseHTTPMiddleware):
    """ Starlette middleware that appends one line per selected request
        to a log file.
    """

    def __init__(self, app: Starlette, file_name: str = ''):
        super().__init__(app)
        # Opened in line-buffered append mode and kept open.
        self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732

    async def dispatch(self, request: Request,
                       call_next: RequestResponseEndpoint) -> Response:
        """ Run the request and log it afterwards. Only responses with
            status 200 for paths starting with /reverse, /search, /lookup
            or /details are logged.
        """
        start = dt.datetime.now(tz=dt.timezone.utc)
        response = await call_next(request)

        if response.status_code != 200:
            return response

        finish = dt.datetime.now(tz=dt.timezone.utc)

        # The first endpoint name matching the start of the path wins.
        qtype = next((endpoint for endpoint in ('reverse', 'search', 'lookup', 'details')
                      if request.url.path.startswith('/' + endpoint)),
                     None)
        if qtype is None:
            return response

        duration = (finish - start).total_seconds()
        params = request.scope['query_string'].decode('utf8')
        stamp = start.replace(tzinfo=None).isoformat(sep=' ', timespec='milliseconds')
        num_results = getattr(request.state, 'num_results', 0)

        self.fd.write(f'[{stamp}] {duration:.4f} {num_results} {qtype} "{params}"\n')

        return response
|
||||
|
||||
|
||||
async def timeout_error(request: Request, #pylint: disable=unused-argument
                        _: Exception) -> Response:
    """ Error handler for query timeouts.

        Always answers with status 503. When debug log output has been
        collected, it is returned as HTML. Otherwise a short plain-text
        message is sent.
    """
    loglib.log().comment('Aborted: Query took too long to process.')
    logdata = loglib.get_and_disable()

    if logdata:
        # Report the timeout as an error even when debug output is sent.
        # Without an explicit status code the response would be a 200.
        return HTMLResponse(logdata, status_code=503)

    return PlainTextResponse("Query took too long to process.", status_code=503)
|
||||
|
||||
|
||||
def get_application(project_dir: Path,
                    environ: Optional[Mapping[str, str]] = None,
                    debug: bool = True) -> Starlette:
    """ Build the Starlette ASGI application for Nominatim.

        Sets up one route per API endpoint (plus the '.php' variant
        when SERVE_LEGACY_URLS is enabled), optional CORS and file
        logging middleware, the timeout handlers and a shutdown hook
        that closes the API object stored in the application state.
    """
    config = Configuration(project_dir, environ)

    legacy_urls = config.get_bool('SERVE_LEGACY_URLS')
    suffixes = ('', '.php') if legacy_urls else ('',)
    routes = []
    for name, func in api_impl.ROUTES:
        endpoint = _wrap_endpoint(func)
        for suffix in suffixes:
            routes.append(Route(f"/{name}{suffix}", endpoint=endpoint))

    middleware = []
    if config.get_bool('CORS_NOACCESSCONTROL'):
        cors = Middleware(CORSMiddleware,
                          allow_origins=['*'],
                          allow_methods=['GET', 'OPTIONS'],
                          max_age=86400)
        middleware.append(cors)

    log_file = config.LOG_FILE
    if log_file:
        middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file))

    # Both timeout classes are registered with the same handler.
    exceptions: Dict[Any, Callable[[Request, Exception], Awaitable[Response]]] = {
        TimeoutError: timeout_error,
        asyncio.TimeoutError: timeout_error
    }

    async def _shutdown() -> None:
        await app.state.API.close()

    app = Starlette(debug=debug, routes=routes, middleware=middleware,
                    exception_handlers=exceptions,
                    on_shutdown=[_shutdown])

    app.state.API = NominatimAPIAsync(project_dir, environ)

    return app
|
||||
|
||||
|
||||
def run_wsgi() -> Starlette:
    """ Entry point for uvicorn. Uses the current working directory
        as project directory and switches debug mode off.
    """
    project_dir = Path('.')
    return get_application(project_dir, debug=False)
|
||||
221
src/nominatim_api/sql/sqlalchemy_functions.py
Normal file
221
src/nominatim_api/sql/sqlalchemy_functions.py
Normal file
@@ -0,0 +1,221 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Custom functions and expressions for SQLAlchemy.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import Any
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.compiler import compiles
|
||||
|
||||
from nominatim_core.typing import SaColumn
|
||||
|
||||
# pylint: disable=all
|
||||
|
||||
class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
    """ Condition that enables use of the partial index
        'idx_placex_geometry_reverse_lookupPolygon'.

        The expression must be constant so that the query planner
        recognises it correctly in prepared statements.
    """
    inherit_cache = True
    name = 'PlacexGeometryReverseLookuppolygon'
|
||||
|
||||
|
||||
@compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
def _default_intersects(element: PlacexGeometryReverseLookuppolygon,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default compilation: a constant condition on the placex table.
    """
    conditions = (
        "ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')",
        "placex.rank_address between 4 and 25",
        "placex.type != 'postcode'",
        "placex.name is not null",
        "placex.indexed_status = 0",
        "placex.linked_place_id is null",
    )
    return "(" + " AND ".join(conditions) + ")"
|
||||
|
||||
|
||||
@compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_intersects(element: PlacexGeometryReverseLookuppolygon,
                       compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite compilation: same condition as the default one but
        with the geometry type names 'POLYGON' and 'MULTIPOLYGON'.
    """
    conditions = (
        "ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')",
        "placex.rank_address between 4 and 25",
        "placex.type != 'postcode'",
        "placex.name is not null",
        "placex.indexed_status = 0",
        "placex.linked_place_id is null",
    )
    return "(" + " AND ".join(conditions) + ")"
|
||||
|
||||
|
||||
class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
    """ Condition over the geometry and rank_search columns of the
        given table and a further geometry. The name of the table is
        kept for use by the compilation functions.
    """
    inherit_cache = True
    name = 'IntersectsReverseDistance'

    def __init__(self, table: sa.Table, geom: SaColumn) -> None:
        columns = table.c
        super().__init__(columns.geometry, columns.rank_search, geom)
        self.tablename = table.name
|
||||
|
||||
|
||||
@compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
def default_reverse_place_diameter(element: IntersectsReverseDistance,
                                   compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default compilation: constant conditions on the table followed
        by an overlap test between the table geometry, buffered by
        reverse_place_diameter() of the rank, and the other geometry.
    """
    table = element.tablename
    table_conditions = " AND ".join((
        f"({table}.rank_address between 4 and 25",
        f"{table}.type != 'postcode'",
        f"{table}.name is not null",
        f"{table}.linked_place_id is null",
        f"{table}.osm_type = 'N'",
    ))

    # The clauses are compiled in their original order.
    compiled = tuple(compiler.process(clause, **kw) for clause in element.clauses)
    buffer_check = " AND ST_Buffer(%s, reverse_place_diameter(%s)) && %s)" % compiled

    return table_conditions + buffer_check
|
||||
|
||||
|
||||
@compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_reverse_place_diameter(element: IntersectsReverseDistance,
                                  compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite compilation: constant conditions on the table, an
        MbrIntersects test against the expanded second geometry and
        a lookup in the spatial index of 'placex_place_node_areas'.
    """
    geom1, rank, geom2 = element.clauses
    table = element.tablename

    template = "(" + " AND ".join((
        f"{table}.rank_address between 4 and 25",
        f"{table}.type != 'postcode'",
        f"{table}.name is not null",
        f"{table}.linked_place_id is null",
        f"{table}.osm_type = 'N'",
        "MbrIntersects(%s, ST_Expand(%s, 14.0 * exp(-0.2 * %s) - 0.03))",
        f"{table}.place_id IN"
        " (SELECT place_id FROM placex_place_node_areas"
        " WHERE ROWID IN (SELECT ROWID FROM SpatialIndex"
        " WHERE f_table_name = 'placex_place_node_areas'"
        " AND search_frame = %s))",
    )) + ")"

    # The second geometry appears twice and is compiled once
    # per occurrence.
    return template % (compiler.process(geom1, **kw),
                       compiler.process(geom2, **kw),
                       compiler.process(rank, **kw),
                       compiler.process(geom2, **kw))
|
||||
|
||||
|
||||
class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
    """ Condition taking a distance and a rank as arguments.
    """
    inherit_cache = True
    name = 'IsBelowReverseDistance'
|
||||
|
||||
|
||||
@compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
def default_is_below_reverse_distance(element: IsBelowReverseDistance,
                                      compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default compilation: compare the distance against the result
        of the SQL function reverse_place_diameter() for the rank.
    """
    dist, rank = element.clauses
    dist_sql = compiler.process(dist, **kw)
    rank_sql = compiler.process(rank, **kw)
    return f"{dist_sql} < reverse_place_diameter({rank_sql})"
|
||||
|
||||
|
||||
@compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_is_below_reverse_distance(element: IsBelowReverseDistance,
                                     compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite compilation: compare the distance against an inline
        formula over the rank.
    """
    dist, rank = element.clauses
    dist_sql = compiler.process(dist, **kw)
    rank_sql = compiler.process(rank, **kw)
    return f"{dist_sql} < 14.0 * exp(-0.2 * {rank_sql}) - 0.03"
|
||||
|
||||
|
||||
class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
    """ SQL function that checks if a row of the given table is an address
        point: a place of address rank 30 that either has a house number or
        an 'addr:housename' entry in its name column.
    """
    name = 'IsAddressPoint'
    inherit_cache = True

    def __init__(self, table: sa.Table) -> None:
        """ Create the function for the given table. The table must have
            the columns 'rank_address', 'housenumber' and 'name'.
        """
        super().__init__(table.c.rank_address,
                         table.c.housenumber, table.c.name)
|
||||
|
||||
|
||||
@compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
def default_is_address_point(element: IsAddressPoint,
                             compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default rendering of the address point check. The house name is
        looked up with the '?' key-exists operator on the name column.
    """
    rank, hnr, name = list(element.clauses)
    rank_sql = compiler.process(rank, **kw)
    hnr_sql = compiler.process(hnr, **kw)
    name_sql = compiler.process(name, **kw)

    return (f"({rank_sql} = 30"
            f" AND ({hnr_sql} IS NOT NULL OR {name_sql} ? 'addr:housename'))")
|
||||
|
||||
|
||||
@compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_is_address_point(element: IsAddressPoint,
                            compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite rendering of the address point check. The house name is
        extracted from the name column with json_extract().
    """
    rank, hnr, name = list(element.clauses)
    rank_sql = compiler.process(rank, **kw)
    hnr_sql = compiler.process(hnr, **kw)
    name_sql = compiler.process(name, **kw)

    return (f"({rank_sql} = 30 AND coalesce({hnr_sql},"
            f" json_extract({name_sql}, '$.addr:housename')) IS NOT NULL)")
|
||||
|
||||
|
||||
class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
    """ Check if any of the names from the JSON array in parameter 2
        is contained in the list of names given in parameter 1.
    """
    name = 'CrosscheckNames'
    inherit_cache = True
|
||||
|
||||
@compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
def compile_crosscheck_names(element: CrosscheckNames,
                             compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default rendering: array overlap between the values of parameter 1
        and the elements of the JSON array in parameter 2. A NULL result
        is turned into false.
    """
    arg1, arg2 = list(element.clauses)
    names_sql = compiler.process(arg1, **kw)
    match_sql = compiler.process(arg2, **kw)

    return (f"coalesce(avals({names_sql}) && ARRAY(SELECT * FROM"
            f" json_array_elements_text({match_sql})), false)")
|
||||
|
||||
|
||||
@compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
def compile_sqlite_crosscheck_names(element: CrosscheckNames,
                                    compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite rendering: join the JSON values of both parameters and
        check if there is at least one pair with equal values.
    """
    arg1, arg2 = list(element.clauses)
    names_sql = compiler.process(arg1, **kw)
    match_sql = compiler.process(arg2, **kw)

    return ("EXISTS(SELECT *"
            f" FROM json_each({names_sql}) as name, json_each({match_sql}) as match_name"
            " WHERE name.value = match_name.value)")
|
||||
|
||||
|
||||
class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
    """ Return elements of a json array as a set.

        Rendered as json_array_elements() by default and as json_each()
        for SQLite.
    """
    name = 'JsonArrayEach'
    inherit_cache = True
|
||||
|
||||
|
||||
@compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
def default_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
    """ Default rendering using json_array_elements().
    """
    params_sql = compiler.process(element.clauses, **kw)
    return f"json_array_elements({params_sql})"
|
||||
|
||||
|
||||
@compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite rendering using json_each().
    """
    params_sql = compiler.process(element.clauses, **kw)
    return f"json_each({params_sql})"
|
||||
|
||||
|
||||
|
||||
class Greatest(sa.sql.functions.GenericFunction[Any]):
    """ Function to compute maximum of all its input parameters.

        For SQLite the function is rendered as max().
    """
    name = 'greatest'
    inherit_cache = True
|
||||
|
||||
|
||||
@compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_greatest(element: Greatest, compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite rendering: use the multi-argument max() function.
    """
    params_sql = compiler.process(element.clauses, **kw)
    return f"max({params_sql})"
|
||||
|
||||
|
||||
|
||||
class RegexpWord(sa.sql.functions.GenericFunction[Any]):
    """ Check if a full word is in a given string.

        The first parameter is the word to look for (it is inserted into
        a regular expression between word boundaries), the second
        parameter is the string to search in.
    """
    name = 'RegexpWord'
    inherit_cache = True
|
||||
|
||||
|
||||
@compiles(RegexpWord, 'postgresql') # type: ignore[no-untyped-call, misc]
def postgres_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
    """ PostgreSQL rendering: case-insensitive regular expression match
        with the word enclosed in the word boundary markers \\m and \\M.
    """
    arg1, arg2 = list(element.clauses)
    # The searched string comes first in the expression and is compiled first.
    haystack_sql = compiler.process(arg2, **kw)
    word_sql = compiler.process(arg1, **kw)

    return f"{haystack_sql} ~* ('\\m(' || {word_sql} || ')\\M')::text"
|
||||
|
||||
|
||||
@compiles(RegexpWord, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
    """ SQLite rendering: call regexp() with the word enclosed in \\b
        word boundaries.
    """
    arg1, arg2 = list(element.clauses)
    word_sql = compiler.process(arg1, **kw)
    haystack_sql = compiler.process(arg2, **kw)

    return f"regexp('\\b(' || {word_sql} || ')\\b', {haystack_sql})"
|
||||
122
src/nominatim_api/sql/sqlite_functions.py
Normal file
122
src/nominatim_api/sql/sqlite_functions.py
Normal file
@@ -0,0 +1,122 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Custom functions for SQLite.
|
||||
"""
|
||||
from typing import cast, Optional, Set, Any
|
||||
import json
|
||||
|
||||
# pylint: disable=protected-access
|
||||
|
||||
def weigh_search(search_vector: Optional[str], rankings: str, default: float) -> float:
    """ Custom weight function for search results.

        'search_vector' is a comma-separated list of integers or None.
        'rankings' is a JSON array of entries of the form
        `[weight, [id, ...]]`. The function returns the weight of the first
        entry whose ids are all contained in the search vector. When there
        is no search vector or no entry matches, 'default' is returned.
    """
    if search_vector is not None:
        # Use a set so that each membership test is constant time. Empty
        # items (as produced by an empty vector) are skipped.
        svec = {int(x) for x in search_vector.split(',') if x}
        for rank in json.loads(rankings):
            if svec.issuperset(rank[1]):
                return cast(float, rank[0])

    return default
|
||||
|
||||
|
||||
class ArrayIntersectFuzzy:
    """ Aggregate that computes the common elements of all input
        integer arrays.

        Inputs of 10000000 characters or more (other than the first one)
        are skipped to speed up the computation. The result is therefore
        a superset of the common elements.

        Input and output arrays are given as comma-separated lists.
    """
    def __init__(self) -> None:
        self.first = ''
        self.values: Optional[Set[int]] = None

    def step(self, value: Optional[str]) -> None:
        """ Add the next array to the intersection. None is ignored.
        """
        if value is None:
            return

        if not self.first:
            # Parsing of the first array is deferred until a second
            # array needs to be intersected with it.
            self.first = value
            return

        if len(value) >= 10000000:
            return

        if self.values is None:
            self.values = set(map(int, self.first.split(',')))
        self.values.intersection_update(map(int, value.split(',')))

    def finalize(self) -> str:
        """ Return the final result: the intersection or, when at most
            one array was used, the first array unchanged.
        """
        if self.values is None:
            return self.first

        return ','.join(str(v) for v in self.values)
|
||||
|
||||
|
||||
class ArrayUnion:
    """ Aggregate that computes the set of all elements of the input
        integer arrays.

        Input and output arrays are given as strings of comma-separated
        lists. The elements are handled as strings.
    """
    def __init__(self) -> None:
        self.values: Optional[Set[str]] = None

    def step(self, value: Optional[str]) -> None:
        """ Add the next array to the union. None is ignored.
        """
        if value is None:
            return

        items = value.split(',')
        if self.values is None:
            self.values = set(items)
        else:
            self.values.update(items)

    def finalize(self) -> str:
        """ Return the final result, an empty string when no array
            has been added.
        """
        if self.values is None:
            return ''

        return ','.join(self.values)
|
||||
|
||||
|
||||
def array_contains(container: Optional[str], containee: Optional[str]) -> Optional[bool]:
    """ Is the array 'containee' completely contained in array 'container'.

        Both arrays are given as comma-separated lists. Returns None
        when one of the parameters is None.
    """
    if container is None or containee is None:
        return None

    # A real set keeps the check linear in the size of the two arrays.
    vset = set(container.split(','))
    return vset.issuperset(containee.split(','))
|
||||
|
||||
|
||||
def array_pair_contains(container1: Optional[str], container2: Optional[str],
                        containee: Optional[str]) -> Optional[bool]:
    """ Is the array 'containee' completely contained in the union of
        array 'container1' and array 'container2'.

        All arrays are given as comma-separated lists. Returns None
        when one of the parameters is None.
    """
    if container1 is None or container2 is None or containee is None:
        return None

    # A real set keeps the check linear in the size of the arrays.
    vset = set(container1.split(','))
    vset.update(container2.split(','))
    return vset.issuperset(containee.split(','))
|
||||
|
||||
|
||||
def install_custom_functions(conn: Any) -> None:
    """ Install helper functions for Nominatim into the given SQLite
        database connection.

        The scalar functions are registered as deterministic, followed
        by the two aggregate functions.
    """
    scalar_functions = (('weigh_search', 3, weigh_search),
                        ('array_contains', 2, array_contains),
                        ('array_pair_contains', 3, array_pair_contains))
    for name, nargs, func in scalar_functions:
        conn.create_function(name, nargs, func, deterministic=True)

    aggregates = (('array_intersect_fuzzy', ArrayIntersectFuzzy),
                  ('array_union', ArrayUnion))
    for name, aggregate in aggregates:
        _create_aggregate(conn, name, 1, aggregate)
|
||||
|
||||
|
||||
async def _make_aggregate(aioconn: Any, *args: Any) -> None:
    """ Call create_aggregate() of the connection wrapped by 'aioconn'
        with the given arguments. The call is handed to the private
        _execute() method of 'aioconn'.
    """
    # NOTE(review): presumably 'aioconn' is an aiosqlite connection
    # which does not expose create_aggregate() itself - confirm.
    await aioconn._execute(aioconn._conn.create_aggregate, *args)
|
||||
|
||||
|
||||
def _create_aggregate(conn: Any, name: str, nargs: int, aggregate: Any) -> None:
    """ Register the class 'aggregate' as aggregate function 'name'
        with 'nargs' parameters on the given connection.

        Any exception is passed on to the connection's own
        exception handler.
    """
    try:
        # Run the coroutine through the connection's await_() helper.
        conn.await_(_make_aggregate(conn._connection, name, nargs, aggregate))
    except Exception as error: # pylint: disable=broad-exception-caught
        conn._handle_exception(error)
|
||||
51
src/nominatim_api/status.py
Normal file
51
src/nominatim_api/status.py
Normal file
@@ -0,0 +1,51 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Classes and function related to status call.
|
||||
"""
|
||||
from typing import Optional
|
||||
import datetime as dt
|
||||
import dataclasses
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from .connection import SearchConnection
|
||||
from .version import NOMINATIM_API_VERSION
|
||||
|
||||
@dataclasses.dataclass
class StatusResult:
    """ Result of a call to the status API.

        'status' and 'message' must always be given, the other fields
        are optional and default to None.
    """
    # Numeric status code. get_status() creates the result with 0.
    status: int
    # Message that goes with the status code, 'OK' in get_status().
    message: str
    # Not annotated and therefore a plain class attribute, not a
    # dataclass field. It cannot be set through the constructor.
    software_version = NOMINATIM_API_VERSION
    # Date of the last import. get_status() converts it to UTC.
    data_updated: Optional[dt.datetime] = None
    # Content of the database property 'database_version', if available.
    database_version: Optional[str] = None
|
||||
|
||||
|
||||
async def get_status(conn: SearchConnection) -> StatusResult:
    """ Execute a status API call.

        Returns a result with status 0 and message 'OK', completed with
        the date of the last import (in UTC) and the database version
        where available.
    """
    result = StatusResult(0, 'OK')

    # Last update date
    last_import = await conn.scalar(
        sa.select(conn.t.import_status.c.lastimportdate).limit(1))

    if last_import is not None:
        # Dates without timezone are taken to be in UTC already,
        # all others are converted.
        if last_import.tzinfo is None:
            last_import = last_import.replace(tzinfo=dt.timezone.utc)
        else:
            last_import = last_import.astimezone(dt.timezone.utc)

    result.data_updated = last_import

    # Database version
    try:
        result.database_version = await conn.get_property('database_version')
    except ValueError:
        # Leave the version unset when the property cannot be read.
        pass

    return result
|
||||
550
src/nominatim_api/types.py
Normal file
550
src/nominatim_api/types.py
Normal file
@@ -0,0 +1,550 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Complex datatypes used by the Nominatim API.
|
||||
"""
|
||||
from typing import Optional, Union, Tuple, NamedTuple, TypeVar, Type, Dict, \
|
||||
Any, List, Sequence
|
||||
from collections import abc
|
||||
import dataclasses
|
||||
import enum
|
||||
import math
|
||||
from struct import unpack
|
||||
from binascii import unhexlify
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from .localization import Locales
|
||||
|
||||
# pylint: disable=no-member,too-many-boolean-expressions,too-many-instance-attributes
|
||||
|
||||
@dataclasses.dataclass
class PlaceID:
    """ Reference a place by Nominatim's internal ID.

        A PlaceID may reference a place from the main table placex, from
        the interpolation tables or the postcode tables. Place IDs are not
        stable between installations. You may therefore use this type only
        with place IDs obtained from the same database.
    """
    place_id: int
    """
    The internal ID of the place to reference.
    """
|
||||
|
||||
|
||||
@dataclasses.dataclass
class OsmID:
    """ Reference a place by its OSM ID and potentially the basic category.

        The OSM ID may refer to places in the main table placex and OSM
        interpolation lines.

        Creating an OsmID with an OSM type other than `N`, `W` or `R`
        raises a ValueError.
    """
    osm_type: str
    """ OSM type of the object. Must be one of `N`(node), `W`(way) or
        `R`(relation).
    """
    osm_id: int
    """ The OSM ID of the object.
    """
    osm_class: Optional[str] = None
    """ The same OSM object may appear multiple times in the database under
        different categories. The optional class parameter allows to distinguish
        the different categories and corresponds to the key part of the category.
        If there are multiple objects in the database and `osm_class` is
        left out, then one of the objects is returned at random.
    """

    def __post_init__(self) -> None:
        # Reject unknown OSM types right when the object is created.
        if self.osm_type not in ('N', 'W', 'R'):
            raise ValueError(f"Illegal OSM type '{self.osm_type}'. Must be one of N, W, R.")
|
||||
|
||||
|
||||
# A place may be referenced by its internal place ID or by its OSM ID.
PlaceRef = Union[PlaceID, OsmID]
|
||||
|
||||
|
||||
class Point(NamedTuple):
    """ A geographic point in WGS84 projection.
    """
    x: float
    y: float


    @property
    def lat(self) -> float:
        """ Return the latitude of the point.
        """
        return self.y


    @property
    def lon(self) -> float:
        """ Return the longitude of the point.
        """
        return self.x


    def to_geojson(self) -> str:
        """ Return the point in GeoJSON format.
        """
        return f'{{"type": "Point","coordinates": [{self.x}, {self.y}]}}'


    @staticmethod
    def from_wkb(wkb: Union[str, bytes]) -> 'Point':
        """ Create a point from EWKB as returned from the database.

            The EWKB may be given as bytes or as a hex-encoded string.
            Raises a ValueError when the input is not a point geometry
            with SRID 4326.
        """
        if isinstance(wkb, str):
            wkb = unhexlify(wkb)
        # 1 byte endian flag + 4 bytes type + 4 bytes SRID + 2 * 8 bytes coordinates
        if len(wkb) != 25:
            raise ValueError(f"Point wkb has unexpected length {len(wkb)}")
        if wkb[0] == 0:
            gtype, srid, x, y = unpack('>iidd', wkb[1:])
        elif wkb[0] == 1:
            gtype, srid, x, y = unpack('<iidd', wkb[1:])
        else:
            raise ValueError("WKB has unknown endian value.")

        # Geometry type 'point' (1) with the SRID flag (0x20000000) set.
        if gtype != 0x20000001:
            raise ValueError("WKB must be a point geometry.")
        if srid != 4326:
            raise ValueError("Only WGS84 WKB supported.")

        return Point(x, y)


    @staticmethod
    def from_param(inp: Any) -> 'Point':
        """ Create a point from an input parameter. The parameter
            may be given as a point, a string or a sequence of
            strings or floats. Raises a UsageError if the format is
            not correct.
        """
        if isinstance(inp, Point):
            return inp

        seq: Sequence[str]
        if isinstance(inp, str):
            seq = inp.split(',')
        elif isinstance(inp, abc.Sequence):
            seq = inp
        else:
            # Any other type cannot be interpreted as a pair of coordinates.
            raise UsageError('Point parameter needs to be a comma-separated string '
                             'or a sequence of 2 coordinates.')

        if len(seq) != 2:
            raise UsageError('Point parameter needs 2 coordinates.')
        try:
            # Non-finite values are filtered out, so that unpacking
            # fails with a ValueError for them as well.
            x, y = filter(math.isfinite, map(float, seq))
        except (TypeError, ValueError) as exc:
            raise UsageError('Point parameter needs to be numbers.') from exc

        if x < -180.0 or x > 180.0 or y < -90.0 or y > 90.0:
            raise UsageError('Point coordinates invalid.')

        return Point(x, y)


    def to_wkt(self) -> str:
        """ Return the WKT representation of the point.
        """
        return f'POINT({self.x} {self.y})'
|
||||
|
||||
|
||||
|
||||
# A point may be given as a Point or as a plain tuple of two floats.
AnyPoint = Union[Point, Tuple[float, float]]

# First 17 bytes of the EWKB of a bounding box polygon in little endian
# and big endian encoding: endian flag, geometry type 'polygon' with SRID
# flag, SRID 4326, ring count of 1 and point count of 5.
WKB_BBOX_HEADER_LE = b'\x01\x03\x00\x00\x20\xE6\x10\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00'
WKB_BBOX_HEADER_BE = b'\x00\x20\x00\x00\x03\x00\x00\x10\xe6\x00\x00\x00\x01\x00\x00\x00\x05'
|
||||
|
||||
class Bbox:
    """ A bounding box in WGS84 projection.

        The coordinates are available as a tuple in the 'coords'
        attribute in the order (minx, miny, maxx, maxy).
    """
    def __init__(self, minx: float, miny: float, maxx: float, maxy: float) -> None:
        """ Create a new bounding box with the given coordinates in WGS84
            projection.
        """
        self.coords = (minx, miny, maxx, maxy)


    @property
    def minlat(self) -> float:
        """ Southern-most latitude, corresponding to the minimum y coordinate.
        """
        return self.coords[1]


    @property
    def maxlat(self) -> float:
        """ Northern-most latitude, corresponding to the maximum y coordinate.
        """
        return self.coords[3]


    @property
    def minlon(self) -> float:
        """ Western-most longitude, corresponding to the minimum x coordinate.
        """
        return self.coords[0]


    @property
    def maxlon(self) -> float:
        """ Eastern-most longitude, corresponding to the maximum x coordinate.
        """
        return self.coords[2]


    @property
    def area(self) -> float:
        """ Return the area of the box in WGS84.
        """
        return (self.coords[2] - self.coords[0]) * (self.coords[3] - self.coords[1])


    def contains(self, pt: Point) -> bool:
        """ Check if the point is inside or on the boundary of the box.
        """
        return self.coords[0] <= pt[0] and self.coords[1] <= pt[1]\
               and self.coords[2] >= pt[0] and self.coords[3] >= pt[1]


    def to_wkt(self) -> str:
        """ Return the WKT representation of the Bbox. This
            is a simple polygon with four points.
        """
        return 'POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))'\
                  .format(*self.coords) # pylint: disable=consider-using-f-string


    @staticmethod
    def from_wkb(wkb: Union[None, str, bytes]) -> 'Optional[Bbox]':
        """ Create a Bbox from a bounding box polygon as returned by
            the database. Returns `None` if the input value is None.

            The EWKB may be given as bytes or as a hex-encoded string.
            Raises a ValueError when the input does not have the length
            and header of a bounding box polygon.
        """
        if wkb is None:
            return None

        if isinstance(wkb, str):
            wkb = unhexlify(wkb)

        # 17 bytes of header followed by 5 points of 16 bytes each
        if len(wkb) != 97:
            raise ValueError("WKB must be a bounding box polygon")
        # Only the first and the third point of the ring are needed.
        # They are opposite corners of the box.
        if wkb.startswith(WKB_BBOX_HEADER_LE):
            x1, y1, _, _, x2, y2 = unpack('<dddddd', wkb[17:65])
        elif wkb.startswith(WKB_BBOX_HEADER_BE):
            x1, y1, _, _, x2, y2 = unpack('>dddddd', wkb[17:65])
        else:
            raise ValueError("WKB has wrong header")

        return Bbox(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))


    @staticmethod
    def from_point(pt: Point, buffer: float) -> 'Bbox':
        """ Return a Bbox around the point with the buffer added to all sides.
        """
        return Bbox(pt[0] - buffer, pt[1] - buffer,
                    pt[0] + buffer, pt[1] + buffer)


    @staticmethod
    def from_param(inp: Any) -> 'Bbox':
        """ Return a Bbox from an input parameter. The box may be
            given as a Bbox, a string or a list of strings or numbers.
            Coordinates outside the valid range are clamped to it.
            Raises a UsageError if the format is incorrect.
        """
        if isinstance(inp, Bbox):
            return inp

        seq: Sequence[str]
        if isinstance(inp, str):
            seq = inp.split(',')
        elif isinstance(inp, abc.Sequence):
            seq = inp
        else:
            # Any other type cannot be interpreted as a list of coordinates.
            raise UsageError('Bounding box parameter needs to be a comma-separated '
                             'string or a sequence of 4 coordinates.')

        if len(seq) != 4:
            raise UsageError('Bounding box parameter needs 4 coordinates.')
        try:
            # Non-finite values are filtered out, so that unpacking
            # fails with a ValueError for them as well.
            x1, y1, x2, y2 = filter(math.isfinite, map(float, seq))
        except (TypeError, ValueError) as exc:
            raise UsageError('Bounding box parameter needs to be numbers.') from exc

        x1 = min(180, max(-180, x1))
        x2 = min(180, max(-180, x2))
        y1 = min(90, max(-90, y1))
        y2 = min(90, max(-90, y2))

        # A box without extent in one of the dimensions is not usable.
        if x1 == x2 or y1 == y2:
            raise UsageError('Bounding box with invalid parameters.')

        return Bbox(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
|
||||
|
||||
|
||||
class GeometryFormat(enum.Flag):
    """ All search functions support returning the full geometry of a place in
        various formats. The internal geometry is converted by PostGIS to
        the desired format and then returned as a string. It is possible to
        request multiple formats at the same time by combining the
        flags with `|`.
    """
    NONE = 0
    """ No geometry requested. Alias for a empty flag.
    """
    GEOJSON = enum.auto()
    """
    [GeoJSON](https://geojson.org/) format
    """
    KML = enum.auto()
    """
    [KML](https://en.wikipedia.org/wiki/Keyhole_Markup_Language) format
    """
    SVG = enum.auto()
    """
    [SVG](http://www.w3.org/TR/SVG/paths.html) format
    """
    TEXT = enum.auto()
    """
    [WKT](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) format
    """
|
||||
|
||||
|
||||
class DataLayer(enum.Flag):
    """ The `DataLayer` flag type defines the layers that can be selected
        for reverse and forward search. Multiple layers are selected
        by combining the flags with `|`.
    """
    ADDRESS = enum.auto()
    """ The address layer contains all places relevant for addresses:
        fully qualified addresses with a house number (or a house name equivalent,
        for some addresses) and places that can be part of an address like
        roads, cities, states.
    """
    POI = enum.auto()
    """ Layer for points of interest like shops, restaurants but also
        recycling bins or postboxes.
    """
    RAILWAY = enum.auto()
    """ Layer with railway features including tracks and other infrastructure.
        Note that in Nominatim's standard configuration, only very few railway
        features are imported into the database. Thus a custom configuration
        is required to make full use of this layer.
    """
    NATURAL = enum.auto()
    """ Layer with natural features like rivers, lakes and mountains.
    """
    MANMADE = enum.auto()
    """ Layer with other human-made features and boundaries. This layer is
        the catch-all and includes all features not covered by the other
        layers. A typical example for this layer are national park boundaries.
    """
|
||||
|
||||
|
||||
def format_country(cc: Any) -> List[str]:
    """ Extract a list of country codes from the input which may be either
        a comma-separated string or a list of strings. All values that are
        not a two-letter string are dropped. The remaining codes are
        returned in lower case.

        Raises a UsageError when the input is neither a string nor
        a sequence.
    """
    # Strings are sequences, too, so this covers both accepted inputs.
    if not isinstance(cc, abc.Sequence):
        raise UsageError("Parameter 'country' needs to be a comma-separated list "
                         "or a Python list of strings.")

    candidates: Sequence[str] = cc.split(',') if isinstance(cc, str) else cc

    return [code.lower() for code in candidates
            if isinstance(code, str) and len(code) == 2]
|
||||
|
||||
|
||||
def format_excluded(ids: Any) -> List[int]:
    """ Extract a list of place ids from the input which may be either
        a string or a list of strings or ints. Ignores empty values but
        throws a UsageError on anything that cannot be converted to int.

        When no place id is left, the list `[0]` is returned.
    """
    plist: Sequence[Any]
    if isinstance(ids, str):
        plist = [s.strip() for s in ids.split(',')]
    elif isinstance(ids, abc.Sequence):
        plist = ids
    else:
        raise UsageError("Parameter 'excluded' needs to be a comma-separated list "
                         "or a Python list of numbers.")

    # isdecimal() only accepts characters that int() is able to convert.
    # isdigit() would also let through characters like superscript digits.
    if not all(isinstance(i, int) or
               (isinstance(i, str) and (not i or i.isdecimal())) for i in plist):
        raise UsageError("Parameter 'excluded' only takes place IDs.")

    return [int(pid) for pid in plist if pid] or [0]
|
||||
|
||||
|
||||
def format_categories(categories: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """ Extract a list of categories. Currently a noop: the input list
        is returned as is, without any validation.
    """
    return categories
|
||||
|
||||
# Type of the parameter class, so that LookupDetails.from_kwargs() is
# typed to return an instance of the class it is called on.
TParam = TypeVar('TParam', bound='LookupDetails') # pylint: disable=invalid-name
|
||||
|
||||
@dataclasses.dataclass
class LookupDetails:
    """ Collection of parameters that define which kind of details are
        returned with a lookup or details result.
    """
    geometry_output: GeometryFormat = GeometryFormat.NONE
    """ Add the full geometry of the place to the result. Multiple
        formats may be selected. Note that geometries can become quite large.
    """
    address_details: bool = False
    """ Get detailed information on the places that make up the address
        for the result.
    """
    linked_places: bool = False
    """ Get detailed information on the places that link to the result.
    """
    parented_places: bool = False
    """ Get detailed information on all places that this place is a parent
        for, i.e. all places for which it provides the address details.
        Only POI places can have parents.
    """
    keywords: bool = False
    """ Add information about the search terms used for this place.
    """
    geometry_simplification: float = 0.0
    """ Simplification factor for a geometry in degrees WGS. A factor of
        0.0 means the original geometry is kept. The higher the value, the
        more the geometry gets simplified.
    """
    locales: Locales = Locales()
    """ Preferred languages for localization of results.
    """

    @classmethod
    def from_kwargs(cls: Type[TParam], kwargs: Dict[str, Any]) -> TParam:
        """ Load the data fields of the class from a dictionary.
            Unknown entries in the dictionary are ignored, missing ones
            get the default setting.

            A value of None selects the default of the field. A field
            with a 'transform' function in its metadata gets the result
            of that function. All other values must be an instance of the
            field type, otherwise a UsageError is thrown.
        """
        def _check_field(v: Any, field: 'dataclasses.Field[Any]') -> Any:
            if v is None:
                if field.default_factory != dataclasses.MISSING:
                    return field.default_factory()
                return field.default

            if 'transform' in field.metadata:
                # The transform function does its own checking.
                return field.metadata['transform'](v)

            if isinstance(v, field.type):
                return v

            raise UsageError(f"Parameter '{field.name}' needs to be of {field.type!s}.")

        params = {}
        for field in dataclasses.fields(cls):
            if field.name in kwargs:
                params[field.name] = _check_field(kwargs[field.name], field)

        return cls(**params)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class ReverseDetails(LookupDetails):
    """ Collection of parameters for the reverse call.

        Extends the lookup parameters with a rank limit and a
        layer filter.
    """
    # The transform function restricts the value to the range 0 to 30.
    max_rank: int = dataclasses.field(default=30,
                                      metadata={'transform': lambda v: max(0, min(v, 30))}
                                     )
    """ Highest address rank to return.
    """
    layers: DataLayer = DataLayer.ADDRESS | DataLayer.POI
    """ Filter which kind of data to include.
    """
|
||||
|
||||
@dataclasses.dataclass
class SearchDetails(LookupDetails):
    """ Collection of parameters for the search call.
    """
    max_results: int = 10
    """ Maximum number of results to be returned. The actual number of results
        may be less.
    """
    min_rank: int = dataclasses.field(default=0,
                                      metadata={'transform': lambda v: max(0, min(v, 30))}
                                     )
    """ Lowest address rank to return. Values are restricted to
        the range 0 to 30.
    """
    max_rank: int = dataclasses.field(default=30,
                                      metadata={'transform': lambda v: max(0, min(v, 30))}
                                     )
    """ Highest address rank to return. Values are restricted to
        the range 0 to 30.
    """
    layers: Optional[DataLayer] = dataclasses.field(default=None,
                                                    metadata={'transform': lambda r : r})
    """ Filter which kind of data to include. When 'None' (the default) then
        filtering by layers is disabled.
    """
    countries: List[str] = dataclasses.field(default_factory=list,
                                             metadata={'transform': format_country})
    """ Restrict search results to the given countries. An empty list (the
        default) will disable this filter.
    """
    excluded: List[int] = dataclasses.field(default_factory=list,
                                            metadata={'transform': format_excluded})
    """ List of OSM objects to exclude from the results. Currently only
        works when the internal place ID is given.
        An empty list (the default) will disable this filter.
    """
    viewbox: Optional[Bbox] = dataclasses.field(default=None,
                                                metadata={'transform': Bbox.from_param})
    """ Focus the search on a given map area.
    """
    bounded_viewbox: bool = False
    """ Use 'viewbox' as a filter and restrict results to places within the
        given area.
    """
    near: Optional[Point] = dataclasses.field(default=None,
                                              metadata={'transform': Point.from_param})
    """ Order results by distance to the given point.
    """
    near_radius: Optional[float] = dataclasses.field(default=None,
                                                     metadata={'transform': lambda r : r})
    """ Use near point as a filter and drop results outside the given
        radius. Radius is given in degrees WGS84.
    """
    categories: List[Tuple[str, str]] = dataclasses.field(default_factory=list,
                                                          metadata={'transform': format_categories})
    """ Restrict search to places with one of the given class/type categories.
        An empty list (the default) will disable this filter.
    """
    viewbox_x2: Optional[Bbox] = None
    """ The viewbox extended by half of its width and height on each
        side. Computed automatically when a viewbox is set on creation.
    """

    def __post_init__(self) -> None:
        if self.viewbox is None:
            return

        box = self.viewbox
        xext = (box.maxlon - box.minlon)/2
        yext = (box.maxlat - box.minlat)/2
        self.viewbox_x2 = Bbox(box.minlon - xext, box.minlat - yext,
                               box.maxlon + xext, box.maxlat + yext)


    def restrict_min_max_rank(self, new_min: int, new_max: int) -> None:
        """ Narrow the min_rank and max_rank fields, so that they lie
            within the given boundaries.
        """
        assert new_min <= new_max
        self.min_rank = max(self.min_rank, new_min)
        self.max_rank = min(self.max_rank, new_max)


    def is_impossible(self) -> bool:
        """ Check if the parameter configuration is contradictory and
            cannot yield any results.
        """
        if self.min_rank > self.max_rank:
            return True

        # NOTE(review): this reports the search as impossible when the
        # near point lies INSIDE a bounded viewbox. The condition looks
        # inverted - confirm the intended behaviour.
        if (self.bounded_viewbox
                and self.viewbox is not None and self.near is not None
                and self.viewbox.contains(self.near)):
            return True

        if self.layers is not None:
            # Layer filter enabled but no layer selected.
            if not self.layers:
                return True
            # Ranks up to 4 are only searched in the address layer.
            if self.max_rank <= 4 and not self.layers & DataLayer.ADDRESS:
                return True

        return False


    def layer_enabled(self, layer: DataLayer) -> bool:
        """ Check if the given layer has been chosen. Also returns
            true when layer restriction has been disabled completely.
        """
        if self.layers is None:
            return True

        return bool(self.layers & layer)
|
||||
21
src/nominatim_api/v1/__init__.py
Normal file
21
src/nominatim_api/v1/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of API version v1 (aka the legacy version).
|
||||
"""
|
||||
|
||||
#pylint: disable=useless-import-alias
|
||||
|
||||
from .server_glue import (ASGIAdaptor as ASGIAdaptor,
|
||||
EndpointFunc as EndpointFunc,
|
||||
ROUTES as ROUTES)
|
||||
|
||||
from . import format as _format
|
||||
|
||||
# Make the functions of the format dispatcher available
# directly from this package.
list_formats = _format.dispatch.list_formats
supports_format = _format.dispatch.supports_format
format_result = _format.dispatch.format_result
|
||||
201
src/nominatim_api/v1/classtypes.py
Normal file
201
src/nominatim_api/v1/classtypes.py
Normal file
@@ -0,0 +1,201 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Hard-coded information about tag categories.
|
||||
|
||||
These tables have been copied verbatim from the old PHP code. For future
|
||||
versions, a more flexible formatting is required.
|
||||
"""
|
||||
from typing import Tuple, Optional, Mapping, Union
|
||||
|
||||
from ..results import ReverseResult, SearchResult
|
||||
from ..types import Bbox
|
||||
|
||||
def get_label_tag(category: Tuple[str, str], extratags: Optional[Mapping[str, str]],
                  rank: int, country: Optional[str]) -> str:
    """ Derive a label for the place described by the parameters.
        The label is lower-cased and spaces are replaced with underscores,
        so that it can serve as an XML element name.
    """
    main_class, sub_type = category[0], category[1]
    # 'place' and 'linked_place' tags are only taken into account below rank 26.
    place_tags = extratags if (rank < 26 and extratags) else {}

    if 'place' in place_tags:
        label = place_tags['place']
    elif 'linked_place' in place_tags:
        label = place_tags['linked_place']
    elif category == ('boundary', 'administrative'):
        # Prefer a country-specific label, then the generic one for the level.
        level = int(rank/2)
        label = (ADMIN_LABELS.get((country or '', level))
                 or ADMIN_LABELS.get(('', level))
                 or 'Administrative')
    elif sub_type == 'postal_code':
        label = 'postcode'
    elif rank < 26:
        label = main_class if sub_type == 'yes' else sub_type
    elif rank < 28:
        label = 'road'
    elif main_class == 'place' \
         and sub_type in ('house_number', 'house_name', 'country_code'):
        label = sub_type
    else:
        label = main_class

    return label.lower().replace(' ', '_')
|
||||
|
||||
|
||||
def bbox_from_result(result: Union[ReverseResult, SearchResult]) -> Bbox:
    """ Return the bounding box to report for the result. The bounding box
        of the result itself is used when there is one and the result is
        not an OSM node. In all other cases a box is created around the
        centroid with an extent that is looked up by the category of
        the result.
    """
    is_node = bool(result.osm_object) and result.osm_object[0] == 'N'

    if not is_node and result.bbox is not None:
        return result.bbox

    # Categories without an entry in NODE_EXTENT get a very small default box.
    return Bbox.from_point(result.centroid,
                           NODE_EXTENT.get(result.category, 0.00005))
|
||||
|
||||
|
||||
# pylint: disable=line-too-long
|
||||
# Attribution text that the formatters add to their output.
OSM_ATTRIBUTION = 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright'
|
||||
|
||||
|
||||
# Maps the one-letter OSM object type to the name used in the output.
OSM_TYPE_NAME = {'N': 'node', 'W': 'way', 'R': 'relation'}
|
||||
|
||||
|
||||
# Labels for administrative boundaries, keyed by (country code, level).
# Entries with an empty country code are the generic labels for levels 1
# to 12, the remaining entries are country-specific labels.
ADMIN_LABELS = {
    **{('', level): name for level, name in enumerate(
        ('Continent', 'Country', 'Region', 'State', 'State District',
         'County', 'Municipality', 'City', 'City District', 'Suburb',
         'Neighbourhood', 'City Block'), start=1)},
    ('no', 3): 'State',
    ('no', 4): 'County',
    ('se', 3): 'State',
    ('se', 4): 'County',
}
|
||||
|
||||
|
||||
# Base name of the icon for a (class, type) category. The JSON formatters
# build the icon URL from it as '<icon_base_url>/<name>.p.20.png'.
ICONS = {
    ('boundary', 'administrative'): 'poi_boundary_administrative',
    ('place', 'city'): 'poi_place_city',
    ('place', 'town'): 'poi_place_town',
    ('place', 'village'): 'poi_place_village',
    ('place', 'hamlet'): 'poi_place_village',
    ('place', 'suburb'): 'poi_place_village',
    ('place', 'locality'): 'poi_place_village',
    ('place', 'airport'): 'transport_airport2',
    ('aeroway', 'aerodrome'): 'transport_airport2',
    ('railway', 'station'): 'transport_train_station2',
    ('amenity', 'place_of_worship'): 'place_of_worship_unknown3',
    ('amenity', 'pub'): 'food_pub',
    ('amenity', 'bar'): 'food_bar',
    ('amenity', 'university'): 'education_university',
    ('tourism', 'museum'): 'tourist_museum',
    ('amenity', 'arts_centre'): 'tourist_art_gallery2',
    ('tourism', 'zoo'): 'tourist_zoo',
    ('tourism', 'theme_park'): 'poi_point_of_interest',
    ('tourism', 'attraction'): 'poi_point_of_interest',
    ('leisure', 'golf_course'): 'sport_golf',
    ('historic', 'castle'): 'tourist_castle',
    ('amenity', 'hospital'): 'health_hospital',
    ('amenity', 'school'): 'education_school',
    ('amenity', 'theatre'): 'tourist_theatre',
    ('amenity', 'library'): 'amenity_library',
    ('amenity', 'fire_station'): 'amenity_firestation3',
    ('amenity', 'police'): 'amenity_police2',
    ('amenity', 'bank'): 'money_bank2',
    ('amenity', 'post_office'): 'amenity_post_office',
    ('tourism', 'hotel'): 'accommodation_hotel2',
    ('amenity', 'cinema'): 'tourist_cinema',
    ('tourism', 'artwork'): 'tourist_art_gallery2',
    ('historic', 'archaeological_site'): 'tourist_archaeological2',
    ('amenity', 'doctors'): 'health_doctors',
    ('leisure', 'sports_centre'): 'sport_leisure_centre',
    ('leisure', 'swimming_pool'): 'sport_swimming_outdoor',
    ('shop', 'supermarket'): 'shopping_supermarket',
    ('shop', 'convenience'): 'shopping_convenience',
    ('amenity', 'restaurant'): 'food_restaurant',
    ('amenity', 'fast_food'): 'food_fastfood',
    ('amenity', 'cafe'): 'food_cafe',
    ('tourism', 'guest_house'): 'accommodation_bed_and_breakfast',
    ('amenity', 'pharmacy'): 'health_pharmacy_dispensing',
    ('amenity', 'fuel'): 'transport_fuel',
    ('natural', 'peak'): 'poi_peak',
    ('natural', 'wood'): 'landuse_coniferous_and_deciduous',
    ('shop', 'bicycle'): 'shopping_bicycle',
    ('shop', 'clothes'): 'shopping_clothes',
    ('shop', 'hairdresser'): 'shopping_hairdresser',
    ('shop', 'doityourself'): 'shopping_diy',
    ('shop', 'estate_agent'): 'shopping_estateagent2',
    ('shop', 'car'): 'shopping_car',
    ('shop', 'garden_centre'): 'shopping_garden_centre',
    ('shop', 'car_repair'): 'shopping_car_repair',
    ('shop', 'bakery'): 'shopping_bakery',
    ('shop', 'butcher'): 'shopping_butcher',
    ('shop', 'apparel'): 'shopping_clothes',
    ('shop', 'laundry'): 'shopping_laundrette',
    ('shop', 'beverages'): 'shopping_alcohol',
    ('shop', 'alcohol'): 'shopping_alcohol',
    ('shop', 'optician'): 'health_opticians',
    ('shop', 'chemist'): 'health_pharmacy',
    ('shop', 'gallery'): 'tourist_art_gallery2',
    ('shop', 'jewelry'): 'shopping_jewelry',
    ('tourism', 'information'): 'amenity_information',
    ('historic', 'ruins'): 'tourist_ruin',
    ('amenity', 'college'): 'education_school',
    ('historic', 'monument'): 'tourist_monument',
    ('historic', 'memorial'): 'tourist_monument',
    ('historic', 'mine'): 'poi_mine',
    ('tourism', 'caravan_site'): 'accommodation_caravan_park',
    ('amenity', 'bus_station'): 'transport_bus_station',
    ('amenity', 'atm'): 'money_atm2',
    ('tourism', 'viewpoint'): 'tourist_view_point',
    ('tourism', 'guesthouse'): 'accommodation_bed_and_breakfast',
    ('railway', 'tram'): 'transport_tram_stop',
    ('amenity', 'courthouse'): 'amenity_court',
    ('amenity', 'recycling'): 'amenity_recycling',
    ('amenity', 'dentist'): 'health_dentist',
    ('natural', 'beach'): 'tourist_beach',
    ('railway', 'tram_stop'): 'transport_tram_stop',
    ('amenity', 'prison'): 'amenity_prison',
    ('highway', 'bus_stop'): 'transport_bus_stop2',
}
|
||||
|
||||
# Extent handed to Bbox.from_point() when a bounding box has to be
# created around the centroid of a result, by (class, type) category.
NODE_EXTENT = {
    ('place', 'continent'): 25,
    ('place', 'country'): 7,
    ('place', 'state'): 2.6,
    ('place', 'province'): 2.6,
    ('place', 'region'): 1.0,
    ('place', 'county'): 0.7,
    ('place', 'city'): 0.16,
    ('place', 'municipality'): 0.16,
    ('place', 'island'): 0.32,
    ('place', 'postcode'): 0.16,
    ('place', 'town'): 0.04,
    ('place', 'village'): 0.02,
    ('place', 'hamlet'): 0.02,
    ('place', 'district'): 0.02,
    ('place', 'borough'): 0.02,
    ('place', 'suburb'): 0.02,
    ('place', 'locality'): 0.01,
    ('place', 'neighbourhood'): 0.01,
    ('place', 'quarter'): 0.01,
    ('place', 'city_block'): 0.01,
    ('landuse', 'farm'): 0.01,
    ('place', 'farm'): 0.01,
    ('place', 'airport'): 0.015,
    ('aeroway', 'aerodrome'): 0.015,
    ('railway', 'station'): 0.005,
}
|
||||
259
src/nominatim_api/v1/format.py
Normal file
259
src/nominatim_api/v1/format.py
Normal file
@@ -0,0 +1,259 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Output formatters for API version v1.
|
||||
"""
|
||||
from typing import List, Dict, Mapping, Any
|
||||
import collections
|
||||
import datetime as dt
|
||||
|
||||
from nominatim_core.utils.json_writer import JsonWriter
|
||||
from ..status import StatusResult
|
||||
from ..results import DetailedResult, ReverseResults, SearchResults, \
|
||||
AddressLines, AddressLine
|
||||
from ..localization import Locales
|
||||
from ..result_formatting import FormatDispatcher
|
||||
from .classtypes import ICONS
|
||||
from . import format_json, format_xml
|
||||
|
||||
class RawDataList(List[Dict[str, Any]]):
    """ List of dictionaries with raw data. The type exists so that
        a formatting function can be registered which writes such a list
        to json unchanged.
    """
|
||||
|
||||
# Dispatcher for the v1 formats. The functions below register themselves
# for a result type and a format name through its format_func() decorator.
dispatch = FormatDispatcher()
|
||||
|
||||
@dispatch.format_func(StatusResult, 'text')
def _format_status_text(result: StatusResult, _: Mapping[str, Any]) -> str:
    # A non-zero status is an error and reported together with its message.
    return f"ERROR: {result.message}" if result.status else 'OK'
|
||||
|
||||
|
||||
@dispatch.format_func(StatusResult, 'json')
def _format_status_json(result: StatusResult, _: Mapping[str, Any]) -> str:
    writer = JsonWriter()

    writer.start_object()
    writer.keyval('status', result.status)
    writer.keyval('message', result.message)
    # The update date and the database version are left out when unset.
    writer.keyval_not_none('data_updated', result.data_updated,
                           lambda v: v.isoformat())
    writer.keyval('software_version', str(result.software_version))
    writer.keyval_not_none('database_version', result.database_version, str)
    writer.end_object()

    return writer()
|
||||
|
||||
|
||||
def _add_address_row(writer: JsonWriter, row: AddressLine,
                     locales: Locales) -> None:
    """ Write a single address line as a json object to the writer.
    """
    writer.start_object()
    writer.keyval('localname', locales.display_name(row.names))
    writer.keyval_not_none('place_id', row.place_id)

    osm_object = row.osm_object
    if osm_object is not None:
        writer.keyval('osm_id', osm_object[1])
        writer.keyval('osm_type', osm_object[0])

    if row.extratags:
        writer.keyval_not_none('place_type', row.extratags.get('place_type'))

    writer.keyval('class', row.category[0])
    writer.keyval('type', row.category[1])
    writer.keyval_not_none('admin_level', row.admin_level)
    writer.keyval('rank_address', row.rank_address)
    writer.keyval('distance', row.distance)
    writer.keyval('isaddress', row.isaddress)
    writer.end_object()
|
||||
|
||||
|
||||
def _add_address_rows(writer: JsonWriter, section: str, rows: AddressLines,
                      locales: Locales) -> None:
    """ Write the address lines as a json array under the key
        given in 'section'.
    """
    writer.key(section)
    writer.start_array()

    for address_line in rows:
        _add_address_row(writer, address_line, locales)
        writer.next()

    writer.end_array()
    writer.next()
|
||||
|
||||
|
||||
def _add_parent_rows_grouped(writer: JsonWriter, rows: AddressLines,
                             locales: Locales) -> None:
    """ Write the address lines under the key 'hierarchy' as an object
        with one array of lines per category type.
    """
    # Collect the readily formatted json of each row by its category type.
    by_type: Dict[str, List[str]] = {}
    for row in rows:
        row_writer = JsonWriter()
        _add_address_row(row_writer, row, locales)
        by_type.setdefault(row.category[1], []).append(row_writer())

    writer.key('hierarchy').start_object()
    for category_type, lines in by_type.items():
        writer.key(category_type).start_array()
        # The formatted rows start with the local name, so sorting the
        # strings orders the rows alphabetically by local name.
        for formatted in sorted(lines):
            writer.raw(formatted).next()
        writer.end_array().next()

    writer.end_object().next()
|
||||
|
||||
|
||||
@dispatch.format_func(DetailedResult, 'json')
def _format_details_json(result: DetailedResult, options: Mapping[str, Any]) -> str:
    locales = options.get('locales', Locales())
    geom = result.geometry.get('geojson')
    centroid = result.centroid.to_geojson()

    out = JsonWriter()
    out.start_object()
    out.keyval_not_none('place_id', result.place_id)
    out.keyval_not_none('parent_place_id', result.parent_place_id)

    if result.osm_object is not None:
        osm_type, osm_id = result.osm_object[0], result.osm_object[1]
        out.keyval('osm_type', osm_type)
        out.keyval('osm_id', osm_id)

    out.keyval('category', result.category[0])
    out.keyval('type', result.category[1])
    out.keyval('admin_level', result.admin_level)
    out.keyval('localname', result.locale_name or '')
    out.keyval('names', result.names or {})
    out.keyval('addresstags', result.address or {})
    out.keyval_not_none('housenumber', result.housenumber)
    out.keyval_not_none('calculated_postcode', result.postcode)
    out.keyval_not_none('country_code', result.country_code)
    out.keyval_not_none('indexed_date', result.indexed_date, lambda v: v.isoformat())
    out.keyval_not_none('importance', result.importance)
    out.keyval('calculated_importance', result.calculated_importance())
    out.keyval('extratags', result.extratags or {})
    out.keyval_not_none('calculated_wikipedia', result.wikipedia)
    out.keyval('rank_address', result.rank_address)
    out.keyval('rank_search', result.rank_search)
    # The area check looks at the geojson string when available and
    # at the geometry type otherwise.
    out.keyval('isarea', 'Polygon' in (geom or result.geometry.get('type') or ''))
    out.key('centroid').raw(centroid).next()
    # Without a geojson geometry, the centroid is output in its place.
    out.key('geometry').raw(geom or centroid).next()

    icon_base_url = options.get('icon_base_url', None)
    if icon_base_url:
        icon = ICONS.get(result.category)
        if icon:
            out.keyval('icon', f"{icon_base_url}/{icon}.p.20.png")

    if result.address_rows is not None:
        _add_address_rows(out, 'address', result.address_rows, locales)

    if result.linked_rows:
        _add_address_rows(out, 'linked_places', result.linked_rows, locales)

    if not (result.name_keywords is None and result.address_keywords is None):
        out.key('keywords').start_object()

        keyword_sections = (('name', result.name_keywords),
                            ('address', result.address_keywords))
        for section, words in keyword_sections:
            out.key(section).start_array()
            for word in (words or []):
                out.start_object()
                out.keyval('id', word.word_id)
                out.keyval('token', word.word_token)
                out.end_object().next()
            out.end_array().next()

        out.end_object().next()

    if result.parented_rows is not None:
        if options.get('group_hierarchy', False):
            _add_parent_rows_grouped(out, result.parented_rows, locales)
        else:
            _add_address_rows(out, 'hierarchy', result.parented_rows, locales)

    out.end_object()

    return out()
|
||||
|
||||
|
||||
@dispatch.format_func(ReverseResults, 'xml')
def _format_reverse_xml(results: ReverseResults, options: Mapping[str, Any]) -> str:
    # The query is added as an attribute of the root element.
    extra_info = {'querystring': options.get('query', '')}

    return format_xml.format_base_xml(results, options, simple=True,
                                      xml_root_tag='reversegeocode',
                                      xml_extra_info=extra_info)
|
||||
|
||||
|
||||
@dispatch.format_func(ReverseResults, 'geojson')
def _format_reverse_geojson(results: ReverseResults,
                            options: Mapping[str, Any]) -> str:
    # Reverse results use the 'simple' mode of the shared formatter.
    return format_json.format_base_geojson(results, options, simple=True)
|
||||
|
||||
|
||||
@dispatch.format_func(ReverseResults, 'geocodejson')
def _format_reverse_geocodejson(results: ReverseResults,
                                options: Mapping[str, Any]) -> str:
    # Reverse results use the 'simple' mode of the shared formatter.
    return format_json.format_base_geocodejson(results, options, simple=True)
|
||||
|
||||
|
||||
@dispatch.format_func(ReverseResults, 'json')
def _format_reverse_json(results: ReverseResults,
                         options: Mapping[str, Any]) -> str:
    # The 'json' format reports the main category under the key 'class'.
    return format_json.format_base_json(results, options, simple=True,
                                        class_label='class')
|
||||
|
||||
|
||||
@dispatch.format_func(ReverseResults, 'jsonv2')
def _format_reverse_jsonv2(results: ReverseResults,
                           options: Mapping[str, Any]) -> str:
    # The 'jsonv2' format reports the main category under the key 'category'.
    return format_json.format_base_json(results, options, simple=True,
                                        class_label='category')
|
||||
|
||||
|
||||
@dispatch.format_func(SearchResults, 'xml')
def _format_search_xml(results: SearchResults, options: Mapping[str, Any]) -> str:
    extra = {'querystring': options.get('query', '')}
    # These options become attributes of the root element when they are set.
    extra.update({attr: options[attr]
                  for attr in ('more_url', 'exclude_place_ids', 'viewbox')
                  if options.get(attr)})

    return format_xml.format_base_xml(results, options, simple=False,
                                      xml_root_tag='searchresults',
                                      xml_extra_info=extra)
|
||||
|
||||
|
||||
|
||||
@dispatch.format_func(SearchResults, 'geojson')
def _format_search_geojson(results: SearchResults,
                           options: Mapping[str, Any]) -> str:
    # Search results use the list mode of the shared formatter.
    return format_json.format_base_geojson(results, options, simple=False)
|
||||
|
||||
|
||||
@dispatch.format_func(SearchResults, 'geocodejson')
def _format_search_geocodejson(results: SearchResults,
                               options: Mapping[str, Any]) -> str:
    # Search results use the list mode of the shared formatter.
    return format_json.format_base_geocodejson(results, options, simple=False)
|
||||
|
||||
|
||||
@dispatch.format_func(SearchResults, 'json')
def _format_search_json(results: SearchResults,
                        options: Mapping[str, Any]) -> str:
    # The 'json' format reports the main category under the key 'class'.
    return format_json.format_base_json(results, options, simple=False,
                                        class_label='class')
|
||||
|
||||
|
||||
@dispatch.format_func(SearchResults, 'jsonv2')
def _format_search_jsonv2(results: SearchResults,
                          options: Mapping[str, Any]) -> str:
    # The 'jsonv2' format reports the main category under the key 'category'.
    return format_json.format_base_json(results, options, simple=False,
                                        class_label='category')
|
||||
|
||||
@dispatch.format_func(RawDataList, 'json')
def _format_raw_data_json(results: RawDataList, _: Mapping[str, Any]) -> str:
    writer = JsonWriter()
    writer.start_array()

    for entry in results:
        writer.start_object()
        for key, value in entry.items():
            # Datetimes are written as 'YYYY-MM-DD HH:MM:SS' strings,
            # everything else is handed to the writer unchanged.
            if isinstance(value, dt.datetime):
                value = value.isoformat(sep=' ', timespec='seconds')
            writer.keyval(key, value)
        writer.end_object().next()

    writer.end_array()

    return writer()
|
||||
275
src/nominatim_api/v1/format_json.py
Normal file
275
src/nominatim_api/v1/format_json.py
Normal file
@@ -0,0 +1,275 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Helper functions for output of results in json formats.
|
||||
"""
|
||||
from typing import Mapping, Any, Optional, Tuple, Union
|
||||
|
||||
from nominatim_core.utils.json_writer import JsonWriter
|
||||
from ..results import AddressLines, ReverseResults, SearchResults
|
||||
from . import classtypes as cl
|
||||
|
||||
#pylint: disable=too-many-branches
|
||||
|
||||
def _write_osm_id(out: JsonWriter, osm_object: Optional[Tuple[str, int]]) -> None:
    """ Write the OSM type and id of the object, if there is one.
        The type is left out when there is no name for its letter.
    """
    if osm_object is None:
        return

    type_name = cl.OSM_TYPE_NAME.get(osm_object[0], None)
    out.keyval_not_none('osm_type', type_name)
    out.keyval('osm_id', osm_object[1])
|
||||
|
||||
|
||||
def _write_typed_address(out: JsonWriter, address: Optional[AddressLines],
                         country_code: Optional[str]) -> None:
    """ Write the address parts as key/value pairs where the key is
        the label of the address line. The country code is appended
        at the end when available.
    """
    parts = {}
    for line in (address or []):
        if not line.isaddress:
            continue

        if line.local_name:
            label = cl.get_label_tag(line.category, line.extratags,
                                     line.rank_address, country_code)
            # Only the first line with a given label is used.
            parts.setdefault(label, line.local_name)

        if line.names and 'ISO3166-2' in line.names and line.admin_level:
            parts[f"ISO3166-2-lvl{line.admin_level}"] = line.names['ISO3166-2']

    for label, name in parts.items():
        out.keyval(label, name)

    if country_code:
        out.keyval('country_code', country_code)
|
||||
|
||||
|
||||
def _write_geocodejson_address(out: JsonWriter,
                               address: Optional[AddressLines],
                               obj_place_id: Optional[int],
                               country_code: Optional[str]) -> None:
    """ Write the address lines in the form used by the geocodejson
        format. Postcode and house number are written right away, the
        remaining lines are written afterwards under the name of their
        address rank.
    """
    by_rank_name = {}
    for line in (address or []):
        if not (line.isaddress and line.local_name):
            continue

        line_type = line.category[1]
        if line_type in ('postcode', 'postal_code'):
            out.keyval('postcode', line.local_name)
        elif line_type == 'house_number':
            out.keyval('housenumber', line.local_name)
        elif (obj_place_id is None or obj_place_id != line.place_id) \
             and 4 <= line.rank_address < 28:
            # The line of the object itself is skipped. Only the first
            # line for each rank name is used.
            by_rank_name.setdefault(GEOCODEJSON_RANKS[line.rank_address],
                                    line.local_name)

    for rank_name, name in by_rank_name.items():
        out.keyval(rank_name, name)

    if country_code:
        out.keyval('country_code', country_code)
|
||||
|
||||
|
||||
def format_base_json(results: Union[ReverseResults, SearchResults],
                     options: Mapping[str, Any], simple: bool,
                     class_label: str) -> str:
    """ Format the results as a json string in the custom Nominatim format.
        With 'simple' set, only the first result is output as a single
        object or an error object when there are no results. Otherwise an
        array with all results is returned. 'class_label' is the key under
        which the main category is written.
    """
    if simple and not results:
        return '{"error":"Unable to geocode"}'

    out = JsonWriter()
    if not simple:
        out.start_array()

    for result in results:
        out.start_object()
        out.keyval_not_none('place_id', result.place_id)
        out.keyval('licence', cl.OSM_ATTRIBUTION)

        _write_osm_id(out, result.osm_object)

        addresstype = cl.get_label_tag(result.category, result.extratags,
                                       result.rank_address, result.country_code)
        out.keyval('lat', f"{result.centroid.lat}")
        out.keyval('lon', f"{result.centroid.lon}")
        out.keyval(class_label, result.category[0])
        out.keyval('type', result.category[1])
        out.keyval('place_rank', result.rank_search)
        out.keyval('importance', result.calculated_importance())
        out.keyval('addresstype', addresstype)
        out.keyval('name', result.locale_name or '')
        out.keyval('display_name', result.display_name or '')

        icon_base_url = options.get('icon_base_url', None)
        if icon_base_url:
            icon = cl.ICONS.get(result.category)
            if icon:
                out.keyval('icon', f"{icon_base_url}/{icon}.p.20.png")

        if options.get('addressdetails', False):
            out.key('address').start_object()
            _write_typed_address(out, result.address_rows, result.country_code)
            out.end_object().next()

        if options.get('extratags', False):
            out.keyval('extratags', result.extratags)

        if options.get('namedetails', False):
            out.keyval('namedetails', result.names)

        # The bounding box is written as strings in the order
        # minimum latitude, maximum latitude, minimum longitude,
        # maximum longitude.
        bbox = cl.bbox_from_result(result)
        out.key('boundingbox').start_array()
        for coord in (bbox.minlat, bbox.maxlat, bbox.minlon, bbox.maxlon):
            out.value(f"{coord:0.7f}").next()
        out.end_array().next()

        if result.geometry:
            out.keyval_not_none('geotext', result.geometry.get('text'))
            out.keyval_not_none('geokml', result.geometry.get('kml'))
            if 'geojson' in result.geometry:
                out.key('geojson').raw(result.geometry['geojson']).next()
            out.keyval_not_none('svg', result.geometry.get('svg'))

        out.end_object()

        # In simple mode the output is complete after the first result.
        if simple:
            return out()

        out.next()

    out.end_array()

    return out()
|
||||
|
||||
|
||||
def format_base_geojson(results: Union[ReverseResults, SearchResults],
                        options: Mapping[str, Any],
                        simple: bool) -> str:
    """ Format the results as a geojson FeatureCollection string.
        With 'simple' set, an error object is returned when there
        are no results.
    """
    if simple and not results:
        return '{"error":"Unable to geocode"}'

    out = JsonWriter()

    out.start_object()
    out.keyval('type', 'FeatureCollection')
    out.keyval('licence', cl.OSM_ATTRIBUTION)
    out.key('features').start_array()

    for result in results:
        out.start_object()
        out.keyval('type', 'Feature')
        out.key('properties').start_object()

        out.keyval_not_none('place_id', result.place_id)

        _write_osm_id(out, result.osm_object)

        addresstype = cl.get_label_tag(result.category, result.extratags,
                                       result.rank_address, result.country_code)
        out.keyval('place_rank', result.rank_search)
        out.keyval('category', result.category[0])
        out.keyval('type', result.category[1])
        out.keyval('importance', result.calculated_importance())
        out.keyval('addresstype', addresstype)
        out.keyval('name', result.locale_name or '')
        out.keyval('display_name', result.display_name or '')

        if options.get('addressdetails', False):
            out.key('address').start_object()
            _write_typed_address(out, result.address_rows, result.country_code)
            out.end_object().next()

        if options.get('extratags', False):
            out.keyval('extratags', result.extratags)

        if options.get('namedetails', False):
            out.keyval('namedetails', result.names)

        # End of the 'properties' object.
        out.end_object().next()

        bbox = cl.bbox_from_result(result)
        out.key('bbox').start_array()
        for coord in bbox.coords:
            out.float(coord, 7).next()
        out.end_array().next()

        # Without a geojson geometry, the centroid is used as geometry.
        geometry = result.geometry.get('geojson') or result.centroid.to_geojson()
        out.key('geometry').raw(geometry).next()

        out.end_object().next()

    out.end_array().next().end_object()

    return out()
|
||||
|
||||
|
||||
def format_base_geocodejson(results: Union[ReverseResults, SearchResults],
                            options: Mapping[str, Any], simple: bool) -> str:
    """ Format the results as a geocodejson string. With 'simple' set,
        an error object is returned when there are no results.
    """
    if simple and not results:
        return '{"error":"Unable to geocode"}'

    out = JsonWriter()

    out.start_object()
    out.keyval('type', 'FeatureCollection')
    out.key('geocoding').start_object()
    out.keyval('version', '0.1.0')
    out.keyval('attribution', cl.OSM_ATTRIBUTION)
    out.keyval('licence', 'ODbL')
    out.keyval_not_none('query', options.get('query'))
    out.end_object().next()
    out.key('features').start_array()

    for result in results:
        out.start_object()
        out.keyval('type', 'Feature')
        out.key('properties').start_object()
        out.key('geocoding').start_object()

        out.keyval_not_none('place_id', result.place_id)

        _write_osm_id(out, result.osm_object)

        # The address rank is clamped to the range covered by the rank table.
        rank_name = GEOCODEJSON_RANKS[max(3, min(28, result.rank_address))]
        out.keyval('osm_key', result.category[0])
        out.keyval('osm_value', result.category[1])
        out.keyval('type', rank_name)
        # Not every result has a distance.
        out.keyval_not_none('accuracy', getattr(result, 'distance', None), transform=int)
        out.keyval('label', result.display_name or '')
        out.keyval_not_none('name', result.locale_name or None)

        if options.get('addressdetails', False):
            _write_geocodejson_address(out, result.address_rows, result.place_id,
                                       result.country_code)

            # Names of the administrative boundaries by admin level.
            out.key('admin').start_object()
            for line in (result.address_rows or []):
                if line.isaddress and (line.admin_level or 15) < 15 and line.local_name \
                   and line.category[0] == 'boundary' \
                   and line.category[1] == 'administrative':
                    out.keyval(f"level{line.admin_level}", line.local_name)
            out.end_object().next()

        # End of the 'geocoding' and the 'properties' object.
        out.end_object().next()
        out.end_object().next()

        # Without a geojson geometry, the centroid is used as geometry.
        geometry = result.geometry.get('geojson') or result.centroid.to_geojson()
        out.key('geometry').raw(geometry).next()

        out.end_object().next()

    out.end_array().next().end_object()

    return out()
|
||||
|
||||
|
||||
# Name of the geocodejson type for each address rank from 3 to 28.
GEOCODEJSON_RANKS = {
    rank: name
    for ranks, name in (
        (range(3, 4), 'locality'),
        (range(4, 5), 'country'),
        (range(5, 10), 'state'),
        (range(10, 13), 'county'),
        (range(13, 17), 'city'),
        (range(17, 22), 'district'),
        (range(22, 25), 'locality'),
        (range(25, 28), 'street'),
        (range(28, 29), 'house'),
    )
    for rank in ranks
}
|
||||
126
src/nominatim_api/v1/format_xml.py
Normal file
126
src/nominatim_api/v1/format_xml.py
Normal file
@@ -0,0 +1,126 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Helper functions for output of results in XML format.
|
||||
"""
|
||||
from typing import Mapping, Any, Optional, Union
|
||||
import datetime as dt
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from ..results import AddressLines, ReverseResult, ReverseResults, \
|
||||
SearchResult, SearchResults
|
||||
from . import classtypes as cl
|
||||
|
||||
#pylint: disable=too-many-branches
|
||||
|
||||
def _write_xml_address(root: ET.Element, address: AddressLines,
                       country_code: Optional[str]) -> None:
    """ Add one child element per address part to the given element.
        The label of the address line is used as the tag name. The country
        code is appended at the end when available.
    """
    parts = {}
    for line in address:
        if not line.isaddress:
            continue

        if line.local_name:
            label = cl.get_label_tag(line.category, line.extratags,
                                     line.rank_address, country_code)
            # Only the first line with a given label is used.
            parts.setdefault(label, line.local_name)

        if line.names and 'ISO3166-2' in line.names and line.admin_level:
            parts[f"ISO3166-2-lvl{line.admin_level}"] = line.names['ISO3166-2']

    for tag, text in parts.items():
        ET.SubElement(root, tag).text = text

    if country_code:
        ET.SubElement(root, 'country_code').text = country_code
|
||||
|
||||
|
||||
def _create_base_entry(result: Union[ReverseResult, SearchResult],
                       root: ET.Element, simple: bool) -> ET.Element:
    """ Create the element for the given result below 'root' and return it.
        With 'simple' set, the element is named 'result' and has the
        display name as text. Otherwise it is named 'place' and display
        name, class, type and importance are added as attributes.
    """
    place = ET.SubElement(root, 'result' if simple else 'place')

    if result.place_id is not None:
        place.set('place_id', str(result.place_id))

    if result.osm_object:
        type_letter, osm_id = result.osm_object[0], result.osm_object[1]
        osm_type = cl.OSM_TYPE_NAME.get(type_letter, None)
        if osm_type is not None:
            place.set('osm_type', osm_type)
        place.set('osm_id', str(osm_id))

    if result.names and 'ref' in result.names:
        place.set('ref', result.names['ref'])
    elif result.locale_name:
        # bug reproduced from PHP
        place.set('ref', result.locale_name)

    place.set('lat', f"{result.centroid.lat:.7f}")
    place.set('lon', f"{result.centroid.lon:.7f}")

    bbox = cl.bbox_from_result(result)
    bbox_coords = (bbox.minlat, bbox.maxlat, bbox.minlon, bbox.maxlon)
    place.set('boundingbox', ','.join(f"{coord:.7f}" for coord in bbox_coords))

    place.set('place_rank', str(result.rank_search))
    place.set('address_rank', str(result.rank_address))

    geometry = result.geometry
    if geometry:
        for key in ('text', 'svg'):
            if key in geometry:
                place.set('geo' + key, geometry[key])
        if 'kml' in geometry:
            # KML is added as a parsed XML subtree, not as an attribute.
            kml_parent = ET.SubElement(root if simple else place, 'geokml')
            kml_parent.append(ET.fromstring(geometry['kml']))
        if 'geojson' in geometry:
            place.set('geojson', geometry['geojson'])

    display_name = result.display_name or ''
    if simple:
        place.text = display_name
    else:
        place.set('display_name', display_name)
        place.set('class', result.category[0])
        place.set('type', result.category[1])
        place.set('importance', str(result.calculated_importance()))

    return place
|
||||
|
||||
|
||||
def format_base_xml(results: Union[ReverseResults, SearchResults],
                    options: Mapping[str, Any],
                    simple: bool, xml_root_tag: str,
                    xml_extra_info: Mapping[str, str]) -> str:
    """ Format the result into an XML response. With 'simple' exactly one
        result will be output, otherwise a list.

        'xml_root_tag' names the root element and every item of
        'xml_extra_info' is added to it as an attribute. The boolean
        options 'addressdetails', 'extratags' and 'namedetails' switch on
        the respective sub-elements; a non-empty 'icon_base_url' enables
        the 'icon' attribute in list mode.

        Returns the serialised document including the XML declaration.
    """
    root = ET.Element(xml_root_tag)
    # datetime.utcnow() is deprecated since Python 3.12. An aware UTC
    # timestamp renders to exactly the same string with this format.
    root.set('timestamp',
             dt.datetime.now(dt.timezone.utc).strftime('%a, %d %b %Y %H:%M:%S +00:00'))
    root.set('attribution', cl.OSM_ATTRIBUTION)
    for k, v in xml_extra_info.items():
        root.set(k, v)

    if simple and not results:
        ET.SubElement(root, 'error').text = 'Unable to geocode'

    for result in results:
        place = _create_base_entry(result, root, simple)
        # In simple mode detail elements are siblings of the result
        # element, otherwise they are its children.
        detail_parent = root if simple else place

        if not simple and options.get('icon_base_url', None):
            icon = cl.ICONS.get(result.category)
            if icon:
                place.set('icon', icon)

        if options.get('addressdetails', False) and result.address_rows:
            _write_xml_address(ET.SubElement(root, 'addressparts') if simple else place,
                               result.address_rows, result.country_code)

        if options.get('extratags', False):
            # The container element is written even when there are no tags.
            eroot = ET.SubElement(detail_parent, 'extratags')
            if result.extratags:
                for k, v in result.extratags.items():
                    ET.SubElement(eroot, 'tag', attrib={'key': k, 'value': v})

        if options.get('namedetails', False):
            eroot = ET.SubElement(detail_parent, 'namedetails')
            if result.names:
                for k, v in result.names.items():
                    ET.SubElement(eroot, 'name', attrib={'desc': k}).text = v

    return '<?xml version="1.0" encoding="UTF-8" ?>\n' + ET.tostring(root, encoding='unicode')
|
||||
201
src/nominatim_api/v1/helpers.py
Normal file
201
src/nominatim_api/v1/helpers.py
Normal file
@@ -0,0 +1,201 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Helper functions for parsing parameters and outputting data
|
||||
specifically for the v1 version of the API.
|
||||
"""
|
||||
from typing import Tuple, Optional, Any, Dict, Iterable
|
||||
from itertools import chain
|
||||
import re
|
||||
|
||||
from ..results import SearchResult, SearchResults, SourceTable
|
||||
from ..types import SearchDetails, GeometryFormat
|
||||
|
||||
# Rank returned by zoom_to_rank() for each zoom level; the list index is
# the zoom level.
REVERSE_MAX_RANKS = [2, 2, 2,   # 0-2   Continent/Sea
                     4, 4,      # 3-4   Country
                     8,         # 5     State
                     10, 10,    # 6-7   Region
                     12, 12,    # 8-9   County
                     16, 17,    # 10-11 City
                     18,        # 12    Town
                     19,        # 13    Village/Suburb
                     22,        # 14    Hamlet/Neighbourhood
                     25,        # 15    Localities
                     26,        # 16    Major Streets
                     27,        # 17    Minor Streets
                     30         # 18    Building
                    ]


def zoom_to_rank(zoom: int) -> int:
    """ Translate a zoom level into the rank defined for it by the
        v1 API spec. Zoom levels outside of 0 to 18 are clamped to the
        nearest valid level.
    """
    if zoom <= 0:
        return REVERSE_MAX_RANKS[0]
    if zoom >= 18:
        return REVERSE_MAX_RANKS[18]

    return REVERSE_MAX_RANKS[zoom]
|
||||
|
||||
|
||||
# Minimum and maximum rank for each value of the feature type parameter.
FEATURE_TYPE_TO_RANK: Dict[Optional[str], Tuple[int, int]] = {
    'country': (4, 4),
    'state': (8, 8),
    'city': (14, 16),
    'settlement': (8, 20)
}


def feature_type_to_rank(feature_type: Optional[str]) -> Tuple[int, int]:
    """ Convert a feature type parameter to a tuple of minimum rank
        and maximum rank. Unknown or missing feature types yield the
        full rank range (0, 30).
    """
    try:
        return FEATURE_TYPE_TO_RANK[feature_type]
    except KeyError:
        return (0, 30)
|
||||
|
||||
|
||||
#pylint: disable=too-many-arguments,too-many-branches
def extend_query_parts(queryparts: Dict[str, Any], details: Dict[str, Any],
                       feature_type: Optional[str],
                       namedetails: bool, extratags: bool,
                       excluded: Iterable[str]) -> None:
    """ Translate the search details back into v1 URL parameters and
        add them to 'queryparts', a dictionary suitable for use as
        URL parameter dictionary. The dictionary is changed in place.

        'excluded' holds place IDs that are prepended to the positive
        IDs already excluded in the details. The 'exclude_place_ids'
        key is always written.
    """
    parsed = SearchDetails.from_kwargs(details)

    requested = parsed.geometry_output
    if requested != GeometryFormat.NONE:
        geometry_params = ((GeometryFormat.GEOJSON, 'polygon_geojson'),
                           (GeometryFormat.KML, 'polygon_kml'),
                           (GeometryFormat.SVG, 'polygon_svg'),
                           (GeometryFormat.TEXT, 'polygon_text'))
        for geometry_format, param_name in geometry_params:
            if geometry_format in requested:
                queryparts[param_name] = '1'

    # Simple on/off switches, only written when switched on.
    for enabled, param_name in ((parsed.address_details, 'addressdetails'),
                                (namedetails, 'namedetails'),
                                (extratags, 'extratags')):
        if enabled:
            queryparts[param_name] = '1'

    if parsed.geometry_simplification > 0.0:
        queryparts['polygon_threshold'] = f"{parsed.geometry_simplification:.6g}"

    # 10 is the limit the search endpoint applies when none is given.
    if parsed.max_results != 10:
        queryparts['limit'] = str(parsed.max_results)

    if parsed.countries:
        queryparts['countrycodes'] = ','.join(parsed.countries)

    previously_excluded = (str(pid) for pid in parsed.excluded if pid > 0)
    queryparts['exclude_place_ids'] = ','.join(chain(excluded, previously_excluded))

    if parsed.viewbox:
        queryparts['viewbox'] = ','.join(f"{c:.7g}" for c in parsed.viewbox.coords)

    if parsed.bounded_viewbox:
        queryparts['bounded'] = '1'

    if not details['dedupe']:
        queryparts['dedupe'] = '0'

    if feature_type in FEATURE_TYPE_TO_RANK:
        queryparts['featureType'] = feature_type
|
||||
|
||||
|
||||
def deduplicate_results(results: SearchResults, max_results: int) -> SearchResults:
    """ Remove results that look like duplicates.

        Two results are considered the same if they have the same OSM ID
        or if they have the same category, display name and rank.

        Results are visited in the given order and the first of a group
        of duplicates is kept. Collecting stops as soon as 'max_results'
        results have been gathered. Returns a new result list.
    """
    osm_ids_done = set()
    classification_done = set()
    deduped = SearchResults()
    for result in results:
        if result.source_table == SourceTable.POSTCODE:
            assert result.names and 'ref' in result.names
            # Drop the postcode when any result in the list is a postcode
            # boundary relation carrying the same postcode.
            if any(_is_postcode_relation_for(r, result.names['ref']) for r in results):
                continue
        if result.source_table == SourceTable.PLACEX:
            classification = (result.osm_object[0] if result.osm_object else None,
                              result.category,
                              result.display_name,
                              result.rank_address)
            if result.osm_object not in osm_ids_done \
               and classification not in classification_done:
                deduped.append(result)
            osm_ids_done.add(result.osm_object)
            classification_done.add(classification)
        else:
            # Results from other tables are never checked for duplicates.
            deduped.append(result)
        if len(deduped) >= max_results:
            break

    return deduped
|
||||
|
||||
|
||||
def _is_postcode_relation_for(result: SearchResult, postcode: str) -> bool:
    """ Check if the result is an OSM relation from the placex table
        that is tagged as postal code boundary and whose 'ref' name
        equals the given postcode.
    """
    if result.source_table != SourceTable.PLACEX:
        return False

    if result.osm_object is None or result.osm_object[0] != 'R':
        return False

    if result.category != ('boundary', 'postal_code'):
        return False

    if result.names is None:
        return False

    return result.names.get('ref') == postcode
|
||||
|
||||
|
||||
def _deg(axis: str) -> str:
    """ Regex fragment for decimal degrees of the given axis ('lat'/'lon').
    """
    return f"(?P<{axis}_deg>\\d+\\.\\d+)°?"


def _deg_min(axis: str) -> str:
    """ Regex fragment for degrees and decimal minutes of the given axis.
    """
    return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>[\\d.]+)[′']*"


def _deg_min_sec(axis: str) -> str:
    """ Regex fragment for degrees, minutes and decimal seconds of the
        given axis.
    """
    return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>\\d+)[′'\\s]+(?P<{axis}_sec>[\\d.]+)[\"″]*"


# Supported coordinate notations, tried in this order. Each expression
# additionally captures free text before ('pre') and after ('post') the
# coordinate.
COORD_REGEX = [re.compile(r'(?:(?P<pre>.*?)\s+)??' + r + r'(?:\s+(?P<post>.*))?') for r in (
    r"(?P<ns>[NS])\s*" + _deg('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg('lon'),
    _deg('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg('lon') + r"\s*(?P<ew>[EW])",
    r"(?P<ns>[NS])\s*" + _deg_min('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg_min('lon'),
    _deg_min('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg_min('lon') + r"\s*(?P<ew>[EW])",
    r"(?P<ns>[NS])\s*" + _deg_min_sec('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg_min_sec('lon'),
    _deg_min_sec('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg_min_sec('lon') + r"\s*(?P<ew>[EW])",
    r"\[?(?P<lat_deg>[+-]?\d+\.\d+)[\s,]+(?P<lon_deg>[+-]?\d+\.\d+)\]?"
)]


def extract_coords_from_query(query: str) -> Tuple[str, Optional[float], Optional[float]]:
    """ Look for something that is formatted like a coordinate at the
        beginning or end of the query. If found, extract the coordinate and
        return the remaining query (or the empty string if the query
        consisted of nothing but a coordinate).

        Only the first match will be returned.

        The result is a tuple of remaining query, x (longitude) and
        y (latitude). When no coordinate is found, the query is returned
        unchanged together with None for both coordinate parts.
    """
    for regex in COORD_REGEX:
        match = regex.fullmatch(query)
        if match is None:
            continue
        groups = match.groupdict()
        # A coordinate surrounded by text on both sides is not accepted.
        if not groups['pre'] or not groups['post']:
            try:
                x = float(groups['lon_deg']) \
                    + float(groups.get('lon_min', 0.0)) / 60.0 \
                    + float(groups.get('lon_sec', 0.0)) / 3600.0
                y = float(groups['lat_deg']) \
                    + float(groups.get('lat_min', 0.0)) / 60.0 \
                    + float(groups.get('lat_sec', 0.0)) / 3600.0
            except ValueError:
                # The minute and second patterns accept any mix of digits
                # and dots, e.g. '...' or '1.2.3'. Such a match is not a
                # number and therefore not a coordinate after all.
                continue
            if groups.get('ew') == 'W':
                x = -x
            if groups.get('ns') == 'S':
                y = -y
            return groups['pre'] or groups['post'] or '', x, y

    return query, None, None
|
||||
|
||||
|
||||
# Matches a category term of the form '[key=value]' together with the
# text before and after it.
CATEGORY_REGEX = re.compile(r'(?P<pre>.*?)\[(?P<cls>[a-zA-Z_]+)=(?P<typ>[a-zA-Z_]+)\](?P<post>.*)')


def extract_category_from_query(query: str) -> Tuple[str, Optional[str], Optional[str]]:
    """ Find a hidden category specification of the form '[key=value]'
        in the query. When there is one, return the query without the
        category term (the empty string when nothing else is left)
        followed by key and value.

        Without a category term the query is returned unchanged with
        None for key and value. Only the first match will be returned.
    """
    found = CATEGORY_REGEX.search(query)
    if found is None:
        return query, None, None

    before = found.group('pre').strip()
    after = found.group('post').strip()
    remaining = f"{before} {after}".strip()

    return remaining, found.group('cls'), found.group('typ')
|
||||
577
src/nominatim_api/v1/server_glue.py
Normal file
577
src/nominatim_api/v1/server_glue.py
Normal file
@@ -0,0 +1,577 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Generic part of the server implementation of the v1 API.
|
||||
Combine with the scaffolding provided for the various Python ASGI frameworks.
|
||||
"""
|
||||
from typing import Optional, Any, Type, Callable, NoReturn, Dict, cast
from functools import reduce
import abc
import dataclasses
import json
import math
from urllib.parse import urlencode
from xml.sax.saxutils import escape as xml_escape

import sqlalchemy as sa

from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from .. import logging as loglib
from ..core import NominatimAPIAsync
from .format import dispatch as formatting
from .format import RawDataList
from ..types import DataLayer, GeometryFormat, PlaceRef, PlaceID, OsmID, Point
from ..status import StatusResult
from ..results import DetailedResult, ReverseResults, SearchResult, SearchResults
from ..localization import Locales
from . import helpers
|
||||
|
||||
# Values of the HTTP content type used for the different kinds of output.
CONTENT_TEXT = 'text/plain; charset=utf-8'
CONTENT_XML = 'text/xml; charset=utf-8'
CONTENT_HTML = 'text/html; charset=utf-8'
CONTENT_JSON = 'application/json; charset=utf-8'

# Content type by output format name. Formats that are not listed
# here are delivered as JSON.
CONTENT_TYPE = {'text': CONTENT_TEXT, 'xml': CONTENT_XML, 'debug': CONTENT_HTML}
|
||||
|
||||
class ASGIAdaptor(abc.ABC):
    """ Adapter class for the different ASGI frameworks.
        Wraps functionality over concrete requests and responses.
    """
    # Content type of the response. Changed by parse_format(),
    # setup_debugging() and, for JSONP output, by build_response().
    content_type: str = CONTENT_TEXT

    @abc.abstractmethod
    def get(self, name: str, default: Optional[str] = None) -> Optional[str]:
        """ Return an input parameter as a string. If the parameter was
            not provided, return the 'default' value.
        """

    @abc.abstractmethod
    def get_header(self, name: str, default: Optional[str] = None) -> Optional[str]:
        """ Return a HTTP header parameter as a string. If the parameter was
            not provided, return the 'default' value.
        """


    @abc.abstractmethod
    def error(self, msg: str, status: int = 400) -> Exception:
        """ Construct an appropriate exception from the given error message.
            The exception must result in a HTTP error with the given status.
        """


    @abc.abstractmethod
    def create_response(self, status: int, output: str, num_results: int) -> Any:
        """ Create a response from the given parameters. The result will
            be returned by the endpoint functions. The adaptor may also
            return None when the response is created internally with some
            different means.

            The response must return the given HTTP status code 'status',
            set the HTTP content-type header to the current content type
            of the adaptor and the body of the response to 'output'.
        """

    @abc.abstractmethod
    def base_uri(self) -> str:
        """ Return the URI of the original request.
        """


    @abc.abstractmethod
    def config(self) -> Configuration:
        """ Return the current configuration object.
        """


    def build_response(self, output: str, status: int = 200, num_results: int = 0) -> Any:
        """ Create a response from the given output. Wraps a JSONP function
            around the response, if necessary.

            The wrapping is only applied to successful JSON responses
            when a 'json_callback' parameter was given. A callback name
            that is not a dot-separated list of identifiers results
            in an error.
        """
        if self.content_type == CONTENT_JSON and status == 200:
            jsonp = self.get('json_callback')
            if jsonp is not None:
                if any(not part.isidentifier() for part in jsonp.split('.')):
                    self.raise_error('Invalid json_callback value')
                output = f"{jsonp}({output})"
                self.content_type = 'application/javascript; charset=utf-8'

        return self.create_response(status, output, num_results)


    def raise_error(self, msg: str, status: int = 400) -> NoReturn:
        """ Raise an exception resulting in the given HTTP status and
            message. The message will be formatted according to the
            output format chosen by the request.

            For XML and JSON output the message is escaped, so that the
            error document stays well-formed whatever the message contains.
        """
        if self.content_type == CONTENT_XML:
            msg = f"""<?xml version="1.0" encoding="UTF-8" ?>
                      <error>
                        <code>{status}</code>
                        <message>{xml_escape(msg)}</message>
                      </error>
                   """
        elif self.content_type == CONTENT_JSON:
            # Compact separators keep the output identical to the
            # previous hand-built string for messages without
            # special characters.
            msg = json.dumps({'error': {'code': status, 'message': msg}},
                             separators=(',', ':'), ensure_ascii=False)
        elif self.content_type == CONTENT_HTML:
            loglib.log().section('Execution error')
            loglib.log().var_dump('Status', status)
            loglib.log().var_dump('Message', msg)
            msg = loglib.get_and_disable()

        raise self.error(msg, status)


    def get_int(self, name: str, default: Optional[int] = None) -> int:
        """ Return an input parameter as an int. Raises an exception if
            the parameter is given but not in an integer format.

            If 'default' is given, then it will be returned when the parameter
            is missing completely. When 'default' is None, an error will be
            raised on a missing parameter.
        """
        value = self.get(name)

        if value is None:
            if default is not None:
                return default

            self.raise_error(f"Parameter '{name}' missing.")

        try:
            intval = int(value)
        except ValueError:
            self.raise_error(f"Parameter '{name}' must be a number.")

        return intval


    def get_float(self, name: str, default: Optional[float] = None) -> float:
        """ Return an input parameter as a floating-point number. Raises an
            exception if the parameter is given but not in a float format
            or when it is NaN or infinite.

            If 'default' is given, then it will be returned when the parameter
            is missing completely. When 'default' is None, an error will be
            raised on a missing parameter.
        """
        value = self.get(name)

        if value is None:
            if default is not None:
                return default

            self.raise_error(f"Parameter '{name}' missing.")

        try:
            fval = float(value)
        except ValueError:
            self.raise_error(f"Parameter '{name}' must be a number.")

        if math.isnan(fval) or math.isinf(fval):
            self.raise_error(f"Parameter '{name}' must be a number.")

        return fval


    def get_bool(self, name: str, default: Optional[bool] = None) -> bool:
        """ Return an input parameter as bool. Only '0' is accepted as
            an input for 'false' all other inputs will be interpreted as 'true'.

            If 'default' is given, then it will be returned when the parameter
            is missing completely. When 'default' is None, an error will be
            raised on a missing parameter.
        """
        value = self.get(name)

        if value is None:
            if default is not None:
                return default

            self.raise_error(f"Parameter '{name}' missing.")

        return value != '0'


    def get_accepted_languages(self) -> str:
        """ Return the accepted languages.

            The 'accept-language' request parameter takes precedence over
            the HTTP header of the same name. The configured default
            language is used when neither is set.
        """
        return self.get('accept-language')\
               or self.get_header('accept-language')\
               or self.config().DEFAULT_LANGUAGE


    def setup_debugging(self) -> bool:
        """ Set up collection of debug information if requested.

            Return True when debugging was requested.
        """
        if self.get_bool('debug', False):
            loglib.set_log_output('html')
            self.content_type = CONTENT_HTML
            return True

        return False


    def get_layers(self) -> Optional[DataLayer]:
        """ Return a parsed version of the layer parameter.

            The parameter is a comma-separated list of layer names which
            are matched case-insensitively. Returns None when the
            parameter is missing and raises an error when the list
            contains a name that is not a known layer.
        """
        param = self.get('layer', None)
        if param is None:
            return None

        try:
            layers = [getattr(DataLayer, s.upper()) for s in param.split(',')]
        except AttributeError:
            # Report a bad name as a usage error instead of letting the
            # lookup failure escape as an internal server error.
            self.raise_error("Parameter 'layer' contains an unknown layer name.")

        return cast(DataLayer, reduce(DataLayer.__or__, layers))


    def parse_format(self, result_type: Type[Any], default: str) -> str:
        """ Get and check the 'format' parameter and prepare the formatter.
            `result_type` is the type of result to be returned by the function
            and `default` the format value to assume when no parameter is present.

            Sets the content type of the response as a side effect and
            returns the name of the chosen format.
        """
        fmt = self.get('format', default=default)
        assert fmt is not None

        if not formatting.supports_format(result_type, fmt):
            self.raise_error("Parameter 'format' must be one of: " +
                              ', '.join(formatting.list_formats(result_type)))

        self.content_type = CONTENT_TYPE.get(fmt, CONTENT_JSON)
        return fmt


    def parse_geometry_details(self, fmt: str) -> Dict[str, Any]:
        """ Create details structure from the supplied geometry parameters.

            For the formats 'geojson' and 'geocodejson' only the
            'polygon_geojson' parameter is taken into account. An error is
            raised when more geometry types are requested than the
            POLYGON_OUTPUT_MAX_TYPES setting allows.
        """
        numgeoms = 0
        output = GeometryFormat.NONE
        if self.get_bool('polygon_geojson', False):
            output |= GeometryFormat.GEOJSON
            numgeoms += 1
        if fmt not in ('geojson', 'geocodejson'):
            if self.get_bool('polygon_text', False):
                output |= GeometryFormat.TEXT
                numgeoms += 1
            if self.get_bool('polygon_kml', False):
                output |= GeometryFormat.KML
                numgeoms += 1
            if self.get_bool('polygon_svg', False):
                output |= GeometryFormat.SVG
                numgeoms += 1

        if numgeoms > self.config().get_int('POLYGON_OUTPUT_MAX_TYPES'):
            self.raise_error('Too many polygon output options selected.')

        return {'address_details': True,
                'geometry_simplification': self.get_float('polygon_threshold', 0.0),
                'geometry_output': output
               }
|
||||
|
||||
|
||||
async def status_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
    """ Server glue for /status endpoint. See API docs for details.

        In text format a non-zero status of the result is reported with
        HTTP status 500. All other cases are answered with HTTP
        status 200.
    """
    result = await api.status()

    fmt = params.parse_format(StatusResult, 'text')

    has_failed = fmt == 'text' and result.status
    body = formatting.format_result(result, fmt, {})

    return params.build_response(body, status=500 if has_failed else 200)
|
||||
|
||||
|
||||
async def details_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
    """ Server glue for /details endpoint. See API docs for details.

        The place is identified either through the parameter 'place_id'
        or through 'osmtype' and 'osmid' with an optional 'class'. An
        error with HTTP status 404 is raised when no place was found.
    """
    fmt = params.parse_format(DetailedResult, 'json')

    place: PlaceRef
    place_id = params.get_int('place_id', 0)
    if not place_id:
        osmtype = params.get('osmtype')
        if osmtype is None:
            params.raise_error("Missing ID parameter 'place_id' or 'osmtype'.")
        place = OsmID(osmtype, params.get_int('osmid'), params.get('class'))
    else:
        place = PlaceID(place_id)

    debug = params.setup_debugging()

    locales = Locales.from_accept_languages(params.get_accepted_languages())

    lookup_options: Dict[str, Any] = {
        'address_details': params.get_bool('addressdetails', False),
        'linked_places': params.get_bool('linkedplaces', True),
        'parented_places': params.get_bool('hierarchy', False),
        'keywords': params.get_bool('keywords', False),
        'geometry_output': GeometryFormat.GEOJSON
                           if params.get_bool('polygon_geojson', False)
                           else GeometryFormat.NONE,
        'locales': locales
    }
    result = await api.details(place, **lookup_options)

    # The debug output replaces the result, even when nothing was found.
    if debug:
        return params.build_response(loglib.get_and_disable())

    if result is None:
        params.raise_error('No place with that OSM ID found.', status=404)

    fmt_options = {'locales': locales,
                   'group_hierarchy': params.get_bool('group_hierarchy', False),
                   'icon_base_url': params.config().MAPICON_URL}

    return params.build_response(formatting.format_result(result, fmt, fmt_options),
                                 num_results=1)
|
||||
|
||||
|
||||
async def reverse_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
    """ Server glue for /reverse endpoint. See API docs for details.

        The parameters 'lat' and 'lon' are mandatory. For XML output the
        query string describing the request is handed to the formatter.
    """
    fmt = params.parse_format(ReverseResults, 'xml')
    debug = params.setup_debugging()
    coord = Point(params.get_float('lon'), params.get_float('lat'))

    details = params.parse_geometry_details(fmt)
    details.update(
        max_rank=helpers.zoom_to_rank(params.get_int('zoom', 18)),
        layers=params.get_layers(),
        locales=Locales.from_accept_languages(params.get_accepted_languages()))

    result = await api.reverse(coord, **details)
    num_results = 1 if result else 0

    if debug:
        return params.build_response(loglib.get_and_disable(), num_results=num_results)

    query = ''
    if fmt == 'xml':
        queryparts = {'lat': str(coord.lat), 'lon': str(coord.lon), 'format': 'xml'}
        zoom = params.get('zoom', None)
        if zoom:
            queryparts['zoom'] = zoom
        query = urlencode(queryparts)

    fmt_options = {'query': query,
                   'extratags': params.get_bool('extratags', False),
                   'namedetails': params.get_bool('namedetails', False),
                   'addressdetails': params.get_bool('addressdetails', True)}

    output = formatting.format_result(ReverseResults([result] if result else []),
                                      fmt, fmt_options)

    return params.build_response(output, num_results=num_results)
|
||||
|
||||
|
||||
async def lookup_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
    """ Server glue for /lookup endpoint. See API docs for details.

        The objects to look up come as a comma-separated list in the
        parameter 'osm_ids'. Each ID consists of one of the letters
        N, W or R (in either case) followed by digits. Entries in a
        different format are silently ignored.
    """
    fmt = params.parse_format(SearchResults, 'xml')
    debug = params.setup_debugging()
    details = params.parse_geometry_details(fmt)
    details['locales'] = Locales.from_accept_languages(params.get_accepted_languages())

    places = []
    for raw_id in (params.get('osm_ids') or '').split(','):
        candidate = raw_id.strip()
        if len(candidate) <= 1:
            continue
        type_letter, number = candidate[0], candidate[1:]
        if type_letter in 'RNWrnw' and number.isdigit():
            places.append(OsmID(type_letter.upper(), int(number)))

    if len(places) > params.config().get_int('LOOKUP_MAX_COUNT'):
        params.raise_error('Too many object IDs.')

    # Don't bother the database when there is nothing to look up.
    results = await api.lookup(places, **details) if places else SearchResults()

    if debug:
        return params.build_response(loglib.get_and_disable(), num_results=len(results))

    fmt_options = {'extratags': params.get_bool('extratags', False),
                   'namedetails': params.get_bool('namedetails', False),
                   'addressdetails': params.get_bool('addressdetails', True)}

    return params.build_response(formatting.format_result(results, fmt, fmt_options),
                                 num_results=len(results))
|
||||
|
||||
|
||||
async def _unstructured_search(query: str, api: NominatimAPIAsync,
                               details: Dict[str, Any]) -> SearchResults:
    """ Run a search for a free-text query.

        A coordinate found in the query is taken out and added to
        'details' as 'near' point with a radius of 0.1. When the query
        consisted of the coordinate only, a reverse lookup is done
        instead. A category term of the form '[key=value]' turns the
        search into a category search near the rest of the query.
    """
    if not query:
        return SearchResults()

    # Extract special format for coordinates from query.
    query, x, y = helpers.extract_coords_from_query(query)
    if x is not None:
        assert y is not None
        details['near'] = Point(x, y)
        details['near_radius'] = 0.1

        # If no query is left, revert to reverse search.
        if not query:
            result = await api.reverse(details['near'], **details)
            if not result:
                return SearchResults()

            # Copy over all fields of a search result that the reverse
            # result has as well.
            field_values = {}
            for field in dataclasses.fields(SearchResult):
                if hasattr(result, field.name):
                    field_values[field.name] = getattr(result, field.name)

            return SearchResults([SearchResult(**field_values)])

    query, cls, typ = helpers.extract_category_from_query(query)
    if cls is None:
        return await api.search(query, **details)

    assert typ is not None
    return await api.search_category([(cls, typ)], near_query=query, **details)
|
||||
|
||||
|
||||
async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
    """ Server glue for /search endpoint. See API docs for details.

        Accepts either a free-text query in the parameter 'q' or a
        structured query made up of the parameters amenity, street, city,
        county, state, postalcode and country. Mixing the two forms is
        an error. Usage errors reported by the search are returned to
        the client as request errors.
    """
    fmt = params.parse_format(SearchResults, 'jsonv2')
    debug = params.setup_debugging()
    details = params.parse_geometry_details(fmt)

    details['countries'] = params.get('countrycodes', None)
    details['excluded'] = params.get('exclude_place_ids', None)
    details['viewbox'] = params.get('viewbox', None) or params.get('viewboxlbrt', None)
    details['bounded_viewbox'] = params.get_bool('bounded', False)
    details['dedupe'] = params.get_bool('dedupe', True)

    max_results = max(1, min(50, params.get_int('limit', 10)))
    # Request some surplus results when deduplicating, so that the
    # requested number can still be met after duplicates are removed.
    details['max_results'] = max_results + min(10, max_results) \
                             if details['dedupe'] else max_results

    feature_type = params.get('featureType', None)
    details['min_rank'], details['max_rank'] = helpers.feature_type_to_rank(feature_type)
    if feature_type is not None:
        details['layers'] = DataLayer.ADDRESS
    else:
        details['layers'] = params.get_layers()

    details['locales'] = Locales.from_accept_languages(params.get_accepted_languages())

    # unstructured query parameters
    query = params.get('q', None)
    # structured query parameters
    queryparts = {}
    for key in ('amenity', 'street', 'city', 'county', 'state', 'postalcode', 'country'):
        details[key] = params.get(key, None)
        if details[key]:
            queryparts[key] = details[key]

    try:
        if query is not None:
            if queryparts:
                params.raise_error("Structured query parameters "
                                   "(amenity, street, city, county, state, postalcode, country)"
                                   " cannot be used together with 'q' parameter.")
            queryparts['q'] = query
            results = await _unstructured_search(query, api, details)
        else:
            # The joined parts serve as query string for the output only.
            query = ', '.join(queryparts.values())

            results = await api.search_address(**details)
    except UsageError as err:
        params.raise_error(str(err))

    if details['dedupe'] and len(results) > 1:
        results = helpers.deduplicate_results(results, max_results)

    if debug:
        return params.build_response(loglib.get_and_disable(), num_results=len(results))

    if fmt == 'xml':
        # The XML output includes a URL for requesting more results,
        # which excludes the places already returned.
        helpers.extend_query_parts(queryparts, details,
                                   feature_type,
                                   params.get_bool('namedetails', False),
                                   params.get_bool('extratags', False),
                                   (str(r.place_id) for r in results if r.place_id))
        queryparts['format'] = fmt

        moreurl = params.base_uri() + '/search?' + urlencode(queryparts)
    else:
        moreurl = ''

    fmt_options = {'query': query, 'more_url': moreurl,
                   'exclude_place_ids': queryparts.get('exclude_place_ids'),
                   'viewbox': queryparts.get('viewbox'),
                   'extratags': params.get_bool('extratags', False),
                   'namedetails': params.get_bool('namedetails', False),
                   'addressdetails': params.get_bool('addressdetails', False)}

    output = formatting.format_result(results, fmt, fmt_options)

    return params.build_response(output, num_results=len(results))
|
||||
|
||||
|
||||
async def deletable_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
    """ Server glue for /deletable endpoint.
        This is a special endpoint that shows polygons that have been
        deleted or are broken in the OSM data but are kept in the
        Nominatim database to minimize disruption.
    """
    fmt = params.parse_format(RawDataList, 'json')

    query = sa.text(""" SELECT p.place_id, country_code,
                               name->'name' as name, i.*
                        FROM placex p, import_polygon_delete i
                        WHERE p.osm_id = i.osm_id AND p.osm_type = i.osm_type
                              AND p.class = i.class AND p.type = i.type
                    """)

    async with api.begin() as conn:
        rows = await conn.execute(query)
        results = RawDataList(row._asdict() for row in rows)

    output = formatting.format_result(results, fmt, {})

    return params.build_response(output)
|
||||
|
||||
|
||||
async def polygons_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
    """ Server glue for /polygons endpoint.
        This is a special endpoint that shows polygons that have changed
        their size but are kept in the Nominatim database with their
        old area to minimize disruption.

        The output can be restricted with the parameters 'days',
        'class' and 'reduced'. At most 1000 entries are returned,
        most recently updated first.
    """
    fmt = params.parse_format(RawDataList, 'json')
    sql_params: Dict[str, Any] = {
        'days': params.get_int('days', -1),
        'cls': params.get('class')
    }
    reduced = params.get_bool('reduced', False)

    # Collect the requested restrictions in the order they are applied.
    conditions = []
    if sql_params['days'] > 0:
        conditions.append("updated > 'now'::timestamp - make_interval(days => :days)")
    if reduced:
        conditions.append("errormessage like 'Area reduced%'")
    if sql_params['cls'] is not None:
        conditions.append("class = :cls")

    async with api.begin() as conn:
        sql = sa.select(sa.text("""osm_type, osm_id, class, type,
                                   name->'name' as name,
                                   country_code, errormessage, updated"""))\
                .select_from(sa.text('import_polygon_error'))
        for condition in conditions:
            sql = sql.where(sa.text(condition))

        sql = sql.order_by(sa.literal_column('updated').desc()).limit(1000)

        rows = await conn.execute(sql, sql_params)
        results = RawDataList(row._asdict() for row in rows)

    return params.build_response(formatting.format_result(results, fmt, {}))
|
||||
|
||||
|
||||
# Signature shared by all endpoint functions of this module.
EndpointFunc = Callable[[NominatimAPIAsync, ASGIAdaptor], Any]

# Pairs of endpoint name and the function that implements the endpoint.
ROUTES = [
    ('status', status_endpoint),
    ('details', details_endpoint),
    ('reverse', reverse_endpoint),
    ('lookup', lookup_endpoint),
    ('search', search_endpoint),
    ('deletable', deletable_endpoint),
    ('polygons', polygons_endpoint),
]
|
||||
11
src/nominatim_api/version.py
Normal file
11
src/nominatim_api/version.py
Normal file
@@ -0,0 +1,11 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Version information for the Nominatim API.
|
||||
"""
|
||||
|
||||
NOMINATIM_API_VERSION = '4.4.99'
|
||||
Reference in New Issue
Block a user