split code into submodules

Sarah Hoffmann
2024-05-16 11:55:17 +02:00
parent 0fb4fe8e4d
commit 6e89310a92
137 changed files with 757 additions and 716 deletions


@@ -0,0 +1,38 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
The public interface of the Nominatim library.
Classes and functions defined in this file are considered stable. Always
import from this file, not from the source files directly.
"""
# See also https://github.com/PyCQA/pylint/issues/6006
# pylint: disable=useless-import-alias
from .core import (NominatimAPI as NominatimAPI,
NominatimAPIAsync as NominatimAPIAsync)
from .connection import (SearchConnection as SearchConnection)
from .status import (StatusResult as StatusResult)
from .types import (PlaceID as PlaceID,
OsmID as OsmID,
PlaceRef as PlaceRef,
Point as Point,
Bbox as Bbox,
GeometryFormat as GeometryFormat,
DataLayer as DataLayer)
from .results import (SourceTable as SourceTable,
AddressLine as AddressLine,
AddressLines as AddressLines,
WordInfo as WordInfo,
WordInfos as WordInfos,
DetailedResult as DetailedResult,
ReverseResult as ReverseResult,
ReverseResults as ReverseResults,
SearchResult as SearchResult,
SearchResults as SearchResults)
from .localization import (Locales as Locales)
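
The names above follow the package layout introduced in this commit; downstream code should import these stable names from the package root rather than from the individual submodules. A minimal sketch, assuming the package is installed as `nominatim_api` (the path suggested by src/nominatim_api/core.py below):

# Only names re-exported by the package root are part of the stable interface.
from nominatim_api import NominatimAPI, GeometryFormat, DataLayer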


@@ -0,0 +1,149 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Extended SQLAlchemy connection class that also includes access to the schema.
"""
from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \
Awaitable, Callable, TypeVar
import asyncio
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
from nominatim_core.typing import SaFromClause
from nominatim_core.db.sqlalchemy_schema import SearchTables
from nominatim_core.db.sqlalchemy_types import Geometry
from .logging import log
T = TypeVar('T')
class SearchConnection:
""" An extended SQLAlchemy connection class, that also contains
the table definitions. The underlying asynchronous SQLAlchemy
connection can be accessed with the 'connection' property.
The 't' property is the collection of Nominatim tables.
"""
def __init__(self, conn: AsyncConnection,
tables: SearchTables,
properties: Dict[str, Any]) -> None:
self.connection = conn
self.t = tables # pylint: disable=invalid-name
self._property_cache = properties
self._classtables: Optional[Set[str]] = None
self.query_timeout: Optional[int] = None
def set_query_timeout(self, timeout: Optional[int]) -> None:
""" Set the timeout after which a query over this connection
is cancelled.
"""
self.query_timeout = timeout
async def scalar(self, sql: sa.sql.base.Executable,
params: Union[Mapping[str, Any], None] = None
) -> Any:
""" Execute a 'scalar()' query on the connection.
"""
log().sql(self.connection, sql, params)
return await asyncio.wait_for(self.connection.scalar(sql, params), self.query_timeout)
async def execute(self, sql: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None] = None
) -> 'sa.Result[Any]':
""" Execute a 'execute()' query on the connection.
"""
log().sql(self.connection, sql, params)
return await asyncio.wait_for(self.connection.execute(sql, params), self.query_timeout)
async def get_property(self, name: str, cached: bool = True) -> str:
""" Get a property from Nominatim's property table.
Property values are normally cached so that they are only
retrieved from the database when they are queried for the
first time with this function. Set 'cached' to False to force
reading the property from the database.
Raises a ValueError if the property does not exist.
"""
lookup_name = f'DBPROP:{name}'
if cached and lookup_name in self._property_cache:
return cast(str, self._property_cache[lookup_name])
sql = sa.select(self.t.properties.c.value)\
.where(self.t.properties.c.property == name)
value = await self.connection.scalar(sql)
if value is None:
raise ValueError(f"Property '{name}' not found in database.")
self._property_cache[lookup_name] = cast(str, value)
return cast(str, value)
async def get_db_property(self, name: str) -> Any:
""" Get a setting from the database. At the moment, only
'server_version', the version of the database software, can
be retrieved with this function.
Raises a ValueError if the property does not exist.
"""
if name != 'server_version':
raise ValueError(f"DB setting '{name}' not found in database.")
return self._property_cache['DB:server_version']
async def get_cached_value(self, group: str, name: str,
factory: Callable[[], Awaitable[T]]) -> T:
""" Access the cache for this Nominatim instance.
Each cache value needs to belong to a group and have a name.
This function is for internal API use only.
`factory` is an async callback function that produces
the value if it is not already cached.
Returns the cached value or the result of factory (also caching
the result).
"""
full_name = f'{group}:{name}'
if full_name in self._property_cache:
return cast(T, self._property_cache[full_name])
value = await factory()
self._property_cache[full_name] = value
return value
async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]:
""" Lookup up if there is a classtype table for the given category
and return a SQLAlchemy table for it, if it exists.
"""
if self._classtables is None:
res = await self.execute(sa.text("""SELECT tablename FROM pg_tables
WHERE tablename LIKE 'place_classtype_%'
"""))
self._classtables = {r[0] for r in res}
tablename = f"place_classtype_{cls}_{typ}"
if tablename not in self._classtables:
return None
if tablename in self.t.meta.tables:
return self.t.meta.tables[tablename]
return sa.Table(tablename, self.t.meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('centroid', Geometry))
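
A short sketch of how the class above is meant to be used, assuming a SearchConnection obtained from NominatimAPIAsync.begin() (defined in core.py below) and that `placex` is one of the tables exposed through `conn.t`:

import sqlalchemy as sa

async def count_places(conn: SearchConnection) -> int:
    # Cancel the query if it runs longer than 10 seconds.
    conn.set_query_timeout(10)
    return await conn.scalar(sa.select(sa.func.count()).select_from(conn.t.placex))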

src/nominatim_api/core.py

@@ -0,0 +1,974 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of classes for API access via libraries.
"""
from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple
import asyncio
import sys
import contextlib
from pathlib import Path
import sqlalchemy as sa
import sqlalchemy.ext.asyncio as sa_asyncio
from nominatim_core.errors import UsageError
from nominatim_core.db.sqlalchemy_schema import SearchTables
from nominatim_core.db.async_core_library import PGCORE_LIB, PGCORE_ERROR
from nominatim_core.config import Configuration
from .sql import sqlite_functions, sqlalchemy_functions #pylint: disable=unused-import
from .connection import SearchConnection
from .status import get_status, StatusResult
from .lookup import get_detailed_place, get_simple_place
from .reverse import ReverseGeocoder
from .search import ForwardGeocoder, Phrase, PhraseType, make_query_analyzer
from . import types as ntyp
from .results import DetailedResult, ReverseResult, SearchResults
class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
""" The main frontend to the Nominatim database implements the
functions for lookup, forward and reverse geocoding using
asynchronous functions.
This class shares most of the functions with its synchronous
version. There are some additional functions or parameters,
which are documented below.
"""
def __init__(self, project_dir: Path,
environ: Optional[Mapping[str, str]] = None,
loop: Optional[asyncio.AbstractEventLoop] = None) -> None:
""" Initiate a new frontend object with synchronous API functions.
Parameters:
project_dir: Path to the
[project directory](../admin/Import.md#creating-the-project-directory)
of the local Nominatim installation.
environ: Mapping of [configuration parameters](../customize/Settings.md).
When set, replaces any configuration via environment variables.
Settings in this mapping also have precedence over any
parameters found in the `.env` file of the project directory.
loop: The asyncio event loop that will be used when calling
functions. Only needed when a custom event loop is used
and the Python version is 3.9 or earlier.
"""
self.config = Configuration(project_dir, environ)
self.query_timeout = self.config.get_int('QUERY_TIMEOUT') \
if self.config.QUERY_TIMEOUT else None
self.reverse_restrict_to_country_area = self.config.get_bool('SEARCH_WITHIN_COUNTRIES')
self.server_version = 0
if sys.version_info >= (3, 10):
self._engine_lock = asyncio.Lock()
else:
self._engine_lock = asyncio.Lock(loop=loop) # pylint: disable=unexpected-keyword-arg
self._engine: Optional[sa_asyncio.AsyncEngine] = None
self._tables: Optional[SearchTables] = None
self._property_cache: Dict[str, Any] = {'DB:server_version': 0}
async def setup_database(self) -> None:
""" Set up the SQL engine and connections.
This function will be implicitly called when the database is
accessed for the first time. You may also call it explicitly so
that the first call is not delayed by the setup.
"""
async with self._engine_lock:
if self._engine:
return
extra_args: Dict[str, Any] = {'future': True,
'echo': self.config.get_bool('DEBUG_SQL')}
if self.config.get_int('API_POOL_SIZE') == 0:
extra_args['poolclass'] = sa.pool.NullPool
else:
extra_args['poolclass'] = sa.pool.AsyncAdaptedQueuePool
extra_args['max_overflow'] = 0
extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')
is_sqlite = self.config.DATABASE_DSN.startswith('sqlite:')
if is_sqlite:
params = dict((p.split('=', 1)
for p in self.config.DATABASE_DSN[7:].split(';')))
dburl = sa.engine.URL.create('sqlite+aiosqlite',
database=params.get('dbname'))
if not ('NOMINATIM_DATABASE_RW' in self.config.environ
and self.config.get_bool('DATABASE_RW')) \
and not Path(params.get('dbname', '')).is_file():
raise UsageError(f"SQlite database '{params.get('dbname')}' does not exist.")
else:
dsn = self.config.get_database_params()
query = {k: v for k, v in dsn.items()
if k not in ('user', 'password', 'dbname', 'host', 'port')}
dburl = sa.engine.URL.create(
f'postgresql+{PGCORE_LIB}',
database=dsn.get('dbname'),
username=dsn.get('user'),
password=dsn.get('password'),
host=dsn.get('host'),
port=int(dsn['port']) if 'port' in dsn else None,
query=query)
engine = sa_asyncio.create_async_engine(dburl, **extra_args)
if is_sqlite:
server_version = 0
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
dbapi_con.run_async(lambda conn: conn.enable_load_extension(True))
sqlite_functions.install_custom_functions(dbapi_con)
cursor = dbapi_con.cursor()
cursor.execute("SELECT load_extension('mod_spatialite')")
cursor.execute('SELECT SetDecimalPrecision(7)')
dbapi_con.run_async(lambda conn: conn.enable_load_extension(False))
else:
try:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
server_version = int(result)
if server_version >= 110000:
await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
await conn.execute(sa.text(
"SET max_parallel_workers_per_gather TO '0'"))
except (PGCORE_ERROR, sa.exc.OperationalError):
server_version = 0
if server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
self._property_cache['DB:server_version'] = server_version
self._tables = SearchTables(sa.MetaData()) # pylint: disable=no-member
self._engine = engine
async def close(self) -> None:
""" Close all active connections to the database. The NominatimAPIAsync
object remains usable after closing. If a new API function is
called, new connections are created.
"""
if self._engine is not None:
await self._engine.dispose()
@contextlib.asynccontextmanager
async def begin(self) -> AsyncIterator[SearchConnection]:
""" Create a new connection with automatic transaction handling.
This function may be used to get low-level access to the database.
Refer to the SQLAlchemy documentation for details on how to use
the connection object.
"""
if self._engine is None:
await self.setup_database()
assert self._engine is not None
assert self._tables is not None
async with self._engine.begin() as conn:
yield SearchConnection(conn, self._tables, self._property_cache)
async def status(self) -> StatusResult:
""" Return the status of the database.
"""
try:
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
status = await get_status(conn)
except (PGCORE_ERROR, sa.exc.OperationalError):
return StatusResult(700, 'Database connection failed')
return status
async def details(self, place: ntyp.PlaceRef, **params: Any) -> Optional[DetailedResult]:
""" Get detailed information about a place in the database.
Returns None if there is no entry under the given ID.
"""
details = ntyp.LookupDetails.from_kwargs(params)
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if details.keywords:
await make_query_analyzer(conn)
return await get_detailed_place(conn, place, details)
async def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults:
""" Get simple information about a list of places.
Returns a list of place information for all IDs that were found.
"""
details = ntyp.LookupDetails.from_kwargs(params)
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if details.keywords:
await make_query_analyzer(conn)
return SearchResults(filter(None,
[await get_simple_place(conn, p, details) for p in places]))
async def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
""" Find a place by its coordinates. Also known as reverse geocoding.
Returns the closest result that can be found or None if
no place matches the given criteria.
"""
# The following negation handles NaN correctly. Don't change.
if not abs(coord[0]) <= 180 or not abs(coord[1]) <= 90:
# There are no results to be expected outside valid coordinates.
return None
details = ntyp.ReverseDetails.from_kwargs(params)
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if details.keywords:
await make_query_analyzer(conn)
geocoder = ReverseGeocoder(conn, details,
self.reverse_restrict_to_country_area)
return await geocoder.lookup(coord)
async def search(self, query: str, **params: Any) -> SearchResults:
""" Find a place by free-text search. Also known as forward geocoding.
"""
query = query.strip()
if not query:
raise UsageError('Nothing to search for.')
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params),
self.config.get_int('REQUEST_TIMEOUT') \
if self.config.REQUEST_TIMEOUT else None)
phrases = [Phrase(PhraseType.NONE, p.strip()) for p in query.split(',')]
return await geocoder.lookup(phrases)
# pylint: disable=too-many-arguments,too-many-branches
async def search_address(self, amenity: Optional[str] = None,
street: Optional[str] = None,
city: Optional[str] = None,
county: Optional[str] = None,
state: Optional[str] = None,
country: Optional[str] = None,
postalcode: Optional[str] = None,
**params: Any) -> SearchResults:
""" Find an address using structured search.
"""
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
details = ntyp.SearchDetails.from_kwargs(params)
phrases: List[Phrase] = []
if amenity:
phrases.append(Phrase(PhraseType.AMENITY, amenity))
if street:
phrases.append(Phrase(PhraseType.STREET, street))
if city:
phrases.append(Phrase(PhraseType.CITY, city))
if county:
phrases.append(Phrase(PhraseType.COUNTY, county))
if state:
phrases.append(Phrase(PhraseType.STATE, state))
if postalcode:
phrases.append(Phrase(PhraseType.POSTCODE, postalcode))
if country:
phrases.append(Phrase(PhraseType.COUNTRY, country))
if not phrases:
raise UsageError('Nothing to search for.')
if amenity or street:
details.restrict_min_max_rank(26, 30)
elif city:
details.restrict_min_max_rank(13, 25)
elif county:
details.restrict_min_max_rank(10, 12)
elif state:
details.restrict_min_max_rank(5, 9)
elif postalcode:
details.restrict_min_max_rank(5, 11)
else:
details.restrict_min_max_rank(4, 4)
if 'layers' not in params:
details.layers = ntyp.DataLayer.ADDRESS
if amenity:
details.layers |= ntyp.DataLayer.POI
geocoder = ForwardGeocoder(conn, details,
self.config.get_int('REQUEST_TIMEOUT') \
if self.config.REQUEST_TIMEOUT else None)
return await geocoder.lookup(phrases)
async def search_category(self, categories: List[Tuple[str, str]],
near_query: Optional[str] = None,
**params: Any) -> SearchResults:
""" Find an object of a certain category near another place.
The near place may either be given as an unstructured search
query in itself or as coordinates.
"""
if not categories:
return SearchResults()
details = ntyp.SearchDetails.from_kwargs(params)
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if near_query:
phrases = [Phrase(PhraseType.NONE, p) for p in near_query.split(',')]
else:
phrases = []
if details.keywords:
await make_query_analyzer(conn)
geocoder = ForwardGeocoder(conn, details,
self.config.get_int('REQUEST_TIMEOUT') \
if self.config.REQUEST_TIMEOUT else None)
return await geocoder.lookup_pois(categories, phrases)
class NominatimAPI:
""" This class provides a thin synchronous wrapper around the asynchronous
Nominatim functions. It creates its own event loop and runs each
synchronous function call to completion using that loop.
"""
def __init__(self, project_dir: Path,
environ: Optional[Mapping[str, str]] = None) -> None:
""" Initiate a new frontend object with synchronous API functions.
Parameters:
project_dir: Path to the
[project directory](../admin/Import.md#creating-the-project-directory)
of the local Nominatim installation.
environ: Mapping of [configuration parameters](../customize/Settings.md).
When set, replaces any configuration via environment variables.
Settings in this mapping also have precedence over any
parameters found in the `.env` file of the project directory.
"""
self._loop = asyncio.new_event_loop()
self._async_api = NominatimAPIAsync(project_dir, environ, loop=self._loop)
def close(self) -> None:
""" Close all active connections to the database.
This function also closes the asynchronous worker loop, making
the NominatimAPI object unusable.
"""
self._loop.run_until_complete(self._async_api.close())
self._loop.close()
@property
def config(self) -> Configuration:
""" Provide read-only access to the [configuration](#Configuration)
used by the API.
"""
return self._async_api.config
def status(self) -> StatusResult:
""" Return the status of the database as a dataclass object
with the fields described below.
Returns:
status(int): A status code as described on the status page.
message(str): Either 'OK' or a human-readable message of the
problem encountered.
software_version(tuple): A tuple with the version of the
Nominatim library, consisting of (major, minor, patch, db-patch).
database_version(tuple): A tuple with the version of the library
which was used for the import or last migration.
Also consists of (major, minor, patch, db-patch).
data_updated(datetime): Timestamp with the age of the data.
"""
return self._loop.run_until_complete(self._async_api.status())
def details(self, place: ntyp.PlaceRef, **params: Any) -> Optional[DetailedResult]:
""" Get detailed information about a place in the database.
The result is a dataclass object with the fields described below
or `None` if the place could not be found in the database.
Parameters:
place: Description of the place to look up. See
[Place identification](Input-Parameter-Types.md#place-identification)
for the various ways to reference a place.
Other parameters:
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
parent_place_id (Optional[int]): Internal ID of the parent of this
place. Only meaningful for POI-like objects (places with a
rank_address of 30).
linked_place_id (Optional[int]): Internal ID of the place this object
links to. When this ID is set then there is no guarantee that
the rest of the result information is complete.
admin_level (int): Value of the `admin_level` OSM tag. Only meaningful
for administrative boundary objects.
indexed_date (datetime): Timestamp when the place was last updated.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(self._async_api.details(place, **params))
def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults:
""" Get simple information about a list of places.
Returns a list of place information for all IDs that were found.
Each result is a dataclass with the fields detailed below.
Parameters:
places: List of descriptions of the place to look up. See
[Place identification](Input-Parameter-Types.md#place-identification)
for the various ways to reference a place.
Other parameters:
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
bbox (Bbox): Bounding box of the full geometry of the place.
If the place is a single point, then the size of the bounding
box is guessed according to the type of place.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(self._async_api.lookup(places, **params))
def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
""" Find a place by its coordinates. Also known as reverse geocoding.
Returns the closest result that can be found or `None` if
no place matches the given criteria. The result is a dataclass
with the fields as detailed below.
Parameters:
coord: Coordinate to lookup the place for as a Point
or a tuple (x, y). Must be in WGS84 projection.
Other parameters:
max_rank (int): Highest address rank to return. Can be used to
restrict search to streets or settlements.
layers (enum): Defines the kind of data to take into account.
See description of layers below. (Default: addresses and POIs)
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
bbox (Bbox): Bounding box of the full geometry of the place.
If the place is a single point, then the size of the bounding
box is guessed according to the type of place.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
distance (Optional[float]): Distance in degrees from the input point.
"""
return self._loop.run_until_complete(self._async_api.reverse(coord, **params))
def search(self, query: str, **params: Any) -> SearchResults:
""" Find a place by free-text search. Also known as forward geocoding.
Parameters:
query: Free-form text query searching for a place.
Other parameters:
max_results (int): Maximum number of results to return. The
actual number of results may be less. (Default: 10)
min_rank (int): Lowest permissible rank for the result.
For addressable places this is the minimum
[address rank](../customize/Ranking.md#address-rank). For all
other places the [search rank](../customize/Ranking.md#search-rank)
is used.
max_rank (int): Highest permissible rank for the result. See min_rank above.
layers (enum): Defines the kind of data to take into account.
See [layers section](Input-Parameter-Types.md#layers) for details.
(Default: addresses and POIs)
countries (list[str]): Restrict search to countries with the given
ISO 3166-1 alpha-2 country code. An empty list (the default)
disables this filter.
excluded (list[int]): A list of internal IDs of places to exclude
from the search.
viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
bounded_viewbox (bool): Consider the bounding box given in `viewbox`
as a filter and return only results within the bounding box.
near (Optional[Point]): Focus search around the given point and
return results ordered by distance to the given point.
near_radius (Optional[float]): Restrict results to results within
the given distance in degrees of `near` point. Ignored, when
`near` is not set.
categories (list[tuple]): Restrict search to places of the given
categories. The category is the main OSM tag assigned to each
place. An empty list (the default) disables this filter.
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
bbox (Bbox): Bounding box of the full geometry of the place.
If the place is a single point, then the size of the bounding
box is guessed according to the type of place.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(
self._async_api.search(query, **params))
# pylint: disable=too-many-arguments
def search_address(self, amenity: Optional[str] = None,
street: Optional[str] = None,
city: Optional[str] = None,
county: Optional[str] = None,
state: Optional[str] = None,
country: Optional[str] = None,
postalcode: Optional[str] = None,
**params: Any) -> SearchResults:
""" Find an address using structured search.
Parameters:
amenity: Name of a POI.
street: Street and optionally housenumber of the address. If the address
does not have a street, then the place that the house number refers to.
city: Postal city of the address.
county: County equivalent of the address. Does not exist in all
jurisdictions.
state: State or province of the address.
country: Country with its full name or its ISO 3166-1 alpha-2 country code.
Do not use together with the country_code filter.
postalcode: Post code or ZIP for the place.
Other parameters:
max_results (int): Maximum number of results to return. The
actual number of results may be less. (Default: 10)
min_rank (int): Lowest permissible rank for the result.
For addressable places this is the minimum
[address rank](../customize/Ranking.md#address-rank). For all
other places the [search rank](../customize/Ranking.md#search-rank)
is used.
max_rank (int): Highest permissible rank for the result. See min_rank above.
layers (enum): Defines the kind of data to take into account.
See [layers section](Input-Parameter-Types.md#layers) for details.
(Default: addresses and POIs)
countries (list[str]): Restrict search to countries with the given
ISO 3166-1 alpha-2 country code. An empty list (the default)
disables this filter. Do not use when the country parameter
is used.
excluded (list[int]): A list of internal IDs of places to exclude
from the search.
viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
bounded_viewbox (bool): Consider the bounding box given in `viewbox`
as a filter and return only results within the bounding box.
near (Optional[Point]): Focus search around the given point and
return results ordered by distance to the given point.
near_radius (Optional[float]): Restrict results to results within
the given distance in degrees of `near` point. Ignored, when
`near` is not set.
categories (list[tuple]): Restrict search to places of the given
categories. The category is the main OSM tag assigned to each
place. An empty list (the default) disables this filter.
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
bbox (Bbox): Bounding box of the full geometry of the place.
If the place is a single point, then the size of the bounding
box is guessed according to the type of place.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(
self._async_api.search_address(amenity, street, city, county,
state, country, postalcode, **params))
def search_category(self, categories: List[Tuple[str, str]],
near_query: Optional[str] = None,
**params: Any) -> SearchResults:
""" Find an object of a certain category near another place.
The near place may either be given as an unstructured search
query in itself or as a geographic area through the
viewbox or near parameters.
Parameters:
categories: Restrict search to places of the given
categories. The category is the main OSM tag assigned to each
place.
near_query: Optional free-text query to define the area to
restrict search to.
Other parameters:
max_results (int): Maximum number of results to return. The
actual number of results may be less. (Default: 10)
min_rank (int): Lowest permissible rank for the result.
For addressable places this is the minimum
[address rank](../customize/Ranking.md#address-rank). For all
other places the [search rank](../customize/Ranking.md#search-rank)
is used.
max_rank (int): Highest permissible rank for the result. See min_rank above.
layers (enum): Defines the kind of data to take into account.
See [layers section](Input-Parameter-Types.md#layers) for details.
(Default: addresses and POIs)
countries (list[str]): Restrict search to countries with the given
ISO 3166-1 alpha-2 country code. An empty list (the default)
disables this filter.
excluded (list[int]): A list of internal IDs of places to exclude
from the search.
viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
bounded_viewbox (bool): Consider the bounding box given in `viewbox`
as a filter and return only results within the bounding box.
near (Optional[Point]): Focus search around the given point and
return results ordered by distance to the given point.
near_radius (Optional[float]): Restrict results to results within
the given distance in degrees of `near` point. Ignored, when
`near` is not set.
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
bbox (Bbox): Bounding box of the full geometry of the place.
If the place is a single point, then the size of the bounding
box is guessed according to the type of place.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(
self._async_api.search_category(categories, near_query, **params))
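
A minimal usage sketch of the synchronous wrapper above. The project directory and the query strings are placeholders, and status code 0 is assumed to be the 'OK' value reported by status():

from pathlib import Path

from nominatim_api import NominatimAPI

api = NominatimAPI(Path('/path/to/project-dir'))
try:
    if api.status().status == 0:
        # Free-text (forward) search; max_results caps the number of hits.
        for place in api.search('Birkenweg 5, Hamburg', max_results=5):
            print(place.place_id, place.centroid)
        # Reverse lookup takes (longitude, latitude) in WGS84.
        spot = api.reverse((9.99, 53.55), max_rank=30)
        if spot is not None:
            print(spot.osm_object)
finally:
    api.close()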


@@ -0,0 +1,97 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for localizing names of results.
"""
from typing import Mapping, List, Optional
import re
class Locales:
""" Helper class for localization of names.
It takes a list of language prefixes in their order of preferred
usage.
"""
def __init__(self, langs: Optional[List[str]] = None):
self.languages = langs or []
self.name_tags: List[str] = []
# Build the list of supported tags. It is currently hard-coded.
self._add_lang_tags('name')
self._add_tags('name', 'brand')
self._add_lang_tags('official_name', 'short_name')
self._add_tags('official_name', 'short_name', 'ref')
def __bool__(self) -> bool:
return len(self.languages) > 0
def _add_tags(self, *tags: str) -> None:
for tag in tags:
self.name_tags.append(tag)
self.name_tags.append(f"_place_{tag}")
def _add_lang_tags(self, *tags: str) -> None:
for tag in tags:
for lang in self.languages:
self.name_tags.append(f"{tag}:{lang}")
self.name_tags.append(f"_place_{tag}:{lang}")
def display_name(self, names: Optional[Mapping[str, str]]) -> str:
""" Return the best matching name from a dictionary of names
containing different name variants.
If 'names' is None or empty, an empty string is returned. If no
appropriate localization is found, the first name is returned.
"""
if not names:
return ''
if len(names) > 1:
for tag in self.name_tags:
if tag in names:
return names[tag]
# Nothing? Return any of the other names as a default.
return next(iter(names.values()))
@staticmethod
def from_accept_languages(langstr: str) -> 'Locales':
""" Create a localization object from a language list in the
format of HTTP accept-languages header.
The functions tries to be forgiving of format errors by first splitting
the string into comma-separated parts and then parsing each
description separately. Badly formatted parts are then ignored.
"""
# split string into languages
candidates = []
for desc in langstr.split(','):
m = re.fullmatch(r'\s*([a-z_-]+)(?:;\s*q\s*=\s*([01](?:\.\d+)?))?\s*',
desc, flags=re.I)
if m:
candidates.append((m[1], float(m[2] or 1.0)))
# sort the results by the weight of each language (preserving order).
candidates.sort(reverse=True, key=lambda e: e[1])
# If a language has a region variant, also add the language without
# variant but only if it isn't already in the list to not mess up the weight.
languages = []
for lid, _ in candidates:
languages.append(lid)
parts = lid.split('-', 1)
if len(parts) > 1 and all(c[0] != parts[0] for c in candidates):
languages.append(parts[0])
return Locales(languages)
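
A small sketch of the parsing and lookup logic above, using an Accept-Language style string; the names dictionary mimics the 'names' attribute of a result:

locales = Locales.from_accept_languages('de;q=0.8, en-GB')
# Sorted by weight this yields ['en-GB', 'en', 'de'] ('en' is added as a
# fallback for the regional variant).
names = {'name': 'München', 'name:en': 'Munich', 'name:de': 'München'}
assert locales.display_name(names) == 'Munich'   # 'name:en' beats plain 'name'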


@@ -0,0 +1,433 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for specialised logging with HTML output.
"""
from typing import Any, Iterator, Optional, List, Tuple, cast, Union, Mapping, Sequence
from contextvars import ContextVar
import datetime as dt
import textwrap
import io
import re
import html
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
try:
from pygments import highlight
from pygments.lexers import PythonLexer, PostgresLexer
from pygments.formatters import HtmlFormatter
CODE_HIGHLIGHT = True
except ModuleNotFoundError:
CODE_HIGHLIGHT = False
def _debug_name(res: Any) -> str:
if res.names:
return cast(str, res.names.get('name', next(iter(res.names.values()))))
return f"Hnr {res.housenumber}" if res.housenumber is not None else '[NONE]'
class BaseLogger:
""" Interface for logging function.
The base implementation does nothing. Overwrite the functions
in derived classes which implement logging functionality.
"""
def get_buffer(self) -> str:
""" Return the current content of the log buffer.
"""
return ''
def function(self, func: str, **kwargs: Any) -> None:
""" Start a new debug chapter for the given function and its parameters.
"""
def section(self, heading: str) -> None:
""" Start a new section with the given title.
"""
def comment(self, text: str) -> None:
""" Add a simple comment to the debug output.
"""
def var_dump(self, heading: str, var: Any) -> None:
""" Print the content of the variable to the debug output prefixed by
the given heading.
"""
def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
""" Print the table generated by the generator function.
"""
def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
""" Print a list of search results generated by the generator function.
"""
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
""" Print the SQL for the given statement.
"""
def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable',
extra_params: Union[Mapping[str, Any],
Sequence[Mapping[str, Any]], None]) -> str:
""" Return the compiled version of the statement.
"""
compiled = cast('sa.ClauseElement', statement).compile(conn.sync_engine)
params = dict(compiled.params)
if isinstance(extra_params, Mapping):
for k, v in extra_params.items():
if hasattr(v, 'to_wkt'):
params[k] = v.to_wkt()
elif isinstance(v, (int, float)):
params[k] = v
else:
params[k] = str(v)
elif isinstance(extra_params, Sequence) and extra_params:
for k in extra_params[0]:
params[k] = f':{k}'
sqlstr = str(compiled)
if conn.dialect.name == 'postgresql':
if sa.__version__.startswith('1'):
try:
sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
return sqlstr % tuple((repr(params.get(name, None))
for name in compiled.positiontup)) # type: ignore
except TypeError:
return sqlstr
# Fixes an odd issue with Python 3.7 where percent signs are not
# quoted correctly.
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
return sqlstr % params
assert conn.dialect.name == 'sqlite'
# params in positional order
pparams = (repr(params.get(name, None)) for name in compiled.positiontup) # type: ignore
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', '?', sqlstr)
sqlstr = re.sub(r"\?", lambda m: next(pparams), sqlstr)
return sqlstr
class HTMLLogger(BaseLogger):
""" Logger that formats messages in HTML.
"""
def __init__(self) -> None:
self.buffer = io.StringIO()
def _timestamp(self) -> None:
self._write(f'<p class="timestamp">[{dt.datetime.now()}]</p>')
def get_buffer(self) -> str:
return HTML_HEADER + self.buffer.getvalue() + HTML_FOOTER
def function(self, func: str, **kwargs: Any) -> None:
self._timestamp()
self._write(f"<h1>Debug output for {func}()</h1>\n<p>Parameters:<dl>")
for name, value in kwargs.items():
self._write(f'<dt>{name}</dt><dd>{self._python_var(value)}</dd>')
self._write('</dl></p>')
def section(self, heading: str) -> None:
self._timestamp()
self._write(f"<h2>{heading}</h2>")
def comment(self, text: str) -> None:
self._timestamp()
self._write(f"<p>{text}</p>")
def var_dump(self, heading: str, var: Any) -> None:
self._timestamp()
if callable(var):
var = var()
self._write(f'<h5>{heading}</h5>{self._python_var(var)}')
def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
self._timestamp()
head = next(rows)
assert head
self._write(f'<table><thead><tr><th colspan="{len(head)}">{heading}</th></tr><tr>')
for cell in head:
self._write(f'<th>{cell}</th>')
self._write('</tr></thead><tbody>')
for row in rows:
if row is not None:
self._write('<tr>')
for cell in row:
self._write(f'<td>{cell}</td>')
self._write('</tr>')
self._write('</tbody></table>')
def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
""" Print a list of search results generated by the generator function.
"""
self._timestamp()
def format_osm(osm_object: Optional[Tuple[str, int]]) -> str:
if not osm_object:
return '-'
t, i = osm_object
if t == 'N':
fullt = 'node'
elif t == 'W':
fullt = 'way'
elif t == 'R':
fullt = 'relation'
else:
return f'{t}{i}'
return f'<a href="https://www.openstreetmap.org/{fullt}/{i}">{t}{i}</a>'
self._write(f'<h5>{heading}</h5><p><dl>')
total = 0
for rank, res in results:
self._write(f'<dt>[{rank:.3f}]</dt> <dd>{res.source_table.name}(')
self._write(f"{_debug_name(res)}, type=({','.join(res.category)}), ")
self._write(f"rank={res.rank_address}, ")
self._write(f"osm={format_osm(res.osm_object)}, ")
self._write(f'cc={res.country_code}, ')
self._write(f'importance={res.importance or float("nan"):.5f})</dd>')
total += 1
self._write(f'</dl><b>TOTAL:</b> {total}</p>')
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
self._timestamp()
sqlstr = self.format_sql(conn, statement, params)
if CODE_HIGHLIGHT:
sqlstr = highlight(sqlstr, PostgresLexer(),
HtmlFormatter(nowrap=True, lineseparator='<br />'))
self._write(f'<div class="highlight"><code class="lang-sql">{sqlstr}</code></div>')
else:
self._write(f'<code class="lang-sql">{html.escape(sqlstr)}</code>')
def _python_var(self, var: Any) -> str:
if CODE_HIGHLIGHT:
fmt = highlight(str(var), PythonLexer(), HtmlFormatter(nowrap=True))
return f'<div class="highlight"><code class="lang-python">{fmt}</code></div>'
return f'<code class="lang-python">{html.escape(str(var))}</code>'
def _write(self, text: str) -> None:
""" Add the raw text to the debug output.
"""
self.buffer.write(text)
class TextLogger(BaseLogger):
""" Logger creating output suitable for the console.
"""
def __init__(self) -> None:
self.buffer = io.StringIO()
def _timestamp(self) -> None:
self._write(f'[{dt.datetime.now()}]\n')
def get_buffer(self) -> str:
return self.buffer.getvalue()
def function(self, func: str, **kwargs: Any) -> None:
self._write(f"#### Debug output for {func}()\n\nParameters:\n")
for name, value in kwargs.items():
self._write(f' {name}: {self._python_var(value)}\n')
self._write('\n')
def section(self, heading: str) -> None:
self._timestamp()
self._write(f"\n# {heading}\n\n")
def comment(self, text: str) -> None:
self._write(f"{text}\n")
def var_dump(self, heading: str, var: Any) -> None:
if callable(var):
var = var()
self._write(f'{heading}:\n {self._python_var(var)}\n\n')
def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
self._write(f'{heading}:\n')
data = [list(map(self._python_var, row)) if row else None for row in rows]
assert data[0] is not None
num_cols = len(data[0])
maxlens = [max(len(d[i]) for d in data if d) for i in range(num_cols)]
tablewidth = sum(maxlens) + 3 * num_cols + 1
row_format = '| ' +' | '.join(f'{{:<{l}}}' for l in maxlens) + ' |\n'
self._write('-'*tablewidth + '\n')
self._write(row_format.format(*data[0]))
self._write('-'*tablewidth + '\n')
for row in data[1:]:
if row:
self._write(row_format.format(*row))
else:
self._write('-'*tablewidth + '\n')
if data[-1]:
self._write('-'*tablewidth + '\n')
def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
self._timestamp()
self._write(f'{heading}:\n')
total = 0
for rank, res in results:
self._write(f'[{rank:.3f}] {res.source_table.name}(')
self._write(f"{_debug_name(res)}, type=({','.join(res.category)}), ")
self._write(f"rank={res.rank_address}, ")
self._write(f"osm={''.join(map(str, res.osm_object or []))}, ")
self._write(f'cc={res.country_code}, ')
self._write(f'importance={res.importance or -1:.5f})\n')
total += 1
self._write(f'TOTAL: {total}\n\n')
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
self._timestamp()
sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78))
self._write(f"| {sqlstr}\n\n")
def _python_var(self, var: Any) -> str:
return str(var)
def _write(self, text: str) -> None:
self.buffer.write(text)
logger: ContextVar[BaseLogger] = ContextVar('logger', default=BaseLogger())
def set_log_output(fmt: str) -> None:
""" Enable collecting debug information.
"""
if fmt == 'html':
logger.set(HTMLLogger())
elif fmt == 'text':
logger.set(TextLogger())
else:
logger.set(BaseLogger())
def log() -> BaseLogger:
""" Return the logger for the current context.
"""
return logger.get()
def get_and_disable() -> str:
""" Return the current content of the debug buffer and disable logging.
"""
buf = logger.get().get_buffer()
logger.set(BaseLogger())
return buf
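# A minimal usage sketch of this module's public helpers (illustrative only;
# the calling context below is an assumption, not part of this file):
#
#   set_log_output('text')            # start collecting plain-text debug output
#   log().section('Example step')     # code under test logs via log()
#   debug_text = get_and_disable()    # fetch the buffer and reset to the no-op logger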
HTML_HEADER: str = """<!DOCTYPE html>
<html>
<head>
<title>Nominatim - Debug</title>
<style>
""" + \
(HtmlFormatter(nobackground=True).get_style_defs('.highlight') if CODE_HIGHLIGHT else '') +\
"""
h2 { font-size: x-large }
dl {
padding-left: 10pt;
font-family: monospace
}
dt {
float: left;
font-weight: bold;
margin-right: 0.5em
}
dt::after { content: ": "; }
dd::after {
clear: left;
display: block
}
.lang-sql {
color: #555;
font-size: small
}
h5 {
border: solid lightgrey 0.1pt;
margin-bottom: 0;
background-color: #f7f7f7
}
h5 + .highlight {
padding: 3pt;
border: solid lightgrey 0.1pt
}
table, th, tbody {
border: thin solid;
border-collapse: collapse;
}
td {
border-right: thin solid;
padding-left: 3pt;
padding-right: 3pt;
}
.timestamp {
font-size: 0.8em;
color: darkblue;
width: calc(100% - 5pt);
text-align: right;
position: absolute;
left: 0;
margin-top: -5px;
}
</style>
</head>
<body>
"""
HTML_FOOTER: str = "</body></html>"

src/nominatim_api/lookup.py
@@ -0,0 +1,250 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of place lookup by ID.
"""
from typing import Optional, Callable, Tuple, Type
import datetime as dt
import sqlalchemy as sa
from nominatim_core.typing import SaColumn, SaRow, SaSelect
from .connection import SearchConnection
from .logging import log
from . import types as ntyp
from . import results as nres
RowFunc = Callable[[Optional[SaRow], Type[nres.BaseResultT]], Optional[nres.BaseResultT]]
GeomFunc = Callable[[SaSelect, SaColumn], SaSelect]
async def find_in_placex(conn: SearchConnection, place: ntyp.PlaceRef,
add_geometries: GeomFunc) -> Optional[SaRow]:
""" Search for the given place in the placex table and return the
base information.
"""
log().section("Find in placex table")
t = conn.t.placex
sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.admin_level,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
t.c.importance, t.c.wikipedia, t.c.indexed_date,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
t.c.linked_place_id,
t.c.geometry.ST_Expand(0).label('bbox'),
t.c.centroid)
if isinstance(place, ntyp.PlaceID):
sql = sql.where(t.c.place_id == place.place_id)
elif isinstance(place, ntyp.OsmID):
sql = sql.where(t.c.osm_type == place.osm_type)\
.where(t.c.osm_id == place.osm_id)
if place.osm_class:
sql = sql.where(t.c.class_ == place.osm_class)
else:
sql = sql.order_by(t.c.class_)
sql = sql.limit(1)
else:
return None
return (await conn.execute(add_geometries(sql, t.c.geometry))).one_or_none()
async def find_in_osmline(conn: SearchConnection, place: ntyp.PlaceRef,
add_geometries: GeomFunc) -> Optional[SaRow]:
""" Search for the given place in the osmline table and return the
base information.
"""
log().section("Find in interpolation table")
t = conn.t.osmline
sql = sa.select(t.c.place_id, t.c.osm_id, t.c.parent_place_id,
t.c.indexed_date, t.c.startnumber, t.c.endnumber,
t.c.step, t.c.address, t.c.postcode, t.c.country_code,
t.c.linegeo.ST_Centroid().label('centroid'))
if isinstance(place, ntyp.PlaceID):
sql = sql.where(t.c.place_id == place.place_id)
elif isinstance(place, ntyp.OsmID) and place.osm_type == 'W':
# There may be multiple interpolations for a single way.
# If 'class' contains a number, return the one that belongs to that number.
sql = sql.where(t.c.osm_id == place.osm_id).limit(1)
if place.osm_class and place.osm_class.isdigit():
sql = sql.order_by(sa.func.greatest(0,
int(place.osm_class) - t.c.endnumber,
t.c.startnumber - int(place.osm_class)))
else:
return None
return (await conn.execute(add_geometries(sql, t.c.linegeo))).one_or_none()
async def find_in_tiger(conn: SearchConnection, place: ntyp.PlaceRef,
add_geometries: GeomFunc) -> Optional[SaRow]:
""" Search for the given place in the table of Tiger addresses and return
the base information. Only lookup by place ID is supported.
"""
if not isinstance(place, ntyp.PlaceID):
return None
log().section("Find in TIGER table")
t = conn.t.tiger
parent = conn.t.placex
sql = sa.select(t.c.place_id, t.c.parent_place_id,
parent.c.osm_type, parent.c.osm_id,
t.c.startnumber, t.c.endnumber, t.c.step,
t.c.postcode,
t.c.linegeo.ST_Centroid().label('centroid'))\
.where(t.c.place_id == place.place_id)\
.join(parent, t.c.parent_place_id == parent.c.place_id, isouter=True)
return (await conn.execute(add_geometries(sql, t.c.linegeo))).one_or_none()
async def find_in_postcode(conn: SearchConnection, place: ntyp.PlaceRef,
add_geometries: GeomFunc) -> Optional[SaRow]:
""" Search for the given place in the postcode table and return the
base information. Only lookup by place ID is supported.
"""
if not isinstance(place, ntyp.PlaceID):
return None
log().section("Find in postcode table")
t = conn.t.postcode
sql = sa.select(t.c.place_id, t.c.parent_place_id,
t.c.rank_search, t.c.rank_address,
t.c.indexed_date, t.c.postcode, t.c.country_code,
t.c.geometry.label('centroid')) \
.where(t.c.place_id == place.place_id)
return (await conn.execute(add_geometries(sql, t.c.geometry))).one_or_none()
async def find_in_all_tables(conn: SearchConnection, place: ntyp.PlaceRef,
add_geometries: GeomFunc
) -> Tuple[Optional[SaRow], RowFunc[nres.BaseResultT]]:
""" Search for the given place in all data tables
and return the base information.
"""
row = await find_in_placex(conn, place, add_geometries)
log().var_dump('Result (placex)', row)
if row is not None:
return row, nres.create_from_placex_row
row = await find_in_osmline(conn, place, add_geometries)
log().var_dump('Result (osmline)', row)
if row is not None:
return row, nres.create_from_osmline_row
row = await find_in_postcode(conn, place, add_geometries)
log().var_dump('Result (postcode)', row)
if row is not None:
return row, nres.create_from_postcode_row
row = await find_in_tiger(conn, place, add_geometries)
log().var_dump('Result (tiger)', row)
return row, nres.create_from_tiger_row
async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
details: ntyp.LookupDetails) -> Optional[nres.DetailedResult]:
""" Retrieve a place with additional details from the database.
"""
log().function('get_detailed_place', place=place, details=details)
if details.geometry_output and details.geometry_output != ntyp.GeometryFormat.GEOJSON:
raise ValueError("lookup only supports geojosn polygon output.")
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
return sql.add_columns(sa.func.ST_AsGeoJSON(
sa.case((sa.func.ST_NPoints(column) > 5000,
sa.func.ST_SimplifyPreserveTopology(column, 0.0001)),
else_=column), 7).label('geometry_geojson'))
else:
def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
return sql.add_columns(sa.func.ST_GeometryType(column).label('geometry_type'))
row_func: RowFunc[nres.DetailedResult]
row, row_func = await find_in_all_tables(conn, place, _add_geometry)
if row is None:
return None
result = row_func(row, nres.DetailedResult)
    assert result is not None
    # add missing details
if 'type' in result.geometry:
result.geometry['type'] = GEOMETRY_TYPE_MAP.get(result.geometry['type'],
result.geometry['type'])
indexed_date = getattr(row, 'indexed_date', None)
if indexed_date is not None:
result.indexed_date = indexed_date.replace(tzinfo=dt.timezone.utc)
await nres.add_result_details(conn, [result], details)
return result
async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
details: ntyp.LookupDetails) -> Optional[nres.SearchResult]:
""" Retrieve a place as a simple search result from the database.
"""
log().function('get_simple_place', place=place, details=details)
def _add_geometry(sql: SaSelect, col: SaColumn) -> SaSelect:
if not details.geometry_output:
return sql
out = []
if details.geometry_simplification > 0.0:
col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if details.geometry_output & ntyp.GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if details.geometry_output & ntyp.GeometryFormat.KML:
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if details.geometry_output & ntyp.GeometryFormat.SVG:
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
row_func: RowFunc[nres.SearchResult]
row, row_func = await find_in_all_tables(conn, place, _add_geometry)
if row is None:
return None
result = row_func(row, nres.SearchResult)
    assert result is not None
    # add missing details
if hasattr(row, 'bbox'):
result.bbox = ntyp.Bbox.from_wkb(row.bbox)
await nres.add_result_details(conn, [result], details)
return result
GEOMETRY_TYPE_MAP = {
'POINT': 'ST_Point',
'MULTIPOINT': 'ST_MultiPoint',
'LINESTRING': 'ST_LineString',
'MULTILINESTRING': 'ST_MultiLineString',
'POLYGON': 'ST_Polygon',
'MULTIPOLYGON': 'ST_MultiPolygon',
'GEOMETRYCOLLECTION': 'ST_GeometryCollection'
}
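# Usage sketch for the lookup functions above (illustrative only; the
# connection handling and the chosen OSM object are assumptions):
#
#   place = ntyp.OsmID('N', 240109189)
#   details = ntyp.LookupDetails(address_details=True)
#   result = await get_detailed_place(conn, place, details)  # conn: SearchConnection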

@@ -0,0 +1,56 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper classes and functions for formatting results into API responses.
"""
from typing import Type, TypeVar, Dict, List, Callable, Any, Mapping
from collections import defaultdict
T = TypeVar('T') # pylint: disable=invalid-name
FormatFunc = Callable[[T, Mapping[str, Any]], str]
class FormatDispatcher:
""" Helper class to conveniently create formatting functions in
a module using decorators.
"""
def __init__(self) -> None:
self.format_functions: Dict[Type[Any], Dict[str, FormatFunc[Any]]] = defaultdict(dict)
def format_func(self, result_class: Type[T],
fmt: str) -> Callable[[FormatFunc[T]], FormatFunc[T]]:
""" Decorator for a function that formats a given type of result into the
selected format.
"""
def decorator(func: FormatFunc[T]) -> FormatFunc[T]:
self.format_functions[result_class][fmt] = func
return func
return decorator
def list_formats(self, result_type: Type[Any]) -> List[str]:
""" Return a list of formats supported by this formatter.
"""
return list(self.format_functions[result_type].keys())
def supports_format(self, result_type: Type[Any], fmt: str) -> bool:
""" Check if the given format is supported by this formatter.
"""
return fmt in self.format_functions[result_type]
def format_result(self, result: Any, fmt: str, options: Mapping[str, Any]) -> str:
""" Convert the given result into a string using the given format.
The format is expected to be in the list returned by
`list_formats()`.
"""
return self.format_functions[type(result)][fmt](result, options)
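# Usage sketch (illustrative; the result class and formatting logic below are
# made-up examples, not part of this module):
#
#   dispatcher = FormatDispatcher()
#
#   @dispatcher.format_func(dict, 'json')
#   def _format_dict_json(result, options):
#       return json.dumps(result)
#
#   dispatcher.supports_format(dict, 'json')        # -> True
#   dispatcher.format_result({'x': 1}, 'json', {})  # -> '{"x": 1}'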

@@ -0,0 +1,752 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Dataclasses for search results and helper functions to fill them.
Data classes are part of the public API while the functions are for
internal use only. That's why they are implemented as free-standing functions
instead of member functions.
"""
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast, Callable
import enum
import dataclasses
import datetime as dt
import sqlalchemy as sa
from nominatim_core.typing import SaSelect, SaRow
from nominatim_core.db.sqlalchemy_types import Geometry
from .types import Point, Bbox, LookupDetails
from .connection import SearchConnection
from .logging import log
from .localization import Locales
# This file defines complex result data classes.
# pylint: disable=too-many-instance-attributes
def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
""" Mix-in names from linked places, so that they show up
as standard names where necessary.
"""
if not names:
return None
out = {}
for k, v in names.items():
if k.startswith('_place_'):
outkey = k[7:]
out[k if outkey in names else outkey] = v
else:
out[k] = v
return out
class SourceTable(enum.Enum):
""" The `SourceTable` type lists the possible sources a result can have.
"""
PLACEX = 1
""" The placex table is the main source for result usually containing
OSM data.
"""
OSMLINE = 2
""" The osmline table contains address interpolations from OSM data.
Interpolation addresses are always approximate. The OSM id in the
result refers to the OSM way with the interpolation line object.
"""
TIGER = 3
""" TIGER address data contains US addresses imported on the side,
see [Installing TIGER data](../customize/Tiger.md).
        TIGER addresses are also interpolations. The addresses always refer
to a street from OSM data. The OSM id in the result refers to
that street.
"""
POSTCODE = 4
""" The postcode table contains artificial centroids for postcodes,
computed from the postcodes available with address points. Results
are always approximate.
"""
COUNTRY = 5
""" The country table provides a fallback, when country data is missing
in the OSM data.
"""
@dataclasses.dataclass
class AddressLine:
""" The `AddressLine` may contain the following fields about a related place
and its function as an address object. Most fields are optional.
Their presence depends on the kind and function of the address part.
"""
category: Tuple[str, str]
""" Main category of the place, described by a key-value pair.
"""
names: Dict[str, str]
""" All available names for the place including references, alternative
names and translations.
"""
fromarea: bool
""" If true, then the exact area of the place is known. Without area
information, Nominatim has to make an educated guess if an address
belongs to one place or another.
"""
isaddress: bool
""" If true, this place should be considered for the final address display.
Nominatim will sometimes include more than one candidate for
the address in the list when it cannot reliably determine where the
place belongs. It will consider names of all candidates when searching
but when displaying the result, only the most likely candidate should
be shown.
"""
rank_address: int
""" [Address rank](../customize/Ranking.md#address-rank) of the place.
"""
distance: float
""" Distance in degrees between the result place and this address part.
"""
place_id: Optional[int] = None
""" Internal ID of the place.
"""
osm_object: Optional[Tuple[str, int]] = None
""" OSM type and ID of the place, if such an object exists.
"""
extratags: Optional[Dict[str, str]] = None
""" Any extra information available about the place. This is a dictionary
that usually contains OSM tag key-value pairs.
"""
admin_level: Optional[int] = None
""" The administrative level of a boundary as tagged in the input data.
This field is only meaningful for places of the category
(boundary, administrative).
"""
local_name: Optional[str] = None
""" Place holder for localization of this address part. See
[Localization](#localization) below.
"""
class AddressLines(List[AddressLine]):
""" Sequence of address lines order in descending order by their rank.
"""
def localize(self, locales: Locales) -> List[str]:
""" Set the local name of address parts according to the chosen
locale. Return the list of local names without duplicates.
Only address parts that are marked as isaddress are localized
and returned.
"""
label_parts: List[str] = []
for line in self:
if line.isaddress and line.names:
line.local_name = locales.display_name(line.names)
if not label_parts or label_parts[-1] != line.local_name:
label_parts.append(line.local_name)
return label_parts
@dataclasses.dataclass
class WordInfo:
""" Each entry in the list of search terms contains the
following detailed information.
"""
word_id: int
""" Internal identifier for the word.
"""
word_token: str
""" Normalised and transliterated form of the word.
This form is used for searching.
"""
word: Optional[str] = None
""" Untransliterated form, if available.
"""
WordInfos = Sequence[WordInfo]
@dataclasses.dataclass
class BaseResult:
""" Data class collecting information common to all
types of search results.
"""
source_table: SourceTable
category: Tuple[str, str]
centroid: Point
place_id : Optional[int] = None
osm_object: Optional[Tuple[str, int]] = None
parent_place_id: Optional[int] = None
linked_place_id: Optional[int] = None
admin_level: int = 15
locale_name: Optional[str] = None
display_name: Optional[str] = None
names: Optional[Dict[str, str]] = None
address: Optional[Dict[str, str]] = None
extratags: Optional[Dict[str, str]] = None
housenumber: Optional[str] = None
postcode: Optional[str] = None
wikipedia: Optional[str] = None
rank_address: int = 30
rank_search: int = 30
importance: Optional[float] = None
country_code: Optional[str] = None
address_rows: Optional[AddressLines] = None
linked_rows: Optional[AddressLines] = None
parented_rows: Optional[AddressLines] = None
name_keywords: Optional[WordInfos] = None
address_keywords: Optional[WordInfos] = None
geometry: Dict[str, str] = dataclasses.field(default_factory=dict)
@property
def lat(self) -> float:
""" Get the latitude (or y) of the center point of the place.
"""
return self.centroid[1]
@property
def lon(self) -> float:
""" Get the longitude (or x) of the center point of the place.
"""
return self.centroid[0]
def calculated_importance(self) -> float:
""" Get a valid importance value. This is either the stored importance
of the value or an artificial value computed from the place's
search rank.
"""
return self.importance or (0.40001 - (self.rank_search/75.0))
def localize(self, locales: Locales) -> None:
""" Fill the locale_name and the display_name field for the
place and, if available, its address information.
"""
self.locale_name = locales.display_name(self.names)
if self.address_rows:
self.display_name = ', '.join(self.address_rows.localize(locales))
else:
self.display_name = self.locale_name
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
@dataclasses.dataclass
class DetailedResult(BaseResult):
""" A search result with more internal information from the database
added.
"""
indexed_date: Optional[dt.datetime] = None
@dataclasses.dataclass
class ReverseResult(BaseResult):
""" A search result for reverse geocoding.
"""
distance: Optional[float] = None
bbox: Optional[Bbox] = None
class ReverseResults(List[ReverseResult]):
""" Sequence of reverse lookup results ordered by distance.
May be empty when no result was found.
"""
@dataclasses.dataclass
class SearchResult(BaseResult):
""" A search result for forward geocoding.
"""
bbox: Optional[Bbox] = None
accuracy: float = 0.0
@property
def ranking(self) -> float:
""" Return the ranking, a combined measure of accuracy and importance.
"""
return (self.accuracy if self.accuracy is not None else 1) \
- self.calculated_importance()
class SearchResults(List[SearchResult]):
""" Sequence of forward lookup results ordered by relevance.
May be empty when no result was found.
"""
def _filter_geometries(row: SaRow) -> Dict[str, str]:
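    # For illustration: a row column labelled 'geometry_geojson' ends up
    # under the key 'geojson' in the returned dictionary.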
return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
if k.startswith('geometry_')}
def create_from_placex_row(row: Optional[SaRow],
class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the placex table. 'class_type' defines the type of result
to return. Returns None if the row is None.
"""
if row is None:
return None
return class_type(source_table=SourceTable.PLACEX,
place_id=row.place_id,
osm_object=(row.osm_type, row.osm_id),
category=(row.class_, row.type),
parent_place_id = row.parent_place_id,
linked_place_id = getattr(row, 'linked_place_id', None),
admin_level = getattr(row, 'admin_level', 15),
names=_mingle_name_tags(row.name),
address=row.address,
extratags=row.extratags,
housenumber=row.housenumber,
postcode=row.postcode,
wikipedia=row.wikipedia,
rank_address=row.rank_address,
rank_search=row.rank_search,
importance=row.importance,
country_code=row.country_code,
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
def create_from_osmline_row(row: Optional[SaRow],
class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the address interpolation table osmline. 'class_type' defines
the type of result to return. Returns None if the row is None.
If the row contains a housenumber, then the housenumber is filled out.
Otherwise the result contains the interpolation information in extratags.
"""
if row is None:
return None
hnr = getattr(row, 'housenumber', None)
res = class_type(source_table=SourceTable.OSMLINE,
place_id=row.place_id,
parent_place_id = row.parent_place_id,
osm_object=('W', row.osm_id),
category=('place', 'houses' if hnr is None else 'house'),
address=row.address,
postcode=row.postcode,
country_code=row.country_code,
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
if hnr is None:
res.extratags = {'startnumber': str(row.startnumber),
'endnumber': str(row.endnumber),
'step': str(row.step)}
else:
res.housenumber = str(hnr)
return res
def create_from_tiger_row(row: Optional[SaRow],
class_type: Type[BaseResultT],
osm_type: Optional[str] = None,
osm_id: Optional[int] = None) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the Tiger data interpolation table. 'class_type' defines
the type of result to return. Returns None if the row is None.
If the row contains a housenumber, then the housenumber is filled out.
Otherwise the result contains the interpolation information in extratags.
"""
if row is None:
return None
hnr = getattr(row, 'housenumber', None)
res = class_type(source_table=SourceTable.TIGER,
place_id=row.place_id,
parent_place_id = row.parent_place_id,
osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
category=('place', 'houses' if hnr is None else 'house'),
postcode=row.postcode,
country_code='us',
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
if hnr is None:
res.extratags = {'startnumber': str(row.startnumber),
'endnumber': str(row.endnumber),
'step': str(row.step)}
else:
res.housenumber = str(hnr)
return res
def create_from_postcode_row(row: Optional[SaRow],
class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the postcode table. 'class_type' defines
the type of result to return. Returns None if the row is None.
"""
if row is None:
return None
return class_type(source_table=SourceTable.POSTCODE,
place_id=row.place_id,
parent_place_id = row.parent_place_id,
category=('place', 'postcode'),
names={'ref': row.postcode},
rank_search=row.rank_search,
rank_address=row.rank_address,
country_code=row.country_code,
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
def create_from_country_row(row: Optional[SaRow],
class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the fallback country tables. 'class_type' defines
the type of result to return. Returns None if the row is None.
"""
if row is None:
return None
return class_type(source_table=SourceTable.COUNTRY,
category=('place', 'country'),
centroid=Point.from_wkb(row.centroid),
names=row.name,
rank_address=4, rank_search=4,
country_code=row.country_code,
geometry=_filter_geometries(row))
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
details: LookupDetails) -> None:
""" Retrieve more details from the database according to the
parameters specified in 'details'.
"""
if results:
log().section('Query details for result')
if details.address_details:
log().comment('Query address details')
await complete_address_details(conn, results)
if details.linked_places:
log().comment('Query linked places')
for result in results:
await complete_linked_places(conn, result)
if details.parented_places:
log().comment('Query parent places')
for result in results:
await complete_parented_places(conn, result)
if details.keywords:
log().comment('Query keywords')
for result in results:
await complete_keywords(conn, result)
for result in results:
result.localize(details.locales)
def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
""" Create a new AddressLine from the results of a database query.
"""
extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
if 'linked_place' in extratags:
extratags['place'] = extratags['linked_place']
names = _mingle_name_tags(row.name) or {}
if getattr(row, 'housenumber', None) is not None:
names['housenumber'] = row.housenumber
if isaddress is None:
isaddress = getattr(row, 'isaddress', True)
return AddressLine(place_id=row.place_id,
osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
category=(getattr(row, 'class'), row.type),
names=names,
extratags=extratags,
admin_level=row.admin_level,
fromarea=row.fromarea,
isaddress=isaddress,
rank_address=row.rank_address,
distance=row.distance)
def _get_address_lookup_id(result: BaseResultT) -> int:
assert result.place_id
if result.source_table != SourceTable.PLACEX or result.rank_search > 27:
return result.parent_place_id or result.place_id
return result.linked_place_id or result.place_id
async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
assert result.address_rows is not None
if result.category[0] not in ('boundary', 'place')\
or result.category[1] not in ('postal_code', 'postcode'):
postcode = result.postcode
if not postcode and result.address:
postcode = result.address.get('postcode')
if postcode and ',' not in postcode and ';' not in postcode:
result.address_rows.append(AddressLine(
category=('place', 'postcode'),
names={'ref': postcode},
fromarea=False, isaddress=True, rank_address=5,
distance=0.0))
if result.country_code:
async def _get_country_names() -> Optional[Dict[str, str]]:
t = conn.t.country_name
sql = sa.select(t.c.name, t.c.derived_name)\
.where(t.c.country_code == result.country_code)
for cres in await conn.execute(sql):
names = cast(Dict[str, str], cres[0])
if cres[1]:
names.update(cast(Dict[str, str], cres[1]))
return names
return None
country_names = await conn.get_cached_value('COUNTRY_NAME',
result.country_code,
_get_country_names)
if country_names:
result.address_rows.append(AddressLine(
category=('place', 'country'),
names=country_names,
fromarea=False, isaddress=True, rank_address=4,
distance=0.0))
result.address_rows.append(AddressLine(
category=('place', 'country_code'),
names={'ref': result.country_code}, extratags = {},
fromarea=True, isaddress=False, rank_address=4,
distance=0.0))
def _setup_address_details(result: BaseResultT) -> None:
""" Retrieve information about places that make up the address of the result.
"""
result.address_rows = AddressLines()
if result.names:
result.address_rows.append(AddressLine(
place_id=result.place_id,
osm_object=result.osm_object,
category=result.category,
names=result.names,
extratags=result.extratags or {},
admin_level=result.admin_level,
fromarea=True, isaddress=True,
rank_address=result.rank_address, distance=0.0))
if result.source_table == SourceTable.PLACEX and result.address:
housenumber = result.address.get('housenumber')\
or result.address.get('streetnumber')\
or result.address.get('conscriptionnumber')
elif result.housenumber:
housenumber = result.housenumber
else:
housenumber = None
if housenumber:
result.address_rows.append(AddressLine(
category=('place', 'house_number'),
names={'ref': housenumber},
fromarea=True, isaddress=True, rank_address=28, distance=0))
if result.address and '_unlisted_place' in result.address:
result.address_rows.append(AddressLine(
category=('place', 'locality'),
names={'name': result.address['_unlisted_place']},
fromarea=False, isaddress=True, rank_address=25, distance=0))
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
""" Retrieve information about places that make up the address of the result.
"""
for result in results:
_setup_address_details(result)
    ### Lookup entries from place_addressline
lookup_ids = [{'pid': r.place_id,
'lid': _get_address_lookup_id(r),
'names': list(r.address.values()) if r.address else [],
'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else '' }
for r in results if r.place_id]
if not lookup_ids:
return
ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
.table_valued(sa.column('value', type_=sa.JSON))
t = conn.t.placex
taddr = conn.t.addressline
sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.extratags,
t.c.admin_level, taddr.c.fromarea,
sa.case((t.c.rank_address == 11, 5),
else_=t.c.rank_address).label('rank_address'),
taddr.c.distance, t.c.country_code, t.c.postcode)\
.join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
.join(t, taddr.c.address_place_id == t.c.place_id)\
.order_by('src_place_id')\
.order_by(sa.column('rank_address').desc())\
.order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
.order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
(taddr.c.isaddress, 0),
(sa.and_(taddr.c.fromarea,
t.c.geometry.ST_Contains(
sa.func.ST_GeomFromEWKT(
ltab.c.value['c'].as_string()))), 1),
else_=-1).desc())\
.order_by(taddr.c.fromarea.desc())\
.order_by(taddr.c.distance.desc())\
.order_by(t.c.rank_search.desc())
current_result = None
current_rank_address = -1
for row in await conn.execute(sql):
if current_result is None or row.src_place_id != current_result.place_id:
current_result = next((r for r in results if r.place_id == row.src_place_id), None)
assert current_result is not None
current_rank_address = -1
location_isaddress = row.rank_address != current_rank_address
if current_result.country_code is None and row.country_code:
current_result.country_code = row.country_code
if row.type in ('postcode', 'postal_code') and location_isaddress:
if not row.fromarea or \
(current_result.address and 'postcode' in current_result.address):
location_isaddress = False
else:
current_result.postcode = None
assert current_result.address_rows is not None
current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
current_rank_address = row.rank_address
for result in results:
await _finalize_entry(conn, result)
### Finally add the record for the parent entry where necessary.
parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
if parent_lookup_ids:
ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
.table_valued(sa.column('value', type_=sa.JSON))
sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.extratags,
t.c.admin_level,
t.c.rank_address)\
.where(t.c.place_id == ltab.c.value['lid'].as_integer())
for row in await conn.execute(sql):
current_result = next((r for r in results if r.place_id == row.src_place_id), None)
assert current_result is not None
assert current_result.address_rows is not None
current_result.address_rows.append(AddressLine(
place_id=row.place_id,
osm_object=(row.osm_type, row.osm_id),
category=(row.class_, row.type),
names=row.name, extratags=row.extratags or {},
admin_level=row.admin_level,
fromarea=True, isaddress=True,
rank_address=row.rank_address, distance=0.0))
### Now sort everything
def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)
for result in results:
assert result.address_rows is not None
result.address_rows.sort(key=mk_sort_key(result.place_id))
def _placex_select_address_row(conn: SearchConnection,
centroid: Point) -> SaSelect:
t = conn.t.placex
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_.label('class'), t.c.type,
t.c.admin_level, t.c.housenumber,
t.c.geometry.is_area().label('fromarea'),
t.c.rank_address,
t.c.geometry.distance_spheroid(
sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
""" Retrieve information about places that link to the result.
"""
result.linked_rows = AddressLines()
if result.source_table != SourceTable.PLACEX:
return
sql = _placex_select_address_row(conn, result.centroid)\
.where(conn.t.placex.c.linked_place_id == result.place_id)
for row in await conn.execute(sql):
result.linked_rows.append(_result_row_to_address_row(row))
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
""" Retrieve information about the search terms used for this place.
Requires that the query analyzer was initialised to get access to
the word table.
"""
t = conn.t.search_name
sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
.where(t.c.place_id == result.place_id)
result.name_keywords = []
result.address_keywords = []
t = conn.t.meta.tables['word']
sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
for name_tokens, address_tokens in await conn.execute(sql):
for row in await conn.execute(sel.where(t.c.word_id.in_(name_tokens))):
result.name_keywords.append(WordInfo(*row))
for row in await conn.execute(sel.where(t.c.word_id.in_(address_tokens))):
result.address_keywords.append(WordInfo(*row))
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
""" Retrieve information about places that the result provides the
address for.
"""
result.parented_rows = AddressLines()
if result.source_table != SourceTable.PLACEX:
return
sql = _placex_select_address_row(conn, result.centroid)\
.where(conn.t.placex.c.parent_place_id == result.place_id)\
.where(conn.t.placex.c.rank_search == 30)
for row in await conn.execute(sql):
result.parented_rows.append(_result_row_to_address_row(row))

@@ -0,0 +1,603 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of reverse geocoding.
"""
from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union
import functools
import sqlalchemy as sa
from nominatim_core.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
SaBind, SaLambdaSelect
from nominatim_core.db.sqlalchemy_types import Geometry
from .connection import SearchConnection
from . import results as nres
from .logging import log
from .types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
# In SQLAlchemy, expressions which compare with NULL need to be expressed with
# the equal sign.
# pylint: disable=singleton-comparison
RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]]
WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
def no_index(expr: SaColumn) -> SaColumn:
""" Wrap the given expression, so that the query planner will
refrain from using the expression for index lookup.
"""
return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
""" Create a select statement with the columns relevant for reverse
results.
"""
if not use_wkt:
distance = t.c.distance
centroid = t.c.centroid
else:
distance = t.c.geometry.ST_Distance(WKT_PARAM)
centroid = sa.case((t.c.geometry.is_line_like(), t.c.geometry.ST_ClosestPoint(WKT_PARAM)),
else_=t.c.centroid).label('centroid')
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
t.c.importance, t.c.wikipedia,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
centroid,
t.c.linked_place_id, t.c.admin_level,
distance.label('distance'),
t.c.geometry.ST_Expand(0).label('bbox'))
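# Worked example for the interpolation helpers below (illustrative values):
# with startnumber=2, endnumber=10, step=2 and a position of 0.3 along the
# line, the interpolated housenumber is 2 + round((10 - 2) * 0.3 / 2) * 2 = 4.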
def _interpolated_housenumber(table: SaFromClause) -> SaLabel:
return sa.cast(table.c.startnumber
+ sa.func.round(((table.c.endnumber - table.c.startnumber) * table.c.position)
/ table.c.step) * table.c.step,
sa.Integer).label('housenumber')
def _interpolated_position(table: SaFromClause) -> SaLabel:
fac = sa.cast(table.c.step, sa.Float) / (table.c.endnumber - table.c.startnumber)
rounded_pos = sa.func.round(table.c.position / fac) * fac
return sa.case(
(table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid')
def _locate_interpolation(table: SaFromClause) -> SaLabel:
""" Given a position, locate the closest point on the line.
"""
return sa.case((table.c.linegeo.is_line_like(),
table.c.linegeo.ST_LineLocatePoint(WKT_PARAM)),
else_=0).label('position')
def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
return min(rows, key=lambda row: 1000 if row is None else row.distance)
class ReverseGeocoder:
""" Class implementing the logic for looking up a place from a
coordinate.
"""
def __init__(self, conn: SearchConnection, params: ReverseDetails,
restrict_to_country_areas: bool = False) -> None:
self.conn = conn
self.params = params
self.restrict_to_country_areas = restrict_to_country_areas
self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}
@property
def max_rank(self) -> int:
""" Return the maximum configured rank.
"""
return self.params.max_rank
def has_geometries(self) -> bool:
""" Check if any geometries are requested.
"""
return bool(self.params.geometry_output)
def layer_enabled(self, *layer: DataLayer) -> bool:
""" Return true when any of the given layer types are requested.
"""
return any(self.params.layers & l for l in layer)
def layer_disabled(self, *layer: DataLayer) -> bool:
""" Return true when none of the given layer types is requested.
"""
return not any(self.params.layers & l for l in layer)
def has_feature_layers(self) -> bool:
""" Return true if any layer other than ADDRESS or POI is requested.
"""
return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
out = []
if self.params.geometry_simplification > 0.0:
col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
if self.params.geometry_output & GeometryFormat.GEOJSON:
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if self.params.geometry_output & GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if self.params.geometry_output & GeometryFormat.KML:
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if self.params.geometry_output & GeometryFormat.SVG:
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
def _filter_by_layer(self, table: SaFromClause) -> SaColumn:
if self.layer_enabled(DataLayer.MANMADE):
exclude = []
if self.layer_disabled(DataLayer.RAILWAY):
exclude.append('railway')
if self.layer_disabled(DataLayer.NATURAL):
exclude.extend(('natural', 'water', 'waterway'))
return table.c.class_.not_in(tuple(exclude))
include = []
if self.layer_enabled(DataLayer.RAILWAY):
include.append('railway')
if self.layer_enabled(DataLayer.NATURAL):
include.extend(('natural', 'water', 'waterway'))
return table.c.class_.in_(tuple(include))
async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
""" Look up the closest rank 26+ place in the database, which
is closer than the given distance.
"""
t = self.conn.t.placex
# PostgreSQL must not get the distance as a parameter because
# there is a danger it won't be able to properly estimate index use
# when used with prepared statements
diststr = sa.text(f"{distance}")
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
.where(t.c.geometry.within_distance(WKT_PARAM, diststr))
.where(t.c.indexed_status == 0)
.where(t.c.linked_place_id == None)
.where(sa.or_(sa.not_(t.c.geometry.is_area()),
t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
.order_by('distance')
.limit(2))
if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry)
restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = []
if self.layer_enabled(DataLayer.ADDRESS):
max_rank = min(29, self.max_rank)
restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
if self.max_rank == 30:
restrict.append(lambda: sa.func.IsAddressPoint(t))
if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
t.c.class_.not_in(('place', 'building')),
sa.not_(t.c.geometry.is_line_like())))
if self.has_feature_layers():
restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
no_index(t.c.rank_address) == 0,
self._filter_by_layer(t)))
if not restrict:
return None
sql = sql.where(sa.or_(*restrict))
# If the closest object is inside an area, then check if there is a
# POI node nearby and return that.
prev_row = None
for row in await self.conn.execute(sql, self.bind_params):
if prev_row is None:
if row.rank_search <= 27 or row.osm_type == 'N' or row.distance > 0:
return row
prev_row = row
else:
if row.rank_search > 27 and row.osm_type == 'N'\
and row.distance < 0.0001:
return row
return prev_row
async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.placex
def _base_query() -> SaSelect:
return _select_from_placex(t)\
.where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.where(sa.func.IsAddressPoint(t))\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.order_by('distance')\
.limit(1)
sql: SaLambdaSelect
if self.has_geometries():
sql = self._add_geometry_columns(_base_query(), t.c.geometry)
else:
sql = sa.lambda_stmt(_base_query)
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
distance: float) -> Optional[SaRow]:
t = self.conn.t.osmline
sql = sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.within_distance(WKT_PARAM, distance))\
.where(t.c.startnumber != None)\
.order_by('distance')\
.limit(1)
if parent_place_id is not None:
sql = sql.where(t.c.parent_place_id == parent_place_id)
inner = sql.subquery('ipol')
sql = sa.select(inner.c.place_id, inner.c.osm_id,
inner.c.parent_place_id, inner.c.address,
_interpolated_housenumber(inner),
_interpolated_position(inner),
inner.c.postcode, inner.c.country_code,
inner.c.distance)
if self.has_geometries():
sub = sql.subquery('geom')
sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.tiger
def _base_query() -> SaSelect:
inner = sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.order_by('distance')\
.limit(1)\
.subquery('tiger')
return sa.select(inner.c.place_id,
inner.c.parent_place_id,
_interpolated_housenumber(inner),
_interpolated_position(inner),
inner.c.postcode,
inner.c.distance)
sql: SaLambdaSelect
if self.has_geometries():
sub = _base_query().subquery('geom')
sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
else:
sql = sa.lambda_stmt(_base_query)
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def lookup_street_poi(self) -> Tuple[Optional[SaRow], RowFunc]:
""" Find a street or POI/address for the given WKT point.
"""
log().section('Reverse lookup on street/address level')
distance = 0.006
parent_place_id = None
row = await self._find_closest_street_or_poi(distance)
row_func: RowFunc = nres.create_from_placex_row
log().var_dump('Result (street/building)', row)
# If the closest result was a street, but an address was requested,
# check for a housenumber nearby which is part of the street.
if row is not None:
if self.max_rank > 27 \
and self.layer_enabled(DataLayer.ADDRESS) \
and row.rank_address <= 27:
distance = 0.001
parent_place_id = row.place_id
log().comment('Find housenumber for street')
addr_row = await self._find_housenumber_for_street(parent_place_id)
log().var_dump('Result (street housenumber)', addr_row)
if addr_row is not None:
row = addr_row
row_func = nres.create_from_placex_row
distance = addr_row.distance
elif row.country_code == 'us' and parent_place_id is not None:
log().comment('Find TIGER housenumber for street')
addr_row = await self._find_tiger_number_for_street(parent_place_id)
log().var_dump('Result (street Tiger housenumber)', addr_row)
if addr_row is not None:
row_func = cast(RowFunc,
functools.partial(nres.create_from_tiger_row,
osm_type=row.osm_type,
osm_id=row.osm_id))
row = addr_row
else:
distance = row.distance
# Check for an interpolation that is either closer than our result
        # or belongs to the street found close by.
if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
log().comment('Find interpolation for street')
addr_row = await self._find_interpolation_for_street(parent_place_id,
distance)
log().var_dump('Result (street interpolation)', addr_row)
if addr_row is not None:
row = addr_row
row_func = nres.create_from_osmline_row
return row, row_func
async def _lookup_area_address(self) -> Optional[SaRow]:
""" Lookup large addressable areas for the given WKT point.
"""
log().comment('Reverse lookup by larger address area features')
t = self.conn.t.placex
def _base_query() -> SaSelect:
# The inner SQL brings results in the right order, so that
# later only a minimum of results needs to be checked with ST_Contains.
inner = sa.select(t, sa.literal(0.0).label('distance'))\
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
.where(t.c.geometry.intersects(WKT_PARAM))\
.where(sa.func.PlacexGeometryReverseLookuppolygon())\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('area')
return _select_from_placex(inner, False)\
.where(inner.c.geometry.ST_Contains(WKT_PARAM))\
.order_by(sa.desc(inner.c.rank_search))\
.limit(1)
sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
if self.has_geometries():
sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (area)', address_row)
if address_row is not None and address_row.rank_search < self.max_rank:
log().comment('Search for better matching place nodes inside the area')
address_rank = address_row.rank_search
address_id = address_row.place_id
def _place_inside_area_query() -> SaSelect:
inner = \
sa.select(t,
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_search > address_rank)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
.where(t.c.indexed_status == 0)\
.where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('places')
touter = t.alias('outer')
return _select_from_placex(inner, False)\
.join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
.where(touter.c.place_id == address_id)\
.where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
if self.has_geometries():
sql = self._add_geometry_columns(_place_inside_area_query(),
sa.literal_column('places.geometry'))
else:
sql = sa.lambda_stmt(_place_inside_area_query)
place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (place node)', place_address_row)
if place_address_row is not None:
return place_address_row
return address_row
async def _lookup_area_others(self) -> Optional[SaRow]:
t = self.conn.t.placex
inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_address == 0)\
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
.where(t.c.name != None)\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.where(self._filter_by_layer(t))\
.where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
.order_by(sa.desc(t.c.rank_search))\
.order_by('distance')\
.limit(50)\
.subquery()
sql = _select_from_placex(inner, False)\
.where(sa.or_(sa.not_(inner.c.geometry.is_area()),
inner.c.geometry.ST_Contains(WKT_PARAM)))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
if self.has_geometries():
sql = self._add_geometry_columns(sql, inner.c.geometry)
row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (non-address feature)', row)
return row
async def lookup_area(self) -> Optional[SaRow]:
""" Lookup large areas for the current search.
"""
log().section('Reverse lookup by larger area features')
if self.layer_enabled(DataLayer.ADDRESS):
address_row = await self._lookup_area_address()
else:
address_row = None
if self.has_feature_layers():
other_row = await self._lookup_area_others()
else:
other_row = None
return _get_closest(address_row, other_row)
async def lookup_country_codes(self) -> List[str]:
""" Lookup the country for the current search.
"""
log().section('Reverse lookup by country code')
t = self.conn.t.country_grid
sql = sa.select(t.c.country_code).distinct()\
.where(t.c.geometry.ST_Contains(WKT_PARAM))
ccodes = [cast(str, r[0]) for r in await self.conn.execute(sql, self.bind_params)]
log().var_dump('Country codes', ccodes)
return ccodes
async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]:
""" Lookup the country for the current search.
"""
if not ccodes:
ccodes = await self.lookup_country_codes()
if not ccodes:
return None
t = self.conn.t.placex
if self.max_rank > 4:
log().comment('Search for place nodes in country')
def _base_query() -> SaSelect:
inner = \
sa.select(t,
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_search > 4)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
.where(t.c.indexed_status == 0)\
.where(t.c.country_code.in_(ccodes))\
.where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('area')
return _select_from_placex(inner, False)\
.where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
sql: SaLambdaSelect
if self.has_geometries():
sql = self._add_geometry_columns(_base_query(),
sa.literal_column('area.geometry'))
else:
sql = sa.lambda_stmt(_base_query)
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (addressable place node)', address_row)
else:
address_row = None
if address_row is None:
# Still nothing, then return a country with the appropriate country code.
def _country_base_query() -> SaSelect:
return _select_from_placex(t)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)\
.where(t.c.rank_search == 4)\
.where(t.c.linked_place_id == None)\
.order_by('distance')\
.limit(1)
if self.has_geometries():
sql = self._add_geometry_columns(_country_base_query(), t.c.geometry)
else:
sql = sa.lambda_stmt(_country_base_query)
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
return address_row
async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]:
""" Look up a single coordinate. Returns the place information,
if a place was found near the coordinates or None otherwise.
"""
log().function('reverse_lookup', coord=coord, params=self.params)
self.bind_params['wkt'] = f'POINT({coord[0]} {coord[1]})'
row: Optional[SaRow] = None
row_func: RowFunc = nres.create_from_placex_row
if self.max_rank >= 26:
row, tmp_row_func = await self.lookup_street_poi()
if row is not None:
row_func = tmp_row_func
if row is None:
if self.restrict_to_country_areas:
ccodes = await self.lookup_country_codes()
if not ccodes:
return None
else:
ccodes = []
if self.max_rank > 4:
row = await self.lookup_area()
if row is None and self.layer_enabled(DataLayer.ADDRESS):
row = await self.lookup_country(ccodes)
result = row_func(row, nres.ReverseResult)
if result is not None:
assert row is not None
result.distance = row.distance
if hasattr(row, 'bbox'):
result.bbox = Bbox.from_wkb(row.bbox)
await nres.add_result_details(self.conn, [result], self.params)
return result

View File

@@ -0,0 +1,15 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module for forward search.
"""
# pylint: disable=useless-import-alias
from .geocoder import (ForwardGeocoder as ForwardGeocoder)
from .query import (Phrase as Phrase,
PhraseType as PhraseType)
from .query_analyzer_factory import (make_query_analyzer as make_query_analyzer)

View File

@@ -0,0 +1,459 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Conversion from token assignment to an abstract DB search.
"""
from typing import Optional, List, Tuple, Iterator, Dict
import heapq
from ..types import SearchDetails, DataLayer
from .query import QueryStruct, Token, TokenType, TokenRange, BreakType
from .token_assignment import TokenAssignment
from . import db_search_fields as dbf
from . import db_searches as dbs
from . import db_search_lookups as lookups
def wrap_near_search(categories: List[Tuple[str, str]],
search: dbs.AbstractSearch) -> dbs.NearSearch:
""" Create a new search that wraps the given search in a search
for near places of the given category.
"""
return dbs.NearSearch(penalty=search.penalty,
categories=dbf.WeightedCategories(categories,
[0.0] * len(categories)),
search=search)
def build_poi_search(category: List[Tuple[str, str]],
countries: Optional[List[str]]) -> dbs.PoiSearch:
""" Create a new search for places by the given category, possibly
constrained to the given countries.
"""
if countries:
ccs = dbf.WeightedStrings(countries, [0.0] * len(countries))
else:
ccs = dbf.WeightedStrings([], [])
class _PoiData(dbf.SearchData):
penalty = 0.0
qualifiers = dbf.WeightedCategories(category, [0.0] * len(category))
countries = ccs
return dbs.PoiSearch(_PoiData())
class SearchBuilder:
""" Build the abstract search queries from token assignments.
"""
def __init__(self, query: QueryStruct, details: SearchDetails) -> None:
self.query = query
self.details = details
@property
def configured_for_country(self) -> bool:
""" Return true if the search details are configured to
allow countries in the result.
"""
return self.details.min_rank <= 4 and self.details.max_rank >= 4 \
and self.details.layer_enabled(DataLayer.ADDRESS)
@property
def configured_for_postcode(self) -> bool:
""" Return true if the search details are configured to
allow postcodes in the result.
"""
return self.details.min_rank <= 5 and self.details.max_rank >= 11\
and self.details.layer_enabled(DataLayer.ADDRESS)
@property
def configured_for_housenumbers(self) -> bool:
""" Return true if the search details are configured to
allow addresses in the result.
"""
return self.details.max_rank >= 30 \
and self.details.layer_enabled(DataLayer.ADDRESS)
def build(self, assignment: TokenAssignment) -> Iterator[dbs.AbstractSearch]:
""" Yield all possible abstract searches for the given token assignment.
"""
sdata = self.get_search_data(assignment)
if sdata is None:
return
near_items = self.get_near_items(assignment)
if near_items is not None and not near_items:
return  # impossible combination of near items and category parameter
if assignment.name is None:
if near_items and not sdata.postcodes:
sdata.qualifiers = near_items
near_items = None
builder = self.build_poi_search(sdata)
elif assignment.housenumber:
hnr_tokens = self.query.get_tokens(assignment.housenumber,
TokenType.HOUSENUMBER)
builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
else:
builder = self.build_special_search(sdata, assignment.address,
bool(near_items))
else:
builder = self.build_name_search(sdata, assignment.name, assignment.address,
bool(near_items))
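# If a near-item (category) term remains, wrap every generated search in a
# NearSearch and rebase the near-item penalties so the cheapest variant
# starts at zero.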
if near_items:
penalty = min(near_items.penalties)
near_items.penalties = [p - penalty for p in near_items.penalties]
for search in builder:
search_penalty = search.penalty
search.penalty = 0.0
yield dbs.NearSearch(penalty + assignment.penalty + search_penalty,
near_items, search)
else:
for search in builder:
search.penalty += assignment.penalty
yield search
def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]:
""" Build abstract search query for a simple category search.
This kind of search requires an additional geographic constraint.
"""
if not sdata.housenumbers \
and ((self.details.viewbox and self.details.bounded_viewbox) or self.details.near):
yield dbs.PoiSearch(sdata)
def build_special_search(self, sdata: dbf.SearchData,
address: List[TokenRange],
is_category: bool) -> Iterator[dbs.AbstractSearch]:
""" Build abstract search queries for searches that do not involve
a named place.
"""
if sdata.qualifiers:
# No special searches over qualifiers supported.
return
if sdata.countries and not address and not sdata.postcodes \
and self.configured_for_country:
yield dbs.CountrySearch(sdata)
if sdata.postcodes and (is_category or self.configured_for_postcode):
penalty = 0.0 if sdata.countries else 0.1
if address:
sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
[t.token for r in address
for t in self.query.get_partials_list(r)],
lookups.Restrict)]
penalty += 0.2
yield dbs.PostcodeSearch(penalty, sdata)
def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
address: List[TokenRange]) -> Iterator[dbs.AbstractSearch]:
""" Build a simple address search for special entries where the
housenumber is the main name token.
"""
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], lookups.LookupAny)]
expected_count = sum(t.count for t in hnrs)
partials = {t.token: t.addr_count for trange in address
for t in self.query.get_partials_list(trange)}
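# Choose the address strategy by selectivity: restrict by address partials
# when the housenumber tokens are selective, use an indexed lookup for
# moderately frequent partials and otherwise fall back to full-word
# address tokens (bailing out if there are too many of them).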
if expected_count < 8000:
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
list(partials), lookups.Restrict))
elif len(partials) != 1 or list(partials.values())[0] < 10000:
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
list(partials), lookups.LookupAll))
else:
addr_fulls = [t.token for t
in self.query.get_tokens(address[0], TokenType.WORD)]
if len(addr_fulls) > 5:
return
sdata.lookups.append(
dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny))
sdata.housenumbers = dbf.WeightedStrings([], [])
yield dbs.PlaceSearch(0.05, sdata, expected_count)
def build_name_search(self, sdata: dbf.SearchData,
name: TokenRange, address: List[TokenRange],
is_category: bool) -> Iterator[dbs.AbstractSearch]:
""" Build abstract search queries for simple name or address searches.
"""
if is_category or not sdata.housenumbers or self.configured_for_housenumbers:
ranking = self.get_name_ranking(name)
name_penalty = ranking.normalize_penalty()
if ranking.rankings:
sdata.rankings.append(ranking)
for penalty, count, lookup in self.yield_lookups(name, address):
sdata.lookups = lookup
yield dbs.PlaceSearch(penalty + name_penalty, sdata, count)
def yield_lookups(self, name: TokenRange, address: List[TokenRange])\
-> Iterator[Tuple[float, int, List[dbf.FieldLookup]]]:
""" Yield all variants how the given name and address should best
be searched for. This takes into account how frequent the terms
are and tries to find a lookup that optimizes index use.
"""
penalty = 0.0 # extra penalty
name_partials = {t.token: t for t in self.query.get_partials_list(name)}
addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
addr_tokens = list({t.token for t in addr_partials})
partials_indexed = all(t.is_indexed for t in name_partials.values()) \
and all(t.is_indexed for t in addr_partials)
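# Estimated number of results for a lookup by name partials alone; every
# additional partial roughly halves the candidate set.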
exp_count = min(t.count for t in name_partials.values()) / (2**(len(name_partials) - 1))
if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
return
addr_count = min(t.addr_count for t in addr_partials) if addr_partials else 30000
# Partial terms are too frequent. Try looking up by rare full names first.
name_fulls = self.query.get_tokens(name, TokenType.WORD)
if name_fulls:
fulls_count = sum(t.count for t in name_fulls)
if partials_indexed:
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
if fulls_count < 50000 or addr_count < 30000:
yield penalty, fulls_count / (2**len(addr_tokens)), \
self.get_full_name_ranking(name_fulls, addr_partials,
fulls_count > 30000 / max(1, len(addr_tokens)))
# To catch the remaining results, look up by name and address.
# We only do this if a reasonable number of results is expected.
exp_count = exp_count / (2**len(addr_tokens)) if addr_tokens else exp_count
if exp_count < 10000 and addr_count < 20000\
and all(t.is_indexed for t in name_partials.values()):
penalty += 0.35 * max(1 if name_fulls else 0.1,
5 - len(name_partials) - len(addr_tokens))
yield penalty, exp_count,\
self.get_name_address_ranking(list(name_partials.keys()), addr_partials)
def get_name_address_ranking(self, name_tokens: List[int],
addr_partials: List[Token]) -> List[dbf.FieldLookup]:
""" Create a ranking expression looking up by name and address.
"""
lookup = [dbf.FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
addr_restrict_tokens = []
addr_lookup_tokens = []
for t in addr_partials:
if t.is_indexed:
if t.addr_count > 20000:
addr_restrict_tokens.append(t.token)
else:
addr_lookup_tokens.append(t.token)
if addr_restrict_tokens:
lookup.append(dbf.FieldLookup('nameaddress_vector',
addr_restrict_tokens, lookups.Restrict))
if addr_lookup_tokens:
lookup.append(dbf.FieldLookup('nameaddress_vector',
addr_lookup_tokens, lookups.LookupAll))
return lookup
def get_full_name_ranking(self, name_fulls: List[Token], addr_partials: List[Token],
use_lookup: bool) -> List[dbf.FieldLookup]:
""" Create a ranking expression with full name terms and
additional address lookup. When 'use_lookup' is true, then
address lookups will use the index, as long as the occurrences are not
too many.
"""
# At this point drop unindexed partials from the address.
# This might yield wrong results, nothing we can do about that.
if use_lookup:
addr_restrict_tokens = []
addr_lookup_tokens = []
for t in addr_partials:
if t.is_indexed:
if t.addr_count > 20000:
addr_restrict_tokens.append(t.token)
else:
addr_lookup_tokens.append(t.token)
else:
addr_restrict_tokens = [t.token for t in addr_partials if t.is_indexed]
addr_lookup_tokens = []
return dbf.lookup_by_any_name([t.token for t in name_fulls],
addr_restrict_tokens, addr_lookup_tokens)
def get_name_ranking(self, trange: TokenRange,
db_field: str = 'name_vector') -> dbf.FieldRanking:
""" Create a ranking expression for a name term in the given range.
"""
name_fulls = self.query.get_tokens(trange, TokenType.WORD)
ranks = [dbf.RankedTokens(t.penalty, [t.token]) for t in name_fulls]
ranks.sort(key=lambda r: r.penalty)
# Fallback, sum of penalty for partials
name_partials = self.query.get_partials_list(trange)
default = sum(t.penalty for t in name_partials) + 0.2
return dbf.FieldRanking(db_field, default, ranks)
def get_addr_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
""" Create a list of ranking expressions for an address term
for the given ranges.
"""
todo: List[Tuple[int, int, dbf.RankedTokens]] = []
heapq.heappush(todo, (0, trange.start, dbf.RankedTokens(0.0, [])))
ranks: List[dbf.RankedTokens] = []
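# Best-first expansion over the query nodes: heap entries are
# (-tokens consumed, position, partial ranking), so paths that already
# cover more of the range are expanded first.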
while todo: # pylint: disable=too-many-nested-blocks
neglen, pos, rank = heapq.heappop(todo)
for tlist in self.query.nodes[pos].starting:
if tlist.ttype in (TokenType.PARTIAL, TokenType.WORD):
if tlist.end < trange.end:
chgpenalty = PENALTY_WORDCHANGE[self.query.nodes[tlist.end].btype]
if tlist.ttype == TokenType.PARTIAL:
penalty = rank.penalty + chgpenalty \
+ max(t.penalty for t in tlist.tokens)
heapq.heappush(todo, (neglen - 1, tlist.end,
dbf.RankedTokens(penalty, rank.tokens)))
else:
for t in tlist.tokens:
heapq.heappush(todo, (neglen - 1, tlist.end,
rank.with_token(t, chgpenalty)))
elif tlist.end == trange.end:
if tlist.ttype == TokenType.PARTIAL:
ranks.append(dbf.RankedTokens(rank.penalty
+ max(t.penalty for t in tlist.tokens),
rank.tokens))
else:
ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)
if len(ranks) >= 10:
# Too many variants, bail out and only add the worst-case
# fallback: sum of the penalties of the partials.
name_partials = self.query.get_partials_list(trange)
default = sum(t.penalty for t in name_partials) + 0.2
ranks.append(dbf.RankedTokens(rank.penalty + default, []))
# Bail out of outer loop
todo.clear()
break
ranks.sort(key=lambda r: len(r.tokens))
default = ranks[0].penalty + 0.3
del ranks[0]
ranks.sort(key=lambda r: r.penalty)
return dbf.FieldRanking('nameaddress_vector', default, ranks)
def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchData]:
""" Collect the tokens for the non-name search fields in the
assignment.
"""
sdata = dbf.SearchData()
sdata.penalty = assignment.penalty
if assignment.country:
tokens = self.get_country_tokens(assignment.country)
if not tokens:
return None
sdata.set_strings('countries', tokens)
elif self.details.countries:
sdata.countries = dbf.WeightedStrings(self.details.countries,
[0.0] * len(self.details.countries))
if assignment.housenumber:
sdata.set_strings('housenumbers',
self.query.get_tokens(assignment.housenumber,
TokenType.HOUSENUMBER))
if assignment.postcode:
sdata.set_strings('postcodes',
self.query.get_tokens(assignment.postcode,
TokenType.POSTCODE))
if assignment.qualifier:
tokens = self.get_qualifier_tokens(assignment.qualifier)
if not tokens:
return None
sdata.set_qualifiers(tokens)
elif self.details.categories:
sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
[0.0] * len(self.details.categories))
if assignment.address:
if not assignment.name and assignment.housenumber:
# housenumber search: the first item needs to be handled like
# a name in ranking or penalties are not comparable with
# normal searches.
sdata.set_ranking([self.get_name_ranking(assignment.address[0],
db_field='nameaddress_vector')]
+ [self.get_addr_ranking(r) for r in assignment.address[1:]])
else:
sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
else:
sdata.rankings = []
return sdata
def get_country_tokens(self, trange: TokenRange) -> List[Token]:
""" Return the list of country tokens for the given range,
optionally filtered by the country list from the details
parameters.
"""
tokens = self.query.get_tokens(trange, TokenType.COUNTRY)
if self.details.countries:
tokens = [t for t in tokens if t.lookup_word in self.details.countries]
return tokens
def get_qualifier_tokens(self, trange: TokenRange) -> List[Token]:
""" Return the list of qualifier tokens for the given range,
optionally filtered by the qualifier list from the details
parameters.
"""
tokens = self.query.get_tokens(trange, TokenType.QUALIFIER)
if self.details.categories:
tokens = [t for t in tokens if t.get_category() in self.details.categories]
return tokens
def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
""" Collect tokens for near items search or use the categories
requested per parameter.
Returns None if no category search is requested.
"""
if assignment.near_item:
tokens: Dict[Tuple[str, str], float] = {}
for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
cat = t.get_category()
# The category of a near search will be that of near_item.
# Thus, if the search is restricted by a category parameter,
# the two sets must intersect.
if (not self.details.categories or cat in self.details.categories)\
and t.penalty < tokens.get(cat, 1000.0):
tokens[cat] = t.penalty
return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))
return None
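# Penalty applied when an address term continues across a break of the
# given type (used by get_addr_ranking above).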
PENALTY_WORDCHANGE = {
BreakType.START: 0.0,
BreakType.END: 0.0,
BreakType.PHRASE: 0.0,
BreakType.WORD: 0.1,
BreakType.PART: 0.2,
BreakType.TOKEN: 0.4
}

View File

@@ -0,0 +1,254 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Data structures for more complex fields in abstract search descriptions.
"""
from typing import List, Tuple, Iterator, Dict, Type
import dataclasses
import sqlalchemy as sa
from nominatim_core.typing import SaFromClause, SaColumn, SaExpression
from .query import Token
from . import db_search_lookups as lookups
from nominatim_core.utils.json_writer import JsonWriter
@dataclasses.dataclass
class WeightedStrings:
""" A list of strings together with a penalty.
"""
values: List[str]
penalties: List[float]
def __bool__(self) -> bool:
return bool(self.values)
def __iter__(self) -> Iterator[Tuple[str, float]]:
return iter(zip(self.values, self.penalties))
def get_penalty(self, value: str, default: float = 1000.0) -> float:
""" Get the penalty for the given value. Returns the given default
if the value does not exist.
"""
try:
return self.penalties[self.values.index(value)]
except ValueError:
pass
return default
@dataclasses.dataclass
class WeightedCategories:
""" A list of class/type tuples together with a penalty.
"""
values: List[Tuple[str, str]]
penalties: List[float]
def __bool__(self) -> bool:
return bool(self.values)
def __iter__(self) -> Iterator[Tuple[Tuple[str, str], float]]:
return iter(zip(self.values, self.penalties))
def get_penalty(self, value: Tuple[str, str], default: float = 1000.0) -> float:
""" Get the penalty for the given value. Returns the given default
if the value does not exist.
"""
try:
return self.penalties[self.values.index(value)]
except ValueError:
pass
return default
def sql_restrict(self, table: SaFromClause) -> SaExpression:
""" Return an SQLAlcheny expression that restricts the
class and type columns of the given table to the values
in the list.
Must not be used with an empty list.
"""
assert self.values
if len(self.values) == 1:
return sa.and_(table.c.class_ == self.values[0][0],
table.c.type == self.values[0][1])
return sa.or_(*(sa.and_(table.c.class_ == c, table.c.type == t)
for c, t in self.values))
@dataclasses.dataclass(order=True)
class RankedTokens:
""" List of tokens together with the penalty of using it.
"""
penalty: float
tokens: List[int]
def with_token(self, t: Token, transition_penalty: float) -> 'RankedTokens':
""" Create a new RankedTokens list with the given token appended.
The token's penalty as well as the given transition penalty
are added to the overall penalty.
"""
return RankedTokens(self.penalty + t.penalty + transition_penalty,
self.tokens + [t.token])
@dataclasses.dataclass
class FieldRanking:
""" A list of rankings to be applied sequentially until one matches.
The matched ranking determines the penalty. If none matches, a
default penalty is applied.
"""
column: str
default: float
rankings: List[RankedTokens]
def normalize_penalty(self) -> float:
""" Reduce the default and ranking penalties, such that the minimum
penalty is 0. Return the penalty that was subtracted.
"""
if self.rankings:
min_penalty = min(self.default, min(r.penalty for r in self.rankings))
else:
min_penalty = self.default
if min_penalty > 0.0:
self.default -= min_penalty
for ranking in self.rankings:
ranking.penalty -= min_penalty
return min_penalty
def sql_penalty(self, table: SaFromClause) -> SaColumn:
""" Create an SQL expression for the rankings.
"""
assert self.rankings
rout = JsonWriter().start_array()
for rank in self.rankings:
rout.start_array().value(rank.penalty).next()
rout.start_array()
for token in rank.tokens:
rout.value(token).next()
rout.end_array()
rout.end_array().next()
rout.end_array()
return sa.func.weigh_search(table.c[self.column], rout(), self.default)
@dataclasses.dataclass
class FieldLookup:
""" A list of tokens to be searched for. The column names the database
column to search in and the lookup_type the operator that is applied.
'lookup_all' requires all tokens to match. 'lookup_any' requires
one of the tokens to match. 'restrict' requires all tokens to match
but avoids the use of indexes.
"""
column: str
tokens: List[int]
lookup_type: Type[lookups.LookupType]
def sql_condition(self, table: SaFromClause) -> SaColumn:
""" Create an SQL expression for the given match condition.
"""
return self.lookup_type(table, self.column, self.tokens)
class SearchData:
""" Search fields derived from query and token assignment
to be used with the SQL queries.
"""
penalty: float
lookups: List[FieldLookup] = []
rankings: List[FieldRanking]
housenumbers: WeightedStrings = WeightedStrings([], [])
postcodes: WeightedStrings = WeightedStrings([], [])
countries: WeightedStrings = WeightedStrings([], [])
qualifiers: WeightedCategories = WeightedCategories([], [])
def set_strings(self, field: str, tokens: List[Token]) -> None:
""" Set on of the WeightedStrings properties from the given
token list. Adapt the global penalty, so that the
minimum penalty is 0.
"""
if tokens:
min_penalty = min(t.penalty for t in tokens)
self.penalty += min_penalty
wstrs = WeightedStrings([t.lookup_word for t in tokens],
[t.penalty - min_penalty for t in tokens])
setattr(self, field, wstrs)
def set_qualifiers(self, tokens: List[Token]) -> None:
""" Set the qulaifier field from the given tokens.
"""
if tokens:
categories: Dict[Tuple[str, str], float] = {}
min_penalty = 1000.0
for t in tokens:
min_penalty = min(min_penalty, t.penalty)
cat = t.get_category()
if t.penalty < categories.get(cat, 1000.0):
categories[cat] = t.penalty
self.penalty += min_penalty
self.qualifiers = WeightedCategories(list(categories.keys()),
list(categories.values()))
def set_ranking(self, rankings: List[FieldRanking]) -> None:
""" Set the list of rankings and normalize the ranking.
"""
self.rankings = []
for ranking in rankings:
if ranking.rankings:
self.penalty += ranking.normalize_penalty()
self.rankings.append(ranking)
else:
self.penalty += ranking.default
def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
""" Create a lookup list where name tokens are looked up via index
and potential address tokens are used to restrict the search further.
"""
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
if addr_tokens:
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookups.Restrict))
return lookup
def lookup_by_any_name(name_tokens: List[int], addr_restrict_tokens: List[int],
addr_lookup_tokens: List[int]) -> List[FieldLookup]:
""" Create a lookup list where name tokens are looked up via index
and only one of the name tokens must be present.
Potential address tokens are used to restrict the search further.
"""
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAny)]
if addr_restrict_tokens:
lookup.append(FieldLookup('nameaddress_vector', addr_restrict_tokens, lookups.Restrict))
if addr_lookup_tokens:
lookup.append(FieldLookup('nameaddress_vector', addr_lookup_tokens, lookups.LookupAll))
return lookup
def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
""" Create a lookup list where address tokens are looked up via index
and the name tokens are only used to restrict the search further.
"""
return [FieldLookup('name_vector', name_tokens, lookups.Restrict),
FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll)]
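# Illustrative sketch only (the token IDs are made up): lookup_by_names([1, 2], [7])
# returns a LookupAll on 'name_vector' plus a Restrict on 'nameaddress_vector',
# i.e. both name tokens must match via the index while the address token
# merely filters the result.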

View File

@@ -0,0 +1,114 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of lookup functions for the search_name table.
"""
from typing import List, Any
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from nominatim_core.typing import SaFromClause
from nominatim_core.db.sqlalchemy_types import IntArray
# pylint: disable=consider-using-f-string
LookupType = sa.sql.expression.FunctionElement[Any]
class LookupAll(LookupType):
""" Find all entries in search_name table that contain all of
a given list of tokens using an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(table.c.place_id, getattr(table.c, column), column,
sa.type_coerce(tokens, IntArray))
@compiles(LookupAll) # type: ignore[no-untyped-call, misc]
def _default_lookup_all(element: LookupAll,
compiler: 'sa.Compiled', **kw: Any) -> str:
_, col, _, tokens = list(element.clauses)
return "(%s @> %s)" % (compiler.process(col, **kw),
compiler.process(tokens, **kw))
@compiles(LookupAll, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_all(element: LookupAll,
compiler: 'sa.Compiled', **kw: Any) -> str:
place, col, colname, tokens = list(element.clauses)
return "(%s IN (SELECT CAST(value as bigint) FROM"\
" (SELECT array_intersect_fuzzy(places) as p FROM"\
" (SELECT places FROM reverse_search_name"\
" WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
" AND column = %s"\
" ORDER BY length(places)) as x) as u,"\
" json_each('[' || u.p || ']'))"\
" AND array_contains(%s, %s))"\
% (compiler.process(place, **kw),
compiler.process(tokens, **kw),
compiler.process(colname, **kw),
compiler.process(col, **kw),
compiler.process(tokens, **kw)
)
class LookupAny(LookupType):
""" Find all entries that contain at least one of the given tokens.
Use an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(table.c.place_id, getattr(table.c, column), column,
sa.type_coerce(tokens, IntArray))
@compiles(LookupAny) # type: ignore[no-untyped-call, misc]
def _default_lookup_any(element: LookupAny,
compiler: 'sa.Compiled', **kw: Any) -> str:
_, col, _, tokens = list(element.clauses)
return "(%s && %s)" % (compiler.process(col, **kw),
compiler.process(tokens, **kw))
@compiles(LookupAny, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_any(element: LookupAny,
compiler: 'sa.Compiled', **kw: Any) -> str:
place, _, colname, tokens = list(element.clauses)
return "%s IN (SELECT CAST(value as bigint) FROM"\
" (SELECT array_union(places) as p FROM reverse_search_name"\
" WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
" AND column = %s) as u,"\
" json_each('[' || u.p || ']'))" % (compiler.process(place, **kw),
compiler.process(tokens, **kw),
compiler.process(colname, **kw))
class Restrict(LookupType):
""" Find all entries that contain all of the given tokens.
Do not use an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(getattr(table.c, column),
sa.type_coerce(tokens, IntArray))
@compiles(Restrict) # type: ignore[no-untyped-call, misc]
def _default_restrict(element: Restrict,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(coalesce(null, %s) @> %s)" % (compiler.process(arg1, **kw),
compiler.process(arg2, **kw))
@compiles(Restrict, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_restrict(element: Restrict,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "array_contains(%s)" % compiler.process(element.clauses, **kw)

View File

@@ -0,0 +1,874 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the actual database accesses for forward search.
"""
from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
import abc
import sqlalchemy as sa
from nominatim_core.typing import SaFromClause, SaScalarSelect, SaColumn, \
SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
from nominatim_core.db.sqlalchemy_types import Geometry, IntArray
from ..connection import SearchConnection
from ..types import SearchDetails, DataLayer, GeometryFormat, Bbox
from .. import results as nres
from .db_search_fields import SearchData, WeightedCategories
#pylint: disable=singleton-comparison,not-callable
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
def no_index(expr: SaColumn) -> SaColumn:
""" Wrap the given expression, so that the query planner will
refrain from using the expression for index lookup.
"""
return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
""" Create a dictionary from search parameters that can be used
as bind parameter for SQL execute.
"""
return {'limit': details.max_results,
'min_rank': details.min_rank,
'max_rank': details.max_rank,
'viewbox': details.viewbox,
'viewbox2': details.viewbox_x2,
'near': details.near,
'near_radius': details.near_radius,
'excluded': details.excluded,
'countries': details.countries}
LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
def filter_by_area(sql: SaSelect, t: SaFromClause,
details: SearchDetails, avoid_index: bool = False) -> SaSelect:
""" Apply SQL statements for filtering by viewbox and near point,
if applicable.
"""
if details.near is not None and details.near_radius is not None:
if details.near_radius < 0.1 and not avoid_index:
sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM))
else:
sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM)
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM,
use_index=not avoid_index and
details.viewbox.area < 0.2))
return sql
def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
def _select_placex(t: SaFromClause) -> SaSelect:
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
t.c.wikipedia,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
t.c.linked_place_id, t.c.admin_level,
t.c.centroid,
t.c.geometry.ST_Expand(0).label('bbox'))
def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
out = []
if details.geometry_simplification > 0.0:
col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
if details.geometry_output & GeometryFormat.GEOJSON:
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if details.geometry_output & GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if details.geometry_output & GeometryFormat.KML:
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if details.geometry_output & GeometryFormat.SVG:
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
numerals: List[int], details: SearchDetails) -> SaScalarSelect:
all_ids = sa.func.ArrayAgg(table.c.place_id)
sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)
if len(numerals) == 1:
sql = sql.where(sa.between(numerals[0], table.c.startnumber, table.c.endnumber))\
.where((numerals[0] - table.c.startnumber) % table.c.step == 0)
else:
sql = sql.where(sa.or_(
*(sa.and_(sa.between(n, table.c.startnumber, table.c.endnumber),
(n - table.c.startnumber) % table.c.step == 0)
for n in numerals)))
if details.excluded:
sql = sql.where(_exclude_places(table))
return sql.scalar_subquery()
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
orexpr: List[SaExpression] = []
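# Build one OR condition per requested layer: address and POI layers are
# distinguished by rank_address, the remaining layers by the object class.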
if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
orexpr.append(no_index(table.c.rank_address).between(1, 30))
elif layers & DataLayer.ADDRESS:
orexpr.append(no_index(table.c.rank_address).between(1, 29))
orexpr.append(sa.func.IsAddressPoint(table))
elif layers & DataLayer.POI:
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
table.c.class_.not_in(('place', 'building'))))
if layers & DataLayer.MANMADE:
exclude = []
if not layers & DataLayer.RAILWAY:
exclude.append('railway')
if not layers & DataLayer.NATURAL:
exclude.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
no_index(table.c.rank_address) == 0))
else:
include = []
if layers & DataLayer.RAILWAY:
include.append('railway')
if layers & DataLayer.NATURAL:
include.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
no_index(table.c.rank_address) == 0))
if len(orexpr) == 1:
return orexpr[0]
return sa.or_(*orexpr)
def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
pos = sa.cast(nr - table.c.startnumber, sa.Float) / (table.c.endnumber - table.c.startnumber)
return sa.case(
(table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
else_=table.c.linegeo.ST_LineInterpolatePoint(pos)).label('centroid')
async def _get_placex_housenumbers(conn: SearchConnection,
place_ids: List[int],
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.placex
sql = _select_placex(t).add_columns(t.c.importance)\
.where(t.c.place_id.in_(place_ids))
if details.geometry_output:
sql = _add_geometry_columns(sql, t.c.geometry, details)
for row in await conn.execute(sql):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.bbox = Bbox.from_wkb(row.bbox)
yield result
def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
""" Create a subselect that returns the given list of integers
as rows in the column 'nr'.
"""
vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\
.table_valued(sa.column('value', type_=sa.JSON))
return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery()
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
numerals: List[int],
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.osmline
values = _int_list_to_subquery(numerals)
sql = sa.select(t.c.place_id, t.c.osm_id,
t.c.parent_place_id, t.c.address,
values.c.nr.label('housenumber'),
_interpolated_position(t, values.c.nr),
t.c.postcode, t.c.country_code)\
.where(t.c.place_id.in_(place_ids))\
.join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))
if details.geometry_output:
sub = sql.subquery()
sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)
for row in await conn.execute(sql):
result = nres.create_from_osmline_row(row, nres.SearchResult)
assert result
yield result
async def _get_tiger(conn: SearchConnection, place_ids: List[int],
numerals: List[int], osm_id: int,
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.tiger
values = _int_list_to_subquery(numerals)
sql = sa.select(t.c.place_id, t.c.parent_place_id,
sa.literal('W').label('osm_type'),
sa.literal(osm_id).label('osm_id'),
values.c.nr.label('housenumber'),
_interpolated_position(t, values.c.nr),
t.c.postcode)\
.where(t.c.place_id.in_(place_ids))\
.join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))
if details.geometry_output:
sub = sql.subquery()
sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)
for row in await conn.execute(sql):
result = nres.create_from_tiger_row(row, nres.SearchResult)
assert result
yield result
class AbstractSearch(abc.ABC):
""" Encapuslation of a single lookup in the database.
"""
SEARCH_PRIO: int = 2
def __init__(self, penalty: float) -> None:
self.penalty = penalty
@abc.abstractmethod
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
class NearSearch(AbstractSearch):
""" Category search of a place type near the result of another search.
"""
def __init__(self, penalty: float, categories: WeightedCategories,
search: AbstractSearch) -> None:
super().__init__(penalty)
self.search = search
self.categories = categories
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
results = nres.SearchResults()
base = await self.search.lookup(conn, details)
if not base:
return results
base.sort(key=lambda r: (r.accuracy, r.rank_search))
max_accuracy = base[0].accuracy + 0.5
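# Derive the allowed address-rank window for the near places from the rank
# of the best base result.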
if base[0].rank_address == 0:
min_rank = 0
max_rank = 0
elif base[0].rank_address < 26:
min_rank = 1
max_rank = min(25, base[0].rank_address + 4)
else:
min_rank = 26
max_rank = 30
base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
and r.accuracy <= max_accuracy
and r.bbox and r.bbox.area < 20
and r.rank_address >= min_rank
and r.rank_address <= max_rank)
if base:
baseids = [b.place_id for b in base[:5] if b.place_id]
for category, penalty in self.categories:
await self.lookup_category(results, conn, baseids, category, penalty, details)
if len(results) >= details.max_results:
break
return results
async def lookup_category(self, results: nres.SearchResults,
conn: SearchConnection, ids: List[int],
category: Tuple[str, str], penalty: float,
details: SearchDetails) -> None:
""" Find places of the given category near the list of
place ids and add the results to 'results'.
"""
table = await conn.get_class_table(*category)
tgeom = conn.t.placex.alias('pgeom')
if table is None:
# No classtype table available, do a simplified lookup in placex.
table = conn.t.placex
sql = sa.select(table.c.place_id,
sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
.label('dist'))\
.join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
.where(table.c.class_ == category[0])\
.where(table.c.type == category[1])
else:
# Use the classtype table. We can afford to use a larger
# radius for the lookup.
sql = sa.select(table.c.place_id,
sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
.label('dist'))\
.join(tgeom,
table.c.centroid.ST_CoveredBy(
sa.case((sa.and_(tgeom.c.rank_address > 9,
tgeom.c.geometry.is_area()),
tgeom.c.geometry),
else_ = tgeom.c.centroid.ST_Expand(0.05))))
inner = sql.where(tgeom.c.place_id.in_(ids))\
.group_by(table.c.place_id).subquery()
t = conn.t.placex
sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
.join(inner, inner.c.place_id == t.c.place_id)\
.order_by(inner.c.dist)
sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
if details.countries:
sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
if details.excluded:
sql = sql.where(_exclude_places(t))
if details.layers is not None:
sql = sql.where(_filter_by_layer(t, details.layers))
sql = sql.limit(LIMIT_PARAM)
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + penalty
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
class PoiSearch(AbstractSearch):
""" Category search in a geographic area.
"""
def __init__(self, sdata: SearchData) -> None:
super().__init__(sdata.penalty)
self.qualifiers = sdata.qualifiers
self.countries = sdata.countries
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
bind_params = _details_to_bind_params(details)
t = conn.t.placex
rows: List[SaRow] = []
if details.near and details.near_radius is not None and details.near_radius < 0.2:
# simply search in placex table
def _base_query() -> SaSelect:
return _select_placex(t) \
.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))\
.where(t.c.linked_place_id == None) \
.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
.order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
.limit(LIMIT_PARAM)
classtype = self.qualifiers.values
if len(classtype) == 1:
cclass, ctype = classtype[0]
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
.where(t.c.class_ == cclass)
.where(t.c.type == ctype))
else:
sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
for cls, typ in classtype)))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
rows.extend(await conn.execute(sql, bind_params))
else:
# use the class type tables
for category in self.qualifiers.values:
table = await conn.get_class_table(*category)
if table is not None:
sql = _select_placex(t)\
.add_columns(t.c.importance)\
.join(table, t.c.place_id == table.c.place_id)\
.where(t.c.class_ == category[0])\
.where(t.c.type == category[1])
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))
if details.near and details.near_radius is not None:
sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
.where(table.c.centroid.within_distance(NEAR_PARAM,
NEAR_RADIUS_PARAM))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
sql = sql.limit(LIMIT_PARAM)
rows.extend(await conn.execute(sql, bind_params))
results = nres.SearchResults()
for row in rows:
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
return results
class CountrySearch(AbstractSearch):
""" Search for a country name or country code.
"""
SEARCH_PRIO = 0
def __init__(self, sdata: SearchData) -> None:
super().__init__(sdata.penalty)
self.countries = sdata.countries
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
t = conn.t.placex
ccodes = self.countries.values
sql = _select_placex(t)\
.add_columns(t.c.importance)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)
if details.geometry_output:
sql = _add_geometry_columns(sql, t.c.geometry, details)
if details.excluded:
sql = sql.where(_exclude_places(t))
sql = filter_by_area(sql, t, details)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
if not results:
results = await self.lookup_in_country_table(conn, details)
if results:
details.min_rank = min(5, details.max_rank)
details.max_rank = min(25, details.max_rank)
return results
async def lookup_in_country_table(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Look up the country in the fallback country tables.
"""
# Avoid the fallback search when this is a 'more results' search
# (i.e. places are excluded). Country results usually appear in the
# first batch of results and these fallbacks cannot be excluded.
if details.excluded:
return nres.SearchResults()
t = conn.t.country_name
tgrid = conn.t.country_grid
sql = sa.select(tgrid.c.country_code,
tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
.label('centroid'),
tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
.where(tgrid.c.country_code.in_(self.countries.values))\
.group_by(tgrid.c.country_code)
sql = filter_by_area(sql, tgrid, details, avoid_index=True)
sub = sql.subquery('grid')
sql = sa.select(t.c.country_code,
t.c.name.merge(t.c.derived_name).label('name'),
sub.c.centroid, sub.c.bbox)\
.join(sub, t.c.country_code == sub.c.country_code)
if details.geometry_output:
sql = _add_geometry_columns(sql, sub.c.centroid, details)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_country_row(row, nres.SearchResult)
assert result
result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
results.append(result)
return results
class PostcodeSearch(AbstractSearch):
""" Search for a postcode.
"""
def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
super().__init__(sdata.penalty + extra_penalty)
self.countries = sdata.countries
self.postcodes = sdata.postcodes
self.lookups = sdata.lookups
self.rankings = sdata.rankings
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
t = conn.t.postcode
pcs = self.postcodes.values
sql = sa.select(t.c.place_id, t.c.parent_place_id,
t.c.rank_search, t.c.rank_address,
t.c.postcode, t.c.country_code,
t.c.geometry.label('centroid'))\
.where(t.c.postcode.in_(pcs))
if details.geometry_output:
sql = _add_geometry_columns(sql, t.c.geometry, details)
penalty: SaExpression = sa.literal(self.penalty)
if details.viewbox is not None and not details.bounded_viewbox:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
else_=1.0)
if details.near is not None:
sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
sql = filter_by_area(sql, t, details)
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
if details.excluded:
sql = sql.where(_exclude_places(t))
if self.lookups:
assert len(self.lookups) == 1
tsearch = conn.t.search_name
sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
.where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
.contains(sa.type_coerce(self.lookups[0].tokens,
IntArray)))
for ranking in self.rankings:
penalty += ranking.sql_penalty(conn.t.search_name)
penalty += sa.case(*((t.c.postcode == v, p) for v, p in self.postcodes),
else_=1.0)
sql = sql.add_columns(penalty.label('accuracy'))
sql = sql.order_by('accuracy').limit(LIMIT_PARAM)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
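# Prefer a real postal_code boundary from placex when one exists for the
# postcode; otherwise build the result from the postcode table row.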
p = conn.t.placex
placex_sql = _select_placex(p).add_columns(p.c.importance)\
.where(sa.text("""class = 'boundary'
AND type = 'postal_code'
AND osm_type = 'R'"""))\
.where(p.c.country_code == row.country_code)\
.where(p.c.postcode == row.postcode)\
.limit(1)
if details.geometry_output:
placex_sql = _add_geometry_columns(placex_sql, p.c.geometry, details)
for prow in await conn.execute(placex_sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(prow, nres.SearchResult)
break
else:
result = nres.create_from_postcode_row(row, nres.SearchResult)
assert result
if result.place_id not in details.excluded:
result.accuracy = row.accuracy
results.append(result)
return results
class PlaceSearch(AbstractSearch):
""" Generic search for an address or named place.
"""
SEARCH_PRIO = 1
def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
super().__init__(sdata.penalty + extra_penalty)
self.countries = sdata.countries
self.postcodes = sdata.postcodes
self.housenumbers = sdata.housenumbers
self.qualifiers = sdata.qualifiers
self.lookups = sdata.lookups
self.rankings = sdata.rankings
self.expected_count = expected_count
def _inner_search_name_cte(self, conn: SearchConnection,
details: SearchDetails) -> 'sa.CTE':
""" Create a subquery that preselects the rows in the search_name
table.
"""
t = conn.t.search_name
penalty: SaExpression = sa.literal(self.penalty)
for ranking in self.rankings:
penalty += ranking.sql_penalty(t)
sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank,
t.c.country_code, t.c.centroid,
t.c.name_vector, t.c.nameaddress_vector,
sa.case((t.c.importance > 0, t.c.importance),
else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
.label('importance'),
penalty.label('penalty'))
for lookup in self.lookups:
sql = sql.where(lookup.sql_condition(t))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
if self.postcodes:
# if a postcode is given, don't search for state or country level objects
sql = sql.where(t.c.address_rank > 9)
if self.expected_count > 10000:
# Many results expected. Restrict by postcode.
tpc = conn.t.postcode
sql = sql.where(sa.select(tpc.c.postcode)
.where(tpc.c.postcode.in_(self.postcodes.values))
.where(t.c.centroid.within_distance(tpc.c.geometry, 0.4))
.exists())
if details.viewbox is not None:
if details.bounded_viewbox:
sql = sql.where(t.c.centroid
.intersects(VIEWBOX_PARAM,
use_index=details.viewbox.area < 0.2))
elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
sql = sql.where(t.c.centroid
.intersects(VIEWBOX2_PARAM,
use_index=details.viewbox.area < 0.5))
if details.near is not None and details.near_radius is not None:
if details.near_radius < 0.1:
sql = sql.where(t.c.centroid.within_distance(NEAR_PARAM,
NEAR_RADIUS_PARAM))
else:
sql = sql.where(t.c.centroid
.ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)
if self.housenumbers:
sql = sql.where(t.c.address_rank.between(16, 30))
else:
if details.excluded:
sql = sql.where(_exclude_places(t))
if details.min_rank > 0:
sql = sql.where(sa.or_(t.c.address_rank >= MIN_RANK_PARAM,
t.c.search_rank >= MIN_RANK_PARAM))
if details.max_rank < 30:
sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM,
t.c.search_rank <= MAX_RANK_PARAM))
inner = sql.limit(10000).order_by(sa.desc(sa.text('importance'))).subquery()
sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
inner.c.country_code, inner.c.centroid, inner.c.importance,
inner.c.penalty)
# If the query is not an address search or has a geographic preference,
# preselect most important items to restrict the number of places
# that need to be looked up in placex.
if not self.housenumbers\
and (details.viewbox is None or details.bounded_viewbox)\
and (details.near is None or details.near_radius is not None)\
and not self.qualifiers:
sql = sql.add_columns(sa.func.first_value(inner.c.penalty - inner.c.importance)
.over(order_by=inner.c.penalty - inner.c.importance)
.label('min_penalty'))
inner = sql.subquery()
sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
inner.c.country_code, inner.c.centroid, inner.c.importance,
inner.c.penalty)\
.where(inner.c.penalty - inner.c.importance < inner.c.min_penalty + 0.5)
return sql.cte('searches')
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
t = conn.t.placex
tsearch = self._inner_search_name_cte(conn, details)
sql = _select_placex(t).join(tsearch, t.c.place_id == tsearch.c.place_id)
if details.geometry_output:
sql = _add_geometry_columns(sql, t.c.geometry, details)
penalty: SaExpression = tsearch.c.penalty
if self.postcodes:
tpc = conn.t.postcode
pcs = self.postcodes.values
pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(t.c.centroid)))\
.where(tpc.c.postcode.in_(pcs))\
.scalar_subquery()
penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))
if details.viewbox is not None and not details.bounded_viewbox:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
else_=1.0)
if details.near is not None:
sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))
sql = sql.order_by(sa.desc(sa.text('importance')))
else:
sql = sql.order_by(penalty - tsearch.c.importance)
sql = sql.add_columns(tsearch.c.importance)
sql = sql.add_columns(penalty.label('accuracy'))\
.order_by(sa.text('accuracy'))
if self.housenumbers:
hnr_list = '|'.join(self.housenumbers.values)
inner = sql.where(sa.or_(tsearch.c.address_rank < 30,
sa.func.RegexpWord(hnr_list, t.c.housenumber)))\
.subquery()
# Housenumbers from placex
thnr = conn.t.placex.alias('hnr')
pid_list = sa.func.ArrayAgg(thnr.c.place_id)
place_sql = sa.select(pid_list)\
.where(thnr.c.parent_place_id == inner.c.place_id)\
.where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
.where(thnr.c.linked_place_id == None)\
.where(thnr.c.indexed_status == 0)
if details.excluded:
place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
if self.qualifiers:
place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
numerals = [int(n) for n in self.housenumbers.values
if n.isdigit() and len(n) < 8]
interpol_sql: SaColumn
tiger_sql: SaColumn
if numerals and \
(not self.qualifiers or ('place', 'house') in self.qualifiers.values):
# Housenumbers from interpolations
interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
numerals, details)
# Housenumbers from Tiger
tiger_sql = sa.case((inner.c.country_code == 'us',
_make_interpolation_subquery(conn.t.tiger, inner,
numerals, details)
), else_=None)
else:
interpol_sql = sa.null()
tiger_sql = sa.null()
unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
interpol_sql.label('interpol_hnr'),
tiger_sql.label('tiger_hnr')).subquery('unsort')
sql = sa.select(unsort)\
.order_by(sa.case((unsort.c.placex_hnr != None, 1),
(unsort.c.interpol_hnr != None, 2),
(unsort.c.tiger_hnr != None, 3),
else_=4),
unsort.c.accuracy)
else:
sql = sql.where(t.c.linked_place_id == None)\
.where(t.c.indexed_status == 0)
if self.qualifiers:
sql = sql.where(self.qualifiers.sql_restrict(t))
if details.layers is not None:
sql = sql.where(_filter_by_layer(t, details.layers))
sql = sql.limit(LIMIT_PARAM)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = row.accuracy
if self.housenumbers and row.rank_address < 30:
if row.placex_hnr:
subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
elif row.interpol_hnr:
subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
elif row.tiger_hnr:
subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
else:
subs = None
if subs is not None:
async for sub in subs:
assert sub.housenumber
sub.accuracy = result.accuracy
if not any(nr in self.housenumbers.values
for nr in sub.housenumber.split(';')):
sub.accuracy += 0.6
results.append(sub)
# Only add the street as a result if it meets all other
# filter conditions.
if (not details.excluded or result.place_id not in details.excluded)\
and (not self.qualifiers or result.category in self.qualifiers.values)\
and result.rank_address >= details.min_rank:
result.accuracy += 1.0 # penalty for missing housenumber
results.append(result)
else:
results.append(result)
return results

View File

@@ -0,0 +1,274 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Public interface to the search code.
"""
from typing import List, Any, Optional, Iterator, Tuple, Dict
import itertools
import re
import datetime as dt
import difflib
from ..connection import SearchConnection
from ..types import SearchDetails
from ..results import SearchResult, SearchResults, add_result_details
from ..logging import log
from .token_assignment import yield_token_assignments
from .db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
from .db_searches import AbstractSearch
from .query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
from .query import Phrase, QueryStruct
class ForwardGeocoder:
""" Main class responsible for place search.
"""
def __init__(self, conn: SearchConnection,
params: SearchDetails, timeout: Optional[int]) -> None:
self.conn = conn
self.params = params
self.timeout = dt.timedelta(seconds=timeout or 1000000)
self.query_analyzer: Optional[AbstractQueryAnalyzer] = None
@property
def limit(self) -> int:
""" Return the configured maximum number of search results.
"""
return self.params.max_results
async def build_searches(self,
phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
""" Analyse the query and return the tokenized query and list of
possible searches over it.
"""
if self.query_analyzer is None:
self.query_analyzer = await make_query_analyzer(self.conn)
query = await self.query_analyzer.analyze_query(phrases)
searches: List[AbstractSearch] = []
if query.num_token_slots() > 0:
# 2. Compute all possible search interpretations
log().section('Compute abstract searches')
search_builder = SearchBuilder(query, self.params)
num_searches = 0
for assignment in yield_token_assignments(query):
searches.extend(search_builder.build(assignment))
if num_searches < len(searches):
log().table_dump('Searches for assignment',
_dump_searches(searches, query, num_searches))
num_searches = len(searches)
searches.sort(key=lambda s: (s.penalty, s.SEARCH_PRIO))
return query, searches
async def execute_searches(self, query: QueryStruct,
searches: List[AbstractSearch]) -> SearchResults:
""" Run the abstract searches against the database until a result
is found.
"""
log().section('Execute database searches')
results: Dict[Any, SearchResult] = {}
end_time = dt.datetime.now() + self.timeout
min_ranking = searches[0].penalty + 2.0
prev_penalty = 0.0
for i, search in enumerate(searches):
if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
break
log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
log().var_dump('Params', self.params)
lookup_results = await search.lookup(self.conn, self.params)
for result in lookup_results:
rhash = (result.source_table, result.place_id,
result.housenumber, result.country_code)
prevresult = results.get(rhash)
if prevresult:
prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
else:
results[rhash] = result
min_ranking = min(min_ranking, result.accuracy * 1.2, 2.0)
log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
prev_penalty = search.penalty
if dt.datetime.now() >= end_time:
break
return SearchResults(results.values())
def pre_filter_results(self, results: SearchResults) -> SearchResults:
""" Remove results that are significantly worse than the
best match.
"""
if results:
max_ranking = min(r.ranking for r in results) + 0.5
results = SearchResults(r for r in results if r.ranking < max_ranking)
return results
def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
""" Remove badly matching results, sort by ranking and
limit to the configured number of results.
"""
if results:
results.sort(key=lambda r: r.ranking)
min_rank = results[0].rank_search
min_ranking = results[0].ranking
results = SearchResults(r for r in results
if r.ranking + 0.03 * (r.rank_search - min_rank)
< min_ranking + 0.5)
results = SearchResults(results[:self.limit])
return results
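# Worked example for the rank-based cutoff above (illustrative numbers only):
# with min_ranking = 0.2 and min_rank = 26, a result with rank_search = 30 is
# kept only if its ranking stays below 0.2 + 0.5 - 0.03 * (30 - 26) = 0.58.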
def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
""" Adjust the accuracy of the localized result according to how well
they match the original query.
"""
assert self.query_analyzer is not None
qwords = [word for phrase in query.source
for word in re.split('[, ]+', phrase.text) if word]
if not qwords:
return
for result in results:
# Negative importance indicates ordering by distance, which is
# more important than word matching.
if not result.display_name\
or (result.importance is not None and result.importance < 0):
continue
distance = 0.0
norm = self.query_analyzer.normalize_text(' '.join((result.display_name,
result.country_code or '')))
words = set((w for w in norm.split(' ') if w))
if not words:
continue
for qword in qwords:
wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
if wdist < 0.5:
distance += len(qword)
else:
distance += (1.0 - wdist) * len(qword)
# Compensate for the fact that country names do not get a
# match penalty yet by the tokenizer.
# Temporary hack that needs to be removed!
if result.rank_address == 4:
distance *= 2
result.accuracy += distance * 0.4 / sum(len(w) for w in qwords)
async def lookup_pois(self, categories: List[Tuple[str, str]],
phrases: List[Phrase]) -> SearchResults:
""" Look up places by category. If phrase is given, a place search
over the phrase will be executed first and places close to the
results returned.
"""
log().function('forward_lookup_pois', categories=categories, params=self.params)
if phrases:
query, searches = await self.build_searches(phrases)
if query:
searches = [wrap_near_search(categories, s) for s in searches[:50]]
results = await self.execute_searches(query, searches)
results = self.pre_filter_results(results)
await add_result_details(self.conn, results, self.params)
log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
results = self.sort_and_cut_results(results)
else:
results = SearchResults()
else:
search = build_poi_search(categories, self.params.countries)
results = await search.lookup(self.conn, self.params)
await add_result_details(self.conn, results, self.params)
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
return results
async def lookup(self, phrases: List[Phrase]) -> SearchResults:
""" Look up a single free-text query.
"""
log().function('forward_lookup', phrases=phrases, params=self.params)
results = SearchResults()
if self.params.is_impossible():
return results
query, searches = await self.build_searches(phrases)
if searches:
# Execute SQL until an appropriate result is found.
results = await self.execute_searches(query, searches[:50])
results = self.pre_filter_results(results)
await add_result_details(self.conn, results, self.params)
log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
self.rerank_by_query(query, results)
log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
results = self.sort_and_cut_results(results)
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
return results
# pylint: disable=invalid-name,too-many-locals
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
start: int = 0) -> Iterator[Optional[List[Any]]]:
yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
'Qualifier', 'Category', 'Rankings']
def tk(tl: List[int]) -> str:
tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
return f"[{','.join(tstr)}]"
def fmt_ranking(f: Any) -> str:
if not f:
return ''
ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
if len(ranks) > 100:
ranks = ranks[:100] + '...'
return f"{f.column}({ranks},def={f.default:.3g})"
def fmt_lookup(l: Any) -> str:
if not l:
return ''
return f"{l.lookup_type}({l.column}{tk(l.tokens)})"
def fmt_cstr(c: Any) -> str:
if not c:
return ''
return f'{c[0]}^{c[1]}'
for search in searches[start:]:
fields = ('lookups', 'rankings', 'countries', 'housenumbers',
'postcodes', 'qualifiers')
if hasattr(search, 'search'):
iters = itertools.zip_longest([f"{search.penalty:.3g}"],
*(getattr(search.search, attr, []) for attr in fields),
getattr(search, 'categories', []),
fillvalue='')
else:
iters = itertools.zip_longest([f"{search.penalty:.3g}"],
*(getattr(search, attr, []) for attr in fields),
[],
fillvalue='')
for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
yield None
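# Rough usage sketch for the geocoder above (assumes an open SearchConnection
# 'conn', prepared SearchDetails 'params' and a PhraseType from the query
# module; the query text is purely illustrative):
#   geocoder = ForwardGeocoder(conn, params, timeout=30)
#   results = await geocoder.lookup([Phrase(PhraseType.NONE, 'Birkenweg 5, Bonn')])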

View File

@@ -0,0 +1,314 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of query analysis for the ICU tokenizer.
"""
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
from collections import defaultdict
import dataclasses
import difflib
from icu import Transliterator
import sqlalchemy as sa
from nominatim_core.typing import SaRow
from nominatim_core.db.sqlalchemy_types import Json
from ..connection import SearchConnection
from ..logging import log
from ..search import query as qmod
from ..search.query_analyzer_factory import AbstractQueryAnalyzer
DB_TO_TOKEN_TYPE = {
'W': qmod.TokenType.WORD,
'w': qmod.TokenType.PARTIAL,
'H': qmod.TokenType.HOUSENUMBER,
'P': qmod.TokenType.POSTCODE,
'C': qmod.TokenType.COUNTRY
}
class QueryPart(NamedTuple):
""" Normalized and transliterated form of a single term in the query.
When the term came out of a split during the transliteration,
the normalized string is the full word before transliteration.
The word number keeps track of the word before transliteration
and can be used to identify partially transliterated terms.
"""
token: str
normalized: str
word_number: int
QueryParts = List[QueryPart]
WordDict = Dict[str, List[qmod.TokenRange]]
def yield_words(terms: List[QueryPart], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]:
""" Return all combinations of words in the terms list after the
given position.
"""
total = len(terms)
for first in range(start, total):
word = terms[first].token
yield word, qmod.TokenRange(first, first + 1)
for last in range(first + 1, min(first + 20, total)):
word = ' '.join((word, terms[last].token))
yield word, qmod.TokenRange(first, last + 1)
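# Illustration: for parts with the tokens ['main', 'st', '12'] and start=0 this
# yields ('main', [0,1)), ('main st', [0,2)), ('main st 12', [0,3)),
# ('st', [1,2)), ('st 12', [1,3)) and ('12', [2,3)).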
@dataclasses.dataclass
class ICUToken(qmod.Token):
""" Specialised token for ICU tokenizer.
"""
word_token: str
info: Optional[Dict[str, Any]]
def get_category(self) -> Tuple[str, str]:
assert self.info
return self.info.get('class', ''), self.info.get('type', '')
def rematch(self, norm: str) -> None:
""" Check how well the token matches the given normalized string
and add a penalty, if necessary.
"""
if not self.lookup_word:
return
seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
distance = 0
for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
distance += 1
elif tag == 'replace':
distance += max((ato-afrom), (bto-bfrom))
elif tag != 'equal':
distance += abs((ato-afrom) - (bto-bfrom))
self.penalty += (distance/len(self.lookup_word))
@staticmethod
def from_db_row(row: SaRow) -> 'ICUToken':
""" Create a ICUToken from the row of the word table.
"""
count = 1 if row.info is None else row.info.get('count', 1)
addr_count = 1 if row.info is None else row.info.get('addr_count', 1)
penalty = 0.0
if row.type == 'w':
penalty = 0.3
elif row.type == 'W':
if len(row.word_token) == 1 and row.word_token == row.word:
penalty = 0.2 if row.word.isdigit() else 0.3
elif row.type == 'H':
penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
if all(not c.isdigit() for c in row.word_token):
penalty += 0.2 * (len(row.word_token) - 1)
elif row.type == 'C':
if len(row.word_token) == 1:
penalty = 0.3
if row.info is None:
lookup_word = row.word
else:
lookup_word = row.info.get('lookup', row.word)
if lookup_word:
lookup_word = lookup_word.split('@', 1)[0]
else:
lookup_word = row.word_token
return ICUToken(penalty=penalty, token=row.word_id, count=max(1, count),
lookup_word=lookup_word, is_indexed=True,
word_token=row.word_token, info=row.info,
addr_count=max(1, addr_count))
class ICUQueryAnalyzer(AbstractQueryAnalyzer):
""" Converter for query strings into a tokenized query
using the tokens created by an ICU tokenizer.
"""
def __init__(self, conn: SearchConnection) -> None:
self.conn = conn
async def setup(self) -> None:
""" Set up static data structures needed for the analysis.
"""
async def _make_normalizer() -> Any:
rules = await self.conn.get_property('tokenizer_import_normalisation')
return Transliterator.createFromRules("normalization", rules)
self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
_make_normalizer)
async def _make_transliterator() -> Any:
rules = await self.conn.get_property('tokenizer_import_transliteration')
return Transliterator.createFromRules("transliteration", rules)
self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
_make_transliterator)
if 'word' not in self.conn.t.meta.tables:
sa.Table('word', self.conn.t.meta,
sa.Column('word_id', sa.Integer),
sa.Column('word_token', sa.Text, nullable=False),
sa.Column('type', sa.Text, nullable=False),
sa.Column('word', sa.Text),
sa.Column('info', Json))
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
""" Analyze the given list of phrases and return the
tokenized query.
"""
log().section('Analyze query (using ICU tokenizer)')
normalized = list(filter(lambda p: p.text,
(qmod.Phrase(p.ptype, self.normalize_text(p.text))
for p in phrases)))
query = qmod.QueryStruct(normalized)
log().var_dump('Normalized query', query.source)
if not query.source:
return query
parts, words = self.split_query(query)
log().var_dump('Transliterated query', lambda: _dump_transliterated(query, parts))
for row in await self.lookup_in_db(list(words.keys())):
for trange in words[row.word_token]:
token = ICUToken.from_db_row(row)
if row.type == 'S':
if row.info['op'] in ('in', 'near'):
if trange.start == 0:
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
if trange.start == 0 and trange.end == query.num_token_slots():
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
self.add_extra_tokens(query, parts)
self.rerank_tokens(query, parts)
log().table_dump('Word tokens', _dump_word_tokens(query))
return query
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form. That is the
standardized form search will work with. All information removed
at this stage is inevitably lost.
"""
return cast(str, self.normalizer.transliterate(text))
def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
""" Transliterate the phrases and split them into tokens.
Returns the list of transliterated tokens together with their
normalized form and a dictionary of words for lookup together
with their position.
"""
parts: QueryParts = []
phrase_start = 0
words = defaultdict(list)
wordnr = 0
for phrase in query.source:
query.nodes[-1].ptype = phrase.ptype
for word in phrase.text.split(' '):
trans = self.transliterator.transliterate(word)
if trans:
for term in trans.split(' '):
if term:
parts.append(QueryPart(term, word, wordnr))
query.add_node(qmod.BreakType.TOKEN, phrase.ptype)
query.nodes[-1].btype = qmod.BreakType.WORD
wordnr += 1
query.nodes[-1].btype = qmod.BreakType.PHRASE
for word, wrange in yield_words(parts, phrase_start):
words[word].append(wrange)
phrase_start = len(parts)
query.nodes[-1].btype = qmod.BreakType.END
return parts, words
async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]':
""" Return the token information from the database for the
given word tokens.
"""
t = self.conn.t.meta.tables['word']
return await self.conn.execute(t.select().where(t.c.word_token.in_(words)))
def add_extra_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:
""" Add tokens to query that are not saved in the database.
"""
for part, node, i in zip(parts, query.nodes, range(1000)):
if len(part.token) <= 4 and part[0].isdigit()\
and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
ICUToken(0.5, 0, 1, 1, part.token, True, part.token, None))
def rerank_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:
""" Add penalties to tokens that depend on presence of other token.
"""
for i, node, tlist in query.iter_token_lists():
if tlist.ttype == qmod.TokenType.POSTCODE:
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \
and (repl.ttype != qmod.TokenType.HOUSENUMBER
or len(tlist.tokens[0].lookup_word) > 4):
repl.add_penalty(0.39)
elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
and len(tlist.tokens[0].lookup_word) <= 3:
if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
repl.add_penalty(0.5 - tlist.tokens[0].penalty)
elif tlist.ttype not in (qmod.TokenType.COUNTRY, qmod.TokenType.PARTIAL):
norm = parts[i].normalized
for j in range(i + 1, tlist.end):
if parts[j - 1].word_number != parts[j].word_number:
norm += ' ' + parts[j].normalized
for token in tlist.tokens:
cast(ICUToken, token).rematch(norm)
def _dump_transliterated(query: qmod.QueryStruct, parts: QueryParts) -> str:
out = query.nodes[0].btype.value
for node, part in zip(query.nodes[1:], parts):
out += part.token + node.btype.value
return out
def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
yield ['type', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
for node in query.nodes:
for tlist in node.starting:
for token in tlist.tokens:
t = cast(ICUToken, token)
yield [tlist.ttype.name, t.token, t.word_token or '',
t.lookup_word or '', t.penalty, t.count, t.info]
async def create_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
""" Create and set up a new query analyzer for a database based
on the ICU tokenizer.
"""
out = ICUQueryAnalyzer(conn)
await out.setup()
return out

View File

@@ -0,0 +1,272 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of query analysis for the legacy tokenizer.
"""
from typing import Tuple, Dict, List, Optional, Iterator, Any, cast
from copy import copy
from collections import defaultdict
import dataclasses
import sqlalchemy as sa
from nominatim_core.typing import SaRow
from ..connection import SearchConnection
from ..logging import log
from . import query as qmod
from .query_analyzer_factory import AbstractQueryAnalyzer
def yield_words(terms: List[str], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]:
""" Return all combinations of words in the terms list after the
given position.
"""
total = len(terms)
for first in range(start, total):
word = terms[first]
yield word, qmod.TokenRange(first, first + 1)
for last in range(first + 1, min(first + 20, total)):
word = ' '.join((word, terms[last]))
yield word, qmod.TokenRange(first, last + 1)
@dataclasses.dataclass
class LegacyToken(qmod.Token):
""" Specialised token for legacy tokenizer.
"""
word_token: str
category: Optional[Tuple[str, str]]
country: Optional[str]
operator: Optional[str]
@property
def info(self) -> Dict[str, Any]:
""" Dictionary of additional properties of the token.
Should only be used for debugging purposes.
"""
return {'category': self.category,
'country': self.country,
'operator': self.operator}
def get_category(self) -> Tuple[str, str]:
assert self.category
return self.category
class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
""" Converter for query strings into a tokenized query
using the tokens created by a legacy tokenizer.
"""
def __init__(self, conn: SearchConnection) -> None:
self.conn = conn
async def setup(self) -> None:
""" Set up static data structures needed for the analysis.
"""
self.max_word_freq = int(await self.conn.get_property('tokenizer_maxwordfreq'))
if 'word' not in self.conn.t.meta.tables:
sa.Table('word', self.conn.t.meta,
sa.Column('word_id', sa.Integer),
sa.Column('word_token', sa.Text, nullable=False),
sa.Column('word', sa.Text),
sa.Column('class', sa.Text),
sa.Column('type', sa.Text),
sa.Column('country_code', sa.Text),
sa.Column('search_name_count', sa.Integer),
sa.Column('operator', sa.Text))
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
""" Analyze the given list of phrases and return the
tokenized query.
"""
log().section('Analyze query (using Legacy tokenizer)')
normalized = []
if phrases:
for row in await self.conn.execute(sa.select(*(sa.func.make_standard_name(p.text)
for p in phrases))):
normalized = [qmod.Phrase(p.ptype, r) for r, p in zip(row, phrases) if r]
break
query = qmod.QueryStruct(normalized)
log().var_dump('Normalized query', query.source)
if not query.source:
return query
parts, words = self.split_query(query)
lookup_words = list(words.keys())
log().var_dump('Split query', parts)
log().var_dump('Extracted words', lookup_words)
for row in await self.lookup_in_db(lookup_words):
for trange in words[row.word_token.strip()]:
token, ttype = self.make_token(row)
if ttype == qmod.TokenType.NEAR_ITEM:
if trange.start == 0:
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype == qmod.TokenType.QUALIFIER:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
query.add_token(trange, ttype, token)
self.add_extra_tokens(query, parts)
self.rerank_tokens(query)
log().table_dump('Word tokens', _dump_word_tokens(query))
return query
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form.
This only removes case, so some difference with the normalization
in the phrase remains.
"""
return text.lower()
def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
Dict[str, List[qmod.TokenRange]]]:
""" Transliterate the phrases and split them into tokens.
Returns a list of transliterated tokens and a dictionary
of words for lookup together with their position.
"""
parts: List[str] = []
phrase_start = 0
words = defaultdict(list)
for phrase in query.source:
query.nodes[-1].ptype = phrase.ptype
for trans in phrase.text.split(' '):
if trans:
for term in trans.split(' '):
if term:
parts.append(trans)
query.add_node(qmod.BreakType.TOKEN, phrase.ptype)
query.nodes[-1].btype = qmod.BreakType.WORD
query.nodes[-1].btype = qmod.BreakType.PHRASE
for word, wrange in yield_words(parts, phrase_start):
words[word].append(wrange)
phrase_start = len(parts)
query.nodes[-1].btype = qmod.BreakType.END
return parts, words
async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]':
""" Return the token information from the database for the
given word tokens.
"""
t = self.conn.t.meta.tables['word']
sql = t.select().where(t.c.word_token.in_(words + [' ' + w for w in words]))
return await self.conn.execute(sql)
def make_token(self, row: SaRow) -> Tuple[LegacyToken, qmod.TokenType]:
""" Create a LegacyToken from the row of the word table.
Also determines the type of token.
"""
penalty = 0.0
is_indexed = True
rowclass = getattr(row, 'class')
if row.country_code is not None:
ttype = qmod.TokenType.COUNTRY
lookup_word = row.country_code
elif rowclass is not None:
if rowclass == 'place' and row.type == 'house':
ttype = qmod.TokenType.HOUSENUMBER
lookup_word = row.word_token[1:]
elif rowclass == 'place' and row.type == 'postcode':
ttype = qmod.TokenType.POSTCODE
lookup_word = row.word_token[1:]
else:
ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
else qmod.TokenType.QUALIFIER
lookup_word = row.word
elif row.word_token.startswith(' '):
ttype = qmod.TokenType.WORD
lookup_word = row.word or row.word_token[1:]
else:
ttype = qmod.TokenType.PARTIAL
lookup_word = row.word_token
penalty = 0.21
if row.search_name_count > self.max_word_freq:
is_indexed = False
return LegacyToken(penalty=penalty, token=row.word_id,
count=max(1, row.search_name_count or 1),
addr_count=1, # not supported
lookup_word=lookup_word,
word_token=row.word_token.strip(),
category=(rowclass, row.type) if rowclass is not None else None,
country=row.country_code,
operator=row.operator,
is_indexed=is_indexed),\
ttype
def add_extra_tokens(self, query: qmod.QueryStruct, parts: List[str]) -> None:
""" Add tokens to query that are not saved in the database.
"""
for part, node, i in zip(parts, query.nodes, range(1000)):
if len(part) <= 4 and part.isdigit()\
and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
LegacyToken(penalty=0.5, token=0, count=1, addr_count=1,
lookup_word=part, word_token=part,
category=None, country=None,
operator=None, is_indexed=True))
def rerank_tokens(self, query: qmod.QueryStruct) -> None:
""" Add penalties to tokens that depend on presence of other token.
"""
for _, node, tlist in query.iter_token_lists():
if tlist.ttype == qmod.TokenType.POSTCODE:
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \
and (repl.ttype != qmod.TokenType.HOUSENUMBER
or len(tlist.tokens[0].lookup_word) > 4):
repl.add_penalty(0.39)
elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
and len(tlist.tokens[0].lookup_word) <= 3:
if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
repl.add_penalty(0.5 - tlist.tokens[0].penalty)
def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
yield ['type', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
for node in query.nodes:
for tlist in node.starting:
for token in tlist.tokens:
t = cast(LegacyToken, token)
yield [tlist.ttype.name, t.token, t.word_token or '',
t.lookup_word or '', t.penalty, t.count, t.info]
async def create_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
""" Create and set up a new query analyzer for a database based
on the legacy tokenizer.
"""
out = LegacyQueryAnalyzer(conn)
await out.setup()
return out

View File

@@ -0,0 +1,297 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Datastructures for a tokenized query.
"""
from typing import List, Tuple, Optional, Iterator
from abc import ABC, abstractmethod
import dataclasses
import enum
class BreakType(enum.Enum):
""" Type of break between tokens.
"""
START = '<'
""" Begin of the query. """
END = '>'
""" End of the query. """
PHRASE = ','
""" Break between two phrases. """
WORD = ' '
""" Break between words. """
PART = '-'
""" Break inside a word, for example a hyphen or apostrophe. """
TOKEN = '`'
""" Break created as a result of tokenization.
This may happen in languages without spaces between words.
"""
class TokenType(enum.Enum):
""" Type of token.
"""
WORD = enum.auto()
""" Full name of a place. """
PARTIAL = enum.auto()
""" Word term without breaks, does not necessarily represent a full name. """
HOUSENUMBER = enum.auto()
""" Housenumber term. """
POSTCODE = enum.auto()
""" Postal code term. """
COUNTRY = enum.auto()
""" Country name or reference. """
QUALIFIER = enum.auto()
""" Special term used together with name (e.g. _Hotel_ Bellevue). """
NEAR_ITEM = enum.auto()
""" Special term used as searchable object(e.g. supermarket in ...). """
class PhraseType(enum.Enum):
""" Designation of a phrase.
"""
NONE = 0
""" No specific designation (i.e. source is free-form query). """
AMENITY = enum.auto()
""" Contains name or type of a POI. """
STREET = enum.auto()
""" Contains a street name optionally with a housenumber. """
CITY = enum.auto()
""" Contains the postal city. """
COUNTY = enum.auto()
""" Contains the equivalent of a county. """
STATE = enum.auto()
""" Contains a state or province. """
POSTCODE = enum.auto()
""" Contains a postal code. """
COUNTRY = enum.auto()
""" Contains the country name or code. """
def compatible_with(self, ttype: TokenType,
is_full_phrase: bool) -> bool:
""" Check if the given token type can be used with the phrase type.
"""
if self == PhraseType.NONE:
return not is_full_phrase or ttype != TokenType.QUALIFIER
if self == PhraseType.AMENITY:
return ttype in (TokenType.WORD, TokenType.PARTIAL)\
or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\
or (not is_full_phrase and ttype == TokenType.QUALIFIER)
if self == PhraseType.STREET:
return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
if self == PhraseType.POSTCODE:
return ttype == TokenType.POSTCODE
if self == PhraseType.COUNTRY:
return ttype == TokenType.COUNTRY
return ttype in (TokenType.WORD, TokenType.PARTIAL)
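# For example, PhraseType.STREET.compatible_with(TokenType.HOUSENUMBER, False)
# is True, while PhraseType.POSTCODE accepts nothing but TokenType.POSTCODE.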
@dataclasses.dataclass
class Token(ABC):
""" Base type for tokens.
Specific query analyzers must implement the concrete token class.
"""
penalty: float
token: int
count: int
addr_count: int
lookup_word: str
is_indexed: bool
@abstractmethod
def get_category(self) -> Tuple[str, str]:
""" Return the category restriction for qualifier terms and
category objects.
"""
@dataclasses.dataclass
class TokenRange:
""" Indexes of query nodes over which a token spans.
"""
start: int
end: int
def __lt__(self, other: 'TokenRange') -> bool:
return self.end <= other.start
def __le__(self, other: 'TokenRange') -> bool:
return NotImplemented
def __gt__(self, other: 'TokenRange') -> bool:
return self.start >= other.end
def __ge__(self, other: 'TokenRange') -> bool:
return NotImplemented
def replace_start(self, new_start: int) -> 'TokenRange':
""" Return a new token range with the new start.
"""
return TokenRange(new_start, self.end)
def replace_end(self, new_end: int) -> 'TokenRange':
""" Return a new token range with the new end.
"""
return TokenRange(self.start, new_end)
def split(self, index: int) -> Tuple['TokenRange', 'TokenRange']:
""" Split the span into two spans at the given index.
The index must be within the span.
"""
return self.replace_end(index), self.replace_start(index)
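# Example: TokenRange(2, 5).split(3) returns (TokenRange(2, 3), TokenRange(3, 5)).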
@dataclasses.dataclass
class TokenList:
""" List of all tokens of a given type going from one breakpoint to another.
"""
end: int
ttype: TokenType
tokens: List[Token]
def add_penalty(self, penalty: float) -> None:
""" Add the given penalty to all tokens in the list.
"""
for token in self.tokens:
token.penalty += penalty
@dataclasses.dataclass
class QueryNode:
""" A node of the query representing a break between terms.
"""
btype: BreakType
ptype: PhraseType
starting: List[TokenList] = dataclasses.field(default_factory=list)
def has_tokens(self, end: int, *ttypes: TokenType) -> bool:
""" Check if there are tokens of the given types ending at the
given node.
"""
return any(tl.end == end and tl.ttype in ttypes for tl in self.starting)
def get_tokens(self, end: int, ttype: TokenType) -> Optional[List[Token]]:
""" Get the list of tokens of the given type starting at this node
and ending at the node 'end'. Returns 'None' if no such
tokens exist.
"""
for tlist in self.starting:
if tlist.end == end and tlist.ttype == ttype:
return tlist.tokens
return None
@dataclasses.dataclass
class Phrase:
""" A normalized query part. Phrases may be typed which means that
they then represent a specific part of the address.
"""
ptype: PhraseType
text: str
class QueryStruct:
""" A tokenized search query together with the normalized source
from which the tokens have been parsed.
The query contains a list of nodes that represent the breaks
between words. Tokens span between nodes, which don't necessarily
need to be direct neighbours. Thus the query is represented as a
directed acyclic graph.
When created, a query contains a single node: the start of the
query. Further nodes can be added by appending to 'nodes'.
"""
def __init__(self, source: List[Phrase]) -> None:
self.source = source
self.nodes: List[QueryNode] = \
[QueryNode(BreakType.START, source[0].ptype if source else PhraseType.NONE)]
def num_token_slots(self) -> int:
""" Return the length of the query in vertice steps.
"""
return len(self.nodes) - 1
def add_node(self, btype: BreakType, ptype: PhraseType) -> None:
""" Append a new break node with the given break type.
The phrase type denotes the type for any tokens starting
at the node.
"""
self.nodes.append(QueryNode(btype, ptype))
def add_token(self, trange: TokenRange, ttype: TokenType, token: Token) -> None:
""" Add a token to the query. 'start' and 'end' are the indexes of the
nodes from which to which the token spans. The indexes must exist
and are expected to be in the same phrase.
'ttype' denotes the type of the token and 'token' the token to
be inserted.
If the token type is not compatible with the phrase it should
be added to, then the token is silently dropped.
"""
snode = self.nodes[trange.start]
full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\
and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END)
if snode.ptype.compatible_with(ttype, full_phrase):
tlist = snode.get_tokens(trange.end, ttype)
if tlist is None:
snode.starting.append(TokenList(trange.end, ttype, [token]))
else:
tlist.append(token)
def get_tokens(self, trange: TokenRange, ttype: TokenType) -> List[Token]:
""" Get the list of tokens of a given type, spanning the given
nodes. The nodes must exist. If no tokens exist, an
empty list is returned.
"""
return self.nodes[trange.start].get_tokens(trange.end, ttype) or []
def get_partials_list(self, trange: TokenRange) -> List[Token]:
""" Create a list of partial tokens between the given nodes.
The list is composed of the first token of type PARTIAL
going to the subsequent node. Such PARTIAL tokens are
assumed to exist.
"""
return [next(iter(self.get_tokens(TokenRange(i, i+1), TokenType.PARTIAL)))
for i in range(trange.start, trange.end)]
def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
""" Iterator over all token lists in the query.
"""
for i, node in enumerate(self.nodes):
for tlist in node.starting:
yield i, node, tlist
def find_lookup_word_by_id(self, token: int) -> str:
""" Find the first token with the given token ID and return
its lookup word. Returns 'None' if no such token exists.
The function is very slow and must only be used for
debugging.
"""
for node in self.nodes:
for tlist in node.starting:
for t in tlist.tokens:
if t.token == token:
return f"[{tlist.ttype.name[0]}]{t.lookup_word}"
return 'None'
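# Rough sketch of how the pieces above fit together (token objects are
# tokenizer-specific, so 'some_token' is only a placeholder):
#   q = QueryStruct([Phrase(PhraseType.NONE, 'main st 12')])
#   q.add_node(BreakType.WORD, PhraseType.NONE)   # node after 'main'
#   q.add_node(BreakType.WORD, PhraseType.NONE)   # node after 'st'
#   q.add_node(BreakType.END, PhraseType.NONE)    # node after '12'
#   q.add_token(TokenRange(0, 1), TokenType.PARTIAL, some_token)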

View File

@@ -0,0 +1,54 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Factory for creating a query analyzer for the configured tokenizer.
"""
from typing import List, cast, TYPE_CHECKING
from abc import ABC, abstractmethod
from pathlib import Path
import importlib
from ..logging import log
from ..connection import SearchConnection
if TYPE_CHECKING:
from .query import Phrase, QueryStruct
class AbstractQueryAnalyzer(ABC):
""" Class for analysing incoming queries.
Query analyzers are tied to the tokenizer used on import.
"""
@abstractmethod
async def analyze_query(self, phrases: List['Phrase']) -> 'QueryStruct':
""" Analyze the given phrases and return the tokenized query.
"""
@abstractmethod
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form. That is the
standardized form search will work with. All information removed
at this stage is inevitably lost.
"""
async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
""" Create a query analyzer for the tokenizer used by the database.
"""
name = await conn.get_property('tokenizer')
src_file = Path(__file__).parent / f'{name}_tokenizer.py'
if not src_file.is_file():
log().comment(f"No tokenizer named '{name}' available. Database not set up properly.")
raise RuntimeError('Tokenizer not found')
module = importlib.import_module(f'nominatim.api.search.{name}_tokenizer')
return cast(AbstractQueryAnalyzer, await module.create_query_analyzer(conn))
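# The imported module is expected to expose an asynchronous
# 'create_query_analyzer(conn)' factory, as the ICU and legacy tokenizer
# modules above do.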

View File

@@ -0,0 +1,422 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Create query interpretations where each vertex in the query is assigned
a specific function (expressed as a token type).
"""
from typing import Optional, List, Iterator
import dataclasses
from ..logging import log
from . import query as qmod
# pylint: disable=too-many-return-statements,too-many-branches
@dataclasses.dataclass
class TypedRange:
""" A token range for a specific type of tokens.
"""
ttype: qmod.TokenType
trange: qmod.TokenRange
PENALTY_TOKENCHANGE = {
qmod.BreakType.START: 0.0,
qmod.BreakType.END: 0.0,
qmod.BreakType.PHRASE: 0.0,
qmod.BreakType.WORD: 0.1,
qmod.BreakType.PART: 0.2,
qmod.BreakType.TOKEN: 0.4
}
TypedRangeSeq = List[TypedRange]
@dataclasses.dataclass
class TokenAssignment: # pylint: disable=too-many-instance-attributes
""" Representation of a possible assignment of token types
to the tokens in a tokenized query.
"""
penalty: float = 0.0
name: Optional[qmod.TokenRange] = None
address: List[qmod.TokenRange] = dataclasses.field(default_factory=list)
housenumber: Optional[qmod.TokenRange] = None
postcode: Optional[qmod.TokenRange] = None
country: Optional[qmod.TokenRange] = None
near_item: Optional[qmod.TokenRange] = None
qualifier: Optional[qmod.TokenRange] = None
@staticmethod
def from_ranges(ranges: TypedRangeSeq) -> 'TokenAssignment':
""" Create a new token assignment from a sequence of typed spans.
"""
out = TokenAssignment()
for token in ranges:
if token.ttype == qmod.TokenType.PARTIAL:
out.address.append(token.trange)
elif token.ttype == qmod.TokenType.HOUSENUMBER:
out.housenumber = token.trange
elif token.ttype == qmod.TokenType.POSTCODE:
out.postcode = token.trange
elif token.ttype == qmod.TokenType.COUNTRY:
out.country = token.trange
elif token.ttype == qmod.TokenType.NEAR_ITEM:
out.near_item = token.trange
elif token.ttype == qmod.TokenType.QUALIFIER:
out.qualifier = token.trange
return out
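# Example: ranges [PARTIAL(0, 2), HOUSENUMBER(2, 3), PARTIAL(3, 5)] produce an
# assignment with address=[TokenRange(0, 2), TokenRange(3, 5)] and
# housenumber=TokenRange(2, 3); name stays None until get_assignments() below
# splits it out of the address ranges.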
class _TokenSequence:
""" Working state used to put together the token assignments.
Represents an intermediate state while traversing the tokenized
query.
"""
def __init__(self, seq: TypedRangeSeq,
direction: int = 0, penalty: float = 0.0) -> None:
self.seq = seq
self.direction = direction
self.penalty = penalty
def __str__(self) -> str:
seq = ''.join(f'[{r.trange.start} - {r.trange.end}: {r.ttype.name}]' for r in self.seq)
return f'{seq} (dir: {self.direction}, penalty: {self.penalty})'
@property
def end_pos(self) -> int:
""" Return the index of the global end of the current sequence.
"""
return self.seq[-1].trange.end if self.seq else 0
def has_types(self, *ttypes: qmod.TokenType) -> bool:
""" Check if the current sequence contains any typed ranges of
the given types.
"""
return any(s.ttype in ttypes for s in self.seq)
def is_final(self) -> bool:
""" Return true when the sequence cannot be extended by any
form of token anymore.
"""
# Country and category must be the final term for left-to-right
return len(self.seq) > 1 and \
self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)
def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
""" Check if the give token type is appendable to the existing sequence.
Returns None if the token type is not appendable, otherwise the
new direction of the sequence after adding such a type. The
token is not added.
"""
if ttype == qmod.TokenType.WORD:
return None
if not self.seq:
# Append unconditionally to the empty list
if ttype == qmod.TokenType.COUNTRY:
return -1
if ttype in (qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
return 1
return self.direction
# Name tokens are always acceptable and don't change direction
if ttype == qmod.TokenType.PARTIAL:
# qualifiers cannot appear in the middle of the query. They need
# to be near the next phrase.
if self.direction == -1 \
and any(t.ttype == qmod.TokenType.QUALIFIER for t in self.seq[:-1]):
return None
return self.direction
# Other tokens may only appear once
if self.has_types(ttype):
return None
if ttype == qmod.TokenType.HOUSENUMBER:
if self.direction == 1:
if len(self.seq) == 1 and self.seq[0].ttype == qmod.TokenType.QUALIFIER:
return None
if len(self.seq) > 2 \
or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
return None # direction left-to-right: housenumber must come before anything
elif self.direction == -1 \
or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
return -1 # force direction right-to-left if after other terms
return self.direction
if ttype == qmod.TokenType.POSTCODE:
if self.direction == -1:
if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
return None
return -1
if self.direction == 1:
return None if self.has_types(qmod.TokenType.COUNTRY) else 1
if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
return 1
return self.direction
if ttype == qmod.TokenType.COUNTRY:
return None if self.direction == -1 else 1
if ttype == qmod.TokenType.NEAR_ITEM:
return self.direction
if ttype == qmod.TokenType.QUALIFIER:
if self.direction == 1:
if (len(self.seq) == 1
and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
or (len(self.seq) == 2
and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
and self.seq[1].ttype == qmod.TokenType.PARTIAL):
return 1
return None
if self.direction == -1:
return -1
tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
if len(tempseq) == 0:
return 1
if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
return None
if len(tempseq) > 1 or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
return -1
return 0
return None
def advance(self, ttype: qmod.TokenType, end_pos: int,
btype: qmod.BreakType) -> Optional['_TokenSequence']:
""" Return a new token sequence state with the given token type
extended.
"""
newdir = self.appendable(ttype)
if newdir is None:
return None
if not self.seq:
newseq = [TypedRange(ttype, qmod.TokenRange(0, end_pos))]
new_penalty = 0.0
else:
last = self.seq[-1]
if btype != qmod.BreakType.PHRASE and last.ttype == ttype:
# extend the existing range
newseq = self.seq[:-1] + [TypedRange(ttype, last.trange.replace_end(end_pos))]
new_penalty = 0.0
else:
# start a new range
newseq = list(self.seq) + [TypedRange(ttype,
qmod.TokenRange(last.trange.end, end_pos))]
new_penalty = PENALTY_TOKENCHANGE[btype]
return _TokenSequence(newseq, newdir, self.penalty + new_penalty)
def _adapt_penalty_from_priors(self, priors: int, new_dir: int) -> bool:
if priors >= 2:
if self.direction == 0:
self.direction = new_dir
else:
if priors == 2:
self.penalty += 0.8
else:
return False
return True
def recheck_sequence(self) -> bool:
""" Check that the sequence is a fully valid token assignment
and adapt direction and penalties further if necessary.
This function catches some impossible assignments that need
forward context and can therefore not be excluded when building
the assignment.
"""
# housenumbers may not be further than 2 words from the beginning.
# If there are two words in front, give it a penalty.
hnrpos = next((i for i, tr in enumerate(self.seq)
if tr.ttype == qmod.TokenType.HOUSENUMBER),
None)
if hnrpos is not None:
if self.direction != -1:
priors = sum(1 for t in self.seq[:hnrpos] if t.ttype == qmod.TokenType.PARTIAL)
if not self._adapt_penalty_from_priors(priors, -1):
return False
if self.direction != 1:
priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
if not self._adapt_penalty_from_priors(priors, 1):
return False
if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
self.penalty += 1.0
return True
def _get_assignments_postcode(self, base: TokenAssignment,
query_len: int) -> Iterator[TokenAssignment]:
""" Yield possible assignments of Postcode searches with an
address component.
"""
assert base.postcode is not None
if (base.postcode.start == 0 and self.direction != -1)\
or (base.postcode.end == query_len and self.direction != 1):
log().comment('postcode search')
# <address>,<postcode> should give preference to address search
if base.postcode.start == 0:
penalty = self.penalty
self.direction = -1 # name searches are only possible backwards
else:
penalty = self.penalty + 0.1
self.direction = 1 # name searches are only possible forwards
yield dataclasses.replace(base, penalty=penalty)
def _get_assignments_address_forward(self, base: TokenAssignment,
query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
""" Yield possible assignments of address searches with
left-to-right reading.
"""
first = base.address[0]
log().comment('first word = name')
yield dataclasses.replace(base, penalty=self.penalty,
name=first, address=base.address[1:])
# To paraphrase:
# * if another name term comes after the first one and before the
# housenumber
# * a qualifier comes after the name
# * the containing phrase is strictly typed
if (base.housenumber and first.end < base.housenumber.start)\
or (base.qualifier and base.qualifier > first)\
or (query.nodes[first.start].ptype != qmod.PhraseType.NONE):
return
penalty = self.penalty
# Penalty for:
# * <name>, <street>, <housenumber> , ...
# * queries that are comma-separated
if (base.housenumber and base.housenumber > first) or len(query.source) > 1:
penalty += 0.25
for i in range(first.start + 1, first.end):
name, addr = first.split(i)
log().comment(f'split first word = name ({i - first.start})')
yield dataclasses.replace(base, name=name, address=[addr] + base.address[1:],
penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])
def _get_assignments_address_backward(self, base: TokenAssignment,
query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
""" Yield possible assignments of address searches with
right-to-left reading.
"""
last = base.address[-1]
if self.direction == -1 or len(base.address) > 1:
log().comment('last word = name')
yield dataclasses.replace(base, penalty=self.penalty,
name=last, address=base.address[:-1])
# To paraphrase:
# * if another name term comes before the last one and after the
# housenumber
# * a qualifier comes before the name
# * the containing phrase is strictly typed
if (base.housenumber and last.start > base.housenumber.end)\
or (base.qualifier and base.qualifier < last)\
or (query.nodes[last.start].ptype != qmod.PhraseType.NONE):
return
penalty = self.penalty
if base.housenumber and base.housenumber < last:
penalty += 0.4
if len(query.source) > 1:
penalty += 0.25
for i in range(last.start + 1, last.end):
addr, name = last.split(i)
log().comment(f'split last word = name ({i - last.start})')
yield dataclasses.replace(base, name=name, address=base.address[:-1] + [addr],
penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])
def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
""" Yield possible assignments for the current sequence.
This function splits up general name assignments into name
and address and yields all possible variants of that.
"""
base = TokenAssignment.from_ranges(self.seq)
num_addr_tokens = sum(t.end - t.start for t in base.address)
if num_addr_tokens > 50:
return
# Postcode search (postcode-only search is covered in next case)
if base.postcode is not None and base.address:
yield from self._get_assignments_postcode(base, query.num_token_slots())
# Postcode or country-only search
if not base.address:
if not base.housenumber and (base.postcode or base.country or base.near_item):
log().comment('postcode/country search')
yield dataclasses.replace(base, penalty=self.penalty)
else:
# <postcode>,<address> should give preference to postcode search
if base.postcode and base.postcode.start == 0:
self.penalty += 0.1
# Right-to-left reading of the address
if self.direction != -1:
yield from self._get_assignments_address_forward(base, query)
# Left-to-right reading of the address
if self.direction != 1:
yield from self._get_assignments_address_backward(base, query)
# variant for special housenumber searches
if base.housenumber and not base.qualifier:
yield dataclasses.replace(base, penalty=self.penalty)
def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
""" Return possible word type assignments to word positions.
The assignments are computed from the concrete tokens listed
in the tokenized query.
The result includes the penalty for transitions from one word type to
another. It does not include penalties for transitions within a
type.
"""
todo = [_TokenSequence([], direction=0 if query.source[0].ptype == qmod.PhraseType.NONE else 1)]
while todo:
state = todo.pop()
node = query.nodes[state.end_pos]
for tlist in node.starting:
newstate = state.advance(tlist.ttype, tlist.end, node.btype)
if newstate is not None:
if newstate.end_pos == query.num_token_slots():
if newstate.recheck_sequence():
log().var_dump('Assignment', newstate)
yield from newstate.get_assignments(query)
elif not newstate.is_final():
todo.append(newstate)
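# Note: 'todo' is used as a stack (todo.pop() removes the most recently added
# state), so the token graph is traversed depth-first until every candidate
# sequence either reaches the last node or cannot be extended any further.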

View File

@@ -0,0 +1,194 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Server implementation using the falcon webserver framework.
"""
from typing import Optional, Mapping, cast, Any, List
from pathlib import Path
import datetime as dt
import asyncio
from falcon.asgi import App, Request, Response
from nominatim_core.config import Configuration
from ...core import NominatimAPIAsync
from ... import v1 as api_impl
from ... import logging as loglib
class HTTPNominatimError(Exception):
""" A special exception class for errors raised during processing.
"""
def __init__(self, msg: str, status: int, content_type: str) -> None:
self.msg = msg
self.status = status
self.content_type = content_type
async def nominatim_error_handler(req: Request, resp: Response, #pylint: disable=unused-argument
exception: HTTPNominatimError,
_: Any) -> None:
""" Special error handler that passes message and content type as
per exception info.
"""
resp.status = exception.status
resp.text = exception.msg
resp.content_type = exception.content_type
async def timeout_error_handler(req: Request, resp: Response, #pylint: disable=unused-argument
exception: TimeoutError, #pylint: disable=unused-argument
_: Any) -> None:
""" Special error handler that passes message and content type as
per exception info.
"""
resp.status = 503
loglib.log().comment('Aborted: Query took too long to process.')
logdata = loglib.get_and_disable()
if logdata:
resp.text = logdata
resp.content_type = 'text/html; charset=utf-8'
else:
resp.text = "Query took too long to process."
resp.content_type = 'text/plain; charset=utf-8'
class ParamWrapper(api_impl.ASGIAdaptor):
""" Adaptor class for server glue to Falcon framework.
"""
def __init__(self, req: Request, resp: Response,
config: Configuration) -> None:
self.request = req
self.response = resp
self._config = config
def get(self, name: str, default: Optional[str] = None) -> Optional[str]:
return cast(Optional[str], self.request.get_param(name, default=default))
def get_header(self, name: str, default: Optional[str] = None) -> Optional[str]:
return cast(Optional[str], self.request.get_header(name, default=default))
def error(self, msg: str, status: int = 400) -> HTTPNominatimError:
return HTTPNominatimError(msg, status, self.content_type)
def create_response(self, status: int, output: str, num_results: int) -> None:
self.response.context.num_results = num_results
self.response.status = status
self.response.text = output
self.response.content_type = self.content_type
def base_uri(self) -> str:
return cast(str, self.request.forwarded_prefix)
def config(self) -> Configuration:
return self._config
class EndpointWrapper:
""" Converter for server glue endpoint functions to Falcon request handlers.
"""
def __init__(self, name: str, func: api_impl.EndpointFunc, api: NominatimAPIAsync) -> None:
self.name = name
self.func = func
self.api = api
async def on_get(self, req: Request, resp: Response) -> None:
""" Implementation of the endpoint.
"""
await self.func(self.api, ParamWrapper(req, resp, self.api.config))
class FileLoggingMiddleware:
""" Middleware to log selected requests into a file.
"""
def __init__(self, file_name: str):
self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732
async def process_request(self, req: Request, _: Response) -> None:
""" Callback before the request starts timing.
"""
req.context.start = dt.datetime.now(tz=dt.timezone.utc)
async def process_response(self, req: Request, resp: Response,
resource: Optional[EndpointWrapper],
req_succeeded: bool) -> None:
""" Callback after requests writes to the logfile. It only
writes logs for successful requests for search, reverse and lookup.
"""
if not req_succeeded or resource is None or resp.status != 200\
or resource.name not in ('reverse', 'search', 'lookup', 'details'):
return
finish = dt.datetime.now(tz=dt.timezone.utc)
duration = (finish - req.context.start).total_seconds()
params = req.scope['query_string'].decode('utf8')
start = req.context.start.replace(tzinfo=None)\
.isoformat(sep=' ', timespec='milliseconds')
self.fd.write(f"[{start}] "
f"{duration:.4f} {getattr(resp.context, 'num_results', 0)} "
f'{resource.name} "{params}"\n')
class APIShutdown:
""" Middleware that closes any open database connections.
"""
def __init__(self, api: NominatimAPIAsync) -> None:
self.api = api
async def process_shutdown(self, *_: Any) -> None:
"""Process the ASGI lifespan shutdown event.
"""
await self.api.close()
def get_application(project_dir: Path,
environ: Optional[Mapping[str, str]] = None) -> App:
""" Create a Nominatim Falcon ASGI application.
"""
api = NominatimAPIAsync(project_dir, environ)
middleware: List[object] = [APIShutdown(api)]
log_file = api.config.LOG_FILE
if log_file:
middleware.append(FileLoggingMiddleware(log_file))
app = App(cors_enable=api.config.get_bool('CORS_NOACCESSCONTROL'),
middleware=middleware)
app.add_error_handler(HTTPNominatimError, nominatim_error_handler)
app.add_error_handler(TimeoutError, timeout_error_handler)
# different from TimeoutError in Python <= 3.10
app.add_error_handler(asyncio.TimeoutError, timeout_error_handler)
legacy_urls = api.config.get_bool('SERVE_LEGACY_URLS')
for name, func in api_impl.ROUTES:
endpoint = EndpointWrapper(name, func, api)
app.add_route(f"/{name}", endpoint)
if legacy_urls:
app.add_route(f"/{name}.php", endpoint)
return app
def run_wsgi() -> App:
""" Entry point for uvicorn.
Make sure uvicorn is run from the project directory.
"""
return get_application(Path('.'))
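# A possible way to serve this application (the module path depends on how the
# package is installed and is only illustrative here):
#   uvicorn --factory some.package.path.server:run_wsgi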

View File

@@ -0,0 +1,174 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Server implementation using the starlette webserver framework.
"""
from typing import Any, Optional, Mapping, Callable, cast, Coroutine, Dict, Awaitable
from pathlib import Path
import datetime as dt
import asyncio
from starlette.applications import Starlette
from starlette.routing import Route
from starlette.exceptions import HTTPException
from starlette.responses import Response, PlainTextResponse, HTMLResponse
from starlette.requests import Request
from starlette.middleware import Middleware
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from starlette.middleware.cors import CORSMiddleware
from nominatim_core.config import Configuration
from ...core import NominatimAPIAsync
from ... import v1 as api_impl
from ... import logging as loglib
class ParamWrapper(api_impl.ASGIAdaptor):
""" Adaptor class for server glue to Starlette framework.
"""
def __init__(self, request: Request) -> None:
self.request = request
def get(self, name: str, default: Optional[str] = None) -> Optional[str]:
return self.request.query_params.get(name, default=default)
def get_header(self, name: str, default: Optional[str] = None) -> Optional[str]:
return self.request.headers.get(name, default)
def error(self, msg: str, status: int = 400) -> HTTPException:
return HTTPException(status, detail=msg,
headers={'content-type': self.content_type})
def create_response(self, status: int, output: str, num_results: int) -> Response:
self.request.state.num_results = num_results
return Response(output, status_code=status, media_type=self.content_type)
def base_uri(self) -> str:
scheme = self.request.url.scheme
host = self.request.url.hostname
port = self.request.url.port
root = self.request.scope['root_path']
if (scheme == 'http' and port == 80) or (scheme == 'https' and port == 443):
port = None
if port is not None:
return f"{scheme}://{host}:{port}{root}"
return f"{scheme}://{host}{root}"
def config(self) -> Configuration:
return cast(Configuration, self.request.app.state.API.config)
def _wrap_endpoint(func: api_impl.EndpointFunc)\
-> Callable[[Request], Coroutine[Any, Any, Response]]:
async def _callback(request: Request) -> Response:
return cast(Response, await func(request.app.state.API, ParamWrapper(request)))
return _callback
class FileLoggingMiddleware(BaseHTTPMiddleware):
""" Middleware to log selected requests into a file.
"""
def __init__(self, app: Starlette, file_name: str = ''):
super().__init__(app)
self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732
async def dispatch(self, request: Request,
call_next: RequestResponseEndpoint) -> Response:
start = dt.datetime.now(tz=dt.timezone.utc)
response = await call_next(request)
if response.status_code != 200:
return response
finish = dt.datetime.now(tz=dt.timezone.utc)
for endpoint in ('reverse', 'search', 'lookup', 'details'):
if request.url.path.startswith('/' + endpoint):
qtype = endpoint
break
else:
return response
duration = (finish - start).total_seconds()
params = request.scope['query_string'].decode('utf8')
self.fd.write(f"[{start.replace(tzinfo=None).isoformat(sep=' ', timespec='milliseconds')}] "
f"{duration:.4f} {getattr(request.state, 'num_results', 0)} "
f'{qtype} "{params}"\n')
return response
async def timeout_error(request: Request, #pylint: disable=unused-argument
_: Exception) -> Response:
""" Error handler for query timeouts.
"""
loglib.log().comment('Aborted: Query took too long to process.')
logdata = loglib.get_and_disable()
if logdata:
return HTMLResponse(logdata)
return PlainTextResponse("Query took too long to process.", status_code=503)
def get_application(project_dir: Path,
environ: Optional[Mapping[str, str]] = None,
debug: bool = True) -> Starlette:
""" Create a Nominatim falcon ASGI application.
"""
config = Configuration(project_dir, environ)
routes = []
legacy_urls = config.get_bool('SERVE_LEGACY_URLS')
for name, func in api_impl.ROUTES:
endpoint = _wrap_endpoint(func)
routes.append(Route(f"/{name}", endpoint=endpoint))
if legacy_urls:
routes.append(Route(f"/{name}.php", endpoint=endpoint))
middleware = []
if config.get_bool('CORS_NOACCESSCONTROL'):
middleware.append(Middleware(CORSMiddleware,
allow_origins=['*'],
allow_methods=['GET', 'OPTIONS'],
max_age=86400))
log_file = config.LOG_FILE
if log_file:
middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file))
exceptions: Dict[Any, Callable[[Request, Exception], Awaitable[Response]]] = {
TimeoutError: timeout_error,
asyncio.TimeoutError: timeout_error
}
async def _shutdown() -> None:
await app.state.API.close()
app = Starlette(debug=debug, routes=routes, middleware=middleware,
exception_handlers=exceptions,
on_shutdown=[_shutdown])
app.state.API = NominatimAPIAsync(project_dir, environ)
return app
def run_wsgi() -> Starlette:
""" Entry point for uvicorn.
"""
return get_application(Path('.'), debug=False)
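As a rough usage sketch (not part of this commit), the factory above can be handed directly to uvicorn; the dotted module path is an assumption based on this commit's package layout.
# Hedged sketch: serve the Starlette application with uvicorn.
import uvicorn

if __name__ == '__main__':
    # 'nominatim_api.server.starlette.server' is an assumed module path.
    uvicorn.run('nominatim_api.server.starlette.server:run_wsgi',
                factory=True, host='127.0.0.1', port=8088)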

View File

@@ -0,0 +1,221 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom functions and expressions for SQLAlchemy.
"""
from __future__ import annotations
from typing import Any
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from nominatim_core.typing import SaColumn
# pylint: disable=all
class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
""" Check for conditions that allow partial index use on
'idx_placex_geometry_reverse_lookupPolygon'.
The conditions need to be constant so that the query planner picks them up correctly
in prepared statements.
"""
name = 'PlacexGeometryReverseLookuppolygon'
inherit_cache = True
@compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
def _default_intersects(element: PlacexGeometryReverseLookuppolygon,
compiler: 'sa.Compiled', **kw: Any) -> str:
return ("(ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
" AND placex.rank_address between 4 and 25"
" AND placex.type != 'postcode'"
" AND placex.name is not null"
" AND placex.indexed_status = 0"
" AND placex.linked_place_id is null)")
@compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_intersects(element: PlacexGeometryReverseLookuppolygon,
compiler: 'sa.Compiled', **kw: Any) -> str:
return ("(ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')"
" AND placex.rank_address between 4 and 25"
" AND placex.type != 'postcode'"
" AND placex.name is not null"
" AND placex.indexed_status = 0"
" AND placex.linked_place_id is null)")
class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
name = 'IntersectsReverseDistance'
inherit_cache = True
def __init__(self, table: sa.Table, geom: SaColumn) -> None:
super().__init__(table.c.geometry,
table.c.rank_search, geom)
self.tablename = table.name
@compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
def default_reverse_place_diameter(element: IntersectsReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
table = element.tablename
return f"({table}.rank_address between 4 and 25"\
f" AND {table}.type != 'postcode'"\
f" AND {table}.name is not null"\
f" AND {table}.linked_place_id is null"\
f" AND {table}.osm_type = 'N'" + \
" AND ST_Buffer(%s, reverse_place_diameter(%s)) && %s)" % \
tuple(map(lambda c: compiler.process(c, **kw), element.clauses))
@compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_reverse_place_diameter(element: IntersectsReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
geom1, rank, geom2 = list(element.clauses)
table = element.tablename
return (f"({table}.rank_address between 4 and 25"\
f" AND {table}.type != 'postcode'"\
f" AND {table}.name is not null"\
f" AND {table}.linked_place_id is null"\
f" AND {table}.osm_type = 'N'"\
" AND MbrIntersects(%s, ST_Expand(%s, 14.0 * exp(-0.2 * %s) - 0.03))"\
f" AND {table}.place_id IN"\
" (SELECT place_id FROM placex_place_node_areas"\
" WHERE ROWID IN (SELECT ROWID FROM SpatialIndex"\
" WHERE f_table_name = 'placex_place_node_areas'"\
" AND search_frame = %s)))") % (
compiler.process(geom1, **kw),
compiler.process(geom2, **kw),
compiler.process(rank, **kw),
compiler.process(geom2, **kw))
class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
name = 'IsBelowReverseDistance'
inherit_cache = True
@compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
def default_is_below_reverse_distance(element: IsBelowReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
dist, rank = list(element.clauses)
return "%s < reverse_place_diameter(%s)" % (compiler.process(dist, **kw),
compiler.process(rank, **kw))
@compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_is_below_reverse_distance(element: IsBelowReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
dist, rank = list(element.clauses)
return "%s < 14.0 * exp(-0.2 * %s) - 0.03" % (compiler.process(dist, **kw),
compiler.process(rank, **kw))
class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
name = 'IsAddressPoint'
inherit_cache = True
def __init__(self, table: sa.Table) -> None:
super().__init__(table.c.rank_address,
table.c.housenumber, table.c.name)
@compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
def default_is_address_point(element: IsAddressPoint,
compiler: 'sa.Compiled', **kw: Any) -> str:
rank, hnr, name = list(element.clauses)
return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
compiler.process(rank, **kw),
compiler.process(hnr, **kw),
compiler.process(name, **kw))
@compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_is_address_point(element: IsAddressPoint,
compiler: 'sa.Compiled', **kw: Any) -> str:
rank, hnr, name = list(element.clauses)
return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
compiler.process(rank, **kw),
compiler.process(hnr, **kw),
compiler.process(name, **kw))
class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
""" Check if in the given list of names in parameters 1 any of the names
from the JSON array in parameter 2 are contained.
"""
name = 'CrosscheckNames'
inherit_cache = True
@compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
def compile_crosscheck_names(element: CrosscheckNames,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "coalesce(avals(%s) && ARRAY(SELECT * FROM json_array_elements_text(%s)), false)" % (
compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
def compile_sqlite_crosscheck_names(element: CrosscheckNames,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "EXISTS(SELECT *"\
" FROM json_each(%s) as name, json_each(%s) as match_name"\
" WHERE name.value = match_name.value)"\
% (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
""" Return elements of a json array as a set.
"""
name = 'JsonArrayEach'
inherit_cache = True
@compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
def default_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
return "json_array_elements(%s)" % compiler.process(element.clauses, **kw)
@compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
return "json_each(%s)" % compiler.process(element.clauses, **kw)
class Greatest(sa.sql.functions.GenericFunction[Any]):
""" Function to compute maximum of all its input parameters.
"""
name = 'greatest'
inherit_cache = True
@compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_greatest(element: Greatest, compiler: 'sa.Compiled', **kw: Any) -> str:
return "max(%s)" % compiler.process(element.clauses, **kw)
class RegexpWord(sa.sql.functions.GenericFunction[Any]):
""" Check if a full word is in a given string.
"""
name = 'RegexpWord'
inherit_cache = True
@compiles(RegexpWord, 'postgresql') # type: ignore[no-untyped-call, misc]
def postgres_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "%s ~* ('\\m(' || %s || ')\\M')::text" % (compiler.process(arg2, **kw), compiler.process(arg1, **kw))
@compiles(RegexpWord, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "regexp('\\b(' || %s || ')\\b', %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))

View File

@@ -0,0 +1,122 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom functions for SQLite.
"""
from typing import cast, Optional, Set, Any
import json
# pylint: disable=protected-access
def weigh_search(search_vector: Optional[str], rankings: str, default: float) -> float:
""" Custom weight function for search results.
"""
if search_vector is not None:
svec = [int(x) for x in search_vector.split(',')]
for rank in json.loads(rankings):
if all(r in svec for r in rank[1]):
return cast(float, rank[0])
return default
class ArrayIntersectFuzzy:
""" Compute the array of common elements of all input integer arrays.
Very large input parameters may be ignored to speed up
computation. Therefore, the result is a superset of common elements.
Input and output arrays are given as comma-separated lists.
"""
def __init__(self) -> None:
self.first = ''
self.values: Optional[Set[int]] = None
def step(self, value: Optional[str]) -> None:
""" Add the next array to the intersection.
"""
if value is not None:
if not self.first:
self.first = value
elif len(value) < 10000000:
if self.values is None:
self.values = {int(x) for x in self.first.split(',')}
self.values.intersection_update((int(x) for x in value.split(',')))
def finalize(self) -> str:
""" Return the final result.
"""
if self.values is not None:
return ','.join(map(str, self.values))
return self.first
class ArrayUnion:
""" Compute the set of all elements of the input integer arrays.
Input and output arrays are given as strings of comma-separated lists.
"""
def __init__(self) -> None:
self.values: Optional[Set[str]] = None
def step(self, value: Optional[str]) -> None:
""" Add the next array to the union.
"""
if value is not None:
if self.values is None:
self.values = set(value.split(','))
else:
self.values.update(value.split(','))
def finalize(self) -> str:
""" Return the final result.
"""
return '' if self.values is None else ','.join(self.values)
def array_contains(container: Optional[str], containee: Optional[str]) -> Optional[bool]:
""" Is the array 'containee' completely contained in array 'container'.
"""
if container is None or containee is None:
return None
vset = container.split(',')
return all(v in vset for v in containee.split(','))
def array_pair_contains(container1: Optional[str], container2: Optional[str],
containee: Optional[str]) -> Optional[bool]:
""" Is the array 'containee' completely contained in the union of
array 'container1' and array 'container2'.
"""
if container1 is None or container2 is None or containee is None:
return None
vset = container1.split(',') + container2.split(',')
return all(v in vset for v in containee.split(','))
def install_custom_functions(conn: Any) -> None:
""" Install helper functions for Nominatim into the given SQLite
database connection.
"""
conn.create_function('weigh_search', 3, weigh_search, deterministic=True)
conn.create_function('array_contains', 2, array_contains, deterministic=True)
conn.create_function('array_pair_contains', 3, array_pair_contains, deterministic=True)
_create_aggregate(conn, 'array_intersect_fuzzy', 1, ArrayIntersectFuzzy)
_create_aggregate(conn, 'array_union', 1, ArrayUnion)
async def _make_aggregate(aioconn: Any, *args: Any) -> None:
await aioconn._execute(aioconn._conn.create_aggregate, *args)
def _create_aggregate(conn: Any, name: str, nargs: int, aggregate: Any) -> None:
try:
conn.await_(_make_aggregate(conn._connection, name, nargs, aggregate))
except Exception as error: # pylint: disable=broad-exception-caught
conn._handle_exception(error)
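The scalar helpers above are plain Python and can be exercised directly on comma-separated strings; a minimal, illustrative check:
print(array_contains('1,2,3,4', '2,4'))          # True
print(array_pair_contains('1,2', '3,4', '2,3'))  # True

agg = ArrayIntersectFuzzy()
for arr in ('1,2,3', '2,3,4'):
    agg.step(arr)
print(agg.finalize())                            # '2,3' (element order not guaranteed)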

View File

@@ -0,0 +1,51 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Classes and functions related to the status call.
"""
from typing import Optional
import datetime as dt
import dataclasses
import sqlalchemy as sa
from .connection import SearchConnection
from .version import NOMINATIM_API_VERSION
@dataclasses.dataclass
class StatusResult:
""" Result of a call to the status API.
"""
status: int
message: str
software_version = NOMINATIM_API_VERSION
data_updated: Optional[dt.datetime] = None
database_version: Optional[str] = None
async def get_status(conn: SearchConnection) -> StatusResult:
""" Execute a status API call.
"""
status = StatusResult(0, 'OK')
# Last update date
sql = sa.select(conn.t.import_status.c.lastimportdate).limit(1)
status.data_updated = await conn.scalar(sql)
if status.data_updated is not None:
if status.data_updated.tzinfo is None:
status.data_updated = status.data_updated.replace(tzinfo=dt.timezone.utc)
else:
status.data_updated = status.data_updated.astimezone(dt.timezone.utc)
# Database version
try:
status.database_version = await conn.get_property('database_version')
except ValueError:
pass
return status
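A hedged sketch of querying the status through the public asynchronous API; the status() convenience method and the import path are assumptions based on this commit's public interface.
import asyncio
from pathlib import Path

import nominatim_api as napi  # assumed import path

async def main() -> None:
    api = napi.NominatimAPIAsync(Path('.'))  # project directory with the configuration
    try:
        result = await api.status()
        print(result.status, result.message, result.data_updated)
    finally:
        await api.close()

asyncio.run(main())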

550
src/nominatim_api/types.py Normal file
View File

@@ -0,0 +1,550 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Complex datatypes used by the Nominatim API.
"""
from typing import Optional, Union, Tuple, NamedTuple, TypeVar, Type, Dict, \
Any, List, Sequence
from collections import abc
import dataclasses
import enum
import math
from struct import unpack
from binascii import unhexlify
from nominatim_core.errors import UsageError
from .localization import Locales
# pylint: disable=no-member,too-many-boolean-expressions,too-many-instance-attributes
@dataclasses.dataclass
class PlaceID:
""" Reference a place by Nominatim's internal ID.
A PlaceID may reference a place from the main table placex, from
the interpolation tables or the postcode tables. Place IDs are not
stable between installations. Therefore, this type may only be used
with place IDs obtained from the same database.
"""
place_id: int
"""
The internal ID of the place to reference.
"""
@dataclasses.dataclass
class OsmID:
""" Reference a place by its OSM ID and potentially the basic category.
The OSM ID may refer to places in the main table placex and OSM
interpolation lines.
"""
osm_type: str
""" OSM type of the object. Must be one of `N`(node), `W`(way) or
`R`(relation).
"""
osm_id: int
""" The OSM ID of the object.
"""
osm_class: Optional[str] = None
""" The same OSM object may appear multiple times in the database under
different categories. The optional class parameter distinguishes between
these categories and corresponds to the key part of the category.
If there are multiple objects in the database and `osm_class` is
left out, then one of the objects is returned at random.
"""
def __post_init__(self) -> None:
if self.osm_type not in ('N', 'W', 'R'):
raise ValueError(f"Illegal OSM type '{self.osm_type}'. Must be one of N, W, R.")
PlaceRef = Union[PlaceID, OsmID]
class Point(NamedTuple):
""" A geographic point in WGS84 projection.
"""
x: float
y: float
@property
def lat(self) -> float:
""" Return the latitude of the point.
"""
return self.y
@property
def lon(self) -> float:
""" Return the longitude of the point.
"""
return self.x
def to_geojson(self) -> str:
""" Return the point in GeoJSON format.
"""
return f'{{"type": "Point","coordinates": [{self.x}, {self.y}]}}'
@staticmethod
def from_wkb(wkb: Union[str, bytes]) -> 'Point':
""" Create a point from EWKB as returned from the database.
"""
if isinstance(wkb, str):
wkb = unhexlify(wkb)
if len(wkb) != 25:
raise ValueError(f"Point wkb has unexpected length {len(wkb)}")
if wkb[0] == 0:
gtype, srid, x, y = unpack('>iidd', wkb[1:])
elif wkb[0] == 1:
gtype, srid, x, y = unpack('<iidd', wkb[1:])
else:
raise ValueError("WKB has unknown endian value.")
if gtype != 0x20000001:
raise ValueError("WKB must be a point geometry.")
if srid != 4326:
raise ValueError("Only WGS84 WKB supported.")
return Point(x, y)
@staticmethod
def from_param(inp: Any) -> 'Point':
""" Create a point from an input parameter. The parameter
may be given as a point, a string or a sequence of
strings or floats. Raises a UsageError if the format is
not correct.
"""
if isinstance(inp, Point):
return inp
seq: Sequence[str]
if isinstance(inp, str):
seq = inp.split(',')
elif isinstance(inp, abc.Sequence):
seq = inp
if len(seq) != 2:
raise UsageError('Point parameter needs 2 coordinates.')
try:
x, y = filter(math.isfinite, map(float, seq))
except ValueError as exc:
raise UsageError('Point parameter needs to be numbers.') from exc
if x < -180.0 or x > 180.0 or y < -90.0 or y > 90.0:
raise UsageError('Point coordinates invalid.')
return Point(x, y)
def to_wkt(self) -> str:
""" Return the WKT representation of the point.
"""
return f'POINT({self.x} {self.y})'
AnyPoint = Union[Point, Tuple[float, float]]
WKB_BBOX_HEADER_LE = b'\x01\x03\x00\x00\x20\xE6\x10\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00'
WKB_BBOX_HEADER_BE = b'\x00\x20\x00\x00\x03\x00\x00\x10\xe6\x00\x00\x00\x01\x00\x00\x00\x05'
class Bbox:
""" A bounding box in WGS84 projection.
The coordinates are available as an array in the 'coord'
property in the order (minx, miny, maxx, maxy).
"""
def __init__(self, minx: float, miny: float, maxx: float, maxy: float) -> None:
""" Create a new bounding box with the given coordinates in WGS84
projection.
"""
self.coords = (minx, miny, maxx, maxy)
@property
def minlat(self) -> float:
""" Southern-most latitude, corresponding to the minimum y coordinate.
"""
return self.coords[1]
@property
def maxlat(self) -> float:
""" Northern-most latitude, corresponding to the maximum y coordinate.
"""
return self.coords[3]
@property
def minlon(self) -> float:
""" Western-most longitude, corresponding to the minimum x coordinate.
"""
return self.coords[0]
@property
def maxlon(self) -> float:
""" Eastern-most longitude, corresponding to the maximum x coordinate.
"""
return self.coords[2]
@property
def area(self) -> float:
""" Return the area of the box in WGS84.
"""
return (self.coords[2] - self.coords[0]) * (self.coords[3] - self.coords[1])
def contains(self, pt: Point) -> bool:
""" Check if the point is inside or on the boundary of the box.
"""
return self.coords[0] <= pt[0] and self.coords[1] <= pt[1]\
and self.coords[2] >= pt[0] and self.coords[3] >= pt[1]
def to_wkt(self) -> str:
""" Return the WKT representation of the Bbox. This
is a simple polygon with four points.
"""
return 'POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))'\
.format(*self.coords) # pylint: disable=consider-using-f-string
@staticmethod
def from_wkb(wkb: Union[None, str, bytes]) -> 'Optional[Bbox]':
""" Create a Bbox from a bounding box polygon as returned by
the database. Returns `None` if the input value is None.
"""
if wkb is None:
return None
if isinstance(wkb, str):
wkb = unhexlify(wkb)
if len(wkb) != 97:
raise ValueError("WKB must be a bounding box polygon")
if wkb.startswith(WKB_BBOX_HEADER_LE):
x1, y1, _, _, x2, y2 = unpack('<dddddd', wkb[17:65])
elif wkb.startswith(WKB_BBOX_HEADER_BE):
x1, y1, _, _, x2, y2 = unpack('>dddddd', wkb[17:65])
else:
raise ValueError("WKB has wrong header")
return Bbox(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
@staticmethod
def from_point(pt: Point, buffer: float) -> 'Bbox':
""" Return a Bbox around the point with the buffer added to all sides.
"""
return Bbox(pt[0] - buffer, pt[1] - buffer,
pt[0] + buffer, pt[1] + buffer)
@staticmethod
def from_param(inp: Any) -> 'Bbox':
""" Return a Bbox from an input parameter. The box may be
given as a Bbox, a string or a list of strings or numbers.
Raises a UsageError if the format is incorrect.
"""
if isinstance(inp, Bbox):
return inp
seq: Sequence[str]
if isinstance(inp, str):
seq = inp.split(',')
elif isinstance(inp, abc.Sequence):
seq = inp
if len(seq) != 4:
raise UsageError('Bounding box parameter needs 4 coordinates.')
try:
x1, y1, x2, y2 = filter(math.isfinite, map(float, seq))
except ValueError as exc:
raise UsageError('Bounding box parameter needs to be numbers.') from exc
x1 = min(180, max(-180, x1))
x2 = min(180, max(-180, x2))
y1 = min(90, max(-90, y1))
y2 = min(90, max(-90, y2))
if x1 == x2 or y1 == y2:
raise UsageError('Bounding box with invalid parameters.')
return Bbox(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
class GeometryFormat(enum.Flag):
""" All search functions support returning the full geometry of a place in
various formats. The internal geometry is converted by PostGIS to
the desired format and then returned as a string. It is possible to
request multiple formats at the same time.
"""
NONE = 0
""" No geometry requested. Alias for a empty flag.
"""
GEOJSON = enum.auto()
"""
[GeoJSON](https://geojson.org/) format
"""
KML = enum.auto()
"""
[KML](https://en.wikipedia.org/wiki/Keyhole_Markup_Language) format
"""
SVG = enum.auto()
"""
[SVG](http://www.w3.org/TR/SVG/paths.html) format
"""
TEXT = enum.auto()
"""
[WKT](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) format
"""
class DataLayer(enum.Flag):
""" The `DataLayer` flag type defines the layers that can be selected
for reverse and forward search.
"""
ADDRESS = enum.auto()
""" The address layer contains all places relevant for addresses:
fully qualified addresses with a house number (or a house name equivalent,
for some addresses) and places that can be part of an address like
roads, cities, states.
"""
POI = enum.auto()
""" Layer for points of interest like shops, restaurants but also
recycling bins or postboxes.
"""
RAILWAY = enum.auto()
""" Layer with railway features including tracks and other infrastructure.
Note that in Nominatim's standard configuration, only very few railway
features are imported into the database. Thus a custom configuration
is required to make full use of this layer.
"""
NATURAL = enum.auto()
""" Layer with natural features like rivers, lakes and mountains.
"""
MANMADE = enum.auto()
""" Layer with other human-made features and boundaries. This layer is
the catch-all and includes all features not covered by the other
layers. A typical example for this layer are national park boundaries.
"""
def format_country(cc: Any) -> List[str]:
""" Extract a list of country codes from the input which may be either
a string or list of strings. Filters out all values that are not
a two-letter string.
"""
clist: Sequence[str]
if isinstance(cc, str):
clist = cc.split(',')
elif isinstance(cc, abc.Sequence):
clist = cc
else:
raise UsageError("Parameter 'country' needs to be a comma-separated list "
"or a Python list of strings.")
return [cc.lower() for cc in clist if isinstance(cc, str) and len(cc) == 2]
def format_excluded(ids: Any) -> List[int]:
""" Extract a list of place ids from the input which may be either
a string or a list of strings or ints. Ignores empty values but
throws a UsageError on anything that cannot be converted to int.
"""
plist: Sequence[str]
if isinstance(ids, str):
plist = [s.strip() for s in ids.split(',')]
elif isinstance(ids, abc.Sequence):
plist = ids
else:
raise UsageError("Parameter 'excluded' needs to be a comma-separated list "
"or a Python list of numbers.")
if not all(isinstance(i, int) or
(isinstance(i, str) and (not i or i.isdigit())) for i in plist):
raise UsageError("Parameter 'excluded' only takes place IDs.")
return [int(id) for id in plist if id] or [0]
def format_categories(categories: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
""" Extract a list of categories. Currently a noop.
"""
return categories
TParam = TypeVar('TParam', bound='LookupDetails') # pylint: disable=invalid-name
@dataclasses.dataclass
class LookupDetails:
""" Collection of parameters that define which kind of details are
returned with a lookup or details result.
"""
geometry_output: GeometryFormat = GeometryFormat.NONE
""" Add the full geometry of the place to the result. Multiple
formats may be selected. Note that geometries can become quite large.
"""
address_details: bool = False
""" Get detailed information on the places that make up the address
for the result.
"""
linked_places: bool = False
""" Get detailed information on the places that link to the result.
"""
parented_places: bool = False
""" Get detailed information on all places that this place is a parent
for, i.e. all places for which it provides the address details.
Only POI places can have parents.
"""
keywords: bool = False
""" Add information about the search terms used for this place.
"""
geometry_simplification: float = 0.0
""" Simplification factor for a geometry in degrees WGS. A factor of
0.0 means the original geometry is kept. The higher the value, the
more the geometry gets simplified.
"""
locales: Locales = Locales()
""" Preferred languages for localization of results.
"""
@classmethod
def from_kwargs(cls: Type[TParam], kwargs: Dict[str, Any]) -> TParam:
""" Load the data fields of the class from a dictionary.
Unknown entries in the dictionary are ignored, missing ones
get the default setting.
The function supports type checking and throws a UsageError
when the value does not fit.
"""
def _check_field(v: Any, field: 'dataclasses.Field[Any]') -> Any:
if v is None:
return field.default_factory() \
if field.default_factory != dataclasses.MISSING \
else field.default
if field.metadata and 'transform' in field.metadata:
return field.metadata['transform'](v)
if not isinstance(v, field.type):
raise UsageError(f"Parameter '{field.name}' needs to be of {field.type!s}.")
return v
return cls(**{f.name: _check_field(kwargs[f.name], f)
for f in dataclasses.fields(cls) if f.name in kwargs})
@dataclasses.dataclass
class ReverseDetails(LookupDetails):
""" Collection of parameters for the reverse call.
"""
max_rank: int = dataclasses.field(default=30,
metadata={'transform': lambda v: max(0, min(v, 30))}
)
""" Highest address rank to return.
"""
layers: DataLayer = DataLayer.ADDRESS | DataLayer.POI
""" Filter which kind of data to include.
"""
@dataclasses.dataclass
class SearchDetails(LookupDetails):
""" Collection of parameters for the search call.
"""
max_results: int = 10
""" Maximum number of results to be returned. The actual number of results
may be less.
"""
min_rank: int = dataclasses.field(default=0,
metadata={'transform': lambda v: max(0, min(v, 30))}
)
""" Lowest address rank to return.
"""
max_rank: int = dataclasses.field(default=30,
metadata={'transform': lambda v: max(0, min(v, 30))}
)
""" Highest address rank to return.
"""
layers: Optional[DataLayer] = dataclasses.field(default=None,
metadata={'transform': lambda r : r})
""" Filter which kind of data to include. When 'None' (the default) then
filtering by layers is disabled.
"""
countries: List[str] = dataclasses.field(default_factory=list,
metadata={'transform': format_country})
""" Restrict search results to the given countries. An empty list (the
default) will disable this filter.
"""
excluded: List[int] = dataclasses.field(default_factory=list,
metadata={'transform': format_excluded})
""" List of OSM objects to exclude from the results. Currently only
works when the internal place ID is given.
An empty list (the default) will disable this filter.
"""
viewbox: Optional[Bbox] = dataclasses.field(default=None,
metadata={'transform': Bbox.from_param})
""" Focus the search on a given map area.
"""
bounded_viewbox: bool = False
""" Use 'viewbox' as a filter and restrict results to places within the
given area.
"""
near: Optional[Point] = dataclasses.field(default=None,
metadata={'transform': Point.from_param})
""" Order results by distance to the given point.
"""
near_radius: Optional[float] = dataclasses.field(default=None,
metadata={'transform': lambda r : r})
""" Use near point as a filter and drop results outside the given
radius. The radius is given in WGS84 degrees.
"""
categories: List[Tuple[str, str]] = dataclasses.field(default_factory=list,
metadata={'transform': format_categories})
""" Restrict search to places with one of the given class/type categories.
An empty list (the default) will disable this filter.
"""
viewbox_x2: Optional[Bbox] = None
def __post_init__(self) -> None:
if self.viewbox is not None:
xext = (self.viewbox.maxlon - self.viewbox.minlon)/2
yext = (self.viewbox.maxlat - self.viewbox.minlat)/2
self.viewbox_x2 = Bbox(self.viewbox.minlon - xext, self.viewbox.minlat - yext,
self.viewbox.maxlon + xext, self.viewbox.maxlat + yext)
def restrict_min_max_rank(self, new_min: int, new_max: int) -> None:
""" Change the min_rank and max_rank fields to respect the
given boundaries.
"""
assert new_min <= new_max
self.min_rank = max(self.min_rank, new_min)
self.max_rank = min(self.max_rank, new_max)
def is_impossible(self) -> bool:
""" Check if the parameter configuration is contradictionary and
cannot yield any results.
"""
return (self.min_rank > self.max_rank
or (self.bounded_viewbox
and self.viewbox is not None and self.near is not None
and self.viewbox.contains(self.near))
or (self.layers is not None and not self.layers)
or (self.max_rank <= 4 and
self.layers is not None and not self.layers & DataLayer.ADDRESS))
def layer_enabled(self, layer: DataLayer) -> bool:
""" Check if the given layer has been chosen. Also returns
true when layer restriction has been disabled completely.
"""
return self.layers is None or bool(self.layers & layer)
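A few illustrative calls to the parameter parsers defined above (coordinate values are arbitrary):
pt = Point.from_param('8.55,47.36')             # longitude, latitude
print(pt.lon, pt.lat, pt.to_wkt())              # 8.55 47.36 POINT(8.55 47.36)

box = Bbox.from_param('8.4,47.3,8.7,47.5')
print(box.contains(pt), round(box.area, 3))     # True 0.06

details = SearchDetails.from_kwargs({'viewbox': '8.4,47.3,8.7,47.5',
                                     'max_results': 5})
print(details.max_results, details.viewbox_x2 is not None)  # 5 True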

View File

@@ -0,0 +1,21 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of API version v1 (aka the legacy version).
"""
#pylint: disable=useless-import-alias
from .server_glue import (ASGIAdaptor as ASGIAdaptor,
EndpointFunc as EndpointFunc,
ROUTES as ROUTES)
from . import format as _format
list_formats = _format.dispatch.list_formats
supports_format = _format.dispatch.supports_format
format_result = _format.dispatch.format_result

View File

@@ -0,0 +1,201 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Hard-coded information about tag categories.
These tables have been copied verbatim from the old PHP code. For future
versions, a more flexible format is required.
"""
from typing import Tuple, Optional, Mapping, Union
from ..results import ReverseResult, SearchResult
from ..types import Bbox
def get_label_tag(category: Tuple[str, str], extratags: Optional[Mapping[str, str]],
rank: int, country: Optional[str]) -> str:
""" Create a label tag for the given place that can be used as an XML name.
"""
if rank < 26 and extratags and 'place' in extratags:
label = extratags['place']
elif rank < 26 and extratags and 'linked_place' in extratags:
label = extratags['linked_place']
elif category == ('boundary', 'administrative'):
label = ADMIN_LABELS.get((country or '', int(rank/2)))\
or ADMIN_LABELS.get(('', int(rank/2)))\
or 'Administrative'
elif category[1] == 'postal_code':
label = 'postcode'
elif rank < 26:
label = category[1] if category[1] != 'yes' else category[0]
elif rank < 28:
label = 'road'
elif category[0] == 'place'\
and category[1] in ('house_number', 'house_name', 'country_code'):
label = category[1]
else:
label = category[0]
return label.lower().replace(' ', '_')
def bbox_from_result(result: Union[ReverseResult, SearchResult]) -> Bbox:
""" Compute a bounding box for the result. For ways and relations
a given boundingbox is used. For all other object, a box is computed
around the centroid according to dimensions derived from the
search rank.
"""
if (result.osm_object and result.osm_object[0] == 'N') or result.bbox is None:
extent = NODE_EXTENT.get(result.category, 0.00005)
return Bbox.from_point(result.centroid, extent)
return result.bbox
# pylint: disable=line-too-long
OSM_ATTRIBUTION = 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright'
OSM_TYPE_NAME = {
'N': 'node',
'W': 'way',
'R': 'relation'
}
ADMIN_LABELS = {
('', 1): 'Continent',
('', 2): 'Country',
('', 3): 'Region',
('', 4): 'State',
('', 5): 'State District',
('', 6): 'County',
('', 7): 'Municipality',
('', 8): 'City',
('', 9): 'City District',
('', 10): 'Suburb',
('', 11): 'Neighbourhood',
('', 12): 'City Block',
('no', 3): 'State',
('no', 4): 'County',
('se', 3): 'State',
('se', 4): 'County'
}
ICONS = {
('boundary', 'administrative'): 'poi_boundary_administrative',
('place', 'city'): 'poi_place_city',
('place', 'town'): 'poi_place_town',
('place', 'village'): 'poi_place_village',
('place', 'hamlet'): 'poi_place_village',
('place', 'suburb'): 'poi_place_village',
('place', 'locality'): 'poi_place_village',
('place', 'airport'): 'transport_airport2',
('aeroway', 'aerodrome'): 'transport_airport2',
('railway', 'station'): 'transport_train_station2',
('amenity', 'place_of_worship'): 'place_of_worship_unknown3',
('amenity', 'pub'): 'food_pub',
('amenity', 'bar'): 'food_bar',
('amenity', 'university'): 'education_university',
('tourism', 'museum'): 'tourist_museum',
('amenity', 'arts_centre'): 'tourist_art_gallery2',
('tourism', 'zoo'): 'tourist_zoo',
('tourism', 'theme_park'): 'poi_point_of_interest',
('tourism', 'attraction'): 'poi_point_of_interest',
('leisure', 'golf_course'): 'sport_golf',
('historic', 'castle'): 'tourist_castle',
('amenity', 'hospital'): 'health_hospital',
('amenity', 'school'): 'education_school',
('amenity', 'theatre'): 'tourist_theatre',
('amenity', 'library'): 'amenity_library',
('amenity', 'fire_station'): 'amenity_firestation3',
('amenity', 'police'): 'amenity_police2',
('amenity', 'bank'): 'money_bank2',
('amenity', 'post_office'): 'amenity_post_office',
('tourism', 'hotel'): 'accommodation_hotel2',
('amenity', 'cinema'): 'tourist_cinema',
('tourism', 'artwork'): 'tourist_art_gallery2',
('historic', 'archaeological_site'): 'tourist_archaeological2',
('amenity', 'doctors'): 'health_doctors',
('leisure', 'sports_centre'): 'sport_leisure_centre',
('leisure', 'swimming_pool'): 'sport_swimming_outdoor',
('shop', 'supermarket'): 'shopping_supermarket',
('shop', 'convenience'): 'shopping_convenience',
('amenity', 'restaurant'): 'food_restaurant',
('amenity', 'fast_food'): 'food_fastfood',
('amenity', 'cafe'): 'food_cafe',
('tourism', 'guest_house'): 'accommodation_bed_and_breakfast',
('amenity', 'pharmacy'): 'health_pharmacy_dispensing',
('amenity', 'fuel'): 'transport_fuel',
('natural', 'peak'): 'poi_peak',
('natural', 'wood'): 'landuse_coniferous_and_deciduous',
('shop', 'bicycle'): 'shopping_bicycle',
('shop', 'clothes'): 'shopping_clothes',
('shop', 'hairdresser'): 'shopping_hairdresser',
('shop', 'doityourself'): 'shopping_diy',
('shop', 'estate_agent'): 'shopping_estateagent2',
('shop', 'car'): 'shopping_car',
('shop', 'garden_centre'): 'shopping_garden_centre',
('shop', 'car_repair'): 'shopping_car_repair',
('shop', 'bakery'): 'shopping_bakery',
('shop', 'butcher'): 'shopping_butcher',
('shop', 'apparel'): 'shopping_clothes',
('shop', 'laundry'): 'shopping_laundrette',
('shop', 'beverages'): 'shopping_alcohol',
('shop', 'alcohol'): 'shopping_alcohol',
('shop', 'optician'): 'health_opticians',
('shop', 'chemist'): 'health_pharmacy',
('shop', 'gallery'): 'tourist_art_gallery2',
('shop', 'jewelry'): 'shopping_jewelry',
('tourism', 'information'): 'amenity_information',
('historic', 'ruins'): 'tourist_ruin',
('amenity', 'college'): 'education_school',
('historic', 'monument'): 'tourist_monument',
('historic', 'memorial'): 'tourist_monument',
('historic', 'mine'): 'poi_mine',
('tourism', 'caravan_site'): 'accommodation_caravan_park',
('amenity', 'bus_station'): 'transport_bus_station',
('amenity', 'atm'): 'money_atm2',
('tourism', 'viewpoint'): 'tourist_view_point',
('tourism', 'guesthouse'): 'accommodation_bed_and_breakfast',
('railway', 'tram'): 'transport_tram_stop',
('amenity', 'courthouse'): 'amenity_court',
('amenity', 'recycling'): 'amenity_recycling',
('amenity', 'dentist'): 'health_dentist',
('natural', 'beach'): 'tourist_beach',
('railway', 'tram_stop'): 'transport_tram_stop',
('amenity', 'prison'): 'amenity_prison',
('highway', 'bus_stop'): 'transport_bus_stop2'
}
NODE_EXTENT = {
('place', 'continent'): 25,
('place', 'country'): 7,
('place', 'state'): 2.6,
('place', 'province'): 2.6,
('place', 'region'): 1.0,
('place', 'county'): 0.7,
('place', 'city'): 0.16,
('place', 'municipality'): 0.16,
('place', 'island'): 0.32,
('place', 'postcode'): 0.16,
('place', 'town'): 0.04,
('place', 'village'): 0.02,
('place', 'hamlet'): 0.02,
('place', 'district'): 0.02,
('place', 'borough'): 0.02,
('place', 'suburb'): 0.02,
('place', 'locality'): 0.01,
('place', 'neighbourhood'): 0.01,
('place', 'quarter'): 0.01,
('place', 'city_block'): 0.01,
('landuse', 'farm'): 0.01,
('place', 'farm'): 0.01,
('place', 'airport'): 0.015,
('aeroway', 'aerodrome'): 0.015,
('railway', 'station'): 0.005
}
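Illustrative calls to get_label_tag showing how category, rank and country feed into the label (values chosen only for demonstration):
print(get_label_tag(('boundary', 'administrative'), None, 16, 'de'))  # city
print(get_label_tag(('boundary', 'administrative'), None, 8, 'se'))   # county
print(get_label_tag(('highway', 'residential'), None, 26, None))      # road
print(get_label_tag(('amenity', 'cafe'), None, 30, None))             # amenity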

View File

@@ -0,0 +1,259 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Output formatters for API version v1.
"""
from typing import List, Dict, Mapping, Any
import collections
import datetime as dt
from nominatim_core.utils.json_writer import JsonWriter
from ..status import StatusResult
from ..results import DetailedResult, ReverseResults, SearchResults, \
AddressLines, AddressLine
from ..localization import Locales
from ..result_formatting import FormatDispatcher
from .classtypes import ICONS
from . import format_json, format_xml
class RawDataList(List[Dict[str, Any]]):
""" Data type for formatting raw data lists 'as is' in json.
"""
dispatch = FormatDispatcher()
@dispatch.format_func(StatusResult, 'text')
def _format_status_text(result: StatusResult, _: Mapping[str, Any]) -> str:
if result.status:
return f"ERROR: {result.message}"
return 'OK'
@dispatch.format_func(StatusResult, 'json')
def _format_status_json(result: StatusResult, _: Mapping[str, Any]) -> str:
out = JsonWriter()
out.start_object()\
.keyval('status', result.status)\
.keyval('message', result.message)\
.keyval_not_none('data_updated', result.data_updated,
lambda v: v.isoformat())\
.keyval('software_version', str(result.software_version))\
.keyval_not_none('database_version', result.database_version, str)\
.end_object()
return out()
def _add_address_row(writer: JsonWriter, row: AddressLine,
locales: Locales) -> None:
writer.start_object()\
.keyval('localname', locales.display_name(row.names))\
.keyval_not_none('place_id', row.place_id)
if row.osm_object is not None:
writer.keyval('osm_id', row.osm_object[1])\
.keyval('osm_type', row.osm_object[0])
if row.extratags:
writer.keyval_not_none('place_type', row.extratags.get('place_type'))
writer.keyval('class', row.category[0])\
.keyval('type', row.category[1])\
.keyval_not_none('admin_level', row.admin_level)\
.keyval('rank_address', row.rank_address)\
.keyval('distance', row.distance)\
.keyval('isaddress', row.isaddress)\
.end_object()
def _add_address_rows(writer: JsonWriter, section: str, rows: AddressLines,
locales: Locales) -> None:
writer.key(section).start_array()
for row in rows:
_add_address_row(writer, row, locales)
writer.next()
writer.end_array().next()
def _add_parent_rows_grouped(writer: JsonWriter, rows: AddressLines,
locales: Locales) -> None:
# group by category type
data = collections.defaultdict(list)
for row in rows:
sub = JsonWriter()
_add_address_row(sub, row, locales)
data[row.category[1]].append(sub())
writer.key('hierarchy').start_object()
for group, grouped in data.items():
writer.key(group).start_array()
grouped.sort() # sorts alphabetically by local name
for line in grouped:
writer.raw(line).next()
writer.end_array().next()
writer.end_object().next()
@dispatch.format_func(DetailedResult, 'json')
def _format_details_json(result: DetailedResult, options: Mapping[str, Any]) -> str:
locales = options.get('locales', Locales())
geom = result.geometry.get('geojson')
centroid = result.centroid.to_geojson()
out = JsonWriter()
out.start_object()\
.keyval_not_none('place_id', result.place_id)\
.keyval_not_none('parent_place_id', result.parent_place_id)
if result.osm_object is not None:
out.keyval('osm_type', result.osm_object[0])\
.keyval('osm_id', result.osm_object[1])
out.keyval('category', result.category[0])\
.keyval('type', result.category[1])\
.keyval('admin_level', result.admin_level)\
.keyval('localname', result.locale_name or '')\
.keyval('names', result.names or {})\
.keyval('addresstags', result.address or {})\
.keyval_not_none('housenumber', result.housenumber)\
.keyval_not_none('calculated_postcode', result.postcode)\
.keyval_not_none('country_code', result.country_code)\
.keyval_not_none('indexed_date', result.indexed_date, lambda v: v.isoformat())\
.keyval_not_none('importance', result.importance)\
.keyval('calculated_importance', result.calculated_importance())\
.keyval('extratags', result.extratags or {})\
.keyval_not_none('calculated_wikipedia', result.wikipedia)\
.keyval('rank_address', result.rank_address)\
.keyval('rank_search', result.rank_search)\
.keyval('isarea', 'Polygon' in (geom or result.geometry.get('type') or ''))\
.key('centroid').raw(centroid).next()\
.key('geometry').raw(geom or centroid).next()
if options.get('icon_base_url', None):
icon = ICONS.get(result.category)
if icon:
out.keyval('icon', f"{options['icon_base_url']}/{icon}.p.20.png")
if result.address_rows is not None:
_add_address_rows(out, 'address', result.address_rows, locales)
if result.linked_rows:
_add_address_rows(out, 'linked_places', result.linked_rows, locales)
if result.name_keywords is not None or result.address_keywords is not None:
out.key('keywords').start_object()
for sec, klist in (('name', result.name_keywords), ('address', result.address_keywords)):
out.key(sec).start_array()
for word in (klist or []):
out.start_object()\
.keyval('id', word.word_id)\
.keyval('token', word.word_token)\
.end_object().next()
out.end_array().next()
out.end_object().next()
if result.parented_rows is not None:
if options.get('group_hierarchy', False):
_add_parent_rows_grouped(out, result.parented_rows, locales)
else:
_add_address_rows(out, 'hierarchy', result.parented_rows, locales)
out.end_object()
return out()
@dispatch.format_func(ReverseResults, 'xml')
def _format_reverse_xml(results: ReverseResults, options: Mapping[str, Any]) -> str:
return format_xml.format_base_xml(results,
options, True, 'reversegeocode',
{'querystring': options.get('query', '')})
@dispatch.format_func(ReverseResults, 'geojson')
def _format_reverse_geojson(results: ReverseResults,
options: Mapping[str, Any]) -> str:
return format_json.format_base_geojson(results, options, True)
@dispatch.format_func(ReverseResults, 'geocodejson')
def _format_reverse_geocodejson(results: ReverseResults,
options: Mapping[str, Any]) -> str:
return format_json.format_base_geocodejson(results, options, True)
@dispatch.format_func(ReverseResults, 'json')
def _format_reverse_json(results: ReverseResults,
options: Mapping[str, Any]) -> str:
return format_json.format_base_json(results, options, True,
class_label='class')
@dispatch.format_func(ReverseResults, 'jsonv2')
def _format_reverse_jsonv2(results: ReverseResults,
options: Mapping[str, Any]) -> str:
return format_json.format_base_json(results, options, True,
class_label='category')
@dispatch.format_func(SearchResults, 'xml')
def _format_search_xml(results: SearchResults, options: Mapping[str, Any]) -> str:
extra = {'querystring': options.get('query', '')}
for attr in ('more_url', 'exclude_place_ids', 'viewbox'):
if options.get(attr):
extra[attr] = options[attr]
return format_xml.format_base_xml(results, options, False, 'searchresults',
extra)
@dispatch.format_func(SearchResults, 'geojson')
def _format_search_geojson(results: SearchResults,
options: Mapping[str, Any]) -> str:
return format_json.format_base_geojson(results, options, False)
@dispatch.format_func(SearchResults, 'geocodejson')
def _format_search_geocodejson(results: SearchResults,
options: Mapping[str, Any]) -> str:
return format_json.format_base_geocodejson(results, options, False)
@dispatch.format_func(SearchResults, 'json')
def _format_search_json(results: SearchResults,
options: Mapping[str, Any]) -> str:
return format_json.format_base_json(results, options, False,
class_label='class')
@dispatch.format_func(SearchResults, 'jsonv2')
def _format_search_jsonv2(results: SearchResults,
options: Mapping[str, Any]) -> str:
return format_json.format_base_json(results, options, False,
class_label='category')
@dispatch.format_func(RawDataList, 'json')
def _format_raw_data_json(results: RawDataList, _: Mapping[str, Any]) -> str:
out = JsonWriter()
out.start_array()
for res in results:
out.start_object()
for k, v in res.items():
if isinstance(v, dt.datetime):
out.keyval(k, v.isoformat(sep= ' ', timespec='seconds'))
else:
out.keyval(k, v)
out.end_object().next()
out.end_array()
return out()
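A hedged sketch of driving the dispatcher directly; the method names follow the aliases exported by the v1 package, and the exact import paths and signatures are assumptions.
from nominatim_api import StatusResult             # assumed public import
from nominatim_api.v1 import format as v1_format   # assumed module path

print(v1_format.dispatch.supports_format(StatusResult, 'json'))             # True
print(v1_format.dispatch.format_result(StatusResult(0, 'OK'), 'text', {}))  # OK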

View File

@@ -0,0 +1,275 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for output of results in json formats.
"""
from typing import Mapping, Any, Optional, Tuple, Union
from nominatim_core.utils.json_writer import JsonWriter
from ..results import AddressLines, ReverseResults, SearchResults
from . import classtypes as cl
#pylint: disable=too-many-branches
def _write_osm_id(out: JsonWriter, osm_object: Optional[Tuple[str, int]]) -> None:
if osm_object is not None:
out.keyval_not_none('osm_type', cl.OSM_TYPE_NAME.get(osm_object[0], None))\
.keyval('osm_id', osm_object[1])
def _write_typed_address(out: JsonWriter, address: Optional[AddressLines],
country_code: Optional[str]) -> None:
parts = {}
for line in (address or []):
if line.isaddress:
if line.local_name:
label = cl.get_label_tag(line.category, line.extratags,
line.rank_address, country_code)
if label not in parts:
parts[label] = line.local_name
if line.names and 'ISO3166-2' in line.names and line.admin_level:
parts[f"ISO3166-2-lvl{line.admin_level}"] = line.names['ISO3166-2']
for k, v in parts.items():
out.keyval(k, v)
if country_code:
out.keyval('country_code', country_code)
def _write_geocodejson_address(out: JsonWriter,
address: Optional[AddressLines],
obj_place_id: Optional[int],
country_code: Optional[str]) -> None:
extra = {}
for line in (address or []):
if line.isaddress and line.local_name:
if line.category[1] in ('postcode', 'postal_code'):
out.keyval('postcode', line.local_name)
elif line.category[1] == 'house_number':
out.keyval('housenumber', line.local_name)
elif (obj_place_id is None or obj_place_id != line.place_id) \
and line.rank_address >= 4 and line.rank_address < 28:
rank_name = GEOCODEJSON_RANKS[line.rank_address]
if rank_name not in extra:
extra[rank_name] = line.local_name
for k, v in extra.items():
out.keyval(k, v)
if country_code:
out.keyval('country_code', country_code)
def format_base_json(results: Union[ReverseResults, SearchResults],
options: Mapping[str, Any], simple: bool,
class_label: str) -> str:
""" Return the result list as a simple json string in custom Nominatim format.
"""
out = JsonWriter()
if simple:
if not results:
return '{"error":"Unable to geocode"}'
else:
out.start_array()
for result in results:
out.start_object()\
.keyval_not_none('place_id', result.place_id)\
.keyval('licence', cl.OSM_ATTRIBUTION)
_write_osm_id(out, result.osm_object)
out.keyval('lat', f"{result.centroid.lat}")\
.keyval('lon', f"{result.centroid.lon}")\
.keyval(class_label, result.category[0])\
.keyval('type', result.category[1])\
.keyval('place_rank', result.rank_search)\
.keyval('importance', result.calculated_importance())\
.keyval('addresstype', cl.get_label_tag(result.category, result.extratags,
result.rank_address,
result.country_code))\
.keyval('name', result.locale_name or '')\
.keyval('display_name', result.display_name or '')
if options.get('icon_base_url', None):
icon = cl.ICONS.get(result.category)
if icon:
out.keyval('icon', f"{options['icon_base_url']}/{icon}.p.20.png")
if options.get('addressdetails', False):
out.key('address').start_object()
_write_typed_address(out, result.address_rows, result.country_code)
out.end_object().next()
if options.get('extratags', False):
out.keyval('extratags', result.extratags)
if options.get('namedetails', False):
out.keyval('namedetails', result.names)
bbox = cl.bbox_from_result(result)
out.key('boundingbox').start_array()\
.value(f"{bbox.minlat:0.7f}").next()\
.value(f"{bbox.maxlat:0.7f}").next()\
.value(f"{bbox.minlon:0.7f}").next()\
.value(f"{bbox.maxlon:0.7f}").next()\
.end_array().next()
if result.geometry:
for key in ('text', 'kml'):
out.keyval_not_none('geo' + key, result.geometry.get(key))
if 'geojson' in result.geometry:
out.key('geojson').raw(result.geometry['geojson']).next()
out.keyval_not_none('svg', result.geometry.get('svg'))
out.end_object()
if simple:
return out()
out.next()
out.end_array()
return out()
def format_base_geojson(results: Union[ReverseResults, SearchResults],
options: Mapping[str, Any],
simple: bool) -> str:
""" Return the result list as a geojson string.
"""
if not results and simple:
return '{"error":"Unable to geocode"}'
out = JsonWriter()
out.start_object()\
.keyval('type', 'FeatureCollection')\
.keyval('licence', cl.OSM_ATTRIBUTION)\
.key('features').start_array()
for result in results:
out.start_object()\
.keyval('type', 'Feature')\
.key('properties').start_object()
out.keyval_not_none('place_id', result.place_id)
_write_osm_id(out, result.osm_object)
out.keyval('place_rank', result.rank_search)\
.keyval('category', result.category[0])\
.keyval('type', result.category[1])\
.keyval('importance', result.calculated_importance())\
.keyval('addresstype', cl.get_label_tag(result.category, result.extratags,
result.rank_address,
result.country_code))\
.keyval('name', result.locale_name or '')\
.keyval('display_name', result.display_name or '')
if options.get('addressdetails', False):
out.key('address').start_object()
_write_typed_address(out, result.address_rows, result.country_code)
out.end_object().next()
if options.get('extratags', False):
out.keyval('extratags', result.extratags)
if options.get('namedetails', False):
out.keyval('namedetails', result.names)
out.end_object().next() # properties
out.key('bbox').start_array()
for coord in cl.bbox_from_result(result).coords:
out.float(coord, 7).next()
out.end_array().next()
out.key('geometry').raw(result.geometry.get('geojson')
or result.centroid.to_geojson()).next()
out.end_object().next()
out.end_array().next().end_object()
return out()
def format_base_geocodejson(results: Union[ReverseResults, SearchResults],
options: Mapping[str, Any], simple: bool) -> str:
""" Return the result list as a geocodejson string.
"""
if not results and simple:
return '{"error":"Unable to geocode"}'
out = JsonWriter()
out.start_object()\
.keyval('type', 'FeatureCollection')\
.key('geocoding').start_object()\
.keyval('version', '0.1.0')\
.keyval('attribution', cl.OSM_ATTRIBUTION)\
.keyval('licence', 'ODbL')\
.keyval_not_none('query', options.get('query'))\
.end_object().next()\
.key('features').start_array()
for result in results:
out.start_object()\
.keyval('type', 'Feature')\
.key('properties').start_object()\
.key('geocoding').start_object()
out.keyval_not_none('place_id', result.place_id)
_write_osm_id(out, result.osm_object)
out.keyval('osm_key', result.category[0])\
.keyval('osm_value', result.category[1])\
.keyval('type', GEOCODEJSON_RANKS[max(3, min(28, result.rank_address))])\
.keyval_not_none('accuracy', getattr(result, 'distance', None), transform=int)\
.keyval('label', result.display_name or '')\
.keyval_not_none('name', result.locale_name or None)
if options.get('addressdetails', False):
_write_geocodejson_address(out, result.address_rows, result.place_id,
result.country_code)
out.key('admin').start_object()
if result.address_rows:
for line in result.address_rows:
if line.isaddress and (line.admin_level or 15) < 15 and line.local_name \
and line.category[0] == 'boundary' and line.category[1] == 'administrative':
out.keyval(f"level{line.admin_level}", line.local_name)
out.end_object().next()
out.end_object().next().end_object().next()
out.key('geometry').raw(result.geometry.get('geojson')
or result.centroid.to_geojson()).next()
out.end_object().next()
out.end_array().next().end_object()
return out()
GEOCODEJSON_RANKS = {
3: 'locality',
4: 'country',
5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
10: 'county', 11: 'county', 12: 'county',
13: 'city', 14: 'city', 15: 'city', 16: 'city',
17: 'district', 18: 'district', 19: 'district', 20: 'district', 21: 'district',
22: 'locality', 23: 'locality', 24: 'locality',
25: 'street', 26: 'street', 27: 'street', 28: 'house'}
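The rank clamping used in format_base_geocodejson, worked through for a few address ranks:
for rank in (2, 12, 26, 30):
    print(rank, GEOCODEJSON_RANKS[max(3, min(28, rank))])
# 2 locality, 12 county, 26 street, 30 house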

View File

@@ -0,0 +1,126 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for output of results in XML format.
"""
from typing import Mapping, Any, Optional, Union
import datetime as dt
import xml.etree.ElementTree as ET
from ..results import AddressLines, ReverseResult, ReverseResults, \
SearchResult, SearchResults
from . import classtypes as cl
#pylint: disable=too-many-branches
def _write_xml_address(root: ET.Element, address: AddressLines,
country_code: Optional[str]) -> None:
parts = {}
for line in address:
if line.isaddress:
if line.local_name:
label = cl.get_label_tag(line.category, line.extratags,
line.rank_address, country_code)
if label not in parts:
parts[label] = line.local_name
if line.names and 'ISO3166-2' in line.names and line.admin_level:
parts[f"ISO3166-2-lvl{line.admin_level}"] = line.names['ISO3166-2']
for k,v in parts.items():
ET.SubElement(root, k).text = v
if country_code:
ET.SubElement(root, 'country_code').text = country_code
def _create_base_entry(result: Union[ReverseResult, SearchResult],
root: ET.Element, simple: bool) -> ET.Element:
place = ET.SubElement(root, 'result' if simple else 'place')
if result.place_id is not None:
place.set('place_id', str(result.place_id))
if result.osm_object:
osm_type = cl.OSM_TYPE_NAME.get(result.osm_object[0], None)
if osm_type is not None:
place.set('osm_type', osm_type)
place.set('osm_id', str(result.osm_object[1]))
if result.names and 'ref' in result.names:
place.set('ref', result.names['ref'])
elif result.locale_name:
# bug reproduced from PHP
place.set('ref', result.locale_name)
place.set('lat', f"{result.centroid.lat:.7f}")
place.set('lon', f"{result.centroid.lon:.7f}")
bbox = cl.bbox_from_result(result)
place.set('boundingbox',
f"{bbox.minlat:.7f},{bbox.maxlat:.7f},{bbox.minlon:.7f},{bbox.maxlon:.7f}")
place.set('place_rank', str(result.rank_search))
place.set('address_rank', str(result.rank_address))
if result.geometry:
for key in ('text', 'svg'):
if key in result.geometry:
place.set('geo' + key, result.geometry[key])
if 'kml' in result.geometry:
ET.SubElement(root if simple else place, 'geokml')\
.append(ET.fromstring(result.geometry['kml']))
if 'geojson' in result.geometry:
place.set('geojson', result.geometry['geojson'])
if simple:
place.text = result.display_name or ''
else:
place.set('display_name', result.display_name or '')
place.set('class', result.category[0])
place.set('type', result.category[1])
place.set('importance', str(result.calculated_importance()))
return place
def format_base_xml(results: Union[ReverseResults, SearchResults],
options: Mapping[str, Any],
simple: bool, xml_root_tag: str,
xml_extra_info: Mapping[str, str]) -> str:
""" Format the result into an XML response. With 'simple' exactly one
result will be output, otherwise a list.
"""
root = ET.Element(xml_root_tag)
root.set('timestamp', dt.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S +00:00'))
root.set('attribution', cl.OSM_ATTRIBUTION)
for k, v in xml_extra_info.items():
root.set(k, v)
if simple and not results:
ET.SubElement(root, 'error').text = 'Unable to geocode'
for result in results:
place = _create_base_entry(result, root, simple)
if not simple and options.get('icon_base_url', None):
icon = cl.ICONS.get(result.category)
if icon:
place.set('icon', icon)
if options.get('addressdetails', False) and result.address_rows:
_write_xml_address(ET.SubElement(root, 'addressparts') if simple else place,
result.address_rows, result.country_code)
if options.get('extratags', False):
eroot = ET.SubElement(root if simple else place, 'extratags')
if result.extratags:
for k, v in result.extratags.items():
ET.SubElement(eroot, 'tag', attrib={'key': k, 'value': v})
if options.get('namedetails', False):
eroot = ET.SubElement(root if simple else place, 'namedetails')
if result.names:
for k,v in result.names.items():
ET.SubElement(eroot, 'name', attrib={'desc': k}).text = v
return '<?xml version="1.0" encoding="UTF-8" ?>\n' + ET.tostring(root, encoding='unicode')

View File

@@ -0,0 +1,201 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for parsing parameters and outputting data
specifically for the v1 version of the API.
"""
from typing import Tuple, Optional, Any, Dict, Iterable
from itertools import chain
import re
from ..results import SearchResult, SearchResults, SourceTable
from ..types import SearchDetails, GeometryFormat
REVERSE_MAX_RANKS = [2, 2, 2, # 0-2 Continent/Sea
4, 4, # 3-4 Country
8, # 5 State
10, 10, # 6-7 Region
12, 12, # 8-9 County
16, 17, # 10-11 City
18, # 12 Town
19, # 13 Village/Suburb
22, # 14 Hamlet/Neighbourhood
25, # 15 Localities
26, # 16 Major Streets
27, # 17 Minor Streets
30 # 18 Building
]
def zoom_to_rank(zoom: int) -> int:
""" Convert a zoom parameter into a rank according to the v1 API spec.
"""
return REVERSE_MAX_RANKS[max(0, min(18, zoom))]
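# Illustrative values derived from the table above:
#   >>> zoom_to_rank(3)     # country level
#   4
#   >>> zoom_to_rank(13)    # village/suburb
#   19
#   >>> zoom_to_rank(30)    # out-of-range zoom values are clamped to 18
#   30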
FEATURE_TYPE_TO_RANK: Dict[Optional[str], Tuple[int, int]] = {
'country': (4, 4),
'state': (8, 8),
'city': (14, 16),
'settlement': (8, 20)
}
def feature_type_to_rank(feature_type: Optional[str]) -> Tuple[int, int]:
""" Convert a feature type parameter to a tuple of
feature type name, minimum rank and maximum rank.
"""
return FEATURE_TYPE_TO_RANK.get(feature_type, (0, 30))
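# Illustrative values:
#   >>> feature_type_to_rank('city')
#   (14, 16)
#   >>> feature_type_to_rank(None)     # unknown or missing types allow all ranks
#   (0, 30)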
#pylint: disable=too-many-arguments,too-many-branches
def extend_query_parts(queryparts: Dict[str, Any], details: Dict[str, Any],
feature_type: Optional[str],
namedetails: bool, extratags: bool,
excluded: Iterable[str]) -> None:
""" Add parameters from details dictionary to the query parts
dictionary which is suitable as URL parameter dictionary.
"""
parsed = SearchDetails.from_kwargs(details)
if parsed.geometry_output != GeometryFormat.NONE:
if GeometryFormat.GEOJSON in parsed.geometry_output:
queryparts['polygon_geojson'] = '1'
if GeometryFormat.KML in parsed.geometry_output:
queryparts['polygon_kml'] = '1'
if GeometryFormat.SVG in parsed.geometry_output:
queryparts['polygon_svg'] = '1'
if GeometryFormat.TEXT in parsed.geometry_output:
queryparts['polygon_text'] = '1'
if parsed.address_details:
queryparts['addressdetails'] = '1'
if namedetails:
queryparts['namedetails'] = '1'
if extratags:
queryparts['extratags'] = '1'
if parsed.geometry_simplification > 0.0:
queryparts['polygon_threshold'] = f"{parsed.geometry_simplification:.6g}"
if parsed.max_results != 10:
queryparts['limit'] = str(parsed.max_results)
if parsed.countries:
queryparts['countrycodes'] = ','.join(parsed.countries)
queryparts['exclude_place_ids'] = \
','.join(chain(excluded, map(str, (e for e in parsed.excluded if e > 0))))
if parsed.viewbox:
queryparts['viewbox'] = ','.join(f"{c:.7g}" for c in parsed.viewbox.coords)
if parsed.bounded_viewbox:
queryparts['bounded'] = '1'
if not details['dedupe']:
queryparts['dedupe'] = '0'
if feature_type in FEATURE_TYPE_TO_RANK:
queryparts['featureType'] = feature_type
def deduplicate_results(results: SearchResults, max_results: int) -> SearchResults:
""" Remove results that look like duplicates.
Two results are considered the same if they have the same OSM ID
or if they have the same category, display name and rank.
"""
osm_ids_done = set()
classification_done = set()
deduped = SearchResults()
for result in results:
if result.source_table == SourceTable.POSTCODE:
assert result.names and 'ref' in result.names
if any(_is_postcode_relation_for(r, result.names['ref']) for r in results):
continue
if result.source_table == SourceTable.PLACEX:
classification = (result.osm_object[0] if result.osm_object else None,
result.category,
result.display_name,
result.rank_address)
if result.osm_object not in osm_ids_done \
and classification not in classification_done:
deduped.append(result)
osm_ids_done.add(result.osm_object)
classification_done.add(classification)
else:
deduped.append(result)
if len(deduped) >= max_results:
break
return deduped
def _is_postcode_relation_for(result: SearchResult, postcode: str) -> bool:
return result.source_table == SourceTable.PLACEX \
and result.osm_object is not None \
and result.osm_object[0] == 'R' \
and result.category == ('boundary', 'postal_code') \
and result.names is not None \
and result.names.get('ref') == postcode
def _deg(axis:str) -> str:
return f"(?P<{axis}_deg>\\d+\\.\\d+)°?"
def _deg_min(axis: str) -> str:
return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>[\\d.]+)[']*"
def _deg_min_sec(axis: str) -> str:
return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>\\d+)['\\s]+(?P<{axis}_sec>[\\d.]+)[\"″]*"
COORD_REGEX = [re.compile(r'(?:(?P<pre>.*?)\s+)??' + r + r'(?:\s+(?P<post>.*))?') for r in (
r"(?P<ns>[NS])\s*" + _deg('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg('lon'),
_deg('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg('lon') + r"\s*(?P<ew>[EW])",
r"(?P<ns>[NS])\s*" + _deg_min('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg_min('lon'),
_deg_min('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg_min('lon') + r"\s*(?P<ew>[EW])",
r"(?P<ns>[NS])\s*" + _deg_min_sec('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg_min_sec('lon'),
_deg_min_sec('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg_min_sec('lon') + r"\s*(?P<ew>[EW])",
r"\[?(?P<lat_deg>[+-]?\d+\.\d+)[\s,]+(?P<lon_deg>[+-]?\d+\.\d+)\]?"
)]
def extract_coords_from_query(query: str) -> Tuple[str, Optional[float], Optional[float]]:
""" Look for something that is formatted like a coordinate at the
beginning or end of the query. If found, extract the coordinate and
return the remaining query (or the empty string if the query
consisted of nothing but a coordinate).
Only the first match will be returned.
"""
for regex in COORD_REGEX:
match = regex.fullmatch(query)
if match is None:
continue
groups = match.groupdict()
if not groups['pre'] or not groups['post']:
x = float(groups['lon_deg']) \
+ float(groups.get('lon_min', 0.0)) / 60.0 \
+ float(groups.get('lon_sec', 0.0)) / 3600.0
if groups.get('ew') == 'W':
x = -x
y = float(groups['lat_deg']) \
+ float(groups.get('lat_min', 0.0)) / 60.0 \
+ float(groups.get('lat_sec', 0.0)) / 3600.0
if groups.get('ns') == 'S':
y = -y
return groups['pre'] or groups['post'] or '', x, y
return query, None, None
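# Illustrative behaviour (query strings are made up); the return value is
# (remaining query, longitude, latitude):
#   >>> extract_coords_from_query('52.5163, 13.3777')
#   ('', 13.3777, 52.5163)
#   >>> extract_coords_from_query('Main Street 52.5163 13.3777')
#   ('Main Street', 13.3777, 52.5163)
#   >>> extract_coords_from_query('Main Street 1')
#   ('Main Street 1', None, None)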
CATEGORY_REGEX = re.compile(r'(?P<pre>.*?)\[(?P<cls>[a-zA-Z_]+)=(?P<typ>[a-zA-Z_]+)\](?P<post>.*)')
def extract_category_from_query(query: str) -> Tuple[str, Optional[str], Optional[str]]:
""" Extract a hidden category specification of the form '[key=value]' from
the query. If found, extract key and value and
return the remaining query (or the empty string if the query
consisted of nothing but a category).
Only the first match will be returned.
"""
match = CATEGORY_REGEX.search(query)
if match is not None:
return (match.group('pre').strip() + ' ' + match.group('post').strip()).strip(), \
match.group('cls'), match.group('typ')
return query, None, None
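# Illustrative behaviour:
#   >>> extract_category_from_query('[amenity=restaurant] Berlin')
#   ('Berlin', 'amenity', 'restaurant')
#   >>> extract_category_from_query('Berlin')
#   ('Berlin', None, None)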

View File

@@ -0,0 +1,577 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Generic part of the server implementation of the v1 API.
Combine with the scaffolding provided for the various Python ASGI frameworks.
"""
from typing import Optional, Any, Type, Callable, NoReturn, Dict, cast
from functools import reduce
import abc
import dataclasses
import math
from urllib.parse import urlencode
import sqlalchemy as sa
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from .. import logging as loglib
from ..core import NominatimAPIAsync
from .format import dispatch as formatting
from .format import RawDataList
from ..types import DataLayer, GeometryFormat, PlaceRef, PlaceID, OsmID, Point
from ..status import StatusResult
from ..results import DetailedResult, ReverseResults, SearchResult, SearchResults
from ..localization import Locales
from . import helpers
CONTENT_TEXT = 'text/plain; charset=utf-8'
CONTENT_XML = 'text/xml; charset=utf-8'
CONTENT_HTML = 'text/html; charset=utf-8'
CONTENT_JSON = 'application/json; charset=utf-8'
CONTENT_TYPE = {'text': CONTENT_TEXT, 'xml': CONTENT_XML, 'debug': CONTENT_HTML}
class ASGIAdaptor(abc.ABC):
""" Adapter class for the different ASGI frameworks.
Wraps functionality over concrete requests and responses.
"""
content_type: str = CONTENT_TEXT
@abc.abstractmethod
def get(self, name: str, default: Optional[str] = None) -> Optional[str]:
""" Return an input parameter as a string. If the parameter was
not provided, return the 'default' value.
"""
@abc.abstractmethod
def get_header(self, name: str, default: Optional[str] = None) -> Optional[str]:
""" Return a HTTP header parameter as a string. If the parameter was
not provided, return the 'default' value.
"""
@abc.abstractmethod
def error(self, msg: str, status: int = 400) -> Exception:
""" Construct an appropriate exception from the given error message.
The exception must result in an HTTP error with the given status.
"""
@abc.abstractmethod
def create_response(self, status: int, output: str, num_results: int) -> Any:
""" Create a response from the given parameters. The result will
be returned by the endpoint functions. The adaptor may also
return None when the response is created internally with some
different means.
The response must return the given HTTP status code 'status', set
the HTTP content-type headers to the string provided and the
body of the response to 'output'.
"""
@abc.abstractmethod
def base_uri(self) -> str:
""" Return the URI of the original request.
"""
@abc.abstractmethod
def config(self) -> Configuration:
""" Return the current configuration object.
"""
def build_response(self, output: str, status: int = 200, num_results: int = 0) -> Any:
""" Create a response from the given output. Wraps a JSONP function
around the response, if necessary.
"""
if self.content_type == CONTENT_JSON and status == 200:
jsonp = self.get('json_callback')
if jsonp is not None:
if any(not part.isidentifier() for part in jsonp.split('.')):
self.raise_error('Invalid json_callback value')
output = f"{jsonp}({output})"
self.content_type = 'application/javascript; charset=utf-8'
return self.create_response(status, output, num_results)
def raise_error(self, msg: str, status: int = 400) -> NoReturn:
""" Raise an exception resulting in the given HTTP status and
message. The message will be formatted according to the
output format chosen by the request.
"""
if self.content_type == CONTENT_XML:
msg = f"""<?xml version="1.0" encoding="UTF-8" ?>
<error>
<code>{status}</code>
<message>{msg}</message>
</error>
"""
elif self.content_type == CONTENT_JSON:
msg = f"""{{"error":{{"code":{status},"message":"{msg}"}}}}"""
elif self.content_type == CONTENT_HTML:
loglib.log().section('Execution error')
loglib.log().var_dump('Status', status)
loglib.log().var_dump('Message', msg)
msg = loglib.get_and_disable()
raise self.error(msg, status)
def get_int(self, name: str, default: Optional[int] = None) -> int:
""" Return an input parameter as an int. Raises an exception if
the parameter is given but not in an integer format.
If 'default' is given, then it will be returned when the parameter
is missing completely. When 'default' is None, an error will be
raised on a missing parameter.
"""
value = self.get(name)
if value is None:
if default is not None:
return default
self.raise_error(f"Parameter '{name}' missing.")
try:
intval = int(value)
except ValueError:
self.raise_error(f"Parameter '{name}' must be a number.")
return intval
def get_float(self, name: str, default: Optional[float] = None) -> float:
""" Return an input parameter as a flaoting-point number. Raises an
exception if the parameter is given but not in an float format.
If 'default' is given, then it will be returned when the parameter
is missing completely. When 'default' is None, an error will be
raised on a missing parameter.
"""
value = self.get(name)
if value is None:
if default is not None:
return default
self.raise_error(f"Parameter '{name}' missing.")
try:
fval = float(value)
except ValueError:
self.raise_error(f"Parameter '{name}' must be a number.")
if math.isnan(fval) or math.isinf(fval):
self.raise_error(f"Parameter '{name}' must be a number.")
return fval
def get_bool(self, name: str, default: Optional[bool] = None) -> bool:
""" Return an input parameter as bool. Only '0' is accepted as
an input for 'false'; all other inputs will be interpreted as 'true'.
If 'default' is given, then it will be returned when the parameter
is missing completely. When 'default' is None, an error will be
raised on a missing parameter.
"""
value = self.get(name)
if value is None:
if default is not None:
return default
self.raise_error(f"Parameter '{name}' missing.")
return value != '0'
def get_accepted_languages(self) -> str:
""" Return the accepted languages.
"""
return self.get('accept-language')\
or self.get_header('accept-language')\
or self.config().DEFAULT_LANGUAGE
def setup_debugging(self) -> bool:
""" Set up collection of debug information if requested.
Return True when debugging was requested.
"""
if self.get_bool('debug', False):
loglib.set_log_output('html')
self.content_type = CONTENT_HTML
return True
return False
def get_layers(self) -> Optional[DataLayer]:
""" Return a parsed version of the layer parameter.
"""
param = self.get('layer', None)
if param is None:
return None
return cast(DataLayer,
reduce(DataLayer.__or__,
(getattr(DataLayer, s.upper()) for s in param.split(','))))
def parse_format(self, result_type: Type[Any], default: str) -> str:
""" Get and check the 'format' parameter and prepare the formatter.
`result_type` is the type of result to be returned by the function
and `default` the format value to assume when no parameter is present.
"""
fmt = self.get('format', default=default)
assert fmt is not None
if not formatting.supports_format(result_type, fmt):
self.raise_error("Parameter 'format' must be one of: " +
', '.join(formatting.list_formats(result_type)))
self.content_type = CONTENT_TYPE.get(fmt, CONTENT_JSON)
return fmt
def parse_geometry_details(self, fmt: str) -> Dict[str, Any]:
""" Create details structure from the supplied geometry parameters.
"""
numgeoms = 0
output = GeometryFormat.NONE
if self.get_bool('polygon_geojson', False):
output |= GeometryFormat.GEOJSON
numgeoms += 1
if fmt not in ('geojson', 'geocodejson'):
if self.get_bool('polygon_text', False):
output |= GeometryFormat.TEXT
numgeoms += 1
if self.get_bool('polygon_kml', False):
output |= GeometryFormat.KML
numgeoms += 1
if self.get_bool('polygon_svg', False):
output |= GeometryFormat.SVG
numgeoms += 1
if numgeoms > self.config().get_int('POLYGON_OUTPUT_MAX_TYPES'):
self.raise_error('Too many polygon output options selected.')
return {'address_details': True,
'geometry_simplification': self.get_float('polygon_threshold', 0.0),
'geometry_output': output
}
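# A minimal sketch of a concrete adaptor (hypothetical, for illustration only;
# the real framework bindings are not part of this module). An implementation
# only needs to map the abstract methods onto the framework's request and
# response objects, for example:
#
#   class DictAdaptor(ASGIAdaptor):
#       def __init__(self, params, headers, cfg):
#           self.params, self.headers, self.cfg = params, headers, cfg
#       def get(self, name, default=None):
#           return self.params.get(name, default)
#       def get_header(self, name, default=None):
#           return self.headers.get(name, default)
#       def error(self, msg, status=400):
#           return RuntimeError(f"{status}: {msg}")
#       def create_response(self, status, output, num_results):
#           return status, self.content_type, output
#       def base_uri(self):
#           return ''
#       def config(self):
#           return self.cfg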
async def status_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
""" Server glue for /status endpoint. See API docs for details.
"""
result = await api.status()
fmt = params.parse_format(StatusResult, 'text')
if fmt == 'text' and result.status:
status_code = 500
else:
status_code = 200
return params.build_response(formatting.format_result(result, fmt, {}),
status=status_code)
async def details_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
""" Server glue for /details endpoint. See API docs for details.
"""
fmt = params.parse_format(DetailedResult, 'json')
place_id = params.get_int('place_id', 0)
place: PlaceRef
if place_id:
place = PlaceID(place_id)
else:
osmtype = params.get('osmtype')
if osmtype is None:
params.raise_error("Missing ID parameter 'place_id' or 'osmtype'.")
place = OsmID(osmtype, params.get_int('osmid'), params.get('class'))
debug = params.setup_debugging()
locales = Locales.from_accept_languages(params.get_accepted_languages())
result = await api.details(place,
address_details=params.get_bool('addressdetails', False),
linked_places=params.get_bool('linkedplaces', True),
parented_places=params.get_bool('hierarchy', False),
keywords=params.get_bool('keywords', False),
geometry_output = GeometryFormat.GEOJSON
if params.get_bool('polygon_geojson', False)
else GeometryFormat.NONE,
locales=locales
)
if debug:
return params.build_response(loglib.get_and_disable())
if result is None:
params.raise_error('No place with that OSM ID found.', status=404)
output = formatting.format_result(result, fmt,
{'locales': locales,
'group_hierarchy': params.get_bool('group_hierarchy', False),
'icon_base_url': params.config().MAPICON_URL})
return params.build_response(output, num_results=1)
async def reverse_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
""" Server glue for /reverse endpoint. See API docs for details.
"""
fmt = params.parse_format(ReverseResults, 'xml')
debug = params.setup_debugging()
coord = Point(params.get_float('lon'), params.get_float('lat'))
details = params.parse_geometry_details(fmt)
details['max_rank'] = helpers.zoom_to_rank(params.get_int('zoom', 18))
details['layers'] = params.get_layers()
details['locales'] = Locales.from_accept_languages(params.get_accepted_languages())
result = await api.reverse(coord, **details)
if debug:
return params.build_response(loglib.get_and_disable(), num_results=1 if result else 0)
if fmt == 'xml':
queryparts = {'lat': str(coord.lat), 'lon': str(coord.lon), 'format': 'xml'}
zoom = params.get('zoom', None)
if zoom:
queryparts['zoom'] = zoom
query = urlencode(queryparts)
else:
query = ''
fmt_options = {'query': query,
'extratags': params.get_bool('extratags', False),
'namedetails': params.get_bool('namedetails', False),
'addressdetails': params.get_bool('addressdetails', True)}
output = formatting.format_result(ReverseResults([result] if result else []),
fmt, fmt_options)
return params.build_response(output, num_results=1 if result else 0)
async def lookup_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
""" Server glue for /lookup endpoint. See API docs for details.
"""
fmt = params.parse_format(SearchResults, 'xml')
debug = params.setup_debugging()
details = params.parse_geometry_details(fmt)
details['locales'] = Locales.from_accept_languages(params.get_accepted_languages())
places = []
for oid in (params.get('osm_ids') or '').split(','):
oid = oid.strip()
if len(oid) > 1 and oid[0] in 'RNWrnw' and oid[1:].isdigit():
places.append(OsmID(oid[0].upper(), int(oid[1:])))
if len(places) > params.config().get_int('LOOKUP_MAX_COUNT'):
params.raise_error('Too many object IDs.')
if places:
results = await api.lookup(places, **details)
else:
results = SearchResults()
if debug:
return params.build_response(loglib.get_and_disable(), num_results=len(results))
fmt_options = {'extratags': params.get_bool('extratags', False),
'namedetails': params.get_bool('namedetails', False),
'addressdetails': params.get_bool('addressdetails', True)}
output = formatting.format_result(results, fmt, fmt_options)
return params.build_response(output, num_results=len(results))
async def _unstructured_search(query: str, api: NominatimAPIAsync,
details: Dict[str, Any]) -> SearchResults:
if not query:
return SearchResults()
# Extract special format for coordinates from query.
query, x, y = helpers.extract_coords_from_query(query)
if x is not None:
assert y is not None
details['near'] = Point(x, y)
details['near_radius'] = 0.1
# If no query is left, revert to reverse search.
if x is not None and not query:
result = await api.reverse(details['near'], **details)
if not result:
return SearchResults()
return SearchResults(
[SearchResult(**{f.name: getattr(result, f.name)
for f in dataclasses.fields(SearchResult)
if hasattr(result, f.name)})])
query, cls, typ = helpers.extract_category_from_query(query)
if cls is not None:
assert typ is not None
return await api.search_category([(cls, typ)], near_query=query, **details)
return await api.search(query, **details)
async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
""" Server glue for /search endpoint. See API docs for details.
"""
fmt = params.parse_format(SearchResults, 'jsonv2')
debug = params.setup_debugging()
details = params.parse_geometry_details(fmt)
details['countries'] = params.get('countrycodes', None)
details['excluded'] = params.get('exclude_place_ids', None)
details['viewbox'] = params.get('viewbox', None) or params.get('viewboxlbrt', None)
details['bounded_viewbox'] = params.get_bool('bounded', False)
details['dedupe'] = params.get_bool('dedupe', True)
max_results = max(1, min(50, params.get_int('limit', 10)))
details['max_results'] = max_results + min(10, max_results) \
if details['dedupe'] else max_results
details['min_rank'], details['max_rank'] = \
helpers.feature_type_to_rank(params.get('featureType', ''))
if params.get('featureType', None) is not None:
details['layers'] = DataLayer.ADDRESS
else:
details['layers'] = params.get_layers()
details['locales'] = Locales.from_accept_languages(params.get_accepted_languages())
# unstructured query parameters
query = params.get('q', None)
# structured query parameters
queryparts = {}
for key in ('amenity', 'street', 'city', 'county', 'state', 'postalcode', 'country'):
details[key] = params.get(key, None)
if details[key]:
queryparts[key] = details[key]
try:
if query is not None:
if queryparts:
params.raise_error("Structured query parameters"
"(amenity, street, city, county, state, postalcode, country)"
" cannot be used together with 'q' parameter.")
queryparts['q'] = query
results = await _unstructured_search(query, api, details)
else:
query = ', '.join(queryparts.values())
results = await api.search_address(**details)
except UsageError as err:
params.raise_error(str(err))
if details['dedupe'] and len(results) > 1:
results = helpers.deduplicate_results(results, max_results)
if debug:
return params.build_response(loglib.get_and_disable(), num_results=len(results))
if fmt == 'xml':
helpers.extend_query_parts(queryparts, details,
params.get('featureType', ''),
params.get_bool('namedetails', False),
params.get_bool('extratags', False),
(str(r.place_id) for r in results if r.place_id))
queryparts['format'] = fmt
moreurl = params.base_uri() + '/search?' + urlencode(queryparts)
else:
moreurl = ''
fmt_options = {'query': query, 'more_url': moreurl,
'exclude_place_ids': queryparts.get('exclude_place_ids'),
'viewbox': queryparts.get('viewbox'),
'extratags': params.get_bool('extratags', False),
'namedetails': params.get_bool('namedetails', False),
'addressdetails': params.get_bool('addressdetails', False)}
output = formatting.format_result(results, fmt, fmt_options)
return params.build_response(output, num_results=len(results))
async def deletable_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
""" Server glue for /deletable endpoint.
This is a special endpoint that shows polygons that have been
deleted or are broken in the OSM data but are kept in the
Nominatim database to minimize disruption.
"""
fmt = params.parse_format(RawDataList, 'json')
async with api.begin() as conn:
sql = sa.text(""" SELECT p.place_id, country_code,
name->'name' as name, i.*
FROM placex p, import_polygon_delete i
WHERE p.osm_id = i.osm_id AND p.osm_type = i.osm_type
AND p.class = i.class AND p.type = i.type
""")
results = RawDataList(r._asdict() for r in await conn.execute(sql))
return params.build_response(formatting.format_result(results, fmt, {}))
async def polygons_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
""" Server glue for /polygons endpoint.
This is a special endpoint that shows polygons that have changed
their size but are kept in the Nominatim database with their
old area to minimize disruption.
"""
fmt = params.parse_format(RawDataList, 'json')
sql_params: Dict[str, Any] = {
'days': params.get_int('days', -1),
'cls': params.get('class')
}
reduced = params.get_bool('reduced', False)
async with api.begin() as conn:
sql = sa.select(sa.text("""osm_type, osm_id, class, type,
name->'name' as name,
country_code, errormessage, updated"""))\
.select_from(sa.text('import_polygon_error'))
if sql_params['days'] > 0:
sql = sql.where(sa.text("updated > 'now'::timestamp - make_interval(days => :days)"))
if reduced:
sql = sql.where(sa.text("errormessage like 'Area reduced%'"))
if sql_params['cls'] is not None:
sql = sql.where(sa.text("class = :cls"))
sql = sql.order_by(sa.literal_column('updated').desc()).limit(1000)
results = RawDataList(r._asdict() for r in await conn.execute(sql, sql_params))
return params.build_response(formatting.format_result(results, fmt, {}))
EndpointFunc = Callable[[NominatimAPIAsync, ASGIAdaptor], Any]
ROUTES = [
('status', status_endpoint),
('details', details_endpoint),
('reverse', reverse_endpoint),
('lookup', lookup_endpoint),
('search', search_endpoint),
('deletable', deletable_endpoint),
('polygons', polygons_endpoint),
]

View File

@@ -0,0 +1,11 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Version information for the Nominatim API.
"""
NOMINATIM_API_VERSION = '4.4.99'

View File

View File

@@ -0,0 +1,374 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Nominatim configuration accessor.
"""
from typing import Dict, Any, List, Mapping, Optional
import importlib.util
import logging
import os
import sys
from pathlib import Path
import json
import yaml
from dotenv import dotenv_values
from psycopg2.extensions import parse_dsn
from .typing import StrPath
from .errors import UsageError
from . import paths
LOG = logging.getLogger()
CONFIG_CACHE : Dict[str, Any] = {}
def flatten_config_list(content: Any, section: str = '') -> List[Any]:
""" Flatten YAML configuration lists that contain include sections
which are lists themselves.
"""
if not content:
return []
if not isinstance(content, list):
raise UsageError(f"List expected in section '{section}'.")
output = []
for ele in content:
if isinstance(ele, list):
output.extend(flatten_config_list(ele, section))
else:
output.append(ele)
return output
class Configuration:
""" This class wraps access to the configuration settings
for the Nominatim instance in use.
All Nominatim configuration options are prefixed with 'NOMINATIM_' to
avoid conflicts with other environment variables. All settings can
be accessed as properties of the class under the same name as the
setting but with the `NOMINATIM_` prefix removed. In addition, there
are accessor functions that convert the setting values to types
other than string.
"""
def __init__(self, project_dir: Optional[Path],
environ: Optional[Mapping[str, str]] = None) -> None:
self.environ = environ or os.environ
self.project_dir = project_dir
self.config_dir = paths.CONFIG_DIR
self._config = dotenv_values(str(self.config_dir / 'env.defaults'))
if self.project_dir is not None and (self.project_dir / '.env').is_file():
self.project_dir = self.project_dir.resolve()
self._config.update(dotenv_values(str(self.project_dir / '.env')))
class _LibDirs:
module: Path
osm2pgsql: Path
php = paths.PHPLIB_DIR
sql = paths.SQLLIB_DIR
data = paths.DATA_DIR
self.lib_dir = _LibDirs()
self._private_plugins: Dict[str, object] = {}
def set_libdirs(self, **kwargs: StrPath) -> None:
""" Set paths to library functions and data.
"""
for key, value in kwargs.items():
setattr(self.lib_dir, key, None if value is None else Path(value))
def __getattr__(self, name: str) -> str:
name = 'NOMINATIM_' + name
if name in self.environ:
return self.environ[name]
return self._config[name] or ''
def get_bool(self, name: str) -> bool:
""" Return the given configuration parameter as a boolean.
Parameters:
name: Name of the configuration parameter with the NOMINATIM_
prefix removed.
Returns:
`True` for values of '1', 'yes' and 'true', `False` otherwise.
"""
return getattr(self, name).lower() in ('1', 'yes', 'true')
def get_int(self, name: str) -> int:
""" Return the given configuration parameter as an int.
Parameters:
name: Name of the configuration parameter with the NOMINATIM_
prefix removed.
Returns:
The configuration value converted to int.
Raises:
ValueError: when the value is not a number.
"""
try:
return int(getattr(self, name))
except ValueError as exp:
LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
raise UsageError("Configuration error.") from exp
def get_str_list(self, name: str) -> Optional[List[str]]:
""" Return the given configuration parameter as a list of strings.
The values are assumed to be given as a comma-separated list and
will be stripped before returning them.
Parameters:
name: Name of the configuration parameter with the NOMINATIM_
prefix removed.
Returns:
(List[str]): The comma-split parameter as a list. The
elements are stripped of leading and final spaces before
being returned.
(None): The configuration parameter was unset or empty.
"""
raw = getattr(self, name)
return [v.strip() for v in raw.split(',')] if raw else None
def get_path(self, name: str) -> Optional[Path]:
""" Return the given configuration parameter as a Path.
Parameters:
name: Name of the configuration parameter with the NOMINATIM_
prefix removed.
Returns:
(Path): A Path object of the parameter value.
If a relative path is configured, then the function converts this
into an absolute path with the project directory as root path.
(None): The configuration parameter was unset or empty.
"""
value = getattr(self, name)
if not value:
return None
cfgpath = Path(value)
if not cfgpath.is_absolute():
assert self.project_dir is not None
cfgpath = self.project_dir / cfgpath
return cfgpath.resolve()
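# Illustrative use of the accessors (the setting names below are assumptions
# used purely as examples):
#   config = Configuration(Path('/srv/nominatim-project'))
#   config.DATABASE_DSN                      # raw string, 'NOMINATIM_' prefix dropped
#   config.get_bool('USE_US_TIGER_DATA')     # '1'/'yes'/'true' -> True
#   config.get_str_list('LANGUAGES')         # 'de,en' -> ['de', 'en']
#   config.get_path('FLATNODE_FILE')         # relative paths resolved against the project dir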
def get_libpq_dsn(self) -> str:
""" Get configured database DSN converted into the key/value format
understood by libpq and psycopg.
"""
dsn = self.DATABASE_DSN
def quote_param(param: str) -> str:
key, val = param.split('=')
val = val.replace('\\', '\\\\').replace("'", "\\'")
if ' ' in val:
val = "'" + val + "'"
return key + '=' + val
if dsn.startswith('pgsql:'):
# Old PHP DSN format. Convert before returning.
return ' '.join([quote_param(p) for p in dsn[6:].split(';')])
return dsn
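# For example, a legacy PHP-style setting
#   NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;host=localhost;password=secret word"
# is converted to the libpq form
#   "dbname=nominatim host=localhost password='secret word'"
# while key/value DSNs are passed through unchanged.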
def get_database_params(self) -> Mapping[str, str]:
""" Get the configured parameters for the database connection
as a mapping.
"""
dsn = self.DATABASE_DSN
if dsn.startswith('pgsql:'):
return dict((p.split('=', 1) for p in dsn[6:].split(';')))
return parse_dsn(dsn)
def get_import_style_file(self) -> Path:
""" Return the import style file as a path object. Translates the
name of the standard styles automatically into the corresponding
file in the configuration directory.
"""
style = getattr(self, 'IMPORT_STYLE')
if style in ('admin', 'street', 'address', 'full', 'extratags'):
return self.config_dir / f'import-{style}.lua'
return self.find_config_file('', 'IMPORT_STYLE')
def get_os_env(self) -> Dict[str, str]:
""" Return a copy of the OS environment with the Nominatim configuration
merged in.
"""
env = {k: v for k, v in self._config.items() if v is not None}
env.update(self.environ)
return env
def load_sub_configuration(self, filename: StrPath,
config: Optional[str] = None) -> Any:
""" Load additional configuration from a file. `filename` is the name
of the configuration file. The file is first searched in the
project directory and then in the global settings directory.
If `config` is set, then the name of the configuration file can
be additionally given through a .env configuration option. When
the option is set, the file is loaded exclusively from the configured location:
if the name is an absolute path, the file name is taken as is,
if the name is relative, it is taken to be relative to the
project directory.
The format of the file is determined from the filename suffix.
Currently only files with extension '.yaml' are supported.
YAML files support a special '!include' construct. When the
directive is given, the value is taken to be a filename; the file
is loaded using this function and inserted at that position in the
configuration tree.
"""
configfile = self.find_config_file(filename, config)
if str(configfile) in CONFIG_CACHE:
return CONFIG_CACHE[str(configfile)]
if configfile.suffix in ('.yaml', '.yml'):
result = self._load_from_yaml(configfile)
elif configfile.suffix == '.json':
with configfile.open('r', encoding='utf-8') as cfg:
result = json.load(cfg)
else:
raise UsageError(f"Config file '{configfile}' has unknown format.")
CONFIG_CACHE[str(configfile)] = result
return result
def load_plugin_module(self, module_name: str, internal_path: str) -> Any:
""" Load a Python module as a plugin.
The module_name may have three variants:
* A name without any '.' is assumed to be an internal module
and will be searched relative to `internal_path`.
* If the name ends in `.py`, module_name is assumed to be a
file name relative to the project directory.
* Any other name is assumed to be an absolute module name.
In all variants the module name must start with a letter.
"""
if not module_name or not module_name[0].isidentifier():
raise UsageError(f'Invalid module name {module_name}')
if '.' not in module_name:
module_name = module_name.replace('-', '_')
full_module = f'{internal_path}.{module_name}'
return sys.modules.get(full_module) or importlib.import_module(full_module)
if module_name.endswith('.py'):
if self.project_dir is None or not (self.project_dir / module_name).exists():
raise UsageError(f"Cannot find module '{module_name}' in project directory.")
if module_name in self._private_plugins:
return self._private_plugins[module_name]
file_path = str(self.project_dir / module_name)
spec = importlib.util.spec_from_file_location(module_name, file_path)
if spec:
module = importlib.util.module_from_spec(spec)
# Do not add to global modules because there is no standard
# module name that Python can resolve.
self._private_plugins[module_name] = module
assert spec.loader is not None
spec.loader.exec_module(module)
return module
return sys.modules.get(module_name) or importlib.import_module(module_name)
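# Illustrative calls covering the three variants (all module and path names
# are assumptions):
#   config.load_plugin_module('fancy', 'nominatim_db.tokenizer')             # internal module
#   config.load_plugin_module('plugins/fancy.py', 'nominatim_db.tokenizer')  # file in project dir
#   config.load_plugin_module('mypackage.fancy', 'nominatim_db.tokenizer')   # absolute module name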
def find_config_file(self, filename: StrPath,
config: Optional[str] = None) -> Path:
""" Resolve the location of a configuration file given a filename and
an optional configuration option with the file name.
Raises a UsageError when the file cannot be found or is not
a regular file.
"""
if config is not None:
cfg_value = getattr(self, config)
if cfg_value:
cfg_filename = Path(cfg_value)
if cfg_filename.is_absolute():
cfg_filename = cfg_filename.resolve()
if not cfg_filename.is_file():
LOG.fatal("Cannot find config file '%s'.", cfg_filename)
raise UsageError("Config file not found.")
return cfg_filename
filename = cfg_filename
search_paths = [self.project_dir, self.config_dir]
for path in search_paths:
if path is not None and (path / filename).is_file():
return path / filename
LOG.fatal("Configuration file '%s' not found.\nDirectories searched: %s",
filename, search_paths)
raise UsageError("Config file not found.")
def _load_from_yaml(self, cfgfile: Path) -> Any:
""" Load a YAML configuration file. This installs a special handler that
allows other YAML files to be included using the '!include' operator.
"""
yaml.add_constructor('!include', self._yaml_include_representer,
Loader=yaml.SafeLoader)
return yaml.safe_load(cfgfile.read_text(encoding='utf-8'))
def _yaml_include_representer(self, loader: Any, node: yaml.Node) -> Any:
""" Handler for the '!include' operator in YAML files.
When the filename is relative, then the file is first searched in the
project directory and then in the global settings directory.
"""
fname = loader.construct_scalar(node)
if Path(fname).is_absolute():
configfile = Path(fname)
else:
configfile = self.find_config_file(loader.construct_scalar(node))
if configfile.suffix != '.yaml':
LOG.fatal("Format error while reading '%s': only YAML format supported.",
configfile)
raise UsageError("Cannot handle config file format.")
return yaml.safe_load(configfile.read_text(encoding='utf-8'))

View File

View File

@@ -0,0 +1,236 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
""" Non-blocking database connections.
"""
from typing import Callable, Any, Optional, Iterator, Sequence
import logging
import select
import time
import psycopg2
from psycopg2.extras import wait_select
# psycopg2 emits different exceptions pre and post 2.8. Detect if the new error
# module is available and adapt the error handling accordingly.
try:
import psycopg2.errors # pylint: disable=no-name-in-module,import-error
__has_psycopg2_errors__ = True
except ImportError:
__has_psycopg2_errors__ = False
from ..typing import T_cursor, Query
LOG = logging.getLogger()
class DeadlockHandler:
""" Context manager that catches deadlock exceptions and calls
the given handler function. All other exceptions are passed on
normally.
"""
def __init__(self, handler: Callable[[], None], ignore_sql_errors: bool = False) -> None:
self.handler = handler
self.ignore_sql_errors = ignore_sql_errors
def __enter__(self) -> 'DeadlockHandler':
return self
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> bool:
if __has_psycopg2_errors__:
if exc_type == psycopg2.errors.DeadlockDetected: # pylint: disable=E1101
self.handler()
return True
elif exc_type == psycopg2.extensions.TransactionRollbackError \
and exc_value.pgcode == '40P01':
self.handler()
return True
if self.ignore_sql_errors and isinstance(exc_value, psycopg2.Error):
LOG.info("SQL error ignored: %s", exc_value)
return True
return False
class DBConnection:
""" A single non-blocking database connection.
"""
def __init__(self, dsn: str,
cursor_factory: Optional[Callable[..., T_cursor]] = None,
ignore_sql_errors: bool = False) -> None:
self.dsn = dsn
self.current_query: Optional[Query] = None
self.current_params: Optional[Sequence[Any]] = None
self.ignore_sql_errors = ignore_sql_errors
self.conn: Optional['psycopg2._psycopg.connection'] = None
self.cursor: Optional['psycopg2._psycopg.cursor'] = None
self.connect(cursor_factory=cursor_factory)
def close(self) -> None:
""" Close all open connections. Does not wait for pending requests.
"""
if self.conn is not None:
if self.cursor is not None:
self.cursor.close()
self.cursor = None
self.conn.close()
self.conn = None
def connect(self, cursor_factory: Optional[Callable[..., T_cursor]] = None) -> None:
""" (Re)connect to the database. Creates an asynchronous connection
with JIT and parallel processing disabled. If a connection was
already open, it is closed and a new connection established.
The caller must ensure that no query is pending before reconnecting.
"""
self.close()
# Use a dict to hand in the parameters because async is a reserved
# word in Python3.
self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True}) # type: ignore
assert self.conn
self.wait()
if cursor_factory is not None:
self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
else:
self.cursor = self.conn.cursor()
# Disable JIT and parallel workers as they are known to cause problems.
# Update pg_settings instead of using SET because it does not yield
# errors on older versions of Postgres where the settings are not
# implemented.
self.perform(
""" UPDATE pg_settings SET setting = -1 WHERE name = 'jit_above_cost';
UPDATE pg_settings SET setting = 0
WHERE name = 'max_parallel_workers_per_gather';""")
self.wait()
def _deadlock_handler(self) -> None:
LOG.info("Deadlock detected (params = %s), retry.", str(self.current_params))
assert self.cursor is not None
assert self.current_query is not None
assert self.current_params is not None
self.cursor.execute(self.current_query, self.current_params)
def wait(self) -> None:
""" Block until any pending operation is done.
"""
while True:
with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
wait_select(self.conn)
self.current_query = None
return
def perform(self, sql: Query, args: Optional[Sequence[Any]] = None) -> None:
""" Send SQL query to the server. Returns immediately without
blocking.
"""
assert self.cursor is not None
self.current_query = sql
self.current_params = args
self.cursor.execute(sql, args)
def fileno(self) -> int:
""" File descriptor to wait for. (Makes this class select()able.)
"""
assert self.conn is not None
return self.conn.fileno()
def is_done(self) -> bool:
""" Check if the connection is available for a new query.
Also checks if the previous query has run into a deadlock.
If so, then the previous query is repeated.
"""
assert self.conn is not None
if self.current_query is None:
return True
with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
if self.conn.poll() == psycopg2.extensions.POLL_OK:
self.current_query = None
return True
return False
class WorkerPool:
""" A pool of asynchronous database connections.
The pool may be used as a context manager.
"""
REOPEN_CONNECTIONS_AFTER = 100000
def __init__(self, dsn: str, pool_size: int, ignore_sql_errors: bool = False) -> None:
self.threads = [DBConnection(dsn, ignore_sql_errors=ignore_sql_errors)
for _ in range(pool_size)]
self.free_workers = self._yield_free_worker()
self.wait_time = 0.0
def finish_all(self) -> None:
""" Wait for all connection to finish.
"""
for thread in self.threads:
while not thread.is_done():
thread.wait()
self.free_workers = self._yield_free_worker()
def close(self) -> None:
""" Close all connections and clear the pool.
"""
for thread in self.threads:
thread.close()
self.threads = []
self.free_workers = iter([])
def next_free_worker(self) -> DBConnection:
""" Get the next free connection.
"""
return next(self.free_workers)
def _yield_free_worker(self) -> Iterator[DBConnection]:
ready = self.threads
command_stat = 0
while True:
for thread in ready:
if thread.is_done():
command_stat += 1
yield thread
if command_stat > self.REOPEN_CONNECTIONS_AFTER:
self._reconnect_threads()
ready = self.threads
command_stat = 0
else:
tstart = time.time()
_, ready, _ = select.select([], self.threads, [])
self.wait_time += time.time() - tstart
def _reconnect_threads(self) -> None:
for thread in self.threads:
while not thread.is_done():
thread.wait()
thread.connect()
def __enter__(self) -> 'WorkerPool':
return self
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
self.finish_all()
self.close()
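# A minimal usage sketch (the SQL and batching below are assumptions, shown
# for illustration only): distribute independent statements over the pool.
#
#   with WorkerPool(dsn, pool_size=4) as pool:
#       for batch in batches:
#           pool.next_free_worker().perform(
#               'UPDATE placex SET indexed_status = 2 WHERE place_id = ANY(%s)',
#               (batch, ))
#   # leaving the block waits for all pending queries and closes the connections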

View File

@@ -0,0 +1,21 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Import the base library to use with asynchronous SQLAlchemy.
"""
# pylint: disable=invalid-name
from typing import Any
try:
import psycopg
PGCORE_LIB = 'psycopg'
PGCORE_ERROR: Any = psycopg.Error
except ModuleNotFoundError:
import asyncpg
PGCORE_LIB = 'asyncpg'
PGCORE_ERROR = asyncpg.PostgresError

View File

@@ -0,0 +1,254 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Specialised connection and cursor functions.
"""
from typing import Optional, Any, Callable, ContextManager, Dict, cast, overload, Tuple, Iterable
import contextlib
import logging
import os
import psycopg2
import psycopg2.extensions
import psycopg2.extras
from psycopg2 import sql as pysql
from ..typing import SysEnv, Query, T_cursor
from ..errors import UsageError
LOG = logging.getLogger()
class Cursor(psycopg2.extras.DictCursor):
""" A cursor returning dict-like objects and providing specialised
execution functions.
"""
# pylint: disable=arguments-renamed,arguments-differ
def execute(self, query: Query, args: Any = None) -> None:
""" Query execution that logs the SQL query when debugging is enabled.
"""
if LOG.isEnabledFor(logging.DEBUG):
LOG.debug(self.mogrify(query, args).decode('utf-8'))
super().execute(query, args)
def execute_values(self, sql: Query, argslist: Iterable[Tuple[Any, ...]],
template: Optional[Query] = None) -> None:
""" Wrapper for the psycopg2 convenience function to execute
SQL for a list of values.
"""
LOG.debug("SQL execute_values(%s, %s)", sql, argslist)
psycopg2.extras.execute_values(self, sql, argslist, template=template)
def scalar(self, sql: Query, args: Any = None) -> Any:
""" Execute query that returns a single value. The value is returned.
If the query yields more than one row, a ValueError is raised.
"""
self.execute(sql, args)
if self.rowcount != 1:
raise RuntimeError("Query did not return a single row.")
result = self.fetchone()
assert result is not None
return result[0]
def drop_table(self, name: str, if_exists: bool = True, cascade: bool = False) -> None:
""" Drop the table with the given name.
Set `if_exists` to False if a non-existent table should raise
an exception instead of just being ignored. If 'cascade' is set
to True then all dependent tables are deleted as well.
"""
sql = 'DROP TABLE '
if if_exists:
sql += 'IF EXISTS '
sql += '{}'
if cascade:
sql += ' CASCADE'
self.execute(pysql.SQL(sql).format(pysql.Identifier(name)))
class Connection(psycopg2.extensions.connection):
""" A connection that provides the specialised cursor by default and
adds convenience functions for administering the database.
"""
@overload # type: ignore[override]
def cursor(self) -> Cursor:
...
@overload
def cursor(self, name: str) -> Cursor:
...
@overload
def cursor(self, cursor_factory: Callable[..., T_cursor]) -> T_cursor:
...
def cursor(self, cursor_factory = Cursor, **kwargs): # type: ignore
""" Return a new cursor. By default the specialised cursor is returned.
"""
return super().cursor(cursor_factory=cursor_factory, **kwargs)
def table_exists(self, table: str) -> bool:
""" Check that a table with the given name exists in the database.
"""
with self.cursor() as cur:
num = cur.scalar("""SELECT count(*) FROM pg_tables
WHERE tablename = %s and schemaname = 'public'""", (table, ))
return num == 1 if isinstance(num, int) else False
def table_has_column(self, table: str, column: str) -> bool:
""" Check if the table 'table' exists and has a column with name 'column'.
"""
with self.cursor() as cur:
has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
WHERE table_name = %s
and column_name = %s""",
(table, column))
return has_column > 0 if isinstance(has_column, int) else False
def index_exists(self, index: str, table: Optional[str] = None) -> bool:
""" Check that an index with the given name exists in the database.
If table is not None then the index must relate to the given
table.
"""
with self.cursor() as cur:
cur.execute("""SELECT tablename FROM pg_indexes
WHERE indexname = %s and schemaname = 'public'""", (index, ))
if cur.rowcount == 0:
return False
if table is not None:
row = cur.fetchone()
if row is None or not isinstance(row[0], str):
return False
return row[0] == table
return True
def drop_table(self, name: str, if_exists: bool = True, cascade: bool = False) -> None:
""" Drop the table with the given name.
Set `if_exists` to False if a non-existent table should raise
an exception instead of just being ignored.
"""
with self.cursor() as cur:
cur.drop_table(name, if_exists, cascade)
self.commit()
def server_version_tuple(self) -> Tuple[int, int]:
""" Return the server version as a tuple of (major, minor).
Converts correctly for pre-10 and post-10 PostgreSQL versions.
"""
version = self.server_version
if version < 100000:
return (int(version / 10000), int((version % 10000) / 100))
return (int(version / 10000), version % 10000)
def postgis_version_tuple(self) -> Tuple[int, int]:
""" Return the postgis version installed in the database as a
tuple of (major, minor). Assumes that the PostGIS extension
has been installed already.
"""
with self.cursor() as cur:
version = cur.scalar('SELECT postgis_lib_version()')
version_parts = version.split('.')
if len(version_parts) < 2:
raise UsageError(f"Error fetching Postgis version. Bad format: {version}")
return (int(version_parts[0]), int(version_parts[1]))
def extension_loaded(self, extension_name: str) -> bool:
""" Return True if the hstore extension is loaded in the database.
"""
with self.cursor() as cur:
cur.execute('SELECT extname FROM pg_extension WHERE extname = %s', (extension_name, ))
return cur.rowcount > 0
class ConnectionContext(ContextManager[Connection]):
""" Context manager of the connection that also provides direct access
to the underlying connection.
"""
connection: Connection
def connect(dsn: str) -> ConnectionContext:
""" Open a connection to the database using the specialised connection
factory. The returned object may be used in conjunction with 'with'.
When used outside a context manager, use the `connection` attribute
to get the connection.
"""
try:
conn = psycopg2.connect(dsn, connection_factory=Connection)
ctxmgr = cast(ConnectionContext, contextlib.closing(conn))
ctxmgr.connection = conn
return ctxmgr
except psycopg2.OperationalError as err:
raise UsageError(f"Cannot connect to database: {err}") from err
# Translation from PG connection string parameters to PG environment variables.
# Derived from https://www.postgresql.org/docs/current/libpq-envars.html.
_PG_CONNECTION_STRINGS = {
'host': 'PGHOST',
'hostaddr': 'PGHOSTADDR',
'port': 'PGPORT',
'dbname': 'PGDATABASE',
'user': 'PGUSER',
'password': 'PGPASSWORD',
'passfile': 'PGPASSFILE',
'channel_binding': 'PGCHANNELBINDING',
'service': 'PGSERVICE',
'options': 'PGOPTIONS',
'application_name': 'PGAPPNAME',
'sslmode': 'PGSSLMODE',
'requiressl': 'PGREQUIRESSL',
'sslcompression': 'PGSSLCOMPRESSION',
'sslcert': 'PGSSLCERT',
'sslkey': 'PGSSLKEY',
'sslrootcert': 'PGSSLROOTCERT',
'sslcrl': 'PGSSLCRL',
'requirepeer': 'PGREQUIREPEER',
'ssl_min_protocol_version': 'PGSSLMINPROTOCOLVERSION',
'ssl_max_protocol_version': 'PGSSLMAXPROTOCOLVERSION',
'gssencmode': 'PGGSSENCMODE',
'krbsrvname': 'PGKRBSRVNAME',
'gsslib': 'PGGSSLIB',
'connect_timeout': 'PGCONNECT_TIMEOUT',
'target_session_attrs': 'PGTARGETSESSIONATTRS',
}
def get_pg_env(dsn: str,
base_env: Optional[SysEnv] = None) -> Dict[str, str]:
""" Return a copy of `base_env` with the environment variables for
PostgreSQL set up from the given database connection string.
If `base_env` is None, then the OS environment is used as a base
environment.
"""
env = dict(base_env if base_env is not None else os.environ)
for param, value in psycopg2.extensions.parse_dsn(dsn).items():
if param in _PG_CONNECTION_STRINGS:
env[_PG_CONNECTION_STRINGS[param]] = value
else:
LOG.error("Unknown connection parameter '%s' ignored.", param)
return env
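def _example_child_env() -> Dict[str, str]:
    """ A minimal usage sketch (illustrative only; the connection string is
        an assumption): build an environment for a child process such as
        psql from a libpq connection string.
    """
    env = get_pg_env('dbname=nominatim user=www-data host=localhost')
    assert env['PGDATABASE'] == 'nominatim'
    return env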

View File

@@ -0,0 +1,47 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Query and access functions for the in-database property table.
"""
from typing import Optional, cast
from .connection import Connection
def set_property(conn: Connection, name: str, value: str) -> None:
""" Add or replace the property with the given name.
"""
with conn.cursor() as cur:
cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
(name, ))
if cur.rowcount == 0:
sql = 'INSERT INTO nominatim_properties (value, property) VALUES (%s, %s)'
else:
sql = 'UPDATE nominatim_properties SET value = %s WHERE property = %s'
cur.execute(sql, (value, name))
conn.commit()
def get_property(conn: Connection, name: str) -> Optional[str]:
""" Return the current value of the given property or None if the property
is not set.
"""
if not conn.table_exists('nominatim_properties'):
return None
with conn.cursor() as cur:
cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
(name, ))
if cur.rowcount == 0:
return None
result = cur.fetchone()
assert result is not None
return cast(Optional[str], result[0])
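def _example_property_roundtrip(conn: Connection) -> None:
    """ A minimal usage sketch (illustrative only; the property name is an
        assumption): store a value and read it back.
    """
    set_property(conn, 'database_version', '4.4.99')
    assert get_property(conn, 'database_version') == '4.4.99'
    assert get_property(conn, 'no_such_property') is None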

View File

@@ -0,0 +1,143 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Preprocessing of SQL files.
"""
from typing import Set, Dict, Any, cast
import jinja2
from .connection import Connection
from .async_connection import WorkerPool
from ..config import Configuration
def _get_partitions(conn: Connection) -> Set[int]:
""" Get the set of partitions currently in use.
"""
with conn.cursor() as cur:
cur.execute('SELECT DISTINCT partition FROM country_name')
partitions = set([0])
for row in cur:
partitions.add(row[0])
return partitions
def _get_tables(conn: Connection) -> Set[str]:
""" Return the set of tables currently in use.
"""
with conn.cursor() as cur:
cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
return set((row[0] for row in list(cur)))
def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:
""" Returns the version of the slim middle tables.
"""
if 'osm2pgsql_properties' not in tables:
return '1'
with conn.cursor() as cur:
cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
row = cur.fetchone()
return cast(str, row[0]) if row is not None else '1'
def _setup_tablespace_sql(config: Configuration) -> Dict[str, str]:
""" Returns a dict with tablespace expressions for the different tablespace
kinds depending on whether a tablespace is configured or not.
"""
out = {}
for subset in ('ADDRESS', 'SEARCH', 'AUX'):
for kind in ('DATA', 'INDEX'):
tspace = getattr(config, f'TABLESPACE_{subset}_{kind}')
if tspace:
tspace = f'TABLESPACE "{tspace}"'
out[f'{subset.lower()}_{kind.lower()}'] = tspace
return out
def _setup_postgresql_features(conn: Connection) -> Dict[str, Any]:
""" Set up a dictionary with various optional Postgresql/Postgis features that
depend on the database version.
"""
pg_version = conn.server_version_tuple()
postgis_version = conn.postgis_version_tuple()
pg11plus = pg_version >= (11, 0)
ps3 = postgis_version >= (3, 0)
return {
'has_index_non_key_column': pg11plus,
'spgist_geom' : 'SPGIST' if pg11plus and ps3 else 'GIST'
}
class SQLPreprocessor:
""" A environment for preprocessing SQL files from the
lib-sql directory.
The preprocessor provides a number of default filters and variables.
The variables may be overwritten when rendering an SQL file.
The preprocessing is currently based on the jinja2 templating library
and follows its syntax.
"""
def __init__(self, conn: Connection, config: Configuration) -> None:
self.env = jinja2.Environment(autoescape=False,
loader=jinja2.FileSystemLoader(str(config.lib_dir.sql)))
db_info: Dict[str, Any] = {}
db_info['partitions'] = _get_partitions(conn)
db_info['tables'] = _get_tables(conn)
db_info['reverse_only'] = 'search_name' not in db_info['tables']
db_info['tablespace'] = _setup_tablespace_sql(config)
db_info['middle_db_format'] = _get_middle_db_format(conn, db_info['tables'])
self.env.globals['config'] = config
self.env.globals['db'] = db_info
self.env.globals['postgres'] = _setup_postgresql_features(conn)
def run_string(self, conn: Connection, template: str, **kwargs: Any) -> None:
""" Execute the given SQL template string on the connection.
The keyword arguments may supply additional parameters
for preprocessing.
"""
sql = self.env.from_string(template).render(**kwargs)
with conn.cursor() as cur:
cur.execute(sql)
conn.commit()
def run_sql_file(self, conn: Connection, name: str, **kwargs: Any) -> None:
""" Execute the given SQL file on the connection. The keyword arguments
may supply additional parameters for preprocessing.
"""
sql = self.env.get_template(name).render(**kwargs)
with conn.cursor() as cur:
cur.execute(sql)
conn.commit()
def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
**kwargs: Any) -> None:
""" Execute the given SQL files using parallel asynchronous connections.
The keyword arguments may supply additional parameters for
preprocessing.
After preprocessing the SQL code is cut at lines containing only
'---'. Each chunk is sent to one of the `num_threads` workers.
"""
sql = self.env.get_template(name).render(**kwargs)
parts = sql.split('\n---\n')
with WorkerPool(dsn, num_threads) as pool:
for part in parts:
pool.next_free_worker().perform(part)
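A usage sketch, assuming the connection and configuration objects are set up elsewhere and the module path follows the new layout:

from nominatim_core.db.sql_preprocessor import SQLPreprocessor   # path assumed

proc = SQLPreprocessor(conn, config)

# Templates have access to the 'config', 'db' and 'postgres' globals, so
# SQL can be rendered conditionally on the state of the database:
proc.run_string(conn, """
    {% if 'search_name' in db.tables %}
    ANALYZE search_name;
    {% endif %}
    """)

# Files from lib-sql may contain lines consisting only of '---'; each chunk
# between them is handed to one of the asynchronous worker connections.
# Extra keyword arguments become template variables:
proc.run_parallel_sql_file(config.get_libpq_dsn(), 'indices.sql',
                           num_threads=4, drop=False)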

View File

@@ -0,0 +1,119 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
SQLAlchemy definitions for all tables used by the frontend.
"""
import sqlalchemy as sa
from .sqlalchemy_types import Geometry, KeyValueStore, IntArray
#pylint: disable=too-many-instance-attributes
class SearchTables:
""" Data class that holds the tables of the Nominatim database.
This schema strictly reflects the read-access view of the database.
Any data used for updates only will not be visible.
"""
def __init__(self, meta: sa.MetaData) -> None:
self.meta = meta
self.import_status = sa.Table('import_status', meta,
sa.Column('lastimportdate', sa.DateTime(True), nullable=False),
sa.Column('sequence_id', sa.Integer),
sa.Column('indexed', sa.Boolean))
self.properties = sa.Table('nominatim_properties', meta,
sa.Column('property', sa.Text, nullable=False),
sa.Column('value', sa.Text))
self.placex = sa.Table('placex', meta,
sa.Column('place_id', sa.BigInteger, nullable=False),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('linked_place_id', sa.BigInteger),
sa.Column('importance', sa.Float),
sa.Column('indexed_date', sa.DateTime),
sa.Column('rank_address', sa.SmallInteger),
sa.Column('rank_search', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('osm_type', sa.String(1), nullable=False),
sa.Column('osm_id', sa.BigInteger, nullable=False),
sa.Column('class', sa.Text, nullable=False, key='class_'),
sa.Column('type', sa.Text, nullable=False),
sa.Column('admin_level', sa.SmallInteger),
sa.Column('name', KeyValueStore),
sa.Column('address', KeyValueStore),
sa.Column('extratags', KeyValueStore),
sa.Column('geometry', Geometry, nullable=False),
sa.Column('wikipedia', sa.Text),
sa.Column('country_code', sa.String(2)),
sa.Column('housenumber', sa.Text),
sa.Column('postcode', sa.Text),
sa.Column('centroid', Geometry))
self.addressline = sa.Table('place_addressline', meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('address_place_id', sa.BigInteger),
sa.Column('distance', sa.Float),
sa.Column('fromarea', sa.Boolean),
sa.Column('isaddress', sa.Boolean))
self.postcode = sa.Table('location_postcode', meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('rank_search', sa.SmallInteger),
sa.Column('rank_address', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('indexed_date', sa.DateTime),
sa.Column('country_code', sa.String(2)),
sa.Column('postcode', sa.Text),
sa.Column('geometry', Geometry))
self.osmline = sa.Table('location_property_osmline', meta,
sa.Column('place_id', sa.BigInteger, nullable=False),
sa.Column('osm_id', sa.BigInteger),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('indexed_date', sa.DateTime),
sa.Column('startnumber', sa.Integer),
sa.Column('endnumber', sa.Integer),
sa.Column('step', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('linegeo', Geometry),
sa.Column('address', KeyValueStore),
sa.Column('postcode', sa.Text),
sa.Column('country_code', sa.String(2)))
self.country_name = sa.Table('country_name', meta,
sa.Column('country_code', sa.String(2)),
sa.Column('name', KeyValueStore),
sa.Column('derived_name', KeyValueStore),
sa.Column('partition', sa.Integer))
self.country_grid = sa.Table('country_osm_grid', meta,
sa.Column('country_code', sa.String(2)),
sa.Column('area', sa.Float),
sa.Column('geometry', Geometry))
# The following tables are not necessarily present.
self.search_name = sa.Table('search_name', meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('importance', sa.Float),
sa.Column('search_rank', sa.SmallInteger),
sa.Column('address_rank', sa.SmallInteger),
sa.Column('name_vector', IntArray),
sa.Column('nameaddress_vector', IntArray),
sa.Column('country_code', sa.String(2)),
sa.Column('centroid', Geometry))
self.tiger = sa.Table('location_property_tiger', meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('startnumber', sa.Integer),
sa.Column('endnumber', sa.Integer),
sa.Column('step', sa.SmallInteger),
sa.Column('linegeo', Geometry),
sa.Column('postcode', sa.Text))
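For illustration (a sketch; the import path is assumed), the schema is bound to a MetaData object and then used to build read-only queries:

import sqlalchemy as sa
from nominatim_core.db.sqlalchemy_schema import SearchTables   # path assumed

t = SearchTables(sa.MetaData())

# The SQL column 'class' is reached through the Python attribute 'class_'
# because of the key= parameter above.
stmt = sa.select(t.placex.c.place_id, t.placex.c.name)\
         .where(t.placex.c.osm_type == 'N')\
         .where(t.placex.c.class_ == 'place')\
         .limit(10)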

View File

@@ -0,0 +1,17 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module with custom types for SQLAlchemy
"""
# See also https://github.com/PyCQA/pylint/issues/6006
# pylint: disable=useless-import-alias
from .geometry import (Geometry as Geometry)
from .int_array import (IntArray as IntArray)
from .key_value import (KeyValueStore as KeyValueStore)
from .json import (Json as Json)

View File

@@ -0,0 +1,308 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom types for SQLAlchemy.
"""
from __future__ import annotations
from typing import Callable, Any, cast
import sys
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy import types
from ...typing import SaColumn, SaBind
#pylint: disable=all
class Geometry_DistanceSpheroid(sa.sql.expression.FunctionElement[float]):
""" Function to compute the spherical distance in meters.
"""
type = sa.Float()
name = 'Geometry_DistanceSpheroid'
inherit_cache = True
@compiles(Geometry_DistanceSpheroid) # type: ignore[no-untyped-call, misc]
def _default_distance_spheroid(element: Geometry_DistanceSpheroid,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_DistanceSpheroid(%s,"\
" 'SPHEROID[\"WGS 84\",6378137,298.257223563, AUTHORITY[\"EPSG\",\"7030\"]]')"\
% compiler.process(element.clauses, **kw)
@compiles(Geometry_DistanceSpheroid, 'sqlite') # type: ignore[no-untyped-call, misc]
def _spatialite_distance_spheroid(element: Geometry_DistanceSpheroid,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "COALESCE(Distance(%s, true), 0.0)" % compiler.process(element.clauses, **kw)
class Geometry_IsLineLike(sa.sql.expression.FunctionElement[Any]):
""" Check if the geometry is a line or multiline.
"""
name = 'Geometry_IsLineLike'
inherit_cache = True
@compiles(Geometry_IsLineLike) # type: ignore[no-untyped-call, misc]
def _default_is_line_like(element: Geometry_IsLineLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('ST_LineString', 'ST_MultiLineString')" % \
compiler.process(element.clauses, **kw)
@compiles(Geometry_IsLineLike, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_is_line_like(element: Geometry_IsLineLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('LINESTRING', 'MULTILINESTRING')" % \
compiler.process(element.clauses, **kw)
class Geometry_IsAreaLike(sa.sql.expression.FunctionElement[Any]):
""" Check if the geometry is a polygon or multipolygon.
"""
name = 'Geometry_IsAreaLike'
inherit_cache = True
@compiles(Geometry_IsAreaLike) # type: ignore[no-untyped-call, misc]
def _default_is_area_like(element: Geometry_IsAreaLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('ST_Polygon', 'ST_MultiPolygon')" % \
compiler.process(element.clauses, **kw)
@compiles(Geometry_IsAreaLike, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_is_area_like(element: Geometry_IsAreaLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('POLYGON', 'MULTIPOLYGON')" % \
compiler.process(element.clauses, **kw)
class Geometry_IntersectsBbox(sa.sql.expression.FunctionElement[Any]):
""" Check if the bounding boxes of the given geometries intersect.
"""
name = 'Geometry_IntersectsBbox'
inherit_cache = True
@compiles(Geometry_IntersectsBbox) # type: ignore[no-untyped-call, misc]
def _default_intersects(element: Geometry_IntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(Geometry_IntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_intersects(element: Geometry_IntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "MbrIntersects(%s) = 1" % compiler.process(element.clauses, **kw)
class Geometry_ColumnIntersectsBbox(sa.sql.expression.FunctionElement[Any]):
""" Check if the bounding box of the geometry intersects with the
given table column, using the spatial index for the column.
The index must exist or the query may return nothing.
"""
name = 'Geometry_ColumnIntersectsBbox'
inherit_cache = True
@compiles(Geometry_ColumnIntersectsBbox) # type: ignore[no-untyped-call, misc]
def default_intersects_column(element: Geometry_ColumnIntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(Geometry_ColumnIntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
def spatialite_intersects_column(element: Geometry_ColumnIntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "MbrIntersects(%s, %s) = 1 and "\
"%s.ROWID IN (SELECT ROWID FROM SpatialIndex "\
"WHERE f_table_name = '%s' AND f_geometry_column = '%s' "\
"AND search_frame = %s)" %(
compiler.process(arg1, **kw),
compiler.process(arg2, **kw),
arg1.table.name, arg1.table.name, arg1.name,
compiler.process(arg2, **kw))
class Geometry_ColumnDWithin(sa.sql.expression.FunctionElement[Any]):
""" Check if the geometry is within the distance of the
given table column, using the spatial index for the column.
The index must exist or the query may return nothing.
"""
name = 'Geometry_ColumnDWithin'
inherit_cache = True
@compiles(Geometry_ColumnDWithin) # type: ignore[no-untyped-call, misc]
def default_dwithin_column(element: Geometry_ColumnDWithin,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_DWithin(%s)" % compiler.process(element.clauses, **kw)
@compiles(Geometry_ColumnDWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
def spatialite_dwithin_column(element: Geometry_ColumnDWithin,
compiler: 'sa.Compiled', **kw: Any) -> str:
geom1, geom2, dist = list(element.clauses)
return "ST_Distance(%s, %s) < %s and "\
"%s.ROWID IN (SELECT ROWID FROM SpatialIndex "\
"WHERE f_table_name = '%s' AND f_geometry_column = '%s' "\
"AND search_frame = ST_Expand(%s, %s))" %(
compiler.process(geom1, **kw),
compiler.process(geom2, **kw),
compiler.process(dist, **kw),
geom1.table.name, geom1.table.name, geom1.name,
compiler.process(geom2, **kw),
compiler.process(dist, **kw))
class Geometry(types.UserDefinedType): # type: ignore[type-arg]
""" Simplified type decorator for PostGIS geometry. This type
only supports geometries in 4326 projection.
"""
cache_ok = True
def __init__(self, subtype: str = 'Geometry'):
self.subtype = subtype
def get_col_spec(self) -> str:
return f'GEOMETRY({self.subtype}, 4326)'
def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:
def process(value: Any) -> str:
if isinstance(value, str):
return value
return cast(str, value.to_wkt())
return process
def result_processor(self, dialect: 'sa.Dialect', coltype: object) -> Callable[[Any], str]:
def process(value: Any) -> str:
assert isinstance(value, str)
return value
return process
def column_expression(self, col: SaColumn) -> SaColumn:
return sa.func.ST_AsEWKB(col)
def bind_expression(self, bindvalue: SaBind) -> SaColumn:
return sa.func.ST_GeomFromText(bindvalue, sa.text('4326'), type_=self)
class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
def intersects(self, other: SaColumn, use_index: bool = True) -> 'sa.Operators':
if not use_index:
return Geometry_IntersectsBbox(sa.func.coalesce(sa.null(), self.expr), other)
if isinstance(self.expr, sa.Column):
return Geometry_ColumnIntersectsBbox(self.expr, other)
return Geometry_IntersectsBbox(self.expr, other)
def is_line_like(self) -> SaColumn:
return Geometry_IsLineLike(self)
def is_area(self) -> SaColumn:
return Geometry_IsAreaLike(self)
def within_distance(self, other: SaColumn, distance: SaColumn) -> SaColumn:
if isinstance(self.expr, sa.Column):
return Geometry_ColumnDWithin(self.expr, other, distance)
return self.ST_Distance(other) < distance
def ST_Distance(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Distance(self, other, type_=sa.Float)
def ST_Contains(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Contains(self, other, type_=sa.Boolean)
def ST_CoveredBy(self, other: SaColumn) -> SaColumn:
return sa.func.ST_CoveredBy(self, other, type_=sa.Boolean)
def ST_ClosestPoint(self, other: SaColumn) -> SaColumn:
return sa.func.coalesce(sa.func.ST_ClosestPoint(self, other, type_=Geometry),
other)
def ST_Buffer(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Buffer(self, other, type_=Geometry)
def ST_Expand(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Expand(self, other, type_=Geometry)
def ST_Collect(self) -> SaColumn:
return sa.func.ST_Collect(self, type_=Geometry)
def ST_Centroid(self) -> SaColumn:
return sa.func.ST_Centroid(self, type_=Geometry)
def ST_LineInterpolatePoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_LineInterpolatePoint(self, other, type_=Geometry)
def ST_LineLocatePoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_LineLocatePoint(self, other, type_=sa.Float)
def distance_spheroid(self, other: SaColumn) -> SaColumn:
return Geometry_DistanceSpheroid(self, other)
@compiles(Geometry, 'sqlite') # type: ignore[no-untyped-call]
def get_col_spec(self, *args, **kwargs): # type: ignore[no-untyped-def]
return 'GEOMETRY'
SQLITE_FUNCTION_ALIAS = (
('ST_AsEWKB', sa.Text, 'AsEWKB'),
('ST_GeomFromEWKT', Geometry, 'GeomFromEWKT'),
('ST_AsGeoJSON', sa.Text, 'AsGeoJSON'),
('ST_AsKML', sa.Text, 'AsKML'),
('ST_AsSVG', sa.Text, 'AsSVG'),
('ST_LineLocatePoint', sa.Float, 'ST_Line_Locate_Point'),
('ST_LineInterpolatePoint', sa.Float, 'ST_Line_Interpolate_Point'),
)
def _add_function_alias(func: str, ftype: type, alias: str) -> None:
_FuncDef = type(func, (sa.sql.functions.GenericFunction, ), {
"type": ftype(),
"name": func,
"identifier": func,
"inherit_cache": True})
func_templ = f"{alias}(%s)"
def _sqlite_impl(element: Any, compiler: Any, **kw: Any) -> Any:
return func_templ % compiler.process(element.clauses, **kw)
compiles(_FuncDef, 'sqlite')(_sqlite_impl) # type: ignore[no-untyped-call]
for alias in SQLITE_FUNCTION_ALIAS:
_add_function_alias(*alias)
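A sketch of how the comparator functions translate into queries (schema import path assumed; the WKT point is just an example value):

import sqlalchemy as sa
from nominatim_core.db.sqlalchemy_schema import SearchTables   # path assumed

t = SearchTables(sa.MetaData())
pt = sa.bindparam('pt', 'POINT(13.4 52.5)', type_=Geometry())

# intersects() picks the index-aware '&&' form when the left side is a
# plain table column:
near = sa.select(t.placex.c.place_id)\
         .where(t.placex.c.geometry.intersects(pt))

# within_distance() becomes ST_DWithin() on PostgreSQL and an explicit
# SpatialIndex lookup on SQLite:
close_by = sa.select(t.placex.c.place_id)\
             .where(t.placex.c.centroid.within_distance(pt, 0.001))\
             .order_by(t.placex.c.centroid.ST_Distance(pt))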

View File

@@ -0,0 +1,123 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom type for an array of integers.
"""
from typing import Any, List, cast, Optional
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.dialects.postgresql import ARRAY
from ...typing import SaDialect, SaColumn
# pylint: disable=all
class IntList(sa.types.TypeDecorator[Any]):
""" A list of integers saved as a text of comma-separated numbers.
"""
impl = sa.types.Unicode
cache_ok = True
def process_bind_param(self, value: Optional[Any], dialect: 'sa.Dialect') -> Optional[str]:
if value is None:
return None
assert isinstance(value, list)
return ','.join(map(str, value))
def process_result_value(self, value: Optional[Any],
dialect: SaDialect) -> Optional[List[int]]:
return [int(v) for v in value.split(',')] if value is not None else None
def copy(self, **kw: Any) -> 'IntList':
return IntList(self.impl.length)
class IntArray(sa.types.TypeDecorator[Any]):
""" Dialect-independent list of integers.
"""
impl = IntList
cache_ok = True
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
if dialect.name == 'postgresql':
return ARRAY(sa.Integer()) #pylint: disable=invalid-name
return IntList()
class comparator_factory(sa.types.UserDefinedType.Comparator): # type: ignore[type-arg]
def __add__(self, other: SaColumn) -> 'sa.ColumnOperators':
""" Concate the array with the given array. If one of the
operants is null, the value of the other will be returned.
"""
return ArrayCat(self.expr, other)
def contains(self, other: SaColumn, **kwargs: Any) -> 'sa.ColumnOperators':
""" Return true if the array contains all the value of the argument
array.
"""
return ArrayContains(self.expr, other)
class ArrayAgg(sa.sql.functions.GenericFunction[Any]):
""" Aggregate function to collect elements in an array.
"""
type = IntArray()
identifier = 'ArrayAgg'
name = 'array_agg'
inherit_cache = True
@compiles(ArrayAgg, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_array_agg(element: ArrayAgg, compiler: 'sa.Compiled', **kw: Any) -> str:
return "group_concat(%s, ',')" % compiler.process(element.clauses, **kw)
class ArrayContains(sa.sql.expression.FunctionElement[Any]):
""" Function to check if an array is fully contained in another.
"""
name = 'ArrayContains'
inherit_cache = True
@compiles(ArrayContains) # type: ignore[no-untyped-call, misc]
def generic_array_contains(element: ArrayContains, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(%s @> %s)" % (compiler.process(arg1, **kw),
compiler.process(arg2, **kw))
@compiles(ArrayContains, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_array_contains(element: ArrayContains, compiler: 'sa.Compiled', **kw: Any) -> str:
return "array_contains(%s)" % compiler.process(element.clauses, **kw)
class ArrayCat(sa.sql.expression.FunctionElement[Any]):
""" Function to check if an array is fully contained in another.
"""
type = IntArray()
identifier = 'ArrayCat'
inherit_cache = True
@compiles(ArrayCat) # type: ignore[no-untyped-call, misc]
def generic_array_cat(element: ArrayCat, compiler: 'sa.Compiled', **kw: Any) -> str:
return "array_cat(%s)" % compiler.process(element.clauses, **kw)
@compiles(ArrayCat, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_array_cat(element: ArrayCat, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(%s || ',' || %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))

View File

@@ -0,0 +1,30 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Common json type for different dialects.
"""
from typing import Any
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
from ...typing import SaDialect
# pylint: disable=all
class Json(sa.types.TypeDecorator[Any]):
""" Dialect-independent type for JSON.
"""
impl = sa.types.JSON
cache_ok = True
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
if dialect.name == 'postgresql':
return JSONB(none_as_null=True) # type: ignore[no-untyped-call]
return sqlite_json(none_as_null=True)

View File

@@ -0,0 +1,62 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
A custom type that implements a simple key-value store of strings.
"""
from typing import Any
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.dialects.postgresql import HSTORE
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
from ...typing import SaDialect, SaColumn
# pylint: disable=all
class KeyValueStore(sa.types.TypeDecorator[Any]):
""" Dialect-independent type of a simple key-value store of strings.
"""
impl = HSTORE
cache_ok = True
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
if dialect.name == 'postgresql':
return HSTORE() # type: ignore[no-untyped-call]
return sqlite_json(none_as_null=True)
class comparator_factory(sa.types.UserDefinedType.Comparator): # type: ignore[type-arg]
def merge(self, other: SaColumn) -> 'sa.Operators':
""" Merge the values from the given KeyValueStore into this
one, overwriting values where necessary. When the argument
is null, nothing happens.
"""
return KeyValueConcat(self.expr, other)
class KeyValueConcat(sa.sql.expression.FunctionElement[Any]):
""" Return the merged key-value store from the input parameters.
"""
type = KeyValueStore()
name = 'JsonConcat'
inherit_cache = True
@compiles(KeyValueConcat) # type: ignore[no-untyped-call, misc]
def default_json_concat(element: KeyValueConcat, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(%s || coalesce(%s, ''::hstore))" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(KeyValueConcat, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_json_concat(element: KeyValueConcat, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "json_patch(%s, coalesce(%s, '{}'))" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))

View File

@@ -0,0 +1,127 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Access and helper functions for the status and status log table.
"""
from typing import Optional, Tuple, cast
import datetime as dt
import logging
import re
from .connection import Connection
from ..utils.url_utils import get_url
from ..errors import UsageError
from ..typing import TypedDict
LOG = logging.getLogger()
ISODATE_FORMAT = '%Y-%m-%dT%H:%M:%S'
class StatusRow(TypedDict):
""" Dictionary of columns of the import_status table.
"""
lastimportdate: dt.datetime
sequence_id: Optional[int]
indexed: Optional[bool]
def compute_database_date(conn: Connection, offline: bool = False) -> dt.datetime:
""" Determine the date of the database from the newest object in the
database.
"""
# If there is a date from osm2pgsql available, use that.
if conn.table_exists('osm2pgsql_properties'):
with conn.cursor() as cur:
cur.execute(""" SELECT value FROM osm2pgsql_properties
WHERE property = 'current_timestamp' """)
row = cur.fetchone()
if row is not None:
return dt.datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%SZ")\
.replace(tzinfo=dt.timezone.utc)
if offline:
raise UsageError("Cannot determine database date from data in offline mode.")
# Else, find the node with the highest ID in the database
with conn.cursor() as cur:
if conn.table_exists('place'):
osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
else:
osmid = cur.scalar("SELECT max(osm_id) FROM placex WHERE osm_type='N'")
if osmid is None:
LOG.fatal("No data found in the database.")
raise UsageError("No data found in the database.")
LOG.info("Using node id %d for timestamp lookup", osmid)
# Get the node from the API to find the timestamp when it was created.
node_url = f'https://www.openstreetmap.org/api/0.6/node/{osmid}/1'
data = get_url(node_url)
match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
if match is None:
LOG.fatal("The node data downloaded from the API does not contain valid data.\n"
"URL used: %s", node_url)
raise UsageError("Bad API data.")
LOG.debug("Found timestamp %s", match.group(1))
return dt.datetime.strptime(match.group(1), ISODATE_FORMAT).replace(tzinfo=dt.timezone.utc)
def set_status(conn: Connection, date: Optional[dt.datetime],
seq: Optional[int] = None, indexed: bool = True) -> None:
""" Replace the current status with the given status. If date is `None`
then only sequence and indexed will be updated as given. Otherwise
the whole status is replaced.
The change will be committed to the database.
"""
assert date is None or date.tzinfo == dt.timezone.utc
with conn.cursor() as cur:
if date is None:
cur.execute("UPDATE import_status set sequence_id = %s, indexed = %s",
(seq, indexed))
else:
cur.execute("TRUNCATE TABLE import_status")
cur.execute("""INSERT INTO import_status (lastimportdate, sequence_id, indexed)
VALUES (%s, %s, %s)""", (date, seq, indexed))
conn.commit()
def get_status(conn: Connection) -> Tuple[Optional[dt.datetime], Optional[int], Optional[bool]]:
""" Return the current status as a triple of (date, sequence, indexed).
If status has not been set up yet, a triple of None is returned.
"""
with conn.cursor() as cur:
cur.execute("SELECT * FROM import_status LIMIT 1")
if cur.rowcount < 1:
return None, None, None
row = cast(StatusRow, cur.fetchone())
return row['lastimportdate'], row['sequence_id'], row['indexed']
def set_indexed(conn: Connection, state: bool) -> None:
""" Set the indexed flag in the status table to the given state.
"""
with conn.cursor() as cur:
cur.execute("UPDATE import_status SET indexed = %s", (state, ))
conn.commit()
def log_status(conn: Connection, start: dt.datetime,
event: str, batchsize: Optional[int] = None) -> None:
""" Write a new status line to the `import_osmosis_log` table.
"""
with conn.cursor() as cur:
cur.execute("""INSERT INTO import_osmosis_log
(batchend, batchseq, batchsize, starttime, endtime, event)
SELECT lastimportdate, sequence_id, %s, %s, now(), %s FROM import_status""",
(batchsize, start, event))
conn.commit()
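A sketch of the typical round trip during import and replication (DSN and module paths assumed):

from nominatim_core.db.connection import connect
from nominatim_core.db import status              # module path assumed

with connect('dbname=nominatim') as conn:
    # Determine the data date from the osm2pgsql properties or, failing that,
    # from the newest node via the OSM API, then store it.
    import_date = status.compute_database_date(conn)
    status.set_status(conn, date=import_date, seq=None, indexed=True)

    date, seq, indexed = status.get_status(conn)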

View File

@@ -0,0 +1,129 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for handling DB accesses.
"""
from typing import IO, Optional, Union, Any, Iterable
import subprocess
import logging
import gzip
import io
from pathlib import Path
from .connection import get_pg_env, Cursor
from ..errors import UsageError
LOG = logging.getLogger()
def _pipe_to_proc(proc: 'subprocess.Popen[bytes]',
fdesc: Union[IO[bytes], gzip.GzipFile]) -> int:
assert proc.stdin is not None
chunk = fdesc.read(2048)
while chunk and proc.poll() is None:
try:
proc.stdin.write(chunk)
except BrokenPipeError as exc:
raise UsageError("Failed to execute SQL file.") from exc
chunk = fdesc.read(2048)
return len(chunk)
def execute_file(dsn: str, fname: Path,
ignore_errors: bool = False,
pre_code: Optional[str] = None,
post_code: Optional[str] = None) -> None:
""" Read an SQL file and run its contents against the given database
using psql. Use `pre_code` and `post_code` to run extra commands
before or after executing the file. The commands are run within the
same session, so they may be used to wrap the file execution in a
transaction.
"""
cmd = ['psql']
if not ignore_errors:
cmd.extend(('-v', 'ON_ERROR_STOP=1'))
if not LOG.isEnabledFor(logging.INFO):
cmd.append('--quiet')
with subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) as proc:
assert proc.stdin is not None
try:
if not LOG.isEnabledFor(logging.INFO):
proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
if pre_code:
proc.stdin.write((pre_code + ';').encode('utf-8'))
if fname.suffix == '.gz':
with gzip.open(str(fname), 'rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
else:
with fname.open('rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
if remain == 0 and post_code:
proc.stdin.write((';' + post_code).encode('utf-8'))
finally:
proc.stdin.close()
ret = proc.wait()
if ret != 0 or remain > 0:
raise UsageError("Failed to execute SQL file.")
# List of characters that need to be quoted for the copy command.
_SQL_TRANSLATION = {ord('\\'): '\\\\',
ord('\t'): '\\t',
ord('\n'): '\\n'}
class CopyBuffer:
""" Data collector for the copy_from command.
"""
def __init__(self) -> None:
self.buffer = io.StringIO()
def __enter__(self) -> 'CopyBuffer':
return self
def size(self) -> int:
""" Return the number of bytes the buffer currently contains.
"""
return self.buffer.tell()
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
if self.buffer is not None:
self.buffer.close()
def add(self, *data: Any) -> None:
""" Add another row of data to the copy buffer.
"""
first = True
for column in data:
if first:
first = False
else:
self.buffer.write('\t')
if column is None:
self.buffer.write('\\N')
else:
self.buffer.write(str(column).translate(_SQL_TRANSLATION))
self.buffer.write('\n')
def copy_out(self, cur: Cursor, table: str, columns: Optional[Iterable[str]] = None) -> None:
""" Copy all collected data into the given table.
The buffer is empty and reusable after this operation.
"""
if self.buffer.tell() > 0:
self.buffer.seek(0)
cur.copy_from(self.buffer, table, columns=columns)
self.buffer = io.StringIO()
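Usage sketch for CopyBuffer (the connection, table and column names are made up):

with conn.cursor() as cur, CopyBuffer() as buf:
    buf.add(1, 'Main Street', None)      # None is written as \N
    buf.add(2, 'Broad\tWay', 'note')     # tab, newline and backslash get escaped
    if buf.size() > 0:
        buf.copy_out(cur, 'example_table', columns=('id', 'name', 'comment'))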

View File

@@ -0,0 +1,14 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom exception and error classes for Nominatim.
"""
class UsageError(Exception):
""" An error raised because of bad user input. This error will usually
not cause a stack trace to be printed unless debugging is enabled.
"""

View File

@@ -0,0 +1,15 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Path settings for extra data used by Nominatim.
"""
from pathlib import Path
PHPLIB_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-php').resolve()
SQLLIB_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-sql').resolve()
DATA_DIR = (Path(__file__) / '..' / '..' / '..' / 'data').resolve()
CONFIG_DIR = (Path(__file__) / '..' / '..' / '..' / 'settings').resolve()

View File

View File

@@ -0,0 +1,75 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Type definitions for typing annotations.
Complex type definitions are moved here, to keep the source files readable.
"""
from typing import Any, Union, Mapping, TypeVar, Sequence, TYPE_CHECKING
# Generic variable names do not conform to naming styles, ignore globally here.
# pylint: disable=invalid-name,abstract-method,multiple-statements
# pylint: disable=missing-class-docstring,useless-import-alias
if TYPE_CHECKING:
import psycopg2.sql
import psycopg2.extensions
import psycopg2.extras
import os
StrPath = Union[str, 'os.PathLike[str]']
SysEnv = Mapping[str, str]
# psycopg2-related types
Query = Union[str, bytes, 'psycopg2.sql.Composable']
T_ResultKey = TypeVar('T_ResultKey', int, str)
class DictCursorResult(Mapping[str, Any]):
def __getitem__(self, x: Union[int, str]) -> Any: ...
DictCursorResults = Sequence[DictCursorResult]
T_cursor = TypeVar('T_cursor', bound='psycopg2.extensions.cursor')
# The following typing features require typing_extensions to work
# on all supported Python versions.
# Only require this for type checking but not for normal operations.
if TYPE_CHECKING:
from typing_extensions import (Protocol as Protocol,
Final as Final,
TypedDict as TypedDict)
else:
Protocol = object
Final = 'Final'
TypedDict = dict
# SQLAlchemy introduced generic types in version 2.0 making typing
# incompatible with older versions. Add wrappers here so we don't have
# to litter the code with bare-string types.
if TYPE_CHECKING:
import sqlalchemy as sa
from typing_extensions import (TypeAlias as TypeAlias)
else:
TypeAlias = str
SaLambdaSelect: TypeAlias = 'Union[sa.Select[Any], sa.StatementLambdaElement]'
SaSelect: TypeAlias = 'sa.Select[Any]'
SaScalarSelect: TypeAlias = 'sa.ScalarSelect[Any]'
SaRow: TypeAlias = 'sa.Row[Any]'
SaColumn: TypeAlias = 'sa.ColumnElement[Any]'
SaExpression: TypeAlias = 'sa.ColumnElement[bool]'
SaLabel: TypeAlias = 'sa.Label[Any]'
SaFromClause: TypeAlias = 'sa.FromClause'
SaSelectable: TypeAlias = 'sa.Selectable'
SaBind: TypeAlias = 'sa.BindParameter[Any]'
SaDialect: TypeAlias = 'sa.Dialect'

View File

View File

@@ -0,0 +1,49 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for computation of centroids.
"""
from typing import Tuple, Any
from collections.abc import Collection
class PointsCentroid:
""" Centroid computation from single points using an online algorithm.
More points may be added at any time.
Coordinates are internally stored as fixed-point numbers with seven
decimal digits (i.e. in OSM style).
"""
def __init__(self) -> None:
self.sum_x = 0
self.sum_y = 0
self.count = 0
def centroid(self) -> Tuple[float, float]:
""" Return the centroid of all points collected so far.
"""
if self.count == 0:
raise ValueError("No points available for centroid.")
return (float(self.sum_x/self.count)/10000000,
float(self.sum_y/self.count)/10000000)
def __len__(self) -> int:
return self.count
def __iadd__(self, other: Any) -> 'PointsCentroid':
if isinstance(other, Collection) and len(other) == 2:
if all(isinstance(p, (float, int)) for p in other):
x, y = other
self.sum_x += int(x * 10000000)
self.sum_y += int(y * 10000000)
self.count += 1
return self
raise ValueError("Can only add 2-element tuples to centroid.")
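Usage sketch:

centroid = PointsCentroid()
centroid += (13.5, 52.5)        # (x, y) pairs, here lon/lat in WGS84
centroid += (13.25, 52.25)
assert len(centroid) == 2
print(centroid.centroid())      # -> (13.375, 52.375)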

View File

@@ -0,0 +1,149 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Streaming JSON encoder.
"""
from typing import Any, TypeVar, Optional, Callable
import io
try:
import ujson as json
except ModuleNotFoundError:
import json # type: ignore[no-redef]
T = TypeVar('T') # pylint: disable=invalid-name
class JsonWriter:
""" JSON encoder that renders the output directly into an output
stream. This is a very simple writer which produces JSON in as
compact a form as possible.
The writer does not check for syntactic correctness. It is the
responsibility of the caller to call the write functions in an
order that produces correct JSON.
All functions return the writer object itself so that function
calls can be chained.
"""
def __init__(self) -> None:
self.data = io.StringIO()
self.pending = ''
def __call__(self) -> str:
""" Return the rendered JSON content as a string.
The writer remains usable after calling this function.
"""
if self.pending:
assert self.pending in (']', '}')
self.data.write(self.pending)
self.pending = ''
return self.data.getvalue()
def start_object(self) -> 'JsonWriter':
""" Write the open bracket of a JSON object.
"""
if self.pending:
self.data.write(self.pending)
self.pending = '{'
return self
def end_object(self) -> 'JsonWriter':
""" Write the closing bracket of a JSON object.
"""
assert self.pending in (',', '{', '')
if self.pending == '{':
self.data.write(self.pending)
self.pending = '}'
return self
def start_array(self) -> 'JsonWriter':
""" Write the opening bracket of a JSON array.
"""
if self.pending:
self.data.write(self.pending)
self.pending = '['
return self
def end_array(self) -> 'JsonWriter':
""" Write the closing bracket of a JSON array.
"""
assert self.pending in (',', '[', ']', ')', '')
if self.pending not in (',', ''):
self.data.write(self.pending)
self.pending = ']'
return self
def key(self, name: str) -> 'JsonWriter':
""" Write the key string of a JSON object.
"""
assert self.pending
self.data.write(self.pending)
self.data.write(json.dumps(name, ensure_ascii=False))
self.pending = ':'
return self
def value(self, value: Any) -> 'JsonWriter':
""" Write out a value as JSON. The function uses the json.dumps()
function for encoding the JSON. Thus any value that can be
encoded by that function is permissible here.
"""
return self.raw(json.dumps(value, ensure_ascii=False))
def float(self, value: float, precision: int) -> 'JsonWriter':
""" Write out a float value with the given precision.
"""
return self.raw(f"{value:0.{precision}f}")
def next(self) -> 'JsonWriter':
""" Write out a delimiter comma between JSON object or array elements.
"""
if self.pending:
self.data.write(self.pending)
self.pending = ','
return self
def raw(self, raw_json: str) -> 'JsonWriter':
""" Write out the given value as is. This function is useful if
a value is already available in JSON format.
"""
if self.pending:
self.data.write(self.pending)
self.pending = ''
self.data.write(raw_json)
return self
def keyval(self, key: str, value: Any) -> 'JsonWriter':
""" Write out an object element with the given key and value.
This is a shortcut for calling 'key()', 'value()' and 'next()'.
"""
self.key(key)
self.value(value)
return self.next()
def keyval_not_none(self, key: str, value: Optional[T],
transform: Optional[Callable[[T], Any]] = None) -> 'JsonWriter':
""" Write out an object element only if the value is not None.
If 'transform' is given, it must be a function that takes the
value type and returns a JSON encodable type. The transform
function will be called before the value is written out.
"""
if value is not None:
self.key(key)
self.value(transform(value) if transform else value)
self.next()
return self
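A short sketch of the chaining API:

out = JsonWriter()
out.start_object()
out.keyval('osm_type', 'N')
out.keyval_not_none('name', None)          # skipped because the value is None
out.key('centroid').start_array()
out.float(13.375, 5).next().float(52.375, 5)
# next() is also needed after a nested value before the enclosing object
# continues; the pending comma is dropped automatically before '}'.
out.end_array().next()
out.end_object()
print(out())   # {"osm_type":"N","centroid":[13.37500,52.37500]}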

View File

@@ -0,0 +1,31 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for accessing URLs.
"""
from typing import IO
import logging
import urllib.request as urlrequest
from ..version import NOMINATIM_CORE_VERSION
LOG = logging.getLogger()
def get_url(url: str) -> str:
""" Get the contents from the given URL and return it as a UTF-8 string.
This version makes sure that an appropriate user agent is sent.
"""
headers = {"User-Agent": f"Nominatim/{NOMINATIM_CORE_VERSION!s}"}
try:
request = urlrequest.Request(url, headers=headers)
with urlrequest.urlopen(request) as response: # type: IO[bytes]
return response.read().decode('utf-8')
except Exception:
LOG.fatal('Failed to load URL: %s', url)
raise

View File

@@ -0,0 +1,11 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Version information for the Nominatim core package.
"""
NOMINATIM_CORE_VERSION = '4.4.99'

View File

src/nominatim_db/cli.py Normal file
View File

@@ -0,0 +1,228 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Command-line interface to the Nominatim functions for import, update,
database administration and querying.
"""
from typing import Optional, Any
import importlib
import logging
import os
import sys
import argparse
from pathlib import Path
from nominatim_core.config import Configuration
from nominatim_core.errors import UsageError
from .tools.exec_utils import run_php_server
from . import clicmd
from . import version
from .clicmd.args import NominatimArgs, Subcommand
LOG = logging.getLogger()
class CommandlineParser:
""" Wraps some of the common functions for parsing the command line
and setting up subcommands.
"""
def __init__(self, prog: str, description: Optional[str]):
self.parser = argparse.ArgumentParser(
prog=prog,
description=description,
formatter_class=argparse.RawDescriptionHelpFormatter)
self.subs = self.parser.add_subparsers(title='available commands',
dest='subcommand')
# Global arguments that only work if no sub-command given
self.parser.add_argument('--version', action='store_true',
help='Print Nominatim version and exit')
# Arguments added to every sub-command
self.default_args = argparse.ArgumentParser(add_help=False)
group = self.default_args.add_argument_group('Default arguments')
group.add_argument('-h', '--help', action='help',
help='Show this help message and exit')
group.add_argument('-q', '--quiet', action='store_const', const=0,
dest='verbose', default=1,
help='Print only error messages')
group.add_argument('-v', '--verbose', action='count', default=1,
help='Increase verboseness of output')
group.add_argument('--project-dir', metavar='DIR', default='.',
help='Base directory of the Nominatim installation (default:.)')
group.add_argument('-j', '--threads', metavar='NUM', type=int,
help='Number of parallel threads to use')
def nominatim_version_text(self) -> str:
""" Program name and version number as string
"""
text = f'Nominatim version {version.NOMINATIM_VERSION!s}'
if version.GIT_COMMIT_HASH is not None:
text += f' ({version.GIT_COMMIT_HASH})'
return text
def add_subcommand(self, name: str, cmd: Subcommand) -> None:
""" Add a subcommand to the parser. The subcommand must be a class
with a function add_args() that adds the parameters for the
subcommand and a run() function that executes the command.
"""
assert cmd.__doc__ is not None
parser = self.subs.add_parser(name, parents=[self.default_args],
help=cmd.__doc__.split('\n', 1)[0],
description=cmd.__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
add_help=False)
parser.set_defaults(command=cmd)
cmd.add_args(parser)
def run(self, **kwargs: Any) -> int:
""" Parse the command line arguments of the program and execute the
appropriate subcommand.
"""
args = NominatimArgs()
try:
self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args)
except SystemExit:
return 1
if args.version:
print(self.nominatim_version_text())
return 0
if args.subcommand is None:
self.parser.print_help()
return 1
args.project_dir = Path(args.project_dir).resolve()
if 'cli_args' not in kwargs:
logging.basicConfig(stream=sys.stderr,
format='%(asctime)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
level=max(4 - args.verbose, 1) * 10)
args.config = Configuration(args.project_dir,
environ=kwargs.get('environ', os.environ))
args.config.set_libdirs(module=kwargs['module_dir'],
osm2pgsql=kwargs['osm2pgsql_path'])
log = logging.getLogger()
log.warning('Using project directory: %s', str(args.project_dir))
try:
return args.command.run(args)
except UsageError as exception:
if log.isEnabledFor(logging.DEBUG):
raise # use Python's exception printing
log.fatal('FATAL: %s', exception)
# If we get here, then execution has failed in some way.
return 1
# Subcommand classes
#
# Each class needs to implement two functions: add_args() adds the CLI parameters
# for the subfunction, run() executes the subcommand.
#
# The class documentation doubles as the help text for the command. The
# first line is also used in the summary when calling the program without
# a subcommand.
#
# No need to document the functions each time.
# pylint: disable=C0111
class AdminServe:
"""\
Start a simple web server for serving the API.
This command starts a built-in webserver to serve the website
from the current project directory. This webserver is only suitable
for testing and development. Do not use it in production setups!
There are different webservers available. The 'php' engine runs the
classic PHP frontend. The other engines are Python servers which run
the new Python frontend code (the default is 'falcon'). This is highly
experimental at the moment and may not include the full API.
By default, the webserver can be accessed at: http://127.0.0.1:8088
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Server arguments')
group.add_argument('--server', default='127.0.0.1:8088',
help='The address the server will listen to.')
group.add_argument('--engine', default='falcon',
choices=('php', 'falcon', 'starlette'),
help='Webserver framework to run. (default: falcon)')
def run(self, args: NominatimArgs) -> int:
if args.engine == 'php':
if args.config.lib_dir.php is None:
raise UsageError("PHP frontend not configured.")
run_php_server(args.server, args.project_dir / 'website')
else:
import uvicorn # pylint: disable=import-outside-toplevel
server_info = args.server.split(':', 1)
host = server_info[0]
if len(server_info) > 1:
if not server_info[1].isdigit():
raise UsageError('Invalid format for --server parameter. Use <host>:<port>')
port = int(server_info[1])
else:
port = 8088
server_module = importlib.import_module(f'nominatim.server.{args.engine}.server')
app = server_module.get_application(args.project_dir)
uvicorn.run(app, host=host, port=port)
return 0
def get_set_parser() -> CommandlineParser:
"""\
Initializes the parser and adds various subcommands for
nominatim cli.
"""
parser = CommandlineParser('nominatim', nominatim.__doc__)
parser.add_subcommand('import', clicmd.SetupAll())
parser.add_subcommand('freeze', clicmd.SetupFreeze())
parser.add_subcommand('replication', clicmd.UpdateReplication())
parser.add_subcommand('special-phrases', clicmd.ImportSpecialPhrases())
parser.add_subcommand('add-data', clicmd.UpdateAddData())
parser.add_subcommand('index', clicmd.UpdateIndex())
parser.add_subcommand('refresh', clicmd.UpdateRefresh())
parser.add_subcommand('admin', clicmd.AdminFuncs())
parser.add_subcommand('export', clicmd.QueryExport())
parser.add_subcommand('convert', clicmd.ConvertDB())
parser.add_subcommand('serve', AdminServe())
parser.add_subcommand('search', clicmd.APISearch())
parser.add_subcommand('reverse', clicmd.APIReverse())
parser.add_subcommand('lookup', clicmd.APILookup())
parser.add_subcommand('details', clicmd.APIDetails())
parser.add_subcommand('status', clicmd.APIStatus())
return parser
def nominatim(**kwargs: Any) -> int:
"""\
Command-line tools for importing, updating, administrating and
querying the Nominatim database.
"""
return get_set_parser().run(**kwargs)
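To illustrate the add_args()/run() protocol described in the comments above, a minimal, purely hypothetical command could look like this (version.NOMINATIM_VERSION is already imported at the top of the module):

class PrintVersion:
    """\
    Print the Nominatim version string (illustrative example only).
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        pass    # no arguments beyond the defaults

    def run(self, args: NominatimArgs) -> int:
        print(version.NOMINATIM_VERSION)
        return 0

# It would be registered in get_set_parser() like the commands above:
#     parser.add_subcommand('print-version', PrintVersion())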

View File

@@ -0,0 +1,28 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Subcommand definitions for the command-line tool.
"""
# mypy and pylint disagree about the style of explicit exports,
# see https://github.com/PyCQA/pylint/issues/6006.
# pylint: disable=useless-import-alias
from .setup import SetupAll as SetupAll
from .replication import UpdateReplication as UpdateReplication
from .api import (APISearch as APISearch,
APIReverse as APIReverse,
APILookup as APILookup,
APIDetails as APIDetails,
APIStatus as APIStatus)
from .index import UpdateIndex as UpdateIndex
from .refresh import UpdateRefresh as UpdateRefresh
from .add_data import UpdateAddData as UpdateAddData
from .admin import AdminFuncs as AdminFuncs
from .freeze import SetupFreeze as SetupFreeze
from .special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
from .export import QueryExport as QueryExport
from .convert import ConvertDB as ConvertDB

View File

@@ -0,0 +1,101 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'add-data' subcommand.
"""
from typing import cast
import argparse
import logging
import psutil
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
class UpdateAddData:
"""\
Add additional data from a file or an online source.
This command allows adding or updating the search data in the database.
The data can come either from an OSM file, or single OSM objects can
be downloaded directly from the OSM API. This function only loads the
data into the database. Afterwards it still needs to be integrated
into the search index. Use the `nominatim index` command for that.
The command can also be used to add external non-OSM data to the
database. At the moment the only supported format is TIGER housenumber
data. See the online documentation at
https://nominatim.org/release-docs/latest/admin/Import/#installing-tiger-housenumber-data-for-the-us
for more information.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group_name = parser.add_argument_group('Source')
group1 = group_name.add_mutually_exclusive_group(required=True)
group1.add_argument('--file', metavar='FILE',
help='Import data from an OSM file or diff file')
group1.add_argument('--diff', metavar='FILE',
help='Import data from an OSM diff file (deprecated: use --file)')
group1.add_argument('--node', metavar='ID', type=int,
help='Import a single node from the API')
group1.add_argument('--way', metavar='ID', type=int,
help='Import a single way from the API')
group1.add_argument('--relation', metavar='ID', type=int,
help='Import a single relation from the API')
group1.add_argument('--tiger-data', metavar='DIR',
help='Add housenumbers from the US TIGER census database')
group2 = parser.add_argument_group('Extra arguments')
group2.add_argument('--use-main-api', action='store_true',
help='Use OSM API instead of Overpass to download objects')
group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
group2.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
help='Set timeout for file downloads')
def run(self, args: NominatimArgs) -> int:
from ..tokenizer import factory as tokenizer_factory
from ..tools import tiger_data, add_osm_data
if args.tiger_data:
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
return tiger_data.add_tiger_data(args.tiger_data,
args.config,
args.threads or psutil.cpu_count() or 1,
tokenizer)
osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
if args.file or args.diff:
return add_osm_data.add_data_from_file(args.config.get_libpq_dsn(),
cast(str, args.file or args.diff),
osm2pgsql_params)
if args.node:
return add_osm_data.add_osm_object(args.config.get_libpq_dsn(),
'node', args.node,
args.use_main_api,
osm2pgsql_params)
if args.way:
return add_osm_data.add_osm_object(args.config.get_libpq_dsn(),
'way', args.way,
args.use_main_api,
osm2pgsql_params)
if args.relation:
return add_osm_data.add_osm_object(args.config.get_libpq_dsn(),
'relation', args.relation,
args.use_main_api,
osm2pgsql_params)
return 0

View File

@@ -0,0 +1,123 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'admin' subcommand.
"""
import logging
import argparse
import random
import nominatim_api as napi
from nominatim_core.db.connection import connect
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
class AdminFuncs:
"""\
Analyse and maintain the database.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Admin tasks')
objs = group.add_mutually_exclusive_group(required=True)
objs.add_argument('--warm', action='store_true',
help='Warm database caches for search and reverse queries')
objs.add_argument('--check-database', action='store_true',
help='Check that the database is complete and operational')
objs.add_argument('--migrate', action='store_true',
help='Migrate the database to a new software version')
objs.add_argument('--analyse-indexing', action='store_true',
help='Print performance analysis of the indexing process')
objs.add_argument('--collect-os-info', action="store_true",
help="Generate a report about the host system information")
objs.add_argument('--clean-deleted', action='store', metavar='AGE',
help='Clean up deleted relations')
group = parser.add_argument_group('Arguments for cache warming')
group.add_argument('--search-only', action='store_const', dest='target',
const='search',
help="Only pre-warm tables for search queries")
group.add_argument('--reverse-only', action='store_const', dest='target',
const='reverse',
help="Only pre-warm tables for reverse queries")
group = parser.add_argument_group('Arguments for index analysis')
mgroup = group.add_mutually_exclusive_group()
mgroup.add_argument('--osm-id', type=str,
help='Analyse indexing of the given OSM object')
mgroup.add_argument('--place-id', type=int,
help='Analyse indexing of the given Nominatim object')
def run(self, args: NominatimArgs) -> int:
# pylint: disable=too-many-return-statements
if args.warm:
return self._warm(args)
if args.check_database:
LOG.warning('Checking database')
from ..tools import check_database
return check_database.check_database(args.config)
if args.analyse_indexing:
LOG.warning('Analysing performance of indexing function')
from ..tools import admin
admin.analyse_indexing(args.config, osm_id=args.osm_id, place_id=args.place_id)
return 0
if args.migrate:
LOG.warning('Checking for necessary database migrations')
from ..tools import migration
return migration.migrate(args.config, args)
if args.collect_os_info:
LOG.warning("Reporting System Information")
from ..tools import collect_os_info
collect_os_info.report_system_information(args.config)
return 0
if args.clean_deleted:
LOG.warning('Cleaning up deleted relations')
from ..tools import admin
admin.clean_deleted_relations(args.config, age=args.clean_deleted)
return 0
return 1
def _warm(self, args: NominatimArgs) -> int:
LOG.warning('Warming database caches')
api = napi.NominatimAPI(args.project_dir)
try:
if args.target != 'search':
for _ in range(1000):
api.reverse((random.uniform(-90, 90), random.uniform(-180, 180)),
address_details=True)
if args.target != 'reverse':
from ..tokenizer import factory as tokenizer_factory
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
with connect(args.config.get_libpq_dsn()) as conn:
if conn.table_exists('search_name'):
words = tokenizer.most_frequent_words(conn, 1000)
else:
words = []
for word in words:
api.search(word)
finally:
api.close()
return 0

View File

@@ -0,0 +1,374 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Subcommand definitions for API calls from the command line.
"""
from typing import Dict, Any
import argparse
import logging
import json
import sys
import nominatim_api as napi
import nominatim_api.v1 as api_output
from nominatim_api.v1.helpers import zoom_to_rank, deduplicate_results
from nominatim_api.v1.format import dispatch as formatting
import nominatim_api.logging as loglib
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
LOG = logging.getLogger()
STRUCTURED_QUERY = (
('amenity', 'name and/or type of POI'),
('street', 'housenumber and street'),
('city', 'city, town or village'),
('county', 'county'),
('state', 'state'),
('country', 'country'),
('postalcode', 'postcode')
)
EXTRADATA_PARAMS = (
('addressdetails', 'Include a breakdown of the address into elements'),
('extratags', ("Include additional information if available "
"(e.g. wikipedia link, opening hours)")),
('namedetails', 'Include a list of alternative names')
)
def _add_api_output_arguments(parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Output arguments')
group.add_argument('--format', default='jsonv2',
choices=formatting.list_formats(napi.SearchResults) + ['debug'],
help='Format of result')
for name, desc in EXTRADATA_PARAMS:
group.add_argument('--' + name, action='store_true', help=desc)
group.add_argument('--lang', '--accept-language', metavar='LANGS',
help='Preferred language order for presenting search results')
group.add_argument('--polygon-output',
choices=['geojson', 'kml', 'svg', 'text'],
help='Output geometry of results as a GeoJSON, KML, SVG or WKT')
group.add_argument('--polygon-threshold', type=float, default = 0.0,
metavar='TOLERANCE',
help=("Simplify output geometry."
"Parameter is difference tolerance in degrees."))
class APISearch:
"""\
Execute a search query.
This command works exactly the same as if calling the /search endpoint on
the web API. See the online documentation for more details on the
various parameters:
https://nominatim.org/release-docs/latest/api/Search/
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Query arguments')
group.add_argument('--query',
help='Free-form query string')
for name, desc in STRUCTURED_QUERY:
group.add_argument('--' + name, help='Structured query: ' + desc)
_add_api_output_arguments(parser)
group = parser.add_argument_group('Result limitation')
group.add_argument('--countrycodes', metavar='CC,..',
help='Limit search results to one or more countries')
group.add_argument('--exclude_place_ids', metavar='ID,..',
                           help='List of search objects to be excluded')
group.add_argument('--limit', type=int, default=10,
help='Limit the number of returned results')
group.add_argument('--viewbox', metavar='X1,Y1,X2,Y2',
help='Preferred area to find search results')
group.add_argument('--bounded', action='store_true',
help='Strictly restrict results to viewbox area')
group = parser.add_argument_group('Other arguments')
group.add_argument('--no-dedupe', action='store_false', dest='dedupe',
help='Do not remove duplicates from the result list')
def run(self, args: NominatimArgs) -> int:
if args.format == 'debug':
loglib.set_log_output('text')
api = napi.NominatimAPI(args.project_dir)
params: Dict[str, Any] = {'max_results': args.limit + min(args.limit, 10),
'address_details': True, # needed for display name
'geometry_output': args.get_geometry_output(),
'geometry_simplification': args.polygon_threshold,
'countries': args.countrycodes,
'excluded': args.exclude_place_ids,
'viewbox': args.viewbox,
'bounded_viewbox': args.bounded,
'locales': args.get_locales(api.config.DEFAULT_LANGUAGE)
}
if args.query:
results = api.search(args.query, **params)
else:
results = api.search_address(amenity=args.amenity,
street=args.street,
city=args.city,
county=args.county,
state=args.state,
postalcode=args.postalcode,
country=args.country,
**params)
if args.dedupe and len(results) > 1:
results = deduplicate_results(results, args.limit)
if args.format == 'debug':
print(loglib.get_and_disable())
return 0
output = api_output.format_result(
results,
args.format,
{'extratags': args.extratags,
'namedetails': args.namedetails,
'addressdetails': args.addressdetails})
if args.format != 'xml':
# reformat the result, so it is pretty-printed
json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
else:
sys.stdout.write(output)
sys.stdout.write('\n')
return 0
class APIReverse:
"""\
Execute API reverse query.
This command works exactly the same as if calling the /reverse endpoint on
the web API. See the online documentation for more details on the
various parameters:
https://nominatim.org/release-docs/latest/api/Reverse/
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Query arguments')
group.add_argument('--lat', type=float, required=True,
help='Latitude of coordinate to look up (in WGS84)')
group.add_argument('--lon', type=float, required=True,
help='Longitude of coordinate to look up (in WGS84)')
group.add_argument('--zoom', type=int,
help='Level of detail required for the address')
group.add_argument('--layer', metavar='LAYER',
choices=[n.name.lower() for n in napi.DataLayer if n.name],
action='append', required=False, dest='layers',
                           help='Restrict results to one or more layers (may be repeated)')
_add_api_output_arguments(parser)
def run(self, args: NominatimArgs) -> int:
if args.format == 'debug':
loglib.set_log_output('text')
api = napi.NominatimAPI(args.project_dir)
result = api.reverse(napi.Point(args.lon, args.lat),
max_rank=zoom_to_rank(args.zoom or 18),
layers=args.get_layers(napi.DataLayer.ADDRESS | napi.DataLayer.POI),
address_details=True, # needed for display name
geometry_output=args.get_geometry_output(),
geometry_simplification=args.polygon_threshold,
locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
if args.format == 'debug':
print(loglib.get_and_disable())
return 0
if result:
output = api_output.format_result(
napi.ReverseResults([result]),
args.format,
{'extratags': args.extratags,
'namedetails': args.namedetails,
'addressdetails': args.addressdetails})
if args.format != 'xml':
# reformat the result, so it is pretty-printed
json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
else:
sys.stdout.write(output)
sys.stdout.write('\n')
return 0
LOG.error("Unable to geocode.")
return 42
class APILookup:
"""\
Execute API lookup query.
This command works exactly the same as if calling the /lookup endpoint on
the web API. See the online documentation for more details on the
various parameters:
https://nominatim.org/release-docs/latest/api/Lookup/
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Query arguments')
group.add_argument('--id', metavar='OSMID',
action='append', required=True, dest='ids',
help='OSM id to lookup in format <NRW><id> (may be repeated)')
_add_api_output_arguments(parser)
def run(self, args: NominatimArgs) -> int:
if args.format == 'debug':
loglib.set_log_output('text')
api = napi.NominatimAPI(args.project_dir)
if args.format == 'debug':
print(loglib.get_and_disable())
return 0
places = [napi.OsmID(o[0], int(o[1:])) for o in args.ids]
results = api.lookup(places,
address_details=True, # needed for display name
geometry_output=args.get_geometry_output(),
geometry_simplification=args.polygon_threshold or 0.0,
locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
output = api_output.format_result(
results,
args.format,
{'extratags': args.extratags,
'namedetails': args.namedetails,
'addressdetails': args.addressdetails})
if args.format != 'xml':
# reformat the result, so it is pretty-printed
json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
else:
sys.stdout.write(output)
sys.stdout.write('\n')
return 0
class APIDetails:
"""\
Execute API details query.
This command works exactly the same as if calling the /details endpoint on
the web API. See the online documentation for more details on the
various parameters:
https://nominatim.org/release-docs/latest/api/Details/
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Query arguments')
objs = group.add_mutually_exclusive_group(required=True)
objs.add_argument('--node', '-n', type=int,
help="Look up the OSM node with the given ID.")
objs.add_argument('--way', '-w', type=int,
help="Look up the OSM way with the given ID.")
objs.add_argument('--relation', '-r', type=int,
help="Look up the OSM relation with the given ID.")
objs.add_argument('--place_id', '-p', type=int,
help='Database internal identifier of the OSM object to look up')
group.add_argument('--class', dest='object_class',
help=("Class type to disambiguated multiple entries "
"of the same object."))
group = parser.add_argument_group('Output arguments')
group.add_argument('--addressdetails', action='store_true',
help='Include a breakdown of the address into elements')
group.add_argument('--keywords', action='store_true',
help='Include a list of name keywords and address keywords')
group.add_argument('--linkedplaces', action='store_true',
                           help='Include details of places that are linked with this one')
group.add_argument('--hierarchy', action='store_true',
help='Include details of places lower in the address hierarchy')
group.add_argument('--group_hierarchy', action='store_true',
help='Group the places by type')
group.add_argument('--polygon_geojson', action='store_true',
help='Include geometry of result')
group.add_argument('--lang', '--accept-language', metavar='LANGS',
help='Preferred language order for presenting search results')
def run(self, args: NominatimArgs) -> int:
place: napi.PlaceRef
if args.node:
place = napi.OsmID('N', args.node, args.object_class)
elif args.way:
place = napi.OsmID('W', args.way, args.object_class)
elif args.relation:
place = napi.OsmID('R', args.relation, args.object_class)
else:
assert args.place_id is not None
place = napi.PlaceID(args.place_id)
api = napi.NominatimAPI(args.project_dir)
locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
result = api.details(place,
address_details=args.addressdetails,
linked_places=args.linkedplaces,
parented_places=args.hierarchy,
keywords=args.keywords,
geometry_output=napi.GeometryFormat.GEOJSON
if args.polygon_geojson
else napi.GeometryFormat.NONE,
locales=locales)
if result:
output = api_output.format_result(
result,
'json',
{'locales': locales,
'group_hierarchy': args.group_hierarchy})
# reformat the result, so it is pretty-printed
json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
sys.stdout.write('\n')
return 0
LOG.error("Object not found in database.")
return 42
class APIStatus:
"""
Execute API status query.
This command works exactly the same as if calling the /status endpoint on
the web API. See the online documentation for more details on the
various parameters:
https://nominatim.org/release-docs/latest/api/Status/
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
formats = api_output.list_formats(napi.StatusResult)
group = parser.add_argument_group('API parameters')
group.add_argument('--format', default=formats[0], choices=formats,
help='Format of result')
def run(self, args: NominatimArgs) -> int:
status = napi.NominatimAPI(args.project_dir).status()
print(api_output.format_result(status, args.format, {}))
return 0
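
The subcommands above are thin wrappers around the library interface exported by the nominatim_api package. A minimal sketch of the equivalent direct library calls (the project path, coordinates and OSM id are illustrative, not taken from the commit):

from pathlib import Path
import nominatim_api as napi

api = napi.NominatimAPI(Path('/srv/nominatim-project'))   # illustrative project directory
try:
    results = api.search('Brandenburger Tor, Berlin', address_details=True)     # /search
    place = api.reverse(napi.Point(13.3777, 52.5163), address_details=True)     # /reverse (lon, lat)
    looked_up = api.lookup([napi.OsmID('N', 240109189)], address_details=True)  # /lookup
    status = api.status()                                                       # /status
finally:
    api.close()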

View File

@@ -0,0 +1,260 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Provides custom functions over command-line arguments.
"""
from typing import Optional, List, Dict, Any, Sequence, Tuple
import argparse
import logging
from functools import reduce
from pathlib import Path
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.typing import Protocol
import nominatim_api as napi
LOG = logging.getLogger()
class Subcommand(Protocol):
"""
Interface to be implemented by classes implementing a CLI subcommand.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
"""
Fill the given parser for the subcommand with the appropriate
parameters.
"""
def run(self, args: 'NominatimArgs') -> int:
"""
Run the subcommand with the given parsed arguments.
"""
class NominatimArgs:
""" Customized namespace class for the nominatim command line tool
to receive the command-line arguments.
"""
# Basic environment set by root program.
config: Configuration
project_dir: Path
# Global switches
version: bool
subcommand: Optional[str]
command: Subcommand
# Shared parameters
osm2pgsql_cache: Optional[int]
socket_timeout: int
# Arguments added to all subcommands.
verbose: int
threads: Optional[int]
# Arguments to 'add-data'
file: Optional[str]
diff: Optional[str]
node: Optional[int]
way: Optional[int]
relation: Optional[int]
tiger_data: Optional[str]
use_main_api: bool
# Arguments to 'admin'
warm: bool
check_database: bool
migrate: bool
collect_os_info: bool
clean_deleted: str
analyse_indexing: bool
target: Optional[str]
osm_id: Optional[str]
place_id: Optional[int]
# Arguments to 'import'
osm_file: List[str]
continue_at: Optional[str]
reverse_only: bool
no_partitions: bool
no_updates: bool
offline: bool
ignore_errors: bool
index_noanalyse: bool
prepare_database: bool
# Arguments to 'index'
boundaries_only: bool
no_boundaries: bool
minrank: int
maxrank: int
# Arguments to 'export'
output_type: str
output_format: str
output_all_postcodes: bool
language: Optional[str]
restrict_to_country: Optional[str]
# Arguments to 'convert'
output: Path
# Arguments to 'refresh'
postcodes: bool
word_tokens: bool
word_counts: bool
address_levels: bool
functions: bool
wiki_data: bool
secondary_importance: bool
importance: bool
website: bool
diffs: bool
enable_debug_statements: bool
data_object: Sequence[Tuple[str, int]]
data_area: Sequence[Tuple[str, int]]
# Arguments to 'replication'
init: bool
update_functions: bool
check_for_updates: bool
once: bool
catch_up: bool
do_index: bool
# Arguments to 'serve'
server: str
engine: str
# Arguments to 'special-phrases
import_from_wiki: bool
import_from_csv: Optional[str]
no_replace: bool
# Arguments to all query functions
format: str
addressdetails: bool
extratags: bool
namedetails: bool
lang: Optional[str]
polygon_output: Optional[str]
polygon_threshold: Optional[float]
# Arguments to 'search'
query: Optional[str]
amenity: Optional[str]
street: Optional[str]
city: Optional[str]
county: Optional[str]
state: Optional[str]
country: Optional[str]
postalcode: Optional[str]
countrycodes: Optional[str]
exclude_place_ids: Optional[str]
limit: int
viewbox: Optional[str]
bounded: bool
dedupe: bool
# Arguments to 'reverse'
lat: float
lon: float
zoom: Optional[int]
layers: Optional[Sequence[str]]
# Arguments to 'lookup'
ids: Sequence[str]
# Arguments to 'details'
object_class: Optional[str]
linkedplaces: bool
hierarchy: bool
keywords: bool
polygon_geojson: bool
group_hierarchy: bool
def osm2pgsql_options(self, default_cache: int,
default_threads: int) -> Dict[str, Any]:
""" Return the standard osm2pgsql options that can be derived
from the command line arguments. The resulting dict can be
further customized and then used in `run_osm2pgsql()`.
"""
return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.config.lib_dir.osm2pgsql,
osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
osm2pgsql_style=self.config.get_import_style_file(),
osm2pgsql_style_path=self.config.config_dir,
threads=self.threads or default_threads,
dsn=self.config.get_libpq_dsn(),
flatnode_file=str(self.config.get_path('FLATNODE_FILE') or ''),
tablespaces=dict(slim_data=self.config.TABLESPACE_OSM_DATA,
slim_index=self.config.TABLESPACE_OSM_INDEX,
main_data=self.config.TABLESPACE_PLACE_DATA,
main_index=self.config.TABLESPACE_PLACE_INDEX
)
)
def get_osm_file_list(self) -> Optional[List[Path]]:
""" Return the --osm-file argument as a list of Paths or None
if no argument was given. The function also checks if the files
exist and raises a UsageError if one cannot be found.
"""
if not self.osm_file:
return None
files = [Path(f) for f in self.osm_file]
for fname in files:
if not fname.is_file():
LOG.fatal("OSM file '%s' does not exist.", fname)
raise UsageError('Cannot access file.')
return files
def get_geometry_output(self) -> napi.GeometryFormat:
""" Get the requested geometry output format in a API-compatible
format.
"""
if not self.polygon_output:
return napi.GeometryFormat.NONE
if self.polygon_output == 'geojson':
return napi.GeometryFormat.GEOJSON
if self.polygon_output == 'kml':
return napi.GeometryFormat.KML
if self.polygon_output == 'svg':
return napi.GeometryFormat.SVG
if self.polygon_output == 'text':
return napi.GeometryFormat.TEXT
try:
return napi.GeometryFormat[self.polygon_output.upper()]
except KeyError as exp:
raise UsageError(f"Unknown polygon output format '{self.polygon_output}'.") from exp
def get_locales(self, default: Optional[str]) -> napi.Locales:
""" Get the locales from the language parameter.
"""
if self.lang:
return napi.Locales.from_accept_languages(self.lang)
if default:
return napi.Locales.from_accept_languages(default)
return napi.Locales()
def get_layers(self, default: napi.DataLayer) -> Optional[napi.DataLayer]:
""" Get the list of selected layers as a DataLayer enum.
"""
if not self.layers:
return default
return reduce(napi.DataLayer.__or__,
(napi.DataLayer[s.upper()] for s in self.layers))
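
As a worked example of how get_layers() folds multiple --layer values into a single flag (the layer names below are illustrative):

from functools import reduce
import nominatim_api as napi

layers = ['address', 'poi']   # e.g. '--layer address --layer poi' on the command line
combined = reduce(napi.DataLayer.__or__, (napi.DataLayer[s.upper()] for s in layers))
assert combined == napi.DataLayer.ADDRESS | napi.DataLayer.POI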

View File

@@ -0,0 +1,95 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'convert' subcommand.
"""
from typing import Set, Any, Union, Optional, Sequence
import argparse
import asyncio
from pathlib import Path
from nominatim_core.errors import UsageError
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
class WithAction(argparse.Action):
""" Special action that saves a list of flags, given on the command-line
as `--with-foo` or `--without-foo`.
"""
def __init__(self, option_strings: Sequence[str], dest: Any,
default: bool = True, **kwargs: Any) -> None:
if 'nargs' in kwargs:
raise ValueError("nargs not allowed.")
if option_strings is None:
raise ValueError("Positional parameter not allowed.")
self.dest_set = kwargs.pop('dest_set')
full_option_strings = []
for opt in option_strings:
if not opt.startswith('--'):
raise ValueError("short-form options not allowed")
if default:
self.dest_set.add(opt[2:])
full_option_strings.append(f"--with-{opt[2:]}")
full_option_strings.append(f"--without-{opt[2:]}")
super().__init__(full_option_strings, argparse.SUPPRESS, nargs=0, **kwargs)
def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace,
values: Union[str, Sequence[Any], None],
option_string: Optional[str] = None) -> None:
assert option_string
if option_string.startswith('--with-'):
self.dest_set.add(option_string[7:])
if option_string.startswith('--without-'):
self.dest_set.discard(option_string[10:])
class ConvertDB:
""" Convert an existing database into a different format. (EXPERIMENTAL)
Dump a read-only version of the database in a different format.
At the moment only a SQLite database suitable for reverse lookup
can be created.
"""
def __init__(self) -> None:
self.options: Set[str] = set()
def add_args(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument('--format', default='sqlite',
choices=('sqlite', ),
help='Format of the output database (must be sqlite currently)')
parser.add_argument('--output', '-o', required=True, type=Path,
help='File to write the database to.')
        group = parser.add_argument_group('Switches to define database layout '
'(currently no effect)')
group.add_argument('--reverse', action=WithAction, dest_set=self.options, default=True,
help='Enable/disable support for reverse and lookup API'
' (default: enabled)')
        group.add_argument('--search', action=WithAction, dest_set=self.options, default=False,
help='Enable/disable support for search API (default: disabled)')
group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
help='Enable/disable support for details API (default: enabled)')
def run(self, args: NominatimArgs) -> int:
if args.output.exists():
raise UsageError(f"File '{args.output}' already exists. Refusing to overwrite.")
if args.format == 'sqlite':
from ..tools import convert_sqlite
asyncio.run(convert_sqlite.convert(args.project_dir, args.output, self.options))
return 0
return 1
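
A minimal sketch of how the WithAction flags defined above behave when parsed (assumes WithAction from this module is in scope; the option name is illustrative):

import argparse

options = set()
parser = argparse.ArgumentParser()
# registers --with-search and --without-search; default=True pre-adds 'search'
parser.add_argument('--search', action=WithAction, dest_set=options, default=True)

parser.parse_args([])                    # options stays {'search'}
parser.parse_args(['--without-search'])  # 'search' is discarded
parser.parse_args(['--with-search'])     # 'search' is added back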

View File

@@ -0,0 +1,200 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'export' subcommand.
"""
from typing import Optional, List, cast
import logging
import argparse
import asyncio
import csv
import sys
import sqlalchemy as sa
import nominatim_api as napi
from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
from nominatim_api.types import LookupDetails
from nominatim_core.errors import UsageError
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
# Needed for SQLAlchemy
# pylint: disable=singleton-comparison
LOG = logging.getLogger()
RANK_RANGE_MAP = {
'country': (4, 4),
'state': (5, 9),
'county': (10, 12),
'city': (13, 16),
'suburb': (17, 21),
'street': (26, 26),
'path': (27, 27)
}
RANK_TO_OUTPUT_MAP = {
4: 'country',
5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
10: 'county', 11: 'county', 12: 'county',
13: 'city', 14: 'city', 15: 'city', 16: 'city',
17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
26: 'street', 27: 'path'}
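# e.g. an address line with rank_address 14 ends up in the 'city' column,
# one with rank_address 27 in the 'path' column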
class QueryExport:
"""\
Export places as CSV file from the database.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Output arguments')
group.add_argument('--output-type', default='street',
choices=('country', 'state', 'county',
'city', 'suburb', 'street', 'path'),
help='Type of places to output (default: street)')
group.add_argument('--output-format',
default='street;suburb;city;county;state;country',
help=("Semicolon-separated list of address types "
"(see --output-type). Additionally accepts:"
"placeid,postcode"))
group.add_argument('--language',
help=("Preferred language for output "
"(use local name, if omitted)"))
group = parser.add_argument_group('Filter arguments')
group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
help='Export only objects within country')
group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
dest='node',
help='Export only children of this OSM node')
group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
dest='way',
help='Export only children of this OSM way')
group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
dest='relation',
help='Export only children of this OSM relation')
def run(self, args: NominatimArgs) -> int:
return asyncio.run(export(args))
async def export(args: NominatimArgs) -> int:
""" The actual export as a asynchronous function.
"""
api = napi.NominatimAPIAsync(args.project_dir)
try:
output_range = RANK_RANGE_MAP[args.output_type]
writer = init_csv_writer(args.output_format)
async with api.begin() as conn, api.begin() as detail_conn:
t = conn.t.placex
sql = sa.select(t.c.place_id, t.c.parent_place_id,
t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.admin_level,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
t.c.importance, t.c.wikipedia, t.c.indexed_date,
t.c.rank_address, t.c.rank_search,
t.c.centroid)\
.where(t.c.linked_place_id == None)\
.where(t.c.rank_address.between(*output_range))
parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
if parent_place_id:
taddr = conn.t.addressline
sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
.where(taddr.c.address_place_id == parent_place_id)\
.where(taddr.c.isaddress)
if args.restrict_to_country:
sql = sql.where(t.c.country_code == args.restrict_to_country.lower())
results = []
for row in await conn.execute(sql):
result = create_from_placex_row(row, ReverseResult)
if result is not None:
results.append(result)
if len(results) == 1000:
await dump_results(detail_conn, results, writer, args.language)
results = []
if results:
await dump_results(detail_conn, results, writer, args.language)
finally:
await api.close()
return 0
def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
fields = output_format.split(';')
writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
writer.writeheader()
return writer
async def dump_results(conn: napi.SearchConnection,
results: List[ReverseResult],
writer: 'csv.DictWriter[str]',
lang: Optional[str]) -> None:
locale = napi.Locales([lang] if lang else None)
await add_result_details(conn, results,
LookupDetails(address_details=True, locales=locale))
for result in results:
data = {'placeid': result.place_id,
'postcode': result.postcode}
for line in (result.address_rows or []):
if line.isaddress and line.local_name:
if line.category[1] == 'postcode':
data['postcode'] = line.local_name
elif line.rank_address in RANK_TO_OUTPUT_MAP:
data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name
writer.writerow(data)
async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
way_id: Optional[int],
relation_id: Optional[int]) -> Optional[int]:
""" Get the place ID for the given OSM object.
"""
if node_id is not None:
osm_type, osm_id = 'N', node_id
elif way_id is not None:
osm_type, osm_id = 'W', way_id
elif relation_id is not None:
osm_type, osm_id = 'R', relation_id
else:
return None
t = conn.t.placex
sql = sa.select(t.c.place_id).limit(1)\
.where(t.c.osm_type == osm_type)\
.where(t.c.osm_id == osm_id)\
.where(t.c.rank_address > 0)\
.order_by(t.c.rank_address)
for result in await conn.execute(sql):
return cast(int, result[0])
raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')

View File

@@ -0,0 +1,43 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'freeze' subcommand.
"""
import argparse
from nominatim_core.db.connection import connect
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
class SetupFreeze:
"""\
Make database read-only.
    About half of the data in the Nominatim database is kept only to be able to
keep the data up-to-date with new changes made in OpenStreetMap. This
command drops all this data and only keeps the part needed for geocoding
itself.
This command has the same effect as the `--no-updates` option for imports.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
pass # No options
def run(self, args: NominatimArgs) -> int:
from ..tools import freeze
with connect(args.config.get_libpq_dsn()) as conn:
freeze.drop_update_tables(conn)
freeze.drop_flatnode_file(args.config.get_path('FLATNODE_FILE'))
return 0

View File

@@ -0,0 +1,66 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'index' subcommand.
"""
import argparse
import psutil
from nominatim_core.db import status
from nominatim_core.db.connection import connect
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
class UpdateIndex:
"""\
Reindex all new and modified data.
Indexing is the process of computing the address and search terms for
the places in the database. Every time data is added or changed, indexing
needs to be run. Imports and replication updates automatically take care
    of indexing. For other cases, this command allows indexing to be run manually.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Filter arguments')
group.add_argument('--boundaries-only', action='store_true',
help="""Index only administrative boundaries.""")
group.add_argument('--no-boundaries', action='store_true',
help="""Index everything except administrative boundaries.""")
group.add_argument('--minrank', '-r', type=int, metavar='RANK', default=0,
help='Minimum/starting rank')
group.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30,
help='Maximum/finishing rank')
def run(self, args: NominatimArgs) -> int:
from ..indexer.indexer import Indexer
from ..tokenizer import factory as tokenizer_factory
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or psutil.cpu_count() or 1)
if not args.no_boundaries:
indexer.index_boundaries(args.minrank, args.maxrank)
if not args.boundaries_only:
indexer.index_by_rank(args.minrank, args.maxrank)
indexer.index_postcodes()
if not args.no_boundaries and not args.boundaries_only \
and args.minrank == 0 and args.maxrank == 30:
with connect(args.config.get_libpq_dsn()) as conn:
status.set_indexed(conn, True)
return 0

View File

@@ -0,0 +1,187 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of 'refresh' subcommand.
"""
from typing import Tuple, Optional
import argparse
import logging
from pathlib import Path
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from ..tokenizer.base import AbstractTokenizer
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
def _parse_osm_object(obj: str) -> Tuple[str, int]:
""" Parse the given argument into a tuple of OSM type and ID.
Raises an ArgumentError if the format is not recognized.
"""
if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdigit():
raise argparse.ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")
return (obj[0].upper(), int(obj[1:]))
class UpdateRefresh:
"""\
Recompute auxiliary data used by the indexing process.
    This sub-command updates various static data and functions in the database.
It usually needs to be run after changing various aspects of the
configuration. The configuration documentation will mention the exact
command to use in such case.
Warning: the 'update' command must not be run in parallel with other update
commands like 'replication' or 'add-data'.
"""
def __init__(self) -> None:
self.tokenizer: Optional[AbstractTokenizer] = None
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Data arguments')
group.add_argument('--postcodes', action='store_true',
help='Update postcode centroid table')
group.add_argument('--word-tokens', action='store_true',
help='Clean up search terms')
group.add_argument('--word-counts', action='store_true',
help='Compute frequency of full-word search terms')
group.add_argument('--address-levels', action='store_true',
help='Reimport address level configuration')
group.add_argument('--functions', action='store_true',
help='Update the PL/pgSQL functions in the database')
group.add_argument('--wiki-data', action='store_true',
help='Update Wikipedia/data importance numbers')
group.add_argument('--secondary-importance', action='store_true',
help='Update secondary importance raster data')
group.add_argument('--importance', action='store_true',
help='Recompute place importances (expensive!)')
group.add_argument('--website', action='store_true',
help='Refresh the directory that serves the scripts for the web API')
group.add_argument('--data-object', action='append',
type=_parse_osm_object, metavar='OBJECT',
help='Mark the given OSM object as requiring an update'
' (format: [NWR]<id>)')
group.add_argument('--data-area', action='append',
type=_parse_osm_object, metavar='OBJECT',
help='Mark the area around the given OSM object as requiring an update'
' (format: [NWR]<id>)')
group = parser.add_argument_group('Arguments for function refresh')
group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
help='Do not enable code for propagating updates')
group.add_argument('--enable-debug-statements', action='store_true',
help='Enable debug warning statements in functions')
def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
from ..tools import refresh, postcodes
from ..indexer.indexer import Indexer
need_function_refresh = args.functions
if args.postcodes:
if postcodes.can_compute(args.config.get_libpq_dsn()):
LOG.warning("Update postcodes centroid")
tokenizer = self._get_tokenizer(args.config)
postcodes.update_postcodes(args.config.get_libpq_dsn(),
args.project_dir, tokenizer)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or 1)
indexer.index_postcodes()
else:
LOG.error("The place table doesn't exist. "
"Postcode updates on a frozen database is not possible.")
if args.word_tokens:
LOG.warning('Updating word tokens')
tokenizer = self._get_tokenizer(args.config)
tokenizer.update_word_tokens()
if args.word_counts:
LOG.warning('Recompute word statistics')
self._get_tokenizer(args.config).update_statistics(args.config,
threads=args.threads or 1)
if args.address_levels:
LOG.warning('Updating address levels')
with connect(args.config.get_libpq_dsn()) as conn:
refresh.load_address_levels_from_config(conn, args.config)
# Attention: must come BEFORE functions
if args.secondary_importance:
with connect(args.config.get_libpq_dsn()) as conn:
# If the table did not exist before, then the importance code
# needs to be enabled.
if not conn.table_exists('secondary_importance'):
args.functions = True
LOG.warning('Import secondary importance raster data from %s', args.project_dir)
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
args.project_dir) > 0:
LOG.fatal('FATAL: Cannot update secondary importance raster data')
return 1
need_function_refresh = True
if args.wiki_data:
data_path = Path(args.config.WIKIPEDIA_DATA_PATH
or args.project_dir)
LOG.warning('Import wikipedia article importance from %s', data_path)
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
return 1
need_function_refresh = True
if need_function_refresh:
LOG.warning('Create functions')
with connect(args.config.get_libpq_dsn()) as conn:
refresh.create_functions(conn, args.config,
args.diffs, args.enable_debug_statements)
self._get_tokenizer(args.config).update_sql_functions(args.config)
# Attention: importance MUST come after wiki data import and after functions.
if args.importance:
LOG.warning('Update importance values for database')
with connect(args.config.get_libpq_dsn()) as conn:
refresh.recompute_importance(conn)
if args.website:
webdir = args.project_dir / 'website'
LOG.warning('Setting up website directory at %s', webdir)
# This is a little bit hacky: call the tokenizer setup, so that
# the tokenizer directory gets repopulated as well, in case it
# wasn't there yet.
self._get_tokenizer(args.config)
with connect(args.config.get_libpq_dsn()) as conn:
refresh.setup_website(webdir, args.config, conn)
if args.data_object or args.data_area:
with connect(args.config.get_libpq_dsn()) as conn:
for obj in args.data_object or []:
refresh.invalidate_osm_object(*obj, conn, recursive=False)
for obj in args.data_area or []:
refresh.invalidate_osm_object(*obj, conn, recursive=True)
conn.commit()
return 0
def _get_tokenizer(self, config: Configuration) -> AbstractTokenizer:
if self.tokenizer is None:
from ..tokenizer import factory as tokenizer_factory
self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
return self.tokenizer

View File

@@ -0,0 +1,200 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'replication' sub-command.
"""
from typing import Optional
import argparse
import datetime as dt
import logging
import socket
import time
from nominatim_core.db import status
from nominatim_core.db.connection import connect
from nominatim_core.errors import UsageError
from .args import NominatimArgs
LOG = logging.getLogger()
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to make pyosmium optional for replication only.
# pylint: disable=C0415
class UpdateReplication:
"""\
Update the database using an online replication service.
An OSM replication service is an online service that provides regular
    updates (OSM diff files) for the planet or for the extract they cover. The OSMF
provides the primary replication service for the full planet at
https://planet.osm.org/replication/ but there are other providers of
extracts of OSM data who provide such a service as well.
    This sub-command allows you to set up such a replication service and download
and import updates at regular intervals. You need to call '--init' once to
set up the process or whenever you change the replication configuration
parameters. Without any arguments, the sub-command will go into a loop and
continuously apply updates as they become available. Giving `--once` just
downloads and imports the next batch of updates.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Arguments for initialisation')
group.add_argument('--init', action='store_true',
help='Initialise the update process')
group.add_argument('--no-update-functions', dest='update_functions',
action='store_false',
help="Do not update the trigger function to "
"support differential updates (EXPERT)")
group = parser.add_argument_group('Arguments for updates')
group.add_argument('--check-for-updates', action='store_true',
help='Check if new updates are available and exit')
group.add_argument('--once', action='store_true',
help="Download and apply updates only once. When "
"not set, updates are continuously applied")
group.add_argument('--catch-up', action='store_true',
help="Download and apply updates until no new "
"data is available on the server")
group.add_argument('--no-index', action='store_false', dest='do_index',
help=("Do not index the new data. Only usable "
"together with --once"))
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
group = parser.add_argument_group('Download parameters')
group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
help='Set timeout for file downloads')
def _init_replication(self, args: NominatimArgs) -> int:
from ..tools import replication, refresh
LOG.warning("Initialising replication updates")
with connect(args.config.get_libpq_dsn()) as conn:
replication.init_replication(conn, base_url=args.config.REPLICATION_URL,
socket_timeout=args.socket_timeout)
if args.update_functions:
LOG.warning("Create functions")
refresh.create_functions(conn, args.config, True, False)
return 0
def _check_for_updates(self, args: NominatimArgs) -> int:
from ..tools import replication
with connect(args.config.get_libpq_dsn()) as conn:
return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL,
socket_timeout=args.socket_timeout)
def _report_update(self, batchdate: dt.datetime,
start_import: dt.datetime,
start_index: Optional[dt.datetime]) -> None:
def round_time(delta: dt.timedelta) -> dt.timedelta:
return dt.timedelta(seconds=int(delta.total_seconds()))
end = dt.datetime.now(dt.timezone.utc)
LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
round_time((start_index or end) - start_import),
f"Indexing: {round_time(end - start_index)} " if start_index else '',
round_time(end - start_import),
round_time(end - batchdate))
def _compute_update_interval(self, args: NominatimArgs) -> int:
if args.catch_up:
return 0
update_interval = args.config.get_int('REPLICATION_UPDATE_INTERVAL')
# Sanity check to not overwhelm the Geofabrik servers.
if 'download.geofabrik.de' in args.config.REPLICATION_URL\
and update_interval < 86400:
LOG.fatal("Update interval too low for download.geofabrik.de.\n"
"Please check install documentation "
"(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
"setting-up-the-update-process).")
raise UsageError("Invalid replication update interval setting.")
return update_interval
def _update(self, args: NominatimArgs) -> None:
# pylint: disable=too-many-locals
from ..tools import replication
from ..indexer.indexer import Indexer
from ..tokenizer import factory as tokenizer_factory
update_interval = self._compute_update_interval(args)
params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
params.update(base_url=args.config.REPLICATION_URL,
update_interval=update_interval,
import_file=args.project_dir / 'osmosischange.osc',
max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
indexed_only=not args.once)
if not args.once:
if not args.do_index:
LOG.fatal("Indexing cannot be disabled when running updates continuously.")
raise UsageError("Bad argument '--no-index'.")
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or 1)
dsn = args.config.get_libpq_dsn()
while True:
start = dt.datetime.now(dt.timezone.utc)
state = replication.update(dsn, params, socket_timeout=args.socket_timeout)
with connect(dsn) as conn:
if state is not replication.UpdateState.NO_CHANGES:
status.log_status(conn, start, 'import')
batchdate, _, _ = status.get_status(conn)
conn.commit()
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
index_start = dt.datetime.now(dt.timezone.utc)
indexer.index_full(analyse=False)
with connect(dsn) as conn:
status.set_indexed(conn, True)
status.log_status(conn, index_start, 'index')
conn.commit()
else:
index_start = None
if state is replication.UpdateState.NO_CHANGES and \
args.catch_up or update_interval > 40*60:
while indexer.has_pending():
indexer.index_full(analyse=False)
if LOG.isEnabledFor(logging.WARNING):
assert batchdate is not None
self._report_update(batchdate, start, index_start)
if args.once or (args.catch_up and state is replication.UpdateState.NO_CHANGES):
break
if state is replication.UpdateState.NO_CHANGES:
LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
time.sleep(recheck_interval)
def run(self, args: NominatimArgs) -> int:
socket.setdefaulttimeout(args.socket_timeout)
if args.init:
return self._init_replication(args)
if args.check_for_updates:
return self._check_for_updates(args)
self._update(args)
return 0

View File

@@ -0,0 +1,229 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'import' subcommand.
"""
from typing import Optional
import argparse
import logging
from pathlib import Path
import psutil
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from nominatim_core.db import status, properties
from ..tokenizer.base import AbstractTokenizer
from ..version import NOMINATIM_VERSION
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=C0415
LOG = logging.getLogger()
class SetupAll:
"""\
Create a new Nominatim database from an OSM file.
This sub-command sets up a new Nominatim database from scratch starting
with creating a new database in Postgresql. The user running this command
needs superuser rights on the database.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group1 = parser.add_argument_group('Required arguments')
group1.add_argument('--osm-file', metavar='FILE', action='append',
help='OSM file to be imported'
' (repeat for importing multiple files)',
default=None)
group1.add_argument('--continue', dest='continue_at',
choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
help='Continue an import that was interrupted',
default=None)
group2 = parser.add_argument_group('Optional arguments')
group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
group2.add_argument('--reverse-only', action='store_true',
help='Do not create tables and indexes for searching')
group2.add_argument('--no-partitions', action='store_true',
help=("Do not partition search indices "
"(speeds up import of single country extracts)"))
group2.add_argument('--no-updates', action='store_true',
help="Do not keep tables that are only needed for "
"updating the database later")
group2.add_argument('--offline', action='store_true',
help="Do not attempt to load any additional data from the internet")
group3 = parser.add_argument_group('Expert options')
group3.add_argument('--ignore-errors', action='store_true',
help='Continue import even when errors in SQL are present')
group3.add_argument('--index-noanalyse', action='store_true',
help='Do not perform analyse operations during index (expert only)')
group3.add_argument('--prepare-database', action='store_true',
help='Create the database but do not import any data')
def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches
from ..data import country_info
from ..tools import database_import, refresh, postcodes, freeze
from ..indexer.indexer import Indexer
num_threads = args.threads or psutil.cpu_count() or 1
country_info.setup_country_config(args.config)
if args.osm_file is None and args.continue_at is None and not args.prepare_database:
raise UsageError("No input files (use --osm-file).")
if args.osm_file is not None and args.continue_at not in ('import-from-file', None):
raise UsageError(f"Cannot use --continue {args.continue_at} and --osm-file together.")
if args.continue_at is not None and args.prepare_database:
raise UsageError(
"Cannot use --continue and --prepare-database together."
)
if args.prepare_database or args.continue_at is None:
LOG.warning('Creating database')
database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
rouser=args.config.DATABASE_WEBUSER)
if args.prepare_database:
return 0
if args.continue_at in (None, 'import-from-file'):
files = args.get_osm_file_list()
if not files:
raise UsageError("No input files (use --osm-file).")
if args.continue_at in ('import-from-file', None):
# Check if the correct plugins are installed
database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
LOG.warning('Setting up country tables')
country_info.setup_country_tables(args.config.get_libpq_dsn(),
args.config.lib_dir.data,
args.no_partitions)
LOG.warning('Importing OSM data file')
database_import.import_osm_data(files,
args.osm2pgsql_options(0, 1),
drop=args.no_updates,
ignore_errors=args.ignore_errors)
LOG.warning('Importing wikipedia importance data')
data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
LOG.error('Wikipedia importance dump file not found. '
'Calculating importance values of locations will not '
'use Wikipedia importance data.')
LOG.warning('Importing secondary importance raster data')
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
args.project_dir) != 0:
LOG.error('Secondary importance file not imported. '
'Falling back to default ranking.')
self._setup_tables(args.config, args.reverse_only)
if args.continue_at in ('import-from-file', 'load-data', None):
LOG.warning('Initialise tables')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.truncate_data_tables(conn)
LOG.warning('Load data into placex table')
database_import.load_data(args.config.get_libpq_dsn(), num_threads)
LOG.warning("Setting up tokenizer")
tokenizer = self._get_tokenizer(args.continue_at, args.config)
if args.continue_at in ('import-from-file', 'load-data', None):
LOG.warning('Calculate postcodes')
postcodes.update_postcodes(args.config.get_libpq_dsn(),
args.project_dir, tokenizer)
if args.continue_at in \
('import-from-file', 'load-data', 'indexing', None):
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
indexer.index_full(analyse=not args.index_noanalyse)
LOG.warning('Post-process tables')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.create_search_indices(conn, args.config,
drop=args.no_updates,
threads=num_threads)
LOG.warning('Create search index for default country names.')
country_info.create_country_names(conn, tokenizer,
args.config.get_str_list('LANGUAGES'))
if args.no_updates:
freeze.drop_update_tables(conn)
tokenizer.finalize_import(args.config)
LOG.warning('Recompute word counts')
tokenizer.update_statistics(args.config, threads=num_threads)
webdir = args.project_dir / 'website'
LOG.warning('Setup website at %s', webdir)
with connect(args.config.get_libpq_dsn()) as conn:
refresh.setup_website(webdir, args.config, conn)
self._finalize_database(args.config.get_libpq_dsn(), args.offline)
return 0
def _setup_tables(self, config: Configuration, reverse_only: bool) -> None:
""" Set up the basic database layout: tables, indexes and functions.
"""
from ..tools import database_import, refresh
with connect(config.get_libpq_dsn()) as conn:
LOG.warning('Create functions (1st pass)')
refresh.create_functions(conn, config, False, False)
LOG.warning('Create tables')
database_import.create_tables(conn, config, reverse_only=reverse_only)
refresh.load_address_levels_from_config(conn, config)
LOG.warning('Create functions (2nd pass)')
refresh.create_functions(conn, config, False, False)
LOG.warning('Create table triggers')
database_import.create_table_triggers(conn, config)
LOG.warning('Create partition tables')
database_import.create_partition_tables(conn, config)
LOG.warning('Create functions (3rd pass)')
refresh.create_functions(conn, config, False, False)
def _get_tokenizer(self, continue_at: Optional[str],
config: Configuration) -> AbstractTokenizer:
""" Set up a new tokenizer or load an already initialised one.
"""
from ..tokenizer import factory as tokenizer_factory
if continue_at in ('import-from-file', 'load-data', None):
# (re)initialise the tokenizer data
return tokenizer_factory.create_tokenizer(config)
# just load the tokenizer
return tokenizer_factory.get_tokenizer_for_db(config)
def _finalize_database(self, dsn: str, offline: bool) -> None:
""" Determine the database date and set the status accordingly.
"""
with connect(dsn) as conn:
properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))
try:
dbdate = status.compute_database_date(conn, offline)
status.set_status(conn, dbdate)
LOG.info('Database is at %s.', dbdate)
except Exception as exc: # pylint: disable=broad-except
LOG.error('Cannot determine date of database: %s', exc)

View File

@@ -0,0 +1,93 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'special-phrases' command.
"""
import argparse
import logging
from pathlib import Path
from nominatim_core.errors import UsageError
from nominatim_core.db.connection import connect
from ..tools.special_phrases.sp_importer import SPImporter, SpecialPhraseLoader
from ..tools.special_phrases.sp_wiki_loader import SPWikiLoader
from ..tools.special_phrases.sp_csv_loader import SPCsvLoader
from .args import NominatimArgs
LOG = logging.getLogger()
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
class ImportSpecialPhrases:
"""\
Import special phrases.
Special phrases are search terms that narrow down the type of object
that should be searched. For example, you might want to search for
'Hotels in Barcelona'. The OSM wiki has a selection of special phrases
in many languages, which can be imported with this command.
You can also provide your own phrases in a CSV file. The file needs to have
the following five columns:
* phrase - the term expected for searching
* class - the OSM tag key of the object type
* type - the OSM tag value of the object type
* operator - the kind of search to be done (one of: in, near, name, -)
* plural - whether the term is a plural or not (Y/N)
An example file can be found in the Nominatim sources at
'test/testdb/full_en_phrases_test.csv'.
The import can be further configured to ignore specific key/value pairs.
This is particularly useful when importing phrases from the wiki. The
default configuration excludes some very common tags like building=yes.
The configuration can be customized by putting a file `phrase-settings.json`
with custom rules into the project directory or by using the `--config`
option to point to another configuration file.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Input arguments')
group.add_argument('--import-from-wiki', action='store_true',
help='Import special phrases from the OSM wiki to the database')
group.add_argument('--import-from-csv', metavar='FILE',
help='Import special phrases from a CSV file')
group.add_argument('--no-replace', action='store_true',
help='Keep the old phrases and only add the new ones')
def run(self, args: NominatimArgs) -> int:
if args.import_from_wiki:
self.start_import(args, SPWikiLoader(args.config))
if args.import_from_csv:
if not Path(args.import_from_csv).is_file():
LOG.fatal("CSV file '%s' does not exist.", args.import_from_csv)
raise UsageError('Cannot access file.')
self.start_import(args, SPCsvLoader(args.import_from_csv))
return 0
def start_import(self, args: NominatimArgs, loader: SpecialPhraseLoader) -> None:
"""
Create the SPImporter object containing the right
sp loader and then start the import of special phrases.
"""
from ..tokenizer import factory as tokenizer_factory
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
should_replace = not args.no_replace
with connect(args.config.get_libpq_dsn()) as db_connection:
SPImporter(
args.config, db_connection, loader
).import_phrases(tokenizer, should_replace)
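
To illustrate the five-column layout described in the class docstring, a hypothetical phrase file could be written like this (the phrase values and OSM tags are made up for the example; the header row simply names the five columns described above):

import csv

with open('my_phrases.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['phrase', 'class', 'type', 'operator', 'plural'])
    writer.writeheader()
    writer.writerow({'phrase': 'Hotels', 'class': 'tourism', 'type': 'hotel',
                     'operator': 'in', 'plural': 'Y'})
    writer.writerow({'phrase': 'Bar', 'class': 'amenity', 'type': 'bar',
                     'operator': '-', 'plural': 'N'})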

View File

View File

@@ -0,0 +1,175 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for importing and managing static country information.
"""
from typing import Dict, Any, Iterable, Tuple, Optional, Container, overload
from pathlib import Path
import psycopg2.extras
from nominatim_core.db import utils as db_utils
from nominatim_core.db.connection import connect, Connection
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from ..tokenizer.base import AbstractTokenizer
def _flatten_name_list(names: Any) -> Dict[str, str]:
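    # Flattens nested name lists, e.g. (illustrative)
    # {'name': {'default': 'Deutschland', 'en': 'Germany'}}
    # becomes {'name': 'Deutschland', 'name:en': 'Germany'}.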
if names is None:
return {}
if not isinstance(names, dict):
raise UsageError("Expected key-value list for names in country_settings.py")
flat = {}
for prefix, remain in names.items():
if isinstance(remain, str):
flat[prefix] = remain
elif not isinstance(remain, dict):
raise UsageError("Entries in names must be key-value lists.")
else:
for suffix, name in remain.items():
if suffix == 'default':
flat[prefix] = name
else:
flat[f'{prefix}:{suffix}'] = name
return flat
class _CountryInfo:
""" Caches country-specific properties from the configuration file.
"""
def __init__(self) -> None:
self._info: Dict[str, Dict[str, Any]] = {}
def load(self, config: Configuration) -> None:
""" Load the country properties from the configuration files,
if they are not loaded yet.
"""
if not self._info:
self._info = config.load_sub_configuration('country_settings.yaml')
for prop in self._info.values():
# Convert languages into a list for simpler handling.
if 'languages' not in prop:
prop['languages'] = []
elif not isinstance(prop['languages'], list):
prop['languages'] = [x.strip()
for x in prop['languages'].split(',')]
prop['names'] = _flatten_name_list(prop.get('names'))
def items(self) -> Iterable[Tuple[str, Dict[str, Any]]]:
""" Return tuples of (country_code, property dict) as iterable.
"""
return self._info.items()
def get(self, country_code: str) -> Dict[str, Any]:
""" Get country information for the country with the given country code.
"""
return self._info.get(country_code, {})
_COUNTRY_INFO = _CountryInfo()
def setup_country_config(config: Configuration) -> None:
""" Load country properties from the configuration file.
Needs to be called before using any other functions in this
file.
"""
_COUNTRY_INFO.load(config)
@overload
def iterate() -> Iterable[Tuple[str, Dict[str, Any]]]:
...
@overload
def iterate(prop: str) -> Iterable[Tuple[str, Any]]:
...
def iterate(prop: Optional[str] = None) -> Iterable[Tuple[str, Dict[str, Any]]]:
""" Iterate over country code and properties.
When `prop` is None, all countries are returned with their complete
set of properties.
If `prop` is given, then only countries are returned where the
given property is set. The second item of the tuple contains only
the content of the given property.
"""
if prop is None:
return _COUNTRY_INFO.items()
return ((c, p[prop]) for c, p in _COUNTRY_INFO.items() if prop in p)
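# Usage sketch (illustrative only): after setup_country_config(config) has been
# called, iterate() yields every country with its full property dict, while
# iterate('postcode') yields only countries that define a 'postcode' property,
# paired with just that property:
#   for ccode, postcode_cfg in iterate('postcode'):
#       ...  # postcode_cfg is the value of the 'postcode' entry for ccode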
def setup_country_tables(dsn: str, sql_dir: Path, ignore_partitions: bool = False) -> None:
""" Create and populate the tables with basic static data that provides
the background for geocoding. The data is assumed not to exist yet.
"""
db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
params = []
for ccode, props in _COUNTRY_INFO.items():
if ccode is not None and props is not None:
if ignore_partitions:
partition = 0
else:
partition = props.get('partition', 0)
lang = props['languages'][0] if len(
props['languages']) == 1 else None
params.append((ccode, props['names'], lang, partition))
with connect(dsn) as conn:
with conn.cursor() as cur:
psycopg2.extras.register_hstore(cur)
cur.execute(
""" CREATE TABLE public.country_name (
country_code character varying(2),
name public.hstore,
derived_name public.hstore,
country_default_language_code text,
partition integer
); """)
cur.execute_values(
""" INSERT INTO public.country_name
(country_code, name, country_default_language_code, partition) VALUES %s
""", params)
conn.commit()
def create_country_names(conn: Connection, tokenizer: AbstractTokenizer,
languages: Optional[Container[str]] = None) -> None:
""" Add default country names to search index. `languages` is a comma-
separated list of language codes as used in OSM. If `languages` is not
empty then only name translations for the given languages are added
to the index.
"""
def _include_key(key: str) -> bool:
return ':' not in key or not languages or \
key[key.index(':') + 1:] in languages
with conn.cursor() as cur:
psycopg2.extras.register_hstore(cur)
cur.execute("""SELECT country_code, name FROM country_name
WHERE country_code is not null""")
with tokenizer.name_analyzer() as analyzer:
for code, name in cur:
names = {'countrycode': code}
# country names (only in languages as provided)
if name:
names.update({k : v for k, v in name.items() if _include_key(k)})
analyzer.add_country_names(code, names)
conn.commit()

View File

@@ -0,0 +1,86 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Wrapper around place information the indexer gets from the database and hands to
the tokenizer.
"""
from typing import Optional, Mapping, Any, Tuple
class PlaceInfo:
""" This data class contains all information the tokenizer can access
about a place.
"""
def __init__(self, info: Mapping[str, Any]) -> None:
self._info = info
@property
def name(self) -> Optional[Mapping[str, str]]:
""" A dictionary with the names of the place. Keys and values represent
the full key and value of the corresponding OSM tag. Which tags
are saved as names is determined by the import style.
The property may be None if the place has no names.
"""
return self._info.get('name')
@property
def address(self) -> Optional[Mapping[str, str]]:
""" A dictionary with the address elements of the place. They key
usually corresponds to the suffix part of the key of an OSM
'addr:*' or 'isin:*' tag. There are also some special keys like
`country` or `country_code` which merge OSM keys that contain
the same information. See [Import Styles][1] for details.
The property may be None if the place has no address information.
[1]: ../customize/Import-Styles.md
"""
return self._info.get('address')
@property
def country_code(self) -> Optional[str]:
""" The country code of the country the place is in. Guaranteed
to be a two-letter lower-case string. If the place is not inside
any country, the property is set to None.
"""
return self._info.get('country_code')
@property
def rank_address(self) -> int:
""" The [rank address][1] before any rank correction is applied.
[1]: ../customize/Ranking.md#address-rank
"""
return self._info.get('rank_address', 0)
@property
def centroid(self) -> Optional[Tuple[float, float]]:
""" A center point of the place in WGS84. May be None when the
geometry of the place is unknown.
"""
x, y = self._info.get('centroid_x'), self._info.get('centroid_y')
return None if x is None or y is None else (x, y)
def is_a(self, key: str, value: str) -> bool:
""" Set to True when the place's primary tag corresponds to the given
key and value.
"""
return self._info.get('class') == key and self._info.get('type') == value
def is_country(self) -> bool:
""" Set to True when the place is a valid country boundary.
"""
return self.rank_address == 4 \
and self.is_a('boundary', 'administrative') \
and self.country_code is not None
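# Illustrative example (not part of the module): a PlaceInfo wraps the raw
# dictionary handed over by the indexer. With a hypothetical input like
#   info = PlaceInfo({'class': 'boundary', 'type': 'administrative',
#                     'rank_address': 4, 'country_code': 'de',
#                     'name': {'name': 'Deutschland'}})
# info.is_a('boundary', 'administrative') and info.is_country() are both True,
# while info.address and info.centroid are None because the corresponding
# fields are missing.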

View File

@@ -0,0 +1,78 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Data class for a single name of a place.
"""
from typing import Optional, Dict, Mapping
class PlaceName:
""" Each name and address part of a place is encapsulated in an object of
this class. It saves not only the name proper but also describes the
kind of name with two properties:
* `kind` describes the name of the OSM key used without any suffixes
(i.e. the part after the colon removed)
* `suffix` contains the suffix of the OSM tag, if any. The suffix
is the part of the key after the first colon.
In addition to that, a name may have arbitrary additional attributes.
How attributes are used depends on the sanitizers and token analysers.
The exception is the 'analyzer' attribute. This attribute determines
which token analysis module will be used to finalize the treatment of
names.
"""
def __init__(self, name: str, kind: str, suffix: Optional[str]):
self.name = name
self.kind = kind
self.suffix = suffix
self.attr: Dict[str, str] = {}
def __repr__(self) -> str:
return f"PlaceName(name={self.name!r},kind={self.kind!r},suffix={self.suffix!r})"
def clone(self, name: Optional[str] = None,
kind: Optional[str] = None,
suffix: Optional[str] = None,
attr: Optional[Mapping[str, str]] = None) -> 'PlaceName':
""" Create a deep copy of the place name, optionally with the
given parameters replaced. In the attribute list only the given
keys are updated. The list is not replaced completely.
In particular, the function cannot be used to remove an
attribute from a place name.
"""
newobj = PlaceName(name or self.name,
kind or self.kind,
suffix or self.suffix)
newobj.attr.update(self.attr)
if attr:
newobj.attr.update(attr)
return newobj
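# Illustrative example (not part of the module): clone() copies the existing
# attribute dict and only overlays the given values. For a hypothetical
#   pn = PlaceName('Main Street', 'name', None)
#   pn.set_attr('analyzer', '@street')   # '@street' is an assumed analyzer id
#   short = pn.clone(name='Main St')
# 'short' keeps kind='name' and the 'analyzer' attribute but carries the new name.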
def set_attr(self, key: str, value: str) -> None:
""" Add the given property to the name. If the property was already
set, then the value is overwritten.
"""
self.attr[key] = value
def get_attr(self, key: str, default: Optional[str] = None) -> Optional[str]:
""" Return the given property or the value of 'default' if it
is not set.
"""
return self.attr.get(key, default)
def has_attr(self, key: str) -> bool:
""" Check if the given attribute is set.
"""
return key in self.attr

View File

@@ -0,0 +1,114 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for formatting postcodes according to their country-specific
format.
"""
from typing import Any, Mapping, Optional, Set, Match
import re
from nominatim_core.errors import UsageError
from . import country_info
class CountryPostcodeMatcher:
""" Matches and formats a postcode according to a format definition
of the given country.
"""
def __init__(self, country_code: str, config: Mapping[str, Any]) -> None:
if 'pattern' not in config:
raise UsageError("Field 'pattern' required for 'postcode' "
f"for country '{country_code}'")
pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
self.pattern = re.compile(pc_pattern)
self.output = config.get('output', r'\g<0>')
def match(self, postcode: str) -> Optional[Match[str]]:
""" Match the given postcode against the postcode pattern for this
matcher. Returns a `re.Match` object if the match was successful
and None otherwise.
"""
# Upper-case, strip spaces and leading country code.
normalized = self.norm_pattern.fullmatch(postcode.upper())
if normalized:
return self.pattern.fullmatch(normalized.group(1))
return None
def normalize(self, match: Match[str]) -> str:
""" Return the default format of the postcode for the given match.
`match` must be a `re.Match` object previously returned by
`match()`
"""
return match.expand(self.output)
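# Worked example (illustrative, with an assumed configuration): a configured
# pattern of 'ddddd' becomes the regex '[0-9][0-9][0-9][0-9][0-9]'. For a
# matcher built for country 'de', match('de-12345') first upper-cases the
# input, strips the optional leading country code and then matches '12345';
# normalize() on that match returns '12345' because the default output
# template is the whole matched group.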
class PostcodeFormatter:
""" Container for different postcode formats of the world and
access functions.
"""
def __init__(self) -> None:
# Objects without a country code can't have a postcode by definition.
self.country_without_postcode: Set[Optional[str]] = {None}
self.country_matcher = {}
self.default_matcher = CountryPostcodeMatcher('', {'pattern': '.*'})
for ccode, prop in country_info.iterate('postcode'):
if prop is False:
self.country_without_postcode.add(ccode)
elif isinstance(prop, dict):
self.country_matcher[ccode] = CountryPostcodeMatcher(ccode, prop)
else:
raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'")
def set_default_pattern(self, pattern: str) -> None:
""" Set the postcode match pattern to use, when a country does not
have a specific pattern.
"""
self.default_matcher = CountryPostcodeMatcher('', {'pattern': pattern})
def get_matcher(self, country_code: Optional[str]) -> Optional[CountryPostcodeMatcher]:
""" Return the CountryPostcodeMatcher for the given country.
Returns None if the country doesn't have a postcode and the
default matcher if there is no specific matcher configured for
the country.
"""
if country_code in self.country_without_postcode:
return None
assert country_code is not None
return self.country_matcher.get(country_code, self.default_matcher)
def match(self, country_code: Optional[str], postcode: str) -> Optional[Match[str]]:
""" Match the given postcode against the postcode pattern for this
matcher. Returns a `re.Match` object if the country has a pattern
and the match was successful or None if the match failed.
"""
if country_code in self.country_without_postcode:
return None
assert country_code is not None
return self.country_matcher.get(country_code, self.default_matcher).match(postcode)
def normalize(self, country_code: str, match: Match[str]) -> str:
""" Return the default format of the postcode for the given match.
`match` must be a `re.Match` object previously returned by
`match()`
"""
return self.country_matcher.get(country_code, self.default_matcher).normalize(match)
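# Usage sketch (illustrative only): the formatter dispatches to the matcher of
# the given country and falls back to the default matcher when no specific
# configuration exists:
#   formatter = PostcodeFormatter()
#   m = formatter.match('de', ' 12345 ')
#   if m is not None:
#       canonical = formatter.normalize('de', m)
# Countries whose configuration disables postcodes (postcode set to false) and
# objects without a country code always yield None from match().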


View File

@@ -0,0 +1,242 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Main workhorse for indexing (computing addresses in) the database.
"""
from typing import Optional, Any, cast
import logging
import time
import psycopg2.extras
from nominatim_core.typing import DictCursorResults
from nominatim_core.db.async_connection import DBConnection, WorkerPool
from nominatim_core.db.connection import connect, Connection, Cursor
from ..tokenizer.base import AbstractTokenizer
from .progress import ProgressLogger
from . import runners
LOG = logging.getLogger()
class PlaceFetcher:
""" Asynchronous connection that fetches place details for processing.
"""
def __init__(self, dsn: str, setup_conn: Connection) -> None:
self.wait_time = 0.0
self.current_ids: Optional[DictCursorResults] = None
self.conn: Optional[DBConnection] = DBConnection(dsn,
cursor_factory=psycopg2.extras.DictCursor)
with setup_conn.cursor() as cur:
# need to fetch those manually because register_hstore cannot
# fetch them on an asynchronous connection below.
hstore_oid = cur.scalar("SELECT 'hstore'::regtype::oid")
hstore_array_oid = cur.scalar("SELECT 'hstore[]'::regtype::oid")
psycopg2.extras.register_hstore(self.conn.conn, oid=hstore_oid,
array_oid=hstore_array_oid)
def close(self) -> None:
""" Close the underlying asynchronous connection.
"""
if self.conn:
self.conn.close()
self.conn = None
def fetch_next_batch(self, cur: Cursor, runner: runners.Runner) -> bool:
""" Send a request for the next batch of places.
If details for the places are required, they will be fetched
asynchronously.
Returns true if there is still data available.
"""
ids = cast(Optional[DictCursorResults], cur.fetchmany(100))
if not ids:
self.current_ids = None
return False
assert self.conn is not None
self.current_ids = runner.get_place_details(self.conn, ids)
return True
def get_batch(self) -> DictCursorResults:
""" Get the next batch of data, previously requested with
`fetch_next_batch`.
"""
assert self.conn is not None
assert self.conn.cursor is not None
if self.current_ids is not None and not self.current_ids:
tstart = time.time()
self.conn.wait()
self.wait_time += time.time() - tstart
self.current_ids = cast(Optional[DictCursorResults],
self.conn.cursor.fetchall())
return self.current_ids if self.current_ids is not None else []
def __enter__(self) -> 'PlaceFetcher':
return self
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
assert self.conn is not None
self.conn.wait()
self.close()
class Indexer:
""" Main indexing routine.
"""
def __init__(self, dsn: str, tokenizer: AbstractTokenizer, num_threads: int):
self.dsn = dsn
self.tokenizer = tokenizer
self.num_threads = num_threads
def has_pending(self) -> bool:
""" Check if any data still needs indexing.
This function must only be used after the import has finished.
Otherwise it will be very expensive.
"""
with connect(self.dsn) as conn:
with conn.cursor() as cur:
cur.execute("SELECT 'a' FROM placex WHERE indexed_status > 0 LIMIT 1")
return cur.rowcount > 0
def index_full(self, analyse: bool = True) -> None:
""" Index the complete database. This will first index boundaries
followed by all other objects. When `analyse` is True, then the
database will be analysed at the appropriate places to
ensure that database statistics are updated.
"""
with connect(self.dsn) as conn:
conn.autocommit = True
def _analyze() -> None:
if analyse:
with conn.cursor() as cur:
cur.execute('ANALYZE')
if self.index_by_rank(0, 4) > 0:
_analyze()
if self.index_boundaries(0, 30) > 100:
_analyze()
if self.index_by_rank(5, 25) > 100:
_analyze()
if self.index_by_rank(26, 30) > 1000:
_analyze()
if self.index_postcodes() > 100:
_analyze()
def index_boundaries(self, minrank: int, maxrank: int) -> int:
""" Index only administrative boundaries within the given rank range.
"""
total = 0
LOG.warning("Starting indexing boundaries using %s threads",
self.num_threads)
with self.tokenizer.name_analyzer() as analyzer:
for rank in range(max(minrank, 4), min(maxrank, 26)):
total += self._index(runners.BoundaryRunner(rank, analyzer))
return total
def index_by_rank(self, minrank: int, maxrank: int) -> int:
""" Index all entries of placex in the given rank range (inclusive)
in order of their address rank.
When rank 30 is requested then also interpolations and
places with address rank 0 will be indexed.
"""
total = 0
maxrank = min(maxrank, 30)
LOG.warning("Starting indexing rank (%i to %i) using %i threads",
minrank, maxrank, self.num_threads)
with self.tokenizer.name_analyzer() as analyzer:
for rank in range(max(1, minrank), maxrank + 1):
total += self._index(runners.RankRunner(rank, analyzer), 20 if rank == 30 else 1)
if maxrank == 30:
total += self._index(runners.RankRunner(0, analyzer))
total += self._index(runners.InterpolationRunner(analyzer), 20)
return total
def index_postcodes(self) -> int:
"""Index the entries of the location_postcode table.
"""
LOG.warning("Starting indexing postcodes using %s threads", self.num_threads)
return self._index(runners.PostcodeRunner(), 20)
def update_status_table(self) -> None:
""" Update the status in the status table to 'indexed'.
"""
with connect(self.dsn) as conn:
with conn.cursor() as cur:
cur.execute('UPDATE import_status SET indexed = true')
conn.commit()
def _index(self, runner: runners.Runner, batch: int = 1) -> int:
""" Index a single rank or table. `runner` describes the SQL to use
for indexing. `batch` describes the number of objects that
should be processed with a single SQL statement
"""
LOG.warning("Starting %s (using batch size %s)", runner.name(), batch)
with connect(self.dsn) as conn:
psycopg2.extras.register_hstore(conn)
with conn.cursor() as cur:
total_tuples = cur.scalar(runner.sql_count_objects())
LOG.debug("Total number of rows: %i", total_tuples)
conn.commit()
progress = ProgressLogger(runner.name(), total_tuples)
if total_tuples > 0:
with conn.cursor(name='places') as cur:
cur.execute(runner.sql_get_objects())
with PlaceFetcher(self.dsn, conn) as fetcher:
with WorkerPool(self.dsn, self.num_threads) as pool:
has_more = fetcher.fetch_next_batch(cur, runner)
while has_more:
places = fetcher.get_batch()
# asynchronously get the next batch
has_more = fetcher.fetch_next_batch(cur, runner)
# And insert the current batch
for idx in range(0, len(places), batch):
part = places[idx:idx + batch]
LOG.debug("Processing places: %s", str(part))
runner.index_places(pool.next_free_worker(), part)
progress.add(len(part))
LOG.info("Wait time: fetcher: %.2fs, pool: %.2fs",
fetcher.wait_time, pool.wait_time)
conn.commit()
return progress.done()
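# Usage sketch (illustrative only): a typical indexing run creates one Indexer
# per database and lets it work through all pending objects, e.g.
#   indexer = Indexer(dsn, tokenizer, num_threads=4)
#   indexer.index_full(analyse=True)
# _index() overlaps work: while the worker pool processes the current batch of
# places, the PlaceFetcher already requests the details for the next batch on
# its asynchronous connection.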

View File

@@ -0,0 +1,74 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helpers for progress logging.
"""
import logging
from datetime import datetime
LOG = logging.getLogger()
INITIAL_PROGRESS = 10
class ProgressLogger:
""" Tracks and prints progress for the indexing process.
`name` is the name of the indexing step being tracked.
`total` is the total number of items that need processing.
`log_interval` denotes the interval in seconds at which progress
should be reported.
"""
def __init__(self, name: str, total: int, log_interval: int = 1) -> None:
self.name = name
self.total_places = total
self.done_places = 0
self.rank_start_time = datetime.now()
self.log_interval = log_interval
self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.WARNING) else total + 1
def add(self, num: int = 1) -> None:
""" Mark `num` places as processed. Print a log message if the
logging is at least info and the log interval has passed.
"""
self.done_places += num
if self.done_places < self.next_info:
return
now = datetime.now()
done_time = (now - self.rank_start_time).total_seconds()
if done_time < 2:
self.next_info = self.done_places + INITIAL_PROGRESS
return
places_per_sec = self.done_places / done_time
eta = (self.total_places - self.done_places) / places_per_sec
LOG.warning("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
self.done_places, int(done_time),
places_per_sec, self.name, eta)
self.next_info += int(places_per_sec) * self.log_interval
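# Worked example (illustrative only): with log_interval=1, if 10000 places were
# processed in 20 seconds, places_per_sec is 500 and, for a total of 100000
# places, the reported ETA is (100000 - 10000) / 500 = 180 seconds. The next
# log message is then scheduled 500 places later
# (int(places_per_sec) * log_interval).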
def done(self) -> int:
""" Print final statistics about the progress.
"""
rank_end_time = datetime.now()
if rank_end_time == self.rank_start_time:
diff_seconds = 0.0
places_per_sec = float(self.done_places)
else:
diff_seconds = (rank_end_time - self.rank_start_time).total_seconds()
places_per_sec = self.done_places / diff_seconds
LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n",
self.done_places, self.total_places, int(diff_seconds),
places_per_sec, self.name)
return self.done_places

View File

@@ -0,0 +1,196 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Mix-ins that provide the actual commands for the indexer for various indexing
tasks.
"""
from typing import Any, List
import functools
from psycopg2 import sql as pysql
import psycopg2.extras
from nominatim_core.typing import Query, DictCursorResult, DictCursorResults, Protocol
from nominatim_core.db.async_connection import DBConnection
from ..data.place_info import PlaceInfo
from ..tokenizer.base import AbstractAnalyzer
# pylint: disable=C0111
def _mk_valuelist(template: str, num: int) -> pysql.Composed:
return pysql.SQL(',').join([pysql.SQL(template)] * num)
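# Illustrative example (not part of the module): _mk_valuelist() simply repeats
# the placeholder template, so _mk_valuelist(AbstractPlacexRunner.UPDATE_LINE, 2)
# renders as
#   (%s, %s::hstore, %s::hstore, %s::int, %s::jsonb),(%s, %s::hstore, %s::hstore, %s::int, %s::jsonb)
# which _index_sql() then embeds into the UPDATE ... FROM (VALUES ...) statement.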
def _analyze_place(place: DictCursorResult, analyzer: AbstractAnalyzer) -> psycopg2.extras.Json:
return psycopg2.extras.Json(analyzer.process_place(PlaceInfo(place)))
class Runner(Protocol):
def name(self) -> str: ...
def sql_count_objects(self) -> Query: ...
def sql_get_objects(self) -> Query: ...
def get_place_details(self, worker: DBConnection,
ids: DictCursorResults) -> DictCursorResults: ...
def index_places(self, worker: DBConnection, places: DictCursorResults) -> None: ...
class AbstractPlacexRunner:
""" Returns SQL commands for indexing of the placex table.
"""
SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
UPDATE_LINE = "(%s, %s::hstore, %s::hstore, %s::int, %s::jsonb)"
def __init__(self, rank: int, analyzer: AbstractAnalyzer) -> None:
self.rank = rank
self.analyzer = analyzer
@functools.lru_cache(maxsize=1)
def _index_sql(self, num_places: int) -> pysql.Composed:
return pysql.SQL(
""" UPDATE placex
SET indexed_status = 0, address = v.addr, token_info = v.ti,
name = v.name, linked_place_id = v.linked_place_id
FROM (VALUES {}) as v(id, name, addr, linked_place_id, ti)
WHERE place_id = v.id
""").format(_mk_valuelist(AbstractPlacexRunner.UPDATE_LINE, num_places))
def get_place_details(self, worker: DBConnection, ids: DictCursorResults) -> DictCursorResults:
worker.perform("""SELECT place_id, extra.*
FROM placex, LATERAL placex_indexing_prepare(placex) as extra
WHERE place_id IN %s""",
(tuple((p[0] for p in ids)), ))
return []
def index_places(self, worker: DBConnection, places: DictCursorResults) -> None:
values: List[Any] = []
for place in places:
for field in ('place_id', 'name', 'address', 'linked_place_id'):
values.append(place[field])
values.append(_analyze_place(place, self.analyzer))
worker.perform(self._index_sql(len(places)), values)
class RankRunner(AbstractPlacexRunner):
""" Returns SQL commands for indexing one rank within the placex table.
"""
def name(self) -> str:
return f"rank {self.rank}"
def sql_count_objects(self) -> pysql.Composed:
return pysql.SQL("""SELECT count(*) FROM placex
WHERE rank_address = {} and indexed_status > 0
""").format(pysql.Literal(self.rank))
def sql_get_objects(self) -> pysql.Composed:
return self.SELECT_SQL + pysql.SQL(
"""WHERE indexed_status > 0 and rank_address = {}
ORDER BY geometry_sector
""").format(pysql.Literal(self.rank))
class BoundaryRunner(AbstractPlacexRunner):
""" Returns SQL commands for indexing the administrative boundaries
of a certain rank.
"""
def name(self) -> str:
return f"boundaries rank {self.rank}"
def sql_count_objects(self) -> pysql.Composed:
return pysql.SQL("""SELECT count(*) FROM placex
WHERE indexed_status > 0
AND rank_search = {}
AND class = 'boundary' and type = 'administrative'
""").format(pysql.Literal(self.rank))
def sql_get_objects(self) -> pysql.Composed:
return self.SELECT_SQL + pysql.SQL(
"""WHERE indexed_status > 0 and rank_search = {}
and class = 'boundary' and type = 'administrative'
ORDER BY partition, admin_level
""").format(pysql.Literal(self.rank))
class InterpolationRunner:
""" Returns SQL commands for indexing the address interpolation table
location_property_osmline.
"""
def __init__(self, analyzer: AbstractAnalyzer) -> None:
self.analyzer = analyzer
def name(self) -> str:
return "interpolation lines (location_property_osmline)"
def sql_count_objects(self) -> str:
return """SELECT count(*) FROM location_property_osmline
WHERE indexed_status > 0"""
def sql_get_objects(self) -> str:
return """SELECT place_id
FROM location_property_osmline
WHERE indexed_status > 0
ORDER BY geometry_sector"""
def get_place_details(self, worker: DBConnection, ids: DictCursorResults) -> DictCursorResults:
worker.perform("""SELECT place_id, get_interpolation_address(address, osm_id) as address
FROM location_property_osmline WHERE place_id IN %s""",
(tuple((p[0] for p in ids)), ))
return []
@functools.lru_cache(maxsize=1)
def _index_sql(self, num_places: int) -> pysql.Composed:
return pysql.SQL("""UPDATE location_property_osmline
SET indexed_status = 0, address = v.addr, token_info = v.ti
FROM (VALUES {}) as v(id, addr, ti)
WHERE place_id = v.id
""").format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
def index_places(self, worker: DBConnection, places: DictCursorResults) -> None:
values: List[Any] = []
for place in places:
values.extend((place[x] for x in ('place_id', 'address')))
values.append(_analyze_place(place, self.analyzer))
worker.perform(self._index_sql(len(places)), values)
class PostcodeRunner(Runner):
""" Provides the SQL commands for indexing the location_postcode table.
"""
def name(self) -> str:
return "postcodes (location_postcode)"
def sql_count_objects(self) -> str:
return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'
def sql_get_objects(self) -> str:
return """SELECT place_id FROM location_postcode
WHERE indexed_status > 0
ORDER BY country_code, postcode"""
def get_place_details(self, worker: DBConnection, ids: DictCursorResults) -> DictCursorResults:
return ids
def index_places(self, worker: DBConnection, places: DictCursorResults) -> None:
worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
WHERE place_id IN ({})""")
.format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in places))))


View File

@@ -0,0 +1,253 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Abstract class definitions for tokenizers. These base classes are here
mainly for documentation purposes.
"""
from abc import ABC, abstractmethod
from typing import List, Tuple, Dict, Any, Optional, Iterable
from pathlib import Path
from nominatim_core.typing import Protocol
from nominatim_core.config import Configuration
from nominatim_core.db.connection import Connection
from ..data.place_info import PlaceInfo
class AbstractAnalyzer(ABC):
""" The analyzer provides the functions for analysing names and building
the token database.
Analyzers are instantiated on a per-thread base. Access to global data
structures must be synchronised accordingly.
"""
def __enter__(self) -> 'AbstractAnalyzer':
return self
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
self.close()
@abstractmethod
def close(self) -> None:
""" Free all resources used by the analyzer.
"""
@abstractmethod
def get_word_token_info(self, words: List[str]) -> List[Tuple[str, str, int]]:
""" Return token information for the given list of words.
The function is used for testing and debugging only
and does not need to be particularly efficient.
Arguments:
words: A list of words to look up the tokens for.
If a word starts with '#' it is assumed to be a full name,
otherwise a partial term.
Returns:
The function returns the list of all tuples that could be
found for the given words. Each list entry is a tuple of
(original word, word token, word id).
"""
@abstractmethod
def normalize_postcode(self, postcode: str) -> str:
""" Convert the postcode to its standardized form.
This function must yield exactly the same result as the SQL function
`token_normalized_postcode()`.
Arguments:
postcode: The postcode to be normalized.
Returns:
The given postcode after normalization.
"""
@abstractmethod
def update_postcodes_from_db(self) -> None:
""" Update the tokenizer's postcode tokens from the current content
of the `location_postcode` table.
"""
@abstractmethod
def update_special_phrases(self,
phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Update the tokenizer's special phrase tokens from the given
list of special phrases.
Arguments:
phrases: The new list of special phrases. Each entry is
a tuple of (phrase, class, type, operator).
should_replace: If true, replace the current list of phrases.
When false, just add the given phrases to the
ones that already exist.
"""
@abstractmethod
def add_country_names(self, country_code: str, names: Dict[str, str]) -> None:
""" Add the given names to the tokenizer's list of country tokens.
Arguments:
country_code: two-letter country code for the country the names
refer to.
names: Dictionary of name type to name.
"""
@abstractmethod
def process_place(self, place: PlaceInfo) -> Any:
""" Extract tokens for the given place and compute the
information to be handed to the PL/pgSQL processor for building
the search index.
Arguments:
place: Place information retrieved from the database.
Returns:
A JSON-serialisable structure that will be handed into
the database via the `token_info` field.
"""
class AbstractTokenizer(ABC):
""" The tokenizer instance is the central instance of the tokenizer in
the system. There will only be a single instance of the tokenizer
active at any time.
"""
@abstractmethod
def init_new_db(self, config: Configuration, init_db: bool = True) -> None:
""" Set up a new tokenizer for the database.
The function should copy all necessary data into the project
directory or save it in the property table to make sure that
the tokenizer remains stable over updates.
Arguments:
config: Read-only object with configuration options.
init_db: When set to False, then initialisation of database
tables should be skipped. This option is only required for
migration purposes and can be safely ignored by custom
tokenizers.
"""
@abstractmethod
def init_from_project(self, config: Configuration) -> None:
""" Initialise the tokenizer from an existing database setup.
The function should load all previously saved configuration from
the project directory and/or the property table.
Arguments:
config: Read-only object with configuration options.
"""
@abstractmethod
def finalize_import(self, config: Configuration) -> None:
""" This function is called at the very end of an import when all
data has been imported and indexed. The tokenizer may create
at this point any additional indexes and data structures needed
during query time.
Arguments:
config: Read-only object with configuration options.
"""
@abstractmethod
def update_sql_functions(self, config: Configuration) -> None:
""" Update the SQL part of the tokenizer. This function is called
automatically on migrations or may be called explicitly by the
user through the `nominatim refresh --functions` command.
The tokenizer must only update the code of the tokenizer. The
data structures or data itself must not be changed by this function.
Arguments:
config: Read-only object with configuration options.
"""
@abstractmethod
def check_database(self, config: Configuration) -> Optional[str]:
""" Check that the database is set up correctly and ready for being
queried.
Arguments:
config: Read-only object with configuration options.
Returns:
If an issue was found, return an error message with the
description of the issue as well as hints for the user on
how to resolve the issue. If everything is okay, return `None`.
"""
@abstractmethod
def update_statistics(self, config: Configuration, threads: int = 1) -> None:
""" Recompute any tokenizer statistics necessary for efficient lookup.
This function is meant to be called from time to time by the user
to improve performance. However, the tokenizer must not depend on
it to be called in order to work.
"""
@abstractmethod
def update_word_tokens(self) -> None:
""" Do house-keeping on the tokenizers internal data structures.
Remove unused word tokens, resort data etc.
"""
@abstractmethod
def name_analyzer(self) -> AbstractAnalyzer:
""" Create a new analyzer for tokenizing names and queries
using this tokenizer. Analyzers are context managers and should
be used accordingly:
```
with tokenizer.name_analyzer() as analyzer:
analyzer.tokenize()
```
When used outside the with construct, the caller must ensure that
the close() function is called before the analyzer is discarded.
"""
@abstractmethod
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
""" Return a list of the most frequent full words in the database.
Arguments:
conn: Open connection to the database which may be used to
retrieve the words.
num: Maximum number of words to return.
"""
class TokenizerModule(Protocol):
""" Interface that must be exported by modules that implement their
own tokenizer.
"""
def create(self, dsn: str, data_dir: Path) -> AbstractTokenizer:
""" Factory for new tokenizers.
"""

View File

@@ -0,0 +1,102 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for creating a tokenizer or initialising the right one for an
existing database.
A tokenizer is something that is bound to the lifetime of a database. It
can be chosen and configured before the initial import but then needs to
be used consistently when querying and updating the database.
This module provides the functions to create and configure a new tokenizer
as well as instantiating the appropriate tokenizer for updating an existing
database.
A tokenizer usually also includes PHP code for querying. The appropriate PHP
normalizer module is installed when the tokenizer is created.
"""
from typing import Optional
import logging
import importlib
from pathlib import Path
from nominatim_core.errors import UsageError
from nominatim_core.db import properties
from nominatim_core.db.connection import connect
from nominatim_core.config import Configuration
from ..tokenizer.base import AbstractTokenizer, TokenizerModule
LOG = logging.getLogger()
def _import_tokenizer(name: str) -> TokenizerModule:
""" Load the tokenizer.py module from project directory.
"""
src_file = Path(__file__).parent / (name + '_tokenizer.py')
if not src_file.is_file():
LOG.fatal("No tokenizer named '%s' available. "
"Check the setting of NOMINATIM_TOKENIZER.", name)
raise UsageError('Tokenizer not found')
return importlib.import_module('nominatim.tokenizer.' + name + '_tokenizer')
def create_tokenizer(config: Configuration, init_db: bool = True,
module_name: Optional[str] = None) -> AbstractTokenizer:
""" Create a new tokenizer as defined by the given configuration.
The tokenizer data and code is copied into the 'tokenizer' directory
of the project directory and the tokenizer loaded from its new location.
"""
if module_name is None:
module_name = config.TOKENIZER
# Create the directory for the tokenizer data
assert config.project_dir is not None
basedir = config.project_dir / 'tokenizer'
if not basedir.exists():
basedir.mkdir()
elif not basedir.is_dir():
LOG.fatal("Tokenizer directory '%s' cannot be created.", basedir)
raise UsageError("Tokenizer setup failed.")
# Import and initialize the tokenizer.
tokenizer_module = _import_tokenizer(module_name)
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
tokenizer.init_new_db(config, init_db=init_db)
with connect(config.get_libpq_dsn()) as conn:
properties.set_property(conn, 'tokenizer', module_name)
return tokenizer
def get_tokenizer_for_db(config: Configuration) -> AbstractTokenizer:
""" Instantiate a tokenizer for an existing database.
The function looks up the appropriate tokenizer in the database
and initialises it.
"""
assert config.project_dir is not None
basedir = config.project_dir / 'tokenizer'
if not basedir.is_dir():
# Directory will be repopulated by tokenizer below.
basedir.mkdir()
with connect(config.get_libpq_dsn()) as conn:
name = properties.get_property(conn, 'tokenizer')
if name is None:
LOG.fatal("Tokenizer was not set up properly. Database property missing.")
raise UsageError('Cannot initialize tokenizer.')
tokenizer_module = _import_tokenizer(name)
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
tokenizer.init_from_project(config)
return tokenizer
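# Usage sketch (illustrative only): during the initial import the tokenizer is
# created once and its name is stored as a database property; later runs look
# it up again through that property:
#   tokenizer = create_tokenizer(config)        # at import time
#   tokenizer = get_tokenizer_for_db(config)    # for updates and queries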

View File

@@ -0,0 +1,196 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper class to create ICU rules from a configuration file.
"""
from typing import Mapping, Any, Dict, Optional
import io
import json
import logging
from icu import Transliterator
from nominatim_core.config import flatten_config_list, Configuration
from nominatim_core.db.properties import set_property, get_property
from nominatim_core.db.connection import Connection
from nominatim_core.errors import UsageError
from .place_sanitizer import PlaceSanitizer
from .icu_token_analysis import ICUTokenAnalysis
from .token_analysis.base import AnalysisModule, Analyzer
from ..data import country_info
LOG = logging.getLogger()
DBCFG_IMPORT_NORM_RULES = "tokenizer_import_normalisation"
DBCFG_IMPORT_TRANS_RULES = "tokenizer_import_transliteration"
DBCFG_IMPORT_ANALYSIS_RULES = "tokenizer_import_analysis_rules"
def _get_section(rules: Mapping[str, Any], section: str) -> Any:
""" Get the section named 'section' from the rules. If the section does
not exist, raise a usage error with a meaningful message.
"""
if section not in rules:
LOG.fatal("Section '%s' not found in tokenizer config.", section)
raise UsageError("Syntax error in tokenizer configuration file.")
return rules[section]
class ICURuleLoader:
""" Compiler for ICU rules from a tokenizer configuration file.
"""
def __init__(self, config: Configuration) -> None:
self.config = config
rules = config.load_sub_configuration('icu_tokenizer.yaml',
config='TOKENIZER_CONFIG')
# Make sure country information is available to analyzers and sanitizers.
country_info.setup_country_config(config)
self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration')
self.analysis_rules = _get_section(rules, 'token-analysis')
self._setup_analysis()
# Load optional sanitizer rule set.
self.sanitizer_rules = rules.get('sanitizers', [])
def load_config_from_db(self, conn: Connection) -> None:
""" Get previously saved parts of the configuration from the
database.
"""
rules = get_property(conn, DBCFG_IMPORT_NORM_RULES)
if rules is not None:
self.normalization_rules = rules
rules = get_property(conn, DBCFG_IMPORT_TRANS_RULES)
if rules is not None:
self.transliteration_rules = rules
rules = get_property(conn, DBCFG_IMPORT_ANALYSIS_RULES)
if rules:
self.analysis_rules = json.loads(rules)
else:
self.analysis_rules = []
self._setup_analysis()
def save_config_to_db(self, conn: Connection) -> None:
""" Save the part of the configuration that cannot be changed into
the database.
"""
set_property(conn, DBCFG_IMPORT_NORM_RULES, self.normalization_rules)
set_property(conn, DBCFG_IMPORT_TRANS_RULES, self.transliteration_rules)
set_property(conn, DBCFG_IMPORT_ANALYSIS_RULES, json.dumps(self.analysis_rules))
def make_sanitizer(self) -> PlaceSanitizer:
""" Create a place sanitizer from the configured rules.
"""
return PlaceSanitizer(self.sanitizer_rules, self.config)
def make_token_analysis(self) -> ICUTokenAnalysis:
""" Create a token analyser from the reviouly loaded rules.
"""
return ICUTokenAnalysis(self.normalization_rules,
self.transliteration_rules, self.analysis)
def get_search_rules(self) -> str:
""" Return the ICU rules to be used during search.
The rules combine normalization and transliteration.
"""
# First apply the normalization rules.
rules = io.StringIO()
rules.write(self.normalization_rules)
# Then add transliteration.
rules.write(self.transliteration_rules)
return rules.getvalue()
def get_normalization_rules(self) -> str:
""" Return rules for normalisation of a term.
"""
return self.normalization_rules
def get_transliteration_rules(self) -> str:
""" Return the rules for converting a string into its asciii representation.
"""
return self.transliteration_rules
def _setup_analysis(self) -> None:
""" Process the rules used for creating the various token analyzers.
"""
self.analysis: Dict[Optional[str], TokenAnalyzerRule] = {}
if not isinstance(self.analysis_rules, list):
raise UsageError("Configuration section 'token-analysis' must be a list.")
norm = Transliterator.createFromRules("rule_loader_normalization",
self.normalization_rules)
trans = Transliterator.createFromRules("rule_loader_transliteration",
self.transliteration_rules)
for section in self.analysis_rules:
name = section.get('id', None)
if name in self.analysis:
if name is None:
LOG.fatal("ICU tokenizer configuration has two default token analyzers.")
else:
LOG.fatal("ICU tokenizer configuration has two token "
"analyzers with id '%s'.", name)
raise UsageError("Syntax error in ICU tokenizer config.")
self.analysis[name] = TokenAnalyzerRule(section, norm, trans,
self.config)
@staticmethod
def _cfg_to_icu_rules(rules: Mapping[str, Any], section: str) -> str:
""" Load an ICU ruleset from the given section. If the section is a
simple string, it is interpreted as a file name and the rules are
loaded verbatim from the given file. The filename is expected to be
relative to the tokenizer rule file. If the section is a list then
each line is assumed to be a rule. All rules are concatenated and returned.
"""
content = _get_section(rules, section)
if content is None:
return ''
return ';'.join(flatten_config_list(content, section)) + ';'
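# Illustrative example (not part of the module): given a hypothetical section
#   normalization:
#     - ":: lower ()"
#     - ":: NFC ()"
# _cfg_to_icu_rules() returns the single string ':: lower ();:: NFC ();', i.e.
# all rules joined by ';' with a trailing ';'.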
class TokenAnalyzerRule:
""" Factory for a single analysis module. The class saves the configuration
and creates a new token analyzer on request.
"""
def __init__(self, rules: Mapping[str, Any],
normalizer: Any, transliterator: Any,
config: Configuration) -> None:
analyzer_name = _get_section(rules, 'analyzer')
if not analyzer_name or not isinstance(analyzer_name, str):
raise UsageError("'analyzer' parameter needs to be simple string")
self._analysis_mod: AnalysisModule = \
config.load_plugin_module(analyzer_name, 'nominatim.tokenizer.token_analysis')
self.config = self._analysis_mod.configure(rules, normalizer,
transliterator)
def create(self, normalizer: Any, transliterator: Any) -> Analyzer:
""" Create a new analyser instance for the given rule.
"""
return self._analysis_mod.create(normalizer, transliterator, self.config)

View File

@@ -0,0 +1,43 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Container class collecting all components required to transform an OSM name
into a Nominatim token.
"""
from typing import Mapping, Optional, TYPE_CHECKING
from icu import Transliterator
from .token_analysis.base import Analyzer
if TYPE_CHECKING:
from typing import Any
from .icu_rule_loader import TokenAnalyzerRule # pylint: disable=cyclic-import
class ICUTokenAnalysis:
""" Container class collecting the transliterators and token analysis
modules for a single Analyser instance.
"""
def __init__(self, norm_rules: str, trans_rules: str,
analysis_rules: Mapping[Optional[str], 'TokenAnalyzerRule']):
self.normalizer = Transliterator.createFromRules("icu_normalization",
norm_rules)
trans_rules += ";[:Space:]+ > ' '"
self.to_ascii = Transliterator.createFromRules("icu_to_ascii",
trans_rules)
self.search = Transliterator.createFromRules("icu_search",
norm_rules + trans_rules)
self.analysis = {name: arules.create(self.normalizer, self.to_ascii)
for name, arules in analysis_rules.items()}
def get_analyzer(self, name: Optional[str]) -> Analyzer:
""" Return the given named analyzer. If no analyzer with that
name exists, return the default analyzer.
"""
return self.analysis.get(name) or self.analysis[None]

View File

@@ -0,0 +1,952 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tokenizer implementing normalisation as used before Nominatim 4 but using
libICU instead of the PostgreSQL module.
"""
from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, \
Dict, Set, Iterable
import itertools
import json
import logging
from pathlib import Path
from textwrap import dedent
from nominatim_core.db.connection import connect, Connection, Cursor
from nominatim_core.config import Configuration
from nominatim_core.db.utils import CopyBuffer
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
from ..data.place_info import PlaceInfo
from ..data.place_name import PlaceName
from .icu_rule_loader import ICURuleLoader
from .place_sanitizer import PlaceSanitizer
from .icu_token_analysis import ICUTokenAnalysis
from .base import AbstractAnalyzer, AbstractTokenizer
DBCFG_TERM_NORMALIZATION = "tokenizer_term_normalization"
LOG = logging.getLogger()
WORD_TYPES = (('country_names', 'C'),
('postcodes', 'P'),
('full_word', 'W'),
('housenumbers', 'H'))
def create(dsn: str, data_dir: Path) -> 'ICUTokenizer':
""" Create a new instance of the tokenizer provided by this module.
"""
return ICUTokenizer(dsn, data_dir)
class ICUTokenizer(AbstractTokenizer):
""" This tokenizer uses libICU to convert names and queries to ASCII.
Otherwise it uses the same algorithms and data structures as the
normalization routines in Nominatim 3.
"""
def __init__(self, dsn: str, data_dir: Path) -> None:
self.dsn = dsn
self.data_dir = data_dir
self.loader: Optional[ICURuleLoader] = None
def init_new_db(self, config: Configuration, init_db: bool = True) -> None:
""" Set up a new tokenizer for the database.
This copies all necessary data in the project directory to make
sure the tokenizer remains stable even over updates.
"""
self.loader = ICURuleLoader(config)
self._install_php(config.lib_dir.php, overwrite=True)
self._save_config()
if init_db:
self.update_sql_functions(config)
self._setup_db_tables(config)
self._create_base_indices(config, 'word')
def init_from_project(self, config: Configuration) -> None:
""" Initialise the tokenizer from the project directory.
"""
self.loader = ICURuleLoader(config)
with connect(self.dsn) as conn:
self.loader.load_config_from_db(conn)
self._install_php(config.lib_dir.php, overwrite=False)
def finalize_import(self, config: Configuration) -> None:
""" Do any required postprocessing to make the tokenizer data ready
for use.
"""
self._create_lookup_indices(config, 'word')
def update_sql_functions(self, config: Configuration) -> None:
""" Reimport the SQL functions for this tokenizer.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer.sql')
def check_database(self, config: Configuration) -> None:
""" Check that the tokenizer is set up correctly.
"""
# Will throw an error if there is an issue.
self.init_from_project(config)
def update_statistics(self, config: Configuration, threads: int = 2) -> None:
""" Recompute frequencies for all name words.
"""
with connect(self.dsn) as conn:
if not conn.table_exists('search_name'):
return
with conn.cursor() as cur:
cur.execute('ANALYSE search_name')
if threads > 1:
cur.execute('SET max_parallel_workers_per_gather TO %s',
(min(threads, 6),))
if conn.server_version_tuple() < (12, 0):
LOG.info('Computing word frequencies')
cur.drop_table('word_frequencies')
cur.drop_table('addressword_frequencies')
cur.execute("""CREATE TEMP TABLE word_frequencies AS
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute('CREATE INDEX ON word_frequencies(id)')
cur.execute("""CREATE TEMP TABLE addressword_frequencies AS
SELECT unnest(nameaddress_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute('CREATE INDEX ON addressword_frequencies(id)')
cur.execute("""CREATE OR REPLACE FUNCTION word_freq_update(wid INTEGER,
INOUT info JSONB)
AS $$
DECLARE rec RECORD;
BEGIN
IF info is null THEN
info = '{}'::jsonb;
END IF;
FOR rec IN SELECT count FROM word_frequencies WHERE id = wid
LOOP
info = info || jsonb_build_object('count', rec.count);
END LOOP;
FOR rec IN SELECT count FROM addressword_frequencies WHERE id = wid
LOOP
info = info || jsonb_build_object('addr_count', rec.count);
END LOOP;
IF info = '{}'::jsonb THEN
info = null;
END IF;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
""")
LOG.info('Update word table with recomputed frequencies')
cur.drop_table('tmp_word')
cur.execute("""CREATE TABLE tmp_word AS
SELECT word_id, word_token, type, word,
word_freq_update(word_id, info) as info
FROM word
""")
cur.drop_table('word_frequencies')
cur.drop_table('addressword_frequencies')
else:
LOG.info('Computing word frequencies')
cur.drop_table('word_frequencies')
cur.execute("""
CREATE TEMP TABLE word_frequencies AS
WITH word_freq AS MATERIALIZED (
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id),
addr_freq AS MATERIALIZED (
SELECT unnest(nameaddress_vector) as id, count(*)
FROM search_name GROUP BY id)
SELECT coalesce(a.id, w.id) as id,
(CASE WHEN w.count is null THEN '{}'::JSONB
ELSE jsonb_build_object('count', w.count) END
||
CASE WHEN a.count is null THEN '{}'::JSONB
ELSE jsonb_build_object('addr_count', a.count) END) as info
FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
""")
cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
cur.execute('ANALYSE word_frequencies')
LOG.info('Update word table with recomputed frequencies')
cur.drop_table('tmp_word')
cur.execute("""CREATE TABLE tmp_word AS
SELECT word_id, word_token, type, word,
(CASE WHEN wf.info is null THEN word.info
ELSE coalesce(word.info, '{}'::jsonb) || wf.info
END) as info
FROM word LEFT JOIN word_frequencies wf
ON word.word_id = wf.id
""")
cur.drop_table('word_frequencies')
with conn.cursor() as cur:
cur.execute('SET max_parallel_workers_per_gather TO 0')
sqlp = SQLPreprocessor(conn, config)
sqlp.run_string(conn,
'GRANT SELECT ON tmp_word TO "{{config.DATABASE_WEBUSER}}"')
conn.commit()
self._create_base_indices(config, 'tmp_word')
self._create_lookup_indices(config, 'tmp_word')
self._move_temporary_word_table('tmp_word')
def _cleanup_housenumbers(self) -> None:
""" Remove unused house numbers.
"""
with connect(self.dsn) as conn:
if not conn.table_exists('search_name'):
return
with conn.cursor(name="hnr_counter") as cur:
cur.execute("""SELECT DISTINCT word_id, coalesce(info->>'lookup', word_token)
FROM word
WHERE type = 'H'
AND NOT EXISTS(SELECT * FROM search_name
WHERE ARRAY[word.word_id] && name_vector)
AND (char_length(coalesce(word, word_token)) > 6
OR coalesce(word, word_token) not similar to '\\d+')
""")
candidates = {token: wid for wid, token in cur}
with conn.cursor(name="hnr_counter") as cur:
cur.execute("""SELECT housenumber FROM placex
WHERE housenumber is not null
AND (char_length(housenumber) > 6
OR housenumber not similar to '\\d+')
""")
for row in cur:
for hnr in row[0].split(';'):
candidates.pop(hnr, None)
LOG.info("There are %s outdated housenumbers.", len(candidates))
LOG.debug("Outdated housenumbers: %s", candidates.keys())
if candidates:
with conn.cursor() as cur:
cur.execute("""DELETE FROM word WHERE word_id = any(%s)""",
(list(candidates.values()), ))
conn.commit()
def update_word_tokens(self) -> None:
""" Remove unused tokens.
"""
LOG.warning("Cleaning up housenumber tokens.")
self._cleanup_housenumbers()
LOG.warning("Tokenizer house-keeping done.")
def name_analyzer(self) -> 'ICUNameAnalyzer':
""" Create a new analyzer for tokenizing names and queries
using this tokenizer. Analyzers are context managers and should
be used accordingly:
```
with tokenizer.name_analyzer() as analyzer:
analyzer.tokenize()
```
When used outside the with construct, the caller must ensure that
the close() function is called before the analyzer is discarded.
Analyzers are not thread-safe. You need to instantiate one per thread.
"""
assert self.loader is not None
return ICUNameAnalyzer(self.dsn, self.loader.make_sanitizer(),
self.loader.make_token_analysis())
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
""" Return a list of the `num` most frequent full words
in the database.
"""
with conn.cursor() as cur:
cur.execute("""SELECT word, sum((info->>'count')::int) as count
FROM word WHERE type = 'W'
GROUP BY word
ORDER BY count DESC LIMIT %s""", (num,))
return list(s[0].split('@')[0] for s in cur)
def _install_php(self, phpdir: Optional[Path], overwrite: bool = True) -> None:
""" Install the php script for the tokenizer.
"""
if phpdir is not None:
assert self.loader is not None
php_file = self.data_dir / "tokenizer.php"
if not php_file.exists() or overwrite:
php_file.write_text(dedent(f"""\
<?php
@define('CONST_Max_Word_Frequency', 10000000);
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')
def _save_config(self) -> None:
""" Save the configuration that needs to remain stable for the given
database as database properties.
"""
assert self.loader is not None
with connect(self.dsn) as conn:
self.loader.save_config_to_db(conn)
def _setup_db_tables(self, config: Configuration) -> None:
""" Set up the word table and fill it with pre-computed word
frequencies.
"""
with connect(self.dsn) as conn:
with conn.cursor() as cur:
cur.drop_table('word')
sqlp = SQLPreprocessor(conn, config)
sqlp.run_string(conn, """
CREATE TABLE word (
word_id INTEGER,
word_token text NOT NULL,
type text NOT NULL,
word text,
info jsonb
) {{db.tablespace.search_data}};
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
DROP SEQUENCE IF EXISTS seq_word;
CREATE SEQUENCE seq_word start 1;
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
""")
conn.commit()
def _create_base_indices(self, config: Configuration, table_name: str) -> None:
""" Set up the word table and fill it with pre-computed word
frequencies.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
sqlp.run_string(conn,
"""CREATE INDEX idx_{{table_name}}_word_token ON {{table_name}}
USING BTREE (word_token) {{db.tablespace.search_index}}""",
table_name=table_name)
for name, ctype in WORD_TYPES:
sqlp.run_string(conn,
"""CREATE INDEX idx_{{table_name}}_{{idx_name}} ON {{table_name}}
USING BTREE (word) {{db.tablespace.address_index}}
WHERE type = '{{column_type}}'
""",
table_name=table_name, idx_name=name,
column_type=ctype)
conn.commit()
def _create_lookup_indices(self, config: Configuration, table_name: str) -> None:
""" Create additional indexes used when running the API.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
# Index required for details lookup.
sqlp.run_string(conn, """
CREATE INDEX IF NOT EXISTS idx_{{table_name}}_word_id
ON {{table_name}} USING BTREE (word_id) {{db.tablespace.search_index}}
""",
table_name=table_name)
conn.commit()
def _move_temporary_word_table(self, old: str) -> None:
""" Rename all tables and indexes used by the tokenizer.
"""
with connect(self.dsn) as conn:
with conn.cursor() as cur:
cur.drop_table('word')
cur.execute(f"ALTER TABLE {old} RENAME TO word")
for idx in ('word_token', 'word_id'):
cur.execute(f"""ALTER INDEX idx_{old}_{idx}
RENAME TO idx_word_{idx}""")
for name, _ in WORD_TYPES:
cur.execute(f"""ALTER INDEX idx_{old}_{name}
RENAME TO idx_word_{name}""")
conn.commit()
class ICUNameAnalyzer(AbstractAnalyzer):
""" The ICU analyzer uses the ICU library for splitting names.
Each instance opens a connection to the database to request the
normalization.
"""
def __init__(self, dsn: str, sanitizer: PlaceSanitizer,
token_analysis: ICUTokenAnalysis) -> None:
self.conn: Optional[Connection] = connect(dsn).connection
self.conn.autocommit = True
self.sanitizer = sanitizer
self.token_analysis = token_analysis
self._cache = _TokenCache()
def close(self) -> None:
""" Free all resources used by the analyzer.
"""
if self.conn:
self.conn.close()
self.conn = None
def _search_normalized(self, name: str) -> str:
""" Return the search token transliteration of the given name.
"""
return cast(str, self.token_analysis.search.transliterate(name)).strip()
def _normalized(self, name: str) -> str:
""" Return the normalized version of the given name with all
non-relevant information removed.
"""
return cast(str, self.token_analysis.normalizer.transliterate(name)).strip()
def get_word_token_info(self, words: Sequence[str]) -> List[Tuple[str, str, int]]:
""" Return token information for the given list of words.
If a word starts with # it is assumed to be a full name
otherwise it is assumed to be a partial name.
The function returns a list of tuples with
(original word, word token, word id).
The function is used for testing and debugging only
and is not necessarily efficient.
"""
assert self.conn is not None
full_tokens = {}
partial_tokens = {}
for word in words:
if word.startswith('#'):
full_tokens[word] = self._search_normalized(word[1:])
else:
partial_tokens[word] = self._search_normalized(word)
with self.conn.cursor() as cur:
cur.execute("""SELECT word_token, word_id
FROM word WHERE word_token = ANY(%s) and type = 'W'
""", (list(full_tokens.values()),))
full_ids = {r[0]: r[1] for r in cur}
cur.execute("""SELECT word_token, word_id
FROM word WHERE word_token = ANY(%s) and type = 'w'""",
(list(partial_tokens.values()),))
part_ids = {r[0]: r[1] for r in cur}
return [(k, v, full_ids.get(v, None)) for k, v in full_tokens.items()] \
+ [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]
def normalize_postcode(self, postcode: str) -> str:
""" Convert the postcode to a standardized form.
This function must yield exactly the same result as the SQL function
'token_normalized_postcode()'.
"""
return postcode.strip().upper()
def update_postcodes_from_db(self) -> None:
""" Update postcode tokens in the word table from the location_postcode
table.
"""
assert self.conn is not None
analyzer = self.token_analysis.analysis.get('@postcode')
with self.conn.cursor() as cur:
# First get all postcode names currently in the word table.
cur.execute("SELECT DISTINCT word FROM word WHERE type = 'P'")
word_entries = set((entry[0] for entry in cur))
# Then compute the required postcode names from the postcode table.
needed_entries = set()
cur.execute("SELECT country_code, postcode FROM location_postcode")
for cc, postcode in cur:
info = PlaceInfo({'country_code': cc,
'class': 'place', 'type': 'postcode',
'address': {'postcode': postcode}})
address = self.sanitizer.process_names(info)[1]
for place in address:
if place.kind == 'postcode':
if analyzer is None:
postcode_name = place.name.strip().upper()
variant_base = None
else:
postcode_name = analyzer.get_canonical_id(place)
variant_base = place.get_attr("variant")
if variant_base:
needed_entries.add(f'{postcode_name}@{variant_base}')
else:
needed_entries.add(postcode_name)
break
# Now update the word table.
self._delete_unused_postcode_words(word_entries - needed_entries)
self._add_missing_postcode_words(needed_entries - word_entries)
def _delete_unused_postcode_words(self, tokens: Iterable[str]) -> None:
assert self.conn is not None
if tokens:
with self.conn.cursor() as cur:
cur.execute("DELETE FROM word WHERE type = 'P' and word = any(%s)",
(list(tokens), ))
def _add_missing_postcode_words(self, tokens: Iterable[str]) -> None:
assert self.conn is not None
if not tokens:
return
analyzer = self.token_analysis.analysis.get('@postcode')
terms = []
for postcode_name in tokens:
if '@' in postcode_name:
term, variant = postcode_name.split('@', 1)
term = self._search_normalized(term)
if analyzer is None:
variants = [term]
else:
variants = analyzer.compute_variants(variant)
if term not in variants:
variants.append(term)
else:
variants = [self._search_normalized(postcode_name)]
terms.append((postcode_name, variants))
if terms:
with self.conn.cursor() as cur:
cur.execute_values("""SELECT create_postcode_word(pc, var)
FROM (VALUES %s) AS v(pc, var)""",
terms)
def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Replace the search index for special phrases with the new phrases.
If `should_replace` is True, then the previous set of phrases will be
completely replaced. Otherwise the phrases are added to the
already existing ones.
"""
assert self.conn is not None
norm_phrases = set(((self._normalized(p[0]), p[1], p[2], p[3])
for p in phrases))
with self.conn.cursor() as cur:
# Get the old phrases.
existing_phrases = set()
cur.execute("SELECT word, info FROM word WHERE type = 'S'")
for word, info in cur:
existing_phrases.add((word, info['class'], info['type'],
info.get('op') or '-'))
added = self._add_special_phrases(cur, norm_phrases, existing_phrases)
if should_replace:
deleted = self._remove_special_phrases(cur, norm_phrases,
existing_phrases)
else:
deleted = 0
LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
len(norm_phrases), added, deleted)
def _add_special_phrases(self, cursor: Cursor,
new_phrases: Set[Tuple[str, str, str, str]],
existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
""" Add all phrases to the database that are not yet there.
"""
to_add = new_phrases - existing_phrases
added = 0
with CopyBuffer() as copystr:
for word, cls, typ, oper in to_add:
term = self._search_normalized(word)
if term:
copystr.add(term, 'S', word,
json.dumps({'class': cls, 'type': typ,
'op': oper if oper in ('in', 'near') else None}))
added += 1
copystr.copy_out(cursor, 'word',
columns=['word_token', 'type', 'word', 'info'])
return added
def _remove_special_phrases(self, cursor: Cursor,
new_phrases: Set[Tuple[str, str, str, str]],
existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
""" Remove all phrases from the database that are no longer in the
new phrase list.
"""
to_delete = existing_phrases - new_phrases
if to_delete:
cursor.execute_values(
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
WHERE type = 'S' and word = name
and info->>'class' = in_class and info->>'type' = in_type
and ((op = '-' and info->>'op' is null) or op = info->>'op')
""", to_delete)
return len(to_delete)
def add_country_names(self, country_code: str, names: Mapping[str, str]) -> None:
""" Add default names for the given country to the search index.
"""
# Make sure any name preprocessing for country names applies.
info = PlaceInfo({'name': names, 'country_code': country_code,
'rank_address': 4, 'class': 'boundary',
'type': 'administrative'})
self._add_country_full_names(country_code,
self.sanitizer.process_names(info)[0],
internal=True)
def _add_country_full_names(self, country_code: str, names: Sequence[PlaceName],
internal: bool = False) -> None:
""" Add names for the given country from an already sanitized
name list.
"""
assert self.conn is not None
word_tokens = set()
for name in names:
norm_name = self._search_normalized(name.name)
if norm_name:
word_tokens.add(norm_name)
with self.conn.cursor() as cur:
# Get existing names
cur.execute("""SELECT word_token, coalesce(info ? 'internal', false) as is_internal
FROM word
WHERE type = 'C' and word = %s""",
(country_code, ))
# internal/external names
existing_tokens: Dict[bool, Set[str]] = {True: set(), False: set()}
for word in cur:
existing_tokens[word[1]].add(word[0])
# Delete names that no longer exist.
gone_tokens = existing_tokens[internal] - word_tokens
if internal:
gone_tokens.update(existing_tokens[False] & word_tokens)
if gone_tokens:
cur.execute("""DELETE FROM word
USING unnest(%s) as token
WHERE type = 'C' and word = %s
and word_token = token""",
(list(gone_tokens), country_code))
# Only add those names that are not yet in the list.
new_tokens = word_tokens - existing_tokens[True]
if not internal:
new_tokens -= existing_tokens[False]
if new_tokens:
if internal:
sql = """INSERT INTO word (word_token, type, word, info)
(SELECT token, 'C', %s, '{"internal": "yes"}'
FROM unnest(%s) as token)
"""
else:
sql = """INSERT INTO word (word_token, type, word)
(SELECT token, 'C', %s
FROM unnest(%s) as token)
"""
cur.execute(sql, (country_code, list(new_tokens)))
def process_place(self, place: PlaceInfo) -> Mapping[str, Any]:
""" Determine tokenizer information about the given place.
Returns a JSON-serializable structure that will be handed into
the database via the token_info field.
"""
token_info = _TokenInfo()
names, address = self.sanitizer.process_names(place)
if names:
token_info.set_names(*self._compute_name_tokens(names))
if place.is_country():
assert place.country_code is not None
self._add_country_full_names(place.country_code, names)
if address:
self._process_place_address(token_info, address)
return token_info.to_dict()
def _process_place_address(self, token_info: '_TokenInfo',
address: Sequence[PlaceName]) -> None:
for item in address:
if item.kind == 'postcode':
token_info.set_postcode(self._add_postcode(item))
elif item.kind == 'housenumber':
token_info.add_housenumber(*self._compute_housenumber_token(item))
elif item.kind == 'street':
token_info.add_street(self._retrieve_full_tokens(item.name))
elif item.kind == 'place':
if not item.suffix:
token_info.add_place(itertools.chain(*self._compute_name_tokens([item])))
elif not item.kind.startswith('_') and not item.suffix and \
item.kind not in ('country', 'full', 'inclusion'):
token_info.add_address_term(item.kind,
itertools.chain(*self._compute_name_tokens([item])))
def _compute_housenumber_token(self, hnr: PlaceName) -> Tuple[Optional[int], Optional[str]]:
""" Normalize the housenumber and return the word token and the
canonical form.
"""
assert self.conn is not None
analyzer = self.token_analysis.analysis.get('@housenumber')
result: Tuple[Optional[int], Optional[str]] = (None, None)
if analyzer is None:
# When no custom analyzer is set, simply normalize and transliterate
norm_name = self._search_normalized(hnr.name)
if norm_name:
result = self._cache.housenumbers.get(norm_name, result)
if result[0] is None:
with self.conn.cursor() as cur:
hid = cur.scalar("SELECT getorcreate_hnr_id(%s)", (norm_name, ))
result = hid, norm_name
self._cache.housenumbers[norm_name] = result
else:
# Otherwise use the analyzer to determine the canonical name.
# Per convention we use the first variant as the 'lookup name', the
# name that gets saved in the housenumber field of the place.
word_id = analyzer.get_canonical_id(hnr)
if word_id:
result = self._cache.housenumbers.get(word_id, result)
if result[0] is None:
variants = analyzer.compute_variants(word_id)
if variants:
with self.conn.cursor() as cur:
hid = cur.scalar("SELECT create_analyzed_hnr_id(%s, %s)",
(word_id, list(variants)))
result = hid, variants[0]
self._cache.housenumbers[word_id] = result
return result
def _retrieve_full_tokens(self, name: str) -> List[int]:
""" Get the full name token for the given name, if it exists.
The name is only retrieved for the standard analyser.
"""
assert self.conn is not None
norm_name = self._search_normalized(name)
# return cached if possible
if norm_name in self._cache.fulls:
return self._cache.fulls[norm_name]
with self.conn.cursor() as cur:
cur.execute("SELECT word_id FROM word WHERE word_token = %s and type = 'W'",
(norm_name, ))
full = [row[0] for row in cur]
self._cache.fulls[norm_name] = full
return full
def _compute_name_tokens(self, names: Sequence[PlaceName]) -> Tuple[Set[int], Set[int]]:
""" Computes the full name and partial name tokens for the given
dictionary of names.
"""
assert self.conn is not None
full_tokens: Set[int] = set()
partial_tokens: Set[int] = set()
for name in names:
analyzer_id = name.get_attr('analyzer')
analyzer = self.token_analysis.get_analyzer(analyzer_id)
word_id = analyzer.get_canonical_id(name)
if analyzer_id is None:
token_id = word_id
else:
token_id = f'{word_id}@{analyzer_id}'
full, part = self._cache.names.get(token_id, (None, None))
if full is None:
variants = analyzer.compute_variants(word_id)
if not variants:
continue
with self.conn.cursor() as cur:
cur.execute("SELECT * FROM getorcreate_full_word(%s, %s)",
(token_id, variants))
full, part = cast(Tuple[int, List[int]], cur.fetchone())
self._cache.names[token_id] = (full, part)
assert part is not None
full_tokens.add(full)
partial_tokens.update(part)
return full_tokens, partial_tokens
def _add_postcode(self, item: PlaceName) -> Optional[str]:
""" Make sure the normalized postcode is present in the word table.
"""
assert self.conn is not None
analyzer = self.token_analysis.analysis.get('@postcode')
if analyzer is None:
postcode_name = item.name.strip().upper()
variant_base = None
else:
postcode_name = analyzer.get_canonical_id(item)
variant_base = item.get_attr("variant")
if variant_base:
postcode = f'{postcode_name}@{variant_base}'
else:
postcode = postcode_name
if postcode not in self._cache.postcodes:
term = self._search_normalized(postcode_name)
if not term:
return None
variants = {term}
if analyzer is not None and variant_base:
variants.update(analyzer.compute_variants(variant_base))
with self.conn.cursor() as cur:
cur.execute("SELECT create_postcode_word(%s, %s)",
(postcode, list(variants)))
self._cache.postcodes.add(postcode)
return postcode_name
class _TokenInfo:
""" Collect token information to be sent back to the database.
"""
def __init__(self) -> None:
self.names: Optional[str] = None
self.housenumbers: Set[str] = set()
self.housenumber_tokens: Set[int] = set()
self.street_tokens: Optional[Set[int]] = None
self.place_tokens: Set[int] = set()
self.address_tokens: Dict[str, str] = {}
self.postcode: Optional[str] = None
def _mk_array(self, tokens: Iterable[Any]) -> str:
return f"{{{','.join((str(s) for s in tokens))}}}"
def to_dict(self) -> Dict[str, Any]:
""" Return the token information in database importable format.
"""
out: Dict[str, Any] = {}
if self.names:
out['names'] = self.names
if self.housenumbers:
out['hnr'] = ';'.join(self.housenumbers)
out['hnr_tokens'] = self._mk_array(self.housenumber_tokens)
if self.street_tokens is not None:
out['street'] = self._mk_array(self.street_tokens)
if self.place_tokens:
out['place'] = self._mk_array(self.place_tokens)
if self.address_tokens:
out['addr'] = self.address_tokens
if self.postcode:
out['postcode'] = self.postcode
return out
def set_names(self, fulls: Iterable[int], partials: Iterable[int]) -> None:
""" Adds token information for the normalised names.
"""
self.names = self._mk_array(itertools.chain(fulls, partials))
def add_housenumber(self, token: Optional[int], hnr: Optional[str]) -> None:
""" Extract housenumber information from a list of normalised
housenumbers.
"""
if token:
assert hnr is not None
self.housenumbers.add(hnr)
self.housenumber_tokens.add(token)
def add_street(self, tokens: Iterable[int]) -> None:
""" Add addr:street match terms.
"""
if self.street_tokens is None:
self.street_tokens = set()
self.street_tokens.update(tokens)
def add_place(self, tokens: Iterable[int]) -> None:
""" Add addr:place search and match terms.
"""
self.place_tokens.update(tokens)
def add_address_term(self, key: str, partials: Iterable[int]) -> None:
""" Add additional address terms.
"""
array = self._mk_array(partials)
if len(array) > 2:
self.address_tokens[key] = array
def set_postcode(self, postcode: Optional[str]) -> None:
""" Set the postcode to the given one.
"""
self.postcode = postcode
class _TokenCache:
""" Cache for token information to avoid repeated database queries.
This cache is not thread-safe and needs to be instantiated per
analyzer.
"""
def __init__(self) -> None:
self.names: Dict[str, Tuple[int, List[int]]] = {}
self.partials: Dict[str, int] = {}
self.fulls: Dict[str, List[int]] = {}
self.postcodes: Set[str] = set()
self.housenumbers: Dict[str, Tuple[Optional[int], Optional[str]]] = {}
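# Illustrative sketch, not part of this commit: how _TokenInfo (defined above)
# assembles the JSON-serializable payload that process_place() hands to the
# database. The token ids are invented for the example.
info = _TokenInfo()
info.set_names([101, 102], [201, 202])
info.add_housenumber(301, '25a')
info.add_street([401])
info.set_postcode('80331')
print(info.to_dict())
# -> {'names': '{101,102,201,202}', 'hnr': '25a', 'hnr_tokens': '{301}',
#     'street': '{401}', 'postcode': '80331'}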

View File

@@ -0,0 +1,681 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tokenizer implementing normalisation as used before Nominatim 4.
"""
from typing import Optional, Sequence, List, Tuple, Mapping, Any, Callable, \
cast, Dict, Set, Iterable
from collections import OrderedDict
import logging
from pathlib import Path
import re
import shutil
from textwrap import dedent
from icu import Transliterator
import psycopg2
import psycopg2.extras
from nominatim_core.errors import UsageError
from nominatim_core.db.connection import connect, Connection
from nominatim_core.config import Configuration
from nominatim_core.db import properties
from nominatim_core.db import utils as db_utils
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
from ..data.place_info import PlaceInfo
from .base import AbstractAnalyzer, AbstractTokenizer
DBCFG_NORMALIZATION = "tokenizer_normalization"
DBCFG_MAXWORDFREQ = "tokenizer_maxwordfreq"
LOG = logging.getLogger()
def create(dsn: str, data_dir: Path) -> 'LegacyTokenizer':
""" Create a new instance of the tokenizer provided by this module.
"""
return LegacyTokenizer(dsn, data_dir)
def _install_module(config_module_path: str, src_dir: Path, module_dir: Path) -> str:
""" Copies the PostgreSQL normalisation module into the project
directory if necessary. For historical reasons the module is
saved in the '/module' subdirectory and not with the other tokenizer
data.
The function detects when the installation is run from the
build directory. It doesn't touch the module in that case.
"""
# Custom module locations are simply used as is.
if config_module_path:
LOG.info("Using custom path for database module at '%s'", config_module_path)
return config_module_path
# Compatibility mode for builddir installations.
if module_dir.exists() and src_dir.samefile(module_dir):
LOG.info('Running from build directory. Leaving database module as is.')
return str(module_dir)
# In any other case install the module in the project directory.
if not module_dir.exists():
module_dir.mkdir()
destfile = module_dir / 'nominatim.so'
shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
destfile.chmod(0o755)
LOG.info('Database module installed at %s', str(destfile))
return str(module_dir)
def _check_module(module_dir: str, conn: Connection) -> None:
""" Try to use the PostgreSQL module to confirm that it is correctly
installed and accessible from PostgreSQL.
"""
with conn.cursor() as cur:
try:
cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
RETURNS text AS %s, 'transliteration'
LANGUAGE c IMMUTABLE STRICT;
DROP FUNCTION nominatim_test_import_func(text)
""", (f'{module_dir}/nominatim.so', ))
except psycopg2.DatabaseError as err:
LOG.fatal("Error accessing database module: %s", err)
raise UsageError("Database module cannot be accessed.") from err
class LegacyTokenizer(AbstractTokenizer):
""" The legacy tokenizer uses a special PostgreSQL module to normalize
names and queries. The tokenizer thus implements normalization through
calls to the database.
"""
def __init__(self, dsn: str, data_dir: Path) -> None:
self.dsn = dsn
self.data_dir = data_dir
self.normalization: Optional[str] = None
def init_new_db(self, config: Configuration, init_db: bool = True) -> None:
""" Set up a new tokenizer for the database.
This copies all necessary data in the project directory to make
sure the tokenizer remains stable even over updates.
"""
assert config.project_dir is not None
module_dir = _install_module(config.DATABASE_MODULE_PATH,
config.lib_dir.module,
config.project_dir / 'module')
self.normalization = config.TERM_NORMALIZATION
self._install_php(config, overwrite=True)
with connect(self.dsn) as conn:
_check_module(module_dir, conn)
self._save_config(conn, config)
conn.commit()
if init_db:
self.update_sql_functions(config)
self._init_db_tables(config)
def init_from_project(self, config: Configuration) -> None:
""" Initialise the tokenizer from the project directory.
"""
assert config.project_dir is not None
with connect(self.dsn) as conn:
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
if not (config.project_dir / 'module' / 'nominatim.so').exists():
_install_module(config.DATABASE_MODULE_PATH,
config.lib_dir.module,
config.project_dir / 'module')
self._install_php(config, overwrite=False)
def finalize_import(self, config: Configuration) -> None:
""" Do any required postprocessing to make the tokenizer data ready
for use.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
def update_sql_functions(self, config: Configuration) -> None:
""" Reimport the SQL functions for this tokenizer.
"""
assert config.project_dir is not None
with connect(self.dsn) as conn:
max_word_freq = properties.get_property(conn, DBCFG_MAXWORDFREQ)
modulepath = config.DATABASE_MODULE_PATH or \
str((config.project_dir / 'module').resolve())
sqlp = SQLPreprocessor(conn, config)
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer.sql',
max_word_freq=max_word_freq,
modulepath=modulepath)
def check_database(self, _: Configuration) -> Optional[str]:
""" Check that the tokenizer is set up correctly.
"""
hint = """\
The Postgresql extension nominatim.so was not correctly loaded.
Error: {error}
Hints:
* Check the output of the CMake/make installation step
* Does nominatim.so exist?
* Does nominatim.so exist on the database server?
* Can nominatim.so be accessed by the database user?
"""
with connect(self.dsn) as conn:
with conn.cursor() as cur:
try:
out = cur.scalar("SELECT make_standard_name('a')")
except psycopg2.Error as err:
return hint.format(error=str(err))
if out != 'a':
return hint.format(error='Unexpected result for make_standard_name()')
return None
def migrate_database(self, config: Configuration) -> None:
""" Initialise the project directory of an existing database for
use with this tokenizer.
This is a special migration function for updating existing databases
to new software versions.
"""
assert config.project_dir is not None
self.normalization = config.TERM_NORMALIZATION
module_dir = _install_module(config.DATABASE_MODULE_PATH,
config.lib_dir.module,
config.project_dir / 'module')
with connect(self.dsn) as conn:
_check_module(module_dir, conn)
self._save_config(conn, config)
def update_statistics(self, config: Configuration, threads: int = 1) -> None:
""" Recompute the frequency of full words.
"""
with connect(self.dsn) as conn:
if conn.table_exists('search_name'):
with conn.cursor() as cur:
cur.drop_table("word_frequencies")
LOG.info("Computing word frequencies")
cur.execute("""CREATE TEMP TABLE word_frequencies AS
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute("CREATE INDEX ON word_frequencies(id)")
LOG.info("Update word table with recomputed frequencies")
cur.execute("""UPDATE word SET search_name_count = count
FROM word_frequencies
WHERE word_token like ' %' and word_id = id""")
cur.drop_table("word_frequencies")
conn.commit()
def update_word_tokens(self) -> None:
""" No house-keeping implemented for the legacy tokenizer.
"""
LOG.info("No tokenizer clean-up available.")
def name_analyzer(self) -> 'LegacyNameAnalyzer':
""" Create a new analyzer for tokenizing names and queries
using this tokenizer. Analyzers are context managers and should
be used accordingly:
```
with tokenizer.name_analyzer() as analyzer:
analyzer.tokenize()
```
When used outside the with construct, the caller must make sure to
call the close() function before destroying the analyzer.
Analyzers are not thread-safe. You need to instantiate one per thread.
"""
normalizer = Transliterator.createFromRules("phrase normalizer",
self.normalization)
return LegacyNameAnalyzer(self.dsn, normalizer)
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
""" Return a list of the `num` most frequent full words
in the database.
"""
with conn.cursor() as cur:
cur.execute(""" SELECT word FROM word WHERE word is not null
ORDER BY search_name_count DESC LIMIT %s""", (num,))
return list(s[0] for s in cur)
def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
""" Install the php script for the tokenizer.
"""
if config.lib_dir.php is not None:
php_file = self.data_dir / "tokenizer.php"
if not php_file.exists() or overwrite:
php_file.write_text(dedent(f"""\
<?php
@define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
@define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
"""), encoding='utf-8')
def _init_db_tables(self, config: Configuration) -> None:
""" Set up the word table and fill it with pre-computed word
frequencies.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
conn.commit()
LOG.warning("Precomputing word tokens")
db_utils.execute_file(self.dsn, config.lib_dir.data / 'words.sql')
def _save_config(self, conn: Connection, config: Configuration) -> None:
""" Save the configuration that needs to remain stable for the given
database as database properties.
"""
assert self.normalization is not None
properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
properties.set_property(conn, DBCFG_MAXWORDFREQ, config.MAX_WORD_FREQUENCY)
class LegacyNameAnalyzer(AbstractAnalyzer):
""" The legacy analyzer uses the special Postgresql module for
splitting names.
Each instance opens a connection to the database to request the
normalization.
"""
def __init__(self, dsn: str, normalizer: Any):
self.conn: Optional[Connection] = connect(dsn).connection
self.conn.autocommit = True
self.normalizer = normalizer
psycopg2.extras.register_hstore(self.conn)
self._cache = _TokenCache(self.conn)
def close(self) -> None:
""" Free all resources used by the analyzer.
"""
if self.conn:
self.conn.close()
self.conn = None
def get_word_token_info(self, words: Sequence[str]) -> List[Tuple[str, str, int]]:
""" Return token information for the given list of words.
If a word starts with # it is assumed to be a full name
otherwise it is assumed to be a partial name.
The function returns a list of tuples with
(original word, word token, word id).
The function is used for testing and debugging only
and is not necessarily efficient.
"""
assert self.conn is not None
with self.conn.cursor() as cur:
cur.execute("""SELECT t.term, word_token, word_id
FROM word, (SELECT unnest(%s::TEXT[]) as term) t
WHERE word_token = (CASE
WHEN left(t.term, 1) = '#' THEN
' ' || make_standard_name(substring(t.term from 2))
ELSE
make_standard_name(t.term)
END)
and class is null and country_code is null""",
(words, ))
return [(r[0], r[1], r[2]) for r in cur]
def normalize(self, phrase: str) -> str:
""" Normalize the given phrase, i.e. remove all properties that
are irrelevant for search.
"""
return cast(str, self.normalizer.transliterate(phrase))
def normalize_postcode(self, postcode: str) -> str:
""" Convert the postcode to a standardized form.
This function must yield exactly the same result as the SQL function
'token_normalized_postcode()'.
"""
return postcode.strip().upper()
def update_postcodes_from_db(self) -> None:
""" Update postcode tokens in the word table from the location_postcode
table.
"""
assert self.conn is not None
with self.conn.cursor() as cur:
# This finds the rows in location_postcode and word that are
# missing from the other table.
cur.execute("""SELECT * FROM
(SELECT pc, word FROM
(SELECT distinct(postcode) as pc FROM location_postcode) p
FULL JOIN
(SELECT word FROM word
WHERE class ='place' and type = 'postcode') w
ON pc = word) x
WHERE pc is null or word is null""")
to_delete = []
to_add = []
for postcode, word in cur:
if postcode is None:
to_delete.append(word)
else:
to_add.append(postcode)
if to_delete:
cur.execute("""DELETE FROM WORD
WHERE class ='place' and type = 'postcode'
and word = any(%s)
""", (to_delete, ))
if to_add:
cur.execute("""SELECT count(create_postcode_id(pc))
FROM unnest(%s) as pc
""", (to_add, ))
def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Replace the search index for special phrases with the new phrases.
"""
assert self.conn is not None
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
for p in phrases))
with self.conn.cursor() as cur:
# Get the old phrases.
existing_phrases = set()
cur.execute("""SELECT word, class, type, operator FROM word
WHERE class != 'place'
OR (type != 'house' AND type != 'postcode')""")
for label, cls, typ, oper in cur:
existing_phrases.add((label, cls, typ, oper or '-'))
to_add = norm_phrases - existing_phrases
to_delete = existing_phrases - norm_phrases
if to_add:
cur.execute_values(
""" INSERT INTO word (word_id, word_token, word, class, type,
search_name_count, operator)
(SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
class, type, 0,
CASE WHEN op in ('in', 'near') THEN op ELSE null END
FROM (VALUES %s) as v(name, class, type, op))""",
to_add)
if to_delete and should_replace:
cur.execute_values(
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
WHERE word = name and class = in_class and type = in_type
and ((op = '-' and operator is null) or op = operator)""",
to_delete)
LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
len(norm_phrases), len(to_add), len(to_delete))
def add_country_names(self, country_code: str, names: Mapping[str, str]) -> None:
""" Add names for the given country to the search index.
"""
assert self.conn is not None
with self.conn.cursor() as cur:
cur.execute(
"""INSERT INTO word (word_id, word_token, country_code)
(SELECT nextval('seq_word'), lookup_token, %s
FROM (SELECT DISTINCT ' ' || make_standard_name(n) as lookup_token
FROM unnest(%s)n) y
WHERE NOT EXISTS(SELECT * FROM word
WHERE word_token = lookup_token and country_code = %s))
""", (country_code, list(names.values()), country_code))
def process_place(self, place: PlaceInfo) -> Mapping[str, Any]:
""" Determine tokenizer information about the given place.
Returns a JSON-serialisable structure that will be handed into
the database via the token_info field.
"""
assert self.conn is not None
token_info = _TokenInfo(self._cache)
names = place.name
if names:
token_info.add_names(self.conn, names)
if place.is_country():
assert place.country_code is not None
self.add_country_names(place.country_code, names)
address = place.address
if address:
self._process_place_address(token_info, address)
return token_info.data
def _process_place_address(self, token_info: '_TokenInfo', address: Mapping[str, str]) -> None:
assert self.conn is not None
hnrs = []
addr_terms = []
for key, value in address.items():
if key == 'postcode':
# Make sure the normalized postcode is present in the word table.
if re.search(r'[:,;]', value) is None:
norm_pc = self.normalize_postcode(value)
token_info.set_postcode(norm_pc)
self._cache.add_postcode(self.conn, norm_pc)
elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
hnrs.append(value)
elif key == 'street':
token_info.add_street(self.conn, value)
elif key == 'place':
token_info.add_place(self.conn, value)
elif not key.startswith('_') \
and key not in ('country', 'full', 'inclusion'):
addr_terms.append((key, value))
if hnrs:
token_info.add_housenumbers(self.conn, hnrs)
if addr_terms:
token_info.add_address_terms(self.conn, addr_terms)
class _TokenInfo:
""" Collect token information to be sent back to the database.
"""
def __init__(self, cache: '_TokenCache') -> None:
self.cache = cache
self.data: Dict[str, Any] = {}
def add_names(self, conn: Connection, names: Mapping[str, str]) -> None:
""" Add token information for the names of the place.
"""
with conn.cursor() as cur:
# Create the token IDs for all names.
self.data['names'] = cur.scalar("SELECT make_keywords(%s)::text",
(names, ))
def add_housenumbers(self, conn: Connection, hnrs: Sequence[str]) -> None:
""" Extract housenumber information from the address.
"""
if len(hnrs) == 1:
token = self.cache.get_housenumber(hnrs[0])
if token is not None:
self.data['hnr_tokens'] = token
self.data['hnr'] = hnrs[0]
return
# split numbers if necessary
simple_list: List[str] = []
for hnr in hnrs:
simple_list.extend((x.strip() for x in re.split(r'[;,]', hnr)))
if len(simple_list) > 1:
simple_list = list(set(simple_list))
with conn.cursor() as cur:
cur.execute("SELECT * FROM create_housenumbers(%s)", (simple_list, ))
result = cur.fetchone()
assert result is not None
self.data['hnr_tokens'], self.data['hnr'] = result
def set_postcode(self, postcode: str) -> None:
""" Set or replace the postcode token with the given value.
"""
self.data['postcode'] = postcode
def add_street(self, conn: Connection, street: str) -> None:
""" Add addr:street match terms.
"""
def _get_street(name: str) -> Optional[str]:
with conn.cursor() as cur:
return cast(Optional[str],
cur.scalar("SELECT word_ids_from_name(%s)::text", (name, )))
tokens = self.cache.streets.get(street, _get_street)
self.data['street'] = tokens or '{}'
def add_place(self, conn: Connection, place: str) -> None:
""" Add addr:place search and match terms.
"""
def _get_place(name: str) -> Tuple[List[int], List[int]]:
with conn.cursor() as cur:
cur.execute("""SELECT make_keywords(hstore('name' , %s))::text,
word_ids_from_name(%s)::text""",
(name, name))
return cast(Tuple[List[int], List[int]], cur.fetchone())
self.data['place_search'], self.data['place_match'] = \
self.cache.places.get(place, _get_place)
def add_address_terms(self, conn: Connection, terms: Sequence[Tuple[str, str]]) -> None:
""" Add additional address terms.
"""
def _get_address_term(name: str) -> Tuple[List[int], List[int]]:
with conn.cursor() as cur:
cur.execute("""SELECT addr_ids_from_name(%s)::text,
word_ids_from_name(%s)::text""",
(name, name))
return cast(Tuple[List[int], List[int]], cur.fetchone())
tokens = {}
for key, value in terms:
items = self.cache.address_terms.get(value, _get_address_term)
if items[0] or items[1]:
tokens[key] = items
if tokens:
self.data['addr'] = tokens
class _LRU:
""" Least recently used cache that accepts a generator function to
produce the item when there is a cache miss.
"""
def __init__(self, maxsize: int = 128):
self.data: 'OrderedDict[str, Any]' = OrderedDict()
self.maxsize = maxsize
def get(self, key: str, generator: Callable[[str], Any]) -> Any:
""" Get the item with the given key from the cache. If nothing
is found in the cache, generate the value through the
generator function and store it in the cache.
"""
value = self.data.get(key)
if value is not None:
self.data.move_to_end(key)
else:
value = generator(key)
if len(self.data) >= self.maxsize:
self.data.popitem(last=False)
self.data[key] = value
return value
class _TokenCache:
""" Cache for token information to avoid repeated database queries.
This cache is not thread-safe and needs to be instantiated per
analyzer.
"""
def __init__(self, conn: Connection):
# various LRU caches
self.streets = _LRU(maxsize=256)
self.places = _LRU(maxsize=128)
self.address_terms = _LRU(maxsize=1024)
# Look up housenumbers up to 100 and cache them
with conn.cursor() as cur:
cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
FROM generate_series(1, 100) as i""")
self._cached_housenumbers: Dict[str, str] = {str(r[0]): r[1] for r in cur}
# For postcodes remember the ones that have already been added
self.postcodes: Set[str] = set()
def get_housenumber(self, number: str) -> Optional[str]:
""" Get a housenumber token from the cache.
"""
return self._cached_housenumbers.get(number)
def add_postcode(self, conn: Connection, postcode: str) -> None:
""" Make sure the given postcode is in the database.
"""
if postcode not in self.postcodes:
with conn.cursor() as cur:
cur.execute('SELECT create_postcode_id(%s)', (postcode, ))
self.postcodes.add(postcode)
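# Illustrative sketch, not part of this commit: the _LRU helper defined above
# computes a value on a cache miss and drops the least recently used entry
# once maxsize is reached.
lru = _LRU(maxsize=2)
lru.get('a', str.upper)   # miss, stores 'A'
lru.get('b', str.upper)   # miss, stores 'B'
lru.get('a', str.upper)   # hit, 'a' becomes the most recently used entry
lru.get('c', str.upper)   # miss, evicts 'b' and stores 'C'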

View File

@@ -0,0 +1,53 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Handler for cleaning name and address tags in place information before it
is handed to the token analysis.
"""
from typing import Optional, List, Mapping, Sequence, Callable, Any, Tuple
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from .sanitizers.config import SanitizerConfig
from .sanitizers.base import SanitizerHandler, ProcessInfo
from ..data.place_name import PlaceName
from ..data.place_info import PlaceInfo
class PlaceSanitizer:
""" Controller class which applies sanitizer functions on the place
names and address before they are used by the token analysers.
"""
def __init__(self, rules: Optional[Sequence[Mapping[str, Any]]],
config: Configuration) -> None:
self.handlers: List[Callable[[ProcessInfo], None]] = []
if rules:
for func in rules:
if 'step' not in func:
raise UsageError("Sanitizer rule is missing the 'step' attribute.")
if not isinstance(func['step'], str):
raise UsageError("'step' attribute must be a simple string.")
module: SanitizerHandler = \
config.load_plugin_module(func['step'], 'nominatim.tokenizer.sanitizers')
self.handlers.append(module.create(SanitizerConfig(func)))
def process_names(self, place: PlaceInfo) -> Tuple[List[PlaceName], List[PlaceName]]:
""" Extract a sanitized list of names and address parts from the
given place. The function returns a tuple
(list of names, list of address names)
"""
obj = ProcessInfo(place)
for func in self.handlers:
func(obj)
return obj.names, obj.address
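# Illustrative sketch, not part of this commit. It assumes `config` is a loaded
# nominatim_core Configuration and that the step names below exist as sanitizer
# modules (they follow Nominatim's default configuration, assumptions here).
rules = [{'step': 'clean-housenumbers'},
         {'step': 'clean-postcodes'}]
sanitizer = PlaceSanitizer(rules, config)
names, address = sanitizer.process_names(
    PlaceInfo({'name': {'name': 'Main Street'},
               'address': {'housenumber': '3;5', 'postcode': '80331'}}))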

View File

@@ -0,0 +1,64 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Common data types and protocols for sanitizers.
"""
from typing import Optional, List, Mapping, Callable
from nominatim_core.typing import Protocol, Final
from ...data.place_info import PlaceInfo
from ...data.place_name import PlaceName
from .config import SanitizerConfig
class ProcessInfo:
""" Container class for information handed into to handler functions.
The 'names' and 'address' members are mutable. A handler must change
them either by modifying the lists in place or by replacing the old content
with a new list.
"""
def __init__(self, place: PlaceInfo):
self.place: Final = place
self.names = self._convert_name_dict(place.name)
self.address = self._convert_name_dict(place.address)
@staticmethod
def _convert_name_dict(names: Optional[Mapping[str, str]]) -> List[PlaceName]:
""" Convert a dictionary of names into a list of PlaceNames.
The dictionary key is split into the primary part of the key
and the suffix (the part after an optional colon).
"""
out = []
if names:
for key, value in names.items():
parts = key.split(':', 1)
out.append(PlaceName(value.strip(),
parts[0].strip(),
parts[1].strip() if len(parts) > 1 else None))
return out
class SanitizerHandler(Protocol):
""" Protocol for sanitizer modules.
"""
def create(self, config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
"""
Create a function for sanitizing a place.
Arguments:
config: A dictionary with the additional configuration options
specified in the tokenizer configuration
Return:
The result must be a callable that takes a place description
and transforms name and address as required.
"""

View File

@@ -0,0 +1,80 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Sanitizer that preprocesses address tags for house numbers. The sanitizer
allows you to
* define which tags are to be considered house numbers (see 'filter-kind')
* split house number lists into individual numbers (see 'delimiters')
Arguments:
delimiters: Define the set of characters to be used for
splitting a list of house numbers into parts. (default: ',;')
filter-kind: Define the address tags that are considered to be a
house number. Either takes a single string or a list of strings,
where each string is a regular expression. An address item
is considered a house number if the 'kind' fully matches any
of the given regular expressions. (default: 'housenumber')
convert-to-name: Define house numbers that should be treated as a name
instead of a house number. Either takes a single string
or a list of strings, where each string is a regular
expression that must match the full house number value.
"""
from typing import Callable, Iterator, List
from ...data.place_name import PlaceName
from .base import ProcessInfo
from .config import SanitizerConfig
class _HousenumberSanitizer:
def __init__(self, config: SanitizerConfig) -> None:
self.filter_kind = config.get_filter('filter-kind', ['housenumber'])
self.split_regexp = config.get_delimiter()
self.filter_name = config.get_filter('convert-to-name', 'FAIL_ALL')
def __call__(self, obj: ProcessInfo) -> None:
if not obj.address:
return
new_address: List[PlaceName] = []
for item in obj.address:
if self.filter_kind(item.kind):
if self.filter_name(item.name):
obj.names.append(item.clone(kind='housenumber'))
else:
new_address.extend(item.clone(kind='housenumber', name=n)
for n in self.sanitize(item.name))
else:
# Don't touch other address items.
new_address.append(item)
obj.address = new_address
def sanitize(self, value: str) -> Iterator[str]:
""" Extract housenumbers in a regularized format from an OSM value.
The function works as a generator that yields all valid housenumbers
that can be created from the value.
"""
for hnr in self.split_regexp.split(value):
if hnr:
yield from self._regularize(hnr)
def _regularize(self, hnr: str) -> Iterator[str]:
yield hnr
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
""" Create a housenumber processing function.
"""
return _HousenumberSanitizer(config)
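# Illustrative sketch, not part of this commit, assuming the default settings
# documented above (PlaceInfo from ..data.place_info is assumed importable):
# a delimited house number list becomes one address item per number.
sanitize = create(SanitizerConfig({'step': 'clean-housenumbers'}))
obj = ProcessInfo(PlaceInfo({'address': {'housenumber': '3;5'}}))
sanitize(obj)
# obj.address now holds two items of kind 'housenumber', named '3' and '5'.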

View File

@@ -0,0 +1,80 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Sanitizer that filters postcodes by their officially allowed pattern.
Arguments:
convert-to-address: If set to 'yes' (the default), then postcodes that do
not conform with their country-specific pattern are
converted to an address component. That means that
the postcode is not taken into account when computing the
postcode centroids of a country but is still searchable.
When set to 'no', non-conforming postcodes are not
searchable either.
default-pattern: Pattern to use, when there is none available for the
country in question. Warning: will not be used for
objects that have no country assigned. These are always
assumed to have no postcode.
"""
from typing import Callable, Optional, Tuple
from ...data.postcode_format import PostcodeFormatter
from .base import ProcessInfo
from .config import SanitizerConfig
class _PostcodeSanitizer:
def __init__(self, config: SanitizerConfig) -> None:
self.convert_to_address = config.get_bool('convert-to-address', True)
self.matcher = PostcodeFormatter()
default_pattern = config.get('default-pattern')
if default_pattern is not None and isinstance(default_pattern, str):
self.matcher.set_default_pattern(default_pattern)
def __call__(self, obj: ProcessInfo) -> None:
if not obj.address:
return
postcodes = ((i, o) for i, o in enumerate(obj.address) if o.kind == 'postcode')
for pos, postcode in postcodes:
formatted = self.scan(postcode.name, obj.place.country_code)
if formatted is None:
if self.convert_to_address:
postcode.kind = 'unofficial_postcode'
else:
obj.address.pop(pos)
else:
postcode.name = formatted[0]
postcode.set_attr('variant', formatted[1])
def scan(self, postcode: str, country: Optional[str]) -> Optional[Tuple[str, str]]:
""" Check the postcode for correct formatting and return the
normalized version. Returns None if the postcode does not
correspond to the official format of the given country.
"""
match = self.matcher.match(country, postcode)
if match is None:
return None
assert country is not None
return self.matcher.normalize(country, match),\
' '.join(filter(lambda p: p is not None, match.groups()))
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
""" Create a function that filters postcodes by their officially allowed pattern.
"""
return _PostcodeSanitizer(config)
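# Illustrative sketch, not part of this commit, assuming a postcode pattern is
# known for 'de' and PlaceInfo is importable: a well-formed postcode is
# normalized in place; a non-matching one would be downgraded to an
# 'unofficial_postcode' item by default.
check = create(SanitizerConfig({'step': 'clean-postcodes'}))
obj = ProcessInfo(PlaceInfo({'country_code': 'de',
                             'address': {'postcode': '80331'}}))
check(obj)
# obj.address[0].name now holds the normalized postcode.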

View File

@@ -0,0 +1,46 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Sanitizer that preprocesses tags from the TIGER import.
It makes the following changes:
* remove state reference from tiger:county
"""
from typing import Callable
import re
from .base import ProcessInfo
from .config import SanitizerConfig
COUNTY_MATCH = re.compile('(.*), [A-Z][A-Z]')
def _clean_tiger_county(obj: ProcessInfo) -> None:
""" Remove the state reference from tiger:county tags.
This transforms a name like 'Hamilton, AL' into 'Hamilton'.
If no state reference is detected at the end, the name is left as is.
"""
if not obj.address:
return
for item in obj.address:
if item.kind == 'tiger' and item.suffix == 'county':
m = COUNTY_MATCH.fullmatch(item.name)
if m:
item.name = m[1]
# Switch kind and suffix, the split left them reversed.
item.kind = 'county'
item.suffix = 'tiger'
return
def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
""" Create a function that preprocesses tags from the TIGER import.
"""
return _clean_tiger_county
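# Illustrative sketch, not part of this commit (the step name and the PlaceInfo
# import are assumptions): stripping the state reference from a tiger:county tag.
clean = create(SanitizerConfig({'step': 'clean-tiger-tags'}))
obj = ProcessInfo(PlaceInfo({'address': {'tiger:county': 'Hamilton, AL'}}))
clean(obj)
# obj.address[0] is now PlaceName('Hamilton', 'county', 'tiger').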

Some files were not shown because too many files have changed in this diff