Merge pull request #3273 from lonvia/search-with-sqlite

Add forward search capability for SQLite databases
Authored by Sarah Hoffmann, committed via GitHub on 2023-12-12 12:15:22 +01:00
38 changed files with 1159 additions and 482 deletions
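
The net effect of this PR is that the Python search frontend can run against a converted SQLite database. A minimal usage sketch (not part of the diff; assumes Nominatim 4.3+, a project directory in the current path, and a file nominatim.db produced by the convert tool with search support enabled):

from pathlib import Path

import nominatim.api as napi

# Point the API at the SQLite file instead of a Postgres DSN.
api = napi.NominatimAPI(Path('.'),
                        {'NOMINATIM_DATABASE_DSN': 'sqlite:dbname=nominatim.db'})
try:
    # Forward (free-text) search - the capability added by this PR.
    for place in api.search('Birkenhead'):
        print(place.place_id, place.names)
finally:
    api.close()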

View File

@@ -19,6 +19,7 @@ import sqlalchemy.ext.asyncio as sa_asyncio
from nominatim.errors import UsageError
from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.db.async_core_library import PGCORE_LIB, PGCORE_ERROR
import nominatim.db.sqlite_functions
from nominatim.config import Configuration
from nominatim.api.connection import SearchConnection
from nominatim.api.status import get_status, StatusResult
@@ -84,6 +85,14 @@ class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
extra_args: Dict[str, Any] = {'future': True,
'echo': self.config.get_bool('DEBUG_SQL')}
if self.config.get_int('API_POOL_SIZE') == 0:
extra_args['poolclass'] = sa.pool.NullPool
else:
extra_args['poolclass'] = sa.pool.QueuePool
extra_args['max_overflow'] = 0
extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')
is_sqlite = self.config.DATABASE_DSN.startswith('sqlite:')
if is_sqlite:
@@ -92,6 +101,10 @@ class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
dburl = sa.engine.URL.create('sqlite+aiosqlite',
database=params.get('dbname'))
if not ('NOMINATIM_DATABASE_RW' in self.config.environ
and self.config.get_bool('DATABASE_RW')) \
and not Path(params.get('dbname', '')).is_file():
raise UsageError(f"SQLite database '{params.get('dbname')}' does not exist.")
else:
dsn = self.config.get_database_params()
query = {k: v for k, v in dsn.items()
@@ -105,39 +118,40 @@ class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
host=dsn.get('host'),
port=int(dsn['port']) if 'port' in dsn else None,
query=query)
extra_args['max_overflow'] = 0
extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')
engine = sa_asyncio.create_async_engine(dburl, **extra_args)
try:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
server_version = int(result)
except (PGCORE_ERROR, sa.exc.OperationalError):
if is_sqlite:
server_version = 0
if server_version >= 110000 and not is_sqlite:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
# Make sure that all connections get the new settings
await self.close()
if is_sqlite:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
dbapi_con.run_async(lambda conn: conn.enable_load_extension(True))
nominatim.db.sqlite_functions.install_custom_functions(dbapi_con)
cursor = dbapi_con.cursor()
cursor.execute("SELECT load_extension('mod_spatialite')")
cursor.execute('SELECT SetDecimalPrecision(7)')
dbapi_con.run_async(lambda conn: conn.enable_load_extension(False))
else:
try:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
server_version = int(result)
except (PGCORE_ERROR, sa.exc.OperationalError):
server_version = 0
if server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
# Make sure that all connections get the new settings
await engine.dispose()
self._property_cache['DB:server_version'] = server_version
self._tables = SearchTables(sa.MetaData(), engine.name) # pylint: disable=no-member
self._tables = SearchTables(sa.MetaData()) # pylint: disable=no-member
self._engine = engine
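
The SQLite connect hook above runs on the aiosqlite wrapper that SQLAlchemy hands out. As a rough equivalent on a plain sqlite3 connection (a sketch; assumes the mod_spatialite extension is installed, and mirrors install_custom_functions() by hand because that helper expects the async wrapper):

import sqlite3

from nominatim.db.sqlite_functions import (weigh_search, array_contains,
                                           array_pair_contains,
                                           ArrayIntersectFuzzy, ArrayUnion)

conn = sqlite3.connect('nominatim.db')
conn.enable_load_extension(True)
cur = conn.cursor()
cur.execute("SELECT load_extension('mod_spatialite')")
cur.execute('SELECT SetDecimalPrecision(7)')
conn.enable_load_extension(False)

# Register the custom helpers directly on the sqlite3 connection.
conn.create_function('weigh_search', 3, weigh_search, deterministic=True)
conn.create_function('array_contains', 2, array_contains, deterministic=True)
conn.create_function('array_pair_contains', 3, array_pair_contains, deterministic=True)
conn.create_aggregate('array_intersect_fuzzy', 1, ArrayIntersectFuzzy)
conn.create_aggregate('array_union', 1, ArrayUnion)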

View File

@@ -90,26 +90,42 @@ class BaseLogger:
params = dict(compiled.params)
if isinstance(extra_params, Mapping):
for k, v in extra_params.items():
params[k] = str(v)
if hasattr(v, 'to_wkt'):
params[k] = v.to_wkt()
elif isinstance(v, (int, float)):
params[k] = v
else:
params[k] = str(v)
elif isinstance(extra_params, Sequence) and extra_params:
for k in extra_params[0]:
params[k] = f':{k}'
sqlstr = str(compiled)
if sa.__version__.startswith('1'):
try:
sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
return sqlstr % tuple((repr(params.get(name, None))
for name in compiled.positiontup)) # type: ignore
except TypeError:
return sqlstr
if conn.dialect.name == 'postgresql':
if sa.__version__.startswith('1'):
try:
sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
return sqlstr % tuple((repr(params.get(name, None))
for name in compiled.positiontup)) # type: ignore
except TypeError:
return sqlstr
# Fixes an odd issue with Python 3.7 where percentages are not
# quoted correctly.
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
return sqlstr % params
# Fixes an odd issue with Python 3.7 where percentages are not
# quoted correctly.
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
return sqlstr % params
assert conn.dialect.name == 'sqlite'
# params in positional order
pparams = (repr(params.get(name, None)) for name in compiled.positiontup) # type: ignore
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', '?', sqlstr)
sqlstr = re.sub(r"\?", lambda m: next(pparams), sqlstr)
return sqlstr
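
To see what the sqlite branch of the debug formatter does, here is the same substitution on toy inputs (illustrative values only):

import re

sqlstr = "SELECT * FROM t WHERE a = ? AND b IN __[POSTCOMPILE_toks]"
params = {'a': 10, 'toks': [1, 2, 3]}
positiontup = ['a', 'toks']   # positional order of the bind parameters

# POSTCOMPILE markers become '?', then every '?' is replaced by the repr
# of the corresponding parameter in positional order.
pparams = (repr(params.get(name)) for name in positiontup)
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', '?', sqlstr)
print(re.sub(r'\?', lambda m: next(pparams), sqlstr))
# SELECT * FROM t WHERE a = 10 AND b IN [1, 2, 3]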
class HTMLLogger(BaseLogger):
""" Logger that formats messages in HTML.

View File

@@ -180,7 +180,7 @@ class ReverseGeocoder:
diststr = sa.text(f"{distance}")
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
.where(t.c.geometry.ST_DWithin(WKT_PARAM, diststr))
.where(t.c.geometry.within_distance(WKT_PARAM, diststr))
.where(t.c.indexed_status == 0)
.where(t.c.linked_place_id == None)
.where(sa.or_(sa.not_(t.c.geometry.is_area()),
@@ -219,7 +219,7 @@ class ReverseGeocoder:
t = self.conn.t.placex
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
.where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))
.where(t.c.geometry.within_distance(WKT_PARAM, 0.001))
.where(t.c.parent_place_id == parent_place_id)
.where(sa.func.IsAddressPoint(t))
.where(t.c.indexed_status == 0)
@@ -241,7 +241,7 @@ class ReverseGeocoder:
sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))
.where(t.c.linegeo.within_distance(WKT_PARAM, distance))
.where(t.c.startnumber != None)
.order_by('distance')
.limit(1))
@@ -275,7 +275,7 @@ class ReverseGeocoder:
inner = sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
.where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.order_by('distance')\
.limit(1)\

View File

@@ -15,6 +15,7 @@ from nominatim.api.search.query import QueryStruct, Token, TokenType, TokenRange
from nominatim.api.search.token_assignment import TokenAssignment
import nominatim.api.search.db_search_fields as dbf
import nominatim.api.search.db_searches as dbs
import nominatim.api.search.db_search_lookups as lookups
def wrap_near_search(categories: List[Tuple[str, str]],
@@ -152,7 +153,7 @@ class SearchBuilder:
sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
[t.token for r in address
for t in self.query.get_partials_list(r)],
'restrict')]
lookups.Restrict)]
penalty += 0.2
yield dbs.PostcodeSearch(penalty, sdata)
@@ -162,7 +163,7 @@ class SearchBuilder:
""" Build a simple address search for special entries where the
housenumber is the main name token.
"""
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any')]
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], lookups.LookupAny)]
expected_count = sum(t.count for t in hnrs)
partials = [t for trange in address
@@ -170,16 +171,16 @@ class SearchBuilder:
if expected_count < 8000:
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
[t.token for t in partials], 'restrict'))
[t.token for t in partials], lookups.Restrict))
elif len(partials) != 1 or partials[0].count < 10000:
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
[t.token for t in partials], 'lookup_all'))
[t.token for t in partials], lookups.LookupAll))
else:
sdata.lookups.append(
dbf.FieldLookup('nameaddress_vector',
[t.token for t
in self.query.get_tokens(address[0], TokenType.WORD)],
'lookup_any'))
lookups.LookupAny))
sdata.housenumbers = dbf.WeightedStrings([], [])
yield dbs.PlaceSearch(0.05, sdata, expected_count)
@@ -232,16 +233,16 @@ class SearchBuilder:
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
# Any of the full names applies with all of the partials from the address
yield penalty, fulls_count / (2**len(addr_partials)),\
dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens,
'restrict' if fulls_count < 10000 else 'lookup_all')
dbf.lookup_by_any_name([t.token for t in name_fulls],
addr_tokens, fulls_count > 10000)
# To catch remaining results, lookup by name and address
# We only do this if there is a reasonable number of results expected.
exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
if exp_count < 10000 and all(t.is_indexed for t in name_partials):
lookup = [dbf.FieldLookup('name_vector', name_tokens, 'lookup_all')]
lookup = [dbf.FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
if addr_tokens:
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all'))
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll))
penalty += 0.35 * max(0, 5 - len(name_partials) - len(addr_tokens))
yield penalty, exp_count, lookup

View File

@@ -7,14 +7,16 @@
"""
Data structures for more complex fields in abstract search descriptions.
"""
from typing import List, Tuple, Iterator, cast, Dict
from typing import List, Tuple, Iterator, Dict, Type
import dataclasses
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import ARRAY
from nominatim.typing import SaFromClause, SaColumn, SaExpression
from nominatim.api.search.query import Token
import nominatim.api.search.db_search_lookups as lookups
from nominatim.utils.json_writer import JsonWriter
@dataclasses.dataclass
class WeightedStrings:
@@ -129,11 +131,17 @@ class FieldRanking:
"""
assert self.rankings
return sa.func.weigh_search(table.c[self.column],
[f"{{{','.join((str(s) for s in r.tokens))}}}"
for r in self.rankings],
[r.penalty for r in self.rankings],
self.default)
rout = JsonWriter().start_array()
for rank in self.rankings:
rout.start_array().value(rank.penalty).next()
rout.start_array()
for token in rank.tokens:
rout.value(token).next()
rout.end_array()
rout.end_array().next()
rout.end_array()
return sa.func.weigh_search(table.c[self.column], rout(), self.default)
@dataclasses.dataclass
@@ -146,19 +154,12 @@ class FieldLookup:
"""
column: str
tokens: List[int]
lookup_type: str
lookup_type: Type[lookups.LookupType]
def sql_condition(self, table: SaFromClause) -> SaColumn:
""" Create an SQL expression for the given match condition.
"""
col = table.c[self.column]
if self.lookup_type == 'lookup_all':
return col.contains(self.tokens)
if self.lookup_type == 'lookup_any':
return cast(SaColumn, col.overlap(self.tokens))
return sa.func.array_cat(col, sa.text('ARRAY[]::integer[]'),
type_=ARRAY(sa.Integer())).contains(self.tokens)
return self.lookup_type(table, self.column, self.tokens)
class SearchData:
@@ -224,22 +225,23 @@ def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[Fiel
""" Create a lookup list where name tokens are looked up via index
and potential address tokens are used to restrict the search further.
"""
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_all')]
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
if addr_tokens:
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookups.Restrict))
return lookup
def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int],
lookup_type: str) -> List[FieldLookup]:
use_index_for_addr: bool) -> List[FieldLookup]:
""" Create a lookup list where name tokens are looked up via index
and only one of the name tokens must be present.
Potential address tokens are used to restrict the search further.
"""
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_any')]
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAny)]
if addr_tokens:
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookup_type))
lookup.append(FieldLookup('nameaddress_vector', addr_tokens,
lookups.LookupAll if use_index_for_addr else lookups.Restrict))
return lookup
@@ -248,5 +250,5 @@ def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[Field
""" Create a lookup list where address tokens are looked up via index
and the name tokens are only used to restrict the search further.
"""
return [FieldLookup('name_vector', name_tokens, 'restrict'),
FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
return [FieldLookup('name_vector', name_tokens, lookups.Restrict),
FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll)]
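
The JsonWriter-based ranking payload built in sql_penalty above has the shape [[penalty, [token, ...]], ...], which is exactly what the SQLite weigh_search() function parses. A standalone sketch with made-up values:

from nominatim.utils.json_writer import JsonWriter

rout = JsonWriter().start_array()
for penalty, tokens in ((0.0, [23, 42]), (0.3, [23])):
    rout.start_array().value(penalty).next()
    rout.start_array()
    for token in tokens:
        rout.value(token).next()
    rout.end_array()
    rout.end_array().next()
rout.end_array()
print(rout())   # [[0.0,[23,42]],[0.3,[23]]]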

View File

@@ -0,0 +1,114 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of lookup functions for the search_name table.
"""
from typing import List, Any
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from nominatim.typing import SaFromClause
from nominatim.db.sqlalchemy_types import IntArray
# pylint: disable=consider-using-f-string
LookupType = sa.sql.expression.FunctionElement[Any]
class LookupAll(LookupType):
""" Find all entries in search_name table that contain all of
a given list of tokens using an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(table.c.place_id, getattr(table.c, column), column,
sa.type_coerce(tokens, IntArray))
@compiles(LookupAll) # type: ignore[no-untyped-call, misc]
def _default_lookup_all(element: LookupAll,
compiler: 'sa.Compiled', **kw: Any) -> str:
_, col, _, tokens = list(element.clauses)
return "(%s @> %s)" % (compiler.process(col, **kw),
compiler.process(tokens, **kw))
@compiles(LookupAll, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_all(element: LookupAll,
compiler: 'sa.Compiled', **kw: Any) -> str:
place, col, colname, tokens = list(element.clauses)
return "(%s IN (SELECT CAST(value as bigint) FROM"\
" (SELECT array_intersect_fuzzy(places) as p FROM"\
" (SELECT places FROM reverse_search_name"\
" WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
" AND column = %s"\
" ORDER BY length(places)) as x) as u,"\
" json_each('[' || u.p || ']'))"\
" AND array_contains(%s, %s))"\
% (compiler.process(place, **kw),
compiler.process(tokens, **kw),
compiler.process(colname, **kw),
compiler.process(col, **kw),
compiler.process(tokens, **kw)
)
class LookupAny(LookupType):
""" Find all entries that contain at least one of the given tokens.
Use an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(table.c.place_id, getattr(table.c, column), column,
sa.type_coerce(tokens, IntArray))
@compiles(LookupAny) # type: ignore[no-untyped-call, misc]
def _default_lookup_any(element: LookupAny,
compiler: 'sa.Compiled', **kw: Any) -> str:
_, col, _, tokens = list(element.clauses)
return "(%s && %s)" % (compiler.process(col, **kw),
compiler.process(tokens, **kw))
@compiles(LookupAny, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_any(element: LookupAny,
compiler: 'sa.Compiled', **kw: Any) -> str:
place, _, colname, tokens = list(element.clauses)
return "%s IN (SELECT CAST(value as bigint) FROM"\
" (SELECT array_union(places) as p FROM reverse_search_name"\
" WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
" AND column = %s) as u,"\
" json_each('[' || u.p || ']'))" % (compiler.process(place, **kw),
compiler.process(tokens, **kw),
compiler.process(colname, **kw))
class Restrict(LookupType):
""" Find all entries that contain all of the given tokens.
Do not use an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(getattr(table.c, column),
sa.type_coerce(tokens, IntArray))
@compiles(Restrict) # type: ignore[no-untyped-call, misc]
def _default_restrict(element: Restrict,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(coalesce(null, %s) @> %s)" % (compiler.process(arg1, **kw),
compiler.process(arg2, **kw))
@compiles(Restrict, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_restrict(element: Restrict,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "array_contains(%s)" % compiler.process(element.clauses, **kw)

View File

@@ -11,7 +11,6 @@ from typing import List, Tuple, AsyncIterator, Dict, Any, Callable
import abc
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import ARRAY, array_agg
from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
@@ -19,7 +18,7 @@ from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
import nominatim.api.results as nres
from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.db.sqlalchemy_types import Geometry, IntArray
#pylint: disable=singleton-comparison,not-callable
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
@@ -55,12 +54,29 @@ NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
def _within_near(t: SaFromClause) -> Callable[[], SaExpression]:
return lambda: t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)
def filter_by_area(sql: SaSelect, t: SaFromClause,
details: SearchDetails, avoid_index: bool = False) -> SaSelect:
""" Apply SQL statements for filtering by viewbox and near point,
if applicable.
"""
if details.near is not None and details.near_radius is not None:
if details.near_radius < 0.1 and not avoid_index:
sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM))
else:
sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM)
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM,
use_index=not avoid_index and
details.viewbox.area < 0.2))
return sql
def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
def _select_placex(t: SaFromClause) -> SaSelect:
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type,
@@ -93,7 +109,7 @@ def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDet
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
numerals: List[int], details: SearchDetails) -> SaScalarSelect:
all_ids = array_agg(table.c.place_id) # type: ignore[no-untyped-call]
all_ids = sa.func.ArrayAgg(table.c.place_id)
sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)
if len(numerals) == 1:
@@ -117,9 +133,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
orexpr.append(no_index(table.c.rank_address).between(1, 30))
elif layers & DataLayer.ADDRESS:
orexpr.append(no_index(table.c.rank_address).between(1, 29))
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
sa.or_(table.c.housenumber != None,
table.c.address.has_key('addr:housename'))))
orexpr.append(sa.func.IsAddressPoint(table))
elif layers & DataLayer.POI:
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
table.c.class_.not_in(('place', 'building'))))
@@ -171,12 +185,21 @@ async def _get_placex_housenumbers(conn: SearchConnection,
yield result
def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
""" Create a subselect that returns the given list of integers
as rows in the column 'nr'.
"""
vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\
.table_valued(sa.column('value', type_=sa.JSON)) # type: ignore[no-untyped-call]
return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery()
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
numerals: List[int],
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.osmline
values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
.data([(n,) for n in numerals])
values = _int_list_to_subquery(numerals)
sql = sa.select(t.c.place_id, t.c.osm_id,
t.c.parent_place_id, t.c.address,
values.c.nr.label('housenumber'),
@@ -199,8 +222,7 @@ async def _get_tiger(conn: SearchConnection, place_ids: List[int],
numerals: List[int], osm_id: int,
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.tiger
values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
.data([(n,) for n in numerals])
values = _int_list_to_subquery(numerals)
sql = sa.select(t.c.place_id, t.c.parent_place_id,
sa.literal('W').label('osm_type'),
sa.literal(osm_id).label('osm_id'),
@@ -295,7 +317,7 @@ class NearSearch(AbstractSearch):
if table is None:
# No classtype table available, do a simplified lookup in placex.
table = conn.t.placex.alias('inner')
table = conn.t.placex
sql = sa.select(table.c.place_id,
sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
.label('dist'))\
@@ -366,7 +388,7 @@ class PoiSearch(AbstractSearch):
.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))\
.where(t.c.linked_place_id == None) \
.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
.order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
.limit(LIMIT_PARAM)
@@ -403,8 +425,8 @@ class PoiSearch(AbstractSearch):
if details.near and details.near_radius is not None:
sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
.where(table.c.centroid.ST_DWithin(NEAR_PARAM,
NEAR_RADIUS_PARAM))
.where(table.c.centroid.within_distance(NEAR_PARAM,
NEAR_RADIUS_PARAM))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
@@ -449,11 +471,7 @@ class CountrySearch(AbstractSearch):
if details.excluded:
sql = sql.where(_exclude_places(t))
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM))
if details.near is not None and details.near_radius is not None:
sql = sql.where(_within_near(t))
sql = filter_by_area(sql, t, details)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
@@ -486,18 +504,12 @@ class CountrySearch(AbstractSearch):
.where(tgrid.c.country_code.in_(self.countries.values))\
.group_by(tgrid.c.country_code)
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM))
if details.near is not None and details.near_radius is not None:
sql = sql.where(_within_near(tgrid))
sql = filter_by_area(sql, tgrid, details, avoid_index=True)
sub = sql.subquery('grid')
sql = sa.select(t.c.country_code,
(t.c.name
+ sa.func.coalesce(t.c.derived_name,
sa.cast('', type_=conn.t.types.Composite))
).label('name'),
t.c.name.merge(t.c.derived_name).label('name'),
sub.c.centroid, sub.c.bbox)\
.join(sub, t.c.country_code == sub.c.country_code)
@@ -545,19 +557,16 @@ class PostcodeSearch(AbstractSearch):
penalty: SaExpression = sa.literal(self.penalty)
if details.viewbox is not None:
if details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
else:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
else_=1.0)
if details.viewbox is not None and not details.bounded_viewbox:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
else_=1.0)
if details.near is not None:
if details.near_radius is not None:
sql = sql.where(_within_near(t))
sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
sql = filter_by_area(sql, t, details)
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
@@ -566,13 +575,11 @@ class PostcodeSearch(AbstractSearch):
if self.lookups:
assert len(self.lookups) == 1
assert self.lookups[0].lookup_type == 'restrict'
tsearch = conn.t.search_name
sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
.where(sa.func.array_cat(tsearch.c.name_vector,
tsearch.c.nameaddress_vector,
type_=ARRAY(sa.Integer))
.contains(self.lookups[0].tokens))
.where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
.contains(sa.type_coerce(self.lookups[0].tokens,
IntArray)))
for ranking in self.rankings:
penalty += ranking.sql_penalty(conn.t.search_name)
@@ -637,11 +644,11 @@ class PlaceSearch(AbstractSearch):
sql = sql.where(tsearch.c.address_rank > 9)
tpc = conn.t.postcode
pcs = self.postcodes.values
if self.expected_count > 1000:
if self.expected_count > 5000:
# Many results expected. Restrict by postcode.
sql = sql.where(sa.select(tpc.c.postcode)
.where(tpc.c.postcode.in_(pcs))
.where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
.where(tsearch.c.centroid.within_distance(tpc.c.geometry, 0.12))
.exists())
# Less results, only have a preference for close postcodes
@@ -653,27 +660,26 @@ class PlaceSearch(AbstractSearch):
if details.viewbox is not None:
if details.bounded_viewbox:
if details.viewbox.area < 0.2:
sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
else:
sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
sql = sql.where(tsearch.c.centroid
.intersects(VIEWBOX_PARAM,
use_index=details.viewbox.area < 0.2))
elif self.expected_count >= 10000:
if details.viewbox.area < 0.5:
sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM))
else:
sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM))
sql = sql.where(tsearch.c.centroid
.intersects(VIEWBOX2_PARAM,
use_index=details.viewbox.area < 0.5))
else:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
else_=1.0)
if details.near is not None:
if details.near_radius is not None:
if details.near_radius < 0.1:
sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
sql = sql.where(tsearch.c.centroid.within_distance(NEAR_PARAM,
NEAR_RADIUS_PARAM))
else:
sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
NEAR_RADIUS_PARAM))
sql = sql.where(tsearch.c.centroid
.ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)
sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))
sql = sql.order_by(sa.desc(sa.text('importance')))
@@ -692,10 +698,10 @@ class PlaceSearch(AbstractSearch):
sql = sql.order_by(sa.text('accuracy'))
if self.housenumbers:
hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
hnr_list = '|'.join(self.housenumbers.values)
sql = sql.where(tsearch.c.address_rank.between(16, 30))\
.where(sa.or_(tsearch.c.address_rank < 30,
t.c.housenumber.op('~*')(hnr_regexp)))
sa.func.RegexpWord(hnr_list, t.c.housenumber)))
# Cross check for housenumbers, need to do that on a rather large
# set. Worst case there are 40.000 main streets in OSM.
@@ -703,10 +709,10 @@ class PlaceSearch(AbstractSearch):
# Housenumbers from placex
thnr = conn.t.placex.alias('hnr')
pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call]
pid_list = sa.func.ArrayAgg(thnr.c.place_id)
place_sql = sa.select(pid_list)\
.where(thnr.c.parent_place_id == inner.c.place_id)\
.where(thnr.c.housenumber.op('~*')(hnr_regexp))\
.where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
.where(thnr.c.linked_place_id == None)\
.where(thnr.c.indexed_status == 0)

View File

@@ -22,6 +22,7 @@ from nominatim.api.connection import SearchConnection
from nominatim.api.logging import log
from nominatim.api.search import query as qmod
from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
from nominatim.db.sqlalchemy_types import Json
DB_TO_TOKEN_TYPE = {
@@ -159,7 +160,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
sa.Column('word_token', sa.Text, nullable=False),
sa.Column('type', sa.Text, nullable=False),
sa.Column('word', sa.Text),
sa.Column('info', self.conn.t.types.Json))
sa.Column('info', Json))
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:

View File

@@ -76,7 +76,7 @@ class ConvertDB:
group.add_argument('--reverse', action=WithAction, dest_set=self.options, default=True,
help='Enable/disable support for reverse and lookup API'
' (default: enabled)')
group.add_argument('--search', action=WithAction, dest_set=self.options, default=False,
group.add_argument('--search', action=WithAction, dest_set=self.options, default=True,
help='Enable/disable support for search API (default: disabled)')
group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
help='Enable/disable support for details API (default: enabled)')

View File

@@ -29,7 +29,7 @@ class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
@compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
def _default_intersects(element: SaColumn,
def _default_intersects(element: PlacexGeometryReverseLookuppolygon,
compiler: 'sa.Compiled', **kw: Any) -> str:
return ("(ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
" AND placex.rank_address between 4 and 25"
@@ -40,7 +40,7 @@ def _default_intersects(element: SaColumn,
@compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_intersects(element: SaColumn,
def _sqlite_intersects(element: PlacexGeometryReverseLookuppolygon,
compiler: 'sa.Compiled', **kw: Any) -> str:
return ("(ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')"
" AND placex.rank_address between 4 and 25"
@@ -61,7 +61,7 @@ class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
@compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
def default_reverse_place_diameter(element: SaColumn,
def default_reverse_place_diameter(element: IntersectsReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
table = element.tablename
return f"({table}.rank_address between 4 and 25"\
@@ -74,7 +74,7 @@ def default_reverse_place_diameter(element: SaColumn,
@compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_reverse_place_diameter(element: SaColumn,
def sqlite_reverse_place_diameter(element: IntersectsReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
geom1, rank, geom2 = list(element.clauses)
table = element.tablename
@@ -102,7 +102,7 @@ class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
@compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
def default_is_below_reverse_distance(element: SaColumn,
def default_is_below_reverse_distance(element: IsBelowReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
dist, rank = list(element.clauses)
return "%s < reverse_place_diameter(%s)" % (compiler.process(dist, **kw),
@@ -110,25 +110,13 @@ def default_is_below_reverse_distance(element: SaColumn,
@compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_is_below_reverse_distance(element: SaColumn,
def sqlite_is_below_reverse_distance(element: IsBelowReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
dist, rank = list(element.clauses)
return "%s < 14.0 * exp(-0.2 * %s) - 0.03" % (compiler.process(dist, **kw),
compiler.process(rank, **kw))
def select_index_placex_geometry_reverse_lookupplacenode(table: str) -> 'sa.TextClause':
""" Create an expression with the necessary conditions over a placex
table that the index 'idx_placex_geometry_reverse_lookupPlaceNode'
can be used.
"""
return sa.text(f"{table}.rank_address between 4 and 25"
f" AND {table}.type != 'postcode'"
f" AND {table}.name is not null"
f" AND {table}.linked_place_id is null"
f" AND {table}.osm_type = 'N'")
class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
name = 'IsAddressPoint'
inherit_cache = True
@@ -139,7 +127,7 @@ class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
@compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
def default_is_address_point(element: SaColumn,
def default_is_address_point(element: IsAddressPoint,
compiler: 'sa.Compiled', **kw: Any) -> str:
rank, hnr, name = list(element.clauses)
return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
@@ -149,7 +137,7 @@ def default_is_address_point(element: SaColumn,
@compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_is_address_point(element: SaColumn,
def sqlite_is_address_point(element: IsAddressPoint,
compiler: 'sa.Compiled', **kw: Any) -> str:
rank, hnr, name = list(element.clauses)
return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
@@ -166,7 +154,7 @@ class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
inherit_cache = True
@compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
def compile_crosscheck_names(element: SaColumn,
def compile_crosscheck_names(element: CrosscheckNames,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "coalesce(avals(%s) && ARRAY(SELECT * FROM json_array_elements_text(%s)), false)" % (
@@ -174,7 +162,7 @@ def compile_crosscheck_names(element: SaColumn,
@compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
def compile_sqlite_crosscheck_names(element: SaColumn,
def compile_sqlite_crosscheck_names(element: CrosscheckNames,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "EXISTS(SELECT *"\
@@ -191,15 +179,16 @@ class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
@compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
def default_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
def default_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
return "json_array_elements(%s)" % compiler.process(element.clauses, **kw)
@compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
def sqlite_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
return "json_each(%s)" % compiler.process(element.clauses, **kw)
class Greatest(sa.sql.functions.GenericFunction[Any]):
""" Function to compute maximum of all its input parameters.
"""
@@ -208,5 +197,25 @@ class Greatest(sa.sql.functions.GenericFunction[Any]):
@compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_greatest(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
def sqlite_greatest(element: Greatest, compiler: 'sa.Compiled', **kw: Any) -> str:
return "max(%s)" % compiler.process(element.clauses, **kw)
class RegexpWord(sa.sql.functions.GenericFunction[Any]):
""" Check if a full word is in a given string.
"""
name = 'RegexpWord'
inherit_cache = True
@compiles(RegexpWord, 'postgresql') # type: ignore[no-untyped-call, misc]
def postgres_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "%s ~* ('\\m(' || %s || ')\\M')::text" % (compiler.process(arg2, **kw), compiler.process(arg1, **kw))
@compiles(RegexpWord, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "regexp('\\b(' || %s || ')\\b', %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))

View File

@@ -7,37 +7,10 @@
"""
SQLAlchemy definitions for all tables used by the frontend.
"""
from typing import Any
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB, array
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
import nominatim.db.sqlalchemy_functions #pylint: disable=unused-import
from nominatim.db.sqlalchemy_types import Geometry
class PostgresTypes:
""" Type definitions for complex types as used in Postgres variants.
"""
Composite = HSTORE
Json = JSONB
IntArray = ARRAY(sa.Integer()) #pylint: disable=invalid-name
to_array = array
class SqliteTypes:
""" Type definitions for complex types as used in Postgres variants.
"""
Composite = sqlite_json
Json = sqlite_json
IntArray = sqlite_json
@staticmethod
def to_array(arr: Any) -> Any:
""" Sqlite has no special conversion for arrays.
"""
return arr
from nominatim.db.sqlalchemy_types import Geometry, KeyValueStore, IntArray
#pylint: disable=too-many-instance-attributes
class SearchTables:
@@ -47,14 +20,7 @@ class SearchTables:
Any data used for updates only will not be visible.
"""
def __init__(self, meta: sa.MetaData, engine_name: str) -> None:
if engine_name == 'postgresql':
self.types: Any = PostgresTypes
elif engine_name == 'sqlite':
self.types = SqliteTypes
else:
raise ValueError("Only 'postgresql' and 'sqlite' engines are supported.")
def __init__(self, meta: sa.MetaData) -> None:
self.meta = meta
self.import_status = sa.Table('import_status', meta,
@@ -80,9 +46,9 @@ class SearchTables:
sa.Column('class', sa.Text, nullable=False, key='class_'),
sa.Column('type', sa.Text, nullable=False),
sa.Column('admin_level', sa.SmallInteger),
sa.Column('name', self.types.Composite),
sa.Column('address', self.types.Composite),
sa.Column('extratags', self.types.Composite),
sa.Column('name', KeyValueStore),
sa.Column('address', KeyValueStore),
sa.Column('extratags', KeyValueStore),
sa.Column('geometry', Geometry, nullable=False),
sa.Column('wikipedia', sa.Text),
sa.Column('country_code', sa.String(2)),
@@ -118,14 +84,14 @@ class SearchTables:
sa.Column('step', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('linegeo', Geometry),
sa.Column('address', self.types.Composite),
sa.Column('address', KeyValueStore),
sa.Column('postcode', sa.Text),
sa.Column('country_code', sa.String(2)))
self.country_name = sa.Table('country_name', meta,
sa.Column('country_code', sa.String(2)),
sa.Column('name', self.types.Composite),
sa.Column('derived_name', self.types.Composite),
sa.Column('name', KeyValueStore),
sa.Column('derived_name', KeyValueStore),
sa.Column('partition', sa.Integer))
self.country_grid = sa.Table('country_osm_grid', meta,
@@ -139,8 +105,8 @@ class SearchTables:
sa.Column('importance', sa.Float),
sa.Column('search_rank', sa.SmallInteger),
sa.Column('address_rank', sa.SmallInteger),
sa.Column('name_vector', self.types.IntArray),
sa.Column('nameaddress_vector', self.types.IntArray),
sa.Column('name_vector', IntArray),
sa.Column('nameaddress_vector', IntArray),
sa.Column('country_code', sa.String(2)),
sa.Column('centroid', Geometry))

View File

@@ -0,0 +1,17 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module with custom types for SQLAlchemy
"""
# See also https://github.com/PyCQA/pylint/issues/6006
# pylint: disable=useless-import-alias
from .geometry import (Geometry as Geometry)
from .int_array import (IntArray as IntArray)
from .key_value import (KeyValueStore as KeyValueStore)
from .json import (Json as Json)

View File

@@ -28,7 +28,7 @@ class Geometry_DistanceSpheroid(sa.sql.expression.FunctionElement[float]):
@compiles(Geometry_DistanceSpheroid) # type: ignore[no-untyped-call, misc]
def _default_distance_spheroid(element: SaColumn,
def _default_distance_spheroid(element: Geometry_DistanceSpheroid,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_DistanceSpheroid(%s,"\
" 'SPHEROID[\"WGS 84\",6378137,298.257223563, AUTHORITY[\"EPSG\",\"7030\"]]')"\
@@ -36,7 +36,7 @@ def _default_distance_spheroid(element: SaColumn,
@compiles(Geometry_DistanceSpheroid, 'sqlite') # type: ignore[no-untyped-call, misc]
def _spatialite_distance_spheroid(element: SaColumn,
def _spatialite_distance_spheroid(element: Geometry_DistanceSpheroid,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "COALESCE(Distance(%s, true), 0.0)" % compiler.process(element.clauses, **kw)
@@ -49,14 +49,14 @@ class Geometry_IsLineLike(sa.sql.expression.FunctionElement[Any]):
@compiles(Geometry_IsLineLike) # type: ignore[no-untyped-call, misc]
def _default_is_line_like(element: SaColumn,
def _default_is_line_like(element: Geometry_IsLineLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('ST_LineString', 'ST_MultiLineString')" % \
compiler.process(element.clauses, **kw)
@compiles(Geometry_IsLineLike, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_is_line_like(element: SaColumn,
def _sqlite_is_line_like(element: Geometry_IsLineLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('LINESTRING', 'MULTILINESTRING')" % \
compiler.process(element.clauses, **kw)
@@ -70,14 +70,14 @@ class Geometry_IsAreaLike(sa.sql.expression.FunctionElement[Any]):
@compiles(Geometry_IsAreaLike) # type: ignore[no-untyped-call, misc]
def _default_is_area_like(element: SaColumn,
def _default_is_area_like(element: Geometry_IsAreaLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('ST_Polygon', 'ST_MultiPolygon')" % \
compiler.process(element.clauses, **kw)
@compiles(Geometry_IsAreaLike, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_is_area_like(element: SaColumn,
def _sqlite_is_area_like(element: Geometry_IsAreaLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('POLYGON', 'MULTIPOLYGON')" % \
compiler.process(element.clauses, **kw)
@@ -91,14 +91,14 @@ class Geometry_IntersectsBbox(sa.sql.expression.FunctionElement[Any]):
@compiles(Geometry_IntersectsBbox) # type: ignore[no-untyped-call, misc]
def _default_intersects(element: SaColumn,
def _default_intersects(element: Geometry_IntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(Geometry_IntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_intersects(element: SaColumn,
def _sqlite_intersects(element: Geometry_IntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "MbrIntersects(%s) = 1" % compiler.process(element.clauses, **kw)
@@ -114,14 +114,14 @@ class Geometry_ColumnIntersectsBbox(sa.sql.expression.FunctionElement[Any]):
@compiles(Geometry_ColumnIntersectsBbox) # type: ignore[no-untyped-call, misc]
def default_intersects_column(element: SaColumn,
def default_intersects_column(element: Geometry_ColumnIntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(Geometry_ColumnIntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
def spatialite_intersects_column(element: SaColumn,
def spatialite_intersects_column(element: Geometry_ColumnIntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "MbrIntersects(%s, %s) = 1 and "\
@@ -145,12 +145,12 @@ class Geometry_ColumnDWithin(sa.sql.expression.FunctionElement[Any]):
@compiles(Geometry_ColumnDWithin) # type: ignore[no-untyped-call, misc]
def default_dwithin_column(element: SaColumn,
def default_dwithin_column(element: Geometry_ColumnDWithin,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_DWithin(%s)" % compiler.process(element.clauses, **kw)
@compiles(Geometry_ColumnDWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
def spatialite_dwithin_column(element: SaColumn,
def spatialite_dwithin_column(element: Geometry_ColumnDWithin,
compiler: 'sa.Compiled', **kw: Any) -> str:
geom1, geom2, dist = list(element.clauses)
return "ST_Distance(%s, %s) < %s and "\
@@ -165,7 +165,6 @@ def spatialite_dwithin_column(element: SaColumn,
compiler.process(dist, **kw))
class Geometry(types.UserDefinedType): # type: ignore[type-arg]
""" Simplified type decorator for PostGIS geometry. This type
only supports geometries in 4326 projection.
@@ -206,7 +205,10 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
def intersects(self, other: SaColumn) -> 'sa.Operators':
def intersects(self, other: SaColumn, use_index: bool = True) -> 'sa.Operators':
if not use_index:
return Geometry_IntersectsBbox(sa.func.coalesce(sa.null(), self.expr), other)
if isinstance(self.expr, sa.Column):
return Geometry_ColumnIntersectsBbox(self.expr, other)
@@ -221,20 +223,11 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
return Geometry_IsAreaLike(self)
def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
def within_distance(self, other: SaColumn, distance: SaColumn) -> SaColumn:
if isinstance(self.expr, sa.Column):
return Geometry_ColumnDWithin(self.expr, other, distance)
return sa.func.ST_DWithin(self.expr, other, distance)
def ST_DWithin_no_index(self, other: SaColumn, distance: SaColumn) -> SaColumn:
return sa.func.ST_DWithin(sa.func.coalesce(sa.null(), self),
other, distance)
def ST_Intersects_no_index(self, other: SaColumn) -> 'sa.Operators':
return Geometry_IntersectsBbox(sa.func.coalesce(sa.null(), self), other)
return self.ST_Distance(other) < distance
def ST_Distance(self, other: SaColumn) -> SaColumn:
@@ -313,18 +306,3 @@ def _add_function_alias(func: str, ftype: type, alias: str) -> None:
for alias in SQLITE_FUNCTION_ALIAS:
_add_function_alias(*alias)
class ST_DWithin(sa.sql.functions.GenericFunction[Any]):
name = 'ST_DWithin'
inherit_cache = True
@compiles(ST_DWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
def default_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
geom1, geom2, dist = list(element.clauses)
return "(MbrIntersects(%s, ST_Expand(%s, %s)) = 1 AND ST_Distance(%s, %s) <= %s)" % (
compiler.process(geom1, **kw), compiler.process(geom2, **kw),
compiler.process(dist, **kw),
compiler.process(geom1, **kw), compiler.process(geom2, **kw),
compiler.process(dist, **kw))
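
The comparator changes above can be inspected by compiling expressions against a dialect. A small sketch (assumes the nominatim package is importable; no database needed, the output keeps unbound placeholders):

import sqlalchemy as sa
from sqlalchemy.dialects import sqlite

from nominatim.db.sqlalchemy_types import Geometry

t = sa.Table('placex', sa.MetaData(), sa.Column('geometry', Geometry))
near = sa.bindparam('near', type_=Geometry)

# New index-aware distance check (spatialite: ST_Distance + MbrIntersects).
print(t.c.geometry.within_distance(near, 0.001)
       .compile(dialect=sqlite.dialect()))

# intersects(use_index=False) wraps the column in coalesce() to keep the
# planner away from the spatial index.
print(t.c.geometry.intersects(near, use_index=False)
       .compile(dialect=sqlite.dialect()))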

View File

@@ -0,0 +1,123 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom type for an array of integers.
"""
from typing import Any, List, cast, Optional
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.dialects.postgresql import ARRAY
from nominatim.typing import SaDialect, SaColumn
# pylint: disable=all
class IntList(sa.types.TypeDecorator[Any]):
""" A list of integers saved as a text of comma-separated numbers.
"""
impl = sa.types.Unicode
cache_ok = True
def process_bind_param(self, value: Optional[Any], dialect: 'sa.Dialect') -> Optional[str]:
if value is None:
return None
assert isinstance(value, list)
return ','.join(map(str, value))
def process_result_value(self, value: Optional[Any],
dialect: SaDialect) -> Optional[List[int]]:
return [int(v) for v in value.split(',')] if value is not None else None
def copy(self, **kw: Any) -> 'IntList':
return IntList(self.impl.length)
class IntArray(sa.types.TypeDecorator[Any]):
""" Dialect-independent list of integers.
"""
impl = IntList
cache_ok = True
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
if dialect.name == 'postgresql':
return ARRAY(sa.Integer()) #pylint: disable=invalid-name
return IntList()
class comparator_factory(sa.types.UserDefinedType.Comparator): # type: ignore[type-arg]
def __add__(self, other: SaColumn) -> 'sa.ColumnOperators':
""" Concate the array with the given array. If one of the
operants is null, the value of the other will be returned.
"""
return ArrayCat(self.expr, other)
def contains(self, other: SaColumn, **kwargs: Any) -> 'sa.ColumnOperators':
""" Return true if the array contains all the value of the argument
array.
"""
return ArrayContains(self.expr, other)
class ArrayAgg(sa.sql.functions.GenericFunction[Any]):
""" Aggregate function to collect elements in an array.
"""
type = IntArray()
identifier = 'ArrayAgg'
name = 'array_agg'
inherit_cache = True
@compiles(ArrayAgg, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_array_agg(element: ArrayAgg, compiler: 'sa.Compiled', **kw: Any) -> str:
return "group_concat(%s, ',')" % compiler.process(element.clauses, **kw)
class ArrayContains(sa.sql.expression.FunctionElement[Any]):
""" Function to check if an array is fully contained in another.
"""
name = 'ArrayContains'
inherit_cache = True
@compiles(ArrayContains) # type: ignore[no-untyped-call, misc]
def generic_array_contains(element: ArrayContains, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(%s @> %s)" % (compiler.process(arg1, **kw),
compiler.process(arg2, **kw))
@compiles(ArrayContains, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_array_contains(element: ArrayContains, compiler: 'sa.Compiled', **kw: Any) -> str:
return "array_contains(%s)" % compiler.process(element.clauses, **kw)
class ArrayCat(sa.sql.expression.FunctionElement[Any]):
""" Function to check if an array is fully contained in another.
"""
type = IntArray()
identifier = 'ArrayCat'
inherit_cache = True
@compiles(ArrayCat) # type: ignore[no-untyped-call, misc]
def generic_array_cat(element: ArrayCat, compiler: 'sa.Compiled', **kw: Any) -> str:
return "array_cat(%s)" % compiler.process(element.clauses, **kw)
@compiles(ArrayCat, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_array_cat(element: ArrayCat, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(%s || ',' || %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))

View File

@@ -0,0 +1,30 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Common json type for different dialects.
"""
from typing import Any
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
from nominatim.typing import SaDialect
# pylint: disable=all
class Json(sa.types.TypeDecorator[Any]):
""" Dialect-independent type for JSON.
"""
impl = sa.types.JSON
cache_ok = True
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
if dialect.name == 'postgresql':
return JSONB(none_as_null=True) # type: ignore[no-untyped-call]
return sqlite_json(none_as_null=True)

View File

@@ -0,0 +1,62 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
A custom type that implements a simple key-value store of strings.
"""
from typing import Any
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.dialects.postgresql import HSTORE
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
from nominatim.typing import SaDialect, SaColumn
# pylint: disable=all
class KeyValueStore(sa.types.TypeDecorator[Any]):
""" Dialect-independent type of a simple key-value store of strings.
"""
impl = HSTORE
cache_ok = True
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
if dialect.name == 'postgresql':
return HSTORE() # type: ignore[no-untyped-call]
return sqlite_json(none_as_null=True)
class comparator_factory(sa.types.UserDefinedType.Comparator): # type: ignore[type-arg]
def merge(self, other: SaColumn) -> 'sa.Operators':
""" Merge the values from the given KeyValueStore into this
one, overwriting values where necessary. When the argument
is null, nothing happens.
"""
return KeyValueConcat(self.expr, other)
class KeyValueConcat(sa.sql.expression.FunctionElement[Any]):
""" Return the merged key-value store from the input parameters.
"""
type = KeyValueStore()
name = 'JsonConcat'
inherit_cache = True
@compiles(KeyValueConcat) # type: ignore[no-untyped-call, misc]
def default_json_concat(element: KeyValueConcat, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(%s || coalesce(%s, ''::hstore))" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(KeyValueConcat, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_json_concat(element: KeyValueConcat, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "json_patch(%s, coalesce(%s, '{}'))" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))

View File

@@ -0,0 +1,122 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom functions for SQLite.
"""
from typing import cast, Optional, Set, Any
import json
# pylint: disable=protected-access
def weigh_search(search_vector: Optional[str], rankings: str, default: float) -> float:
""" Custom weight function for search results.
"""
if search_vector is not None:
svec = [int(x) for x in search_vector.split(',')]
for rank in json.loads(rankings):
if all(r in svec for r in rank[1]):
return cast(float, rank[0])
return default
class ArrayIntersectFuzzy:
""" Compute the array of common elements of all input integer arrays.
Very large input parameters may be ignored to speed up
computation. Therefore, the result is a superset of common elements.
Input and output arrays are given as comma-separated lists.
"""
def __init__(self) -> None:
self.first = ''
self.values: Optional[Set[int]] = None
def step(self, value: Optional[str]) -> None:
""" Add the next array to the intersection.
"""
if value is not None:
if not self.first:
self.first = value
elif len(value) < 10000000:
if self.values is None:
self.values = {int(x) for x in self.first.split(',')}
self.values.intersection_update((int(x) for x in value.split(',')))
def finalize(self) -> str:
""" Return the final result.
"""
if self.values is not None:
return ','.join(map(str, self.values))
return self.first
class ArrayUnion:
""" Compute the set of all elements of the input integer arrays.
Input and output arrays are given as strings of comma-separated lists.
"""
def __init__(self) -> None:
self.values: Optional[Set[str]] = None
def step(self, value: Optional[str]) -> None:
""" Add the next array to the union.
"""
if value is not None:
if self.values is None:
self.values = set(value.split(','))
else:
self.values.update(value.split(','))
def finalize(self) -> str:
""" Return the final result.
"""
return '' if self.values is None else ','.join(self.values)
def array_contains(container: Optional[str], containee: Optional[str]) -> Optional[bool]:
""" Is the array 'containee' completely contained in array 'container'.
"""
if container is None or containee is None:
return None
vset = container.split(',')
return all(v in vset for v in containee.split(','))
def array_pair_contains(container1: Optional[str], container2: Optional[str],
containee: Optional[str]) -> Optional[bool]:
""" Is the array 'containee' completely contained in the union of
array 'container1' and array 'container2'.
"""
if container1 is None or container2 is None or containee is None:
return None
vset = container1.split(',') + container2.split(',')
return all(v in vset for v in containee.split(','))
def install_custom_functions(conn: Any) -> None:
""" Install helper functions for Nominatim into the given SQLite
database connection.
"""
conn.create_function('weigh_search', 3, weigh_search, deterministic=True)
conn.create_function('array_contains', 2, array_contains, deterministic=True)
conn.create_function('array_pair_contains', 3, array_pair_contains, deterministic=True)
_create_aggregate(conn, 'array_intersect_fuzzy', 1, ArrayIntersectFuzzy)
_create_aggregate(conn, 'array_union', 1, ArrayUnion)
async def _make_aggregate(aioconn: Any, *args: Any) -> None:
await aioconn._execute(aioconn._conn.create_aggregate, *args)
def _create_aggregate(conn: Any, name: str, nargs: int, aggregate: Any) -> None:
try:
conn.await_(_make_aggregate(conn._connection, name, nargs, aggregate))
except Exception as error: # pylint: disable=broad-exception-caught
conn._handle_exception(error)
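
Since weigh_search() is plain Python, its contract is easy to check directly; the rankings argument is the JsonWriter payload produced by FieldRanking.sql_penalty():

from nominatim.db.sqlite_functions import weigh_search

print(weigh_search('1,22,333', '[[0.1,[22,333]],[0.5,[22,444]]]', 1.0))  # 0.1
print(weigh_search('1,22,333', '[[0.1,[444]]]', 1.0))                    # 1.0 (default)
print(weigh_search(None, '[[0.1,[22]]]', 1.0))                           # 1.0 (default)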

View File

@@ -14,7 +14,8 @@ from pathlib import Path
import sqlalchemy as sa
from nominatim.typing import SaSelect
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.db.sqlalchemy_types import Geometry, IntArray
from nominatim.api.search.query_analyzer_factory import make_query_analyzer
import nominatim.api as napi
LOG = logging.getLogger()
@@ -27,11 +28,15 @@ async def convert(project_dir: Path, outfile: Path, options: Set[str]) -> None:
try:
outapi = napi.NominatimAPIAsync(project_dir,
{'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={outfile}"})
{'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={outfile}",
'NOMINATIM_DATABASE_RW': '1'})
async with api.begin() as src, outapi.begin() as dest:
writer = SqliteWriter(src, dest, options)
await writer.write()
try:
async with api.begin() as src, outapi.begin() as dest:
writer = SqliteWriter(src, dest, options)
await writer.write()
finally:
await outapi.close()
finally:
await api.close()
@@ -51,18 +56,24 @@ class SqliteWriter:
""" Create the database structure and copy the data from
the source database to the destination.
"""
LOG.warning('Setting up spatialite')
await self.dest.execute(sa.select(sa.func.InitSpatialMetaData(True, 'WGS84')))
await self.create_tables()
await self.copy_data()
if 'search' in self.options:
await self.create_word_table()
await self.create_indexes()
async def create_tables(self) -> None:
""" Set up the database tables.
"""
LOG.warning('Setting up tables')
if 'search' not in self.options:
self.dest.t.meta.remove(self.dest.t.search_name)
else:
await self.create_class_tables()
await self.dest.connection.run_sync(self.dest.t.meta.create_all)
@@ -75,6 +86,41 @@ class SqliteWriter:
col.type.subtype.upper(), 'XY')))
async def create_class_tables(self) -> None:
""" Set up the table that serve class/type-specific geometries.
"""
sql = sa.text("""SELECT tablename FROM pg_tables
WHERE tablename LIKE 'place_classtype_%'""")
for res in await self.src.execute(sql):
for db in (self.src, self.dest):
sa.Table(res[0], db.t.meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('centroid', Geometry))
async def create_word_table(self) -> None:
""" Create the word table.
This table needs the property information to determine the
correct format. Therefore it needs to be created after all other
data has been copied.
"""
await make_query_analyzer(self.src)
await make_query_analyzer(self.dest)
src = self.src.t.meta.tables['word']
dest = self.dest.t.meta.tables['word']
await self.dest.connection.run_sync(dest.create)
LOG.warning("Copying word table")
async_result = await self.src.connection.stream(sa.select(src))
async for partition in async_result.partitions(10000):
data = [{k: getattr(r, k) for k in r._fields} for r in partition]
await self.dest.execute(dest.insert(), data)
await self.dest.connection.run_sync(sa.Index('idx_word_woken', dest.c.word_token).create)
async def copy_data(self) -> None:
""" Copy data for all registered tables.
"""
@@ -87,6 +133,14 @@ class SqliteWriter:
for r in partition]
await self.dest.execute(table.insert(), data)
# Set up a minimal copy of pg_tables used to look up the class tables later.
pg_tables = sa.Table('pg_tables', self.dest.t.meta,
sa.Column('schemaname', sa.Text, default='public'),
sa.Column('tablename', sa.Text))
await self.dest.connection.run_sync(pg_tables.create)
data = [{'tablename': t} for t in self.dest.t.meta.tables]
await self.dest.execute(pg_tables.insert().values(data))
async def create_indexes(self) -> None:
""" Add indexes necessary for the frontend.
@@ -116,6 +170,22 @@ class SqliteWriter:
await self.create_index('placex', 'parent_place_id')
await self.create_index('placex', 'rank_address')
await self.create_index('addressline', 'place_id')
await self.create_index('postcode', 'place_id')
await self.create_index('osmline', 'place_id')
await self.create_index('tiger', 'place_id')
if 'search' in self.options:
await self.create_spatial_index('postcode', 'geometry')
await self.create_spatial_index('search_name', 'centroid')
await self.create_index('search_name', 'place_id')
await self.create_index('osmline', 'parent_place_id')
await self.create_index('tiger', 'parent_place_id')
await self.create_search_index()
for t in self.dest.t.meta.tables:
if t.startswith('place_classtype_'):
await self.dest.execute(sa.select(
sa.func.CreateSpatialIndex(t, 'centroid')))
async def create_spatial_index(self, table: str, column: str) -> None:
@@ -133,6 +203,36 @@ class SqliteWriter:
sa.Index(f"idx_{table}_{column}", getattr(table.c, column)).create)
async def create_search_index(self) -> None:
""" Create the tables and indexes needed for word lookup.
"""
LOG.warning("Creating reverse search table")
rsn = sa.Table('reverse_search_name', self.dest.t.meta,
sa.Column('word', sa.Integer()),
sa.Column('column', sa.Text()),
sa.Column('places', IntArray))
await self.dest.connection.run_sync(rsn.create)
tsrc = self.src.t.search_name
for column in ('name_vector', 'nameaddress_vector'):
sql = sa.select(sa.func.unnest(getattr(tsrc.c, column)).label('word'),
sa.func.ArrayAgg(tsrc.c.place_id).label('places'))\
.group_by('word')
async_result = await self.src.connection.stream(sql)
async for partition in async_result.partitions(100):
data = []
for row in partition:
row.places.sort()
data.append({'word': row.word,
'column': column,
'places': row.places})
await self.dest.execute(rsn.insert(), data)
await self.dest.connection.run_sync(
sa.Index('idx_reverse_search_name_word', rsn.c.word).create)
def select_from(self, table: str) -> SaSelect:
""" Create the SQL statement to select the source columns and rows.
"""

View File

@@ -72,3 +72,4 @@ SaLabel: TypeAlias = 'sa.Label[Any]'
SaFromClause: TypeAlias = 'sa.FromClause'
SaSelectable: TypeAlias = 'sa.Selectable'
SaBind: TypeAlias = 'sa.BindParameter[Any]'
SaDialect: TypeAlias = 'sa.Dialect'

View File

@@ -76,8 +76,8 @@ class JsonWriter:
def end_array(self) -> 'JsonWriter':
""" Write the closing bracket of a JSON array.
"""
assert self.pending in (',', '[', '')
if self.pending == '[':
assert self.pending in (',', '[', ']', ')', '')
if self.pending not in (',', ''):
self.data.write(self.pending)
self.pending = ']'
return self
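
The relaxed assertion allows directly nested arrays to close back-to-back, which the new ranking payload in db_search_fields relies on. Before this change, the second end_array() below would have tripped the assert:

from nominatim.utils.json_writer import JsonWriter

out = (JsonWriter().start_array()
                   .start_array().value(1).next().end_array()
                   .end_array())
print(out())   # [[1]]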