split code into submodules

Sarah Hoffmann
2024-05-16 11:55:17 +02:00
parent 0fb4fe8e4d
commit 6e89310a92
137 changed files with 757 additions and 716 deletions

View File

@@ -0,0 +1,10 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module with functions for importing and updating Nominatim databases
as well as general maintenance helpers.
"""

View File

@@ -0,0 +1,68 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Function to add additional OSM data from a file or the API into the database.
"""
from typing import Any, MutableMapping
from pathlib import Path
import logging
import urllib.parse
from nominatim_core.db.connection import connect
from nominatim_core.utils.url_utils import get_url
from .exec_utils import run_osm2pgsql
LOG = logging.getLogger()
def _run_osm2pgsql(dsn: str, options: MutableMapping[str, Any]) -> None:
run_osm2pgsql(options)
# Handle deletions
with connect(dsn) as conn:
with conn.cursor() as cur:
cur.execute('SELECT flush_deleted_places()')
conn.commit()
def add_data_from_file(dsn: str, fname: str, options: MutableMapping[str, Any]) -> int:
""" Adds data from a OSM file to the database. The file may be a normal
OSM file or a diff file in all formats supported by libosmium.
"""
options['import_file'] = Path(fname)
options['append'] = True
_run_osm2pgsql(dsn, options)
# No status update. We don't know where the file came from.
return 0
def add_osm_object(dsn: str, osm_type: str, osm_id: int, use_main_api: bool,
options: MutableMapping[str, Any]) -> int:
""" Add or update a single OSM object from the latest version of the
API.
"""
if use_main_api:
base_url = f'https://www.openstreetmap.org/api/0.6/{osm_type}/{osm_id}'
if osm_type in ('way', 'relation'):
base_url += '/full'
else:
# use Overpass API
if osm_type == 'node':
data = f'node({osm_id});out meta;'
elif osm_type == 'way':
data = f'(way({osm_id});>;);out meta;'
else:
data = f'(rel(id:{osm_id});>;);out meta;'
base_url = 'https://overpass-api.de/api/interpreter?' \
+ urllib.parse.urlencode({'data': data})
options['append'] = True
options['import_data'] = get_url(base_url).encode('utf-8')
_run_osm2pgsql(dsn, options)
return 0
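# Illustrative usage sketch (not part of the commit): 'options' is the
# osm2pgsql settings mapping consumed by exec_utils.run_osm2pgsql; the
# file name and object id are assumptions for the example.
#
#   add_data_from_file(dsn, 'changes.osc.gz', options)
#   add_osm_object(dsn, 'relation', 1234, use_main_api=True, options=options)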

View File

@@ -0,0 +1,106 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for database analysis and maintenance.
"""
from typing import Optional, Tuple, Any, cast
import logging
from psycopg2.extras import Json, register_hstore
from psycopg2 import DataError
from nominatim_core.typing import DictCursorResult
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect, Cursor
from nominatim_core.errors import UsageError
from ..tokenizer import factory as tokenizer_factory
from ..data.place_info import PlaceInfo
LOG = logging.getLogger()
def _get_place_info(cursor: Cursor, osm_id: Optional[str],
place_id: Optional[int]) -> DictCursorResult:
sql = """SELECT place_id, extra.*
FROM placex, LATERAL placex_indexing_prepare(placex) as extra
"""
values: Tuple[Any, ...]
if osm_id:
osm_type = osm_id[0].upper()
if osm_type not in 'NWR' or not osm_id[1:].isdigit():
LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id)
raise UsageError("OSM ID parameter badly formatted")
sql += ' WHERE placex.osm_type = %s AND placex.osm_id = %s'
values = (osm_type, int(osm_id[1:]))
elif place_id is not None:
sql += ' WHERE placex.place_id = %s'
values = (place_id, )
else:
LOG.fatal("No OSM object given to index.")
raise UsageError("OSM object not found")
cursor.execute(sql + ' LIMIT 1', values)
if cursor.rowcount < 1:
LOG.fatal("OSM object %s not found in database.", osm_id)
raise UsageError("OSM object not found")
return cast(DictCursorResult, cursor.fetchone())
def analyse_indexing(config: Configuration, osm_id: Optional[str] = None,
place_id: Optional[int] = None) -> None:
""" Analyse indexing of a single Nominatim object.
"""
with connect(config.get_libpq_dsn()) as conn:
register_hstore(conn)
with conn.cursor() as cur:
place = _get_place_info(cur, osm_id, place_id)
cur.execute("update placex set indexed_status = 2 where place_id = %s",
(place['place_id'], ))
cur.execute("""SET auto_explain.log_min_duration = '0';
SET auto_explain.log_analyze = 'true';
SET auto_explain.log_nested_statements = 'true';
LOAD 'auto_explain';
SET client_min_messages = LOG;
SET log_min_messages = FATAL""")
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
with tokenizer.name_analyzer() as analyzer:
cur.execute("""UPDATE placex
SET indexed_status = 0, address = %s, token_info = %s,
name = %s, linked_place_id = %s
WHERE place_id = %s""",
(place['address'],
Json(analyzer.process_place(PlaceInfo(place))),
place['name'], place['linked_place_id'], place['place_id']))
# we do not want to keep the results
conn.rollback()
for msg in conn.notices:
print(msg)
def clean_deleted_relations(config: Configuration, age: str) -> None:
""" Clean deleted relations older than a given age
"""
with connect(config.get_libpq_dsn()) as conn:
with conn.cursor() as cur:
try:
cur.execute("""SELECT place_force_delete(p.place_id)
FROM import_polygon_delete d, placex p
WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
AND age(p.indexed_date) > %s::interval""",
(age, ))
except DataError as exc:
raise UsageError('Invalid PostgreSQL time interval format') from exc
conn.commit()
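# Illustrative usage sketch: 'age' may be any string that PostgreSQL accepts
# as an interval, e.g.
#
#   clean_deleted_relations(config, age='1 month')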

View File

@@ -0,0 +1,350 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Collection of functions that check if the database is complete and functional.
"""
from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
from enum import Enum
from textwrap import dedent
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect, Connection
from nominatim_core.db import properties
from nominatim_core.errors import UsageError
from ..tokenizer import factory as tokenizer_factory
from . import freeze
from ..version import NOMINATIM_VERSION, parse_version
CHECKLIST = []
class CheckState(Enum):
""" Possible states of a check. FATAL stops check execution entirely.
"""
OK = 0
FAIL = 1
FATAL = 2
NOT_APPLICABLE = 3
WARN = 4
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
CheckFunc = Callable[[Connection, Configuration], CheckResult]
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
""" Decorator for checks. It adds the function to the list of
checks to execute and adds the code for printing progress messages.
"""
def decorator(func: CheckFunc) -> CheckFunc:
title = (func.__doc__ or '').split('\n', 1)[0].strip()
def run_check(conn: Connection, config: Configuration) -> CheckState:
print(title, end=' ... ')
ret = func(conn, config)
if isinstance(ret, tuple):
ret, params = ret
else:
params = {}
if ret == CheckState.OK:
print('\033[92mOK\033[0m')
elif ret == CheckState.WARN:
print('\033[93mWARNING\033[0m')
if hint:
print('')
print(dedent(hint.format(**params)))
elif ret == CheckState.NOT_APPLICABLE:
print('not applicable')
else:
print('\033[31mFailed\033[0m')
if hint:
print(dedent(hint.format(**params)))
return ret
CHECKLIST.append(run_check)
return run_check
return decorator
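# A minimal sketch of how a check is declared with the decorator above
# (hypothetical check, not part of the actual checklist):
#
#   @_check(hint="Table {table} is missing. Re-run the import.")
#   def check_example_table(conn: Connection, _: Configuration) -> CheckResult:
#       """ Checking for example table
#       """
#       if conn.table_exists('example'):
#           return CheckState.OK
#       return CheckState.FAIL, dict(table='example')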
class _BadConnection:
def __init__(self, msg: str) -> None:
self.msg = msg
def close(self) -> None:
""" Dummy function to provide the implementation.
"""
def check_database(config: Configuration) -> int:
""" Run a number of checks on the database and return the status.
"""
try:
conn = connect(config.get_libpq_dsn()).connection
except UsageError as err:
conn = _BadConnection(str(err)) # type: ignore[assignment]
overall_result = 0
for check in CHECKLIST:
ret = check(conn, config)
if ret == CheckState.FATAL:
conn.close()
return 1
if ret in (CheckState.FATAL, CheckState.FAIL):
overall_result = 1
conn.close()
return overall_result
def _get_indexes(conn: Connection) -> List[str]:
indexes = ['idx_place_addressline_address_place_id',
'idx_placex_rank_search',
'idx_placex_rank_address',
'idx_placex_parent_place_id',
'idx_placex_geometry_reverse_lookuppolygon',
'idx_placex_geometry_placenode',
'idx_osmline_parent_place_id',
'idx_osmline_parent_osm_id',
'idx_postcode_id',
'idx_postcode_postcode'
]
if conn.table_exists('search_name'):
indexes.extend(('idx_search_name_nameaddress_vector',
'idx_search_name_name_vector',
'idx_search_name_centroid'))
if conn.server_version_tuple() >= (11, 0, 0):
indexes.extend(('idx_placex_housenumber',
'idx_osmline_parent_osm_id_with_hnr'))
if conn.table_exists('place'):
indexes.extend(('idx_location_area_country_place_id',
'idx_place_osm_unique',
'idx_placex_rank_address_sector',
'idx_placex_rank_boundaries_sector'))
return indexes
# CHECK FUNCTIONS
#
# Functions are executed in the order they appear here.
@_check(hint="""\
{error}
Hints:
* Is the database server started?
* Check the NOMINATIM_DATABASE_DSN variable in your local .env
* Try connecting to the database with the same settings
Project directory: {config.project_dir}
Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
""")
def check_connection(conn: Any, config: Configuration) -> CheckResult:
""" Checking database connection
"""
if isinstance(conn, _BadConnection):
return CheckState.FATAL, dict(error=conn.msg, config=config)
return CheckState.OK
@_check(hint="""\
Database version ({db_version}) doesn't match Nominatim version ({nom_version})
Hints:
* Are you connecting to the correct database?
{instruction}
Check the Migration chapter of the Administration Guide.
Project directory: {config.project_dir}
Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
""")
def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
""" Checking database_version matches Nominatim software version
"""
if conn.table_exists('nominatim_properties'):
db_version_str = properties.get_property(conn, 'database_version')
else:
db_version_str = None
if db_version_str is not None:
db_version = parse_version(db_version_str)
if db_version == NOMINATIM_VERSION:
return CheckState.OK
instruction = (
'Run migrations: nominatim admin --migrate'
if db_version < NOMINATIM_VERSION
else 'You need to upgrade the Nominatim software.'
)
else:
instruction = ''
return CheckState.FATAL, dict(db_version=db_version_str,
nom_version=NOMINATIM_VERSION,
instruction=instruction,
config=config)
@_check(hint="""\
placex table not found
Hints:
* Are you connecting to the correct database?
* Did the import process finish without errors?
Project directory: {config.project_dir}
Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
""")
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
""" Checking for placex table
"""
if conn.table_exists('placex'):
return CheckState.OK
return CheckState.FATAL, dict(config=config)
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
""" Checking for placex content
"""
with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')
return CheckState.OK if cnt > 0 else CheckState.FATAL
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
""" Checking that tokenizer works
"""
try:
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
except UsageError:
return CheckState.FAIL, dict(msg="""\
Cannot load tokenizer. Did the import finish successfully?""")
result = tokenizer.check_database(config)
if result is None:
return CheckState.OK
return CheckState.FAIL, dict(msg=result)
@_check(hint="""\
Wikipedia/Wikidata importance tables missing.
Quality of search results may be degraded. Reverse geocoding is unaffected.
See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
""")
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
""" Checking for wikipedia/wikidata data
"""
if not conn.table_exists('search_name') or not conn.table_exists('place'):
return CheckState.NOT_APPLICABLE
with conn.cursor() as cur:
if conn.table_exists('wikimedia_importance'):
cnt = cur.scalar('SELECT count(*) FROM wikimedia_importance')
else:
cnt = cur.scalar('SELECT count(*) FROM wikipedia_article')
return CheckState.WARN if cnt == 0 else CheckState.OK
@_check(hint="""\
The indexing didn't finish. {count} entries are not yet indexed.
To index the remaining entries, run: {index_cmd}
""")
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
""" Checking indexing status
"""
with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')
if cnt == 0:
return CheckState.OK
if freeze.is_frozen(conn):
index_cmd="""\
Database is marked frozen, it cannot be updated.
Low counts of unindexed places are fine."""
return CheckState.WARN, dict(count=cnt, index_cmd=index_cmd)
if conn.index_exists('idx_placex_rank_search'):
# Likely just an interrupted update.
index_cmd = 'nominatim index'
else:
# Looks like the import process got interrupted.
index_cmd = 'nominatim import --continue indexing'
return CheckState.FAIL, dict(count=cnt, index_cmd=index_cmd)
@_check(hint="""\
The following indexes are missing:
{indexes}
Rerun the index creation with: nominatim import --continue db-postprocess
""")
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
""" Checking that database indexes are complete
"""
missing = []
for index in _get_indexes(conn):
if not conn.index_exists(index):
missing.append(index)
if missing:
return CheckState.FAIL, dict(indexes='\n '.join(missing))
return CheckState.OK
@_check(hint="""\
At least one index is invalid. That can happen, e.g. when index creation was
disrupted and later restarted. You should delete the affected indices
and recreate them.
Invalid indexes:
{indexes}
""")
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
""" Checking that all database indexes are valid
"""
with conn.cursor() as cur:
cur.execute(""" SELECT relname FROM pg_class, pg_index
WHERE pg_index.indisvalid = false
AND pg_index.indexrelid = pg_class.oid""")
broken = [c[0] for c in cur]
if broken:
return CheckState.FAIL, dict(indexes='\n '.join(broken))
return CheckState.OK
@_check(hint="""\
{error}
Run TIGER import again: nominatim add-data --tiger-data <DIR>
""")
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
""" Checking TIGER external data table.
"""
if not config.get_bool('USE_US_TIGER_DATA'):
return CheckState.NOT_APPLICABLE
if not conn.table_exists('location_property_tiger'):
return CheckState.FAIL, dict(error='TIGER data table not found.')
with conn.cursor() as cur:
if cur.scalar('SELECT count(*) FROM location_property_tiger') == 0:
return CheckState.FAIL, dict(error='TIGER data table is empty.')
return CheckState.OK

View File

@@ -0,0 +1,166 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Collection of host system information including software versions, memory,
storage, and database configuration.
"""
import os
import subprocess
import sys
from pathlib import Path
from typing import List, Optional, Tuple, Union
import psutil
from psycopg2.extensions import make_dsn, parse_dsn
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from ..version import NOMINATIM_VERSION
def convert_version(ver_tup: Tuple[int, int]) -> str:
"""converts tuple version (ver_tup) to a string representation"""
return ".".join(map(str, ver_tup))
def friendly_memory_string(mem: float) -> str:
"""Create a user friendly string for the amount of memory specified as mem"""
mem_magnitude = ("bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
mag = 0
# determine order of magnitude
while mem > 1000:
mem /= 1000
mag += 1
return f"{mem:.1f} {mem_magnitude[mag]}"
def run_command(cmd: Union[str, List[str]]) -> str:
"""Runs a command using the shell and returns the output from stdout"""
try:
if sys.version_info < (3, 7):
cap_out = subprocess.run(cmd, stdout=subprocess.PIPE, check=False)
else:
cap_out = subprocess.run(cmd, capture_output=True, check=False)
return cap_out.stdout.decode("utf-8")
except FileNotFoundError:
# non-Linux systems should end up here
return f"Unknown (unable to find the '{cmd}' command)"
def os_name_info() -> str:
"""Obtain Operating System Name (and possibly the version)"""
os_info = None
# man page os-release(5) details meaning of the fields
if Path("/etc/os-release").is_file():
os_info = from_file_find_line_portion(
"/etc/os-release", "PRETTY_NAME", "=")
# alternative location
elif Path("/usr/lib/os-release").is_file():
os_info = from_file_find_line_portion(
"/usr/lib/os-release", "PRETTY_NAME", "="
)
# fallback on Python's os name
if os_info is None or os_info == "":
os_info = os.name
# if the above is insufficient, take a look at neofetch's approach to OS detection
return os_info
# Note: Intended to be used on informational files like /proc
def from_file_find_line_portion(
filename: str, start: str, sep: str, fieldnum: int = 1
) -> Optional[str]:
"""open filename, finds the line starting with the 'start' string.
Splits the line using separator and returns a "fieldnum" from the split."""
with open(filename, encoding='utf8') as file:
result = ""
for line in file:
if line.startswith(start):
result = line.split(sep)[fieldnum].strip()
return result
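# For example, on a typical Linux system
# from_file_find_line_portion('/etc/os-release', 'PRETTY_NAME', '=')
# returns the value as it appears in the file, e.g. '"Ubuntu 22.04 LTS"'.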
def get_postgresql_config(version: int) -> str:
"""Retrieve postgres configuration file"""
try:
with open(f"/etc/postgresql/{version}/main/postgresql.conf", encoding='utf8') as file:
db_config = file.read()
return db_config
except IOError:
return f"**Could not read '/etc/postgresql/{version}/main/postgresql.conf'**"
def report_system_information(config: Configuration) -> None:
"""Generate a report about the host system including software versions, memory,
storage, and database configuration."""
with connect(make_dsn(config.get_libpq_dsn(), dbname='postgres')) as conn:
postgresql_ver: str = convert_version(conn.server_version_tuple())
with conn.cursor() as cur:
num = cur.scalar("SELECT count(*) FROM pg_catalog.pg_database WHERE datname=%s",
(parse_dsn(config.get_libpq_dsn())['dbname'], ))
nominatim_db_exists = num == 1 if isinstance(num, int) else False
if nominatim_db_exists:
with connect(config.get_libpq_dsn()) as conn:
postgis_ver: str = convert_version(conn.postgis_version_tuple())
else:
postgis_ver = "Unable to connect to database"
postgresql_config: str = get_postgresql_config(int(float(postgresql_ver)))
# Note: psutil.disk_partitions() is similar to run_command("lsblk")
# Note: run_command("systemd-detect-virt") only works on Linux, on other OSes
# should give a message: "Unknown (unable to find the 'systemd-detect-virt' command)"
# Generates the Markdown report.
report = f"""
**Instructions**
Use this information in your issue report at https://github.com/osm-search/Nominatim/issues
Redirect the output to a file:
$ ./collect_os_info.py > report.md
**Software Environment:**
- Python version: {sys.version}
- Nominatim version: {NOMINATIM_VERSION!s}
- PostgreSQL version: {postgresql_ver}
- PostGIS version: {postgis_ver}
- OS: {os_name_info()}
**Hardware Configuration:**
- RAM: {friendly_memory_string(psutil.virtual_memory().total)}
- number of CPUs: {psutil.cpu_count(logical=False)}
- bare metal/AWS/other cloud service (per systemd-detect-virt(1)): {run_command("systemd-detect-virt")}
- type and size of disks:
**`df -h` - report file system disk space usage:**
```
{run_command(["df", "-h"])}
```
**lsblk - list block devices:**
```
{run_command("lsblk")}
```
**Postgresql Configuration:**
```
{postgresql_config}
```
**Notes**
Please add any notes about anything above that is incorrect.
"""
print(report)

View File

@@ -0,0 +1,265 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Exporting a Nominatim database to SQLite.
"""
from typing import Set, Any
import datetime as dt
import logging
from pathlib import Path
import sqlalchemy as sa
import nominatim_api as napi
from nominatim_api.search.query_analyzer_factory import make_query_analyzer
from nominatim_core.typing import SaSelect, SaRow
from nominatim_core.db.sqlalchemy_types import Geometry, IntArray
LOG = logging.getLogger()
async def convert(project_dir: Path, outfile: Path, options: Set[str]) -> None:
""" Export an existing database to sqlite. The resulting database
will be usable against the Python frontend of Nominatim.
"""
api = napi.NominatimAPIAsync(project_dir)
try:
outapi = napi.NominatimAPIAsync(project_dir,
{'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={outfile}",
'NOMINATIM_DATABASE_RW': '1'})
try:
async with api.begin() as src, outapi.begin() as dest:
writer = SqliteWriter(src, dest, options)
await writer.write()
finally:
await outapi.close()
finally:
await api.close()
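# Illustrative usage sketch, assuming the current directory is a project
# directory and the search tables should be included:
#
#   import asyncio
#   asyncio.run(convert(Path('.'), Path('nominatim.sqlite'), {'search'}))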
class SqliteWriter:
""" Worker class which creates a new SQLite database.
"""
def __init__(self, src: napi.SearchConnection,
dest: napi.SearchConnection, options: Set[str]) -> None:
self.src = src
self.dest = dest
self.options = options
async def write(self) -> None:
""" Create the database structure and copy the data from
the source database to the destination.
"""
LOG.warning('Setting up spatialite')
await self.dest.execute(sa.select(sa.func.InitSpatialMetaData(True, 'WGS84')))
await self.create_tables()
await self.copy_data()
if 'search' in self.options:
await self.create_word_table()
await self.create_indexes()
async def create_tables(self) -> None:
""" Set up the database tables.
"""
LOG.warning('Setting up tables')
if 'search' not in self.options:
self.dest.t.meta.remove(self.dest.t.search_name)
else:
await self.create_class_tables()
await self.dest.connection.run_sync(self.dest.t.meta.create_all)
# Convert all Geometry columns to Spatialite geometries
for table in self.dest.t.meta.sorted_tables:
for col in table.c:
if isinstance(col.type, Geometry):
await self.dest.execute(sa.select(
sa.func.RecoverGeometryColumn(table.name, col.name, 4326,
col.type.subtype.upper(), 'XY')))
async def create_class_tables(self) -> None:
""" Set up the table that serve class/type-specific geometries.
"""
sql = sa.text("""SELECT tablename FROM pg_tables
WHERE tablename LIKE 'place_classtype_%'""")
for res in await self.src.execute(sql):
for db in (self.src, self.dest):
sa.Table(res[0], db.t.meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('centroid', Geometry))
async def create_word_table(self) -> None:
""" Create the word table.
This table needs the property information to determine the
correct format. Therefore it needs to be created after all other
data has been copied.
"""
await make_query_analyzer(self.src)
await make_query_analyzer(self.dest)
src = self.src.t.meta.tables['word']
dest = self.dest.t.meta.tables['word']
await self.dest.connection.run_sync(dest.create)
LOG.warning("Copying word table")
async_result = await self.src.connection.stream(sa.select(src))
async for partition in async_result.partitions(10000):
data = [{k: getattr(r, k) for k in r._fields} for r in partition]
await self.dest.execute(dest.insert(), data)
await self.dest.connection.run_sync(sa.Index('idx_word_woken', dest.c.word_token).create)
async def copy_data(self) -> None:
""" Copy data for all registered tables.
"""
def _getfield(row: SaRow, key: str) -> Any:
value = getattr(row, key)
if isinstance(value, dt.datetime):
if value.tzinfo is not None:
value = value.astimezone(dt.timezone.utc)
return value
for table in self.dest.t.meta.sorted_tables:
LOG.warning("Copying '%s'", table.name)
async_result = await self.src.connection.stream(self.select_from(table.name))
async for partition in async_result.partitions(10000):
data = [{('class_' if k == 'class' else k): _getfield(r, k)
for k in r._fields}
for r in partition]
await self.dest.execute(table.insert(), data)
# Set up a minimal copy of pg_tables used to look up the class tables later.
pg_tables = sa.Table('pg_tables', self.dest.t.meta,
sa.Column('schemaname', sa.Text, default='public'),
sa.Column('tablename', sa.Text))
await self.dest.connection.run_sync(pg_tables.create)
data = [{'tablename': t} for t in self.dest.t.meta.tables]
await self.dest.execute(pg_tables.insert().values(data))
async def create_indexes(self) -> None:
""" Add indexes necessary for the frontend.
"""
# reverse place node lookup needs an extra table to simulate a
# partial index with adaptive buffering.
await self.dest.execute(sa.text(
""" CREATE TABLE placex_place_node_areas AS
SELECT place_id, ST_Expand(geometry,
14.0 * exp(-0.2 * rank_search) - 0.03) as geometry
FROM placex
WHERE rank_address between 5 and 25
and osm_type = 'N'
and linked_place_id is NULL """))
await self.dest.execute(sa.select(
sa.func.RecoverGeometryColumn('placex_place_node_areas', 'geometry',
4326, 'GEOMETRY', 'XY')))
await self.dest.execute(sa.select(sa.func.CreateSpatialIndex(
'placex_place_node_areas', 'geometry')))
# Remaining indexes.
await self.create_spatial_index('country_grid', 'geometry')
await self.create_spatial_index('placex', 'geometry')
await self.create_spatial_index('osmline', 'linegeo')
await self.create_spatial_index('tiger', 'linegeo')
await self.create_index('placex', 'place_id')
await self.create_index('placex', 'parent_place_id')
await self.create_index('placex', 'rank_address')
await self.create_index('addressline', 'place_id')
await self.create_index('postcode', 'place_id')
await self.create_index('osmline', 'place_id')
await self.create_index('tiger', 'place_id')
if 'search' in self.options:
await self.create_spatial_index('postcode', 'geometry')
await self.create_spatial_index('search_name', 'centroid')
await self.create_index('search_name', 'place_id')
await self.create_index('osmline', 'parent_place_id')
await self.create_index('tiger', 'parent_place_id')
await self.create_search_index()
for t in self.dest.t.meta.tables:
if t.startswith('place_classtype_'):
await self.dest.execute(sa.select(
sa.func.CreateSpatialIndex(t, 'centroid')))
async def create_spatial_index(self, table: str, column: str) -> None:
""" Create a spatial index on the given table and column.
"""
await self.dest.execute(sa.select(
sa.func.CreateSpatialIndex(getattr(self.dest.t, table).name, column)))
async def create_index(self, table_name: str, column: str) -> None:
""" Create a simple index on the given table and column.
"""
table = getattr(self.dest.t, table_name)
await self.dest.connection.run_sync(
sa.Index(f"idx_{table}_{column}", getattr(table.c, column)).create)
async def create_search_index(self) -> None:
""" Create the tables and indexes needed for word lookup.
"""
LOG.warning("Creating reverse search table")
rsn = sa.Table('reverse_search_name', self.dest.t.meta,
sa.Column('word', sa.Integer()),
sa.Column('column', sa.Text()),
sa.Column('places', IntArray))
await self.dest.connection.run_sync(rsn.create)
tsrc = self.src.t.search_name
for column in ('name_vector', 'nameaddress_vector'):
sql = sa.select(sa.func.unnest(getattr(tsrc.c, column)).label('word'),
sa.func.ArrayAgg(tsrc.c.place_id).label('places'))\
.group_by('word')
async_result = await self.src.connection.stream(sql)
async for partition in async_result.partitions(100):
data = []
for row in partition:
row.places.sort()
data.append({'word': row.word,
'column': column,
'places': row.places})
await self.dest.execute(rsn.insert(), data)
await self.dest.connection.run_sync(
sa.Index('idx_reverse_search_name_word', rsn.c.word).create)
def select_from(self, table: str) -> SaSelect:
""" Create the SQL statement to select the source columns and rows.
"""
columns = self.src.t.meta.tables[table].c
if table == 'placex':
# SQLite struggles with Geometries that are larger than 5MB,
# so simplify those.
return sa.select(*(c for c in columns if not isinstance(c.type, Geometry)),
sa.func.ST_AsText(columns.centroid).label('centroid'),
sa.func.ST_AsText(
sa.case((sa.func.ST_MemSize(columns.geometry) < 5000000,
columns.geometry),
else_=sa.func.ST_SimplifyPreserveTopology(
columns.geometry, 0.0001)
)).label('geometry'))
sql = sa.select(*(sa.func.ST_AsText(c).label(c.name)
if isinstance(c.type, Geometry) else c for c in columns))
return sql

View File

@@ -0,0 +1,272 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for setting up and importing a new Nominatim database.
"""
from typing import Tuple, Optional, Union, Sequence, MutableMapping, Any
import logging
import os
import selectors
import subprocess
from pathlib import Path
import psutil
from psycopg2 import sql as pysql
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect, get_pg_env, Connection
from nominatim_core.db.async_connection import DBConnection
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
from .exec_utils import run_osm2pgsql
from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
LOG = logging.getLogger()
def _require_version(module: str, actual: Tuple[int, int], expected: Tuple[int, int]) -> None:
""" Compares the version for the given module and raises an exception
if the actual version is too old.
"""
if actual < expected:
LOG.fatal('Minimum supported version of %s is %d.%d. '
'Found version %d.%d.',
module, expected[0], expected[1], actual[0], actual[1])
raise UsageError(f'{module} is too old.')
def _require_loaded(extension_name: str, conn: Connection) -> None:
""" Check that the given extension is loaded. """
if not conn.extension_loaded(extension_name):
LOG.fatal('Required module %s is not loaded.', extension_name)
raise UsageError(f'{extension_name} is not loaded.')
def check_existing_database_plugins(dsn: str) -> None:
""" Check that the database has the required plugins installed."""
with connect(dsn) as conn:
_require_version('PostgreSQL server',
conn.server_version_tuple(),
POSTGRESQL_REQUIRED_VERSION)
_require_version('PostGIS',
conn.postgis_version_tuple(),
POSTGIS_REQUIRED_VERSION)
_require_loaded('hstore', conn)
def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
""" Create a new database for Nominatim and populate it with the
essential extensions.
The function fails when the database already exists or the PostgreSQL or
PostGIS versions are too old.
Uses `createdb` to create the database.
If 'rouser' is given, the function also checks that a user
with that name exists.
The caller must have superuser rights.
"""
proc = subprocess.run(['createdb'], env=get_pg_env(dsn), check=False)
if proc.returncode != 0:
raise UsageError('Creating new database failed.')
with connect(dsn) as conn:
_require_version('PostgreSQL server',
conn.server_version_tuple(),
POSTGRESQL_REQUIRED_VERSION)
if rouser is not None:
with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM pg_user where usename = %s',
(rouser, ))
if cnt == 0:
LOG.fatal("Web user '%s' does not exist. Create it with:\n"
"\n createuser %s", rouser, rouser)
raise UsageError('Missing read-only user.')
# Create extensions.
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
postgis_version = conn.postgis_version_tuple()
if postgis_version[0] >= 3:
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
conn.commit()
_require_version('PostGIS',
conn.postgis_version_tuple(),
POSTGIS_REQUIRED_VERSION)
def import_osm_data(osm_files: Union[Path, Sequence[Path]],
options: MutableMapping[str, Any],
drop: bool = False, ignore_errors: bool = False) -> None:
""" Import the given OSM files. 'options' contains the list of
default settings for osm2pgsql.
"""
options['import_file'] = osm_files
options['append'] = False
options['threads'] = 1
if not options['flatnode_file'] and options['osm2pgsql_cache'] == 0:
# Make some educated guesses about cache size based on the size
# of the import file and the available memory.
mem = psutil.virtual_memory()
fsize = 0
if isinstance(osm_files, list):
for fname in osm_files:
fsize += os.stat(str(fname)).st_size
else:
fsize = os.stat(str(osm_files)).st_size
options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
fsize * 2) / 1024 / 1024) + 1
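# Worked example with illustrative numbers: given 8GB of available+cached
# memory and a 1GB OSM file, this picks min(8GB * 0.75, 2 * 1GB), i.e.
# roughly 2GB of node cache.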
run_osm2pgsql(options)
with connect(options['dsn']) as conn:
if not ignore_errors:
with conn.cursor() as cur:
cur.execute('SELECT * FROM place LIMIT 1')
if cur.rowcount == 0:
raise UsageError('No data imported by osm2pgsql.')
if drop:
conn.drop_table('planet_osm_nodes')
if drop and options['flatnode_file']:
Path(options['flatnode_file']).unlink()
def create_tables(conn: Connection, config: Configuration, reverse_only: bool = False) -> None:
""" Create the set of basic tables.
When `reverse_only` is True, then the main table for searching will
be skipped and only reverse search is possible.
"""
sql = SQLPreprocessor(conn, config)
sql.env.globals['db']['reverse_only'] = reverse_only
sql.run_sql_file(conn, 'tables.sql')
def create_table_triggers(conn: Connection, config: Configuration) -> None:
""" Create the triggers for the tables. The trigger functions must already
have been imported with refresh.create_functions().
"""
sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'table-triggers.sql')
def create_partition_tables(conn: Connection, config: Configuration) -> None:
""" Create tables that have explicit partitioning.
"""
sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'partition-tables.src.sql')
def truncate_data_tables(conn: Connection) -> None:
""" Truncate all data tables to prepare for a fresh load.
"""
with conn.cursor() as cur:
cur.execute('TRUNCATE placex')
cur.execute('TRUNCATE place_addressline')
cur.execute('TRUNCATE location_area')
cur.execute('TRUNCATE location_area_country')
cur.execute('TRUNCATE location_property_tiger')
cur.execute('TRUNCATE location_property_osmline')
cur.execute('TRUNCATE location_postcode')
if conn.table_exists('search_name'):
cur.execute('TRUNCATE search_name')
cur.execute('DROP SEQUENCE IF EXISTS seq_place')
cur.execute('CREATE SEQUENCE seq_place start 100000')
cur.execute("""SELECT tablename FROM pg_tables
WHERE tablename LIKE 'location_road_%'""")
for table in [r[0] for r in list(cur)]:
cur.execute('TRUNCATE ' + table)
conn.commit()
_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier,
('osm_type', 'osm_id', 'class', 'type',
'name', 'admin_level', 'address',
'extratags', 'geometry')))
def load_data(dsn: str, threads: int) -> None:
""" Copy data into the word and placex table.
"""
sel = selectors.DefaultSelector()
# Copy data from place to placex in <threads - 1> chunks.
place_threads = max(1, threads - 1)
for imod in range(place_threads):
conn = DBConnection(dsn)
conn.connect()
conn.perform(
pysql.SQL("""INSERT INTO placex ({columns})
SELECT {columns} FROM place
WHERE osm_id % {total} = {mod}
AND NOT (class='place' and (type='houses' or type='postcode'))
AND ST_IsValid(geometry)
""").format(columns=_COPY_COLUMNS,
total=pysql.Literal(place_threads),
mod=pysql.Literal(imod)))
sel.register(conn, selectors.EVENT_READ, conn)
# Address interpolations go into another table.
conn = DBConnection(dsn)
conn.connect()
conn.perform("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
SELECT osm_id, address, geometry FROM place
WHERE class='place' and type='houses' and osm_type='W'
and ST_GeometryType(geometry) = 'ST_LineString'
""")
sel.register(conn, selectors.EVENT_READ, conn)
# Now wait for all of them to finish.
todo = place_threads + 1
while todo > 0:
for key, _ in sel.select(1):
conn = key.data
sel.unregister(conn)
conn.wait()
conn.close()
todo -= 1
print('.', end='', flush=True)
print('\n')
with connect(dsn) as syn_conn:
with syn_conn.cursor() as cur:
cur.execute('ANALYSE')
def create_search_indices(conn: Connection, config: Configuration,
drop: bool = False, threads: int = 1) -> None:
""" Create tables that have explicit partitioning.
"""
# If a previous index creation failed and left invalid indices behind,
# drop them first, so that they will be recreated.
with conn.cursor() as cur:
cur.execute("""SELECT relname FROM pg_class, pg_index
WHERE pg_index.indisvalid = false
AND pg_index.indexrelid = pg_class.oid""")
bad_indices = [row[0] for row in list(cur)]
for idx in bad_indices:
LOG.info("Drop invalid index %s.", idx)
cur.execute(pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx)))
conn.commit()
sql = SQLPreprocessor(conn, config)
sql.run_parallel_sql_file(config.get_libpq_dsn(),
'indices.sql', min(8, threads), drop=drop)

View File

@@ -0,0 +1,84 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for executing external programs.
"""
from typing import Any, Mapping
import logging
import os
import subprocess
import shutil
from nominatim_core.typing import StrPath
from nominatim_core.db.connection import get_pg_env
LOG = logging.getLogger()
def run_php_server(server_address: str, base_dir: StrPath) -> None:
""" Run the built-in server from the given directory.
"""
subprocess.run(['/usr/bin/env', 'php', '-S', server_address],
cwd=str(base_dir), check=True)
def run_osm2pgsql(options: Mapping[str, Any]) -> None:
""" Run osm2pgsql with the given options.
"""
env = get_pg_env(options['dsn'])
osm2pgsql_cmd = options['osm2pgsql']
if osm2pgsql_cmd is None:
osm2pgsql_cmd = shutil.which('osm2pgsql')
if osm2pgsql_cmd is None:
raise RuntimeError('osm2pgsql executable not found. Please install osm2pgsql first.')
cmd = [str(osm2pgsql_cmd),
'--slim',
'--log-progress', 'true',
'--number-processes', '1' if options['append'] else str(options['threads']),
'--cache', str(options['osm2pgsql_cache']),
'--style', str(options['osm2pgsql_style'])
]
if str(options['osm2pgsql_style']).endswith('.lua'):
env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / '?.lua'),
os.environ.get('LUAPATH', ';')))
cmd.extend(('--output', 'flex'))
else:
cmd.extend(('--output', 'gazetteer', '--hstore', '--latlon'))
cmd.append('--append' if options['append'] else '--create')
if options['flatnode_file']:
cmd.extend(('--flat-nodes', options['flatnode_file']))
for key, param in (('slim_data', '--tablespace-slim-data'),
('slim_index', '--tablespace-slim-index'),
('main_data', '--tablespace-main-data'),
('main_index', '--tablespace-main-index')):
if options['tablespaces'][key]:
cmd.extend((param, options['tablespaces'][key]))
if options['tablespaces']['main_data']:
env['NOMINATIM_TABLESPACE_PLACE_DATA'] = options['tablespaces']['main_data']
if options['tablespaces']['main_index']:
env['NOMINATIM_TABLESPACE_PLACE_INDEX'] = options['tablespaces']['main_index']
if options.get('disable_jit', False):
env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'
if 'import_data' in options:
cmd.extend(('-r', 'xml', '-'))
elif isinstance(options['import_file'], list):
for fname in options['import_file']:
cmd.append(str(fname))
else:
cmd.append(str(options['import_file']))
subprocess.run(cmd, cwd=options.get('cwd', '.'),
input=options.get('import_data'),
env=env, check=True)
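# A minimal sketch of the options mapping this function consumes; the values
# shown are assumptions for illustration (paths are pathlib.Path objects):
#
#   options = {'dsn': 'dbname=nominatim', 'osm2pgsql': None, 'append': False,
#              'threads': 1, 'osm2pgsql_cache': 1000,
#              'osm2pgsql_style': 'import-full.lua',
#              'osm2pgsql_style_path': Path('/usr/share/nominatim/lib-lua'),
#              'flatnode_file': '', 'import_file': Path('planet.osm.pbf'),
#              'tablespaces': {'slim_data': '', 'slim_index': '',
#                              'main_data': '', 'main_index': ''}}
#   run_osm2pgsql(options)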

View File

@@ -0,0 +1,58 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for removing unnecessary data from the database.
"""
from typing import Optional
from pathlib import Path
from psycopg2 import sql as pysql
from nominatim_core.db.connection import Connection
UPDATE_TABLES = [
'address_levels',
'gb_postcode',
'import_osmosis_log',
'import_polygon_%',
'location_area%',
'location_road%',
'place',
'planet_osm_%',
'search_name_%',
'us_postcode',
'wikipedia_%'
]
def drop_update_tables(conn: Connection) -> None:
""" Drop all tables only necessary for updating the database from
OSM replication data.
"""
parts = (pysql.SQL("(tablename LIKE {})").format(pysql.Literal(t)) for t in UPDATE_TABLES)
with conn.cursor() as cur:
cur.execute(pysql.SQL("SELECT tablename FROM pg_tables WHERE ")
+ pysql.SQL(' or ').join(parts))
tables = [r[0] for r in cur]
for table in tables:
cur.drop_table(table, cascade=True)
conn.commit()
def drop_flatnode_file(fpath: Optional[Path]) -> None:
""" Remove the flatnode file if it exists.
"""
if fpath and fpath.exists():
fpath.unlink()
def is_frozen(conn: Connection) -> bool:
""" Returns true if database is in a frozen state
"""
return conn.table_exists('place') is False

View File

@@ -0,0 +1,405 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for database migration to newer software versions.
"""
from typing import List, Tuple, Callable, Any
import logging
from psycopg2 import sql as pysql
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.db import properties
from nominatim_core.db.connection import connect, Connection
from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
from ..tokenizer import factory as tokenizer_factory
from . import refresh
LOG = logging.getLogger()
_MIGRATION_FUNCTIONS : List[Tuple[NominatimVersion, Callable[..., None]]] = []
def migrate(config: Configuration, paths: Any) -> int:
""" Check for the current database version and execute migrations,
if necessary.
"""
with connect(config.get_libpq_dsn()) as conn:
if conn.table_exists('nominatim_properties'):
db_version_str = properties.get_property(conn, 'database_version')
else:
db_version_str = None
if db_version_str is not None:
db_version = parse_version(db_version_str)
if db_version == NOMINATIM_VERSION:
LOG.warning("Database already at latest version (%s)", db_version_str)
return 0
LOG.info("Detected database version: %s", db_version_str)
else:
db_version = _guess_version(conn)
for version, func in _MIGRATION_FUNCTIONS:
if db_version < version or \
(db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)):
title = func.__doc__ or ''
LOG.warning("Running: %s (%s)", title.split('\n', 1)[0], version)
kwargs = dict(conn=conn, config=config, paths=paths)
func(**kwargs)
conn.commit()
LOG.warning('Updating SQL functions.')
refresh.create_functions(conn, config)
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
tokenizer.update_sql_functions(config)
properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))
conn.commit()
return 0
def _guess_version(conn: Connection) -> NominatimVersion:
""" Guess a database version when there is no property table yet.
Only migrations for 3.6 and later are supported, so bail out
when the version seems older.
"""
with conn.cursor() as cur:
# In version 3.6, the country_name table was updated. Check for that.
cnt = cur.scalar("""SELECT count(*) FROM
(SELECT svals(name) FROM country_name
WHERE country_code = 'gb')x;
""")
if cnt < 100:
LOG.fatal('It looks like your database was imported with a version '
'prior to 3.6.0. Automatic migration not possible.')
raise UsageError('Migration not possible.')
return NominatimVersion(3, 5, 0, 99)
def _migration(major: int, minor: int, patch: int = 0,
dbpatch: int = 0) -> Callable[[Callable[..., None]], Callable[..., None]]:
""" Decorator for a single migration step. The parameters describe the
version after which the migration is applicable, i.e. before changing
from the given version to the next, the migration is required.
All migrations are run in the order in which they are defined in this
file. Do not run global SQL scripts for migrations as you cannot be sure
that these scripts do the same in later versions.
The SQL functions will always be reimported in full at the end of the
migration process, so migration functions may leave them in a
temporary state.
"""
def decorator(func: Callable[..., None]) -> Callable[..., None]:
version = NominatimVersion(major, minor, patch, dbpatch)
_MIGRATION_FUNCTIONS.append((version, func))
return func
return decorator
@_migration(3, 5, 0, 99)
def import_status_timestamp_change(conn: Connection, **_: Any) -> None:
""" Add timezone to timestamp in status table.
The import_status table has been changed to include timezone information
with the time stamp.
"""
with conn.cursor() as cur:
cur.execute("""ALTER TABLE import_status ALTER COLUMN lastimportdate
TYPE timestamp with time zone;""")
@_migration(3, 5, 0, 99)
def add_nominatim_property_table(conn: Connection, config: Configuration, **_: Any) -> None:
""" Add nominatim_property table.
"""
if not conn.table_exists('nominatim_properties'):
with conn.cursor() as cur:
cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
property TEXT,
value TEXT);
GRANT SELECT ON TABLE nominatim_properties TO {};
""").format(pysql.Identifier(config.DATABASE_WEBUSER)))
@_migration(3, 6, 0, 0)
def change_housenumber_transliteration(conn: Connection, **_: Any) -> None:
""" Transliterate housenumbers.
The database schema switched from saving raw housenumbers in
placex.housenumber to saving transliterated ones.
Note: the function create_housenumber_id() has been dropped in later
versions.
"""
with conn.cursor() as cur:
cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
RETURNS TEXT AS $$
DECLARE
normtext TEXT;
BEGIN
SELECT array_to_string(array_agg(trans), ';')
INTO normtext
FROM (SELECT lookup_word as trans,
getorcreate_housenumber_id(lookup_word)
FROM (SELECT make_standard_name(h) as lookup_word
FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
return normtext;
END;
$$ LANGUAGE plpgsql STABLE STRICT;""")
cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'")
cur.execute("""UPDATE placex
SET housenumber = create_housenumber_id(housenumber)
WHERE housenumber is not null""")
@_migration(3, 7, 0, 0)
def switch_placenode_geometry_index(conn: Connection, **_: Any) -> None:
""" Replace idx_placex_geometry_reverse_placeNode index.
Make the index slightly more permissive, so that it can also be used
when matching up boundaries and place nodes. It makes the
idx_placex_adminname index unnecessary.
"""
with conn.cursor() as cur:
cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex
USING GIST (geometry)
WHERE osm_type = 'N' and rank_search < 26
and class = 'place' and type != 'postcode'
and linked_place_id is null""")
cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
@_migration(3, 7, 0, 1)
def install_legacy_tokenizer(conn: Connection, config: Configuration, **_: Any) -> None:
""" Setup legacy tokenizer.
If no other tokenizer has been configured yet, then create the
configuration for the backwards-compatible legacy tokenizer
"""
if properties.get_property(conn, 'tokenizer') is None:
with conn.cursor() as cur:
for table in ('placex', 'location_property_osmline'):
has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
WHERE table_name = %s
and column_name = 'token_info'""",
(table, ))
if has_column == 0:
cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
.format(pysql.Identifier(table)))
tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
module_name='legacy')
tokenizer.migrate_database(config) # type: ignore[attr-defined]
@_migration(4, 0, 99, 0)
def create_tiger_housenumber_index(conn: Connection, **_: Any) -> None:
""" Create idx_location_property_tiger_parent_place_id with included
house number.
The inclusion is needed for efficient lookup of housenumbers in
full address searches.
"""
if conn.server_version_tuple() >= (11, 0, 0):
with conn.cursor() as cur:
cur.execute(""" CREATE INDEX IF NOT EXISTS
idx_location_property_tiger_housenumber_migrated
ON location_property_tiger
USING btree(parent_place_id)
INCLUDE (startnumber, endnumber) """)
@_migration(4, 0, 99, 1)
def create_interpolation_index_on_place(conn: Connection, **_: Any) -> None:
""" Create idx_place_interpolations for lookup of interpolation lines
on updates.
"""
with conn.cursor() as cur:
cur.execute("""CREATE INDEX IF NOT EXISTS idx_place_interpolations
ON place USING gist(geometry)
WHERE osm_type = 'W' and address ? 'interpolation'""")
@_migration(4, 0, 99, 2)
def add_step_column_for_interpolation(conn: Connection, **_: Any) -> None:
""" Add a new column 'step' to the interpolations table.
Also converts the data into the stricter format which requires that
startnumbers comply with the odd/even requirements.
"""
if conn.table_has_column('location_property_osmline', 'step'):
return
with conn.cursor() as cur:
# Mark invalid all interpolations with no intermediate numbers.
cur.execute("""UPDATE location_property_osmline SET startnumber = null
WHERE endnumber - startnumber <= 1 """)
# Align the start numbers where odd/even does not match.
cur.execute("""UPDATE location_property_osmline
SET startnumber = startnumber + 1,
linegeo = ST_LineSubString(linegeo,
1.0 / (endnumber - startnumber)::float,
1)
WHERE (interpolationtype = 'odd' and startnumber % 2 = 0)
or (interpolationtype = 'even' and startnumber % 2 = 1)
""")
# Mark invalid odd/even interpolations with no intermediate numbers.
cur.execute("""UPDATE location_property_osmline SET startnumber = null
WHERE interpolationtype in ('odd', 'even')
and endnumber - startnumber = 2""")
# Finally add the new column and populate it.
cur.execute("ALTER TABLE location_property_osmline ADD COLUMN step SMALLINT")
cur.execute("""UPDATE location_property_osmline
SET step = CASE WHEN interpolationtype = 'all'
THEN 1 ELSE 2 END
""")
@_migration(4, 0, 99, 3)
def add_step_column_for_tiger(conn: Connection, **_: Any) -> None:
""" Add a new column 'step' to the tiger data table.
"""
if conn.table_has_column('location_property_tiger', 'step'):
return
with conn.cursor() as cur:
cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT")
cur.execute("""UPDATE location_property_tiger
SET step = CASE WHEN interpolationtype = 'all'
THEN 1 ELSE 2 END
""")
@_migration(4, 0, 99, 4)
def add_derived_name_column_for_country_names(conn: Connection, **_: Any) -> None:
""" Add a new column 'derived_name' which in the future takes the
country names as imported from OSM data.
"""
if not conn.table_has_column('country_name', 'derived_name'):
with conn.cursor() as cur:
cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
@_migration(4, 0, 99, 5)
def mark_internal_country_names(conn: Connection, config: Configuration, **_: Any) -> None:
""" Names from the country table should be marked as internal to prevent
them from being deleted. Only necessary for ICU tokenizer.
"""
import psycopg2.extras # pylint: disable=import-outside-toplevel
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
with tokenizer.name_analyzer() as analyzer:
with conn.cursor() as cur:
psycopg2.extras.register_hstore(cur)
cur.execute("SELECT country_code, name FROM country_name")
for country_code, names in cur:
if not names:
names = {}
names['countrycode'] = country_code
analyzer.add_country_names(country_code, names)
@_migration(4, 1, 99, 0)
def add_place_deletion_todo_table(conn: Connection, **_: Any) -> None:
""" Add helper table for deleting data on updates.
The table is only necessary when updates are possible, i.e.
the database is not in freeze mode.
"""
if conn.table_exists('place'):
with conn.cursor() as cur:
cur.execute("""CREATE TABLE IF NOT EXISTS place_to_be_deleted (
osm_type CHAR(1),
osm_id BIGINT,
class TEXT,
type TEXT,
deferred BOOLEAN)""")
@_migration(4, 1, 99, 1)
def split_pending_index(conn: Connection, **_: Any) -> None:
""" Reorganise indexes for pending updates.
"""
if conn.table_exists('place'):
with conn.cursor() as cur:
cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_address_sector
ON placex USING BTREE (rank_address, geometry_sector)
WHERE indexed_status > 0""")
cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_boundaries_sector
ON placex USING BTREE (rank_search, geometry_sector)
WHERE class = 'boundary' and type = 'administrative'
and indexed_status > 0""")
cur.execute("DROP INDEX IF EXISTS idx_placex_pendingsector")
@_migration(4, 2, 99, 0)
def enable_forward_dependencies(conn: Connection, **_: Any) -> None:
""" Create indexes for updates with forward dependency tracking (long-running).
"""
if conn.table_exists('planet_osm_ways'):
with conn.cursor() as cur:
cur.execute("""SELECT * FROM pg_indexes
WHERE tablename = 'planet_osm_ways'
and indexdef LIKE '%nodes%'""")
if cur.rowcount == 0:
cur.execute("""CREATE OR REPLACE FUNCTION public.planet_osm_index_bucket(bigint[])
RETURNS bigint[]
LANGUAGE sql IMMUTABLE
AS $function$
SELECT ARRAY(SELECT DISTINCT unnest($1) >> 5)
$function$""")
cur.execute("""CREATE INDEX planet_osm_ways_nodes_bucket_idx
ON planet_osm_ways
USING gin (planet_osm_index_bucket(nodes))
WITH (fastupdate=off)""")
cur.execute("""CREATE INDEX planet_osm_rels_parts_idx
ON planet_osm_rels USING gin (parts)
WITH (fastupdate=off)""")
cur.execute("ANALYZE planet_osm_ways")
@_migration(4, 2, 99, 1)
def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) -> None:
""" Create improved index for reverse lookup of place nodes.
"""
with conn.cursor() as cur:
cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
ON placex
USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
WHERE rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND linked_place_id is null AND osm_type = 'N'
""")
@_migration(4, 4, 99, 0)
def create_postcode_area_lookup_index(conn: Connection, **_: Any) -> None:
""" Create index needed for looking up postcode areas from postocde points.
"""
with conn.cursor() as cur:
cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_postcode_areas
ON placex USING BTREE (country_code, postcode)
WHERE osm_type = 'R' AND class = 'boundary' AND type = 'postal_code'
""")
@_migration(4, 4, 99, 1)
def create_postcode_parent_index(conn: Connection, **_: Any) -> None:
""" Create index needed for updating postcodes when a parent changes.
"""
if conn.table_exists('planet_osm_ways'):
with conn.cursor() as cur:
cur.execute("""CREATE INDEX IF NOT EXISTS
idx_location_postcode_parent_place_id
ON location_postcode USING BTREE (parent_place_id)""")

View File

@@ -0,0 +1,234 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for importing, updating and otherwise maintaining the table
of artificial postcode centroids.
"""
from typing import Optional, Tuple, Dict, List, TextIO
from collections import defaultdict
from pathlib import Path
import csv
import gzip
import logging
from math import isfinite
from psycopg2 import sql as pysql
from nominatim_core.db.connection import connect, Connection
from nominatim_core.utils.centroid import PointsCentroid
from ..data.postcode_format import PostcodeFormatter, CountryPostcodeMatcher
from ..tokenizer.base import AbstractAnalyzer, AbstractTokenizer
LOG = logging.getLogger()
def _to_float(numstr: str, max_value: float) -> float:
""" Convert the number in string into a float. The number is expected
to be in the range of [-max_value, max_value]. Otherwise rises a
ValueError.
"""
num = float(numstr)
if not isfinite(num) or num <= -max_value or num >= max_value:
raise ValueError()
return num
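# For example, _to_float('47.5', 180.0) returns 47.5, while _to_float('181', 180.0)
# and _to_float('nan', 180.0) both raise a ValueError.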
class _PostcodeCollector:
""" Collector for postcodes of a single country.
"""
def __init__(self, country: str, matcher: Optional[CountryPostcodeMatcher]):
self.country = country
self.matcher = matcher
self.collected: Dict[str, PointsCentroid] = defaultdict(PointsCentroid)
self.normalization_cache: Optional[Tuple[str, Optional[str]]] = None
def add(self, postcode: str, x: float, y: float) -> None:
""" Add the given postcode to the collection cache. If the postcode
already existed, it is overwritten with the new centroid.
"""
if self.matcher is not None:
normalized: Optional[str]
if self.normalization_cache and self.normalization_cache[0] == postcode:
normalized = self.normalization_cache[1]
else:
match = self.matcher.match(postcode)
normalized = self.matcher.normalize(match) if match else None
self.normalization_cache = (postcode, normalized)
if normalized:
self.collected[normalized] += (x, y)
def commit(self, conn: Connection, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
""" Update postcodes for the country from the postcodes selected so far
as well as any externally supplied postcodes.
"""
self._update_from_external(analyzer, project_dir)
to_add, to_delete, to_update = self._compute_changes(conn)
LOG.info("Processing country '%s' (%s added, %s deleted, %s updated).",
self.country, len(to_add), len(to_delete), len(to_update))
with conn.cursor() as cur:
if to_add:
cur.execute_values(
"""INSERT INTO location_postcode
(place_id, indexed_status, country_code,
postcode, geometry) VALUES %s""",
to_add,
template=pysql.SQL("""(nextval('seq_place'), 1, {},
%s, 'SRID=4326;POINT(%s %s)')
""").format(pysql.Literal(self.country)))
if to_delete:
cur.execute("""DELETE FROM location_postcode
WHERE country_code = %s and postcode = any(%s)
""", (self.country, to_delete))
if to_update:
cur.execute_values(
pysql.SQL("""UPDATE location_postcode
SET indexed_status = 2,
geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
FROM (VALUES %s) AS v (pc, x, y)
WHERE country_code = {} and postcode = pc
""").format(pysql.Literal(self.country)), to_update)
def _compute_changes(self, conn: Connection) \
-> Tuple[List[Tuple[str, float, float]], List[str], List[Tuple[str, float, float]]]:
""" Compute which postcodes from the collected postcodes have to be
added or modified and which from the location_postcode table
have to be deleted.
"""
to_update = []
to_delete = []
with conn.cursor() as cur:
cur.execute("""SELECT postcode, ST_X(geometry), ST_Y(geometry)
FROM location_postcode
WHERE country_code = %s""",
(self.country, ))
for postcode, x, y in cur:
pcobj = self.collected.pop(postcode, None)
if pcobj:
newx, newy = pcobj.centroid()
                    # Use the absolute difference so that moves in either
                    # direction trigger an update.
                    if abs(x - newx) > 0.0000001 or abs(y - newy) > 0.0000001:
to_update.append((postcode, newx, newy))
else:
to_delete.append(postcode)
to_add = [(k, *v.centroid()) for k, v in self.collected.items()]
self.collected = defaultdict(PointsCentroid)
return to_add, to_delete, to_update
def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
""" Look for an external postcode file for the active country in
the project directory and add missing postcodes when found.
"""
csvfile = self._open_external(project_dir)
if csvfile is None:
return
try:
reader = csv.DictReader(csvfile)
for row in reader:
if 'postcode' not in row or 'lat' not in row or 'lon' not in row:
LOG.warning("Bad format for external postcode file for country '%s'."
" Ignored.", self.country)
return
postcode = analyzer.normalize_postcode(row['postcode'])
if postcode not in self.collected:
try:
                    # Do the float conversion separately; it might throw.
centroid = (_to_float(row['lon'], 180),
_to_float(row['lat'], 90))
self.collected[postcode] += centroid
except ValueError:
LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
row['lat'], row['lon'], self.country)
finally:
csvfile.close()
def _open_external(self, project_dir: Path) -> Optional[TextIO]:
fname = project_dir / f'{self.country}_postcodes.csv'
if fname.is_file():
LOG.info("Using external postcode file '%s'.", fname)
return open(fname, 'r', encoding='utf-8')
fname = project_dir / f'{self.country}_postcodes.csv.gz'
if fname.is_file():
LOG.info("Using external postcode file '%s'.", fname)
return gzip.open(fname, 'rt')
return None
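# For reference, a minimal external postcode file (a hypothetical
# de_postcodes.csv) only needs a header with postcode, lat and lon; any
# additional columns are ignored by the DictReader above:
#
#     postcode,lat,lon
#     10117,52.5170,13.3889
#     80331,48.1371,11.5754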
def update_postcodes(dsn: str, project_dir: Path, tokenizer: AbstractTokenizer) -> None:
""" Update the table of artificial postcodes.
        Computes artificial postcode centroids from the placex table,
        optionally enhances them with external data and then updates the
        postcodes in the table 'location_postcode'.
"""
matcher = PostcodeFormatter()
with tokenizer.name_analyzer() as analyzer:
with connect(dsn) as conn:
# First get the list of countries that currently have postcodes.
# (Doing this before starting to insert, so it is fast on import.)
with conn.cursor() as cur:
cur.execute("SELECT DISTINCT country_code FROM location_postcode")
todo_countries = set((row[0] for row in cur))
# Recompute the list of valid postcodes from placex.
with conn.cursor(name="placex_postcodes") as cur:
cur.execute("""
SELECT cc, pc, ST_X(centroid), ST_Y(centroid)
FROM (SELECT
COALESCE(plx.country_code,
get_country_code(ST_Centroid(pl.geometry))) as cc,
pl.address->'postcode' as pc,
COALESCE(plx.centroid, ST_Centroid(pl.geometry)) as centroid
FROM place AS pl LEFT OUTER JOIN placex AS plx
ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null) xx
WHERE pc IS NOT null AND cc IS NOT null
ORDER BY cc, pc""")
collector = None
for country, postcode, x, y in cur:
if collector is None or country != collector.country:
if collector is not None:
collector.commit(conn, analyzer, project_dir)
collector = _PostcodeCollector(country, matcher.get_matcher(country))
todo_countries.discard(country)
collector.add(postcode, x, y)
if collector is not None:
collector.commit(conn, analyzer, project_dir)
# Now handle any countries that are only in the postcode table.
for country in todo_countries:
fmt = matcher.get_matcher(country)
_PostcodeCollector(country, fmt).commit(conn, analyzer, project_dir)
conn.commit()
analyzer.update_postcodes_from_db()
def can_compute(dsn: str) -> bool:
"""
Check that the place table exists so that
postcodes can be computed.
"""
with connect(dsn) as conn:
return conn.table_exists('place')
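# Editor's sketch of a minimal driver for this module; the DSN and project
# directory are assumptions, and the tokenizer would come from the tokenizer
# factory used elsewhere in this package.
def _example_refresh_postcodes(tokenizer: AbstractTokenizer) -> None:
    dsn = 'dbname=nominatim'                      # hypothetical DSN
    project_dir = Path('/srv/nominatim-project')  # hypothetical project dir
    if can_compute(dsn):
        update_postcodes(dsn, project_dir, tokenizer)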

View File

@@ -0,0 +1,346 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for bringing auxiliary data in the database up-to-date.
"""
from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast
import csv
import gzip
import logging
from textwrap import dedent
from pathlib import Path
from psycopg2 import sql as pysql
from nominatim_core.config import Configuration
from nominatim_core.db.connection import Connection, connect
from nominatim_core.db.utils import execute_file, CopyBuffer
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
from ..version import NOMINATIM_VERSION
LOG = logging.getLogger()
OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'}
def _add_address_level_rows_from_entry(rows: MutableSequence[Tuple[Any, ...]],
entry: Mapping[str, Any]) -> None:
""" Converts a single entry from the JSON format for address rank
descriptions into a flat format suitable for inserting into a
PostgreSQL table and adds these lines to `rows`.
"""
countries = entry.get('countries') or (None, )
for key, values in entry['tags'].items():
for value, ranks in values.items():
if isinstance(ranks, list):
rank_search, rank_address = ranks
else:
rank_search = rank_address = ranks
if not value:
value = None
for country in countries:
rows.append((country, key, value, rank_search, rank_address))
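# To make the flattening concrete: a hypothetical configuration entry and the
# rows it expands to (one row per country/tag/value combination).
def _example_address_level_rows() -> List[Tuple[Any, ...]]:
    rows: List[Tuple[Any, ...]] = []
    _add_address_level_rows_from_entry(
        rows, {'countries': ['de'],
               'tags': {'place': {'city': [16, 16], 'hamlet': 20}}})
    # rows == [('de', 'place', 'city', 16, 16),
    #          ('de', 'place', 'hamlet', 20, 20)]
    return rows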
def load_address_levels(conn: Connection, table: str, levels: Sequence[Mapping[str, Any]]) -> None:
""" Replace the `address_levels` table with the contents of `levels'.
A new table is created any previously existing table is dropped.
The table has the following columns:
country, class, type, rank_search, rank_address
"""
rows: List[Tuple[Any, ...]] = []
for entry in levels:
_add_address_level_rows_from_entry(rows, entry)
with conn.cursor() as cur:
cur.drop_table(table)
cur.execute(pysql.SQL("""CREATE TABLE {} (
country_code varchar(2),
class TEXT,
type TEXT,
rank_search SMALLINT,
rank_address SMALLINT)
""").format(pysql.Identifier(table)))
cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
.format(pysql.Identifier(table)), rows)
cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
.format(pysql.Identifier(table)))
conn.commit()
def load_address_levels_from_config(conn: Connection, config: Configuration) -> None:
""" Replace the `address_levels` table with the content as
defined in the given configuration. Uses the parameter
NOMINATIM_ADDRESS_LEVEL_CONFIG to determine the location of the
configuration file.
"""
cfg = config.load_sub_configuration('', config='ADDRESS_LEVEL_CONFIG')
load_address_levels(conn, 'address_levels', cfg)
def create_functions(conn: Connection, config: Configuration,
enable_diff_updates: bool = True,
enable_debug: bool = False) -> None:
""" (Re)create the PL/pgSQL functions.
"""
sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'functions.sql',
disable_diff_updates=not enable_diff_updates,
debug=enable_debug)
WEBSITE_SCRIPTS = (
'deletable.php',
'details.php',
'lookup.php',
'polygons.php',
'reverse.php',
'search.php',
'status.php'
)
# constants needed by PHP scripts: PHP name, config name, type
PHP_CONST_DEFS = (
('Database_DSN', 'DATABASE_DSN', str),
('Default_Language', 'DEFAULT_LANGUAGE', str),
('Log_DB', 'LOG_DB', bool),
('Log_File', 'LOG_FILE', Path),
('NoAccessControl', 'CORS_NOACCESSCONTROL', bool),
('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int),
('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int),
('Search_BatchMode', 'SEARCH_BATCH_MODE', bool),
('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str),
('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool),
('MapIcon_URL', 'MAPICON_URL', str),
('Search_WithinCountries', 'SEARCH_WITHIN_COUNTRIES', bool),
)
def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
""" Replaces the wikipedia importance tables with new data.
The import is run in a single transaction so that the new data
        is replaced seamlessly.
Returns 0 if all was well and 1 if the importance file could not
be found. Throws an exception if there was an error reading the file.
"""
if import_importance_csv(dsn, data_path / 'wikimedia-importance.csv.gz') == 0 \
or import_importance_sql(dsn, data_path / 'wikimedia-importance.sql.gz',
ignore_errors) == 0:
return 0
return 1
def import_importance_csv(dsn: str, data_file: Path) -> int:
""" Replace wikipedia importance table with data from a
single CSV file.
        The file must be a gzipped, tab-separated CSV file with the following columns:
language, title, importance, wikidata_id
Other columns may be present but will be ignored.
"""
if not data_file.exists():
return 1
    # Only import the first occurrence of a wikidata ID.
# This keeps indexes and table small.
wd_done = set()
with connect(dsn) as conn:
with conn.cursor() as cur:
cur.drop_table('wikipedia_article')
cur.drop_table('wikipedia_redirect')
cur.drop_table('wikimedia_importance')
cur.execute("""CREATE TABLE wikimedia_importance (
language TEXT NOT NULL,
title TEXT NOT NULL,
importance double precision NOT NULL,
wikidata TEXT
) """)
with gzip.open(str(data_file), 'rt') as fd, CopyBuffer() as buf:
for row in csv.DictReader(fd, delimiter='\t', quotechar='|'):
wd_id = int(row['wikidata_id'][1:])
buf.add(row['language'], row['title'], row['importance'],
None if wd_id in wd_done else row['wikidata_id'])
wd_done.add(wd_id)
if buf.size() > 10000000:
with conn.cursor() as cur:
buf.copy_out(cur, 'wikimedia_importance',
columns=['language', 'title', 'importance',
'wikidata'])
with conn.cursor() as cur:
buf.copy_out(cur, 'wikimedia_importance',
columns=['language', 'title', 'importance', 'wikidata'])
with conn.cursor() as cur:
cur.execute("""CREATE INDEX IF NOT EXISTS idx_wikimedia_importance_title
ON wikimedia_importance (title)""")
cur.execute("""CREATE INDEX IF NOT EXISTS idx_wikimedia_importance_wikidata
ON wikimedia_importance (wikidata)
WHERE wikidata is not null""")
conn.commit()
return 0
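# Despite the .csv suffix, the reader above expects tab-separated values with
# '|' as the quote character. A hypothetical line after the header (columns
# separated by literal tab characters, shown here as <TAB>):
#
#     language<TAB>title<TAB>importance<TAB>wikidata_id
#     en<TAB>Berlin<TAB>0.8123<TAB>Q64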
def import_importance_sql(dsn: str, data_file: Path, ignore_errors: bool) -> int:
""" Replace wikipedia importance table with data from an SQL file.
"""
if not data_file.exists():
return 1
pre_code = """BEGIN;
DROP TABLE IF EXISTS "wikipedia_article";
DROP TABLE IF EXISTS "wikipedia_redirect";
DROP TABLE IF EXISTS "wikipedia_importance";
"""
post_code = "COMMIT"
execute_file(dsn, data_file, ignore_errors=ignore_errors,
pre_code=pre_code, post_code=post_code)
return 0
def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
""" Replaces the secondary importance raster data table with new data.
Returns 0 if all was well and 1 if the raster SQL file could not
be found. Throws an exception if there was an error reading the file.
"""
datafile = data_path / 'secondary_importance.sql.gz'
if not datafile.exists():
return 1
with connect(dsn) as conn:
postgis_version = conn.postgis_version_tuple()
if postgis_version[0] < 3:
LOG.error('PostGIS version is too old for using OSM raster data.')
return 2
execute_file(dsn, datafile, ignore_errors=ignore_errors)
return 0
def recompute_importance(conn: Connection) -> None:
""" Recompute wikipedia links and importance for all entries in placex.
        This is a long-running operation that must not be executed in
parallel with updates.
"""
with conn.cursor() as cur:
cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL')
cur.execute("""
UPDATE placex SET (wikipedia, importance) =
(SELECT wikipedia, importance
FROM compute_importance(extratags, country_code, rank_search, centroid))
""")
cur.execute("""
UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
FROM placex d
WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
and (s.wikipedia is null or s.importance < d.importance);
""")
cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')
conn.commit()
def _quote_php_variable(var_type: Type[Any], config: Configuration,
conf_name: str) -> str:
if var_type == bool:
return 'true' if config.get_bool(conf_name) else 'false'
if var_type == int:
return cast(str, getattr(config, conf_name))
if not getattr(config, conf_name):
return 'false'
if var_type == Path:
value = str(config.get_path(conf_name) or '')
else:
value = getattr(config, conf_name)
quoted = value.replace("'", "\\'")
return f"'{quoted}'"
def setup_website(basedir: Path, config: Configuration, conn: Connection) -> None:
""" Create the website script stubs.
"""
if config.lib_dir.php is None:
LOG.info("Python frontend does not require website setup. Skipping.")
return
if not basedir.exists():
LOG.info('Creating website directory.')
basedir.mkdir()
assert config.project_dir is not None
basedata = dedent(f"""\
<?php
@define('CONST_Debug', $_GET['debug'] ?? false);
@define('CONST_LibDir', '{config.lib_dir.php}');
@define('CONST_TokenizerDir', '{config.project_dir / 'tokenizer'}');
@define('CONST_NominatimVersion', '{NOMINATIM_VERSION!s}');
""")
for php_name, conf_name, var_type in PHP_CONST_DEFS:
varout = _quote_php_variable(var_type, config, conf_name)
basedata += f"@define('CONST_{php_name}', {varout});\n"
template = "\nrequire_once(CONST_LibDir.'/website/{}');\n"
search_name_table_exists = bool(conn and conn.table_exists('search_name'))
for script in WEBSITE_SCRIPTS:
if not search_name_table_exists and script == 'search.php':
out = template.format('reverse-only-search.php')
else:
out = template.format(script)
(basedir / script).write_text(basedata + out, 'utf-8')
def invalidate_osm_object(osm_type: str, osm_id: int, conn: Connection,
recursive: bool = True) -> None:
""" Mark the given OSM object for reindexing. When 'recursive' is set
to True (the default), then all dependent objects are marked for
reindexing as well.
        'osm_type' must be one of 'N' (node), 'W' (way) or 'R' (relation).
If the given object does not exist, then nothing happens.
"""
assert osm_type in ('N', 'R', 'W')
LOG.warning("Invalidating OSM %s %s%s.",
OSM_TYPE[osm_type], osm_id,
' and its dependent places' if recursive else '')
with conn.cursor() as cur:
if recursive:
sql = """SELECT place_force_update(place_id)
FROM placex WHERE osm_type = %s and osm_id = %s"""
else:
sql = """UPDATE placex SET indexed_status = 2
WHERE osm_type = %s and osm_id = %s"""
cur.execute(sql, (osm_type, osm_id))
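# Editor's sketch of a call site; the DSN and way ID are made up. Note that
# the function itself does not commit, so the caller is responsible for that.
def _example_invalidate(dsn: str = 'dbname=nominatim') -> None:
    with connect(dsn) as conn:
        invalidate_osm_object('W', 23456789, conn)
        conn.commit()   # commit the indexed_status changes made above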

View File

@@ -0,0 +1,206 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for updating a database from a replication source.
"""
from typing import ContextManager, MutableMapping, Any, Generator, cast, Iterator
from contextlib import contextmanager
import datetime as dt
from enum import Enum
import logging
import time
import types
import urllib.request as urlrequest
import requests
from nominatim_core.errors import UsageError
from nominatim_core.db import status
from nominatim_core.db.connection import Connection, connect
from .exec_utils import run_osm2pgsql
try:
from osmium.replication.server import ReplicationServer
from osmium import WriteHandler
from osmium import version as pyo_version
except ImportError as exc:
logging.getLogger().critical("pyosmium not installed. Replication functions not available.\n"
"To install pyosmium via pip: pip3 install osmium")
raise UsageError("replication tools not available") from exc
LOG = logging.getLogger()
def init_replication(conn: Connection, base_url: str,
socket_timeout: int = 60) -> None:
""" Set up replication for the server at the given base URL.
"""
LOG.info("Using replication source: %s", base_url)
date = status.compute_database_date(conn)
# margin of error to make sure we get all data
date -= dt.timedelta(hours=3)
with _make_replication_server(base_url, socket_timeout) as repl:
seq = repl.timestamp_to_sequence(date)
if seq is None:
LOG.fatal("Cannot reach the configured replication service '%s'.\n"
"Does the URL point to a directory containing OSM update data?",
base_url)
raise UsageError("Failed to reach replication service")
status.set_status(conn, date=date, seq=seq)
LOG.warning("Updates initialised at sequence %s (%s)", seq, date)
def check_for_updates(conn: Connection, base_url: str,
socket_timeout: int = 60) -> int:
""" Check if new data is available from the replication service at the
given base URL.
"""
_, seq, _ = status.get_status(conn)
if seq is None:
LOG.error("Replication not set up. "
"Please run 'nominatim replication --init' first.")
return 254
with _make_replication_server(base_url, socket_timeout) as repl:
state = repl.get_state_info()
if state is None:
LOG.error("Cannot get state for URL %s.", base_url)
return 253
if state.sequence <= seq:
LOG.warning("Database is up to date.")
return 2
LOG.warning("New data available (%i => %i).", seq, state.sequence)
return 0
class UpdateState(Enum):
""" Possible states after an update has run.
"""
UP_TO_DATE = 0
MORE_PENDING = 2
NO_CHANGES = 3
def update(dsn: str, options: MutableMapping[str, Any],
socket_timeout: int = 60) -> UpdateState:
""" Update database from the next batch of data. Returns the state of
updates according to `UpdateState`.
"""
with connect(dsn) as conn:
startdate, startseq, indexed = status.get_status(conn)
conn.commit()
if startseq is None:
LOG.error("Replication not set up. "
"Please run 'nominatim replication --init' first.")
raise UsageError("Replication not set up.")
assert startdate is not None
if not indexed and options['indexed_only']:
LOG.info("Skipping update. There is data that needs indexing.")
return UpdateState.MORE_PENDING
last_since_update = dt.datetime.now(dt.timezone.utc) - startdate
update_interval = dt.timedelta(seconds=options['update_interval'])
if last_since_update < update_interval:
duration = (update_interval - last_since_update).seconds
LOG.warning("Sleeping for %s sec before next update.", duration)
time.sleep(duration)
if options['import_file'].exists():
options['import_file'].unlink()
# Read updates into file.
with _make_replication_server(options['base_url'], socket_timeout) as repl:
outhandler = WriteHandler(str(options['import_file']))
endseq = repl.apply_diffs(outhandler, startseq + 1,
max_size=options['max_diff_size'] * 1024)
outhandler.close()
if endseq is None:
return UpdateState.NO_CHANGES
with connect(dsn) as conn:
run_osm2pgsql_updates(conn, options)
# Write the current status to the file
endstate = repl.get_state_info(endseq)
status.set_status(conn, endstate.timestamp if endstate else None,
seq=endseq, indexed=False)
conn.commit()
return UpdateState.UP_TO_DATE
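# Editor's sketch of a driving loop around update(); `options` is the same
# osm2pgsql option mapping used above (keys 'base_url', 'update_interval',
# 'import_file', 'max_diff_size', 'indexed_only', ...).
def _example_catch_up(dsn: str, options: MutableMapping[str, Any]) -> None:
    # Keep applying diffs until the replication source has nothing new.
    while True:
        state = update(dsn, options)
        if state is UpdateState.NO_CHANGES:
            break   # nothing new upstream; try again later
        # For UP_TO_DATE/MORE_PENDING the real CLI would run indexing here
        # before fetching the next batch.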
def run_osm2pgsql_updates(conn: Connection, options: MutableMapping[str, Any]) -> None:
""" Run osm2pgsql in append mode.
"""
# Remove any stale deletion marks.
with conn.cursor() as cur:
cur.execute('TRUNCATE place_to_be_deleted')
conn.commit()
# Consume updates with osm2pgsql.
options['append'] = True
options['disable_jit'] = conn.server_version_tuple() >= (11, 0)
run_osm2pgsql(options)
# Handle deletions
with conn.cursor() as cur:
cur.execute('SELECT flush_deleted_places()')
conn.commit()
def _make_replication_server(url: str, timeout: int) -> ContextManager[ReplicationServer]:
""" Returns a ReplicationServer in form of a context manager.
Creates a light wrapper around older versions of pyosmium that did
not support the context manager interface.
"""
if hasattr(ReplicationServer, '__enter__'):
# Patches the open_url function for pyosmium >= 3.2
# where the socket timeout is no longer respected.
def patched_open_url(self: ReplicationServer, url: urlrequest.Request) -> Any:
""" Download a resource from the given URL and return a byte sequence
of the content.
"""
headers = {"User-Agent" : f"Nominatim (pyosmium/{pyo_version.pyosmium_release})"}
if self.session is not None:
return self.session.get(url.get_full_url(),
headers=headers, timeout=timeout or None,
stream=True)
@contextmanager
def _get_url_with_session() -> Iterator[requests.Response]:
with requests.Session() as session:
request = session.get(url.get_full_url(),
headers=headers, timeout=timeout or None,
stream=True)
yield request
return _get_url_with_session()
repl = ReplicationServer(url)
setattr(repl, 'open_url', types.MethodType(patched_open_url, repl))
return cast(ContextManager[ReplicationServer], repl)
@contextmanager
def get_cm() -> Generator[ReplicationServer, None, None]:
yield ReplicationServer(url)
return get_cm()

View File

@@ -0,0 +1,78 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Contains the class which handles statistics for the
import of special phrases.
"""
import logging
LOG = logging.getLogger()
class SpecialPhrasesImporterStatistics():
"""
Class handling statistics of the import
process of special phrases.
"""
def __init__(self) -> None:
        self._initialize_values()
    def _initialize_values(self) -> None:
"""
Set all counts for the global
import to 0.
"""
self.tables_created = 0
self.tables_deleted = 0
self.tables_ignored = 0
self.invalids = 0
def notify_one_phrase_invalid(self) -> None:
"""
Add +1 to the count of invalid entries
fetched from the wiki.
"""
self.invalids += 1
def notify_one_table_created(self) -> None:
"""
Add +1 to the count of created tables.
"""
self.tables_created += 1
def notify_one_table_deleted(self) -> None:
"""
Add +1 to the count of deleted tables.
"""
self.tables_deleted += 1
def notify_one_table_ignored(self) -> None:
"""
Add +1 to the count of ignored tables.
"""
self.tables_ignored += 1
def notify_import_done(self) -> None:
"""
Print stats for the whole import process
and reset all values.
"""
LOG.info('====================================================================')
LOG.info('Final statistics of the import:')
LOG.info('- %s phrases were invalid.', self.invalids)
if self.invalids > 0:
LOG.info(' Those invalid phrases have been skipped.')
        LOG.info('- %s tables were ignored as they already exist in the database',
self.tables_ignored)
LOG.info('- %s tables were created', self.tables_created)
LOG.info('- %s tables were deleted from the database', self.tables_deleted)
if self.tables_deleted > 0:
LOG.info(' They were deleted as they are not valid anymore.')
if self.invalids > 0:
LOG.warning('%s phrases were invalid and have been skipped during the whole process.',
self.invalids)
        self._initialize_values()

View File

@@ -0,0 +1,46 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module containing the SPCsvLoader class.
The class allows loading phrases from a csv file.
"""
from typing import Iterable
import csv
import os
from nominatim_core.errors import UsageError
from .special_phrase import SpecialPhrase
class SPCsvLoader:
"""
Handles loading of special phrases from external csv file.
"""
def __init__(self, csv_path: str) -> None:
self.csv_path = csv_path
def generate_phrases(self) -> Iterable[SpecialPhrase]:
""" Open and parse the given csv file.
Create the corresponding SpecialPhrases.
"""
self._check_csv_validity()
with open(self.csv_path, encoding='utf-8') as fd:
reader = csv.DictReader(fd, delimiter=',')
for row in reader:
yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
def _check_csv_validity(self) -> None:
"""
Check that the csv file has the right extension.
"""
_, extension = os.path.splitext(self.csv_path)
if extension != '.csv':
raise UsageError(f'The file {self.csv_path} is not a csv file.')
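# For reference, a hypothetical input file accepted by this loader; the
# header must provide the four columns read above:
#
#     phrase,class,type,operator
#     Restaurant,amenity,restaurant,-
#     Hotel in,tourism,hotel,in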

View File

@@ -0,0 +1,274 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module containing the class handling the import
of the special phrases.
Phrases are analyzed and imported into the database.
The phrases already present in the database which are no longer
valid are removed.
"""
from typing import Iterable, Tuple, Mapping, Sequence, Optional, Set
import logging
import re
from psycopg2.sql import Identifier, SQL
from nominatim_core.typing import Protocol
from nominatim_core.config import Configuration
from nominatim_core.db.connection import Connection
from .importer_statistics import SpecialPhrasesImporterStatistics
from .special_phrase import SpecialPhrase
from ...tokenizer.base import AbstractTokenizer
LOG = logging.getLogger()
def _classtype_table(phrase_class: str, phrase_type: str) -> str:
""" Return the name of the table for the given class and type.
"""
return f'place_classtype_{phrase_class}_{phrase_type}'
class SpecialPhraseLoader(Protocol):
""" Protocol for classes implementing a loader for special phrases.
"""
def generate_phrases(self) -> Iterable[SpecialPhrase]:
""" Generates all special phrase terms this loader can produce.
"""
class SPImporter():
# pylint: disable-msg=too-many-instance-attributes
"""
    Class handling the import of special phrases into the database.
    Takes an SP loader which loads the phrases from an external source.
"""
def __init__(self, config: Configuration, conn: Connection,
sp_loader: SpecialPhraseLoader) -> None:
self.config = config
self.db_connection = conn
self.sp_loader = sp_loader
self.statistics_handler = SpecialPhrasesImporterStatistics()
self.black_list, self.white_list = self._load_white_and_black_lists()
self.sanity_check_pattern = re.compile(r'^\w+$')
# This set will contain all existing phrases to be added.
# It contains tuples with the following format: (label, class, type, operator)
self.word_phrases: Set[Tuple[str, str, str, str]] = set()
        # This set will contain all existing place_classtype tables which
        # don't match any special phrases class/type on the wiki.
self.table_phrases_to_delete: Set[str] = set()
def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool) -> None:
"""
Iterate through all SpecialPhrases extracted from the
loader and import them into the database.
        If should_replace is set to True, only the loaded phrases
        will be kept in the database. All other phrases already
in the database will be removed.
"""
        LOG.warning('Special phrases import starting')
self._fetch_existing_place_classtype_tables()
# Store pairs of class/type for further processing
class_type_pairs = set()
for phrase in self.sp_loader.generate_phrases():
result = self._process_phrase(phrase)
if result:
class_type_pairs.add(result)
self._create_classtype_table_and_indexes(class_type_pairs)
if should_replace:
self._remove_non_existent_tables_from_db()
self.db_connection.commit()
with tokenizer.name_analyzer() as analyzer:
analyzer.update_special_phrases(self.word_phrases, should_replace)
LOG.warning('Import done.')
self.statistics_handler.notify_import_done()
def _fetch_existing_place_classtype_tables(self) -> None:
"""
Fetch existing place_classtype tables.
Fill the table_phrases_to_delete set of the class.
"""
query = """
SELECT table_name
FROM information_schema.tables
WHERE table_schema='public'
AND table_name like 'place_classtype_%';
"""
with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL(query))
for row in db_cursor:
self.table_phrases_to_delete.add(row[0])
def _load_white_and_black_lists(self) \
-> Tuple[Mapping[str, Sequence[str]], Mapping[str, Sequence[str]]]:
"""
        Load white and black lists from phrase-settings.json.
"""
settings = self.config.load_sub_configuration('phrase-settings.json')
return settings['blackList'], settings['whiteList']
def _check_sanity(self, phrase: SpecialPhrase) -> bool:
"""
Check sanity of given inputs in case somebody added garbage in the wiki.
        If a bad class/type is detected, the phrase is reported as invalid
        and skipped.
"""
        class_matches = self.sanity_check_pattern.findall(phrase.p_class)
        type_matches = self.sanity_check_pattern.findall(phrase.p_type)
        if not class_matches or not type_matches:
LOG.warning("Bad class/type: %s=%s. It will not be imported",
phrase.p_class, phrase.p_type)
return False
return True
def _process_phrase(self, phrase: SpecialPhrase) -> Optional[Tuple[str, str]]:
"""
        Processes the given phrase by checking the black and white lists
        and sanity.
Return the class/type pair corresponding to the phrase.
"""
# blacklisting: disallow certain class/type combinations
if phrase.p_class in self.black_list.keys() \
and phrase.p_type in self.black_list[phrase.p_class]:
return None
# whitelisting: if class is in whitelist, allow only tags in the list
if phrase.p_class in self.white_list.keys() \
and phrase.p_type not in self.white_list[phrase.p_class]:
return None
# sanity check, in case somebody added garbage in the wiki
if not self._check_sanity(phrase):
self.statistics_handler.notify_one_phrase_invalid()
return None
self.word_phrases.add((phrase.p_label, phrase.p_class,
phrase.p_type, phrase.p_operator))
return (phrase.p_class, phrase.p_type)
def _create_classtype_table_and_indexes(self,
class_type_pairs: Iterable[Tuple[str, str]]) -> None:
"""
Create table place_classtype for each given pair.
Also create indexes on place_id and centroid.
"""
LOG.warning('Create tables and indexes...')
sql_tablespace = self.config.TABLESPACE_AUX_DATA
if sql_tablespace:
sql_tablespace = ' TABLESPACE ' + sql_tablespace
with self.db_connection.cursor() as db_cursor:
db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
for pair in class_type_pairs:
phrase_class = pair[0]
phrase_type = pair[1]
table_name = _classtype_table(phrase_class, phrase_type)
if table_name in self.table_phrases_to_delete:
self.statistics_handler.notify_one_table_ignored()
                # Remove this table from the ones to delete as it matches a
                # class/type still existing in the special phrases of the wiki.
                self.table_phrases_to_delete.remove(table_name)
                # There is then no need to create the table and indexes.
continue
# Table creation
self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)
# Indexes creation
self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type)
            # Grant read access to the web user.
self._grant_access_to_webuser(phrase_class, phrase_type)
self.statistics_handler.notify_one_table_created()
with self.db_connection.cursor() as db_cursor:
db_cursor.execute("DROP INDEX idx_placex_classtype")
def _create_place_classtype_table(self, sql_tablespace: str,
phrase_class: str, phrase_type: str) -> None:
"""
        Create the table place_classtype for the given phrase_class/phrase_type
        if it does not exist yet.
"""
table_name = _classtype_table(phrase_class, phrase_type)
with self.db_connection.cursor() as cur:
cur.execute(SQL("""CREATE TABLE IF NOT EXISTS {} {} AS
SELECT place_id AS place_id,
st_centroid(geometry) AS centroid
FROM placex
WHERE class = %s AND type = %s
""").format(Identifier(table_name), SQL(sql_tablespace)),
(phrase_class, phrase_type))
def _create_place_classtype_indexes(self, sql_tablespace: str,
phrase_class: str, phrase_type: str) -> None:
"""
Create indexes on centroid and place_id for the place_classtype table.
"""
index_prefix = f'idx_place_classtype_{phrase_class}_{phrase_type}_'
base_table = _classtype_table(phrase_class, phrase_type)
# Index on centroid
if not self.db_connection.index_exists(index_prefix + 'centroid'):
with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL("CREATE INDEX {} ON {} USING GIST (centroid) {}")
.format(Identifier(index_prefix + 'centroid'),
Identifier(base_table),
SQL(sql_tablespace)))
# Index on place_id
if not self.db_connection.index_exists(index_prefix + 'place_id'):
with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL("CREATE INDEX {} ON {} USING btree(place_id) {}")
.format(Identifier(index_prefix + 'place_id'),
Identifier(base_table),
SQL(sql_tablespace)))
def _grant_access_to_webuser(self, phrase_class: str, phrase_type: str) -> None:
"""
        Grant read access on the table place_classtype to the webuser.
"""
table_name = _classtype_table(phrase_class, phrase_type)
with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
.format(Identifier(table_name),
Identifier(self.config.DATABASE_WEBUSER)))
def _remove_non_existent_tables_from_db(self) -> None:
"""
        Remove special phrases which don't exist on the wiki anymore.
Delete the place_classtype tables.
"""
LOG.warning('Cleaning database...')
# Delete place_classtype tables corresponding to class/type which
# are not on the wiki anymore.
with self.db_connection.cursor() as db_cursor:
for table in self.table_phrases_to_delete:
self.statistics_handler.notify_one_table_deleted()
db_cursor.drop_table(table)
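# Editor's sketch of wiring up the importer; `connect` is assumed from
# nominatim_core.db.connection, the CSV loader from its sibling module, and
# the tokenizer from the tokenizer factory. The file name and flag are
# illustrative only.
def _example_csv_import(config: Configuration, tokenizer: AbstractTokenizer) -> None:
    from nominatim_core.db.connection import connect
    from .sp_csv_loader import SPCsvLoader   # assumed module name

    with connect(config.get_libpq_dsn()) as conn:
        importer = SPImporter(config, conn, SPCsvLoader('phrases.csv'))
        importer.import_phrases(tokenizer, should_replace=True)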

View File

@@ -0,0 +1,68 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module containing the SPWikiLoader class.
"""
from typing import Iterable
import re
import logging
from nominatim_core.config import Configuration
from nominatim_core.utils.url_utils import get_url
from .special_phrase import SpecialPhrase
LOG = logging.getLogger()
def _get_wiki_content(lang: str) -> str:
"""
Request and return the wiki page's content
corresponding to special phrases for a given lang.
    Requested URL example:
https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
"""
url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
+ lang.upper()
return get_url(url)
class SPWikiLoader:
"""
Handles loading of special phrases from the wiki.
"""
def __init__(self, config: Configuration) -> None:
self.config = config
        # Compile the regex once here to avoid recompiling it for every page.
        self.occurrence_pattern = re.compile(
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
)
# Hack around a bug where building=yes was imported with quotes into the wiki
self.type_fix_pattern = re.compile(r'\"|&quot;')
self.languages = self.config.get_str_list('LANGUAGES') or \
['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi',
'lv', 'tr']
def generate_phrases(self) -> Iterable[SpecialPhrase]:
""" Download the wiki pages for the configured languages
and extract the phrases from the page.
"""
for lang in self.languages:
LOG.warning('Importing phrases for lang: %s...', lang)
loaded_xml = _get_wiki_content(lang)
# One match will be of format [label, class, type, operator, plural]
            matches = self.occurrence_pattern.findall(loaded_xml)
for match in matches:
yield SpecialPhrase(match[0],
match[1],
self.type_fix_pattern.sub('', match[2]),
match[3])
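# An illustrative wiki table row that the occurrence pattern above matches;
# the fifth group (the Y/N/- plural flag) is captured by the regex but not
# used when building the SpecialPhrase:
#
#     | Zoo || tourism || zoo || - || Y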

View File

@@ -0,0 +1,37 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module containing the class SpecialPhrase.
This class is a model used to transfer a special phrase through
the loading and import process.
"""
from typing import Any
class SpecialPhrase:
"""
Model representing a special phrase.
"""
def __init__(self, p_label: str, p_class: str, p_type: str, p_operator: str) -> None:
self.p_label = p_label.strip()
self.p_class = p_class.strip()
self.p_type = p_type.strip()
        # Needed if some operators in the wiki are not written in English.
p_operator = p_operator.strip().lower()
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
def __eq__(self, other: Any) -> bool:
if not isinstance(other, SpecialPhrase):
return False
return self.p_label == other.p_label \
and self.p_class == other.p_class \
and self.p_type == other.p_type \
and self.p_operator == other.p_operator
def __hash__(self) -> int:
return hash((self.p_label, self.p_class, self.p_type, self.p_operator))

View File

@@ -0,0 +1,149 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for importing Tiger data and handling tar and directory files.
"""
from typing import Any, TextIO, List, Union, cast
import csv
import io
import logging
import os
import tarfile
from psycopg2.extras import Json
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from nominatim_core.db.async_connection import WorkerPool
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
from nominatim_core.errors import UsageError
from ..data.place_info import PlaceInfo
from ..tokenizer.base import AbstractAnalyzer, AbstractTokenizer
from . import freeze
LOG = logging.getLogger()
class TigerInput:
""" Context manager that goes through Tiger input files which may
either be in a directory or gzipped together in a tar file.
"""
def __init__(self, data_dir: str) -> None:
self.tar_handle = None
self.files: List[Union[str, tarfile.TarInfo]] = []
if data_dir.endswith('.tar.gz'):
try:
self.tar_handle = tarfile.open(data_dir) # pylint: disable=consider-using-with
except tarfile.ReadError as err:
LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
raise UsageError("Cannot open Tiger data file.") from err
self.files = [i for i in self.tar_handle.getmembers() if i.name.endswith('.csv')]
LOG.warning("Found %d CSV files in tarfile with path %s", len(self.files), data_dir)
else:
files = os.listdir(data_dir)
self.files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
LOG.warning("Found %d CSV files in path %s", len(self.files), data_dir)
if not self.files:
LOG.warning("Tiger data import selected but no files found at %s", data_dir)
def __enter__(self) -> 'TigerInput':
return self
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
if self.tar_handle:
self.tar_handle.close()
self.tar_handle = None
def next_file(self) -> TextIO:
""" Return a file handle to the next file to be processed.
Raises an IndexError if there is no file left.
"""
fname = self.files.pop(0)
if self.tar_handle is not None:
extracted = self.tar_handle.extractfile(fname)
assert extracted is not None
return io.TextIOWrapper(extracted)
return open(cast(str, fname), encoding='utf-8')
def __len__(self) -> int:
return len(self.files)
def handle_threaded_sql_statements(pool: WorkerPool, fd: TextIO,
analyzer: AbstractAnalyzer) -> None:
""" Handles sql statement with multiplexing
"""
lines = 0
    # Use a pool of database connections to execute the statements.
sql = "SELECT tiger_line_import(%s, %s, %s, %s, %s, %s)"
for row in csv.DictReader(fd, delimiter=';'):
try:
address = dict(street=row['street'], postcode=row['postcode'])
args = ('SRID=4326;' + row['geometry'],
int(row['from']), int(row['to']), row['interpolation'],
Json(analyzer.process_place(PlaceInfo({'address': address}))),
analyzer.normalize_postcode(row['postcode']))
except ValueError:
continue
pool.next_free_worker().perform(sql, args=args)
lines += 1
if lines == 1000:
print('.', end='', flush=True)
lines = 0
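# A hypothetical line from a Tiger CSV as consumed above; fields are
# ';'-separated and the header must provide street, postcode, geometry,
# from, to and interpolation:
#
#     from;to;interpolation;street;postcode;geometry
#     100;198;even;Main Street;12345;LINESTRING(-86.466 32.428,-86.466 32.429)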
def add_tiger_data(data_dir: str, config: Configuration, threads: int,
tokenizer: AbstractTokenizer) -> int:
""" Import tiger data from directory or tar file `data dir`.
"""
dsn = config.get_libpq_dsn()
with connect(dsn) as conn:
is_frozen = freeze.is_frozen(conn)
conn.close()
if is_frozen:
raise UsageError("Tiger cannot be imported when database frozen (Github issue #3048)")
with TigerInput(data_dir) as tar:
if not tar:
return 1
with connect(dsn) as conn:
sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'tiger_import_start.sql')
        # Read the files and, for each file, hand the per-line SQL
        # statements to <threads - 1> worker connections.
place_threads = max(1, threads - 1)
with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
with tokenizer.name_analyzer() as analyzer:
while tar:
with tar.next_file() as fd:
handle_threaded_sql_statements(pool, fd, analyzer)
print('\n')
LOG.warning("Creating indexes on Tiger data")
with connect(dsn) as conn:
sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'tiger_import_finish.sql')
return 0