split code into submodules

2026-03-12 05:44:06 +00:00 · 2024-05-16 11:55:17 +02:00
parent 0fb4fe8e4d
commit 6e89310a92
137 changed files with 757 additions and 716 deletions
--- a/src/nominatim_db/clicmd/init.py
+++ b/src/nominatim_db/clicmd/init.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Subcommand definitions for the command-line tool.
+"""
+# mypy and pylint disagree about the style of explicit exports,
+# see https://github.com/PyCQA/pylint/issues/6006.
+# pylint: disable=useless-import-alias
+
+from .setup import SetupAll as SetupAll
+from .replication import UpdateReplication as UpdateReplication
+from .api import (APISearch as APISearch,
+                  APIReverse as APIReverse,
+                  APILookup as APILookup,
+                  APIDetails as APIDetails,
+                  APIStatus as APIStatus)
+from .index import UpdateIndex as UpdateIndex
+from .refresh import UpdateRefresh as UpdateRefresh
+from .add_data import UpdateAddData as UpdateAddData
+from .admin import AdminFuncs as AdminFuncs
+from .freeze import SetupFreeze as SetupFreeze
+from .special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
+from .export import QueryExport as QueryExport
+from .convert import ConvertDB as ConvertDB
--- a/src/nominatim_db/clicmd/add_data.py
+++ b/src/nominatim_db/clicmd/add_data.py
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'add-data' subcommand.
+"""
+from typing import cast
+import argparse
+import logging
+
+import psutil
+
+from .args import NominatimArgs
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid eventually unused imports.
+# pylint: disable=E0012,C0415
+
+LOG = logging.getLogger()
+
+class UpdateAddData:
+    """\
+    Add additional data from a file or an online source.
+
+    This command allows to add or update the search data in the database.
+    The data can come either from an OSM file or single OSM objects can
+    directly be downloaded from the OSM API. This function only loads the
+    data into the database. Afterwards it still needs to be integrated
+    in the search index. Use the `nominatim index` command for that.
+
+    The command can also be used to add external non-OSM data to the
+    database. At the moment the only supported format is TIGER housenumber
+    data. See the online documentation at
+    https://nominatim.org/release-docs/latest/admin/Import/#installing-tiger-housenumber-data-for-the-us
+    for more information.
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        # Exactly one data source must be selected on the command line.
+        source_group = parser.add_argument_group('Source')
+        sources = source_group.add_mutually_exclusive_group(required=True)
+        sources.add_argument('--file', metavar='FILE',
+                             help='Import data from an OSM file or diff file')
+        sources.add_argument('--diff', metavar='FILE',
+                             help='Import data from an OSM diff file (deprecated: use --file)')
+        # The three single-object sources only differ in name and help text.
+        for flag, description in (('--node', 'Import a single node from the API'),
+                                  ('--way', 'Import a single way from the API'),
+                                  ('--relation', 'Import a single relation from the API')):
+            sources.add_argument(flag, metavar='ID', type=int, help=description)
+        sources.add_argument('--tiger-data', metavar='DIR',
+                             help='Add housenumbers from the US TIGER census database')
+
+        extras = parser.add_argument_group('Extra arguments')
+        extras.add_argument('--use-main-api', action='store_true',
+                            help='Use OSM API instead of Overpass to download objects')
+        extras.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
+                            help='Size of cache to be used by osm2pgsql (in MB)')
+        extras.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
+                            help='Set timeout for file downloads')
+
+
+    def run(self, args: NominatimArgs) -> int:
+        from ..tokenizer import factory as tokenizer_factory
+        from ..tools import tiger_data, add_osm_data
+
+        # TIGER data goes through the tokenizer instead of osm2pgsql.
+        if args.tiger_data:
+            tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+            worker_count = args.threads or psutil.cpu_count() or 1
+            return tiger_data.add_tiger_data(args.tiger_data, args.config,
+                                             worker_count, tokenizer)
+
+        options = args.osm2pgsql_options(default_cache=1000, default_threads=1)
+
+        # '--diff' is the deprecated spelling of '--file'; '--file' wins.
+        osm_file = args.file or args.diff
+        if osm_file:
+            return add_osm_data.add_data_from_file(args.config.get_libpq_dsn(),
+                                                   cast(str, osm_file),
+                                                   options)
+
+        # Single objects: the first ID that is set decides the object type.
+        for osm_type in ('node', 'way', 'relation'):
+            osm_id = getattr(args, osm_type)
+            if osm_id:
+                return add_osm_data.add_osm_object(args.config.get_libpq_dsn(),
+                                                   osm_type, osm_id,
+                                                   args.use_main_api,
+                                                   options)
+
+        return 0
--- a/src/nominatim_db/clicmd/admin.py
+++ b/src/nominatim_db/clicmd/admin.py
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'admin' subcommand.
+"""
+import logging
+import argparse
+import random
+
+import nominatim_api as napi
+from nominatim_core.db.connection import connect
+from .args import NominatimArgs
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid eventually unused imports.
+# pylint: disable=E0012,C0415
+
+LOG = logging.getLogger()
+
+
+class AdminFuncs:
+    """\
+    Analyse and maintain the database.
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        """ Register the admin tasks and the options that refine them.
+        """
+        group = parser.add_argument_group('Admin tasks')
+        # Exactly one admin task must be selected per invocation.
+        objs = group.add_mutually_exclusive_group(required=True)
+        objs.add_argument('--warm', action='store_true',
+                          help='Warm database caches for search and reverse queries')
+        objs.add_argument('--check-database', action='store_true',
+                          help='Check that the database is complete and operational')
+        objs.add_argument('--migrate', action='store_true',
+                          help='Migrate the database to a new software version')
+        objs.add_argument('--analyse-indexing', action='store_true',
+                          help='Print performance analysis of the indexing process')
+        objs.add_argument('--collect-os-info', action="store_true",
+                          help="Generate a report about the host system information")
+        objs.add_argument('--clean-deleted', action='store', metavar='AGE',
+                          help='Clean up deleted relations')
+        group = parser.add_argument_group('Arguments for cache warming')
+        # Both switches write to the same destination 'target'.
+        group.add_argument('--search-only', action='store_const', dest='target',
+                           const='search',
+                           help="Only pre-warm tables for search queries")
+        group.add_argument('--reverse-only', action='store_const', dest='target',
+                           const='reverse',
+                           help="Only pre-warm tables for reverse queries")
+        group = parser.add_argument_group('Arguments for index analysis')
+        mgroup = group.add_mutually_exclusive_group()
+        mgroup.add_argument('--osm-id', type=str,
+                            help='Analyse indexing of the given OSM object')
+        mgroup.add_argument('--place-id', type=int,
+                            help='Analyse indexing of the given Nominatim object')
+
+
+    def run(self, args: NominatimArgs) -> int:
+        """ Execute the selected admin task.
+
+            Returns the result of the task function where it provides one,
+            0 for tasks without a result and 1 when no task was selected.
+        """
+        # pylint: disable=too-many-return-statements
+        if args.warm:
+            return self._warm(args)
+
+        if args.check_database:
+            LOG.warning('Checking database')
+            from ..tools import check_database
+            return check_database.check_database(args.config)
+
+        if args.analyse_indexing:
+            LOG.warning('Analysing performance of indexing function')
+            from ..tools import admin
+            admin.analyse_indexing(args.config, osm_id=args.osm_id, place_id=args.place_id)
+            return 0
+
+        if args.migrate:
+            LOG.warning('Checking for necessary database migrations')
+            from ..tools import migration
+            return migration.migrate(args.config, args)
+
+        if args.collect_os_info:
+            LOG.warning("Reporting System Information")
+            from ..tools import collect_os_info
+            collect_os_info.report_system_information(args.config)
+            return 0
+
+        if args.clean_deleted:
+            LOG.warning('Cleaning up deleted relations')
+            from ..tools import admin
+            admin.clean_deleted_relations(args.config, age=args.clean_deleted)
+            return 0
+
+        return 1
+
+
+    def _warm(self, args: NominatimArgs) -> int:
+        """ Warm the database caches by sending queries through the API.
+
+            Unless `args.target` is 'search', 1000 reverse lookups on random
+            coordinates are executed. Unless `args.target` is 'reverse',
+            every word returned by `tokenizer.most_frequent_words(conn, 1000)`
+            is searched for; this step is skipped when the database has no
+            'search_name' table. Always returns 0.
+        """
+        LOG.warning('Warming database caches')
+
+        api = napi.NominatimAPI(args.project_dir)
+
+        try:
+            if args.target != 'search':
+                for _ in range(1000):
+                    # Coordinates are handed over in x/y order, i.e. longitude
+                    # (+-180) first and latitude (+-90) second.
+                    # NOTE(review): assumes plain tuples are interpreted like
+                    # napi.Point(lon, lat) - confirm against the API.
+                    api.reverse((random.uniform(-180, 180), random.uniform(-90, 90)),
+                                address_details=True)
+
+            if args.target != 'reverse':
+                from ..tokenizer import factory as tokenizer_factory
+
+                tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+                with connect(args.config.get_libpq_dsn()) as conn:
+                    if conn.table_exists('search_name'):
+                        words = tokenizer.most_frequent_words(conn, 1000)
+                    else:
+                        words = []
+
+                for word in words:
+                    api.search(word)
+        finally:
+            # Release the API's resources even when a query fails.
+            api.close()
+
+        return 0
--- a/src/nominatim_db/clicmd/api.py
+++ b/src/nominatim_db/clicmd/api.py
@@ -0,0 +1,374 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Subcommand definitions for API calls from the command line.
+"""
+from typing import Dict, Any
+import argparse
+import logging
+import json
+import sys
+
+import nominatim_api as napi
+import nominatim_api.v1 as api_output
+from nominatim_api.v1.helpers import zoom_to_rank, deduplicate_results
+from nominatim_api.v1.format import dispatch as formatting
+import nominatim_api.logging as loglib
+from .args import NominatimArgs
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+
+LOG = logging.getLogger()
+
+# Fields of a structured search query as (option name, description) pairs.
+# The 'search' subcommand creates one '--<name>' option per entry.
+STRUCTURED_QUERY = (
+    ('amenity', 'name and/or type of POI'),
+    ('street', 'housenumber and street'),
+    ('city', 'city, town or village'),
+    ('county', 'county'),
+    ('state', 'state'),
+    ('country', 'country'),
+    ('postalcode', 'postcode')
+)
+
+# Boolean output switches as (option name, help text) pairs. One
+# '--<name>' flag per entry is added to the shared output arguments.
+EXTRADATA_PARAMS = (
+    ('addressdetails', 'Include a breakdown of the address into elements'),
+    ('extratags', ("Include additional information if available "
+                   "(e.g. wikipedia link, opening hours)")),
+    ('namedetails', 'Include a list of alternative names')
+)
+
+def _add_api_output_arguments(parser: argparse.ArgumentParser) -> None:
+    """ Add the group of output arguments to the given parser.
+
+        The group contains the result format, one boolean flag per entry
+        in EXTRADATA_PARAMS, the preferred languages and the options for
+        geometry output.
+    """
+    group = parser.add_argument_group('Output arguments')
+    # 'debug' is not a real output format. The subcommands use it to print
+    # the collected log output instead of the results.
+    group.add_argument('--format', default='jsonv2',
+                       choices=formatting.list_formats(napi.SearchResults) + ['debug'],
+                       help='Format of result')
+    for name, desc in EXTRADATA_PARAMS:
+        group.add_argument('--' + name, action='store_true', help=desc)
+
+    group.add_argument('--lang', '--accept-language', metavar='LANGS',
+                       help='Preferred language order for presenting search results')
+    group.add_argument('--polygon-output',
+                       choices=['geojson', 'kml', 'svg', 'text'],
+                       help='Output geometry of results as a GeoJSON, KML, SVG or WKT')
+    group.add_argument('--polygon-threshold', type=float, default=0.0,
+                       metavar='TOLERANCE',
+                       help=("Simplify output geometry. "
+                             "Parameter is difference tolerance in degrees."))
+
+
+class APISearch:
+    """\
+    Execute a search query.
+
+    This command works exactly the same as if calling the /search endpoint on
+    the web API. See the online documentation for more details on the
+    various parameters:
+    https://nominatim.org/release-docs/latest/api/Search/
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        """ Register query, output and result limitation arguments.
+        """
+        group = parser.add_argument_group('Query arguments')
+        group.add_argument('--query',
+                           help='Free-form query string')
+        for name, desc in STRUCTURED_QUERY:
+            group.add_argument('--' + name, help='Structured query: ' + desc)
+
+        _add_api_output_arguments(parser)
+
+        group = parser.add_argument_group('Result limitation')
+        group.add_argument('--countrycodes', metavar='CC,..',
+                           help='Limit search results to one or more countries')
+        group.add_argument('--exclude_place_ids', metavar='ID,..',
+                           help='List of search object to be excluded')
+        group.add_argument('--limit', type=int, default=10,
+                           help='Limit the number of returned results')
+        group.add_argument('--viewbox', metavar='X1,Y1,X2,Y2',
+                           help='Preferred area to find search results')
+        group.add_argument('--bounded', action='store_true',
+                           help='Strictly restrict results to viewbox area')
+
+        group = parser.add_argument_group('Other arguments')
+        group.add_argument('--no-dedupe', action='store_false', dest='dedupe',
+                           help='Do not remove duplicates from the result list')
+
+
+    def run(self, args: NominatimArgs) -> int:
+        """ Run the search and print the formatted results to stdout.
+
+            A free-form search is used when `--query` is given, a structured
+            address search otherwise. With format 'debug' the collected log
+            output is printed instead of the results. Always returns 0.
+        """
+        if args.format == 'debug':
+            loglib.set_log_output('text')
+
+        api = napi.NominatimAPI(args.project_dir)
+
+        try:
+            # More results than the limit are requested (up to 10 extra);
+            # presumably to have spare results after deduplication.
+            params: Dict[str, Any] = {'max_results': args.limit + min(args.limit, 10),
+                                      'address_details': True, # needed for display name
+                                      'geometry_output': args.get_geometry_output(),
+                                      'geometry_simplification': args.polygon_threshold,
+                                      'countries': args.countrycodes,
+                                      'excluded': args.exclude_place_ids,
+                                      'viewbox': args.viewbox,
+                                      'bounded_viewbox': args.bounded,
+                                      'locales': args.get_locales(api.config.DEFAULT_LANGUAGE)
+                                     }
+
+            if args.query:
+                results = api.search(args.query, **params)
+            else:
+                results = api.search_address(amenity=args.amenity,
+                                             street=args.street,
+                                             city=args.city,
+                                             county=args.county,
+                                             state=args.state,
+                                             postalcode=args.postalcode,
+                                             country=args.country,
+                                             **params)
+        finally:
+            # The API object is not needed once the results are fetched.
+            api.close()
+
+        if args.dedupe and len(results) > 1:
+            results = deduplicate_results(results, args.limit)
+
+        if args.format == 'debug':
+            print(loglib.get_and_disable())
+            return 0
+
+        output = api_output.format_result(
+                    results,
+                    args.format,
+                    {'extratags': args.extratags,
+                     'namedetails': args.namedetails,
+                     'addressdetails': args.addressdetails})
+        if args.format != 'xml':
+            # reformat the result, so it is pretty-printed
+            json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
+        else:
+            sys.stdout.write(output)
+        sys.stdout.write('\n')
+
+        return 0
+
+
+class APIReverse:
+    """\
+    Execute API reverse query.
+
+    This command works exactly the same as if calling the /reverse endpoint on
+    the web API. See the online documentation for more details on the
+    various parameters:
+    https://nominatim.org/release-docs/latest/api/Reverse/
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        """ Register the coordinate, zoom, layer and output arguments.
+        """
+        group = parser.add_argument_group('Query arguments')
+        group.add_argument('--lat', type=float, required=True,
+                           help='Latitude of coordinate to look up (in WGS84)')
+        group.add_argument('--lon', type=float, required=True,
+                           help='Longitude of coordinate to look up (in WGS84)')
+        group.add_argument('--zoom', type=int,
+                           help='Level of detail required for the address')
+        # Valid layer names are the lower-cased names of the DataLayer flags.
+        group.add_argument('--layer', metavar='LAYER',
+                           choices=[n.name.lower() for n in napi.DataLayer if n.name],
+                           action='append', required=False, dest='layers',
+                           help='Restrict results to the given layer (may be repeated)')
+
+        _add_api_output_arguments(parser)
+
+
+    def run(self, args: NominatimArgs) -> int:
+        """ Run the reverse lookup and print the formatted result to stdout.
+
+            With format 'debug' the collected log output is printed instead
+            of the result. Returns 0 on success (and in debug mode) and 42
+            when no result was found for the coordinate.
+        """
+        if args.format == 'debug':
+            loglib.set_log_output('text')
+
+        api = napi.NominatimAPI(args.project_dir)
+
+        try:
+            # Zoom defaults to 18 and the layers to address and POI
+            # when not given on the command line.
+            result = api.reverse(napi.Point(args.lon, args.lat),
+                                 max_rank=zoom_to_rank(args.zoom or 18),
+                                 layers=args.get_layers(napi.DataLayer.ADDRESS
+                                                        | napi.DataLayer.POI),
+                                 address_details=True, # needed for display name
+                                 geometry_output=args.get_geometry_output(),
+                                 geometry_simplification=args.polygon_threshold,
+                                 locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
+        finally:
+            # The API object is not needed once the result is fetched.
+            api.close()
+
+        if args.format == 'debug':
+            print(loglib.get_and_disable())
+            return 0
+
+        if result:
+            output = api_output.format_result(
+                        napi.ReverseResults([result]),
+                        args.format,
+                        {'extratags': args.extratags,
+                         'namedetails': args.namedetails,
+                         'addressdetails': args.addressdetails})
+            if args.format != 'xml':
+                # reformat the result, so it is pretty-printed
+                json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
+            else:
+                sys.stdout.write(output)
+            sys.stdout.write('\n')
+
+            return 0
+
+        LOG.error("Unable to geocode.")
+        return 42
+
+
+
+class APILookup:
+    """\
+    Execute API lookup query.
+
+    This command works exactly the same as if calling the /lookup endpoint on
+    the web API. See the online documentation for more details on the
+    various parameters:
+    https://nominatim.org/release-docs/latest/api/Lookup/
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        """ Register the repeatable `--id` argument and the output arguments.
+        """
+        group = parser.add_argument_group('Query arguments')
+        group.add_argument('--id', metavar='OSMID',
+                           action='append', required=True, dest='ids',
+                           help='OSM id to lookup in format <NRW><id> (may be repeated)')
+
+        _add_api_output_arguments(parser)
+
+
+    def run(self, args: NominatimArgs) -> int:
+        """ Look up the given OSM objects and print the formatted results.
+
+            With format 'debug' the log output collected during the lookup
+            is printed instead of the results. Always returns 0.
+        """
+        if args.format == 'debug':
+            loglib.set_log_output('text')
+
+        api = napi.NominatimAPI(args.project_dir)
+
+        try:
+            # Each ID is a type letter followed by the numeric OSM id.
+            places = [napi.OsmID(o[0], int(o[1:])) for o in args.ids]
+
+            results = api.lookup(places,
+                                 address_details=True, # needed for display name
+                                 geometry_output=args.get_geometry_output(),
+                                 geometry_simplification=args.polygon_threshold or 0.0,
+                                 locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
+        finally:
+            # The API object is not needed once the results are fetched.
+            api.close()
+
+        # The debug log can only be printed after the lookup has run,
+        # otherwise there is nothing to report.
+        if args.format == 'debug':
+            print(loglib.get_and_disable())
+            return 0
+
+        output = api_output.format_result(
+                    results,
+                    args.format,
+                    {'extratags': args.extratags,
+                     'namedetails': args.namedetails,
+                     'addressdetails': args.addressdetails})
+        if args.format != 'xml':
+            # reformat the result, so it is pretty-printed
+            json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
+        else:
+            sys.stdout.write(output)
+        sys.stdout.write('\n')
+
+        return 0
+
+
+class APIDetails:
+    """\
+    Execute API details query.
+
+    This command works exactly the same as if calling the /details endpoint on
+    the web API. See the online documentation for more details on the
+    various parameters:
+    https://nominatim.org/release-docs/latest/api/Details/
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        """ Register the object selection and the output arguments.
+        """
+        group = parser.add_argument_group('Query arguments')
+        # Exactly one way of identifying the object must be used.
+        objs = group.add_mutually_exclusive_group(required=True)
+        objs.add_argument('--node', '-n', type=int,
+                          help="Look up the OSM node with the given ID.")
+        objs.add_argument('--way', '-w', type=int,
+                          help="Look up the OSM way with the given ID.")
+        objs.add_argument('--relation', '-r', type=int,
+                          help="Look up the OSM relation with the given ID.")
+        objs.add_argument('--place_id', '-p', type=int,
+                          help='Database internal identifier of the OSM object to look up')
+        group.add_argument('--class', dest='object_class',
+                           help=("Class type to disambiguate multiple entries "
+                                 "of the same object."))
+
+        group = parser.add_argument_group('Output arguments')
+        group.add_argument('--addressdetails', action='store_true',
+                           help='Include a breakdown of the address into elements')
+        group.add_argument('--keywords', action='store_true',
+                           help='Include a list of name keywords and address keywords')
+        group.add_argument('--linkedplaces', action='store_true',
+                           help='Include details of places that are linked with this one')
+        group.add_argument('--hierarchy', action='store_true',
+                           help='Include details of places lower in the address hierarchy')
+        group.add_argument('--group_hierarchy', action='store_true',
+                           help='Group the places by type')
+        group.add_argument('--polygon_geojson', action='store_true',
+                           help='Include geometry of result')
+        group.add_argument('--lang', '--accept-language', metavar='LANGS',
+                           help='Preferred language order for presenting search results')
+
+
+    def run(self, args: NominatimArgs) -> int:
+        """ Fetch the details of one object and print them as JSON.
+
+            Returns 0 when the object was found and printed and 42 when
+            it could not be found in the database.
+        """
+        # Test against None so that an ID of 0 still selects its object type.
+        place: napi.PlaceRef
+        if args.node is not None:
+            place = napi.OsmID('N', args.node, args.object_class)
+        elif args.way is not None:
+            place = napi.OsmID('W', args.way, args.object_class)
+        elif args.relation is not None:
+            place = napi.OsmID('R', args.relation, args.object_class)
+        else:
+            assert args.place_id is not None
+            place = napi.PlaceID(args.place_id)
+
+        api = napi.NominatimAPI(args.project_dir)
+
+        try:
+            locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
+            result = api.details(place,
+                                 address_details=args.addressdetails,
+                                 linked_places=args.linkedplaces,
+                                 parented_places=args.hierarchy,
+                                 keywords=args.keywords,
+                                 geometry_output=napi.GeometryFormat.GEOJSON
+                                                 if args.polygon_geojson
+                                                 else napi.GeometryFormat.NONE,
+                                 locales=locales)
+        finally:
+            # The API object is not needed once the result is fetched.
+            api.close()
+
+        if result:
+            output = api_output.format_result(
+                        result,
+                        'json',
+                        {'locales': locales,
+                         'group_hierarchy': args.group_hierarchy})
+            # reformat the result, so it is pretty-printed
+            json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
+            sys.stdout.write('\n')
+
+            return 0
+
+        LOG.error("Object not found in database.")
+        return 42
+
+
+class APIStatus:
+    """
+    Execute API status query.
+
+    This command works exactly the same as if calling the /status endpoint on
+    the web API. See the online documentation for more details on the
+    various parameters:
+    https://nominatim.org/release-docs/latest/api/Status/
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        """ Register the `--format` argument.
+
+            The choices are the formats available for status results;
+            the first one in the list is the default.
+        """
+        formats = api_output.list_formats(napi.StatusResult)
+        group = parser.add_argument_group('API parameters')
+        group.add_argument('--format', default=formats[0], choices=formats,
+                           help='Format of result')
+
+
+    def run(self, args: NominatimArgs) -> int:
+        """ Query the status and print it in the requested format.
+            Always returns 0.
+        """
+        api = napi.NominatimAPI(args.project_dir)
+        try:
+            status = api.status()
+        finally:
+            # The API object is not needed once the status is fetched.
+            api.close()
+
+        print(api_output.format_result(status, args.format, {}))
+        return 0
--- a/src/nominatim_db/clicmd/args.py
+++ b/src/nominatim_db/clicmd/args.py
@@ -0,0 +1,260 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Provides custom functions over command-line arguments.
+"""
+from typing import Optional, List, Dict, Any, Sequence, Tuple
+import argparse
+import logging
+from functools import reduce
+from pathlib import Path
+
+from nominatim_core.errors import UsageError
+from nominatim_core.config import Configuration
+from nominatim_core.typing import Protocol
+import nominatim_api as napi
+
+LOG = logging.getLogger()
+
+class Subcommand(Protocol):
+    """
+    Interface to be implemented by classes implementing a CLI subcommand.
+
+    A subcommand first declares its command-line parameters in `add_args()`
+    and is then executed through `run()` with the parsed arguments.
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        """
+        Fill the given parser for the subcommand with the appropriate
+        parameters.
+
+        `parser` is the argument parser to which the options of this
+        subcommand are added.
+        """
+
+    def run(self, args: 'NominatimArgs') -> int:
+        """
+        Run the subcommand with the given parsed arguments.
+
+        `args` holds the parsed command-line arguments. The function
+        returns an integer status; presumably it is used as the exit code
+        of the command-line tool (verify against the caller).
+        """
+
+
+class NominatimArgs:
+    """ Customized namespace class for the nominatim command line tool
+        to receive the command-line arguments.
+
+        The attributes below only declare the types of the arguments of
+        the different subcommands. The helper functions convert some of
+        the raw arguments into the form needed by the tools and the API.
+    """
+    # Basic environment set by root program.
+    config: Configuration
+    project_dir: Path
+
+    # Global switches
+    version: bool
+    subcommand: Optional[str]
+    command: Subcommand
+
+    # Shared parameters
+    osm2pgsql_cache: Optional[int]
+    socket_timeout: int
+
+    # Arguments added to all subcommands.
+    verbose: int
+    threads: Optional[int]
+
+    # Arguments to 'add-data'
+    file: Optional[str]
+    diff: Optional[str]
+    node: Optional[int]
+    way: Optional[int]
+    relation: Optional[int]
+    tiger_data: Optional[str]
+    use_main_api: bool
+
+    # Arguments to 'admin'
+    warm: bool
+    check_database: bool
+    migrate: bool
+    collect_os_info: bool
+    clean_deleted: str
+    analyse_indexing: bool
+    target: Optional[str]
+    osm_id: Optional[str]
+    place_id: Optional[int]
+
+    # Arguments to 'import'
+    osm_file: List[str]
+    continue_at: Optional[str]
+    reverse_only: bool
+    no_partitions: bool
+    no_updates: bool
+    offline: bool
+    ignore_errors: bool
+    index_noanalyse: bool
+    prepare_database: bool
+
+    # Arguments to 'index'
+    boundaries_only: bool
+    no_boundaries: bool
+    minrank: int
+    maxrank: int
+
+    # Arguments to 'export'
+    output_type: str
+    output_format: str
+    output_all_postcodes: bool
+    language: Optional[str]
+    restrict_to_country: Optional[str]
+
+    # Arguments to 'convert'
+    output: Path
+
+    # Arguments to 'refresh'
+    postcodes: bool
+    word_tokens: bool
+    word_counts: bool
+    address_levels: bool
+    functions: bool
+    wiki_data: bool
+    secondary_importance: bool
+    importance: bool
+    website: bool
+    diffs: bool
+    enable_debug_statements: bool
+    data_object: Sequence[Tuple[str, int]]
+    data_area: Sequence[Tuple[str, int]]
+
+    # Arguments to 'replication'
+    init: bool
+    update_functions: bool
+    check_for_updates: bool
+    once: bool
+    catch_up: bool
+    do_index: bool
+
+    # Arguments to 'serve'
+    server: str
+    engine: str
+
+    # Arguments to 'special-phrases'
+    import_from_wiki: bool
+    import_from_csv: Optional[str]
+    no_replace: bool
+
+    # Arguments to all query functions
+    format: str
+    addressdetails: bool
+    extratags: bool
+    namedetails: bool
+    lang: Optional[str]
+    polygon_output: Optional[str]
+    polygon_threshold: Optional[float]
+
+    # Arguments to 'search'
+    query: Optional[str]
+    amenity: Optional[str]
+    street: Optional[str]
+    city: Optional[str]
+    county: Optional[str]
+    state: Optional[str]
+    country: Optional[str]
+    postalcode: Optional[str]
+    countrycodes: Optional[str]
+    exclude_place_ids: Optional[str]
+    limit: int
+    viewbox: Optional[str]
+    bounded: bool
+    dedupe: bool
+
+    # Arguments to 'reverse'
+    lat: float
+    lon: float
+    zoom: Optional[int]
+    layers: Optional[Sequence[str]]
+
+    # Arguments to 'lookup'
+    ids: Sequence[str]
+
+    # Arguments to 'details'
+    object_class: Optional[str]
+    linkedplaces: bool
+    hierarchy: bool
+    keywords: bool
+    polygon_geojson: bool
+    group_hierarchy: bool
+
+
+    def osm2pgsql_options(self, default_cache: int,
+                          default_threads: int) -> Dict[str, Any]:
+        """ Return the standard osm2pgsql options that can be derived
+            from the command line arguments. The resulting dict can be
+            further customized and then used in `run_osm2pgsql()`.
+
+            `default_cache` and `default_threads` are used when the
+            respective command-line argument is unset (or 0).
+        """
+        return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.config.lib_dir.osm2pgsql,
+                    osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
+                    osm2pgsql_style=self.config.get_import_style_file(),
+                    osm2pgsql_style_path=self.config.config_dir,
+                    threads=self.threads or default_threads,
+                    dsn=self.config.get_libpq_dsn(),
+                    flatnode_file=str(self.config.get_path('FLATNODE_FILE') or ''),
+                    tablespaces=dict(slim_data=self.config.TABLESPACE_OSM_DATA,
+                                     slim_index=self.config.TABLESPACE_OSM_INDEX,
+                                     main_data=self.config.TABLESPACE_PLACE_DATA,
+                                     main_index=self.config.TABLESPACE_PLACE_INDEX
+                                    )
+                   )
+
+
+    def get_osm_file_list(self) -> Optional[List[Path]]:
+        """ Return the --osm-file argument as a list of Paths or None
+            if no argument was given. The function also checks if the files
+            exist and raises a UsageError if one cannot be found.
+        """
+        if not self.osm_file:
+            return None
+
+        files = [Path(f) for f in self.osm_file]
+        for fname in files:
+            if not fname.is_file():
+                # Logger.fatal() is only a legacy alias of critical().
+                LOG.critical("OSM file '%s' does not exist.", fname)
+                raise UsageError('Cannot access file.')
+
+        return files
+
+
+    def get_geometry_output(self) -> napi.GeometryFormat:
+        """ Get the requested geometry output format in a API-compatible
+            format.
+
+            Returns GeometryFormat.NONE when no polygon output was
+            requested. Raises a UsageError when the requested format is
+            not a member of GeometryFormat.
+        """
+        if not self.polygon_output:
+            return napi.GeometryFormat.NONE
+
+        # The members of the enum are named like the upper-cased
+        # command-line values, so a lookup by name covers all formats.
+        try:
+            return napi.GeometryFormat[self.polygon_output.upper()]
+        except KeyError as exp:
+            raise UsageError(f"Unknown polygon output format '{self.polygon_output}'.") from exp
+
+
+    def get_locales(self, default: Optional[str]) -> napi.Locales:
+        """ Get the locales from the language parameter.
+
+            The `default` language string is used when no language was
+            given on the command line. When both are unset, an empty
+            Locales object is returned.
+        """
+        if self.lang:
+            return napi.Locales.from_accept_languages(self.lang)
+        if default:
+            return napi.Locales.from_accept_languages(default)
+
+        return napi.Locales()
+
+
+    def get_layers(self, default: napi.DataLayer) -> Optional[napi.DataLayer]:
+        """ Get the list of selected layers as a DataLayer enum.
+
+            Returns `default` when no layer was selected. Otherwise the
+            flags named like the upper-cased layer names are or-ed together.
+        """
+        if not self.layers:
+            return default
+
+        return reduce(napi.DataLayer.__or__,
+                      (napi.DataLayer[s.upper()] for s in self.layers))
--- a/src/nominatim_db/clicmd/convert.py
+++ b/src/nominatim_db/clicmd/convert.py
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'convert' subcommand.
+"""
+from typing import Set, Any, Union, Optional, Sequence
+import argparse
+import asyncio
+from pathlib import Path
+
+from nominatim_core.errors import UsageError
+from .args import NominatimArgs
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid possibly unused imports.
+# pylint: disable=E0012,C0415
+
+class WithAction(argparse.Action):
+    """ Special action that saves a list of flags, given on the command-line
+        as `--with-foo` or `--without-foo`. Enabled flags are kept in `dest_set`.
+    """
+    def __init__(self, option_strings: Sequence[str], dest: Any,
+                 default: bool = True, **kwargs: Any) -> None:
+        if 'nargs' in kwargs:
+            raise ValueError("nargs not allowed.")
+        # argparse hands in an empty list (never None) for positionals.
+        if not option_strings:
+            raise ValueError("Positional parameter not allowed.")
+        self.dest_set = kwargs.pop('dest_set')
+        full_option_strings = []
+        for opt in option_strings:
+            if not opt.startswith('--'):
+                raise ValueError("short-form options not allowed")
+            if default:
+                self.dest_set.add(opt[2:])
+            full_option_strings.append(f"--with-{opt[2:]}")
+            full_option_strings.append(f"--without-{opt[2:]}")
+        # The flags live in dest_set only, so the namespace dest is suppressed.
+        super().__init__(full_option_strings, argparse.SUPPRESS, nargs=0, **kwargs)
+
+
+    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace,
+                 values: Union[str, Sequence[Any], None],
+                 option_string: Optional[str] = None) -> None:
+        assert option_string
+        if option_string.startswith('--with-'):
+            self.dest_set.add(option_string[7:])
+        if option_string.startswith('--without-'):
+            self.dest_set.discard(option_string[10:])
+
+
+class ConvertDB:
+    """ Convert an existing database into a different format. (EXPERIMENTAL)
+
+        Dump a read-only version of the database in a different format.
+        At the moment only a SQLite database suitable for reverse lookup
+        can be created.
+    """
+    def __init__(self) -> None:
+        # Names of the features enabled through the --with-*/--without-* switches.
+        self.options: Set[str] = set()
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        parser.add_argument('--format', default='sqlite',
+                            choices=('sqlite', ),
+                            help='Format of the output database (must be sqlite currently)')
+        parser.add_argument('--output', '-o', required=True, type=Path,
+                            help='File to write the database to.')
+        group = parser.add_argument_group('Switches to define database layout'
+                                          ' (currently no effect)')
+        group.add_argument('--reverse', action=WithAction, dest_set=self.options, default=True,
+                           help='Enable/disable support for reverse and lookup API'
+                                ' (default: enabled)')
+        group.add_argument('--search', action=WithAction, dest_set=self.options, default=True,
+                           help='Enable/disable support for search API (default: enabled)')
+        group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
+                           help='Enable/disable support for details API (default: enabled)')
+
+
+    def run(self, args: NominatimArgs) -> int:
+        if args.output.exists():
+            raise UsageError(f"File '{args.output}' already exists. Refusing to overwrite.")
+
+        if args.format == 'sqlite':
+            from ..tools import convert_sqlite
+
+            asyncio.run(convert_sqlite.convert(args.project_dir, args.output, self.options))
+            return 0
+        # Only reached for a format without converter; argparse rejects those.
+        return 1
--- a/src/nominatim_db/clicmd/export.py
+++ b/src/nominatim_db/clicmd/export.py
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'export' subcommand.
+"""
+from typing import Optional, List, cast
+import logging
+import argparse
+import asyncio
+import csv
+import sys
+
+import sqlalchemy as sa
+
+import nominatim_api as napi
+from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
+from nominatim_api.types import LookupDetails
+from nominatim_core.errors import UsageError
+from .args import NominatimArgs
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid possibly unused imports.
+# pylint: disable=E0012,C0415
+# Needed for SQLAlchemy
+# pylint: disable=singleton-comparison
+
+LOG = logging.getLogger()
+
+# Inclusive (min, max) range of address ranks for each output type.
+RANK_RANGE_MAP = {
+    'country': (4, 4),
+    'state': (5, 9),
+    'county': (10, 12),
+    'city': (13, 16),
+    'suburb': (17, 21),
+    'street': (26, 26),
+    'path': (27, 27),
+}
+
+# Reverse lookup: address rank -> name of the output type covering it.
+RANK_TO_OUTPUT_MAP = {
+    rank: output_type
+    for output_type, (min_rank, max_rank) in RANK_RANGE_MAP.items()
+    for rank in range(min_rank, max_rank + 1)
+}
+
+class QueryExport:
+    """\
+    Export places as CSV file from the database.
+
+
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        group = parser.add_argument_group('Output arguments')
+        group.add_argument('--output-type', default='street',
+                           choices=('country', 'state', 'county',
+                                    'city', 'suburb', 'street', 'path'),
+                           help='Type of places to output (default: street)')
+        group.add_argument('--output-format',
+                           default='street;suburb;city;county;state;country',
+                           help=("Semicolon-separated list of address types "
+                                 "(see --output-type). Additionally accepts: "
+                                 "placeid,postcode"))
+        group.add_argument('--language',
+                           help=("Preferred language for output "
+                                 "(use local name, if omitted)"))
+        group = parser.add_argument_group('Filter arguments')
+        group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
+                           help='Export only objects within country')
+        group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
+                           dest='node',
+                           help='Export only children of this OSM node')
+        group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
+                           dest='way',
+                           help='Export only children of this OSM way')
+        group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
+                           dest='relation',
+                           help='Export only children of this OSM relation')
+
+    # Runs the asynchronous export() to completion and returns its result.
+    def run(self, args: NominatimArgs) -> int:
+        return asyncio.run(export(args))
+
+
+async def export(args: NominatimArgs) -> int:
+    """ Select the requested places from the placex table and write them
+        to the CSV output. This is the asynchronous part of the command.
+    """
+    api = napi.NominatimAPIAsync(args.project_dir)
+
+    try:
+        min_rank, max_rank = RANK_RANGE_MAP[args.output_type]
+        writer = init_csv_writer(args.output_format)
+        # One connection reads the places, the other fetches their details.
+        async with api.begin() as conn, api.begin() as detail_conn:
+            col = conn.t.placex.c
+
+            columns = (col.place_id, col.parent_place_id,
+                       col.osm_type, col.osm_id, col.name,
+                       col.class_, col.type, col.admin_level,
+                       col.address, col.extratags,
+                       col.housenumber, col.postcode, col.country_code,
+                       col.importance, col.wikipedia, col.indexed_date,
+                       col.rank_address, col.rank_search,
+                       col.centroid)
+            query = sa.select(*columns)\
+                      .where(col.linked_place_id == None)\
+                      .where(col.rank_address.between(min_rank, max_rank))
+
+            parent_id = await get_parent_id(conn, args.node, args.way, args.relation)
+            if parent_id:
+                addressline = conn.t.addressline
+                query = query.join(addressline, addressline.c.place_id == col.place_id)\
+                             .where(addressline.c.address_place_id == parent_id)\
+                             .where(addressline.c.isaddress)
+
+            if args.restrict_to_country:
+                query = query.where(col.country_code == args.restrict_to_country.lower())
+
+            batch = []
+            for row in await conn.execute(query):
+                place = create_from_placex_row(row, ReverseResult)
+                if place is None:
+                    continue
+                batch.append(place)
+                # Write out in chunks of 1000 places.
+                if len(batch) == 1000:
+                    await dump_results(detail_conn, batch, writer, args.language)
+                    batch = []
+
+            if batch:
+                await dump_results(detail_conn, batch, writer, args.language)
+    finally:
+        await api.close()
+
+    return 0
+
+
+def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
+    """ Create a CSV writer on stdout and print the header for the given fields. """
+    columns = output_format.split(';')
+    csv_out = csv.DictWriter(sys.stdout, extrasaction='ignore', fieldnames=columns)
+    csv_out.writeheader()
+    return csv_out
+
+
+async def dump_results(conn: napi.SearchConnection,
+                       results: List[ReverseResult],
+                       writer: 'csv.DictWriter[str]',
+                       lang: Optional[str]) -> None:
+    """ Look up the address details for the given results and write
+        one CSV row per result.
+    """
+    details = LookupDetails(address_details=True,
+                            locales=napi.Locales([lang] if lang else None))
+    await add_result_details(conn, results, details)
+
+    for place in results:
+        row = {'placeid': place.place_id, 'postcode': place.postcode}
+        for addr in (place.address_rows or []):
+            if not (addr.isaddress and addr.local_name):
+                continue
+            if addr.category[1] == 'postcode':
+                row['postcode'] = addr.local_name
+            elif addr.rank_address in RANK_TO_OUTPUT_MAP:
+                row[RANK_TO_OUTPUT_MAP[addr.rank_address]] = addr.local_name
+        writer.writerow(row)
+
+
+async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
+                        way_id: Optional[int],
+                        relation_id: Optional[int]) -> Optional[int]:
+    """ Return the place ID for the OSM object selected by the first ID that
+        is set (node, then way, then relation) or None if no ID is given.
+        Raises a UsageError when no place with an address rank is found.
+    """
+    candidates = (('N', node_id), ('W', way_id), ('R', relation_id))
+    selected = [(otype, oid) for otype, oid in candidates if oid is not None]
+    if not selected:
+        return None
+    osm_type, osm_id = selected[0]
+
+    placex = conn.t.placex
+    query = sa.select(placex.c.place_id)\
+              .where(placex.c.osm_type == osm_type)\
+              .where(placex.c.osm_id == osm_id)\
+              .where(placex.c.rank_address > 0)\
+              .order_by(placex.c.rank_address)\
+              .limit(1)
+
+    for found in await conn.execute(query):
+        return cast(int, found[0])
+
+    raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')
--- a/src/nominatim_db/clicmd/freeze.py
+++ b/src/nominatim_db/clicmd/freeze.py
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'freeze' subcommand.
+"""
+import argparse
+
+from nominatim_core.db.connection import connect
+from .args import NominatimArgs
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid possibly unused imports.
+# pylint: disable=E0012,C0415
+
+class SetupFreeze:
+    """\
+    Make database read-only.
+
+    About half of data in the Nominatim database is kept only to be able to
+    keep the data up-to-date with new changes made in OpenStreetMap. This
+    command drops all this data and only keeps the part needed for geocoding
+    itself.
+
+    This command has the same effect as the `--no-updates` option for imports.
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        """ This command does not take any options. """
+
+
+    def run(self, args: NominatimArgs) -> int:
+        from ..tools import freeze
+
+        config = args.config
+        with connect(config.get_libpq_dsn()) as conn:
+            freeze.drop_update_tables(conn)
+        freeze.drop_flatnode_file(config.get_path('FLATNODE_FILE'))
+        return 0
--- a/src/nominatim_db/clicmd/index.py
+++ b/src/nominatim_db/clicmd/index.py
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'index' subcommand.
+"""
+import argparse
+
+import psutil
+
+from nominatim_core.db import status
+from nominatim_core.db.connection import connect
+from .args import NominatimArgs
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid possibly unused imports.
+# pylint: disable=E0012,C0415
+
+
+class UpdateIndex:
+    """\
+    Reindex all new and modified data.
+
+    Indexing is the process of computing the address and search terms for
+    the places in the database. Every time data is added or changed, indexing
+    needs to be run. Imports and replication updates automatically take care
+    of indexing. For other cases, this function allows to run indexing manually.
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        filters = parser.add_argument_group('Filter arguments')
+        filters.add_argument('--boundaries-only', action='store_true',
+                             help='Index only administrative boundaries.')
+        filters.add_argument('--no-boundaries', action='store_true',
+                             help='Index everything except administrative boundaries.')
+        filters.add_argument('--minrank', '-r', type=int, metavar='RANK', default=0,
+                             help='Minimum/starting rank')
+        filters.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30,
+                             help='Maximum/finishing rank')
+
+
+    def run(self, args: NominatimArgs) -> int:
+        from ..indexer.indexer import Indexer
+        from ..tokenizer import factory as tokenizer_factory
+
+        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+        dsn = args.config.get_libpq_dsn()
+        num_threads = args.threads or psutil.cpu_count() or 1
+        indexer = Indexer(dsn, tokenizer, num_threads)
+
+        with_boundaries = not args.no_boundaries
+        with_places = not args.boundaries_only
+        if with_boundaries:
+            indexer.index_boundaries(args.minrank, args.maxrank)
+        if with_places:
+            indexer.index_by_rank(args.minrank, args.maxrank)
+            indexer.index_postcodes()
+        # Only an unrestricted run sets the indexed flag in the status.
+        if with_boundaries and with_places and (args.minrank, args.maxrank) == (0, 30):
+            with connect(dsn) as conn:
+                status.set_indexed(conn, True)
+        return 0
--- a/src/nominatim_db/clicmd/refresh.py
+++ b/src/nominatim_db/clicmd/refresh.py
@@ -0,0 +1,187 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of 'refresh' subcommand.
+"""
+from typing import Tuple, Optional
+import argparse
+import logging
+from pathlib import Path
+
+from nominatim_core.config import Configuration
+from nominatim_core.db.connection import connect
+from ..tokenizer.base import AbstractTokenizer
+from .args import NominatimArgs
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid possibly unused imports.
+# pylint: disable=E0012,C0415
+
+LOG = logging.getLogger()
+
+def _parse_osm_object(obj: str) -> Tuple[str, int]:
+    """ Parse the given argument into a tuple of OSM type and ID.
+        Raises an ArgumentTypeError if the format is not recognized.
+    """
+    if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdecimal():
+        raise argparse.ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")
+    # isdecimal() only lets through characters that int() is able to convert.
+    return (obj[0].upper(), int(obj[1:]))
+
+
+class UpdateRefresh:
+    """\
+    Recompute auxiliary data used by the indexing process.
+
+    This sub-command updates various static data and functions in the database.
+    It usually needs to be run after changing various aspects of the
+    configuration. The configuration documentation will mention the exact
+    command to use in such a case.
+
+    Warning: the 'refresh' command must not be run in parallel with other update
+             commands like 'replication' or 'add-data'.
+    """
+    def __init__(self) -> None:
+        self.tokenizer: Optional[AbstractTokenizer] = None
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        group = parser.add_argument_group('Data arguments')
+        group.add_argument('--postcodes', action='store_true',
+                           help='Update postcode centroid table')
+        group.add_argument('--word-tokens', action='store_true',
+                           help='Clean up search terms')
+        group.add_argument('--word-counts', action='store_true',
+                           help='Compute frequency of full-word search terms')
+        group.add_argument('--address-levels', action='store_true',
+                           help='Reimport address level configuration')
+        group.add_argument('--functions', action='store_true',
+                           help='Update the PL/pgSQL functions in the database')
+        group.add_argument('--wiki-data', action='store_true',
+                           help='Update Wikipedia/data importance numbers')
+        group.add_argument('--secondary-importance', action='store_true',
+                           help='Update secondary importance raster data')
+        group.add_argument('--importance', action='store_true',
+                           help='Recompute place importances (expensive!)')
+        group.add_argument('--website', action='store_true',
+                           help='Refresh the directory that serves the scripts for the web API')
+        group.add_argument('--data-object', action='append',
+                           type=_parse_osm_object, metavar='OBJECT',
+                           help='Mark the given OSM object as requiring an update'
+                                ' (format: [NWR]<id>)')
+        group.add_argument('--data-area', action='append',
+                           type=_parse_osm_object, metavar='OBJECT',
+                           help='Mark the area around the given OSM object as requiring an update'
+                                ' (format: [NWR]<id>)')
+
+        group = parser.add_argument_group('Arguments for function refresh')
+        group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
+                           help='Do not enable code for propagating updates')
+        group.add_argument('--enable-debug-statements', action='store_true',
+                           help='Enable debug warning statements in functions')
+
+
+    def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
+        from ..tools import refresh, postcodes
+        from ..indexer.indexer import Indexer
+        # Some of the steps below request a function refresh on their own.
+        need_function_refresh = args.functions
+
+        if args.postcodes:
+            if postcodes.can_compute(args.config.get_libpq_dsn()):
+                LOG.warning("Update postcodes centroid")
+                tokenizer = self._get_tokenizer(args.config)
+                postcodes.update_postcodes(args.config.get_libpq_dsn(),
+                                           args.project_dir, tokenizer)
+                indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
+                                  args.threads or 1)
+                indexer.index_postcodes()
+            else:
+                LOG.error("The place table doesn't exist. "
+                          "Postcode updates on a frozen database is not possible.")
+
+        if args.word_tokens:
+            LOG.warning('Updating word tokens')
+            tokenizer = self._get_tokenizer(args.config)
+            tokenizer.update_word_tokens()
+
+        if args.word_counts:
+            LOG.warning('Recompute word statistics')
+            self._get_tokenizer(args.config).update_statistics(args.config,
+                                                               threads=args.threads or 1)
+
+        if args.address_levels:
+            LOG.warning('Updating address levels')
+            with connect(args.config.get_libpq_dsn()) as conn:
+                refresh.load_address_levels_from_config(conn, args.config)
+
+        # Attention: must come BEFORE functions
+        if args.secondary_importance:
+            with connect(args.config.get_libpq_dsn()) as conn:
+                # If the table did not exist before, then the importance code
+                # needs to be enabled.
+                if not conn.table_exists('secondary_importance'):
+                    args.functions = True
+
+            LOG.warning('Import secondary importance raster data from %s', args.project_dir)
+            if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
+                                                args.project_dir) > 0:
+                LOG.fatal('FATAL: Cannot update secondary importance raster data')
+                return 1
+            need_function_refresh = True
+
+        if args.wiki_data:
+            data_path = Path(args.config.WIKIPEDIA_DATA_PATH
+                             or args.project_dir)
+            LOG.warning('Import wikipedia article importance from %s', data_path)
+            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
+                                                 data_path) > 0:
+                LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
+                return 1
+            need_function_refresh = True
+
+        if need_function_refresh:
+            LOG.warning('Create functions')
+            with connect(args.config.get_libpq_dsn()) as conn:
+                refresh.create_functions(conn, args.config,
+                                         args.diffs, args.enable_debug_statements)
+                self._get_tokenizer(args.config).update_sql_functions(args.config)
+
+        # Attention: importance MUST come after wiki data import and after functions.
+        if args.importance:
+            LOG.warning('Update importance values for database')
+            with connect(args.config.get_libpq_dsn()) as conn:
+                refresh.recompute_importance(conn)
+
+        if args.website:
+            webdir = args.project_dir / 'website'
+            LOG.warning('Setting up website directory at %s', webdir)
+            # This is a little bit hacky: call the tokenizer setup, so that
+            # the tokenizer directory gets repopulated as well, in case it
+            # wasn't there yet.
+            self._get_tokenizer(args.config)
+            with connect(args.config.get_libpq_dsn()) as conn:
+                refresh.setup_website(webdir, args.config, conn)
+
+        if args.data_object or args.data_area:
+            with connect(args.config.get_libpq_dsn()) as conn:
+                for obj in args.data_object or []:
+                    refresh.invalidate_osm_object(*obj, conn, recursive=False)
+                for obj in args.data_area or []:
+                    refresh.invalidate_osm_object(*obj, conn, recursive=True)
+                conn.commit()
+
+        return 0
+
+
+    def _get_tokenizer(self, config: Configuration) -> AbstractTokenizer:
+        if self.tokenizer is None:
+            from ..tokenizer import factory as tokenizer_factory
+            # Created on first use and then reused by all later calls.
+            self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+
+        return self.tokenizer
--- a/src/nominatim_db/clicmd/replication.py
+++ b/src/nominatim_db/clicmd/replication.py
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'replication' sub-command.
+"""
+from typing import Optional
+import argparse
+import datetime as dt
+import logging
+import socket
+import time
+
+from nominatim_core.db import status
+from nominatim_core.db.connection import connect
+from nominatim_core.errors import UsageError
+from .args import NominatimArgs
+
+LOG = logging.getLogger()
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to make pyosmium optional for replication only.
+# pylint: disable=C0415
+
+class UpdateReplication:
+    """\
+    Update the database using an online replication service.
+
+    An OSM replication service is an online service that provides regular
+    updates (OSM diff files) for the planet or extract they provide. The OSMF
+    provides the primary replication service for the full planet at
+    https://planet.osm.org/replication/ but there are other providers of
+    extracts of OSM data who provide such a service as well.
+
+    This sub-command allows to set up such a replication service and download
+    and import updates at regular intervals. You need to call '--init' once to
+    set up the process or whenever you change the replication configuration
+    parameters. Without any arguments, the sub-command will go into a loop and
+    continuously apply updates as they become available. Giving `--once` just
+    downloads and imports the next batch of updates.
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        group = parser.add_argument_group('Arguments for initialisation')
+        group.add_argument('--init', action='store_true',
+                           help='Initialise the update process')
+        group.add_argument('--no-update-functions', dest='update_functions',
+                           action='store_false',
+                           help="Do not update the trigger function to "
+                                "support differential updates (EXPERT)")
+        group = parser.add_argument_group('Arguments for updates')
+        group.add_argument('--check-for-updates', action='store_true',
+                           help='Check if new updates are available and exit')
+        group.add_argument('--once', action='store_true',
+                           help="Download and apply updates only once. When "
+                                "not set, updates are continuously applied")
+        group.add_argument('--catch-up', action='store_true',
+                           help="Download and apply updates until no new "
+                                "data is available on the server")
+        group.add_argument('--no-index', action='store_false', dest='do_index',
+                           help=("Do not index the new data. Only usable "
+                                 "together with --once"))
+        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
+                           help='Size of cache to be used by osm2pgsql (in MB)')
+        group = parser.add_argument_group('Download parameters')
+        group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
+                           help='Set timeout for file downloads')
+
+
+    def _init_replication(self, args: NominatimArgs) -> int:
+        from ..tools import replication, refresh
+
+        LOG.warning("Initialising replication updates")
+        with connect(args.config.get_libpq_dsn()) as conn:
+            replication.init_replication(conn, base_url=args.config.REPLICATION_URL,
+                                         socket_timeout=args.socket_timeout)
+            if args.update_functions:
+                LOG.warning("Create functions")
+                refresh.create_functions(conn, args.config, True, False)
+        return 0
+
+
+    def _check_for_updates(self, args: NominatimArgs) -> int:
+        from ..tools import replication
+
+        with connect(args.config.get_libpq_dsn()) as conn:
+            return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL,
+                                                 socket_timeout=args.socket_timeout)
+
+
+    def _report_update(self, batchdate: dt.datetime,
+                       start_import: dt.datetime,
+                       start_index: Optional[dt.datetime]) -> None:
+        def round_time(delta: dt.timedelta) -> dt.timedelta:
+            return dt.timedelta(seconds=int(delta.total_seconds()))
+        # All durations in the message are truncated to full seconds.
+        end = dt.datetime.now(dt.timezone.utc)
+        LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
+                    round_time((start_index or end) - start_import),
+                    f"Indexing: {round_time(end - start_index)} " if start_index else '',
+                    round_time(end - start_import),
+                    round_time(end - batchdate))
+
+
+    def _compute_update_interval(self, args: NominatimArgs) -> int:
+        if args.catch_up:
+            return 0
+
+        update_interval = args.config.get_int('REPLICATION_UPDATE_INTERVAL')
+        # Sanity check to not overwhelm the Geofabrik servers.
+        if 'download.geofabrik.de' in args.config.REPLICATION_URL\
+           and update_interval < 86400:
+            LOG.fatal("Update interval too low for download.geofabrik.de.\n"
+                      "Please check install documentation "
+                      "(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
+                      "setting-up-the-update-process).")
+            raise UsageError("Invalid replication update interval setting.")
+
+        return update_interval
+
+
+    def _update(self, args: NominatimArgs) -> None:
+        # pylint: disable=too-many-locals
+        from ..tools import replication
+        from ..indexer.indexer import Indexer
+        from ..tokenizer import factory as tokenizer_factory
+
+        update_interval = self._compute_update_interval(args)
+
+        params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
+        params.update(base_url=args.config.REPLICATION_URL,
+                      update_interval=update_interval,
+                      import_file=args.project_dir / 'osmosischange.osc',
+                      max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
+                      indexed_only=not args.once)
+
+        if not args.once:
+            if not args.do_index:
+                LOG.fatal("Indexing cannot be disabled when running updates continuously.")
+                raise UsageError("Bad argument '--no-index'.")
+        recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
+
+        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+        indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or 1)
+
+        dsn = args.config.get_libpq_dsn()
+
+        while True:
+            start = dt.datetime.now(dt.timezone.utc)
+            state = replication.update(dsn, params, socket_timeout=args.socket_timeout)
+
+            with connect(dsn) as conn:
+                if state is not replication.UpdateState.NO_CHANGES:
+                    status.log_status(conn, start, 'import')
+                batchdate, _, _ = status.get_status(conn)
+                conn.commit()
+
+            if state is not replication.UpdateState.NO_CHANGES and args.do_index:
+                index_start = dt.datetime.now(dt.timezone.utc)
+                indexer.index_full(analyse=False)
+
+                with connect(dsn) as conn:
+                    status.set_indexed(conn, True)
+                    status.log_status(conn, index_start, 'index')
+                    conn.commit()
+            else:
+                index_start = None
+            # Evaluates as (NO_CHANGES and catch_up) or (interval > 40 minutes).
+            if state is replication.UpdateState.NO_CHANGES and \
+               args.catch_up or update_interval > 40*60:
+                while indexer.has_pending():
+                    indexer.index_full(analyse=False)
+
+            if LOG.isEnabledFor(logging.WARNING):
+                assert batchdate is not None
+                self._report_update(batchdate, start, index_start)
+
+            if args.once or (args.catch_up and state is replication.UpdateState.NO_CHANGES):
+                break
+
+            if state is replication.UpdateState.NO_CHANGES:
+                LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
+                time.sleep(recheck_interval)
+
+
+    def run(self, args: NominatimArgs) -> int:
+        socket.setdefaulttimeout(args.socket_timeout)
+
+        if args.init:
+            return self._init_replication(args)
+
+        if args.check_for_updates:
+            return self._check_for_updates(args)
+
+        self._update(args)
+        return 0
--- a/src/nominatim_db/clicmd/setup.py
+++ b/src/nominatim_db/clicmd/setup.py
@@ -0,0 +1,229 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'import' subcommand.
+"""
+from typing import Optional
+import argparse
+import logging
+from pathlib import Path
+
+import psutil
+
+from nominatim_core.errors import UsageError
+from nominatim_core.config import Configuration
+from nominatim_core.db.connection import connect
+from nominatim_core.db import status, properties
+from ..tokenizer.base import AbstractTokenizer
+from ..version import NOMINATIM_VERSION
+from .args import NominatimArgs
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid importing modules that might not be used.
+# pylint: disable=C0415
+
+LOG = logging.getLogger()
+
+class SetupAll:
+    """\
+    Create a new Nominatim database from an OSM file.
+
+    This sub-command sets up a new Nominatim database from scratch starting
+    with creating a new database in Postgresql. The user running this command
+    needs superuser rights on the database.
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        group1 = parser.add_argument_group('Required arguments')
+        group1.add_argument('--osm-file', metavar='FILE', action='append',
+                           help='OSM file to be imported'
+                                ' (repeat for importing multiple files)',
+                                default=None)
+        group1.add_argument('--continue', dest='continue_at',
+                           choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
+                           help='Continue an import that was interrupted',
+                           default=None)
+        group2 = parser.add_argument_group('Optional arguments')
+        group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
+                           help='Size of cache to be used by osm2pgsql (in MB)')
+        group2.add_argument('--reverse-only', action='store_true',
+                           help='Do not create tables and indexes for searching')
+        group2.add_argument('--no-partitions', action='store_true',
+                           help=("Do not partition search indices "
+                                 "(speeds up import of single country extracts)"))
+        group2.add_argument('--no-updates', action='store_true',
+                           help="Do not keep tables that are only needed for "
+                                "updating the database later")
+        group2.add_argument('--offline', action='store_true',
+                            help="Do not attempt to load any additional data from the internet")
+        group3 = parser.add_argument_group('Expert options')
+        group3.add_argument('--ignore-errors', action='store_true',
+                           help='Continue import even when errors in SQL are present')
+        group3.add_argument('--index-noanalyse', action='store_true',
+                           help='Do not perform analyse operations during index (expert only)')
+        group3.add_argument('--prepare-database', action='store_true',
+                            help='Create the database but do not import any data')
+
+
+    def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches
+        from ..data import country_info
+        from ..tools import database_import, refresh, postcodes, freeze
+        from ..indexer.indexer import Indexer
+
+        num_threads = args.threads or psutil.cpu_count() or 1
+
+        country_info.setup_country_config(args.config)
+
+        if args.osm_file is None and args.continue_at is None and not args.prepare_database:
+            raise UsageError("No input files (use --osm-file).")
+
+        if args.osm_file is not None and args.continue_at not in ('import-from-file', None):
+            raise UsageError(f"Cannot use --continue {args.continue_at} and --osm-file together.")
+
+        if args.continue_at is not None and args.prepare_database:
+            raise UsageError(
+                "Cannot use --continue and --prepare-database together."
+            )
+
+
+        if args.prepare_database or args.continue_at is None:
+            LOG.warning('Creating database')
+            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
+                                                        rouser=args.config.DATABASE_WEBUSER)
+            if args.prepare_database:
+                return 0
+
+        if args.continue_at in (None, 'import-from-file'):
+            files = args.get_osm_file_list()
+            if not files:
+                raise UsageError("No input files (use --osm-file).")
+
+            if args.continue_at in ('import-from-file', None):
+                # Check if the correct plugins are installed
+                database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
+                LOG.warning('Setting up country tables')
+                country_info.setup_country_tables(args.config.get_libpq_dsn(),
+                                                args.config.lib_dir.data,
+                                                args.no_partitions)
+
+                LOG.warning('Importing OSM data file')
+                database_import.import_osm_data(files,
+                                                args.osm2pgsql_options(0, 1),
+                                                drop=args.no_updates,
+                                                ignore_errors=args.ignore_errors)
+
+                LOG.warning('Importing wikipedia importance data')
+                data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
+                if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
+                                                    data_path) > 0:
+                    LOG.error('Wikipedia importance dump file not found. '
+                            'Calculating importance values of locations will not '
+                            'use Wikipedia importance data.')
+
+                LOG.warning('Importing secondary importance raster data')
+                if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
+                                                    args.project_dir) != 0:
+                    LOG.error('Secondary importance file not imported. '
+                            'Falling back to default ranking.')
+
+                self._setup_tables(args.config, args.reverse_only)
+
+        if args.continue_at in ('import-from-file', 'load-data', None):
+            LOG.warning('Initialise tables')
+            with connect(args.config.get_libpq_dsn()) as conn:
+                database_import.truncate_data_tables(conn)
+
+            LOG.warning('Load data into placex table')
+            database_import.load_data(args.config.get_libpq_dsn(), num_threads)
+
+        LOG.warning("Setting up tokenizer")
+        tokenizer = self._get_tokenizer(args.continue_at, args.config)
+
+        if args.continue_at in ('import-from-file', 'load-data', None):
+            LOG.warning('Calculate postcodes')
+            postcodes.update_postcodes(args.config.get_libpq_dsn(),
+                                       args.project_dir, tokenizer)
+
+        if args.continue_at in \
+            ('import-from-file', 'load-data', 'indexing', None):
+            LOG.warning('Indexing places')
+            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
+            indexer.index_full(analyse=not args.index_noanalyse)
+
+        LOG.warning('Post-process tables')
+        with connect(args.config.get_libpq_dsn()) as conn:
+            database_import.create_search_indices(conn, args.config,
+                                                  drop=args.no_updates,
+                                                  threads=num_threads)
+            LOG.warning('Create search index for default country names.')
+            country_info.create_country_names(conn, tokenizer,
+                                              args.config.get_str_list('LANGUAGES'))
+            if args.no_updates:
+                freeze.drop_update_tables(conn)
+        tokenizer.finalize_import(args.config)
+
+        LOG.warning('Recompute word counts')
+        tokenizer.update_statistics(args.config, threads=num_threads)
+
+        webdir = args.project_dir / 'website'
+        LOG.warning('Setup website at %s', webdir)
+        with connect(args.config.get_libpq_dsn()) as conn:
+            refresh.setup_website(webdir, args.config, conn)
+
+        self._finalize_database(args.config.get_libpq_dsn(), args.offline)
+
+        return 0
+
+
+    def _setup_tables(self, config: Configuration, reverse_only: bool) -> None:
+        """ Set up the basic database layout: tables, indexes and functions.
+        """
+        from ..tools import database_import, refresh
+
+        with connect(config.get_libpq_dsn()) as conn:
+            LOG.warning('Create functions (1st pass)')
+            refresh.create_functions(conn, config, False, False)
+            LOG.warning('Create tables')
+            database_import.create_tables(conn, config, reverse_only=reverse_only)
+            refresh.load_address_levels_from_config(conn, config)
+            LOG.warning('Create functions (2nd pass)')
+            refresh.create_functions(conn, config, False, False)
+            LOG.warning('Create table triggers')
+            database_import.create_table_triggers(conn, config)
+            LOG.warning('Create partition tables')
+            database_import.create_partition_tables(conn, config)
+            LOG.warning('Create functions (3rd pass)')
+            refresh.create_functions(conn, config, False, False)
+
+
+    def _get_tokenizer(self, continue_at: Optional[str],
+                       config: Configuration) -> AbstractTokenizer:
+        """ Set up a new tokenizer or load an already initialised one.
+        """
+        from ..tokenizer import factory as tokenizer_factory
+
+        if continue_at in ('import-from-file', 'load-data', None):
+            # (re)initialise the tokenizer data
+            return tokenizer_factory.create_tokenizer(config)
+
+        # just load the tokenizer
+        return tokenizer_factory.get_tokenizer_for_db(config)
+
+
+    def _finalize_database(self, dsn: str, offline: bool) -> None:
+        """ Determine the database date and set the status accordingly.
+        """
+        with connect(dsn) as conn:
+            properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))
+
+            try:
+                dbdate = status.compute_database_date(conn, offline)
+                status.set_status(conn, dbdate)
+                LOG.info('Database is at %s.', dbdate)
+            except Exception as exc: # pylint: disable=broad-except
+                LOG.error('Cannot determine date of database: %s', exc)
--- a/src/nominatim_db/clicmd/special_phrases.py
+++ b/src/nominatim_db/clicmd/special_phrases.py
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+    Implementation of the 'special-phrases' command.
+"""
+import argparse
+import logging
+from pathlib import Path
+
+from nominatim_core.errors import UsageError
+from nominatim_core.db.connection import connect
+from ..tools.special_phrases.sp_importer import SPImporter, SpecialPhraseLoader
+from ..tools.special_phrases.sp_wiki_loader import SPWikiLoader
+from ..tools.special_phrases.sp_csv_loader import SPCsvLoader
+from .args import NominatimArgs
+
+LOG = logging.getLogger()
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid importing modules that might not be used.
+# pylint: disable=E0012,C0415
+
+class ImportSpecialPhrases:
+    """\
+    Import special phrases.
+
+    Special phrases are search terms that narrow down the type of object
+    that should be searched. For example, you might want to search for
+    'Hotels in Barcelona'. The OSM wiki has a selection of special phrases
+    in many languages, which can be imported with this command.
+
+    You can also provide your own phrases in a CSV file. The file needs to have
+    the following five columns:
+     * phrase - the term expected for searching
+     * class - the OSM tag key of the object type
+     * type - the OSM tag value of the object type
+     * operator - the kind of search to be done (one of: in, near, name, -)
+     * plural - whether the term is a plural or not (Y/N)
+
+    An example file can be found in the Nominatim sources at
+    'test/testdb/full_en_phrases_test.csv'.
+
+    The import can be further configured to ignore specific key/value pairs.
+    This is particularly useful when importing phrases from the wiki. The
+    default configuration excludes some very common tags like building=yes.
+    The configuration can be customized by putting a file `phrase-settings.json`
+    with custom rules into the project directory or by using the `--config`
+    option to point to another configuration file.
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        group = parser.add_argument_group('Input arguments')
+        group.add_argument('--import-from-wiki', action='store_true',
+                           help='Import special phrases from the OSM wiki to the database')
+        group.add_argument('--import-from-csv', metavar='FILE',
+                           help='Import special phrases from a CSV file')
+        group.add_argument('--no-replace', action='store_true',
+                           help='Keep the old phrases and only add the new ones')
+
+
+    def run(self, args: NominatimArgs) -> int:
+
+        if args.import_from_wiki:
+            self.start_import(args, SPWikiLoader(args.config))
+
+        if args.import_from_csv:
+            if not Path(args.import_from_csv).is_file():
+                LOG.fatal("CSV file '%s' does not exist.", args.import_from_csv)
+                raise UsageError('Cannot access file.')
+
+            self.start_import(args, SPCsvLoader(args.import_from_csv))
+
+        return 0
+
+
+    def start_import(self, args: NominatimArgs, loader: SpecialPhraseLoader) -> None:
+        """
+            Create the SPImporter object containing the right
+            sp loader and then start the import of special phrases.
+        """
+        from ..tokenizer import factory as tokenizer_factory
+
+        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+        should_replace = not args.no_replace
+        with connect(args.config.get_libpq_dsn()) as db_connection:
+            SPImporter(
+                args.config, db_connection, loader
+            ).import_phrases(tokenizer, should_replace)