split code into submodules

This commit is contained in:
Sarah Hoffmann
2024-05-16 11:55:17 +02:00
parent 0fb4fe8e4d
commit 6e89310a92
137 changed files with 757 additions and 716 deletions

View File

@@ -0,0 +1,28 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Subcommand definitions for the command-line tool.
"""
# mypy and pylint disagree about the style of explicit exports,
# see https://github.com/PyCQA/pylint/issues/6006.
# pylint: disable=useless-import-alias
from .setup import SetupAll as SetupAll
from .replication import UpdateReplication as UpdateReplication
from .api import (APISearch as APISearch,
APIReverse as APIReverse,
APILookup as APILookup,
APIDetails as APIDetails,
APIStatus as APIStatus)
from .index import UpdateIndex as UpdateIndex
from .refresh import UpdateRefresh as UpdateRefresh
from .add_data import UpdateAddData as UpdateAddData
from .admin import AdminFuncs as AdminFuncs
from .freeze import SetupFreeze as SetupFreeze
from .special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
from .export import QueryExport as QueryExport
from .convert import ConvertDB as ConvertDB

View File

@@ -0,0 +1,101 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'add-data' subcommand.
"""
from typing import cast
import argparse
import logging
import psutil
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
class UpdateAddData:
    """\
    Add additional data from a file or an online source.

    This command allows to add or update the search data in the database.
    The data can come either from an OSM file or single OSM objects can
    directly be downloaded from the OSM API. This function only loads the
    data into the database. Afterwards it still needs to be integrated
    in the search index. Use the `nominatim index` command for that.

    The command can also be used to add external non-OSM data to the
    database. At the moment the only supported format is TIGER housenumber
    data. See the online documentation at
    https://nominatim.org/release-docs/latest/admin/Import/#installing-tiger-housenumber-data-for-the-us
    for more information.
    """
    # NOTE(review): the class docstring presumably doubles as the help text
    # of the subcommand - confirm before rewording it.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the options of the 'add-data' subcommand.

            Exactly one of the data sources in the 'Source' group must be
            given; the options in 'Extra arguments' are optional.
        """
        source = parser.add_argument_group('Source')\
                       .add_mutually_exclusive_group(required=True)
        source.add_argument('--file', metavar='FILE',
                            help='Import data from an OSM file or diff file')
        source.add_argument('--diff', metavar='FILE',
                            help='Import data from an OSM diff file (deprecated: use --file)')
        # The three single-object sources only differ in the OSM type.
        for osm_type in ('node', 'way', 'relation'):
            source.add_argument(f'--{osm_type}', metavar='ID', type=int,
                                help=f'Import a single {osm_type} from the API')
        source.add_argument('--tiger-data', metavar='DIR',
                            help='Add housenumbers from the US TIGER census database')

        extras = parser.add_argument_group('Extra arguments')
        extras.add_argument('--use-main-api', action='store_true',
                            help='Use OSM API instead of Overpass to download objects')
        extras.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                            help='Size of cache to be used by osm2pgsql (in MB)')
        extras.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
                            help='Set timeout for file downloads')

    def run(self, args: NominatimArgs) -> int:
        """ Load the data from the source selected on the command line.

            Returns the result of the import function that was called or
            0 when none of the sources is set.
        """
        from ..tokenizer import factory as tokenizer_factory
        from ..tools import tiger_data, add_osm_data

        if args.tiger_data:
            tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
            # Fall back to the CPU count (or a single thread) when the
            # user did not request a number of threads.
            num_threads = args.threads or psutil.cpu_count() or 1
            return tiger_data.add_tiger_data(args.tiger_data, args.config,
                                             num_threads, tokenizer)

        osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)

        # --diff is only a deprecated alias for --file.
        osm_file = args.file or args.diff
        if osm_file:
            return add_osm_data.add_data_from_file(args.config.get_libpq_dsn(),
                                                   cast(str, osm_file),
                                                   osm2pgsql_params)

        for osm_type in ('node', 'way', 'relation'):
            osm_id = getattr(args, osm_type)
            if osm_id:
                return add_osm_data.add_osm_object(args.config.get_libpq_dsn(),
                                                   osm_type, osm_id,
                                                   args.use_main_api,
                                                   osm2pgsql_params)

        return 0

View File

@@ -0,0 +1,123 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'admin' subcommand.
"""
import logging
import argparse
import random
import nominatim_api as napi
from nominatim_core.db.connection import connect
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
class AdminFuncs:
    """\
    Analyse and maintain the database.
    """
    # NOTE(review): the class docstring presumably doubles as the help text
    # of the subcommand - confirm before rewording it.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the options of the 'admin' subcommand.

            Exactly one admin task must be selected. The remaining groups
            hold parameters for the cache warming and indexing analysis
            tasks.
        """
        group = parser.add_argument_group('Admin tasks')
        objs = group.add_mutually_exclusive_group(required=True)
        objs.add_argument('--warm', action='store_true',
                          help='Warm database caches for search and reverse queries')
        objs.add_argument('--check-database', action='store_true',
                          help='Check that the database is complete and operational')
        objs.add_argument('--migrate', action='store_true',
                          help='Migrate the database to a new software version')
        objs.add_argument('--analyse-indexing', action='store_true',
                          help='Print performance analysis of the indexing process')
        objs.add_argument('--collect-os-info', action="store_true",
                          help="Generate a report about the host system information")
        objs.add_argument('--clean-deleted', action='store', metavar='AGE',
                          help='Clean up deleted relations')

        # Both switches write into the same 'target' destination.
        group = parser.add_argument_group('Arguments for cache warming')
        group.add_argument('--search-only', action='store_const', dest='target',
                           const='search',
                           help="Only pre-warm tables for search queries")
        group.add_argument('--reverse-only', action='store_const', dest='target',
                           const='reverse',
                           help="Only pre-warm tables for reverse queries")

        group = parser.add_argument_group('Arguments for index analysis')
        mgroup = group.add_mutually_exclusive_group()
        mgroup.add_argument('--osm-id', type=str,
                            help='Analyse indexing of the given OSM object')
        mgroup.add_argument('--place-id', type=int,
                            help='Analyse indexing of the given Nominatim object')

    def run(self, args: NominatimArgs) -> int:
        """ Execute the admin task selected on the command line.

            Returns the exit code of the task, or 1 when no task is set.
        """
        # pylint: disable=too-many-return-statements
        if args.warm:
            return self._warm(args)

        if args.check_database:
            LOG.warning('Checking database')
            from ..tools import check_database
            return check_database.check_database(args.config)

        if args.analyse_indexing:
            LOG.warning('Analysing performance of indexing function')
            from ..tools import admin
            admin.analyse_indexing(args.config, osm_id=args.osm_id, place_id=args.place_id)
            return 0

        if args.migrate:
            LOG.warning('Checking for necessary database migrations')
            from ..tools import migration
            return migration.migrate(args.config, args)

        if args.collect_os_info:
            LOG.warning("Reporting System Information")
            from ..tools import collect_os_info
            collect_os_info.report_system_information(args.config)
            return 0

        if args.clean_deleted:
            LOG.warning('Cleaning up deleted relations')
            from ..tools import admin
            admin.clean_deleted_relations(args.config, age=args.clean_deleted)
            return 0

        return 1

    def _warm(self, args: NominatimArgs) -> int:
        """ Warm the database caches by running 1000 reverse queries on
            random coordinates and/or searching for the 1000 most frequent
            words, depending on `args.target`. Always returns 0.
        """
        LOG.warning('Warming database caches')

        api = napi.NominatimAPI(args.project_dir)

        try:
            if args.target != 'search':
                for _ in range(1000):
                    api.reverse((random.uniform(-90, 90), random.uniform(-180, 180)),
                                address_details=True)

            if args.target != 'reverse':
                from ..tokenizer import factory as tokenizer_factory

                tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
                with connect(args.config.get_libpq_dsn()) as conn:
                    # Without a search_name table there is nothing to warm.
                    if conn.table_exists('search_name'):
                        words = tokenizer.most_frequent_words(conn, 1000)
                    else:
                        words = []

                for word in words:
                    api.search(word)
        finally:
            api.close()

        return 0

View File

@@ -0,0 +1,374 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Subcommand definitions for API calls from the command line.
"""
from typing import Dict, Any
import argparse
import logging
import json
import sys
import nominatim_api as napi
import nominatim_api.v1 as api_output
from nominatim_api.v1.helpers import zoom_to_rank, deduplicate_results
from nominatim_api.v1.format import dispatch as formatting
import nominatim_api.logging as loglib
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
LOG = logging.getLogger()
# Parts of a structured search query as (parameter name, description).
STRUCTURED_QUERY = (
    ('amenity', 'name and/or type of POI'),
    ('street', 'housenumber and street'),
    ('city', 'city, town or village'),
    ('county', 'county'),
    ('state', 'state'),
    ('country', 'country'),
    ('postalcode', 'postcode'),
)

# Boolean switches for additional result data as (parameter name, help text).
EXTRADATA_PARAMS = (
    ('addressdetails', 'Include a breakdown of the address into elements'),
    ('extratags',
     'Include additional information if available (e.g. wikipedia link, opening hours)'),
    ('namedetails', 'Include a list of alternative names'),
)
def _add_api_output_arguments(parser: argparse.ArgumentParser) -> None:
    """ Add the 'Output arguments' group to the given parser.

        The group contains the output format, one switch per entry in
        EXTRADATA_PARAMS, the preferred language and the geometry output
        options.
    """
    group = parser.add_argument_group('Output arguments')
    # 'debug' is not a formatter; the subcommands handle it themselves.
    group.add_argument('--format', default='jsonv2',
                       choices=formatting.list_formats(napi.SearchResults) + ['debug'],
                       help='Format of result')
    for name, desc in EXTRADATA_PARAMS:
        group.add_argument('--' + name, action='store_true', help=desc)

    group.add_argument('--lang', '--accept-language', metavar='LANGS',
                       help='Preferred language order for presenting search results')
    group.add_argument('--polygon-output',
                       choices=['geojson', 'kml', 'svg', 'text'],
                       help='Output geometry of results as a GeoJSON, KML, SVG or WKT')
    group.add_argument('--polygon-threshold', type=float, default=0.0,
                       metavar='TOLERANCE',
                       help=("Simplify output geometry. "
                             "Parameter is difference tolerance in degrees."))
class APISearch:
    """\
    Execute a search query.

    This command works exactly the same as if calling the /search endpoint on
    the web API. See the online documentation for more details on the
    various parameters:
    https://nominatim.org/release-docs/latest/api/Search/
    """
    # NOTE(review): the class docstring presumably doubles as the help text
    # of the subcommand - confirm before rewording it.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the query, output and result limitation options of
            the 'search' subcommand.
        """
        group = parser.add_argument_group('Query arguments')
        group.add_argument('--query',
                           help='Free-form query string')
        for name, desc in STRUCTURED_QUERY:
            group.add_argument('--' + name, help='Structured query: ' + desc)

        _add_api_output_arguments(parser)

        group = parser.add_argument_group('Result limitation')
        group.add_argument('--countrycodes', metavar='CC,..',
                           help='Limit search results to one or more countries')
        group.add_argument('--exclude_place_ids', metavar='ID,..',
                           help='List of search object to be excluded')
        group.add_argument('--limit', type=int, default=10,
                           help='Limit the number of returned results')
        group.add_argument('--viewbox', metavar='X1,Y1,X2,Y2',
                           help='Preferred area to find search results')
        group.add_argument('--bounded', action='store_true',
                           help='Strictly restrict results to viewbox area')

        group = parser.add_argument_group('Other arguments')
        group.add_argument('--no-dedupe', action='store_false', dest='dedupe',
                           help='Do not remove duplicates from the result list')

    def run(self, args: NominatimArgs) -> int:
        """ Run the search and print the formatted results to stdout.

            A free-form search is executed when --query is given, a
            structured search otherwise. With the 'debug' format the
            collected debug log is printed instead of the results.
            Always returns 0.
        """
        if args.format == 'debug':
            loglib.set_log_output('text')

        api = napi.NominatimAPI(args.project_dir)

        try:
            # More results than the limit are requested, presumably to
            # leave headroom for the deduplication below.
            params: Dict[str, Any] = {'max_results': args.limit + min(args.limit, 10),
                                      'address_details': True,  # needed for display name
                                      'geometry_output': args.get_geometry_output(),
                                      'geometry_simplification': args.polygon_threshold,
                                      'countries': args.countrycodes,
                                      'excluded': args.exclude_place_ids,
                                      'viewbox': args.viewbox,
                                      'bounded_viewbox': args.bounded,
                                      'locales': args.get_locales(api.config.DEFAULT_LANGUAGE)
                                      }

            if args.query:
                results = api.search(args.query, **params)
            else:
                results = api.search_address(amenity=args.amenity,
                                             street=args.street,
                                             city=args.city,
                                             county=args.county,
                                             state=args.state,
                                             postalcode=args.postalcode,
                                             country=args.country,
                                             **params)
        finally:
            # Release the API resources even when the query fails.
            api.close()

        if args.dedupe and len(results) > 1:
            results = deduplicate_results(results, args.limit)

        if args.format == 'debug':
            print(loglib.get_and_disable())
            return 0

        output = api_output.format_result(
            results,
            args.format,
            {'extratags': args.extratags,
             'namedetails': args.namedetails,
             'addressdetails': args.addressdetails})
        if args.format != 'xml':
            # reformat the result, so it is pretty-printed
            json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
        else:
            sys.stdout.write(output)
        sys.stdout.write('\n')

        return 0
class APIReverse:
    """\
    Execute API reverse query.

    This command works exactly the same as if calling the /reverse endpoint on
    the web API. See the online documentation for more details on the
    various parameters:
    https://nominatim.org/release-docs/latest/api/Reverse/
    """
    # NOTE(review): the class docstring presumably doubles as the help text
    # of the subcommand - confirm before rewording it.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the query and output options of the 'reverse'
            subcommand.
        """
        group = parser.add_argument_group('Query arguments')
        group.add_argument('--lat', type=float, required=True,
                           help='Latitude of coordinate to look up (in WGS84)')
        group.add_argument('--lon', type=float, required=True,
                           help='Longitude of coordinate to look up (in WGS84)')
        group.add_argument('--zoom', type=int,
                           help='Level of detail required for the address')
        group.add_argument('--layer', metavar='LAYER',
                           choices=[n.name.lower() for n in napi.DataLayer if n.name],
                           action='append', required=False, dest='layers',
                           help='Restrict results to the given data layer (may be repeated)')

        _add_api_output_arguments(parser)

    def run(self, args: NominatimArgs) -> int:
        """ Run the reverse query and print the formatted result to stdout.

            Returns 0 on success (and always in 'debug' format, where only
            the debug log is printed) and 42 when no result was found.
        """
        if args.format == 'debug':
            loglib.set_log_output('text')

        api = napi.NominatimAPI(args.project_dir)

        try:
            result = api.reverse(napi.Point(args.lon, args.lat),
                                 # zoom level 18 is used when --zoom is not given
                                 max_rank=zoom_to_rank(args.zoom or 18),
                                 layers=args.get_layers(napi.DataLayer.ADDRESS
                                                        | napi.DataLayer.POI),
                                 address_details=True,  # needed for display name
                                 geometry_output=args.get_geometry_output(),
                                 geometry_simplification=args.polygon_threshold,
                                 locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
        finally:
            # Release the API resources even when the query fails.
            api.close()

        if args.format == 'debug':
            print(loglib.get_and_disable())
            return 0

        if result:
            output = api_output.format_result(
                napi.ReverseResults([result]),
                args.format,
                {'extratags': args.extratags,
                 'namedetails': args.namedetails,
                 'addressdetails': args.addressdetails})
            if args.format != 'xml':
                # reformat the result, so it is pretty-printed
                json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
            else:
                sys.stdout.write(output)
            sys.stdout.write('\n')

            return 0

        LOG.error("Unable to geocode.")
        return 42
class APILookup:
    """\
    Execute API lookup query.

    This command works exactly the same as if calling the /lookup endpoint on
    the web API. See the online documentation for more details on the
    various parameters:
    https://nominatim.org/release-docs/latest/api/Lookup/
    """
    # NOTE(review): the class docstring presumably doubles as the help text
    # of the subcommand - confirm before rewording it.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the query and output options of the 'lookup'
            subcommand.
        """
        group = parser.add_argument_group('Query arguments')
        group.add_argument('--id', metavar='OSMID',
                           action='append', required=True, dest='ids',
                           help='OSM id to lookup in format <NRW><id> (may be repeated)')

        _add_api_output_arguments(parser)

    def run(self, args: NominatimArgs) -> int:
        """ Look up the requested OSM objects and print the formatted
            results to stdout.

            With the 'debug' format the debug log collected during the
            lookup is printed instead of the results. Always returns 0.
        """
        if args.format == 'debug':
            loglib.set_log_output('text')

        # The first character of each id is the OSM type, the rest the
        # numeric id.
        places = [napi.OsmID(o[0], int(o[1:])) for o in args.ids]

        api = napi.NominatimAPI(args.project_dir)

        try:
            results = api.lookup(places,
                                 address_details=True,  # needed for display name
                                 geometry_output=args.get_geometry_output(),
                                 geometry_simplification=args.polygon_threshold or 0.0,
                                 locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
        finally:
            # Release the API resources even when the query fails.
            api.close()

        # The debug log can only be printed after the query has run.
        if args.format == 'debug':
            print(loglib.get_and_disable())
            return 0

        output = api_output.format_result(
            results,
            args.format,
            {'extratags': args.extratags,
             'namedetails': args.namedetails,
             'addressdetails': args.addressdetails})
        if args.format != 'xml':
            # reformat the result, so it is pretty-printed
            json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
        else:
            sys.stdout.write(output)
        sys.stdout.write('\n')

        return 0
class APIDetails:
    """\
    Execute API details query.

    This command works exactly the same as if calling the /details endpoint on
    the web API. See the online documentation for more details on the
    various parameters:
    https://nominatim.org/release-docs/latest/api/Details/
    """
    # NOTE(review): the class docstring presumably doubles as the help text
    # of the subcommand - confirm before rewording it.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the query and output options of the 'details'
            subcommand. Exactly one object reference must be given.
        """
        group = parser.add_argument_group('Query arguments')
        objs = group.add_mutually_exclusive_group(required=True)
        objs.add_argument('--node', '-n', type=int,
                          help="Look up the OSM node with the given ID.")
        objs.add_argument('--way', '-w', type=int,
                          help="Look up the OSM way with the given ID.")
        objs.add_argument('--relation', '-r', type=int,
                          help="Look up the OSM relation with the given ID.")
        objs.add_argument('--place_id', '-p', type=int,
                          help='Database internal identifier of the OSM object to look up')
        group.add_argument('--class', dest='object_class',
                           help=("Class type to disambiguate multiple entries "
                                 "of the same object."))

        group = parser.add_argument_group('Output arguments')
        group.add_argument('--addressdetails', action='store_true',
                           help='Include a breakdown of the address into elements')
        group.add_argument('--keywords', action='store_true',
                           help='Include a list of name keywords and address keywords')
        group.add_argument('--linkedplaces', action='store_true',
                           help='Include details of places that are linked with this one')
        group.add_argument('--hierarchy', action='store_true',
                           help='Include details of places lower in the address hierarchy')
        group.add_argument('--group_hierarchy', action='store_true',
                           help='Group the places by type')
        group.add_argument('--polygon_geojson', action='store_true',
                           help='Include geometry of result')
        group.add_argument('--lang', '--accept-language', metavar='LANGS',
                           help='Preferred language order for presenting search results')

    def run(self, args: NominatimArgs) -> int:
        """ Look up the details of a single object and print them as
            pretty-printed JSON to stdout.

            Returns 0 on success and 42 when the object was not found.
        """
        place: napi.PlaceRef
        if args.node:
            place = napi.OsmID('N', args.node, args.object_class)
        elif args.way:
            place = napi.OsmID('W', args.way, args.object_class)
        elif args.relation:
            place = napi.OsmID('R', args.relation, args.object_class)
        else:
            # The required mutually exclusive group leaves only the place ID.
            assert args.place_id is not None
            place = napi.PlaceID(args.place_id)

        api = napi.NominatimAPI(args.project_dir)

        try:
            locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
            result = api.details(place,
                                 address_details=args.addressdetails,
                                 linked_places=args.linkedplaces,
                                 parented_places=args.hierarchy,
                                 keywords=args.keywords,
                                 geometry_output=napi.GeometryFormat.GEOJSON
                                 if args.polygon_geojson
                                 else napi.GeometryFormat.NONE,
                                 locales=locales)
        finally:
            # Release the API resources even when the query fails.
            api.close()

        if result:
            output = api_output.format_result(
                result,
                'json',
                {'locales': locales,
                 'group_hierarchy': args.group_hierarchy})
            # reformat the result, so it is pretty-printed
            json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
            sys.stdout.write('\n')

            return 0

        LOG.error("Object not found in database.")
        return 42
class APIStatus:
    """\
    Execute API status query.

    This command works exactly the same as if calling the /status endpoint on
    the web API. See the online documentation for more details on the
    various parameters:
    https://nominatim.org/release-docs/latest/api/Status/
    """
    # NOTE(review): the class docstring presumably doubles as the help text
    # of the subcommand - confirm before rewording it.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the output format option of the 'status' subcommand.
            The first format available for status results is the default.
        """
        formats = api_output.list_formats(napi.StatusResult)
        group = parser.add_argument_group('API parameters')
        group.add_argument('--format', default=formats[0], choices=formats,
                           help='Format of result')

    def run(self, args: NominatimArgs) -> int:
        """ Query the database status and print it in the requested
            format. Always returns 0.
        """
        api = napi.NominatimAPI(args.project_dir)
        try:
            status = api.status()
        finally:
            # Release the API resources even when the query fails.
            api.close()

        print(api_output.format_result(status, args.format, {}))
        return 0

View File

@@ -0,0 +1,260 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Provides custom functions over command-line arguments.
"""
from typing import Optional, List, Dict, Any, Sequence, Tuple
import argparse
import logging
from functools import reduce
from pathlib import Path
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.typing import Protocol
import nominatim_api as napi
LOG = logging.getLogger()
class Subcommand(Protocol):
    """
    Protocol that every class providing a CLI subcommand has to satisfy.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """
        Add the parameters of the subcommand to the parser that has
        been created for it.
        """

    def run(self, args: 'NominatimArgs') -> int:
        """
        Execute the subcommand using the parsed command-line arguments.
        """
class NominatimArgs:
    """ Namespace class for the nominatim command line tool. It declares
        the attributes that receive the parsed command-line arguments and
        offers helpers to convert them into API-compatible values.
    """
    # Basic environment set by root program.
    config: Configuration
    project_dir: Path

    # Global switches
    version: bool
    subcommand: Optional[str]
    command: Subcommand

    # Shared parameters
    osm2pgsql_cache: Optional[int]
    socket_timeout: int

    # Arguments added to all subcommands.
    verbose: int
    threads: Optional[int]

    # Arguments to 'add-data'
    file: Optional[str]
    diff: Optional[str]
    node: Optional[int]
    way: Optional[int]
    relation: Optional[int]
    tiger_data: Optional[str]
    use_main_api: bool

    # Arguments to 'admin'
    warm: bool
    check_database: bool
    migrate: bool
    collect_os_info: bool
    clean_deleted: str
    analyse_indexing: bool
    target: Optional[str]
    osm_id: Optional[str]
    place_id: Optional[int]

    # Arguments to 'import'
    osm_file: List[str]
    continue_at: Optional[str]
    reverse_only: bool
    no_partitions: bool
    no_updates: bool
    offline: bool
    ignore_errors: bool
    index_noanalyse: bool
    prepare_database: bool

    # Arguments to 'index'
    boundaries_only: bool
    no_boundaries: bool
    minrank: int
    maxrank: int

    # Arguments to 'export'
    output_type: str
    output_format: str
    output_all_postcodes: bool
    language: Optional[str]
    restrict_to_country: Optional[str]

    # Arguments to 'convert'
    output: Path

    # Arguments to 'refresh'
    postcodes: bool
    word_tokens: bool
    word_counts: bool
    address_levels: bool
    functions: bool
    wiki_data: bool
    secondary_importance: bool
    importance: bool
    website: bool
    diffs: bool
    enable_debug_statements: bool
    data_object: Sequence[Tuple[str, int]]
    data_area: Sequence[Tuple[str, int]]

    # Arguments to 'replication'
    init: bool
    update_functions: bool
    check_for_updates: bool
    once: bool
    catch_up: bool
    do_index: bool

    # Arguments to 'serve'
    server: str
    engine: str

    # Arguments to 'special-phrases'
    import_from_wiki: bool
    import_from_csv: Optional[str]
    no_replace: bool

    # Arguments to all query functions
    format: str
    addressdetails: bool
    extratags: bool
    namedetails: bool
    lang: Optional[str]
    polygon_output: Optional[str]
    polygon_threshold: Optional[float]

    # Arguments to 'search'
    query: Optional[str]
    amenity: Optional[str]
    street: Optional[str]
    city: Optional[str]
    county: Optional[str]
    state: Optional[str]
    country: Optional[str]
    postalcode: Optional[str]
    countrycodes: Optional[str]
    exclude_place_ids: Optional[str]
    limit: int
    viewbox: Optional[str]
    bounded: bool
    dedupe: bool

    # Arguments to 'reverse'
    lat: float
    lon: float
    zoom: Optional[int]
    layers: Optional[Sequence[str]]

    # Arguments to 'lookup'
    ids: Sequence[str]

    # Arguments to 'details'
    object_class: Optional[str]
    linkedplaces: bool
    hierarchy: bool
    keywords: bool
    polygon_geojson: bool
    group_hierarchy: bool

    def osm2pgsql_options(self, default_cache: int,
                          default_threads: int) -> Dict[str, Any]:
        """ Assemble the standard osm2pgsql options from the configuration
            and the command line arguments. The defaults are used when the
            cache size or thread count is not set on the command line.
            The resulting dict can be further customized and then used
            in `run_osm2pgsql()`.
        """
        cfg = self.config
        tablespaces = {'slim_data': cfg.TABLESPACE_OSM_DATA,
                       'slim_index': cfg.TABLESPACE_OSM_INDEX,
                       'main_data': cfg.TABLESPACE_PLACE_DATA,
                       'main_index': cfg.TABLESPACE_PLACE_INDEX}

        return {'osm2pgsql': cfg.OSM2PGSQL_BINARY or cfg.lib_dir.osm2pgsql,
                'osm2pgsql_cache': self.osm2pgsql_cache or default_cache,
                'osm2pgsql_style': cfg.get_import_style_file(),
                'osm2pgsql_style_path': cfg.config_dir,
                'threads': self.threads or default_threads,
                'dsn': cfg.get_libpq_dsn(),
                # an unset flatnode file is passed on as empty string
                'flatnode_file': str(cfg.get_path('FLATNODE_FILE') or ''),
                'tablespaces': tablespaces}

    def get_osm_file_list(self) -> Optional[List[Path]]:
        """ Convert the --osm-file argument into a list of Paths. Returns
            None if no argument was given and raises a UsageError when
            one of the files does not exist.
        """
        if not self.osm_file:
            return None

        files: List[Path] = []
        for name in self.osm_file:
            path = Path(name)
            if not path.is_file():
                LOG.fatal("OSM file '%s' does not exist.", path)
                raise UsageError('Cannot access file.')
            files.append(path)

        return files

    def get_geometry_output(self) -> napi.GeometryFormat:
        """ Translate the --polygon-output argument into the corresponding
            geometry format of the API. Raises a UsageError for an
            unknown format name.
        """
        requested = self.polygon_output
        if not requested:
            return napi.GeometryFormat.NONE

        known_formats = {'geojson': napi.GeometryFormat.GEOJSON,
                         'kml': napi.GeometryFormat.KML,
                         'svg': napi.GeometryFormat.SVG,
                         'text': napi.GeometryFormat.TEXT}
        if requested in known_formats:
            return known_formats[requested]

        # Anything else is looked up by its upper-cased name.
        try:
            return napi.GeometryFormat[requested.upper()]
        except KeyError as exp:
            raise UsageError(f"Unknown polygon output format '{self.polygon_output}'.") from exp

    def get_locales(self, default: Optional[str]) -> napi.Locales:
        """ Create the locales from the language parameter, falling back
            to the given default and finally to empty locales.
        """
        languages = self.lang or default
        if languages:
            return napi.Locales.from_accept_languages(languages)

        return napi.Locales()

    def get_layers(self, default: napi.DataLayer) -> Optional[napi.DataLayer]:
        """ Combine the selected layers into a single DataLayer value.
            Returns the given default when no layer was selected.
        """
        if not self.layers:
            return default

        selected = (napi.DataLayer[name.upper()] for name in self.layers)
        return reduce(napi.DataLayer.__or__, selected)

View File

@@ -0,0 +1,95 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'convert' subcommand.
"""
from typing import Set, Any, Union, Optional, Sequence
import argparse
import asyncio
from pathlib import Path
from nominatim_core.errors import UsageError
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415
class WithAction(argparse.Action):
    """ Special action that saves a list of flags, given on the command-line
        as `--with-foo` or `--without-foo`.

        The flags are collected in the set handed in as `dest_set`. Nothing
        is written into the argparse namespace.
    """

    def __init__(self, option_strings: Sequence[str], dest: Any,
                 default: bool = True, **kwargs: Any) -> None:
        """ Create the `--with-X`/`--without-X` pair for each `--X` option.

            `dest_set` is a required keyword argument with the set to
            collect the flags in. When `default` is true, the flag is
            added to the set right away.

            Raises a ValueError when `nargs` is given, when used for a
            positional parameter or with a short-form option.
        """
        if 'nargs' in kwargs:
            raise ValueError("nargs not allowed.")
        # argparse hands in an empty list for positional parameters.
        if not option_strings:
            raise ValueError("Positional parameter not allowed.")

        # Validate all options before touching the caller's set.
        if not all(opt.startswith('--') for opt in option_strings):
            raise ValueError("short-form options not allowed")

        self.dest_set = kwargs.pop('dest_set')
        full_option_strings = []
        for opt in option_strings:
            flag = opt[2:]
            if default:
                self.dest_set.add(flag)
            full_option_strings.append(f"--with-{flag}")
            full_option_strings.append(f"--without-{flag}")

        # The destination is suppressed because results go into dest_set.
        super().__init__(full_option_strings, argparse.SUPPRESS, nargs=0, **kwargs)

    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace,
                 values: Union[str, Sequence[Any], None],
                 option_string: Optional[str] = None) -> None:
        """ Add the flag to the set for `--with-X` and remove it for
            `--without-X`.
        """
        assert option_string
        if option_string.startswith('--with-'):
            self.dest_set.add(option_string[7:])
        elif option_string.startswith('--without-'):
            self.dest_set.discard(option_string[10:])
class ConvertDB:
    """ Convert an existing database into a different format. (EXPERIMENTAL)

        Dump a read-only version of the database in a different format.
        At the moment only a SQLite database suitable for reverse lookup
        can be created.
    """
    # NOTE(review): the class docstring presumably doubles as the help text
    # of the subcommand - confirm before rewording it.

    def __init__(self) -> None:
        # Names of the enabled layout switches, filled by WithAction.
        self.options: Set[str] = set()

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the options of the 'convert' subcommand.

            The layout switches are available as `--with-X`/`--without-X`
            and are collected in `self.options`. All three are enabled
            by default.
        """
        parser.add_argument('--format', default='sqlite',
                            choices=('sqlite', ),
                            help='Format of the output database (must be sqlite currently)')
        parser.add_argument('--output', '-o', required=True, type=Path,
                            help='File to write the database to.')
        group = parser.add_argument_group('Switches to define database layout '
                                          '(currently no effect)')
        group.add_argument('--reverse', action=WithAction, dest_set=self.options, default=True,
                           help='Enable/disable support for reverse and lookup API'
                                ' (default: enabled)')
        group.add_argument('--search', action=WithAction, dest_set=self.options, default=True,
                           help='Enable/disable support for search API (default: enabled)')
        group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
                           help='Enable/disable support for details API (default: enabled)')

    def run(self, args: NominatimArgs) -> int:
        """ Convert the database into the requested output format.

            Raises a UsageError when the output file already exists.
            Returns 0 after a conversion and 1 for an unhandled format.
        """
        if args.output.exists():
            raise UsageError(f"File '{args.output}' already exists. Refusing to overwrite.")

        if args.format == 'sqlite':
            from ..tools import convert_sqlite

            asyncio.run(convert_sqlite.convert(args.project_dir, args.output, self.options))
            return 0

        return 1

View File

@@ -0,0 +1,200 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'export' subcommand.
"""
from typing import Optional, List, cast
import logging
import argparse
import asyncio
import csv
import sys
import sqlalchemy as sa
import nominatim_api as napi
from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
from nominatim_api.types import LookupDetails
from nominatim_core.errors import UsageError
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415
# Needed for SQLAlchemy
# pylint: disable=singleton-comparison
LOG = logging.getLogger()
# Inclusive range of address ranks (lowest, highest) for each output type.
RANK_RANGE_MAP = {
    'country': (4, 4),
    'state': (5, 9),
    'county': (10, 12),
    'city': (13, 16),
    'suburb': (17, 21),
    'street': (26, 26),
    'path': (27, 27),
}

# Reverse lookup from a single address rank to its output type,
# derived from the ranges above.
RANK_TO_OUTPUT_MAP = {
    rank: output_type
    for output_type, (min_rank, max_rank) in RANK_RANGE_MAP.items()
    for rank in range(min_rank, max_rank + 1)
}
class QueryExport:
    """\
    Export places as CSV file from the database.
    """
    # NOTE(review): the class docstring presumably doubles as the help text
    # of the subcommand - confirm before rewording it.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the output and filter options of the 'export'
            subcommand.
        """
        group = parser.add_argument_group('Output arguments')
        group.add_argument('--output-type', default='street',
                           choices=('country', 'state', 'county',
                                    'city', 'suburb', 'street', 'path'),
                           help='Type of places to output (default: street)')
        group.add_argument('--output-format',
                           default='street;suburb;city;county;state;country',
                           help=("Semicolon-separated list of address types "
                                 "(see --output-type). Additionally accepts: "
                                 "placeid,postcode"))
        group.add_argument('--language',
                           help=("Preferred language for output "
                                 "(use local name, if omitted)"))

        # The OSM object filters are stored as node, way and relation.
        group = parser.add_argument_group('Filter arguments')
        group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
                           help='Export only objects within country')
        group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
                           dest='node',
                           help='Export only children of this OSM node')
        group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
                           dest='way',
                           help='Export only children of this OSM way')
        group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
                           dest='relation',
                           help='Export only children of this OSM relation')

    def run(self, args: NominatimArgs) -> int:
        """ Run the asynchronous export and return its exit code.
        """
        return asyncio.run(export(args))
async def export(args: NominatimArgs) -> int:
    """ Run the export as an asynchronous function.

        Selects all places from placex that have no linked place and whose
        address rank lies within the range configured for the requested
        output type. The selection may be narrowed to the children of an
        OSM object and/or to a country. Results are handed to
        dump_results() in batches of 1000. The API object is closed in
        every case. Always returns 0.
    """
    api = napi.NominatimAPIAsync(args.project_dir)

    try:
        min_rank, max_rank = RANK_RANGE_MAP[args.output_type]
        writer = init_csv_writer(args.output_format)

        # One connection streams the places, the second one is used for
        # looking up the address details of each batch.
        async with api.begin() as conn, api.begin() as detail_conn:
            placex = conn.t.placex
            cols = placex.c

            query = sa.select(cols.place_id, cols.parent_place_id,
                              cols.osm_type, cols.osm_id, cols.name,
                              cols.class_, cols.type, cols.admin_level,
                              cols.address, cols.extratags,
                              cols.housenumber, cols.postcode, cols.country_code,
                              cols.importance, cols.wikipedia, cols.indexed_date,
                              cols.rank_address, cols.rank_search,
                              cols.centroid)
            query = query.where(cols.linked_place_id == None)
            query = query.where(cols.rank_address.between(min_rank, max_rank))

            parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
            if parent_place_id:
                addressline = conn.t.addressline
                query = query.join(addressline, addressline.c.place_id == cols.place_id)
                query = query.where(addressline.c.address_place_id == parent_place_id)
                query = query.where(addressline.c.isaddress)

            if args.restrict_to_country:
                query = query.where(cols.country_code == args.restrict_to_country.lower())

            batch = []
            for row in await conn.execute(query):
                place = create_from_placex_row(row, ReverseResult)
                if place is None:
                    continue

                batch.append(place)
                if len(batch) == 1000:
                    await dump_results(detail_conn, batch, writer, args.language)
                    batch = []

            # Write out whatever is left of the last, incomplete batch.
            if batch:
                await dump_results(detail_conn, batch, writer, args.language)
    finally:
        await api.close()

    return 0
def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
    """ Create a CSV writer on stdout for the given output format.

        `output_format` is a semicolon-separated list of column names.
        The header line is written immediately. Keys in later rows that
        are not part of the column list are silently dropped by the writer.
    """
    csv_writer = csv.DictWriter(sys.stdout,
                                fieldnames=output_format.split(';'),
                                extrasaction='ignore')
    csv_writer.writeheader()

    return csv_writer
async def dump_results(conn: napi.SearchConnection,
                       results: List[ReverseResult],
                       writer: 'csv.DictWriter[str]',
                       lang: Optional[str]) -> None:
    """ Load the address details for the given results and write one
        CSV row per result.

        Each row starts out with the place ID and postcode of the result.
        Address lines that are part of the address and have a local name
        then fill in the column matching their address rank. A postcode
        address line overrides the postcode of the result itself.
    """
    locale = napi.Locales([lang] if lang else None)
    details = LookupDetails(address_details=True, locales=locale)
    await add_result_details(conn, results, details)

    for result in results:
        row = {'placeid': result.place_id,
               'postcode': result.postcode}

        for line in (result.address_rows or []):
            if not (line.isaddress and line.local_name):
                continue

            if line.category[1] == 'postcode':
                row['postcode'] = line.local_name
            else:
                # Ranks without an entry in the map have no output column.
                column = RANK_TO_OUTPUT_MAP.get(line.rank_address)
                if column is not None:
                    row[column] = line.local_name

        writer.writerow(row)
async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
                        way_id: Optional[int],
                        relation_id: Optional[int]) -> Optional[int]:
    """ Look up the place ID for the given OSM object.

        Only the first ID that is set is used, checked in the order
        node, way, relation. Returns None when no ID is given at all.
        Raises a UsageError when placex has no entry with an address rank
        above 0 for the requested object. When there are multiple entries,
        the one with the lowest address rank is returned.
    """
    candidates = (('N', node_id), ('W', way_id), ('R', relation_id))
    for osm_type, osm_id in candidates:
        if osm_id is not None:
            break
    else:
        return None

    placex = conn.t.placex
    query = sa.select(placex.c.place_id)\
              .where(placex.c.osm_type == osm_type)\
              .where(placex.c.osm_id == osm_id)\
              .where(placex.c.rank_address > 0)\
              .order_by(placex.c.rank_address)\
              .limit(1)

    for row in await conn.execute(query):
        return cast(int, row[0])

    raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')

View File

@@ -0,0 +1,43 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'freeze' subcommand.
"""
import argparse
from nominatim_core.db.connection import connect
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
class SetupFreeze:
    """\
    Make database read-only.

    About half of data in the Nominatim database is kept only to be able to
    keep the data up-to-date with new changes made in OpenStreetMap. This
    command drops all this data and only keeps the part needed for geocoding
    itself.

    This command has the same effect as the `--no-updates` option for imports.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Nothing to register: the command takes no options.
        """

    def run(self, args: NominatimArgs) -> int:
        """ Drop the update tables from the database and afterwards
            remove the flatnode file. Always returns 0.
        """
        from ..tools import freeze

        config = args.config

        with connect(config.get_libpq_dsn()) as db_conn:
            freeze.drop_update_tables(db_conn)

        flatnode_file = config.get_path('FLATNODE_FILE')
        freeze.drop_flatnode_file(flatnode_file)

        return 0

View File

@@ -0,0 +1,66 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'index' subcommand.
"""
import argparse
import psutil
from nominatim_core.db import status
from nominatim_core.db.connection import connect
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
class UpdateIndex:
    """\
    Reindex all new and modified data.

    Indexing is the process of computing the address and search terms for
    the places in the database. Every time data is added or changed, indexing
    needs to be run. Imports and replication updates automatically take care
    of indexing. For other cases, this function allows to run indexing manually.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the boundary and rank filter options.
        """
        filters = parser.add_argument_group('Filter arguments')
        filters.add_argument('--boundaries-only', action='store_true',
                             help="""Index only administrative boundaries.""")
        filters.add_argument('--no-boundaries', action='store_true',
                             help="""Index everything except administrative boundaries.""")
        filters.add_argument('--minrank', '-r', type=int, metavar='RANK', default=0,
                             help='Minimum/starting rank')
        filters.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30,
                             help='Maximum/finishing rank')

    def run(self, args: NominatimArgs) -> int:
        """ Index boundaries and/or all other places within the requested
            rank range. The database is only marked as indexed when
            nothing was filtered out. Always returns 0.
        """
        from ..indexer.indexer import Indexer
        from ..tokenizer import factory as tokenizer_factory

        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)

        dsn = args.config.get_libpq_dsn()
        num_threads = args.threads or psutil.cpu_count() or 1
        indexer = Indexer(dsn, tokenizer, num_threads)

        rank_range = (args.minrank, args.maxrank)
        with_boundaries = not args.no_boundaries
        with_places = not args.boundaries_only

        if with_boundaries:
            indexer.index_boundaries(*rank_range)
        if with_places:
            indexer.index_by_rank(*rank_range)
            indexer.index_postcodes()

        # A complete run over the default rank range covers everything.
        if with_boundaries and with_places and rank_range == (0, 30):
            with connect(args.config.get_libpq_dsn()) as conn:
                status.set_indexed(conn, True)

        return 0

View File

@@ -0,0 +1,187 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'refresh' subcommand.
"""
from typing import Tuple, Optional
import argparse
import logging
from pathlib import Path
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from ..tokenizer.base import AbstractTokenizer
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
def _parse_osm_object(obj: str) -> Tuple[str, int]:
    """ Parse the given argument into a tuple of OSM type and ID.

        The expected format is a single type letter (N, W or R in either
        case) directly followed by the numeric ID. The type is returned
        as an upper-case letter.

        Raises an argparse.ArgumentTypeError if the format is not recognized.
    """
    # isdecimal() instead of isdigit(): the latter also accepts characters
    # like superscript digits, which int() then refuses with a ValueError
    # instead of the documented ArgumentTypeError.
    if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdecimal():
        raise argparse.ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")

    return (obj[0].upper(), int(obj[1:]))
class UpdateRefresh:
    """\
    Recompute auxiliary data used by the indexing process.

    This sub-commands updates various static data and functions in the database.
    It usually needs to be run after changing various aspects of the
    configuration. The configuration documentation will mention the exact
    command to use in such case.

    Warning: the 'update' command must not be run in parallel with other update
    commands like 'replication' or 'add-data'.
    """

    def __init__(self) -> None:
        # Tokenizer instance shared by all refresh steps. It is only
        # loaded on first use, see _get_tokenizer().
        self.tokenizer: Optional[AbstractTokenizer] = None

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register one switch per refresh task plus the options that
            influence how the SQL functions are recreated.
        """
        group = parser.add_argument_group('Data arguments')
        group.add_argument('--postcodes', action='store_true',
                           help='Update postcode centroid table')
        group.add_argument('--word-tokens', action='store_true',
                           help='Clean up search terms')
        group.add_argument('--word-counts', action='store_true',
                           help='Compute frequency of full-word search terms')
        group.add_argument('--address-levels', action='store_true',
                           help='Reimport address level configuration')
        group.add_argument('--functions', action='store_true',
                           help='Update the PL/pgSQL functions in the database')
        group.add_argument('--wiki-data', action='store_true',
                           help='Update Wikipedia/data importance numbers')
        group.add_argument('--secondary-importance', action='store_true',
                           help='Update secondary importance raster data')
        group.add_argument('--importance', action='store_true',
                           help='Recompute place importances (expensive!)')
        group.add_argument('--website', action='store_true',
                           help='Refresh the directory that serves the scripts for the web API')
        # May be given multiple times. Each value is converted into a
        # (type, id) tuple by _parse_osm_object.
        group.add_argument('--data-object', action='append',
                           type=_parse_osm_object, metavar='OBJECT',
                           help='Mark the given OSM object as requiring an update'
                                ' (format: [NWR]<id>)')
        group.add_argument('--data-area', action='append',
                           type=_parse_osm_object, metavar='OBJECT',
                           help='Mark the area around the given OSM object as requiring an update'
                                ' (format: [NWR]<id>)')

        group = parser.add_argument_group('Arguments for function refresh')
        # Note the inverted logic: the flag is stored as 'diffs' and is
        # true unless --no-diff-updates is given.
        group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
                           help='Do not enable code for propagating updates')
        group.add_argument('--enable-debug-statements', action='store_true',
                           help='Enable debug warning statements in functions')

    def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
        """ Execute all requested refresh tasks in a fixed order.

            The order of the steps below is significant: importance data
            is imported before the functions are recreated and place
            importances are recomputed only after both.

            Returns 0 on success and 1 when the import of the secondary
            importance or the Wikipedia data reports a failure. In the
            failure case none of the later steps are run.
        """
        from ..tools import refresh, postcodes
        from ..indexer.indexer import Indexer

        # Functions need recreating either on explicit request or after
        # one of the importance imports below succeeded.
        need_function_refresh = args.functions

        if args.postcodes:
            if postcodes.can_compute(args.config.get_libpq_dsn()):
                LOG.warning("Update postcodes centroid")
                tokenizer = self._get_tokenizer(args.config)
                postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                           args.project_dir, tokenizer)
                indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                                  args.threads or 1)
                indexer.index_postcodes()
            else:
                # Only logged: the remaining tasks are still executed.
                LOG.error("The place table doesn't exist. "
                          "Postcode updates on a frozen database is not possible.")

        if args.word_tokens:
            LOG.warning('Updating word tokens')
            tokenizer = self._get_tokenizer(args.config)
            tokenizer.update_word_tokens()

        if args.word_counts:
            LOG.warning('Recompute word statistics')
            self._get_tokenizer(args.config).update_statistics(args.config,
                                                               threads=args.threads or 1)

        if args.address_levels:
            LOG.warning('Updating address levels')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.load_address_levels_from_config(conn, args.config)

        # Attention: must come BEFORE functions
        if args.secondary_importance:
            with connect(args.config.get_libpq_dsn()) as conn:
                # If the table did not exist before, then the importance code
                # needs to be enabled.
                # NOTE(review): need_function_refresh has already been read
                # from args.functions above, so this assignment does not
                # trigger the refresh by itself. The refresh is requested
                # through need_function_refresh after a successful import.
                if not conn.table_exists('secondary_importance'):
                    args.functions = True

            LOG.warning('Import secondary importance raster data from %s', args.project_dir)
            if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
                                                   args.project_dir) > 0:
                LOG.fatal('FATAL: Cannot update secondary importance raster data')
                return 1
            need_function_refresh = True

        if args.wiki_data:
            # The project directory serves as fallback location when no
            # explicit data path is configured.
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH
                             or args.project_dir)
            LOG.warning('Import wikipedia article importance from %s', data_path)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
                return 1
            need_function_refresh = True

        if need_function_refresh:
            LOG.warning('Create functions')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.create_functions(conn, args.config,
                                         args.diffs, args.enable_debug_statements)
                self._get_tokenizer(args.config).update_sql_functions(args.config)

        # Attention: importance MUST come after wiki data import and after functions.
        if args.importance:
            LOG.warning('Update importance values for database')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.recompute_importance(conn)

        if args.website:
            webdir = args.project_dir / 'website'
            LOG.warning('Setting up website directory at %s', webdir)
            # This is a little bit hacky: call the tokenizer setup, so that
            # the tokenizer directory gets repopulated as well, in case it
            # wasn't there yet.
            self._get_tokenizer(args.config)
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.setup_website(webdir, args.config, conn)

        if args.data_object or args.data_area:
            # All invalidations share one connection and are committed
            # together at the end.
            with connect(args.config.get_libpq_dsn()) as conn:
                for obj in args.data_object or []:
                    refresh.invalidate_osm_object(*obj, conn, recursive=False)
                for obj in args.data_area or []:
                    refresh.invalidate_osm_object(*obj, conn, recursive=True)
                conn.commit()

        return 0

    def _get_tokenizer(self, config: Configuration) -> AbstractTokenizer:
        """ Return the tokenizer for the database. It is loaded on the
            first call and the same instance is returned afterwards.
        """
        if self.tokenizer is None:
            from ..tokenizer import factory as tokenizer_factory

            self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)

        return self.tokenizer

View File

@@ -0,0 +1,200 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'replication' sub-command.
"""
from typing import Optional
import argparse
import datetime as dt
import logging
import socket
import time
from nominatim_core.db import status
from nominatim_core.db.connection import connect
from nominatim_core.errors import UsageError
from .args import NominatimArgs
LOG = logging.getLogger()
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to make pyosmium optional for replication only.
# pylint: disable=C0415
class UpdateReplication:
    """\
    Update the database using an online replication service.

    An OSM replication service is an online service that provides regular
    updates (OSM diff files) for the planet or update they provide. The OSMF
    provides the primary replication service for the full planet at
    https://planet.osm.org/replication/ but there are other providers of
    extracts of OSM data who provide such a service as well.

    This sub-command allows to set up such a replication service and download
    and import updates at regular intervals. You need to call '--init' once to
    set up the process or whenever you change the replication configuration
    parameters. Without any arguments, the sub-command will go into a loop and
    continuously apply updates as they become available. Giving `--once` just
    downloads and imports the next batch of updates.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the options for initialisation, for the update run
            itself and for downloading.
        """
        group = parser.add_argument_group('Arguments for initialisation')
        group.add_argument('--init', action='store_true',
                           help='Initialise the update process')
        # Inverted logic: stored as 'update_functions', true by default.
        group.add_argument('--no-update-functions', dest='update_functions',
                           action='store_false',
                           help="Do not update the trigger function to "
                                "support differential updates (EXPERT)")
        group = parser.add_argument_group('Arguments for updates')
        group.add_argument('--check-for-updates', action='store_true',
                           help='Check if new updates are available and exit')
        group.add_argument('--once', action='store_true',
                           help="Download and apply updates only once. When "
                                "not set, updates are continuously applied")
        group.add_argument('--catch-up', action='store_true',
                           help="Download and apply updates until no new "
                                "data is available on the server")
        # Inverted logic: stored as 'do_index', true by default.
        group.add_argument('--no-index', action='store_false', dest='do_index',
                           help=("Do not index the new data. Only usable "
                                 "together with --once"))
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group = parser.add_argument_group('Download parameters')
        group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
                           help='Set timeout for file downloads')

    def _init_replication(self, args: NominatimArgs) -> int:
        """ Initialise the replication process for the configured
            replication URL and, unless disabled by the user, recreate the
            SQL functions with diff updates enabled. Always returns 0.
        """
        from ..tools import replication, refresh

        LOG.warning("Initialising replication updates")
        with connect(args.config.get_libpq_dsn()) as conn:
            replication.init_replication(conn, base_url=args.config.REPLICATION_URL,
                                         socket_timeout=args.socket_timeout)
            if args.update_functions:
                LOG.warning("Create functions")
                refresh.create_functions(conn, args.config, True, False)
        return 0

    def _check_for_updates(self, args: NominatimArgs) -> int:
        """ Ask the replication tool if updates are available and return
            its result unchanged as the exit code of the command.
        """
        from ..tools import replication

        with connect(args.config.get_libpq_dsn()) as conn:
            return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL,
                                                 socket_timeout=args.socket_timeout)

    def _report_update(self, batchdate: dt.datetime,
                       start_import: dt.datetime,
                       start_index: Optional[dt.datetime]) -> None:
        """ Log the timings of one update round.

            `start_import` and `start_index` are the times when import and
            indexing were started. `start_index` is None when no indexing
            was done, in which case the import time runs until now and the
            indexing part is left out of the message. The remaining backlog
            is the difference between now and `batchdate`.
        """
        def round_time(delta: dt.timedelta) -> dt.timedelta:
            # Cut off fractions of a second for a more readable output.
            return dt.timedelta(seconds=int(delta.total_seconds()))

        end = dt.datetime.now(dt.timezone.utc)
        LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
                    round_time((start_index or end) - start_import),
                    f"Indexing: {round_time(end - start_index)} " if start_index else '',
                    round_time(end - start_import),
                    round_time(end - batchdate))

    def _compute_update_interval(self, args: NominatimArgs) -> int:
        """ Return the update interval to use for the replication run.

            In catch-up mode the interval is always 0. Otherwise the value
            of REPLICATION_UPDATE_INTERVAL is returned. Raises a UsageError
            when that interval is below 86400 (presumably seconds, i.e.
            one day - TODO confirm) while the replication URL points to
            download.geofabrik.de.
        """
        if args.catch_up:
            return 0

        update_interval = args.config.get_int('REPLICATION_UPDATE_INTERVAL')
        # Sanity check to not overwhelm the Geofabrik servers.
        if 'download.geofabrik.de' in args.config.REPLICATION_URL\
           and update_interval < 86400:
            LOG.fatal("Update interval too low for download.geofabrik.de.\n"
                      "Please check install documentation "
                      "(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
                      "setting-up-the-update-process).")
            raise UsageError("Invalid replication update interval setting.")

        return update_interval

    def _update(self, args: NominatimArgs) -> None:
        """ Download, import and index updates.

            Runs a single round with `--once`, runs until the server
            reports no more changes with `--catch-up` and loops forever
            otherwise. Raises a UsageError when indexing is disabled
            without `--once`.
        """
        # pylint: disable=too-many-locals
        from ..tools import replication
        from ..indexer.indexer import Indexer
        from ..tokenizer import factory as tokenizer_factory

        update_interval = self._compute_update_interval(args)

        params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
        params.update(base_url=args.config.REPLICATION_URL,
                      update_interval=update_interval,
                      import_file=args.project_dir / 'osmosischange.osc',
                      max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
                      indexed_only=not args.once)

        if not args.once:
            if not args.do_index:
                LOG.fatal("Indexing cannot be disabled when running updates continuously.")
                raise UsageError("Bad argument '--no-index'.")
            # Only set here. That is sufficient because the loop below is
            # left before the sleep when --once is given.
            recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')

        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
        indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or 1)

        dsn = args.config.get_libpq_dsn()

        while True:
            start = dt.datetime.now(dt.timezone.utc)
            state = replication.update(dsn, params, socket_timeout=args.socket_timeout)

            with connect(dsn) as conn:
                if state is not replication.UpdateState.NO_CHANGES:
                    status.log_status(conn, start, 'import')
                # The date is needed for the report further down, no matter
                # if data was imported or not.
                batchdate, _, _ = status.get_status(conn)
                conn.commit()

            if state is not replication.UpdateState.NO_CHANGES and args.do_index:
                index_start = dt.datetime.now(dt.timezone.utc)
                indexer.index_full(analyse=False)

                with connect(dsn) as conn:
                    status.set_indexed(conn, True)
                    status.log_status(conn, index_start, 'index')
                    conn.commit()
            else:
                index_start = None

            # Careful with the precedence: 'and' binds stronger than 'or'.
            # The condition reads as
            #   (no changes and catch-up mode) or (interval above 40 minutes),
            # so with a long update interval pending places are indexed
            # after every round, even with --no-index.
            # NOTE(review): confirm that this grouping is the intended one.
            if state is replication.UpdateState.NO_CHANGES and \
               args.catch_up or update_interval > 40*60:
                while indexer.has_pending():
                    indexer.index_full(analyse=False)

            # Skip computing the report when it would not be logged anyway.
            if LOG.isEnabledFor(logging.WARNING):
                assert batchdate is not None
                self._report_update(batchdate, start, index_start)

            if args.once or (args.catch_up and state is replication.UpdateState.NO_CHANGES):
                break

            if state is replication.UpdateState.NO_CHANGES:
                LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
                time.sleep(recheck_interval)

    def run(self, args: NominatimArgs) -> int:
        """ Dispatch to initialisation, update check or the update run,
            in this order of precedence. The socket timeout is set as the
            process-wide default before anything else is done.
        """
        socket.setdefaulttimeout(args.socket_timeout)

        if args.init:
            return self._init_replication(args)

        if args.check_for_updates:
            return self._check_for_updates(args)

        self._update(args)
        return 0

View File

@@ -0,0 +1,229 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'import' subcommand.
"""
from typing import Optional
import argparse
import logging
from pathlib import Path
import psutil
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from nominatim_core.db import status, properties
from ..tokenizer.base import AbstractTokenizer
from ..version import NOMINATIM_VERSION
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=C0415
LOG = logging.getLogger()
class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.

    This sub-command sets up a new Nominatim database from scratch starting
    with creating a new database in Postgresql. The user running this command
    needs superuser rights on the database.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the import options, split into required, optional and
            expert arguments.
        """
        group1 = parser.add_argument_group('Required arguments')
        group1.add_argument('--osm-file', metavar='FILE', action='append',
                            help='OSM file to be imported'
                                 ' (repeat for importing multiple files)',
                            default=None)
        # The choices name the stages of run() at which an import can
        # be picked up again.
        group1.add_argument('--continue', dest='continue_at',
                            choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
                            help='Continue an import that was interrupted',
                            default=None)
        group2 = parser.add_argument_group('Optional arguments')
        group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                            help='Size of cache to be used by osm2pgsql (in MB)')
        group2.add_argument('--reverse-only', action='store_true',
                            help='Do not create tables and indexes for searching')
        group2.add_argument('--no-partitions', action='store_true',
                            help=("Do not partition search indices "
                                  "(speeds up import of single country extracts)"))
        group2.add_argument('--no-updates', action='store_true',
                            help="Do not keep tables that are only needed for "
                                 "updating the database later")
        group2.add_argument('--offline', action='store_true',
                            help="Do not attempt to load any additional data from the internet")
        group3 = parser.add_argument_group('Expert options')
        group3.add_argument('--ignore-errors', action='store_true',
                            help='Continue import even when errors in SQL are present')
        group3.add_argument('--index-noanalyse', action='store_true',
                            help='Do not perform analyse operations during index (expert only)')
        group3.add_argument('--prepare-database', action='store_true',
                            help='Create the database but do not import any data')

    def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches
        """ Run the import, either completely or starting from the stage
            given with `--continue`.

            The stages are executed strictly in the order below. Each
            stage is skipped when `args.continue_at` names a later one.
            Raises a UsageError for conflicting arguments or when no
            input file is available. Returns 0 when done, which is
            already after creating the database skeleton with
            `--prepare-database`.
        """
        from ..data import country_info
        from ..tools import database_import, refresh, postcodes, freeze
        from ..indexer.indexer import Indexer

        num_threads = args.threads or psutil.cpu_count() or 1

        country_info.setup_country_config(args.config)

        # Check the argument combination before touching the database.
        if args.osm_file is None and args.continue_at is None and not args.prepare_database:
            raise UsageError("No input files (use --osm-file).")

        if args.osm_file is not None and args.continue_at not in ('import-from-file', None):
            raise UsageError(f"Cannot use --continue {args.continue_at} and --osm-file together.")

        if args.continue_at is not None and args.prepare_database:
            raise UsageError(
                "Cannot use --continue and --prepare-database together."
            )

        if args.prepare_database or args.continue_at is None:
            LOG.warning('Creating database')
            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    rouser=args.config.DATABASE_WEBUSER)
            if args.prepare_database:
                return 0

        if args.continue_at in (None, 'import-from-file'):
            files = args.get_osm_file_list()
            if not files:
                raise UsageError("No input files (use --osm-file).")

            # NOTE(review): this repeats the condition of the enclosing
            # block and is therefore always true at this point.
            if args.continue_at in ('import-from-file', None):
                # Check if the correct plugins are installed
                database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
            LOG.warning('Setting up country tables')
            country_info.setup_country_tables(args.config.get_libpq_dsn(),
                                              args.config.lib_dir.data,
                                              args.no_partitions)

            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(files,
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates,
                                            ignore_errors=args.ignore_errors)

            # Failing to import one of the importance data sets is not
            # fatal. An error is logged and the import continues.
            LOG.warning('Importing wikipedia importance data')
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Calculating importance values of locations will not '
                          'use Wikipedia importance data.')

            LOG.warning('Importing secondary importance raster data')
            if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
                                                   args.project_dir) != 0:
                LOG.error('Secondary importance file not imported. '
                          'Falling back to default ranking.')

            self._setup_tables(args.config, args.reverse_only)

        if args.continue_at in ('import-from-file', 'load-data', None):
            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn)

            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(), num_threads)

        # The tokenizer is needed by all following stages, so this
        # step is never skipped.
        LOG.warning("Setting up tokenizer")
        tokenizer = self._get_tokenizer(args.continue_at, args.config)

        if args.continue_at in ('import-from-file', 'load-data', None):
            LOG.warning('Calculate postcodes')
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)

        if args.continue_at in \
            ('import-from-file', 'load-data', 'indexing', None):
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
            indexer.index_full(analyse=not args.index_noanalyse)

        # Everything from here on is always executed, including when
        # continuing at 'db-postprocess'.
        LOG.warning('Post-process tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config,
                                                  drop=args.no_updates,
                                                  threads=num_threads)
            LOG.warning('Create search index for default country names.')
            country_info.create_country_names(conn, tokenizer,
                                              args.config.get_str_list('LANGUAGES'))
            if args.no_updates:
                freeze.drop_update_tables(conn)
        tokenizer.finalize_import(args.config)

        LOG.warning('Recompute word counts')
        tokenizer.update_statistics(args.config, threads=num_threads)

        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)

        self._finalize_database(args.config.get_libpq_dsn(), args.offline)

        return 0

    def _setup_tables(self, config: Configuration, reverse_only: bool) -> None:
        """ Set up the basic database layout: tables, indexes and functions.

            The functions are created three times in total: before the
            tables, after the tables and once more after the triggers and
            partition tables have been created.
        """
        from ..tools import database_import, refresh

        with connect(config.get_libpq_dsn()) as conn:
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, config, reverse_only=reverse_only)
            refresh.load_address_levels_from_config(conn, config)
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, config)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, config)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)

    def _get_tokenizer(self, continue_at: Optional[str],
                       config: Configuration) -> AbstractTokenizer:
        """ Set up a new tokenizer or load an already initialised one.

            A new tokenizer is created unless the import is continued at
            the 'indexing' or 'db-postprocess' stage.
        """
        from ..tokenizer import factory as tokenizer_factory

        if continue_at in ('import-from-file', 'load-data', None):
            # (re)initialise the tokenizer data
            return tokenizer_factory.create_tokenizer(config)

        # just load the tokenizer
        return tokenizer_factory.get_tokenizer_for_db(config)

    def _finalize_database(self, dsn: str, offline: bool) -> None:
        """ Determine the database date and set the status accordingly.

            The database version property is always written. Being unable
            to determine the date is not fatal: the error is logged and
            the status is left unset.
        """
        with connect(dsn) as conn:
            properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))

            try:
                dbdate = status.compute_database_date(conn, offline)
                status.set_status(conn, dbdate)
                LOG.info('Database is at %s.', dbdate)
            except Exception as exc: # pylint: disable=broad-except
                LOG.error('Cannot determine date of database: %s', exc)

View File

@@ -0,0 +1,93 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'special-phrases' command.
"""
import argparse
import logging
from pathlib import Path
from nominatim_core.errors import UsageError
from nominatim_core.db.connection import connect
from ..tools.special_phrases.sp_importer import SPImporter, SpecialPhraseLoader
from ..tools.special_phrases.sp_wiki_loader import SPWikiLoader
from ..tools.special_phrases.sp_csv_loader import SPCsvLoader
from .args import NominatimArgs
LOG = logging.getLogger()
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
class ImportSpecialPhrases:
    """\
    Import special phrases.

    Special phrases are search terms that narrow down the type of object
    that should be searched. For example, you might want to search for
    'Hotels in Barcelona'. The OSM wiki has a selection of special phrases
    in many languages, which can be imported with this command.

    You can also provide your own phrases in a CSV file. The file needs to have
    the following five columns:
    * phrase - the term expected for searching
    * class - the OSM tag key of the object type
    * type - the OSM tag value of the object type
    * operator - the kind of search to be done (one of: in, near, name, -)
    * plural - whether the term is a plural or not (Y/N)

    An example file can be found in the Nominatim sources at
    'test/testdb/full_en_phrases_test.csv'.

    The import can be further configured to ignore specific key/value pairs.
    This is particularly useful when importing phrases from the wiki. The
    default configuration excludes some very common tags like building=yes.
    The configuration can be customized by putting a file `phrase-settings.json`
    with custom rules into the project directory or by using the `--config`
    option to point to another configuration file.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the options that select the phrase source and the
            import mode.
        """
        sources = parser.add_argument_group('Input arguments')
        sources.add_argument('--import-from-wiki', action='store_true',
                             help='Import special phrases from the OSM wiki to the database')
        sources.add_argument('--import-from-csv', metavar='FILE',
                             help='Import special phrases from a CSV file')
        sources.add_argument('--no-replace', action='store_true',
                             help='Keep the old phrases and only add the new ones')

    def run(self, args: NominatimArgs) -> int:
        """ Import phrases from the wiki and/or from a CSV file, depending
            on which sources were requested. The wiki import comes first.
            Raises a UsageError when the CSV file does not exist.
            Always returns 0 otherwise.
        """
        if args.import_from_wiki:
            wiki_loader = SPWikiLoader(args.config)
            self.start_import(args, wiki_loader)

        if args.import_from_csv:
            csv_file = Path(args.import_from_csv)
            if not csv_file.is_file():
                LOG.fatal("CSV file '%s' does not exist.", args.import_from_csv)
                raise UsageError('Cannot access file.')

            csv_loader = SPCsvLoader(args.import_from_csv)
            self.start_import(args, csv_loader)

        return 0

    def start_import(self, args: NominatimArgs, loader: SpecialPhraseLoader) -> None:
        """
            Create the SPImporter object containing the right
            sp loader and then start the import of special phrases.

            Existing phrases are replaced unless `args.no_replace` is set.
        """
        from ..tokenizer import factory as tokenizer_factory

        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
        replace_existing = not args.no_replace

        with connect(args.config.get_libpq_dsn()) as db_connection:
            importer = SPImporter(args.config, db_connection, loader)
            importer.import_phrases(tokenizer, replace_existing)