mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-12 05:44:06 +00:00
split code into submodules
This commit is contained in:
28
src/nominatim_db/clicmd/__init__.py
Normal file
28
src/nominatim_db/clicmd/__init__.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Subcommand definitions for the command-line tool.
|
||||
"""
|
||||
# mypy and pylint disagree about the style of explicit exports,
|
||||
# see https://github.com/PyCQA/pylint/issues/6006.
|
||||
# pylint: disable=useless-import-alias
|
||||
|
||||
from .setup import SetupAll as SetupAll
|
||||
from .replication import UpdateReplication as UpdateReplication
|
||||
from .api import (APISearch as APISearch,
|
||||
APIReverse as APIReverse,
|
||||
APILookup as APILookup,
|
||||
APIDetails as APIDetails,
|
||||
APIStatus as APIStatus)
|
||||
from .index import UpdateIndex as UpdateIndex
|
||||
from .refresh import UpdateRefresh as UpdateRefresh
|
||||
from .add_data import UpdateAddData as UpdateAddData
|
||||
from .admin import AdminFuncs as AdminFuncs
|
||||
from .freeze import SetupFreeze as SetupFreeze
|
||||
from .special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
|
||||
from .export import QueryExport as QueryExport
|
||||
from .convert import ConvertDB as ConvertDB
|
||||
101
src/nominatim_db/clicmd/add_data.py
Normal file
101
src/nominatim_db/clicmd/add_data.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the 'add-data' subcommand.
|
||||
"""
|
||||
from typing import cast
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
import psutil
|
||||
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid importing modules that may end up unused.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
# Logger used by this module. getLogger() without a name returns the root logger.
LOG = logging.getLogger()
|
||||
|
||||
class UpdateAddData:
    """\
    Add additional data from a file or an online source.

    This command allows to add or update the search data in the database.
    The data can come either from an OSM file or single OSM objects can
    directly be downloaded from the OSM API. This function only loads the
    data into the database. Afterwards it still needs to be integrated
    in the search index. Use the `nominatim index` command for that.

    The command can also be used to add external non-OSM data to the
    database. At the moment the only supported format is TIGER housenumber
    data. See the online documentation at
    https://nominatim.org/release-docs/latest/admin/Import/#installing-tiger-housenumber-data-for-the-us
    for more information.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the data source selection and the extra options
            of the subcommand on the given parser.
        """
        source_group = parser.add_argument_group('Source')
        # Exactly one of the data sources must be given.
        sources = source_group.add_mutually_exclusive_group(required=True)
        sources.add_argument('--file', metavar='FILE',
                             help='Import data from an OSM file or diff file')
        sources.add_argument('--diff', metavar='FILE',
                             help='Import data from an OSM diff file (deprecated: use --file)')
        for osm_type in ('node', 'way', 'relation'):
            sources.add_argument('--' + osm_type, metavar='ID', type=int,
                                 help=f'Import a single {osm_type} from the API')
        sources.add_argument('--tiger-data', metavar='DIR',
                             help='Add housenumbers from the US TIGER census database')

        extras = parser.add_argument_group('Extra arguments')
        extras.add_argument('--use-main-api', action='store_true',
                            help='Use OSM API instead of Overpass to download objects')
        extras.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                            help='Size of cache to be used by osm2pgsql (in MB)')
        extras.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
                            help='Set timeout for file downloads')

    def run(self, args: NominatimArgs) -> int:
        """ Import data from the source selected on the command line.

            Returns the result of the import function that was called or
            0 when no source was selected.
        """
        from ..tokenizer import factory as tokenizer_factory
        from ..tools import tiger_data, add_osm_data

        # TIGER data goes through the tokenizer and does not need osm2pgsql.
        if args.tiger_data:
            tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
            num_threads = args.threads or psutil.cpu_count() or 1
            return tiger_data.add_tiger_data(args.tiger_data, args.config,
                                             num_threads, tokenizer)

        osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)

        # --diff is handled exactly like --file.
        datafile = args.file or args.diff
        if datafile:
            return add_osm_data.add_data_from_file(args.config.get_libpq_dsn(),
                                                   cast(str, datafile),
                                                   osm2pgsql_params)

        # Single objects: the first ID given in the order node, way, relation.
        for osm_type in ('node', 'way', 'relation'):
            osm_id = getattr(args, osm_type)
            if osm_id:
                return add_osm_data.add_osm_object(args.config.get_libpq_dsn(),
                                                   osm_type, osm_id,
                                                   args.use_main_api,
                                                   osm2pgsql_params)

        return 0
|
||||
123
src/nominatim_db/clicmd/admin.py
Normal file
123
src/nominatim_db/clicmd/admin.py
Normal file
@@ -0,0 +1,123 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the 'admin' subcommand.
|
||||
"""
|
||||
import logging
|
||||
import argparse
|
||||
import random
|
||||
|
||||
import nominatim_api as napi
|
||||
from nominatim_core.db.connection import connect
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid importing modules that may end up unused.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
# Logger used by this module. getLogger() without a name returns the root logger.
LOG = logging.getLogger()
|
||||
|
||||
|
||||
class AdminFuncs:
    """\
    Analyse and maintain the database.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the admin tasks and their task-specific options
            on the given parser.
        """
        group = parser.add_argument_group('Admin tasks')
        # Exactly one admin task must be selected per invocation.
        objs = group.add_mutually_exclusive_group(required=True)
        objs.add_argument('--warm', action='store_true',
                          help='Warm database caches for search and reverse queries')
        objs.add_argument('--check-database', action='store_true',
                          help='Check that the database is complete and operational')
        objs.add_argument('--migrate', action='store_true',
                          help='Migrate the database to a new software version')
        objs.add_argument('--analyse-indexing', action='store_true',
                          help='Print performance analysis of the indexing process')
        objs.add_argument('--collect-os-info', action="store_true",
                          help="Generate a report about the host system information")
        objs.add_argument('--clean-deleted', action='store', metavar='AGE',
                          help='Clean up deleted relations')

        group = parser.add_argument_group('Arguments for cache warming')
        # Both switches write to 'target'; it stays None when neither is given.
        group.add_argument('--search-only', action='store_const', dest='target',
                           const='search',
                           help="Only pre-warm tables for search queries")
        group.add_argument('--reverse-only', action='store_const', dest='target',
                           const='reverse',
                           help="Only pre-warm tables for reverse queries")

        group = parser.add_argument_group('Arguments for index analysis')
        mgroup = group.add_mutually_exclusive_group()
        mgroup.add_argument('--osm-id', type=str,
                            help='Analyse indexing of the given OSM object')
        mgroup.add_argument('--place-id', type=int,
                            help='Analyse indexing of the given Nominatim object')

    def run(self, args: NominatimArgs) -> int:
        """ Execute the admin task selected on the command line.

            Returns the exit code of the task, or 1 when no task
            was selected.
        """
        # pylint: disable=too-many-return-statements
        if args.warm:
            return self._warm(args)

        if args.check_database:
            LOG.warning('Checking database')
            from ..tools import check_database
            return check_database.check_database(args.config)

        if args.analyse_indexing:
            LOG.warning('Analysing performance of indexing function')
            from ..tools import admin
            admin.analyse_indexing(args.config, osm_id=args.osm_id, place_id=args.place_id)
            return 0

        if args.migrate:
            LOG.warning('Checking for necessary database migrations')
            from ..tools import migration
            return migration.migrate(args.config, args)

        if args.collect_os_info:
            LOG.warning("Reporting System Information")
            from ..tools import collect_os_info
            collect_os_info.report_system_information(args.config)
            return 0

        if args.clean_deleted:
            LOG.warning('Cleaning up deleted relations')
            from ..tools import admin
            admin.clean_deleted_relations(args.config, age=args.clean_deleted)
            return 0

        return 1

    def _warm(self, args: NominatimArgs) -> int:
        """ Warm the database caches by running reverse queries on random
            coordinates and search queries for the most frequent words.

            `args.target` restricts the warming to 'search' or 'reverse'
            queries; any other value warms both. Always returns 0.
        """
        LOG.warning('Warming database caches')

        api = napi.NominatimAPI(args.project_dir)

        try:
            if args.target != 'search':
                for _ in range(1000):
                    # Points are given as (x=longitude, y=latitude), so the
                    # longitude takes the +/-180 range and the latitude +/-90.
                    api.reverse(napi.Point(random.uniform(-180, 180),
                                           random.uniform(-90, 90)),
                                address_details=True)

            if args.target != 'reverse':
                from ..tokenizer import factory as tokenizer_factory

                tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
                with connect(args.config.get_libpq_dsn()) as conn:
                    # Without a search_name table there is nothing to warm.
                    if conn.table_exists('search_name'):
                        words = tokenizer.most_frequent_words(conn, 1000)
                    else:
                        words = []

                for word in words:
                    api.search(word)
        finally:
            # Release the API object even when a query fails.
            api.close()

        return 0
|
||||
374
src/nominatim_db/clicmd/api.py
Normal file
374
src/nominatim_db/clicmd/api.py
Normal file
@@ -0,0 +1,374 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Subcommand definitions for API calls from the command line.
|
||||
"""
|
||||
from typing import Dict, Any
|
||||
import argparse
|
||||
import logging
|
||||
import json
|
||||
import sys
|
||||
|
||||
import nominatim_api as napi
|
||||
import nominatim_api.v1 as api_output
|
||||
from nominatim_api.v1.helpers import zoom_to_rank, deduplicate_results
|
||||
from nominatim_api.v1.format import dispatch as formatting
|
||||
import nominatim_api.logging as loglib
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
|
||||
# Logger used by this module. getLogger() without a name returns the root logger.
LOG = logging.getLogger()
|
||||
|
||||
# Parameters of a structured search query as (name, description) pairs.
# Each one becomes a '--<name>' command-line option of the search command.
STRUCTURED_QUERY = tuple({
    'amenity': 'name and/or type of POI',
    'street': 'housenumber and street',
    'city': 'city, town or village',
    'county': 'county',
    'state': 'state',
    'country': 'country',
    'postalcode': 'postcode',
}.items())

# Switches for optional result details as (name, description) pairs.
# Each one becomes a '--<name>' flag in the output arguments.
EXTRADATA_PARAMS = tuple({
    'addressdetails': 'Include a breakdown of the address into elements',
    'extratags': ("Include additional information if available "
                  "(e.g. wikipedia link, opening hours)"),
    'namedetails': 'Include a list of alternative names',
}.items())
|
||||
|
||||
def _add_api_output_arguments(parser: argparse.ArgumentParser) -> None:
    """ Add the group of output options that the search, reverse and
        lookup subcommands have in common to the given parser.
    """
    group = parser.add_argument_group('Output arguments')
    # 'debug' is not a result format of the API; the subcommands handle it
    # themselves by printing the collected debug log.
    group.add_argument('--format', default='jsonv2',
                       choices=formatting.list_formats(napi.SearchResults) + ['debug'],
                       help='Format of result')
    for name, desc in EXTRADATA_PARAMS:
        group.add_argument('--' + name, action='store_true', help=desc)

    group.add_argument('--lang', '--accept-language', metavar='LANGS',
                       help='Preferred language order for presenting search results')
    group.add_argument('--polygon-output',
                       choices=['geojson', 'kml', 'svg', 'text'],
                       help='Output geometry of results as a GeoJSON, KML, SVG or WKT')
    group.add_argument('--polygon-threshold', type=float, default=0.0,
                       metavar='TOLERANCE',
                       help=("Simplify output geometry. "
                             "Parameter is difference tolerance in degrees."))
|
||||
|
||||
|
||||
class APISearch:
    """\
    Execute a search query.

    This command works exactly the same as if calling the /search endpoint on
    the web API. See the online documentation for more details on the
    various parameters:
    https://nominatim.org/release-docs/latest/api/Search/
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register query, output and result limitation options
            on the given parser.
        """
        group = parser.add_argument_group('Query arguments')
        group.add_argument('--query',
                           help='Free-form query string')
        for name, desc in STRUCTURED_QUERY:
            group.add_argument('--' + name, help='Structured query: ' + desc)

        _add_api_output_arguments(parser)

        group = parser.add_argument_group('Result limitation')
        group.add_argument('--countrycodes', metavar='CC,..',
                           help='Limit search results to one or more countries')
        group.add_argument('--exclude_place_ids', metavar='ID,..',
                           help='List of search objects to be excluded')
        group.add_argument('--limit', type=int, default=10,
                           help='Limit the number of returned results')
        group.add_argument('--viewbox', metavar='X1,Y1,X2,Y2',
                           help='Preferred area to find search results')
        group.add_argument('--bounded', action='store_true',
                           help='Strictly restrict results to viewbox area')

        group = parser.add_argument_group('Other arguments')
        group.add_argument('--no-dedupe', action='store_false', dest='dedupe',
                           help='Do not remove duplicates from the result list')

    def run(self, args: NominatimArgs) -> int:
        """ Run a free-form search when --query is given, a structured
            search otherwise, and print the result to stdout.

            With format 'debug' the collected debug log is printed instead
            of the results. Always returns 0.
        """
        if args.format == 'debug':
            loglib.set_log_output('text')

        api = napi.NominatimAPI(args.project_dir)

        try:
            # Request more results than asked for, so that enough are left
            # after deduplication.
            params: Dict[str, Any] = {'max_results': args.limit + min(args.limit, 10),
                                      'address_details': True,  # needed for display name
                                      'geometry_output': args.get_geometry_output(),
                                      'geometry_simplification': args.polygon_threshold,
                                      'countries': args.countrycodes,
                                      'excluded': args.exclude_place_ids,
                                      'viewbox': args.viewbox,
                                      'bounded_viewbox': args.bounded,
                                      'locales': args.get_locales(api.config.DEFAULT_LANGUAGE)
                                      }

            if args.query:
                results = api.search(args.query, **params)
            else:
                results = api.search_address(amenity=args.amenity,
                                             street=args.street,
                                             city=args.city,
                                             county=args.county,
                                             state=args.state,
                                             postalcode=args.postalcode,
                                             country=args.country,
                                             **params)
        finally:
            # Release the API object even when the query fails.
            api.close()

        if args.dedupe and len(results) > 1:
            results = deduplicate_results(results, args.limit)

        if args.format == 'debug':
            print(loglib.get_and_disable())
            return 0

        output = api_output.format_result(
            results,
            args.format,
            {'extratags': args.extratags,
             'namedetails': args.namedetails,
             'addressdetails': args.addressdetails})
        if args.format != 'xml':
            # reformat the result, so it is pretty-printed
            json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
        else:
            sys.stdout.write(output)
        sys.stdout.write('\n')

        return 0
|
||||
|
||||
|
||||
class APIReverse:
    """\
    Execute API reverse query.

    This command works exactly the same as if calling the /reverse endpoint on
    the web API. See the online documentation for more details on the
    various parameters:
    https://nominatim.org/release-docs/latest/api/Reverse/
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register coordinate, zoom, layer and output options
            on the given parser.
        """
        group = parser.add_argument_group('Query arguments')
        group.add_argument('--lat', type=float, required=True,
                           help='Latitude of coordinate to look up (in WGS84)')
        group.add_argument('--lon', type=float, required=True,
                           help='Longitude of coordinate to look up (in WGS84)')
        group.add_argument('--zoom', type=int,
                           help='Level of detail required for the address')
        group.add_argument('--layer', metavar='LAYER',
                           choices=[n.name.lower() for n in napi.DataLayer if n.name],
                           action='append', required=False, dest='layers',
                           help='Only return results from the given data layer (may be repeated)')

        _add_api_output_arguments(parser)

    def run(self, args: NominatimArgs) -> int:
        """ Look up the given coordinate and print the result to stdout.

            With format 'debug' the collected debug log is printed instead
            of the result. Returns 0 on success and 42 when no place
            was found.
        """
        if args.format == 'debug':
            loglib.set_log_output('text')

        api = napi.NominatimAPI(args.project_dir)

        try:
            # Only fall back to zoom 18 when no zoom was given at all,
            # so that an explicit '--zoom 0' is respected.
            zoom = 18 if args.zoom is None else args.zoom
            result = api.reverse(napi.Point(args.lon, args.lat),
                                 max_rank=zoom_to_rank(zoom),
                                 layers=args.get_layers(napi.DataLayer.ADDRESS
                                                        | napi.DataLayer.POI),
                                 address_details=True,  # needed for display name
                                 geometry_output=args.get_geometry_output(),
                                 geometry_simplification=args.polygon_threshold,
                                 locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
        finally:
            # Release the API object even when the query fails.
            api.close()

        if args.format == 'debug':
            print(loglib.get_and_disable())
            return 0

        if result:
            output = api_output.format_result(
                napi.ReverseResults([result]),
                args.format,
                {'extratags': args.extratags,
                 'namedetails': args.namedetails,
                 'addressdetails': args.addressdetails})
            if args.format != 'xml':
                # reformat the result, so it is pretty-printed
                json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
            else:
                sys.stdout.write(output)
            sys.stdout.write('\n')

            return 0

        LOG.error("Unable to geocode.")
        return 42
|
||||
|
||||
|
||||
|
||||
class APILookup:
    """\
    Execute API lookup query.

    This command works exactly the same as if calling the /lookup endpoint on
    the web API. See the online documentation for more details on the
    various parameters:
    https://nominatim.org/release-docs/latest/api/Lookup/
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the list of OSM ids and the output options
            on the given parser.
        """
        group = parser.add_argument_group('Query arguments')
        group.add_argument('--id', metavar='OSMID',
                           action='append', required=True, dest='ids',
                           help='OSM id to lookup in format <NRW><id> (may be repeated)')

        _add_api_output_arguments(parser)

    def run(self, args: NominatimArgs) -> int:
        """ Look up the requested OSM objects and print the result to stdout.

            With format 'debug' the debug log collected during the lookup
            is printed instead of the results. Always returns 0.
        """
        if args.format == 'debug':
            loglib.set_log_output('text')

        # First character is the OSM type, the remainder the numeric id.
        places = [napi.OsmID(o[0], int(o[1:])) for o in args.ids]

        api = napi.NominatimAPI(args.project_dir)

        try:
            results = api.lookup(places,
                                 address_details=True,  # needed for display name
                                 geometry_output=args.get_geometry_output(),
                                 geometry_simplification=args.polygon_threshold or 0.0,
                                 locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
        finally:
            # Release the API object even when the query fails.
            api.close()

        # The debug log can only be printed after the lookup has run,
        # otherwise it is empty.
        if args.format == 'debug':
            print(loglib.get_and_disable())
            return 0

        output = api_output.format_result(
            results,
            args.format,
            {'extratags': args.extratags,
             'namedetails': args.namedetails,
             'addressdetails': args.addressdetails})
        if args.format != 'xml':
            # reformat the result, so it is pretty-printed
            json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
        else:
            sys.stdout.write(output)
        sys.stdout.write('\n')

        return 0
|
||||
|
||||
|
||||
class APIDetails:
    """\
    Execute API details query.

    This command works exactly the same as if calling the /details endpoint on
    the web API. See the online documentation for more details on the
    various parameters:
    https://nominatim.org/release-docs/latest/api/Details/
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the object selection and the output options
            on the given parser.
        """
        group = parser.add_argument_group('Query arguments')
        # Exactly one way of identifying the object must be given.
        objs = group.add_mutually_exclusive_group(required=True)
        objs.add_argument('--node', '-n', type=int,
                          help="Look up the OSM node with the given ID.")
        objs.add_argument('--way', '-w', type=int,
                          help="Look up the OSM way with the given ID.")
        objs.add_argument('--relation', '-r', type=int,
                          help="Look up the OSM relation with the given ID.")
        objs.add_argument('--place_id', '-p', type=int,
                          help='Database internal identifier of the OSM object to look up')
        group.add_argument('--class', dest='object_class',
                           help=("Class type to disambiguate multiple entries "
                                 "of the same object."))

        group = parser.add_argument_group('Output arguments')
        group.add_argument('--addressdetails', action='store_true',
                           help='Include a breakdown of the address into elements')
        group.add_argument('--keywords', action='store_true',
                           help='Include a list of name keywords and address keywords')
        group.add_argument('--linkedplaces', action='store_true',
                           help='Include details of places that are linked with this one')
        group.add_argument('--hierarchy', action='store_true',
                           help='Include details of places lower in the address hierarchy')
        group.add_argument('--group_hierarchy', action='store_true',
                           help='Group the places by type')
        group.add_argument('--polygon_geojson', action='store_true',
                           help='Include geometry of result')
        group.add_argument('--lang', '--accept-language', metavar='LANGS',
                           help='Preferred language order for presenting search results')

    def run(self, args: NominatimArgs) -> int:
        """ Query the details of the selected object and print them
            as JSON to stdout.

            Returns 0 on success and 42 when the object was not found.
        """
        place: napi.PlaceRef
        if args.node:
            place = napi.OsmID('N', args.node, args.object_class)
        elif args.way:
            place = napi.OsmID('W', args.way, args.object_class)
        elif args.relation:
            place = napi.OsmID('R', args.relation, args.object_class)
        else:
            # The argument group is required, so only the place ID is left.
            assert args.place_id is not None
            place = napi.PlaceID(args.place_id)

        api = napi.NominatimAPI(args.project_dir)

        try:
            locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
            result = api.details(place,
                                 address_details=args.addressdetails,
                                 linked_places=args.linkedplaces,
                                 parented_places=args.hierarchy,
                                 keywords=args.keywords,
                                 geometry_output=napi.GeometryFormat.GEOJSON
                                 if args.polygon_geojson
                                 else napi.GeometryFormat.NONE,
                                 locales=locales)
        finally:
            # Release the API object even when the query fails.
            api.close()

        if result:
            output = api_output.format_result(
                result,
                'json',
                {'locales': locales,
                 'group_hierarchy': args.group_hierarchy})
            # reformat the result, so it is pretty-printed
            json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
            sys.stdout.write('\n')

            return 0

        LOG.error("Object not found in database.")
        return 42
|
||||
|
||||
|
||||
class APIStatus:
    """
    Execute API status query.

    This command works exactly the same as if calling the /status endpoint on
    the web API. See the online documentation for more details on the
    various parameters:
    https://nominatim.org/release-docs/latest/api/Status/
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the output format option on the given parser.
            The first format available for status results is the default.
        """
        formats = api_output.list_formats(napi.StatusResult)
        group = parser.add_argument_group('API parameters')
        group.add_argument('--format', default=formats[0], choices=formats,
                           help='Format of result')

    def run(self, args: NominatimArgs) -> int:
        """ Query the status and print it in the requested format.
            Always returns 0.
        """
        api = napi.NominatimAPI(args.project_dir)
        try:
            status = api.status()
        finally:
            # Release the API object even when the query fails.
            api.close()

        print(api_output.format_result(status, args.format, {}))
        return 0
|
||||
260
src/nominatim_db/clicmd/args.py
Normal file
260
src/nominatim_db/clicmd/args.py
Normal file
@@ -0,0 +1,260 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Provides custom functions over command-line arguments.
|
||||
"""
|
||||
from typing import Optional, List, Dict, Any, Sequence, Tuple
|
||||
import argparse
|
||||
import logging
|
||||
from functools import reduce
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.typing import Protocol
|
||||
import nominatim_api as napi
|
||||
|
||||
# Logger used by this module. getLogger() without a name returns the root logger.
LOG = logging.getLogger()
|
||||
|
||||
class Subcommand(Protocol):
    """ Structural interface that every class providing a subcommand
        of the command-line tool has to fulfil.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Add the parameters this subcommand understands to the
            parser that was created for it.
        """
        ...

    def run(self, args: 'NominatimArgs') -> int:
        """ Execute the subcommand using the parsed command-line
            arguments. The integer result is the outcome of the run.
        """
        ...
|
||||
|
||||
|
||||
class NominatimArgs:
|
||||
""" Customized namespace class for the nominatim command line tool
|
||||
to receive the command-line arguments.
|
||||
"""
|
||||
# Basic environment set by root program.
|
||||
config: Configuration
|
||||
project_dir: Path
|
||||
|
||||
# Global switches
|
||||
version: bool
|
||||
subcommand: Optional[str]
|
||||
command: Subcommand
|
||||
|
||||
# Shared parameters
|
||||
osm2pgsql_cache: Optional[int]
|
||||
socket_timeout: int
|
||||
|
||||
# Arguments added to all subcommands.
|
||||
verbose: int
|
||||
threads: Optional[int]
|
||||
|
||||
# Arguments to 'add-data'
|
||||
file: Optional[str]
|
||||
diff: Optional[str]
|
||||
node: Optional[int]
|
||||
way: Optional[int]
|
||||
relation: Optional[int]
|
||||
tiger_data: Optional[str]
|
||||
use_main_api: bool
|
||||
|
||||
# Arguments to 'admin'
|
||||
warm: bool
|
||||
check_database: bool
|
||||
migrate: bool
|
||||
collect_os_info: bool
|
||||
clean_deleted: str
|
||||
analyse_indexing: bool
|
||||
target: Optional[str]
|
||||
osm_id: Optional[str]
|
||||
place_id: Optional[int]
|
||||
|
||||
# Arguments to 'import'
|
||||
osm_file: List[str]
|
||||
continue_at: Optional[str]
|
||||
reverse_only: bool
|
||||
no_partitions: bool
|
||||
no_updates: bool
|
||||
offline: bool
|
||||
ignore_errors: bool
|
||||
index_noanalyse: bool
|
||||
prepare_database: bool
|
||||
|
||||
# Arguments to 'index'
|
||||
boundaries_only: bool
|
||||
no_boundaries: bool
|
||||
minrank: int
|
||||
maxrank: int
|
||||
|
||||
# Arguments to 'export'
|
||||
output_type: str
|
||||
output_format: str
|
||||
output_all_postcodes: bool
|
||||
language: Optional[str]
|
||||
restrict_to_country: Optional[str]
|
||||
|
||||
# Arguments to 'convert'
|
||||
output: Path
|
||||
|
||||
# Arguments to 'refresh'
|
||||
postcodes: bool
|
||||
word_tokens: bool
|
||||
word_counts: bool
|
||||
address_levels: bool
|
||||
functions: bool
|
||||
wiki_data: bool
|
||||
secondary_importance: bool
|
||||
importance: bool
|
||||
website: bool
|
||||
diffs: bool
|
||||
enable_debug_statements: bool
|
||||
data_object: Sequence[Tuple[str, int]]
|
||||
data_area: Sequence[Tuple[str, int]]
|
||||
|
||||
# Arguments to 'replication'
|
||||
init: bool
|
||||
update_functions: bool
|
||||
check_for_updates: bool
|
||||
once: bool
|
||||
catch_up: bool
|
||||
do_index: bool
|
||||
|
||||
# Arguments to 'serve'
|
||||
server: str
|
||||
engine: str
|
||||
|
||||
# Arguments to 'special-phrases
|
||||
import_from_wiki: bool
|
||||
import_from_csv: Optional[str]
|
||||
no_replace: bool
|
||||
|
||||
# Arguments to all query functions
|
||||
format: str
|
||||
addressdetails: bool
|
||||
extratags: bool
|
||||
namedetails: bool
|
||||
lang: Optional[str]
|
||||
polygon_output: Optional[str]
|
||||
polygon_threshold: Optional[float]
|
||||
|
||||
# Arguments to 'search'
|
||||
query: Optional[str]
|
||||
amenity: Optional[str]
|
||||
street: Optional[str]
|
||||
city: Optional[str]
|
||||
county: Optional[str]
|
||||
state: Optional[str]
|
||||
country: Optional[str]
|
||||
postalcode: Optional[str]
|
||||
countrycodes: Optional[str]
|
||||
exclude_place_ids: Optional[str]
|
||||
limit: int
|
||||
viewbox: Optional[str]
|
||||
bounded: bool
|
||||
dedupe: bool
|
||||
|
||||
# Arguments to 'reverse'
|
||||
lat: float
|
||||
lon: float
|
||||
zoom: Optional[int]
|
||||
layers: Optional[Sequence[str]]
|
||||
|
||||
# Arguments to 'lookup'
|
||||
ids: Sequence[str]
|
||||
|
||||
# Arguments to 'details'
|
||||
object_class: Optional[str]
|
||||
linkedplaces: bool
|
||||
hierarchy: bool
|
||||
keywords: bool
|
||||
polygon_geojson: bool
|
||||
group_hierarchy: bool
|
||||
|
||||
|
||||
def osm2pgsql_options(self, default_cache: int,
|
||||
default_threads: int) -> Dict[str, Any]:
|
||||
""" Return the standard osm2pgsql options that can be derived
|
||||
from the command line arguments. The resulting dict can be
|
||||
further customized and then used in `run_osm2pgsql()`.
|
||||
"""
|
||||
return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.config.lib_dir.osm2pgsql,
|
||||
osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
|
||||
osm2pgsql_style=self.config.get_import_style_file(),
|
||||
osm2pgsql_style_path=self.config.config_dir,
|
||||
threads=self.threads or default_threads,
|
||||
dsn=self.config.get_libpq_dsn(),
|
||||
flatnode_file=str(self.config.get_path('FLATNODE_FILE') or ''),
|
||||
tablespaces=dict(slim_data=self.config.TABLESPACE_OSM_DATA,
|
||||
slim_index=self.config.TABLESPACE_OSM_INDEX,
|
||||
main_data=self.config.TABLESPACE_PLACE_DATA,
|
||||
main_index=self.config.TABLESPACE_PLACE_INDEX
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def get_osm_file_list(self) -> Optional[List[Path]]:
|
||||
""" Return the --osm-file argument as a list of Paths or None
|
||||
if no argument was given. The function also checks if the files
|
||||
exist and raises a UsageError if one cannot be found.
|
||||
"""
|
||||
if not self.osm_file:
|
||||
return None
|
||||
|
||||
files = [Path(f) for f in self.osm_file]
|
||||
for fname in files:
|
||||
if not fname.is_file():
|
||||
LOG.fatal("OSM file '%s' does not exist.", fname)
|
||||
raise UsageError('Cannot access file.')
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def get_geometry_output(self) -> napi.GeometryFormat:
|
||||
""" Get the requested geometry output format in a API-compatible
|
||||
format.
|
||||
"""
|
||||
if not self.polygon_output:
|
||||
return napi.GeometryFormat.NONE
|
||||
if self.polygon_output == 'geojson':
|
||||
return napi.GeometryFormat.GEOJSON
|
||||
if self.polygon_output == 'kml':
|
||||
return napi.GeometryFormat.KML
|
||||
if self.polygon_output == 'svg':
|
||||
return napi.GeometryFormat.SVG
|
||||
if self.polygon_output == 'text':
|
||||
return napi.GeometryFormat.TEXT
|
||||
|
||||
try:
|
||||
return napi.GeometryFormat[self.polygon_output.upper()]
|
||||
except KeyError as exp:
|
||||
raise UsageError(f"Unknown polygon output format '{self.polygon_output}'.") from exp
|
||||
|
||||
|
||||
def get_locales(self, default: Optional[str]) -> napi.Locales:
    """ Create the locales for the output from the language parameter.

        The language argument takes precedence over `default`. When
        neither is set, an empty Locales object is returned.
    """
    accept_languages = self.lang or default
    if not accept_languages:
        return napi.Locales()

    return napi.Locales.from_accept_languages(accept_languages)
|
||||
|
||||
|
||||
def get_layers(self, default: napi.DataLayer) -> Optional[napi.DataLayer]:
    """ Combine the selected layers into a single DataLayer value.

        Layer names are looked up by their upper-cased name and or-ed
        together. Without any selected layers, `default` is returned.
    """
    if self.layers:
        selected = (napi.DataLayer[name.upper()] for name in self.layers)
        return reduce(napi.DataLayer.__or__, selected)

    return default
|
||||
95
src/nominatim_db/clicmd/convert.py
Normal file
95
src/nominatim_db/clicmd/convert.py
Normal file
@@ -0,0 +1,95 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the 'convert' subcommand.
|
||||
"""
|
||||
from typing import Set, Any, Union, Optional, Sequence
|
||||
import argparse
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid loading modules that may not be needed.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
class WithAction(argparse.Action):
    """ Special action that saves a list of flags, given on the command-line
        as `--with-foo` or `--without-foo`.

        The action is registered under a plain long option name, e.g.
        `--foo`, and replaces it with the two switches `--with-foo` and
        `--without-foo`. The state of the flags is not saved in the
        argument namespace but in the set handed in as `dest_set`:
        the set contains the names of all enabled flags.
    """
    def __init__(self, option_strings: Sequence[str], dest: Any,
                 default: bool = True, **kwargs: Any) -> None:
        """ Create the action for the given long options.

            `default` states if the flags start out as enabled. The
            mandatory keyword argument `dest_set` is the set where the
            enabled flags are collected.

            Raises a ValueError when `nargs` is given, when the action
            is used for a positional argument or when a short-form
            option is given.
        """
        if 'nargs' in kwargs:
            raise ValueError("nargs not allowed.")
        # argparse hands in an empty list (not None) for positional arguments.
        if not option_strings:
            raise ValueError("Positional parameter not allowed.")

        self.dest_set = kwargs.pop('dest_set')
        full_option_strings = []
        for opt in option_strings:
            if not opt.startswith('--'):
                raise ValueError("short-form options not allowed")
            flag = opt[2:]
            if default:
                self.dest_set.add(flag)
            full_option_strings.append(f"--with-{flag}")
            full_option_strings.append(f"--without-{flag}")

        # The result goes into dest_set only, so keep it out of the namespace.
        super().__init__(full_option_strings, argparse.SUPPRESS, nargs=0, **kwargs)


    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace,
                 values: Union[str, Sequence[Any], None],
                 option_string: Optional[str] = None) -> None:
        """ Enable or disable the flag named in the given option string.
        """
        assert option_string
        # The slices cut off the prefixes '--with-' and '--without-'.
        if option_string.startswith('--with-'):
            self.dest_set.add(option_string[7:])
        if option_string.startswith('--without-'):
            self.dest_set.discard(option_string[10:])
|
||||
|
||||
|
||||
class ConvertDB:
    """ Convert an existing database into a different format. (EXPERIMENTAL)

        Dump a read-only version of the database in a different format.
        At the moment only a SQLite database suitable for reverse lookup
        can be created.
    """

    def __init__(self) -> None:
        # Names of the enabled features, maintained by the WithAction switches.
        self.options: Set[str] = set()

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the command-line arguments of the subcommand.
        """
        parser.add_argument('--format', default='sqlite',
                            choices=('sqlite', ),
                            help='Format of the output database (must be sqlite currently)')
        parser.add_argument('--output', '-o', required=True, type=Path,
                            help='File to write the database to.')
        group = parser.add_argument_group('Switches to define database layout'
                                          ' (currently no effect)')
        # All switches are created with default=True, so every feature
        # starts out enabled and the help texts must say so.
        group.add_argument('--reverse', action=WithAction, dest_set=self.options, default=True,
                           help='Enable/disable support for reverse and lookup API'
                                ' (default: enabled)')
        group.add_argument('--search', action=WithAction, dest_set=self.options, default=True,
                           help='Enable/disable support for search API (default: enabled)')
        group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
                           help='Enable/disable support for details API (default: enabled)')


    def run(self, args: NominatimArgs) -> int:
        """ Run the conversion into the requested format.

            Returns 0 after a conversion to SQLite and 1 for any other
            format. Raises a UsageError when the output file already
            exists.
        """
        if args.output.exists():
            raise UsageError(f"File '{args.output}' already exists. Refusing to overwrite.")

        if args.format == 'sqlite':
            from ..tools import convert_sqlite

            asyncio.run(convert_sqlite.convert(args.project_dir, args.output, self.options))
            return 0

        return 1
|
||||
200
src/nominatim_db/clicmd/export.py
Normal file
200
src/nominatim_db/clicmd/export.py
Normal file
@@ -0,0 +1,200 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the 'export' subcommand.
|
||||
"""
|
||||
from typing import Optional, List, cast
|
||||
import logging
|
||||
import argparse
|
||||
import asyncio
|
||||
import csv
|
||||
import sys
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
import nominatim_api as napi
|
||||
from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
|
||||
from nominatim_api.types import LookupDetails
|
||||
from nominatim_core.errors import UsageError
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid loading modules that may not be needed.
|
||||
# pylint: disable=E0012,C0415
|
||||
# Needed for SQLAlchemy
|
||||
# pylint: disable=singleton-comparison
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
# Inclusive range of address ranks covered by each output type.
RANK_RANGE_MAP = {
    'country': (4, 4),
    'state': (5, 9),
    'county': (10, 12),
    'city': (13, 16),
    'suburb': (17, 21),
    'street': (26, 26),
    'path': (27, 27)
}

# Reverse lookup from a single address rank to the name of its output type.
RANK_TO_OUTPUT_MAP = {
    rank: output_type
    for output_type, (min_rank, max_rank) in RANK_RANGE_MAP.items()
    for rank in range(min_rank, max_rank + 1)
}
|
||||
|
||||
class QueryExport:
    """\
    Export places as CSV file from the database.


    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the output and filter arguments of the subcommand.
        """
        group = parser.add_argument_group('Output arguments')
        group.add_argument('--output-type', default='street',
                           choices=('country', 'state', 'county',
                                    'city', 'suburb', 'street', 'path'),
                           help='Type of places to output (default: street)')
        # The help text is assembled from adjacent literals, each part
        # must bring its own separating blank.
        group.add_argument('--output-format',
                           default='street;suburb;city;county;state;country',
                           help=("Semicolon-separated list of address types "
                                 "(see --output-type). Additionally accepts: "
                                 "placeid,postcode"))
        group.add_argument('--language',
                           help=("Preferred language for output "
                                 "(use local name, if omitted)"))
        group = parser.add_argument_group('Filter arguments')
        group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
                           help='Export only objects within country')
        group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
                           dest='node',
                           help='Export only children of this OSM node')
        group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
                           dest='way',
                           help='Export only children of this OSM way')
        group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
                           dest='relation',
                           help='Export only children of this OSM relation')


    def run(self, args: NominatimArgs) -> int:
        """ Run the asynchronous export to completion and return its
            exit code.
        """
        return asyncio.run(export(args))
|
||||
|
||||
|
||||
async def export(args: NominatimArgs) -> int:
    """ The actual export as an asynchronous function.

        Selects all unlinked places whose address rank falls into the
        range of the requested output type, optionally restricted to
        the children of an OSM object and to a country, and writes
        them out as CSV in batches of 1000 places. Always returns 0.
    """
    api = napi.NominatimAPIAsync(args.project_dir)

    try:
        rank_bounds = RANK_RANGE_MAP[args.output_type]

        csv_writer = init_csv_writer(args.output_format)

        # The second connection is used for the address lookups while
        # the result of the main query is still being iterated.
        async with api.begin() as conn, api.begin() as detail_conn:
            placex = conn.t.placex

            query = sa.select(placex.c.place_id, placex.c.parent_place_id,
                              placex.c.osm_type, placex.c.osm_id, placex.c.name,
                              placex.c.class_, placex.c.type, placex.c.admin_level,
                              placex.c.address, placex.c.extratags,
                              placex.c.housenumber, placex.c.postcode,
                              placex.c.country_code,
                              placex.c.importance, placex.c.wikipedia,
                              placex.c.indexed_date,
                              placex.c.rank_address, placex.c.rank_search,
                              placex.c.centroid)
            query = query.where(placex.c.linked_place_id == None)
            query = query.where(placex.c.rank_address.between(*rank_bounds))

            parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
            if parent_place_id:
                addressline = conn.t.addressline

                query = query.join(addressline,
                                   addressline.c.place_id == placex.c.place_id)
                query = query.where(addressline.c.address_place_id == parent_place_id)
                query = query.where(addressline.c.isaddress)

            if args.restrict_to_country:
                country = args.restrict_to_country.lower()
                query = query.where(placex.c.country_code == country)

            batch = []
            for row in await conn.execute(query):
                place = create_from_placex_row(row, ReverseResult)
                if place is None:
                    continue
                batch.append(place)

                if len(batch) == 1000:
                    await dump_results(detail_conn, batch, csv_writer, args.language)
                    batch = []

            # Write out the remainder that did not fill a complete batch.
            if batch:
                await dump_results(detail_conn, batch, csv_writer, args.language)
    finally:
        await api.close()

    return 0
|
||||
|
||||
|
||||
def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
    """ Create a CSV writer to stdout for the given semicolon-separated
        list of columns and print the header line.

        The writer ignores keys in a row that are not in the column list.
    """
    csv_writer = csv.DictWriter(sys.stdout,
                                fieldnames=output_format.split(';'),
                                extrasaction='ignore')
    csv_writer.writeheader()

    return csv_writer
|
||||
|
||||
|
||||
async def dump_results(conn: napi.SearchConnection,
                       results: List[ReverseResult],
                       writer: 'csv.DictWriter[str]',
                       lang: Optional[str]) -> None:
    """ Look up the address details for the given results and write
        one CSV row per result.

        A row starts out with the place ID and postcode of the result.
        The names of its address parts are then filed under the output
        type that corresponds to their address rank. An address part of
        type postcode replaces the postcode of the result.
    """
    locale = napi.Locales([lang] if lang else None)
    details = LookupDetails(address_details=True, locales=locale)
    await add_result_details(conn, results, details)

    for result in results:
        row = {'placeid': result.place_id,
               'postcode': result.postcode}

        for line in (result.address_rows or []):
            # Only named parts of the address proper are of interest.
            if not (line.isaddress and line.local_name):
                continue

            if line.category[1] == 'postcode':
                row['postcode'] = line.local_name
            else:
                column = RANK_TO_OUTPUT_MAP.get(line.rank_address)
                if column is not None:
                    row[column] = line.local_name

        writer.writerow(row)
|
||||
|
||||
|
||||
async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
                        way_id: Optional[int],
                        relation_id: Optional[int]) -> Optional[int]:
    """ Get the place ID for the given OSM object.

        Only the first ID that is set is used, checked in the order
        node, way, relation. Returns None when no ID is set at all.
        Raises a UsageError when the object has no place with an
        address rank above 0 in the database.
    """
    candidates = (('N', node_id), ('W', way_id), ('R', relation_id))
    for type_code, object_id in candidates:
        if object_id is not None:
            osm_type, osm_id = type_code, object_id
            break
    else:
        return None

    placex = conn.t.placex
    query = sa.select(placex.c.place_id).limit(1)
    query = query.where(placex.c.osm_type == osm_type)
    query = query.where(placex.c.osm_id == osm_id)
    query = query.where(placex.c.rank_address > 0)
    # Sorted by rank, so that the lowest-ranked place is the one returned.
    query = query.order_by(placex.c.rank_address)

    rows = await conn.execute(query)
    for row in rows:
        return cast(int, row[0])

    raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')
|
||||
43
src/nominatim_db/clicmd/freeze.py
Normal file
43
src/nominatim_db/clicmd/freeze.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the 'freeze' subcommand.
|
||||
"""
|
||||
import argparse
|
||||
|
||||
from nominatim_core.db.connection import connect
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid loading modules that may not be needed.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
class SetupFreeze:
    """\
    Make database read-only.

    About half of data in the Nominatim database is kept only to be able to
    keep the data up-to-date with new changes made in OpenStreetMap. This
    command drops all this data and only keeps the part needed for geocoding
    itself.

    This command has the same effect as the `--no-updates` option for imports.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ The subcommand has no options, nothing to register.
        """


    def run(self, args: NominatimArgs) -> int:
        """ Drop the update tables and then the flatnode file.
            Always returns 0.
        """
        from ..tools import freeze

        dsn = args.config.get_libpq_dsn()
        with connect(dsn) as conn:
            freeze.drop_update_tables(conn)

        flatnode_file = args.config.get_path('FLATNODE_FILE')
        freeze.drop_flatnode_file(flatnode_file)

        return 0
|
||||
66
src/nominatim_db/clicmd/index.py
Normal file
66
src/nominatim_db/clicmd/index.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the 'index' subcommand.
|
||||
"""
|
||||
import argparse
|
||||
|
||||
import psutil
|
||||
|
||||
from nominatim_core.db import status
|
||||
from nominatim_core.db.connection import connect
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid loading modules that may not be needed.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
|
||||
class UpdateIndex:
    """\
    Reindex all new and modified data.

    Indexing is the process of computing the address and search terms for
    the places in the database. Every time data is added or changed, indexing
    needs to be run. Imports and replication updates automatically take care
    of indexing. For other cases, this function allows to run indexing manually.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the filter arguments of the subcommand.
        """
        group = parser.add_argument_group('Filter arguments')
        group.add_argument('--boundaries-only', action='store_true',
                           help="""Index only administrative boundaries.""")
        group.add_argument('--no-boundaries', action='store_true',
                           help="""Index everything except administrative boundaries.""")
        group.add_argument('--minrank', '-r', type=int, metavar='RANK', default=0,
                           help='Minimum/starting rank')
        group.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30,
                           help='Maximum/finishing rank')


    def run(self, args: NominatimArgs) -> int:
        """ Index the requested parts of the database.

            The database is marked as indexed only when nothing was
            excluded, i.e. no boundary filter was given and the ranks
            cover the range from 0 to 30. Always returns 0.
        """
        from ..indexer.indexer import Indexer
        from ..tokenizer import factory as tokenizer_factory

        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)

        dsn = args.config.get_libpq_dsn()
        num_threads = args.threads or psutil.cpu_count() or 1
        indexer = Indexer(dsn, tokenizer, num_threads)

        with_boundaries = not args.no_boundaries
        with_places = not args.boundaries_only

        if with_boundaries:
            indexer.index_boundaries(args.minrank, args.maxrank)
        if with_places:
            indexer.index_by_rank(args.minrank, args.maxrank)
            indexer.index_postcodes()

        if with_boundaries and with_places \
           and args.minrank == 0 and args.maxrank == 30:
            with connect(args.config.get_libpq_dsn()) as conn:
                status.set_indexed(conn, True)

        return 0
|
||||
187
src/nominatim_db/clicmd/refresh.py
Normal file
187
src/nominatim_db/clicmd/refresh.py
Normal file
@@ -0,0 +1,187 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of 'refresh' subcommand.
|
||||
"""
|
||||
from typing import Tuple, Optional
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import connect
|
||||
from ..tokenizer.base import AbstractTokenizer
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid loading modules that may not be needed.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def _parse_osm_object(obj: str) -> Tuple[str, int]:
|
||||
""" Parse the given argument into a tuple of OSM type and ID.
|
||||
Raises an ArgumentError if the format is not recognized.
|
||||
"""
|
||||
if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdigit():
|
||||
raise argparse.ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")
|
||||
|
||||
return (obj[0].upper(), int(obj[1:]))
|
||||
|
||||
|
||||
class UpdateRefresh:
    """\
    Recompute auxiliary data used by the indexing process.

    This sub-commands updates various static data and functions in the database.
    It usually needs to be run after changing various aspects of the
    configuration. The configuration documentation will mention the exact
    command to use in such case.

    Warning: the 'update' command must not be run in parallel with other update
    commands like 'replication' or 'add-data'.
    """
    def __init__(self) -> None:
        # Tokenizer instance, created on first use by _get_tokenizer().
        self.tokenizer: Optional[AbstractTokenizer] = None

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the arguments of the subcommand: one switch per
            kind of data that can be refreshed plus the options for
            the function refresh.
        """
        group = parser.add_argument_group('Data arguments')
        group.add_argument('--postcodes', action='store_true',
                           help='Update postcode centroid table')
        group.add_argument('--word-tokens', action='store_true',
                           help='Clean up search terms')
        group.add_argument('--word-counts', action='store_true',
                           help='Compute frequency of full-word search terms')
        group.add_argument('--address-levels', action='store_true',
                           help='Reimport address level configuration')
        group.add_argument('--functions', action='store_true',
                           help='Update the PL/pgSQL functions in the database')
        group.add_argument('--wiki-data', action='store_true',
                           help='Update Wikipedia/data importance numbers')
        group.add_argument('--secondary-importance', action='store_true',
                           help='Update secondary importance raster data')
        group.add_argument('--importance', action='store_true',
                           help='Recompute place importances (expensive!)')
        group.add_argument('--website', action='store_true',
                           help='Refresh the directory that serves the scripts for the web API')
        # May be given multiple times, each value is parsed into a
        # tuple of OSM type and ID.
        group.add_argument('--data-object', action='append',
                           type=_parse_osm_object, metavar='OBJECT',
                           help='Mark the given OSM object as requiring an update'
                                ' (format: [NWR]<id>)')
        group.add_argument('--data-area', action='append',
                           type=_parse_osm_object, metavar='OBJECT',
                           help='Mark the area around the given OSM object as requiring an update'
                                ' (format: [NWR]<id>)')

        group = parser.add_argument_group('Arguments for function refresh')
        # Note the inverted logic: the switch sets 'diffs' to False.
        group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
                           help='Do not enable code for propagating updates')
        group.add_argument('--enable-debug-statements', action='store_true',
                           help='Enable debug warning statements in functions')


    def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
        """ Run all requested refresh actions in a fixed order.

            Returns 1 when importing the secondary importance or the
            Wikipedia data fails. In that case none of the later
            actions are run. Returns 0 otherwise.
        """
        from ..tools import refresh, postcodes
        from ..indexer.indexer import Indexer

        # Importing importance data below forces a function refresh,
        # even when --functions was not given.
        need_function_refresh = args.functions

        if args.postcodes:
            if postcodes.can_compute(args.config.get_libpq_dsn()):
                LOG.warning("Update postcodes centroid")
                tokenizer = self._get_tokenizer(args.config)
                postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                           args.project_dir, tokenizer)
                # Changed postcodes need to be indexed right away.
                indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                                  args.threads or 1)
                indexer.index_postcodes()
            else:
                # Only reported, the remaining actions are still run.
                LOG.error("The place table doesn't exist. "
                          "Postcode updates on a frozen database is not possible.")

        if args.word_tokens:
            LOG.warning('Updating word tokens')
            tokenizer = self._get_tokenizer(args.config)
            tokenizer.update_word_tokens()

        if args.word_counts:
            LOG.warning('Recompute word statistics')
            self._get_tokenizer(args.config).update_statistics(args.config,
                                                               threads=args.threads or 1)

        if args.address_levels:
            LOG.warning('Updating address levels')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.load_address_levels_from_config(conn, args.config)

        # Attention: must come BEFORE functions
        if args.secondary_importance:
            with connect(args.config.get_libpq_dsn()) as conn:
                # If the table did not exist before, then the importance code
                # needs to be enabled.
                if not conn.table_exists('secondary_importance'):
                    args.functions = True

            LOG.warning('Import secondary importance raster data from %s', args.project_dir)
            if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
                                                   args.project_dir) > 0:
                LOG.fatal('FATAL: Cannot update secondary importance raster data')
                return 1
            need_function_refresh = True

        if args.wiki_data:
            # The project directory is the fallback location for the data.
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH
                             or args.project_dir)
            LOG.warning('Import wikipedia article importance from %s', data_path)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
                return 1
            need_function_refresh = True

        if need_function_refresh:
            LOG.warning('Create functions')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.create_functions(conn, args.config,
                                         args.diffs, args.enable_debug_statements)
                # The tokenizer refreshes its own SQL functions alongside.
                self._get_tokenizer(args.config).update_sql_functions(args.config)

        # Attention: importance MUST come after wiki data import and after functions.
        if args.importance:
            LOG.warning('Update importance values for database')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.recompute_importance(conn)

        if args.website:
            webdir = args.project_dir / 'website'
            LOG.warning('Setting up website directory at %s', webdir)
            # This is a little bit hacky: call the tokenizer setup, so that
            # the tokenizer directory gets repopulated as well, in case it
            # wasn't there yet.
            self._get_tokenizer(args.config)
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.setup_website(webdir, args.config, conn)

        if args.data_object or args.data_area:
            with connect(args.config.get_libpq_dsn()) as conn:
                # Each entry is a tuple (OSM type, OSM id).
                for obj in args.data_object or []:
                    refresh.invalidate_osm_object(*obj, conn, recursive=False)
                for obj in args.data_area or []:
                    refresh.invalidate_osm_object(*obj, conn, recursive=True)
                # All invalidations are committed together.
                conn.commit()

        return 0


    def _get_tokenizer(self, config: Configuration) -> AbstractTokenizer:
        """ Return the tokenizer for the database. The tokenizer is
            created on the first call and reused afterwards.
        """
        if self.tokenizer is None:
            from ..tokenizer import factory as tokenizer_factory

            self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)

        return self.tokenizer
|
||||
200
src/nominatim_db/clicmd/replication.py
Normal file
200
src/nominatim_db/clicmd/replication.py
Normal file
@@ -0,0 +1,200 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the 'replication' sub-command.
|
||||
"""
|
||||
from typing import Optional
|
||||
import argparse
|
||||
import datetime as dt
|
||||
import logging
|
||||
import socket
|
||||
import time
|
||||
|
||||
from nominatim_core.db import status
|
||||
from nominatim_core.db.connection import connect
|
||||
from nominatim_core.errors import UsageError
|
||||
from .args import NominatimArgs
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to make pyosmium optional for replication only.
|
||||
# pylint: disable=C0415
|
||||
|
||||
class UpdateReplication:
|
||||
"""\
|
||||
Update the database using an online replication service.
|
||||
|
||||
An OSM replication service is an online service that provides regular
|
||||
updates (OSM diff files) for the planet or update they provide. The OSMF
|
||||
provides the primary replication service for the full planet at
|
||||
https://planet.osm.org/replication/ but there are other providers of
|
||||
extracts of OSM data who provide such a service as well.
|
||||
|
||||
This sub-command allows to set up such a replication service and download
|
||||
and import updates at regular intervals. You need to call '--init' once to
|
||||
set up the process or whenever you change the replication configuration
|
||||
parameters. Without any arguments, the sub-command will go into a loop and
|
||||
continuously apply updates as they become available. Giving `--once` just
|
||||
downloads and imports the next batch of updates.
|
||||
"""
|
||||
|
||||
def add_args(self, parser: argparse.ArgumentParser) -> None:
|
||||
group = parser.add_argument_group('Arguments for initialisation')
|
||||
group.add_argument('--init', action='store_true',
|
||||
help='Initialise the update process')
|
||||
group.add_argument('--no-update-functions', dest='update_functions',
|
||||
action='store_false',
|
||||
help="Do not update the trigger function to "
|
||||
"support differential updates (EXPERT)")
|
||||
group = parser.add_argument_group('Arguments for updates')
|
||||
group.add_argument('--check-for-updates', action='store_true',
|
||||
help='Check if new updates are available and exit')
|
||||
group.add_argument('--once', action='store_true',
|
||||
help="Download and apply updates only once. When "
|
||||
"not set, updates are continuously applied")
|
||||
group.add_argument('--catch-up', action='store_true',
|
||||
help="Download and apply updates until no new "
|
||||
"data is available on the server")
|
||||
group.add_argument('--no-index', action='store_false', dest='do_index',
|
||||
help=("Do not index the new data. Only usable "
|
||||
"together with --once"))
|
||||
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
|
||||
help='Size of cache to be used by osm2pgsql (in MB)')
|
||||
group = parser.add_argument_group('Download parameters')
|
||||
group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
|
||||
help='Set timeout for file downloads')
|
||||
|
||||
|
||||
def _init_replication(self, args: NominatimArgs) -> int:
|
||||
from ..tools import replication, refresh
|
||||
|
||||
LOG.warning("Initialising replication updates")
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
replication.init_replication(conn, base_url=args.config.REPLICATION_URL,
|
||||
socket_timeout=args.socket_timeout)
|
||||
if args.update_functions:
|
||||
LOG.warning("Create functions")
|
||||
refresh.create_functions(conn, args.config, True, False)
|
||||
return 0
|
||||
|
||||
|
||||
def _check_for_updates(self, args: NominatimArgs) -> int:
|
||||
from ..tools import replication
|
||||
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL,
|
||||
socket_timeout=args.socket_timeout)
|
||||
|
||||
|
||||
def _report_update(self, batchdate: dt.datetime,
|
||||
start_import: dt.datetime,
|
||||
start_index: Optional[dt.datetime]) -> None:
|
||||
def round_time(delta: dt.timedelta) -> dt.timedelta:
|
||||
return dt.timedelta(seconds=int(delta.total_seconds()))
|
||||
|
||||
end = dt.datetime.now(dt.timezone.utc)
|
||||
LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
|
||||
round_time((start_index or end) - start_import),
|
||||
f"Indexing: {round_time(end - start_index)} " if start_index else '',
|
||||
round_time(end - start_import),
|
||||
round_time(end - batchdate))
|
||||
|
||||
|
||||
def _compute_update_interval(self, args: NominatimArgs) -> int:
|
||||
if args.catch_up:
|
||||
return 0
|
||||
|
||||
update_interval = args.config.get_int('REPLICATION_UPDATE_INTERVAL')
|
||||
# Sanity check to not overwhelm the Geofabrik servers.
|
||||
if 'download.geofabrik.de' in args.config.REPLICATION_URL\
|
||||
and update_interval < 86400:
|
||||
LOG.fatal("Update interval too low for download.geofabrik.de.\n"
|
||||
"Please check install documentation "
|
||||
"(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
|
||||
"setting-up-the-update-process).")
|
||||
raise UsageError("Invalid replication update interval setting.")
|
||||
|
||||
return update_interval
|
||||
|
||||
|
||||
def _update(self, args: NominatimArgs) -> None:
|
||||
# pylint: disable=too-many-locals
|
||||
from ..tools import replication
|
||||
from ..indexer.indexer import Indexer
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
|
||||
update_interval = self._compute_update_interval(args)
|
||||
|
||||
params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
|
||||
params.update(base_url=args.config.REPLICATION_URL,
|
||||
update_interval=update_interval,
|
||||
import_file=args.project_dir / 'osmosischange.osc',
|
||||
max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
|
||||
indexed_only=not args.once)
|
||||
|
||||
if not args.once:
|
||||
if not args.do_index:
|
||||
LOG.fatal("Indexing cannot be disabled when running updates continuously.")
|
||||
raise UsageError("Bad argument '--no-index'.")
|
||||
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
|
||||
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or 1)
|
||||
|
||||
dsn = args.config.get_libpq_dsn()
|
||||
|
||||
while True:
|
||||
start = dt.datetime.now(dt.timezone.utc)
|
||||
state = replication.update(dsn, params, socket_timeout=args.socket_timeout)
|
||||
|
||||
with connect(dsn) as conn:
|
||||
if state is not replication.UpdateState.NO_CHANGES:
|
||||
status.log_status(conn, start, 'import')
|
||||
batchdate, _, _ = status.get_status(conn)
|
||||
conn.commit()
|
||||
|
||||
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
|
||||
index_start = dt.datetime.now(dt.timezone.utc)
|
||||
indexer.index_full(analyse=False)
|
||||
|
||||
with connect(dsn) as conn:
|
||||
status.set_indexed(conn, True)
|
||||
status.log_status(conn, index_start, 'index')
|
||||
conn.commit()
|
||||
else:
|
||||
index_start = None
|
||||
|
||||
if state is replication.UpdateState.NO_CHANGES and \
|
||||
args.catch_up or update_interval > 40*60:
|
||||
while indexer.has_pending():
|
||||
indexer.index_full(analyse=False)
|
||||
|
||||
if LOG.isEnabledFor(logging.WARNING):
|
||||
assert batchdate is not None
|
||||
self._report_update(batchdate, start, index_start)
|
||||
|
||||
if args.once or (args.catch_up and state is replication.UpdateState.NO_CHANGES):
|
||||
break
|
||||
|
||||
if state is replication.UpdateState.NO_CHANGES:
|
||||
LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
|
||||
time.sleep(recheck_interval)
|
||||
|
||||
|
||||
def run(self, args: NominatimArgs) -> int:
    """ Execute the replication action selected on the command line.

        The default socket timeout is set from `args.socket_timeout`
        first. Then exactly one action runs: initialisation when
        `args.init` is set, otherwise the update check when
        `args.check_for_updates` is set, otherwise the update itself.

        Returns the result of the initialisation or the update check,
        or 0 once the update has finished.
    """
    socket.setdefaulttimeout(args.socket_timeout)

    if args.init:
        exit_code = self._init_replication(args)
    elif args.check_for_updates:
        exit_code = self._check_for_updates(args)
    else:
        self._update(args)
        exit_code = 0

    return exit_code
|
||||
229
src/nominatim_db/clicmd/setup.py
Normal file
229
src/nominatim_db/clicmd/setup.py
Normal file
@@ -0,0 +1,229 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the 'import' subcommand.
|
||||
"""
|
||||
from typing import Optional
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import connect
|
||||
from nominatim_core.db import status, properties
|
||||
from ..tokenizer.base import AbstractTokenizer
|
||||
from ..version import NOMINATIM_VERSION
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid loading modules that may not be needed.
|
||||
# pylint: disable=C0415
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.

    This sub-command sets up a new Nominatim database from scratch starting
    with creating a new database in Postgresql. The user running this command
    needs superuser rights on the database.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the command-line options of the subcommand on `parser`.
        """
        group1 = parser.add_argument_group('Required arguments')
        group1.add_argument('--osm-file', metavar='FILE', action='append',
                            help='OSM file to be imported'
                                 ' (repeat for importing multiple files)',
                            default=None)
        group1.add_argument('--continue', dest='continue_at',
                            choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
                            help='Continue an import that was interrupted',
                            default=None)
        group2 = parser.add_argument_group('Optional arguments')
        group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                            help='Size of cache to be used by osm2pgsql (in MB)')
        group2.add_argument('--reverse-only', action='store_true',
                            help='Do not create tables and indexes for searching')
        group2.add_argument('--no-partitions', action='store_true',
                            help=("Do not partition search indices "
                                  "(speeds up import of single country extracts)"))
        group2.add_argument('--no-updates', action='store_true',
                            help="Do not keep tables that are only needed for "
                                 "updating the database later")
        group2.add_argument('--offline', action='store_true',
                            help="Do not attempt to load any additional data from the internet")
        group3 = parser.add_argument_group('Expert options')
        group3.add_argument('--ignore-errors', action='store_true',
                            help='Continue import even when errors in SQL are present')
        group3.add_argument('--index-noanalyse', action='store_true',
                            help='Do not perform analyse operations during index (expert only)')
        group3.add_argument('--prepare-database', action='store_true',
                            help='Create the database but do not import any data')


    def run(self, args: NominatimArgs) -> int:
        """ Run the import or the remaining phases of an interrupted import.

            `args.continue_at` names the phase to resume at; that phase and
            all later ones are executed. When it is None, the database
            skeleton is created first and then every phase runs. With
            `args.prepare_database` only the skeleton is created.

            Returns 0 on success. Raises UsageError when the command-line
            arguments are inconsistent or no input files are available.
        """
        from ..data import country_info
        from ..tools import database_import, refresh, postcodes
        from ..indexer.indexer import Indexer

        num_threads = args.threads or psutil.cpu_count() or 1

        country_info.setup_country_config(args.config)

        self._check_arguments(args)

        if args.prepare_database or args.continue_at is None:
            LOG.warning('Creating database')
            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    rouser=args.config.DATABASE_WEBUSER)
            if args.prepare_database:
                return 0

        if args.continue_at in (None, 'import-from-file'):
            self._import_from_file(args)

        # The placex data is (re)loaded in exactly the same cases in which
        # the postcodes are recomputed, so the check is done only once.
        reload_data = args.continue_at in ('import-from-file', 'load-data', None)

        if reload_data:
            self._load_data(args.config.get_libpq_dsn(), num_threads)

        LOG.warning("Setting up tokenizer")
        tokenizer = self._get_tokenizer(args.continue_at, args.config)

        if reload_data:
            LOG.warning('Calculate postcodes')
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)

        if reload_data or args.continue_at == 'indexing':
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
            indexer.index_full(analyse=not args.index_noanalyse)

        self._postprocess_tables(args, tokenizer, num_threads)

        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)

        self._finalize_database(args.config.get_libpq_dsn(), args.offline)

        return 0


    def _check_arguments(self, args: NominatimArgs) -> None:
        """ Raise a UsageError for inconsistent combinations of
            command-line arguments.
        """
        if args.osm_file is None and args.continue_at is None and not args.prepare_database:
            raise UsageError("No input files (use --osm-file).")

        if args.osm_file is not None and args.continue_at not in ('import-from-file', None):
            raise UsageError(f"Cannot use --continue {args.continue_at} and --osm-file together.")

        if args.continue_at is not None and args.prepare_database:
            raise UsageError(
                "Cannot use --continue and --prepare-database together."
            )


    def _import_from_file(self, args: NominatimArgs) -> None:
        """ Run the 'import-from-file' phase: import the OSM files and the
            importance data and create the table layout.

            Raises UsageError when the list of OSM files is empty.
        """
        from ..data import country_info
        from ..tools import database_import, refresh

        files = args.get_osm_file_list()
        if not files:
            raise UsageError("No input files (use --osm-file).")

        # Check if the correct plugins are installed
        database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
        LOG.warning('Setting up country tables')
        country_info.setup_country_tables(args.config.get_libpq_dsn(),
                                          args.config.lib_dir.data,
                                          args.no_partitions)

        LOG.warning('Importing OSM data file')
        database_import.import_osm_data(files,
                                        args.osm2pgsql_options(0, 1),
                                        drop=args.no_updates,
                                        ignore_errors=args.ignore_errors)

        # Missing importance data is reported but does not abort the import.
        LOG.warning('Importing wikipedia importance data')
        data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
        if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                             data_path) > 0:
            LOG.error('Wikipedia importance dump file not found. '
                      'Calculating importance values of locations will not '
                      'use Wikipedia importance data.')

        LOG.warning('Importing secondary importance raster data')
        if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
                                               args.project_dir) != 0:
            LOG.error('Secondary importance file not imported. '
                      'Falling back to default ranking.')

        self._setup_tables(args.config, args.reverse_only)


    def _load_data(self, dsn: str, num_threads: int) -> None:
        """ Run the 'load-data' phase: truncate the data tables and load
            the data into the placex table.
        """
        from ..tools import database_import

        LOG.warning('Initialise tables')
        with connect(dsn) as conn:
            database_import.truncate_data_tables(conn)

        LOG.warning('Load data into placex table')
        database_import.load_data(dsn, num_threads)


    def _postprocess_tables(self, args: NominatimArgs, tokenizer: AbstractTokenizer,
                            num_threads: int) -> None:
        """ Run the 'db-postprocess' phase: create the search indices and
            country names, drop the update tables when requested with
            `args.no_updates` and finalize the tokenizer.
        """
        from ..data import country_info
        from ..tools import database_import, freeze

        LOG.warning('Post-process tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config,
                                                  drop=args.no_updates,
                                                  threads=num_threads)
            LOG.warning('Create search index for default country names.')
            country_info.create_country_names(conn, tokenizer,
                                              args.config.get_str_list('LANGUAGES'))
            if args.no_updates:
                freeze.drop_update_tables(conn)
        tokenizer.finalize_import(args.config)

        LOG.warning('Recompute word counts')
        tokenizer.update_statistics(args.config, threads=num_threads)


    def _setup_tables(self, config: Configuration, reverse_only: bool) -> None:
        """ Set up the basic database layout: tables, indexes and functions.
        """
        from ..tools import database_import, refresh

        with connect(config.get_libpq_dsn()) as conn:
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, config, reverse_only=reverse_only)
            refresh.load_address_levels_from_config(conn, config)
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, config)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, config)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)


    def _get_tokenizer(self, continue_at: Optional[str],
                       config: Configuration) -> AbstractTokenizer:
        """ Set up a new tokenizer or load an already initialised one.
        """
        from ..tokenizer import factory as tokenizer_factory

        if continue_at in ('import-from-file', 'load-data', None):
            # (re)initialise the tokenizer data
            return tokenizer_factory.create_tokenizer(config)

        # just load the tokenizer
        return tokenizer_factory.get_tokenizer_for_db(config)


    def _finalize_database(self, dsn: str, offline: bool) -> None:
        """ Determine the database date and set the status accordingly.

            The database version property is always written. A failure to
            determine or store the database date is only logged.
        """
        with connect(dsn) as conn:
            properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))

            try:
                dbdate = status.compute_database_date(conn, offline)
                status.set_status(conn, dbdate)
                LOG.info('Database is at %s.', dbdate)
            except Exception as exc: # pylint: disable=broad-except
                LOG.error('Cannot determine date of database: %s', exc)
|
||||
93
src/nominatim_db/clicmd/special_phrases.py
Normal file
93
src/nominatim_db/clicmd/special_phrases.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the 'special-phrases' command.
|
||||
"""
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.db.connection import connect
|
||||
from ..tools.special_phrases.sp_importer import SPImporter, SpecialPhraseLoader
|
||||
from ..tools.special_phrases.sp_wiki_loader import SPWikiLoader
|
||||
from ..tools.special_phrases.sp_csv_loader import SPCsvLoader
|
||||
from .args import NominatimArgs
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid loading modules that may not be needed.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
class ImportSpecialPhrases:
    """\
    Import special phrases.

    Special phrases are search terms that narrow down the type of object
    that should be searched. For example, you might want to search for
    'Hotels in Barcelona'. The OSM wiki has a selection of special phrases
    in many languages, which can be imported with this command.

    You can also provide your own phrases in a CSV file. The file needs to have
    the following five columns:
     * phrase - the term expected for searching
     * class - the OSM tag key of the object type
     * type - the OSM tag value of the object type
     * operator - the kind of search to be done (one of: in, near, name, -)
     * plural - whether the term is a plural or not (Y/N)

    An example file can be found in the Nominatim sources at
    'test/testdb/full_en_phrases_test.csv'.

    The import can be further configured to ignore specific key/value pairs.
    This is particularly useful when importing phrases from the wiki. The
    default configuration excludes some very common tags like building=yes.
    The configuration can be customized by putting a file `phrase-settings.json`
    with custom rules into the project directory or by using the `--config`
    option to point to another configuration file.
    """

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the command-line options of the subcommand on `parser`.
        """
        group = parser.add_argument_group('Input arguments')
        group.add_argument('--import-from-wiki', action='store_true',
                           help='Import special phrases from the OSM wiki to the database')
        group.add_argument('--import-from-csv', metavar='FILE',
                           help='Import special phrases from a CSV file')
        group.add_argument('--no-replace', action='store_true',
                           help='Keep the old phrases and only add the new ones')


    def run(self, args: NominatimArgs) -> int:
        """ Import special phrases from the sources selected on the
            command line: first from the wiki, then from the CSV file.

            Returns 0. Raises UsageError when a CSV file was given that
            does not exist.
        """
        # Validate the CSV input before any import is started, so that a
        # wrong file name cannot abort the command after the wiki import
        # has already run.
        if args.import_from_csv and not Path(args.import_from_csv).is_file():
            LOG.fatal("CSV file '%s' does not exist.", args.import_from_csv)
            raise UsageError('Cannot access file.')

        if args.import_from_wiki:
            self.start_import(args, SPWikiLoader(args.config))

        if args.import_from_csv:
            self.start_import(args, SPCsvLoader(args.import_from_csv))

        return 0


    def start_import(self, args: NominatimArgs, loader: SpecialPhraseLoader) -> None:
        """
            Create the SPImporter object containing the right
            sp loader and then start the import of special phrases.

            Existing phrases are replaced unless `args.no_replace` is set.
        """
        from ..tokenizer import factory as tokenizer_factory

        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
        should_replace = not args.no_replace
        with connect(args.config.get_libpq_dsn()) as db_connection:
            SPImporter(
                args.config, db_connection, loader
            ).import_phrases(tokenizer, should_replace)
|
||||
Reference in New Issue
Block a user