fix style issue found by flake8
@@ -25,6 +25,7 @@ from .clicmd.args import NominatimArgs, Subcommand
 
 LOG = logging.getLogger()
 
+
 class CommandlineParser:
     """ Wraps some of the common functions for parsing the command line
         and setting up subcommands.
@@ -57,7 +58,6 @@ class CommandlineParser:
         group.add_argument('-j', '--threads', metavar='NUM', type=int,
                            help='Number of parallel threads to use')
 
-
     def nominatim_version_text(self) -> str:
        """ Program name and version number as string
        """
@@ -66,7 +66,6 @@ class CommandlineParser:
            text += f' ({version.GIT_COMMIT_HASH})'
        return text
 
-
    def add_subcommand(self, name: str, cmd: Subcommand) -> None:
        """ Add a subcommand to the parser. The subcommand must be a class
            with a function add_args() that adds the parameters for the
@@ -82,7 +81,6 @@ class CommandlineParser:
        parser.set_defaults(command=cmd)
        cmd.add_args(parser)
 
-
    def run(self, **kwargs: Any) -> int:
        """ Parse the command line arguments of the program and execute the
            appropriate subcommand.
@@ -122,7 +120,7 @@ class CommandlineParser:
                return ret
            except UsageError as exception:
                if log.isEnabledFor(logging.DEBUG):
-                    raise # use Python's exception printing
+                    raise  # use Python's exception printing
                log.fatal('FATAL: %s', exception)
 
        # If we get here, then execution has failed in some way.
@@ -139,7 +137,6 @@ class CommandlineParser:
 # a subcommand.
 #
 # No need to document the functions each time.
-# pylint: disable=C0111
 class AdminServe:
    """\
    Start a simple web server for serving the API.
@@ -164,15 +161,13 @@ class AdminServe:
                           choices=('falcon', 'starlette'),
                           help='Webserver framework to run. (default: falcon)')
 
-
    def run(self, args: NominatimArgs) -> int:
        asyncio.run(self.run_uvicorn(args))
 
        return 0
 
-
    async def run_uvicorn(self, args: NominatimArgs) -> None:
-        import uvicorn # pylint: disable=import-outside-toplevel
+        import uvicorn
 
        server_info = args.server.split(':', 1)
        host = server_info[0]
@@ -226,7 +221,7 @@ def get_set_parser() -> CommandlineParser:
        parser.add_subcommand('details', apicmd.APIDetails())
        parser.add_subcommand('status', apicmd.APIStatus())
    except ModuleNotFoundError as ex:
-        if not ex.name or 'nominatim_api' not in ex.name: # pylint: disable=E1135
+        if not ex.name or 'nominatim_api' not in ex.name:
            raise ex
 
        parser.parser.epilog = \
@@ -235,7 +230,6 @@ def get_set_parser() -> CommandlineParser:
            '\n export, convert, serve, search, reverse, lookup, details, status'\
            "\n\nRun 'pip install nominatim-api' to install the package."
 
-
    return parser
 
 
@@ -18,13 +18,10 @@ from .args import NominatimArgs
 from ..db.connection import connect
 from ..tools.freeze import is_frozen
 
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
-# Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
+
 LOG = logging.getLogger()
 
 
 class UpdateAddData:
    """\
    Add additional data from a file or an online source.
@@ -65,7 +62,6 @@ class UpdateAddData:
        group2.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
                            help='Set timeout for file downloads')
 
-
    def run(self, args: NominatimArgs) -> int:
        from ..tools import add_osm_data
 
@@ -103,7 +99,6 @@ class UpdateAddData:
 
        return 0
 
-
    async def _add_tiger_data(self, args: NominatimArgs) -> int:
        from ..tokenizer import factory as tokenizer_factory
        from ..tools import tiger_data
@@ -113,5 +108,5 @@ class UpdateAddData:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
        return await tiger_data.add_tiger_data(args.tiger_data,
                                               args.config,
-                                               args.threads or psutil.cpu_count() or 1,
+                                               args.threads or psutil.cpu_count() or 1,
                                               tokenizer)
@@ -57,7 +57,6 @@ class AdminFuncs:
        mgroup.add_argument('--place-id', type=int,
                            help='Analyse indexing of the given Nominatim object')
 
 
    def run(self, args: NominatimArgs) -> int:
-        # pylint: disable=too-many-return-statements
        if args.warm:
@@ -93,7 +92,6 @@ class AdminFuncs:
 
        return 1
 
-
    def _warm(self, args: NominatimArgs) -> int:
        try:
            import nominatim_api as napi
@@ -22,11 +22,10 @@ import nominatim_api.logging as loglib
 from ..errors import UsageError
 from .args import NominatimArgs
 
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
 
 LOG = logging.getLogger()
 
+
 STRUCTURED_QUERY = (
    ('amenity', 'name and/or type of POI'),
    ('street', 'housenumber and street'),
@@ -37,6 +36,7 @@ STRUCTURED_QUERY = (
    ('postalcode', 'postcode')
 )
 
+
 EXTRADATA_PARAMS = (
    ('addressdetails', 'Include a breakdown of the address into elements'),
    ('extratags', ("Include additional information if available "
@@ -44,6 +44,7 @@ EXTRADATA_PARAMS = (
    ('namedetails', 'Include a list of alternative names')
 )
 
+
 def _add_list_format(parser: argparse.ArgumentParser) -> None:
    group = parser.add_argument_group('Other options')
    group.add_argument('--list-formats', action='store_true',
@@ -62,7 +63,7 @@ def _add_api_output_arguments(parser: argparse.ArgumentParser) -> None:
    group.add_argument('--polygon-output',
                       choices=['geojson', 'kml', 'svg', 'text'],
                       help='Output geometry of results as a GeoJSON, KML, SVG or WKT')
-    group.add_argument('--polygon-threshold', type=float, default = 0.0,
+    group.add_argument('--polygon-threshold', type=float, default=0.0,
                       metavar='TOLERANCE',
                       help=("Simplify output geometry."
                             "Parameter is difference tolerance in degrees."))
@@ -173,7 +174,6 @@ class APISearch:
                           help='Do not remove duplicates from the result list')
        _add_list_format(parser)
 
-
    def run(self, args: NominatimArgs) -> int:
        formatter = napi.load_format_dispatcher('v1', args.project_dir)
 
@@ -189,7 +189,7 @@ class APISearch:
        try:
            with napi.NominatimAPI(args.project_dir) as api:
                params: Dict[str, Any] = {'max_results': args.limit + min(args.limit, 10),
-                                          'address_details': True, # needed for display name
+                                          'address_details': True,  # needed for display name
                                          'geometry_output': _get_geometry_output(args),
                                          'geometry_simplification': args.polygon_threshold,
                                          'countries': args.countrycodes,
@@ -197,7 +197,7 @@ class APISearch:
                                          'viewbox': args.viewbox,
                                          'bounded_viewbox': args.bounded,
                                          'locales': _get_locales(args, api.config.DEFAULT_LANGUAGE)
-                                         }
+                                          }
 
                if args.query:
                    results = api.search(args.query, **params)
@@ -253,7 +253,6 @@ class APIReverse:
        _add_api_output_arguments(parser)
        _add_list_format(parser)
 
-
    def run(self, args: NominatimArgs) -> int:
        formatter = napi.load_format_dispatcher('v1', args.project_dir)
 
@@ -276,7 +275,7 @@ class APIReverse:
                result = api.reverse(napi.Point(args.lon, args.lat),
                                     max_rank=zoom_to_rank(args.zoom or 18),
                                     layers=layers,
-                                     address_details=True, # needed for display name
+                                     address_details=True,  # needed for display name
                                     geometry_output=_get_geometry_output(args),
                                     geometry_simplification=args.polygon_threshold,
                                     locales=_get_locales(args, api.config.DEFAULT_LANGUAGE))
@@ -299,7 +298,6 @@ class APIReverse:
        return 42
 
 
-
 class APILookup:
    """\
    Execute API lookup query.
@@ -319,7 +317,6 @@ class APILookup:
        _add_api_output_arguments(parser)
        _add_list_format(parser)
 
-
    def run(self, args: NominatimArgs) -> int:
        formatter = napi.load_format_dispatcher('v1', args.project_dir)
 
@@ -340,7 +337,7 @@ class APILookup:
        try:
            with napi.NominatimAPI(args.project_dir) as api:
                results = api.lookup(places,
-                                     address_details=True, # needed for display name
+                                     address_details=True,  # needed for display name
                                     geometry_output=_get_geometry_output(args),
                                     geometry_simplification=args.polygon_threshold or 0.0,
                                     locales=_get_locales(args, api.config.DEFAULT_LANGUAGE))
@@ -401,7 +398,6 @@ class APIDetails:
                           help='Preferred language order for presenting search results')
        _add_list_format(parser)
 
-
    def run(self, args: NominatimArgs) -> int:
        formatter = napi.load_format_dispatcher('v1', args.project_dir)
 
@@ -421,7 +417,7 @@ class APIDetails:
            place = napi.OsmID('W', args.way, args.object_class)
        elif args.relation:
            place = napi.OsmID('R', args.relation, args.object_class)
-        elif  args.place_id is not None:
+        elif args.place_id is not None:
            place = napi.PlaceID(args.place_id)
        else:
            raise UsageError('One of the arguments --node/-n --way/-w '
@@ -435,10 +431,10 @@ class APIDetails:
                                         linked_places=args.linkedplaces,
                                         parented_places=args.hierarchy,
                                         keywords=args.keywords,
-                                         geometry_output=napi.GeometryFormat.GEOJSON
-                                                         if args.polygon_geojson
-                                                         else napi.GeometryFormat.NONE,
-                                         locales=locales)
+                                         geometry_output=(napi.GeometryFormat.GEOJSON
+                                                          if args.polygon_geojson
+                                                          else napi.GeometryFormat.NONE),
+                                         locales=locales)
        except napi.UsageError as ex:
            raise UsageError(ex) from ex
 
@@ -472,7 +468,6 @@ class APIStatus:
                           help='Format of result (use --list-formats to see supported formats)')
        _add_list_format(parser)
 
-
    def run(self, args: NominatimArgs) -> int:
        formatter = napi.load_format_dispatcher('v1', args.project_dir)
 
@@ -16,8 +16,10 @@ from ..errors import UsageError
 from ..config import Configuration
 from ..typing import Protocol
 
+
 LOG = logging.getLogger()
 
+
 class Subcommand(Protocol):
    """
    Interface to be implemented by classes implementing a CLI subcommand.
@@ -178,7 +180,6 @@ class NominatimArgs:
    polygon_geojson: bool
    group_hierarchy: bool
 
-
    def osm2pgsql_options(self, default_cache: int,
                          default_threads: int) -> Dict[str, Any]:
        """ Return the standard osm2pgsql options that can be derived
@@ -196,9 +197,8 @@ class NominatimArgs:
                    slim_index=self.config.TABLESPACE_OSM_INDEX,
                    main_data=self.config.TABLESPACE_PLACE_DATA,
                    main_index=self.config.TABLESPACE_PLACE_INDEX
-                    )
-                )
-
+                    )
+        )
 
    def get_osm_file_list(self) -> Optional[List[Path]]:
        """ Return the --osm-file argument as a list of Paths or None
@@ -15,10 +15,6 @@ from pathlib import Path
 from ..errors import UsageError
 from .args import NominatimArgs
 
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
-# Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
 
 class WithAction(argparse.Action):
    """ Special action that saves a list of flags, given on the command-line
@@ -43,7 +39,6 @@ class WithAction(argparse.Action):
 
        super().__init__(full_option_strings, argparse.SUPPRESS, nargs=0, **kwargs)
 
-
    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace,
                 values: Union[str, Sequence[Any], None],
                 option_string: Optional[str] = None) -> None:
@@ -81,7 +76,6 @@ class ConvertDB:
        group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
                           help='Enable/disable support for details API (default: enabled)')
 
-
    def run(self, args: NominatimArgs) -> int:
        if args.output.exists():
            raise UsageError(f"File '{args.output}' already exists. Refusing to overwrite.")
@@ -18,20 +18,15 @@ import nominatim_api as napi
 from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
 from nominatim_api.types import LookupDetails
 
-import sqlalchemy as sa # pylint: disable=C0411
+import sqlalchemy as sa
 
 from ..errors import UsageError
 from .args import NominatimArgs
 
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
-# Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
-# Needed for SQLAlchemy
-# pylint: disable=singleton-comparison
+
 LOG = logging.getLogger()
 
 
 RANK_RANGE_MAP = {
    'country': (4, 4),
    'state': (5, 9),
@@ -42,6 +37,7 @@ RANK_RANGE_MAP = {
    'path': (27, 27)
 }
 
+
 RANK_TO_OUTPUT_MAP = {
    4: 'country',
    5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
@@ -50,6 +46,7 @@ RANK_TO_OUTPUT_MAP = {
    17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
    26: 'street', 27: 'path'}
 
+
 class QueryExport:
    """\
    Export places as CSV file from the database.
@@ -84,7 +81,6 @@ class QueryExport:
                            dest='relation',
                            help='Export only children of this OSM relation')
 
-
    def run(self, args: NominatimArgs) -> int:
        return asyncio.run(export(args))
 
@@ -104,15 +100,15 @@ async def export(args: NominatimArgs) -> int:
        t = conn.t.placex
 
        sql = sa.select(t.c.place_id, t.c.parent_place_id,
-                        t.c.osm_type, t.c.osm_id, t.c.name,
-                        t.c.class_, t.c.type, t.c.admin_level,
-                        t.c.address, t.c.extratags,
-                        t.c.housenumber, t.c.postcode, t.c.country_code,
-                        t.c.importance, t.c.wikipedia, t.c.indexed_date,
-                        t.c.rank_address, t.c.rank_search,
-                        t.c.centroid)\
-                 .where(t.c.linked_place_id == None)\
-                 .where(t.c.rank_address.between(*output_range))
+                        t.c.osm_type, t.c.osm_id, t.c.name,
+                        t.c.class_, t.c.type, t.c.admin_level,
+                        t.c.address, t.c.extratags,
+                        t.c.housenumber, t.c.postcode, t.c.country_code,
+                        t.c.importance, t.c.wikipedia, t.c.indexed_date,
+                        t.c.rank_address, t.c.rank_search,
+                        t.c.centroid)\
+            .where(t.c.linked_place_id == None)\
+            .where(t.c.rank_address.between(*output_range))
 
        parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
        if parent_place_id:
@@ -159,7 +155,6 @@ async def dump_results(conn: napi.SearchConnection,
    await add_result_details(conn, results,
                             LookupDetails(address_details=True, locales=locale))
 
-
    for result in results:
        data = {'placeid': result.place_id,
                'postcode': result.postcode}
@@ -12,10 +12,6 @@ import argparse
 from ..db.connection import connect
 from .args import NominatimArgs
 
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
-# Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
 
 class SetupFreeze:
    """\
@@ -30,8 +26,7 @@ class SetupFreeze:
    """
 
    def add_args(self, parser: argparse.ArgumentParser) -> None:
-        pass # No options
-
+        pass  # No options
 
    def run(self, args: NominatimArgs) -> int:
        from ..tools import freeze
@@ -16,11 +16,6 @@ from ..db import status
 from ..db.connection import connect
 from .args import NominatimArgs
 
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
-# Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
-
 
 class UpdateIndex:
    """\
@@ -43,7 +38,6 @@ class UpdateIndex:
        group.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30,
                           help='Maximum/finishing rank')
 
-
    def run(self, args: NominatimArgs) -> int:
        asyncio.run(self._do_index(args))
 
@@ -54,7 +48,6 @@ class UpdateIndex:
 
        return 0
 
-
    async def _do_index(self, args: NominatimArgs) -> None:
        from ..tokenizer import factory as tokenizer_factory
 
@@ -64,7 +57,7 @@ class UpdateIndex:
        indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                          args.threads or psutil.cpu_count() or 1)
 
-        has_pending = True # run at least once
+        has_pending = True  # run at least once
        while has_pending:
            if not args.no_boundaries:
                await indexer.index_boundaries(args.minrank, args.maxrank)
@@ -18,13 +18,10 @@ from ..db.connection import connect, table_exists
 from ..tokenizer.base import AbstractTokenizer
 from .args import NominatimArgs
 
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
-# Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
+
 LOG = logging.getLogger()
 
 
 def _parse_osm_object(obj: str) -> Tuple[str, int]:
    """ Parse the given argument into a tuple of OSM type and ID.
        Raises an ArgumentError if the format is not recognized.
@@ -86,8 +83,7 @@ class UpdateRefresh:
        group.add_argument('--enable-debug-statements', action='store_true',
                           help='Enable debug warning statements in functions')
 
-
-    def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
+    def run(self, args: NominatimArgs) -> int:
        from ..tools import refresh, postcodes
        from ..indexer.indexer import Indexer
 
@@ -131,7 +127,7 @@ class UpdateRefresh:
 
            LOG.warning('Import secondary importance raster data from %s', args.project_dir)
            if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
-                                                   args.project_dir) > 0:
+                                                   args.project_dir) > 0:
                LOG.fatal('FATAL: Cannot update secondary importance raster data')
                return 1
            need_function_refresh = True
@@ -173,7 +169,6 @@ class UpdateRefresh:
 
        return 0
 
-
    def _get_tokenizer(self, config: Configuration) -> AbstractTokenizer:
        if self.tokenizer is None:
            from ..tokenizer import factory as tokenizer_factory
@@ -22,10 +22,6 @@ from .args import NominatimArgs
 
 LOG = logging.getLogger()
 
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
-# Using non-top-level imports to make pyosmium optional for replication only.
-# pylint: disable=C0415
-
+
 class UpdateReplication:
    """\
@@ -71,7 +67,6 @@ class UpdateReplication:
        group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
                           help='Set timeout for file downloads')
 
-
    def _init_replication(self, args: NominatimArgs) -> int:
        from ..tools import replication, refresh
 
@@ -84,7 +79,6 @@ class UpdateReplication:
            refresh.create_functions(conn, args.config, True, False)
        return 0
 
-
    def _check_for_updates(self, args: NominatimArgs) -> int:
        from ..tools import replication
 
@@ -92,7 +86,6 @@ class UpdateReplication:
            return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL,
                                                 socket_timeout=args.socket_timeout)
 
-
    def _report_update(self, batchdate: dt.datetime,
                       start_import: dt.datetime,
                       start_index: Optional[dt.datetime]) -> None:
@@ -106,7 +99,6 @@ class UpdateReplication:
                    round_time(end - start_import),
                    round_time(end - batchdate))
 
-
    def _compute_update_interval(self, args: NominatimArgs) -> int:
        if args.catch_up:
            return 0
@@ -123,7 +115,6 @@ class UpdateReplication:
 
        return update_interval
 
 
    async def _update(self, args: NominatimArgs) -> None:
-        # pylint: disable=too-many-locals
        from ..tools import replication
@@ -186,7 +177,6 @@ class UpdateReplication:
                LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
                time.sleep(recheck_interval)
 
-
    def run(self, args: NominatimArgs) -> int:
        socket.setdefaulttimeout(args.socket_timeout)
 
@@ -23,13 +23,10 @@ from ..tokenizer.base import AbstractTokenizer
 from ..version import NOMINATIM_VERSION
 from .args import NominatimArgs
 
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
-# Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=C0415
+
 LOG = logging.getLogger()
 
 
 class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
@@ -42,36 +39,35 @@ class SetupAll:
    def add_args(self, parser: argparse.ArgumentParser) -> None:
        group1 = parser.add_argument_group('Required arguments')
        group1.add_argument('--osm-file', metavar='FILE', action='append',
-                           help='OSM file to be imported'
-                                ' (repeat for importing multiple files)',
-                           default=None)
+                            help='OSM file to be imported'
+                                 ' (repeat for importing multiple files)',
+                            default=None)
        group1.add_argument('--continue', dest='continue_at',
-                           choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
-                           help='Continue an import that was interrupted',
-                           default=None)
+                            choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
+                            help='Continue an import that was interrupted',
+                            default=None)
        group2 = parser.add_argument_group('Optional arguments')
        group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
-                           help='Size of cache to be used by osm2pgsql (in MB)')
+                            help='Size of cache to be used by osm2pgsql (in MB)')
        group2.add_argument('--reverse-only', action='store_true',
-                           help='Do not create tables and indexes for searching')
+                            help='Do not create tables and indexes for searching')
        group2.add_argument('--no-partitions', action='store_true',
-                           help=("Do not partition search indices "
-                                 "(speeds up import of single country extracts)"))
+                            help="Do not partition search indices "
+                                 "(speeds up import of single country extracts)")
        group2.add_argument('--no-updates', action='store_true',
-                           help="Do not keep tables that are only needed for "
-                                "updating the database later")
+                            help="Do not keep tables that are only needed for "
+                                 "updating the database later")
        group2.add_argument('--offline', action='store_true',
                            help="Do not attempt to load any additional data from the internet")
        group3 = parser.add_argument_group('Expert options')
        group3.add_argument('--ignore-errors', action='store_true',
-                           help='Continue import even when errors in SQL are present')
+                            help='Continue import even when errors in SQL are present')
        group3.add_argument('--index-noanalyse', action='store_true',
-                           help='Do not perform analyse operations during index (expert only)')
+                            help='Do not perform analyse operations during index (expert only)')
        group3.add_argument('--prepare-database', action='store_true',
                            help='Create the database but do not import any data')
 
-
-    def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches
+    def run(self, args: NominatimArgs) -> int:
        if args.osm_file is None and args.continue_at is None and not args.prepare_database:
            raise UsageError("No input files (use --osm-file).")
 
@@ -85,7 +81,6 @@ class SetupAll:
 
        return asyncio.run(self.async_run(args))
 
-
    async def async_run(self, args: NominatimArgs) -> int:
        from ..data import country_info
        from ..tools import database_import, postcodes, freeze
@@ -97,7 +92,7 @@ class SetupAll:
        if args.prepare_database or args.continue_at is None:
            LOG.warning('Creating database')
            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
-                                                    rouser=args.config.DATABASE_WEBUSER)
+                                                    rouser=args.config.DATABASE_WEBUSER)
            if args.prepare_database:
                return 0
 
@@ -120,8 +115,7 @@ class SetupAll:
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)
 
-        if args.continue_at in \
-           ('import-from-file', 'load-data', 'indexing', None):
+        if args.continue_at in ('import-from-file', 'load-data', 'indexing', None):
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
            await indexer.index_full(analyse=not args.index_noanalyse)
@@ -145,7 +139,6 @@ class SetupAll:
 
        return 0
 
-
    def _base_import(self, args: NominatimArgs) -> None:
        from ..tools import database_import, refresh
        from ..data import country_info
@@ -159,8 +152,8 @@ class SetupAll:
        database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
        LOG.warning('Setting up country tables')
        country_info.setup_country_tables(args.config.get_libpq_dsn(),
-                                          args.config.lib_dir.data,
-                                          args.no_partitions)
+                                          args.config.lib_dir.data,
+                                          args.no_partitions)
 
        LOG.warning('Importing OSM data file')
        database_import.import_osm_data(files,
@@ -171,20 +164,19 @@ class SetupAll:
        LOG.warning('Importing wikipedia importance data')
        data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
        if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
-                                             data_path) > 0:
+                                             data_path) > 0:
            LOG.error('Wikipedia importance dump file not found. '
-                      'Calculating importance values of locations will not '
-                      'use Wikipedia importance data.')
+                      'Calculating importance values of locations will not '
+                      'use Wikipedia importance data.')
 
        LOG.warning('Importing secondary importance raster data')
        if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
-                                               args.project_dir) != 0:
+                                               args.project_dir) != 0:
            LOG.error('Secondary importance file not imported. '
-                      'Falling back to default ranking.')
+                      'Falling back to default ranking.')
 
        self._setup_tables(args.config, args.reverse_only)
 
-
    def _setup_tables(self, config: Configuration, reverse_only: bool) -> None:
        """ Set up the basic database layout: tables, indexes and functions.
        """
@@ -205,7 +197,6 @@ class SetupAll:
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)
 
-
    def _get_tokenizer(self, continue_at: Optional[str],
                       config: Configuration) -> AbstractTokenizer:
        """ Set up a new tokenizer or load an already initialised one.
@@ -219,7 +210,6 @@ class SetupAll:
        # just load the tokenizer
        return tokenizer_factory.get_tokenizer_for_db(config)
 
-
    def _finalize_database(self, dsn: str, offline: bool) -> None:
        """ Determine the database date and set the status accordingly.
        """
@@ -230,5 +220,5 @@ class SetupAll:
                dbdate = status.compute_database_date(conn, offline)
                status.set_status(conn, dbdate)
                LOG.info('Database is at %s.', dbdate)
-            except Exception as exc: # pylint: disable=broad-except
+            except Exception as exc:
                LOG.error('Cannot determine date of database: %s', exc)
@@ -18,12 +18,9 @@ from ..tools.special_phrases.sp_wiki_loader import SPWikiLoader
 from ..tools.special_phrases.sp_csv_loader import SPCsvLoader
 from .args import NominatimArgs
 
+
 LOG = logging.getLogger()
 
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
-# Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
 
 class ImportSpecialPhrases:
    """\
@@ -62,7 +59,6 @@ class ImportSpecialPhrases:
        group.add_argument('--no-replace', action='store_true',
                           help='Keep the old phrases and only add the new ones')
 
-
    def run(self, args: NominatimArgs) -> int:
 
        if args.import_from_wiki:
@@ -77,7 +73,6 @@ class ImportSpecialPhrases:
 
        return 0
 
-
    def start_import(self, args: NominatimArgs, loader: SpecialPhraseLoader) -> None:
        """
        Create the SPImporter object containing the right
@@ -25,7 +25,8 @@ from .errors import UsageError
 from . import paths
 
 LOG = logging.getLogger()
-CONFIG_CACHE : Dict[str, Any] = {}
+CONFIG_CACHE: Dict[str, Any] = {}
 
+
 def flatten_config_list(content: Any, section: str = '') -> List[Any]:
    """ Flatten YAML configuration lists that contain include sections
@@ -79,14 +80,12 @@ class Configuration:
        self.lib_dir = _LibDirs()
        self._private_plugins: Dict[str, object] = {}
 
-
    def set_libdirs(self, **kwargs: StrPath) -> None:
        """ Set paths to library functions and data.
        """
        for key, value in kwargs.items():
            setattr(self.lib_dir, key, None if value is None else Path(value))
 
-
    def __getattr__(self, name: str) -> str:
        name = 'NOMINATIM_' + name
 
@@ -95,7 +94,6 @@ class Configuration:
 
        return self._config[name] or ''
 
-
    def get_bool(self, name: str) -> bool:
        """ Return the given configuration parameter as a boolean.
 
@@ -108,7 +106,6 @@ class Configuration:
        """
        return getattr(self, name).lower() in ('1', 'yes', 'true')
 
-
    def get_int(self, name: str) -> int:
        """ Return the given configuration parameter as an int.
 
@@ -128,11 +125,10 @@ class Configuration:
            LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
            raise UsageError("Configuration error.") from exp
 
-
    def get_str_list(self, name: str) -> Optional[List[str]]:
        """ Return the given configuration parameter as a list of strings.
            The values are assumed to be given as a comma-sparated list and
-            will be stripped before returning them.
+            will be stripped before returning them.
 
            Parameters:
              name: Name of the configuration parameter with the NOMINATIM_
@@ -148,7 +144,6 @@ class Configuration:
 
        return [v.strip() for v in raw.split(',')] if raw else None
 
-
    def get_path(self, name: str) -> Optional[Path]:
        """ Return the given configuration parameter as a Path.
 
@@ -174,7 +169,6 @@ class Configuration:
 
        return cfgpath.resolve()
 
-
    def get_libpq_dsn(self) -> str:
        """ Get configured database DSN converted into the key/value format
            understood by libpq and psycopg.
@@ -194,7 +188,6 @@ class Configuration:
 
        return dsn
 
-
    def get_database_params(self) -> Mapping[str, Union[str, int, None]]:
        """ Get the configured parameters for the database connection
            as a mapping.
@@ -206,7 +199,6 @@ class Configuration:
 
        return conninfo_to_dict(dsn)
 
-
    def get_import_style_file(self) -> Path:
        """ Return the import style file as a path object. Translates the
            name of the standard styles automatically into a file in the
@@ -219,7 +211,6 @@ class Configuration:
 
        return self.find_config_file('', 'IMPORT_STYLE')
 
-
    def get_os_env(self) -> Dict[str, str]:
        """ Return a copy of the OS environment with the Nominatim configuration
            merged in.
@@ -229,7 +220,6 @@ class Configuration:
 
        return env
 
-
    def load_sub_configuration(self, filename: StrPath,
                               config: Optional[str] = None) -> Any:
        """ Load additional configuration from a file. `filename` is the name
@@ -267,7 +257,6 @@ class Configuration:
        CONFIG_CACHE[str(configfile)] = result
        return result
 
-
    def load_plugin_module(self, module_name: str, internal_path: str) -> Any:
        """ Load a Python module as a plugin.
 
@@ -310,7 +299,6 @@ class Configuration:
 
        return sys.modules.get(module_name) or importlib.import_module(module_name)
 
-
    def find_config_file(self, filename: StrPath,
                         config: Optional[str] = None) -> Path:
        """ Resolve the location of a configuration file given a filename and
@@ -334,7 +322,6 @@ class Configuration:
 
            filename = cfg_filename
 
-
        search_paths = [self.project_dir, self.config_dir]
        for path in search_paths:
            if path is not None and (path / filename).is_file():
@@ -344,7 +331,6 @@ class Configuration:
                  filename, search_paths)
        raise UsageError("Config file not found.")
 
-
    def _load_from_yaml(self, cfgfile: Path) -> Any:
        """ Load a YAML configuration file. This installs a special handler that
            allows to include other YAML files using the '!include' operator.
@@ -353,7 +339,6 @@ class Configuration:
                        Loader=yaml.SafeLoader)
        return yaml.safe_load(cfgfile.read_text(encoding='utf-8'))
 
-
    def _yaml_include_representer(self, loader: Any, node: yaml.Node) -> Any:
        """ Handler for the '!include' operator in YAML files.
 
@@ -16,6 +16,7 @@ from ..errors import UsageError
 from ..config import Configuration
 from ..tokenizer.base import AbstractTokenizer
 
+
 def _flatten_name_list(names: Any) -> Dict[str, str]:
    if names is None:
        return {}
@@ -39,7 +40,6 @@ def _flatten_name_list(names: Any) -> Dict[str, str]:
    return flat
 
 
-
 class _CountryInfo:
    """ Caches country-specific properties from the configuration file.
    """
@@ -47,7 +47,6 @@ class _CountryInfo:
    def __init__(self) -> None:
        self._info: Dict[str, Dict[str, Any]] = {}
 
-
    def load(self, config: Configuration) -> None:
        """ Load the country properties from the configuration files,
            if they are not loaded yet.
@@ -63,7 +62,6 @@ class _CountryInfo:
                                     for x in prop['languages'].split(',')]
                prop['names'] = _flatten_name_list(prop.get('names'))
 
-
    def items(self) -> Iterable[Tuple[str, Dict[str, Any]]]:
        """ Return tuples of (country_code, property dict) as iterable.
        """
@@ -75,7 +73,6 @@ class _CountryInfo:
        return self._info.get(country_code, {})
 
 
-
 _COUNTRY_INFO = _CountryInfo()
 
 
@@ -86,14 +83,17 @@ def setup_country_config(config: Configuration) -> None:
    """
    _COUNTRY_INFO.load(config)
 
+
 @overload
 def iterate() -> Iterable[Tuple[str, Dict[str, Any]]]:
    ...
 
+
 @overload
 def iterate(prop: str) -> Iterable[Tuple[str, Any]]:
    ...
 
+
 def iterate(prop: Optional[str] = None) -> Iterable[Tuple[str, Dict[str, Any]]]:
    """ Iterate over country code and properties.
 
@@ -168,7 +168,7 @@ def create_country_names(conn: Connection, tokenizer: AbstractTokenizer,
 
            # country names (only in languages as provided)
            if name:
-                names.update({k : v for k, v in name.items() if _include_key(k)})
+                names.update({k: v for k, v in name.items() if _include_key(k)})
 
            analyzer.add_country_names(code, names)
 
@@ -10,6 +10,7 @@ the tokenizer.
 """
 from typing import Optional, Mapping, Any, Tuple
 
+
 class PlaceInfo:
    """ This data class contains all information the tokenizer can access
        about a place.
@@ -18,7 +19,6 @@ class PlaceInfo:
    def __init__(self, info: Mapping[str, Any]) -> None:
        self._info = info
 
-
    @property
    def name(self) -> Optional[Mapping[str, str]]:
        """ A dictionary with the names of the place. Keys and values represent
@@ -28,7 +28,6 @@ class PlaceInfo:
        """
        return self._info.get('name')
 
-
    @property
    def address(self) -> Optional[Mapping[str, str]]:
        """ A dictionary with the address elements of the place. They key
@@ -43,7 +42,6 @@ class PlaceInfo:
        """
        return self._info.get('address')
 
-
    @property
    def country_code(self) -> Optional[str]:
        """ The country code of the country the place is in. Guaranteed
@@ -52,7 +50,6 @@ class PlaceInfo:
        """
        return self._info.get('country_code')
 
-
    @property
    def rank_address(self) -> int:
        """ The [rank address][1] before any rank correction is applied.
@@ -61,7 +58,6 @@ class PlaceInfo:
        """
        return self._info.get('rank_address', 0)
 
-
    @property
    def centroid(self) -> Optional[Tuple[float, float]]:
        """ A center point of the place in WGS84. May be None when the
@@ -70,17 +66,15 @@ class PlaceInfo:
        x, y = self._info.get('centroid_x'), self._info.get('centroid_y')
        return None if x is None or y is None else (x, y)
 
-
    def is_a(self, key: str, value: str) -> bool:
        """ Set to True when the place's primary tag corresponds to the given
            key and value.
        """
        return self._info.get('class') == key and self._info.get('type') == value
 
-
    def is_country(self) -> bool:
        """ Set to True when the place is a valid country boundary.
        """
        return self.rank_address == 4 \
-               and self.is_a('boundary', 'administrative') \
-               and self.country_code is not None
+            and self.is_a('boundary', 'administrative') \
+            and self.country_code is not None
@@ -9,6 +9,7 @@ Data class for a single name of a place.
 """
 from typing import Optional, Dict, Mapping
 
+
 class PlaceName:
    """ Each name and address part of a place is encapsulated in an object of
        this class. It saves not only the name proper but also describes the
@@ -32,11 +33,9 @@ class PlaceName:
        self.suffix = suffix
        self.attr: Dict[str, str] = {}
 
-
    def __repr__(self) -> str:
        return f"PlaceName(name={self.name!r},kind={self.kind!r},suffix={self.suffix!r})"
 
-
    def clone(self, name: Optional[str] = None,
              kind: Optional[str] = None,
              suffix: Optional[str] = None,
@@ -57,21 +56,18 @@ class PlaceName:
 
        return newobj
 
-
    def set_attr(self, key: str, value: str) -> None:
        """ Add the given property to the name. If the property was already
            set, then the value is overwritten.
        """
        self.attr[key] = value
 
-
    def get_attr(self, key: str, default: Optional[str] = None) -> Optional[str]:
        """ Return the given property or the value of 'default' if it
            is not set.
        """
        return self.attr.get(key, default)
 
-
    def has_attr(self, key: str) -> bool:
        """ Check if the given attribute is set.
        """
@@ -14,6 +14,7 @@ import re
 from ..errors import UsageError
 from . import country_info
 
+
 class CountryPostcodeMatcher:
    """ Matches and formats a postcode according to a format definition
        of the given country.
@@ -30,7 +31,6 @@ class CountryPostcodeMatcher:
 
        self.output = config.get('output', r'\g<0>')
 
-
    def match(self, postcode: str) -> Optional[Match[str]]:
        """ Match the given postcode against the postcode pattern for this
            matcher. Returns a `re.Match` object if the match was successful
@@ -44,7 +44,6 @@ class CountryPostcodeMatcher:
 
        return None
 
-
    def normalize(self, match: Match[str]) -> str:
        """ Return the default format of the postcode for the given match.
            `match` must be a `re.Match` object previously returned by
@@ -71,14 +70,12 @@ class PostcodeFormatter:
        else:
            raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'")
 
-
    def set_default_pattern(self, pattern: str) -> None:
        """ Set the postcode match pattern to use, when a country does not
            have a specific pattern.
        """
        self.default_matcher = CountryPostcodeMatcher('', {'pattern': pattern})
 
-
    def get_matcher(self, country_code: Optional[str]) -> Optional[CountryPostcodeMatcher]:
        """ Return the CountryPostcodeMatcher for the given country.
            Returns None if the country doesn't have a postcode and the
@@ -92,7 +89,6 @@ class PostcodeFormatter:
 
        return self.country_matcher.get(country_code, self.default_matcher)
 
-
    def match(self, country_code: Optional[str], postcode: str) -> Optional[Match[str]]:
        """ Match the given postcode against the postcode pattern for this
            matcher. Returns a `re.Match` object if the country has a pattern
@@ -105,7 +101,6 @@ class PostcodeFormatter:
 
        return self.country_matcher.get(country_code, self.default_matcher).match(postcode)
 
-
    def normalize(self, country_code: str, match: Match[str]) -> str:
        """ Return the default format of the postcode for the given match.
            `match` must be a `re.Match` object previously returned by
@@ -23,6 +23,7 @@ LOG = logging.getLogger()
 Cursor = psycopg.Cursor[Any]
 Connection = psycopg.Connection[Any]
 
+
 def execute_scalar(conn: Connection, sql: psycopg.abc.Query, args: Any = None) -> Any:
    """ Execute query that returns a single value. The value is returned.
        If the query yields more than one row, a ValueError is raised.
@@ -42,9 +43,10 @@ def execute_scalar(conn: Connection, sql: psycopg.abc.Query, args: Any = None) -
 def table_exists(conn: Connection, table: str) -> bool:
    """ Check that a table with the given name exists in the database.
    """
-    num = execute_scalar(conn,
-                         """SELECT count(*) FROM pg_tables
-                            WHERE tablename = %s and schemaname = 'public'""", (table, ))
+    num = execute_scalar(
+        conn,
+        """SELECT count(*) FROM pg_tables
+           WHERE tablename = %s and schemaname = 'public'""", (table, ))
    return num == 1 if isinstance(num, int) else False
 
 
@@ -52,9 +54,9 @@ def table_has_column(conn: Connection, table: str, column: str) -> bool:
    """ Check if the table 'table' exists and has a column with name 'column'.
    """
    has_column = execute_scalar(conn,
-                                """SELECT count(*) FROM information_schema.columns
-                                   WHERE table_name = %s and column_name = %s""",
-                                (table, column))
+                                """SELECT count(*) FROM information_schema.columns
+                                   WHERE table_name = %s and column_name = %s""",
+                                (table, column))
    return has_column > 0 if isinstance(has_column, int) else False
 
 
@@ -77,8 +79,9 @@ def index_exists(conn: Connection, index: str, table: Optional[str] = None) -> b
 
    return True
 
+
 def drop_tables(conn: Connection, *names: str,
-                if_exists: bool = True, cascade: bool = False) -> None:
+                if_exists: bool = True, cascade: bool = False) -> None:
    """ Drop one or more tables with the given names.
        Set `if_exists` to False if a non-existent table should raise
        an exception instead of just being ignored. `cascade` will cause
@@ -11,6 +11,7 @@ from typing import Optional, cast
 
 from .connection import Connection, table_exists
 
+
 def set_property(conn: Connection, name: str, value: str) -> None:
    """ Add or replace the property with the given name.
    """
@@ -18,6 +18,7 @@ LOG = logging.getLogger()
 
 QueueItem = Optional[Tuple[psycopg.abc.Query, Any]]
 
+
 class QueryPool:
    """ Pool to run SQL queries in parallel asynchronous execution.
 
@@ -32,7 +33,6 @@ class QueryPool:
        self.pool = [asyncio.create_task(self._worker_loop(dsn, **conn_args))
                     for _ in range(pool_size)]
 
-
    async def put_query(self, query: psycopg.abc.Query, params: Any) -> None:
        """ Schedule a query for execution.
        """
@@ -41,7 +41,6 @@ class QueryPool:
        self.wait_time += time.time() - tstart
        await asyncio.sleep(0)
 
-
    async def finish(self) -> None:
        """ Wait for all queries to finish and close the pool.
        """
@@ -57,7 +56,6 @@ class QueryPool:
            if excp is not None:
                raise excp
 
-
    async def _worker_loop(self, dsn: str, **conn_args: Any) -> None:
        conn_args['autocommit'] = True
        aconn = await psycopg.AsyncConnection.connect(dsn, **conn_args)
@@ -78,10 +76,8 @@ class QueryPool:
                              str(item[0]), str(item[1]))
                # item is still valid here, causing a retry
 
-
    async def __aenter__(self) -> 'QueryPool':
        return self
 
-
    async def __aexit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        await self.finish()
@@ -15,6 +15,7 @@ from .connection import Connection, server_version_tuple, postgis_version_tuple
 from ..config import Configuration
 from ..db.query_pool import QueryPool
 
+
 def _get_partitions(conn: Connection) -> Set[int]:
    """ Get the set of partitions currently in use.
    """
@@ -35,6 +36,7 @@ def _get_tables(conn: Connection) -> Set[str]:
 
    return set((row[0] for row in list(cur)))
 
+
 def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:
    """ Returns the version of the slim middle tables.
    """
@@ -73,9 +75,10 @@ def _setup_postgresql_features(conn: Connection) -> Dict[str, Any]:
    ps3 = postgis_version >= (3, 0)
    return {
        'has_index_non_key_column': pg11plus,
-        'spgist_geom' : 'SPGIST' if pg11plus and ps3 else 'GIST'
+        'spgist_geom': 'SPGIST' if pg11plus and ps3 else 'GIST'
    }
 
+
 class SQLPreprocessor:
    """ A environment for preprocessing SQL files from the
        lib-sql directory.
@@ -102,7 +105,6 @@ class SQLPreprocessor:
        self.env.globals['db'] = db_info
        self.env.globals['postgres'] = _setup_postgresql_features(conn)
 
-
    def run_string(self, conn: Connection, template: str, **kwargs: Any) -> None:
        """ Execute the given SQL template string on the connection.
            The keyword arguments may supply additional parameters
@@ -114,7 +116,6 @@ class SQLPreprocessor:
            cur.execute(sql)
        conn.commit()
 
-
    def run_sql_file(self, conn: Connection, name: str, **kwargs: Any) -> None:
        """ Execute the given SQL file on the connection. The keyword arguments
            may supply additional parameters for preprocessing.
@@ -125,7 +126,6 @@ class SQLPreprocessor:
            cur.execute(sql)
        conn.commit()
 
-
    async def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
                                    **kwargs: Any) -> None:
        """ Execute the given SQL files using parallel asynchronous connections.
@@ -18,6 +18,7 @@ from ..errors import UsageError
 
 LOG = logging.getLogger()
 
+
 def _pipe_to_proc(proc: 'subprocess.Popen[bytes]',
                  fdesc: Union[IO[bytes], gzip.GzipFile]) -> int:
    assert proc.stdin is not None
@@ -31,6 +32,7 @@ def _pipe_to_proc(proc: 'subprocess.Popen[bytes]',
 
    return len(chunk)
 
+
 def execute_file(dsn: str, fname: Path,
                 ignore_errors: bool = False,
                 pre_code: Optional[str] = None,
@@ -8,6 +8,7 @@
 Custom exception and error classes for Nominatim.
 """
 
+
 class UsageError(Exception):
    """ An error raised because of bad user input. This error will usually
        not cause a stack trace to be printed unless debugging is enabled.
@@ -21,6 +21,7 @@ from . import runners
 
 LOG = logging.getLogger()
 
+
 class Indexer:
    """ Main indexing routine.
    """
@@ -30,7 +31,6 @@ class Indexer:
        self.tokenizer = tokenizer
        self.num_threads = num_threads
 
-
    def has_pending(self) -> bool:
        """ Check if any data still needs indexing.
            This function must only be used after the import has finished.
@@ -41,7 +41,6 @@ class Indexer:
            cur.execute("SELECT 'a' FROM placex WHERE indexed_status > 0 LIMIT 1")
            return cur.rowcount > 0
 
-
    async def index_full(self, analyse: bool = True) -> None:
        """ Index the complete database. This will first index boundaries
            followed by all other objects. When `analyse` is True, then the
@@ -75,7 +74,6 @@ class Indexer:
                if not self.has_pending():
                    break
 
-
    async def index_boundaries(self, minrank: int, maxrank: int) -> int:
        """ Index only administrative boundaries within the given rank range.
        """
@@ -138,7 +136,6 @@ class Indexer:
                            (minrank, maxrank))
                total_tuples = {row.rank_address: row.count for row in cur}
 
-
        with self.tokenizer.name_analyzer() as analyzer:
            for rank in range(max(1, minrank), maxrank + 1):
                if rank >= 30:
@@ -156,7 +153,6 @@ class Indexer:
 
        return total
 
-
    async def index_postcodes(self) -> int:
        """Index the entries of the location_postcode table.
        """
@@ -164,7 +160,6 @@ class Indexer:
 
        return await self._index(runners.PostcodeRunner(), batch=20)
 
-
    def update_status_table(self) -> None:
        """ Update the status in the status table to 'indexed'.
        """
@@ -193,7 +188,7 @@ class Indexer:
 
        if total_tuples > 0:
            async with await psycopg.AsyncConnection.connect(
-                            self.dsn, row_factory=psycopg.rows.dict_row) as aconn,\
+                            self.dsn, row_factory=psycopg.rows.dict_row) as aconn, \
                    QueryPool(self.dsn, self.num_threads, autocommit=True) as pool:
                fetcher_time = 0.0
                tstart = time.time()
@@ -224,7 +219,6 @@ class Indexer:
 
        return progress.done()
 
-
    def _prepare_indexing(self, runner: runners.Runner) -> int:
        with connect(self.dsn) as conn:
            hstore_info = psycopg.types.TypeInfo.fetch(conn, "hstore")
@@ -14,6 +14,7 @@ LOG = logging.getLogger()
 
 INITIAL_PROGRESS = 10
 
+
 class ProgressLogger:
    """ Tracks and prints progress for the indexing process.
        `name` is the name of the indexing step being tracked.
@@ -19,11 +19,11 @@ from ..typing import Protocol
 from ..data.place_info import PlaceInfo
 from ..tokenizer.base import AbstractAnalyzer
 
-# pylint: disable=C0111
+
 def _mk_valuelist(template: str, num: int) -> pysql.Composed:
    return pysql.SQL(',').join([pysql.SQL(template)] * num)
 
 
 def _analyze_place(place: DictRow, analyzer: AbstractAnalyzer) -> Json:
    return Json(analyzer.process_place(PlaceInfo(place)))
 
@@ -41,6 +41,7 @@ SELECT_SQL = pysql.SQL("""SELECT place_id, extra.*
                        LATERAL placex_indexing_prepare(px) as extra """)
 UPDATE_LINE = "(%s, %s::hstore, %s::hstore, %s::int, %s::jsonb)"
 
+
 class AbstractPlacexRunner:
    """ Returns SQL commands for indexing of the placex table.
    """
@@ -49,7 +50,6 @@ class AbstractPlacexRunner:
        self.rank = rank
        self.analyzer = analyzer
 
-
    def index_places_query(self, batch_size: int) -> Query:
        return pysql.SQL(
            """ UPDATE placex
@@ -59,7 +59,6 @@ class AbstractPlacexRunner:
               WHERE place_id = v.id
            """).format(_mk_valuelist(UPDATE_LINE, batch_size))
 
-
    def index_places_params(self, place: DictRow) -> Sequence[Any]:
        return (place['place_id'],
                place['name'],
@@ -118,7 +117,6 @@ class InterpolationRunner:
    def __init__(self, analyzer: AbstractAnalyzer) -> None:
        self.analyzer = analyzer
 
-
    def name(self) -> str:
        return "interpolation lines (location_property_osmline)"
 
@@ -126,14 +124,12 @@ class InterpolationRunner:
        return """SELECT count(*) FROM location_property_osmline
                  WHERE indexed_status > 0"""
 
-
    def sql_get_objects(self) -> Query:
        return """SELECT place_id, get_interpolation_address(address, osm_id) as address
                  FROM location_property_osmline
                  WHERE indexed_status > 0
                  ORDER BY geometry_sector"""
 
-
    def index_places_query(self, batch_size: int) -> Query:
        return pysql.SQL("""UPDATE location_property_osmline
                            SET indexed_status = 0, address = v.addr, token_info = v.ti
@@ -141,13 +137,11 @@ class InterpolationRunner:
               WHERE place_id = v.id
            """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", batch_size))
 
-
    def index_places_params(self, place: DictRow) -> Sequence[Any]:
        return (place['place_id'], place['address'],
                _analyze_place(place, self.analyzer))
 
 
-
 class PostcodeRunner(Runner):
    """ Provides the SQL commands for indexing the location_postcode table.
    """
@@ -155,22 +149,18 @@ class PostcodeRunner(Runner):
    def name(self) -> str:
        return "postcodes (location_postcode)"
 
-
    def sql_count_objects(self) -> Query:
        return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'
 
-
    def sql_get_objects(self) -> Query:
        return """SELECT place_id FROM location_postcode
                  WHERE indexed_status > 0
                  ORDER BY country_code, postcode"""
 
-
    def index_places_query(self, batch_size: int) -> Query:
        return pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
                            WHERE place_id IN ({})""")\
                    .format(pysql.SQL(',').join((pysql.Placeholder() for _ in range(batch_size))))
 
-
    def index_places_params(self, place: DictRow) -> Sequence[Any]:
        return (place['place_id'], )
@@ -17,6 +17,7 @@ from ..config import Configuration
|
||||
from ..db.connection import Connection
|
||||
from ..data.place_info import PlaceInfo
|
||||
|
||||
|
||||
class AbstractAnalyzer(ABC):
|
||||
""" The analyzer provides the functions for analysing names and building
|
||||
the token database.
|
||||
@@ -28,17 +29,14 @@ class AbstractAnalyzer(ABC):
|
||||
def __enter__(self) -> 'AbstractAnalyzer':
|
||||
return self
|
||||
|
||||
|
||||
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
|
||||
self.close()
|
||||
|
||||
|
||||
@abstractmethod
def close(self) -> None:
""" Free all resources used by the analyzer.
"""


@abstractmethod
def get_word_token_info(self, words: List[str]) -> List[Tuple[str, str, int]]:
""" Return token information for the given list of words.
@@ -57,7 +55,6 @@ class AbstractAnalyzer(ABC):
(original word, word token, word id).
"""


@abstractmethod
def normalize_postcode(self, postcode: str) -> str:
""" Convert the postcode to its standardized form.
@@ -72,14 +69,12 @@ class AbstractAnalyzer(ABC):
The given postcode after normalization.
"""


@abstractmethod
def update_postcodes_from_db(self) -> None:
""" Update the tokenizer's postcode tokens from the current content
of the `location_postcode` table.
"""


@abstractmethod
def update_special_phrases(self,
phrases: Iterable[Tuple[str, str, str, str]],
@@ -95,7 +90,6 @@ class AbstractAnalyzer(ABC):
ones that already exist.
"""


@abstractmethod
def add_country_names(self, country_code: str, names: Dict[str, str]) -> None:
""" Add the given names to the tokenizer's list of country tokens.
@@ -106,7 +100,6 @@ class AbstractAnalyzer(ABC):
names: Dictionary of name type to name.
"""


@abstractmethod
def process_place(self, place: PlaceInfo) -> Any:
""" Extract tokens for the given place and compute the
@@ -122,7 +115,6 @@ class AbstractAnalyzer(ABC):
"""



class AbstractTokenizer(ABC):
""" The tokenizer instance is the central instance of the tokenizer in
the system. There will only be a single instance of the tokenizer
@@ -146,7 +138,6 @@ class AbstractTokenizer(ABC):
tokenizers.
"""


@abstractmethod
def init_from_project(self, config: Configuration) -> None:
""" Initialise the tokenizer from an existing database setup.
@@ -158,7 +149,6 @@ class AbstractTokenizer(ABC):
config: Read-only object with configuration options.
"""


@abstractmethod
def finalize_import(self, config: Configuration) -> None:
""" This function is called at the very end of an import when all
@@ -170,7 +160,6 @@ class AbstractTokenizer(ABC):
config: Read-only object with configuration options.
"""


@abstractmethod
def update_sql_functions(self, config: Configuration) -> None:
""" Update the SQL part of the tokenizer. This function is called
@@ -184,7 +173,6 @@ class AbstractTokenizer(ABC):
config: Read-only object with configuration options.
"""


@abstractmethod
def check_database(self, config: Configuration) -> Optional[str]:
""" Check that the database is set up correctly and ready for being
@@ -199,7 +187,6 @@ class AbstractTokenizer(ABC):
how to resolve the issue. If everything is okay, return `None`.
"""


@abstractmethod
def update_statistics(self, config: Configuration, threads: int = 1) -> None:
""" Recompute any tokenizer statistics necessary for efficient lookup.
@@ -208,14 +195,12 @@ class AbstractTokenizer(ABC):
it to be called in order to work.
"""


@abstractmethod
def update_word_tokens(self) -> None:
""" Do house-keeping on the tokenizers internal data structures.
|
||||
Remove unused word tokens, resort data etc.
"""


@abstractmethod
def name_analyzer(self) -> AbstractAnalyzer:
""" Create a new analyzer for tokenizing names and queries
@@ -231,7 +216,6 @@ class AbstractTokenizer(ABC):
call the close() function before destructing the analyzer.
"""


@abstractmethod
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
""" Return a list of the most frequent full words in the database.

@@ -29,6 +29,7 @@ from ..tokenizer.base import AbstractTokenizer, TokenizerModule

LOG = logging.getLogger()


def _import_tokenizer(name: str) -> TokenizerModule:
""" Load the tokenizer.py module from project directory.
"""

@@ -61,7 +61,6 @@ class ICURuleLoader:
# Load optional sanitizer rule set.
self.sanitizer_rules = rules.get('sanitizers', [])


def load_config_from_db(self, conn: Connection) -> None:
""" Get previously saved parts of the configuration from the
database.
@@ -81,7 +80,6 @@ class ICURuleLoader:
self.analysis_rules = []
self._setup_analysis()


def save_config_to_db(self, conn: Connection) -> None:
""" Save the part of the configuration that cannot be changed into
the database.
@@ -90,20 +88,17 @@ class ICURuleLoader:
set_property(conn, DBCFG_IMPORT_TRANS_RULES, self.transliteration_rules)
set_property(conn, DBCFG_IMPORT_ANALYSIS_RULES, json.dumps(self.analysis_rules))


def make_sanitizer(self) -> PlaceSanitizer:
""" Create a place sanitizer from the configured rules.
"""
return PlaceSanitizer(self.sanitizer_rules, self.config)


def make_token_analysis(self) -> ICUTokenAnalysis:
""" Create a token analyser from the reviouly loaded rules.
|
||||
"""
|
||||
return ICUTokenAnalysis(self.normalization_rules,
|
||||
self.transliteration_rules, self.analysis)
|
||||
|
||||
|
||||
def get_search_rules(self) -> str:
|
||||
""" Return the ICU rules to be used during search.
|
||||
The rules combine normalization and transliteration.
|
||||
@@ -116,23 +111,20 @@ class ICURuleLoader:
|
||||
rules.write(self.transliteration_rules)
|
||||
return rules.getvalue()
|
||||
|
||||
|
||||
def get_normalization_rules(self) -> str:
|
||||
""" Return rules for normalisation of a term.
|
||||
"""
|
||||
return self.normalization_rules
|
||||
|
||||
|
||||
def get_transliteration_rules(self) -> str:
|
||||
""" Return the rules for converting a string into its asciii representation.
|
||||
"""
|
||||
return self.transliteration_rules
|
||||
|
||||
|
||||
def _setup_analysis(self) -> None:
|
||||
""" Process the rules used for creating the various token analyzers.
|
||||
"""
|
||||
self.analysis: Dict[Optional[str], TokenAnalyzerRule] = {}
|
||||
self.analysis: Dict[Optional[str], TokenAnalyzerRule] = {}
|
||||
|
||||
if not isinstance(self.analysis_rules, list):
|
||||
raise UsageError("Configuration section 'token-analysis' must be a list.")
|
||||
@@ -140,7 +132,7 @@ class ICURuleLoader:
|
||||
norm = Transliterator.createFromRules("rule_loader_normalization",
|
||||
self.normalization_rules)
|
||||
trans = Transliterator.createFromRules("rule_loader_transliteration",
|
||||
self.transliteration_rules)
|
||||
self.transliteration_rules)
|
||||
|
||||
for section in self.analysis_rules:
|
||||
name = section.get('id', None)
|
||||
@@ -154,7 +146,6 @@ class ICURuleLoader:
|
||||
self.analysis[name] = TokenAnalyzerRule(section, norm, trans,
|
||||
self.config)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _cfg_to_icu_rules(rules: Mapping[str, Any], section: str) -> str:
|
||||
""" Load an ICU ruleset from the given section. If the section is a
|
||||
@@ -189,7 +180,6 @@ class TokenAnalyzerRule:
|
||||
self.config = self._analysis_mod.configure(rules, normalizer,
|
||||
transliterator)
|
||||
|
||||
|
||||
def create(self, normalizer: Any, transliterator: Any) -> Analyzer:
|
||||
""" Create a new analyser instance for the given rule.
|
||||
"""
|
||||
|
||||
@@ -14,8 +14,9 @@ from icu import Transliterator
|
||||
from .token_analysis.base import Analyzer
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Any
|
||||
from .icu_rule_loader import TokenAnalyzerRule # pylint: disable=cyclic-import
|
||||
from typing import Any # noqa
|
||||
from .icu_rule_loader import TokenAnalyzerRule
|
||||
|
||||
|
||||
class ICUTokenAnalysis:
|
||||
""" Container class collecting the transliterators and token analysis
|
||||
@@ -35,7 +36,6 @@ class ICUTokenAnalysis:
|
||||
self.analysis = {name: arules.create(self.normalizer, self.to_ascii)
|
||||
for name, arules in analysis_rules.items()}
|
||||
|
||||
|
||||
def get_analyzer(self, name: Optional[str]) -> Analyzer:
|
||||
""" Return the given named analyzer. If no analyzer with that
|
||||
name exists, return the default analyzer.
|
||||
|
||||
@@ -17,7 +17,7 @@ from pathlib import Path
|
||||
from psycopg.types.json import Jsonb
|
||||
from psycopg import sql as pysql
|
||||
|
||||
from ..db.connection import connect, Connection, Cursor, server_version_tuple,\
|
||||
from ..db.connection import connect, Connection, Cursor, server_version_tuple, \
|
||||
drop_tables, table_exists, execute_scalar
|
||||
from ..config import Configuration
|
||||
from ..db.sql_preprocessor import SQLPreprocessor
|
||||
@@ -32,10 +32,11 @@ DBCFG_TERM_NORMALIZATION = "tokenizer_term_normalization"
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
WORD_TYPES =(('country_names', 'C'),
|
||||
('postcodes', 'P'),
|
||||
('full_word', 'W'),
|
||||
('housenumbers', 'H'))
|
||||
WORD_TYPES = (('country_names', 'C'),
|
||||
('postcodes', 'P'),
|
||||
('full_word', 'W'),
|
||||
('housenumbers', 'H'))
|
||||
|
||||
|
||||
def create(dsn: str, data_dir: Path) -> 'ICUTokenizer':
|
||||
""" Create a new instance of the tokenizer provided by this module.
|
||||
@@ -54,7 +55,6 @@ class ICUTokenizer(AbstractTokenizer):
|
||||
self.data_dir = data_dir
|
||||
self.loader: Optional[ICURuleLoader] = None
|
||||
|
||||
|
||||
def init_new_db(self, config: Configuration, init_db: bool = True) -> None:
|
||||
""" Set up a new tokenizer for the database.
|
||||
|
||||
@@ -70,7 +70,6 @@ class ICUTokenizer(AbstractTokenizer):
|
||||
self._setup_db_tables(config)
|
||||
self._create_base_indices(config, 'word')
|
||||
|
||||
|
||||
def init_from_project(self, config: Configuration) -> None:
|
||||
""" Initialise the tokenizer from the project directory.
|
||||
"""
|
||||
@@ -79,14 +78,12 @@ class ICUTokenizer(AbstractTokenizer):
|
||||
with connect(self.dsn) as conn:
self.loader.load_config_from_db(conn)


def finalize_import(self, config: Configuration) -> None:
""" Do any required postprocessing to make the tokenizer data ready
for use.
"""
self._create_lookup_indices(config, 'word')


def update_sql_functions(self, config: Configuration) -> None:
""" Reimport the SQL functions for this tokenizer.
"""
@@ -94,14 +91,12 @@ class ICUTokenizer(AbstractTokenizer):
sqlp = SQLPreprocessor(conn, config)
sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer.sql')


def check_database(self, config: Configuration) -> None:
""" Check that the tokenizer is set up correctly.
"""
# Will throw an error if there is an issue.
self.init_from_project(config)


def update_statistics(self, config: Configuration, threads: int = 2) -> None:
""" Recompute frequencies for all name words.
"""
@@ -126,28 +121,29 @@ class ICUTokenizer(AbstractTokenizer):
SELECT unnest(nameaddress_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute('CREATE INDEX ON addressword_frequencies(id)')
cur.execute("""CREATE OR REPLACE FUNCTION word_freq_update(wid INTEGER,
INOUT info JSONB)
AS $$
DECLARE rec RECORD;
BEGIN
IF info is null THEN
info = '{}'::jsonb;
END IF;
FOR rec IN SELECT count FROM word_frequencies WHERE id = wid
LOOP
info = info || jsonb_build_object('count', rec.count);
END LOOP;
FOR rec IN SELECT count FROM addressword_frequencies WHERE id = wid
LOOP
info = info || jsonb_build_object('addr_count', rec.count);
END LOOP;
IF info = '{}'::jsonb THEN
info = null;
END IF;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
""")
cur.execute("""
CREATE OR REPLACE FUNCTION word_freq_update(wid INTEGER,
INOUT info JSONB)
AS $$
DECLARE rec RECORD;
BEGIN
IF info is null THEN
info = '{}'::jsonb;
END IF;
FOR rec IN SELECT count FROM word_frequencies WHERE id = wid
LOOP
info = info || jsonb_build_object('count', rec.count);
END LOOP;
FOR rec IN SELECT count FROM addressword_frequencies WHERE id = wid
LOOP
info = info || jsonb_build_object('addr_count', rec.count);
END LOOP;
IF info = '{}'::jsonb THEN
info = null;
END IF;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
""")
LOG.info('Update word table with recomputed frequencies')
drop_tables(conn, 'tmp_word')
cur.execute("""CREATE TABLE tmp_word AS
@@ -200,8 +196,6 @@ class ICUTokenizer(AbstractTokenizer):
self._create_lookup_indices(config, 'tmp_word')
self._move_temporary_word_table('tmp_word')



def _cleanup_housenumbers(self) -> None:
""" Remove unused house numbers.
"""
@@ -235,8 +229,6 @@ class ICUTokenizer(AbstractTokenizer):
(list(candidates.values()), ))
conn.commit()



def update_word_tokens(self) -> None:
""" Remove unused tokens.
"""
@@ -244,7 +236,6 @@ class ICUTokenizer(AbstractTokenizer):
self._cleanup_housenumbers()
LOG.warning("Tokenizer house-keeping done.")


def name_analyzer(self) -> 'ICUNameAnalyzer':
""" Create a new analyzer for tokenizing names and queries
using this tokenizer. Analyzers are context managers and should
@@ -264,7 +255,6 @@ class ICUTokenizer(AbstractTokenizer):
return ICUNameAnalyzer(self.dsn, self.loader.make_sanitizer(),
self.loader.make_token_analysis())


def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
""" Return a list of the `num` most frequent full words
in the database.
@@ -276,7 +266,6 @@ class ICUTokenizer(AbstractTokenizer):
ORDER BY count DESC LIMIT %s""", (num,))
return list(s[0].split('@')[0] for s in cur)


def _save_config(self) -> None:
""" Save the configuration that needs to remain stable for the given
database as database properties.
@@ -285,7 +274,6 @@ class ICUTokenizer(AbstractTokenizer):
with connect(self.dsn) as conn:
self.loader.save_config_to_db(conn)


def _setup_db_tables(self, config: Configuration) -> None:
""" Set up the word table and fill it with pre-computed word
frequencies.
@@ -309,7 +297,6 @@ class ICUTokenizer(AbstractTokenizer):
""")
conn.commit()


def _create_base_indices(self, config: Configuration, table_name: str) -> None:
""" Set up the word table and fill it with pre-computed word
frequencies.
@@ -330,21 +317,21 @@ class ICUTokenizer(AbstractTokenizer):
column_type=ctype)
conn.commit()


def _create_lookup_indices(self, config: Configuration, table_name: str) -> None:
""" Create additional indexes used when running the API.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
# Index required for details lookup.
sqlp.run_string(conn, """
sqlp.run_string(
conn,
"""
CREATE INDEX IF NOT EXISTS idx_{{table_name}}_word_id
ON {{table_name}} USING BTREE (word_id) {{db.tablespace.search_index}}
""",
table_name=table_name)
""",
table_name=table_name)
conn.commit()


def _move_temporary_word_table(self, old: str) -> None:
""" Rename all tables and indexes used by the tokenizer.
"""
@@ -361,8 +348,6 @@ class ICUTokenizer(AbstractTokenizer):
conn.commit()




class ICUNameAnalyzer(AbstractAnalyzer):
""" The ICU analyzer uses the ICU library for splitting names.

@@ -379,7 +364,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):

self._cache = _TokenCache()


def close(self) -> None:
""" Free all resources used by the analyzer.
"""
@@ -387,20 +371,17 @@ class ICUNameAnalyzer(AbstractAnalyzer):
self.conn.close()
self.conn = None


def _search_normalized(self, name: str) -> str:
""" Return the search token transliteration of the given name.
"""
return cast(str, self.token_analysis.search.transliterate(name)).strip()


def _normalized(self, name: str) -> str:
""" Return the normalized version of the given name with all
non-relevant information removed.
"""
return cast(str, self.token_analysis.normalizer.transliterate(name)).strip()


def get_word_token_info(self, words: Sequence[str]) -> List[Tuple[str, str, int]]:
""" Return token information for the given list of words.
If a word starts with # it is assumed to be a full name
@@ -432,8 +413,7 @@ class ICUNameAnalyzer(AbstractAnalyzer):
part_ids = {r[0]: r[1] for r in cur}

return [(k, v, full_ids.get(v, None)) for k, v in full_tokens.items()] \
+ [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]

+ [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]

def normalize_postcode(self, postcode: str) -> str:
""" Convert the postcode to a standardized form.
@@ -443,7 +423,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
"""
return postcode.strip().upper()


def update_postcodes_from_db(self) -> None:
""" Update postcode tokens in the word table from the location_postcode
table.
@@ -516,9 +495,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
with self.conn.cursor() as cur:
cur.executemany("""SELECT create_postcode_word(%s, %s)""", terms)




def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Replace the search index for special phrases with the new phrases.
@@ -548,7 +524,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
len(norm_phrases), added, deleted)


def _add_special_phrases(self, cursor: Cursor,
new_phrases: Set[Tuple[str, str, str, str]],
existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
@@ -568,10 +543,9 @@ class ICUNameAnalyzer(AbstractAnalyzer):

return added


def _remove_special_phrases(self, cursor: Cursor,
new_phrases: Set[Tuple[str, str, str, str]],
existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
new_phrases: Set[Tuple[str, str, str, str]],
existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
""" Remove all phrases from the database that are no longer in the
new phrase list.
"""
@@ -587,7 +561,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):

return len(to_delete)


def add_country_names(self, country_code: str, names: Mapping[str, str]) -> None:
""" Add default names for the given country to the search index.
"""
@@ -599,7 +572,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
self.sanitizer.process_names(info)[0],
internal=True)


def _add_country_full_names(self, country_code: str, names: Sequence[PlaceName],
internal: bool = False) -> None:
""" Add names for the given country from an already sanitized
@@ -651,7 +623,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
"""
cur.execute(sql, (country_code, list(new_tokens)))


def process_place(self, place: PlaceInfo) -> Mapping[str, Any]:
""" Determine tokenizer information about the given place.

@@ -674,7 +645,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):

return token_info.to_dict()


def _process_place_address(self, token_info: '_TokenInfo',
address: Sequence[PlaceName]) -> None:
for item in address:
@@ -687,12 +657,11 @@ class ICUNameAnalyzer(AbstractAnalyzer):
elif item.kind == 'place':
if not item.suffix:
token_info.add_place(itertools.chain(*self._compute_name_tokens([item])))
elif not item.kind.startswith('_') and not item.suffix and \
item.kind not in ('country', 'full', 'inclusion'):
elif (not item.kind.startswith('_') and not item.suffix and
item.kind not in ('country', 'full', 'inclusion')):
token_info.add_address_term(item.kind,
itertools.chain(*self._compute_name_tokens([item])))


def _compute_housenumber_token(self, hnr: PlaceName) -> Tuple[Optional[int], Optional[str]]:
""" Normalize the housenumber and return the word token and the
canonical form.
@@ -728,7 +697,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):

return result


def _retrieve_full_tokens(self, name: str) -> List[int]:
""" Get the full name token for the given name, if it exists.
The name is only retrieved for the standard analyser.
@@ -749,7 +717,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):

return full


def _compute_name_tokens(self, names: Sequence[PlaceName]) -> Tuple[Set[int], Set[int]]:
""" Computes the full name and partial name tokens for the given
dictionary of names.
@@ -787,7 +754,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):

return full_tokens, partial_tokens


def _add_postcode(self, item: PlaceName) -> Optional[str]:
""" Make sure the normalized postcode is present in the word table.
"""
@@ -835,11 +801,9 @@ class _TokenInfo:
self.address_tokens: Dict[str, str] = {}
self.postcode: Optional[str] = None


def _mk_array(self, tokens: Iterable[Any]) -> str:
return f"{{{','.join((str(s) for s in tokens))}}}"


def to_dict(self) -> Dict[str, Any]:
""" Return the token information in database importable format.
"""
@@ -866,13 +830,11 @@ class _TokenInfo:

return out


def set_names(self, fulls: Iterable[int], partials: Iterable[int]) -> None:
""" Adds token information for the normalised names.
"""
self.names = self._mk_array(itertools.chain(fulls, partials))


def add_housenumber(self, token: Optional[int], hnr: Optional[str]) -> None:
""" Extract housenumber information from a list of normalised
housenumbers.
@@ -882,7 +844,6 @@ class _TokenInfo:
self.housenumbers.add(hnr)
self.housenumber_tokens.add(token)


def add_street(self, tokens: Iterable[int]) -> None:
""" Add addr:street match terms.
"""
@@ -890,13 +851,11 @@ class _TokenInfo:
self.street_tokens = set()
self.street_tokens.update(tokens)


def add_place(self, tokens: Iterable[int]) -> None:
""" Add addr:place search and match terms.
"""
self.place_tokens.update(tokens)


def add_address_term(self, key: str, partials: Iterable[int]) -> None:
""" Add additional address terms.
"""

@@ -39,7 +39,6 @@ class PlaceSanitizer:

self.handlers.append(module.create(SanitizerConfig(func)))


def process_names(self, place: PlaceInfo) -> Tuple[List[PlaceName], List[PlaceName]]:
""" Extract a sanitized list of names and address parts from the
given place. The function returns a tuple

@@ -27,7 +27,6 @@ class ProcessInfo:
self.names = self._convert_name_dict(place.name)
self.address = self._convert_name_dict(place.address)


@staticmethod
def _convert_name_dict(names: Optional[Mapping[str, str]]) -> List[PlaceName]:
""" Convert a dictionary of names into a list of PlaceNames.

@@ -30,6 +30,7 @@ from ...data.place_name import PlaceName
from .base import ProcessInfo
from .config import SanitizerConfig


class _HousenumberSanitizer:

def __init__(self, config: SanitizerConfig) -> None:
@@ -38,7 +39,6 @@ class _HousenumberSanitizer:

self.filter_name = config.get_filter('convert-to-name', 'FAIL_ALL')


def __call__(self, obj: ProcessInfo) -> None:
if not obj.address:
return
@@ -57,7 +57,6 @@ class _HousenumberSanitizer:

obj.address = new_address


def sanitize(self, value: str) -> Iterator[str]:
""" Extract housenumbers in a regularized format from an OSM value.

@@ -68,7 +67,6 @@ class _HousenumberSanitizer:
if hnr:
yield from self._regularize(hnr)


def _regularize(self, hnr: str) -> Iterator[str]:
yield hnr


@@ -26,6 +26,7 @@ from ...data.postcode_format import PostcodeFormatter
from .base import ProcessInfo
from .config import SanitizerConfig


class _PostcodeSanitizer:

def __init__(self, config: SanitizerConfig) -> None:
@@ -36,7 +37,6 @@ class _PostcodeSanitizer:
if default_pattern is not None and isinstance(default_pattern, str):
self.matcher.set_default_pattern(default_pattern)


def __call__(self, obj: ProcessInfo) -> None:
if not obj.address:
return
@@ -55,7 +55,6 @@ class _PostcodeSanitizer:
postcode.name = formatted[0]
postcode.set_attr('variant', formatted[1])


def scan(self, postcode: str, country: Optional[str]) -> Optional[Tuple[str, str]]:
""" Check the postcode for correct formatting and return the
normalized version. Returns None if the postcode does not
@@ -67,10 +66,8 @@ class _PostcodeSanitizer:

assert country is not None

return self.matcher.normalize(country, match),\
' '.join(filter(lambda p: p is not None, match.groups()))


return self.matcher.normalize(country, match), \
' '.join(filter(lambda p: p is not None, match.groups()))


def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:

@@ -19,6 +19,7 @@ from .config import SanitizerConfig

COUNTY_MATCH = re.compile('(.*), [A-Z][A-Z]')


def _clean_tiger_county(obj: ProcessInfo) -> None:
""" Remove the state reference from tiger:county tags.


@@ -20,6 +20,7 @@ if TYPE_CHECKING:
else:
_BaseUserDict = UserDict


class SanitizerConfig(_BaseUserDict):
""" The `SanitizerConfig` class is a read-only dictionary
with configuration options for the sanitizer.
@@ -61,7 +62,6 @@ class SanitizerConfig(_BaseUserDict):

return values


def get_bool(self, param: str, default: Optional[bool] = None) -> bool:
""" Extract a configuration parameter as a boolean.

@@ -82,7 +82,6 @@ class SanitizerConfig(_BaseUserDict):

return value


def get_delimiter(self, default: str = ',;') -> Pattern[str]:
""" Return the 'delimiters' parameter in the configuration as a
compiled regular expression that can be used to split strings on
@@ -105,7 +104,6 @@ class SanitizerConfig(_BaseUserDict):

return re.compile('\\s*[{}]+\\s*'.format(''.join('\\' + d for d in delimiter_set)))


def get_filter(self, param: str, default: Union[str, Sequence[str]] = 'PASS_ALL'
) -> Callable[[str], bool]:
""" Returns a filter function for the given parameter of the sanitizer

@@ -60,6 +60,7 @@ from ...data.place_name import PlaceName
from .base import ProcessInfo
from .config import SanitizerConfig


class _TagSanitizer:

def __init__(self, config: SanitizerConfig) -> None:
@@ -74,7 +75,6 @@ class _TagSanitizer:

self.has_country_code = config.get('country_code', None) is not None


def __call__(self, obj: ProcessInfo) -> None:
tags = obj.names if self.type == 'name' else obj.address

@@ -93,13 +93,11 @@ class _TagSanitizer:
or not self.filter_name(tag.name):
filtered_tags.append(tag)


if self.type == 'name':
obj.names = filtered_tags
else:
obj.address = filtered_tags


def _set_allowed_ranks(self, ranks: Sequence[str]) -> Tuple[bool, ...]:
""" Returns a tuple of 31 boolean values corresponding to the
address ranks 0-30. Value at index 'i' is True if rank 'i'
@@ -117,7 +115,6 @@ class _TagSanitizer:
for i in range(start, end + 1):
allowed_ranks[i] = True


return tuple(allowed_ranks)



@@ -16,6 +16,7 @@ from typing import Callable
from .base import ProcessInfo
from .config import SanitizerConfig


def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
""" Create a name processing function that splits name values with
multiple values into their components.

@@ -36,6 +36,7 @@ from ...data import country_info
from .base import ProcessInfo
from .config import SanitizerConfig


class _AnalyzerByLanguage:
""" Processor for tagging the language of names in a place.
"""
@@ -47,7 +48,6 @@ class _AnalyzerByLanguage:

self._compute_default_languages(config.get('use-defaults', 'no'))


def _compute_default_languages(self, use_defaults: str) -> None:
self.deflangs: Dict[Optional[str], List[str]] = {}

@@ -55,18 +55,16 @@ class _AnalyzerByLanguage:
for ccode, clangs in country_info.iterate('languages'):
if len(clangs) == 1 or use_defaults == 'all':
if self.whitelist:
self.deflangs[ccode] = [l for l in clangs if l in self.whitelist]
self.deflangs[ccode] = [cl for cl in clangs if cl in self.whitelist]
else:
self.deflangs[ccode] = clangs


def _suffix_matches(self, suffix: str) -> bool:
if self.whitelist is None:
return len(suffix) in (2, 3) and suffix.islower()

return suffix in self.whitelist


def __call__(self, obj: ProcessInfo) -> None:
if not obj.names:
return
@@ -80,14 +78,13 @@ class _AnalyzerByLanguage:
else:
langs = self.deflangs.get(obj.place.country_code)


if langs:
if self.replace:
name.set_attr('analyzer', langs[0])
else:
more_names.append(name.clone(attr={'analyzer': langs[0]}))

more_names.extend(name.clone(attr={'analyzer': l}) for l in langs[1:])
more_names.extend(name.clone(attr={'analyzer': lg}) for lg in langs[1:])

obj.names.extend(more_names)


@@ -18,11 +18,13 @@ from .base import ProcessInfo
from .config import SanitizerConfig
from ...data.place_name import PlaceName


def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
"""Set up the sanitizer
"""
return tag_japanese


def reconbine_housenumber(
new_address: List[PlaceName],
tmp_housenumber: Optional[str],
@@ -56,6 +58,7 @@ def reconbine_housenumber(
)
return new_address


def reconbine_place(
new_address: List[PlaceName],
tmp_neighbourhood: Optional[str],
@@ -88,6 +91,8 @@ def reconbine_place(
)
)
return new_address


def tag_japanese(obj: ProcessInfo) -> None:
"""Recombine kind of address
"""

@@ -12,6 +12,7 @@ from typing import Mapping, List, Any
from ...typing import Protocol
from ...data.place_name import PlaceName


class Analyzer(Protocol):
""" The `create()` function of an analysis module needs to return an
object that implements the following functions.

@@ -15,6 +15,7 @@ import re
from ...config import flatten_config_list
from ...errors import UsageError


class ICUVariant(NamedTuple):
""" A single replacement rule for variant creation.
"""
@@ -64,7 +65,6 @@ class _VariantMaker:
def __init__(self, normalizer: Any) -> None:
self.norm = normalizer


def compute(self, rule: Any) -> Iterator[ICUVariant]:
""" Generator for all ICUVariant tuples from a single variant rule.
"""
@@ -88,7 +88,6 @@ class _VariantMaker:
for froms, tos in _create_variants(*src, repl, decompose):
yield ICUVariant(froms, tos)


def _parse_variant_word(self, name: str) -> Optional[Tuple[str, str, str]]:
name = name.strip()
match = re.fullmatch(r'([~^]?)([^~$^]*)([~$]?)', name)

@@ -17,7 +17,8 @@ from ...data.place_name import PlaceName
from .config_variants import get_variant_config
from .generic_mutation import MutationVariantGenerator

### Configuration section
# Configuration section


def configure(rules: Mapping[str, Any], normalizer: Any, _: Any) -> Dict[str, Any]:
""" Extract and preprocess the configuration for this module.
@@ -47,7 +48,7 @@ def configure(rules: Mapping[str, Any], normalizer: Any, _: Any) -> Dict[str, An
return config


### Analysis section
# Analysis section

def create(normalizer: Any, transliterator: Any,
config: Mapping[str, Any]) -> 'GenericTokenAnalysis':
@@ -77,14 +78,12 @@ class GenericTokenAnalysis:
# set up mutation rules
self.mutations = [MutationVariantGenerator(*cfg) for cfg in config['mutations']]


def get_canonical_id(self, name: PlaceName) -> str:
""" Return the normalized form of the name. This is the standard form
from which possible variants for the name can be derived.
"""
return cast(str, self.norm.transliterate(name.name)).strip()


def compute_variants(self, norm_name: str) -> List[str]:
""" Compute the spelling variants for the given normalized name
and transliterate the result.
@@ -96,7 +95,6 @@ class GenericTokenAnalysis:

return [name for name in self._transliterate_unique_list(norm_name, variants) if name]


def _transliterate_unique_list(self, norm_name: str,
iterable: Iterable[str]) -> Iterator[Optional[str]]:
seen = set()
@@ -108,7 +106,6 @@ class GenericTokenAnalysis:
seen.add(variant)
yield self.to_ascii.transliterate(variant).strip()


def _generate_word_variants(self, norm_name: str) -> Iterable[str]:
baseform = '^ ' + norm_name + ' ^'
baselen = len(baseform)

@@ -16,6 +16,7 @@ from ...errors import UsageError

LOG = logging.getLogger()


def _zigzag(outer: Iterable[str], inner: Iterable[str]) -> Iterator[str]:
return itertools.chain.from_iterable(itertools.zip_longest(outer, inner, fillvalue=''))

@@ -36,7 +37,6 @@ class MutationVariantGenerator:
"This is not allowed.", pattern)
raise UsageError("Bad mutation pattern in configuration.")


def generate(self, names: Iterable[str]) -> Iterator[str]:
""" Generator function for the name variants. 'names' is an iterable
over a set of names for which the variants are to be generated.
@@ -49,7 +49,6 @@ class MutationVariantGenerator:
for seps in self._fillers(len(parts)):
yield ''.join(_zigzag(parts, seps))


def _fillers(self, num_parts: int) -> Iterator[Tuple[str, ...]]:
""" Returns a generator for strings to join the given number of string
parts in all possible combinations.

@@ -19,16 +19,18 @@ RE_DIGIT_ALPHA = re.compile(r'(\d)\s*([^\d\s␣])')
RE_ALPHA_DIGIT = re.compile(r'([^\s\d␣])\s*(\d)')
RE_NAMED_PART = re.compile(r'[a-z]{4}')

### Configuration section
# Configuration section


def configure(*_: Any) -> None:
""" All behaviour is currently hard-coded.
"""
return None

### Analysis section
# Analysis section

def create(normalizer: Any, transliterator: Any, config: None) -> 'HousenumberTokenAnalysis': # pylint: disable=W0613

def create(normalizer: Any, transliterator: Any, config: None) -> 'HousenumberTokenAnalysis':
""" Create a new token analysis instance for this module.
"""
return HousenumberTokenAnalysis(normalizer, transliterator)

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Specialized processor for postcodes. Supports a 'lookup' variant of the
@@ -13,16 +13,18 @@ from typing import Any, List
from ...data.place_name import PlaceName
from .generic_mutation import MutationVariantGenerator

### Configuration section
# Configuration section


def configure(*_: Any) -> None:
""" All behaviour is currently hard-coded.
"""
return None

### Analysis section
# Analysis section

def create(normalizer: Any, transliterator: Any, config: None) -> 'PostcodeTokenAnalysis': # pylint: disable=W0613

def create(normalizer: Any, transliterator: Any, config: None) -> 'PostcodeTokenAnalysis':
""" Create a new token analysis instance for this module.
"""
return PostcodeTokenAnalysis(normalizer, transliterator)
@@ -44,13 +46,11 @@ class PostcodeTokenAnalysis:

self.mutator = MutationVariantGenerator(' ', (' ', ''))


def get_canonical_id(self, name: PlaceName) -> str:
""" Return the standard form of the postcode.
"""
return name.name.strip().upper()


def compute_variants(self, norm_name: str) -> List[str]:
""" Compute the spelling variants for the given normalized postcode.


@@ -18,6 +18,7 @@ from .exec_utils import run_osm2pgsql

LOG = logging.getLogger()


def _run_osm2pgsql(dsn: str, options: MutableMapping[str, Any]) -> None:
run_osm2pgsql(options)


@@ -22,6 +22,7 @@ from ..data.place_info import PlaceInfo

LOG = logging.getLogger()


def _get_place_info(cursor: Cursor, osm_id: Optional[str],
place_id: Optional[int]) -> DictCursorResult:
sql = """SELECT place_id, extra.*

@@ -12,7 +12,7 @@ from enum import Enum
from textwrap import dedent

from ..config import Configuration
from ..db.connection import connect, Connection, server_version_tuple,\
from ..db.connection import connect, Connection, server_version_tuple, \
index_exists, table_exists, execute_scalar
from ..db import properties
from ..errors import UsageError
@@ -22,6 +22,7 @@ from ..version import NOMINATIM_VERSION, parse_version

CHECKLIST = []


class CheckState(Enum):
""" Possible states of a check. FATAL stops check execution entirely.
"""
@@ -31,9 +32,11 @@ class CheckState(Enum):
NOT_APPLICABLE = 3
WARN = 4


CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
CheckFunc = Callable[[Connection, Configuration], CheckResult]


def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
""" Decorator for checks. It adds the function to the list of
checks to execute and adds the code for printing progress messages.
@@ -68,6 +71,7 @@ def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:

return decorator


class _BadConnection:

def __init__(self, msg: str) -> None:
@@ -77,13 +81,14 @@ class _BadConnection:
""" Dummy function to provide the implementation.
"""


def check_database(config: Configuration) -> int:
""" Run a number of checks on the database and return the status.
"""
try:
conn = connect(config.get_libpq_dsn())
except UsageError as err:
conn = _BadConnection(str(err)) # type: ignore[assignment]
conn = _BadConnection(str(err))  # type: ignore[assignment]

overall_result = 0
for check in CHECKLIST:
@@ -110,7 +115,7 @@ def _get_indexes(conn: Connection) -> List[str]:
'idx_osmline_parent_osm_id',
'idx_postcode_id',
'idx_postcode_postcode'
]
]

# These won't exist if --reverse-only import was used
if table_exists(conn, 'search_name'):

@@ -154,6 +159,7 @@ def check_connection(conn: Any, config: Configuration) -> CheckResult:

return CheckState.OK


@_check(hint="""\
Database version ({db_version}) doesn't match Nominatim version ({nom_version})

@@ -195,6 +201,7 @@ def check_database_version(conn: Connection, config: Configuration) -> CheckResu
instruction=instruction,
config=config)


@_check(hint="""\
placex table not found

@@ -274,7 +281,7 @@ def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
return CheckState.OK

if freeze.is_frozen(conn):
index_cmd="""\
index_cmd = """\
Database is marked frozen, it cannot be updated.
Low counts of unindexed places are fine."""
return CheckState.WARN, dict(count=cnt, index_cmd=index_cmd)

@@ -132,8 +132,8 @@ def report_system_information(config: Configuration) -> None:
- PostgreSQL version: {postgresql_ver}
- PostGIS version: {postgis_ver}
- OS: {os_name_info()}




**Hardware Configuration:**
- RAM: {friendly_memory_string(psutil.virtual_memory().total)}
- number of CPUs: {psutil.cpu_count(logical=False)}
@@ -144,13 +144,13 @@ def report_system_information(config: Configuration) -> None:
```
{run_command(["df", "-h"])}
```


**lsblk - list block devices: **
```
{run_command("lsblk")}
```




**Postgresql Configuration:**
```
{postgresql_config}

@@ -21,6 +21,7 @@ from nominatim_api.sql.sqlalchemy_types import Geometry, IntArray

LOG = logging.getLogger()


async def convert(project_dir: Optional[Union[str, Path]],
outfile: Path, options: Set[str]) -> None:
""" Export an existing database to sqlite. The resulting database
@@ -53,7 +54,6 @@ class SqliteWriter:
self.dest = dest
self.options = options


async def write(self) -> None:
""" Create the database structure and copy the data from
the source database to the destination.
@@ -67,7 +67,6 @@ class SqliteWriter:
await self.create_word_table()
await self.create_indexes()


async def create_tables(self) -> None:
""" Set up the database tables.
"""
@@ -87,7 +86,6 @@ class SqliteWriter:
sa.func.RecoverGeometryColumn(table.name, col.name, 4326,
col.type.subtype.upper(), 'XY')))


async def create_class_tables(self) -> None:
""" Set up the table that serve class/type-specific geometries.
"""
@@ -99,7 +97,6 @@ class SqliteWriter:
sa.Column('place_id', sa.BigInteger),
sa.Column('centroid', Geometry))


async def create_word_table(self) -> None:
""" Create the word table.
This table needs the property information to determine the
@@ -122,7 +119,6 @@ class SqliteWriter:

await self.dest.connection.run_sync(sa.Index('idx_word_woken', dest.c.word_token).create)


async def copy_data(self) -> None:
""" Copy data for all registered tables.
"""
@@ -151,7 +147,6 @@ class SqliteWriter:
data = [{'tablename': t} for t in self.dest.t.meta.tables]
await self.dest.execute(pg_tables.insert().values(data))


async def create_indexes(self) -> None:
""" Add indexes necessary for the frontend.
"""
@@ -197,14 +192,12 @@ class SqliteWriter:
await self.dest.execute(sa.select(
sa.func.CreateSpatialIndex(t, 'centroid')))


async def create_spatial_index(self, table: str, column: str) -> None:
""" Create a spatial index on the given table and column.
"""
await self.dest.execute(sa.select(
sa.func.CreateSpatialIndex(getattr(self.dest.t, table).name, column)))


async def create_index(self, table_name: str, column: str) -> None:
""" Create a simple index on the given table and column.
"""
@@ -212,7 +205,6 @@ class SqliteWriter:
await self.dest.connection.run_sync(
sa.Index(f"idx_{table}_{column}", getattr(table.c, column)).create)


async def create_search_index(self) -> None:
""" Create the tables and indexes needed for word lookup.
"""
@@ -242,7 +234,6 @@ class SqliteWriter:
await self.dest.connection.run_sync(
sa.Index('idx_reverse_search_name_word', rsn.c.word).create)


def select_from(self, table: str) -> SaSelect:
""" Create the SQL statement to select the source columns and rows.
"""
@@ -258,9 +249,9 @@ class SqliteWriter:
columns.geometry),
else_=sa.func.ST_SimplifyPreserveTopology(
columns.geometry, 0.0001)
)).label('geometry'))
)).label('geometry'))

sql = sa.select(*(sa.func.ST_AsText(c).label(c.name)
if isinstance(c.type, Geometry) else c for c in columns))
if isinstance(c.type, Geometry) else c for c in columns))

return sql

@@ -20,7 +20,7 @@ from psycopg import sql as pysql

from ..errors import UsageError
from ..config import Configuration
from ..db.connection import connect, get_pg_env, Connection, server_version_tuple,\
from ..db.connection import connect, get_pg_env, Connection, server_version_tuple, \
postgis_version_tuple, drop_tables, table_exists, execute_scalar
from ..db.sql_preprocessor import SQLPreprocessor
from ..db.query_pool import QueryPool
@@ -29,6 +29,7 @@ from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION

LOG = logging.getLogger()


def _require_version(module: str, actual: Tuple[int, int], expected: Tuple[int, int]) -> None:
""" Compares the version for the given module and raises an exception
if the actual version is too old.
@@ -251,7 +252,7 @@ async def _progress_print() -> None:


async def create_search_indices(conn: Connection, config: Configuration,
drop: bool = False, threads: int = 1) -> None:
drop: bool = False, threads: int = 1) -> None:
""" Create tables that have explicit partitioning.
"""


@@ -35,7 +35,7 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
'--number-processes', '1' if options['append'] else str(options['threads']),
'--cache', str(options['osm2pgsql_cache']),
'--style', str(options['osm2pgsql_style'])
]
]

if str(options['osm2pgsql_style']).endswith('.lua'):
env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / '?.lua'),
@@ -50,7 +50,6 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
cmd.extend(('--output', 'gazetteer', '--hstore', '--latlon'))
cmd.extend(_mk_tablespace_options('main', options))


if options['flatnode_file']:
cmd.extend(('--flat-nodes', options['flatnode_file']))


@@ -28,6 +28,7 @@ UPDATE_TABLES = [
'wikipedia_%'
]


def drop_update_tables(conn: Connection) -> None:
""" Drop all tables only necessary for updating the database from
OSM replication data.
@@ -49,8 +50,8 @@ def drop_flatnode_file(fpath: Optional[Path]) -> None:
if fpath and fpath.exists():
fpath.unlink()


def is_frozen(conn: Connection) -> bool:
""" Returns true if database is in a frozen state
"""

return table_exists(conn, 'place') is False

@@ -13,7 +13,7 @@ import logging
from ..errors import UsageError
from ..config import Configuration
from ..db import properties
from ..db.connection import connect, Connection,\
from ..db.connection import connect, Connection, \
table_exists, register_hstore
from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
from ..tokenizer import factory as tokenizer_factory
@@ -21,7 +21,8 @@ from . import refresh

LOG = logging.getLogger()

_MIGRATION_FUNCTIONS : List[Tuple[NominatimVersion, Callable[..., None]]] = []
_MIGRATION_FUNCTIONS: List[Tuple[NominatimVersion, Callable[..., None]]] = []


def migrate(config: Configuration, paths: Any) -> int:
""" Check for the current database version and execute migrations,

@@ -25,6 +25,7 @@ from ..tokenizer.base import AbstractAnalyzer, AbstractTokenizer

LOG = logging.getLogger()


def _to_float(numstr: str, max_value: float) -> float:
""" Convert the number in string into a float. The number is expected
to be in the range of [-max_value, max_value]. Otherwise raises a
@@ -36,6 +37,7 @@ def _to_float(numstr: str, max_value: float) -> float:

return num


class _PostcodeCollector:
""" Collector for postcodes of a single country.
"""
@@ -46,7 +48,6 @@ class _PostcodeCollector:
self.collected: Dict[str, PointsCentroid] = defaultdict(PointsCentroid)
self.normalization_cache: Optional[Tuple[str, Optional[str]]] = None


def add(self, postcode: str, x: float, y: float) -> None:
""" Add the given postcode to the collection cache. If the postcode
already existed, it is overwritten with the new centroid.
@@ -63,7 +64,6 @@ class _PostcodeCollector:
if normalized:
self.collected[normalized] += (x, y)


def commit(self, conn: Connection, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
""" Update postcodes for the country from the postcodes selected so far
as well as any externally supplied postcodes.
@@ -97,9 +97,9 @@ class _PostcodeCollector:
""").format(pysql.Literal(self.country)),
to_update)


def _compute_changes(self, conn: Connection) \
-> Tuple[List[Tuple[str, float, float]], List[str], List[Tuple[float, float, str]]]:
def _compute_changes(
self, conn: Connection
) -> Tuple[List[Tuple[str, float, float]], List[str], List[Tuple[float, float, str]]]:
""" Compute which postcodes from the collected postcodes have to be
added or modified and which from the location_postcode table
have to be deleted.
@@ -125,7 +125,6 @@ class _PostcodeCollector:

return to_add, to_delete, to_update


def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
""" Look for an external postcode file for the active country in
the project directory and add missing postcodes when found.
@@ -155,7 +154,6 @@ class _PostcodeCollector:
finally:
csvfile.close()


def _open_external(self, project_dir: Path) -> Optional[TextIO]:
fname = project_dir / f'{self.country}_postcodes.csv'

@@ -225,6 +223,7 @@ def update_postcodes(dsn: str, project_dir: Path, tokenizer: AbstractTokenizer)

analyzer.update_postcodes_from_db()


def can_compute(dsn: str) -> bool:
"""
Check that the place table exists so that

@@ -16,7 +16,7 @@ from pathlib import Path
from psycopg import sql as pysql

from ..config import Configuration
from ..db.connection import Connection, connect, postgis_version_tuple,\
from ..db.connection import Connection, connect, postgis_version_tuple, \
drop_tables
from ..db.utils import execute_file
from ..db.sql_preprocessor import SQLPreprocessor
@@ -25,6 +25,7 @@ LOG = logging.getLogger()

OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'}


def _add_address_level_rows_from_entry(rows: MutableSequence[Tuple[Any, ...]],
entry: Mapping[str, Any]) -> None:
""" Converts a single entry from the JSON format for address rank
@@ -51,7 +52,7 @@ def load_address_levels(conn: Connection, table: str, levels: Sequence[Mapping[s
The table has the following columns:
country, class, type, rank_search, rank_address
"""
rows: List[Tuple[Any, ...]] = []
rows: List[Tuple[Any, ...]] = []
for entry in levels:
_add_address_level_rows_from_entry(rows, entry)

@@ -199,6 +200,7 @@ def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool =

return 0


def recompute_importance(conn: Connection) -> None:
""" Recompute wikipedia links and importance for all entries in placex.
This is a long-running operation that must not be executed in

@@ -33,6 +33,7 @@ except ModuleNotFoundError as exc:
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
def init_replication(conn: Connection, base_url: str,
|
||||
socket_timeout: int = 60) -> None:
|
||||
""" Set up replication for the server at the given base URL.
|
||||
@@ -83,6 +84,7 @@ def check_for_updates(conn: Connection, base_url: str,
|
||||
LOG.warning("New data available (%i => %i).", seq, state.sequence)
|
||||
return 0
|
||||
|
||||
|
||||
class UpdateState(Enum):
|
||||
""" Possible states after an update has run.
|
||||
"""
|
||||
@@ -176,12 +178,12 @@ def _make_replication_server(url: str, timeout: int) -> ContextManager[Replicati
|
||||
""" Download a resource from the given URL and return a byte sequence
|
||||
of the content.
|
||||
"""
|
||||
headers = {"User-Agent" : f"Nominatim (pyosmium/{pyo_version.pyosmium_release})"}
|
||||
headers = {"User-Agent": f"Nominatim (pyosmium/{pyo_version.pyosmium_release})"}
|
||||
|
||||
if self.session is not None:
|
||||
return self.session.get(url.get_full_url(),
|
||||
headers=headers, timeout=timeout or None,
|
||||
stream=True)
|
||||
headers=headers, timeout=timeout or None,
|
||||
stream=True)
|
||||
|
||||
@contextmanager
|
||||
def _get_url_with_session() -> Iterator[requests.Response]:
|
||||
|
||||
@@ -11,6 +11,7 @@
import logging
LOG = logging.getLogger()

+
class SpecialPhrasesImporterStatistics():
    """
    Class handling statistics of the import
@@ -16,6 +16,7 @@ import os
from ...errors import UsageError
from .special_phrase import SpecialPhrase

+
class SPCsvLoader:
    """
    Handles loading of special phrases from external csv file.

@@ -23,7 +24,6 @@ class SPCsvLoader:
    def __init__(self, csv_path: str) -> None:
        self.csv_path = csv_path
-

    def generate_phrases(self) -> Iterable[SpecialPhrase]:
        """ Open and parse the given csv file.
            Create the corresponding SpecialPhrases.

@@ -35,7 +35,6 @@ class SPCsvLoader:
        for row in reader:
            yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
-

    def _check_csv_validity(self) -> None:
        """
        Check that the csv file has the right extension.
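For context, generate_phrases() is a plain generator over csv.DictReader rows; a minimal sketch of the pattern (hypothetical standalone version, column names taken from the yield above):

    import csv
    from typing import Iterable, Tuple

    def phrases_from_csv(csv_path: str) -> Iterable[Tuple[str, str, str, str]]:
        # Each row must provide the columns used above: phrase, class, type, operator.
        with open(csv_path, newline='', encoding='utf-8') as csvfile:
            for row in csv.DictReader(csvfile):
                yield (row['phrase'], row['class'], row['type'], row['operator'])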
@@ -28,6 +28,7 @@ from ...tokenizer.base import AbstractTokenizer

LOG = logging.getLogger()

+
def _classtype_table(phrase_class: str, phrase_type: str) -> str:
    """ Return the name of the table for the given class and type.
    """

@@ -96,7 +97,6 @@ class SPImporter():
        LOG.warning('Import done.')
        self.statistics_handler.notify_import_done()
-

    def _fetch_existing_place_classtype_tables(self) -> None:
        """
        Fetch existing place_classtype tables.

@@ -114,7 +114,7 @@ class SPImporter():
                self.table_phrases_to_delete.add(row[0])

    def _load_white_and_black_lists(self) \
-        -> Tuple[Mapping[str, Sequence[str]], Mapping[str, Sequence[str]]]:
+            -> Tuple[Mapping[str, Sequence[str]], Mapping[str, Sequence[str]]]:
        """
        Load white and black lists from phrases-settings.json.
        """

@@ -163,7 +163,6 @@ class SPImporter():

        return (phrase.p_class, phrase.p_type)
-

    def _create_classtype_table_and_indexes(self,
                                            class_type_pairs: Iterable[Tuple[str, str]]) -> None:
        """

@@ -207,7 +206,6 @@ class SPImporter():
            with self.db_connection.cursor() as db_cursor:
                db_cursor.execute("DROP INDEX idx_placex_classtype")
-

    def _create_place_classtype_table(self, sql_tablespace: str,
                                      phrase_class: str, phrase_type: str) -> None:
        """

@@ -224,7 +222,6 @@ class SPImporter():
                           """).format(Identifier(table_name), SQL(sql_tablespace)),
                           (phrase_class, phrase_type))
-

    def _create_place_classtype_indexes(self, sql_tablespace: str,
                                        phrase_class: str, phrase_type: str) -> None:
        """

@@ -248,7 +245,6 @@ class SPImporter():
                                   Identifier(base_table),
                                   SQL(sql_tablespace)))
-

    def _grant_access_to_webuser(self, phrase_class: str, phrase_type: str) -> None:
        """
        Grant access on read to the table place_classtype for the webuser.

@@ -259,7 +255,6 @@ class SPImporter():
                       .format(Identifier(table_name),
                               Identifier(self.config.DATABASE_WEBUSER)))
-

    def _remove_non_existent_tables_from_db(self) -> None:
        """
        Remove special phrases which doesn't exist on the wiki anymore.
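The .format(Identifier(...), SQL(...)) calls in these hunks compose statements with psycopg's sql module: dynamic identifiers are quoted safely, and only trusted fragments such as the tablespace clause go in as raw SQL. A small sketch of the pattern (hypothetical table and role names):

    from psycopg import connect
    from psycopg.sql import SQL, Identifier

    def grant_read(dsn: str, table_name: str, user: str) -> None:
        # Identifier() quotes the dynamic parts; no string interpolation into SQL.
        with connect(dsn) as conn:
            conn.execute(SQL('GRANT SELECT ON {} TO {}')
                         .format(Identifier(table_name), Identifier(user)))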
@@ -17,6 +17,7 @@ from .special_phrase import SpecialPhrase

LOG = logging.getLogger()

+
def _get_wiki_content(lang: str) -> str:
    """
    Request and return the wiki page's content

@@ -43,12 +44,11 @@ class SPWikiLoader:
        self.type_fix_pattern = re.compile(r'\"|&quot;')

        self.languages = self.config.get_str_list('LANGUAGES') or \
-           ['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
-            'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
-            'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
-            'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi',
-            'lv', 'tr']
-
+            ['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
+             'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
+             'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
+             'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi',
+             'lv', 'tr']

    def generate_phrases(self) -> Iterable[SpecialPhrase]:
        """ Download the wiki pages for the configured languages
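The type_fix_pattern regex above strips quoting that leaks in from the wiki markup, both an escaped quote and a literal &quot; entity, from phrase types. For example (hypothetical input):

    import re

    type_fix_pattern = re.compile(r'\"|&quot;')
    print(type_fix_pattern.sub('', 'drinking&quot;_&quot;water'))  # drinking_water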
@@ -12,6 +12,7 @@
"""
from typing import Any

+
class SpecialPhrase:
    """
    Model representing a special phrase.

@@ -29,9 +30,9 @@ class SpecialPhrase:
            return False

        return self.p_label == other.p_label \
-               and self.p_class == other.p_class \
-               and self.p_type == other.p_type \
-               and self.p_operator == other.p_operator
+            and self.p_class == other.p_class \
+            and self.p_type == other.p_type \
+            and self.p_operator == other.p_operator

    def __hash__(self) -> int:
        return hash((self.p_label, self.p_class, self.p_type, self.p_operator))
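__eq__ and __hash__ stay in lockstep over the same four fields, which is the contract that lets equal phrases collapse during set-based deduplication: objects that compare equal must hash equal. A quick check (hypothetical values):

    # import path assumed from this repository's package layout
    from nominatim_db.tools.special_phrases.special_phrase import SpecialPhrase

    a = SpecialPhrase('Bar', 'amenity', 'bar', '-')
    b = SpecialPhrase('Bar', 'amenity', 'bar', '-')
    assert a == b and hash(a) == hash(b)
    assert len({a, b}) == 1   # equal phrases collapse in a set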
@@ -27,6 +27,7 @@ from . import freeze

LOG = logging.getLogger()

+
class TigerInput:
    """ Context manager that goes through Tiger input files which may
        either be in a directory or gzipped together in a tar file.

@@ -38,7 +39,7 @@ class TigerInput:

        if data_dir.endswith('.tar.gz'):
            try:
-                self.tar_handle = tarfile.open(data_dir) # pylint: disable=consider-using-with
+                self.tar_handle = tarfile.open(data_dir)
            except tarfile.ReadError as err:
                LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
                raise UsageError("Cannot open Tiger data file.") from err

@@ -53,11 +54,9 @@ class TigerInput:
        if not self.files:
            LOG.warning("Tiger data import selected but no files found at %s", data_dir)
-

    def __enter__(self) -> 'TigerInput':
        return self
-

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        if self.tar_handle:
            self.tar_handle.close()

@@ -77,7 +76,6 @@ class TigerInput:

        return open(cast(str, fname), encoding='utf-8')
-

    def __iter__(self) -> Iterator[Dict[str, Any]]:
        """ Iterate over the lines in each file.
        """

@@ -87,7 +85,7 @@ class TigerInput:


async def add_tiger_data(data_dir: str, config: Configuration, threads: int,
-                   tokenizer: AbstractTokenizer) -> int:
+                         tokenizer: AbstractTokenizer) -> int:
    """ Import tiger data from directory or tar file `data dir`.
    """
    dsn = config.get_libpq_dsn()
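Dropping the consider-using-with pragma is safe because flake8 has no such check, and the handle is still closed deterministically: TigerInput is itself the context manager, closing the tar file in __exit__. A stripped-down sketch of that pattern (hypothetical class, not the repository code):

    import tarfile
    from typing import Optional

    class TarSource:
        def __init__(self, path: str) -> None:
            self.tar_handle: Optional[tarfile.TarFile] = None
            if path.endswith('.tar.gz'):
                self.tar_handle = tarfile.open(path)  # closed in __exit__, not here

        def __enter__(self) -> 'TarSource':
            return self

        def __exit__(self, exc_type, exc_val, exc_tb) -> None:
            if self.tar_handle:
                self.tar_handle.close()

    with TarSource('.') as source:  # a plain directory: no tar handle is opened
        pass                        # file iteration would happen here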
@@ -11,9 +11,6 @@ Complex type definitions are moved here, to keep the source files readable.
"""
from typing import Any, Union, Mapping, TypeVar, Sequence, TYPE_CHECKING

-# Generics variable names do not confirm to naming styles, ignore globally here.
-# pylint: disable=invalid-name,abstract-method,multiple-statements
-# pylint: disable=missing-class-docstring,useless-import-alias

if TYPE_CHECKING:
    import os

@@ -26,9 +23,11 @@ SysEnv = Mapping[str, str]

T_ResultKey = TypeVar('T_ResultKey', int, str)

+
class DictCursorResult(Mapping[str, Any]):
    def __getitem__(self, x: Union[int, str]) -> Any: ...

+
DictCursorResults = Sequence[DictCursorResult]

# The following typing features require typing_extensions to work
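The if TYPE_CHECKING: guard keeps imports that only annotations need out of the runtime import graph; the names exist for the type checker but are never imported when the module actually loads. A minimal sketch:

    from __future__ import annotations
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        import os  # only the type checker ever imports this

    def describe(entry: os.DirEntry[str]) -> str:
        # At runtime the annotation stays an unevaluated string.
        return entry.name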
@@ -10,6 +10,7 @@ Functions for computation of centroids.
from typing import Tuple, Any
from collections.abc import Collection

+
class PointsCentroid:
    """ Centroid computation from single points using an online algorithm.
        More points may be added at any time.

@@ -32,11 +33,9 @@ class PointsCentroid:
        return (float(self.sum_x/self.count)/10000000,
                float(self.sum_y/self.count)/10000000)
-

    def __len__(self) -> int:
        return self.count
-

    def __iadd__(self, other: Any) -> 'PointsCentroid':
        if isinstance(other, Collection) and len(other) == 2:
            if all(isinstance(p, (float, int)) for p in other):
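PointsCentroid keeps only running sums and a count, so the centroid of n points is (sum_x/n, sum_y/n) at any moment without storing the points themselves; coordinates are accumulated as integers scaled by 1e7, hence the divisions by 10000000 above. The same idea in a compact sketch (hypothetical, simplified to unscaled floats):

    from typing import Tuple

    class OnlineCentroid:
        def __init__(self) -> None:
            self.sum_x = 0.0
            self.sum_y = 0.0
            self.count = 0

        def __iadd__(self, point: Tuple[float, float]) -> 'OnlineCentroid':
            self.sum_x += point[0]
            self.sum_y += point[1]
            self.count += 1
            return self

        def centroid(self) -> Tuple[float, float]:
            return (self.sum_x / self.count, self.sum_y / self.count)

    c = OnlineCentroid()
    c += (0.0, 0.0)
    c += (2.0, 4.0)
    assert c.centroid() == (1.0, 2.0)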
@@ -7,7 +7,7 @@
"""
Helper functions for accessing URL.
"""
-from typing import IO
+from typing import IO  # noqa
import logging
import urllib.request as urlrequest

@@ -15,6 +15,7 @@ from ..version import NOMINATIM_VERSION

LOG = logging.getLogger()

+
def get_url(url: str) -> str:
    """ Get the contents from the given URL and return it as a UTF-8 string.

@@ -24,7 +25,7 @@ def get_url(url: str) -> str:

    try:
        request = urlrequest.Request(url, headers=headers)
-        with urlrequest.urlopen(request) as response: # type: IO[bytes]
+        with urlrequest.urlopen(request) as response:  # type: IO[bytes]
            return response.read().decode('utf-8')
    except Exception:
        LOG.fatal('Failed to load URL: %s', url)
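The # noqa silences flake8's F401 (imported but unused): IO is referenced only from the # type: comment on the with line, which flake8 does not count as a use. Illustration (hypothetical file):

    from typing import IO  # noqa: F401

    def head(path: str) -> bytes:
        # IO[bytes] appears only in the type comment, invisible to flake8.
        with open(path, 'rb') as fp:  # type: IO[bytes]
            return fp.read(16)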
@@ -9,8 +9,6 @@ Version information for Nominatim.
"""
from typing import NamedTuple, Optional

-# See also https://github.com/PyCQA/pylint/issues/6006
-# pylint: disable=useless-import-alias,unused-import

class NominatimVersion(NamedTuple):
    """ Version information for Nominatim. We follow semantic versioning.

@@ -47,7 +45,6 @@ class NominatimVersion(NamedTuple):
        return f"{self.major}.{self.minor}.{self.patch_level}"
-


def parse_version(version: str) -> NominatimVersion:
    """ Parse a version string into a version consisting of a tuple of
        four ints: major, minor, patch level, database patch level

@@ -68,4 +65,4 @@ OSM2PGSQL_REQUIRED_VERSION = (1, 8)
# on every execution of 'make'.
# cmake/tool-installed.tmpl is used to build the binary 'nominatim'. Inside
# there is a call to set the variable value below.
-GIT_COMMIT_HASH : Optional[str] = None
+GIT_COMMIT_HASH: Optional[str] = None
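parse_version() splits a dotted version string into the four numeric fields of NominatimVersion. A minimal sketch under that reading (hypothetical and simplified; the real function's error handling and exact input format are not shown here):

    from typing import NamedTuple

    class Version(NamedTuple):
        major: int
        minor: int
        patch_level: int
        db_patch_level: int

    def parse_version(version: str) -> Version:
        # assumes a plain dotted string like '4.4.0.1'
        return Version(*(int(p) for p in version.split('.')[:4]))

    assert parse_version('4.4.0.1') == Version(4, 4, 0, 1)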