mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
reduce from 3 to 2 packages
This commit is contained in:
@@ -16,8 +16,8 @@ import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.errors import UsageError
|
||||
from .config import Configuration
|
||||
from .errors import UsageError
|
||||
from .tools.exec_utils import run_php_server
|
||||
from . import clicmd
|
||||
from . import version
|
||||
|
||||
@@ -11,8 +11,8 @@ import logging
|
||||
import argparse
|
||||
import random
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.db.connection import connect
|
||||
from ..errors import UsageError
|
||||
from ..db.connection import connect
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
|
||||
@@ -14,12 +14,12 @@ import json
|
||||
import sys
|
||||
from functools import reduce
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
import nominatim_api as napi
|
||||
import nominatim_api.v1 as api_output
|
||||
from nominatim_api.v1.helpers import zoom_to_rank, deduplicate_results
|
||||
from nominatim_api.v1.format import dispatch as formatting
|
||||
import nominatim_api.logging as loglib
|
||||
from ..errors import UsageError
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
|
||||
@@ -12,9 +12,9 @@ import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.typing import Protocol
|
||||
from ..errors import UsageError
|
||||
from ..config import Configuration
|
||||
from ..typing import Protocol
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ import argparse
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from ..errors import UsageError
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
|
||||
@@ -14,12 +14,13 @@ import asyncio
|
||||
import csv
|
||||
import sys
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
import nominatim_api as napi
|
||||
from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
|
||||
from nominatim_api.types import LookupDetails
|
||||
from nominatim_core.errors import UsageError
|
||||
|
||||
import sqlalchemy as sa # pylint: disable=C0411
|
||||
|
||||
from ..errors import UsageError
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
|
||||
@@ -9,7 +9,7 @@ Implementation of the 'freeze' subcommand.
|
||||
"""
|
||||
import argparse
|
||||
|
||||
from nominatim_core.db.connection import connect
|
||||
from ..db.connection import connect
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
|
||||
@@ -11,8 +11,8 @@ import argparse
|
||||
|
||||
import psutil
|
||||
|
||||
from nominatim_core.db import status
|
||||
from nominatim_core.db.connection import connect
|
||||
from ..db import status
|
||||
from ..db.connection import connect
|
||||
from .args import NominatimArgs
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
|
||||
@@ -12,8 +12,8 @@ import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import connect
|
||||
from ..config import Configuration
|
||||
from ..db.connection import connect
|
||||
from ..tokenizer.base import AbstractTokenizer
|
||||
from .args import NominatimArgs
|
||||
|
||||
|
||||
@@ -14,9 +14,9 @@ import logging
|
||||
import socket
|
||||
import time
|
||||
|
||||
from nominatim_core.db import status
|
||||
from nominatim_core.db.connection import connect
|
||||
from nominatim_core.errors import UsageError
|
||||
from ..db import status
|
||||
from ..db.connection import connect
|
||||
from ..errors import UsageError
|
||||
from .args import NominatimArgs
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@@ -14,10 +14,10 @@ from pathlib import Path
|
||||
|
||||
import psutil
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import connect
|
||||
from nominatim_core.db import status, properties
|
||||
from ..errors import UsageError
|
||||
from ..config import Configuration
|
||||
from ..db.connection import connect
|
||||
from ..db import status, properties
|
||||
from ..tokenizer.base import AbstractTokenizer
|
||||
from ..version import NOMINATIM_VERSION
|
||||
from .args import NominatimArgs
|
||||
|
||||
@@ -11,8 +11,8 @@ import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.db.connection import connect
|
||||
from ..errors import UsageError
|
||||
from ..db.connection import connect
|
||||
from ..tools.special_phrases.sp_importer import SPImporter, SpecialPhraseLoader
|
||||
from ..tools.special_phrases.sp_wiki_loader import SPWikiLoader
|
||||
from ..tools.special_phrases.sp_csv_loader import SPCsvLoader
|
||||
|
||||
378
src/nominatim_db/config.py
Normal file
378
src/nominatim_db/config.py
Normal file
@@ -0,0 +1,378 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Nominatim configuration accessor.
|
||||
"""
|
||||
from typing import Dict, Any, List, Mapping, Optional
|
||||
import importlib.util
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import json
|
||||
import yaml
|
||||
|
||||
from dotenv import dotenv_values
|
||||
|
||||
try:
|
||||
from psycopg2.extensions import parse_dsn
|
||||
except ModuleNotFoundError:
|
||||
from psycopg.conninfo import conninfo_to_dict as parse_dsn # type: ignore[assignment]
|
||||
|
||||
from .typing import StrPath
|
||||
from .errors import UsageError
|
||||
from . import paths
|
||||
|
||||
LOG = logging.getLogger()
|
||||
CONFIG_CACHE : Dict[str, Any] = {}
|
||||
|
||||
def flatten_config_list(content: Any, section: str = '') -> List[Any]:
|
||||
""" Flatten YAML configuration lists that contain include sections
|
||||
which are lists themselves.
|
||||
"""
|
||||
if not content:
|
||||
return []
|
||||
|
||||
if not isinstance(content, list):
|
||||
raise UsageError(f"List expected in section '{section}'.")
|
||||
|
||||
output = []
|
||||
for ele in content:
|
||||
if isinstance(ele, list):
|
||||
output.extend(flatten_config_list(ele, section))
|
||||
else:
|
||||
output.append(ele)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
class Configuration:
|
||||
""" This class wraps access to the configuration settings
|
||||
for the Nominatim instance in use.
|
||||
|
||||
All Nominatim configuration options are prefixed with 'NOMINATIM_' to
|
||||
avoid conflicts with other environment variables. All settings can
|
||||
be accessed as properties of the class under the same name as the
|
||||
setting but with the `NOMINATIM_` prefix removed. In addition, there
|
||||
are accessor functions that convert the setting values to types
|
||||
other than string.
|
||||
"""
|
||||
|
||||
def __init__(self, project_dir: Optional[Path],
|
||||
environ: Optional[Mapping[str, str]] = None) -> None:
|
||||
self.environ = environ or os.environ
|
||||
self.project_dir = project_dir
|
||||
self.config_dir = paths.CONFIG_DIR
|
||||
self._config = dotenv_values(str(self.config_dir / 'env.defaults'))
|
||||
if self.project_dir is not None and (self.project_dir / '.env').is_file():
|
||||
self.project_dir = self.project_dir.resolve()
|
||||
self._config.update(dotenv_values(str(self.project_dir / '.env')))
|
||||
|
||||
class _LibDirs:
|
||||
module: Path
|
||||
osm2pgsql: Path
|
||||
php = paths.PHPLIB_DIR
|
||||
sql = paths.SQLLIB_DIR
|
||||
data = paths.DATA_DIR
|
||||
|
||||
self.lib_dir = _LibDirs()
|
||||
self._private_plugins: Dict[str, object] = {}
|
||||
|
||||
|
||||
def set_libdirs(self, **kwargs: StrPath) -> None:
|
||||
""" Set paths to library functions and data.
|
||||
"""
|
||||
for key, value in kwargs.items():
|
||||
setattr(self.lib_dir, key, None if value is None else Path(value))
|
||||
|
||||
|
||||
def __getattr__(self, name: str) -> str:
|
||||
name = 'NOMINATIM_' + name
|
||||
|
||||
if name in self.environ:
|
||||
return self.environ[name]
|
||||
|
||||
return self._config[name] or ''
|
||||
|
||||
|
||||
def get_bool(self, name: str) -> bool:
|
||||
""" Return the given configuration parameter as a boolean.
|
||||
|
||||
Parameters:
|
||||
name: Name of the configuration parameter with the NOMINATIM_
|
||||
prefix removed.
|
||||
|
||||
Returns:
|
||||
`True` for values of '1', 'yes' and 'true', `False` otherwise.
|
||||
"""
|
||||
return getattr(self, name).lower() in ('1', 'yes', 'true')
|
||||
|
||||
|
||||
def get_int(self, name: str) -> int:
|
||||
""" Return the given configuration parameter as an int.
|
||||
|
||||
Parameters:
|
||||
name: Name of the configuration parameter with the NOMINATIM_
|
||||
prefix removed.
|
||||
|
||||
Returns:
|
||||
The configuration value converted to int.
|
||||
|
||||
Raises:
|
||||
ValueError: when the value is not a number.
|
||||
"""
|
||||
try:
|
||||
return int(getattr(self, name))
|
||||
except ValueError as exp:
|
||||
LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
|
||||
raise UsageError("Configuration error.") from exp
|
||||
|
||||
|
||||
def get_str_list(self, name: str) -> Optional[List[str]]:
|
||||
""" Return the given configuration parameter as a list of strings.
|
||||
The values are assumed to be given as a comma-sparated list and
|
||||
will be stripped before returning them.
|
||||
|
||||
Parameters:
|
||||
name: Name of the configuration parameter with the NOMINATIM_
|
||||
prefix removed.
|
||||
|
||||
Returns:
|
||||
(List[str]): The comma-split parameter as a list. The
|
||||
elements are stripped of leading and final spaces before
|
||||
being returned.
|
||||
(None): The configuration parameter was unset or empty.
|
||||
"""
|
||||
raw = getattr(self, name)
|
||||
|
||||
return [v.strip() for v in raw.split(',')] if raw else None
|
||||
|
||||
|
||||
def get_path(self, name: str) -> Optional[Path]:
|
||||
""" Return the given configuration parameter as a Path.
|
||||
|
||||
Parameters:
|
||||
name: Name of the configuration parameter with the NOMINATIM_
|
||||
prefix removed.
|
||||
|
||||
Returns:
|
||||
(Path): A Path object of the parameter value.
|
||||
If a relative path is configured, then the function converts this
|
||||
into an absolute path with the project directory as root path.
|
||||
(None): The configuration parameter was unset or empty.
|
||||
"""
|
||||
value = getattr(self, name)
|
||||
if not value:
|
||||
return None
|
||||
|
||||
cfgpath = Path(value)
|
||||
|
||||
if not cfgpath.is_absolute():
|
||||
assert self.project_dir is not None
|
||||
cfgpath = self.project_dir / cfgpath
|
||||
|
||||
return cfgpath.resolve()
|
||||
|
||||
|
||||
def get_libpq_dsn(self) -> str:
|
||||
""" Get configured database DSN converted into the key/value format
|
||||
understood by libpq and psycopg.
|
||||
"""
|
||||
dsn = self.DATABASE_DSN
|
||||
|
||||
def quote_param(param: str) -> str:
|
||||
key, val = param.split('=')
|
||||
val = val.replace('\\', '\\\\').replace("'", "\\'")
|
||||
if ' ' in val:
|
||||
val = "'" + val + "'"
|
||||
return key + '=' + val
|
||||
|
||||
if dsn.startswith('pgsql:'):
|
||||
# Old PHP DSN format. Convert before returning.
|
||||
return ' '.join([quote_param(p) for p in dsn[6:].split(';')])
|
||||
|
||||
return dsn
|
||||
|
||||
|
||||
def get_database_params(self) -> Mapping[str, str]:
|
||||
""" Get the configured parameters for the database connection
|
||||
as a mapping.
|
||||
"""
|
||||
dsn = self.DATABASE_DSN
|
||||
|
||||
if dsn.startswith('pgsql:'):
|
||||
return dict((p.split('=', 1) for p in dsn[6:].split(';')))
|
||||
|
||||
return parse_dsn(dsn)
|
||||
|
||||
|
||||
def get_import_style_file(self) -> Path:
|
||||
""" Return the import style file as a path object. Translates the
|
||||
name of the standard styles automatically into a file in the
|
||||
config style.
|
||||
"""
|
||||
style = getattr(self, 'IMPORT_STYLE')
|
||||
|
||||
if style in ('admin', 'street', 'address', 'full', 'extratags'):
|
||||
return self.config_dir / f'import-{style}.lua'
|
||||
|
||||
return self.find_config_file('', 'IMPORT_STYLE')
|
||||
|
||||
|
||||
def get_os_env(self) -> Dict[str, str]:
|
||||
""" Return a copy of the OS environment with the Nominatim configuration
|
||||
merged in.
|
||||
"""
|
||||
env = {k: v for k, v in self._config.items() if v is not None}
|
||||
env.update(self.environ)
|
||||
|
||||
return env
|
||||
|
||||
|
||||
def load_sub_configuration(self, filename: StrPath,
|
||||
config: Optional[str] = None) -> Any:
|
||||
""" Load additional configuration from a file. `filename` is the name
|
||||
of the configuration file. The file is first searched in the
|
||||
project directory and then in the global settings directory.
|
||||
|
||||
If `config` is set, then the name of the configuration file can
|
||||
be additionally given through a .env configuration option. When
|
||||
the option is set, then the file will be exclusively loaded as set:
|
||||
if the name is an absolute path, the file name is taken as is,
|
||||
if the name is relative, it is taken to be relative to the
|
||||
project directory.
|
||||
|
||||
The format of the file is determined from the filename suffix.
|
||||
Currently only files with extension '.yaml' are supported.
|
||||
|
||||
YAML files support a special '!include' construct. When the
|
||||
directive is given, the value is taken to be a filename, the file
|
||||
is loaded using this function and added at the position in the
|
||||
configuration tree.
|
||||
"""
|
||||
configfile = self.find_config_file(filename, config)
|
||||
|
||||
if str(configfile) in CONFIG_CACHE:
|
||||
return CONFIG_CACHE[str(configfile)]
|
||||
|
||||
if configfile.suffix in ('.yaml', '.yml'):
|
||||
result = self._load_from_yaml(configfile)
|
||||
elif configfile.suffix == '.json':
|
||||
with configfile.open('r', encoding='utf-8') as cfg:
|
||||
result = json.load(cfg)
|
||||
else:
|
||||
raise UsageError(f"Config file '{configfile}' has unknown format.")
|
||||
|
||||
CONFIG_CACHE[str(configfile)] = result
|
||||
return result
|
||||
|
||||
|
||||
def load_plugin_module(self, module_name: str, internal_path: str) -> Any:
|
||||
""" Load a Python module as a plugin.
|
||||
|
||||
The module_name may have three variants:
|
||||
|
||||
* A name without any '.' is assumed to be an internal module
|
||||
and will be searched relative to `internal_path`.
|
||||
* If the name ends in `.py`, module_name is assumed to be a
|
||||
file name relative to the project directory.
|
||||
* Any other name is assumed to be an absolute module name.
|
||||
|
||||
In either of the variants the module name must start with a letter.
|
||||
"""
|
||||
if not module_name or not module_name[0].isidentifier():
|
||||
raise UsageError(f'Invalid module name {module_name}')
|
||||
|
||||
if '.' not in module_name:
|
||||
module_name = module_name.replace('-', '_')
|
||||
full_module = f'{internal_path}.{module_name}'
|
||||
return sys.modules.get(full_module) or importlib.import_module(full_module)
|
||||
|
||||
if module_name.endswith('.py'):
|
||||
if self.project_dir is None or not (self.project_dir / module_name).exists():
|
||||
raise UsageError(f"Cannot find module '{module_name}' in project directory.")
|
||||
|
||||
if module_name in self._private_plugins:
|
||||
return self._private_plugins[module_name]
|
||||
|
||||
file_path = str(self.project_dir / module_name)
|
||||
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
||||
if spec:
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
# Do not add to global modules because there is no standard
|
||||
# module name that Python can resolve.
|
||||
self._private_plugins[module_name] = module
|
||||
assert spec.loader is not None
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
return module
|
||||
|
||||
return sys.modules.get(module_name) or importlib.import_module(module_name)
|
||||
|
||||
|
||||
def find_config_file(self, filename: StrPath,
|
||||
config: Optional[str] = None) -> Path:
|
||||
""" Resolve the location of a configuration file given a filename and
|
||||
an optional configuration option with the file name.
|
||||
Raises a UsageError when the file cannot be found or is not
|
||||
a regular file.
|
||||
"""
|
||||
if config is not None:
|
||||
cfg_value = getattr(self, config)
|
||||
if cfg_value:
|
||||
cfg_filename = Path(cfg_value)
|
||||
|
||||
if cfg_filename.is_absolute():
|
||||
cfg_filename = cfg_filename.resolve()
|
||||
|
||||
if not cfg_filename.is_file():
|
||||
LOG.fatal("Cannot find config file '%s'.", cfg_filename)
|
||||
raise UsageError("Config file not found.")
|
||||
|
||||
return cfg_filename
|
||||
|
||||
filename = cfg_filename
|
||||
|
||||
|
||||
search_paths = [self.project_dir, self.config_dir]
|
||||
for path in search_paths:
|
||||
if path is not None and (path / filename).is_file():
|
||||
return path / filename
|
||||
|
||||
LOG.fatal("Configuration file '%s' not found.\nDirectories searched: %s",
|
||||
filename, search_paths)
|
||||
raise UsageError("Config file not found.")
|
||||
|
||||
|
||||
def _load_from_yaml(self, cfgfile: Path) -> Any:
|
||||
""" Load a YAML configuration file. This installs a special handler that
|
||||
allows to include other YAML files using the '!include' operator.
|
||||
"""
|
||||
yaml.add_constructor('!include', self._yaml_include_representer,
|
||||
Loader=yaml.SafeLoader)
|
||||
return yaml.safe_load(cfgfile.read_text(encoding='utf-8'))
|
||||
|
||||
|
||||
def _yaml_include_representer(self, loader: Any, node: yaml.Node) -> Any:
|
||||
""" Handler for the '!include' operator in YAML files.
|
||||
|
||||
When the filename is relative, then the file is first searched in the
|
||||
project directory and then in the global settings directory.
|
||||
"""
|
||||
fname = loader.construct_scalar(node)
|
||||
|
||||
if Path(fname).is_absolute():
|
||||
configfile = Path(fname)
|
||||
else:
|
||||
configfile = self.find_config_file(loader.construct_scalar(node))
|
||||
|
||||
if configfile.suffix != '.yaml':
|
||||
LOG.fatal("Format error while reading '%s': only YAML format supported.",
|
||||
configfile)
|
||||
raise UsageError("Cannot handle config file format.")
|
||||
|
||||
return yaml.safe_load(configfile.read_text(encoding='utf-8'))
|
||||
@@ -11,10 +11,10 @@ from typing import Dict, Any, Iterable, Tuple, Optional, Container, overload
|
||||
from pathlib import Path
|
||||
import psycopg2.extras
|
||||
|
||||
from nominatim_core.db import utils as db_utils
|
||||
from nominatim_core.db.connection import connect, Connection
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.config import Configuration
|
||||
from ..db import utils as db_utils
|
||||
from ..db.connection import connect, Connection
|
||||
from ..errors import UsageError
|
||||
from ..config import Configuration
|
||||
from ..tokenizer.base import AbstractTokenizer
|
||||
|
||||
def _flatten_name_list(names: Any) -> Dict[str, str]:
|
||||
|
||||
@@ -11,7 +11,7 @@ format.
|
||||
from typing import Any, Mapping, Optional, Set, Match
|
||||
import re
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from ..errors import UsageError
|
||||
from . import country_info
|
||||
|
||||
class CountryPostcodeMatcher:
|
||||
|
||||
0
src/nominatim_db/db/__init__.py
Normal file
0
src/nominatim_db/db/__init__.py
Normal file
236
src/nominatim_db/db/async_connection.py
Normal file
236
src/nominatim_db/db/async_connection.py
Normal file
@@ -0,0 +1,236 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
""" Non-blocking database connections.
|
||||
"""
|
||||
from typing import Callable, Any, Optional, Iterator, Sequence
|
||||
import logging
|
||||
import select
|
||||
import time
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import wait_select
|
||||
|
||||
# psycopg2 emits different exceptions pre and post 2.8. Detect if the new error
|
||||
# module is available and adapt the error handling accordingly.
|
||||
try:
|
||||
import psycopg2.errors # pylint: disable=no-name-in-module,import-error
|
||||
__has_psycopg2_errors__ = True
|
||||
except ImportError:
|
||||
__has_psycopg2_errors__ = False
|
||||
|
||||
from ..typing import T_cursor, Query
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
class DeadlockHandler:
|
||||
""" Context manager that catches deadlock exceptions and calls
|
||||
the given handler function. All other exceptions are passed on
|
||||
normally.
|
||||
"""
|
||||
|
||||
def __init__(self, handler: Callable[[], None], ignore_sql_errors: bool = False) -> None:
|
||||
self.handler = handler
|
||||
self.ignore_sql_errors = ignore_sql_errors
|
||||
|
||||
def __enter__(self) -> 'DeadlockHandler':
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> bool:
|
||||
if __has_psycopg2_errors__:
|
||||
if exc_type == psycopg2.errors.DeadlockDetected: # pylint: disable=E1101
|
||||
self.handler()
|
||||
return True
|
||||
elif exc_type == psycopg2.extensions.TransactionRollbackError \
|
||||
and exc_value.pgcode == '40P01':
|
||||
self.handler()
|
||||
return True
|
||||
|
||||
if self.ignore_sql_errors and isinstance(exc_value, psycopg2.Error):
|
||||
LOG.info("SQL error ignored: %s", exc_value)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
class DBConnection:
|
||||
""" A single non-blocking database connection.
|
||||
"""
|
||||
|
||||
def __init__(self, dsn: str,
|
||||
cursor_factory: Optional[Callable[..., T_cursor]] = None,
|
||||
ignore_sql_errors: bool = False) -> None:
|
||||
self.dsn = dsn
|
||||
|
||||
self.current_query: Optional[Query] = None
|
||||
self.current_params: Optional[Sequence[Any]] = None
|
||||
self.ignore_sql_errors = ignore_sql_errors
|
||||
|
||||
self.conn: Optional['psycopg2._psycopg.connection'] = None
|
||||
self.cursor: Optional['psycopg2._psycopg.cursor'] = None
|
||||
self.connect(cursor_factory=cursor_factory)
|
||||
|
||||
def close(self) -> None:
|
||||
""" Close all open connections. Does not wait for pending requests.
|
||||
"""
|
||||
if self.conn is not None:
|
||||
if self.cursor is not None:
|
||||
self.cursor.close()
|
||||
self.cursor = None
|
||||
self.conn.close()
|
||||
|
||||
self.conn = None
|
||||
|
||||
def connect(self, cursor_factory: Optional[Callable[..., T_cursor]] = None) -> None:
|
||||
""" (Re)connect to the database. Creates an asynchronous connection
|
||||
with JIT and parallel processing disabled. If a connection was
|
||||
already open, it is closed and a new connection established.
|
||||
The caller must ensure that no query is pending before reconnecting.
|
||||
"""
|
||||
self.close()
|
||||
|
||||
# Use a dict to hand in the parameters because async is a reserved
|
||||
# word in Python3.
|
||||
self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True}) # type: ignore
|
||||
assert self.conn
|
||||
self.wait()
|
||||
|
||||
if cursor_factory is not None:
|
||||
self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
|
||||
else:
|
||||
self.cursor = self.conn.cursor()
|
||||
# Disable JIT and parallel workers as they are known to cause problems.
|
||||
# Update pg_settings instead of using SET because it does not yield
|
||||
# errors on older versions of Postgres where the settings are not
|
||||
# implemented.
|
||||
self.perform(
|
||||
""" UPDATE pg_settings SET setting = -1 WHERE name = 'jit_above_cost';
|
||||
UPDATE pg_settings SET setting = 0
|
||||
WHERE name = 'max_parallel_workers_per_gather';""")
|
||||
self.wait()
|
||||
|
||||
def _deadlock_handler(self) -> None:
|
||||
LOG.info("Deadlock detected (params = %s), retry.", str(self.current_params))
|
||||
assert self.cursor is not None
|
||||
assert self.current_query is not None
|
||||
assert self.current_params is not None
|
||||
|
||||
self.cursor.execute(self.current_query, self.current_params)
|
||||
|
||||
def wait(self) -> None:
|
||||
""" Block until any pending operation is done.
|
||||
"""
|
||||
while True:
|
||||
with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
|
||||
wait_select(self.conn)
|
||||
self.current_query = None
|
||||
return
|
||||
|
||||
def perform(self, sql: Query, args: Optional[Sequence[Any]] = None) -> None:
|
||||
""" Send SQL query to the server. Returns immediately without
|
||||
blocking.
|
||||
"""
|
||||
assert self.cursor is not None
|
||||
self.current_query = sql
|
||||
self.current_params = args
|
||||
self.cursor.execute(sql, args)
|
||||
|
||||
def fileno(self) -> int:
|
||||
""" File descriptor to wait for. (Makes this class select()able.)
|
||||
"""
|
||||
assert self.conn is not None
|
||||
return self.conn.fileno()
|
||||
|
||||
def is_done(self) -> bool:
|
||||
""" Check if the connection is available for a new query.
|
||||
|
||||
Also checks if the previous query has run into a deadlock.
|
||||
If so, then the previous query is repeated.
|
||||
"""
|
||||
assert self.conn is not None
|
||||
|
||||
if self.current_query is None:
|
||||
return True
|
||||
|
||||
with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
|
||||
if self.conn.poll() == psycopg2.extensions.POLL_OK:
|
||||
self.current_query = None
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
class WorkerPool:
|
||||
""" A pool of asynchronous database connections.
|
||||
|
||||
The pool may be used as a context manager.
|
||||
"""
|
||||
REOPEN_CONNECTIONS_AFTER = 100000
|
||||
|
||||
def __init__(self, dsn: str, pool_size: int, ignore_sql_errors: bool = False) -> None:
|
||||
self.threads = [DBConnection(dsn, ignore_sql_errors=ignore_sql_errors)
|
||||
for _ in range(pool_size)]
|
||||
self.free_workers = self._yield_free_worker()
|
||||
self.wait_time = 0.0
|
||||
|
||||
|
||||
def finish_all(self) -> None:
|
||||
""" Wait for all connection to finish.
|
||||
"""
|
||||
for thread in self.threads:
|
||||
while not thread.is_done():
|
||||
thread.wait()
|
||||
|
||||
self.free_workers = self._yield_free_worker()
|
||||
|
||||
def close(self) -> None:
|
||||
""" Close all connections and clear the pool.
|
||||
"""
|
||||
for thread in self.threads:
|
||||
thread.close()
|
||||
self.threads = []
|
||||
self.free_workers = iter([])
|
||||
|
||||
|
||||
def next_free_worker(self) -> DBConnection:
|
||||
""" Get the next free connection.
|
||||
"""
|
||||
return next(self.free_workers)
|
||||
|
||||
|
||||
def _yield_free_worker(self) -> Iterator[DBConnection]:
|
||||
ready = self.threads
|
||||
command_stat = 0
|
||||
while True:
|
||||
for thread in ready:
|
||||
if thread.is_done():
|
||||
command_stat += 1
|
||||
yield thread
|
||||
|
||||
if command_stat > self.REOPEN_CONNECTIONS_AFTER:
|
||||
self._reconnect_threads()
|
||||
ready = self.threads
|
||||
command_stat = 0
|
||||
else:
|
||||
tstart = time.time()
|
||||
_, ready, _ = select.select([], self.threads, [])
|
||||
self.wait_time += time.time() - tstart
|
||||
|
||||
|
||||
def _reconnect_threads(self) -> None:
|
||||
for thread in self.threads:
|
||||
while not thread.is_done():
|
||||
thread.wait()
|
||||
thread.connect()
|
||||
|
||||
|
||||
def __enter__(self) -> 'WorkerPool':
|
||||
return self
|
||||
|
||||
|
||||
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
|
||||
self.finish_all()
|
||||
self.close()
|
||||
254
src/nominatim_db/db/connection.py
Normal file
254
src/nominatim_db/db/connection.py
Normal file
@@ -0,0 +1,254 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Specialised connection and cursor functions.
|
||||
"""
|
||||
from typing import Optional, Any, Callable, ContextManager, Dict, cast, overload, Tuple, Iterable
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extensions
|
||||
import psycopg2.extras
|
||||
from psycopg2 import sql as pysql
|
||||
|
||||
from ..typing import SysEnv, Query, T_cursor
|
||||
from ..errors import UsageError
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
class Cursor(psycopg2.extras.DictCursor):
|
||||
""" A cursor returning dict-like objects and providing specialised
|
||||
execution functions.
|
||||
"""
|
||||
# pylint: disable=arguments-renamed,arguments-differ
|
||||
def execute(self, query: Query, args: Any = None) -> None:
|
||||
""" Query execution that logs the SQL query when debugging is enabled.
|
||||
"""
|
||||
if LOG.isEnabledFor(logging.DEBUG):
|
||||
LOG.debug(self.mogrify(query, args).decode('utf-8'))
|
||||
|
||||
super().execute(query, args)
|
||||
|
||||
|
||||
def execute_values(self, sql: Query, argslist: Iterable[Tuple[Any, ...]],
|
||||
template: Optional[Query] = None) -> None:
|
||||
""" Wrapper for the psycopg2 convenience function to execute
|
||||
SQL for a list of values.
|
||||
"""
|
||||
LOG.debug("SQL execute_values(%s, %s)", sql, argslist)
|
||||
|
||||
psycopg2.extras.execute_values(self, sql, argslist, template=template)
|
||||
|
||||
|
||||
def scalar(self, sql: Query, args: Any = None) -> Any:
|
||||
""" Execute query that returns a single value. The value is returned.
|
||||
If the query yields more than one row, a ValueError is raised.
|
||||
"""
|
||||
self.execute(sql, args)
|
||||
|
||||
if self.rowcount != 1:
|
||||
raise RuntimeError("Query did not return a single row.")
|
||||
|
||||
result = self.fetchone()
|
||||
assert result is not None
|
||||
|
||||
return result[0]
|
||||
|
||||
|
||||
def drop_table(self, name: str, if_exists: bool = True, cascade: bool = False) -> None:
|
||||
""" Drop the table with the given name.
|
||||
Set `if_exists` to False if a non-existent table should raise
|
||||
an exception instead of just being ignored. If 'cascade' is set
|
||||
to True then all dependent tables are deleted as well.
|
||||
"""
|
||||
sql = 'DROP TABLE '
|
||||
if if_exists:
|
||||
sql += 'IF EXISTS '
|
||||
sql += '{}'
|
||||
if cascade:
|
||||
sql += ' CASCADE'
|
||||
|
||||
self.execute(pysql.SQL(sql).format(pysql.Identifier(name)))
|
||||
|
||||
|
||||
class Connection(psycopg2.extensions.connection):
|
||||
""" A connection that provides the specialised cursor by default and
|
||||
adds convenience functions for administrating the database.
|
||||
"""
|
||||
@overload # type: ignore[override]
|
||||
def cursor(self) -> Cursor:
|
||||
...
|
||||
|
||||
@overload
|
||||
def cursor(self, name: str) -> Cursor:
|
||||
...
|
||||
|
||||
@overload
|
||||
def cursor(self, cursor_factory: Callable[..., T_cursor]) -> T_cursor:
|
||||
...
|
||||
|
||||
def cursor(self, cursor_factory = Cursor, **kwargs): # type: ignore
|
||||
""" Return a new cursor. By default the specialised cursor is returned.
|
||||
"""
|
||||
return super().cursor(cursor_factory=cursor_factory, **kwargs)
|
||||
|
||||
|
||||
def table_exists(self, table: str) -> bool:
|
||||
""" Check that a table with the given name exists in the database.
|
||||
"""
|
||||
with self.cursor() as cur:
|
||||
num = cur.scalar("""SELECT count(*) FROM pg_tables
|
||||
WHERE tablename = %s and schemaname = 'public'""", (table, ))
|
||||
return num == 1 if isinstance(num, int) else False
|
||||
|
||||
|
||||
def table_has_column(self, table: str, column: str) -> bool:
|
||||
""" Check if the table 'table' exists and has a column with name 'column'.
|
||||
"""
|
||||
with self.cursor() as cur:
|
||||
has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
|
||||
WHERE table_name = %s
|
||||
and column_name = %s""",
|
||||
(table, column))
|
||||
return has_column > 0 if isinstance(has_column, int) else False
|
||||
|
||||
|
||||
def index_exists(self, index: str, table: Optional[str] = None) -> bool:
|
||||
""" Check that an index with the given name exists in the database.
|
||||
If table is not None then the index must relate to the given
|
||||
table.
|
||||
"""
|
||||
with self.cursor() as cur:
|
||||
cur.execute("""SELECT tablename FROM pg_indexes
|
||||
WHERE indexname = %s and schemaname = 'public'""", (index, ))
|
||||
if cur.rowcount == 0:
|
||||
return False
|
||||
|
||||
if table is not None:
|
||||
row = cur.fetchone()
|
||||
if row is None or not isinstance(row[0], str):
|
||||
return False
|
||||
return row[0] == table
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def drop_table(self, name: str, if_exists: bool = True, cascade: bool = False) -> None:
|
||||
""" Drop the table with the given name.
|
||||
Set `if_exists` to False if a non-existent table should raise
|
||||
an exception instead of just being ignored.
|
||||
"""
|
||||
with self.cursor() as cur:
|
||||
cur.drop_table(name, if_exists, cascade)
|
||||
self.commit()
|
||||
|
||||
|
||||
def server_version_tuple(self) -> Tuple[int, int]:
|
||||
""" Return the server version as a tuple of (major, minor).
|
||||
Converts correctly for pre-10 and post-10 PostgreSQL versions.
|
||||
"""
|
||||
version = self.server_version
|
||||
if version < 100000:
|
||||
return (int(version / 10000), int((version % 10000) / 100))
|
||||
|
||||
return (int(version / 10000), version % 10000)
|
||||
|
||||
|
||||
def postgis_version_tuple(self) -> Tuple[int, int]:
|
||||
""" Return the postgis version installed in the database as a
|
||||
tuple of (major, minor). Assumes that the PostGIS extension
|
||||
has been installed already.
|
||||
"""
|
||||
with self.cursor() as cur:
|
||||
version = cur.scalar('SELECT postgis_lib_version()')
|
||||
|
||||
version_parts = version.split('.')
|
||||
if len(version_parts) < 2:
|
||||
raise UsageError(f"Error fetching Postgis version. Bad format: {version}")
|
||||
|
||||
return (int(version_parts[0]), int(version_parts[1]))
|
||||
|
||||
|
||||
def extension_loaded(self, extension_name: str) -> bool:
|
||||
""" Return True if the hstore extension is loaded in the database.
|
||||
"""
|
||||
with self.cursor() as cur:
|
||||
cur.execute('SELECT extname FROM pg_extension WHERE extname = %s', (extension_name, ))
|
||||
return cur.rowcount > 0
|
||||
|
||||
|
||||
class ConnectionContext(ContextManager[Connection]):
|
||||
""" Context manager of the connection that also provides direct access
|
||||
to the underlying connection.
|
||||
"""
|
||||
connection: Connection
|
||||
|
||||
def connect(dsn: str) -> ConnectionContext:
|
||||
""" Open a connection to the database using the specialised connection
|
||||
factory. The returned object may be used in conjunction with 'with'.
|
||||
When used outside a context manager, use the `connection` attribute
|
||||
to get the connection.
|
||||
"""
|
||||
try:
|
||||
conn = psycopg2.connect(dsn, connection_factory=Connection)
|
||||
ctxmgr = cast(ConnectionContext, contextlib.closing(conn))
|
||||
ctxmgr.connection = conn
|
||||
return ctxmgr
|
||||
except psycopg2.OperationalError as err:
|
||||
raise UsageError(f"Cannot connect to database: {err}") from err
|
||||
|
||||
|
||||
# Translation from PG connection string parameters to PG environment variables.
|
||||
# Derived from https://www.postgresql.org/docs/current/libpq-envars.html.
|
||||
_PG_CONNECTION_STRINGS = {
|
||||
'host': 'PGHOST',
|
||||
'hostaddr': 'PGHOSTADDR',
|
||||
'port': 'PGPORT',
|
||||
'dbname': 'PGDATABASE',
|
||||
'user': 'PGUSER',
|
||||
'password': 'PGPASSWORD',
|
||||
'passfile': 'PGPASSFILE',
|
||||
'channel_binding': 'PGCHANNELBINDING',
|
||||
'service': 'PGSERVICE',
|
||||
'options': 'PGOPTIONS',
|
||||
'application_name': 'PGAPPNAME',
|
||||
'sslmode': 'PGSSLMODE',
|
||||
'requiressl': 'PGREQUIRESSL',
|
||||
'sslcompression': 'PGSSLCOMPRESSION',
|
||||
'sslcert': 'PGSSLCERT',
|
||||
'sslkey': 'PGSSLKEY',
|
||||
'sslrootcert': 'PGSSLROOTCERT',
|
||||
'sslcrl': 'PGSSLCRL',
|
||||
'requirepeer': 'PGREQUIREPEER',
|
||||
'ssl_min_protocol_version': 'PGSSLMINPROTOCOLVERSION',
|
||||
'ssl_max_protocol_version': 'PGSSLMAXPROTOCOLVERSION',
|
||||
'gssencmode': 'PGGSSENCMODE',
|
||||
'krbsrvname': 'PGKRBSRVNAME',
|
||||
'gsslib': 'PGGSSLIB',
|
||||
'connect_timeout': 'PGCONNECT_TIMEOUT',
|
||||
'target_session_attrs': 'PGTARGETSESSIONATTRS',
|
||||
}
|
||||
|
||||
|
||||
def get_pg_env(dsn: str,
|
||||
base_env: Optional[SysEnv] = None) -> Dict[str, str]:
|
||||
""" Return a copy of `base_env` with the environment variables for
|
||||
PostgreSQL set up from the given database connection string.
|
||||
If `base_env` is None, then the OS environment is used as a base
|
||||
environment.
|
||||
"""
|
||||
env = dict(base_env if base_env is not None else os.environ)
|
||||
|
||||
for param, value in psycopg2.extensions.parse_dsn(dsn).items():
|
||||
if param in _PG_CONNECTION_STRINGS:
|
||||
env[_PG_CONNECTION_STRINGS[param]] = value
|
||||
else:
|
||||
LOG.error("Unknown connection parameter '%s' ignored.", param)
|
||||
|
||||
return env
|
||||
47
src/nominatim_db/db/properties.py
Normal file
47
src/nominatim_db/db/properties.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Query and access functions for the in-database property table.
|
||||
"""
|
||||
from typing import Optional, cast
|
||||
|
||||
from .connection import Connection
|
||||
|
||||
def set_property(conn: Connection, name: str, value: str) -> None:
|
||||
""" Add or replace the property with the given name.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
|
||||
(name, ))
|
||||
|
||||
if cur.rowcount == 0:
|
||||
sql = 'INSERT INTO nominatim_properties (value, property) VALUES (%s, %s)'
|
||||
else:
|
||||
sql = 'UPDATE nominatim_properties SET value = %s WHERE property = %s'
|
||||
|
||||
cur.execute(sql, (value, name))
|
||||
conn.commit()
|
||||
|
||||
|
||||
def get_property(conn: Connection, name: str) -> Optional[str]:
|
||||
""" Return the current value of the given property or None if the property
|
||||
is not set.
|
||||
"""
|
||||
if not conn.table_exists('nominatim_properties'):
|
||||
return None
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
|
||||
(name, ))
|
||||
|
||||
if cur.rowcount == 0:
|
||||
return None
|
||||
|
||||
result = cur.fetchone()
|
||||
assert result is not None
|
||||
|
||||
return cast(Optional[str], result[0])
|
||||
143
src/nominatim_db/db/sql_preprocessor.py
Normal file
143
src/nominatim_db/db/sql_preprocessor.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Preprocessing of SQL files.
|
||||
"""
|
||||
from typing import Set, Dict, Any, cast
|
||||
import jinja2
|
||||
|
||||
from .connection import Connection
|
||||
from .async_connection import WorkerPool
|
||||
from ..config import Configuration
|
||||
|
||||
def _get_partitions(conn: Connection) -> Set[int]:
|
||||
""" Get the set of partitions currently in use.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('SELECT DISTINCT partition FROM country_name')
|
||||
partitions = set([0])
|
||||
for row in cur:
|
||||
partitions.add(row[0])
|
||||
|
||||
return partitions
|
||||
|
||||
|
||||
def _get_tables(conn: Connection) -> Set[str]:
|
||||
""" Return the set of tables currently in use.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
|
||||
|
||||
return set((row[0] for row in list(cur)))
|
||||
|
||||
def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:
|
||||
""" Returns the version of the slim middle tables.
|
||||
"""
|
||||
if 'osm2pgsql_properties' not in tables:
|
||||
return '1'
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
|
||||
row = cur.fetchone()
|
||||
|
||||
return cast(str, row[0]) if row is not None else '1'
|
||||
|
||||
|
||||
def _setup_tablespace_sql(config: Configuration) -> Dict[str, str]:
|
||||
""" Returns a dict with tablespace expressions for the different tablespace
|
||||
kinds depending on whether a tablespace is configured or not.
|
||||
"""
|
||||
out = {}
|
||||
for subset in ('ADDRESS', 'SEARCH', 'AUX'):
|
||||
for kind in ('DATA', 'INDEX'):
|
||||
tspace = getattr(config, f'TABLESPACE_{subset}_{kind}')
|
||||
if tspace:
|
||||
tspace = f'TABLESPACE "{tspace}"'
|
||||
out[f'{subset.lower()}_{kind.lower()}'] = tspace
|
||||
|
||||
return out
|
||||
|
||||
|
||||
def _setup_postgresql_features(conn: Connection) -> Dict[str, Any]:
|
||||
""" Set up a dictionary with various optional Postgresql/Postgis features that
|
||||
depend on the database version.
|
||||
"""
|
||||
pg_version = conn.server_version_tuple()
|
||||
postgis_version = conn.postgis_version_tuple()
|
||||
pg11plus = pg_version >= (11, 0, 0)
|
||||
ps3 = postgis_version >= (3, 0)
|
||||
return {
|
||||
'has_index_non_key_column': pg11plus,
|
||||
'spgist_geom' : 'SPGIST' if pg11plus and ps3 else 'GIST'
|
||||
}
|
||||
|
||||
class SQLPreprocessor:
|
||||
""" A environment for preprocessing SQL files from the
|
||||
lib-sql directory.
|
||||
|
||||
The preprocessor provides a number of default filters and variables.
|
||||
The variables may be overwritten when rendering an SQL file.
|
||||
|
||||
The preprocessing is currently based on the jinja2 templating library
|
||||
and follows its syntax.
|
||||
"""
|
||||
|
||||
def __init__(self, conn: Connection, config: Configuration) -> None:
|
||||
self.env = jinja2.Environment(autoescape=False,
|
||||
loader=jinja2.FileSystemLoader(str(config.lib_dir.sql)))
|
||||
|
||||
db_info: Dict[str, Any] = {}
|
||||
db_info['partitions'] = _get_partitions(conn)
|
||||
db_info['tables'] = _get_tables(conn)
|
||||
db_info['reverse_only'] = 'search_name' not in db_info['tables']
|
||||
db_info['tablespace'] = _setup_tablespace_sql(config)
|
||||
db_info['middle_db_format'] = _get_middle_db_format(conn, db_info['tables'])
|
||||
|
||||
self.env.globals['config'] = config
|
||||
self.env.globals['db'] = db_info
|
||||
self.env.globals['postgres'] = _setup_postgresql_features(conn)
|
||||
|
||||
|
||||
def run_string(self, conn: Connection, template: str, **kwargs: Any) -> None:
|
||||
""" Execute the given SQL template string on the connection.
|
||||
The keyword arguments may supply additional parameters
|
||||
for preprocessing.
|
||||
"""
|
||||
sql = self.env.from_string(template).render(**kwargs)
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql)
|
||||
conn.commit()
|
||||
|
||||
|
||||
def run_sql_file(self, conn: Connection, name: str, **kwargs: Any) -> None:
|
||||
""" Execute the given SQL file on the connection. The keyword arguments
|
||||
may supply additional parameters for preprocessing.
|
||||
"""
|
||||
sql = self.env.get_template(name).render(**kwargs)
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql)
|
||||
conn.commit()
|
||||
|
||||
|
||||
def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
|
||||
**kwargs: Any) -> None:
|
||||
""" Execute the given SQL files using parallel asynchronous connections.
|
||||
The keyword arguments may supply additional parameters for
|
||||
preprocessing.
|
||||
|
||||
After preprocessing the SQL code is cut at lines containing only
|
||||
'---'. Each chunk is sent to one of the `num_threads` workers.
|
||||
"""
|
||||
sql = self.env.get_template(name).render(**kwargs)
|
||||
|
||||
parts = sql.split('\n---\n')
|
||||
|
||||
with WorkerPool(dsn, num_threads) as pool:
|
||||
for part in parts:
|
||||
pool.next_free_worker().perform(part)
|
||||
127
src/nominatim_db/db/status.py
Normal file
127
src/nominatim_db/db/status.py
Normal file
@@ -0,0 +1,127 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Access and helper functions for the status and status log table.
|
||||
"""
|
||||
from typing import Optional, Tuple, cast
|
||||
import datetime as dt
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .connection import Connection
|
||||
from ..utils.url_utils import get_url
|
||||
from ..errors import UsageError
|
||||
from ..typing import TypedDict
|
||||
|
||||
LOG = logging.getLogger()
|
||||
ISODATE_FORMAT = '%Y-%m-%dT%H:%M:%S'
|
||||
|
||||
|
||||
class StatusRow(TypedDict):
|
||||
""" Dictionary of columns of the import_status table.
|
||||
"""
|
||||
lastimportdate: dt.datetime
|
||||
sequence_id: Optional[int]
|
||||
indexed: Optional[bool]
|
||||
|
||||
|
||||
def compute_database_date(conn: Connection, offline: bool = False) -> dt.datetime:
|
||||
""" Determine the date of the database from the newest object in the
|
||||
data base.
|
||||
"""
|
||||
# If there is a date from osm2pgsql available, use that.
|
||||
if conn.table_exists('osm2pgsql_properties'):
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(""" SELECT value FROM osm2pgsql_properties
|
||||
WHERE property = 'current_timestamp' """)
|
||||
row = cur.fetchone()
|
||||
if row is not None:
|
||||
return dt.datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%SZ")\
|
||||
.replace(tzinfo=dt.timezone.utc)
|
||||
|
||||
if offline:
|
||||
raise UsageError("Cannot determine database date from data in offline mode.")
|
||||
|
||||
# Else, find the node with the highest ID in the database
|
||||
with conn.cursor() as cur:
|
||||
if conn.table_exists('place'):
|
||||
osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
|
||||
else:
|
||||
osmid = cur.scalar("SELECT max(osm_id) FROM placex WHERE osm_type='N'")
|
||||
|
||||
if osmid is None:
|
||||
LOG.fatal("No data found in the database.")
|
||||
raise UsageError("No data found in the database.")
|
||||
|
||||
LOG.info("Using node id %d for timestamp lookup", osmid)
|
||||
# Get the node from the API to find the timestamp when it was created.
|
||||
node_url = f'https://www.openstreetmap.org/api/0.6/node/{osmid}/1'
|
||||
data = get_url(node_url)
|
||||
|
||||
match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
|
||||
|
||||
if match is None:
|
||||
LOG.fatal("The node data downloaded from the API does not contain valid data.\n"
|
||||
"URL used: %s", node_url)
|
||||
raise UsageError("Bad API data.")
|
||||
|
||||
LOG.debug("Found timestamp %s", match.group(1))
|
||||
|
||||
return dt.datetime.strptime(match.group(1), ISODATE_FORMAT).replace(tzinfo=dt.timezone.utc)
|
||||
|
||||
|
||||
def set_status(conn: Connection, date: Optional[dt.datetime],
|
||||
seq: Optional[int] = None, indexed: bool = True) -> None:
|
||||
""" Replace the current status with the given status. If date is `None`
|
||||
then only sequence and indexed will be updated as given. Otherwise
|
||||
the whole status is replaced.
|
||||
The change will be committed to the database.
|
||||
"""
|
||||
assert date is None or date.tzinfo == dt.timezone.utc
|
||||
with conn.cursor() as cur:
|
||||
if date is None:
|
||||
cur.execute("UPDATE import_status set sequence_id = %s, indexed = %s",
|
||||
(seq, indexed))
|
||||
else:
|
||||
cur.execute("TRUNCATE TABLE import_status")
|
||||
cur.execute("""INSERT INTO import_status (lastimportdate, sequence_id, indexed)
|
||||
VALUES (%s, %s, %s)""", (date, seq, indexed))
|
||||
|
||||
conn.commit()
|
||||
|
||||
|
||||
def get_status(conn: Connection) -> Tuple[Optional[dt.datetime], Optional[int], Optional[bool]]:
|
||||
""" Return the current status as a triple of (date, sequence, indexed).
|
||||
If status has not been set up yet, a triple of None is returned.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT * FROM import_status LIMIT 1")
|
||||
if cur.rowcount < 1:
|
||||
return None, None, None
|
||||
|
||||
row = cast(StatusRow, cur.fetchone())
|
||||
return row['lastimportdate'], row['sequence_id'], row['indexed']
|
||||
|
||||
|
||||
def set_indexed(conn: Connection, state: bool) -> None:
|
||||
""" Set the indexed flag in the status table to the given state.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("UPDATE import_status SET indexed = %s", (state, ))
|
||||
conn.commit()
|
||||
|
||||
|
||||
def log_status(conn: Connection, start: dt.datetime,
|
||||
event: str, batchsize: Optional[int] = None) -> None:
|
||||
""" Write a new status line to the `import_osmosis_log` table.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""INSERT INTO import_osmosis_log
|
||||
(batchend, batchseq, batchsize, starttime, endtime, event)
|
||||
SELECT lastimportdate, sequence_id, %s, %s, now(), %s FROM import_status""",
|
||||
(batchsize, start, event))
|
||||
conn.commit()
|
||||
129
src/nominatim_db/db/utils.py
Normal file
129
src/nominatim_db/db/utils.py
Normal file
@@ -0,0 +1,129 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Helper functions for handling DB accesses.
|
||||
"""
|
||||
from typing import IO, Optional, Union, Any, Iterable
|
||||
import subprocess
|
||||
import logging
|
||||
import gzip
|
||||
import io
|
||||
from pathlib import Path
|
||||
|
||||
from .connection import get_pg_env, Cursor
|
||||
from ..errors import UsageError
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def _pipe_to_proc(proc: 'subprocess.Popen[bytes]',
|
||||
fdesc: Union[IO[bytes], gzip.GzipFile]) -> int:
|
||||
assert proc.stdin is not None
|
||||
chunk = fdesc.read(2048)
|
||||
while chunk and proc.poll() is None:
|
||||
try:
|
||||
proc.stdin.write(chunk)
|
||||
except BrokenPipeError as exc:
|
||||
raise UsageError("Failed to execute SQL file.") from exc
|
||||
chunk = fdesc.read(2048)
|
||||
|
||||
return len(chunk)
|
||||
|
||||
def execute_file(dsn: str, fname: Path,
|
||||
ignore_errors: bool = False,
|
||||
pre_code: Optional[str] = None,
|
||||
post_code: Optional[str] = None) -> None:
|
||||
""" Read an SQL file and run its contents against the given database
|
||||
using psql. Use `pre_code` and `post_code` to run extra commands
|
||||
before or after executing the file. The commands are run within the
|
||||
same session, so they may be used to wrap the file execution in a
|
||||
transaction.
|
||||
"""
|
||||
cmd = ['psql']
|
||||
if not ignore_errors:
|
||||
cmd.extend(('-v', 'ON_ERROR_STOP=1'))
|
||||
if not LOG.isEnabledFor(logging.INFO):
|
||||
cmd.append('--quiet')
|
||||
|
||||
with subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) as proc:
|
||||
assert proc.stdin is not None
|
||||
try:
|
||||
if not LOG.isEnabledFor(logging.INFO):
|
||||
proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
|
||||
|
||||
if pre_code:
|
||||
proc.stdin.write((pre_code + ';').encode('utf-8'))
|
||||
|
||||
if fname.suffix == '.gz':
|
||||
with gzip.open(str(fname), 'rb') as fdesc:
|
||||
remain = _pipe_to_proc(proc, fdesc)
|
||||
else:
|
||||
with fname.open('rb') as fdesc:
|
||||
remain = _pipe_to_proc(proc, fdesc)
|
||||
|
||||
if remain == 0 and post_code:
|
||||
proc.stdin.write((';' + post_code).encode('utf-8'))
|
||||
finally:
|
||||
proc.stdin.close()
|
||||
ret = proc.wait()
|
||||
|
||||
if ret != 0 or remain > 0:
|
||||
raise UsageError("Failed to execute SQL file.")
|
||||
|
||||
|
||||
# List of characters that need to be quoted for the copy command.
|
||||
_SQL_TRANSLATION = {ord('\\'): '\\\\',
|
||||
ord('\t'): '\\t',
|
||||
ord('\n'): '\\n'}
|
||||
|
||||
|
||||
class CopyBuffer:
|
||||
""" Data collector for the copy_from command.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.buffer = io.StringIO()
|
||||
|
||||
|
||||
def __enter__(self) -> 'CopyBuffer':
|
||||
return self
|
||||
|
||||
|
||||
def size(self) -> int:
|
||||
""" Return the number of bytes the buffer currently contains.
|
||||
"""
|
||||
return self.buffer.tell()
|
||||
|
||||
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
|
||||
if self.buffer is not None:
|
||||
self.buffer.close()
|
||||
|
||||
|
||||
def add(self, *data: Any) -> None:
|
||||
""" Add another row of data to the copy buffer.
|
||||
"""
|
||||
first = True
|
||||
for column in data:
|
||||
if first:
|
||||
first = False
|
||||
else:
|
||||
self.buffer.write('\t')
|
||||
if column is None:
|
||||
self.buffer.write('\\N')
|
||||
else:
|
||||
self.buffer.write(str(column).translate(_SQL_TRANSLATION))
|
||||
self.buffer.write('\n')
|
||||
|
||||
|
||||
def copy_out(self, cur: Cursor, table: str, columns: Optional[Iterable[str]] = None) -> None:
|
||||
""" Copy all collected data into the given table.
|
||||
|
||||
The buffer is empty and reusable after this operation.
|
||||
"""
|
||||
if self.buffer.tell() > 0:
|
||||
self.buffer.seek(0)
|
||||
cur.copy_from(self.buffer, table, columns=columns)
|
||||
self.buffer = io.StringIO()
|
||||
14
src/nominatim_db/errors.py
Normal file
14
src/nominatim_db/errors.py
Normal file
@@ -0,0 +1,14 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Custom exception and error classes for Nominatim.
|
||||
"""
|
||||
|
||||
class UsageError(Exception):
|
||||
""" An error raised because of bad user input. This error will usually
|
||||
not cause a stack trace to be printed unless debugging is enabled.
|
||||
"""
|
||||
@@ -13,9 +13,9 @@ import time
|
||||
|
||||
import psycopg2.extras
|
||||
|
||||
from nominatim_core.typing import DictCursorResults
|
||||
from nominatim_core.db.async_connection import DBConnection, WorkerPool
|
||||
from nominatim_core.db.connection import connect, Connection, Cursor
|
||||
from ..typing import DictCursorResults
|
||||
from ..db.async_connection import DBConnection, WorkerPool
|
||||
from ..db.connection import connect, Connection, Cursor
|
||||
from ..tokenizer.base import AbstractTokenizer
|
||||
from .progress import ProgressLogger
|
||||
from . import runners
|
||||
|
||||
@@ -14,8 +14,8 @@ import functools
|
||||
from psycopg2 import sql as pysql
|
||||
import psycopg2.extras
|
||||
|
||||
from nominatim_core.typing import Query, DictCursorResult, DictCursorResults, Protocol
|
||||
from nominatim_core.db.async_connection import DBConnection
|
||||
from ..typing import Query, DictCursorResult, DictCursorResults, Protocol
|
||||
from ..db.async_connection import DBConnection
|
||||
from ..data.place_info import PlaceInfo
|
||||
from ..tokenizer.base import AbstractAnalyzer
|
||||
|
||||
|
||||
15
src/nominatim_db/paths.py
Normal file
15
src/nominatim_db/paths.py
Normal file
@@ -0,0 +1,15 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Path settings for extra data used by Nominatim.
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
PHPLIB_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-php').resolve()
|
||||
SQLLIB_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-sql').resolve()
|
||||
DATA_DIR = (Path(__file__) / '..' / '..' / '..' / 'data').resolve()
|
||||
CONFIG_DIR = (Path(__file__) / '..' / '..' / '..' / 'settings').resolve()
|
||||
@@ -12,9 +12,9 @@ from abc import ABC, abstractmethod
|
||||
from typing import List, Tuple, Dict, Any, Optional, Iterable
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.typing import Protocol
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import Connection
|
||||
from ..typing import Protocol
|
||||
from ..config import Configuration
|
||||
from ..db.connection import Connection
|
||||
from ..data.place_info import PlaceInfo
|
||||
|
||||
class AbstractAnalyzer(ABC):
|
||||
|
||||
@@ -24,10 +24,10 @@ import logging
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.db import properties
|
||||
from nominatim_core.db.connection import connect
|
||||
from nominatim_core.config import Configuration
|
||||
from ..errors import UsageError
|
||||
from ..db import properties
|
||||
from ..db.connection import connect
|
||||
from ..config import Configuration
|
||||
from ..tokenizer.base import AbstractTokenizer, TokenizerModule
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@@ -14,10 +14,10 @@ import logging
|
||||
|
||||
from icu import Transliterator
|
||||
|
||||
from nominatim_core.config import flatten_config_list, Configuration
|
||||
from nominatim_core.db.properties import set_property, get_property
|
||||
from nominatim_core.db.connection import Connection
|
||||
from nominatim_core.errors import UsageError
|
||||
from ..config import flatten_config_list, Configuration
|
||||
from ..db.properties import set_property, get_property
|
||||
from ..db.connection import Connection
|
||||
from ..errors import UsageError
|
||||
from .place_sanitizer import PlaceSanitizer
|
||||
from .icu_token_analysis import ICUTokenAnalysis
|
||||
from .token_analysis.base import AnalysisModule, Analyzer
|
||||
|
||||
@@ -16,10 +16,10 @@ import logging
|
||||
from pathlib import Path
|
||||
from textwrap import dedent
|
||||
|
||||
from nominatim_core.db.connection import connect, Connection, Cursor
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.utils import CopyBuffer
|
||||
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
|
||||
from ..db.connection import connect, Connection, Cursor
|
||||
from ..config import Configuration
|
||||
from ..db.utils import CopyBuffer
|
||||
from ..db.sql_preprocessor import SQLPreprocessor
|
||||
from ..data.place_info import PlaceInfo
|
||||
from ..data.place_name import PlaceName
|
||||
from .icu_rule_loader import ICURuleLoader
|
||||
|
||||
@@ -20,12 +20,12 @@ from icu import Transliterator
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.db.connection import connect, Connection
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db import properties
|
||||
from nominatim_core.db import utils as db_utils
|
||||
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
|
||||
from ..errors import UsageError
|
||||
from ..db.connection import connect, Connection
|
||||
from ..config import Configuration
|
||||
from ..db import properties
|
||||
from ..db import utils as db_utils
|
||||
from ..db.sql_preprocessor import SQLPreprocessor
|
||||
from ..data.place_info import PlaceInfo
|
||||
from .base import AbstractAnalyzer, AbstractTokenizer
|
||||
|
||||
|
||||
@@ -10,8 +10,8 @@ is handed to the token analysis.
|
||||
"""
|
||||
from typing import Optional, List, Mapping, Sequence, Callable, Any, Tuple
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.config import Configuration
|
||||
from ..errors import UsageError
|
||||
from ..config import Configuration
|
||||
from .sanitizers.config import SanitizerConfig
|
||||
from .sanitizers.base import SanitizerHandler, ProcessInfo
|
||||
from ..data.place_name import PlaceName
|
||||
|
||||
@@ -9,7 +9,7 @@ Common data types and protocols for sanitizers.
|
||||
"""
|
||||
from typing import Optional, List, Mapping, Callable
|
||||
|
||||
from nominatim_core.typing import Protocol, Final
|
||||
from ...typing import Protocol, Final
|
||||
from ...data.place_info import PlaceInfo
|
||||
from ...data.place_name import PlaceName
|
||||
from .config import SanitizerConfig
|
||||
|
||||
@@ -11,7 +11,7 @@ from typing import Sequence, Union, Optional, Pattern, Callable, Any, TYPE_CHECK
|
||||
from collections import UserDict
|
||||
import re
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from ...errors import UsageError
|
||||
|
||||
# working around missing generics in Python < 3.8
|
||||
# See https://github.com/python/typing/issues/60#issuecomment-869757075
|
||||
|
||||
@@ -9,7 +9,7 @@ Common data types and protocols for analysers.
|
||||
"""
|
||||
from typing import Mapping, List, Any
|
||||
|
||||
from nominatim_core.typing import Protocol
|
||||
from ...typing import Protocol
|
||||
from ...data.place_name import PlaceName
|
||||
|
||||
class Analyzer(Protocol):
|
||||
|
||||
@@ -12,8 +12,8 @@ from collections import defaultdict
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from nominatim_core.config import flatten_config_list
|
||||
from nominatim_core.errors import UsageError
|
||||
from ...config import flatten_config_list
|
||||
from ...errors import UsageError
|
||||
|
||||
class ICUVariant(NamedTuple):
|
||||
""" A single replacement rule for variant creation.
|
||||
|
||||
@@ -12,7 +12,7 @@ import itertools
|
||||
|
||||
import datrie
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from ...errors import UsageError
|
||||
from ...data.place_name import PlaceName
|
||||
from .config_variants import get_variant_config
|
||||
from .generic_mutation import MutationVariantGenerator
|
||||
|
||||
@@ -12,7 +12,7 @@ import itertools
|
||||
import logging
|
||||
import re
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from ...errors import UsageError
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
@@ -12,8 +12,8 @@ from pathlib import Path
|
||||
import logging
|
||||
import urllib
|
||||
|
||||
from nominatim_core.db.connection import connect
|
||||
from nominatim_core.utils.url_utils import get_url
|
||||
from ..db.connection import connect
|
||||
from ..utils.url_utils import get_url
|
||||
from .exec_utils import run_osm2pgsql
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@@ -13,10 +13,10 @@ import logging
|
||||
from psycopg2.extras import Json, register_hstore
|
||||
from psycopg2 import DataError
|
||||
|
||||
from nominatim_core.typing import DictCursorResult
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import connect, Cursor
|
||||
from nominatim_core.errors import UsageError
|
||||
from ..typing import DictCursorResult
|
||||
from ..config import Configuration
|
||||
from ..db.connection import connect, Cursor
|
||||
from ..errors import UsageError
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
from ..data.place_info import PlaceInfo
|
||||
|
||||
|
||||
@@ -11,10 +11,10 @@ from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
|
||||
from enum import Enum
|
||||
from textwrap import dedent
|
||||
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import connect, Connection
|
||||
from nominatim_core.db import properties
|
||||
from nominatim_core.errors import UsageError
|
||||
from ..config import Configuration
|
||||
from ..db.connection import connect, Connection
|
||||
from ..db import properties
|
||||
from ..errors import UsageError
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
from . import freeze
|
||||
from ..version import NOMINATIM_VERSION, parse_version
|
||||
|
||||
@@ -17,8 +17,8 @@ from typing import List, Optional, Tuple, Union
|
||||
import psutil
|
||||
from psycopg2.extensions import make_dsn, parse_dsn
|
||||
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import connect
|
||||
from ..config import Configuration
|
||||
from ..db.connection import connect
|
||||
from ..version import NOMINATIM_VERSION
|
||||
|
||||
|
||||
|
||||
@@ -16,8 +16,8 @@ import sqlalchemy as sa
|
||||
|
||||
import nominatim_api as napi
|
||||
from nominatim_api.search.query_analyzer_factory import make_query_analyzer
|
||||
from nominatim_core.typing import SaSelect, SaRow
|
||||
from nominatim_core.db.sqlalchemy_types import Geometry, IntArray
|
||||
from nominatim_api.typing import SaSelect, SaRow
|
||||
from nominatim_api.sql.sqlalchemy_types import Geometry, IntArray
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
@@ -17,11 +17,11 @@ from pathlib import Path
|
||||
import psutil
|
||||
from psycopg2 import sql as pysql
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import connect, get_pg_env, Connection
|
||||
from nominatim_core.db.async_connection import DBConnection
|
||||
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
|
||||
from ..errors import UsageError
|
||||
from ..config import Configuration
|
||||
from ..db.connection import connect, get_pg_env, Connection
|
||||
from ..db.async_connection import DBConnection
|
||||
from ..db.sql_preprocessor import SQLPreprocessor
|
||||
from .exec_utils import run_osm2pgsql
|
||||
from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
|
||||
|
||||
|
||||
@@ -13,8 +13,8 @@ import os
|
||||
import subprocess
|
||||
import shutil
|
||||
|
||||
from nominatim_core.typing import StrPath
|
||||
from nominatim_core.db.connection import get_pg_env
|
||||
from ..typing import StrPath
|
||||
from ..db.connection import get_pg_env
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ from pathlib import Path
|
||||
|
||||
from psycopg2 import sql as pysql
|
||||
|
||||
from nominatim_core.db.connection import Connection
|
||||
from ..db.connection import Connection
|
||||
|
||||
UPDATE_TABLES = [
|
||||
'address_levels',
|
||||
|
||||
@@ -12,10 +12,10 @@ import logging
|
||||
|
||||
from psycopg2 import sql as pysql
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db import properties
|
||||
from nominatim_core.db.connection import connect, Connection
|
||||
from ..errors import UsageError
|
||||
from ..config import Configuration
|
||||
from ..db import properties
|
||||
from ..db.connection import connect, Connection
|
||||
from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
from . import refresh
|
||||
|
||||
@@ -18,8 +18,8 @@ from math import isfinite
|
||||
|
||||
from psycopg2 import sql as pysql
|
||||
|
||||
from nominatim_core.db.connection import connect, Connection
|
||||
from nominatim_core.utils.centroid import PointsCentroid
|
||||
from ..db.connection import connect, Connection
|
||||
from ..utils.centroid import PointsCentroid
|
||||
from ..data.postcode_format import PostcodeFormatter, CountryPostcodeMatcher
|
||||
from ..tokenizer.base import AbstractAnalyzer, AbstractTokenizer
|
||||
|
||||
|
||||
@@ -16,10 +16,10 @@ from pathlib import Path
|
||||
|
||||
from psycopg2 import sql as pysql
|
||||
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import Connection, connect
|
||||
from nominatim_core.db.utils import execute_file, CopyBuffer
|
||||
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
|
||||
from ..config import Configuration
|
||||
from ..db.connection import Connection, connect
|
||||
from ..db.utils import execute_file, CopyBuffer
|
||||
from ..db.sql_preprocessor import SQLPreprocessor
|
||||
from ..version import NOMINATIM_VERSION
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@@ -18,9 +18,9 @@ import urllib.request as urlrequest
|
||||
|
||||
import requests
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.db import status
|
||||
from nominatim_core.db.connection import Connection, connect
|
||||
from ..errors import UsageError
|
||||
from ..db import status
|
||||
from ..db.connection import Connection, connect
|
||||
from .exec_utils import run_osm2pgsql
|
||||
|
||||
try:
|
||||
|
||||
@@ -13,7 +13,7 @@ from typing import Iterable
|
||||
import csv
|
||||
import os
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from ...errors import UsageError
|
||||
from .special_phrase import SpecialPhrase
|
||||
|
||||
class SPCsvLoader:
|
||||
|
||||
@@ -19,9 +19,9 @@ import re
|
||||
|
||||
from psycopg2.sql import Identifier, SQL
|
||||
|
||||
from nominatim_core.typing import Protocol
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import Connection
|
||||
from ...typing import Protocol
|
||||
from ...config import Configuration
|
||||
from ...db.connection import Connection
|
||||
from .importer_statistics import SpecialPhrasesImporterStatistics
|
||||
from .special_phrase import SpecialPhrase
|
||||
from ...tokenizer.base import AbstractTokenizer
|
||||
|
||||
@@ -11,8 +11,8 @@ from typing import Iterable
|
||||
import re
|
||||
import logging
|
||||
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.utils.url_utils import get_url
|
||||
from ...config import Configuration
|
||||
from ...utils.url_utils import get_url
|
||||
from .special_phrase import SpecialPhrase
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@@ -16,11 +16,11 @@ import tarfile
|
||||
|
||||
from psycopg2.extras import Json
|
||||
|
||||
from nominatim_core.config import Configuration
|
||||
from nominatim_core.db.connection import connect
|
||||
from nominatim_core.db.async_connection import WorkerPool
|
||||
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
|
||||
from nominatim_core.errors import UsageError
|
||||
from ..config import Configuration
|
||||
from ..db.connection import connect
|
||||
from ..db.async_connection import WorkerPool
|
||||
from ..db.sql_preprocessor import SQLPreprocessor
|
||||
from ..errors import UsageError
|
||||
from ..data.place_info import PlaceInfo
|
||||
from ..tokenizer.base import AbstractAnalyzer, AbstractTokenizer
|
||||
from . import freeze
|
||||
|
||||
52
src/nominatim_db/typing.py
Normal file
52
src/nominatim_db/typing.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Type definitions for typing annotations.
|
||||
|
||||
Complex type definitions are moved here, to keep the source files readable.
|
||||
"""
|
||||
from typing import Any, Union, Mapping, TypeVar, Sequence, TYPE_CHECKING
|
||||
|
||||
# Generics variable names do not confirm to naming styles, ignore globally here.
|
||||
# pylint: disable=invalid-name,abstract-method,multiple-statements
|
||||
# pylint: disable=missing-class-docstring,useless-import-alias
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import psycopg2.sql
|
||||
import psycopg2.extensions
|
||||
import psycopg2.extras
|
||||
import os
|
||||
|
||||
StrPath = Union[str, 'os.PathLike[str]']
|
||||
|
||||
SysEnv = Mapping[str, str]
|
||||
|
||||
# psycopg2-related types
|
||||
|
||||
Query = Union[str, bytes, 'psycopg2.sql.Composable']
|
||||
|
||||
T_ResultKey = TypeVar('T_ResultKey', int, str)
|
||||
|
||||
class DictCursorResult(Mapping[str, Any]):
|
||||
def __getitem__(self, x: Union[int, str]) -> Any: ...
|
||||
|
||||
DictCursorResults = Sequence[DictCursorResult]
|
||||
|
||||
T_cursor = TypeVar('T_cursor', bound='psycopg2.extensions.cursor')
|
||||
|
||||
# The following typing features require typing_extensions to work
|
||||
# on all supported Python versions.
|
||||
# Only require this for type checking but not for normal operations.
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing_extensions import (Protocol as Protocol,
|
||||
Final as Final,
|
||||
TypedDict as TypedDict)
|
||||
else:
|
||||
Protocol = object
|
||||
Final = 'Final'
|
||||
TypedDict = dict
|
||||
0
src/nominatim_db/utils/__init__.py
Normal file
0
src/nominatim_db/utils/__init__.py
Normal file
49
src/nominatim_db/utils/centroid.py
Normal file
49
src/nominatim_db/utils/centroid.py
Normal file
@@ -0,0 +1,49 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Functions for computation of centroids.
|
||||
"""
|
||||
from typing import Tuple, Any
|
||||
from collections.abc import Collection
|
||||
|
||||
class PointsCentroid:
|
||||
""" Centroid computation from single points using an online algorithm.
|
||||
More points may be added at any time.
|
||||
|
||||
Coordinates are internally treated as a 7-digit fixed-point float
|
||||
(i.e. in OSM style).
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.sum_x = 0
|
||||
self.sum_y = 0
|
||||
self.count = 0
|
||||
|
||||
def centroid(self) -> Tuple[float, float]:
|
||||
""" Return the centroid of all points collected so far.
|
||||
"""
|
||||
if self.count == 0:
|
||||
raise ValueError("No points available for centroid.")
|
||||
|
||||
return (float(self.sum_x/self.count)/10000000,
|
||||
float(self.sum_y/self.count)/10000000)
|
||||
|
||||
|
||||
def __len__(self) -> int:
|
||||
return self.count
|
||||
|
||||
|
||||
def __iadd__(self, other: Any) -> 'PointsCentroid':
|
||||
if isinstance(other, Collection) and len(other) == 2:
|
||||
if all(isinstance(p, (float, int)) for p in other):
|
||||
x, y = other
|
||||
self.sum_x += int(x * 10000000)
|
||||
self.sum_y += int(y * 10000000)
|
||||
self.count += 1
|
||||
return self
|
||||
|
||||
raise ValueError("Can only add 2-element tuples to centroid.")
|
||||
31
src/nominatim_db/utils/url_utils.py
Normal file
31
src/nominatim_db/utils/url_utils.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Helper functions for accessing URL.
|
||||
"""
|
||||
from typing import IO
|
||||
import logging
|
||||
import urllib.request as urlrequest
|
||||
|
||||
from ..version import NOMINATIM_VERSION
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def get_url(url: str) -> str:
|
||||
""" Get the contents from the given URL and return it as a UTF-8 string.
|
||||
|
||||
This version makes sure that an appropriate user agent is sent.
|
||||
"""
|
||||
headers = {"User-Agent": f"Nominatim/{NOMINATIM_VERSION!s}"}
|
||||
|
||||
try:
|
||||
request = urlrequest.Request(url, headers=headers)
|
||||
with urlrequest.urlopen(request) as response: # type: IO[bytes]
|
||||
return response.read().decode('utf-8')
|
||||
except Exception:
|
||||
LOG.fatal('Failed to load URL: %s', url)
|
||||
raise
|
||||
@@ -7,13 +7,55 @@
|
||||
"""
|
||||
Version information for Nominatim.
|
||||
"""
|
||||
from typing import Optional
|
||||
from typing import NamedTuple, Optional
|
||||
|
||||
# See also https://github.com/PyCQA/pylint/issues/6006
|
||||
# pylint: disable=useless-import-alias,unused-import
|
||||
|
||||
from nominatim_core.version import (NominatimVersion as NominatimVersion,
|
||||
parse_version as parse_version)
|
||||
class NominatimVersion(NamedTuple):
|
||||
""" Version information for Nominatim. We follow semantic versioning.
|
||||
|
||||
Major, minor and patch_level refer to the last released version.
|
||||
The database patch level tracks important changes between releases
|
||||
and must always be increased when there is a change to the database or code
|
||||
that requires a migration.
|
||||
|
||||
When adding a migration on the development branch, raise the patch level
|
||||
to 99 to make sure that the migration is applied when updating from a
|
||||
patch release to the next minor version. Patch releases usually shouldn't
|
||||
have migrations in them. When they are needed, then make sure that the
|
||||
migration can be reapplied and set the migration version to the appropriate
|
||||
patch level when cherry-picking the commit with the migration.
|
||||
"""
|
||||
|
||||
major: int
|
||||
minor: int
|
||||
patch_level: int
|
||||
db_patch_level: Optional[int]
|
||||
|
||||
def __str__(self) -> str:
|
||||
if self.db_patch_level is None:
|
||||
return f"{self.major}.{self.minor}.{self.patch_level}"
|
||||
|
||||
return f"{self.major}.{self.minor}.{self.patch_level}-{self.db_patch_level}"
|
||||
|
||||
def release_version(self) -> str:
|
||||
""" Return the release version in semantic versioning format.
|
||||
|
||||
The release version does not include the database patch version.
|
||||
"""
|
||||
return f"{self.major}.{self.minor}.{self.patch_level}"
|
||||
|
||||
|
||||
def parse_version(version: str) -> NominatimVersion:
|
||||
""" Parse a version string into a version consisting of a tuple of
|
||||
four ints: major, minor, patch level, database patch level
|
||||
|
||||
This is the reverse operation of `version_str()`.
|
||||
"""
|
||||
parts = version.split('.')
|
||||
return NominatimVersion(*[int(x) for x in parts[:2] + parts[2].split('-')])
|
||||
|
||||
|
||||
NOMINATIM_VERSION = parse_version('4.4.99-1')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user