reduce from 3 to 2 packages

This commit is contained in:
Sarah Hoffmann
2024-06-27 21:26:12 +02:00
parent 139cea5720
commit 4da4cbfe27
149 changed files with 419 additions and 422 deletions

View File

@@ -16,8 +16,8 @@ import sys
import argparse
from pathlib import Path
from nominatim_core.config import Configuration
from nominatim_core.errors import UsageError
from .config import Configuration
from .errors import UsageError
from .tools.exec_utils import run_php_server
from . import clicmd
from . import version

View File

@@ -11,8 +11,8 @@ import logging
import argparse
import random
from nominatim_core.errors import UsageError
from nominatim_core.db.connection import connect
from ..errors import UsageError
from ..db.connection import connect
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.

View File

@@ -14,12 +14,12 @@ import json
import sys
from functools import reduce
from nominatim_core.errors import UsageError
import nominatim_api as napi
import nominatim_api.v1 as api_output
from nominatim_api.v1.helpers import zoom_to_rank, deduplicate_results
from nominatim_api.v1.format import dispatch as formatting
import nominatim_api.logging as loglib
from ..errors import UsageError
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.

View File

@@ -12,9 +12,9 @@ import argparse
import logging
from pathlib import Path
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.typing import Protocol
from ..errors import UsageError
from ..config import Configuration
from ..typing import Protocol
LOG = logging.getLogger()

View File

@@ -12,7 +12,7 @@ import argparse
import asyncio
from pathlib import Path
from nominatim_core.errors import UsageError
from ..errors import UsageError
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.

View File

@@ -14,12 +14,13 @@ import asyncio
import csv
import sys
import sqlalchemy as sa
import nominatim_api as napi
from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
from nominatim_api.types import LookupDetails
from nominatim_core.errors import UsageError
import sqlalchemy as sa # pylint: disable=C0411
from ..errors import UsageError
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.

View File

@@ -9,7 +9,7 @@ Implementation of the 'freeze' subcommand.
"""
import argparse
from nominatim_core.db.connection import connect
from ..db.connection import connect
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.

View File

@@ -11,8 +11,8 @@ import argparse
import psutil
from nominatim_core.db import status
from nominatim_core.db.connection import connect
from ..db import status
from ..db.connection import connect
from .args import NominatimArgs
# Do not repeat documentation of subcommand classes.

View File

@@ -12,8 +12,8 @@ import argparse
import logging
from pathlib import Path
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from ..config import Configuration
from ..db.connection import connect
from ..tokenizer.base import AbstractTokenizer
from .args import NominatimArgs

View File

@@ -14,9 +14,9 @@ import logging
import socket
import time
from nominatim_core.db import status
from nominatim_core.db.connection import connect
from nominatim_core.errors import UsageError
from ..db import status
from ..db.connection import connect
from ..errors import UsageError
from .args import NominatimArgs
LOG = logging.getLogger()

View File

@@ -14,10 +14,10 @@ from pathlib import Path
import psutil
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from nominatim_core.db import status, properties
from ..errors import UsageError
from ..config import Configuration
from ..db.connection import connect
from ..db import status, properties
from ..tokenizer.base import AbstractTokenizer
from ..version import NOMINATIM_VERSION
from .args import NominatimArgs

View File

@@ -11,8 +11,8 @@ import argparse
import logging
from pathlib import Path
from nominatim_core.errors import UsageError
from nominatim_core.db.connection import connect
from ..errors import UsageError
from ..db.connection import connect
from ..tools.special_phrases.sp_importer import SPImporter, SpecialPhraseLoader
from ..tools.special_phrases.sp_wiki_loader import SPWikiLoader
from ..tools.special_phrases.sp_csv_loader import SPCsvLoader

378
src/nominatim_db/config.py Normal file
View File

@@ -0,0 +1,378 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Nominatim configuration accessor.
"""
from typing import Dict, Any, List, Mapping, Optional
import importlib.util
import logging
import os
import sys
from pathlib import Path
import json
import yaml
from dotenv import dotenv_values
try:
from psycopg2.extensions import parse_dsn
except ModuleNotFoundError:
from psycopg.conninfo import conninfo_to_dict as parse_dsn # type: ignore[assignment]
from .typing import StrPath
from .errors import UsageError
from . import paths
LOG = logging.getLogger()
CONFIG_CACHE : Dict[str, Any] = {}
def flatten_config_list(content: Any, section: str = '') -> List[Any]:
""" Flatten YAML configuration lists that contain include sections
which are lists themselves.
"""
if not content:
return []
if not isinstance(content, list):
raise UsageError(f"List expected in section '{section}'.")
output = []
for ele in content:
if isinstance(ele, list):
output.extend(flatten_config_list(ele, section))
else:
output.append(ele)
return output
class Configuration:
""" This class wraps access to the configuration settings
for the Nominatim instance in use.
All Nominatim configuration options are prefixed with 'NOMINATIM_' to
avoid conflicts with other environment variables. All settings can
be accessed as properties of the class under the same name as the
setting but with the `NOMINATIM_` prefix removed. In addition, there
are accessor functions that convert the setting values to types
other than string.
"""
def __init__(self, project_dir: Optional[Path],
environ: Optional[Mapping[str, str]] = None) -> None:
self.environ = environ or os.environ
self.project_dir = project_dir
self.config_dir = paths.CONFIG_DIR
self._config = dotenv_values(str(self.config_dir / 'env.defaults'))
if self.project_dir is not None and (self.project_dir / '.env').is_file():
self.project_dir = self.project_dir.resolve()
self._config.update(dotenv_values(str(self.project_dir / '.env')))
class _LibDirs:
module: Path
osm2pgsql: Path
php = paths.PHPLIB_DIR
sql = paths.SQLLIB_DIR
data = paths.DATA_DIR
self.lib_dir = _LibDirs()
self._private_plugins: Dict[str, object] = {}
def set_libdirs(self, **kwargs: StrPath) -> None:
""" Set paths to library functions and data.
"""
for key, value in kwargs.items():
setattr(self.lib_dir, key, None if value is None else Path(value))
def __getattr__(self, name: str) -> str:
name = 'NOMINATIM_' + name
if name in self.environ:
return self.environ[name]
return self._config[name] or ''
def get_bool(self, name: str) -> bool:
""" Return the given configuration parameter as a boolean.
Parameters:
name: Name of the configuration parameter with the NOMINATIM_
prefix removed.
Returns:
`True` for values of '1', 'yes' and 'true', `False` otherwise.
"""
return getattr(self, name).lower() in ('1', 'yes', 'true')
def get_int(self, name: str) -> int:
""" Return the given configuration parameter as an int.
Parameters:
name: Name of the configuration parameter with the NOMINATIM_
prefix removed.
Returns:
The configuration value converted to int.
Raises:
ValueError: when the value is not a number.
"""
try:
return int(getattr(self, name))
except ValueError as exp:
LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
raise UsageError("Configuration error.") from exp
def get_str_list(self, name: str) -> Optional[List[str]]:
""" Return the given configuration parameter as a list of strings.
The values are assumed to be given as a comma-sparated list and
will be stripped before returning them.
Parameters:
name: Name of the configuration parameter with the NOMINATIM_
prefix removed.
Returns:
(List[str]): The comma-split parameter as a list. The
elements are stripped of leading and final spaces before
being returned.
(None): The configuration parameter was unset or empty.
"""
raw = getattr(self, name)
return [v.strip() for v in raw.split(',')] if raw else None
def get_path(self, name: str) -> Optional[Path]:
""" Return the given configuration parameter as a Path.
Parameters:
name: Name of the configuration parameter with the NOMINATIM_
prefix removed.
Returns:
(Path): A Path object of the parameter value.
If a relative path is configured, then the function converts this
into an absolute path with the project directory as root path.
(None): The configuration parameter was unset or empty.
"""
value = getattr(self, name)
if not value:
return None
cfgpath = Path(value)
if not cfgpath.is_absolute():
assert self.project_dir is not None
cfgpath = self.project_dir / cfgpath
return cfgpath.resolve()
def get_libpq_dsn(self) -> str:
""" Get configured database DSN converted into the key/value format
understood by libpq and psycopg.
"""
dsn = self.DATABASE_DSN
def quote_param(param: str) -> str:
key, val = param.split('=')
val = val.replace('\\', '\\\\').replace("'", "\\'")
if ' ' in val:
val = "'" + val + "'"
return key + '=' + val
if dsn.startswith('pgsql:'):
# Old PHP DSN format. Convert before returning.
return ' '.join([quote_param(p) for p in dsn[6:].split(';')])
return dsn
def get_database_params(self) -> Mapping[str, str]:
""" Get the configured parameters for the database connection
as a mapping.
"""
dsn = self.DATABASE_DSN
if dsn.startswith('pgsql:'):
return dict((p.split('=', 1) for p in dsn[6:].split(';')))
return parse_dsn(dsn)
def get_import_style_file(self) -> Path:
""" Return the import style file as a path object. Translates the
name of the standard styles automatically into a file in the
config style.
"""
style = getattr(self, 'IMPORT_STYLE')
if style in ('admin', 'street', 'address', 'full', 'extratags'):
return self.config_dir / f'import-{style}.lua'
return self.find_config_file('', 'IMPORT_STYLE')
def get_os_env(self) -> Dict[str, str]:
""" Return a copy of the OS environment with the Nominatim configuration
merged in.
"""
env = {k: v for k, v in self._config.items() if v is not None}
env.update(self.environ)
return env
def load_sub_configuration(self, filename: StrPath,
config: Optional[str] = None) -> Any:
""" Load additional configuration from a file. `filename` is the name
of the configuration file. The file is first searched in the
project directory and then in the global settings directory.
If `config` is set, then the name of the configuration file can
be additionally given through a .env configuration option. When
the option is set, then the file will be exclusively loaded as set:
if the name is an absolute path, the file name is taken as is,
if the name is relative, it is taken to be relative to the
project directory.
The format of the file is determined from the filename suffix.
Currently only files with extension '.yaml' are supported.
YAML files support a special '!include' construct. When the
directive is given, the value is taken to be a filename, the file
is loaded using this function and added at the position in the
configuration tree.
"""
configfile = self.find_config_file(filename, config)
if str(configfile) in CONFIG_CACHE:
return CONFIG_CACHE[str(configfile)]
if configfile.suffix in ('.yaml', '.yml'):
result = self._load_from_yaml(configfile)
elif configfile.suffix == '.json':
with configfile.open('r', encoding='utf-8') as cfg:
result = json.load(cfg)
else:
raise UsageError(f"Config file '{configfile}' has unknown format.")
CONFIG_CACHE[str(configfile)] = result
return result
def load_plugin_module(self, module_name: str, internal_path: str) -> Any:
""" Load a Python module as a plugin.
The module_name may have three variants:
* A name without any '.' is assumed to be an internal module
and will be searched relative to `internal_path`.
* If the name ends in `.py`, module_name is assumed to be a
file name relative to the project directory.
* Any other name is assumed to be an absolute module name.
In either of the variants the module name must start with a letter.
"""
if not module_name or not module_name[0].isidentifier():
raise UsageError(f'Invalid module name {module_name}')
if '.' not in module_name:
module_name = module_name.replace('-', '_')
full_module = f'{internal_path}.{module_name}'
return sys.modules.get(full_module) or importlib.import_module(full_module)
if module_name.endswith('.py'):
if self.project_dir is None or not (self.project_dir / module_name).exists():
raise UsageError(f"Cannot find module '{module_name}' in project directory.")
if module_name in self._private_plugins:
return self._private_plugins[module_name]
file_path = str(self.project_dir / module_name)
spec = importlib.util.spec_from_file_location(module_name, file_path)
if spec:
module = importlib.util.module_from_spec(spec)
# Do not add to global modules because there is no standard
# module name that Python can resolve.
self._private_plugins[module_name] = module
assert spec.loader is not None
spec.loader.exec_module(module)
return module
return sys.modules.get(module_name) or importlib.import_module(module_name)
def find_config_file(self, filename: StrPath,
config: Optional[str] = None) -> Path:
""" Resolve the location of a configuration file given a filename and
an optional configuration option with the file name.
Raises a UsageError when the file cannot be found or is not
a regular file.
"""
if config is not None:
cfg_value = getattr(self, config)
if cfg_value:
cfg_filename = Path(cfg_value)
if cfg_filename.is_absolute():
cfg_filename = cfg_filename.resolve()
if not cfg_filename.is_file():
LOG.fatal("Cannot find config file '%s'.", cfg_filename)
raise UsageError("Config file not found.")
return cfg_filename
filename = cfg_filename
search_paths = [self.project_dir, self.config_dir]
for path in search_paths:
if path is not None and (path / filename).is_file():
return path / filename
LOG.fatal("Configuration file '%s' not found.\nDirectories searched: %s",
filename, search_paths)
raise UsageError("Config file not found.")
def _load_from_yaml(self, cfgfile: Path) -> Any:
""" Load a YAML configuration file. This installs a special handler that
allows to include other YAML files using the '!include' operator.
"""
yaml.add_constructor('!include', self._yaml_include_representer,
Loader=yaml.SafeLoader)
return yaml.safe_load(cfgfile.read_text(encoding='utf-8'))
def _yaml_include_representer(self, loader: Any, node: yaml.Node) -> Any:
""" Handler for the '!include' operator in YAML files.
When the filename is relative, then the file is first searched in the
project directory and then in the global settings directory.
"""
fname = loader.construct_scalar(node)
if Path(fname).is_absolute():
configfile = Path(fname)
else:
configfile = self.find_config_file(loader.construct_scalar(node))
if configfile.suffix != '.yaml':
LOG.fatal("Format error while reading '%s': only YAML format supported.",
configfile)
raise UsageError("Cannot handle config file format.")
return yaml.safe_load(configfile.read_text(encoding='utf-8'))

View File

@@ -11,10 +11,10 @@ from typing import Dict, Any, Iterable, Tuple, Optional, Container, overload
from pathlib import Path
import psycopg2.extras
from nominatim_core.db import utils as db_utils
from nominatim_core.db.connection import connect, Connection
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from ..db import utils as db_utils
from ..db.connection import connect, Connection
from ..errors import UsageError
from ..config import Configuration
from ..tokenizer.base import AbstractTokenizer
def _flatten_name_list(names: Any) -> Dict[str, str]:

View File

@@ -11,7 +11,7 @@ format.
from typing import Any, Mapping, Optional, Set, Match
import re
from nominatim_core.errors import UsageError
from ..errors import UsageError
from . import country_info
class CountryPostcodeMatcher:

View File

View File

@@ -0,0 +1,236 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
""" Non-blocking database connections.
"""
from typing import Callable, Any, Optional, Iterator, Sequence
import logging
import select
import time
import psycopg2
from psycopg2.extras import wait_select
# psycopg2 emits different exceptions pre and post 2.8. Detect if the new error
# module is available and adapt the error handling accordingly.
try:
import psycopg2.errors # pylint: disable=no-name-in-module,import-error
__has_psycopg2_errors__ = True
except ImportError:
__has_psycopg2_errors__ = False
from ..typing import T_cursor, Query
LOG = logging.getLogger()
class DeadlockHandler:
""" Context manager that catches deadlock exceptions and calls
the given handler function. All other exceptions are passed on
normally.
"""
def __init__(self, handler: Callable[[], None], ignore_sql_errors: bool = False) -> None:
self.handler = handler
self.ignore_sql_errors = ignore_sql_errors
def __enter__(self) -> 'DeadlockHandler':
return self
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> bool:
if __has_psycopg2_errors__:
if exc_type == psycopg2.errors.DeadlockDetected: # pylint: disable=E1101
self.handler()
return True
elif exc_type == psycopg2.extensions.TransactionRollbackError \
and exc_value.pgcode == '40P01':
self.handler()
return True
if self.ignore_sql_errors and isinstance(exc_value, psycopg2.Error):
LOG.info("SQL error ignored: %s", exc_value)
return True
return False
class DBConnection:
""" A single non-blocking database connection.
"""
def __init__(self, dsn: str,
cursor_factory: Optional[Callable[..., T_cursor]] = None,
ignore_sql_errors: bool = False) -> None:
self.dsn = dsn
self.current_query: Optional[Query] = None
self.current_params: Optional[Sequence[Any]] = None
self.ignore_sql_errors = ignore_sql_errors
self.conn: Optional['psycopg2._psycopg.connection'] = None
self.cursor: Optional['psycopg2._psycopg.cursor'] = None
self.connect(cursor_factory=cursor_factory)
def close(self) -> None:
""" Close all open connections. Does not wait for pending requests.
"""
if self.conn is not None:
if self.cursor is not None:
self.cursor.close()
self.cursor = None
self.conn.close()
self.conn = None
def connect(self, cursor_factory: Optional[Callable[..., T_cursor]] = None) -> None:
""" (Re)connect to the database. Creates an asynchronous connection
with JIT and parallel processing disabled. If a connection was
already open, it is closed and a new connection established.
The caller must ensure that no query is pending before reconnecting.
"""
self.close()
# Use a dict to hand in the parameters because async is a reserved
# word in Python3.
self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True}) # type: ignore
assert self.conn
self.wait()
if cursor_factory is not None:
self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
else:
self.cursor = self.conn.cursor()
# Disable JIT and parallel workers as they are known to cause problems.
# Update pg_settings instead of using SET because it does not yield
# errors on older versions of Postgres where the settings are not
# implemented.
self.perform(
""" UPDATE pg_settings SET setting = -1 WHERE name = 'jit_above_cost';
UPDATE pg_settings SET setting = 0
WHERE name = 'max_parallel_workers_per_gather';""")
self.wait()
def _deadlock_handler(self) -> None:
LOG.info("Deadlock detected (params = %s), retry.", str(self.current_params))
assert self.cursor is not None
assert self.current_query is not None
assert self.current_params is not None
self.cursor.execute(self.current_query, self.current_params)
def wait(self) -> None:
""" Block until any pending operation is done.
"""
while True:
with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
wait_select(self.conn)
self.current_query = None
return
def perform(self, sql: Query, args: Optional[Sequence[Any]] = None) -> None:
""" Send SQL query to the server. Returns immediately without
blocking.
"""
assert self.cursor is not None
self.current_query = sql
self.current_params = args
self.cursor.execute(sql, args)
def fileno(self) -> int:
""" File descriptor to wait for. (Makes this class select()able.)
"""
assert self.conn is not None
return self.conn.fileno()
def is_done(self) -> bool:
""" Check if the connection is available for a new query.
Also checks if the previous query has run into a deadlock.
If so, then the previous query is repeated.
"""
assert self.conn is not None
if self.current_query is None:
return True
with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
if self.conn.poll() == psycopg2.extensions.POLL_OK:
self.current_query = None
return True
return False
class WorkerPool:
""" A pool of asynchronous database connections.
The pool may be used as a context manager.
"""
REOPEN_CONNECTIONS_AFTER = 100000
def __init__(self, dsn: str, pool_size: int, ignore_sql_errors: bool = False) -> None:
self.threads = [DBConnection(dsn, ignore_sql_errors=ignore_sql_errors)
for _ in range(pool_size)]
self.free_workers = self._yield_free_worker()
self.wait_time = 0.0
def finish_all(self) -> None:
""" Wait for all connection to finish.
"""
for thread in self.threads:
while not thread.is_done():
thread.wait()
self.free_workers = self._yield_free_worker()
def close(self) -> None:
""" Close all connections and clear the pool.
"""
for thread in self.threads:
thread.close()
self.threads = []
self.free_workers = iter([])
def next_free_worker(self) -> DBConnection:
""" Get the next free connection.
"""
return next(self.free_workers)
def _yield_free_worker(self) -> Iterator[DBConnection]:
ready = self.threads
command_stat = 0
while True:
for thread in ready:
if thread.is_done():
command_stat += 1
yield thread
if command_stat > self.REOPEN_CONNECTIONS_AFTER:
self._reconnect_threads()
ready = self.threads
command_stat = 0
else:
tstart = time.time()
_, ready, _ = select.select([], self.threads, [])
self.wait_time += time.time() - tstart
def _reconnect_threads(self) -> None:
for thread in self.threads:
while not thread.is_done():
thread.wait()
thread.connect()
def __enter__(self) -> 'WorkerPool':
return self
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
self.finish_all()
self.close()

View File

@@ -0,0 +1,254 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Specialised connection and cursor functions.
"""
from typing import Optional, Any, Callable, ContextManager, Dict, cast, overload, Tuple, Iterable
import contextlib
import logging
import os
import psycopg2
import psycopg2.extensions
import psycopg2.extras
from psycopg2 import sql as pysql
from ..typing import SysEnv, Query, T_cursor
from ..errors import UsageError
LOG = logging.getLogger()
class Cursor(psycopg2.extras.DictCursor):
""" A cursor returning dict-like objects and providing specialised
execution functions.
"""
# pylint: disable=arguments-renamed,arguments-differ
def execute(self, query: Query, args: Any = None) -> None:
""" Query execution that logs the SQL query when debugging is enabled.
"""
if LOG.isEnabledFor(logging.DEBUG):
LOG.debug(self.mogrify(query, args).decode('utf-8'))
super().execute(query, args)
def execute_values(self, sql: Query, argslist: Iterable[Tuple[Any, ...]],
template: Optional[Query] = None) -> None:
""" Wrapper for the psycopg2 convenience function to execute
SQL for a list of values.
"""
LOG.debug("SQL execute_values(%s, %s)", sql, argslist)
psycopg2.extras.execute_values(self, sql, argslist, template=template)
def scalar(self, sql: Query, args: Any = None) -> Any:
""" Execute query that returns a single value. The value is returned.
If the query yields more than one row, a ValueError is raised.
"""
self.execute(sql, args)
if self.rowcount != 1:
raise RuntimeError("Query did not return a single row.")
result = self.fetchone()
assert result is not None
return result[0]
def drop_table(self, name: str, if_exists: bool = True, cascade: bool = False) -> None:
""" Drop the table with the given name.
Set `if_exists` to False if a non-existent table should raise
an exception instead of just being ignored. If 'cascade' is set
to True then all dependent tables are deleted as well.
"""
sql = 'DROP TABLE '
if if_exists:
sql += 'IF EXISTS '
sql += '{}'
if cascade:
sql += ' CASCADE'
self.execute(pysql.SQL(sql).format(pysql.Identifier(name)))
class Connection(psycopg2.extensions.connection):
""" A connection that provides the specialised cursor by default and
adds convenience functions for administrating the database.
"""
@overload # type: ignore[override]
def cursor(self) -> Cursor:
...
@overload
def cursor(self, name: str) -> Cursor:
...
@overload
def cursor(self, cursor_factory: Callable[..., T_cursor]) -> T_cursor:
...
def cursor(self, cursor_factory = Cursor, **kwargs): # type: ignore
""" Return a new cursor. By default the specialised cursor is returned.
"""
return super().cursor(cursor_factory=cursor_factory, **kwargs)
def table_exists(self, table: str) -> bool:
""" Check that a table with the given name exists in the database.
"""
with self.cursor() as cur:
num = cur.scalar("""SELECT count(*) FROM pg_tables
WHERE tablename = %s and schemaname = 'public'""", (table, ))
return num == 1 if isinstance(num, int) else False
def table_has_column(self, table: str, column: str) -> bool:
""" Check if the table 'table' exists and has a column with name 'column'.
"""
with self.cursor() as cur:
has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
WHERE table_name = %s
and column_name = %s""",
(table, column))
return has_column > 0 if isinstance(has_column, int) else False
def index_exists(self, index: str, table: Optional[str] = None) -> bool:
""" Check that an index with the given name exists in the database.
If table is not None then the index must relate to the given
table.
"""
with self.cursor() as cur:
cur.execute("""SELECT tablename FROM pg_indexes
WHERE indexname = %s and schemaname = 'public'""", (index, ))
if cur.rowcount == 0:
return False
if table is not None:
row = cur.fetchone()
if row is None or not isinstance(row[0], str):
return False
return row[0] == table
return True
def drop_table(self, name: str, if_exists: bool = True, cascade: bool = False) -> None:
""" Drop the table with the given name.
Set `if_exists` to False if a non-existent table should raise
an exception instead of just being ignored.
"""
with self.cursor() as cur:
cur.drop_table(name, if_exists, cascade)
self.commit()
def server_version_tuple(self) -> Tuple[int, int]:
""" Return the server version as a tuple of (major, minor).
Converts correctly for pre-10 and post-10 PostgreSQL versions.
"""
version = self.server_version
if version < 100000:
return (int(version / 10000), int((version % 10000) / 100))
return (int(version / 10000), version % 10000)
def postgis_version_tuple(self) -> Tuple[int, int]:
""" Return the postgis version installed in the database as a
tuple of (major, minor). Assumes that the PostGIS extension
has been installed already.
"""
with self.cursor() as cur:
version = cur.scalar('SELECT postgis_lib_version()')
version_parts = version.split('.')
if len(version_parts) < 2:
raise UsageError(f"Error fetching Postgis version. Bad format: {version}")
return (int(version_parts[0]), int(version_parts[1]))
def extension_loaded(self, extension_name: str) -> bool:
""" Return True if the hstore extension is loaded in the database.
"""
with self.cursor() as cur:
cur.execute('SELECT extname FROM pg_extension WHERE extname = %s', (extension_name, ))
return cur.rowcount > 0
class ConnectionContext(ContextManager[Connection]):
""" Context manager of the connection that also provides direct access
to the underlying connection.
"""
connection: Connection
def connect(dsn: str) -> ConnectionContext:
""" Open a connection to the database using the specialised connection
factory. The returned object may be used in conjunction with 'with'.
When used outside a context manager, use the `connection` attribute
to get the connection.
"""
try:
conn = psycopg2.connect(dsn, connection_factory=Connection)
ctxmgr = cast(ConnectionContext, contextlib.closing(conn))
ctxmgr.connection = conn
return ctxmgr
except psycopg2.OperationalError as err:
raise UsageError(f"Cannot connect to database: {err}") from err
# Translation from PG connection string parameters to PG environment variables.
# Derived from https://www.postgresql.org/docs/current/libpq-envars.html.
_PG_CONNECTION_STRINGS = {
'host': 'PGHOST',
'hostaddr': 'PGHOSTADDR',
'port': 'PGPORT',
'dbname': 'PGDATABASE',
'user': 'PGUSER',
'password': 'PGPASSWORD',
'passfile': 'PGPASSFILE',
'channel_binding': 'PGCHANNELBINDING',
'service': 'PGSERVICE',
'options': 'PGOPTIONS',
'application_name': 'PGAPPNAME',
'sslmode': 'PGSSLMODE',
'requiressl': 'PGREQUIRESSL',
'sslcompression': 'PGSSLCOMPRESSION',
'sslcert': 'PGSSLCERT',
'sslkey': 'PGSSLKEY',
'sslrootcert': 'PGSSLROOTCERT',
'sslcrl': 'PGSSLCRL',
'requirepeer': 'PGREQUIREPEER',
'ssl_min_protocol_version': 'PGSSLMINPROTOCOLVERSION',
'ssl_max_protocol_version': 'PGSSLMAXPROTOCOLVERSION',
'gssencmode': 'PGGSSENCMODE',
'krbsrvname': 'PGKRBSRVNAME',
'gsslib': 'PGGSSLIB',
'connect_timeout': 'PGCONNECT_TIMEOUT',
'target_session_attrs': 'PGTARGETSESSIONATTRS',
}
def get_pg_env(dsn: str,
base_env: Optional[SysEnv] = None) -> Dict[str, str]:
""" Return a copy of `base_env` with the environment variables for
PostgreSQL set up from the given database connection string.
If `base_env` is None, then the OS environment is used as a base
environment.
"""
env = dict(base_env if base_env is not None else os.environ)
for param, value in psycopg2.extensions.parse_dsn(dsn).items():
if param in _PG_CONNECTION_STRINGS:
env[_PG_CONNECTION_STRINGS[param]] = value
else:
LOG.error("Unknown connection parameter '%s' ignored.", param)
return env

View File

@@ -0,0 +1,47 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Query and access functions for the in-database property table.
"""
from typing import Optional, cast
from .connection import Connection
def set_property(conn: Connection, name: str, value: str) -> None:
""" Add or replace the property with the given name.
"""
with conn.cursor() as cur:
cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
(name, ))
if cur.rowcount == 0:
sql = 'INSERT INTO nominatim_properties (value, property) VALUES (%s, %s)'
else:
sql = 'UPDATE nominatim_properties SET value = %s WHERE property = %s'
cur.execute(sql, (value, name))
conn.commit()
def get_property(conn: Connection, name: str) -> Optional[str]:
""" Return the current value of the given property or None if the property
is not set.
"""
if not conn.table_exists('nominatim_properties'):
return None
with conn.cursor() as cur:
cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
(name, ))
if cur.rowcount == 0:
return None
result = cur.fetchone()
assert result is not None
return cast(Optional[str], result[0])

View File

@@ -0,0 +1,143 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Preprocessing of SQL files.
"""
from typing import Set, Dict, Any, cast
import jinja2
from .connection import Connection
from .async_connection import WorkerPool
from ..config import Configuration
def _get_partitions(conn: Connection) -> Set[int]:
""" Get the set of partitions currently in use.
"""
with conn.cursor() as cur:
cur.execute('SELECT DISTINCT partition FROM country_name')
partitions = set([0])
for row in cur:
partitions.add(row[0])
return partitions
def _get_tables(conn: Connection) -> Set[str]:
""" Return the set of tables currently in use.
"""
with conn.cursor() as cur:
cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
return set((row[0] for row in list(cur)))
def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:
""" Returns the version of the slim middle tables.
"""
if 'osm2pgsql_properties' not in tables:
return '1'
with conn.cursor() as cur:
cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
row = cur.fetchone()
return cast(str, row[0]) if row is not None else '1'
def _setup_tablespace_sql(config: Configuration) -> Dict[str, str]:
""" Returns a dict with tablespace expressions for the different tablespace
kinds depending on whether a tablespace is configured or not.
"""
out = {}
for subset in ('ADDRESS', 'SEARCH', 'AUX'):
for kind in ('DATA', 'INDEX'):
tspace = getattr(config, f'TABLESPACE_{subset}_{kind}')
if tspace:
tspace = f'TABLESPACE "{tspace}"'
out[f'{subset.lower()}_{kind.lower()}'] = tspace
return out
def _setup_postgresql_features(conn: Connection) -> Dict[str, Any]:
""" Set up a dictionary with various optional Postgresql/Postgis features that
depend on the database version.
"""
pg_version = conn.server_version_tuple()
postgis_version = conn.postgis_version_tuple()
pg11plus = pg_version >= (11, 0, 0)
ps3 = postgis_version >= (3, 0)
return {
'has_index_non_key_column': pg11plus,
'spgist_geom' : 'SPGIST' if pg11plus and ps3 else 'GIST'
}
class SQLPreprocessor:
""" A environment for preprocessing SQL files from the
lib-sql directory.
The preprocessor provides a number of default filters and variables.
The variables may be overwritten when rendering an SQL file.
The preprocessing is currently based on the jinja2 templating library
and follows its syntax.
"""
def __init__(self, conn: Connection, config: Configuration) -> None:
self.env = jinja2.Environment(autoescape=False,
loader=jinja2.FileSystemLoader(str(config.lib_dir.sql)))
db_info: Dict[str, Any] = {}
db_info['partitions'] = _get_partitions(conn)
db_info['tables'] = _get_tables(conn)
db_info['reverse_only'] = 'search_name' not in db_info['tables']
db_info['tablespace'] = _setup_tablespace_sql(config)
db_info['middle_db_format'] = _get_middle_db_format(conn, db_info['tables'])
self.env.globals['config'] = config
self.env.globals['db'] = db_info
self.env.globals['postgres'] = _setup_postgresql_features(conn)
def run_string(self, conn: Connection, template: str, **kwargs: Any) -> None:
""" Execute the given SQL template string on the connection.
The keyword arguments may supply additional parameters
for preprocessing.
"""
sql = self.env.from_string(template).render(**kwargs)
with conn.cursor() as cur:
cur.execute(sql)
conn.commit()
def run_sql_file(self, conn: Connection, name: str, **kwargs: Any) -> None:
""" Execute the given SQL file on the connection. The keyword arguments
may supply additional parameters for preprocessing.
"""
sql = self.env.get_template(name).render(**kwargs)
with conn.cursor() as cur:
cur.execute(sql)
conn.commit()
def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
**kwargs: Any) -> None:
""" Execute the given SQL files using parallel asynchronous connections.
The keyword arguments may supply additional parameters for
preprocessing.
After preprocessing the SQL code is cut at lines containing only
'---'. Each chunk is sent to one of the `num_threads` workers.
"""
sql = self.env.get_template(name).render(**kwargs)
parts = sql.split('\n---\n')
with WorkerPool(dsn, num_threads) as pool:
for part in parts:
pool.next_free_worker().perform(part)

View File

@@ -0,0 +1,127 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Access and helper functions for the status and status log table.
"""
from typing import Optional, Tuple, cast
import datetime as dt
import logging
import re
from .connection import Connection
from ..utils.url_utils import get_url
from ..errors import UsageError
from ..typing import TypedDict
LOG = logging.getLogger()
ISODATE_FORMAT = '%Y-%m-%dT%H:%M:%S'
class StatusRow(TypedDict):
""" Dictionary of columns of the import_status table.
"""
lastimportdate: dt.datetime
sequence_id: Optional[int]
indexed: Optional[bool]
def compute_database_date(conn: Connection, offline: bool = False) -> dt.datetime:
""" Determine the date of the database from the newest object in the
data base.
"""
# If there is a date from osm2pgsql available, use that.
if conn.table_exists('osm2pgsql_properties'):
with conn.cursor() as cur:
cur.execute(""" SELECT value FROM osm2pgsql_properties
WHERE property = 'current_timestamp' """)
row = cur.fetchone()
if row is not None:
return dt.datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%SZ")\
.replace(tzinfo=dt.timezone.utc)
if offline:
raise UsageError("Cannot determine database date from data in offline mode.")
# Else, find the node with the highest ID in the database
with conn.cursor() as cur:
if conn.table_exists('place'):
osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
else:
osmid = cur.scalar("SELECT max(osm_id) FROM placex WHERE osm_type='N'")
if osmid is None:
LOG.fatal("No data found in the database.")
raise UsageError("No data found in the database.")
LOG.info("Using node id %d for timestamp lookup", osmid)
# Get the node from the API to find the timestamp when it was created.
node_url = f'https://www.openstreetmap.org/api/0.6/node/{osmid}/1'
data = get_url(node_url)
match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
if match is None:
LOG.fatal("The node data downloaded from the API does not contain valid data.\n"
"URL used: %s", node_url)
raise UsageError("Bad API data.")
LOG.debug("Found timestamp %s", match.group(1))
return dt.datetime.strptime(match.group(1), ISODATE_FORMAT).replace(tzinfo=dt.timezone.utc)
def set_status(conn: Connection, date: Optional[dt.datetime],
seq: Optional[int] = None, indexed: bool = True) -> None:
""" Replace the current status with the given status. If date is `None`
then only sequence and indexed will be updated as given. Otherwise
the whole status is replaced.
The change will be committed to the database.
"""
assert date is None or date.tzinfo == dt.timezone.utc
with conn.cursor() as cur:
if date is None:
cur.execute("UPDATE import_status set sequence_id = %s, indexed = %s",
(seq, indexed))
else:
cur.execute("TRUNCATE TABLE import_status")
cur.execute("""INSERT INTO import_status (lastimportdate, sequence_id, indexed)
VALUES (%s, %s, %s)""", (date, seq, indexed))
conn.commit()
def get_status(conn: Connection) -> Tuple[Optional[dt.datetime], Optional[int], Optional[bool]]:
""" Return the current status as a triple of (date, sequence, indexed).
If status has not been set up yet, a triple of None is returned.
"""
with conn.cursor() as cur:
cur.execute("SELECT * FROM import_status LIMIT 1")
if cur.rowcount < 1:
return None, None, None
row = cast(StatusRow, cur.fetchone())
return row['lastimportdate'], row['sequence_id'], row['indexed']
def set_indexed(conn: Connection, state: bool) -> None:
""" Set the indexed flag in the status table to the given state.
"""
with conn.cursor() as cur:
cur.execute("UPDATE import_status SET indexed = %s", (state, ))
conn.commit()
def log_status(conn: Connection, start: dt.datetime,
event: str, batchsize: Optional[int] = None) -> None:
""" Write a new status line to the `import_osmosis_log` table.
"""
with conn.cursor() as cur:
cur.execute("""INSERT INTO import_osmosis_log
(batchend, batchseq, batchsize, starttime, endtime, event)
SELECT lastimportdate, sequence_id, %s, %s, now(), %s FROM import_status""",
(batchsize, start, event))
conn.commit()

View File

@@ -0,0 +1,129 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for handling DB accesses.
"""
from typing import IO, Optional, Union, Any, Iterable
import subprocess
import logging
import gzip
import io
from pathlib import Path
from .connection import get_pg_env, Cursor
from ..errors import UsageError
LOG = logging.getLogger()
def _pipe_to_proc(proc: 'subprocess.Popen[bytes]',
fdesc: Union[IO[bytes], gzip.GzipFile]) -> int:
assert proc.stdin is not None
chunk = fdesc.read(2048)
while chunk and proc.poll() is None:
try:
proc.stdin.write(chunk)
except BrokenPipeError as exc:
raise UsageError("Failed to execute SQL file.") from exc
chunk = fdesc.read(2048)
return len(chunk)
def execute_file(dsn: str, fname: Path,
ignore_errors: bool = False,
pre_code: Optional[str] = None,
post_code: Optional[str] = None) -> None:
""" Read an SQL file and run its contents against the given database
using psql. Use `pre_code` and `post_code` to run extra commands
before or after executing the file. The commands are run within the
same session, so they may be used to wrap the file execution in a
transaction.
"""
cmd = ['psql']
if not ignore_errors:
cmd.extend(('-v', 'ON_ERROR_STOP=1'))
if not LOG.isEnabledFor(logging.INFO):
cmd.append('--quiet')
with subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) as proc:
assert proc.stdin is not None
try:
if not LOG.isEnabledFor(logging.INFO):
proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
if pre_code:
proc.stdin.write((pre_code + ';').encode('utf-8'))
if fname.suffix == '.gz':
with gzip.open(str(fname), 'rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
else:
with fname.open('rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
if remain == 0 and post_code:
proc.stdin.write((';' + post_code).encode('utf-8'))
finally:
proc.stdin.close()
ret = proc.wait()
if ret != 0 or remain > 0:
raise UsageError("Failed to execute SQL file.")
# List of characters that need to be quoted for the copy command.
_SQL_TRANSLATION = {ord('\\'): '\\\\',
ord('\t'): '\\t',
ord('\n'): '\\n'}
class CopyBuffer:
""" Data collector for the copy_from command.
"""
def __init__(self) -> None:
self.buffer = io.StringIO()
def __enter__(self) -> 'CopyBuffer':
return self
def size(self) -> int:
""" Return the number of bytes the buffer currently contains.
"""
return self.buffer.tell()
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
if self.buffer is not None:
self.buffer.close()
def add(self, *data: Any) -> None:
""" Add another row of data to the copy buffer.
"""
first = True
for column in data:
if first:
first = False
else:
self.buffer.write('\t')
if column is None:
self.buffer.write('\\N')
else:
self.buffer.write(str(column).translate(_SQL_TRANSLATION))
self.buffer.write('\n')
def copy_out(self, cur: Cursor, table: str, columns: Optional[Iterable[str]] = None) -> None:
""" Copy all collected data into the given table.
The buffer is empty and reusable after this operation.
"""
if self.buffer.tell() > 0:
self.buffer.seek(0)
cur.copy_from(self.buffer, table, columns=columns)
self.buffer = io.StringIO()

View File

@@ -0,0 +1,14 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom exception and error classes for Nominatim.
"""
class UsageError(Exception):
""" An error raised because of bad user input. This error will usually
not cause a stack trace to be printed unless debugging is enabled.
"""

View File

@@ -13,9 +13,9 @@ import time
import psycopg2.extras
from nominatim_core.typing import DictCursorResults
from nominatim_core.db.async_connection import DBConnection, WorkerPool
from nominatim_core.db.connection import connect, Connection, Cursor
from ..typing import DictCursorResults
from ..db.async_connection import DBConnection, WorkerPool
from ..db.connection import connect, Connection, Cursor
from ..tokenizer.base import AbstractTokenizer
from .progress import ProgressLogger
from . import runners

View File

@@ -14,8 +14,8 @@ import functools
from psycopg2 import sql as pysql
import psycopg2.extras
from nominatim_core.typing import Query, DictCursorResult, DictCursorResults, Protocol
from nominatim_core.db.async_connection import DBConnection
from ..typing import Query, DictCursorResult, DictCursorResults, Protocol
from ..db.async_connection import DBConnection
from ..data.place_info import PlaceInfo
from ..tokenizer.base import AbstractAnalyzer

15
src/nominatim_db/paths.py Normal file
View File

@@ -0,0 +1,15 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Path settings for extra data used by Nominatim.
"""
from pathlib import Path
PHPLIB_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-php').resolve()
SQLLIB_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-sql').resolve()
DATA_DIR = (Path(__file__) / '..' / '..' / '..' / 'data').resolve()
CONFIG_DIR = (Path(__file__) / '..' / '..' / '..' / 'settings').resolve()

View File

@@ -12,9 +12,9 @@ from abc import ABC, abstractmethod
from typing import List, Tuple, Dict, Any, Optional, Iterable
from pathlib import Path
from nominatim_core.typing import Protocol
from nominatim_core.config import Configuration
from nominatim_core.db.connection import Connection
from ..typing import Protocol
from ..config import Configuration
from ..db.connection import Connection
from ..data.place_info import PlaceInfo
class AbstractAnalyzer(ABC):

View File

@@ -24,10 +24,10 @@ import logging
import importlib
from pathlib import Path
from nominatim_core.errors import UsageError
from nominatim_core.db import properties
from nominatim_core.db.connection import connect
from nominatim_core.config import Configuration
from ..errors import UsageError
from ..db import properties
from ..db.connection import connect
from ..config import Configuration
from ..tokenizer.base import AbstractTokenizer, TokenizerModule
LOG = logging.getLogger()

View File

@@ -14,10 +14,10 @@ import logging
from icu import Transliterator
from nominatim_core.config import flatten_config_list, Configuration
from nominatim_core.db.properties import set_property, get_property
from nominatim_core.db.connection import Connection
from nominatim_core.errors import UsageError
from ..config import flatten_config_list, Configuration
from ..db.properties import set_property, get_property
from ..db.connection import Connection
from ..errors import UsageError
from .place_sanitizer import PlaceSanitizer
from .icu_token_analysis import ICUTokenAnalysis
from .token_analysis.base import AnalysisModule, Analyzer

View File

@@ -16,10 +16,10 @@ import logging
from pathlib import Path
from textwrap import dedent
from nominatim_core.db.connection import connect, Connection, Cursor
from nominatim_core.config import Configuration
from nominatim_core.db.utils import CopyBuffer
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
from ..db.connection import connect, Connection, Cursor
from ..config import Configuration
from ..db.utils import CopyBuffer
from ..db.sql_preprocessor import SQLPreprocessor
from ..data.place_info import PlaceInfo
from ..data.place_name import PlaceName
from .icu_rule_loader import ICURuleLoader

View File

@@ -20,12 +20,12 @@ from icu import Transliterator
import psycopg2
import psycopg2.extras
from nominatim_core.errors import UsageError
from nominatim_core.db.connection import connect, Connection
from nominatim_core.config import Configuration
from nominatim_core.db import properties
from nominatim_core.db import utils as db_utils
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
from ..errors import UsageError
from ..db.connection import connect, Connection
from ..config import Configuration
from ..db import properties
from ..db import utils as db_utils
from ..db.sql_preprocessor import SQLPreprocessor
from ..data.place_info import PlaceInfo
from .base import AbstractAnalyzer, AbstractTokenizer

View File

@@ -10,8 +10,8 @@ is handed to the token analysis.
"""
from typing import Optional, List, Mapping, Sequence, Callable, Any, Tuple
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from ..errors import UsageError
from ..config import Configuration
from .sanitizers.config import SanitizerConfig
from .sanitizers.base import SanitizerHandler, ProcessInfo
from ..data.place_name import PlaceName

View File

@@ -9,7 +9,7 @@ Common data types and protocols for sanitizers.
"""
from typing import Optional, List, Mapping, Callable
from nominatim_core.typing import Protocol, Final
from ...typing import Protocol, Final
from ...data.place_info import PlaceInfo
from ...data.place_name import PlaceName
from .config import SanitizerConfig

View File

@@ -11,7 +11,7 @@ from typing import Sequence, Union, Optional, Pattern, Callable, Any, TYPE_CHECK
from collections import UserDict
import re
from nominatim_core.errors import UsageError
from ...errors import UsageError
# working around missing generics in Python < 3.8
# See https://github.com/python/typing/issues/60#issuecomment-869757075

View File

@@ -9,7 +9,7 @@ Common data types and protocols for analysers.
"""
from typing import Mapping, List, Any
from nominatim_core.typing import Protocol
from ...typing import Protocol
from ...data.place_name import PlaceName
class Analyzer(Protocol):

View File

@@ -12,8 +12,8 @@ from collections import defaultdict
import itertools
import re
from nominatim_core.config import flatten_config_list
from nominatim_core.errors import UsageError
from ...config import flatten_config_list
from ...errors import UsageError
class ICUVariant(NamedTuple):
""" A single replacement rule for variant creation.

View File

@@ -12,7 +12,7 @@ import itertools
import datrie
from nominatim_core.errors import UsageError
from ...errors import UsageError
from ...data.place_name import PlaceName
from .config_variants import get_variant_config
from .generic_mutation import MutationVariantGenerator

View File

@@ -12,7 +12,7 @@ import itertools
import logging
import re
from nominatim_core.errors import UsageError
from ...errors import UsageError
LOG = logging.getLogger()

View File

@@ -12,8 +12,8 @@ from pathlib import Path
import logging
import urllib
from nominatim_core.db.connection import connect
from nominatim_core.utils.url_utils import get_url
from ..db.connection import connect
from ..utils.url_utils import get_url
from .exec_utils import run_osm2pgsql
LOG = logging.getLogger()

View File

@@ -13,10 +13,10 @@ import logging
from psycopg2.extras import Json, register_hstore
from psycopg2 import DataError
from nominatim_core.typing import DictCursorResult
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect, Cursor
from nominatim_core.errors import UsageError
from ..typing import DictCursorResult
from ..config import Configuration
from ..db.connection import connect, Cursor
from ..errors import UsageError
from ..tokenizer import factory as tokenizer_factory
from ..data.place_info import PlaceInfo

View File

@@ -11,10 +11,10 @@ from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
from enum import Enum
from textwrap import dedent
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect, Connection
from nominatim_core.db import properties
from nominatim_core.errors import UsageError
from ..config import Configuration
from ..db.connection import connect, Connection
from ..db import properties
from ..errors import UsageError
from ..tokenizer import factory as tokenizer_factory
from . import freeze
from ..version import NOMINATIM_VERSION, parse_version

View File

@@ -17,8 +17,8 @@ from typing import List, Optional, Tuple, Union
import psutil
from psycopg2.extensions import make_dsn, parse_dsn
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from ..config import Configuration
from ..db.connection import connect
from ..version import NOMINATIM_VERSION

View File

@@ -16,8 +16,8 @@ import sqlalchemy as sa
import nominatim_api as napi
from nominatim_api.search.query_analyzer_factory import make_query_analyzer
from nominatim_core.typing import SaSelect, SaRow
from nominatim_core.db.sqlalchemy_types import Geometry, IntArray
from nominatim_api.typing import SaSelect, SaRow
from nominatim_api.sql.sqlalchemy_types import Geometry, IntArray
LOG = logging.getLogger()

View File

@@ -17,11 +17,11 @@ from pathlib import Path
import psutil
from psycopg2 import sql as pysql
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect, get_pg_env, Connection
from nominatim_core.db.async_connection import DBConnection
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
from ..errors import UsageError
from ..config import Configuration
from ..db.connection import connect, get_pg_env, Connection
from ..db.async_connection import DBConnection
from ..db.sql_preprocessor import SQLPreprocessor
from .exec_utils import run_osm2pgsql
from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION

View File

@@ -13,8 +13,8 @@ import os
import subprocess
import shutil
from nominatim_core.typing import StrPath
from nominatim_core.db.connection import get_pg_env
from ..typing import StrPath
from ..db.connection import get_pg_env
LOG = logging.getLogger()

View File

@@ -12,7 +12,7 @@ from pathlib import Path
from psycopg2 import sql as pysql
from nominatim_core.db.connection import Connection
from ..db.connection import Connection
UPDATE_TABLES = [
'address_levels',

View File

@@ -12,10 +12,10 @@ import logging
from psycopg2 import sql as pysql
from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.db import properties
from nominatim_core.db.connection import connect, Connection
from ..errors import UsageError
from ..config import Configuration
from ..db import properties
from ..db.connection import connect, Connection
from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
from ..tokenizer import factory as tokenizer_factory
from . import refresh

View File

@@ -18,8 +18,8 @@ from math import isfinite
from psycopg2 import sql as pysql
from nominatim_core.db.connection import connect, Connection
from nominatim_core.utils.centroid import PointsCentroid
from ..db.connection import connect, Connection
from ..utils.centroid import PointsCentroid
from ..data.postcode_format import PostcodeFormatter, CountryPostcodeMatcher
from ..tokenizer.base import AbstractAnalyzer, AbstractTokenizer

View File

@@ -16,10 +16,10 @@ from pathlib import Path
from psycopg2 import sql as pysql
from nominatim_core.config import Configuration
from nominatim_core.db.connection import Connection, connect
from nominatim_core.db.utils import execute_file, CopyBuffer
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
from ..config import Configuration
from ..db.connection import Connection, connect
from ..db.utils import execute_file, CopyBuffer
from ..db.sql_preprocessor import SQLPreprocessor
from ..version import NOMINATIM_VERSION
LOG = logging.getLogger()

View File

@@ -18,9 +18,9 @@ import urllib.request as urlrequest
import requests
from nominatim_core.errors import UsageError
from nominatim_core.db import status
from nominatim_core.db.connection import Connection, connect
from ..errors import UsageError
from ..db import status
from ..db.connection import Connection, connect
from .exec_utils import run_osm2pgsql
try:

View File

@@ -13,7 +13,7 @@ from typing import Iterable
import csv
import os
from nominatim_core.errors import UsageError
from ...errors import UsageError
from .special_phrase import SpecialPhrase
class SPCsvLoader:

View File

@@ -19,9 +19,9 @@ import re
from psycopg2.sql import Identifier, SQL
from nominatim_core.typing import Protocol
from nominatim_core.config import Configuration
from nominatim_core.db.connection import Connection
from ...typing import Protocol
from ...config import Configuration
from ...db.connection import Connection
from .importer_statistics import SpecialPhrasesImporterStatistics
from .special_phrase import SpecialPhrase
from ...tokenizer.base import AbstractTokenizer

View File

@@ -11,8 +11,8 @@ from typing import Iterable
import re
import logging
from nominatim_core.config import Configuration
from nominatim_core.utils.url_utils import get_url
from ...config import Configuration
from ...utils.url_utils import get_url
from .special_phrase import SpecialPhrase
LOG = logging.getLogger()

View File

@@ -16,11 +16,11 @@ import tarfile
from psycopg2.extras import Json
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from nominatim_core.db.async_connection import WorkerPool
from nominatim_core.db.sql_preprocessor import SQLPreprocessor
from nominatim_core.errors import UsageError
from ..config import Configuration
from ..db.connection import connect
from ..db.async_connection import WorkerPool
from ..db.sql_preprocessor import SQLPreprocessor
from ..errors import UsageError
from ..data.place_info import PlaceInfo
from ..tokenizer.base import AbstractAnalyzer, AbstractTokenizer
from . import freeze

View File

@@ -0,0 +1,52 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Type definitions for typing annotations.
Complex type definitions are moved here, to keep the source files readable.
"""
from typing import Any, Union, Mapping, TypeVar, Sequence, TYPE_CHECKING
# Generics variable names do not confirm to naming styles, ignore globally here.
# pylint: disable=invalid-name,abstract-method,multiple-statements
# pylint: disable=missing-class-docstring,useless-import-alias
if TYPE_CHECKING:
import psycopg2.sql
import psycopg2.extensions
import psycopg2.extras
import os
StrPath = Union[str, 'os.PathLike[str]']
SysEnv = Mapping[str, str]
# psycopg2-related types
Query = Union[str, bytes, 'psycopg2.sql.Composable']
T_ResultKey = TypeVar('T_ResultKey', int, str)
class DictCursorResult(Mapping[str, Any]):
def __getitem__(self, x: Union[int, str]) -> Any: ...
DictCursorResults = Sequence[DictCursorResult]
T_cursor = TypeVar('T_cursor', bound='psycopg2.extensions.cursor')
# The following typing features require typing_extensions to work
# on all supported Python versions.
# Only require this for type checking but not for normal operations.
if TYPE_CHECKING:
from typing_extensions import (Protocol as Protocol,
Final as Final,
TypedDict as TypedDict)
else:
Protocol = object
Final = 'Final'
TypedDict = dict

View File

View File

@@ -0,0 +1,49 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for computation of centroids.
"""
from typing import Tuple, Any
from collections.abc import Collection
class PointsCentroid:
""" Centroid computation from single points using an online algorithm.
More points may be added at any time.
Coordinates are internally treated as a 7-digit fixed-point float
(i.e. in OSM style).
"""
def __init__(self) -> None:
self.sum_x = 0
self.sum_y = 0
self.count = 0
def centroid(self) -> Tuple[float, float]:
""" Return the centroid of all points collected so far.
"""
if self.count == 0:
raise ValueError("No points available for centroid.")
return (float(self.sum_x/self.count)/10000000,
float(self.sum_y/self.count)/10000000)
def __len__(self) -> int:
return self.count
def __iadd__(self, other: Any) -> 'PointsCentroid':
if isinstance(other, Collection) and len(other) == 2:
if all(isinstance(p, (float, int)) for p in other):
x, y = other
self.sum_x += int(x * 10000000)
self.sum_y += int(y * 10000000)
self.count += 1
return self
raise ValueError("Can only add 2-element tuples to centroid.")

View File

@@ -0,0 +1,31 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for accessing URL.
"""
from typing import IO
import logging
import urllib.request as urlrequest
from ..version import NOMINATIM_VERSION
LOG = logging.getLogger()
def get_url(url: str) -> str:
""" Get the contents from the given URL and return it as a UTF-8 string.
This version makes sure that an appropriate user agent is sent.
"""
headers = {"User-Agent": f"Nominatim/{NOMINATIM_VERSION!s}"}
try:
request = urlrequest.Request(url, headers=headers)
with urlrequest.urlopen(request) as response: # type: IO[bytes]
return response.read().decode('utf-8')
except Exception:
LOG.fatal('Failed to load URL: %s', url)
raise

View File

@@ -7,13 +7,55 @@
"""
Version information for Nominatim.
"""
from typing import Optional
from typing import NamedTuple, Optional
# See also https://github.com/PyCQA/pylint/issues/6006
# pylint: disable=useless-import-alias,unused-import
from nominatim_core.version import (NominatimVersion as NominatimVersion,
parse_version as parse_version)
class NominatimVersion(NamedTuple):
""" Version information for Nominatim. We follow semantic versioning.
Major, minor and patch_level refer to the last released version.
The database patch level tracks important changes between releases
and must always be increased when there is a change to the database or code
that requires a migration.
When adding a migration on the development branch, raise the patch level
to 99 to make sure that the migration is applied when updating from a
patch release to the next minor version. Patch releases usually shouldn't
have migrations in them. When they are needed, then make sure that the
migration can be reapplied and set the migration version to the appropriate
patch level when cherry-picking the commit with the migration.
"""
major: int
minor: int
patch_level: int
db_patch_level: Optional[int]
def __str__(self) -> str:
if self.db_patch_level is None:
return f"{self.major}.{self.minor}.{self.patch_level}"
return f"{self.major}.{self.minor}.{self.patch_level}-{self.db_patch_level}"
def release_version(self) -> str:
""" Return the release version in semantic versioning format.
The release version does not include the database patch version.
"""
return f"{self.major}.{self.minor}.{self.patch_level}"
def parse_version(version: str) -> NominatimVersion:
""" Parse a version string into a version consisting of a tuple of
four ints: major, minor, patch level, database patch level
This is the reverse operation of `version_str()`.
"""
parts = version.split('.')
return NominatimVersion(*[int(x) for x in parts[:2] + parts[2].split('-')])
NOMINATIM_VERSION = parse_version('4.4.99-1')