port database setup function to python

Hide the former PHP functions in a transition command until
they are removed.
This commit is contained in:
Sarah Hoffmann
2021-02-23 22:50:23 +01:00
parent b93ec2522e
commit f6e894a53a
18 changed files with 357 additions and 116 deletions

View File

@@ -354,4 +354,6 @@ def nominatim(**kwargs):
else:
parser.parser.epilog = 'php-cgi not found. Query commands not available.'
parser.add_subcommand('transition', clicmd.AdminTransition)
return parser.run(**kwargs)

View File

@@ -8,3 +8,4 @@ from .index import UpdateIndex
from .refresh import UpdateRefresh
from .admin import AdminFuncs
from .freeze import SetupFreeze
from .transition import AdminTransition

View File

@@ -50,13 +50,11 @@ class UpdateRefresh:
if args.postcodes:
LOG.warning("Update postcodes centroid")
with connect(args.config.get_libpq_dsn()) as conn:
refresh.update_postcodes(conn, args.sqllib_dir)
refresh.update_postcodes(args.config.get_libpq_dsn(), args.sqllib_dir)
if args.word_counts:
LOG.warning('Recompute frequency of full-word search terms')
with connect(args.config.get_libpq_dsn()) as conn:
refresh.recompute_word_counts(conn, args.sqllib_dir)
refresh.recompute_word_counts(args.config.get_libpq_dsn(), args.sqllib_dir)
if args.address_levels:
cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)

View File

@@ -0,0 +1,53 @@
"""
Implementation of the 'transition' subcommand.
This subcommand provides stand-ins for functions that were available
through the PHP scripts but are now no longer directly accessible.
This module will be removed as soon as the transition phase is over.
"""
import logging
from ..db.connection import connect
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
class AdminTransition:
    """\
    Internal functions for code transition. Do not use.
    """
    # NOTE(review): the class docstring above is kept verbatim; it presumably
    # ends up in the command-line help of the subcommand - confirm before
    # rewording it.

    @staticmethod
    def add_args(parser):
        # Switches that select which setup step(s) should be executed.
        functions = parser.add_argument_group('Sub-functions')
        functions.add_argument('--create-db', action='store_true',
                               help='Create nominatim db')
        functions.add_argument('--setup-db', action='store_true',
                               help='Build a blank nominatim db')

        # Switches that modify how the selected steps behave.
        options = parser.add_argument_group('Options')
        options.add_argument('--no-partitions', action='store_true',
                             help='Do not partition search indices')

    @staticmethod
    def run(args):
        # Deliberately imported at function level (see the pylint note at
        # the top of the module).
        from ..tools import database_import as dbimport

        # Step 1: create the empty database.
        if args.create_db:
            LOG.warning('Create DB')
            dbimport.create_db(args.config.get_libpq_dsn())

        # Step 2: install the module, the extensions and the static data.
        if args.setup_db:
            LOG.warning('Setup DB')
            module_path = dbimport.install_module(
                args.module_dir, args.project_dir,
                args.config.DATABASE_MODULE_PATH)

            with connect(args.config.get_libpq_dsn()) as conn:
                dbimport.setup_extensions(conn)
                dbimport.check_module_dir_path(conn, module_path)

            # The base data is loaded through its own connection, hence
            # only the DSN is handed over.
            dbimport.import_base_data(args.config.get_libpq_dsn(),
                                      args.data_dir, args.no_partitions)

View File

@@ -81,9 +81,21 @@ class _Connection(psycopg2.extensions.connection):
"""
version = self.server_version
if version < 100000:
return (version / 10000, (version % 10000) / 100)
return (int(version / 10000), (version % 10000) / 100)
return (int(version / 10000), version % 10000)
def postgis_version_tuple(self):
    """ Report the version of the PostGIS library used by the database.

        The version string is obtained with `SELECT postgis_lib_version()`.
        Its leading (at most two) dot-separated components are converted
        to integers and returned as a tuple, i.e. (major, minor).
        The PostGIS extension must already be installed in the database.
    """
    with self.cursor() as cur:
        version_string = cur.scalar('SELECT postgis_lib_version()')

    # Anything after the minor version (patch level etc.) is dropped.
    leading_parts = version_string.split('.')[:2]
    return tuple(map(int, leading_parts))
return (version / 10000, version % 10000)
def connect(dsn):
""" Open a connection to the database using the specialised connection
@@ -123,7 +135,7 @@ _PG_CONNECTION_STRINGS = {
'sslcrl': 'PGSSLCRL',
'requirepeer': 'PGREQUIREPEER',
'ssl_min_protocol_version': 'PGSSLMINPROTOCOLVERSION',
'ssl_min_protocol_version': 'PGSSLMAXPROTOCOLVERSION',
'ssl_max_protocol_version': 'PGSSLMAXPROTOCOLVERSION',
'gssencmode': 'PGGSSENCMODE',
'krbsrvname': 'PGKRBSRVNAME',
'gsslib': 'PGGSSLIB',
@@ -138,7 +150,7 @@ def get_pg_env(dsn, base_env=None):
If `base_env` is None, then the OS environment is used as a base
environment.
"""
env = base_env if base_env is not None else os.environ
env = dict(base_env if base_env is not None else os.environ)
for param, value in psycopg2.extensions.parse_dsn(dsn).items():
if param in _PG_CONNECTION_STRINGS:

View File

@@ -3,12 +3,24 @@ Helper functions for handling DB accesses.
"""
import subprocess
import logging
import gzip
from .connection import get_pg_env
from ..errors import UsageError
LOG = logging.getLogger()
def _pipe_to_proc(proc, fdesc):
chunk = fdesc.read(2048)
while chunk and proc.poll() is None:
try:
proc.stdin.write(chunk)
except BrokenPipeError as exc:
raise UsageError("Failed to execute SQL file.") from exc
chunk = fdesc.read(2048)
return len(chunk)
def execute_file(dsn, fname, ignore_errors=False):
""" Read an SQL file and run its contents against the given database
using psql.
@@ -21,15 +33,15 @@ def execute_file(dsn, fname, ignore_errors=False):
if not LOG.isEnabledFor(logging.INFO):
proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
with fname.open('rb') as fdesc:
chunk = fdesc.read(2048)
while chunk and proc.poll() is None:
proc.stdin.write(chunk)
chunk = fdesc.read(2048)
if fname.suffix == '.gz':
with gzip.open(str(fname), 'rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
else:
with fname.open('rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
proc.stdin.close()
ret = proc.wait()
print(ret, chunk)
if ret != 0 or chunk:
if ret != 0 or remain > 0:
raise UsageError("Failed to execute SQL file.")

View File

@@ -0,0 +1,121 @@
"""
Functions for setting up and importing a new Nominatim database.
"""
import logging
import subprocess
import shutil
from ..db.connection import connect, get_pg_env
from ..db import utils as db_utils
from ..errors import UsageError
from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
LOG = logging.getLogger()
def create_db(dsn, rouser=None):
    """ Create the database that the given DSN points to.

        The database is created by running the external `createdb` tool
        with an environment derived from the DSN. Afterwards the function
        connects to the new database and verifies that the PostgreSQL
        server is at least of version POSTGRESQL_REQUIRED_VERSION.

        When 'rouser' is given, it is additionally checked that a database
        user of that name exists.

        A UsageError is raised when `createdb` reports a failure, when the
        server is too old or when the user 'rouser' is missing.

        Requires superuser rights by the caller.
    """
    result = subprocess.run(['createdb'], env=get_pg_env(dsn), check=False)
    if result.returncode != 0:
        raise UsageError('Creating new database failed.')

    with connect(dsn) as conn:
        found_version = conn.server_version_tuple() # pylint: disable=E1101
        if found_version < POSTGRESQL_REQUIRED_VERSION:
            LOG.fatal('Minimum supported version of Postgresql is %d.%d. '
                      'Found version %d.%d.',
                      POSTGRESQL_REQUIRED_VERSION[0], POSTGRESQL_REQUIRED_VERSION[1],
                      found_version[0], found_version[1])
            raise UsageError('PostgreSQL server is too old.')

        # Without a user name to look for there is nothing left to check.
        if rouser is None:
            return

        with conn.cursor() as cur: # pylint: disable=E1101
            user_count = cur.scalar('SELECT count(*) FROM pg_user where usename = %s',
                                    (rouser, ))
            if user_count == 0:
                LOG.fatal("Web user '%s' does not exists. Create it with:\n"
                          "\n      createuser %s", rouser, rouser)
                raise UsageError('Missing read-only user.')
def setup_extensions(conn):
    """ Make sure that the database extensions used by Nominatim exist.

        Creates the 'hstore' and 'postgis' extensions unless they are
        already present and commits the change. Then the installed PostGIS
        version is compared against POSTGIS_REQUIRED_VERSION and a
        UsageError is raised when it is lower.
    """
    statements = ('CREATE EXTENSION IF NOT EXISTS hstore',
                  'CREATE EXTENSION IF NOT EXISTS postgis')

    with conn.cursor() as cur:
        for sql in statements:
            cur.execute(sql)
    conn.commit()

    # The version can only be queried once the extension is in place.
    found_version = conn.postgis_version_tuple()
    if found_version < POSTGIS_REQUIRED_VERSION:
        LOG.fatal('Minimum supported version of PostGIS is %d.%d. '
                  'Found version %d.%d.',
                  POSTGIS_REQUIRED_VERSION[0], POSTGIS_REQUIRED_VERSION[1],
                  found_version[0], found_version[1])
        raise UsageError('PostGIS version is too old.')
def install_module(src_dir, project_dir, module_dir):
    """ Determine the directory of the normalization module and install
        the module there if necessary. Returns the directory.

        If 'module_dir' is set, that custom location is reported and
        returned unchanged; nothing is copied.

        Otherwise the module goes into the 'module' subdirectory of
        project_dir: the directory is created when missing, 'nominatim.so'
        is copied over from src_dir and its permissions are set to 0755.
        When that subdirectory already exists and is the same as src_dir
        (installation is run from the build directory), the module is
        left untouched.
    """
    if module_dir:
        LOG.info("Using custom path for database module at '%s'", module_dir)
        return module_dir

    target_dir = project_dir / 'module'

    # Source and target being identical means there is nothing to copy.
    if target_dir.exists() and src_dir.samefile(target_dir):
        LOG.info('Running from build directory. Leaving database module as is.')
        return target_dir

    if not target_dir.exists():
        target_dir.mkdir()

    installed_file = target_dir / 'nominatim.so'
    shutil.copy(str(src_dir / 'nominatim.so'), str(installed_file))
    installed_file.chmod(0o755)

    LOG.info('Database module installed at %s', str(installed_file))
    return target_dir
def check_module_dir_path(conn, path):
    """ Check that the normalisation module can be found and executed
        from the given path.

        The check creates an SQL function that is backed by the file
        `<path>/nominatim.so` and drops it again right away. Errors raised
        by the database while doing so are passed on to the caller.

        `conn` is an open database connection, `path` the directory
        (string or path object) that contains the module.
    """
    module_file = '{}/nominatim.so'.format(path)

    with conn.cursor() as cur:
        # The file name is handed over as a query parameter so that the
        # database driver takes care of the quoting. Formatting it directly
        # into the SQL breaks the statement for any path that contains
        # a single quote.
        cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
                       RETURNS text AS %s, 'transliteration'
                       LANGUAGE c IMMUTABLE STRICT;
                       DROP FUNCTION nominatim_test_import_func(text)
                    """, (module_file, ))
def import_base_data(dsn, sql_dir, ignore_partitions=False):
    """ Load the static base data for geocoding into the database.

        Executes the SQL files 'country_name.sql' and
        'country_osm_grid.sql.gz' from sql_dir against the database given
        by dsn. When ignore_partitions is set, the partition of every row
        in the country_name table is reset to 0 afterwards.
    """
    for fname in ('country_name.sql', 'country_osm_grid.sql.gz'):
        db_utils.execute_file(dsn, sql_dir / fname)

    if not ignore_partitions:
        return

    # Put all countries into the same partition.
    with connect(dsn) as conn:
        with conn.cursor() as cur: # pylint: disable=E1101
            cur.execute('UPDATE country_name SET partition = 0')
        conn.commit() # pylint: disable=E1101

View File

@@ -2,13 +2,10 @@
Helper functions for executing external programs.
"""
import logging
import os
import subprocess
import urllib.request as urlrequest
from urllib.parse import urlencode
from psycopg2.extensions import parse_dsn
from ..version import NOMINATIM_VERSION
from ..db.connection import get_pg_env

View File

@@ -3,3 +3,6 @@ Version information for Nominatim.
"""
NOMINATIM_VERSION = "3.6.0"
POSTGRESQL_REQUIRED_VERSION = (9, 3)
POSTGIS_REQUIRED_VERSION = (2, 2)