move setup function to python

There are still back-calls to PHP for some of the sub-steps.
These need some larger refactoring to be moved to Python.
This commit is contained in:
Sarah Hoffmann
2021-02-26 15:02:39 +01:00
parent 3ee8d9fa75
commit 15b5906790
10 changed files with 342 additions and 102 deletions

View File

@@ -111,72 +111,6 @@ class CommandlineParser:
# pylint: disable=E0012,C0415
class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
    """

    @staticmethod
    def add_args(parser):
        """ Register the command-line options of the import command
            on the given argparse parser.
        """
        # Exactly one of --osm-file and --continue has to be given.
        required = parser.add_argument_group('Required arguments')
        source = required.add_mutually_exclusive_group(required=True)
        source.add_argument('--osm-file',
                            help='OSM file to be imported.')
        source.add_argument('--continue', dest='continue_at',
                            choices=['load-data', 'indexing', 'db-postprocess'],
                            help='Continue an import that was interrupted')

        optional = parser.add_argument_group('Optional arguments')
        optional.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                              help='Size of cache to be used by osm2pgsql (in MB)')
        # The remaining optional arguments are plain on/off switches.
        for flag, description in (
                ('--reverse-only',
                 'Do not create tables and indexes for searching'),
                ('--enable-debug-statements',
                 'Include debug warning statements in SQL code'),
                ('--no-partitions',
                 'Do not partition search indices\n'
                 '(speeds up import of single country extracts)'),
                ('--no-updates',
                 'Do not keep tables that are only needed for\n'
                 'updating the database later'),
        ):
            optional.add_argument(flag, action='store_true', help=description)

        expert = parser.add_argument_group('Expert options')
        expert.add_argument('--ignore-errors', action='store_true',
                            help='Continue import even when errors in SQL are present')
        expert.add_argument('--index-noanalyse', action='store_true',
                            help='Do not perform analyse operations during index')

    @staticmethod
    def run(args):
        """ Translate the parsed arguments into a parameter list for the
            legacy 'setup.php' script and hand it to run_legacy_script().
            The result of run_legacy_script() is returned unchanged.
        """
        if args.osm_file:
            # A fresh import: '--all' selects every step.
            params = ['setup.php', '--all', '--osm-file', args.osm_file]
        else:
            # Resuming: only request the steps from 'continue_at' onwards.
            params = ['setup.php']
            if args.continue_at == 'load-data':
                params.append('--load-data')
            if args.continue_at in ('load-data', 'indexing'):
                params.append('--index')
            params += ['--create-search-indices', '--create-country-names',
                       '--setup-website']

        if args.osm2pgsql_cache:
            params += ['--osm2pgsql-cache', args.osm2pgsql_cache]

        # Boolean switches map one-to-one onto flags of the PHP script.
        switches = (
            ('reverse_only', '--reverse-only'),
            ('enable_debug_statements', '--enable-debug-statements'),
            ('no_partitions', '--no-partitions'),
            ('no_updates', '--drop'),
            ('ignore_errors', '--ignore-errors'),
            ('index_noanalyse', '--index-noanalyse'),
        )
        params.extend(flag for attr, flag in switches if getattr(args, attr))

        if args.threads:
            params += ['--threads', args.threads]

        return run_legacy_script(*params, nominatim_env=args)
class SetupSpecialPhrases:
"""\
Maintain special phrases.
@@ -334,7 +268,7 @@ def nominatim(**kwargs):
"""
parser = CommandlineParser('nominatim', nominatim.__doc__)
parser.add_subcommand('import', SetupAll)
parser.add_subcommand('import', clicmd.SetupAll)
parser.add_subcommand('freeze', clicmd.SetupFreeze)
parser.add_subcommand('replication', clicmd.UpdateReplication)

View File

@@ -2,6 +2,7 @@
Subcommand definitions for the command-line tool.
"""
from .setup import SetupAll
from .replication import UpdateReplication
from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
from .index import UpdateIndex

140
nominatim/clicmd/setup.py Normal file
View File

@@ -0,0 +1,140 @@
"""
Implementation of the 'import' subcommand.
"""
import logging
from pathlib import Path
import psutil
from ..tools.exec_utils import run_legacy_script
from ..db.connection import connect
from ..db import status
from ..errors import UsageError
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
    """

    @staticmethod
    def add_args(parser):
        """ Register the command-line options of the import command
            on the given argparse parser.
        """
        # Exactly one of --osm-file and --continue has to be given.
        required = parser.add_argument_group('Required arguments')
        source = required.add_mutually_exclusive_group(required=True)
        source.add_argument('--osm-file', metavar='FILE',
                            help='OSM file to be imported.')
        source.add_argument('--continue', dest='continue_at',
                            choices=['load-data', 'indexing', 'db-postprocess'],
                            help='Continue an import that was interrupted')

        optional = parser.add_argument_group('Optional arguments')
        optional.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                              help='Size of cache to be used by osm2pgsql (in MB)')
        # The remaining optional arguments are plain on/off switches.
        for flag, description in (
                ('--reverse-only',
                 'Do not create tables and indexes for searching'),
                ('--no-partitions',
                 'Do not partition search indices\n'
                 '(speeds up import of single country extracts)'),
                ('--no-updates',
                 'Do not keep tables that are only needed for\n'
                 'updating the database later'),
        ):
            optional.add_argument(flag, action='store_true', help=description)

        expert = parser.add_argument_group('Expert options')
        expert.add_argument('--ignore-errors', action='store_true',
                            help='Continue import even when errors in SQL are present')
        expert.add_argument('--index-noanalyse', action='store_true',
                            help='Do not perform analyse operations during index')

    @staticmethod
    def run(args): # pylint: disable=too-many-statements
        """ Run the import, either from scratch or resuming at the stage
            named in `args.continue_at`.

            Without `continue_at` all stages run: database setup and OSM
            import, data loading, indexing and post-processing. With
            'load-data' or 'indexing' the run starts at that stage and
            'db-postprocess' only executes the final post-processing.
            Table creation, postcode computation and search index
            creation are still delegated to the legacy 'setup.php' script.

            Returns 0. Raises UsageError when the given OSM file does
            not exist.
        """
        from ..tools import database_import
        from ..tools import refresh
        from ..indexer.indexer import Indexer

        osm_file = args.osm_file
        if osm_file and not Path(osm_file).is_file():
            LOG.fatal("OSM file '%s' does not exist.", osm_file)
            raise UsageError('Cannot access file.')

        # Stage 1: database creation and raw data import (fresh import only).
        if args.continue_at is None:
            database_import.setup_database_skeleton(
                args.config.get_libpq_dsn(),
                args.data_dir,
                args.no_partitions,
                rouser=args.config.DATABASE_WEBUSER)

            LOG.warning('Installing database module')
            with connect(args.config.get_libpq_dsn()) as db_conn:
                database_import.install_module(args.module_dir, args.project_dir,
                                               args.config.DATABASE_MODULE_PATH,
                                               conn=db_conn)

            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(Path(args.osm_file),
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates)

            LOG.warning('Create functions (1st pass)')
            with connect(args.config.get_libpq_dsn()) as db_conn:
                refresh.create_functions(db_conn, args.config, args.sqllib_dir,
                                         False, False)

            LOG.warning('Create tables')
            table_cmd = ['setup.php', '--create-tables', '--create-partition-tables']
            if args.reverse_only:
                table_cmd.append('--reverse-only')
            run_legacy_script(*table_cmd, nominatim_env=args)

            LOG.warning('Create functions (2nd pass)')
            with connect(args.config.get_libpq_dsn()) as db_conn:
                refresh.create_functions(db_conn, args.config, args.sqllib_dir,
                                         False, False)

            LOG.warning('Importing wikipedia importance data')
            wiki_dir = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
            wiki_result = refresh.import_wikipedia_articles(
                args.config.get_libpq_dsn(), wiki_dir)
            if wiki_result > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Will be using default importances.')

            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as db_conn:
                database_import.truncate_data_tables(db_conn,
                                                     args.config.MAX_WORD_FREQUENCY)

        # Stage 2: copy the imported data into the placex table.
        if args.continue_at in (None, 'load-data'):
            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(),
                                      args.data_dir,
                                      args.threads or psutil.cpu_count() or 1)

            LOG.warning('Calculate postcodes')
            run_legacy_script('setup.php', '--calculate-postcodes', nominatim_env=args)

        # Stage 3: indexing.
        if args.continue_at in (None, 'load-data', 'indexing'):
            LOG.warning('Indexing places')
            place_indexer = Indexer(args.config.get_libpq_dsn(),
                                    args.threads or psutil.cpu_count() or 1)
            place_indexer.index_full(analyse=not args.index_noanalyse)

        # Stage 4: post-processing, executed on every run.
        LOG.warning('Post-process tables')
        postprocess_cmd = ['setup.php', '--create-search-indices',
                           '--create-country-names']
        if args.no_updates:
            postprocess_cmd.append('--drop')
        run_legacy_script(*postprocess_cmd, nominatim_env=args)

        website_dir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', website_dir)
        refresh.setup_website(website_dir, args.phplib_dir, args.config)

        # Recording the data date is best effort: a failure is only logged.
        with connect(args.config.get_libpq_dsn()) as db_conn:
            try:
                import_date = status.compute_database_date(db_conn)
                status.set_status(db_conn, import_date)
                LOG.info('Database is at %s.', import_date)
            except Exception as err: # pylint: disable=broad-except
                LOG.error('Cannot determine date of database: %s', err)

        return 0

View File

@@ -59,12 +59,12 @@ class AdminTransition:
if args.setup_db:
LOG.warning('Setup DB')
mpath = database_import.install_module(args.module_dir, args.project_dir,
args.config.DATABASE_MODULE_PATH)
with connect(args.config.get_libpq_dsn()) as conn:
database_import.setup_extensions(conn)
database_import.check_module_dir_path(conn, mpath)
database_import.install_module(args.module_dir, args.project_dir,
args.config.DATABASE_MODULE_PATH,
conn=conn)
database_import.import_base_data(args.config.get_libpq_dsn(),
args.data_dir, args.no_partitions)
@@ -88,7 +88,7 @@ class AdminTransition:
with connect(args.config.get_libpq_dsn()) as conn:
try:
status.set_status(conn, status.compute_database_date(conn))
except Exception as exc: # pylint: disable=bare-except
except Exception as exc: # pylint: disable=broad-except
LOG.error('Cannot determine date of database: %s', exc)
if args.index:

View File

@@ -119,6 +119,13 @@ class PostcodeRunner:
WHERE place_id IN ({})
""".format(','.join((str(i) for i in ids)))
def _analyse_db_if(conn, condition):
if condition:
with conn.cursor() as cur:
cur.execute('ANALYSE')
class Indexer:
""" Main indexing routine.
"""
@@ -142,7 +149,7 @@ class Indexer:
for thread in self.threads:
thread.close()
threads = []
self.threads = []
def index_full(self, analyse=True):
@@ -155,26 +162,22 @@ class Indexer:
try:
self.index_by_rank(0, 4)
self._analyse_db_if(conn, analyse)
_analyse_db_if(conn, analyse)
self.index_boundaries(0, 30)
self._analyse_db_if(conn, analyse)
_analyse_db_if(conn, analyse)
self.index_by_rank(5, 25)
self._analyse_db_if(conn, analyse)
_analyse_db_if(conn, analyse)
self.index_by_rank(26, 30)
self._analyse_db_if(conn, analyse)
_analyse_db_if(conn, analyse)
self.index_postcodes()
self._analyse_db_if(conn, analyse)
_analyse_db_if(conn, analyse)
finally:
conn.close()
def _analyse_db_if(self, conn, condition):
if condition:
with conn.cursor() as cur:
cur.execute('ANALYSE')
def index_boundaries(self, minrank, maxrank):
""" Index only administrative boundaries within the given rank range.

View File

@@ -9,6 +9,7 @@ import shutil
from pathlib import Path
import psutil
import psycopg2
from ..db.connection import connect, get_pg_env
from ..db import utils as db_utils
@@ -19,6 +20,21 @@ from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
LOG = logging.getLogger()
def setup_database_skeleton(dsn, data_dir, no_partitions, rouser=None):
    """ Set up a fresh Nominatim database in three steps: create the
        database for `dsn` (create_db), set up the extensions on a new
        connection (setup_extensions) and load the base data from
        `data_dir` (import_base_data).

        `rouser` is handed on to create_db() and `no_partitions` to
        import_base_data().
    """
    LOG.warning('Creating database')
    create_db(dsn, rouser=rouser)

    LOG.warning('Setting up database')
    with connect(dsn) as db_conn:
        setup_extensions(db_conn)

    LOG.warning('Loading basic data')
    import_base_data(dsn, data_dir, ignore_partitions=no_partitions)
def create_db(dsn, rouser=None):
""" Create a new database for the given DSN. Fails when the database
already exists or the PostgreSQL version is too old.
@@ -72,7 +88,7 @@ def setup_extensions(conn):
raise UsageError('PostGIS version is too old.')
def install_module(src_dir, project_dir, module_dir):
def install_module(src_dir, project_dir, module_dir, conn=None):
""" Copy the normalization module from src_dir into the project
directory under the '/module' directory. If 'module_dir' is set, then
use the module from there instead and check that it is accessible
@@ -80,6 +96,9 @@ def install_module(src_dir, project_dir, module_dir):
The function detects when the installation is run from the
build directory. It doesn't touch the module in that case.
If 'conn' is given, then the function also tests if the module
can be accessed via the given database.
"""
if not module_dir:
module_dir = project_dir / 'module'
@@ -99,19 +118,17 @@ def install_module(src_dir, project_dir, module_dir):
else:
LOG.info("Using custom path for database module at '%s'", module_dir)
return module_dir
def check_module_dir_path(conn, path):
    """ Verify that the database can use the normalisation module
        stored under `path`.

        This is done by creating a throw-away C function that refers to
        'nominatim.so' in that directory and dropping it again right away.
        Errors raised by the database are not caught here.
    """
    probe_sql = ("CREATE FUNCTION nominatim_test_import_func(text)\n"
                 "RETURNS text AS '{}/nominatim.so', 'transliteration'\n"
                 "LANGUAGE c IMMUTABLE STRICT;\n"
                 "DROP FUNCTION nominatim_test_import_func(text)\n").format(path)

    with conn.cursor() as cursor:
        cursor.execute(probe_sql)
if conn is not None:
with conn.cursor() as cur:
try:
cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
RETURNS text AS '{}/nominatim.so', 'transliteration'
LANGUAGE c IMMUTABLE STRICT;
DROP FUNCTION nominatim_test_import_func(text)
""".format(module_dir))
except psycopg2.DatabaseError as err:
LOG.fatal("Error accessing database module: %s", err)
raise UsageError("Database module cannot be accessed.") from err
def import_base_data(dsn, sql_dir, ignore_partitions=False):
@@ -174,7 +191,7 @@ def truncate_data_tables(conn, max_word_frequency=None):
cur.execute('TRUNCATE location_property_osmline')
cur.execute('TRUNCATE location_postcode')
cur.execute('TRUNCATE search_name')
cur.execute('DROP SEQUENCE seq_place')
cur.execute('DROP SEQUENCE IF EXISTS seq_place')
cur.execute('CREATE SEQUENCE seq_place start 100000')
cur.execute("""SELECT tablename FROM pg_tables