mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-12 05:44:06 +00:00
move setup function to python
There are still back-calls to PHP for some of the sub-steps. These need some larger refactoring before they can be moved to Python.
This commit is contained in:
@@ -111,72 +111,6 @@ class CommandlineParser:
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
|
||||
class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
    """

    @staticmethod
    def add_args(parser):
        # The import either starts from an OSM file or resumes an
        # interrupted run; exactly one of the two must be given.
        required = parser.add_argument_group('Required arguments')
        source = required.add_mutually_exclusive_group(required=True)
        source.add_argument('--osm-file',
                            help='OSM file to be imported.')
        source.add_argument('--continue', dest='continue_at',
                            choices=['load-data', 'indexing', 'db-postprocess'],
                            help='Continue an import that was interrupted')

        optional = parser.add_argument_group('Optional arguments')
        optional.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                              help='Size of cache to be used by osm2pgsql (in MB)')
        optional.add_argument('--reverse-only', action='store_true',
                              help='Do not create tables and indexes for searching')
        optional.add_argument('--enable-debug-statements', action='store_true',
                              help='Include debug warning statements in SQL code')
        optional.add_argument('--no-partitions', action='store_true',
                              help="""Do not partition search indices
                                      (speeds up import of single country extracts)""")
        optional.add_argument('--no-updates', action='store_true',
                              help="""Do not keep tables that are only needed for
                                      updating the database later""")

        expert = parser.add_argument_group('Expert options')
        expert.add_argument('--ignore-errors', action='store_true',
                            help='Continue import even when errors in SQL are present')
        expert.add_argument('--index-noanalyse', action='store_true',
                            help='Do not perform analyse operations during index')

    @staticmethod
    def run(args):
        # Translate the parsed options into the command line of the
        # legacy setup.php script and hand over to it.
        cmdline = ['setup.php']

        if args.osm_file:
            # Fresh import: let the script run all steps on the file.
            cmdline += ['--all', '--osm-file', args.osm_file]
        else:
            # Resumed import: request only the steps from the
            # continuation point onwards.
            if args.continue_at == 'load-data':
                cmdline.append('--load-data')
            if args.continue_at in ('load-data', 'indexing'):
                cmdline.append('--index')
            cmdline += ['--create-search-indices', '--create-country-names',
                        '--setup-website']

        if args.osm2pgsql_cache:
            cmdline += ['--osm2pgsql-cache', args.osm2pgsql_cache]

        # Boolean options that are forwarded as a plain switch. Note that
        # --no-updates is called --drop on the script side.
        switches = (
            ('--reverse-only', args.reverse_only),
            ('--enable-debug-statements', args.enable_debug_statements),
            ('--no-partitions', args.no_partitions),
            ('--drop', args.no_updates),
            ('--ignore-errors', args.ignore_errors),
            ('--index-noanalyse', args.index_noanalyse),
        )
        cmdline.extend(switch for switch, enabled in switches if enabled)

        if args.threads:
            cmdline += ['--threads', args.threads]

        return run_legacy_script(*cmdline, nominatim_env=args)
|
||||
|
||||
|
||||
class SetupSpecialPhrases:
|
||||
"""\
|
||||
Maintain special phrases.
|
||||
@@ -334,7 +268,7 @@ def nominatim(**kwargs):
|
||||
"""
|
||||
parser = CommandlineParser('nominatim', nominatim.__doc__)
|
||||
|
||||
parser.add_subcommand('import', SetupAll)
|
||||
parser.add_subcommand('import', clicmd.SetupAll)
|
||||
parser.add_subcommand('freeze', clicmd.SetupFreeze)
|
||||
parser.add_subcommand('replication', clicmd.UpdateReplication)
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Subcommand definitions for the command-line tool.
|
||||
"""
|
||||
|
||||
from .setup import SetupAll
|
||||
from .replication import UpdateReplication
|
||||
from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
|
||||
from .index import UpdateIndex
|
||||
|
||||
140
nominatim/clicmd/setup.py
Normal file
140
nominatim/clicmd/setup.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""
|
||||
Implementation of the 'import' subcommand.
|
||||
"""
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
|
||||
from ..tools.exec_utils import run_legacy_script
|
||||
from ..db.connection import connect
|
||||
from ..db import status
|
||||
from ..errors import UsageError
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid eventually unused imports.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
    """

    @staticmethod
    def add_args(parser):
        # The import either starts from an OSM file or resumes an
        # interrupted run; exactly one of the two must be given.
        required = parser.add_argument_group('Required arguments')
        source = required.add_mutually_exclusive_group(required=True)
        source.add_argument('--osm-file', metavar='FILE',
                            help='OSM file to be imported.')
        source.add_argument('--continue', dest='continue_at',
                            choices=['load-data', 'indexing', 'db-postprocess'],
                            help='Continue an import that was interrupted')

        optional = parser.add_argument_group('Optional arguments')
        optional.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                              help='Size of cache to be used by osm2pgsql (in MB)')
        optional.add_argument('--reverse-only', action='store_true',
                              help='Do not create tables and indexes for searching')
        optional.add_argument('--no-partitions', action='store_true',
                              help="""Do not partition search indices
                                      (speeds up import of single country extracts)""")
        optional.add_argument('--no-updates', action='store_true',
                              help="""Do not keep tables that are only needed for
                                      updating the database later""")

        expert = parser.add_argument_group('Expert options')
        expert.add_argument('--ignore-errors', action='store_true',
                            help='Continue import even when errors in SQL are present')
        expert.add_argument('--index-noanalyse', action='store_true',
                            help='Do not perform analyse operations during index')

    @staticmethod
    def run(args): # pylint: disable=too-many-statements
        from ..tools import database_import
        from ..tools import refresh
        from ..indexer.indexer import Indexer

        # Reject a missing input file before anything touches the database.
        if args.osm_file:
            if not Path(args.osm_file).is_file():
                LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
                raise UsageError('Cannot access file.')

        config = args.config

        # The stages are cumulative: a run resumed at a later stage skips
        # everything that comes before it.
        from_scratch = args.continue_at is None
        with_load = from_scratch or args.continue_at == 'load-data'
        with_index = with_load or args.continue_at == 'indexing'

        if from_scratch:
            database_import.setup_database_skeleton(config.get_libpq_dsn(),
                                                    args.data_dir,
                                                    args.no_partitions,
                                                    rouser=config.DATABASE_WEBUSER)

            LOG.warning('Installing database module')
            with connect(config.get_libpq_dsn()) as conn:
                database_import.install_module(args.module_dir, args.project_dir,
                                               config.DATABASE_MODULE_PATH,
                                               conn=conn)

            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(Path(args.osm_file),
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates)

            LOG.warning('Create functions (1st pass)')
            with connect(config.get_libpq_dsn()) as conn:
                refresh.create_functions(conn, config, args.sqllib_dir,
                                         False, False)

            # Table creation is still a back-call into the PHP script.
            LOG.warning('Create tables')
            table_cmd = ['setup.php', '--create-tables', '--create-partition-tables']
            if args.reverse_only:
                table_cmd.append('--reverse-only')
            run_legacy_script(*table_cmd, nominatim_env=args)

            LOG.warning('Create functions (2nd pass)')
            with connect(config.get_libpq_dsn()) as conn:
                refresh.create_functions(conn, config, args.sqllib_dir,
                                         False, False)

            LOG.warning('Importing wikipedia importance data')
            wiki_dir = Path(config.WIKIPEDIA_DATA_PATH or args.project_dir)
            wiki_result = refresh.import_wikipedia_articles(config.get_libpq_dsn(),
                                                            wiki_dir)
            if wiki_result > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Will be using default importances.')

            LOG.warning('Initialise tables')
            with connect(config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn, config.MAX_WORD_FREQUENCY)

        if with_load:
            LOG.warning('Load data into placex table')
            database_import.load_data(config.get_libpq_dsn(),
                                      args.data_dir,
                                      args.threads or psutil.cpu_count() or 1)

            # Postcode calculation is still a back-call into the PHP script.
            LOG.warning('Calculate postcodes')
            run_legacy_script('setup.php', '--calculate-postcodes', nominatim_env=args)

        if with_index:
            LOG.warning('Indexing places')
            indexer = Indexer(config.get_libpq_dsn(),
                              args.threads or psutil.cpu_count() or 1)
            indexer.index_full(analyse=not args.index_noanalyse)

        # Post-processing runs for every entry point, including a run
        # that was resumed with 'db-postprocess'.
        LOG.warning('Post-process tables')
        post_cmd = ['setup.php', '--create-search-indices', '--create-country-names']
        if args.no_updates:
            post_cmd.append('--drop')
        run_legacy_script(*post_cmd, nominatim_env=args)

        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        refresh.setup_website(webdir, args.phplib_dir, config)

        # Recording the database date is best effort: a failure is logged
        # but does not fail the import.
        with connect(config.get_libpq_dsn()) as conn:
            try:
                dbdate = status.compute_database_date(conn)
                status.set_status(conn, dbdate)
                LOG.info('Database is at %s.', dbdate)
            except Exception as exc: # pylint: disable=broad-except
                LOG.error('Cannot determine date of database: %s', exc)

        return 0
|
||||
@@ -59,12 +59,12 @@ class AdminTransition:
|
||||
|
||||
if args.setup_db:
|
||||
LOG.warning('Setup DB')
|
||||
mpath = database_import.install_module(args.module_dir, args.project_dir,
|
||||
args.config.DATABASE_MODULE_PATH)
|
||||
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
database_import.setup_extensions(conn)
|
||||
database_import.check_module_dir_path(conn, mpath)
|
||||
database_import.install_module(args.module_dir, args.project_dir,
|
||||
args.config.DATABASE_MODULE_PATH,
|
||||
conn=conn)
|
||||
|
||||
database_import.import_base_data(args.config.get_libpq_dsn(),
|
||||
args.data_dir, args.no_partitions)
|
||||
@@ -88,7 +88,7 @@ class AdminTransition:
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
try:
|
||||
status.set_status(conn, status.compute_database_date(conn))
|
||||
except Exception as exc: # pylint: disable=bare-except
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
LOG.error('Cannot determine date of database: %s', exc)
|
||||
|
||||
if args.index:
|
||||
|
||||
@@ -119,6 +119,13 @@ class PostcodeRunner:
|
||||
WHERE place_id IN ({})
|
||||
""".format(','.join((str(i) for i in ids)))
|
||||
|
||||
|
||||
def _analyse_db_if(conn, condition):
|
||||
if condition:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('ANALYSE')
|
||||
|
||||
|
||||
class Indexer:
|
||||
""" Main indexing routine.
|
||||
"""
|
||||
@@ -142,7 +149,7 @@ class Indexer:
|
||||
|
||||
for thread in self.threads:
|
||||
thread.close()
|
||||
threads = []
|
||||
self.threads = []
|
||||
|
||||
|
||||
def index_full(self, analyse=True):
|
||||
@@ -155,26 +162,22 @@ class Indexer:
|
||||
|
||||
try:
|
||||
self.index_by_rank(0, 4)
|
||||
self._analyse_db_if(conn, analyse)
|
||||
_analyse_db_if(conn, analyse)
|
||||
|
||||
self.index_boundaries(0, 30)
|
||||
self._analyse_db_if(conn, analyse)
|
||||
_analyse_db_if(conn, analyse)
|
||||
|
||||
self.index_by_rank(5, 25)
|
||||
self._analyse_db_if(conn, analyse)
|
||||
_analyse_db_if(conn, analyse)
|
||||
|
||||
self.index_by_rank(26, 30)
|
||||
self._analyse_db_if(conn, analyse)
|
||||
_analyse_db_if(conn, analyse)
|
||||
|
||||
self.index_postcodes()
|
||||
self._analyse_db_if(conn, analyse)
|
||||
_analyse_db_if(conn, analyse)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def _analyse_db_if(self, conn, condition):
|
||||
if condition:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('ANALYSE')
|
||||
|
||||
def index_boundaries(self, minrank, maxrank):
|
||||
""" Index only administrative boundaries within the given rank range.
|
||||
|
||||
@@ -9,6 +9,7 @@ import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
import psycopg2
|
||||
|
||||
from ..db.connection import connect, get_pg_env
|
||||
from ..db import utils as db_utils
|
||||
@@ -19,6 +20,21 @@ from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def setup_database_skeleton(dsn, data_dir, no_partitions, rouser=None):
    """ Create a new database for Nominatim and populate it with the
        essential extensions and data.

        Runs three steps in order: create_db() for `dsn` (with `rouser`
        passed through), setup_extensions() on a fresh connection and
        import_base_data() from `data_dir`, where `no_partitions` is
        handed on as the 'ignore_partitions' flag.
    """
    announce = LOG.warning

    announce('Creating database')
    create_db(dsn, rouser=rouser)

    announce('Setting up database')
    with connect(dsn) as connection:
        setup_extensions(connection)

    announce('Loading basic data')
    import_base_data(dsn, data_dir, ignore_partitions=no_partitions)
|
||||
|
||||
|
||||
def create_db(dsn, rouser=None):
|
||||
""" Create a new database for the given DSN. Fails when the database
|
||||
already exists or the PostgreSQL version is too old.
|
||||
@@ -72,7 +88,7 @@ def setup_extensions(conn):
|
||||
raise UsageError('PostGIS version is too old.')
|
||||
|
||||
|
||||
def install_module(src_dir, project_dir, module_dir):
|
||||
def install_module(src_dir, project_dir, module_dir, conn=None):
|
||||
""" Copy the normalization module from src_dir into the project
|
||||
directory under the '/module' directory. If 'module_dir' is set, then
|
||||
use the module from there instead and check that it is accessible
|
||||
@@ -80,6 +96,9 @@ def install_module(src_dir, project_dir, module_dir):
|
||||
|
||||
The function detects when the installation is run from the
|
||||
build directory. It doesn't touch the module in that case.
|
||||
|
||||
If 'conn' is given, then the function also tests if the module
|
||||
can be accessed via the given database.
|
||||
"""
|
||||
if not module_dir:
|
||||
module_dir = project_dir / 'module'
|
||||
@@ -99,19 +118,17 @@ def install_module(src_dir, project_dir, module_dir):
|
||||
else:
|
||||
LOG.info("Using custom path for database module at '%s'", module_dir)
|
||||
|
||||
return module_dir
|
||||
|
||||
|
||||
def check_module_dir_path(conn, path):
    """ Check that the normalisation module can be found and executed
        from the given path.

        The check creates a C-language test function that refers to
        'nominatim.so' below `path` and drops it again right away. Errors
        raised by the database are not caught here.
    """
    with conn.cursor() as cursor:
        probe_sql = """CREATE FUNCTION nominatim_test_import_func(text)
                       RETURNS text AS '{}/nominatim.so', 'transliteration'
                       LANGUAGE c IMMUTABLE STRICT;
                       DROP FUNCTION nominatim_test_import_func(text)
                    """.format(path)
        cursor.execute(probe_sql)
|
||||
if conn is not None:
|
||||
with conn.cursor() as cur:
|
||||
try:
|
||||
cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
|
||||
RETURNS text AS '{}/nominatim.so', 'transliteration'
|
||||
LANGUAGE c IMMUTABLE STRICT;
|
||||
DROP FUNCTION nominatim_test_import_func(text)
|
||||
""".format(module_dir))
|
||||
except psycopg2.DatabaseError as err:
|
||||
LOG.fatal("Error accessing database module: %s", err)
|
||||
raise UsageError("Database module cannot be accessed.") from err
|
||||
|
||||
|
||||
def import_base_data(dsn, sql_dir, ignore_partitions=False):
|
||||
@@ -174,7 +191,7 @@ def truncate_data_tables(conn, max_word_frequency=None):
|
||||
cur.execute('TRUNCATE location_property_osmline')
|
||||
cur.execute('TRUNCATE location_postcode')
|
||||
cur.execute('TRUNCATE search_name')
|
||||
cur.execute('DROP SEQUENCE seq_place')
|
||||
cur.execute('DROP SEQUENCE IF EXISTS seq_place')
|
||||
cur.execute('CREATE SEQUENCE seq_place start 100000')
|
||||
|
||||
cur.execute("""SELECT tablename FROM pg_tables
|
||||
|
||||
Reference in New Issue
Block a user