move import-data option to native python

This adds a new dependency on the Python psutil package.
This commit is contained in:
Sarah Hoffmann
2021-02-24 17:21:45 +01:00
parent 7222235579
commit 32683f73c7
21 changed files with 205 additions and 93 deletions

View File

@@ -3,7 +3,7 @@ Provides custom functions over command-line arguments.
"""
class NominatimArgs:
class NominatimArgs: # pylint: disable=too-few-public-methods
""" Customized namespace class for the nominatim command line tool
to receive the command-line arguments.
"""
@@ -18,5 +18,10 @@ class NominatimArgs:
osm2pgsql_style=self.config.get_import_style_file(),
threads=self.threads or default_threads,
dsn=self.config.get_libpq_dsn(),
flatnode_file=self.config.FLATNODE_FILE)
flatnode_file=self.config.FLATNODE_FILE,
tablespaces=dict(slim_data=self.config.TABLESPACE_OSM_DATA,
slim_index=self.config.TABLESPACE_OSM_INDEX,
main_data=self.config.TABLESPACE_PLACE_DATA,
main_index=self.config.TABLESPACE_PLACE_INDEX
)
)

View File

@@ -1,7 +1,7 @@
"""
Implementation of the 'index' subcommand.
"""
import os
import psutil
from ..db import status
from ..db.connection import connect
@@ -11,14 +11,6 @@ from ..db.connection import connect
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415
def _num_system_cpus():
try:
cpus = len(os.sched_getaffinity(0))
except NotImplementedError:
cpus = None
return cpus or os.cpu_count()
class UpdateIndex:
"""\
@@ -42,7 +34,7 @@ class UpdateIndex:
from ..indexer.indexer import Indexer
indexer = Indexer(args.config.get_libpq_dsn(),
args.threads or _num_system_cpus() or 1)
args.threads or psutil.cpu_count() or 1)
if not args.no_boundaries:
indexer.index_boundaries(args.minrank, args.maxrank)

View File

@@ -6,8 +6,10 @@ through the PHP scripts but are now no longer directly accessible.
This module will be removed as soon as the transition phase is over.
"""
import logging
from pathlib import Path
from ..db.connection import connect
from ..errors import UsageError
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
@@ -28,9 +30,17 @@ class AdminTransition:
help='Create nominatim db')
group.add_argument('--setup-db', action='store_true',
help='Build a blank nominatim db')
group.add_argument('--import-data', action='store_true',
help='Import a osm file')
group = parser.add_argument_group('Options')
group.add_argument('--no-partitions', action='store_true',
help='Do not partition search indices')
group.add_argument('--osm-file', metavar='FILE',
help='File to import')
group.add_argument('--drop', action='store_true',
help='Drop tables needed for updates, making the database readonly')
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
@staticmethod
def run(args):
@@ -51,3 +61,11 @@ class AdminTransition:
database_import.import_base_data(args.config.get_libpq_dsn(),
args.data_dir, args.no_partitions)
if args.import_data:
LOG.warning('Import data')
if not args.osm_file:
raise UsageError('Missing required --osm-file argument')
database_import.import_osm_data(Path(args.osm_file),
args.osm2pgsql_options(0, 1),
drop=args.drop)

View File

@@ -75,6 +75,17 @@ class _Connection(psycopg2.extensions.connection):
return True
def drop_table(self, name, if_exists=True):
    """ Remove the table called `name` from the database and commit.
        With `if_exists` set to False, the statement is issued without
        the IF EXISTS clause, so that dropping a non-existent table
        raises an exception instead of just being ignored.
    """
    existence_clause = 'IF EXISTS' if if_exists else ''
    statement = 'DROP TABLE {} "{}"\n'.format(existence_clause, name)

    with self.cursor() as cur:
        cur.execute(statement)
    self.commit()
def server_version_tuple(self):
""" Return the server version as a tuple of (major, minor).
Converts correctly for pre-10 and post-10 PostgreSQL versions.

View File

@@ -2,11 +2,16 @@
Functions for setting up and importing a new Nominatim database.
"""
import logging
import os
import subprocess
import shutil
from pathlib import Path
import psutil
from ..db.connection import connect, get_pg_env
from ..db import utils as db_utils
from .exec_utils import run_osm2pgsql
from ..errors import UsageError
from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
@@ -28,7 +33,7 @@ def create_db(dsn, rouser=None):
raise UsageError('Creating new database failed.')
with connect(dsn) as conn:
postgres_version = conn.server_version_tuple() # pylint: disable=E1101
postgres_version = conn.server_version_tuple()
if postgres_version < POSTGRESQL_REQUIRED_VERSION:
LOG.fatal('Minimum supported version of Postgresql is %d.%d. '
'Found version %d.%d.',
@@ -37,7 +42,7 @@ def create_db(dsn, rouser=None):
raise UsageError('PostgreSQL server is too old.')
if rouser is not None:
with conn.cursor() as cur: # pylint: disable=E1101
with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM pg_user where usename = %s',
(rouser, ))
if cnt == 0:
@@ -109,13 +114,45 @@ def check_module_dir_path(conn, path):
def import_base_data(dsn, sql_dir, ignore_partitions=False):
""" Create and populate the tables with basic static data that provides
the background for geocoding.
the background for geocoding. Data is assumed to not yet exist.
"""
db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
if ignore_partitions:
with connect(dsn) as conn:
with conn.cursor() as cur: # pylint: disable=E1101
with conn.cursor() as cur:
cur.execute('UPDATE country_name SET partition = 0')
conn.commit() # pylint: disable=E1101
conn.commit()
def import_osm_data(osm_file, options, drop=False):
    """ Import the given OSM file. 'options' contains the default
        settings for osm2pgsql.

        The 'options' dict is modified in place: 'import_file', 'append'
        and 'threads' are always overwritten. 'osm2pgsql_cache' is
        replaced with a guessed value when no flatnode file is configured
        and the cache size is 0.

        After the import, a UsageError is raised when the 'place' table
        contains no rows. When 'drop' is set, the table 'planet_osm_nodes'
        is dropped and the flatnode file, if one is configured, is deleted.
    """
    options['import_file'] = osm_file
    options['append'] = False
    options['threads'] = 1

    if not options['flatnode_file'] and options['osm2pgsql_cache'] == 0:
        # Make some educated guesses about cache size based on the size
        # of the import file and the available memory.
        mem = psutil.virtual_memory()
        fsize = os.stat(str(osm_file)).st_size
        # psutil reports the 'cached' field only on some platforms
        # (Linux, BSD). Count it as zero where it is missing.
        usable_mem = mem.available + getattr(mem, 'cached', 0)
        # Cache size is given in MB: 75% of the usable memory or twice
        # the file size, whichever is smaller, rounded up.
        options['osm2pgsql_cache'] = int(min(usable_mem * 0.75,
                                             fsize * 2) / 1024 / 1024) + 1

    run_osm2pgsql(options)

    with connect(options['dsn']) as conn:
        with conn.cursor() as cur:
            cur.execute('SELECT * FROM place LIMIT 1')
            if cur.rowcount == 0:
                raise UsageError('No data imported by osm2pgsql.')

        if drop:
            conn.drop_table('planet_osm_nodes')

    if drop and options['flatnode_file']:
        Path(options['flatnode_file']).unlink()

View File

@@ -110,10 +110,19 @@ def run_osm2pgsql(options):
]
if options['append']:
cmd.append('--append')
else:
cmd.append('--create')
if options['flatnode_file']:
cmd.extend(('--flat-nodes', options['flatnode_file']))
for key, param in (('slim_data', '--tablespace-slim-data'),
('slim_index', '--tablespace-slim-index'),
('main_data', '--tablespace-main-data'),
('main_index', '--tablespace-main-index')):
if options['tablespaces'][key]:
cmd.extend((param, options['tablespaces'][key]))
if options.get('disable_jit', False):
env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'