bdd: use python library where possible

Replace calls to PHP scripts with direct calls into the
nominatim Python library where possible. This speed up
tests quite a bit.
This commit is contained in:
Sarah Hoffmann
2021-02-26 16:14:29 +01:00
parent 15b5906790
commit dd03aeb966
8 changed files with 60 additions and 32 deletions

View File

@@ -69,7 +69,11 @@ $iInstances = max(1, $aCMDResult['threads'] ?? (min(16, getProcessorCount()) - 1
function run($oCmd) {
global $iInstances;
global $aCMDResult;
$oCmd->addParams('--threads', $iInstances);
if ($aCMDResult['ignore-errors'] ?? false) {
$oCmd->addParams('--ignore-errors');
}
$oCmd->run(true);
}

View File

@@ -75,12 +75,14 @@ class CommandlineParser:
setattr(args, arg, Path(kwargs[arg]))
args.project_dir = Path(args.project_dir).resolve()
logging.basicConfig(stream=sys.stderr,
format='%(asctime)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
level=max(4 - args.verbose, 1) * 10)
if 'cli_args' not in kwargs:
logging.basicConfig(stream=sys.stderr,
format='%(asctime)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
level=max(4 - args.verbose, 1) * 10)
args.config = Configuration(args.project_dir, args.config_dir)
args.config = Configuration(args.project_dir, args.config_dir,
environ=kwargs.get('environ', os.environ))
log = logging.getLogger()
log.warning('Using project directory: %s', str(args.project_dir))

View File

@@ -75,7 +75,8 @@ class SetupAll:
LOG.warning('Importing OSM data file')
database_import.import_osm_data(Path(args.osm_file),
args.osm2pgsql_options(0, 1),
drop=args.no_updates)
drop=args.no_updates,
ignore_errors=args.ignore_errors)
LOG.warning('Create functions (1st pass)')
with connect(args.config.get_libpq_dsn()) as conn:

View File

@@ -48,6 +48,8 @@ class AdminTransition:
help='Size of cache to be used by osm2pgsql (in MB)')
group.add_argument('--no-analyse', action='store_true',
help='Do not perform analyse operations during index')
group.add_argument('--ignore-errors', action='store_true',
help="Ignore certain erros on import.")
@staticmethod
def run(args):
@@ -75,7 +77,8 @@ class AdminTransition:
raise UsageError('Missing required --osm-file argument')
database_import.import_osm_data(Path(args.osm_file),
args.osm2pgsql_options(0, 1),
drop=args.drop)
drop=args.drop,
ignore_errors=args.ignore_errors)
if args.load_data:
LOG.warning('Load data')

View File

@@ -145,7 +145,7 @@ def import_base_data(dsn, sql_dir, ignore_partitions=False):
conn.commit()
def import_osm_data(osm_file, options, drop=False):
def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
""" Import the given OSM file. 'options' contains the list of
default settings for osm2pgsql.
"""
@@ -164,10 +164,11 @@ def import_osm_data(osm_file, options, drop=False):
run_osm2pgsql(options)
with connect(options['dsn']) as conn:
with conn.cursor() as cur:
cur.execute('SELECT * FROM place LIMIT 1')
if cur.rowcount == 0:
raise UsageError('No data imported by osm2pgsql.')
if not ignore_errors:
with conn.cursor() as cur:
cur.execute('SELECT * FROM place LIMIT 1')
if cur.rowcount == 0:
raise UsageError('No data imported by osm2pgsql.')
if drop:
conn.drop_table('planet_osm_nodes')

View File

@@ -7,6 +7,7 @@ import psycopg2.extras
sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve()))
from nominatim import cli
from nominatim.config import Configuration
from nominatim.tools import refresh
from steps.utils import run_script
@@ -88,18 +89,18 @@ class NominatimEnvironment:
self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
self.test_env['NOMINATIM_DATADIR'] = self.src_dir / 'data'
self.test_env['NOMINATIM_SQLDIR'] = self.src_dir / 'lib-sql'
self.test_env['NOMINATIM_CONFIGDIR'] = self.src_dir / 'settings'
self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = self.build_dir / 'module'
self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql'
self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim'
self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve())
self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve())
self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve())
self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())
if self.server_module_path:
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
else:
# avoid module being copied into the temporary environment
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module'
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = str((self.build_dir / 'module').resolve())
if self.website_dir is not None:
self.website_dir.cleanup()
@@ -182,9 +183,9 @@ class NominatimEnvironment:
self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
try:
self.run_setup_script('all', osm_file=self.api_test_file)
self.run_nominatim('import', '--osm-file', str(self.api_test_file))
self.run_setup_script('import-tiger-data')
self.run_setup_script('drop')
self.run_nominatim('freeze')
phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
@@ -249,12 +250,25 @@ class NominatimEnvironment:
"""
with db.cursor() as cur:
while True:
self.run_update_script('index')
self.run_nominatim('index')
cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
if cur.rowcount == 0:
return
def run_nominatim(self, *cmdline):
""" Run the nominatim command-line tool via the library.
"""
cli.nominatim(module_dir='',
osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
phplib_dir=str(self.src_dir / 'lib-php'),
sqllib_dir=str(self.src_dir / 'lib-sql'),
data_dir=str(self.src_dir / 'data'),
config_dir=str(self.src_dir / 'settings'),
cli_args=cmdline,
phpcgi_path='',
environ=self.test_env)
def run_setup_script(self, *args, **kwargs):
""" Run the Nominatim setup script with the given arguments.
"""
@@ -285,7 +299,7 @@ class NominatimEnvironment:
""" Copy data from place to the placex and location_property_osmline
tables invoking the appropriate triggers.
"""
self.run_setup_script('create-functions', 'create-partition-functions')
self.run_nominatim('refresh', '--functions', '--no-diff-updates')
with db.cursor() as cur:
cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type,

View File

@@ -5,6 +5,7 @@ import psycopg2.extras
from place_inserter import PlaceColumn
from table_compare import NominatimID, DBRow
from nominatim.indexer.indexer import Indexer
def check_database_integrity(context):
""" Check some generic constraints on the tables.
@@ -85,7 +86,12 @@ def import_and_index_data_from_place_table(context):
""" Import data previously set up in the place table.
"""
context.nominatim.copy_from_place(context.db)
context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse')
context.nominatim.run_setup_script('calculate-postcodes')
# Call directly as the refresh function does not include postcodes.
indexer = Indexer(context.nominatim.test_env['NOMINATIM_DATABASE_DSN'][6:], 1)
indexer.index_full(analyse=False)
check_database_integrity(context)
@when("updating places")
@@ -93,8 +99,7 @@ def update_place_table(context):
""" Update the place table with the given data. Also runs all triggers
related to updates and reindexes the new data.
"""
context.nominatim.run_setup_script(
'create-functions', 'create-partition-functions', 'enable-diff-updates')
context.nominatim.run_nominatim('refresh', '--functions')
with context.db.cursor() as cur:
for row in context.table:
PlaceColumn(context).add_row(row, False).db_insert(cur)
@@ -106,7 +111,7 @@ def update_place_table(context):
def update_postcodes(context):
""" Rerun the calculation of postcodes.
"""
context.nominatim.run_update_script('calculate-postcodes')
context.nominatim.run_nominatim('refresh', '--postcodes')
@when("marking for delete (?P<oids>.*)")
def delete_places(context, oids):
@@ -114,8 +119,7 @@ def delete_places(context, oids):
separated by commas. Also runs all triggers
related to updates and reindexes the new data.
"""
context.nominatim.run_setup_script(
'create-functions', 'create-partition-functions', 'enable-diff-updates')
context.nominatim.run_nominatim('refresh', '--functions')
with context.db.cursor() as cur:
for oid in oids.split(','):
NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')

View File

@@ -75,9 +75,8 @@ def update_from_osm_file(context):
The data is expected as attached text in OPL format.
"""
context.nominatim.copy_from_place(context.db)
context.nominatim.run_setup_script('index', 'index-noanalyse')
context.nominatim.run_setup_script('create-functions', 'create-partition-functions',
'enable-diff-updates')
context.nominatim.run_nominatim('index')
context.nominatim.run_nominatim('refresh', '--functions')
# create an OSM file and import it
fname = write_opl_file(context.text, context.osm)