bdd: use python library where possible

Replace calls to PHP scripts with direct calls into the
nominatim Python library where possible. This speeds up
tests quite a bit.
This commit is contained in:
Sarah Hoffmann
2021-02-26 16:14:29 +01:00
parent 15b5906790
commit dd03aeb966
8 changed files with 60 additions and 32 deletions

View File

@@ -69,7 +69,11 @@ $iInstances = max(1, $aCMDResult['threads'] ?? (min(16, getProcessorCount()) - 1
function run($oCmd) { function run($oCmd) {
global $iInstances; global $iInstances;
global $aCMDResult;
$oCmd->addParams('--threads', $iInstances); $oCmd->addParams('--threads', $iInstances);
if ($aCMDResult['ignore-errors'] ?? false) {
$oCmd->addParams('--ignore-errors');
}
$oCmd->run(true); $oCmd->run(true);
} }

View File

@@ -75,12 +75,14 @@ class CommandlineParser:
setattr(args, arg, Path(kwargs[arg])) setattr(args, arg, Path(kwargs[arg]))
args.project_dir = Path(args.project_dir).resolve() args.project_dir = Path(args.project_dir).resolve()
logging.basicConfig(stream=sys.stderr, if 'cli_args' not in kwargs:
format='%(asctime)s: %(message)s', logging.basicConfig(stream=sys.stderr,
datefmt='%Y-%m-%d %H:%M:%S', format='%(asctime)s: %(message)s',
level=max(4 - args.verbose, 1) * 10) datefmt='%Y-%m-%d %H:%M:%S',
level=max(4 - args.verbose, 1) * 10)
args.config = Configuration(args.project_dir, args.config_dir) args.config = Configuration(args.project_dir, args.config_dir,
environ=kwargs.get('environ', os.environ))
log = logging.getLogger() log = logging.getLogger()
log.warning('Using project directory: %s', str(args.project_dir)) log.warning('Using project directory: %s', str(args.project_dir))

View File

@@ -75,7 +75,8 @@ class SetupAll:
LOG.warning('Importing OSM data file') LOG.warning('Importing OSM data file')
database_import.import_osm_data(Path(args.osm_file), database_import.import_osm_data(Path(args.osm_file),
args.osm2pgsql_options(0, 1), args.osm2pgsql_options(0, 1),
drop=args.no_updates) drop=args.no_updates,
ignore_errors=args.ignore_errors)
LOG.warning('Create functions (1st pass)') LOG.warning('Create functions (1st pass)')
with connect(args.config.get_libpq_dsn()) as conn: with connect(args.config.get_libpq_dsn()) as conn:

View File

@@ -48,6 +48,8 @@ class AdminTransition:
help='Size of cache to be used by osm2pgsql (in MB)') help='Size of cache to be used by osm2pgsql (in MB)')
group.add_argument('--no-analyse', action='store_true', group.add_argument('--no-analyse', action='store_true',
help='Do not perform analyse operations during index') help='Do not perform analyse operations during index')
group.add_argument('--ignore-errors', action='store_true',
help="Ignore certain erros on import.")
@staticmethod @staticmethod
def run(args): def run(args):
@@ -75,7 +77,8 @@ class AdminTransition:
raise UsageError('Missing required --osm-file argument') raise UsageError('Missing required --osm-file argument')
database_import.import_osm_data(Path(args.osm_file), database_import.import_osm_data(Path(args.osm_file),
args.osm2pgsql_options(0, 1), args.osm2pgsql_options(0, 1),
drop=args.drop) drop=args.drop,
ignore_errors=args.ignore_errors)
if args.load_data: if args.load_data:
LOG.warning('Load data') LOG.warning('Load data')

View File

@@ -145,7 +145,7 @@ def import_base_data(dsn, sql_dir, ignore_partitions=False):
conn.commit() conn.commit()
def import_osm_data(osm_file, options, drop=False): def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
""" Import the given OSM file. 'options' contains the list of """ Import the given OSM file. 'options' contains the list of
default settings for osm2pgsql. default settings for osm2pgsql.
""" """
@@ -164,10 +164,11 @@ def import_osm_data(osm_file, options, drop=False):
run_osm2pgsql(options) run_osm2pgsql(options)
with connect(options['dsn']) as conn: with connect(options['dsn']) as conn:
with conn.cursor() as cur: if not ignore_errors:
cur.execute('SELECT * FROM place LIMIT 1') with conn.cursor() as cur:
if cur.rowcount == 0: cur.execute('SELECT * FROM place LIMIT 1')
raise UsageError('No data imported by osm2pgsql.') if cur.rowcount == 0:
raise UsageError('No data imported by osm2pgsql.')
if drop: if drop:
conn.drop_table('planet_osm_nodes') conn.drop_table('planet_osm_nodes')

View File

@@ -7,6 +7,7 @@ import psycopg2.extras
sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve())) sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve()))
from nominatim import cli
from nominatim.config import Configuration from nominatim.config import Configuration
from nominatim.tools import refresh from nominatim.tools import refresh
from steps.utils import run_script from steps.utils import run_script
@@ -88,18 +89,18 @@ class NominatimEnvironment:
self.test_env['NOMINATIM_FLATNODE_FILE'] = '' self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full' self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes' self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
self.test_env['NOMINATIM_DATADIR'] = self.src_dir / 'data' self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve())
self.test_env['NOMINATIM_SQLDIR'] = self.src_dir / 'lib-sql' self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve())
self.test_env['NOMINATIM_CONFIGDIR'] = self.src_dir / 'settings' self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve())
self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = self.build_dir / 'module' self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql' self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim' self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())
if self.server_module_path: if self.server_module_path:
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
else: else:
# avoid module being copied into the temporary environment # avoid module being copied into the temporary environment
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module' self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = str((self.build_dir / 'module').resolve())
if self.website_dir is not None: if self.website_dir is not None:
self.website_dir.cleanup() self.website_dir.cleanup()
@@ -182,9 +183,9 @@ class NominatimEnvironment:
self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve()) self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
try: try:
self.run_setup_script('all', osm_file=self.api_test_file) self.run_nominatim('import', '--osm-file', str(self.api_test_file))
self.run_setup_script('import-tiger-data') self.run_setup_script('import-tiger-data')
self.run_setup_script('drop') self.run_nominatim('freeze')
phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve()) phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
run_script(['psql', '-d', self.api_test_db, '-f', phrase_file]) run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
@@ -249,12 +250,25 @@ class NominatimEnvironment:
""" """
with db.cursor() as cur: with db.cursor() as cur:
while True: while True:
self.run_update_script('index') self.run_nominatim('index')
cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1") cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
if cur.rowcount == 0: if cur.rowcount == 0:
return return
def run_nominatim(self, *cmdline):
    """ Invoke the nominatim command-line frontend in-process.

        The positional arguments are handed over unchanged as 'cli_args'.
        The osm2pgsql binary is looked up below the build directory, the
        PHP, SQL, data and settings directories below the source
        directory. 'self.test_env' is passed on as 'environ'.
    """
    osm2pgsql_binary = self.build_dir / 'osm2pgsql' / 'osm2pgsql'
    source_root = self.src_dir

    # Keyword order mirrors the order the library call was written with.
    options = {
        'module_dir': '',
        'osm2pgsql_path': str(osm2pgsql_binary),
        'phplib_dir': str(source_root / 'lib-php'),
        'sqllib_dir': str(source_root / 'lib-sql'),
        'data_dir': str(source_root / 'data'),
        'config_dir': str(source_root / 'settings'),
        'cli_args': cmdline,
        'phpcgi_path': '',
        'environ': self.test_env,
    }

    cli.nominatim(**options)
def run_setup_script(self, *args, **kwargs): def run_setup_script(self, *args, **kwargs):
""" Run the Nominatim setup script with the given arguments. """ Run the Nominatim setup script with the given arguments.
""" """
@@ -285,7 +299,7 @@ class NominatimEnvironment:
""" Copy data from place to the placex and location_property_osmline """ Copy data from place to the placex and location_property_osmline
tables invoking the appropriate triggers. tables invoking the appropriate triggers.
""" """
self.run_setup_script('create-functions', 'create-partition-functions') self.run_nominatim('refresh', '--functions', '--no-diff-updates')
with db.cursor() as cur: with db.cursor() as cur:
cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type, cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type,

View File

@@ -5,6 +5,7 @@ import psycopg2.extras
from place_inserter import PlaceColumn from place_inserter import PlaceColumn
from table_compare import NominatimID, DBRow from table_compare import NominatimID, DBRow
from nominatim.indexer.indexer import Indexer
def check_database_integrity(context): def check_database_integrity(context):
""" Check some generic constraints on the tables. """ Check some generic constraints on the tables.
@@ -85,7 +86,12 @@ def import_and_index_data_from_place_table(context):
""" Import data previously set up in the place table. """ Import data previously set up in the place table.
""" """
context.nominatim.copy_from_place(context.db) context.nominatim.copy_from_place(context.db)
context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse') context.nominatim.run_setup_script('calculate-postcodes')
# Call directly as the refresh function does not include postcodes.
indexer = Indexer(context.nominatim.test_env['NOMINATIM_DATABASE_DSN'][6:], 1)
indexer.index_full(analyse=False)
check_database_integrity(context) check_database_integrity(context)
@when("updating places") @when("updating places")
@@ -93,8 +99,7 @@ def update_place_table(context):
""" Update the place table with the given data. Also runs all triggers """ Update the place table with the given data. Also runs all triggers
related to updates and reindexes the new data. related to updates and reindexes the new data.
""" """
context.nominatim.run_setup_script( context.nominatim.run_nominatim('refresh', '--functions')
'create-functions', 'create-partition-functions', 'enable-diff-updates')
with context.db.cursor() as cur: with context.db.cursor() as cur:
for row in context.table: for row in context.table:
PlaceColumn(context).add_row(row, False).db_insert(cur) PlaceColumn(context).add_row(row, False).db_insert(cur)
@@ -106,7 +111,7 @@ def update_place_table(context):
def update_postcodes(context): def update_postcodes(context):
""" Rerun the calculation of postcodes. """ Rerun the calculation of postcodes.
""" """
context.nominatim.run_update_script('calculate-postcodes') context.nominatim.run_nominatim('refresh', '--postcodes')
@when("marking for delete (?P<oids>.*)") @when("marking for delete (?P<oids>.*)")
def delete_places(context, oids): def delete_places(context, oids):
@@ -114,8 +119,7 @@ def delete_places(context, oids):
separated by commas. Also runs all triggers separated by commas. Also runs all triggers
related to updates and reindexes the new data. related to updates and reindexes the new data.
""" """
context.nominatim.run_setup_script( context.nominatim.run_nominatim('refresh', '--functions')
'create-functions', 'create-partition-functions', 'enable-diff-updates')
with context.db.cursor() as cur: with context.db.cursor() as cur:
for oid in oids.split(','): for oid in oids.split(','):
NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}') NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')

View File

@@ -75,9 +75,8 @@ def update_from_osm_file(context):
The data is expected as attached text in OPL format. The data is expected as attached text in OPL format.
""" """
context.nominatim.copy_from_place(context.db) context.nominatim.copy_from_place(context.db)
context.nominatim.run_setup_script('index', 'index-noanalyse') context.nominatim.run_nominatim('index')
context.nominatim.run_setup_script('create-functions', 'create-partition-functions', context.nominatim.run_nominatim('refresh', '--functions')
'enable-diff-updates')
# create an OSM file and import it # create an OSM file and import it
fname = write_opl_file(context.text, context.osm) fname = write_opl_file(context.text, context.osm)