From 843d3a137c8fa17bd41b4187f76dfc8c8a0311f6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 4 Jan 2021 14:14:34 +0100 Subject: [PATCH 01/17] remove stale code for python2 --- test/bdd/environment.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/bdd/environment.py b/test/bdd/environment.py index 0ac92104..d95fea4a 100644 --- a/test/bdd/environment.py +++ b/test/bdd/environment.py @@ -5,7 +5,6 @@ import psycopg2 import psycopg2.extras import subprocess import tempfile -from sys import version_info as python_version logger = logging.getLogger(__name__) @@ -172,10 +171,7 @@ class NominatimEnvironment(object): cur.execute('CREATE DATABASE %s TEMPLATE = %s' % (self.test_db, self.template_db)) conn.close() context.db = self.connect_database(self.test_db) - if python_version[0] < 3: - psycopg2.extras.register_hstore(context.db, globally=False, unicode=True) - else: - psycopg2.extras.register_hstore(context.db, globally=False) + psycopg2.extras.register_hstore(context.db, globally=False) def teardown_db(self, context): if 'db' in context: From f727620859cacfa39146642b31ad446762212262 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 4 Jan 2021 16:29:15 +0100 Subject: [PATCH 02/17] bdd: move geoemtry creation into separate file Also renames the OsmDataFactory in the more appropriate GeometryFactory and modernizes code for python3. --- test/bdd/environment.py | 81 +-------------------- test/bdd/steps/geometry_factory.py | 113 +++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 78 deletions(-) create mode 100644 test/bdd/steps/geometry_factory.py diff --git a/test/bdd/environment.py b/test/bdd/environment.py index d95fea4a..b6f6b986 100644 --- a/test/bdd/environment.py +++ b/test/bdd/environment.py @@ -6,6 +6,8 @@ import psycopg2.extras import subprocess import tempfile +from steps.geometry_factory import GeometryFactory + logger = logging.getLogger(__name__) userconfig = { @@ -209,83 +211,6 @@ class NominatimEnvironment(object): assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr) -class OSMDataFactory(object): - - def __init__(self): - scriptpath = os.path.dirname(os.path.abspath(__file__)) - self.scene_path = os.environ.get('SCENE_PATH', - os.path.join(scriptpath, '..', 'scenes', 'data')) - self.scene_cache = {} - self.clear_grid() - - def parse_geometry(self, geom, scene): - if geom.find(':') >= 0: - return "ST_SetSRID(%s, 4326)" % self.get_scene_geometry(scene, geom) - - if geom.find(',') < 0: - out = "POINT(%s)" % self.mk_wkt_point(geom) - elif geom.find('(') < 0: - line = ','.join([self.mk_wkt_point(x) for x in geom.split(',')]) - out = "LINESTRING(%s)" % line - else: - inner = geom.strip('() ') - line = ','.join([self.mk_wkt_point(x) for x in inner.split(',')]) - out = "POLYGON((%s))" % line - - return "ST_SetSRID('%s'::geometry, 4326)" % out - - def mk_wkt_point(self, point): - geom = point.strip() - if geom.find(' ') >= 0: - return geom - else: - pt = self.grid_node(int(geom)) - assert pt is not None, "Bad scenario: Point '{}' not found in grid".format(geom) - return "%f %f" % pt - - def get_scene_geometry(self, default_scene, name): - geoms = [] - for obj in name.split('+'): - oname = obj.strip() - if oname.startswith(':'): - assert default_scene is not None, "Bad scenario: You need to set a scene" - defscene = self.load_scene(default_scene) - wkt = defscene[oname[1:]] - else: - scene, obj = oname.split(':', 2) - scene_geoms = self.load_scene(scene) - wkt = scene_geoms[obj] - - geoms.append("'%s'::geometry" % wkt) - - if len(geoms) == 1: - return geoms[0] - else: - return 'ST_LineMerge(ST_Collect(ARRAY[%s]))' % ','.join(geoms) - - def load_scene(self, name): - if name in self.scene_cache: - return self.scene_cache[name] - - scene = {} - with open(os.path.join(self.scene_path, "%s.wkt" % name), 'r') as fd: - for line in fd: - if line.strip(): - obj, wkt = line.split('|', 2) - scene[obj.strip()] = wkt.strip() - self.scene_cache[name] = scene - - return scene - - def clear_grid(self): - self.grid = {} - - def add_grid_node(self, nodeid, x, y): - self.grid[nodeid] = (x, y) - - def grid_node(self, nodeid): - return self.grid.get(nodeid) - def before_all(context): # logging setup @@ -296,7 +221,7 @@ def before_all(context): logging.debug('User config: %s' %(str(context.config.userdata))) # Nominatim test setup context.nominatim = NominatimEnvironment(context.config.userdata) - context.osm = OSMDataFactory() + context.osm = GeometryFactory() def before_scenario(context, scenario): diff --git a/test/bdd/steps/geometry_factory.py b/test/bdd/steps/geometry_factory.py new file mode 100644 index 00000000..7eedfc37 --- /dev/null +++ b/test/bdd/steps/geometry_factory.py @@ -0,0 +1,113 @@ +from pathlib import Path +import os + +class GeometryFactory: + """ Provides functions to create geometries from scenes and data grids. + """ + + def __init__(self): + defpath = Path(__file__) / '..' / '..' / '..' / 'scenes' / 'data' + self.scene_path = os.environ.get('SCENE_PATH', defpath.resolve()) + self.scene_cache = {} + self.clear_grid() + + def parse_geometry(self, geom, scene): + """ Create a WKT SQL term for the given geometry. + The function understands the following formats: + + []: + Geometry from a scene. If the scene is omitted, use the + default scene. +

+ Point geometry +

,...,

+ Line geometry + (

,...,

) + Polygon geometry + +

may either be a coordinate of the form ' ' or a single + number. In the latter case it must refer to a point in + a previously defined grid. + """ + if geom.find(':') >= 0: + return "ST_SetSRID({}, 4326)".format(self.get_scene_geometry(scene, geom)) + + if geom.find(',') < 0: + out = "POINT({})".format(self.mk_wkt_point(geom)) + elif geom.find('(') < 0: + out = "LINESTRING({})".format(self.mk_wkt_points(geom)) + else: + out = "POLYGON(({}))".format(self.mk_wkt_points(geom.strip('() '))) + + return "ST_SetSRID('{}'::geometry, 4326)".format(out) + + def mk_wkt_point(self, point): + """ Parse a point description. + The point may either consist of 'x y' cooordinates or a number + that refers to a grid setup. + """ + geom = point.strip() + if geom.find(' ') >= 0: + return geom + + try: + pt = self.grid_node(int(geom)) + except ValueError: + assert False, "Scenario error: Point '{}' is not a number".format(geom) + + assert pt is not None, "Scenario error: Point '{}' not found in grid".format(geom) + return "{} {}".format(*pt) + + def mk_wkt_points(self, geom): + """ Parse a list of points. + The list must be a comma-separated list of points. Points + in coordinate and grid format may be mixed. + """ + return ','.join([self.mk_wkt_point(x) for x in geom.split(',')]) + + def get_scene_geometry(self, default_scene, name): + """ Load the geometry from a scene. + """ + geoms = [] + for obj in name.split('+'): + oname = obj.strip() + if oname.startswith(':'): + assert default_scene is not None, "Scenario error: You need to set a scene" + defscene = self.load_scene(default_scene) + wkt = defscene[oname[1:]] + else: + scene, obj = oname.split(':', 2) + scene_geoms = self.load_scene(scene) + wkt = scene_geoms[obj] + + geoms.append("'{}'::geometry".format(wkt)) + + if len(geoms) == 1: + return geoms[0] + + return 'ST_LineMerge(ST_Collect(ARRAY[{}]))'.format(','.join(geoms)) + + def load_scene(self, name): + """ Load a scene from a file. + """ + if name in self.scene_cache: + return self.scene_cache[name] + + scene = {} + with open(Path(self.scene_path) / "{}.wkt".format(name), 'r') as fd: + for line in fd: + if line.strip(): + obj, wkt = line.split('|', 2) + scene[obj.strip()] = wkt.strip() + self.scene_cache[name] = scene + + return scene + + def clear_grid(self): + self.grid = {} + + def add_grid_node(self, nodeid, x, y): + self.grid[nodeid] = (x, y) + + def grid_node(self, nodeid): + return self.grid.get(nodeid) From 14e5bc7a1723fd07c3b58eb0bda8a9b1221ef123 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 4 Jan 2021 17:04:47 +0100 Subject: [PATCH 03/17] bdd: move grid generation code into geometry factory --- test/bdd/steps/geometry_factory.py | 19 ++++++++++++++----- test/bdd/steps/osm_data.py | 20 ++++---------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/test/bdd/steps/geometry_factory.py b/test/bdd/steps/geometry_factory.py index 7eedfc37..0a403839 100644 --- a/test/bdd/steps/geometry_factory.py +++ b/test/bdd/steps/geometry_factory.py @@ -9,7 +9,7 @@ class GeometryFactory: defpath = Path(__file__) / '..' / '..' / '..' / 'scenes' / 'data' self.scene_path = os.environ.get('SCENE_PATH', defpath.resolve()) self.scene_cache = {} - self.clear_grid() + self.grid = {} def parse_geometry(self, geom, scene): """ Create a WKT SQL term for the given geometry. @@ -103,11 +103,20 @@ class GeometryFactory: return scene - def clear_grid(self): + def set_grid(self, lines, grid_step): + """ Replace the grid with one from the given lines. + """ self.grid = {} - - def add_grid_node(self, nodeid, x, y): - self.grid[nodeid] = (x, y) + y = 0 + for line in lines: + x = 0 + for pt_id in line: + if pt_id.isdigit(): + self.grid[int(pt_id)] = (x, y) + x += grid_step + y += grid_step def grid_node(self, nodeid): + """ Get the coordinates for the given grid node. + """ return self.grid.get(nodeid) diff --git a/test/bdd/steps/osm_data.py b/test/bdd/steps/osm_data.py index 0f8b1188..c1043baa 100644 --- a/test/bdd/steps/osm_data.py +++ b/test/bdd/steps/osm_data.py @@ -7,28 +7,16 @@ import os def define_node_grid(context, grid_step): """ Define a grid of node positions. + Use a table to define the grid. The nodes must be integer ids. Optionally + you can give the grid distance. The default is 0.00001 degrees. """ if grid_step is not None: grid_step = float(grid_step.strip()) else: grid_step = 0.00001 - context.osm.clear_grid() - - i = 0 - for h in context.table.headings: - if h.isdigit(): - context.osm.add_grid_node(int(h), 0, i) - i += grid_step - - x = grid_step - for r in context.table: - y = 0 - for h in r: - if h.isdigit(): - context.osm.add_grid_node(int(h), x, y) - y += grid_step - x += grid_step + context.osm.set_grid([context.table.headings] + [list(h) for h in context.table], + grid_step) @when(u'loading osm data') From faa85ded50bf8ab4e772d0352f316c25d2a0f510 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 4 Jan 2021 17:54:51 +0100 Subject: [PATCH 04/17] bdd: move NominatimEnvironment into separate file Also cleans up and modernizes the code and adds documentation. --- test/bdd/environment.py | 202 +-------------------- test/bdd/steps/nominatim_environment.py | 224 ++++++++++++++++++++++++ 2 files changed, 229 insertions(+), 197 deletions(-) create mode 100644 test/bdd/steps/nominatim_environment.py diff --git a/test/bdd/environment.py b/test/bdd/environment.py index b6f6b986..aeee2301 100644 --- a/test/bdd/environment.py +++ b/test/bdd/environment.py @@ -1,17 +1,11 @@ from behave import * -import logging -import os -import psycopg2 -import psycopg2.extras -import subprocess -import tempfile +from pathlib import Path from steps.geometry_factory import GeometryFactory - -logger = logging.getLogger(__name__) +from steps.nominatim_environment import NominatimEnvironment userconfig = { - 'BUILDDIR' : os.path.join(os.path.split(__file__)[0], "../../build"), + 'BUILDDIR' : (Path(__file__) / '..' / '..' / '..' / 'build').resolve(), 'REMOVE_TEMPLATE' : False, 'KEEP_TEST_DB' : False, 'DB_HOST' : None, @@ -27,198 +21,12 @@ userconfig = { use_step_matcher("re") - -class NominatimEnvironment(object): - """ Collects all functions for the execution of Nominatim functions. - """ - - def __init__(self, config): - self.build_dir = os.path.abspath(config['BUILDDIR']) - self.src_dir = os.path.abspath(os.path.join(os.path.split(__file__)[0], "../..")) - self.db_host = config['DB_HOST'] - self.db_port = config['DB_PORT'] - self.db_user = config['DB_USER'] - self.db_pass = config['DB_PASS'] - self.template_db = config['TEMPLATE_DB'] - self.test_db = config['TEST_DB'] - self.api_test_db = config['API_TEST_DB'] - self.server_module_path = config['SERVER_MODULE_PATH'] - self.reuse_template = not config['REMOVE_TEMPLATE'] - self.keep_scenario_db = config['KEEP_TEST_DB'] - self.code_coverage_path = config['PHPCOV'] - self.code_coverage_id = 1 - self.test_env = None - - self.template_db_done = False - self.website_dir = None - - def connect_database(self, dbname): - dbargs = {'database': dbname} - if self.db_host: - dbargs['host'] = self.db_host - if self.db_port: - dbargs['port'] = self.db_port - if self.db_user: - dbargs['user'] = self.db_user - if self.db_pass: - dbargs['password'] = self.db_pass - conn = psycopg2.connect(**dbargs) - return conn - - def next_code_coverage_file(self): - fn = os.path.join(self.code_coverage_path, "%06d.cov" % self.code_coverage_id) - self.code_coverage_id += 1 - - return fn - - def write_nominatim_config(self, dbname): - dsn = 'pgsql:dbname={}{}{}{}{}'.format( - dbname, - (';host=' + self.db_host) if self.db_host else '', - (';port=' + self.db_port) if self.db_port else '', - (';user=' + self.db_user) if self.db_user else '', - (';password=' + self.db_pass) if self.db_pass else '' - ) - - if self.website_dir is not None \ - and self.test_env is not None \ - and dsn == self.test_env['NOMINATIM_DATABASE_DSN']: - return # environment already set uo - - self.test_env = os.environ - self.test_env['NOMINATIM_DATABASE_DSN'] = dsn - self.test_env['NOMINATIM_FLATNODE_FILE'] = '' - self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full' - self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes' - - if self.website_dir is not None: - self.website_dir.cleanup() - - self.website_dir = tempfile.TemporaryDirectory() - self.run_setup_script('setup-website') - - - def db_drop_database(self, name): - conn = self.connect_database('postgres') - conn.set_isolation_level(0) - cur = conn.cursor() - cur.execute('DROP DATABASE IF EXISTS %s' % (name, )) - conn.close() - - def setup_template_db(self): - if self.template_db_done: - return - - self.template_db_done = True - - if self.reuse_template: - # check that the template is there - conn = self.connect_database('postgres') - cur = conn.cursor() - cur.execute('select count(*) from pg_database where datname = %s', - (self.template_db,)) - if cur.fetchone()[0] == 1: - return - conn.close() - else: - # just in case... make sure a previous table has been dropped - self.db_drop_database(self.template_db) - - try: - # call the first part of database setup - self.write_nominatim_config(self.template_db) - self.run_setup_script('create-db', 'setup-db') - # remove external data to speed up indexing for tests - conn = self.connect_database(self.template_db) - cur = conn.cursor() - cur.execute("""select tablename from pg_tables - where tablename in ('gb_postcode', 'us_postcode')""") - for t in cur: - conn.cursor().execute('TRUNCATE TABLE %s' % (t[0],)) - conn.commit() - conn.close() - - # execute osm2pgsql import on an empty file to get the right tables - with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd: - fd.write(b'') - fd.flush() - self.run_setup_script('import-data', - 'ignore-errors', - 'create-functions', - 'create-tables', - 'create-partition-tables', - 'create-partition-functions', - 'load-data', - 'create-search-indices', - osm_file=fd.name, - osm2pgsql_cache='200') - except: - self.db_drop_database(self.template_db) - raise - - - def setup_api_db(self, context): - self.write_nominatim_config(self.api_test_db) - - def setup_unknown_db(self, context): - self.write_nominatim_config('UNKNOWN_DATABASE_NAME') - - def setup_db(self, context): - self.setup_template_db() - self.write_nominatim_config(self.test_db) - conn = self.connect_database(self.template_db) - conn.set_isolation_level(0) - cur = conn.cursor() - cur.execute('DROP DATABASE IF EXISTS %s' % (self.test_db, )) - cur.execute('CREATE DATABASE %s TEMPLATE = %s' % (self.test_db, self.template_db)) - conn.close() - context.db = self.connect_database(self.test_db) - psycopg2.extras.register_hstore(context.db, globally=False) - - def teardown_db(self, context): - if 'db' in context: - context.db.close() - - if not self.keep_scenario_db: - self.db_drop_database(self.test_db) - - def run_setup_script(self, *args, **kwargs): - if self.server_module_path: - kwargs = dict(kwargs) - kwargs['module_path'] = self.server_module_path - self.run_nominatim_script('setup', *args, **kwargs) - - def run_update_script(self, *args, **kwargs): - self.run_nominatim_script('update', *args, **kwargs) - - def run_nominatim_script(self, script, *args, **kwargs): - cmd = ['/usr/bin/env', 'php', '-Cq'] - cmd.append(os.path.join(self.build_dir, 'utils', '%s.php' % script)) - cmd.extend(['--%s' % x for x in args]) - for k, v in kwargs.items(): - cmd.extend(('--' + k.replace('_', '-'), str(v))) - - if self.website_dir is not None: - cwd = self.website_dir.name - else: - cwd = self.build_dir - - proc = subprocess.Popen(cmd, cwd=cwd, env=self.test_env, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (outp, outerr) = proc.communicate() - outerr = outerr.decode('utf-8').replace('\\n', '\n') - logger.debug("run_nominatim_script: %s\n%s\n%s" % (cmd, outp, outerr)) - assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr) - - - def before_all(context): # logging setup context.config.setup_logging() # set up -D options for k,v in userconfig.items(): context.config.userdata.setdefault(k, v) - logging.debug('User config: %s' %(str(context.config.userdata))) # Nominatim test setup context.nominatim = NominatimEnvironment(context.config.userdata) context.osm = GeometryFactory() @@ -228,9 +36,9 @@ def before_scenario(context, scenario): if 'DB' in context.tags: context.nominatim.setup_db(context) elif 'APIDB' in context.tags: - context.nominatim.setup_api_db(context) + context.nominatim.setup_api_db() elif 'UNKNOWNDB' in context.tags: - context.nominatim.setup_unknown_db(context) + context.nominatim.setup_unknown_db() context.scene = None def after_scenario(context, scenario): diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py new file mode 100644 index 00000000..4c973358 --- /dev/null +++ b/test/bdd/steps/nominatim_environment.py @@ -0,0 +1,224 @@ +import logging +import os +from pathlib import Path +import subprocess +import tempfile + +import psycopg2 +import psycopg2.extras + +LOG = logging.getLogger(__name__) + +class NominatimEnvironment: + """ Collects all functions for the execution of Nominatim functions. + """ + + def __init__(self, config): + self.build_dir = Path(config['BUILDDIR']).resolve() + self.src_dir = (Path(__file__) / '..' / '..' / '..' / '..').resolve() + self.db_host = config['DB_HOST'] + self.db_port = config['DB_PORT'] + self.db_user = config['DB_USER'] + self.db_pass = config['DB_PASS'] + self.template_db = config['TEMPLATE_DB'] + self.test_db = config['TEST_DB'] + self.api_test_db = config['API_TEST_DB'] + self.server_module_path = config['SERVER_MODULE_PATH'] + self.reuse_template = not config['REMOVE_TEMPLATE'] + self.keep_scenario_db = config['KEEP_TEST_DB'] + self.code_coverage_path = config['PHPCOV'] + self.code_coverage_id = 1 + self.test_env = None + + self.template_db_done = False + self.website_dir = None + + def connect_database(self, dbname): + """ Return a connection to the database with the given name. + Uses configured host, user and port. + """ + dbargs = {'database': dbname} + if self.db_host: + dbargs['host'] = self.db_host + if self.db_port: + dbargs['port'] = self.db_port + if self.db_user: + dbargs['user'] = self.db_user + if self.db_pass: + dbargs['password'] = self.db_pass + conn = psycopg2.connect(**dbargs) + return conn + + def next_code_coverage_file(self): + """ Generate the next name for a coverage file. + """ + fn = Path(self.code_coverage_path) / "{:06d}.cov".format(self.code_coverage_id) + self.code_coverage_id += 1 + + return fn.resolve() + + def write_nominatim_config(self, dbname): + """ Set up a custom test configuration that connects to the given + database. This sets up the environment variables so that they can + be picked up by dotenv and creates a project directory with the + appropriate website scripts. + """ + dsn = 'pgsql:dbname={}'.format(dbname) + if self.db_host: + dsn += ';host=' + self.db_host + if self.db_port: + dsn += ';port=' + self.db_port + if self.db_user: + dsn += ';user=' + self.db_user + if self.db_pass: + dsn += ';password=' + self.db_pass + + if self.website_dir is not None \ + and self.test_env is not None \ + and dsn == self.test_env['NOMINATIM_DATABASE_DSN']: + return # environment already set uo + + self.test_env = os.environ + self.test_env['NOMINATIM_DATABASE_DSN'] = dsn + self.test_env['NOMINATIM_FLATNODE_FILE'] = '' + self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full' + self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes' + + if self.server_module_path: + self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path + + if self.website_dir is not None: + self.website_dir.cleanup() + + self.website_dir = tempfile.TemporaryDirectory() + self.run_setup_script('setup-website') + + + def db_drop_database(self, name): + """ Drop the database with the given name. + """ + conn = self.connect_database('postgres') + conn.set_isolation_level(0) + cur = conn.cursor() + cur.execute('DROP DATABASE IF EXISTS {}'.format(name)) + conn.close() + + def setup_template_db(self): + """ Setup a template database that already contains common test data. + Having a template database speeds up tests considerably but at + the price that the tests sometimes run with stale data. + """ + if self.template_db_done: + return + + self.template_db_done = True + + if self.reuse_template: + # check that the template is there + conn = self.connect_database('postgres') + cur = conn.cursor() + cur.execute('select count(*) from pg_database where datname = %s', + (self.template_db,)) + if cur.fetchone()[0] == 1: + return + conn.close() + else: + # just in case... make sure a previous table has been dropped + self.db_drop_database(self.template_db) + + try: + # call the first part of database setup + self.write_nominatim_config(self.template_db) + self.run_setup_script('create-db', 'setup-db') + # remove external data to speed up indexing for tests + conn = self.connect_database(self.template_db) + cur = conn.cursor() + cur.execute("""select tablename from pg_tables + where tablename in ('gb_postcode', 'us_postcode')""") + for t in cur: + conn.cursor().execute('TRUNCATE TABLE {}'.format(t[0])) + conn.commit() + conn.close() + + # execute osm2pgsql import on an empty file to get the right tables + with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd: + fd.write(b'') + fd.flush() + self.run_setup_script('import-data', + 'ignore-errors', + 'create-functions', + 'create-tables', + 'create-partition-tables', + 'create-partition-functions', + 'load-data', + 'create-search-indices', + osm_file=fd.name, + osm2pgsql_cache='200') + except: + self.db_drop_database(self.template_db) + raise + + + def setup_api_db(self): + """ Setup a test against the API test database. + """ + self.write_nominatim_config(self.api_test_db) + + def setup_unknown_db(self): + """ Setup a test against a non-existing database. + """ + self.write_nominatim_config('UNKNOWN_DATABASE_NAME') + + def setup_db(self, context): + """ Setup a test against a fresh, empty test database. + """ + self.setup_template_db() + self.write_nominatim_config(self.test_db) + conn = self.connect_database(self.template_db) + conn.set_isolation_level(0) + cur = conn.cursor() + cur.execute('DROP DATABASE IF EXISTS {}'.format(self.test_db)) + cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db)) + conn.close() + context.db = self.connect_database(self.test_db) + psycopg2.extras.register_hstore(context.db, globally=False) + + def teardown_db(self, context): + """ Remove the test database, if it exists. + """ + if 'db' in context: + context.db.close() + + if not self.keep_scenario_db: + self.db_drop_database(self.test_db) + + def run_setup_script(self, *args, **kwargs): + """ Run the Nominatim setup script with the given arguments. + """ + self.run_nominatim_script('setup', *args, **kwargs) + + def run_update_script(self, *args, **kwargs): + """ Run the Nominatim update script with the given arguments. + """ + self.run_nominatim_script('update', *args, **kwargs) + + def run_nominatim_script(self, script, *args, **kwargs): + """ Run one of the Nominatim utility scripts with the given arguments. + """ + cmd = ['/usr/bin/env', 'php', '-Cq'] + cmd.append((Path(self.build_dir) / 'utils' / '{}.php'.format(script)).resolve()) + cmd.extend(['--' + x for x in args]) + for k, v in kwargs.items(): + cmd.extend(('--' + k.replace('_', '-'), str(v))) + + if self.website_dir is not None: + cwd = self.website_dir.name + else: + cwd = self.build_dir + + proc = subprocess.Popen(cmd, cwd=cwd, env=self.test_env, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (outp, outerr) = proc.communicate() + outerr = outerr.decode('utf-8').replace('\\n', '\n') + LOG.debug("run_nominatim_script: %s\n%s\n%s", cmd, outp, outerr) + assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr) From 72587b08fa66d70930b279a7ed96a945ed6c27e7 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 4 Jan 2021 19:58:59 +0100 Subject: [PATCH 05/17] bdd: move external process execution in separate func --- test/bdd/steps/nominatim_environment.py | 11 ++--------- test/bdd/steps/queries.py | 25 ++++--------------------- test/bdd/steps/utils.py | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+), 30 deletions(-) create mode 100644 test/bdd/steps/utils.py diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index 4c973358..7741d667 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -1,13 +1,11 @@ -import logging import os from pathlib import Path -import subprocess import tempfile import psycopg2 import psycopg2.extras -LOG = logging.getLogger(__name__) +from steps.utils import run_script class NominatimEnvironment: """ Collects all functions for the execution of Nominatim functions. @@ -216,9 +214,4 @@ class NominatimEnvironment: else: cwd = self.build_dir - proc = subprocess.Popen(cmd, cwd=cwd, env=self.test_env, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (outp, outerr) = proc.communicate() - outerr = outerr.decode('utf-8').replace('\\n', '\n') - LOG.debug("run_nominatim_script: %s\n%s\n%s", cmd, outp, outerr) - assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr) + run_script(cmd, cwd=cwd, env=self.test_env) diff --git a/test/bdd/steps/queries.py b/test/bdd/steps/queries.py index 0ea4685b..c7fdd6db 100644 --- a/test/bdd/steps/queries.py +++ b/test/bdd/steps/queries.py @@ -10,11 +10,11 @@ import io import re import logging import xml.etree.ElementTree as ET -import subprocess from urllib.parse import urlencode from collections import OrderedDict from check_functions import Almost +from utils import run_script logger = logging.getLogger(__name__) @@ -277,14 +277,9 @@ def query_cmd(context, query, dups): if dups: cmd.extend(('--dedupe', '0')) - proc = subprocess.Popen(cmd, cwd=context.nominatim.build_dir, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (outp, err) = proc.communicate() + outp, err = run_script(cmd, cwd=context.nominatim.build_dir) - assert proc.returncode == 0, "query.php failed with message: %s\noutput: %s" % (err, outp) - logger.debug("run_nominatim_script: %s\n%s\n" % (cmd, outp.decode('utf-8').replace('\\n', '\n'))) - - context.response = SearchResponse(outp.decode('utf-8'), 'json') + context.response = SearchResponse(outp, 'json') def send_api_query(endpoint, params, fmt, context): if fmt is not None: @@ -326,19 +321,7 @@ def send_api_query(endpoint, params, fmt, context): for k,v in params.items(): cmd.append("%s=%s" % (k, v)) - proc = subprocess.Popen(cmd, cwd=context.nominatim.website_dir.name, env=env, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - (outp, err) = proc.communicate() - outp = outp.decode('utf-8') - err = err.decode("utf-8") - - logger.debug("Result: \n===============================\n" - + outp + "\n===============================\n") - - assert proc.returncode == 0, \ - "%s failed with message: %s" % ( - os.path.basename(env['SCRIPT_FILENAME']), err) + outp, err = run_script(cmd, cwd=context.nominatim.website_dir.name, env=env) assert len(err) == 0, "Unexpected PHP error: %s" % (err) diff --git a/test/bdd/steps/utils.py b/test/bdd/steps/utils.py new file mode 100644 index 00000000..64d020db --- /dev/null +++ b/test/bdd/steps/utils.py @@ -0,0 +1,22 @@ +""" +Various smaller helps for step execution. +""" +import logging +import subprocess + +LOG = logging.getLogger(__name__) + +def run_script(cmd, **kwargs): + """ Run the given command, check that it is successful and output + when necessary. + """ + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + **kwargs) + (outp, outerr) = proc.communicate() + outp = outp.decode('utf-8') + outerr = outerr.decode('utf-8').replace('\\n', '\n') + LOG.debug("Run command: %s\n%s\n%s", cmd, outp, outerr) + + assert proc.returncode == 0, "Script '{}' failed:\n{}\n{}\n".format(cmd[0], outp, outerr) + + return outp, outerr From 2712c5f90ee9e15d69424e367c7e53ab2a579f7b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 4 Jan 2021 20:17:17 +0100 Subject: [PATCH 06/17] bdd: rename and clean up osm_data steps Move common OPL creation code into a function and remove unused imports. --- .../steps/{osm_data.py => steps_osm_data.py} | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) rename test/bdd/steps/{osm_data.py => steps_osm_data.py} (80%) diff --git a/test/bdd/steps/osm_data.py b/test/bdd/steps/steps_osm_data.py similarity index 80% rename from test/bdd/steps/osm_data.py rename to test/bdd/steps/steps_osm_data.py index c1043baa..465ad718 100644 --- a/test/bdd/steps/osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -1,8 +1,27 @@ -import subprocess import tempfile import random import os +def write_opl_file(opl, grid): + """ Create a temporary OSM file from OPL and return the file name. It is + the responsibility of the caller to delete the file again. + + Node with missing coordinates, can retrieve their coordinates from + a supplied grid. Failing that a random coordinate is assigned. + """ + with tempfile.NamedTemporaryFile(suffix='.opl', delete=False) as fd: + for line in opl.splitlines(): + if line.startswith('n') and line.find(' x') < 0: + coord = grid.grid_node(int(line[1:].split(' ')[0])) + if coord is None: + coord = (random.random() * 360 - 180, + random.random() * 180 - 90) + line += " x%f y%f" % coord + fd.write(line.encode('utf-8')) + fd.write(b'\n') + + return fd.name + @given(u'the ([0-9.]+ )?grid') def define_node_grid(context, grid_step): """ @@ -27,21 +46,11 @@ def load_osm_file(context): The data is expected as attached text in OPL format. """ - # create a OSM file in /tmp and import it - with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.opl', delete=False) as fd: - fname = fd.name - for line in context.text.splitlines(): - if line.startswith('n') and line.find(' x') < 0: - coord = context.osm.grid_node(int(line[1:].split(' ')[0])) - if coord is None: - coord = (random.random() * 360 - 180, - random.random() * 180 - 90) - line += " x%f y%f" % coord - fd.write(line.encode('utf-8')) - fd.write(b'\n') - + # create an OSM file and import it + fname = write_opl_file(context.text, context.osm) context.nominatim.run_setup_script('import-data', osm_file=fname, osm2pgsql_cache=300) + os.remove(fname) ### reintroduce the triggers/indexes we've lost by having osm2pgsql set up place again cur = context.db.cursor() @@ -52,7 +61,6 @@ def load_osm_file(context): cur.execute("""CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type)""") context.db.commit() - os.remove(fname) @when(u'updating osm data') def update_from_osm_file(context): @@ -77,15 +85,7 @@ def update_from_osm_file(context): context.nominatim.run_setup_script('create-functions', 'create-partition-functions', 'enable-diff-updates') - # create a OSM file in /tmp and import it - with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.opl', delete=False) as fd: - fname = fd.name - for line in context.text.splitlines(): - if line.startswith('n') and line.find(' x') < 0: - line += " x%d y%d" % (random.random() * 360 - 180, - random.random() * 180 - 90) - fd.write(line.encode('utf-8')) - fd.write(b'\n') - + # create an OSM file and import it + fname = write_opl_file(context.text, context.osm) context.nominatim.run_update_script(import_diff=fname) os.remove(fname) From 8a93f8ed94d04fed67824366b1691aa62b2c8a1d Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 4 Jan 2021 22:18:01 +0100 Subject: [PATCH 07/17] bdd: move Response classes in own file and simplify Removes most of the duplicated parse functions, introduces a common assert_field function with a more expressive error message. --- test/bdd/steps/http_responses.py | 194 ++++++++++++++++ .../{queries.py => steps_api_queries.py} | 218 +----------------- test/bdd/steps/steps_osm_data.py | 2 +- 3 files changed, 201 insertions(+), 213 deletions(-) create mode 100644 test/bdd/steps/http_responses.py rename test/bdd/steps/{queries.py => steps_api_queries.py} (64%) diff --git a/test/bdd/steps/http_responses.py b/test/bdd/steps/http_responses.py new file mode 100644 index 00000000..c3cd4f52 --- /dev/null +++ b/test/bdd/steps/http_responses.py @@ -0,0 +1,194 @@ +""" +Classes wrapping HTTP responses from the Nominatim API. +""" +from collections import OrderedDict +import re +import json +import xml.etree.ElementTree as ET + +from check_functions import Almost + +def _geojson_result_to_json_result(geojson_result): + result = geojson_result['properties'] + result['geojson'] = geojson_result['geometry'] + if 'bbox' in geojson_result: + # bbox is minlon, minlat, maxlon, maxlat + # boundingbox is minlat, maxlat, minlon, maxlon + result['boundingbox'] = [geojson_result['bbox'][1], + geojson_result['bbox'][3], + geojson_result['bbox'][0], + geojson_result['bbox'][2]] + return result + +class BadRowValueAssert: + """ Lazily formatted message for failures to find a field content. + """ + + def __init__(self, response, idx, field, value): + self.idx = idx + self.field = field + self.value = value + self.row = response.result[idx] + + def __str__(self): + return "\nBad value for row {} field '{}'. Expected: {}, got: {}.\nFull row: {}"""\ + .format(self.idx, self.field, self.value, + self.row[self.field], json.dumps(self.row, indent=4)) + + +class GenericResponse: + """ Common base class for all API responses. + """ + def __init__(self, page, fmt, errorcode=200): + self.page = page + self.format = fmt + self.errorcode = errorcode + self.result = [] + self.header = dict() + + if errorcode == 200: + getattr(self, '_parse_' + fmt)() + + def _parse_json(self): + m = re.fullmatch(r'([\w$][^(]*)\((.*)\)', self.page) + if m is None: + code = self.page + else: + code = m.group(2) + self.header['json_func'] = m.group(1) + self.result = json.JSONDecoder(object_pairs_hook=OrderedDict).decode(code) + if isinstance(self.result, OrderedDict): + self.result = [self.result] + + def _parse_geojson(self): + self._parse_json() + if 'error' in self.result[0]: + self.result = [] + else: + self.result = list(map(_geojson_result_to_json_result, self.result[0]['features'])) + + def _parse_geocodejson(self): + self._parse_geojson() + if self.result is not None: + self.result = [r['geocoding'] for r in self.result] + + def assert_field(self, idx, field, value): + """ Check that result row `idx` has a field `field` with value `value`. + Float numbers are matched approximately. When the expected value + starts with a carat, regular expression matching is used. + """ + assert field in self.result[idx], \ + "Result row {} has no field '{}'.\nFull row: {}"\ + .format(idx, field, json.dumps(self.result[idx], indent=4)) + + if isinstance(value, float): + assert Almost(value) == float(self.result[idx][field]), \ + BadRowValueAssert(self, idx, field, value) + elif value.startswith("^"): + assert re.fullmatch(value, self.result[idx][field]), \ + BadRowValueAssert(self, idx, field, value) + else: + assert str(self.result[idx][field]) == str(value), \ + BadRowValueAssert(self, idx, field, value) + + def match_row(self, row): + """ Match the result fields against the given behave table row. + """ + if 'ID' in row.headings: + todo = [int(row['ID'])] + else: + todo = range(len(self.result)) + + for i in todo: + for name, value in zip(row.headings, row.cells): + if name == 'ID': + pass + elif name == 'osm': + self.assert_field(i, 'osm_type', value[0]) + self.assert_field(i, 'osm_id', value[1:]) + elif name == 'centroid': + lon, lat = value.split(' ') + self.assert_field(i, 'lat', float(lat)) + self.assert_field(i, 'lon', float(lon)) + else: + self.assert_field(i, name, value) + + def property_list(self, prop): + return [x[prop] for x in self.result] + + +class SearchResponse(GenericResponse): + """ Specialised class for search and lookup responses. + Transforms the xml response in a format similar to json. + """ + + def _parse_xml(self): + xml_tree = ET.fromstring(self.page) + + self.header = dict(xml_tree.attrib) + + for child in xml_tree: + assert child.tag == "place" + self.result.append(dict(child.attrib)) + + address = {} + for sub in child: + if sub.tag == 'extratags': + self.result[-1]['extratags'] = {} + for tag in sub: + self.result[-1]['extratags'][tag.attrib['key']] = tag.attrib['value'] + elif sub.tag == 'namedetails': + self.result[-1]['namedetails'] = {} + for tag in sub: + self.result[-1]['namedetails'][tag.attrib['desc']] = tag.text + elif sub.tag == 'geokml': + self.result[-1][sub.tag] = True + else: + address[sub.tag] = sub.text + + if address: + self.result[-1]['address'] = address + + +class ReverseResponse(GenericResponse): + """ Specialised class for reverse responses. + Transforms the xml response in a format similar to json. + """ + + def _parse_xml(self): + xml_tree = ET.fromstring(self.page) + + self.header = dict(xml_tree.attrib) + self.result = [] + + for child in xml_tree: + if child.tag == 'result': + assert not self.result, "More than one result in reverse result" + self.result.append(dict(child.attrib)) + elif child.tag == 'addressparts': + address = {} + for sub in child: + address[sub.tag] = sub.text + self.result[0]['address'] = address + elif child.tag == 'extratags': + self.result[0]['extratags'] = {} + for tag in child: + self.result[0]['extratags'][tag.attrib['key']] = tag.attrib['value'] + elif child.tag == 'namedetails': + self.result[0]['namedetails'] = {} + for tag in child: + self.result[0]['namedetails'][tag.attrib['desc']] = tag.text + elif child.tag == 'geokml': + self.result[0][child.tag] = True + else: + assert child.tag == 'error', \ + "Unknown XML tag {} on page: {}".format(child.tag, self.page) + + +class StatusResponse(GenericResponse): + """ Specialised class for status responses. + Can also parse text responses. + """ + + def _parse_text(self): + pass diff --git a/test/bdd/steps/queries.py b/test/bdd/steps/steps_api_queries.py similarity index 64% rename from test/bdd/steps/queries.py rename to test/bdd/steps/steps_api_queries.py index c7fdd6db..ebda5ec6 100644 --- a/test/bdd/steps/queries.py +++ b/test/bdd/steps/steps_api_queries.py @@ -1,22 +1,18 @@ -""" Steps that run search queries. +""" Steps that run queries against the API. Queries may either be run directly via PHP using the query script - or via the HTTP interface. + or via the HTTP interface using php-cgi. """ - import json import os -import io import re import logging -import xml.etree.ElementTree as ET from urllib.parse import urlencode -from collections import OrderedDict -from check_functions import Almost from utils import run_script +from http_responses import GenericResponse, SearchResponse, ReverseResponse, StatusResponse -logger = logging.getLogger(__name__) +LOG = logging.getLogger(__name__) BASE_SERVER_ENV = { 'HTTP_HOST' : 'localhost', @@ -56,208 +52,6 @@ def compare(operator, op1, op2): else: raise Exception("unknown operator '%s'" % operator) -class GenericResponse(object): - - def match_row(self, row): - if 'ID' in row.headings: - todo = [int(row['ID'])] - else: - todo = range(len(self.result)) - - for i in todo: - res = self.result[i] - for h in row.headings: - if h == 'ID': - pass - elif h == 'osm': - assert res['osm_type'] == row[h][0] - assert res['osm_id'] == int(row[h][1:]) - elif h == 'centroid': - x, y = row[h].split(' ') - assert Almost(float(y)) == float(res['lat']) - assert Almost(float(x)) == float(res['lon']) - elif row[h].startswith("^"): - assert h in res - assert re.fullmatch(row[h], res[h]) is not None, \ - "attribute '%s': expected: '%s', got '%s'" % (h, row[h], res[h]) - else: - assert h in res - assert str(res[h]) == str(row[h]) - - def property_list(self, prop): - return [ x[prop] for x in self.result ] - - -class SearchResponse(GenericResponse): - - def __init__(self, page, fmt='json', errorcode=200): - self.page = page - self.format = fmt - self.errorcode = errorcode - self.result = [] - self.header = dict() - - if errorcode == 200: - getattr(self, 'parse_' + fmt)() - - def parse_json(self): - m = re.fullmatch(r'([\w$][^(]*)\((.*)\)', self.page) - if m is None: - code = self.page - else: - code = m.group(2) - self.header['json_func'] = m.group(1) - self.result = json.JSONDecoder(object_pairs_hook=OrderedDict).decode(code) - - def parse_geojson(self): - self.parse_json() - self.result = geojson_results_to_json_results(self.result) - - def parse_geocodejson(self): - self.parse_geojson() - if self.result is not None: - self.result = [r['geocoding'] for r in self.result] - - def parse_xml(self): - et = ET.fromstring(self.page) - - self.header = dict(et.attrib) - - for child in et: - assert child.tag == "place" - self.result.append(dict(child.attrib)) - - address = {} - for sub in child: - if sub.tag == 'extratags': - self.result[-1]['extratags'] = {} - for tag in sub: - self.result[-1]['extratags'][tag.attrib['key']] = tag.attrib['value'] - elif sub.tag == 'namedetails': - self.result[-1]['namedetails'] = {} - for tag in sub: - self.result[-1]['namedetails'][tag.attrib['desc']] = tag.text - elif sub.tag in ('geokml'): - self.result[-1][sub.tag] = True - else: - address[sub.tag] = sub.text - - if len(address) > 0: - self.result[-1]['address'] = address - - -class ReverseResponse(GenericResponse): - - def __init__(self, page, fmt='json', errorcode=200): - self.page = page - self.format = fmt - self.errorcode = errorcode - self.result = [] - self.header = dict() - - if errorcode == 200: - getattr(self, 'parse_' + fmt)() - - def parse_json(self): - m = re.fullmatch(r'([\w$][^(]*)\((.*)\)', self.page) - if m is None: - code = self.page - else: - code = m.group(2) - self.header['json_func'] = m.group(1) - self.result = [json.JSONDecoder(object_pairs_hook=OrderedDict).decode(code)] - - def parse_geojson(self): - self.parse_json() - if 'error' in self.result: - return - self.result = geojson_results_to_json_results(self.result[0]) - - def parse_geocodejson(self): - self.parse_geojson() - if self.result is not None: - self.result = [r['geocoding'] for r in self.result] - - def parse_xml(self): - et = ET.fromstring(self.page) - - self.header = dict(et.attrib) - self.result = [] - - for child in et: - if child.tag == 'result': - assert len(self.result) == 0, "More than one result in reverse result" - self.result.append(dict(child.attrib)) - elif child.tag == 'addressparts': - address = {} - for sub in child: - address[sub.tag] = sub.text - self.result[0]['address'] = address - elif child.tag == 'extratags': - self.result[0]['extratags'] = {} - for tag in child: - self.result[0]['extratags'][tag.attrib['key']] = tag.attrib['value'] - elif child.tag == 'namedetails': - self.result[0]['namedetails'] = {} - for tag in child: - self.result[0]['namedetails'][tag.attrib['desc']] = tag.text - elif child.tag in ('geokml'): - self.result[0][child.tag] = True - else: - assert child.tag == 'error', \ - "Unknown XML tag %s on page: %s" % (child.tag, self.page) - - -class DetailsResponse(GenericResponse): - - def __init__(self, page, fmt='json', errorcode=200): - self.page = page - self.format = fmt - self.errorcode = errorcode - self.result = [] - self.header = dict() - - if errorcode == 200: - getattr(self, 'parse_' + fmt)() - - def parse_json(self): - self.result = [json.JSONDecoder(object_pairs_hook=OrderedDict).decode(self.page)] - - -class StatusResponse(GenericResponse): - - def __init__(self, page, fmt='text', errorcode=200): - self.page = page - self.format = fmt - self.errorcode = errorcode - - if errorcode == 200 and fmt != 'text': - getattr(self, 'parse_' + fmt)() - - def parse_json(self): - self.result = [json.JSONDecoder(object_pairs_hook=OrderedDict).decode(self.page)] - - -def geojson_result_to_json_result(geojson_result): - result = geojson_result['properties'] - result['geojson'] = geojson_result['geometry'] - if 'bbox' in geojson_result: - # bbox is minlon, minlat, maxlon, maxlat - # boundingbox is minlat, maxlat, minlon, maxlon - result['boundingbox'] = [ - geojson_result['bbox'][1], - geojson_result['bbox'][3], - geojson_result['bbox'][0], - geojson_result['bbox'][2] - ] - return result - - -def geojson_results_to_json_results(geojson_results): - if 'error' in geojson_results: - return - return list(map(geojson_result_to_json_result, geojson_results['features'])) - @when(u'searching for "(?P.*)"(?P with dups)?') def query_cmd(context, query, dups): @@ -301,7 +95,7 @@ def send_api_query(endpoint, params, fmt, context): env['SCRIPT_FILENAME'] = os.path.join(env['CONTEXT_DOCUMENT_ROOT'], '%s.php' % endpoint) - logger.debug("Environment:" + json.dumps(env, sort_keys=True, indent=2)) + LOG.debug("Environment:" + json.dumps(env, sort_keys=True, indent=2)) if hasattr(context, 'http_headers'): env.update(context.http_headers) @@ -395,7 +189,7 @@ def website_details_request(context, fmt, query): else: outfmt = fmt.strip() - context.response = DetailsResponse(outp, outfmt, status) + context.response = GenericResponse(outp, outfmt, status) @when(u'sending (?P\S+ )?lookup query for (?P.*)') def website_lookup_request(context, fmt, query): diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py index 465ad718..db208fe8 100644 --- a/test/bdd/steps/steps_osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -35,7 +35,7 @@ def define_node_grid(context, grid_step): grid_step = 0.00001 context.osm.set_grid([context.table.headings] + [list(h) for h in context.table], - grid_step) + grid_step) @when(u'loading osm data') From 12ae8a4ed38ecf61d375e12ea699bf265737ba2b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 5 Jan 2021 10:17:59 +0100 Subject: [PATCH 08/17] bdd: move output format computation into response --- test/bdd/steps/http_responses.py | 4 +++ test/bdd/steps/steps_api_queries.py | 43 ++++------------------------- 2 files changed, 9 insertions(+), 38 deletions(-) diff --git a/test/bdd/steps/http_responses.py b/test/bdd/steps/http_responses.py index c3cd4f52..161e29fd 100644 --- a/test/bdd/steps/http_responses.py +++ b/test/bdd/steps/http_responses.py @@ -40,6 +40,10 @@ class GenericResponse: """ Common base class for all API responses. """ def __init__(self, page, fmt, errorcode=200): + fmt = fmt.strip() + if fmt == 'jsonv2': + fmt = 'json' + self.page = page self.format = fmt self.errorcode = errorcode diff --git a/test/bdd/steps/steps_api_queries.py b/test/bdd/steps/steps_api_queries.py index ebda5ec6..47dc8ac3 100644 --- a/test/bdd/steps/steps_api_queries.py +++ b/test/bdd/steps/steps_api_queries.py @@ -148,12 +148,7 @@ def website_search_request(context, fmt, query, addr): outp, status = send_api_query('search', params, fmt, context) - if fmt is None or fmt == 'jsonv2 ': - outfmt = 'json' - else: - outfmt = fmt.strip() - - context.response = SearchResponse(outp, outfmt, status) + context.response = SearchResponse(outp, fmt or 'json', status) @when(u'sending (?P\S+ )?reverse coordinates (?P.+)?,(?P.+)?') def website_reverse_request(context, fmt, lat, lon): @@ -165,14 +160,7 @@ def website_reverse_request(context, fmt, lat, lon): outp, status = send_api_query('reverse', params, fmt, context) - if fmt is None: - outfmt = 'xml' - elif fmt == 'jsonv2 ': - outfmt = 'json' - else: - outfmt = fmt.strip() - - context.response = ReverseResponse(outp, outfmt, status) + context.response = ReverseResponse(outp, fmt or 'xml', status) @when(u'sending (?P\S+ )?details query for (?P.*)') def website_details_request(context, fmt, query): @@ -184,42 +172,21 @@ def website_details_request(context, fmt, query): params['place_id'] = query outp, status = send_api_query('details', params, fmt, context) - if fmt is None: - outfmt = 'json' - else: - outfmt = fmt.strip() - - context.response = GenericResponse(outp, outfmt, status) + context.response = GenericResponse(outp, fmt or 'json', status) @when(u'sending (?P\S+ )?lookup query for (?P.*)') def website_lookup_request(context, fmt, query): params = { 'osm_ids' : query } outp, status = send_api_query('lookup', params, fmt, context) - if fmt == 'json ': - outfmt = 'json' - elif fmt == 'jsonv2 ': - outfmt = 'json' - elif fmt == 'geojson ': - outfmt = 'geojson' - elif fmt == 'geocodejson ': - outfmt = 'geocodejson' - else: - outfmt = 'xml' - - context.response = SearchResponse(outp, outfmt, status) + context.response = SearchResponse(outp, fmt or 'xml', status) @when(u'sending (?P\S+ )?status query') def website_status_request(context, fmt): params = {} outp, status = send_api_query('status', params, fmt, context) - if fmt is None: - outfmt = 'text' - else: - outfmt = fmt.strip() - - context.response = StatusResponse(outp, outfmt, status) + context.response = StatusResponse(outp, fmt or 'text', status) @step(u'(?Pless than|more than|exactly|at least|at most) (?P\d+) results? (?:is|are) returned') def validate_result_number(context, operator, number): From 213bf7d19d0e99355954bfacfe0833695f770151 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 5 Jan 2021 10:20:00 +0100 Subject: [PATCH 09/17] bdd: rename db_ops steps Now all files implementing steps are called steps_*.py. --- test/bdd/steps/{db_ops.py => steps_db_ops.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/bdd/steps/{db_ops.py => steps_db_ops.py} (100%) diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/steps_db_ops.py similarity index 100% rename from test/bdd/steps/db_ops.py rename to test/bdd/steps/steps_db_ops.py From 58c471c627a8893413f9734ff36c3fdc00827e6e Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 5 Jan 2021 10:39:44 +0100 Subject: [PATCH 10/17] bdd: remove class for lazy formatting assert in combination with format() does the right thing and calls the __str__() method only when an assertion hits. --- test/bdd/steps/steps_db_ops.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 078e29fe..67ebcd3e 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -92,14 +92,6 @@ class PlaceColumn: self.geometry) cursor.execute(query, list(self.columns.values())) -class LazyFmt(object): - - def __init__(self, fmtstr, *args): - self.fmt = fmtstr - self.args = args - - def __str__(self): - return self.fmt % self.args class PlaceObjName(object): @@ -125,15 +117,15 @@ class PlaceObjName(object): def compare_place_id(expected, result, column, context): if expected == '0': assert result == 0, \ - LazyFmt("Bad place id in column %s. Expected: 0, got: %s.", + "Bad place id in column {}. Expected: 0, got: {!s}.".format( column, PlaceObjName(result, context.db)) elif expected == '-': assert result is None, \ - LazyFmt("bad place id in column %s: %s.", + "Bad place id in column {}: {!s}.".format( column, PlaceObjName(result, context.db)) else: assert NominatimID(expected).get_place_id(context.db.cursor()) == result, \ - LazyFmt("Bad place id in column %s. Expected: %s, got: %s.", + "Bad place id in column {}. Expected: {}, got: {!s}.".format( column, expected, PlaceObjName(result, context.db)) def check_database_integrity(context): From 5dfa76a610972eea3930ecff1c41fb795ea2e7e4 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 5 Jan 2021 11:22:43 +0100 Subject: [PATCH 11/17] bdd: switch to auto commit mode Put the connection to the test database into auto-commit mode and get rid of the explicit commits. Also use cursors always in context managers and unify the two implementations that copy data from the place table. --- test/bdd/steps/nominatim_environment.py | 23 + test/bdd/steps/steps_db_ops.py | 563 +++++++++++------------- test/bdd/steps/steps_osm_data.py | 12 +- 3 files changed, 289 insertions(+), 309 deletions(-) diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index 7741d667..2547bbe2 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -179,6 +179,7 @@ class NominatimEnvironment: cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db)) conn.close() context.db = self.connect_database(self.test_db) + context.db.autocommit = True psycopg2.extras.register_hstore(context.db, globally=False) def teardown_db(self, context): @@ -215,3 +216,25 @@ class NominatimEnvironment: cwd = self.build_dir run_script(cmd, cwd=cwd, env=self.test_env) + + def copy_from_place(self, db): + """ Copy data from place to the placex and location_property_osmline + tables invoking the appropriate triggers. + """ + self.run_setup_script('create-functions', 'create-partition-functions') + + with db.cursor() as cur: + cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type, + name, admin_level, address, + extratags, geometry) + SELECT osm_type, osm_id, class, type, + name, admin_level, address, + extratags, geometry + FROM place + WHERE not (class='place' and type='houses' and osm_type='W')""") + cur.execute("""INSERT INTO location_property_osmline (osm_id, address, linegeo) + SELECT osm_id, address, geometry + FROM place + WHERE class='place' and type='houses' + and osm_type='W' + and ST_GeometryType(geometry) = 'ST_LineString'""") diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 67ebcd3e..97f9055b 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -218,7 +218,7 @@ def assert_db_column(row, column, value, context): "Row '%s': expected: %s, got: %s" % (column, value, str(row[column])) -################################ STEPS ################################## +################################ GIVEN ################################## @given(u'the scene (?P.+)') def set_default_scene(context, scene): @@ -226,83 +226,72 @@ def set_default_scene(context, scene): @given("the (?Pnamed )?places") def add_data_to_place_table(context, named): - cur = context.db.cursor() - cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert') - for r in context.table: - col = PlaceColumn(context, named is not None) + with context.db.cursor() as cur: + cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert') + for r in context.table: + col = PlaceColumn(context, named is not None) - for h in r.headings: - col.add(h, r[h]) + for h in r.headings: + col.add(h, r[h]) - col.db_insert(cur) - cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert') - cur.close() - context.db.commit() + col.db_insert(cur) + cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert') @given("the relations") def add_data_to_planet_relations(context): - cur = context.db.cursor() - for r in context.table: - last_node = 0 - last_way = 0 - parts = [] - if r['members']: - members = [] - for m in r['members'].split(','): - mid = NominatimID(m) - if mid.typ == 'N': - parts.insert(last_node, int(mid.oid)) - last_node += 1 - last_way += 1 - elif mid.typ == 'W': - parts.insert(last_way, int(mid.oid)) - last_way += 1 - else: - parts.append(int(mid.oid)) + with context.db.cursor() as cur: + for r in context.table: + last_node = 0 + last_way = 0 + parts = [] + if r['members']: + members = [] + for m in r['members'].split(','): + mid = NominatimID(m) + if mid.typ == 'N': + parts.insert(last_node, int(mid.oid)) + last_node += 1 + last_way += 1 + elif mid.typ == 'W': + parts.insert(last_way, int(mid.oid)) + last_way += 1 + else: + parts.append(int(mid.oid)) - members.extend((mid.typ.lower() + mid.oid, mid.cls or '')) - else: - members = None + members.extend((mid.typ.lower() + mid.oid, mid.cls or '')) + else: + members = None - tags = [] - for h in r.headings: - if h.startswith("tags+"): - tags.extend((h[5:], r[h])) + tags = [] + for h in r.headings: + if h.startswith("tags+"): + tags.extend((h[5:], r[h])) - cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags) - VALUES (%s, %s, %s, %s, %s, %s)""", - (r['id'], last_node, last_way, parts, members, tags)) - context.db.commit() + cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags) + VALUES (%s, %s, %s, %s, %s, %s)""", + (r['id'], last_node, last_way, parts, members, tags)) @given("the ways") def add_data_to_planet_ways(context): - cur = context.db.cursor() - for r in context.table: - tags = [] - for h in r.headings: - if h.startswith("tags+"): - tags.extend((h[5:], r[h])) + with context.db.cursor() as cur: + for r in context.table: + tags = [] + for h in r.headings: + if h.startswith("tags+"): + tags.extend((h[5:], r[h])) - nodes = [ int(x.strip()) for x in r['nodes'].split(',') ] + nodes = [ int(x.strip()) for x in r['nodes'].split(',') ] - cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)", - (r['id'], nodes, tags)) - context.db.commit() + cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)", + (r['id'], nodes, tags)) + +################################ WHEN ################################## @when("importing") def import_and_index_data_from_place_table(context): - context.nominatim.run_setup_script('create-functions', 'create-partition-functions') - cur = context.db.cursor() - cur.execute( - """insert into placex (osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry - from place where not (class='place' and type='houses' and osm_type='W')""") - cur.execute( - """insert into location_property_osmline (osm_id, address, linegeo) - SELECT osm_id, address, geometry from place - WHERE class='place' and type='houses' and osm_type='W' - and ST_GeometryType(geometry) = 'ST_LineString'""") - context.db.commit() + """ Import data previously set up in the place table. + """ + context.nominatim.copy_from_place(context.db) context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse') check_database_integrity(context) @@ -310,24 +299,21 @@ def import_and_index_data_from_place_table(context): def update_place_table(context): context.nominatim.run_setup_script( 'create-functions', 'create-partition-functions', 'enable-diff-updates') - cur = context.db.cursor() - for r in context.table: - col = PlaceColumn(context, False) + with context.db.cursor() as cur: + for r in context.table: + col = PlaceColumn(context, False) - for h in r.headings: - col.add(h, r[h]) + for h in r.headings: + col.add(h, r[h]) - col.db_insert(cur) + col.db_insert(cur) - context.db.commit() + while True: + context.nominatim.run_update_script('index') - while True: - context.nominatim.run_update_script('index') - - cur = context.db.cursor() - cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1") - if cur.rowcount == 0: - break + cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1") + if cur.rowcount == 0: + break check_database_integrity(context) @@ -339,277 +325,258 @@ def update_postcodes(context): def delete_places(context, oids): context.nominatim.run_setup_script( 'create-functions', 'create-partition-functions', 'enable-diff-updates') - cur = context.db.cursor() - for oid in oids.split(','): - where, params = NominatimID(oid).table_select() - cur.execute("DELETE FROM place WHERE " + where, params) - context.db.commit() + with context.db.cursor() as cur: + for oid in oids.split(','): + where, params = NominatimID(oid).table_select() + cur.execute("DELETE FROM place WHERE " + where, params) while True: context.nominatim.run_update_script('index') - cur = context.db.cursor() - cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1") - if cur.rowcount == 0: - break + with context.db.cursor() as cur: + cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1") + if cur.rowcount == 0: + break + +################################ THEN ################################## @then("placex contains(?P exactly)?") def check_placex_contents(context, exact): - cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + expected_content = set() + for row in context.table: + nid = NominatimID(row['object']) + where, params = nid.table_select() + cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt, + ST_X(centroid) as cx, ST_Y(centroid) as cy + FROM placex where %s""" % where, + params) + assert cur.rowcount > 0, "No rows found for " + row['object'] - expected_content = set() - for row in context.table: - nid = NominatimID(row['object']) - where, params = nid.table_select() - cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt, - ST_X(centroid) as cx, ST_Y(centroid) as cy - FROM placex where %s""" % where, - params) - assert cur.rowcount > 0, "No rows found for " + row['object'] - - for res in cur: - if exact: - expected_content.add((res['osm_type'], res['osm_id'], res['class'])) - for h in row.headings: - if h in ('extratags', 'address'): - if row[h] == '-': - assert res[h] is None + for res in cur: + if exact: + expected_content.add((res['osm_type'], res['osm_id'], res['class'])) + for h in row.headings: + if h in ('extratags', 'address'): + if row[h] == '-': + assert res[h] is None + else: + vdict = eval('{' + row[h] + '}') + assert vdict == res[h] + elif h.startswith('name'): + name = h[5:] if h.startswith('name+') else 'name' + assert name in res['name'] + assert res['name'][name] == row[h] + elif h.startswith('extratags+'): + assert res['extratags'][h[10:]] == row[h] + elif h.startswith('addr+'): + if row[h] == '-': + if res['address'] is not None: + assert h[5:] not in res['address'] + else: + assert h[5:] in res['address'], "column " + h + assert res['address'][h[5:]] == row[h], "column %s" % h + elif h in ('linked_place_id', 'parent_place_id'): + compare_place_id(row[h], res[h], h, context) else: - vdict = eval('{' + row[h] + '}') - assert vdict == res[h] - elif h.startswith('name'): - name = h[5:] if h.startswith('name+') else 'name' - assert name in res['name'] - assert res['name'][name] == row[h] - elif h.startswith('extratags+'): - assert res['extratags'][h[10:]] == row[h] - elif h.startswith('addr+'): - if row[h] == '-': - if res['address'] is not None: - assert h[5:] not in res['address'] - else: - assert h[5:] in res['address'], "column " + h - assert res['address'][h[5:]] == row[h], "column %s" % h - elif h in ('linked_place_id', 'parent_place_id'): - compare_place_id(row[h], res[h], h, context) - else: - assert_db_column(res, h, row[h], context) + assert_db_column(res, h, row[h], context) - if exact: - cur.execute('SELECT osm_type, osm_id, class from placex') - assert expected_content == set([(r[0], r[1], r[2]) for r in cur]) - - context.db.commit() + if exact: + cur.execute('SELECT osm_type, osm_id, class from placex') + assert expected_content == set([(r[0], r[1], r[2]) for r in cur]) @then("place contains(?P exactly)?") def check_placex_contents(context, exact): - cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + expected_content = set() + for row in context.table: + nid = NominatimID(row['object']) + where, params = nid.table_select() + cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt, + ST_GeometryType(geometry) as geometrytype + FROM place where %s""" % where, + params) + assert cur.rowcount > 0, "No rows found for " + row['object'] - expected_content = set() - for row in context.table: - nid = NominatimID(row['object']) - where, params = nid.table_select() - cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt, - ST_GeometryType(geometry) as geometrytype - FROM place where %s""" % where, - params) - assert cur.rowcount > 0, "No rows found for " + row['object'] - - for res in cur: - if exact: - expected_content.add((res['osm_type'], res['osm_id'], res['class'])) - for h in row.headings: - msg = "%s: %s" % (row['object'], h) - if h in ('name', 'extratags', 'address'): - if row[h] == '-': - assert res[h] is None, msg + for res in cur: + if exact: + expected_content.add((res['osm_type'], res['osm_id'], res['class'])) + for h in row.headings: + msg = "%s: %s" % (row['object'], h) + if h in ('name', 'extratags', 'address'): + if row[h] == '-': + assert res[h] is None, msg + else: + vdict = eval('{' + row[h] + '}') + assert vdict == res[h], msg + elif h.startswith('name+'): + assert res['name'][h[5:]] == row[h], msg + elif h.startswith('extratags+'): + assert res['extratags'][h[10:]] == row[h], msg + elif h.startswith('addr+'): + if row[h] == '-': + if res['address'] is not None: + assert h[5:] not in res['address'] + else: + assert res['address'][h[5:]] == row[h], msg + elif h in ('linked_place_id', 'parent_place_id'): + compare_place_id(row[h], res[h], h, context) else: - vdict = eval('{' + row[h] + '}') - assert vdict == res[h], msg - elif h.startswith('name+'): - assert res['name'][h[5:]] == row[h], msg - elif h.startswith('extratags+'): - assert res['extratags'][h[10:]] == row[h], msg - elif h.startswith('addr+'): - if row[h] == '-': - if res['address'] is not None: - assert h[5:] not in res['address'] - else: - assert res['address'][h[5:]] == row[h], msg - elif h in ('linked_place_id', 'parent_place_id'): - compare_place_id(row[h], res[h], h, context) - else: - assert_db_column(res, h, row[h], context) + assert_db_column(res, h, row[h], context) - if exact: - cur.execute('SELECT osm_type, osm_id, class from place') - assert expected_content, set([(r[0], r[1], r[2]) for r in cur]) - - context.db.commit() + if exact: + cur.execute('SELECT osm_type, osm_id, class from place') + assert expected_content, set([(r[0], r[1], r[2]) for r in cur]) @then("search_name contains(?P not)?") def check_search_name_contents(context, exclude): - cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for row in context.table: + pid = NominatimID(row['object']).get_place_id(cur) + cur.execute("""SELECT *, ST_X(centroid) as cx, ST_Y(centroid) as cy + FROM search_name WHERE place_id = %s""", (pid, )) + assert cur.rowcount > 0, "No rows found for " + row['object'] - for row in context.table: - pid = NominatimID(row['object']).get_place_id(cur) - cur.execute("""SELECT *, ST_X(centroid) as cx, ST_Y(centroid) as cy - FROM search_name WHERE place_id = %s""", (pid, )) - assert cur.rowcount > 0, "No rows found for " + row['object'] - - for res in cur: - for h in row.headings: - if h in ('name_vector', 'nameaddress_vector'): - terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')] - words = [x.strip()[1:] for x in row[h].split(',') if x.strip().startswith('#')] - subcur = context.db.cursor() - subcur.execute(""" SELECT word_id, word_token - FROM word, (SELECT unnest(%s::TEXT[]) as term) t - WHERE word_token = make_standard_name(t.term) - and class is null and country_code is null - and operator is null - UNION - SELECT word_id, word_token - FROM word, (SELECT unnest(%s::TEXT[]) as term) t - WHERE word_token = ' ' || make_standard_name(t.term) - and class is null and country_code is null - and operator is null - """, - (terms, words)) - if not exclude: - assert subcur.rowcount >= len(terms) + len(words), \ - "No word entry found for " + row[h] + ". Entries found: " + str(subcur.rowcount) - for wid in subcur: - if exclude: - assert wid[0] not in res[h], "Found term for %s/%s: %s" % (pid, h, wid[1]) - else: - assert wid[0] in res[h], "Missing term for %s/%s: %s" % (pid, h, wid[1]) - else: - assert_db_column(res, h, row[h], context) - - - context.db.commit() + for res in cur: + for h in row.headings: + if h in ('name_vector', 'nameaddress_vector'): + terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')] + words = [x.strip()[1:] for x in row[h].split(',') if x.strip().startswith('#')] + with context.db.cursor() as subcur: + subcur.execute(""" SELECT word_id, word_token + FROM word, (SELECT unnest(%s::TEXT[]) as term) t + WHERE word_token = make_standard_name(t.term) + and class is null and country_code is null + and operator is null + UNION + SELECT word_id, word_token + FROM word, (SELECT unnest(%s::TEXT[]) as term) t + WHERE word_token = ' ' || make_standard_name(t.term) + and class is null and country_code is null + and operator is null + """, + (terms, words)) + if not exclude: + assert subcur.rowcount >= len(terms) + len(words), \ + "No word entry found for " + row[h] + ". Entries found: " + str(subcur.rowcount) + for wid in subcur: + if exclude: + assert wid[0] not in res[h], "Found term for %s/%s: %s" % (pid, h, wid[1]) + else: + assert wid[0] in res[h], "Missing term for %s/%s: %s" % (pid, h, wid[1]) + else: + assert_db_column(res, h, row[h], context) @then("location_postcode contains exactly") def check_location_postcode(context): - cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode") + assert cur.rowcount == len(list(context.table)), \ + "Postcode table has %d rows, expected %d rows." % (cur.rowcount, len(list(context.table))) - cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode") - assert cur.rowcount == len(list(context.table)), \ - "Postcode table has %d rows, expected %d rows." % (cur.rowcount, len(list(context.table))) - - table = list(cur) - for row in context.table: - for i in range(len(table)): - if table[i]['country_code'] != row['country'] \ - or table[i]['postcode'] != row['postcode']: - continue - for h in row.headings: - if h not in ('country', 'postcode'): - assert_db_column(table[i], h, row[h], context) + table = list(cur) + for row in context.table: + for i in range(len(table)): + if table[i]['country_code'] != row['country'] \ + or table[i]['postcode'] != row['postcode']: + continue + for h in row.headings: + if h not in ('country', 'postcode'): + assert_db_column(table[i], h, row[h], context) @then("word contains(?P not)?") def check_word_table(context, exclude): - cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) - - for row in context.table: - wheres = [] - values = [] - for h in row.headings: - wheres.append("%s = %%s" % h) - values.append(row[h]) - cur.execute("SELECT * from word WHERE %s" % ' AND '.join(wheres), values) - if exclude: - assert cur.rowcount == 0, "Row still in word table: %s" % '/'.join(values) - else: - assert cur.rowcount > 0, "Row not in word table: %s" % '/'.join(values) + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for row in context.table: + wheres = [] + values = [] + for h in row.headings: + wheres.append("%s = %%s" % h) + values.append(row[h]) + cur.execute("SELECT * from word WHERE %s" % ' AND '.join(wheres), values) + if exclude: + assert cur.rowcount == 0, "Row still in word table: %s" % '/'.join(values) + else: + assert cur.rowcount > 0, "Row not in word table: %s" % '/'.join(values) @then("place_addressline contains") def check_place_addressline(context): - cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for row in context.table: + pid = NominatimID(row['object']).get_place_id(cur) + apid = NominatimID(row['address']).get_place_id(cur) + cur.execute(""" SELECT * FROM place_addressline + WHERE place_id = %s AND address_place_id = %s""", + (pid, apid)) + assert cur.rowcount > 0, \ + "No rows found for place %s and address %s" % (row['object'], row['address']) - for row in context.table: - pid = NominatimID(row['object']).get_place_id(cur) - apid = NominatimID(row['address']).get_place_id(cur) - cur.execute(""" SELECT * FROM place_addressline - WHERE place_id = %s AND address_place_id = %s""", - (pid, apid)) - assert cur.rowcount > 0, \ - "No rows found for place %s and address %s" % (row['object'], row['address']) - - for res in cur: - for h in row.headings: - if h not in ('address', 'object'): - assert_db_column(res, h, row[h], context) - - context.db.commit() + for res in cur: + for h in row.headings: + if h not in ('address', 'object'): + assert_db_column(res, h, row[h], context) @then("place_addressline doesn't contain") def check_place_addressline_exclude(context): - cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) - - for row in context.table: - pid = NominatimID(row['object']).get_place_id(cur) - apid = NominatimID(row['address']).get_place_id(cur) - cur.execute(""" SELECT * FROM place_addressline - WHERE place_id = %s AND address_place_id = %s""", - (pid, apid)) - assert cur.rowcount == 0, \ - "Row found for place %s and address %s" % (row['object'], row['address']) - - context.db.commit() + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for row in context.table: + pid = NominatimID(row['object']).get_place_id(cur) + apid = NominatimID(row['address']).get_place_id(cur) + cur.execute(""" SELECT * FROM place_addressline + WHERE place_id = %s AND address_place_id = %s""", + (pid, apid)) + assert cur.rowcount == 0, \ + "Row found for place %s and address %s" % (row['object'], row['address']) @then("(?P\w+) expands to(?P no)? interpolation") def check_location_property_osmline(context, oid, neg): - cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) nid = NominatimID(oid) assert 'W' == nid.typ, "interpolation must be a way" - cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt - FROM location_property_osmline - WHERE osm_id = %s AND startnumber IS NOT NULL""", - (nid.oid, )) + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt + FROM location_property_osmline + WHERE osm_id = %s AND startnumber IS NOT NULL""", + (nid.oid, )) - if neg: - assert cur.rowcount == 0 - return + if neg: + assert cur.rowcount == 0 + return - todo = list(range(len(list(context.table)))) - for res in cur: - for i in todo: - row = context.table[i] - if (int(row['start']) == res['startnumber'] - and int(row['end']) == res['endnumber']): - todo.remove(i) - break - else: - assert False, "Unexpected row %s" % (str(res)) - - for h in row.headings: - if h in ('start', 'end'): - continue - elif h == 'parent_place_id': - compare_place_id(row[h], res[h], h, context) + todo = list(range(len(list(context.table)))) + for res in cur: + for i in todo: + row = context.table[i] + if (int(row['start']) == res['startnumber'] + and int(row['end']) == res['endnumber']): + todo.remove(i) + break else: - assert_db_column(res, h, row[h], context) + assert False, "Unexpected row %s" % (str(res)) - assert not todo + for h in row.headings: + if h in ('start', 'end'): + continue + elif h == 'parent_place_id': + compare_place_id(row[h], res[h], h, context) + else: + assert_db_column(res, h, row[h], context) + + assert not todo @then("(?Pplacex|place) has no entry for (?P.*)") def check_placex_has_entry(context, table, oid): - cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) - nid = NominatimID(oid) - where, params = nid.table_select() - cur.execute("SELECT * FROM %s where %s" % (table, where), params) - assert cur.rowcount == 0 - context.db.commit() + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + nid = NominatimID(oid) + where, params = nid.table_select() + cur.execute("SELECT * FROM %s where %s" % (table, where), params) + assert cur.rowcount == 0 @then("search_name has no entry for (?P.*)") def check_search_name_has_entry(context, oid): - cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) - pid = NominatimID(oid).get_place_id(cur) - cur.execute("SELECT * FROM search_name WHERE place_id = %s", (pid, )) - assert cur.rowcount == 0 - context.db.commit() + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + pid = NominatimID(oid).get_place_id(cur) + cur.execute("SELECT * FROM search_name WHERE place_id = %s", (pid, )) + assert cur.rowcount == 0 diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py index db208fe8..f8a6d3cd 100644 --- a/test/bdd/steps/steps_osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -70,17 +70,7 @@ def update_from_osm_file(context): The data is expected as attached text in OPL format. """ - context.nominatim.run_setup_script('create-functions', 'create-partition-functions') - - cur = context.db.cursor() - cur.execute("""insert into placex (osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry from place""") - cur.execute( - """insert into location_property_osmline (osm_id, address, linegeo) - SELECT osm_id, address, geometry from place - WHERE class='place' and type='houses' and osm_type='W' - and ST_GeometryType(geometry) = 'ST_LineString'""") - context.db.commit() + context.nominatim.copy_from_place(context.db) context.nominatim.run_setup_script('index', 'index-noanalyse') context.nominatim.run_setup_script('create-functions', 'create-partition-functions', 'enable-diff-updates') From b8e39d2dde4ffc0d202fc60af920b54b054601d7 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 5 Jan 2021 11:26:39 +0100 Subject: [PATCH 12/17] bdd: move scene setup to OSM data steps The step has nothing to do with the database. --- test/bdd/steps/steps_db_ops.py | 4 ---- test/bdd/steps/steps_osm_data.py | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 97f9055b..19429999 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -220,10 +220,6 @@ def assert_db_column(row, column, value, context): ################################ GIVEN ################################## -@given(u'the scene (?P.+)') -def set_default_scene(context, scene): - context.scene = scene - @given("the (?Pnamed )?places") def add_data_to_place_table(context, named): with context.db.cursor() as cur: diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py index f8a6d3cd..3858198b 100644 --- a/test/bdd/steps/steps_osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -22,6 +22,10 @@ def write_opl_file(opl, grid): return fd.name +@given(u'the scene (?P.+)') +def set_default_scene(context, scene): + context.scene = scene + @given(u'the ([0-9.]+ )?grid') def define_node_grid(context, grid_step): """ From 197870e67a82505d52fb2fd45db638435af956b0 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 5 Jan 2021 12:12:59 +0100 Subject: [PATCH 13/17] bdd: move place table inserter into separate file Also simplifies usage by implementing a function that inserts a complete table row. --- test/bdd/steps/place_inserter.py | 105 +++++++++++++++++++++++++++++ test/bdd/steps/steps_db_ops.py | 109 ++----------------------------- 2 files changed, 110 insertions(+), 104 deletions(-) create mode 100644 test/bdd/steps/place_inserter.py diff --git a/test/bdd/steps/place_inserter.py b/test/bdd/steps/place_inserter.py new file mode 100644 index 00000000..90f071b2 --- /dev/null +++ b/test/bdd/steps/place_inserter.py @@ -0,0 +1,105 @@ +""" +Helper classes for filling the place table. +""" +import random +import string + +class PlaceColumn: + """ Helper class to collect contents from a behave table row and + insert it into the place table. + """ + def __init__(self, context): + self.columns = {'admin_level' : 15} + self.context = context + self.geometry = None + + def add_row(self, row, force_name): + """ Parse the content from the given behave row as place column data. + """ + for name, value in zip(row.headings, row.cells): + self._add(name, value) + + assert 'osm_type' in self.columns, "osm column missing" + + if force_name and 'name' not in self.columns: + self._add_hstore('name', 'name', + ''.join(random.choice(string.printable) + for _ in range(int(random.random()*30)))) + + return self + + def _add(self, key, value): + if hasattr(self, '_set_key_' + key): + getattr(self, '_set_key_' + key)(value) + elif key.startswith('name+'): + self._add_hstore('name', key[5:], value) + elif key.startswith('extra+'): + self._add_hstore('extratags', key[6:], value) + elif key.startswith('addr+'): + self._add_hstore('address', key[5:], value) + elif key in ('name', 'address', 'extratags'): + self.columns[key] = eval('{' + value + '}') + else: + assert key in ('class', 'type'), "Unknown column '{}'.".format(key) + self.columns[key] = None if value == '' else value + + def _set_key_name(self, value): + self._add_hstore('name', 'name', value) + + def _set_key_osm(self, value): + assert value[0] in 'NRW' and value[1:].isdigit(), \ + "OSM id needs to be of format ." + + self.columns['osm_type'] = value[0] + self.columns['osm_id'] = int(value[1:]) + + def _set_key_admin(self, value): + self.columns['admin_level'] = int(value) + + def _set_key_housenr(self, value): + if value: + self._add_hstore('address', 'housenumber', value) + + def _set_key_postcode(self, value): + if value: + self._add_hstore('address', 'postcode', value) + + def _set_key_street(self, value): + if value: + self._add_hstore('address', 'street', value) + + def _set_key_addr_place(self, value): + if value: + self._add_hstore('address', 'place', value) + + def _set_key_country(self, value): + if value: + self._add_hstore('address', 'country', value) + + def _set_key_geometry(self, value): + self.geometry = self.context.osm.parse_geometry(value, self.context.scene) + assert self.geometry is not None, "Bad geometry: {}".format(value) + + def _add_hstore(self, column, key, value): + if column in self.columns: + self.columns[column][key] = value + else: + self.columns[column] = {key: value} + + def db_insert(self, cursor): + """ Insert the collected data into the database. + """ + if self.columns['osm_type'] == 'N' and self.geometry is None: + pt = self.context.osm.grid_node(self.columns['osm_id']) + if pt is None: + pt = (random.random()*360 - 180, random.random()*180 - 90) + + self.geometry = "ST_SetSRID(ST_Point(%f, %f), 4326)" % pt + else: + assert self.geometry is not None, "Geometry missing" + + query = 'INSERT INTO place ({}, geometry) values({}, {})'.format( + ','.join(self.columns.keys()), + ','.join(['%s' for x in range(len(self.columns))]), + self.geometry) + cursor.execute(query, list(self.columns.values())) diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 19429999..8f20cdaa 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -1,97 +1,8 @@ -import base64 -import random -import string import re import psycopg2.extras from check_functions import Almost - -class PlaceColumn: - - def __init__(self, context, force_name): - self.columns = { 'admin_level' : 15} - self.force_name = force_name - self.context = context - self.geometry = None - - def add(self, key, value): - if hasattr(self, 'set_key_' + key): - getattr(self, 'set_key_' + key)(value) - elif key.startswith('name+'): - self.add_hstore('name', key[5:], value) - elif key.startswith('extra+'): - self.add_hstore('extratags', key[6:], value) - elif key.startswith('addr+'): - self.add_hstore('address', key[5:], value) - elif key in ('name', 'address', 'extratags'): - self.columns[key] = eval('{' + value + '}') - else: - assert key in ('class', 'type') - self.columns[key] = None if value == '' else value - - def set_key_name(self, value): - self.add_hstore('name', 'name', value) - - def set_key_osm(self, value): - assert value[0] in 'NRW' - assert value[1:].isdigit() - - self.columns['osm_type'] = value[0] - self.columns['osm_id'] = int(value[1:]) - - def set_key_admin(self, value): - self.columns['admin_level'] = int(value) - - def set_key_housenr(self, value): - if value: - self.add_hstore('address', 'housenumber', value) - - def set_key_postcode(self, value): - if value: - self.add_hstore('address', 'postcode', value) - - def set_key_street(self, value): - if value: - self.add_hstore('address', 'street', value) - - def set_key_addr_place(self, value): - if value: - self.add_hstore('address', 'place', value) - - def set_key_country(self, value): - if value: - self.add_hstore('address', 'country', value) - - def set_key_geometry(self, value): - self.geometry = self.context.osm.parse_geometry(value, self.context.scene) - assert self.geometry is not None - - def add_hstore(self, column, key, value): - if column in self.columns: - self.columns[column][key] = value - else: - self.columns[column] = { key : value } - - def db_insert(self, cursor): - assert 'osm_type' in self.columns - if self.force_name and 'name' not in self.columns: - self.add_hstore('name', 'name', ''.join(random.choice(string.printable) - for _ in range(int(random.random()*30)))) - - if self.columns['osm_type'] == 'N' and self.geometry is None: - pt = self.context.osm.grid_node(self.columns['osm_id']) - if pt is None: - pt = (random.random()*360 - 180, random.random()*180 - 90) - - self.geometry = "ST_SetSRID(ST_Point(%f, %f), 4326)" % pt - else: - assert self.geometry is not None, "Geometry missing" - query = 'INSERT INTO place (%s, geometry) values(%s, %s)' % ( - ','.join(self.columns.keys()), - ','.join(['%s' for x in range(len(self.columns))]), - self.geometry) - cursor.execute(query, list(self.columns.values())) - +from place_inserter import PlaceColumn class PlaceObjName(object): @@ -224,13 +135,8 @@ def assert_db_column(row, column, value, context): def add_data_to_place_table(context, named): with context.db.cursor() as cur: cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert') - for r in context.table: - col = PlaceColumn(context, named is not None) - - for h in r.headings: - col.add(h, r[h]) - - col.db_insert(cur) + for row in context.table: + PlaceColumn(context).add_row(row, named is not None).db_insert(cur) cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert') @given("the relations") @@ -296,13 +202,8 @@ def update_place_table(context): context.nominatim.run_setup_script( 'create-functions', 'create-partition-functions', 'enable-diff-updates') with context.db.cursor() as cur: - for r in context.table: - col = PlaceColumn(context, False) - - for h in r.headings: - col.add(h, r[h]) - - col.db_insert(cur) + for row in context.table: + PlaceColumn(context).add_row(row, False).db_insert(cur) while True: context.nominatim.run_update_script('index') From 25557e5f14734aa597dbe32293be25d2c539531c Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 5 Jan 2021 15:17:46 +0100 Subject: [PATCH 14/17] bdd: factor out reindexing on updates --- test/bdd/steps/nominatim_environment.py | 14 ++++++++++++++ test/bdd/steps/steps_db_ops.py | 16 ++-------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index 2547bbe2..7013a20e 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -191,6 +191,20 @@ class NominatimEnvironment: if not self.keep_scenario_db: self.db_drop_database(self.test_db) + def reindex_placex(self, db): + """ Run the indexing step until all data in the placex has + been processed. Indexing during updates can produce more data + to index under some circumstances. That is why indexing may have + to be run multiple times. + """ + with db.cursor() as cur: + while True: + self.run_update_script('index') + + cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1") + if cur.rowcount == 0: + return + def run_setup_script(self, *args, **kwargs): """ Run the Nominatim setup script with the given arguments. """ diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 8f20cdaa..6bee88c1 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -205,13 +205,7 @@ def update_place_table(context): for row in context.table: PlaceColumn(context).add_row(row, False).db_insert(cur) - while True: - context.nominatim.run_update_script('index') - - cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1") - if cur.rowcount == 0: - break - + context.nominatim.reindex_placex(context.db) check_database_integrity(context) @when("updating postcodes") @@ -227,13 +221,7 @@ def delete_places(context, oids): where, params = NominatimID(oid).table_select() cur.execute("DELETE FROM place WHERE " + where, params) - while True: - context.nominatim.run_update_script('index') - - with context.db.cursor() as cur: - cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1") - if cur.rowcount == 0: - break + context.nominatim.reindex_placex(context.db) ################################ THEN ################################## From d586b95ff11a45ab6223e83e1b0540f1db87b325 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 5 Jan 2021 16:00:48 +0100 Subject: [PATCH 15/17] bdd: move nominitim id reader to separate file --- test/bdd/steps/steps_db_ops.py | 95 ++++++++------------------------- test/bdd/steps/table_compare.py | 62 +++++++++++++++++++++ 2 files changed, 84 insertions(+), 73 deletions(-) create mode 100644 test/bdd/steps/table_compare.py diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 6bee88c1..0df9fec2 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -3,6 +3,7 @@ import psycopg2.extras from check_functions import Almost from place_inserter import PlaceColumn +from table_compare import NominatimID class PlaceObjName(object): @@ -51,53 +52,6 @@ def check_database_integrity(context): assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline" -class NominatimID: - """ Splits a unique identifier for places into its components. - As place_ids cannot be used for testing, we use a unique - identifier instead that is of the form [:]. - """ - - id_regex = re.compile(r"(?P[NRW])(?P\d+)(:(?P\w+))?") - - def __init__(self, oid): - self.typ = self.oid = self.cls = None - - if oid is not None: - m = self.id_regex.fullmatch(oid) - assert m is not None, "ID '%s' not of form [:]" % oid - - self.typ = m.group('tp') - self.oid = m.group('id') - self.cls = m.group('cls') - - def __str__(self): - if self.cls is None: - return self.typ + self.oid - - return '%s%d:%s' % (self.typ, self.oid, self.cls) - - def table_select(self): - """ Return where clause and parameter list to select the object - from a Nominatim table. - """ - where = 'osm_type = %s and osm_id = %s' - params = [self.typ, self. oid] - - if self.cls is not None: - where += ' and class = %s' - params.append(self.cls) - - return where, params - - def get_place_id(self, cur): - where, params = self.table_select() - cur.execute("SELECT place_id FROM placex WHERE %s" % where, params) - assert cur.rowcount == 1, \ - "Expected exactly 1 entry in placex for %s found %s" % (str(self), cur.rowcount) - - return cur.fetchone()[0] - - def assert_db_column(row, column, value, context): if column == 'object': return @@ -218,8 +172,7 @@ def delete_places(context, oids): 'create-functions', 'create-partition-functions', 'enable-diff-updates') with context.db.cursor() as cur: for oid in oids.split(','): - where, params = NominatimID(oid).table_select() - cur.execute("DELETE FROM place WHERE " + where, params) + NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}') context.nominatim.reindex_placex(context.db) @@ -230,12 +183,10 @@ def check_placex_contents(context, exact): with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: expected_content = set() for row in context.table: - nid = NominatimID(row['object']) - where, params = nid.table_select() - cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt, - ST_X(centroid) as cx, ST_Y(centroid) as cy - FROM placex where %s""" % where, - params) + NominatimID(row['object']).query_osm_id(cur, + """SELECT *, ST_AsText(geometry) as geomtxt, + ST_X(centroid) as cx, ST_Y(centroid) as cy + FROM placex WHERE {}""") assert cur.rowcount > 0, "No rows found for " + row['object'] for res in cur: @@ -275,12 +226,10 @@ def check_placex_contents(context, exact): with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: expected_content = set() for row in context.table: - nid = NominatimID(row['object']) - where, params = nid.table_select() - cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt, - ST_GeometryType(geometry) as geometrytype - FROM place where %s""" % where, - params) + NominatimID(row['object']).query_osm_id(cur, + """SELECT *, ST_AsText(geometry) as geomtxt, + ST_GeometryType(geometry) as geometrytype + FROM place WHERE {}""") assert cur.rowcount > 0, "No rows found for " + row['object'] for res in cur: @@ -317,9 +266,9 @@ def check_placex_contents(context, exact): def check_search_name_contents(context, exclude): with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: for row in context.table: - pid = NominatimID(row['object']).get_place_id(cur) - cur.execute("""SELECT *, ST_X(centroid) as cx, ST_Y(centroid) as cy - FROM search_name WHERE place_id = %s""", (pid, )) + NominatimID(row['object']).query_place_id(cur, + """SELECT *, ST_X(centroid) as cx, ST_Y(centroid) as cy + FROM search_name WHERE place_id = %s""") assert cur.rowcount > 0, "No rows found for " + row['object'] for res in cur: @@ -346,9 +295,9 @@ def check_search_name_contents(context, exclude): "No word entry found for " + row[h] + ". Entries found: " + str(subcur.rowcount) for wid in subcur: if exclude: - assert wid[0] not in res[h], "Found term for %s/%s: %s" % (pid, h, wid[1]) + assert wid[0] not in res[h], "Found term for %s/%s: %s" % (row['object'], h, wid[1]) else: - assert wid[0] in res[h], "Missing term for %s/%s: %s" % (pid, h, wid[1]) + assert wid[0] in res[h], "Missing term for %s/%s: %s" % (row['object'], h, wid[1]) else: assert_db_column(res, h, row[h], context) @@ -454,14 +403,14 @@ def check_location_property_osmline(context, oid, neg): @then("(?P
placex|place) has no entry for (?P.*)") def check_placex_has_entry(context, table, oid): with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - nid = NominatimID(oid) - where, params = nid.table_select() - cur.execute("SELECT * FROM %s where %s" % (table, where), params) - assert cur.rowcount == 0 + NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table) + assert cur.rowcount == 0, \ + "Found {} entries for ID {}".format(cur.rowcount, oid) @then("search_name has no entry for (?P.*)") def check_search_name_has_entry(context, oid): with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - pid = NominatimID(oid).get_place_id(cur) - cur.execute("SELECT * FROM search_name WHERE place_id = %s", (pid, )) - assert cur.rowcount == 0 + NominatimID(oid).query_place_id(cur, + "SELECT * FROM search_name WHERE place_id = %s") + assert cur.rowcount == 0, \ + "Found {} entries for ID {}".format(cur.rowcount, oid) diff --git a/test/bdd/steps/table_compare.py b/test/bdd/steps/table_compare.py new file mode 100644 index 00000000..dfc261d4 --- /dev/null +++ b/test/bdd/steps/table_compare.py @@ -0,0 +1,62 @@ +""" +Functions to facilitate accessing and comparing the content of DB tables. +""" +import re + +ID_REGEX = re.compile(r"(?P[NRW])(?P\d+)(:(?P\w+))?") + +class NominatimID: + """ Splits a unique identifier for places into its components. + As place_ids cannot be used for testing, we use a unique + identifier instead that is of the form [:]. + """ + + def __init__(self, oid): + self.typ = self.oid = self.cls = None + + if oid is not None: + m = ID_REGEX.fullmatch(oid) + assert m is not None, \ + "ID '{}' not of form [:]".format(oid) + + self.typ = m.group('typ') + self.oid = m.group('oid') + self.cls = m.group('cls') + + def __str__(self): + if self.cls is None: + return self.typ + self.oid + + return '{self.typ}{self.oid}:{self.cls}'.format(self=self) + + def query_osm_id(self, cur, query): + """ Run a query on cursor `cur` using osm ID, type and class. The + `query` string must contain exactly one placeholder '{}' where + the 'where' query should go. + """ + where = 'osm_type = %s and osm_id = %s' + params = [self.typ, self. oid] + + if self.cls is not None: + where += ' and class = %s' + params.append(self.cls) + + return cur.execute(query.format(where), params) + + def query_place_id(self, cur, query): + """ Run a query on cursor `cur` using the place ID. The `query` string + must contain exactly one placeholder '%s' where the 'where' query + should go. + """ + pid = self.get_place_id(cur) + return cur.execute(query, (pid, )) + + def get_place_id(self, cur): + """ Look up the place id for the ID. Throws an assertion if the ID + is not unique. + """ + self.query_osm_id(cur, "SELECT place_id FROM placex WHERE {}") + assert cur.rowcount == 1, \ + "Place ID {!s} not unique. Found {} entries.".format(self, cur.rowcount) + + return cur.fetchone()[0] From 1f29475fa51455363f8cc7b55bec0e801afc638b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 6 Jan 2021 12:28:09 +0100 Subject: [PATCH 16/17] bdd: move column comparison in separate file Introduces a new class DBRow that encapsulates the comparison functions. This also is responsible for formatting more informative assert messages. place and placex steps are unified. --- test/bdd/steps/steps_db_ops.py | 211 +++++++------------------------- test/bdd/steps/table_compare.py | 159 +++++++++++++++++++++++- 2 files changed, 194 insertions(+), 176 deletions(-) diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 0df9fec2..178e1a28 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -1,44 +1,8 @@ -import re import psycopg2.extras -from check_functions import Almost from place_inserter import PlaceColumn -from table_compare import NominatimID +from table_compare import NominatimID, DBRow -class PlaceObjName(object): - - def __init__(self, placeid, conn): - self.pid = placeid - self.conn = conn - - def __str__(self): - if self.pid is None: - return "" - - if self.pid == 0: - return "place ID 0" - - cur = self.conn.cursor() - cur.execute("""SELECT osm_type, osm_id, class - FROM placex WHERE place_id = %s""", - (self.pid, )) - assert cur.rowcount == 1, "No entry found for place id %s" % self.pid - - return "%s%s:%s" % cur.fetchone() - -def compare_place_id(expected, result, column, context): - if expected == '0': - assert result == 0, \ - "Bad place id in column {}. Expected: 0, got: {!s}.".format( - column, PlaceObjName(result, context.db)) - elif expected == '-': - assert result is None, \ - "Bad place id in column {}: {!s}.".format( - column, PlaceObjName(result, context.db)) - else: - assert NominatimID(expected).get_place_id(context.db.cursor()) == result, \ - "Bad place id in column {}. Expected: {}, got: {!s}.".format( - column, expected, PlaceObjName(result, context.db)) def check_database_integrity(context): """ Check some generic constraints on the tables. @@ -52,37 +16,6 @@ def check_database_integrity(context): assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline" -def assert_db_column(row, column, value, context): - if column == 'object': - return - - if column.startswith('centroid'): - if value == 'in geometry': - query = """SELECT ST_Within(ST_SetSRID(ST_Point({}, {}), 4326), - ST_SetSRID('{}'::geometry, 4326))""".format( - row['cx'], row['cy'], row['geomtxt']) - cur = context.db.cursor() - cur.execute(query) - assert cur.fetchone()[0], "(Row %s failed: %s)" % (column, query) - else: - fac = float(column[9:]) if column.startswith('centroid*') else 1.0 - x, y = value.split(' ') - assert Almost(float(x) * fac) == row['cx'], "Bad x coordinate" - assert Almost(float(y) * fac) == row['cy'], "Bad y coordinate" - elif column == 'geometry': - geom = context.osm.parse_geometry(value, context.scene) - cur = context.db.cursor() - query = "SELECT ST_Equals(ST_SnapToGrid(%s, 0.00001, 0.00001), ST_SnapToGrid(ST_SetSRID('%s'::geometry, 4326), 0.00001, 0.00001))" % ( - geom, row['geomtxt'],) - cur.execute(query) - assert cur.fetchone()[0], "(Row %s failed: %s)" % (column, query) - elif value == '-': - assert row[column] is None, "Row %s" % column - else: - assert value == str(row[column]), \ - "Row '%s': expected: %s, got: %s" % (column, value, str(row[column])) - - ################################ GIVEN ################################## @given("the (?Pnamed )?places") @@ -178,100 +111,49 @@ def delete_places(context, oids): ################################ THEN ################################## -@then("placex contains(?P exactly)?") -def check_placex_contents(context, exact): +@then("(?P
placex|place) contains(?P exactly)?") +def check_place_contents(context, table, exact): with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: expected_content = set() for row in context.table: - NominatimID(row['object']).query_osm_id(cur, - """SELECT *, ST_AsText(geometry) as geomtxt, - ST_X(centroid) as cx, ST_Y(centroid) as cy - FROM placex WHERE {}""") + nid = NominatimID(row['object']) + query = 'SELECT *, ST_AsText(geometry) as geomtxt, ST_GeometryType(geometry) as geometrytype' + if table == 'placex': + query += ' ,ST_X(centroid) as cx, ST_Y(centroid) as cy' + query += " FROM %s WHERE {}" % (table, ) + nid.query_osm_id(cur, query) assert cur.rowcount > 0, "No rows found for " + row['object'] for res in cur: if exact: expected_content.add((res['osm_type'], res['osm_id'], res['class'])) - for h in row.headings: - if h in ('extratags', 'address'): - if row[h] == '-': - assert res[h] is None - else: - vdict = eval('{' + row[h] + '}') - assert vdict == res[h] - elif h.startswith('name'): - name = h[5:] if h.startswith('name+') else 'name' - assert name in res['name'] - assert res['name'][name] == row[h] - elif h.startswith('extratags+'): - assert res['extratags'][h[10:]] == row[h] - elif h.startswith('addr+'): - if row[h] == '-': - if res['address'] is not None: - assert h[5:] not in res['address'] - else: - assert h[5:] in res['address'], "column " + h - assert res['address'][h[5:]] == row[h], "column %s" % h - elif h in ('linked_place_id', 'parent_place_id'): - compare_place_id(row[h], res[h], h, context) - else: - assert_db_column(res, h, row[h], context) + + DBRow(nid, res, context).assert_row(row, ['object']) if exact: - cur.execute('SELECT osm_type, osm_id, class from placex') + cur.execute('SELECT osm_type, osm_id, class from {}'.format(table)) assert expected_content == set([(r[0], r[1], r[2]) for r in cur]) -@then("place contains(?P exactly)?") -def check_placex_contents(context, exact): + +@then("(?P
placex|place) has no entry for (?P.*)") +def check_place_has_entry(context, table, oid): with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - expected_content = set() - for row in context.table: - NominatimID(row['object']).query_osm_id(cur, - """SELECT *, ST_AsText(geometry) as geomtxt, - ST_GeometryType(geometry) as geometrytype - FROM place WHERE {}""") - assert cur.rowcount > 0, "No rows found for " + row['object'] + NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table) + assert cur.rowcount == 0, \ + "Found {} entries for ID {}".format(cur.rowcount, oid) - for res in cur: - if exact: - expected_content.add((res['osm_type'], res['osm_id'], res['class'])) - for h in row.headings: - msg = "%s: %s" % (row['object'], h) - if h in ('name', 'extratags', 'address'): - if row[h] == '-': - assert res[h] is None, msg - else: - vdict = eval('{' + row[h] + '}') - assert vdict == res[h], msg - elif h.startswith('name+'): - assert res['name'][h[5:]] == row[h], msg - elif h.startswith('extratags+'): - assert res['extratags'][h[10:]] == row[h], msg - elif h.startswith('addr+'): - if row[h] == '-': - if res['address'] is not None: - assert h[5:] not in res['address'] - else: - assert res['address'][h[5:]] == row[h], msg - elif h in ('linked_place_id', 'parent_place_id'): - compare_place_id(row[h], res[h], h, context) - else: - assert_db_column(res, h, row[h], context) - - if exact: - cur.execute('SELECT osm_type, osm_id, class from place') - assert expected_content, set([(r[0], r[1], r[2]) for r in cur]) @then("search_name contains(?P not)?") def check_search_name_contents(context, exclude): with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: for row in context.table: - NominatimID(row['object']).query_place_id(cur, - """SELECT *, ST_X(centroid) as cx, ST_Y(centroid) as cy - FROM search_name WHERE place_id = %s""") + nid = NominatimID(row['object']) + nid.row_by_place_id(cur, 'search_name', + ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy']) assert cur.rowcount > 0, "No rows found for " + row['object'] for res in cur: + db_row = DBRow(nid, res, context) for h in row.headings: if h in ('name_vector', 'nameaddress_vector'): terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')] @@ -298,8 +180,8 @@ def check_search_name_contents(context, exclude): assert wid[0] not in res[h], "Found term for %s/%s: %s" % (row['object'], h, wid[1]) else: assert wid[0] in res[h], "Missing term for %s/%s: %s" % (row['object'], h, wid[1]) - else: - assert_db_column(res, h, row[h], context) + elif h != 'object': + assert db_row.contains(h, row[h]), db_row.assert_msg(h, row[h]) @then("location_postcode contains exactly") def check_location_postcode(context): @@ -308,15 +190,18 @@ def check_location_postcode(context): assert cur.rowcount == len(list(context.table)), \ "Postcode table has %d rows, expected %d rows." % (cur.rowcount, len(list(context.table))) - table = list(cur) + results = {} + for row in cur: + key = (row['country_code'], row['postcode']) + assert key not in results, "Postcode table has duplicate entry: {}".format(row) + results[key] = DBRow((row['country_code'],row['postcode']), row, context) + for row in context.table: - for i in range(len(table)): - if table[i]['country_code'] != row['country'] \ - or table[i]['postcode'] != row['postcode']: - continue - for h in row.headings: - if h not in ('country', 'postcode'): - assert_db_column(table[i], h, row[h], context) + db_row = results.get((row['country'],row['postcode'])) + assert db_row is not None, \ + "Missing row for country '{}' postcode '{}'.".format(r['country'],['postcode']) + + db_row.assert_row(row, ('country', 'postcode')) @then("word contains(?P not)?") def check_word_table(context, exclude): @@ -337,7 +222,8 @@ def check_word_table(context, exclude): def check_place_addressline(context): with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: for row in context.table: - pid = NominatimID(row['object']).get_place_id(cur) + nid = NominatimID(row['object']) + pid = nid.get_place_id(cur) apid = NominatimID(row['address']).get_place_id(cur) cur.execute(""" SELECT * FROM place_addressline WHERE place_id = %s AND address_place_id = %s""", @@ -346,9 +232,7 @@ def check_place_addressline(context): "No rows found for place %s and address %s" % (row['object'], row['address']) for res in cur: - for h in row.headings: - if h not in ('address', 'object'): - assert_db_column(res, h, row[h], context) + DBRow(nid, res, context).assert_row(row, ('address', 'object')) @then("place_addressline doesn't contain") def check_place_addressline_exclude(context): @@ -389,28 +273,15 @@ def check_location_property_osmline(context, oid, neg): else: assert False, "Unexpected row %s" % (str(res)) - for h in row.headings: - if h in ('start', 'end'): - continue - elif h == 'parent_place_id': - compare_place_id(row[h], res[h], h, context) - else: - assert_db_column(res, h, row[h], context) + DBRow(nid, res, context).assert_row(row, ('start', 'end')) assert not todo -@then("(?P
placex|place) has no entry for (?P.*)") -def check_placex_has_entry(context, table, oid): - with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table) - assert cur.rowcount == 0, \ - "Found {} entries for ID {}".format(cur.rowcount, oid) - @then("search_name has no entry for (?P.*)") def check_search_name_has_entry(context, oid): with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - NominatimID(oid).query_place_id(cur, - "SELECT * FROM search_name WHERE place_id = %s") + NominatimID(oid).row_by_place_id(cur, 'search_name') + assert cur.rowcount == 0, \ "Found {} entries for ID {}".format(cur.rowcount, oid) diff --git a/test/bdd/steps/table_compare.py b/test/bdd/steps/table_compare.py index dfc261d4..51d1b645 100644 --- a/test/bdd/steps/table_compare.py +++ b/test/bdd/steps/table_compare.py @@ -2,6 +2,9 @@ Functions to facilitate accessing and comparing the content of DB tables. """ import re +import json + +from steps.check_functions import Almost ID_REGEX = re.compile(r"(?P[NRW])(?P\d+)(:(?P\w+))?") @@ -41,15 +44,17 @@ class NominatimID: where += ' and class = %s' params.append(self.cls) - return cur.execute(query.format(where), params) + cur.execute(query.format(where), params) - def query_place_id(self, cur, query): - """ Run a query on cursor `cur` using the place ID. The `query` string - must contain exactly one placeholder '%s' where the 'where' query - should go. + def row_by_place_id(self, cur, table, extra_columns=None): + """ Get a row by place_id from the given table using cursor `cur`. + extra_columns may contain a list additional elements for the select + part of the query. """ pid = self.get_place_id(cur) - return cur.execute(query, (pid, )) + query = "SELECT {} FROM {} WHERE place_id = %s".format( + ','.join(['*'] + (extra_columns or [])), table) + cur.execute(query, (pid, )) def get_place_id(self, cur): """ Look up the place id for the ID. Throws an assertion if the ID @@ -60,3 +65,145 @@ class NominatimID: "Place ID {!s} not unique. Found {} entries.".format(self, cur.rowcount) return cur.fetchone()[0] + + +class DBRow: + """ Represents a row from a database and offers comparison functions. + """ + def __init__(self, nid, db_row, context): + self.nid = nid + self.db_row = db_row + self.context = context + + def assert_row(self, row, exclude_columns): + """ Check that all columns of the given behave row are contained + in the database row. Exclude behave rows with the names given + in the `exclude_columns` list. + """ + for name, value in zip(row.headings, row.cells): + if name not in exclude_columns: + assert self.contains(name, value), self.assert_msg(name, value) + + def contains(self, name, expected): + """ Check that the DB row contains a column `name` with the given value. + """ + if '+' in name: + column, field = name.split('+', 1) + return self._contains_hstore_value(column, field, expected) + + if name == 'geometry': + return self._has_geometry(expected) + + if name not in self.db_row: + return False + + actual = self.db_row[name] + + if expected == '-': + return actual is None + + if name == 'name' and ':' not in expected: + return self._compare_column(actual[name], expected) + + if 'place_id' in name: + return self._compare_place_id(actual, expected) + + if name == 'centroid': + return self._has_centroid(expected) + + return self._compare_column(actual, expected) + + def _contains_hstore_value(self, column, field, expected): + if column == 'addr': + column = 'address' + + if column not in self.db_row: + return False + + if expected == '-': + return self.db_row[column] is None or field not in self.db_row[column] + + if self.db_row[column] is None: + return False + + return self._compare_column(self.db_row[column].get(field), expected) + + def _compare_column(self, actual, expected): + if isinstance(actual, dict): + return actual == eval('{' + expected + '}') + + return str(actual) == expected + + def _compare_place_id(self, actual, expected): + if expected == '0': + return actual == 0 + + with self.context.db.cursor() as cur: + return NominatimID(expected).get_place_id(cur) == actual + + def _has_centroid(self, expected): + if expected == 'in geometry': + with self.context.db.cursor() as cur: + cur.execute("""SELECT ST_Within(ST_SetSRID(ST_Point({cx}, {cy}), 4326), + ST_SetSRID('{geomtxt}'::geometry, 4326))""".format(**self.db_row)) + return cur.fetchone()[0] + + x, y = expected.split(' ') + return Almost(float(x)) == self.db_row['cx'] and Almost(float(y)) == self.db_row['cy'] + + def _has_geometry(self, expected): + geom = self.context.osm.parse_geometry(expected, self.context.scene) + with self.context.db.cursor() as cur: + cur.execute("""SELECT ST_Equals(ST_SnapToGrid({}, 0.00001, 0.00001), + ST_SnapToGrid(ST_SetSRID('{}'::geometry, 4326), 0.00001, 0.00001))""".format( + geom, self.db_row['geomtxt'])) + return cur.fetchone()[0] + + def assert_msg(self, name, value): + """ Return a string with an informative message for a failed compare. + """ + msg = "\nBad column '{}' in row '{!s}'.".format(name, self.nid) + actual = self._get_actual(name) + if actual is not None: + msg += " Expected: {}, got: {}.".format(value, actual) + else: + msg += " No such column." + + return msg + "\nFull DB row: {}".format(json.dumps(dict(self.db_row), indent=4, default=str)) + + def _get_actual(self, name): + if '+' in name: + column, field = name.split('+', 1) + if column == 'addr': + column = 'address' + return (self.db_row.get(column) or {}).get(field) + + if name == 'geometry': + return self.db_row['geomtxt'] + + if name not in self.db_row: + return None + + if name == 'centroid': + return "POINT({cx} {cy})".format(**self.db_row) + + actual = self.db_row[name] + + if 'place_id' in name: + if actual is None: + return '' + + if actual == 0: + return "place ID 0" + + with self.context.db.cursor(): + cur.execute("""SELECT osm_type, osm_id, class + FROM placex WHERE place_id = %s""", + (actual, )) + + if cur.rowcount == 1: + return "{0[0]}{0[1]}:{0[2]}".format(cur.fetchone()) + + return "[place ID {} not found]".format(actual) + + return actual From 73cbb6eb9a5a4ccbab98692e0476a17b7c022b80 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 6 Jan 2021 16:37:32 +0100 Subject: [PATCH 17/17] bdd: clean up DB ops steps Adds comments and modernizes code. --- test/bdd/steps/steps_db_ops.py | 141 +++++++++++++++++++++----------- test/bdd/steps/table_compare.py | 2 +- 2 files changed, 96 insertions(+), 47 deletions(-) diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 178e1a28..c549f3eb 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -1,3 +1,5 @@ +from itertools import chain + import psycopg2.extras from place_inserter import PlaceColumn @@ -20,6 +22,9 @@ def check_database_integrity(context): @given("the (?Pnamed )?places") def add_data_to_place_table(context, named): + """ Add entries into the place table. 'named places' makes sure that + the entries get a random name when none is explicitly given. + """ with context.db.cursor() as cur: cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert') for row in context.table: @@ -28,6 +33,9 @@ def add_data_to_place_table(context, named): @given("the relations") def add_data_to_planet_relations(context): + """ Add entries into the osm2pgsql relation middle table. This is needed + for tests on data that looks up members. + """ with context.db.cursor() as cur: for r in context.table: last_node = 0 @@ -51,28 +59,24 @@ def add_data_to_planet_relations(context): else: members = None - tags = [] - for h in r.headings: - if h.startswith("tags+"): - tags.extend((h[5:], r[h])) + tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")]) cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags) VALUES (%s, %s, %s, %s, %s, %s)""", - (r['id'], last_node, last_way, parts, members, tags)) + (r['id'], last_node, last_way, parts, members, list(tags))) @given("the ways") def add_data_to_planet_ways(context): + """ Add entries into the osm2pgsql way middle table. This is necessary for + tests on that that looks up node ids in this table. + """ with context.db.cursor() as cur: for r in context.table: - tags = [] - for h in r.headings: - if h.startswith("tags+"): - tags.extend((h[5:], r[h])) - + tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")]) nodes = [ int(x.strip()) for x in r['nodes'].split(',') ] cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)", - (r['id'], nodes, tags)) + (r['id'], nodes, list(tags))) ################################ WHEN ################################## @@ -86,6 +90,9 @@ def import_and_index_data_from_place_table(context): @when("updating places") def update_place_table(context): + """ Update the place table with the given data. Also runs all triggers + related to updates and reindexes the new data. + """ context.nominatim.run_setup_script( 'create-functions', 'create-partition-functions', 'enable-diff-updates') with context.db.cursor() as cur: @@ -97,10 +104,16 @@ def update_place_table(context): @when("updating postcodes") def update_postcodes(context): + """ Rerun the calculation of postcodes. + """ context.nominatim.run_update_script('calculate-postcodes') @when("marking for delete (?P.*)") def delete_places(context, oids): + """ Remove entries from the place table. Multiple ids may be given + separated by commas. Also runs all triggers + related to updates and reindexes the new data. + """ context.nominatim.run_setup_script( 'create-functions', 'create-partition-functions', 'enable-diff-updates') with context.db.cursor() as cur: @@ -113,6 +126,15 @@ def delete_places(context, oids): @then("(?P
placex|place) contains(?P exactly)?") def check_place_contents(context, table, exact): + """ Check contents of place/placex tables. Each row represents a table row + and all data must match. Data not present in the expected table, may + be arbitry. The rows are identified via the 'object' column which must + have an identifier of the form '[:]'. When multiple + rows match (for example because 'class' was left out and there are + multiple entries for the given OSM object) then all must match. All + expected rows are expected to be present with at least one database row. + When 'exactly' is given, there must not be additional rows in the database. + """ with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: expected_content = set() for row in context.table: @@ -137,6 +159,9 @@ def check_place_contents(context, table, exact): @then("(?P
placex|place) has no entry for (?P.*)") def check_place_has_entry(context, table, oid): + """ Ensure that no database row for the given object exists. The ID + must be of the form '[:]'. + """ with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table) assert cur.rowcount == 0, \ @@ -145,6 +170,12 @@ def check_place_has_entry(context, table, oid): @then("search_name contains(?P not)?") def check_search_name_contents(context, exclude): + """ Check contents of place/placex tables. Each row represents a table row + and all data must match. Data not present in the expected table, may + be arbitry. The rows are identified via the 'object' column which must + have an identifier of the form '[:]'. All + expected rows are expected to be present with at least one database row. + """ with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: for row in context.table: nid = NominatimID(row['object']) @@ -154,10 +185,9 @@ def check_search_name_contents(context, exclude): for res in cur: db_row = DBRow(nid, res, context) - for h in row.headings: - if h in ('name_vector', 'nameaddress_vector'): - terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')] - words = [x.strip()[1:] for x in row[h].split(',') if x.strip().startswith('#')] + for name, value in zip(row.headings, row.cells): + if name in ('name_vector', 'nameaddress_vector'): + items = [x.strip() for x in value.split(',')] with context.db.cursor() as subcur: subcur.execute(""" SELECT word_id, word_token FROM word, (SELECT unnest(%s::TEXT[]) as term) t @@ -171,24 +201,43 @@ def check_search_name_contents(context, exclude): and class is null and country_code is null and operator is null """, - (terms, words)) + (list(filter(lambda x: not x.startswith('#'), items)), + list(filter(lambda x: x.startswith('#'), items)))) if not exclude: - assert subcur.rowcount >= len(terms) + len(words), \ - "No word entry found for " + row[h] + ". Entries found: " + str(subcur.rowcount) + assert subcur.rowcount >= len(items), \ + "No word entry found for {}. Entries found: {!s}".format(value, subcur.rowcount) for wid in subcur: + present = wid[0] in res[name] if exclude: - assert wid[0] not in res[h], "Found term for %s/%s: %s" % (row['object'], h, wid[1]) + assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1]) else: - assert wid[0] in res[h], "Missing term for %s/%s: %s" % (row['object'], h, wid[1]) - elif h != 'object': - assert db_row.contains(h, row[h]), db_row.assert_msg(h, row[h]) + assert present, "Missing term for {}/{}: {}".fromat(row['object'], name, wid[1]) + elif name != 'object': + assert db_row.contains(name, value), db_row.assert_msg(name, value) + +@then("search_name has no entry for (?P.*)") +def check_search_name_has_entry(context, oid): + """ Check that there is noentry in the search_name table for the given + objects. IDs are in format '[:]'. + """ + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + NominatimID(oid).row_by_place_id(cur, 'search_name') + + assert cur.rowcount == 0, \ + "Found {} entries for ID {}".format(cur.rowcount, oid) @then("location_postcode contains exactly") def check_location_postcode(context): + """ Check full contents for location_postcode table. Each row represents a table row + and all data must match. Data not present in the expected table, may + be arbitry. The rows are identified via 'country' and 'postcode' columns. + All rows must be present as excepted and there must not be additional + rows. + """ with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode") assert cur.rowcount == len(list(context.table)), \ - "Postcode table has %d rows, expected %d rows." % (cur.rowcount, len(list(context.table))) + "Postcode table has {} rows, expected {}.".foramt(cur.rowcount, len(list(context.table))) results = {} for row in cur: @@ -199,20 +248,20 @@ def check_location_postcode(context): for row in context.table: db_row = results.get((row['country'],row['postcode'])) assert db_row is not None, \ - "Missing row for country '{}' postcode '{}'.".format(r['country'],['postcode']) + "Missing row for country '{r['country']}' postcode '{r['postcode']}'.".format(r=row) db_row.assert_row(row, ('country', 'postcode')) @then("word contains(?P not)?") def check_word_table(context, exclude): + """ Check the contents of the word table. Each row represents a table row + and all data must match. Data not present in the expected table, may + be arbitry. The rows are identified via all given columns. + """ with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: for row in context.table: - wheres = [] - values = [] - for h in row.headings: - wheres.append("%s = %%s" % h) - values.append(row[h]) - cur.execute("SELECT * from word WHERE %s" % ' AND '.join(wheres), values) + wheres = ' AND '.join(["{} = %s".format(h) for h in row.headings]) + cur.execute("SELECT * from word WHERE " + wheres, list(row.cells)) if exclude: assert cur.rowcount == 0, "Row still in word table: %s" % '/'.join(values) else: @@ -220,6 +269,12 @@ def check_word_table(context, exclude): @then("place_addressline contains") def check_place_addressline(context): + """ Check the contents of the place_addressline table. Each row represents + a table row and all data must match. Data not present in the expected + table, may be arbitry. The rows are identified via the 'object' column, + representing the addressee and the 'address' column, representing the + address item. + """ with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: for row in context.table: nid = NominatimID(row['object']) @@ -236,6 +291,9 @@ def check_place_addressline(context): @then("place_addressline doesn't contain") def check_place_addressline_exclude(context): + """ Check that the place_addressline doesn't contain any entries for the + given addressee/address item pairs. + """ with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: for row in context.table: pid = NominatimID(row['object']).get_place_id(cur) @@ -246,20 +304,18 @@ def check_place_addressline_exclude(context): assert cur.rowcount == 0, \ "Row found for place %s and address %s" % (row['object'], row['address']) -@then("(?P\w+) expands to(?P no)? interpolation") +@then("W(?P\d+) expands to(?P no)? interpolation") def check_location_property_osmline(context, oid, neg): - nid = NominatimID(oid) - - assert 'W' == nid.typ, "interpolation must be a way" - + """ Check that the given way is present in the interpolation table. + """ with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt FROM location_property_osmline WHERE osm_id = %s AND startnumber IS NOT NULL""", - (nid.oid, )) + (oid, )) if neg: - assert cur.rowcount == 0 + assert cur.rowcount == 0, "Interpolation found for way {}.".format(oid) return todo = list(range(len(list(context.table)))) @@ -271,17 +327,10 @@ def check_location_property_osmline(context, oid, neg): todo.remove(i) break else: - assert False, "Unexpected row %s" % (str(res)) + assert False, "Unexpected row " + str(res) - DBRow(nid, res, context).assert_row(row, ('start', 'end')) + DBRow(oid, res, context).assert_row(row, ('start', 'end')) assert not todo -@then("search_name has no entry for (?P.*)") -def check_search_name_has_entry(context, oid): - with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - NominatimID(oid).row_by_place_id(cur, 'search_name') - - assert cur.rowcount == 0, \ - "Found {} entries for ID {}".format(cur.rowcount, oid) diff --git a/test/bdd/steps/table_compare.py b/test/bdd/steps/table_compare.py index 51d1b645..2e71d943 100644 --- a/test/bdd/steps/table_compare.py +++ b/test/bdd/steps/table_compare.py @@ -196,7 +196,7 @@ class DBRow: if actual == 0: return "place ID 0" - with self.context.db.cursor(): + with self.context.db.cursor() as cur: cur.execute("""SELECT osm_type, osm_id, class FROM placex WHERE place_id = %s""", (actual, ))