From 843d3a137c8fa17bd41b4187f76dfc8c8a0311f6 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 4 Jan 2021 14:14:34 +0100
Subject: [PATCH 01/17] remove stale code for python2

---
 test/bdd/environment.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/test/bdd/environment.py b/test/bdd/environment.py
index 0ac92104..d95fea4a 100644
--- a/test/bdd/environment.py
+++ b/test/bdd/environment.py
@@ -5,7 +5,6 @@ import psycopg2
 import psycopg2.extras
 import subprocess
 import tempfile
-from sys import version_info as python_version
 
 logger = logging.getLogger(__name__)
 
@@ -172,10 +171,7 @@ class NominatimEnvironment(object):
         cur.execute('CREATE DATABASE %s TEMPLATE = %s' % (self.test_db, self.template_db))
         conn.close()
         context.db = self.connect_database(self.test_db)
-        if python_version[0] < 3:
-            psycopg2.extras.register_hstore(context.db, globally=False, unicode=True)
-        else:
-            psycopg2.extras.register_hstore(context.db, globally=False)
+        psycopg2.extras.register_hstore(context.db, globally=False)
 
     def teardown_db(self, context):
         if 'db' in context:

From f727620859cacfa39146642b31ad446762212262 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 4 Jan 2021 16:29:15 +0100
Subject: [PATCH 02/17] bdd: move geoemtry creation into separate file

Also renames the OsmDataFactory in the more appropriate
GeometryFactory and modernizes code for python3.
---
 test/bdd/environment.py            |  81 +--------------------
 test/bdd/steps/geometry_factory.py | 113 +++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+), 78 deletions(-)
 create mode 100644 test/bdd/steps/geometry_factory.py

diff --git a/test/bdd/environment.py b/test/bdd/environment.py
index d95fea4a..b6f6b986 100644
--- a/test/bdd/environment.py
+++ b/test/bdd/environment.py
@@ -6,6 +6,8 @@ import psycopg2.extras
 import subprocess
 import tempfile
 
+from steps.geometry_factory import GeometryFactory
+
 logger = logging.getLogger(__name__)
 
 userconfig = {
@@ -209,83 +211,6 @@ class NominatimEnvironment(object):
         assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr)
 
 
-class OSMDataFactory(object):
-
-    def __init__(self):
-        scriptpath = os.path.dirname(os.path.abspath(__file__))
-        self.scene_path = os.environ.get('SCENE_PATH',
-                           os.path.join(scriptpath, '..', 'scenes', 'data'))
-        self.scene_cache = {}
-        self.clear_grid()
-
-    def parse_geometry(self, geom, scene):
-        if geom.find(':') >= 0:
-            return "ST_SetSRID(%s, 4326)" % self.get_scene_geometry(scene, geom)
-
-        if geom.find(',') < 0:
-            out = "POINT(%s)" % self.mk_wkt_point(geom)
-        elif geom.find('(') < 0:
-            line = ','.join([self.mk_wkt_point(x) for x in geom.split(',')])
-            out = "LINESTRING(%s)" % line
-        else:
-            inner = geom.strip('() ')
-            line = ','.join([self.mk_wkt_point(x) for x in inner.split(',')])
-            out = "POLYGON((%s))" % line
-
-        return "ST_SetSRID('%s'::geometry, 4326)" % out
-
-    def mk_wkt_point(self, point):
-        geom = point.strip()
-        if geom.find(' ') >= 0:
-            return geom
-        else:
-            pt = self.grid_node(int(geom))
-            assert pt is not None, "Bad scenario: Point '{}' not found in grid".format(geom)
-            return "%f %f" % pt
-
-    def get_scene_geometry(self, default_scene, name):
-        geoms = []
-        for obj in name.split('+'):
-            oname = obj.strip()
-            if oname.startswith(':'):
-                assert default_scene is not None, "Bad scenario: You need to set a scene"
-                defscene = self.load_scene(default_scene)
-                wkt = defscene[oname[1:]]
-            else:
-                scene, obj = oname.split(':', 2)
-                scene_geoms = self.load_scene(scene)
-                wkt = scene_geoms[obj]
-
-            geoms.append("'%s'::geometry" % wkt)
-
-        if len(geoms) == 1:
-            return geoms[0]
-        else:
-            return 'ST_LineMerge(ST_Collect(ARRAY[%s]))' % ','.join(geoms)
-
-    def load_scene(self, name):
-        if name in self.scene_cache:
-            return self.scene_cache[name]
-
-        scene = {}
-        with open(os.path.join(self.scene_path, "%s.wkt" % name), 'r') as fd:
-            for line in fd:
-                if line.strip():
-                    obj, wkt = line.split('|', 2)
-                    scene[obj.strip()] = wkt.strip()
-            self.scene_cache[name] = scene
-
-        return scene
-
-    def clear_grid(self):
-        self.grid = {}
-
-    def add_grid_node(self, nodeid, x, y):
-        self.grid[nodeid] = (x, y)
-
-    def grid_node(self, nodeid):
-        return self.grid.get(nodeid)
-
 
 def before_all(context):
     # logging setup
@@ -296,7 +221,7 @@ def before_all(context):
     logging.debug('User config: %s' %(str(context.config.userdata)))
     # Nominatim test setup
     context.nominatim = NominatimEnvironment(context.config.userdata)
-    context.osm = OSMDataFactory()
+    context.osm = GeometryFactory()
 
 
 def before_scenario(context, scenario):
diff --git a/test/bdd/steps/geometry_factory.py b/test/bdd/steps/geometry_factory.py
new file mode 100644
index 00000000..7eedfc37
--- /dev/null
+++ b/test/bdd/steps/geometry_factory.py
@@ -0,0 +1,113 @@
+from pathlib import Path
+import os
+
+class GeometryFactory:
+    """ Provides functions to create geometries from scenes and data grids.
+    """
+
+    def __init__(self):
+        defpath = Path(__file__) / '..' / '..' / '..' / 'scenes' / 'data'
+        self.scene_path = os.environ.get('SCENE_PATH', defpath.resolve())
+        self.scene_cache = {}
+        self.clear_grid()
+
+    def parse_geometry(self, geom, scene):
+        """ Create a WKT SQL term for the given geometry.
+            The function understands the following formats:
+
+              [<scene>]:<name>
+                 Geometry from a scene. If the scene is omitted, use the
+                 default scene.
+              <P>
+                 Point geometry
+              <P>,...,<P>
+                 Line geometry
+              (<P>,...,<P>)
+                 Polygon geometry
+
+           <P> may either be a coordinate of the form '<x> <y>' or a single
+           number. In the latter case it must refer to a point in
+           a previously defined grid.
+        """
+        if geom.find(':') >= 0:
+            return "ST_SetSRID({}, 4326)".format(self.get_scene_geometry(scene, geom))
+
+        if geom.find(',') < 0:
+            out = "POINT({})".format(self.mk_wkt_point(geom))
+        elif geom.find('(') < 0:
+            out = "LINESTRING({})".format(self.mk_wkt_points(geom))
+        else:
+            out = "POLYGON(({}))".format(self.mk_wkt_points(geom.strip('() ')))
+
+        return "ST_SetSRID('{}'::geometry, 4326)".format(out)
+
+    def mk_wkt_point(self, point):
+        """ Parse a point description.
+            The point may either consist of 'x y' cooordinates or a number
+            that refers to a grid setup.
+        """
+        geom = point.strip()
+        if geom.find(' ') >= 0:
+            return geom
+
+        try:
+            pt = self.grid_node(int(geom))
+        except ValueError:
+            assert False, "Scenario error: Point '{}' is not a number".format(geom)
+
+        assert pt is not None, "Scenario error: Point '{}' not found in grid".format(geom)
+        return "{} {}".format(*pt)
+
+    def mk_wkt_points(self, geom):
+        """ Parse a list of points.
+            The list must be a comma-separated list of points. Points
+            in coordinate and grid format may be mixed.
+        """
+        return ','.join([self.mk_wkt_point(x) for x in geom.split(',')])
+
+    def get_scene_geometry(self, default_scene, name):
+        """ Load the geometry from a scene.
+        """
+        geoms = []
+        for obj in name.split('+'):
+            oname = obj.strip()
+            if oname.startswith(':'):
+                assert default_scene is not None, "Scenario error: You need to set a scene"
+                defscene = self.load_scene(default_scene)
+                wkt = defscene[oname[1:]]
+            else:
+                scene, obj = oname.split(':', 2)
+                scene_geoms = self.load_scene(scene)
+                wkt = scene_geoms[obj]
+
+            geoms.append("'{}'::geometry".format(wkt))
+
+        if len(geoms) == 1:
+            return geoms[0]
+
+        return 'ST_LineMerge(ST_Collect(ARRAY[{}]))'.format(','.join(geoms))
+
+    def load_scene(self, name):
+        """ Load a scene from a file.
+        """
+        if name in self.scene_cache:
+            return self.scene_cache[name]
+
+        scene = {}
+        with open(Path(self.scene_path) / "{}.wkt".format(name), 'r') as fd:
+            for line in fd:
+                if line.strip():
+                    obj, wkt = line.split('|', 2)
+                    scene[obj.strip()] = wkt.strip()
+            self.scene_cache[name] = scene
+
+        return scene
+
+    def clear_grid(self):
+        self.grid = {}
+
+    def add_grid_node(self, nodeid, x, y):
+        self.grid[nodeid] = (x, y)
+
+    def grid_node(self, nodeid):
+        return self.grid.get(nodeid)

From 14e5bc7a1723fd07c3b58eb0bda8a9b1221ef123 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 4 Jan 2021 17:04:47 +0100
Subject: [PATCH 03/17] bdd: move grid generation code into geometry factory

---
 test/bdd/steps/geometry_factory.py | 19 ++++++++++++++-----
 test/bdd/steps/osm_data.py         | 20 ++++----------------
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/test/bdd/steps/geometry_factory.py b/test/bdd/steps/geometry_factory.py
index 7eedfc37..0a403839 100644
--- a/test/bdd/steps/geometry_factory.py
+++ b/test/bdd/steps/geometry_factory.py
@@ -9,7 +9,7 @@ class GeometryFactory:
         defpath = Path(__file__) / '..' / '..' / '..' / 'scenes' / 'data'
         self.scene_path = os.environ.get('SCENE_PATH', defpath.resolve())
         self.scene_cache = {}
-        self.clear_grid()
+        self.grid = {}
 
     def parse_geometry(self, geom, scene):
         """ Create a WKT SQL term for the given geometry.
@@ -103,11 +103,20 @@ class GeometryFactory:
 
         return scene
 
-    def clear_grid(self):
+    def set_grid(self, lines, grid_step):
+        """ Replace the grid with one from the given lines.
+        """
         self.grid = {}
-
-    def add_grid_node(self, nodeid, x, y):
-        self.grid[nodeid] = (x, y)
+        y = 0
+        for line in lines:
+            x = 0
+            for pt_id in line:
+                if pt_id.isdigit():
+                    self.grid[int(pt_id)] = (x, y)
+                x += grid_step
+            y += grid_step
 
     def grid_node(self, nodeid):
+        """ Get the coordinates for the given grid node.
+        """
         return self.grid.get(nodeid)
diff --git a/test/bdd/steps/osm_data.py b/test/bdd/steps/osm_data.py
index 0f8b1188..c1043baa 100644
--- a/test/bdd/steps/osm_data.py
+++ b/test/bdd/steps/osm_data.py
@@ -7,28 +7,16 @@ import os
 def define_node_grid(context, grid_step):
     """
     Define a grid of node positions.
+    Use a table to define the grid. The nodes must be integer ids. Optionally
+    you can give the grid distance. The default is 0.00001 degrees.
     """
     if grid_step is not None:
         grid_step = float(grid_step.strip())
     else:
         grid_step = 0.00001
 
-    context.osm.clear_grid()
-
-    i = 0
-    for h in context.table.headings:
-        if h.isdigit():
-            context.osm.add_grid_node(int(h), 0, i)
-        i += grid_step
-
-    x = grid_step
-    for r in context.table:
-        y = 0
-        for h in r:
-            if h.isdigit():
-                context.osm.add_grid_node(int(h), x, y)
-            y += grid_step
-        x += grid_step
+    context.osm.set_grid([context.table.headings] + [list(h) for h in context.table],
+                          grid_step)
 
 
 @when(u'loading osm data')

From faa85ded50bf8ab4e772d0352f316c25d2a0f510 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 4 Jan 2021 17:54:51 +0100
Subject: [PATCH 04/17] bdd: move NominatimEnvironment into separate file

Also cleans up and modernizes the code and adds documentation.
---
 test/bdd/environment.py                 | 202 +--------------------
 test/bdd/steps/nominatim_environment.py | 224 ++++++++++++++++++++++++
 2 files changed, 229 insertions(+), 197 deletions(-)
 create mode 100644 test/bdd/steps/nominatim_environment.py

diff --git a/test/bdd/environment.py b/test/bdd/environment.py
index b6f6b986..aeee2301 100644
--- a/test/bdd/environment.py
+++ b/test/bdd/environment.py
@@ -1,17 +1,11 @@
 from behave import *
-import logging
-import os
-import psycopg2
-import psycopg2.extras
-import subprocess
-import tempfile
+from pathlib import Path
 
 from steps.geometry_factory import GeometryFactory
-
-logger = logging.getLogger(__name__)
+from steps.nominatim_environment import NominatimEnvironment
 
 userconfig = {
-    'BUILDDIR' : os.path.join(os.path.split(__file__)[0], "../../build"),
+    'BUILDDIR' : (Path(__file__) / '..' / '..' / '..' / 'build').resolve(),
     'REMOVE_TEMPLATE' : False,
     'KEEP_TEST_DB' : False,
     'DB_HOST' : None,
@@ -27,198 +21,12 @@ userconfig = {
 
 use_step_matcher("re")
 
-
-class NominatimEnvironment(object):
-    """ Collects all functions for the execution of Nominatim functions.
-    """
-
-    def __init__(self, config):
-        self.build_dir = os.path.abspath(config['BUILDDIR'])
-        self.src_dir = os.path.abspath(os.path.join(os.path.split(__file__)[0], "../.."))
-        self.db_host = config['DB_HOST']
-        self.db_port = config['DB_PORT']
-        self.db_user = config['DB_USER']
-        self.db_pass = config['DB_PASS']
-        self.template_db = config['TEMPLATE_DB']
-        self.test_db = config['TEST_DB']
-        self.api_test_db = config['API_TEST_DB']
-        self.server_module_path = config['SERVER_MODULE_PATH']
-        self.reuse_template = not config['REMOVE_TEMPLATE']
-        self.keep_scenario_db = config['KEEP_TEST_DB']
-        self.code_coverage_path = config['PHPCOV']
-        self.code_coverage_id = 1
-        self.test_env = None
-
-        self.template_db_done = False
-        self.website_dir = None
-
-    def connect_database(self, dbname):
-        dbargs = {'database': dbname}
-        if self.db_host:
-            dbargs['host'] = self.db_host
-        if self.db_port:
-            dbargs['port'] = self.db_port
-        if self.db_user:
-            dbargs['user'] = self.db_user
-        if self.db_pass:
-            dbargs['password'] = self.db_pass
-        conn = psycopg2.connect(**dbargs)
-        return conn
-
-    def next_code_coverage_file(self):
-        fn = os.path.join(self.code_coverage_path, "%06d.cov" % self.code_coverage_id)
-        self.code_coverage_id += 1
-
-        return fn
-
-    def write_nominatim_config(self, dbname):
-        dsn = 'pgsql:dbname={}{}{}{}{}'.format(
-                dbname,
-                 (';host=' + self.db_host) if self.db_host else '',
-                 (';port=' + self.db_port) if self.db_port else '',
-                 (';user=' + self.db_user) if self.db_user else '',
-                 (';password=' + self.db_pass) if self.db_pass else ''
-                 )
-
-        if self.website_dir is not None \
-           and self.test_env is not None \
-           and dsn == self.test_env['NOMINATIM_DATABASE_DSN']:
-            return # environment already set uo
-
-        self.test_env = os.environ
-        self.test_env['NOMINATIM_DATABASE_DSN'] = dsn
-        self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
-        self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
-        self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
-
-        if self.website_dir is not None:
-            self.website_dir.cleanup()
-
-        self.website_dir = tempfile.TemporaryDirectory()
-        self.run_setup_script('setup-website')
-
-
-    def db_drop_database(self, name):
-        conn = self.connect_database('postgres')
-        conn.set_isolation_level(0)
-        cur = conn.cursor()
-        cur.execute('DROP DATABASE IF EXISTS %s' % (name, ))
-        conn.close()
-
-    def setup_template_db(self):
-        if self.template_db_done:
-            return
-
-        self.template_db_done = True
-
-        if self.reuse_template:
-            # check that the template is there
-            conn = self.connect_database('postgres')
-            cur = conn.cursor()
-            cur.execute('select count(*) from pg_database where datname = %s',
-                        (self.template_db,))
-            if cur.fetchone()[0] == 1:
-                return
-            conn.close()
-        else:
-            # just in case... make sure a previous table has been dropped
-            self.db_drop_database(self.template_db)
-
-        try:
-            # call the first part of database setup
-            self.write_nominatim_config(self.template_db)
-            self.run_setup_script('create-db', 'setup-db')
-            # remove external data to speed up indexing for tests
-            conn = self.connect_database(self.template_db)
-            cur = conn.cursor()
-            cur.execute("""select tablename from pg_tables
-                           where tablename in ('gb_postcode', 'us_postcode')""")
-            for t in cur:
-                conn.cursor().execute('TRUNCATE TABLE %s' % (t[0],))
-            conn.commit()
-            conn.close()
-
-            # execute osm2pgsql import on an empty file to get the right tables
-            with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd:
-                fd.write(b'<osm version="0.6"></osm>')
-                fd.flush()
-                self.run_setup_script('import-data',
-                                      'ignore-errors',
-                                      'create-functions',
-                                      'create-tables',
-                                      'create-partition-tables',
-                                      'create-partition-functions',
-                                      'load-data',
-                                      'create-search-indices',
-                                      osm_file=fd.name,
-                                      osm2pgsql_cache='200')
-        except:
-            self.db_drop_database(self.template_db)
-            raise
-
-
-    def setup_api_db(self, context):
-        self.write_nominatim_config(self.api_test_db)
-
-    def setup_unknown_db(self, context):
-        self.write_nominatim_config('UNKNOWN_DATABASE_NAME')
-
-    def setup_db(self, context):
-        self.setup_template_db()
-        self.write_nominatim_config(self.test_db)
-        conn = self.connect_database(self.template_db)
-        conn.set_isolation_level(0)
-        cur = conn.cursor()
-        cur.execute('DROP DATABASE IF EXISTS %s' % (self.test_db, ))
-        cur.execute('CREATE DATABASE %s TEMPLATE = %s' % (self.test_db, self.template_db))
-        conn.close()
-        context.db = self.connect_database(self.test_db)
-        psycopg2.extras.register_hstore(context.db, globally=False)
-
-    def teardown_db(self, context):
-        if 'db' in context:
-            context.db.close()
-
-        if not self.keep_scenario_db:
-            self.db_drop_database(self.test_db)
-
-    def run_setup_script(self, *args, **kwargs):
-        if self.server_module_path:
-            kwargs = dict(kwargs)
-            kwargs['module_path'] = self.server_module_path
-        self.run_nominatim_script('setup', *args, **kwargs)
-
-    def run_update_script(self, *args, **kwargs):
-        self.run_nominatim_script('update', *args, **kwargs)
-
-    def run_nominatim_script(self, script, *args, **kwargs):
-        cmd = ['/usr/bin/env', 'php', '-Cq']
-        cmd.append(os.path.join(self.build_dir, 'utils', '%s.php' % script))
-        cmd.extend(['--%s' % x for x in args])
-        for k, v in kwargs.items():
-            cmd.extend(('--' + k.replace('_', '-'), str(v)))
-
-        if self.website_dir is not None:
-            cwd = self.website_dir.name
-        else:
-            cwd = self.build_dir
-
-        proc = subprocess.Popen(cmd, cwd=cwd, env=self.test_env,
-                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        (outp, outerr) = proc.communicate()
-        outerr = outerr.decode('utf-8').replace('\\n', '\n')
-        logger.debug("run_nominatim_script: %s\n%s\n%s" % (cmd, outp, outerr))
-        assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr)
-
-
-
 def before_all(context):
     # logging setup
     context.config.setup_logging()
     # set up -D options
     for k,v in userconfig.items():
         context.config.userdata.setdefault(k, v)
-    logging.debug('User config: %s' %(str(context.config.userdata)))
     # Nominatim test setup
     context.nominatim = NominatimEnvironment(context.config.userdata)
     context.osm = GeometryFactory()
@@ -228,9 +36,9 @@ def before_scenario(context, scenario):
     if 'DB' in context.tags:
         context.nominatim.setup_db(context)
     elif 'APIDB' in context.tags:
-        context.nominatim.setup_api_db(context)
+        context.nominatim.setup_api_db()
     elif 'UNKNOWNDB' in context.tags:
-        context.nominatim.setup_unknown_db(context)
+        context.nominatim.setup_unknown_db()
     context.scene = None
 
 def after_scenario(context, scenario):
diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py
new file mode 100644
index 00000000..4c973358
--- /dev/null
+++ b/test/bdd/steps/nominatim_environment.py
@@ -0,0 +1,224 @@
+import logging
+import os
+from pathlib import Path
+import subprocess
+import tempfile
+
+import psycopg2
+import psycopg2.extras
+
+LOG = logging.getLogger(__name__)
+
+class NominatimEnvironment:
+    """ Collects all functions for the execution of Nominatim functions.
+    """
+
+    def __init__(self, config):
+        self.build_dir = Path(config['BUILDDIR']).resolve()
+        self.src_dir = (Path(__file__) / '..' / '..' / '..' / '..').resolve()
+        self.db_host = config['DB_HOST']
+        self.db_port = config['DB_PORT']
+        self.db_user = config['DB_USER']
+        self.db_pass = config['DB_PASS']
+        self.template_db = config['TEMPLATE_DB']
+        self.test_db = config['TEST_DB']
+        self.api_test_db = config['API_TEST_DB']
+        self.server_module_path = config['SERVER_MODULE_PATH']
+        self.reuse_template = not config['REMOVE_TEMPLATE']
+        self.keep_scenario_db = config['KEEP_TEST_DB']
+        self.code_coverage_path = config['PHPCOV']
+        self.code_coverage_id = 1
+        self.test_env = None
+
+        self.template_db_done = False
+        self.website_dir = None
+
+    def connect_database(self, dbname):
+        """ Return a connection to the database with the given name.
+            Uses configured host, user and port.
+        """
+        dbargs = {'database': dbname}
+        if self.db_host:
+            dbargs['host'] = self.db_host
+        if self.db_port:
+            dbargs['port'] = self.db_port
+        if self.db_user:
+            dbargs['user'] = self.db_user
+        if self.db_pass:
+            dbargs['password'] = self.db_pass
+        conn = psycopg2.connect(**dbargs)
+        return conn
+
+    def next_code_coverage_file(self):
+        """ Generate the next name for a coverage file.
+        """
+        fn = Path(self.code_coverage_path) / "{:06d}.cov".format(self.code_coverage_id)
+        self.code_coverage_id += 1
+
+        return fn.resolve()
+
+    def write_nominatim_config(self, dbname):
+        """ Set up a custom test configuration that connects to the given
+            database. This sets up the environment variables so that they can
+            be picked up by dotenv and creates a project directory with the
+            appropriate website scripts.
+        """
+        dsn = 'pgsql:dbname={}'.format(dbname)
+        if self.db_host:
+            dsn += ';host=' + self.db_host
+        if self.db_port:
+            dsn += ';port=' + self.db_port
+        if self.db_user:
+            dsn += ';user=' + self.db_user
+        if self.db_pass:
+            dsn += ';password=' + self.db_pass
+
+        if self.website_dir is not None \
+           and self.test_env is not None \
+           and dsn == self.test_env['NOMINATIM_DATABASE_DSN']:
+            return # environment already set uo
+
+        self.test_env = os.environ
+        self.test_env['NOMINATIM_DATABASE_DSN'] = dsn
+        self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
+        self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
+        self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
+
+        if self.server_module_path:
+            self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
+
+        if self.website_dir is not None:
+            self.website_dir.cleanup()
+
+        self.website_dir = tempfile.TemporaryDirectory()
+        self.run_setup_script('setup-website')
+
+
+    def db_drop_database(self, name):
+        """ Drop the database with the given name.
+        """
+        conn = self.connect_database('postgres')
+        conn.set_isolation_level(0)
+        cur = conn.cursor()
+        cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
+        conn.close()
+
+    def setup_template_db(self):
+        """ Setup a template database that already contains common test data.
+            Having a template database speeds up tests considerably but at
+            the price that the tests sometimes run with stale data.
+        """
+        if self.template_db_done:
+            return
+
+        self.template_db_done = True
+
+        if self.reuse_template:
+            # check that the template is there
+            conn = self.connect_database('postgres')
+            cur = conn.cursor()
+            cur.execute('select count(*) from pg_database where datname = %s',
+                        (self.template_db,))
+            if cur.fetchone()[0] == 1:
+                return
+            conn.close()
+        else:
+            # just in case... make sure a previous table has been dropped
+            self.db_drop_database(self.template_db)
+
+        try:
+            # call the first part of database setup
+            self.write_nominatim_config(self.template_db)
+            self.run_setup_script('create-db', 'setup-db')
+            # remove external data to speed up indexing for tests
+            conn = self.connect_database(self.template_db)
+            cur = conn.cursor()
+            cur.execute("""select tablename from pg_tables
+                           where tablename in ('gb_postcode', 'us_postcode')""")
+            for t in cur:
+                conn.cursor().execute('TRUNCATE TABLE {}'.format(t[0]))
+            conn.commit()
+            conn.close()
+
+            # execute osm2pgsql import on an empty file to get the right tables
+            with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd:
+                fd.write(b'<osm version="0.6"></osm>')
+                fd.flush()
+                self.run_setup_script('import-data',
+                                      'ignore-errors',
+                                      'create-functions',
+                                      'create-tables',
+                                      'create-partition-tables',
+                                      'create-partition-functions',
+                                      'load-data',
+                                      'create-search-indices',
+                                      osm_file=fd.name,
+                                      osm2pgsql_cache='200')
+        except:
+            self.db_drop_database(self.template_db)
+            raise
+
+
+    def setup_api_db(self):
+        """ Setup a test against the API test database.
+        """
+        self.write_nominatim_config(self.api_test_db)
+
+    def setup_unknown_db(self):
+        """ Setup a test against a non-existing database.
+        """
+        self.write_nominatim_config('UNKNOWN_DATABASE_NAME')
+
+    def setup_db(self, context):
+        """ Setup a test against a fresh, empty test database.
+        """
+        self.setup_template_db()
+        self.write_nominatim_config(self.test_db)
+        conn = self.connect_database(self.template_db)
+        conn.set_isolation_level(0)
+        cur = conn.cursor()
+        cur.execute('DROP DATABASE IF EXISTS {}'.format(self.test_db))
+        cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db))
+        conn.close()
+        context.db = self.connect_database(self.test_db)
+        psycopg2.extras.register_hstore(context.db, globally=False)
+
+    def teardown_db(self, context):
+        """ Remove the test database, if it exists.
+        """
+        if 'db' in context:
+            context.db.close()
+
+        if not self.keep_scenario_db:
+            self.db_drop_database(self.test_db)
+
+    def run_setup_script(self, *args, **kwargs):
+        """ Run the Nominatim setup script with the given arguments.
+        """
+        self.run_nominatim_script('setup', *args, **kwargs)
+
+    def run_update_script(self, *args, **kwargs):
+        """ Run the Nominatim update script with the given arguments.
+        """
+        self.run_nominatim_script('update', *args, **kwargs)
+
+    def run_nominatim_script(self, script, *args, **kwargs):
+        """ Run one of the Nominatim utility scripts with the given arguments.
+        """
+        cmd = ['/usr/bin/env', 'php', '-Cq']
+        cmd.append((Path(self.build_dir) / 'utils' / '{}.php'.format(script)).resolve())
+        cmd.extend(['--' + x for x in args])
+        for k, v in kwargs.items():
+            cmd.extend(('--' + k.replace('_', '-'), str(v)))
+
+        if self.website_dir is not None:
+            cwd = self.website_dir.name
+        else:
+            cwd = self.build_dir
+
+        proc = subprocess.Popen(cmd, cwd=cwd, env=self.test_env,
+                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        (outp, outerr) = proc.communicate()
+        outerr = outerr.decode('utf-8').replace('\\n', '\n')
+        LOG.debug("run_nominatim_script: %s\n%s\n%s", cmd, outp, outerr)
+        assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr)

From 72587b08fa66d70930b279a7ed96a945ed6c27e7 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 4 Jan 2021 19:58:59 +0100
Subject: [PATCH 05/17] bdd: move external process execution in separate func

---
 test/bdd/steps/nominatim_environment.py | 11 ++---------
 test/bdd/steps/queries.py               | 25 ++++---------------------
 test/bdd/steps/utils.py                 | 22 ++++++++++++++++++++++
 3 files changed, 28 insertions(+), 30 deletions(-)
 create mode 100644 test/bdd/steps/utils.py

diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py
index 4c973358..7741d667 100644
--- a/test/bdd/steps/nominatim_environment.py
+++ b/test/bdd/steps/nominatim_environment.py
@@ -1,13 +1,11 @@
-import logging
 import os
 from pathlib import Path
-import subprocess
 import tempfile
 
 import psycopg2
 import psycopg2.extras
 
-LOG = logging.getLogger(__name__)
+from steps.utils import run_script
 
 class NominatimEnvironment:
     """ Collects all functions for the execution of Nominatim functions.
@@ -216,9 +214,4 @@ class NominatimEnvironment:
         else:
             cwd = self.build_dir
 
-        proc = subprocess.Popen(cmd, cwd=cwd, env=self.test_env,
-                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        (outp, outerr) = proc.communicate()
-        outerr = outerr.decode('utf-8').replace('\\n', '\n')
-        LOG.debug("run_nominatim_script: %s\n%s\n%s", cmd, outp, outerr)
-        assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr)
+        run_script(cmd, cwd=cwd, env=self.test_env)
diff --git a/test/bdd/steps/queries.py b/test/bdd/steps/queries.py
index 0ea4685b..c7fdd6db 100644
--- a/test/bdd/steps/queries.py
+++ b/test/bdd/steps/queries.py
@@ -10,11 +10,11 @@ import io
 import re
 import logging
 import xml.etree.ElementTree as ET
-import subprocess
 from urllib.parse import urlencode
 from collections import OrderedDict
 
 from check_functions import Almost
+from utils import run_script
 
 logger = logging.getLogger(__name__)
 
@@ -277,14 +277,9 @@ def query_cmd(context, query, dups):
     if dups:
         cmd.extend(('--dedupe', '0'))
 
-    proc = subprocess.Popen(cmd, cwd=context.nominatim.build_dir,
-                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    (outp, err) = proc.communicate()
+    outp, err = run_script(cmd, cwd=context.nominatim.build_dir)
 
-    assert proc.returncode == 0, "query.php failed with message: %s\noutput: %s" % (err, outp)
-    logger.debug("run_nominatim_script: %s\n%s\n" % (cmd, outp.decode('utf-8').replace('\\n', '\n')))
-
-    context.response = SearchResponse(outp.decode('utf-8'), 'json')
+    context.response = SearchResponse(outp, 'json')
 
 def send_api_query(endpoint, params, fmt, context):
     if fmt is not None:
@@ -326,19 +321,7 @@ def send_api_query(endpoint, params, fmt, context):
     for k,v in params.items():
         cmd.append("%s=%s" % (k, v))
 
-    proc = subprocess.Popen(cmd, cwd=context.nominatim.website_dir.name, env=env,
-                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-    (outp, err) = proc.communicate()
-    outp = outp.decode('utf-8')
-    err = err.decode("utf-8")
-
-    logger.debug("Result: \n===============================\n"
-                 + outp + "\n===============================\n")
-
-    assert proc.returncode == 0, \
-                  "%s failed with message: %s" % (
-                      os.path.basename(env['SCRIPT_FILENAME']), err)
+    outp, err = run_script(cmd, cwd=context.nominatim.website_dir.name, env=env)
 
     assert len(err) == 0, "Unexpected PHP error: %s" % (err)
 
diff --git a/test/bdd/steps/utils.py b/test/bdd/steps/utils.py
new file mode 100644
index 00000000..64d020db
--- /dev/null
+++ b/test/bdd/steps/utils.py
@@ -0,0 +1,22 @@
+"""
+Various smaller helps for step execution.
+"""
+import logging
+import subprocess
+
+LOG = logging.getLogger(__name__)
+
+def run_script(cmd, **kwargs):
+    """ Run the given command, check that it is successful and output
+        when necessary.
+    """
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                            **kwargs)
+    (outp, outerr) = proc.communicate()
+    outp = outp.decode('utf-8')
+    outerr = outerr.decode('utf-8').replace('\\n', '\n')
+    LOG.debug("Run command: %s\n%s\n%s", cmd, outp, outerr)
+
+    assert proc.returncode == 0, "Script '{}' failed:\n{}\n{}\n".format(cmd[0], outp, outerr)
+
+    return outp, outerr

From 2712c5f90ee9e15d69424e367c7e53ab2a579f7b Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 4 Jan 2021 20:17:17 +0100
Subject: [PATCH 06/17] bdd: rename and clean up osm_data steps

Move common OPL creation code into a function and remove
unused imports.
---
 .../steps/{osm_data.py => steps_osm_data.py}  | 50 +++++++++----------
 1 file changed, 25 insertions(+), 25 deletions(-)
 rename test/bdd/steps/{osm_data.py => steps_osm_data.py} (80%)

diff --git a/test/bdd/steps/osm_data.py b/test/bdd/steps/steps_osm_data.py
similarity index 80%
rename from test/bdd/steps/osm_data.py
rename to test/bdd/steps/steps_osm_data.py
index c1043baa..465ad718 100644
--- a/test/bdd/steps/osm_data.py
+++ b/test/bdd/steps/steps_osm_data.py
@@ -1,8 +1,27 @@
-import subprocess
 import tempfile
 import random
 import os
 
+def write_opl_file(opl, grid):
+    """ Create a temporary OSM file from OPL and return the file name. It is
+        the responsibility of the caller to delete the file again.
+
+        Node with missing coordinates, can retrieve their coordinates from
+        a supplied grid. Failing that a random coordinate is assigned.
+    """
+    with tempfile.NamedTemporaryFile(suffix='.opl', delete=False) as fd:
+        for line in opl.splitlines():
+            if line.startswith('n') and line.find(' x') < 0:
+                coord = grid.grid_node(int(line[1:].split(' ')[0]))
+                if coord is None:
+                    coord = (random.random() * 360 - 180,
+                             random.random() * 180 - 90)
+                line += " x%f y%f" % coord
+            fd.write(line.encode('utf-8'))
+            fd.write(b'\n')
+
+        return fd.name
+
 @given(u'the ([0-9.]+ )?grid')
 def define_node_grid(context, grid_step):
     """
@@ -27,21 +46,11 @@ def load_osm_file(context):
 
     The data is expected as attached text in OPL format.
     """
-    # create a OSM file in /tmp and import it
-    with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.opl', delete=False) as fd:
-        fname = fd.name
-        for line in context.text.splitlines():
-            if line.startswith('n') and line.find(' x') < 0:
-                coord = context.osm.grid_node(int(line[1:].split(' ')[0]))
-                if coord is None:
-                    coord = (random.random() * 360 - 180,
-                             random.random() * 180 - 90)
-                line += " x%f y%f" % coord
-            fd.write(line.encode('utf-8'))
-            fd.write(b'\n')
-
+    # create an OSM file and import it
+    fname = write_opl_file(context.text, context.osm)
     context.nominatim.run_setup_script('import-data', osm_file=fname,
                                        osm2pgsql_cache=300)
+    os.remove(fname)
 
     ### reintroduce the triggers/indexes we've lost by having osm2pgsql set up place again
     cur = context.db.cursor()
@@ -52,7 +61,6 @@ def load_osm_file(context):
     cur.execute("""CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type)""")
     context.db.commit()
 
-    os.remove(fname)
 
 @when(u'updating osm data')
 def update_from_osm_file(context):
@@ -77,15 +85,7 @@ def update_from_osm_file(context):
     context.nominatim.run_setup_script('create-functions', 'create-partition-functions',
                                        'enable-diff-updates')
 
-    # create a OSM file in /tmp and import it
-    with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.opl', delete=False) as fd:
-        fname = fd.name
-        for line in context.text.splitlines():
-            if line.startswith('n') and line.find(' x') < 0:
-                    line += " x%d y%d" % (random.random() * 360 - 180,
-                                          random.random() * 180 - 90)
-            fd.write(line.encode('utf-8'))
-            fd.write(b'\n')
-
+    # create an OSM file and import it
+    fname = write_opl_file(context.text, context.osm)
     context.nominatim.run_update_script(import_diff=fname)
     os.remove(fname)

From 8a93f8ed94d04fed67824366b1691aa62b2c8a1d Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 4 Jan 2021 22:18:01 +0100
Subject: [PATCH 07/17] bdd: move Response classes in own file and simplify

Removes most of the duplicated parse functions, introduces
a common assert_field function with a more expressive error
message.
---
 test/bdd/steps/http_responses.py              | 194 ++++++++++++++++
 .../{queries.py => steps_api_queries.py}      | 218 +-----------------
 test/bdd/steps/steps_osm_data.py              |   2 +-
 3 files changed, 201 insertions(+), 213 deletions(-)
 create mode 100644 test/bdd/steps/http_responses.py
 rename test/bdd/steps/{queries.py => steps_api_queries.py} (64%)

diff --git a/test/bdd/steps/http_responses.py b/test/bdd/steps/http_responses.py
new file mode 100644
index 00000000..c3cd4f52
--- /dev/null
+++ b/test/bdd/steps/http_responses.py
@@ -0,0 +1,194 @@
+"""
+Classes wrapping HTTP responses from the Nominatim API.
+"""
+from collections import OrderedDict
+import re
+import json
+import xml.etree.ElementTree as ET
+
+from check_functions import Almost
+
+def _geojson_result_to_json_result(geojson_result):
+    result = geojson_result['properties']
+    result['geojson'] = geojson_result['geometry']
+    if 'bbox' in geojson_result:
+        # bbox is  minlon, minlat, maxlon, maxlat
+        # boundingbox is minlat, maxlat, minlon, maxlon
+        result['boundingbox'] = [geojson_result['bbox'][1],
+                                 geojson_result['bbox'][3],
+                                 geojson_result['bbox'][0],
+                                 geojson_result['bbox'][2]]
+    return result
+
+class BadRowValueAssert:
+    """ Lazily formatted message for failures to find a field content.
+    """
+
+    def __init__(self, response, idx, field, value):
+        self.idx = idx
+        self.field = field
+        self.value = value
+        self.row = response.result[idx]
+
+    def __str__(self):
+        return "\nBad value for row {} field '{}'. Expected: {}, got: {}.\nFull row: {}"""\
+                   .format(self.idx, self.field, self.value,
+                           self.row[self.field], json.dumps(self.row, indent=4))
+
+
+class GenericResponse:
+    """ Common base class for all API responses.
+    """
+    def __init__(self, page, fmt, errorcode=200):
+        self.page = page
+        self.format = fmt
+        self.errorcode = errorcode
+        self.result = []
+        self.header = dict()
+
+        if errorcode == 200:
+            getattr(self, '_parse_' + fmt)()
+
+    def _parse_json(self):
+        m = re.fullmatch(r'([\w$][^(]*)\((.*)\)', self.page)
+        if m is None:
+            code = self.page
+        else:
+            code = m.group(2)
+            self.header['json_func'] = m.group(1)
+        self.result = json.JSONDecoder(object_pairs_hook=OrderedDict).decode(code)
+        if isinstance(self.result, OrderedDict):
+            self.result = [self.result]
+
+    def _parse_geojson(self):
+        self._parse_json()
+        if 'error' in self.result[0]:
+            self.result = []
+        else:
+            self.result = list(map(_geojson_result_to_json_result, self.result[0]['features']))
+
+    def _parse_geocodejson(self):
+        self._parse_geojson()
+        if self.result is not None:
+            self.result = [r['geocoding'] for r in self.result]
+
+    def assert_field(self, idx, field, value):
+        """ Check that result row `idx` has a field `field` with value `value`.
+            Float numbers are matched approximately. When the expected value
+            starts with a carat, regular expression matching is used.
+        """
+        assert field in self.result[idx], \
+               "Result row {} has no field '{}'.\nFull row: {}"\
+                   .format(idx, field, json.dumps(self.result[idx], indent=4))
+
+        if isinstance(value, float):
+            assert Almost(value) == float(self.result[idx][field]), \
+                   BadRowValueAssert(self, idx, field, value)
+        elif value.startswith("^"):
+            assert re.fullmatch(value, self.result[idx][field]), \
+                   BadRowValueAssert(self, idx, field, value)
+        else:
+            assert str(self.result[idx][field]) == str(value), \
+                   BadRowValueAssert(self, idx, field, value)
+
+    def match_row(self, row):
+        """ Match the result fields against the given behave table row.
+        """
+        if 'ID' in row.headings:
+            todo = [int(row['ID'])]
+        else:
+            todo = range(len(self.result))
+
+        for i in todo:
+            for name, value in zip(row.headings, row.cells):
+                if name == 'ID':
+                    pass
+                elif name == 'osm':
+                    self.assert_field(i, 'osm_type', value[0])
+                    self.assert_field(i, 'osm_id', value[1:])
+                elif name == 'centroid':
+                    lon, lat = value.split(' ')
+                    self.assert_field(i, 'lat', float(lat))
+                    self.assert_field(i, 'lon', float(lon))
+                else:
+                    self.assert_field(i, name, value)
+
+    def property_list(self, prop):
+        return [x[prop] for x in self.result]
+
+
+class SearchResponse(GenericResponse):
+    """ Specialised class for search and lookup responses.
+        Transforms the xml response in a format similar to json.
+    """
+
+    def _parse_xml(self):
+        xml_tree = ET.fromstring(self.page)
+
+        self.header = dict(xml_tree.attrib)
+
+        for child in xml_tree:
+            assert child.tag == "place"
+            self.result.append(dict(child.attrib))
+
+            address = {}
+            for sub in child:
+                if sub.tag == 'extratags':
+                    self.result[-1]['extratags'] = {}
+                    for tag in sub:
+                        self.result[-1]['extratags'][tag.attrib['key']] = tag.attrib['value']
+                elif sub.tag == 'namedetails':
+                    self.result[-1]['namedetails'] = {}
+                    for tag in sub:
+                        self.result[-1]['namedetails'][tag.attrib['desc']] = tag.text
+                elif sub.tag == 'geokml':
+                    self.result[-1][sub.tag] = True
+                else:
+                    address[sub.tag] = sub.text
+
+            if address:
+                self.result[-1]['address'] = address
+
+
+class ReverseResponse(GenericResponse):
+    """ Specialised class for reverse responses.
+        Transforms the xml response in a format similar to json.
+    """
+
+    def _parse_xml(self):
+        xml_tree = ET.fromstring(self.page)
+
+        self.header = dict(xml_tree.attrib)
+        self.result = []
+
+        for child in xml_tree:
+            if child.tag == 'result':
+                assert not self.result, "More than one result in reverse result"
+                self.result.append(dict(child.attrib))
+            elif child.tag == 'addressparts':
+                address = {}
+                for sub in child:
+                    address[sub.tag] = sub.text
+                self.result[0]['address'] = address
+            elif child.tag == 'extratags':
+                self.result[0]['extratags'] = {}
+                for tag in child:
+                    self.result[0]['extratags'][tag.attrib['key']] = tag.attrib['value']
+            elif child.tag == 'namedetails':
+                self.result[0]['namedetails'] = {}
+                for tag in child:
+                    self.result[0]['namedetails'][tag.attrib['desc']] = tag.text
+            elif child.tag == 'geokml':
+                self.result[0][child.tag] = True
+            else:
+                assert child.tag == 'error', \
+                       "Unknown XML tag {} on page: {}".format(child.tag, self.page)
+
+
+class StatusResponse(GenericResponse):
+    """ Specialised class for status responses.
+        Can also parse text responses.
+    """
+
+    def _parse_text(self):
+        pass
diff --git a/test/bdd/steps/queries.py b/test/bdd/steps/steps_api_queries.py
similarity index 64%
rename from test/bdd/steps/queries.py
rename to test/bdd/steps/steps_api_queries.py
index c7fdd6db..ebda5ec6 100644
--- a/test/bdd/steps/queries.py
+++ b/test/bdd/steps/steps_api_queries.py
@@ -1,22 +1,18 @@
-""" Steps that run search queries.
+""" Steps that run queries against the API.
 
     Queries may either be run directly via PHP using the query script
-    or via the HTTP interface.
+    or via the HTTP interface using php-cgi.
 """
-
 import json
 import os
-import io
 import re
 import logging
-import xml.etree.ElementTree as ET
 from urllib.parse import urlencode
-from collections import OrderedDict
 
-from check_functions import Almost
 from utils import run_script
+from http_responses import GenericResponse, SearchResponse, ReverseResponse, StatusResponse
 
-logger = logging.getLogger(__name__)
+LOG = logging.getLogger(__name__)
 
 BASE_SERVER_ENV = {
     'HTTP_HOST' : 'localhost',
@@ -56,208 +52,6 @@ def compare(operator, op1, op2):
     else:
         raise Exception("unknown operator '%s'" % operator)
 
-class GenericResponse(object):
-
-    def match_row(self, row):
-        if 'ID' in row.headings:
-            todo = [int(row['ID'])]
-        else:
-            todo = range(len(self.result))
-
-        for i in todo:
-            res = self.result[i]
-            for h in row.headings:
-                if h == 'ID':
-                    pass
-                elif h == 'osm':
-                    assert res['osm_type'] == row[h][0]
-                    assert res['osm_id'] == int(row[h][1:])
-                elif h == 'centroid':
-                    x, y = row[h].split(' ')
-                    assert Almost(float(y)) == float(res['lat'])
-                    assert Almost(float(x)) == float(res['lon'])
-                elif row[h].startswith("^"):
-                    assert h in res
-                    assert re.fullmatch(row[h], res[h]) is not None, \
-                           "attribute '%s': expected: '%s', got '%s'" % (h, row[h], res[h])
-                else:
-                    assert h in res
-                    assert str(res[h]) == str(row[h])
-
-    def property_list(self, prop):
-        return [ x[prop] for x in self.result ]
-
-
-class SearchResponse(GenericResponse):
-
-    def __init__(self, page, fmt='json', errorcode=200):
-        self.page = page
-        self.format = fmt
-        self.errorcode = errorcode
-        self.result = []
-        self.header = dict()
-
-        if errorcode == 200:
-            getattr(self, 'parse_' + fmt)()
-
-    def parse_json(self):
-        m = re.fullmatch(r'([\w$][^(]*)\((.*)\)', self.page)
-        if m is None:
-            code = self.page
-        else:
-            code = m.group(2)
-            self.header['json_func'] = m.group(1)
-        self.result = json.JSONDecoder(object_pairs_hook=OrderedDict).decode(code)
-
-    def parse_geojson(self):
-        self.parse_json()
-        self.result = geojson_results_to_json_results(self.result)
-
-    def parse_geocodejson(self):
-        self.parse_geojson()
-        if self.result is not None:
-            self.result = [r['geocoding'] for r in self.result]
-
-    def parse_xml(self):
-        et = ET.fromstring(self.page)
-
-        self.header = dict(et.attrib)
-
-        for child in et:
-            assert child.tag == "place"
-            self.result.append(dict(child.attrib))
-
-            address = {}
-            for sub in child:
-                if sub.tag == 'extratags':
-                    self.result[-1]['extratags'] = {}
-                    for tag in sub:
-                        self.result[-1]['extratags'][tag.attrib['key']] = tag.attrib['value']
-                elif sub.tag == 'namedetails':
-                    self.result[-1]['namedetails'] = {}
-                    for tag in sub:
-                        self.result[-1]['namedetails'][tag.attrib['desc']] = tag.text
-                elif sub.tag in ('geokml'):
-                    self.result[-1][sub.tag] = True
-                else:
-                    address[sub.tag] = sub.text
-
-            if len(address) > 0:
-                self.result[-1]['address'] = address
-
-
-class ReverseResponse(GenericResponse):
-
-    def __init__(self, page, fmt='json', errorcode=200):
-        self.page = page
-        self.format = fmt
-        self.errorcode = errorcode
-        self.result = []
-        self.header = dict()
-
-        if errorcode == 200:
-            getattr(self, 'parse_' + fmt)()
-
-    def parse_json(self):
-        m = re.fullmatch(r'([\w$][^(]*)\((.*)\)', self.page)
-        if m is None:
-            code = self.page
-        else:
-            code = m.group(2)
-            self.header['json_func'] = m.group(1)
-        self.result = [json.JSONDecoder(object_pairs_hook=OrderedDict).decode(code)]
-
-    def parse_geojson(self):
-        self.parse_json()
-        if 'error' in self.result:
-            return
-        self.result = geojson_results_to_json_results(self.result[0])
-
-    def parse_geocodejson(self):
-        self.parse_geojson()
-        if self.result is not None:
-            self.result = [r['geocoding'] for r in self.result]
-
-    def parse_xml(self):
-        et = ET.fromstring(self.page)
-
-        self.header = dict(et.attrib)
-        self.result = []
-
-        for child in et:
-            if child.tag == 'result':
-                assert len(self.result) == 0, "More than one result in reverse result"
-                self.result.append(dict(child.attrib))
-            elif child.tag == 'addressparts':
-                address = {}
-                for sub in child:
-                    address[sub.tag] = sub.text
-                self.result[0]['address'] = address
-            elif child.tag == 'extratags':
-                self.result[0]['extratags'] = {}
-                for tag in child:
-                    self.result[0]['extratags'][tag.attrib['key']] = tag.attrib['value']
-            elif child.tag == 'namedetails':
-                self.result[0]['namedetails'] = {}
-                for tag in child:
-                    self.result[0]['namedetails'][tag.attrib['desc']] = tag.text
-            elif child.tag in ('geokml'):
-                self.result[0][child.tag] = True
-            else:
-                assert child.tag == 'error', \
-                        "Unknown XML tag %s on page: %s" % (child.tag, self.page)
-
-
-class DetailsResponse(GenericResponse):
-
-    def __init__(self, page, fmt='json', errorcode=200):
-        self.page = page
-        self.format = fmt
-        self.errorcode = errorcode
-        self.result = []
-        self.header = dict()
-
-        if errorcode == 200:
-            getattr(self, 'parse_' + fmt)()
-
-    def parse_json(self):
-        self.result = [json.JSONDecoder(object_pairs_hook=OrderedDict).decode(self.page)]
-
-
-class StatusResponse(GenericResponse):
-
-    def __init__(self, page, fmt='text', errorcode=200):
-        self.page = page
-        self.format = fmt
-        self.errorcode = errorcode
-
-        if errorcode == 200 and fmt != 'text':
-            getattr(self, 'parse_' + fmt)()
-
-    def parse_json(self):
-        self.result = [json.JSONDecoder(object_pairs_hook=OrderedDict).decode(self.page)]
-
-
-def geojson_result_to_json_result(geojson_result):
-    result = geojson_result['properties']
-    result['geojson'] = geojson_result['geometry']
-    if 'bbox' in geojson_result:
-        # bbox is  minlon, minlat, maxlon, maxlat
-        # boundingbox is minlat, maxlat, minlon, maxlon
-        result['boundingbox'] = [
-                                    geojson_result['bbox'][1],
-                                    geojson_result['bbox'][3],
-                                    geojson_result['bbox'][0],
-                                    geojson_result['bbox'][2]
-                                ]
-    return result
-
-
-def geojson_results_to_json_results(geojson_results):
-    if 'error' in geojson_results:
-        return
-    return list(map(geojson_result_to_json_result, geojson_results['features']))
-
 
 @when(u'searching for "(?P<query>.*)"(?P<dups> with dups)?')
 def query_cmd(context, query, dups):
@@ -301,7 +95,7 @@ def send_api_query(endpoint, params, fmt, context):
     env['SCRIPT_FILENAME'] = os.path.join(env['CONTEXT_DOCUMENT_ROOT'],
                                           '%s.php' % endpoint)
 
-    logger.debug("Environment:" + json.dumps(env, sort_keys=True, indent=2))
+    LOG.debug("Environment:" + json.dumps(env, sort_keys=True, indent=2))
 
     if hasattr(context, 'http_headers'):
         env.update(context.http_headers)
@@ -395,7 +189,7 @@ def website_details_request(context, fmt, query):
     else:
         outfmt = fmt.strip()
 
-    context.response = DetailsResponse(outp, outfmt, status)
+    context.response = GenericResponse(outp, outfmt, status)
 
 @when(u'sending (?P<fmt>\S+ )?lookup query for (?P<query>.*)')
 def website_lookup_request(context, fmt, query):
diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py
index 465ad718..db208fe8 100644
--- a/test/bdd/steps/steps_osm_data.py
+++ b/test/bdd/steps/steps_osm_data.py
@@ -35,7 +35,7 @@ def define_node_grid(context, grid_step):
         grid_step = 0.00001
 
     context.osm.set_grid([context.table.headings] + [list(h) for h in context.table],
-                          grid_step)
+                         grid_step)
 
 
 @when(u'loading osm data')

From 12ae8a4ed38ecf61d375e12ea699bf265737ba2b Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 5 Jan 2021 10:17:59 +0100
Subject: [PATCH 08/17] bdd: move output format computation into response

---
 test/bdd/steps/http_responses.py    |  4 +++
 test/bdd/steps/steps_api_queries.py | 43 ++++-------------------------
 2 files changed, 9 insertions(+), 38 deletions(-)

diff --git a/test/bdd/steps/http_responses.py b/test/bdd/steps/http_responses.py
index c3cd4f52..161e29fd 100644
--- a/test/bdd/steps/http_responses.py
+++ b/test/bdd/steps/http_responses.py
@@ -40,6 +40,10 @@ class GenericResponse:
     """ Common base class for all API responses.
     """
     def __init__(self, page, fmt, errorcode=200):
+        fmt = fmt.strip()
+        if fmt == 'jsonv2':
+            fmt = 'json'
+
         self.page = page
         self.format = fmt
         self.errorcode = errorcode
diff --git a/test/bdd/steps/steps_api_queries.py b/test/bdd/steps/steps_api_queries.py
index ebda5ec6..47dc8ac3 100644
--- a/test/bdd/steps/steps_api_queries.py
+++ b/test/bdd/steps/steps_api_queries.py
@@ -148,12 +148,7 @@ def website_search_request(context, fmt, query, addr):
 
     outp, status = send_api_query('search', params, fmt, context)
 
-    if fmt is None or fmt == 'jsonv2 ':
-        outfmt = 'json'
-    else:
-        outfmt = fmt.strip()
-
-    context.response = SearchResponse(outp, outfmt, status)
+    context.response = SearchResponse(outp, fmt or 'json', status)
 
 @when(u'sending (?P<fmt>\S+ )?reverse coordinates (?P<lat>.+)?,(?P<lon>.+)?')
 def website_reverse_request(context, fmt, lat, lon):
@@ -165,14 +160,7 @@ def website_reverse_request(context, fmt, lat, lon):
 
     outp, status = send_api_query('reverse', params, fmt, context)
 
-    if fmt is None:
-        outfmt = 'xml'
-    elif fmt == 'jsonv2 ':
-        outfmt = 'json'
-    else:
-        outfmt = fmt.strip()
-
-    context.response = ReverseResponse(outp, outfmt, status)
+    context.response = ReverseResponse(outp, fmt or 'xml', status)
 
 @when(u'sending (?P<fmt>\S+ )?details query for (?P<query>.*)')
 def website_details_request(context, fmt, query):
@@ -184,42 +172,21 @@ def website_details_request(context, fmt, query):
         params['place_id'] = query
     outp, status = send_api_query('details', params, fmt, context)
 
-    if fmt is None:
-        outfmt = 'json'
-    else:
-        outfmt = fmt.strip()
-
-    context.response = GenericResponse(outp, outfmt, status)
+    context.response = GenericResponse(outp, fmt or 'json', status)
 
 @when(u'sending (?P<fmt>\S+ )?lookup query for (?P<query>.*)')
 def website_lookup_request(context, fmt, query):
     params = { 'osm_ids' : query }
     outp, status = send_api_query('lookup', params, fmt, context)
 
-    if fmt == 'json ':
-        outfmt = 'json'
-    elif fmt == 'jsonv2 ':
-        outfmt = 'json'
-    elif fmt == 'geojson ':
-        outfmt = 'geojson'
-    elif fmt == 'geocodejson ':
-        outfmt = 'geocodejson'
-    else:
-        outfmt = 'xml'
-
-    context.response = SearchResponse(outp, outfmt, status)
+    context.response = SearchResponse(outp, fmt or 'xml', status)
 
 @when(u'sending (?P<fmt>\S+ )?status query')
 def website_status_request(context, fmt):
     params = {}
     outp, status = send_api_query('status', params, fmt, context)
 
-    if fmt is None:
-        outfmt = 'text'
-    else:
-        outfmt = fmt.strip()
-
-    context.response = StatusResponse(outp, outfmt, status)
+    context.response = StatusResponse(outp, fmt or 'text', status)
 
 @step(u'(?P<operator>less than|more than|exactly|at least|at most) (?P<number>\d+) results? (?:is|are) returned')
 def validate_result_number(context, operator, number):

From 213bf7d19d0e99355954bfacfe0833695f770151 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 5 Jan 2021 10:20:00 +0100
Subject: [PATCH 09/17] bdd: rename db_ops steps

Now all files implementing steps are called steps_*.py.
---
 test/bdd/steps/{db_ops.py => steps_db_ops.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test/bdd/steps/{db_ops.py => steps_db_ops.py} (100%)

diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/steps_db_ops.py
similarity index 100%
rename from test/bdd/steps/db_ops.py
rename to test/bdd/steps/steps_db_ops.py

From 58c471c627a8893413f9734ff36c3fdc00827e6e Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 5 Jan 2021 10:39:44 +0100
Subject: [PATCH 10/17] bdd: remove class for lazy formatting

assert in combination with format() does the right thing and calls
the __str__() method only when an assertion hits.
---
 test/bdd/steps/steps_db_ops.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 078e29fe..67ebcd3e 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -92,14 +92,6 @@ class PlaceColumn:
                      self.geometry)
         cursor.execute(query, list(self.columns.values()))
 
-class LazyFmt(object):
-
-    def __init__(self, fmtstr, *args):
-        self.fmt = fmtstr
-        self.args = args
-
-    def __str__(self):
-        return self.fmt % self.args
 
 class PlaceObjName(object):
 
@@ -125,15 +117,15 @@ class PlaceObjName(object):
 def compare_place_id(expected, result, column, context):
     if expected == '0':
         assert result == 0, \
-            LazyFmt("Bad place id in column %s. Expected: 0, got: %s.",
+               "Bad place id in column {}. Expected: 0, got: {!s}.".format(
                     column, PlaceObjName(result, context.db))
     elif expected == '-':
         assert result is None, \
-                LazyFmt("bad place id in column %s: %s.",
+               "Bad place id in column {}: {!s}.".format(
                         column, PlaceObjName(result, context.db))
     else:
         assert NominatimID(expected).get_place_id(context.db.cursor()) == result, \
-            LazyFmt("Bad place id in column %s. Expected: %s, got: %s.",
+               "Bad place id in column {}. Expected: {}, got: {!s}.".format(
                     column, expected, PlaceObjName(result, context.db))
 
 def check_database_integrity(context):

From 5dfa76a610972eea3930ecff1c41fb795ea2e7e4 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 5 Jan 2021 11:22:43 +0100
Subject: [PATCH 11/17] bdd: switch to auto commit mode

Put the connection to the test database into auto-commit mode
and get rid of the explicit commits. Also use cursors always in
context managers and unify the two implementations that copy
data from the place table.
---
 test/bdd/steps/nominatim_environment.py |  23 +
 test/bdd/steps/steps_db_ops.py          | 563 +++++++++++-------------
 test/bdd/steps/steps_osm_data.py        |  12 +-
 3 files changed, 289 insertions(+), 309 deletions(-)

diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py
index 7741d667..2547bbe2 100644
--- a/test/bdd/steps/nominatim_environment.py
+++ b/test/bdd/steps/nominatim_environment.py
@@ -179,6 +179,7 @@ class NominatimEnvironment:
         cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db))
         conn.close()
         context.db = self.connect_database(self.test_db)
+        context.db.autocommit = True
         psycopg2.extras.register_hstore(context.db, globally=False)
 
     def teardown_db(self, context):
@@ -215,3 +216,25 @@ class NominatimEnvironment:
             cwd = self.build_dir
 
         run_script(cmd, cwd=cwd, env=self.test_env)
+
+    def copy_from_place(self, db):
+        """ Copy data from place to the placex and location_property_osmline
+            tables invoking the appropriate triggers.
+        """
+        self.run_setup_script('create-functions', 'create-partition-functions')
+
+        with db.cursor() as cur:
+            cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type,
+                                               name, admin_level, address,
+                                               extratags, geometry)
+                             SELECT osm_type, osm_id, class, type,
+                                    name, admin_level, address,
+                                    extratags, geometry
+                               FROM place
+                               WHERE not (class='place' and type='houses' and osm_type='W')""")
+            cur.execute("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
+                             SELECT osm_id, address, geometry
+                               FROM place
+                              WHERE class='place' and type='houses'
+                                    and osm_type='W'
+                                    and ST_GeometryType(geometry) = 'ST_LineString'""")
diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 67ebcd3e..97f9055b 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -218,7 +218,7 @@ def assert_db_column(row, column, value, context):
             "Row '%s': expected: %s, got: %s" % (column, value, str(row[column]))
 
 
-################################ STEPS ##################################
+################################ GIVEN ##################################
 
 @given(u'the scene (?P<scene>.+)')
 def set_default_scene(context, scene):
@@ -226,83 +226,72 @@ def set_default_scene(context, scene):
 
 @given("the (?P<named>named )?places")
 def add_data_to_place_table(context, named):
-    cur = context.db.cursor()
-    cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
-    for r in context.table:
-        col = PlaceColumn(context, named is not None)
+    with context.db.cursor() as cur:
+        cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
+        for r in context.table:
+            col = PlaceColumn(context, named is not None)
 
-        for h in r.headings:
-            col.add(h, r[h])
+            for h in r.headings:
+                col.add(h, r[h])
 
-        col.db_insert(cur)
-    cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert')
-    cur.close()
-    context.db.commit()
+            col.db_insert(cur)
+        cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert')
 
 @given("the relations")
 def add_data_to_planet_relations(context):
-    cur = context.db.cursor()
-    for r in context.table:
-        last_node = 0
-        last_way = 0
-        parts = []
-        if r['members']:
-            members = []
-            for m in r['members'].split(','):
-                mid = NominatimID(m)
-                if mid.typ == 'N':
-                    parts.insert(last_node, int(mid.oid))
-                    last_node += 1
-                    last_way += 1
-                elif mid.typ == 'W':
-                    parts.insert(last_way, int(mid.oid))
-                    last_way += 1
-                else:
-                    parts.append(int(mid.oid))
+    with context.db.cursor() as cur:
+        for r in context.table:
+            last_node = 0
+            last_way = 0
+            parts = []
+            if r['members']:
+                members = []
+                for m in r['members'].split(','):
+                    mid = NominatimID(m)
+                    if mid.typ == 'N':
+                        parts.insert(last_node, int(mid.oid))
+                        last_node += 1
+                        last_way += 1
+                    elif mid.typ == 'W':
+                        parts.insert(last_way, int(mid.oid))
+                        last_way += 1
+                    else:
+                        parts.append(int(mid.oid))
 
-                members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
-        else:
-            members = None
+                    members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
+            else:
+                members = None
 
-        tags = []
-        for h in r.headings:
-            if h.startswith("tags+"):
-                tags.extend((h[5:], r[h]))
+            tags = []
+            for h in r.headings:
+                if h.startswith("tags+"):
+                    tags.extend((h[5:], r[h]))
 
-        cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
-                       VALUES (%s, %s, %s, %s, %s, %s)""",
-                    (r['id'], last_node, last_way, parts, members, tags))
-    context.db.commit()
+            cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
+                           VALUES (%s, %s, %s, %s, %s, %s)""",
+                        (r['id'], last_node, last_way, parts, members, tags))
 
 @given("the ways")
 def add_data_to_planet_ways(context):
-    cur = context.db.cursor()
-    for r in context.table:
-        tags = []
-        for h in r.headings:
-            if h.startswith("tags+"):
-                tags.extend((h[5:], r[h]))
+    with context.db.cursor() as cur:
+        for r in context.table:
+            tags = []
+            for h in r.headings:
+                if h.startswith("tags+"):
+                    tags.extend((h[5:], r[h]))
 
-        nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
+            nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
 
-        cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
-                    (r['id'], nodes, tags))
-    context.db.commit()
+            cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
+                        (r['id'], nodes, tags))
+
+################################ WHEN ##################################
 
 @when("importing")
 def import_and_index_data_from_place_table(context):
-    context.nominatim.run_setup_script('create-functions', 'create-partition-functions')
-    cur = context.db.cursor()
-    cur.execute(
-        """insert into placex (osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry)
-           select              osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry
-           from place where not (class='place' and type='houses' and osm_type='W')""")
-    cur.execute(
-            """insert into location_property_osmline (osm_id, address, linegeo)
-             SELECT osm_id, address, geometry from place
-              WHERE class='place' and type='houses' and osm_type='W'
-                    and ST_GeometryType(geometry) = 'ST_LineString'""")
-    context.db.commit()
+    """ Import data previously set up in the place table.
+    """
+    context.nominatim.copy_from_place(context.db)
     context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse')
     check_database_integrity(context)
 
@@ -310,24 +299,21 @@ def import_and_index_data_from_place_table(context):
 def update_place_table(context):
     context.nominatim.run_setup_script(
         'create-functions', 'create-partition-functions', 'enable-diff-updates')
-    cur = context.db.cursor()
-    for r in context.table:
-        col = PlaceColumn(context, False)
+    with context.db.cursor() as cur:
+        for r in context.table:
+            col = PlaceColumn(context, False)
 
-        for h in r.headings:
-            col.add(h, r[h])
+            for h in r.headings:
+                col.add(h, r[h])
 
-        col.db_insert(cur)
+            col.db_insert(cur)
 
-    context.db.commit()
+        while True:
+            context.nominatim.run_update_script('index')
 
-    while True:
-        context.nominatim.run_update_script('index')
-
-        cur = context.db.cursor()
-        cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
-        if cur.rowcount == 0:
-            break
+            cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
+            if cur.rowcount == 0:
+                break
 
     check_database_integrity(context)
 
@@ -339,277 +325,258 @@ def update_postcodes(context):
 def delete_places(context, oids):
     context.nominatim.run_setup_script(
         'create-functions', 'create-partition-functions', 'enable-diff-updates')
-    cur = context.db.cursor()
-    for oid in oids.split(','):
-        where, params = NominatimID(oid).table_select()
-        cur.execute("DELETE FROM place WHERE " + where, params)
-    context.db.commit()
+    with context.db.cursor() as cur:
+        for oid in oids.split(','):
+            where, params = NominatimID(oid).table_select()
+            cur.execute("DELETE FROM place WHERE " + where, params)
 
     while True:
         context.nominatim.run_update_script('index')
 
-        cur = context.db.cursor()
-        cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
-        if cur.rowcount == 0:
-            break
+        with context.db.cursor() as cur:
+            cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
+            if cur.rowcount == 0:
+                break
+
+################################ THEN ##################################
 
 @then("placex contains(?P<exact> exactly)?")
 def check_placex_contents(context, exact):
-    cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        expected_content = set()
+        for row in context.table:
+            nid = NominatimID(row['object'])
+            where, params = nid.table_select()
+            cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt,
+                           ST_X(centroid) as cx, ST_Y(centroid) as cy
+                           FROM placex where %s""" % where,
+                        params)
+            assert cur.rowcount > 0, "No rows found for " + row['object']
 
-    expected_content = set()
-    for row in context.table:
-        nid = NominatimID(row['object'])
-        where, params = nid.table_select()
-        cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt,
-                       ST_X(centroid) as cx, ST_Y(centroid) as cy
-                       FROM placex where %s""" % where,
-                    params)
-        assert cur.rowcount > 0, "No rows found for " + row['object']
-
-        for res in cur:
-            if exact:
-                expected_content.add((res['osm_type'], res['osm_id'], res['class']))
-            for h in row.headings:
-                if h in ('extratags', 'address'):
-                    if row[h] == '-':
-                        assert res[h] is None
+            for res in cur:
+                if exact:
+                    expected_content.add((res['osm_type'], res['osm_id'], res['class']))
+                for h in row.headings:
+                    if h in ('extratags', 'address'):
+                        if row[h] == '-':
+                            assert res[h] is None
+                        else:
+                            vdict = eval('{' + row[h] + '}')
+                            assert vdict == res[h]
+                    elif h.startswith('name'):
+                        name = h[5:] if h.startswith('name+') else 'name'
+                        assert name in res['name']
+                        assert res['name'][name] == row[h]
+                    elif h.startswith('extratags+'):
+                        assert res['extratags'][h[10:]] == row[h]
+                    elif h.startswith('addr+'):
+                        if row[h] == '-':
+                            if res['address'] is not None:
+                                assert h[5:] not in res['address']
+                        else:
+                            assert h[5:] in res['address'], "column " + h
+                            assert res['address'][h[5:]] == row[h], "column %s" % h
+                    elif h in ('linked_place_id', 'parent_place_id'):
+                        compare_place_id(row[h], res[h], h, context)
                     else:
-                        vdict = eval('{' + row[h] + '}')
-                        assert vdict == res[h]
-                elif h.startswith('name'):
-                    name = h[5:] if h.startswith('name+') else 'name'
-                    assert name in res['name']
-                    assert res['name'][name] == row[h]
-                elif h.startswith('extratags+'):
-                    assert res['extratags'][h[10:]] == row[h]
-                elif h.startswith('addr+'):
-                    if row[h] == '-':
-                        if res['address'] is not None:
-                            assert h[5:] not in res['address']
-                    else:
-                        assert h[5:] in res['address'], "column " + h
-                        assert res['address'][h[5:]] == row[h], "column %s" % h
-                elif h in ('linked_place_id', 'parent_place_id'):
-                    compare_place_id(row[h], res[h], h, context)
-                else:
-                    assert_db_column(res, h, row[h], context)
+                        assert_db_column(res, h, row[h], context)
 
-    if exact:
-        cur.execute('SELECT osm_type, osm_id, class from placex')
-        assert expected_content == set([(r[0], r[1], r[2]) for r in cur])
-
-    context.db.commit()
+        if exact:
+            cur.execute('SELECT osm_type, osm_id, class from placex')
+            assert expected_content == set([(r[0], r[1], r[2]) for r in cur])
 
 @then("place contains(?P<exact> exactly)?")
 def check_placex_contents(context, exact):
-    cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        expected_content = set()
+        for row in context.table:
+            nid = NominatimID(row['object'])
+            where, params = nid.table_select()
+            cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt,
+                           ST_GeometryType(geometry) as geometrytype
+                           FROM place where %s""" % where,
+                        params)
+            assert cur.rowcount > 0, "No rows found for " + row['object']
 
-    expected_content = set()
-    for row in context.table:
-        nid = NominatimID(row['object'])
-        where, params = nid.table_select()
-        cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt,
-                       ST_GeometryType(geometry) as geometrytype
-                       FROM place where %s""" % where,
-                    params)
-        assert cur.rowcount > 0, "No rows found for " + row['object']
-
-        for res in cur:
-            if exact:
-                expected_content.add((res['osm_type'], res['osm_id'], res['class']))
-            for h in row.headings:
-                msg = "%s: %s" % (row['object'], h)
-                if h in ('name', 'extratags', 'address'):
-                    if row[h] == '-':
-                        assert res[h] is None, msg
+            for res in cur:
+                if exact:
+                    expected_content.add((res['osm_type'], res['osm_id'], res['class']))
+                for h in row.headings:
+                    msg = "%s: %s" % (row['object'], h)
+                    if h in ('name', 'extratags', 'address'):
+                        if row[h] == '-':
+                            assert res[h] is None, msg
+                        else:
+                            vdict = eval('{' + row[h] + '}')
+                            assert vdict == res[h], msg
+                    elif h.startswith('name+'):
+                        assert res['name'][h[5:]] == row[h], msg
+                    elif h.startswith('extratags+'):
+                        assert res['extratags'][h[10:]] == row[h], msg
+                    elif h.startswith('addr+'):
+                        if row[h] == '-':
+                            if res['address']  is not None:
+                                assert h[5:] not in res['address']
+                        else:
+                            assert res['address'][h[5:]] == row[h], msg
+                    elif h in ('linked_place_id', 'parent_place_id'):
+                        compare_place_id(row[h], res[h], h, context)
                     else:
-                        vdict = eval('{' + row[h] + '}')
-                        assert vdict == res[h], msg
-                elif h.startswith('name+'):
-                    assert res['name'][h[5:]] == row[h], msg
-                elif h.startswith('extratags+'):
-                    assert res['extratags'][h[10:]] == row[h], msg
-                elif h.startswith('addr+'):
-                    if row[h] == '-':
-                        if res['address']  is not None:
-                            assert h[5:] not in res['address']
-                    else:
-                        assert res['address'][h[5:]] == row[h], msg
-                elif h in ('linked_place_id', 'parent_place_id'):
-                    compare_place_id(row[h], res[h], h, context)
-                else:
-                    assert_db_column(res, h, row[h], context)
+                        assert_db_column(res, h, row[h], context)
 
-    if exact:
-        cur.execute('SELECT osm_type, osm_id, class from place')
-        assert expected_content, set([(r[0], r[1], r[2]) for r in cur])
-
-    context.db.commit()
+        if exact:
+            cur.execute('SELECT osm_type, osm_id, class from place')
+            assert expected_content, set([(r[0], r[1], r[2]) for r in cur])
 
 @then("search_name contains(?P<exclude> not)?")
 def check_search_name_contents(context, exclude):
-    cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        for row in context.table:
+            pid = NominatimID(row['object']).get_place_id(cur)
+            cur.execute("""SELECT *, ST_X(centroid) as cx, ST_Y(centroid) as cy
+                           FROM search_name WHERE place_id = %s""", (pid, ))
+            assert cur.rowcount > 0, "No rows found for " + row['object']
 
-    for row in context.table:
-        pid = NominatimID(row['object']).get_place_id(cur)
-        cur.execute("""SELECT *, ST_X(centroid) as cx, ST_Y(centroid) as cy
-                       FROM search_name WHERE place_id = %s""", (pid, ))
-        assert cur.rowcount > 0, "No rows found for " + row['object']
-
-        for res in cur:
-            for h in row.headings:
-                if h in ('name_vector', 'nameaddress_vector'):
-                    terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')]
-                    words = [x.strip()[1:] for x in row[h].split(',') if x.strip().startswith('#')]
-                    subcur = context.db.cursor()
-                    subcur.execute(""" SELECT word_id, word_token
-                                       FROM word, (SELECT unnest(%s::TEXT[]) as term) t
-                                       WHERE word_token = make_standard_name(t.term)
-                                             and class is null and country_code is null
-                                             and operator is null
-                                      UNION
-                                       SELECT word_id, word_token
-                                       FROM word, (SELECT unnest(%s::TEXT[]) as term) t
-                                       WHERE word_token = ' ' || make_standard_name(t.term)
-                                             and class is null and country_code is null
-                                             and operator is null
-                                   """,
-                                   (terms, words))
-                    if not exclude:
-                        assert subcur.rowcount >= len(terms) + len(words), \
-                            "No word entry found for " + row[h] + ". Entries found: " + str(subcur.rowcount)
-                    for wid in subcur:
-                        if exclude:
-                            assert wid[0] not in res[h], "Found term for %s/%s: %s" % (pid, h, wid[1])
-                        else:
-                            assert wid[0] in res[h], "Missing term for %s/%s: %s" % (pid, h, wid[1])
-                else:
-                    assert_db_column(res, h, row[h], context)
-
-
-    context.db.commit()
+            for res in cur:
+                for h in row.headings:
+                    if h in ('name_vector', 'nameaddress_vector'):
+                        terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')]
+                        words = [x.strip()[1:] for x in row[h].split(',') if x.strip().startswith('#')]
+                        with context.db.cursor() as subcur:
+                            subcur.execute(""" SELECT word_id, word_token
+                                               FROM word, (SELECT unnest(%s::TEXT[]) as term) t
+                                               WHERE word_token = make_standard_name(t.term)
+                                                     and class is null and country_code is null
+                                                     and operator is null
+                                              UNION
+                                               SELECT word_id, word_token
+                                               FROM word, (SELECT unnest(%s::TEXT[]) as term) t
+                                               WHERE word_token = ' ' || make_standard_name(t.term)
+                                                     and class is null and country_code is null
+                                                     and operator is null
+                                           """,
+                                           (terms, words))
+                            if not exclude:
+                                assert subcur.rowcount >= len(terms) + len(words), \
+                                    "No word entry found for " + row[h] + ". Entries found: " + str(subcur.rowcount)
+                            for wid in subcur:
+                                if exclude:
+                                    assert wid[0] not in res[h], "Found term for %s/%s: %s" % (pid, h, wid[1])
+                                else:
+                                    assert wid[0] in res[h], "Missing term for %s/%s: %s" % (pid, h, wid[1])
+                    else:
+                        assert_db_column(res, h, row[h], context)
 
 @then("location_postcode contains exactly")
 def check_location_postcode(context):
-    cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
+        assert cur.rowcount == len(list(context.table)), \
+            "Postcode table has %d rows, expected %d rows." % (cur.rowcount, len(list(context.table)))
 
-    cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
-    assert cur.rowcount == len(list(context.table)), \
-        "Postcode table has %d rows, expected %d rows." % (cur.rowcount, len(list(context.table)))
-
-    table = list(cur)
-    for row in context.table:
-        for i in range(len(table)):
-            if table[i]['country_code'] != row['country'] \
-                    or table[i]['postcode'] != row['postcode']:
-                continue
-            for h in row.headings:
-                if h not in ('country', 'postcode'):
-                    assert_db_column(table[i], h, row[h], context)
+        table = list(cur)
+        for row in context.table:
+            for i in range(len(table)):
+                if table[i]['country_code'] != row['country'] \
+                        or table[i]['postcode'] != row['postcode']:
+                    continue
+                for h in row.headings:
+                    if h not in ('country', 'postcode'):
+                        assert_db_column(table[i], h, row[h], context)
 
 @then("word contains(?P<exclude> not)?")
 def check_word_table(context, exclude):
-    cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
-
-    for row in context.table:
-        wheres = []
-        values = []
-        for h in row.headings:
-            wheres.append("%s = %%s" % h)
-            values.append(row[h])
-        cur.execute("SELECT * from word WHERE %s" % ' AND '.join(wheres), values)
-        if exclude:
-            assert cur.rowcount == 0, "Row still in word table: %s" % '/'.join(values)
-        else:
-            assert cur.rowcount > 0, "Row not in word table: %s" % '/'.join(values)
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        for row in context.table:
+            wheres = []
+            values = []
+            for h in row.headings:
+                wheres.append("%s = %%s" % h)
+                values.append(row[h])
+            cur.execute("SELECT * from word WHERE %s" % ' AND '.join(wheres), values)
+            if exclude:
+                assert cur.rowcount == 0, "Row still in word table: %s" % '/'.join(values)
+            else:
+                assert cur.rowcount > 0, "Row not in word table: %s" % '/'.join(values)
 
 @then("place_addressline contains")
 def check_place_addressline(context):
-    cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        for row in context.table:
+            pid = NominatimID(row['object']).get_place_id(cur)
+            apid = NominatimID(row['address']).get_place_id(cur)
+            cur.execute(""" SELECT * FROM place_addressline
+                            WHERE place_id = %s AND address_place_id = %s""",
+                        (pid, apid))
+            assert cur.rowcount > 0, \
+                        "No rows found for place %s and address %s" % (row['object'], row['address'])
 
-    for row in context.table:
-        pid = NominatimID(row['object']).get_place_id(cur)
-        apid = NominatimID(row['address']).get_place_id(cur)
-        cur.execute(""" SELECT * FROM place_addressline
-                        WHERE place_id = %s AND address_place_id = %s""",
-                    (pid, apid))
-        assert cur.rowcount > 0, \
-                    "No rows found for place %s and address %s" % (row['object'], row['address'])
-
-        for res in cur:
-            for h in row.headings:
-                if h not in ('address', 'object'):
-                    assert_db_column(res, h, row[h], context)
-
-    context.db.commit()
+            for res in cur:
+                for h in row.headings:
+                    if h not in ('address', 'object'):
+                        assert_db_column(res, h, row[h], context)
 
 @then("place_addressline doesn't contain")
 def check_place_addressline_exclude(context):
-    cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
-
-    for row in context.table:
-        pid = NominatimID(row['object']).get_place_id(cur)
-        apid = NominatimID(row['address']).get_place_id(cur)
-        cur.execute(""" SELECT * FROM place_addressline
-                        WHERE place_id = %s AND address_place_id = %s""",
-                    (pid, apid))
-        assert cur.rowcount == 0, \
-            "Row found for place %s and address %s" % (row['object'], row['address'])
-
-    context.db.commit()
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        for row in context.table:
+            pid = NominatimID(row['object']).get_place_id(cur)
+            apid = NominatimID(row['address']).get_place_id(cur)
+            cur.execute(""" SELECT * FROM place_addressline
+                            WHERE place_id = %s AND address_place_id = %s""",
+                        (pid, apid))
+            assert cur.rowcount == 0, \
+                "Row found for place %s and address %s" % (row['object'], row['address'])
 
 @then("(?P<oid>\w+) expands to(?P<neg> no)? interpolation")
 def check_location_property_osmline(context, oid, neg):
-    cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
     nid = NominatimID(oid)
 
     assert 'W' == nid.typ, "interpolation must be a way"
 
-    cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
-                   FROM location_property_osmline
-                   WHERE osm_id = %s AND startnumber IS NOT NULL""",
-                (nid.oid, ))
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
+                       FROM location_property_osmline
+                       WHERE osm_id = %s AND startnumber IS NOT NULL""",
+                    (nid.oid, ))
 
-    if neg:
-        assert cur.rowcount == 0
-        return
+        if neg:
+            assert cur.rowcount == 0
+            return
 
-    todo = list(range(len(list(context.table))))
-    for res in cur:
-        for i in todo:
-            row = context.table[i]
-            if (int(row['start']) == res['startnumber']
-                and int(row['end']) == res['endnumber']):
-                todo.remove(i)
-                break
-        else:
-            assert False, "Unexpected row %s" % (str(res))
-
-        for h in row.headings:
-            if h in ('start', 'end'):
-                continue
-            elif h == 'parent_place_id':
-                compare_place_id(row[h], res[h], h, context)
+        todo = list(range(len(list(context.table))))
+        for res in cur:
+            for i in todo:
+                row = context.table[i]
+                if (int(row['start']) == res['startnumber']
+                    and int(row['end']) == res['endnumber']):
+                    todo.remove(i)
+                    break
             else:
-                assert_db_column(res, h, row[h], context)
+                assert False, "Unexpected row %s" % (str(res))
 
-    assert not todo
+            for h in row.headings:
+                if h in ('start', 'end'):
+                    continue
+                elif h == 'parent_place_id':
+                    compare_place_id(row[h], res[h], h, context)
+                else:
+                    assert_db_column(res, h, row[h], context)
+
+        assert not todo
 
 
 @then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
 def check_placex_has_entry(context, table, oid):
-    cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
-    nid = NominatimID(oid)
-    where, params = nid.table_select()
-    cur.execute("SELECT * FROM %s where %s" % (table, where), params)
-    assert cur.rowcount == 0
-    context.db.commit()
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        nid = NominatimID(oid)
+        where, params = nid.table_select()
+        cur.execute("SELECT * FROM %s where %s" % (table, where), params)
+        assert cur.rowcount == 0
 
 @then("search_name has no entry for (?P<oid>.*)")
 def check_search_name_has_entry(context, oid):
-    cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
-    pid = NominatimID(oid).get_place_id(cur)
-    cur.execute("SELECT * FROM search_name WHERE place_id = %s", (pid, ))
-    assert cur.rowcount == 0
-    context.db.commit()
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        pid = NominatimID(oid).get_place_id(cur)
+        cur.execute("SELECT * FROM search_name WHERE place_id = %s", (pid, ))
+        assert cur.rowcount == 0
diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py
index db208fe8..f8a6d3cd 100644
--- a/test/bdd/steps/steps_osm_data.py
+++ b/test/bdd/steps/steps_osm_data.py
@@ -70,17 +70,7 @@ def update_from_osm_file(context):
 
     The data is expected as attached text in OPL format.
     """
-    context.nominatim.run_setup_script('create-functions', 'create-partition-functions')
-
-    cur = context.db.cursor()
-    cur.execute("""insert into placex (osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry)
-                     select            osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry from place""")
-    cur.execute(
-        """insert into location_property_osmline (osm_id, address, linegeo)
-             SELECT osm_id, address, geometry from place
-              WHERE class='place' and type='houses' and osm_type='W'
-                    and ST_GeometryType(geometry) = 'ST_LineString'""")
-    context.db.commit()
+    context.nominatim.copy_from_place(context.db)
     context.nominatim.run_setup_script('index', 'index-noanalyse')
     context.nominatim.run_setup_script('create-functions', 'create-partition-functions',
                                        'enable-diff-updates')

From b8e39d2dde4ffc0d202fc60af920b54b054601d7 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 5 Jan 2021 11:26:39 +0100
Subject: [PATCH 12/17] bdd: move scene setup to OSM data steps

The step has nothing to do with the database.
---
 test/bdd/steps/steps_db_ops.py   | 4 ----
 test/bdd/steps/steps_osm_data.py | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 97f9055b..19429999 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -220,10 +220,6 @@ def assert_db_column(row, column, value, context):
 
 ################################ GIVEN ##################################
 
-@given(u'the scene (?P<scene>.+)')
-def set_default_scene(context, scene):
-    context.scene = scene
-
 @given("the (?P<named>named )?places")
 def add_data_to_place_table(context, named):
     with context.db.cursor() as cur:
diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py
index f8a6d3cd..3858198b 100644
--- a/test/bdd/steps/steps_osm_data.py
+++ b/test/bdd/steps/steps_osm_data.py
@@ -22,6 +22,10 @@ def write_opl_file(opl, grid):
 
         return fd.name
 
+@given(u'the scene (?P<scene>.+)')
+def set_default_scene(context, scene):
+    context.scene = scene
+
 @given(u'the ([0-9.]+ )?grid')
 def define_node_grid(context, grid_step):
     """

From 197870e67a82505d52fb2fd45db638435af956b0 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 5 Jan 2021 12:12:59 +0100
Subject: [PATCH 13/17] bdd: move place table inserter into separate file

Also simplifies usage by implementing a function that inserts
a complete table row.
---
 test/bdd/steps/place_inserter.py | 105 +++++++++++++++++++++++++++++
 test/bdd/steps/steps_db_ops.py   | 109 ++-----------------------------
 2 files changed, 110 insertions(+), 104 deletions(-)
 create mode 100644 test/bdd/steps/place_inserter.py

diff --git a/test/bdd/steps/place_inserter.py b/test/bdd/steps/place_inserter.py
new file mode 100644
index 00000000..90f071b2
--- /dev/null
+++ b/test/bdd/steps/place_inserter.py
@@ -0,0 +1,105 @@
+"""
+Helper classes for filling the place table.
+"""
+import random
+import string
+
+class PlaceColumn:
+    """ Helper class to collect contents from a behave table row and
+        insert it into the place table.
+    """
+    def __init__(self, context):
+        self.columns = {'admin_level' : 15}
+        self.context = context
+        self.geometry = None
+
+    def add_row(self, row, force_name):
+        """ Parse the content from the given behave row as place column data.
+        """
+        for name, value in zip(row.headings, row.cells):
+            self._add(name, value)
+
+        assert 'osm_type' in self.columns, "osm column missing"
+
+        if force_name and 'name' not in self.columns:
+            self._add_hstore('name', 'name',
+                             ''.join(random.choice(string.printable)
+                                     for _ in range(int(random.random()*30))))
+
+        return self
+
+    def _add(self, key, value):
+        if hasattr(self, '_set_key_' + key):
+            getattr(self, '_set_key_' + key)(value)
+        elif key.startswith('name+'):
+            self._add_hstore('name', key[5:], value)
+        elif key.startswith('extra+'):
+            self._add_hstore('extratags', key[6:], value)
+        elif key.startswith('addr+'):
+            self._add_hstore('address', key[5:], value)
+        elif key in ('name', 'address', 'extratags'):
+            self.columns[key] = eval('{' + value + '}')
+        else:
+            assert key in ('class', 'type'), "Unknown column '{}'.".format(key)
+            self.columns[key] = None if value == '' else value
+
+    def _set_key_name(self, value):
+        self._add_hstore('name', 'name', value)
+
+    def _set_key_osm(self, value):
+        assert value[0] in 'NRW' and value[1:].isdigit(), \
+               "OSM id needs to be of format <NRW><id>."
+
+        self.columns['osm_type'] = value[0]
+        self.columns['osm_id'] = int(value[1:])
+
+    def _set_key_admin(self, value):
+        self.columns['admin_level'] = int(value)
+
+    def _set_key_housenr(self, value):
+        if value:
+            self._add_hstore('address', 'housenumber', value)
+
+    def _set_key_postcode(self, value):
+        if value:
+            self._add_hstore('address', 'postcode', value)
+
+    def _set_key_street(self, value):
+        if value:
+            self._add_hstore('address', 'street', value)
+
+    def _set_key_addr_place(self, value):
+        if value:
+            self._add_hstore('address', 'place', value)
+
+    def _set_key_country(self, value):
+        if value:
+            self._add_hstore('address', 'country', value)
+
+    def _set_key_geometry(self, value):
+        self.geometry = self.context.osm.parse_geometry(value, self.context.scene)
+        assert self.geometry is not None, "Bad geometry: {}".format(value)
+
+    def _add_hstore(self, column, key, value):
+        if column in self.columns:
+            self.columns[column][key] = value
+        else:
+            self.columns[column] = {key: value}
+
+    def db_insert(self, cursor):
+        """ Insert the collected data into the database.
+        """
+        if self.columns['osm_type'] == 'N' and self.geometry is None:
+            pt = self.context.osm.grid_node(self.columns['osm_id'])
+            if pt is None:
+                pt = (random.random()*360 - 180, random.random()*180 - 90)
+
+            self.geometry = "ST_SetSRID(ST_Point(%f, %f), 4326)" % pt
+        else:
+            assert self.geometry is not None, "Geometry missing"
+
+        query = 'INSERT INTO place ({}, geometry) values({}, {})'.format(
+            ','.join(self.columns.keys()),
+            ','.join(['%s' for x in range(len(self.columns))]),
+            self.geometry)
+        cursor.execute(query, list(self.columns.values()))
diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 19429999..8f20cdaa 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -1,97 +1,8 @@
-import base64
-import random
-import string
 import re
 import psycopg2.extras
 
 from check_functions import Almost
-
-class PlaceColumn:
-
-    def __init__(self, context, force_name):
-        self.columns = { 'admin_level' : 15}
-        self.force_name = force_name
-        self.context = context
-        self.geometry = None
-
-    def add(self, key, value):
-        if hasattr(self, 'set_key_' + key):
-            getattr(self, 'set_key_' + key)(value)
-        elif key.startswith('name+'):
-            self.add_hstore('name', key[5:], value)
-        elif key.startswith('extra+'):
-            self.add_hstore('extratags', key[6:], value)
-        elif key.startswith('addr+'):
-            self.add_hstore('address', key[5:], value)
-        elif key in ('name', 'address', 'extratags'):
-            self.columns[key] = eval('{' + value + '}')
-        else:
-            assert key in ('class', 'type')
-            self.columns[key] = None if value == '' else value
-
-    def set_key_name(self, value):
-        self.add_hstore('name', 'name', value)
-
-    def set_key_osm(self, value):
-        assert value[0] in 'NRW'
-        assert value[1:].isdigit()
-
-        self.columns['osm_type'] = value[0]
-        self.columns['osm_id'] = int(value[1:])
-
-    def set_key_admin(self, value):
-        self.columns['admin_level'] = int(value)
-
-    def set_key_housenr(self, value):
-        if value:
-            self.add_hstore('address', 'housenumber', value)
-
-    def set_key_postcode(self, value):
-        if value:
-            self.add_hstore('address', 'postcode', value)
-
-    def set_key_street(self, value):
-        if value:
-            self.add_hstore('address', 'street', value)
-
-    def set_key_addr_place(self, value):
-        if value:
-            self.add_hstore('address', 'place', value)
-
-    def set_key_country(self, value):
-        if value:
-            self.add_hstore('address', 'country', value)
-
-    def set_key_geometry(self, value):
-        self.geometry = self.context.osm.parse_geometry(value, self.context.scene)
-        assert self.geometry is not None
-
-    def add_hstore(self, column, key, value):
-        if column in self.columns:
-            self.columns[column][key] = value
-        else:
-            self.columns[column] = { key : value }
-
-    def db_insert(self, cursor):
-        assert 'osm_type' in self.columns
-        if self.force_name and 'name' not in self.columns:
-            self.add_hstore('name', 'name', ''.join(random.choice(string.printable)
-                                           for _ in range(int(random.random()*30))))
-
-        if self.columns['osm_type'] == 'N' and self.geometry is None:
-            pt = self.context.osm.grid_node(self.columns['osm_id'])
-            if pt is None:
-                pt = (random.random()*360 - 180, random.random()*180 - 90)
-
-            self.geometry = "ST_SetSRID(ST_Point(%f, %f), 4326)" % pt
-        else:
-            assert self.geometry is not None, "Geometry missing"
-        query = 'INSERT INTO place (%s, geometry) values(%s, %s)' % (
-                     ','.join(self.columns.keys()),
-                     ','.join(['%s' for x in range(len(self.columns))]),
-                     self.geometry)
-        cursor.execute(query, list(self.columns.values()))
-
+from place_inserter import PlaceColumn
 
 class PlaceObjName(object):
 
@@ -224,13 +135,8 @@ def assert_db_column(row, column, value, context):
 def add_data_to_place_table(context, named):
     with context.db.cursor() as cur:
         cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
-        for r in context.table:
-            col = PlaceColumn(context, named is not None)
-
-            for h in r.headings:
-                col.add(h, r[h])
-
-            col.db_insert(cur)
+        for row in context.table:
+            PlaceColumn(context).add_row(row, named is not None).db_insert(cur)
         cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert')
 
 @given("the relations")
@@ -296,13 +202,8 @@ def update_place_table(context):
     context.nominatim.run_setup_script(
         'create-functions', 'create-partition-functions', 'enable-diff-updates')
     with context.db.cursor() as cur:
-        for r in context.table:
-            col = PlaceColumn(context, False)
-
-            for h in r.headings:
-                col.add(h, r[h])
-
-            col.db_insert(cur)
+        for row in context.table:
+            PlaceColumn(context).add_row(row, False).db_insert(cur)
 
         while True:
             context.nominatim.run_update_script('index')

From 25557e5f14734aa597dbe32293be25d2c539531c Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 5 Jan 2021 15:17:46 +0100
Subject: [PATCH 14/17] bdd: factor out reindexing on updates

---
 test/bdd/steps/nominatim_environment.py | 14 ++++++++++++++
 test/bdd/steps/steps_db_ops.py          | 16 ++--------------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py
index 2547bbe2..7013a20e 100644
--- a/test/bdd/steps/nominatim_environment.py
+++ b/test/bdd/steps/nominatim_environment.py
@@ -191,6 +191,20 @@ class NominatimEnvironment:
         if not self.keep_scenario_db:
             self.db_drop_database(self.test_db)
 
+    def reindex_placex(self, db):
+        """ Run the indexing step until all data in the placex has
+            been processed. Indexing during updates can produce more data
+            to index under some circumstances. That is why indexing may have
+            to be run multiple times.
+        """
+        with db.cursor() as cur:
+            while True:
+                self.run_update_script('index')
+
+                cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
+                if cur.rowcount == 0:
+                    return
+
     def run_setup_script(self, *args, **kwargs):
         """ Run the Nominatim setup script with the given arguments.
         """
diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 8f20cdaa..6bee88c1 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -205,13 +205,7 @@ def update_place_table(context):
         for row in context.table:
             PlaceColumn(context).add_row(row, False).db_insert(cur)
 
-        while True:
-            context.nominatim.run_update_script('index')
-
-            cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
-            if cur.rowcount == 0:
-                break
-
+    context.nominatim.reindex_placex(context.db)
     check_database_integrity(context)
 
 @when("updating postcodes")
@@ -227,13 +221,7 @@ def delete_places(context, oids):
             where, params = NominatimID(oid).table_select()
             cur.execute("DELETE FROM place WHERE " + where, params)
 
-    while True:
-        context.nominatim.run_update_script('index')
-
-        with context.db.cursor() as cur:
-            cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
-            if cur.rowcount == 0:
-                break
+    context.nominatim.reindex_placex(context.db)
 
 ################################ THEN ##################################
 

From d586b95ff11a45ab6223e83e1b0540f1db87b325 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 5 Jan 2021 16:00:48 +0100
Subject: [PATCH 15/17] bdd: move nominitim id reader to separate file

---
 test/bdd/steps/steps_db_ops.py  | 95 ++++++++-------------------------
 test/bdd/steps/table_compare.py | 62 +++++++++++++++++++++
 2 files changed, 84 insertions(+), 73 deletions(-)
 create mode 100644 test/bdd/steps/table_compare.py

diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 6bee88c1..0df9fec2 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -3,6 +3,7 @@ import psycopg2.extras
 
 from check_functions import Almost
 from place_inserter import PlaceColumn
+from table_compare import NominatimID
 
 class PlaceObjName(object):
 
@@ -51,53 +52,6 @@ def check_database_integrity(context):
     assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline"
 
 
-class NominatimID:
-    """ Splits a unique identifier for places into its components.
-        As place_ids cannot be used for testing, we use a unique
-        identifier instead that is of the form <osmtype><osmid>[:<class>].
-    """
-
-    id_regex = re.compile(r"(?P<tp>[NRW])(?P<id>\d+)(:(?P<cls>\w+))?")
-
-    def __init__(self, oid):
-        self.typ = self.oid = self.cls = None
-
-        if oid is not None:
-            m = self.id_regex.fullmatch(oid)
-            assert m is not None, "ID '%s' not of form <osmtype><osmid>[:<class>]" % oid
-
-            self.typ = m.group('tp')
-            self.oid = m.group('id')
-            self.cls = m.group('cls')
-
-    def __str__(self):
-        if self.cls is None:
-            return self.typ + self.oid
-
-        return '%s%d:%s' % (self.typ, self.oid, self.cls)
-
-    def table_select(self):
-        """ Return where clause and parameter list to select the object
-            from a Nominatim table.
-        """
-        where = 'osm_type = %s and osm_id = %s'
-        params = [self.typ, self. oid]
-
-        if self.cls is not None:
-            where += ' and class = %s'
-            params.append(self.cls)
-
-        return where, params
-
-    def get_place_id(self, cur):
-        where, params = self.table_select()
-        cur.execute("SELECT place_id FROM placex WHERE %s" % where, params)
-        assert cur.rowcount == 1, \
-            "Expected exactly 1 entry in placex for %s found %s" % (str(self), cur.rowcount)
-
-        return cur.fetchone()[0]
-
-
 def assert_db_column(row, column, value, context):
     if column == 'object':
         return
@@ -218,8 +172,7 @@ def delete_places(context, oids):
         'create-functions', 'create-partition-functions', 'enable-diff-updates')
     with context.db.cursor() as cur:
         for oid in oids.split(','):
-            where, params = NominatimID(oid).table_select()
-            cur.execute("DELETE FROM place WHERE " + where, params)
+            NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')
 
     context.nominatim.reindex_placex(context.db)
 
@@ -230,12 +183,10 @@ def check_placex_contents(context, exact):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         expected_content = set()
         for row in context.table:
-            nid = NominatimID(row['object'])
-            where, params = nid.table_select()
-            cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt,
-                           ST_X(centroid) as cx, ST_Y(centroid) as cy
-                           FROM placex where %s""" % where,
-                        params)
+            NominatimID(row['object']).query_osm_id(cur,
+                """SELECT *, ST_AsText(geometry) as geomtxt,
+                          ST_X(centroid) as cx, ST_Y(centroid) as cy
+                   FROM placex WHERE {}""")
             assert cur.rowcount > 0, "No rows found for " + row['object']
 
             for res in cur:
@@ -275,12 +226,10 @@ def check_placex_contents(context, exact):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         expected_content = set()
         for row in context.table:
-            nid = NominatimID(row['object'])
-            where, params = nid.table_select()
-            cur.execute("""SELECT *, ST_AsText(geometry) as geomtxt,
-                           ST_GeometryType(geometry) as geometrytype
-                           FROM place where %s""" % where,
-                        params)
+            NominatimID(row['object']).query_osm_id(cur,
+                """SELECT *, ST_AsText(geometry) as geomtxt,
+                          ST_GeometryType(geometry) as geometrytype
+                   FROM place WHERE {}""")
             assert cur.rowcount > 0, "No rows found for " + row['object']
 
             for res in cur:
@@ -317,9 +266,9 @@ def check_placex_contents(context, exact):
 def check_search_name_contents(context, exclude):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         for row in context.table:
-            pid = NominatimID(row['object']).get_place_id(cur)
-            cur.execute("""SELECT *, ST_X(centroid) as cx, ST_Y(centroid) as cy
-                           FROM search_name WHERE place_id = %s""", (pid, ))
+            NominatimID(row['object']).query_place_id(cur,
+                """SELECT *, ST_X(centroid) as cx, ST_Y(centroid) as cy
+                     FROM search_name WHERE place_id = %s""")
             assert cur.rowcount > 0, "No rows found for " + row['object']
 
             for res in cur:
@@ -346,9 +295,9 @@ def check_search_name_contents(context, exclude):
                                     "No word entry found for " + row[h] + ". Entries found: " + str(subcur.rowcount)
                             for wid in subcur:
                                 if exclude:
-                                    assert wid[0] not in res[h], "Found term for %s/%s: %s" % (pid, h, wid[1])
+                                    assert wid[0] not in res[h], "Found term for %s/%s: %s" % (row['object'], h, wid[1])
                                 else:
-                                    assert wid[0] in res[h], "Missing term for %s/%s: %s" % (pid, h, wid[1])
+                                    assert wid[0] in res[h], "Missing term for %s/%s: %s" % (row['object'], h, wid[1])
                     else:
                         assert_db_column(res, h, row[h], context)
 
@@ -454,14 +403,14 @@ def check_location_property_osmline(context, oid, neg):
 @then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
 def check_placex_has_entry(context, table, oid):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        nid = NominatimID(oid)
-        where, params = nid.table_select()
-        cur.execute("SELECT * FROM %s where %s" % (table, where), params)
-        assert cur.rowcount == 0
+        NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
+        assert cur.rowcount == 0, \
+               "Found {} entries for ID {}".format(cur.rowcount, oid)
 
 @then("search_name has no entry for (?P<oid>.*)")
 def check_search_name_has_entry(context, oid):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        pid = NominatimID(oid).get_place_id(cur)
-        cur.execute("SELECT * FROM search_name WHERE place_id = %s", (pid, ))
-        assert cur.rowcount == 0
+        NominatimID(oid).query_place_id(cur,
+            "SELECT * FROM search_name WHERE place_id = %s")
+        assert cur.rowcount == 0, \
+               "Found {} entries for ID {}".format(cur.rowcount, oid)
diff --git a/test/bdd/steps/table_compare.py b/test/bdd/steps/table_compare.py
new file mode 100644
index 00000000..dfc261d4
--- /dev/null
+++ b/test/bdd/steps/table_compare.py
@@ -0,0 +1,62 @@
+"""
+Functions to facilitate accessing and comparing the content of DB tables.
+"""
+import re
+
+ID_REGEX = re.compile(r"(?P<typ>[NRW])(?P<oid>\d+)(:(?P<cls>\w+))?")
+
+class NominatimID:
+    """ Splits a unique identifier for places into its components.
+        As place_ids cannot be used for testing, we use a unique
+        identifier instead that is of the form <osmtype><osmid>[:<class>].
+    """
+
+    def __init__(self, oid):
+        self.typ = self.oid = self.cls = None
+
+        if oid is not None:
+            m = ID_REGEX.fullmatch(oid)
+            assert m is not None, \
+                   "ID '{}' not of form <osmtype><osmid>[:<class>]".format(oid)
+
+            self.typ = m.group('typ')
+            self.oid = m.group('oid')
+            self.cls = m.group('cls')
+
+    def __str__(self):
+        if self.cls is None:
+            return self.typ + self.oid
+
+        return '{self.typ}{self.oid}:{self.cls}'.format(self=self)
+
+    def query_osm_id(self, cur, query):
+        """ Run a query on cursor `cur` using osm ID, type and class. The
+            `query` string must contain exactly one placeholder '{}' where
+            the 'where' query should go.
+        """
+        where = 'osm_type = %s and osm_id = %s'
+        params = [self.typ, self. oid]
+
+        if self.cls is not None:
+            where += ' and class = %s'
+            params.append(self.cls)
+
+        return cur.execute(query.format(where), params)
+
+    def query_place_id(self, cur, query):
+        """ Run a query on cursor `cur` using the place ID. The `query` string
+            must contain exactly one placeholder '%s' where the 'where' query
+            should go.
+        """
+        pid = self.get_place_id(cur)
+        return cur.execute(query, (pid, ))
+
+    def get_place_id(self, cur):
+        """ Look up the place id for the ID. Throws an assertion if the ID
+            is not unique.
+        """
+        self.query_osm_id(cur, "SELECT place_id FROM placex WHERE {}")
+        assert cur.rowcount == 1, \
+               "Place ID {!s} not unique. Found {} entries.".format(self, cur.rowcount)
+
+        return cur.fetchone()[0]

From 1f29475fa51455363f8cc7b55bec0e801afc638b Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Wed, 6 Jan 2021 12:28:09 +0100
Subject: [PATCH 16/17] bdd: move column comparison in separate file

Introduces a new class DBRow that encapsulates the comparison
functions. This also is responsible for formatting more informative
assert messages. place and placex steps are unified.
---
 test/bdd/steps/steps_db_ops.py  | 211 +++++++-------------------------
 test/bdd/steps/table_compare.py | 159 +++++++++++++++++++++++-
 2 files changed, 194 insertions(+), 176 deletions(-)

diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 0df9fec2..178e1a28 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -1,44 +1,8 @@
-import re
 import psycopg2.extras
 
-from check_functions import Almost
 from place_inserter import PlaceColumn
-from table_compare import NominatimID
+from table_compare import NominatimID, DBRow
 
-class PlaceObjName(object):
-
-    def __init__(self, placeid, conn):
-        self.pid = placeid
-        self.conn = conn
-
-    def __str__(self):
-        if self.pid is None:
-            return "<null>"
-
-        if self.pid == 0:
-            return "place ID 0"
-
-        cur = self.conn.cursor()
-        cur.execute("""SELECT osm_type, osm_id, class
-                       FROM placex WHERE place_id = %s""",
-                    (self.pid, ))
-        assert cur.rowcount == 1, "No entry found for place id %s" % self.pid
-
-        return "%s%s:%s" % cur.fetchone()
-
-def compare_place_id(expected, result, column, context):
-    if expected == '0':
-        assert result == 0, \
-               "Bad place id in column {}. Expected: 0, got: {!s}.".format(
-                    column, PlaceObjName(result, context.db))
-    elif expected == '-':
-        assert result is None, \
-               "Bad place id in column {}: {!s}.".format(
-                        column, PlaceObjName(result, context.db))
-    else:
-        assert NominatimID(expected).get_place_id(context.db.cursor()) == result, \
-               "Bad place id in column {}. Expected: {}, got: {!s}.".format(
-                    column, expected, PlaceObjName(result, context.db))
 
 def check_database_integrity(context):
     """ Check some generic constraints on the tables.
@@ -52,37 +16,6 @@ def check_database_integrity(context):
     assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline"
 
 
-def assert_db_column(row, column, value, context):
-    if column == 'object':
-        return
-
-    if column.startswith('centroid'):
-        if value == 'in geometry':
-            query = """SELECT ST_Within(ST_SetSRID(ST_Point({}, {}), 4326),
-                                        ST_SetSRID('{}'::geometry, 4326))""".format(
-                      row['cx'], row['cy'], row['geomtxt'])
-            cur = context.db.cursor()
-            cur.execute(query)
-            assert cur.fetchone()[0], "(Row %s failed: %s)" % (column, query)
-        else:
-            fac = float(column[9:]) if column.startswith('centroid*') else 1.0
-            x, y = value.split(' ')
-            assert Almost(float(x) * fac) == row['cx'], "Bad x coordinate"
-            assert Almost(float(y) * fac) == row['cy'], "Bad y coordinate"
-    elif column == 'geometry':
-        geom = context.osm.parse_geometry(value, context.scene)
-        cur = context.db.cursor()
-        query = "SELECT ST_Equals(ST_SnapToGrid(%s, 0.00001, 0.00001), ST_SnapToGrid(ST_SetSRID('%s'::geometry, 4326), 0.00001, 0.00001))" % (
-                 geom, row['geomtxt'],)
-        cur.execute(query)
-        assert cur.fetchone()[0], "(Row %s failed: %s)" % (column, query)
-    elif value == '-':
-        assert row[column] is None, "Row %s" % column
-    else:
-        assert value == str(row[column]), \
-            "Row '%s': expected: %s, got: %s" % (column, value, str(row[column]))
-
-
 ################################ GIVEN ##################################
 
 @given("the (?P<named>named )?places")
@@ -178,100 +111,49 @@ def delete_places(context, oids):
 
 ################################ THEN ##################################
 
-@then("placex contains(?P<exact> exactly)?")
-def check_placex_contents(context, exact):
+@then("(?P<table>placex|place) contains(?P<exact> exactly)?")
+def check_place_contents(context, table, exact):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         expected_content = set()
         for row in context.table:
-            NominatimID(row['object']).query_osm_id(cur,
-                """SELECT *, ST_AsText(geometry) as geomtxt,
-                          ST_X(centroid) as cx, ST_Y(centroid) as cy
-                   FROM placex WHERE {}""")
+            nid = NominatimID(row['object'])
+            query = 'SELECT *, ST_AsText(geometry) as geomtxt, ST_GeometryType(geometry) as geometrytype'
+            if table == 'placex':
+                query += ' ,ST_X(centroid) as cx, ST_Y(centroid) as cy'
+            query += " FROM %s WHERE {}" % (table, )
+            nid.query_osm_id(cur, query)
             assert cur.rowcount > 0, "No rows found for " + row['object']
 
             for res in cur:
                 if exact:
                     expected_content.add((res['osm_type'], res['osm_id'], res['class']))
-                for h in row.headings:
-                    if h in ('extratags', 'address'):
-                        if row[h] == '-':
-                            assert res[h] is None
-                        else:
-                            vdict = eval('{' + row[h] + '}')
-                            assert vdict == res[h]
-                    elif h.startswith('name'):
-                        name = h[5:] if h.startswith('name+') else 'name'
-                        assert name in res['name']
-                        assert res['name'][name] == row[h]
-                    elif h.startswith('extratags+'):
-                        assert res['extratags'][h[10:]] == row[h]
-                    elif h.startswith('addr+'):
-                        if row[h] == '-':
-                            if res['address'] is not None:
-                                assert h[5:] not in res['address']
-                        else:
-                            assert h[5:] in res['address'], "column " + h
-                            assert res['address'][h[5:]] == row[h], "column %s" % h
-                    elif h in ('linked_place_id', 'parent_place_id'):
-                        compare_place_id(row[h], res[h], h, context)
-                    else:
-                        assert_db_column(res, h, row[h], context)
+
+                DBRow(nid, res, context).assert_row(row, ['object'])
 
         if exact:
-            cur.execute('SELECT osm_type, osm_id, class from placex')
+            cur.execute('SELECT osm_type, osm_id, class from {}'.format(table))
             assert expected_content == set([(r[0], r[1], r[2]) for r in cur])
 
-@then("place contains(?P<exact> exactly)?")
-def check_placex_contents(context, exact):
+
+@then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
+def check_place_has_entry(context, table, oid):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        expected_content = set()
-        for row in context.table:
-            NominatimID(row['object']).query_osm_id(cur,
-                """SELECT *, ST_AsText(geometry) as geomtxt,
-                          ST_GeometryType(geometry) as geometrytype
-                   FROM place WHERE {}""")
-            assert cur.rowcount > 0, "No rows found for " + row['object']
+        NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
+        assert cur.rowcount == 0, \
+               "Found {} entries for ID {}".format(cur.rowcount, oid)
 
-            for res in cur:
-                if exact:
-                    expected_content.add((res['osm_type'], res['osm_id'], res['class']))
-                for h in row.headings:
-                    msg = "%s: %s" % (row['object'], h)
-                    if h in ('name', 'extratags', 'address'):
-                        if row[h] == '-':
-                            assert res[h] is None, msg
-                        else:
-                            vdict = eval('{' + row[h] + '}')
-                            assert vdict == res[h], msg
-                    elif h.startswith('name+'):
-                        assert res['name'][h[5:]] == row[h], msg
-                    elif h.startswith('extratags+'):
-                        assert res['extratags'][h[10:]] == row[h], msg
-                    elif h.startswith('addr+'):
-                        if row[h] == '-':
-                            if res['address']  is not None:
-                                assert h[5:] not in res['address']
-                        else:
-                            assert res['address'][h[5:]] == row[h], msg
-                    elif h in ('linked_place_id', 'parent_place_id'):
-                        compare_place_id(row[h], res[h], h, context)
-                    else:
-                        assert_db_column(res, h, row[h], context)
-
-        if exact:
-            cur.execute('SELECT osm_type, osm_id, class from place')
-            assert expected_content, set([(r[0], r[1], r[2]) for r in cur])
 
 @then("search_name contains(?P<exclude> not)?")
 def check_search_name_contents(context, exclude):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         for row in context.table:
-            NominatimID(row['object']).query_place_id(cur,
-                """SELECT *, ST_X(centroid) as cx, ST_Y(centroid) as cy
-                     FROM search_name WHERE place_id = %s""")
+            nid = NominatimID(row['object'])
+            nid.row_by_place_id(cur, 'search_name',
+                                ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
             assert cur.rowcount > 0, "No rows found for " + row['object']
 
             for res in cur:
+                db_row = DBRow(nid, res, context)
                 for h in row.headings:
                     if h in ('name_vector', 'nameaddress_vector'):
                         terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')]
@@ -298,8 +180,8 @@ def check_search_name_contents(context, exclude):
                                     assert wid[0] not in res[h], "Found term for %s/%s: %s" % (row['object'], h, wid[1])
                                 else:
                                     assert wid[0] in res[h], "Missing term for %s/%s: %s" % (row['object'], h, wid[1])
-                    else:
-                        assert_db_column(res, h, row[h], context)
+                    elif h != 'object':
+                        assert db_row.contains(h, row[h]), db_row.assert_msg(h, row[h])
 
 @then("location_postcode contains exactly")
 def check_location_postcode(context):
@@ -308,15 +190,18 @@ def check_location_postcode(context):
         assert cur.rowcount == len(list(context.table)), \
             "Postcode table has %d rows, expected %d rows." % (cur.rowcount, len(list(context.table)))
 
-        table = list(cur)
+        results = {}
+        for row in cur:
+            key = (row['country_code'], row['postcode'])
+            assert key not in results, "Postcode table has duplicate entry: {}".format(row)
+            results[key] = DBRow((row['country_code'],row['postcode']), row, context)
+
         for row in context.table:
-            for i in range(len(table)):
-                if table[i]['country_code'] != row['country'] \
-                        or table[i]['postcode'] != row['postcode']:
-                    continue
-                for h in row.headings:
-                    if h not in ('country', 'postcode'):
-                        assert_db_column(table[i], h, row[h], context)
+            db_row = results.get((row['country'],row['postcode']))
+            assert db_row is not None, \
+                "Missing row for country '{}' postcode '{}'.".format(r['country'],['postcode'])
+
+            db_row.assert_row(row, ('country', 'postcode'))
 
 @then("word contains(?P<exclude> not)?")
 def check_word_table(context, exclude):
@@ -337,7 +222,8 @@ def check_word_table(context, exclude):
 def check_place_addressline(context):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         for row in context.table:
-            pid = NominatimID(row['object']).get_place_id(cur)
+            nid = NominatimID(row['object'])
+            pid = nid.get_place_id(cur)
             apid = NominatimID(row['address']).get_place_id(cur)
             cur.execute(""" SELECT * FROM place_addressline
                             WHERE place_id = %s AND address_place_id = %s""",
@@ -346,9 +232,7 @@ def check_place_addressline(context):
                         "No rows found for place %s and address %s" % (row['object'], row['address'])
 
             for res in cur:
-                for h in row.headings:
-                    if h not in ('address', 'object'):
-                        assert_db_column(res, h, row[h], context)
+                DBRow(nid, res, context).assert_row(row, ('address', 'object'))
 
 @then("place_addressline doesn't contain")
 def check_place_addressline_exclude(context):
@@ -389,28 +273,15 @@ def check_location_property_osmline(context, oid, neg):
             else:
                 assert False, "Unexpected row %s" % (str(res))
 
-            for h in row.headings:
-                if h in ('start', 'end'):
-                    continue
-                elif h == 'parent_place_id':
-                    compare_place_id(row[h], res[h], h, context)
-                else:
-                    assert_db_column(res, h, row[h], context)
+            DBRow(nid, res, context).assert_row(row, ('start', 'end'))
 
         assert not todo
 
 
-@then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
-def check_placex_has_entry(context, table, oid):
-    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
-        assert cur.rowcount == 0, \
-               "Found {} entries for ID {}".format(cur.rowcount, oid)
-
 @then("search_name has no entry for (?P<oid>.*)")
 def check_search_name_has_entry(context, oid):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        NominatimID(oid).query_place_id(cur,
-            "SELECT * FROM search_name WHERE place_id = %s")
+        NominatimID(oid).row_by_place_id(cur, 'search_name')
+
         assert cur.rowcount == 0, \
                "Found {} entries for ID {}".format(cur.rowcount, oid)
diff --git a/test/bdd/steps/table_compare.py b/test/bdd/steps/table_compare.py
index dfc261d4..51d1b645 100644
--- a/test/bdd/steps/table_compare.py
+++ b/test/bdd/steps/table_compare.py
@@ -2,6 +2,9 @@
 Functions to facilitate accessing and comparing the content of DB tables.
 """
 import re
+import json
+
+from steps.check_functions import Almost
 
 ID_REGEX = re.compile(r"(?P<typ>[NRW])(?P<oid>\d+)(:(?P<cls>\w+))?")
 
@@ -41,15 +44,17 @@ class NominatimID:
             where += ' and class = %s'
             params.append(self.cls)
 
-        return cur.execute(query.format(where), params)
+        cur.execute(query.format(where), params)
 
-    def query_place_id(self, cur, query):
-        """ Run a query on cursor `cur` using the place ID. The `query` string
-            must contain exactly one placeholder '%s' where the 'where' query
-            should go.
+    def row_by_place_id(self, cur, table, extra_columns=None):
+        """ Get a row by place_id from the given table using cursor `cur`.
+            extra_columns may contain a list additional elements for the select
+            part of the query.
         """
         pid = self.get_place_id(cur)
-        return cur.execute(query, (pid, ))
+        query = "SELECT {} FROM {} WHERE place_id = %s".format(
+                    ','.join(['*'] + (extra_columns or [])), table)
+        cur.execute(query, (pid, ))
 
     def get_place_id(self, cur):
         """ Look up the place id for the ID. Throws an assertion if the ID
@@ -60,3 +65,145 @@ class NominatimID:
                "Place ID {!s} not unique. Found {} entries.".format(self, cur.rowcount)
 
         return cur.fetchone()[0]
+
+
+class DBRow:
+    """ Represents a row from a database and offers comparison functions.
+    """
+    def __init__(self, nid, db_row, context):
+        self.nid = nid
+        self.db_row = db_row
+        self.context = context
+
+    def assert_row(self, row, exclude_columns):
+        """ Check that all columns of the given behave row are contained
+            in the database row. Exclude behave rows with the names given
+            in the `exclude_columns` list.
+        """
+        for name, value in zip(row.headings, row.cells):
+            if name not in exclude_columns:
+                assert self.contains(name, value), self.assert_msg(name, value)
+
+    def contains(self, name, expected):
+        """ Check that the DB row contains a column `name` with the given value.
+        """
+        if '+' in name:
+            column, field = name.split('+', 1)
+            return self._contains_hstore_value(column, field, expected)
+
+        if name == 'geometry':
+            return self._has_geometry(expected)
+
+        if name not in self.db_row:
+            return False
+
+        actual = self.db_row[name]
+
+        if expected == '-':
+            return actual is None
+
+        if name == 'name' and ':' not in expected:
+            return self._compare_column(actual[name], expected)
+
+        if 'place_id' in name:
+            return self._compare_place_id(actual, expected)
+
+        if name == 'centroid':
+            return self._has_centroid(expected)
+
+        return self._compare_column(actual, expected)
+
+    def _contains_hstore_value(self, column, field, expected):
+        if column == 'addr':
+            column = 'address'
+
+        if column not in self.db_row:
+            return False
+
+        if expected == '-':
+            return self.db_row[column] is None or field not in self.db_row[column]
+
+        if self.db_row[column] is None:
+            return False
+
+        return self._compare_column(self.db_row[column].get(field), expected)
+
+    def _compare_column(self, actual, expected):
+        if isinstance(actual, dict):
+            return actual == eval('{' + expected + '}')
+
+        return str(actual) == expected
+
+    def _compare_place_id(self, actual, expected):
+       if expected == '0':
+            return actual == 0
+
+       with self.context.db.cursor() as cur:
+            return NominatimID(expected).get_place_id(cur) == actual
+
+    def _has_centroid(self, expected):
+        if expected == 'in geometry':
+            with self.context.db.cursor() as cur:
+                cur.execute("""SELECT ST_Within(ST_SetSRID(ST_Point({cx}, {cy}), 4326),
+                                        ST_SetSRID('{geomtxt}'::geometry, 4326))""".format(**self.db_row))
+                return cur.fetchone()[0]
+
+        x, y = expected.split(' ')
+        return Almost(float(x)) == self.db_row['cx'] and Almost(float(y)) == self.db_row['cy']
+
+    def _has_geometry(self, expected):
+        geom = self.context.osm.parse_geometry(expected, self.context.scene)
+        with self.context.db.cursor() as cur:
+            cur.execute("""SELECT ST_Equals(ST_SnapToGrid({}, 0.00001, 0.00001),
+                                   ST_SnapToGrid(ST_SetSRID('{}'::geometry, 4326), 0.00001, 0.00001))""".format(
+                            geom, self.db_row['geomtxt']))
+            return cur.fetchone()[0]
+
+    def assert_msg(self, name, value):
+        """ Return a string with an informative message for a failed compare.
+        """
+        msg = "\nBad column '{}' in row '{!s}'.".format(name, self.nid)
+        actual = self._get_actual(name)
+        if actual is not None:
+            msg += " Expected: {}, got: {}.".format(value, actual)
+        else:
+            msg += " No such column."
+
+        return msg + "\nFull DB row: {}".format(json.dumps(dict(self.db_row), indent=4, default=str))
+
+    def _get_actual(self, name):
+        if '+' in name:
+            column, field = name.split('+', 1)
+            if column == 'addr':
+                column = 'address'
+            return (self.db_row.get(column) or {}).get(field)
+
+        if name == 'geometry':
+            return self.db_row['geomtxt']
+
+        if name not in self.db_row:
+            return None
+
+        if name == 'centroid':
+            return "POINT({cx} {cy})".format(**self.db_row)
+
+        actual = self.db_row[name]
+
+        if 'place_id' in name:
+            if actual is None:
+                return '<null>'
+
+            if actual == 0:
+                return "place ID 0"
+
+            with self.context.db.cursor():
+                cur.execute("""SELECT osm_type, osm_id, class
+                               FROM placex WHERE place_id = %s""",
+                            (actual, ))
+
+                if cur.rowcount == 1:
+                    return "{0[0]}{0[1]}:{0[2]}".format(cur.fetchone())
+
+                return "[place ID {} not found]".format(actual)
+
+        return actual

From 73cbb6eb9a5a4ccbab98692e0476a17b7c022b80 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Wed, 6 Jan 2021 16:37:32 +0100
Subject: [PATCH 17/17] bdd: clean up DB ops steps

Adds comments and modernizes code.
---
 test/bdd/steps/steps_db_ops.py  | 141 +++++++++++++++++++++-----------
 test/bdd/steps/table_compare.py |   2 +-
 2 files changed, 96 insertions(+), 47 deletions(-)

diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 178e1a28..c549f3eb 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -1,3 +1,5 @@
+from itertools import chain
+
 import psycopg2.extras
 
 from place_inserter import PlaceColumn
@@ -20,6 +22,9 @@ def check_database_integrity(context):
 
 @given("the (?P<named>named )?places")
 def add_data_to_place_table(context, named):
+    """ Add entries into the place table. 'named places' makes sure that
+        the entries get a random name when none is explicitly given.
+    """
     with context.db.cursor() as cur:
         cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
         for row in context.table:
@@ -28,6 +33,9 @@ def add_data_to_place_table(context, named):
 
 @given("the relations")
 def add_data_to_planet_relations(context):
+    """ Add entries into the osm2pgsql relation middle table. This is needed
+        for tests on data that looks up members.
+    """
     with context.db.cursor() as cur:
         for r in context.table:
             last_node = 0
@@ -51,28 +59,24 @@ def add_data_to_planet_relations(context):
             else:
                 members = None
 
-            tags = []
-            for h in r.headings:
-                if h.startswith("tags+"):
-                    tags.extend((h[5:], r[h]))
+            tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
 
             cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
                            VALUES (%s, %s, %s, %s, %s, %s)""",
-                        (r['id'], last_node, last_way, parts, members, tags))
+                        (r['id'], last_node, last_way, parts, members, list(tags)))
 
 @given("the ways")
 def add_data_to_planet_ways(context):
+    """ Add entries into the osm2pgsql way middle table. This is necessary for
+        tests on that that looks up node ids in this table.
+    """
     with context.db.cursor() as cur:
         for r in context.table:
-            tags = []
-            for h in r.headings:
-                if h.startswith("tags+"):
-                    tags.extend((h[5:], r[h]))
-
+            tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
             nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
 
             cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
-                        (r['id'], nodes, tags))
+                        (r['id'], nodes, list(tags)))
 
 ################################ WHEN ##################################
 
@@ -86,6 +90,9 @@ def import_and_index_data_from_place_table(context):
 
 @when("updating places")
 def update_place_table(context):
+    """ Update the place table with the given data. Also runs all triggers
+        related to updates and reindexes the new data.
+    """
     context.nominatim.run_setup_script(
         'create-functions', 'create-partition-functions', 'enable-diff-updates')
     with context.db.cursor() as cur:
@@ -97,10 +104,16 @@ def update_place_table(context):
 
 @when("updating postcodes")
 def update_postcodes(context):
+    """ Rerun the calculation of postcodes.
+    """
     context.nominatim.run_update_script('calculate-postcodes')
 
 @when("marking for delete (?P<oids>.*)")
 def delete_places(context, oids):
+    """ Remove entries from the place table. Multiple ids may be given
+        separated by commas. Also runs all triggers
+        related to updates and reindexes the new data.
+    """
     context.nominatim.run_setup_script(
         'create-functions', 'create-partition-functions', 'enable-diff-updates')
     with context.db.cursor() as cur:
@@ -113,6 +126,15 @@ def delete_places(context, oids):
 
 @then("(?P<table>placex|place) contains(?P<exact> exactly)?")
 def check_place_contents(context, table, exact):
+    """ Check contents of place/placex tables. Each row represents a table row
+        and all data must match. Data not present in the expected table, may
+        be arbitry. The rows are identified via the 'object' column which must
+        have an identifier of the form '<NRW><osm id>[:<class>]'. When multiple
+        rows match (for example because 'class' was left out and there are
+        multiple entries for the given OSM object) then all must match. All
+        expected rows are expected to be present with at least one database row.
+        When 'exactly' is given, there must not be additional rows in the database.
+    """
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         expected_content = set()
         for row in context.table:
@@ -137,6 +159,9 @@ def check_place_contents(context, table, exact):
 
 @then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
 def check_place_has_entry(context, table, oid):
+    """ Ensure that no database row for the given object exists. The ID
+        must be of the form '<NRW><osm id>[:<class>]'.
+    """
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
         assert cur.rowcount == 0, \
@@ -145,6 +170,12 @@ def check_place_has_entry(context, table, oid):
 
 @then("search_name contains(?P<exclude> not)?")
 def check_search_name_contents(context, exclude):
+    """ Check contents of place/placex tables. Each row represents a table row
+        and all data must match. Data not present in the expected table, may
+        be arbitry. The rows are identified via the 'object' column which must
+        have an identifier of the form '<NRW><osm id>[:<class>]'. All
+        expected rows are expected to be present with at least one database row.
+    """
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         for row in context.table:
             nid = NominatimID(row['object'])
@@ -154,10 +185,9 @@ def check_search_name_contents(context, exclude):
 
             for res in cur:
                 db_row = DBRow(nid, res, context)
-                for h in row.headings:
-                    if h in ('name_vector', 'nameaddress_vector'):
-                        terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')]
-                        words = [x.strip()[1:] for x in row[h].split(',') if x.strip().startswith('#')]
+                for name, value in zip(row.headings, row.cells):
+                    if name in ('name_vector', 'nameaddress_vector'):
+                        items = [x.strip() for x in value.split(',')]
                         with context.db.cursor() as subcur:
                             subcur.execute(""" SELECT word_id, word_token
                                                FROM word, (SELECT unnest(%s::TEXT[]) as term) t
@@ -171,24 +201,43 @@ def check_search_name_contents(context, exclude):
                                                      and class is null and country_code is null
                                                      and operator is null
                                            """,
-                                           (terms, words))
+                                           (list(filter(lambda x: not x.startswith('#'), items)),
+                                            list(filter(lambda x: x.startswith('#'), items))))
                             if not exclude:
-                                assert subcur.rowcount >= len(terms) + len(words), \
-                                    "No word entry found for " + row[h] + ". Entries found: " + str(subcur.rowcount)
+                                assert subcur.rowcount >= len(items), \
+                                    "No word entry found for {}. Entries found: {!s}".format(value, subcur.rowcount)
                             for wid in subcur:
+                                present = wid[0] in res[name]
                                 if exclude:
-                                    assert wid[0] not in res[h], "Found term for %s/%s: %s" % (row['object'], h, wid[1])
+                                    assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1])
                                 else:
-                                    assert wid[0] in res[h], "Missing term for %s/%s: %s" % (row['object'], h, wid[1])
-                    elif h != 'object':
-                        assert db_row.contains(h, row[h]), db_row.assert_msg(h, row[h])
+                                    assert present, "Missing term for {}/{}: {}".fromat(row['object'], name, wid[1])
+                    elif name != 'object':
+                        assert db_row.contains(name, value), db_row.assert_msg(name, value)
+
+@then("search_name has no entry for (?P<oid>.*)")
+def check_search_name_has_entry(context, oid):
+    """ Check that there is noentry in the search_name table for the given
+        objects. IDs are in format '<NRW><osm id>[:<class>]'.
+    """
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        NominatimID(oid).row_by_place_id(cur, 'search_name')
+
+        assert cur.rowcount == 0, \
+               "Found {} entries for ID {}".format(cur.rowcount, oid)
 
 @then("location_postcode contains exactly")
 def check_location_postcode(context):
+    """ Check full contents for location_postcode table. Each row represents a table row
+        and all data must match. Data not present in the expected table, may
+        be arbitry. The rows are identified via 'country' and 'postcode' columns.
+        All rows must be present as excepted and there must not be additional
+        rows.
+    """
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
         assert cur.rowcount == len(list(context.table)), \
-            "Postcode table has %d rows, expected %d rows." % (cur.rowcount, len(list(context.table)))
+            "Postcode table has {} rows, expected {}.".foramt(cur.rowcount, len(list(context.table)))
 
         results = {}
         for row in cur:
@@ -199,20 +248,20 @@ def check_location_postcode(context):
         for row in context.table:
             db_row = results.get((row['country'],row['postcode']))
             assert db_row is not None, \
-                "Missing row for country '{}' postcode '{}'.".format(r['country'],['postcode'])
+                "Missing row for country '{r['country']}' postcode '{r['postcode']}'.".format(r=row)
 
             db_row.assert_row(row, ('country', 'postcode'))
 
 @then("word contains(?P<exclude> not)?")
 def check_word_table(context, exclude):
+    """ Check the contents of the word table. Each row represents a table row
+        and all data must match. Data not present in the expected table, may
+        be arbitry. The rows are identified via all given columns.
+    """
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         for row in context.table:
-            wheres = []
-            values = []
-            for h in row.headings:
-                wheres.append("%s = %%s" % h)
-                values.append(row[h])
-            cur.execute("SELECT * from word WHERE %s" % ' AND '.join(wheres), values)
+            wheres = ' AND '.join(["{} = %s".format(h) for h in row.headings])
+            cur.execute("SELECT * from word WHERE " + wheres, list(row.cells))
             if exclude:
                 assert cur.rowcount == 0, "Row still in word table: %s" % '/'.join(values)
             else:
@@ -220,6 +269,12 @@ def check_word_table(context, exclude):
 
 @then("place_addressline contains")
 def check_place_addressline(context):
+    """ Check the contents of the place_addressline table. Each row represents
+        a table row and all data must match. Data not present in the expected
+        table, may be arbitry. The rows are identified via the 'object' column,
+        representing the addressee and the 'address' column, representing the
+        address item.
+    """
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         for row in context.table:
             nid = NominatimID(row['object'])
@@ -236,6 +291,9 @@ def check_place_addressline(context):
 
 @then("place_addressline doesn't contain")
 def check_place_addressline_exclude(context):
+    """ Check that the place_addressline doesn't contain any entries for the
+        given addressee/address item pairs.
+    """
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         for row in context.table:
             pid = NominatimID(row['object']).get_place_id(cur)
@@ -246,20 +304,18 @@ def check_place_addressline_exclude(context):
             assert cur.rowcount == 0, \
                 "Row found for place %s and address %s" % (row['object'], row['address'])
 
-@then("(?P<oid>\w+) expands to(?P<neg> no)? interpolation")
+@then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
 def check_location_property_osmline(context, oid, neg):
-    nid = NominatimID(oid)
-
-    assert 'W' == nid.typ, "interpolation must be a way"
-
+    """ Check that the given way is present in the interpolation table.
+    """
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
                        FROM location_property_osmline
                        WHERE osm_id = %s AND startnumber IS NOT NULL""",
-                    (nid.oid, ))
+                    (oid, ))
 
         if neg:
-            assert cur.rowcount == 0
+            assert cur.rowcount == 0, "Interpolation found for way {}.".format(oid)
             return
 
         todo = list(range(len(list(context.table))))
@@ -271,17 +327,10 @@ def check_location_property_osmline(context, oid, neg):
                     todo.remove(i)
                     break
             else:
-                assert False, "Unexpected row %s" % (str(res))
+                assert False, "Unexpected row " + str(res)
 
-            DBRow(nid, res, context).assert_row(row, ('start', 'end'))
+            DBRow(oid, res, context).assert_row(row, ('start', 'end'))
 
         assert not todo
 
 
-@then("search_name has no entry for (?P<oid>.*)")
-def check_search_name_has_entry(context, oid):
-    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        NominatimID(oid).row_by_place_id(cur, 'search_name')
-
-        assert cur.rowcount == 0, \
-               "Found {} entries for ID {}".format(cur.rowcount, oid)
diff --git a/test/bdd/steps/table_compare.py b/test/bdd/steps/table_compare.py
index 51d1b645..2e71d943 100644
--- a/test/bdd/steps/table_compare.py
+++ b/test/bdd/steps/table_compare.py
@@ -196,7 +196,7 @@ class DBRow:
             if actual == 0:
                 return "place ID 0"
 
-            with self.context.db.cursor():
+            with self.context.db.cursor() as cur:
                 cur.execute("""SELECT osm_type, osm_id, class
                                FROM placex WHERE place_id = %s""",
                             (actual, ))