port code to psycopg3

This commit is contained in:
Sarah Hoffmann
2024-07-05 10:43:10 +02:00
parent 3742fa2929
commit 9659afbade
57 changed files with 800 additions and 1330 deletions

View File

@@ -9,14 +9,14 @@ import importlib
import sys
import tempfile
import psycopg2
import psycopg2.extras
import psycopg
from psycopg import sql as pysql
sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..'/ 'src').resolve()))
from nominatim_db import cli
from nominatim_db.config import Configuration
from nominatim_db.db.connection import Connection
from nominatim_db.db.connection import Connection, register_hstore, execute_scalar
from nominatim_db.tools import refresh
from nominatim_db.tokenizer import factory as tokenizer_factory
from steps.utils import run_script
@@ -60,7 +60,7 @@ class NominatimEnvironment:
""" Return a connection to the database with the given name.
Uses configured host, user and port.
"""
dbargs = {'database': dbname}
dbargs = {'dbname': dbname, 'row_factory': psycopg.rows.dict_row}
if self.db_host:
dbargs['host'] = self.db_host
if self.db_port:
@@ -69,8 +69,7 @@ class NominatimEnvironment:
dbargs['user'] = self.db_user
if self.db_pass:
dbargs['password'] = self.db_pass
conn = psycopg2.connect(connection_factory=Connection, **dbargs)
return conn
return psycopg.connect(**dbargs)
def next_code_coverage_file(self):
""" Generate the next name for a coverage file.
@@ -132,6 +131,8 @@ class NominatimEnvironment:
conn = False
refresh.setup_website(Path(self.website_dir.name) / 'website',
self.get_test_config(), conn)
if conn:
conn.close()
def get_test_config(self):
@@ -160,11 +161,10 @@ class NominatimEnvironment:
def db_drop_database(self, name):
    """ Drop the database with the given name.

        The statement is sent over a connection to the 'postgres'
        database. Because of IF EXISTS, dropping a database that does
        not exist is not an error.
    """
    with self.connect_database('postgres') as conn:
        # Autocommit must be enabled before the statement is sent:
        # DROP DATABASE cannot be executed inside a transaction block.
        conn.autocommit = True
        # Compose the statement with a quoted identifier instead of
        # pasting the name into the SQL string.
        conn.execute(pysql.SQL('DROP DATABASE IF EXISTS {}')
                          .format(pysql.Identifier(name)))
def setup_template_db(self):
""" Setup a template database that already contains common test data.
@@ -249,16 +249,18 @@ class NominatimEnvironment:
""" Setup a test against a fresh, empty test database.
"""
self.setup_template_db()
conn = self.connect_database(self.template_db)
conn.set_isolation_level(0)
cur = conn.cursor()
cur.execute('DROP DATABASE IF EXISTS {}'.format(self.test_db))
cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db))
conn.close()
with self.connect_database(self.template_db) as conn:
conn.autocommit = True
conn.execute(pysql.SQL('DROP DATABASE IF EXISTS')
+ pysql.Identifier(self.test_db))
conn.execute(pysql.SQL('CREATE DATABASE {} TEMPLATE = {}').format(
pysql.Identifier(self.test_db),
pysql.Identifier(self.template_db)))
self.write_nominatim_config(self.test_db)
context.db = self.connect_database(self.test_db)
context.db.autocommit = True
psycopg2.extras.register_hstore(context.db, globally=False)
register_hstore(context.db)
def teardown_db(self, context, force_drop=False):
""" Remove the test database, if it exists.
@@ -276,31 +278,26 @@ class NominatimEnvironment:
dropped and always false returned.
"""
if self.reuse_template:
conn = self.connect_database('postgres')
with conn.cursor() as cur:
cur.execute('select count(*) from pg_database where datname = %s',
(name,))
if cur.fetchone()[0] == 1:
with self.connect_database('postgres') as conn:
num = execute_scalar(conn,
'select count(*) from pg_database where datname = %s',
(name,))
if num == 1:
return True
conn.close()
else:
self.db_drop_database(name)
return False
def reindex_placex(self, db):
    """ Run the indexing step until all data in the placex has
        been processed. Indexing during updates can produce more data
        to index under some circumstances. That is why indexing may have
        to be run multiple times.

        `db` is an open database connection. It is only used to check
        whether placex still contains rows with a non-zero
        indexed_status.
    """
    with db.cursor() as cur:
        while True:
            # Exactly one indexing pass per iteration.
            self.run_nominatim('index')

            # Only the number of returned rows matters here, so the
            # check works with any row factory set on the connection.
            cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
            if cur.rowcount == 0:
                return
def run_nominatim(self, *cmdline):
""" Run the nominatim command-line tool via the library.

View File

@@ -7,7 +7,8 @@
import logging
from itertools import chain
import psycopg2.extras
import psycopg
from psycopg import sql as pysql
from place_inserter import PlaceColumn
from table_compare import NominatimID, DBRow
@@ -18,7 +19,7 @@ from nominatim_db.tokenizer import factory as tokenizer_factory
def check_database_integrity(context):
""" Check some generic constraints on the tables.
"""
with context.db.cursor() as cur:
with context.db.cursor(row_factory=psycopg.rows.tuple_row) as cur:
# place_addressline should not have duplicate (place_id, address_place_id)
cur.execute("""SELECT count(*) FROM
(SELECT place_id, address_place_id, count(*) as c
@@ -54,7 +55,7 @@ def add_data_to_planet_relations(context):
with context.db.cursor() as cur:
cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
row = cur.fetchone()
if row is None or row[0] == '1':
if row is None or row['value'] == '1':
for r in context.table:
last_node = 0
last_way = 0
@@ -96,8 +97,8 @@ def add_data_to_planet_relations(context):
cur.execute("""INSERT INTO planet_osm_rels (id, tags, members)
VALUES (%s, %s, %s)""",
(r['id'], psycopg2.extras.Json(tags),
psycopg2.extras.Json(members)))
(r['id'], psycopg.types.json.Json(tags),
psycopg.types.json.Json(members)))
@given("the ways")
def add_data_to_planet_ways(context):
@@ -107,10 +108,10 @@ def add_data_to_planet_ways(context):
with context.db.cursor() as cur:
cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
row = cur.fetchone()
json_tags = row is not None and row[0] != '1'
json_tags = row is not None and row['value'] != '1'
for r in context.table:
if json_tags:
tags = psycopg2.extras.Json({h[5:]: r[h] for h in r.headings if h.startswith("tags+")})
tags = psycopg.types.json.Json({h[5:]: r[h] for h in r.headings if h.startswith("tags+")})
else:
tags = list(chain.from_iterable([(h[5:], r[h])
for h in r.headings if h.startswith("tags+")]))
@@ -197,7 +198,7 @@ def check_place_contents(context, table, exact):
expected rows are expected to be present with at least one database row.
When 'exactly' is given, there must not be additional rows in the database.
"""
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
with context.db.cursor() as cur:
expected_content = set()
for row in context.table:
nid = NominatimID(row['object'])
@@ -215,8 +216,9 @@ def check_place_contents(context, table, exact):
DBRow(nid, res, context).assert_row(row, ['object'])
if exact:
cur.execute('SELECT osm_type, osm_id, class from {}'.format(table))
actual = set([(r[0], r[1], r[2]) for r in cur])
cur.execute(pysql.SQL('SELECT osm_type, osm_id, class from')
+ pysql.Identifier(table))
actual = set([(r['osm_type'], r['osm_id'], r['class']) for r in cur])
assert expected_content == actual, \
f"Missing entries: {expected_content - actual}\n" \
f"Not expected in table: {actual - expected_content}"
@@ -227,7 +229,7 @@ def check_place_has_entry(context, table, oid):
""" Ensure that no database row for the given object exists. The ID
must be of the form '<NRW><osm id>[:<class>]'.
"""
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
with context.db.cursor() as cur:
NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
assert cur.rowcount == 0, \
"Found {} entries for ID {}".format(cur.rowcount, oid)
@@ -244,7 +246,7 @@ def check_search_name_contents(context, exclude):
tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config())
with tokenizer.name_analyzer() as analyzer:
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
with context.db.cursor() as cur:
for row in context.table:
nid = NominatimID(row['object'])
nid.row_by_place_id(cur, 'search_name',
@@ -276,7 +278,7 @@ def check_search_name_has_entry(context, oid):
""" Check that there is no entry in the search_name table for the given
objects. IDs are in format '<NRW><osm id>[:<class>]'.
"""
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
with context.db.cursor() as cur:
NominatimID(oid).row_by_place_id(cur, 'search_name')
assert cur.rowcount == 0, \
@@ -290,7 +292,7 @@ def check_location_postcode(context):
All rows must be present as expected and there must not be additional
rows.
"""
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
with context.db.cursor() as cur:
cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
assert cur.rowcount == len(list(context.table)), \
"Postcode table has {} rows, expected {}.".format(cur.rowcount, len(list(context.table)))
@@ -321,7 +323,7 @@ def check_word_table_for_postcodes(context, exclude, postcodes):
plist.sort()
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
with context.db.cursor() as cur:
if nctx.tokenizer != 'legacy':
cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
(plist,))
@@ -330,7 +332,7 @@ def check_word_table_for_postcodes(context, exclude, postcodes):
and class = 'place' and type = 'postcode'""",
(plist,))
found = [row[0] for row in cur]
found = [row['word'] for row in cur]
assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
if exclude:
@@ -347,7 +349,7 @@ def check_place_addressline(context):
representing the addressee and the 'address' column, representing the
address item.
"""
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
with context.db.cursor() as cur:
for row in context.table:
nid = NominatimID(row['object'])
pid = nid.get_place_id(cur)
@@ -366,7 +368,7 @@ def check_place_addressline_exclude(context):
""" Check that the place_addressline doesn't contain any entries for the
given addressee/address item pairs.
"""
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
with context.db.cursor() as cur:
for row in context.table:
pid = NominatimID(row['object']).get_place_id(cur)
apid = NominatimID(row['address']).get_place_id(cur, allow_empty=True)
@@ -381,7 +383,7 @@ def check_place_addressline_exclude(context):
def check_location_property_osmline(context, oid, neg):
""" Check that the given way is present in the interpolation table.
"""
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
with context.db.cursor() as cur:
cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
FROM location_property_osmline
WHERE osm_id = %s AND startnumber IS NOT NULL""",
@@ -417,7 +419,7 @@ def check_place_contents(context, exact):
expected rows are expected to be present with at least one database row.
When 'exactly' is given, there must not be additional rows in the database.
"""
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
with context.db.cursor() as cur:
expected_content = set()
for row in context.table:
if ':' in row['object']:
@@ -447,7 +449,7 @@ def check_place_contents(context, exact):
if exact:
cur.execute('SELECT osm_id, startnumber from location_property_osmline')
actual = set([(r[0], r[1]) for r in cur])
actual = set([(r['osm_id'], r['startnumber']) for r in cur])
assert expected_content == actual, \
f"Missing entries: {expected_content - actual}\n" \
f"Not expected in table: {actual - expected_content}"

View File

@@ -10,6 +10,9 @@ Functions to facilitate accessing and comparing the content of DB tables.
import re
import json
import psycopg
from psycopg import sql as pysql
from steps.check_functions import Almost
ID_REGEX = re.compile(r"(?P<typ>[NRW])(?P<oid>\d+)(:(?P<cls>\w+))?")
@@ -73,7 +76,7 @@ class NominatimID:
assert cur.rowcount == 1, \
"Place ID {!s} not unique. Found {} entries.".format(self, cur.rowcount)
return cur.fetchone()[0]
return cur.fetchone()['place_id']
class DBRow:
@@ -152,9 +155,10 @@ class DBRow:
def _has_centroid(self, expected):
if expected == 'in geometry':
with self.context.db.cursor() as cur:
cur.execute("""SELECT ST_Within(ST_SetSRID(ST_Point({cx}, {cy}), 4326),
ST_SetSRID('{geomtxt}'::geometry, 4326))""".format(**self.db_row))
with self.context.db.cursor(row_factory=psycopg.rows.tuple_row) as cur:
cur.execute("""SELECT ST_Within(ST_SetSRID(ST_Point(%(cx)s, %(cy)s), 4326),
ST_SetSRID(%(geomtxt)s::geometry, 4326))""",
(self.db_row))
return cur.fetchone()[0]
if ' ' in expected:
@@ -166,10 +170,11 @@ class DBRow:
def _has_geometry(self, expected):
    """ Compare the geometry of the database row with the expected one.

        `expected` is converted by the parse_geometry() helper of the
        test context; the result is inserted into the query as an SQL
        expression. The 'geomtxt' value of the database row is cast to
        a geometry with SRID 4326. Both sides are snapped to a grid of
        0.00001 before they are compared with ST_Equals().

        Returns the first column of the single result row.
    """
    geom = self.context.osm.parse_geometry(expected)
    query = pysql.SQL("""SELECT ST_Equals(ST_SnapToGrid({}, 0.00001, 0.00001),
                     ST_SnapToGrid(ST_SetSRID({}::geometry, 4326), 0.00001, 0.00001))""")

    # Request tuple rows explicitly because the result is read by
    # position below.
    with self.context.db.cursor(row_factory=psycopg.rows.tuple_row) as cur:
        # `geom` goes in verbatim as SQL, the geometry text from the
        # database row is quoted as a literal.
        cur.execute(query.format(pysql.SQL(geom),
                                 pysql.Literal(self.db_row['geomtxt'])))
        return cur.fetchone()[0]
def assert_msg(self, name, value):
@@ -209,7 +214,7 @@ class DBRow:
if actual == 0:
return "place ID 0"
with self.context.db.cursor() as cur:
with self.context.db.cursor(row_factory=psycopg.rows.tuple_row) as cur:
cur.execute("""SELECT osm_type, osm_id, class
FROM placex WHERE place_id = %s""",
(actual, ))