port code to psycopg3

Sarah Hoffmann
2024-07-05 10:43:10 +02:00
parent 3742fa2929
commit 9659afbade
57 changed files with 800 additions and 1330 deletions

View File

@@ -13,8 +13,6 @@ from pathlib import Path
import pytest
import pytest_asyncio
import psycopg2.extras
from fake_adaptor import FakeAdaptor, FakeError, FakeResponse
import nominatim_api.v1.server_glue as glue
@@ -31,7 +29,6 @@ class TestDeletableEndPoint:
@pytest.fixture(autouse=True)
def setup_deletable_table(self, temp_db_cursor, table_factory, temp_db_with_extensions):
psycopg2.extras.register_hstore(temp_db_cursor)
table_factory('import_polygon_delete',
definition='osm_id bigint, osm_type char(1), class text, type text',
content=[(345, 'N', 'boundary', 'administrative'),

View File

@@ -14,8 +14,6 @@ from pathlib import Path
import pytest
import pytest_asyncio
import psycopg2.extras
from fake_adaptor import FakeAdaptor, FakeError, FakeResponse
import nominatim_api.v1.server_glue as glue
@@ -32,8 +30,6 @@ class TestPolygonsEndPoint:
@pytest.fixture(autouse=True)
def setup_deletable_table(self, temp_db_cursor, table_factory, temp_db_with_extensions):
psycopg2.extras.register_hstore(temp_db_cursor)
self.now = dt.datetime.now()
self.recent = dt.datetime.now() - dt.timedelta(days=3)
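
The per-cursor psycopg2.extras.register_hstore() calls can go because the ported conftest registers hstore once per connection (see connection.register_hstore(conn) further down). A minimal sketch of the underlying psycopg 3 API, assuming a database with the hstore extension installed (the dbname here is hypothetical):

import psycopg
from psycopg.types import TypeInfo
from psycopg.types.hstore import register_hstore

with psycopg.connect(dbname='nominatim') as conn:
    info = TypeInfo.fetch(conn, 'hstore')   # resolve the extension's OIDs
    register_hstore(info, conn)             # adapt dict <-> hstore on this connection
    row = conn.execute("SELECT 'a=>1'::hstore").fetchone()
    print(row[0])                           # {'a': '1'}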

View File

@@ -25,6 +25,23 @@ class MockParamCapture:
return self.return_value
class AsyncMockParamCapture:
""" Mock that records the parameters with which a function was called
as well as the number of calls.
"""
def __init__(self, retval=0):
self.called = 0
self.return_value = retval
self.last_args = None
self.last_kwargs = None
async def __call__(self, *args, **kwargs):
self.called += 1
self.last_args = args
self.last_kwargs = kwargs
return self.return_value
class DummyTokenizer:
def __init__(self, *args, **kwargs):
self.update_sql_functions_called = False
@@ -69,6 +86,17 @@ def mock_func_factory(monkeypatch):
return get_mock
@pytest.fixture
def async_mock_func_factory(monkeypatch):
def get_mock(module, func):
mock = AsyncMockParamCapture()
mock.func_name = func
monkeypatch.setattr(module, func, mock)
return mock
return get_mock
@pytest.fixture
def cli_tokenizer_mock(monkeypatch):
tok = DummyTokenizer()
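
For illustration, a minimal sketch of how the new AsyncMockParamCapture behaves when awaited, using only the attributes defined above (run it next to the class definition):

import asyncio

async def demo():
    mock = AsyncMockParamCapture(retval=42)
    result = await mock('place', force=True)
    assert result == 42
    assert mock.called == 1
    assert mock.last_args == ('place',)
    assert mock.last_kwargs == {'force': True}

asyncio.run(demo())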

View File

@@ -17,6 +17,7 @@ import pytest
import nominatim_db.indexer.indexer
import nominatim_db.tools.add_osm_data
import nominatim_db.tools.freeze
import nominatim_db.tools.tiger_data
def test_cli_help(cli_call, capsys):
@@ -52,8 +53,8 @@ def test_cli_add_data_object_command(cli_call, mock_func_factory, name, oid):
def test_cli_add_data_tiger_data(cli_call, cli_tokenizer_mock, mock_func_factory):
mock = mock_func_factory(nominatim_db.tools.tiger_data, 'add_tiger_data')
def test_cli_add_data_tiger_data(cli_call, cli_tokenizer_mock, async_mock_func_factory):
mock = async_mock_func_factory(nominatim_db.tools.tiger_data, 'add_tiger_data')
assert cli_call('add-data', '--tiger-data', 'somewhere') == 0
@@ -68,38 +69,6 @@ def test_cli_serve_php(cli_call, mock_func_factory):
assert func.called == 1
def test_cli_serve_starlette_custom_server(cli_call, mock_func_factory):
pytest.importorskip("starlette")
mod = pytest.importorskip("uvicorn")
func = mock_func_factory(mod, "run")
assert cli_call('serve', '--engine', 'starlette', '--server', 'foobar:4545') == 0
assert func.called == 1
assert func.last_kwargs['host'] == 'foobar'
assert func.last_kwargs['port'] == 4545
def test_cli_serve_starlette_custom_server_bad_port(cli_call, mock_func_factory):
pytest.importorskip("starlette")
mod = pytest.importorskip("uvicorn")
func = mock_func_factory(mod, "run")
assert cli_call('serve', '--engine', 'starlette', '--server', 'foobar:45:45') == 1
@pytest.mark.parametrize("engine", ['falcon', 'starlette'])
def test_cli_serve_uvicorn_based(cli_call, engine, mock_func_factory):
pytest.importorskip(engine)
mod = pytest.importorskip("uvicorn")
func = mock_func_factory(mod, "run")
assert cli_call('serve', '--engine', engine) == 0
assert func.called == 1
assert func.last_kwargs['host'] == '127.0.0.1'
assert func.last_kwargs['port'] == 8088
class TestCliWithDb:
@@ -120,16 +89,19 @@ class TestCliWithDb:
@pytest.mark.parametrize("params,do_bnds,do_ranks", [
([], 1, 1),
(['--boundaries-only'], 1, 0),
(['--no-boundaries'], 0, 1),
([], 2, 2),
(['--boundaries-only'], 2, 0),
(['--no-boundaries'], 0, 2),
(['--boundaries-only', '--no-boundaries'], 0, 0)])
def test_index_command(self, mock_func_factory, table_factory,
def test_index_command(self, monkeypatch, async_mock_func_factory, table_factory,
params, do_bnds, do_ranks):
table_factory('import_status', 'indexed bool')
bnd_mock = mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_boundaries')
rank_mock = mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_by_rank')
postcode_mock = mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_postcodes')
bnd_mock = async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_boundaries')
rank_mock = async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_by_rank')
postcode_mock = async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_postcodes')
monkeypatch.setattr(nominatim_db.indexer.indexer.Indexer, 'has_pending',
[False, True].pop)
assert self.call_nominatim('index', *params) == 0
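
The expected call counts in the parametrize list double from 1 to 2 presumably because the ported command re-checks Indexer.has_pending() after a first pass and runs one more round. The [False, True].pop bound method makes that probe answer True on the first call and False on the second:

probe = [False, True].pop
assert probe() is True    # first call pops the last element
assert probe() is False   # second call pops the remaining element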

View File

@@ -34,7 +34,8 @@ class TestCliImportWithDb:
@pytest.mark.parametrize('with_updates', [True, False])
def test_import_full(self, mock_func_factory, with_updates, place_table, property_table):
def test_import_full(self, mock_func_factory, async_mock_func_factory,
with_updates, place_table, property_table):
mocks = [
mock_func_factory(nominatim_db.tools.database_import, 'setup_database_skeleton'),
mock_func_factory(nominatim_db.data.country_info, 'setup_country_tables'),
@@ -42,15 +43,15 @@ class TestCliImportWithDb:
mock_func_factory(nominatim_db.tools.refresh, 'import_wikipedia_articles'),
mock_func_factory(nominatim_db.tools.refresh, 'import_secondary_importance'),
mock_func_factory(nominatim_db.tools.database_import, 'truncate_data_tables'),
mock_func_factory(nominatim_db.tools.database_import, 'load_data'),
async_mock_func_factory(nominatim_db.tools.database_import, 'load_data'),
mock_func_factory(nominatim_db.tools.database_import, 'create_tables'),
mock_func_factory(nominatim_db.tools.database_import, 'create_table_triggers'),
mock_func_factory(nominatim_db.tools.database_import, 'create_partition_tables'),
mock_func_factory(nominatim_db.tools.database_import, 'create_search_indices'),
async_mock_func_factory(nominatim_db.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim_db.data.country_info, 'create_country_names'),
mock_func_factory(nominatim_db.tools.refresh, 'load_address_levels_from_config'),
mock_func_factory(nominatim_db.tools.postcodes, 'update_postcodes'),
mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_full'),
async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim_db.tools.refresh, 'setup_website'),
]
@@ -72,14 +73,14 @@ class TestCliImportWithDb:
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
def test_import_continue_load_data(self, mock_func_factory):
def test_import_continue_load_data(self, mock_func_factory, async_mock_func_factory):
mocks = [
mock_func_factory(nominatim_db.tools.database_import, 'truncate_data_tables'),
mock_func_factory(nominatim_db.tools.database_import, 'load_data'),
mock_func_factory(nominatim_db.tools.database_import, 'create_search_indices'),
async_mock_func_factory(nominatim_db.tools.database_import, 'load_data'),
async_mock_func_factory(nominatim_db.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim_db.data.country_info, 'create_country_names'),
mock_func_factory(nominatim_db.tools.postcodes, 'update_postcodes'),
mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_full'),
async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim_db.tools.refresh, 'setup_website'),
mock_func_factory(nominatim_db.db.properties, 'set_property')
]
@@ -91,12 +92,12 @@ class TestCliImportWithDb:
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
def test_import_continue_indexing(self, mock_func_factory, placex_table,
temp_db_conn):
def test_import_continue_indexing(self, mock_func_factory, async_mock_func_factory,
placex_table, temp_db_conn):
mocks = [
mock_func_factory(nominatim_db.tools.database_import, 'create_search_indices'),
async_mock_func_factory(nominatim_db.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim_db.data.country_info, 'create_country_names'),
mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_full'),
async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim_db.tools.refresh, 'setup_website'),
mock_func_factory(nominatim_db.db.properties, 'set_property')
]
@@ -110,9 +111,9 @@ class TestCliImportWithDb:
assert self.call_nominatim('import', '--continue', 'indexing') == 0
def test_import_continue_postprocess(self, mock_func_factory):
def test_import_continue_postprocess(self, mock_func_factory, async_mock_func_factory):
mocks = [
mock_func_factory(nominatim_db.tools.database_import, 'create_search_indices'),
async_mock_func_factory(nominatim_db.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim_db.data.country_info, 'create_country_names'),
mock_func_factory(nominatim_db.tools.refresh, 'setup_website'),
mock_func_factory(nominatim_db.db.properties, 'set_property')

View File

@@ -45,9 +45,9 @@ class TestRefresh:
assert self.tokenizer_mock.update_word_tokens_called
def test_refresh_postcodes(self, mock_func_factory, place_table):
def test_refresh_postcodes(self, async_mock_func_factory, mock_func_factory, place_table):
func_mock = mock_func_factory(nominatim_db.tools.postcodes, 'update_postcodes')
idx_mock = mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_postcodes')
idx_mock = async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_postcodes')
assert self.call_nominatim('refresh', '--postcodes') == 0
assert func_mock.called == 1

View File

@@ -47,8 +47,8 @@ def init_status(temp_db_conn, status_table):
@pytest.fixture
def index_mock(mock_func_factory, tokenizer_mock, init_status):
return mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_full')
def index_mock(async_mock_func_factory, tokenizer_mock, init_status):
return async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, 'index_full')
@pytest.fixture

View File

@@ -8,7 +8,8 @@ import itertools
import sys
from pathlib import Path
import psycopg2
import psycopg
from psycopg import sql as pysql
import pytest
# always test against the source
@@ -36,26 +37,23 @@ def temp_db(monkeypatch):
exported into NOMINATIM_DATABASE_DSN.
"""
name = 'test_nominatim_python_unittest'
conn = psycopg2.connect(database='postgres')
conn.set_isolation_level(0)
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
cur.execute('CREATE DATABASE {}'.format(name))
conn.close()
with psycopg.connect(dbname='postgres', autocommit=True) as conn:
with conn.cursor() as cur:
cur.execute(pysql.SQL('DROP DATABASE IF EXISTS') + pysql.Identifier(name))
cur.execute(pysql.SQL('CREATE DATABASE') + pysql.Identifier(name))
monkeypatch.setenv('NOMINATIM_DATABASE_DSN', 'dbname=' + name)
with psycopg.connect(dbname=name) as conn:
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION hstore')
yield name
conn = psycopg2.connect(database='postgres')
conn.set_isolation_level(0)
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
conn.close()
with psycopg.connect(dbname='postgres', autocommit=True) as conn:
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
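
In contrast to the plain string formatting used before, the ported fixture assembles the statements from composable parts so the database name is quoted as an identifier. A minimal equivalent sketch using format() instead of concatenation:

import psycopg
from psycopg import sql

name = 'test_nominatim_python_unittest'
with psycopg.connect(dbname='postgres', autocommit=True) as conn:
    # Identifier() quotes the name safely; autocommit is needed because
    # CREATE/DROP DATABASE cannot run inside a transaction block.
    conn.execute(sql.SQL('DROP DATABASE IF EXISTS {}').format(sql.Identifier(name)))
    conn.execute(sql.SQL('CREATE DATABASE {}').format(sql.Identifier(name)))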
@pytest.fixture
@@ -65,11 +63,9 @@ def dsn(temp_db):
@pytest.fixture
def temp_db_with_extensions(temp_db):
conn = psycopg2.connect(database=temp_db)
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION hstore; CREATE EXTENSION postgis;')
conn.commit()
conn.close()
with psycopg.connect(dbname=temp_db) as conn:
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION postgis')
return temp_db
@@ -77,7 +73,8 @@ def temp_db_with_extensions(temp_db):
def temp_db_conn(temp_db):
""" Connection to the test database.
"""
with connection.connect('dbname=' + temp_db) as conn:
with connection.connect('', autocommit=True, dbname=temp_db) as conn:
connection.register_hstore(conn)
yield conn
@@ -86,22 +83,25 @@ def temp_db_cursor(temp_db):
""" Connection and cursor towards the test database. The connection will
be in auto-commit mode.
"""
conn = psycopg2.connect('dbname=' + temp_db)
conn.set_isolation_level(0)
with conn.cursor(cursor_factory=CursorForTesting) as cur:
yield cur
conn.close()
with psycopg.connect(dbname=temp_db, autocommit=True, cursor_factory=CursorForTesting) as conn:
connection.register_hstore(conn)
with conn.cursor() as cur:
yield cur
@pytest.fixture
def table_factory(temp_db_cursor):
def table_factory(temp_db_conn):
""" A fixture that creates new SQL tables, potentially filled with
content.
"""
def mk_table(name, definition='id INT', content=None):
temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition))
if content is not None:
temp_db_cursor.execute_values("INSERT INTO {} VALUES %s".format(name), content)
with psycopg.ClientCursor(temp_db_conn) as cur:
cur.execute('CREATE TABLE {} ({})'.format(name, definition))
if content:
sql = pysql.SQL("INSERT INTO {} VALUES ({})")\
.format(pysql.Identifier(name),
pysql.SQL(',').join([pysql.Placeholder() for _ in range(len(content[0]))]))
cur.executemany(sql, content)
return mk_table
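
The switch to psycopg.ClientCursor matters here: it interpolates parameters client-side the way psycopg2 did, while the default psycopg 3 cursor binds them server-side. A small sketch of the difference, assuming an open psycopg connection conn:

from psycopg import ClientCursor

with ClientCursor(conn) as cur:
    # mogrify() (available on client-side cursors only) shows the fully
    # interpolated query text that would be sent to the server.
    print(cur.mogrify('INSERT INTO t VALUES (%s, %s)', (1, 'x')))
    # INSERT INTO t VALUES (1, 'x')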
@@ -168,7 +168,6 @@ def place_row(place_table, temp_db_cursor):
""" A factory for rows in the place table. The table is created as a
prerequisite to the fixture.
"""
psycopg2.extras.register_hstore(temp_db_cursor)
idseq = itertools.count(1001)
def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom=None):

View File

@@ -5,11 +5,11 @@
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Specialised psycopg2 cursor with shortcut functions useful for testing.
Specialised psycopg cursor with shortcut functions useful for testing.
"""
import psycopg2.extras
import psycopg
class CursorForTesting(psycopg2.extras.DictCursor):
class CursorForTesting(psycopg.Cursor):
""" Extension to the DictCursor class that provides execution
short-cuts that simplify writing assertions.
"""
@@ -59,9 +59,3 @@ class CursorForTesting(psycopg2.extras.DictCursor):
return self.scalar('SELECT count(*) FROM ' + table)
return self.scalar('SELECT count(*) FROM {} WHERE {}'.format(table, where))
def execute_values(self, *args, **kwargs):
""" Execute the execute_values() function on the cursor.
"""
psycopg2.extras.execute_values(self, *args, **kwargs)
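
psycopg 3 has no execute_values(), so the helper is dropped without replacement; callers switch to executemany(), which psycopg 3 batches efficiently on its own. A sketch, with the connection and table assumed to exist:

with conn.cursor() as cur:
    cur.executemany('INSERT INTO copytable VALUES (%s, %s)',
                    [(1, 'a'), (2, 'b'), (3, 'c')])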

View File

@@ -1,113 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for function providing a non-blocking query interface towards PostgreSQL.
"""
from contextlib import closing
import concurrent.futures
import pytest
import psycopg2
from nominatim_db.db.async_connection import DBConnection, DeadlockHandler
@pytest.fixture
def conn(temp_db):
with closing(DBConnection('dbname=' + temp_db)) as connection:
yield connection
@pytest.fixture
def simple_conns(temp_db):
conn1 = psycopg2.connect('dbname=' + temp_db)
conn2 = psycopg2.connect('dbname=' + temp_db)
yield conn1.cursor(), conn2.cursor()
conn1.close()
conn2.close()
def test_simple_query(conn, temp_db_cursor):
conn.connect()
conn.perform('CREATE TABLE foo (id INT)')
conn.wait()
assert temp_db_cursor.table_exists('foo')
def test_wait_for_query(conn):
conn.connect()
conn.perform('SELECT pg_sleep(1)')
assert not conn.is_done()
conn.wait()
def test_bad_query(conn):
conn.connect()
conn.perform('SELECT efasfjsea')
with pytest.raises(psycopg2.ProgrammingError):
conn.wait()
def test_bad_query_ignore(temp_db):
with closing(DBConnection('dbname=' + temp_db, ignore_sql_errors=True)) as conn:
conn.connect()
conn.perform('SELECT efasfjsea')
conn.wait()
def exec_with_deadlock(cur, sql, detector):
with DeadlockHandler(lambda *args: detector.append(1)):
cur.execute(sql)
def test_deadlock(simple_conns):
cur1, cur2 = simple_conns
cur1.execute("""CREATE TABLE t1 (id INT PRIMARY KEY, t TEXT);
INSERT into t1 VALUES (1, 'a'), (2, 'b')""")
cur1.connection.commit()
cur1.execute("UPDATE t1 SET t = 'x' WHERE id = 1")
cur2.execute("UPDATE t1 SET t = 'x' WHERE id = 2")
# This is the tricky part of the test. The first SQL command runs into
# a lock and blocks, so we have to run it in a separate thread. When the
# second deadlocking SQL statement is issued, Postgresql will abort one of
# the two transactions that cause the deadlock. There is no way to tell
# which one of the two. Therefore wrap both in a DeadlockHandler and
# expect that exactly one of the two triggers.
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
deadlock_check = []
try:
future = executor.submit(exec_with_deadlock, cur2,
"UPDATE t1 SET t = 'y' WHERE id = 1",
deadlock_check)
while not future.running():
pass
exec_with_deadlock(cur1, "UPDATE t1 SET t = 'y' WHERE id = 2",
deadlock_check)
finally:
# Whatever happens, make sure the deadlock gets resolved.
cur1.connection.rollback()
future.result()
assert len(deadlock_check) == 1
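
The whole module above can be deleted because the hand-rolled non-blocking DBConnection wrapper is superseded by psycopg 3's native asyncio support. A minimal sketch of the replacement pattern (the dbname is hypothetical):

import asyncio
import psycopg

async def main():
    async with await psycopg.AsyncConnection.connect(dbname='nominatim') as conn:
        async with conn.cursor() as cur:
            await cur.execute('SELECT version()')
            print(await cur.fetchone())

asyncio.run(main())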

View File

@@ -8,7 +8,7 @@
Tests for specialised connection and cursor classes.
"""
import pytest
import psycopg2
import psycopg
import nominatim_db.db.connection as nc
@@ -73,7 +73,7 @@ def test_drop_many_tables(db, table_factory):
def test_drop_table_non_existing_force(db):
with pytest.raises(psycopg2.ProgrammingError, match='.*does not exist.*'):
with pytest.raises(psycopg.ProgrammingError, match='.*does not exist.*'):
nc.drop_tables(db, 'dfkjgjriogjigjgjrdghehtre', if_exists=False)
def test_connection_server_version_tuple(db):

View File

@@ -8,6 +8,7 @@
Tests for SQL preprocessing.
"""
import pytest
import pytest_asyncio
from nominatim_db.db.sql_preprocessor import SQLPreprocessor
@@ -54,3 +55,17 @@ def test_load_file_with_params(sql_preprocessor, sql_factory, temp_db_conn, temp
sql_preprocessor.run_sql_file(temp_db_conn, sqlfile, bar='XX', foo='ZZ')
assert temp_db_cursor.scalar('SELECT test()') == 'ZZ XX'
@pytest.mark.asyncio
async def test_load_parallel_file(dsn, sql_preprocessor, tmp_path, temp_db_cursor):
(tmp_path / 'test.sql').write_text("""
CREATE TABLE foo (a TEXT);
CREATE TABLE foo2(a TEXT);""" +
"\n---\nCREATE TABLE bar (b INT);")
await sql_preprocessor.run_parallel_sql_file(dsn, 'test.sql', num_threads=4)
assert temp_db_cursor.table_exists('foo')
assert temp_db_cursor.table_exists('foo2')
assert temp_db_cursor.table_exists('bar')
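
This is the conversion pattern repeated throughout the commit: a test that awaits a coroutine gains the pytest.mark.asyncio marker and pytest-asyncio supplies the event loop. Schematically, with an invented coroutine standing in for the code under test:

import asyncio
import pytest

async def fetch_answer():          # stand-in for the coroutine under test
    await asyncio.sleep(0)
    return 42

@pytest.mark.asyncio
async def test_fetch_answer():
    assert await fetch_answer() == 42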

View File

@@ -58,103 +58,3 @@ def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor):
db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)')
assert temp_db_cursor.row_set('SELECT * FROM test') == {(23, )}
class TestCopyBuffer:
TABLE_NAME = 'copytable'
@pytest.fixture(autouse=True)
def setup_test_table(self, table_factory):
table_factory(self.TABLE_NAME, 'col_a INT, col_b TEXT')
def table_rows(self, cursor):
return cursor.row_set('SELECT * FROM ' + self.TABLE_NAME)
def test_copybuffer_empty(self):
with db_utils.CopyBuffer() as buf:
buf.copy_out(None, "dummy")
def test_all_columns(self, temp_db_cursor):
with db_utils.CopyBuffer() as buf:
buf.add(3, 'hum')
buf.add(None, 'f\\t')
buf.copy_out(temp_db_cursor, self.TABLE_NAME)
assert self.table_rows(temp_db_cursor) == {(3, 'hum'), (None, 'f\\t')}
def test_selected_columns(self, temp_db_cursor):
with db_utils.CopyBuffer() as buf:
buf.add('foo')
buf.copy_out(temp_db_cursor, self.TABLE_NAME,
columns=['col_b'])
assert self.table_rows(temp_db_cursor) == {(None, 'foo')}
def test_reordered_columns(self, temp_db_cursor):
with db_utils.CopyBuffer() as buf:
buf.add('one', 1)
buf.add(' two ', 2)
buf.copy_out(temp_db_cursor, self.TABLE_NAME,
columns=['col_b', 'col_a'])
assert self.table_rows(temp_db_cursor) == {(1, 'one'), (2, ' two ')}
def test_special_characters(self, temp_db_cursor):
with db_utils.CopyBuffer() as buf:
buf.add('foo\tbar')
buf.add('sun\nson')
buf.add('\\N')
buf.copy_out(temp_db_cursor, self.TABLE_NAME,
columns=['col_b'])
assert self.table_rows(temp_db_cursor) == {(None, 'foo\tbar'),
(None, 'sun\nson'),
(None, '\\N')}
class TestCopyBufferJson:
TABLE_NAME = 'copytable'
@pytest.fixture(autouse=True)
def setup_test_table(self, table_factory):
table_factory(self.TABLE_NAME, 'col_a INT, col_b JSONB')
def table_rows(self, cursor):
cursor.execute('SELECT * FROM ' + self.TABLE_NAME)
results = {k: v for k,v in cursor}
assert len(results) == cursor.rowcount
return results
def test_json_object(self, temp_db_cursor):
with db_utils.CopyBuffer() as buf:
buf.add(1, json.dumps({'test': 'value', 'number': 1}))
buf.copy_out(temp_db_cursor, self.TABLE_NAME)
assert self.table_rows(temp_db_cursor) == \
{1: {'test': 'value', 'number': 1}}
def test_json_object_special_chras(self, temp_db_cursor):
with db_utils.CopyBuffer() as buf:
buf.add(1, json.dumps({'te\tst': 'va\nlue', 'nu"mber': None}))
buf.copy_out(temp_db_cursor, self.TABLE_NAME)
assert self.table_rows(temp_db_cursor) == \
{1: {'te\tst': 'va\nlue', 'nu"mber': None}}

View File

@@ -9,6 +9,7 @@ Tests for running the indexing.
"""
import itertools
import pytest
import pytest_asyncio
from nominatim_db.indexer import indexer
from nominatim_db.tokenizer import factory
@@ -21,9 +22,8 @@ class IndexerTestDB:
self.postcode_id = itertools.count(700000)
self.conn = conn
self.conn.set_isolation_level(0)
self.conn.autocommit = True
with self.conn.cursor() as cur:
cur.execute('CREATE EXTENSION hstore')
cur.execute("""CREATE TABLE placex (place_id BIGINT,
name HSTORE,
class TEXT,
@@ -156,7 +156,8 @@ def test_tokenizer(tokenizer_mock, project_env):
@pytest.mark.parametrize("threads", [1, 15])
def test_index_all_by_rank(test_db, threads, test_tokenizer):
@pytest.mark.asyncio
async def test_index_all_by_rank(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
@@ -165,7 +166,7 @@ def test_index_all_by_rank(test_db, threads, test_tokenizer):
assert test_db.osmline_unindexed() == 1
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_by_rank(0, 30)
await idx.index_by_rank(0, 30)
assert test_db.placex_unindexed() == 0
assert test_db.osmline_unindexed() == 0
@@ -190,7 +191,8 @@ def test_index_all_by_rank(test_db, threads, test_tokenizer):
@pytest.mark.parametrize("threads", [1, 15])
def test_index_partial_without_30(test_db, threads, test_tokenizer):
@pytest.mark.asyncio
async def test_index_partial_without_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
@@ -200,7 +202,7 @@ def test_index_partial_without_30(test_db, threads, test_tokenizer):
idx = indexer.Indexer('dbname=test_nominatim_python_unittest',
test_tokenizer, threads)
idx.index_by_rank(4, 15)
await idx.index_by_rank(4, 15)
assert test_db.placex_unindexed() == 19
assert test_db.osmline_unindexed() == 1
@@ -211,7 +213,8 @@ def test_index_partial_without_30(test_db, threads, test_tokenizer):
@pytest.mark.parametrize("threads", [1, 15])
def test_index_partial_with_30(test_db, threads, test_tokenizer):
@pytest.mark.asyncio
async def test_index_partial_with_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
@@ -220,7 +223,7 @@ def test_index_partial_with_30(test_db, threads, test_tokenizer):
assert test_db.osmline_unindexed() == 1
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_by_rank(28, 30)
await idx.index_by_rank(28, 30)
assert test_db.placex_unindexed() == 27
assert test_db.osmline_unindexed() == 0
@@ -230,7 +233,8 @@ def test_index_partial_with_30(test_db, threads, test_tokenizer):
WHERE indexed_status = 0 AND rank_address between 1 and 27""") == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_boundaries(test_db, threads, test_tokenizer):
@pytest.mark.asyncio
async def test_index_boundaries(test_db, threads, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
@@ -241,7 +245,7 @@ def test_index_boundaries(test_db, threads, test_tokenizer):
assert test_db.osmline_unindexed() == 1
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_boundaries(0, 30)
await idx.index_boundaries(0, 30)
assert test_db.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1
@@ -252,21 +256,23 @@ def test_index_boundaries(test_db, threads, test_tokenizer):
@pytest.mark.parametrize("threads", [1, 15])
def test_index_postcodes(test_db, threads, test_tokenizer):
@pytest.mark.asyncio
async def test_index_postcodes(test_db, threads, test_tokenizer):
for postcode in range(1000):
test_db.add_postcode('de', postcode)
for postcode in range(32000, 33000):
test_db.add_postcode('us', postcode)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_postcodes()
await idx.index_postcodes()
assert test_db.scalar("""SELECT count(*) FROM location_postcode
WHERE indexed_status != 0""") == 0
@pytest.mark.parametrize("analyse", [True, False])
def test_index_full(test_db, analyse, test_tokenizer):
@pytest.mark.asyncio
async def test_index_full(test_db, analyse, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
@@ -276,22 +282,9 @@ def test_index_full(test_db, analyse, test_tokenizer):
test_db.add_postcode('de', postcode)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
idx.index_full(analyse=analyse)
await idx.index_full(analyse=analyse)
assert test_db.placex_unindexed() == 0
assert test_db.osmline_unindexed() == 0
assert test_db.scalar("""SELECT count(*) FROM location_postcode
WHERE indexed_status != 0""") == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer):
monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)
for _ in range(1000):
test_db.add_place(rank_address=30, rank_search=30)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_by_rank(28, 30)
assert test_db.placex_unindexed() == 0

View File

@@ -36,9 +36,9 @@ class MockIcuWordTable:
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO word (word_token, type, word, info)
VALUES (%s, 'S', %s,
json_build_object('class', %s,
'type', %s,
'op', %s))
json_build_object('class', %s::text,
'type', %s::text,
'op', %s::text))
""", (word_token, word, cls, typ, oper))
self.conn.commit()
@@ -71,7 +71,7 @@ class MockIcuWordTable:
word = word_tokens[0]
for token in word_tokens:
cur.execute("""INSERT INTO word (word_id, word_token, type, word, info)
VALUES (%s, %s, 'H', %s, jsonb_build_object('lookup', %s))
VALUES (%s, %s, 'H', %s, jsonb_build_object('lookup', %s::text))
""", (word_id, token, word, word_tokens[0]))
self.conn.commit()
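
The added ::text casts are needed because, under psycopg 3's server-side binding, PostgreSQL cannot infer the types of parameters passed straight into json_build_object() and refuses the statement ("could not determine data type of parameter"). Sketch:

cur.execute("SELECT json_build_object('class', %s::text)", ('boundary',))   # ok
# Without the cast the server may reject the query with
# psycopg.errors.IndeterminateDatatype:
# cur.execute("SELECT json_build_object('class', %s)", ('boundary',))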

View File

@@ -68,7 +68,7 @@ class MockLegacyWordTable:
def get_special(self):
with self.conn.cursor() as cur:
cur.execute("""SELECT word_token, word, class, type, operator
cur.execute("""SELECT word_token, word, class as cls, type, operator
FROM word WHERE class != 'place'""")
result = set((tuple(row) for row in cur))
assert len(result) == cur.rowcount, "Word table has duplicates."

View File

@@ -9,8 +9,6 @@ Custom mocks for testing.
"""
import itertools
import psycopg2.extras
from nominatim_db.db import properties
# This must always point to the mock word table for the default tokenizer.
@@ -56,7 +54,6 @@ class MockPlacexTable:
admin_level=None, address=None, extratags=None, geom='POINT(10 4)',
country=None, housenumber=None, rank_search=30):
with self.conn.cursor() as cur:
psycopg2.extras.register_hstore(cur)
cur.execute("""INSERT INTO placex (place_id, osm_type, osm_id, class,
type, name, admin_level, address,
housenumber, rank_search,

View File

@@ -8,10 +8,11 @@
Tests for functions to import a new database.
"""
from pathlib import Path
from contextlib import closing
import pytest
import psycopg2
import pytest_asyncio
import psycopg
from psycopg import sql as pysql
from nominatim_db.tools import database_import
from nominatim_db.errors import UsageError
@@ -21,10 +22,7 @@ class TestDatabaseSetup:
@pytest.fixture(autouse=True)
def setup_nonexistant_db(self):
conn = psycopg2.connect(database='postgres')
try:
conn.set_isolation_level(0)
with psycopg.connect(dbname='postgres', autocommit=True) as conn:
with conn.cursor() as cur:
cur.execute(f'DROP DATABASE IF EXISTS {self.DBNAME}')
@@ -32,22 +30,17 @@ class TestDatabaseSetup:
with conn.cursor() as cur:
cur.execute(f'DROP DATABASE IF EXISTS {self.DBNAME}')
finally:
conn.close()
@pytest.fixture
def cursor(self):
conn = psycopg2.connect(database=self.DBNAME)
try:
with psycopg.connect(dbname=self.DBNAME) as conn:
with conn.cursor() as cur:
yield cur
finally:
conn.close()
def conn(self):
return closing(psycopg2.connect(database=self.DBNAME))
return psycopg.connect(dbname=self.DBNAME)
def test_setup_skeleton(self):
@@ -178,18 +171,19 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory, w
@pytest.mark.parametrize("threads", (1, 5))
def test_load_data(dsn, place_row, placex_table, osmline_table,
@pytest.mark.asyncio
async def test_load_data(dsn, place_row, placex_table, osmline_table,
temp_db_cursor, threads):
for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
temp_db_cursor.execute(f"""CREATE FUNCTION {func} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
""")
temp_db_cursor.execute(pysql.SQL("""CREATE FUNCTION {} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
""").format(pysql.Identifier(func)))
for oid in range(100, 130):
place_row(osm_id=oid)
place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
geom='SRID=4326;LINESTRING(0 0, 10 10)')
database_import.load_data(dsn, threads)
await database_import.load_data(dsn, threads)
assert temp_db_cursor.table_rows('placex') == 30
assert temp_db_cursor.table_rows('location_property_osmline') == 1
@@ -241,11 +235,12 @@ class TestSetupSQL:
@pytest.mark.parametrize("drop", [True, False])
def test_create_search_indices(self, temp_db_conn, temp_db_cursor, drop):
@pytest.mark.asyncio
async def test_create_search_indices(self, temp_db_conn, temp_db_cursor, drop):
self.write_sql('indices.sql',
"""CREATE FUNCTION test() RETURNS bool
AS $$ SELECT {{drop}} $$ LANGUAGE SQL""")
database_import.create_search_indices(temp_db_conn, self.config, drop)
await database_import.create_search_indices(temp_db_conn, self.config, drop)
assert temp_db_cursor.scalar('SELECT test()') == drop

View File

@@ -8,7 +8,6 @@
Tests for migration functions
"""
import pytest
import psycopg2.extras
from nominatim_db.tools import migration
from nominatim_db.errors import UsageError
@@ -44,7 +43,6 @@ def test_no_migration_old_versions(temp_db_with_extensions, table_factory, def_c
def test_set_up_migration_for_36(temp_db_with_extensions, temp_db_cursor,
table_factory, def_config, monkeypatch,
postprocess_mock):
psycopg2.extras.register_hstore(temp_db_cursor)
# don't actually run any migration, except the property table creation
monkeypatch.setattr(migration, '_MIGRATION_FUNCTIONS',
[((3, 5, 0, 99), migration.add_nominatim_property_table)])

View File

@@ -47,7 +47,7 @@ class MockPostcodeTable:
country_code, postcode,
geometry)
VALUES (nextval('seq_place'), 1, %s, %s,
'SRID=4326;POINT(%s %s)')""",
ST_SetSRID(ST_MakePoint(%s, %s), 4326))""",
(country, postcode, x, y))
self.conn.commit()
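
With server-side binding, %s markers inside a quoted string literal are plain text rather than parameters, so the old 'SRID=4326;POINT(%s %s)' trick that relied on psycopg2's client-side interpolation no longer works; the coordinates now travel through real placeholders into ST_MakePoint(). Sketch, with sample coordinates:

x, y = 8.5, 47.3   # hypothetical coordinates
cur.execute('SELECT ST_SetSRID(ST_MakePoint(%s, %s), 4326)', (x, y))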

View File

@@ -11,6 +11,7 @@ import tarfile
from textwrap import dedent
import pytest
import pytest_asyncio
from nominatim_db.db.connection import execute_scalar
from nominatim_db.tools import tiger_data, freeze
@@ -76,82 +77,91 @@ def csv_factory(tmp_path):
@pytest.mark.parametrize("threads", (1, 5))
def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock, threads):
tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
def_config, threads, tokenizer_mock())
@pytest.mark.asyncio
async def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock, threads):
await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
def_config, threads, tokenizer_mock())
assert tiger_table.count() == 6213
def test_add_tiger_data_database_frozen(def_config, temp_db_conn, tiger_table, tokenizer_mock,
@pytest.mark.asyncio
async def test_add_tiger_data_database_frozen(def_config, temp_db_conn, tiger_table, tokenizer_mock,
tmp_path):
freeze.drop_update_tables(temp_db_conn)
with pytest.raises(UsageError) as excinfo:
tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
assert "database frozen" in str(excinfo.value)
assert tiger_table.count() == 0
def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
@pytest.mark.asyncio
async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
tmp_path):
tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
assert tiger_table.count() == 0
def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
@pytest.mark.asyncio
async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
tmp_path):
sqlfile = tmp_path / '1010.csv'
sqlfile.write_text("""Random text""")
tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
assert tiger_table.count() == 0
def test_add_tiger_data_hnr_nan(def_config, tiger_table, tokenizer_mock,
@pytest.mark.asyncio
async def test_add_tiger_data_hnr_nan(def_config, tiger_table, tokenizer_mock,
csv_factory, tmp_path):
csv_factory('file1', hnr_from=99)
csv_factory('file2', hnr_from='L12')
csv_factory('file3', hnr_to='12.4')
tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
assert tiger_table.count() == 1
assert tiger_table.row()['start'] == 99
assert tiger_table.row().start == 99
@pytest.mark.parametrize("threads", (1, 5))
def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
@pytest.mark.asyncio
async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path, src_dir, threads):
tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
tar.close()
tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
tokenizer_mock())
await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
tokenizer_mock())
assert tiger_table.count() == 6213
def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
@pytest.mark.asyncio
async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path):
tarfile = tmp_path / 'sample.tar.gz'
tarfile.write_text("""Random text""")
with pytest.raises(UsageError):
tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
def test_add_tiger_data_empty_tarfile(def_config, tiger_table, tokenizer_mock,
@pytest.mark.asyncio
async def test_add_tiger_data_empty_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path):
tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
tar.add(__file__)
tar.close()
tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
tokenizer_mock())
await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
tokenizer_mock())
assert tiger_table.count() == 0