require tokenizer for indexer

Sarah Hoffmann
2021-04-24 11:25:47 +02:00
parent 1b1ed820c3
commit e1c5673ac3
11 changed files with 88 additions and 56 deletions

View File

@@ -1,3 +1,4 @@
+import importlib
import itertools
import sys
from pathlib import Path
@@ -15,6 +16,9 @@ sys.path.insert(0, str(SRC_DIR.resolve()))
from nominatim.config import Configuration
from nominatim.db import connection
from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.db import properties
+import dummy_tokenizer

class _TestingCursor(psycopg2.extras.DictCursor):
    """ Extension to the DictCursor class that provides execution
@@ -292,3 +296,17 @@ def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
                    sql=tmp_path, data=SRC_DIR / 'data')

    return SQLPreprocessor(temp_db_conn, cfg)

+@pytest.fixture
+def tokenizer_mock(monkeypatch, property_table, temp_db_conn):
+    """ Sets up the configuration so that the test dummy tokenizer will be
+        loaded.
+    """
+    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
+
+    def _import_dummy(module, *args, **kwargs):
+        return dummy_tokenizer
+
+    monkeypatch.setattr(importlib, "import_module", _import_dummy)
+    properties.set_property(temp_db_conn, 'tokenizer', 'dummy')
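
(For reference: the dummy_tokenizer test module injected by this fixture is not part of the diff. Judging from the assertions in the factory tests below, a minimal version would look roughly like this; names and signatures are inferred, not authoritative.)

class DummyTokenizer:
    def __init__(self, dsn, data_dir):
        self.dsn = dsn
        self.data_dir = data_dir
        self.init_state = None

    def init_new_db(self, config):
        # assumed hook run by factory.create_tokenizer() on a fresh import
        self.init_state = "new"

    def init_from_project(self):
        # assumed hook run by factory.get_tokenizer_for_db() on an existing db
        self.init_state = "loaded"

def create(dsn, data_dir):
    # assumed module-level entry point called by the tokenizer factory
    return DummyTokenizer(dsn, data_dir)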

View File

@@ -57,6 +57,22 @@ def mock_func_factory(monkeypatch):
    return get_mock

+@pytest.fixture
+def tokenizer_mock(monkeypatch):
+    class DummyTokenizer:
+        def __init__(self, *args, **kwargs):
+            self.update_sql_functions_called = False
+
+        def update_sql_functions(self, *args):
+            self.update_sql_functions_called = True
+
+    tok = DummyTokenizer()
+    monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db',
+                        lambda *args: tok)
+
+    return tok
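
(The mock only needs to satisfy what 'nominatim refresh --functions' does with the tokenizer. A sketch of the assumed code path under test, not the actual implementation:)

# refresh --functions is expected to do roughly:
tokenizer = nominatim.tokenizer.factory.get_tokenizer_for_db(config)
tokenizer.update_sql_functions(config)   # flips update_sql_functions_called above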

def test_cli_help(capsys):
    """ Running nominatim tool without arguments prints help.
    """
@@ -221,7 +237,8 @@ def test_add_data_command(mock_run_legacy, name, oid):
                          (['--boundaries-only'], 1, 0),
                          (['--no-boundaries'], 0, 1),
                          (['--boundaries-only', '--no-boundaries'], 0, 0)])
-def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
+def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
+                       params, do_bnds, do_ranks):
    temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")

    bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
    rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
@@ -253,20 +270,12 @@ def test_refresh_command(mock_func_factory, temp_db, command, func):
    assert func_mock.called == 1

-def test_refresh_create_functions(mock_func_factory, monkeypatch, temp_db):
-    class DummyTokenizer:
-        def update_sql_functions(self, *args):
-            self.called = True
-
+def test_refresh_create_functions(mock_func_factory, temp_db, tokenizer_mock):
    func_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
-    tok = DummyTokenizer()
-    monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' ,
-                        lambda *args: tok)

    assert 0 == call_nominatim('refresh', '--functions')
    assert func_mock.called == 1
-    assert hasattr(tok, 'called')
+    assert tokenizer_mock.update_sql_functions_called

def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):

View File

@@ -27,7 +27,7 @@ def call_nominatim(*args):
                          cli_args=['replication'] + list(args))

@pytest.fixture
-def index_mock(monkeypatch):
+def index_mock(monkeypatch, tokenizer_mock):
    mock = MockParamCapture()
    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock)
    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock)
@@ -52,7 +52,7 @@ def init_status(temp_db_conn, status_table):

@pytest.fixture
-def update_mock(mock_func_factory, init_status):
+def update_mock(mock_func_factory, init_status, tokenizer_mock):
    return mock_func_factory(nominatim.tools.replication, 'update')

@pytest.mark.parametrize("params,func", [
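
(MockParamCapture, used by index_mock above, comes from the test support code and is not shown in this diff. A minimal stand-in with the behaviour the fixtures rely on would be the following; the exact implementation may differ:)

class MockParamCapture:
    """ Records how often the mock was called and the parameters
        of the last call.
    """
    def __init__(self, retval=0):
        self.called = 0
        self.return_value = retval

    def __call__(self, *args, **kwargs):
        self.called += 1
        self.last_args = args
        self.last_kwargs = kwargs
        return self.return_value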

View File

@@ -6,6 +6,7 @@ import psycopg2
import pytest

from nominatim.indexer import indexer
+from nominatim.tokenizer import factory

class IndexerTestDB:
@@ -115,8 +116,14 @@ def test_db(temp_db_conn):
    yield IndexerTestDB(temp_db_conn)

+@pytest.fixture
+def test_tokenizer(tokenizer_mock, def_config, tmp_path):
+    def_config.project_dir = tmp_path
+    return factory.create_tokenizer(def_config)

@pytest.mark.parametrize("threads", [1, 15])
-def test_index_all_by_rank(test_db, threads):
+def test_index_all_by_rank(test_db, threads, test_tokenizer):
    for rank in range(31):
        test_db.add_place(rank_address=rank, rank_search=rank)
    test_db.add_osmline()
@@ -124,7 +131,7 @@ def test_index_all_by_rank(test_db, threads):
    assert 31 == test_db.placex_unindexed()
    assert 1 == test_db.osmline_unindexed()

-    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    idx.index_by_rank(0, 30)

    assert 0 == test_db.placex_unindexed()
@@ -155,7 +162,7 @@ def test_index_all_by_rank(test_db, threads):

@pytest.mark.parametrize("threads", [1, 15])
-def test_index_partial_without_30(test_db, threads):
+def test_index_partial_without_30(test_db, threads, test_tokenizer):
    for rank in range(31):
        test_db.add_place(rank_address=rank, rank_search=rank)
    test_db.add_osmline()
@@ -163,7 +170,8 @@ def test_index_partial_without_30(test_db, threads):
    assert 31 == test_db.placex_unindexed()
    assert 1 == test_db.osmline_unindexed()

-    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest',
+                          test_tokenizer, threads)
    idx.index_by_rank(4, 15)

    assert 19 == test_db.placex_unindexed()
@@ -175,7 +183,7 @@ def test_index_partial_without_30(test_db, threads):

@pytest.mark.parametrize("threads", [1, 15])
-def test_index_partial_with_30(test_db, threads):
+def test_index_partial_with_30(test_db, threads, test_tokenizer):
    for rank in range(31):
        test_db.add_place(rank_address=rank, rank_search=rank)
    test_db.add_osmline()
@@ -183,7 +191,7 @@ def test_index_partial_with_30(test_db, threads):
    assert 31 == test_db.placex_unindexed()
    assert 1 == test_db.osmline_unindexed()

-    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    idx.index_by_rank(28, 30)

    assert 27 == test_db.placex_unindexed()
@@ -194,7 +202,7 @@ def test_index_partial_with_30(test_db, threads):
                               WHERE indexed_status = 0 AND rank_address between 1 and 27""")

@pytest.mark.parametrize("threads", [1, 15])
-def test_index_boundaries(test_db, threads):
+def test_index_boundaries(test_db, threads, test_tokenizer):
    for rank in range(4, 10):
        test_db.add_admin(rank_address=rank, rank_search=rank)
    for rank in range(31):
@@ -204,7 +212,7 @@ def test_index_boundaries(test_db, threads):
    assert 37 == test_db.placex_unindexed()
    assert 1 == test_db.osmline_unindexed()

-    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    idx.index_boundaries(0, 30)

    assert 31 == test_db.placex_unindexed()
@@ -216,13 +224,13 @@ def test_index_boundaries(test_db, threads):

@pytest.mark.parametrize("threads", [1, 15])
-def test_index_postcodes(test_db, threads):
+def test_index_postcodes(test_db, threads, test_tokenizer):
    for postcode in range(1000):
        test_db.add_postcode('de', postcode)
    for postcode in range(32000, 33000):
        test_db.add_postcode('us', postcode)

-    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    idx.index_postcodes()

    assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
@@ -230,7 +238,7 @@ def test_index_postcodes(test_db, threads):

@pytest.mark.parametrize("analyse", [True, False])
-def test_index_full(test_db, analyse):
+def test_index_full(test_db, analyse, test_tokenizer):
    for rank in range(4, 10):
        test_db.add_admin(rank_address=rank, rank_search=rank)
    for rank in range(31):
@@ -239,7 +247,7 @@ def test_index_full(test_db, analyse):
    for postcode in range(1000):
        test_db.add_postcode('de', postcode)

-    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', 4)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
    idx.index_full(analyse=analyse)

    assert 0 == test_db.placex_unindexed()
@@ -249,13 +257,13 @@ def test_index_full(test_db, analyse):

@pytest.mark.parametrize("threads", [1, 15])
-def test_index_reopen_connection(test_db, threads, monkeypatch):
+def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer):
    monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)

    for _ in range(1000):
        test_db.add_place(rank_address=30, rank_search=30)

-    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    idx.index_by_rank(28, 30)

    assert 0 == test_db.placex_unindexed()
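
(Every call site above reflects the change this commit enforces: the Indexer constructor now takes the tokenizer as a required positional argument between the DSN and the thread count. A sketch of the implied signature; the attribute names in the body are assumptions:)

class Indexer:
    """ Main indexing routine. """
    def __init__(self, dsn, tokenizer, num_threads):
        self.dsn = dsn
        self.tokenizer = tokenizer      # required now, no default
        self.num_threads = num_threads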

View File

@@ -7,7 +7,7 @@ import pytest
from nominatim.db import properties
from nominatim.tokenizer import factory
from nominatim.errors import UsageError
-import dummy_tokenizer
+from dummy_tokenizer import DummyTokenizer

@pytest.fixture
def test_config(def_config, tmp_path):
@@ -15,37 +15,27 @@ def test_config(def_config, tmp_path):
    return def_config

-@pytest.fixture
-def tokenizer_import(monkeypatch):
-    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
-
-    def _import_dummy(module, *args, **kwargs):
-        return dummy_tokenizer
-
-    monkeypatch.setattr(importlib, "import_module", _import_dummy)
-
def test_setup_dummy_tokenizer(temp_db_conn, test_config,
-                               tokenizer_import, property_table):
+                               tokenizer_mock, property_table):
    tokenizer = factory.create_tokenizer(test_config)

-    assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
+    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "new"
    assert (test_config.project_dir / 'tokenizer').is_dir()

    assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'

-def test_setup_tokenizer_dir_exists(test_config, tokenizer_import, property_table):
+def test_setup_tokenizer_dir_exists(test_config, tokenizer_mock, property_table):
    (test_config.project_dir / 'tokenizer').mkdir()

    tokenizer = factory.create_tokenizer(test_config)

-    assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
+    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "new"

-def test_setup_tokenizer_dir_failure(test_config, tokenizer_import, property_table):
+def test_setup_tokenizer_dir_failure(test_config, tokenizer_mock, property_table):
    (test_config.project_dir / 'tokenizer').write_text("foo")

    with pytest.raises(UsageError):
@@ -59,16 +49,16 @@ def test_setup_bad_tokenizer_name(test_config, monkeypatch):
        factory.create_tokenizer(test_config)

def test_load_tokenizer(temp_db_conn, test_config,
-                        tokenizer_import, property_table):
+                        tokenizer_mock, property_table):
    factory.create_tokenizer(test_config)

    tokenizer = factory.get_tokenizer_for_db(test_config)

-    assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
+    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "loaded"

-def test_load_no_tokenizer_dir(test_config, tokenizer_import, property_table):
+def test_load_no_tokenizer_dir(test_config, tokenizer_mock, property_table):
    factory.create_tokenizer(test_config)

    test_config.project_dir = test_config.project_dir / 'foo'
@@ -77,7 +67,7 @@ def test_load_no_tokenizer_dir(test_config, tokenizer_import, property_table):
        factory.get_tokenizer_for_db(test_config)

-def test_load_missing_propoerty(temp_db_cursor, test_config, tokenizer_import, property_table):
+def test_load_missing_propoerty(temp_db_cursor, test_config, tokenizer_mock, property_table):
    factory.create_tokenizer(test_config)

    temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")
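
(Taken together, the two factory entry points exercised by these tests behave as follows. A usage sketch based on the assertions above; 'config' stands in for a loaded Configuration object:)

from nominatim.tokenizer import factory

# Fresh import: instantiate the tokenizer named in NOMINATIM_TOKENIZER, create
# <project_dir>/tokenizer and record the choice in nominatim_properties.
tokenizer = factory.create_tokenizer(config)      # init_state == "new"

# Existing database: read the 'tokenizer' property back and load that tokenizer.
tokenizer = factory.get_tokenizer_for_db(config)  # init_state == "loaded"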