Files
Nominatim/test/python/test_tokenizer_factory.py
Sarah Hoffmann af968d4903 introduce tokenizer modules
This adds the boilerplate for selecting configurable tokenizers.
A tokenizer can be chosen at import time and will then install
itself such that it is fixed for the given database import even
when the software itself is updated.

The legacy tokenizer implements Nominatim's traditional algorithms.
2021-04-30 11:29:57 +02:00

88 lines
2.6 KiB
Python

"""
Tests for creating new tokenizers.
"""
import importlib
import pytest
from nominatim.db import properties
from nominatim.tokenizer import factory
from nominatim.errors import UsageError
import dummy_tokenizer
@pytest.fixture
def test_config(def_config, tmp_path):
    """Default configuration with the project directory redirected
       to a throw-away temporary path.
    """
    cfg = def_config
    cfg.project_dir = tmp_path

    return cfg
@pytest.fixture
def tokenizer_import(monkeypatch):
    """Force the 'dummy' tokenizer to be selected and reroute all module
       imports so that the dummy test tokenizer is returned instead.
    """
    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')

    def _fake_import(_name, *_args, **_kwargs):
        # Whatever module is requested, hand back the dummy tokenizer.
        return dummy_tokenizer

    monkeypatch.setattr(importlib, "import_module", _fake_import)
def test_setup_dummy_tokenizer(temp_db_conn, test_config,
                               tokenizer_import, property_table):
    """Creating a tokenizer must instantiate it, create the tokenizer
       directory and record the tokenizer name in the property table.
    """
    tokenizer = factory.create_tokenizer(test_config)

    assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
    assert tokenizer.init_state == "new"

    outdir = test_config.project_dir / 'tokenizer'
    assert outdir.is_dir()
    assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
def test_setup_tokenizer_dir_exists(test_config, tokenizer_import, property_table):
    """A pre-existing tokenizer directory must not prevent setup."""
    (test_config.project_dir / 'tokenizer').mkdir()

    tokenizer = factory.create_tokenizer(test_config)

    assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
    assert tokenizer.init_state == "new"
def test_setup_tokenizer_dir_failure(test_config, tokenizer_import, property_table):
    """Setup must raise a usage error when the tokenizer directory
       cannot be created because a file of the same name is in the way.
    """
    blocker = test_config.project_dir / 'tokenizer'
    blocker.write_text("foo")

    with pytest.raises(UsageError):
        factory.create_tokenizer(test_config)
def test_setup_bad_tokenizer_name(test_config, monkeypatch):
    """An unknown tokenizer name must lead to a usage error.

       Note: unlike the other tests, importing is NOT patched here,
       so the 'dummy' module genuinely cannot be found.
    """
    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')

    with pytest.raises(UsageError):
        factory.create_tokenizer(test_config)
def test_load_tokenizer(temp_db_conn, test_config,
                        tokenizer_import, property_table):
    """A tokenizer previously set up for the database can be loaded back
       and reports the 'loaded' initialisation state.
    """
    factory.create_tokenizer(test_config)

    reloaded = factory.get_tokenizer_for_db(test_config)

    assert isinstance(reloaded, dummy_tokenizer.DummyTokenizer)
    assert reloaded.init_state == "loaded"
def test_load_no_tokenizer_dir(test_config, tokenizer_import, property_table):
    """Loading must fail when the tokenizer directory has gone missing."""
    factory.create_tokenizer(test_config)

    # Point the project dir somewhere the tokenizer dir does not exist.
    test_config.project_dir = test_config.project_dir / 'foo'

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(test_config)
def test_load_missing_property(temp_db_cursor, test_config, tokenizer_import, property_table):
    """Loading must fail when the tokenizer name is missing from the
       database property table.

       (Renamed from 'test_load_missing_propoerty' to fix the typo;
       pytest discovers the test by its 'test_' prefix, nothing refers
       to it by name.)
    """
    factory.create_tokenizer(test_config)

    # Wipe all properties so the 'tokenizer' entry is gone.
    temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(test_config)