mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 18:37:58 +00:00
This adds the boilerplate for selecting configurable tokenizers. A tokenizer can be chosen at import time and will then install itself such that it is fixed for the given database import even when the software itself is updated. The legacy tokenizer implements Nominatim's traditional algorithms.
88 lines · 2.6 KiB · Python
"""
|
|
Tests for creating new tokenizers.
|
|
"""
|
|
import importlib
|
|
import pytest
|
|
|
|
from nominatim.db import properties
|
|
from nominatim.tokenizer import factory
|
|
from nominatim.errors import UsageError
|
|
import dummy_tokenizer
|
|
|
|
@pytest.fixture
def test_config(def_config, tmp_path):
    """Provide the default configuration with its project directory
    redirected into a fresh temporary path.
    """
    def_config.project_dir = tmp_path
    return def_config
|
|
|
|
|
|
@pytest.fixture
def tokenizer_import(monkeypatch):
    """Force the tokenizer factory to load the dummy tokenizer.

    Selects the 'dummy' tokenizer via the environment and patches
    importlib so that any module import yields the dummy_tokenizer
    test module instead of a real one.
    """
    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')

    def _fake_import(name, *args, **kwargs):
        # Whatever module name is requested, hand back the test dummy.
        return dummy_tokenizer

    monkeypatch.setattr(importlib, "import_module", _fake_import)
|
|
|
|
|
|
def test_setup_dummy_tokenizer(temp_db_conn, test_config,
                               tokenizer_import, property_table):
    """A fresh import creates the configured tokenizer, sets up its
    project directory and records the tokenizer name in the database.
    """
    result = factory.create_tokenizer(test_config)

    assert isinstance(result, dummy_tokenizer.DummyTokenizer)
    assert result.init_state == "new"
    assert (test_config.project_dir / 'tokenizer').is_dir()

    assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
|
|
|
|
|
|
def test_setup_tokenizer_dir_exists(test_config, tokenizer_import, property_table):
    """Creating a tokenizer succeeds even when the tokenizer
    directory is already present in the project directory.
    """
    (test_config.project_dir / 'tokenizer').mkdir()

    result = factory.create_tokenizer(test_config)

    assert isinstance(result, dummy_tokenizer.DummyTokenizer)
    assert result.init_state == "new"
|
|
|
|
|
|
def test_setup_tokenizer_dir_failure(test_config, tokenizer_import, property_table):
    """Setup must fail with UsageError when the tokenizer path exists
    but is a plain file rather than a directory.
    """
    (test_config.project_dir / 'tokenizer').write_text("foo")

    with pytest.raises(UsageError):
        factory.create_tokenizer(test_config)
|
|
|
|
|
|
def test_setup_bad_tokenizer_name(test_config, monkeypatch):
    """Requesting a tokenizer module that cannot be imported raises
    UsageError.

    NOTE: the tokenizer_import fixture is deliberately NOT used here,
    so 'dummy' names a non-existent module for the real importlib.
    """
    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')

    with pytest.raises(UsageError):
        factory.create_tokenizer(test_config)
|
|
|
|
def test_load_tokenizer(temp_db_conn, test_config,
                        tokenizer_import, property_table):
    """After an import, the tokenizer can be re-instantiated from the
    database and reports the 'loaded' state.
    """
    factory.create_tokenizer(test_config)

    result = factory.get_tokenizer_for_db(test_config)

    assert isinstance(result, dummy_tokenizer.DummyTokenizer)
    assert result.init_state == "loaded"
|
|
|
|
|
|
def test_load_no_tokenizer_dir(test_config, tokenizer_import, property_table):
    """Loading fails with UsageError when the project directory (and
    thus the tokenizer directory) no longer exists.
    """
    factory.create_tokenizer(test_config)

    # Point the project dir at a path that was never created.
    test_config.project_dir = test_config.project_dir / 'foo'

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(test_config)
|
|
|
|
|
|
def test_load_missing_property(temp_db_cursor, test_config, tokenizer_import, property_table):
    """Loading fails with UsageError when the 'tokenizer' property has
    been removed from the database.

    (Fixes misspelled test name: 'propoerty' -> 'property'. Pytest
    discovery is unaffected as the test_ prefix is preserved.)
    """
    factory.create_tokenizer(test_config)

    # Wipe all stored properties, including the tokenizer name.
    temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(test_config)
|
|
|