forked from hans/Nominatim
Merge pull request #3702 from lonvia/remove-tokenizer-dir
Remove automatic setup of tokenizer directory. So far, the tokenizer factory would create a directory for the tokenizer's private data and then pass the directory location to the tokenizer. The ICU tokenizer doesn't need any extra data anymore, so it doesn't make sense to create a directory which then remains empty. If a tokenizer needs such a directory in the future, it needs to create it on its own and make sure to correctly handle the situation where no project directory is used at all.
This commit is contained in:
@@ -234,6 +234,6 @@ def tokenizer_mock(monkeypatch, property_table):
|
||||
property_table.set('tokenizer', 'dummy')
|
||||
|
||||
def _create_tokenizer():
|
||||
return dummy_tokenizer.DummyTokenizer(None, None)
|
||||
return dummy_tokenizer.DummyTokenizer(None)
|
||||
|
||||
return _create_tokenizer
|
||||
|
||||
@@ -11,17 +11,16 @@ from nominatim_db.data.place_info import PlaceInfo
|
||||
from nominatim_db.config import Configuration
|
||||
|
||||
|
||||
def create(dsn, data_dir):
|
||||
def create(dsn):
|
||||
""" Create a new instance of the tokenizer provided by this module.
|
||||
"""
|
||||
return DummyTokenizer(dsn, data_dir)
|
||||
return DummyTokenizer(dsn)
|
||||
|
||||
|
||||
class DummyTokenizer:
|
||||
|
||||
def __init__(self, dsn, data_dir):
|
||||
def __init__(self, dsn):
|
||||
self.dsn = dsn
|
||||
self.data_dir = data_dir
|
||||
self.init_state = None
|
||||
self.analyser_cache = {}
|
||||
|
||||
|
||||
@@ -32,24 +32,9 @@ class TestFactory:
|
||||
|
||||
assert isinstance(tokenizer, DummyTokenizer)
|
||||
assert tokenizer.init_state == "new"
|
||||
assert (self.config.project_dir / 'tokenizer').is_dir()
|
||||
|
||||
assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
|
||||
|
||||
def test_setup_tokenizer_dir_exists(self):
|
||||
(self.config.project_dir / 'tokenizer').mkdir()
|
||||
|
||||
tokenizer = factory.create_tokenizer(self.config)
|
||||
|
||||
assert isinstance(tokenizer, DummyTokenizer)
|
||||
assert tokenizer.init_state == "new"
|
||||
|
||||
def test_setup_tokenizer_dir_failure(self):
|
||||
(self.config.project_dir / 'tokenizer').write_text("foo")
|
||||
|
||||
with pytest.raises(UsageError):
|
||||
factory.create_tokenizer(self.config)
|
||||
|
||||
def test_load_tokenizer(self):
|
||||
factory.create_tokenizer(self.config)
|
||||
|
||||
@@ -64,7 +49,6 @@ class TestFactory:
|
||||
self.config.project_dir = self.config.project_dir
|
||||
|
||||
factory.get_tokenizer_for_db(self.config)
|
||||
assert (self.config.project_dir / 'tokenizer').exists()
|
||||
|
||||
def test_load_missing_property(self, temp_db_cursor):
|
||||
factory.create_tokenizer(self.config)
|
||||
|
||||
@@ -39,12 +39,9 @@ def test_config(project_env, tmp_path):
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def tokenizer_factory(dsn, tmp_path, property_table,
|
||||
sql_preprocessor, place_table, word_table):
|
||||
(tmp_path / 'tokenizer').mkdir()
|
||||
|
||||
def tokenizer_factory(dsn, property_table, sql_preprocessor, place_table, word_table):
|
||||
def _maker():
|
||||
return icu_tokenizer.create(dsn, tmp_path / 'tokenizer')
|
||||
return icu_tokenizer.create(dsn)
|
||||
|
||||
return _maker
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ class MockPostcodeTable:
|
||||
|
||||
@pytest.fixture
|
||||
def tokenizer():
|
||||
return dummy_tokenizer.DummyTokenizer(None, None)
|
||||
return dummy_tokenizer.DummyTokenizer(None)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
||||
Reference in New Issue
Block a user