remove automatic setup of tokenizer directory

The ICU tokenizer doesn't need any extra data anymore, so it doesn't
make sense to create a directory which then remains empty. If a
tokenizer needs such a directory in the future, it needs to create
it on its own and make sure to correctly handle the situation where
no project directory is used at all.
This commit is contained in:
Sarah Hoffmann
2025-04-02 20:20:04 +02:00
parent 9cf5eee5d4
commit 186f562dd7
8 changed files with 16 additions and 52 deletions

View File

@@ -32,24 +32,9 @@ class TestFactory:
assert isinstance(tokenizer, DummyTokenizer)
assert tokenizer.init_state == "new"
assert (self.config.project_dir / 'tokenizer').is_dir()
assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
def test_setup_tokenizer_dir_exists(self):
(self.config.project_dir / 'tokenizer').mkdir()
tokenizer = factory.create_tokenizer(self.config)
assert isinstance(tokenizer, DummyTokenizer)
assert tokenizer.init_state == "new"
def test_setup_tokenizer_dir_failure(self):
(self.config.project_dir / 'tokenizer').write_text("foo")
with pytest.raises(UsageError):
factory.create_tokenizer(self.config)
def test_load_tokenizer(self):
factory.create_tokenizer(self.config)
@@ -64,7 +49,6 @@ class TestFactory:
self.config.project_dir = self.config.project_dir
factory.get_tokenizer_for_db(self.config)
assert (self.config.project_dir / 'tokenizer').exists()
def test_load_missing_property(self, temp_db_cursor):
factory.create_tokenizer(self.config)

View File

@@ -39,12 +39,9 @@ def test_config(project_env, tmp_path):
@pytest.fixture
def tokenizer_factory(dsn, tmp_path, property_table,
sql_preprocessor, place_table, word_table):
(tmp_path / 'tokenizer').mkdir()
def tokenizer_factory(dsn, property_table, sql_preprocessor, place_table, word_table):
def _maker():
return icu_tokenizer.create(dsn, tmp_path / 'tokenizer')
return icu_tokenizer.create(dsn)
return _maker