Merge pull request #2780 from lonvia/python-modules-in-project-directory

Support for external sanitizer and token analysis modules
2022-07-28 21:58:04 +02:00
parent 95d4061b2a d819036daa
commit a8b037669a
10 changed files with 276 additions and 102 deletions
--- a/nominatim/config.py
+++ b/nominatim/config.py
@@ -8,8 +8,10 @@
 Nominatim configuration accessor.
 """
 from typing import Dict, Any, List, Mapping, Optional
 import importlib.util
 import logging
 import os
 import sys
 from pathlib import Path
 import json
 import yaml
@@ -73,6 +75,7 @@ class Configuration:
            data: Path
        self.lib_dir = _LibDirs()
        self._private_plugins: Dict[str, object] = {}
    def set_libdirs(self, **kwargs: StrPath) -> None:
@@ -219,6 +222,49 @@ class Configuration:
        return result
    def load_plugin_module(self, module_name: str, internal_path: str) -> Any:
        """ Load a Python module as a plugin and return the module object.

            The module_name may have three variants:

            * A name without any '.' is assumed to be an internal module
              and will be searched relative to `internal_path`. Hyphens
              in the name are replaced with underscores.
            * If the name ends in `.py`, module_name is assumed to be a
              file name relative to the project directory. Such modules
              are cached on this configuration object and are never added
              to `sys.modules`.
            * Any other name is assumed to be an absolute module name.

            In all variants the first character of the name must be valid
            as the start of a Python identifier (a letter or underscore).

            Raises a UsageError when the name is invalid or when a `.py`
            file cannot be found in or loaded from the project directory.
            Exceptions raised while executing or importing the module are
            passed on unchanged.
        """
        if not module_name or not module_name[0].isidentifier():
            raise UsageError(f'Invalid module name {module_name}')

        if '.' not in module_name:
            module_name = module_name.replace('-', '_')
            full_module = f'{internal_path}.{module_name}'
            return sys.modules.get(full_module) or importlib.import_module(full_module)

        if module_name.endswith('.py'):
            if self.project_dir is None or not (self.project_dir / module_name).exists():
                raise UsageError(f"Cannot find module '{module_name}' in project directory.")

            if module_name in self._private_plugins:
                return self._private_plugins[module_name]

            file_path = str(self.project_dir / module_name)
            spec = importlib.util.spec_from_file_location(module_name, file_path)
            # A file path is not an importable module name, so there is no
            # point in falling back to a regular import when no spec or
            # loader can be determined.
            if spec is None or spec.loader is None:
                raise UsageError(f"Cannot load module '{module_name}' from project directory.")

            module = importlib.util.module_from_spec(spec)
            # Do not add to global modules because there is no standard
            # module name that Python can resolve.
            self._private_plugins[module_name] = module
            try:
                spec.loader.exec_module(module)
            except BaseException:
                # Never keep a half-initialised module in the cache,
                # otherwise later calls would silently return it.
                self._private_plugins.pop(module_name, None)
                raise

            return module

        return sys.modules.get(module_name) or importlib.import_module(module_name)
    def find_config_file(self, filename: StrPath,
                         config: Optional[str] = None) -> Path:
        """ Resolve the location of a configuration file given a filename and
--- a/nominatim/tokenizer/icu_rule_loader.py
+++ b/nominatim/tokenizer/icu_rule_loader.py
@@ -8,7 +8,6 @@
 Helper class to create ICU rules from a configuration file.
 """
 from typing import Mapping, Any, Dict, Optional
 import importlib
 import io
 import json
 import logging
@@ -45,6 +44,7 @@ class ICURuleLoader:
    """
    def __init__(self, config: Configuration) -> None:
        self.config = config
        rules = config.load_sub_configuration('icu_tokenizer.yaml',
                                              config='TOKENIZER_CONFIG')
@@ -92,7 +92,7 @@ class ICURuleLoader:
    def make_sanitizer(self) -> PlaceSanitizer:
        """ Create a place sanitizer from the configured rules.
        """
-        return PlaceSanitizer(self.sanitizer_rules)
+        return PlaceSanitizer(self.sanitizer_rules, self.config)
    def make_token_analysis(self) -> ICUTokenAnalysis:
@@ -144,7 +144,9 @@ class ICURuleLoader:
                    LOG.fatal("ICU tokenizer configuration has two token "
                              "analyzers with id '%s'.", name)
                raise UsageError("Syntax error in ICU tokenizer config.")
-            self.analysis[name] = TokenAnalyzerRule(section, self.normalization_rules)
+            self.analysis[name] = TokenAnalyzerRule(section,
                                                    self.normalization_rules,
                                                    self.config)
    @staticmethod
@@ -168,15 +170,18 @@ class TokenAnalyzerRule:
        and creates a new token analyzer on request.
    """
-    def __init__(self, rules: Mapping[str, Any], normalization_rules: str) -> None:
+    def __init__(self, rules: Mapping[str, Any], normalization_rules: str,
-        # Find the analysis module
+                 config: Configuration) -> None:
-        module_name = 'nominatim.tokenizer.token_analysis.' \
+        analyzer_name = _get_section(rules, 'analyzer')
-                      + _get_section(rules, 'analyzer').replace('-', '_')
+        if not analyzer_name or not isinstance(analyzer_name, str):
-        self._analysis_mod: AnalysisModule = importlib.import_module(module_name)
+            raise UsageError("'analyzer' parameter needs to be simple string")
        self._analysis_mod: AnalysisModule = \
            config.load_plugin_module(analyzer_name, 'nominatim.tokenizer.token_analysis')
        # Load the configuration.
        self.config = self._analysis_mod.configure(rules, normalization_rules)
    def create(self, normalizer: Any, transliterator: Any) -> Analyser:
        """ Create a new analyser instance for the given rule.
        """
--- a/nominatim/tokenizer/place_sanitizer.py
+++ b/nominatim/tokenizer/place_sanitizer.py
@@ -9,9 +9,9 @@ Handler for cleaning name and address tags in place information before it
 is handed to the token analysis.
 """
 from typing import Optional, List, Mapping, Sequence, Callable, Any, Tuple
 import importlib
 from nominatim.errors import UsageError
 from nominatim.config import Configuration
 from nominatim.tokenizer.sanitizers.config import SanitizerConfig
 from nominatim.tokenizer.sanitizers.base import SanitizerHandler, ProcessInfo, PlaceName
 from nominatim.data.place_info import PlaceInfo
@@ -22,16 +22,21 @@ class PlaceSanitizer:
        names and address before they are used by the token analysers.
    """
-    def __init__(self, rules: Optional[Sequence[Mapping[str, Any]]]) -> None:
+    def __init__(self, rules: Optional[Sequence[Mapping[str, Any]]],
                 config: Configuration) -> None:
        self.handlers: List[Callable[[ProcessInfo], None]] = []
        if rules:
            for func in rules:
                if 'step' not in func:
                    raise UsageError("Sanitizer rule is missing the 'step' attribute.")
-                module_name = 'nominatim.tokenizer.sanitizers.' + func['step'].replace('-', '_')
+                if not isinstance(func['step'], str):
-                handler_module: SanitizerHandler = importlib.import_module(module_name)
+                    raise UsageError("'step' attribute must be a simple string.")
-                self.handlers.append(handler_module.create(SanitizerConfig(func)))
+
                module: SanitizerHandler = \
                    config.load_plugin_module(func['step'], 'nominatim.tokenizer.sanitizers')
                self.handlers.append(module.create(SanitizerConfig(func)))
    def process_names(self, place: PlaceInfo) -> Tuple[List[PlaceName], List[PlaceName]]:
--- a/test/python/config/test_config_load_module.py
+++ b/test/python/config/test_config_load_module.py
@@ -0,0 +1,81 @@
 # SPDX-License-Identifier: GPL-2.0-only
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
 # Copyright (C) 2022 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Test for loading extra Python modules.
 """
 from pathlib import Path
 import sys
 import pytest
 from nominatim.config import Configuration
@pytest.fixture
def test_config(src_dir, tmp_path):
    """ Provide a configuration object whose project and config
        directories are freshly created below the temporary test path.
    """
    project_dir = tmp_path / 'project'
    config_dir = tmp_path / 'config'
    for directory in (project_dir, config_dir):
        directory.mkdir()

    conf = Configuration(project_dir, src_dir / 'settings')
    conf.config_dir = config_dir

    return conf
 def test_load_default_module(test_config):
    # A name without dots is looked up relative to the internal path.
    version_module = test_config.load_plugin_module('version', 'nominatim')

    assert isinstance(version_module.NOMINATIM_VERSION, tuple)
 def test_load_default_module_with_hyphen(test_config):
    # The hyphenated name must resolve to the module 'place_info'.
    place_info_module = test_config.load_plugin_module('place-info', 'nominatim.data')

    # The isinstance() check itself always holds; the attribute lookup
    # is what fails when the wrong module was loaded.
    assert isinstance(place_info_module.PlaceInfo, object)
 def test_load_plugin_module(test_config, tmp_path):
    plugin_file = tmp_path / 'project' / 'testpath' / 'mymod.py'
    plugin_file.parent.mkdir()
    plugin_file.write_text("def my_test_function():\n  return 'gjwitlsSG42TG%'")

    first_load = test_config.load_plugin_module('testpath/mymod.py', 'private.something')

    assert first_load.my_test_function() == 'gjwitlsSG42TG%'

    # Loading again after the file has changed must still yield the
    # module as it was loaded the first time.
    plugin_file.write_text("def my_test_function():\n  return 'hjothjorhj'")

    second_load = test_config.load_plugin_module('testpath/mymod.py', 'private.something')

    assert second_load.my_test_function() == 'gjwitlsSG42TG%'
 def test_load_external_library_module(test_config, tmp_path, monkeypatch):
    package_name = 'foogurenqodr4'
    qualified_name = f'{package_name}.tester'

    # Build a minimal package in a private directory and make it importable.
    pythonpath = tmp_path / 'priv-python'
    package_dir = pythonpath / package_name
    tester_file = package_dir / 'tester.py'

    pythonpath.mkdir()
    package_dir.mkdir()
    (package_dir / '__init__.py').write_text('')
    tester_file.write_text("def my_test_function():\n  return 'gjwitlsSG42TG%'")

    monkeypatch.syspath_prepend(pythonpath)

    first_load = test_config.load_plugin_module(qualified_name, 'private.something')

    assert first_load.my_test_function() == 'gjwitlsSG42TG%'

    # Loading again after the file has changed must still yield the
    # module as it was imported the first time.
    tester_file.write_text("def my_test_function():\n  return 'dfigjreigj'")

    second_load = test_config.load_plugin_module(qualified_name, 'private.something')

    assert second_load.my_test_function() == 'gjwitlsSG42TG%'

    # Remove the test module from the global module table again.
    del sys.modules[qualified_name]
--- a/test/python/tokenizer/sanitizers/test_clean_housenumbers.py
+++ b/test/python/tokenizer/sanitizers/test_clean_housenumbers.py
@@ -13,14 +13,14 @@ from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
 from nominatim.data.place_info import PlaceInfo
@pytest.fixture
-def sanitize(request):
+def sanitize(request, def_config):
    sanitizer_args = {'step': 'clean-housenumbers'}
    for mark in request.node.iter_markers(name="sanitizer_params"):
        sanitizer_args.update({k.replace('_', '-') : v for k,v in mark.kwargs.items()})
    def _run(**kwargs):
        place = PlaceInfo({'address': kwargs})
-        _, address = PlaceSanitizer([sanitizer_args]).process_names(place)
+        _, address = PlaceSanitizer([sanitizer_args], def_config).process_names(place)
        return sorted([(p.kind, p.name) for p in address])
@@ -45,24 +45,24 @@ def test_filter_kind(sanitize):
@pytest.mark.parametrize('number', ('6523', 'n/a', '4'))
-def test_convert_to_name_converted(number):
+def test_convert_to_name_converted(def_config, number):
    sanitizer_args = {'step': 'clean-housenumbers',
                      'convert-to-name': (r'\d+', 'n/a')}
    place = PlaceInfo({'address': {'housenumber': number}})
-    names, address = PlaceSanitizer([sanitizer_args]).process_names(place)
+    names, address = PlaceSanitizer([sanitizer_args], def_config).process_names(place)
    assert ('housenumber', number) in set((p.kind, p.name) for p in names)
    assert 'housenumber' not in set(p.kind for p in address)
@pytest.mark.parametrize('number', ('a54', 'n.a', 'bow'))
-def test_convert_to_name_unconverted(number):
+def test_convert_to_name_unconverted(def_config, number):
    sanitizer_args = {'step': 'clean-housenumbers',
                      'convert-to-name': (r'\d+', 'n/a')}
    place = PlaceInfo({'address': {'housenumber': number}})
-    names, address = PlaceSanitizer([sanitizer_args]).process_names(place)
+    names, address = PlaceSanitizer([sanitizer_args], def_config).process_names(place)
    assert 'housenumber' not in set(p.kind for p in names)
    assert ('housenumber', number) in set((p.kind, p.name) for p in address)
--- a/test/python/tokenizer/sanitizers/test_clean_postcodes.py
+++ b/test/python/tokenizer/sanitizers/test_clean_postcodes.py
@@ -25,7 +25,7 @@ def sanitize(def_config, request):
        if country is not None:
            pi['country_code'] = country
-        _, address = PlaceSanitizer([sanitizer_args]).process_names(PlaceInfo(pi))
+        _, address = PlaceSanitizer([sanitizer_args], def_config).process_names(PlaceInfo(pi))
        return sorted([(p.kind, p.name) for p in address])
--- a/test/python/tokenizer/sanitizers/test_split_name_list.py
+++ b/test/python/tokenizer/sanitizers/test_split_name_list.py
@@ -14,58 +14,66 @@ from nominatim.data.place_info import PlaceInfo
 from nominatim.errors import UsageError
-def run_sanitizer_on(**kwargs):
+class TestSplitName:
    place = PlaceInfo({'name': kwargs})
    name, _ = PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place)
-    return sorted([(p.name, p.kind, p.suffix) for p in name])
+    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        self.config = def_config
-def sanitize_with_delimiter(delimiter, name):
+    def run_sanitizer_on(self, **kwargs):
-    place = PlaceInfo({'name': {'name': name}})
+        place = PlaceInfo({'name': kwargs})
-    san = PlaceSanitizer([{'step': 'split-name-list', 'delimiters': delimiter}])
+        name, _ = PlaceSanitizer([{'step': 'split-name-list'}], self.config).process_names(place)
    name, _ = san.process_names(place)
-    return sorted([p.name for p in name])
+        return sorted([(p.name, p.kind, p.suffix) for p in name])
-def test_simple():
+    def sanitize_with_delimiter(self, delimiter, name):
-    assert run_sanitizer_on(name='ABC') == [('ABC', 'name', None)]
+        place = PlaceInfo({'name': {'name': name}})
-    assert run_sanitizer_on(name='') == [('', 'name', None)]
+        san = PlaceSanitizer([{'step': 'split-name-list', 'delimiters': delimiter}],
                             self.config)
        name, _ = san.process_names(place)
        return sorted([p.name for p in name])
-def test_splits():
+    def test_simple(self):
-    assert run_sanitizer_on(name='A;B;C') == [('A', 'name', None),
+        assert self.run_sanitizer_on(name='ABC') == [('ABC', 'name', None)]
-                                              ('B', 'name', None),
+        assert self.run_sanitizer_on(name='') == [('', 'name', None)]
                                              ('C', 'name', None)]
    assert run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None),
                                                            ('boat', 'short_name', None)]
-def test_empty_fields():
+    def test_splits(self):
-    assert run_sanitizer_on(name='A;;B') == [('A', 'name', None),
+        assert self.run_sanitizer_on(name='A;B;C') == [('A', 'name', None),
-                                             ('B', 'name', None)]
+                                                       ('B', 'name', None),
-    assert run_sanitizer_on(name='A; ,B') == [('A', 'name', None),
+                                                       ('C', 'name', None)]
-                                              ('B', 'name', None)]
+        assert self.run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None),
-    assert run_sanitizer_on(name=' ;B') == [('B', 'name', None)]
+                                                                     ('boat', 'short_name', None)]
    assert run_sanitizer_on(name='B,') == [('B', 'name', None)]
-def test_custom_delimiters():
+    def test_empty_fields(self):
-    assert sanitize_with_delimiter(':', '12:45,3') == ['12', '45,3']
+        assert self.run_sanitizer_on(name='A;;B') == [('A', 'name', None),
-    assert sanitize_with_delimiter('\\', 'a;\\b!#@ \\') == ['a;', 'b!#@']
+                                                      ('B', 'name', None)]
-    assert sanitize_with_delimiter('[]', 'foo[to]be') == ['be', 'foo', 'to']
+        assert self.run_sanitizer_on(name='A; ,B') == [('A', 'name', None),
-    assert sanitize_with_delimiter(' ', 'morning  sun') == ['morning', 'sun']
+                                                       ('B', 'name', None)]
        assert self.run_sanitizer_on(name=' ;B') == [('B', 'name', None)]
        assert self.run_sanitizer_on(name='B,') == [('B', 'name', None)]
-def test_empty_delimiter_set():
+    def test_custom_delimiters(self):
-    with pytest.raises(UsageError):
+        assert self.sanitize_with_delimiter(':', '12:45,3') == ['12', '45,3']
-        sanitize_with_delimiter('', 'abc')
+        assert self.sanitize_with_delimiter('\\', 'a;\\b!#@ \\') == ['a;', 'b!#@']
        assert self.sanitize_with_delimiter('[]', 'foo[to]be') == ['be', 'foo', 'to']
        assert self.sanitize_with_delimiter(' ', 'morning  sun') == ['morning', 'sun']
-def test_no_name_list():
+    def test_empty_delimiter_set(self):
        with pytest.raises(UsageError):
            self.sanitize_with_delimiter('', 'abc')
 def test_no_name_list(def_config):
    place = PlaceInfo({'address': {'housenumber': '3'}})
-    name, address = PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place)
+    name, address = PlaceSanitizer([{'step': 'split-name-list'}], def_config).process_names(place)
    assert not name
    assert len(address) == 1
--- a/test/python/tokenizer/sanitizers/test_strip_brace_terms.py
+++ b/test/python/tokenizer/sanitizers/test_strip_brace_terms.py
@@ -12,39 +12,45 @@ import pytest
 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
 from nominatim.data.place_info import PlaceInfo
-def run_sanitizer_on(**kwargs):
+class TestStripBrace:
    place = PlaceInfo({'name': kwargs})
    name, _ = PlaceSanitizer([{'step': 'strip-brace-terms'}]).process_names(place)
-    return sorted([(p.name, p.kind, p.suffix) for p in name])
+    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        self.config = def_config
    def run_sanitizer_on(self, **kwargs):
        place = PlaceInfo({'name': kwargs})
        name, _ = PlaceSanitizer([{'step': 'strip-brace-terms'}], self.config).process_names(place)
        return sorted([(p.name, p.kind, p.suffix) for p in name])
-def test_no_braces():
+    def test_no_braces(self):
-    assert run_sanitizer_on(name='foo', ref='23') == [('23', 'ref', None),
+        assert self.run_sanitizer_on(name='foo', ref='23') == [('23', 'ref', None),
-                                                      ('foo', 'name', None)]
+                                                               ('foo', 'name', None)]
-def test_simple_braces():
+    def test_simple_braces(self):
-    assert run_sanitizer_on(name='Halle (Saale)', ref='3')\
+        assert self.run_sanitizer_on(name='Halle (Saale)', ref='3')\
-      == [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)]
+          == [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)]
-    assert run_sanitizer_on(name='ack ( bar')\
+        assert self.run_sanitizer_on(name='ack ( bar')\
-      == [('ack', 'name', None), ('ack ( bar', 'name', None)]
+          == [('ack', 'name', None), ('ack ( bar', 'name', None)]
-def test_only_braces():
+    def test_only_braces(self):
-    assert run_sanitizer_on(name='(maybe)') == [('(maybe)', 'name', None)]
+        assert self.run_sanitizer_on(name='(maybe)') == [('(maybe)', 'name', None)]
-def test_double_braces():
+    def test_double_braces(self):
-    assert run_sanitizer_on(name='a((b))') == [('a', 'name', None),
+        assert self.run_sanitizer_on(name='a((b))') == [('a', 'name', None),
-                                               ('a((b))', 'name', None)]
+                                                        ('a((b))', 'name', None)]
-    assert run_sanitizer_on(name='a (b) (c)') == [('a', 'name', None),
+        assert self.run_sanitizer_on(name='a (b) (c)') == [('a', 'name', None),
-                                                  ('a (b) (c)', 'name', None)]
+                                                           ('a (b) (c)', 'name', None)]
-def test_no_names():
+def test_no_names(def_config):
    place = PlaceInfo({'address': {'housenumber': '3'}})
-    name, address = PlaceSanitizer([{'step': 'strip-brace-terms'}]).process_names(place)
+    name, address = PlaceSanitizer([{'step': 'strip-brace-terms'}], def_config).process_names(place)
    assert not name
    assert len(address) == 1
--- a/test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py
+++ b/test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py
@@ -15,11 +15,16 @@ from nominatim.data.country_info import setup_country_config
 class TestWithDefaults:
-    @staticmethod
+    @pytest.fixture(autouse=True)
-    def run_sanitizer_on(country, **kwargs):
+    def setup_country(self, def_config):
        self.config = def_config
    def run_sanitizer_on(self, country, **kwargs):
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                           'country_code': country})
-        name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language'}]).process_names(place)
+        name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language'}],
                                 self.config).process_names(place)
        return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name])
@@ -44,12 +49,17 @@ class TestWithDefaults:
 class TestFilterKind:
-    @staticmethod
+    @pytest.fixture(autouse=True)
-    def run_sanitizer_on(filt, **kwargs):
+    def setup_country(self, def_config):
        self.config = def_config
    def run_sanitizer_on(self, filt, **kwargs):
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                           'country_code': 'de'})
        name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
-                                   'filter-kind': filt}]).process_names(place)
+                                   'filter-kind': filt}],
                                 self.config).process_names(place)
        return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name])
@@ -94,14 +104,16 @@ class TestDefaultCountry:
    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        setup_country_config(def_config)
        self.config = def_config
-    @staticmethod
+
-    def run_sanitizer_append(mode,  country, **kwargs):
+    def run_sanitizer_append(self, mode,  country, **kwargs):
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                           'country_code': country})
        name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                   'use-defaults': mode,
-                                   'mode': 'append'}]).process_names(place)
+                                   'mode': 'append'}],
                                 self.config).process_names(place)
        assert all(isinstance(p.attr, dict) for p in name)
        assert all(len(p.attr) <= 1 for p in name)
@@ -111,13 +123,13 @@ class TestDefaultCountry:
        return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
-    @staticmethod
+    def run_sanitizer_replace(self, mode,  country, **kwargs):
    def run_sanitizer_replace(mode,  country, **kwargs):
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                           'country_code': country})
        name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                   'use-defaults': mode,
-                                   'mode': 'replace'}]).process_names(place)
+                                   'mode': 'replace'}],
                                 self.config).process_names(place)
        assert all(isinstance(p.attr, dict) for p in name)
        assert all(len(p.attr) <= 1 for p in name)
@@ -131,7 +143,8 @@ class TestDefaultCountry:
        place = PlaceInfo({'name': {'name': 'something'}})
        name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                   'use-defaults': 'all',
-                                   'mode': 'replace'}]).process_names(place)
+                                   'mode': 'replace'}],
                                 self.config).process_names(place)
        assert len(name) == 1
        assert name[0].name == 'something'
@@ -199,14 +212,19 @@ class TestDefaultCountry:
 class TestCountryWithWhitelist:
-    @staticmethod
+    @pytest.fixture(autouse=True)
-    def run_sanitizer_on(mode,  country, **kwargs):
+    def setup_country(self, def_config):
        self.config = def_config
    def run_sanitizer_on(self, mode,  country, **kwargs):
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                           'country_code': country})
        name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                   'use-defaults': mode,
                                   'mode': 'replace',
-                                   'whitelist': ['de', 'fr', 'ru']}]).process_names(place)
+                                   'whitelist': ['de', 'fr', 'ru']}],
                                 self.config).process_names(place)
        assert all(isinstance(p.attr, dict) for p in name)
        assert all(len(p.attr) <= 1 for p in name)
@@ -238,12 +256,17 @@ class TestCountryWithWhitelist:
 class TestWhiteList:
-    @staticmethod
+    @pytest.fixture(autouse=True)
-    def run_sanitizer_on(whitelist, **kwargs):
+    def setup_country(self, def_config):
        self.config = def_config
    def run_sanitizer_on(self, whitelist, **kwargs):
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}})
        name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                   'mode': 'replace',
-                                   'whitelist': whitelist}]).process_names(place)
+                                   'whitelist': whitelist}],
                                 self.config).process_names(place)
        assert all(isinstance(p.attr, dict) for p in name)
        assert all(len(p.attr) <= 1 for p in name)
--- a/test/python/tokenizer/test_place_sanitizer.py
+++ b/test/python/tokenizer/test_place_sanitizer.py
@@ -47,8 +47,8 @@ def test_placeinfo_has_attr():
    assert not place.has_attr('whatever')
-def test_sanitizer_default():
+def test_sanitizer_default(def_config):
-    san = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}])
+    san = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}], def_config)
    name, address =  san.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'},
                                                  'address': {'street': 'Bald'}}))
@@ -63,8 +63,8 @@ def test_sanitizer_default():
@pytest.mark.parametrize('rules', [None, []])
-def test_sanitizer_empty_list(rules):
+def test_sanitizer_empty_list(def_config, rules):
-    san = sanitizer.PlaceSanitizer(rules)
+    san = sanitizer.PlaceSanitizer(rules, def_config)
    name, address =  san.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'}}))
@@ -72,6 +72,6 @@ def test_sanitizer_empty_list(rules):
    assert all(isinstance(n, sanitizer.PlaceName) for n in name)
-def test_sanitizer_missing_step_definition():
+def test_sanitizer_missing_step_definition(def_config):
    with pytest.raises(UsageError):
-        san = sanitizer.PlaceSanitizer([{'id': 'split-name-list'}])
+        san = sanitizer.PlaceSanitizer([{'id': 'split-name-list'}], def_config)