add support for external token analysis modules

This commit is contained in:
Sarah Hoffmann
2022-07-25 16:27:22 +02:00
parent 6d41046b15
commit d819036daa
2 changed files with 13 additions and 9 deletions

View File

@@ -222,7 +222,7 @@ class Configuration:
         return result

-    def load_plugin_module(self, module_name: str, internal_path: str) -> object:
+    def load_plugin_module(self, module_name: str, internal_path: str) -> Any:
         """ Load a Python module as a plugin.

             The module_name may have three variants:

View File

@@ -8,7 +8,6 @@
Helper class to create ICU rules from a configuration file. Helper class to create ICU rules from a configuration file.
""" """
from typing import Mapping, Any, Dict, Optional from typing import Mapping, Any, Dict, Optional
import importlib
import io import io
import json import json
import logging import logging
@@ -145,7 +144,9 @@ class ICURuleLoader:
                 LOG.fatal("ICU tokenizer configuration has two token "
                           "analyzers with id '%s'.", name)
                 raise UsageError("Syntax error in ICU tokenizer config.")
-            self.analysis[name] = TokenAnalyzerRule(section, self.normalization_rules)
+            self.analysis[name] = TokenAnalyzerRule(section,
+                                                    self.normalization_rules,
+                                                    self.config)
@staticmethod @staticmethod
@@ -169,15 +170,18 @@ class TokenAnalyzerRule:
and creates a new token analyzer on request. and creates a new token analyzer on request.
""" """
def __init__(self, rules: Mapping[str, Any], normalization_rules: str) -> None: def __init__(self, rules: Mapping[str, Any], normalization_rules: str,
# Find the analysis module config: Configuration) -> None:
module_name = 'nominatim.tokenizer.token_analysis.' \ analyzer_name = _get_section(rules, 'analyzer')
+ _get_section(rules, 'analyzer').replace('-', '_') if not analyzer_name or not isinstance(analyzer_name, str):
self._analysis_mod: AnalysisModule = importlib.import_module(module_name) raise UsageError("'analyzer' parameter needs to be simple string")
self._analysis_mod: AnalysisModule = \
config.load_plugin_module(analyzer_name, 'nominatim.tokenizer.token_analysis')
# Load the configuration.
self.config = self._analysis_mod.configure(rules, normalization_rules) self.config = self._analysis_mod.configure(rules, normalization_rules)
def create(self, normalizer: Any, transliterator: Any) -> Analyser: def create(self, normalizer: Any, transliterator: Any) -> Analyser:
""" Create a new analyser instance for the given rule. """ Create a new analyser instance for the given rule.
""" """