add support for external token analysis modules

This commit is contained in:
Sarah Hoffmann
2022-07-25 16:27:22 +02:00
parent 6d41046b15
commit d819036daa
2 changed files with 13 additions and 9 deletions

View File

@@ -8,7 +8,6 @@
Helper class to create ICU rules from a configuration file.
"""
from typing import Mapping, Any, Dict, Optional
import importlib
import io
import json
import logging
@@ -145,7 +144,9 @@ class ICURuleLoader:
LOG.fatal("ICU tokenizer configuration has two token "
"analyzers with id '%s'.", name)
raise UsageError("Syntax error in ICU tokenizer config.")
self.analysis[name] = TokenAnalyzerRule(section, self.normalization_rules)
self.analysis[name] = TokenAnalyzerRule(section,
self.normalization_rules,
self.config)
@staticmethod
@@ -169,15 +170,18 @@ class TokenAnalyzerRule:
and creates a new token analyzer on request.
"""
def __init__(self, rules: Mapping[str, Any], normalization_rules: str) -> None:
# Find the analysis module
module_name = 'nominatim.tokenizer.token_analysis.' \
+ _get_section(rules, 'analyzer').replace('-', '_')
self._analysis_mod: AnalysisModule = importlib.import_module(module_name)
def __init__(self, rules: Mapping[str, Any], normalization_rules: str,
config: Configuration) -> None:
analyzer_name = _get_section(rules, 'analyzer')
if not analyzer_name or not isinstance(analyzer_name, str):
raise UsageError("'analyzer' parameter needs to be simple string")
self._analysis_mod: AnalysisModule = \
config.load_plugin_module(analyzer_name, 'nominatim.tokenizer.token_analysis')
# Load the configuration.
self.config = self._analysis_mod.configure(rules, normalization_rules)
def create(self, normalizer: Any, transliterator: Any) -> Analyser:
""" Create a new analyser instance for the given rule.
"""