forked from hans/Nominatim
add support for external token analysis modules
This commit is contained in:
@@ -222,7 +222,7 @@ class Configuration:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def load_plugin_module(self, module_name: str, internal_path: str) -> object:
|
def load_plugin_module(self, module_name: str, internal_path: str) -> Any:
|
||||||
""" Load a Python module as a plugin.
|
""" Load a Python module as a plugin.
|
||||||
|
|
||||||
The module_name may have three variants:
|
The module_name may have three variants:
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
Helper class to create ICU rules from a configuration file.
|
Helper class to create ICU rules from a configuration file.
|
||||||
"""
|
"""
|
||||||
from typing import Mapping, Any, Dict, Optional
|
from typing import Mapping, Any, Dict, Optional
|
||||||
import importlib
|
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
@@ -145,7 +144,9 @@ class ICURuleLoader:
|
|||||||
LOG.fatal("ICU tokenizer configuration has two token "
|
LOG.fatal("ICU tokenizer configuration has two token "
|
||||||
"analyzers with id '%s'.", name)
|
"analyzers with id '%s'.", name)
|
||||||
raise UsageError("Syntax error in ICU tokenizer config.")
|
raise UsageError("Syntax error in ICU tokenizer config.")
|
||||||
self.analysis[name] = TokenAnalyzerRule(section, self.normalization_rules)
|
self.analysis[name] = TokenAnalyzerRule(section,
|
||||||
|
self.normalization_rules,
|
||||||
|
self.config)
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -169,15 +170,18 @@ class TokenAnalyzerRule:
|
|||||||
and creates a new token analyzer on request.
|
and creates a new token analyzer on request.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, rules: Mapping[str, Any], normalization_rules: str) -> None:
|
def __init__(self, rules: Mapping[str, Any], normalization_rules: str,
|
||||||
# Find the analysis module
|
config: Configuration) -> None:
|
||||||
module_name = 'nominatim.tokenizer.token_analysis.' \
|
analyzer_name = _get_section(rules, 'analyzer')
|
||||||
+ _get_section(rules, 'analyzer').replace('-', '_')
|
if not analyzer_name or not isinstance(analyzer_name, str):
|
||||||
self._analysis_mod: AnalysisModule = importlib.import_module(module_name)
|
raise UsageError("'analyzer' parameter needs to be simple string")
|
||||||
|
|
||||||
|
self._analysis_mod: AnalysisModule = \
|
||||||
|
config.load_plugin_module(analyzer_name, 'nominatim.tokenizer.token_analysis')
|
||||||
|
|
||||||
# Load the configuration.
|
|
||||||
self.config = self._analysis_mod.configure(rules, normalization_rules)
|
self.config = self._analysis_mod.configure(rules, normalization_rules)
|
||||||
|
|
||||||
|
|
||||||
def create(self, normalizer: Any, transliterator: Any) -> Analyser:
|
def create(self, normalizer: Any, transliterator: Any) -> Analyser:
|
||||||
""" Create a new analyser instance for the given rule.
|
""" Create a new analyser instance for the given rule.
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user