forked from hans/Nominatim
harmonize interface of token analysis module
The configure() function now receives compiled ICU Transliterator objects (a normalizer and a transliterator) instead of the ICU normalization rules as a string. This harmonizes its parameters with those of the create function.
This commit is contained in:
@@ -30,7 +30,8 @@ class AnalysisModule(Protocol):
|
||||
""" Protocol for analysis modules.
|
||||
"""
|
||||
|
||||
def configure(self, rules: Mapping[str, Any], normalization_rules: str) -> Any:
|
||||
def configure(self, rules: Mapping[str, Any],
|
||||
normalizer: Any, transliterator: Any) -> Any:
|
||||
""" Prepare the configuration of the analysis module.
|
||||
This function should prepare all data that can be shared
|
||||
between instances of this analyser.
|
||||
@@ -38,8 +39,10 @@ class AnalysisModule(Protocol):
|
||||
Arguments:
|
||||
rules: A dictionary with the additional configuration options
|
||||
as specified in the tokenizer configuration.
|
||||
normalization_rules: ICU rules for normalization as a string
|
||||
that can be used with createFromRules().
|
||||
normalizer: an ICU Transliterator with the compiled normalization
|
||||
rules.
|
||||
transliterator: an ICU transliterator with the compiled
|
||||
transliteration rules.
|
||||
|
||||
Returns:
|
||||
A data object with the configuration that was set up. May be
|
||||
|
||||
@@ -12,8 +12,6 @@ from collections import defaultdict
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from icu import Transliterator
|
||||
|
||||
from nominatim.config import flatten_config_list
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
@@ -25,7 +23,7 @@ class ICUVariant(NamedTuple):
|
||||
|
||||
|
||||
def get_variant_config(in_rules: Any,
|
||||
normalization_rules: str) -> Tuple[List[Tuple[str, List[str]]], str]:
|
||||
normalizer: Any) -> Tuple[List[Tuple[str, List[str]]], str]:
|
||||
""" Convert the variant definition from the configuration into
|
||||
replacement sets.
|
||||
|
||||
@@ -39,7 +37,7 @@ def get_variant_config(in_rules: Any,
|
||||
vset: Set[ICUVariant] = set()
|
||||
rules = flatten_config_list(in_rules, 'variants')
|
||||
|
||||
vmaker = _VariantMaker(normalization_rules)
|
||||
vmaker = _VariantMaker(normalizer)
|
||||
|
||||
for section in rules:
|
||||
for rule in (section.get('words') or []):
|
||||
@@ -63,9 +61,8 @@ class _VariantMaker:
|
||||
All text in rules is normalized to make sure the variants match later.
|
||||
"""
|
||||
|
||||
def __init__(self, norm_rules: Any) -> None:
|
||||
self.norm = Transliterator.createFromRules("rule_loader_normalization",
|
||||
norm_rules)
|
||||
def __init__(self, normalizer: Any) -> None:
|
||||
self.norm = normalizer
|
||||
|
||||
|
||||
def compute(self, rule: Any) -> Iterator[ICUVariant]:
|
||||
|
||||
@@ -18,13 +18,13 @@ from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantG
|
||||
|
||||
### Configuration section
|
||||
|
||||
def configure(rules: Mapping[str, Any], normalization_rules: str) -> Dict[str, Any]:
|
||||
def configure(rules: Mapping[str, Any], normalizer: Any, _: Any) -> Dict[str, Any]:
|
||||
""" Extract and preprocess the configuration for this module.
|
||||
"""
|
||||
config: Dict[str, Any] = {}
|
||||
|
||||
config['replacements'], config['chars'] = get_variant_config(rules.get('variants'),
|
||||
normalization_rules)
|
||||
normalizer)
|
||||
config['variant_only'] = rules.get('mode', '') == 'variant-only'
|
||||
|
||||
# parse mutation rules
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
Specialized processor for housenumbers. Analyses common housenumber patterns
|
||||
and creates variants for them.
|
||||
"""
|
||||
from typing import Mapping, Any, List, cast
|
||||
from typing import Any, List, cast
|
||||
import re
|
||||
|
||||
from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
|
||||
@@ -20,7 +20,7 @@ RE_NAMED_PART = re.compile(r'[a-z]{4}')
|
||||
|
||||
### Configuration section
|
||||
|
||||
def configure(rules: Mapping[str, Any], normalization_rules: str) -> None: # pylint: disable=W0613
|
||||
def configure(*_: Any) -> None:
|
||||
""" All behaviour is currently hard-coded.
|
||||
"""
|
||||
return None
|
||||
|
||||
@@ -8,13 +8,13 @@
|
||||
Specialized processor for postcodes. Supports a 'lookup' variant of the
|
||||
token, which produces variants with optional spaces.
|
||||
"""
|
||||
from typing import Mapping, Any, List
|
||||
from typing import Any, List
|
||||
|
||||
from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
|
||||
|
||||
### Configuration section
|
||||
|
||||
def configure(rules: Mapping[str, Any], normalization_rules: str) -> None: # pylint: disable=W0613
|
||||
def configure(*_: Any) -> None:
|
||||
""" All behaviour is currently hard-coded.
|
||||
"""
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user