forked from hans/Nominatim
68 lines
2.6 KiB
Python
68 lines
2.6 KiB
Python
# SPDX-License-Identifier: GPL-2.0-only
|
|
#
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
#
|
|
# Copyright (C) 2022 by the Nominatim developer community.
|
|
# For a full list of authors see the git log.
|
|
"""
|
|
Common data types and protocols for analysers.
|
|
"""
|
|
from typing import Mapping, List, Any
|
|
|
|
from nominatim.typing import Protocol
|
|
|
|
class Analyzer(Protocol):
    """ Interface of the objects handed out by the `create()` function
        of an analysis module. Every such object has to provide the
        functions listed below.
    """

    def normalize(self, name: str) -> str:
        """ Bring the given name into its normalized form.

            The normalized form is the standard representation which
            serves as the starting point for deriving the possible
            variants of the name.
        """

    def get_variants_ascii(self, norm_name: str) -> List[str]:
        """ Determine the spelling variants of a name that has already
            been normalized and return them in transliterated form.
        """
|
|
|
|
class AnalysisModule(Protocol):
    """ Interface that every analysis module has to implement.
    """

    def configure(
        self,
        rules: Mapping[str, Any],
        normalizer: Any,
        transliterator: Any,
    ) -> Any:
        """ Set up the configuration of the analysis module.

            Everything that may be shared between the instances of
            this analyser should be prepared in this function.

            Arguments:
                rules: Dictionary holding the additional configuration
                       options given in the tokenizer configuration.
                normalizer: ICU Transliterator compiled from the
                            normalization rules.
                transliterator: ICU Transliterator compiled from the
                                transliteration rules.

            Returns:
                A data object containing the prepared configuration.
                The analysis module is free to use it in whatever way
                it needs.
        """

    def create(
        self,
        normalizer: Any,
        transliterator: Any,
        config: Any,
    ) -> Analyzer:
        """ Instantiate a new analyser.

            In a multi-threading context each thread gets its own
            separate instance of the analyser.

            Arguments:
                normalizer: ICU Transliterator compiled from the
                            normalization rules.
                transliterator: ICU Transliterator compiled from the
                                transliteration rules.
                config: The object returned by the call to configure().

            Returns:
                A new analyzer instance, which must be an object
                implementing the Analyzer protocol.
        """
|