forked from hans/Nominatim
add type hints for sanitizers
This commit is contained in:
119
nominatim/tokenizer/sanitizers/base.py
Normal file
119
nominatim/tokenizer/sanitizers/base.py
Normal file
@@ -0,0 +1,119 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2022 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Common data types and protocols for sanitizers.
|
||||
"""
|
||||
from typing import Optional, Dict, List, Mapping, Callable
|
||||
|
||||
from typing_extensions import Protocol, Final
|
||||
from nominatim.tokenizer.sanitizers.config import SanitizerConfig
|
||||
from nominatim.data.place_info import PlaceInfo
|
||||
|
||||
class PlaceName:
    """ A single searchable name of a place plus its descriptive properties.

        Each instance stores the name string itself and two basic
        properties:
          * 'kind' is the name of the OSM key without any suffix
            (i.e. with the part after the colon removed)
          * 'suffix' is the suffix of the OSM tag, i.e. the part of the
            key after the first colon, or None when there is none.

        On top of that, arbitrary additional attributes may be attached
        to the name. Which attributes are evaluated depends on the
        token analyser.
    """

    def __init__(self, name: str, kind: str, suffix: Optional[str]):
        self.name, self.kind, self.suffix = name, kind, suffix
        # Free-form string properties; starts out empty for every new name.
        self.attr: Dict[str, str] = {}


    def __repr__(self) -> str:
        return f"PlaceName(name='{self.name}',kind='{self.kind}',suffix='{self.suffix}')"


    def clone(self, name: Optional[str] = None,
              kind: Optional[str] = None,
              suffix: Optional[str] = None,
              attr: Optional[Mapping[str, str]] = None) -> 'PlaceName':
        """ Return an independent copy of this place name where the
            given parameters replace the current values.

            A parameter that is None or otherwise falsy (for example an
            empty string) leaves the corresponding value of this object
            in place. The attributes of the copy start out as a copy of
            the current attributes and are then updated with the keys
            from 'attr'. Attributes are never dropped, so the function
            cannot be used to remove an attribute from a place name.
        """
        duplicate = PlaceName(name if name else self.name,
                              kind if kind else self.kind,
                              suffix if suffix else self.suffix)

        # Own attributes first, so that entries from 'attr' win on conflict.
        for source in (self.attr, attr):
            if source:
                duplicate.attr.update(source)

        return duplicate


    def set_attr(self, key: str, value: str) -> None:
        """ Set the property 'key' of the name to 'value', replacing
            any value that was stored under that key before.
        """
        self.attr[key] = value


    def get_attr(self, key: str, default: Optional[str] = None) -> Optional[str]:
        """ Look up the property 'key'. When the property has not been
            set, 'default' is returned instead.
        """
        return self.attr.get(key, default)


    def has_attr(self, key: str) -> bool:
        """ Tell whether a property with the given key has been set.
        """
        return key in self.attr
|
||||
|
||||
|
||||
class ProcessInfo:
    """ Container for the information that is handed to handler functions.

        The members 'names' and 'address' are mutable. A handler changes
        them either by modifying the lists in place or by replacing the
        old content with a new list.
    """

    def __init__(self, place: PlaceInfo):
        self.place: Final = place
        # Both lists are built from the mappings found on the place.
        self.names = self._convert_name_dict(place.name)
        self.address = self._convert_name_dict(place.address)


    @staticmethod
    def _convert_name_dict(names: Optional[Mapping[str, str]]) -> List[PlaceName]:
        """ Turn a dictionary of names into a list of PlaceNames.

            Each dictionary key is cut at its first colon: the part
            before it becomes the kind, the part after it the suffix.
            Keys without a colon get a suffix of None. Surrounding
            whitespace is stripped from the value, the kind and the
            suffix. None or an empty dictionary yields an empty list.
        """
        if not names:
            return []

        converted = []
        for key, value in names.items():
            kind, colon, suffix = key.partition(':')
            # 'colon' is empty exactly when the key contained no colon.
            converted.append(PlaceName(value.strip(),
                                       kind.strip(),
                                       suffix.strip() if colon else None))

        return converted
|
||||
|
||||
|
||||
class SanitizerHandler(Protocol):
    """ Structural interface that sanitizer modules have to provide.
    """

    def create(self, config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
        """ The single function that a sanitizer must define.

            It receives the configuration information for the sanitizer
            and returns a function that transforms name and address.
        """
|
||||
Reference in New Issue
Block a user