mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
generalize normalization step for search query
It is now possible to configure functions for changing the query input before it is analysed by the tokenizer. Code is a cleaned-up version of the implementation by @miku.
This commit is contained in:
0
src/nominatim_api/query_preprocessing/__init__.py
Normal file
0
src/nominatim_api/query_preprocessing/__init__.py
Normal file
32
src/nominatim_api/query_preprocessing/base.py
Normal file
32
src/nominatim_api/query_preprocessing/base.py
Normal file
@@ -0,0 +1,32 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Common data types and protocols for preprocessing.
|
||||
"""
|
||||
from typing import List, Callable
|
||||
|
||||
from ..typing import Protocol
|
||||
from ..search import query as qmod
|
||||
from .config import QueryConfig
|
||||
|
||||
# Signature of a query preprocessing function: it takes the list of
# query phrases and returns a list of phrases.
QueryProcessingFunc = Callable[[List[qmod.Phrase]], List[qmod.Phrase]]
|
||||
|
||||
|
||||
class QueryHandler(Protocol):
    """ Interface that a query preprocessing module has to provide.
    """

    def create(self, config: QueryConfig) -> QueryProcessingFunc:
        """
        Build the preprocessing function for the given configuration.

        Arguments:
            config: A dictionary with the additional configuration options
                    specified in the tokenizer configuration.

        Return:
            A function that takes the list of query phrases and returns
            the list of phrases as modified by the preprocessor.
        """
        ...
|
||||
34
src/nominatim_api/query_preprocessing/config.py
Normal file
34
src/nominatim_api/query_preprocessing/config.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Configuration for query preprocessors.
|
||||
"""
|
||||
from typing import Any, TYPE_CHECKING
|
||||
from collections import UserDict
|
||||
|
||||
# `UserDict` cannot be parametrised at runtime on older Python versions,
# so the subscripted form is only made visible to the type checker.
# See https://github.com/python/typing/issues/60#issuecomment-869757075
if TYPE_CHECKING:
    _BaseUserDict = UserDict[str, Any]
else:
    _BaseUserDict = UserDict


class QueryConfig(_BaseUserDict):
    """ Dictionary with the configuration options for a query
        preprocessor.

        Preprocessors are meant to use it as a read-only mapping, although
        this is not enforced by the class. On top of the usual dictionary
        functions it offers `set_normalizer()` for handing over the
        normalizer that preprocessors may use.
    """

    def set_normalizer(self, normalizer: Any) -> 'QueryConfig':
        """ Register the normalizer to be used by the preprocessors.

            The object is saved under the key `_normalizer`. The
            configuration itself is returned, so that calls can be chained.
        """
        # Goes through the regular item assignment of the mapping.
        self.update(_normalizer=normalizer)
        return self
|
||||
26
src/nominatim_api/query_preprocessing/normalize.py
Normal file
26
src/nominatim_api/query_preprocessing/normalize.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Normalize query text using an ICU transliterator.
|
||||
"""
|
||||
from typing import cast
|
||||
|
||||
from .config import QueryConfig
|
||||
from .base import QueryProcessingFunc
|
||||
from ..search.query import Phrase
|
||||
|
||||
|
||||
def create(config: QueryConfig) -> QueryProcessingFunc:
    """ Create a preprocessing function that normalizes the phrase texts.

        The normalizer is taken from the `_normalizer` entry of the
        configuration. When there is none, the returned function hands
        the phrases back untouched. Otherwise every phrase is replaced by
        a new phrase of the same type with the transliterated text, and
        phrases whose text ends up empty are dropped.
    """
    normalizer = config.get('_normalizer')

    if not normalizer:
        return lambda phrases: phrases

    return lambda phrases: [
        normalized for normalized in (
            Phrase(phrase.ptype, cast(str, normalizer.transliterate(phrase.text)))
            for phrase in phrases)
        if normalized.text]
|
||||
Reference in New Issue
Block a user