mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
generalize normalization step for search query
It is now possible to configure functions for changing the query input before it is analysed by the tokenizer. Code is a cleaned-up version of the implementation by @miku.
This commit is contained in:
26
src/nominatim_api/query_preprocessing/normalize.py
Normal file
26
src/nominatim_api/query_preprocessing/normalize.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Normalize query text using an ICU transliterator.
|
||||
"""
|
||||
from typing import cast
|
||||
|
||||
from .config import QueryConfig
|
||||
from .base import QueryProcessingFunc
|
||||
from ..search.query import Phrase
|
||||
|
||||
|
||||
def create(config: QueryConfig) -> QueryProcessingFunc:
    """ Create a query preprocessing function that normalizes phrase text.

        The transliterator is looked up under the '_normalizer' key of
        `config`. When that entry is missing or falsy, the returned
        function hands its input back unchanged.

        Otherwise the returned function builds a new Phrase for every
        input phrase, keeping its `ptype` and replacing its text with the
        result of `normalizer.transliterate()`. Phrases whose text is
        empty after transliteration are left out of the resulting list.
    """
    normalizer = config.get('_normalizer')

    if normalizer:
        # Build the transliterated phrases lazily and keep only those
        # that still carry some text.
        return lambda phrases: [
            normalized
            for normalized in (
                Phrase(phrase.ptype,
                       cast(str, normalizer.transliterate(phrase.text)))
                for phrase in phrases)
            if normalized.text]

    # No normalizer configured: pass the phrases through untouched.
    return lambda phrases: phrases
|
||||
Reference in New Issue
Block a user