generalize normalization step for search query

It is now possible to configure functions for changing the query
input before it is analysed by the tokenizer.

Code is a cleaned-up version of the implementation by @miku.
This commit is contained in:
Sarah Hoffmann
2024-12-13 11:53:10 +01:00
parent 046665f8d9
commit 2b87c016db
10 changed files with 167 additions and 9 deletions

View File

@@ -0,0 +1,26 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Normalize query text using an ICU transliterator.
"""
from typing import cast
from .config import QueryConfig
from .base import QueryProcessingFunc
from ..search.query import Phrase
def create(config: QueryConfig) -> QueryProcessingFunc:
    """ Build the query preprocessing function for normalization.

        The normalizer object is looked up in the configuration under
        the key '_normalizer'. When the lookup yields nothing usable,
        the returned function hands its input back unchanged.

        Otherwise the returned function creates a new Phrase for every
        input phrase, keeping the phrase type and replacing the text
        with the result of the normalizer's transliterate() call.
        Phrases whose text ends up empty are dropped from the
        resulting list.
    """
    normalizer = config.get('_normalizer')

    if normalizer:
        # Build the transliterated phrases lazily and keep only those
        # that still carry some text afterwards.
        return lambda phrases: [
            normalized for normalized in (
                Phrase(phrase.ptype,
                       cast(str, normalizer.transliterate(phrase.text)))
                for phrase in phrases)
            if normalized.text]

    # Nothing to normalize with: pass the phrases through untouched.
    return lambda phrases: phrases