move text normalization into extra function

2026-02-26 11:08:13 +00:00 · 2023-06-22 10:48:05 +02:00
parent 08dcd05d7b
commit 3a21999a17
1 changed files with 9 additions and 1 deletions
--- a/nominatim/api/search/icu_tokenizer.py
+++ b/nominatim/api/search/icu_tokenizer.py
@@ -153,7 +153,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
        """
        log().section('Analyze query (using ICU tokenizer)')
        normalized = list(filter(lambda p: p.text,
-                                 (qmod.Phrase(p.ptype, self.normalizer.transliterate(p.text))
+                                 (qmod.Phrase(p.ptype, self.normalize_text(p.text))
                                  for p in phrases)))
        query = qmod.QueryStruct(normalized)
        log().var_dump('Normalized query', query.source)
@@ -187,6 +187,14 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
        return query
    def normalize_text(self, text: str) -> str:
        """ Convert the given text into its normalized form, i.e. the
            standardized representation that search operates on.

            The text is passed through the transliteration of the
            analyzer's normalizer. Any information dropped at this stage
            cannot be recovered later on.
        """
        # cast() only informs the type checker; at runtime the value
        # returned by the normalizer is handed back unchanged.
        normalized = self.normalizer.transliterate(text)
        return cast(str, normalized)
    def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
        """ Transliterate the phrases and split them into tokens.