move text normalization into extra function

This commit is contained in:
Sarah Hoffmann
2023-06-22 10:48:05 +02:00
parent 08dcd05d7b
commit 3a21999a17

View File

@@ -153,7 +153,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
"""
log().section('Analyze query (using ICU tokenizer)')
normalized = list(filter(lambda p: p.text,
(qmod.Phrase(p.ptype, self.normalizer.transliterate(p.text))
(qmod.Phrase(p.ptype, self.normalize_text(p.text))
for p in phrases)))
query = qmod.QueryStruct(normalized)
log().var_dump('Normalized query', query.source)
@@ -187,6 +187,14 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
return query
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form. That is the
standardized form search will work with. All information removed
at this stage is inevitably lost.
"""
return cast(str, self.normalizer.transliterate(text))
def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
""" Transliterate the phrases and split them into tokens.