mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Move text normalization into a separate function
This commit is contained in:
@@ -153,7 +153,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
|||||||
"""
|
"""
|
||||||
log().section('Analyze query (using ICU tokenizer)')
|
log().section('Analyze query (using ICU tokenizer)')
|
||||||
normalized = list(filter(lambda p: p.text,
|
normalized = list(filter(lambda p: p.text,
|
||||||
(qmod.Phrase(p.ptype, self.normalizer.transliterate(p.text))
|
(qmod.Phrase(p.ptype, self.normalize_text(p.text))
|
||||||
for p in phrases)))
|
for p in phrases)))
|
||||||
query = qmod.QueryStruct(normalized)
|
query = qmod.QueryStruct(normalized)
|
||||||
log().var_dump('Normalized query', query.source)
|
log().var_dump('Normalized query', query.source)
|
||||||
@@ -187,6 +187,14 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
|||||||
return query
|
return query
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_text(self, text: str) -> str:
|
||||||
|
""" Bring the given text into a normalized form. That is the
|
||||||
|
standardized form search will work with. All information removed
|
||||||
|
at this stage is inevitably lost.
|
||||||
|
"""
|
||||||
|
return cast(str, self.normalizer.transliterate(text))
|
||||||
|
|
||||||
|
|
||||||
def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
|
def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
|
||||||
""" Transliterate the phrases and split them into tokens.
|
""" Transliterate the phrases and split them into tokens.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user