only count partial terms from multi-word names in the initial word count

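This makes it less likely that we exclude meaningful words like 'hauptstrasse' just because they are frequent. Name variants that consist of a single word no longer contribute to the frequency count; only variants containing a space are split into terms and counted.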
2026-02-26 11:08:13 +00:00 · 2021-06-26 11:57:09 +02:00
parent 5dd24b3ef0
commit b9fbfeff67
2 changed files with 3 additions and 3 deletions
--- a/nominatim/tokenizer/legacy_icu_tokenizer.py
+++ b/nominatim/tokenizer/legacy_icu_tokenizer.py
@@ -168,7 +168,8 @@ class LegacyICUTokenizer:
                for name, cnt in cur:
                    terms = set()
                    for word in name_proc.get_variants_ascii(name_proc.get_normalized(name)):
-                        terms.update(word.split())
+                        if ' ' in word:
+                            terms.update(word.split())
                    for term in terms:
                        words[term] += cnt