only consider partials in multi-words for initial count

This makes it less likely that meaningful words like 'hauptstrasse' are excluded just because they are frequent.
2026-02-16 15:47:58 +00:00 · 2021-06-26 11:57:09 +02:00
parent 5dd24b3ef0
commit b9fbfeff67
2 changed files with 3 additions and 3 deletions
--- a/test/python/test_tokenizer_legacy_icu.py
+++ b/test/python/test_tokenizer_legacy_icu.py
@@ -150,9 +150,8 @@ def test_init_word_table(tokenizer_factory, test_config, place_row, word_table):
    tok = tokenizer_factory()
    tok.init_new_db(test_config)

-    assert word_table.get_partial_words() == {('test', 1), ('52', 1),
+    assert word_table.get_partial_words() == {('test', 1),
                                              ('no', 1), ('area', 2),
-                                              ('holzstrasse', 1), ('holzstr', 1),
                                              ('holz', 1), ('strasse', 1),
                                              ('str', 1)}