remove documentation around legacy tokenizer

2026-02-16 15:47:58 +00:00 · 2024-09-21 18:27:01 +02:00
parent b54ff7d766
commit 4825a0bda3
7 changed files with 11 additions and 212 deletions
--- a/settings/env.defaults
+++ b/settings/env.defaults
@@ -18,12 +18,6 @@ NOMINATIM_DATABASE_WEBUSER="www-data"
 # Currently available tokenizers: icu, legacy
 NOMINATIM_TOKENIZER="icu"

-# Number of occurrences of a word before it is considered frequent.
-# Similar to the concept of stop words. Frequent partial words get ignored
-# or handled differently during search.
-# Changing this value requires a reimport.
-NOMINATIM_MAX_WORD_FREQUENCY=50000
-
 # If true, admin level changes on places with many contained children are blocked.
 NOMINATIM_LIMIT_REINDEXING=yes

@@ -34,12 +28,6 @@ NOMINATIM_LIMIT_REINDEXING=yes
 # Currently only affects the initial import of country names and special phrases.
 NOMINATIM_LANGUAGES=

-# Rules for normalizing terms for comparisons.
-# The default is to remove accents and punctuation and to lower-case the
-# term. Spaces are kept but collapsed to one standard space.
-# Changing this value requires a reimport.
-NOMINATIM_TERM_NORMALIZATION=":: NFD (); [[:Nonspacing Mark:] [:Cf:]] >;  :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();"
-
 # Configuration file for the tokenizer.
 # The content depends on the tokenizer used. If left empty the default settings
 # for the chosen tokenizer will be used. The configuration can only be set