introduce generic YAML config loader

Adds a function to the Configuration class to load a YAML
file. This means that searching for the file is generalised
and now works the same way for all configuration files. It
also changes the search logic so that it is always possible
to have a custom version of the configuration file in the
project directory.

Move ICU tokenizer to use new load function.
This commit is contained in:
Sarah Hoffmann
2021-09-03 18:16:12 +02:00
parent 18554dfed7
commit 1c42780bb5
6 changed files with 139 additions and 82 deletions

View File

@@ -8,7 +8,6 @@ import json
import logging
import re
from textwrap import dedent
from pathlib import Path
from nominatim.db.connection import connect
from nominatim.db.properties import set_property, get_property
@@ -49,12 +48,8 @@ class LegacyICUTokenizer(AbstractTokenizer):
This copies all necessary data in the project directory to make
sure the tokenizer remains stable even over updates.
"""
if config.TOKENIZER_CONFIG:
cfgfile = Path(config.TOKENIZER_CONFIG)
else:
cfgfile = config.config_dir / 'icu_tokenizer.yaml'
loader = ICURuleLoader(cfgfile)
loader = ICURuleLoader(config.load_sub_configuration('icu_tokenizer.yaml',
config='TOKENIZER_CONFIG'))
self.naming_rules = ICUNameProcessorRules(loader=loader)
self.term_normalization = config.TERM_NORMALIZATION
self.max_word_frequency = config.MAX_WORD_FREQUENCY