unify ICUNameProcessorRules and ICURuleLoader

There is no need for the additional layer of indirection that
the ICUNameProcessorRules class adds. The ICURuleLoader can
fill the database properties directly.
This commit is contained in:
Sarah Hoffmann
2021-09-29 17:37:04 +02:00
parent 5e5addcdbf
commit 16daa57e47
14 changed files with 123 additions and 137 deletions

View File

@@ -4,15 +4,17 @@ Tests for import name normalisation and variant generation.
from textwrap import dedent
import pytest
import yaml
from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
from nominatim.tokenizer.icu_name_processor import ICUNameProcessor, ICUNameProcessorRules
from nominatim.errors import UsageError
@pytest.fixture
def cfgfile():
def cfgfile(def_config, tmp_path):
project_dir = tmp_path / 'project_dir'
project_dir.mkdir()
def_config.project_dir = project_dir
def _create_config(*variants, **kwargs):
content = dedent("""\
normalization:
@@ -30,7 +32,9 @@ def cfgfile():
content += '\n'.join((" - " + s for s in variants)) + '\n'
for k, v in kwargs:
content += " {}: {}\n".format(k, v)
return yaml.safe_load(content)
(project_dir / 'icu_tokenizer.yaml').write_text(content)
return def_config
return _create_config
@@ -40,10 +44,9 @@ def get_normalized_variants(proc, name):
def test_variants_empty(cfgfile):
fpath = cfgfile('saint -> 🜵', 'street -> st')
config = cfgfile('saint -> 🜵', 'street -> st')
rules = ICUNameProcessorRules(loader=ICURuleLoader(fpath))
proc = ICUNameProcessor(rules)
proc = ICURuleLoader(config).make_token_analysis()
assert get_normalized_variants(proc, '🜵') == []
assert get_normalized_variants(proc, '🜳') == []
@@ -83,8 +86,8 @@ VARIANT_TESTS = [
@pytest.mark.parametrize("rules,name,variants", VARIANT_TESTS)
def test_variants(cfgfile, rules, name, variants):
fpath = cfgfile(*rules)
proc = ICUNameProcessor(ICUNameProcessorRules(loader=ICURuleLoader(fpath)))
config = cfgfile(*rules)
proc = ICURuleLoader(config).make_token_analysis()
result = get_normalized_variants(proc, name)
@@ -93,10 +96,8 @@ def test_variants(cfgfile, rules, name, variants):
def test_search_normalized(cfgfile):
fpath = cfgfile('~street => s,st', 'master => mstr')
rules = ICUNameProcessorRules(loader=ICURuleLoader(fpath))
proc = ICUNameProcessor(rules)
config = cfgfile('~street => s,st', 'master => mstr')
proc = ICURuleLoader(config).make_token_analysis()
assert proc.get_search_normalized('Master Street') == 'master street'
assert proc.get_search_normalized('Earnes St') == 'earnes st'