forked from hans/Nominatim
complete tests for icu tokenizer
This commit is contained in:
@@ -16,12 +16,14 @@ def cfgfile(tmp_path, suffix='.yaml'):
|
||||
content = dedent("""\
|
||||
normalization:
|
||||
- ":: NFD ()"
|
||||
- "'🜳' > ' '"
|
||||
- "[[:Nonspacing Mark:] [:Cf:]] >"
|
||||
- ":: lower ()"
|
||||
- "[[:Punctuation:][:Space:]]+ > ' '"
|
||||
- ":: NFC ()"
|
||||
transliteration:
|
||||
- ":: Latin ()"
|
||||
- "'🜵' > ' '"
|
||||
""")
|
||||
content += "compound_suffixes:\n"
|
||||
content += '\n'.join((" - " + s for s in suffixes)) + '\n'
|
||||
@@ -52,6 +54,17 @@ def test_simple_variants(cfgfile):
|
||||
assert get_normalized_variants(proc, "hallo") == ['hallo']
|
||||
|
||||
|
||||
def test_variants_empty(cfgfile):
    """Check variant generation for names that collapse to a blank.

    The config maps '🜳' to a space during normalization and '🜵' to a
    space during transliteration.  Both symbols are expected to produce
    no variants at all, and the 'saint => 🜵' replacement is expected to
    leave 'saint' with only itself as a variant.
    """
    config_path = cfgfile([], ['saint => 🜵', 'street => st'])

    loader = ICURuleLoader(config_path)
    processor = ICUNameProcessor(ICUNameProcessorRules(loader=loader))

    # (input name, expected sorted variants) -- checked in the original order.
    expectations = (
        ('🜵', []),
        ('🜳', []),
        ('saint', ['saint']),
    )
    for name, expected in expectations:
        assert get_normalized_variants(processor, name) == expected
|
||||
|
||||
|
||||
def test_multiple_replacements(cfgfile):
|
||||
fpath = cfgfile([], ['saint => s,st', 'street => st'])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user