move generation of normalized token form to analyzer

This gives the analyzer more flexibility in choosing the normalized
form. In particular, an analyzer creating different variants can choose
the variant that will be used as the canonical form.
This commit is contained in:
Sarah Hoffmann
2022-02-15 12:15:18 +01:00
parent 691ec08586
commit 837d44391c
5 changed files with 21 additions and 9 deletions

View File

@@ -32,8 +32,9 @@ def make_analyser(*variants, variant_only=False):
rules['mode'] = 'variant-only'
config = module.configure(rules, DEFAULT_NORMALIZATION)
trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
+norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
-return module.create(trans, config)
+return module.create(norm, trans, config)
def get_normalized_variants(proc, name):
@@ -45,8 +46,9 @@ def test_no_variants():
rules = { 'analyzer': 'generic' }
config = module.configure(rules, DEFAULT_NORMALIZATION)
trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
+norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
-proc = module.create(trans, config)
+proc = module.create(norm, trans, config)
assert get_normalized_variants(proc, '大德!') == ['dà dé']

View File

@@ -33,8 +33,9 @@ class TestMutationNoVariants:
}
config = module.configure(rules, DEFAULT_NORMALIZATION)
trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
+norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
-self.analysis = module.create(trans, config)
+self.analysis = module.create(norm, trans, config)
def variants(self, name):