refactor variant production to use generators

This commit is contained in:
Sarah Hoffmann
2022-01-11 17:51:05 +01:00
parent 21156fc2a2
commit 630ad38a67

View File

@@ -176,14 +176,26 @@ class GenericTokenAnalysis:
""" Compute the spelling variants for the given normalized name """ Compute the spelling variants for the given normalized name
and transliterate the result. and transliterate the result.
""" """
results = set()
for variant in self._generate_word_variants(norm_name):
if not self.variant_only or variant.strip() != norm_name:
trans_name = self.to_ascii.transliterate(variant).strip()
if trans_name:
results.add(trans_name)
return list(results)
def _generate_word_variants(self, norm_name):
baseform = '^ ' + norm_name + ' ^' baseform = '^ ' + norm_name + ' ^'
baselen = len(baseform)
partials = [''] partials = ['']
startpos = 0 startpos = 0
if self.replacements is not None: if self.replacements is not None:
pos = 0 pos = 0
force_space = False force_space = False
while pos < len(baseform): while pos < baselen:
full, repl = self.replacements.longest_prefix_item(baseform[pos:], full, repl = self.replacements.longest_prefix_item(baseform[pos:],
(None, None)) (None, None))
if full is not None: if full is not None:
@@ -207,24 +219,9 @@ class GenericTokenAnalysis:
# No variants detected? Fast return. # No variants detected? Fast return.
if startpos == 0: if startpos == 0:
if self.variant_only: return (norm_name, )
return []
trans_name = self.to_ascii.transliterate(norm_name).strip() if startpos < baselen:
return [trans_name] if trans_name else [] return (part[1:] + baseform[startpos:-1] for part in partials)
return self._compute_result_set(partials, baseform[startpos:], return (part[1:-1] for part in partials)
norm_name if self.variant_only else '')
def _compute_result_set(self, partials, prefix, exclude):
    """ Build the list of unique transliterated variants.

        Each entry of `partials` is joined with `prefix`; the first and
        last character of the joined string are cut off and surrounding
        whitespace is removed. Candidates equal to `exclude` are dropped.
        The remaining candidates are passed through
        `self.to_ascii.transliterate()` and stripped again; results that
        end up empty are discarded. Duplicates are collapsed, so the
        order of the returned list is that of an unordered set.
    """
    transliterate = self.to_ascii.transliterate

    # Lazily assemble the candidate names without the outer marker characters.
    candidates = ((part + prefix)[1:-1].strip() for part in partials)

    # Only candidates that differ from the excluded name get transliterated.
    converted = (transliterate(name).strip()
                 for name in candidates if name != exclude)

    return list({name for name in converted if name})