Complete tests for ICU tokenizer

This commit is contained in:
Sarah Hoffmann
2021-06-10 17:18:23 +02:00
parent 32ca631b74
commit 9ff4f66f55
6 changed files with 205 additions and 48 deletions

View File

@@ -101,10 +101,19 @@ class ICUNameProcessor:
else:
pos += 1
if startpos == 0:
return [self.to_ascii.transliterate(norm_name)]
results = []
return [self.to_ascii.transliterate(v + baseform[startpos:pos]).strip() for v in variants]
if startpos == 0:
trans_name = self.to_ascii.transliterate(norm_name).strip()
if trans_name:
results.append(trans_name)
else:
for variant in variants:
trans_name = self.to_ascii.transliterate(variant + baseform[startpos:pos]).strip()
if trans_name:
results.append(trans_name)
return results
def get_search_normalized(self, name):