mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
improve penalty for token-split words
The rematch penalty for partial words created by the transliteration needs to take into account that they are rematched against the full word. That means that a missing beginning or end of the word should not incur a significant penalty.
This commit is contained in:
@@ -83,7 +83,7 @@ class ICUToken(qmod.Token):
|
||||
seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
|
||||
distance = 0
|
||||
for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
|
||||
if tag == 'delete' and (afrom == 0 or ato == len(self.lookup_word)):
|
||||
if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
|
||||
distance += 1
|
||||
elif tag == 'replace':
|
||||
distance += max((ato-afrom), (bto-bfrom))
|
||||
|
||||
Reference in New Issue
Block a user