mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-11 21:34:06 +00:00
Adds language-aware country penalty in forward geocoding
Threads the caller's Accept-Language preference into ForwardGeocoder and uses it in rerank_by_query to check whether the query matches the localized name of a country result. If the caller's locale renders the country name differently from the query (e.g. pt-BR gives "Brasil" ≠ "Brasilia"), half of the country's importance is added as an accuracy penalty, neutralising its dominance over lower-ranked places. If the localized name matches the query (e.g. Finnish gives "Brasilia" = "Brasilia"), no penalty is applied and the country correctly wins.
This commit is contained in:
@@ -170,11 +170,20 @@ class ForwardGeocoder:
|
||||
if qword not in words:
|
||||
wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
|
||||
distance += len(qword) if wdist < 0.4 else 1
|
||||
# Compensate for the fact that country names do not get a
|
||||
# match penalty yet by the tokenizer.
|
||||
# Temporary hack that needs to be removed!
|
||||
# Countries with high importance can dominate results when matched
|
||||
# via an alternate-language name. Apply a language-aware penalty
|
||||
# to offset this.
|
||||
if result.rank_address == 4:
|
||||
distance *= 2
|
||||
if self.params.locales and result.names:
|
||||
loc_names = [result.names[t] for t in self.params.locales.name_tags
|
||||
if t in result.names]
|
||||
if loc_names:
|
||||
norm_loc = self.query_analyzer.normalize_text(' '.join(loc_names))
|
||||
loc_words = set(w for w in re.split('[-,: ]+', norm_loc) if w)
|
||||
if loc_words and loc_words.isdisjoint(qwords):
|
||||
result.accuracy += result.calculated_importance() * 0.5
|
||||
else:
|
||||
distance *= 2
|
||||
result.accuracy += distance * 0.3 / sum(len(w) for w in qwords)
|
||||
|
||||
async def lookup_pois(self, categories: List[Tuple[str, str]],
|
||||
|
||||
Reference in New Issue
Block a user