From 9c2d4f4285575e679e2bf625ba8bd175112c738f Mon Sep 17 00:00:00 2001
From: Itz-Agasta <rupamgolui69@gmail.com>
Date: Fri, 20 Feb 2026 22:27:30 +0530
Subject: [PATCH 1/2] Adds language-aware country penalty in forward geocoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Threads the caller's Accept-Language preference into ForwardGeocoder
and uses it in rerank_by_query to check whether the query matches the
localized name of a country result.

If the caller's locale renders the country name differently (e.g. pt-BR
gives "Brasil" ≠ "Brasilia"), half of the country's importance is added
as an accuracy penalty, neutralising its dominance over lower-ranked
places.

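If the locale matches (e.g. Finnish gives "Brasilia" = "Brasilia"),
no penalty is applied and the country correctly wins.

When no locale preference is available or the country result carries
no names, the previous doubling of the word-match distance is kept as
a fallback.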
---
 src/nominatim_api/search/geocoder.py | 17 +++++++++++++----
 src/nominatim_api/types.py           | 11 ++++++++++-
 src/nominatim_api/v1/server_glue.py  |  5 +++--
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/src/nominatim_api/search/geocoder.py b/src/nominatim_api/search/geocoder.py
index 3b039342..1fb67cf1 100644
--- a/src/nominatim_api/search/geocoder.py
+++ b/src/nominatim_api/search/geocoder.py
@@ -170,11 +170,20 @@ class ForwardGeocoder:
                 if qword not in words:
                     wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
                     distance += len(qword) if wdist < 0.4 else 1
-            # Compensate for the fact that country names do not get a
-            # match penalty yet by the tokenizer.
-            # Temporary hack that needs to be removed!
+            # Countries with high importance can dominate results when matched
+            # via an alternate-language name. Apply a language-aware penalty
+            # to offset this.
             if result.rank_address == 4:
-                distance *= 2
+                if self.params.locales and result.names:
+                    loc_names = [result.names[t] for t in self.params.locales.name_tags
+                                 if t in result.names]
+                    if loc_names:
+                        norm_loc = self.query_analyzer.normalize_text(' '.join(loc_names))
+                        loc_words = set(w for w in re.split('[-,: ]+', norm_loc) if w)
+                        if loc_words and loc_words.isdisjoint(qwords):
+                            result.accuracy += result.calculated_importance() * 0.5
+                else:
+                    distance *= 2
             result.accuracy += distance * 0.3 / sum(len(w) for w in qwords)
 
     async def lookup_pois(self, categories: List[Tuple[str, str]],
diff --git a/src/nominatim_api/types.py b/src/nominatim_api/types.py
index 92c2b6b9..a9fd29a4 100644
--- a/src/nominatim_api/types.py
+++ b/src/nominatim_api/types.py
@@ -8,7 +8,7 @@
 Complex datatypes used by the Nominatim API.
 """
 from typing import Optional, Union, Tuple, NamedTuple, TypeVar, Type, Dict, \
-                   Any, List, Sequence
+                   Any, List, Sequence, TYPE_CHECKING
 from collections import abc
 import dataclasses
 import datetime as dt
@@ -17,6 +17,8 @@ import math
 from struct import unpack
 from binascii import unhexlify
 
+if TYPE_CHECKING:
+    from .localization import Locales
 from .errors import UsageError
 
 
@@ -573,6 +575,13 @@ class SearchDetails(LookupDetails):
 
     viewbox_x2: Optional[Bbox] = None
 
+    locales: Optional['Locales'] = dataclasses.field(
+        default=None, metadata={'transform': lambda v: v})
+    """ Locale preferences of the caller.
+        Used during result re-ranking to prefer results that match the
+        caller's locale over results that only match in an alternate language.
+    """
+
     def __post_init__(self) -> None:
         if self.viewbox is not None:
             xext = (self.viewbox.maxlon - self.viewbox.minlon)/2
diff --git a/src/nominatim_api/v1/server_glue.py b/src/nominatim_api/v1/server_glue.py
index c02a1307..995da8d1 100644
--- a/src/nominatim_api/v1/server_glue.py
+++ b/src/nominatim_api/v1/server_glue.py
@@ -334,6 +334,8 @@ async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
         details['layers'] = DataLayer.ADDRESS
     else:
         details['layers'] = get_layers(params)
+    details['locales'] = Locales.from_accept_languages(get_accepted_languages(params),
+                                                       params.config().OUTPUT_NAMES)
 
     # unstructured query parameters
     query = params.get('q', None)
@@ -359,8 +361,7 @@ async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
     except UsageError as err:
         params.raise_error(str(err))
 
-    Locales.from_accept_languages(get_accepted_languages(params),
-                                  params.config().OUTPUT_NAMES).localize_results(results)
+    details['locales'].localize_results(results)
 
     if details['dedupe'] and len(results) > 1:
         results = helpers.deduplicate_results(results, max_results)

From 36a364ec25876603d477c2cf02cfdb6c39b0b824 Mon Sep 17 00:00:00 2001
From: Itz-Agasta <rupamgolui69@gmail.com>
Date: Mon, 2 Mar 2026 12:36:45 +0530
Subject: [PATCH 2/2] Adds test for locale-sensitive country name matching

Introduces a scenario to verify that a country's alternate-language name
does not dominate search results when the requested locale differs:
searching for "Turgi" with accept-language "de" must return the village
named "Turgi" ahead of a country that carries that name only in Finnish.

Relates to #3210
---
 .../features/db/query/search_simple.feature   | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/test/bdd/features/db/query/search_simple.feature b/test/bdd/features/db/query/search_simple.feature
index 10a26ed0..e5e771f7 100644
--- a/test/bdd/features/db/query/search_simple.feature
+++ b/test/bdd/features/db/query/search_simple.feature
@@ -80,3 +80,23 @@ Feature: Searching of simple objects
         | Chicago     | Illinois  | IL  |
         | Auburn      | Alabama   | AL  |
         | New Orleans | Louisiana | LA  |
+
+    # github #3210
+    Scenario: Country with alternate-language name does not dominate when locale differs
+        Given the 1.0 grid with origin DE
+         | 1 |    | 2 |
+         |   | 10 |   |
+         | 4 |    | 3 |
+        Given the places
+         | osm  | class    | type           | admin | name+name | name+name:fi | name+name:de | country | geometry    |
+         | R1   | boundary | administrative | 2     | Turgei    | Turgi        | Testland     | de      | (1,2,3,4,1) |
+        Given the places
+         | osm  | class | type    | name+name | geometry |
+         | N10  | place | village | Turgi     | 10       |
+        When importing
+        And geocoding "Turgi"
+         | accept-language |
+         | de              |
+        Then result 0 contains
+         | object |
+         | N10    |