diff --git a/src/nominatim_api/search/geocoder.py b/src/nominatim_api/search/geocoder.py index 3b039342..1fb67cf1 100644 --- a/src/nominatim_api/search/geocoder.py +++ b/src/nominatim_api/search/geocoder.py @@ -170,11 +170,20 @@ class ForwardGeocoder: if qword not in words: wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words) distance += len(qword) if wdist < 0.4 else 1 - # Compensate for the fact that country names do not get a - # match penalty yet by the tokenizer. - # Temporary hack that needs to be removed! + # Countries with high importance can dominate results when matched + # via an alternate-language name. Apply a language-aware penalty + # to offset this. if result.rank_address == 4: - distance *= 2 + if self.params.locales and result.names: + loc_names = [result.names[t] for t in self.params.locales.name_tags + if t in result.names] + if loc_names: + norm_loc = self.query_analyzer.normalize_text(' '.join(loc_names)) + loc_words = set(w for w in re.split('[-,: ]+', norm_loc) if w) + if loc_words and loc_words.isdisjoint(qwords): + result.accuracy += result.calculated_importance() * 0.5 + else: + distance *= 2 result.accuracy += distance * 0.3 / sum(len(w) for w in qwords) async def lookup_pois(self, categories: List[Tuple[str, str]], diff --git a/src/nominatim_api/types.py b/src/nominatim_api/types.py index 92c2b6b9..a9fd29a4 100644 --- a/src/nominatim_api/types.py +++ b/src/nominatim_api/types.py @@ -8,7 +8,7 @@ Complex datatypes used by the Nominatim API. """ from typing import Optional, Union, Tuple, NamedTuple, TypeVar, Type, Dict, \ - Any, List, Sequence + Any, List, Sequence, TYPE_CHECKING from collections import abc import dataclasses import datetime as dt @@ -17,6 +17,8 @@ import math from struct import unpack from binascii import unhexlify +if TYPE_CHECKING: + from .localization import Locales from .errors import UsageError @@ -573,6 +575,13 @@ class SearchDetails(LookupDetails): viewbox_x2: Optional[Bbox] = None + locales: Optional['Locales'] = dataclasses.field( + default=None, metadata={'transform': lambda v: v}) + """ Locale preferences of the caller. + Used during result re-ranking to prefer results that match the + caller's locale over results that only match in an alternate language. + """ + def __post_init__(self) -> None: if self.viewbox is not None: xext = (self.viewbox.maxlon - self.viewbox.minlon)/2 diff --git a/src/nominatim_api/v1/server_glue.py b/src/nominatim_api/v1/server_glue.py index c02a1307..995da8d1 100644 --- a/src/nominatim_api/v1/server_glue.py +++ b/src/nominatim_api/v1/server_glue.py @@ -334,6 +334,8 @@ async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: details['layers'] = DataLayer.ADDRESS else: details['layers'] = get_layers(params) + details['locales'] = Locales.from_accept_languages(get_accepted_languages(params), + params.config().OUTPUT_NAMES) # unstructured query parameters query = params.get('q', None) @@ -359,8 +361,7 @@ async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: except UsageError as err: params.raise_error(str(err)) - Locales.from_accept_languages(get_accepted_languages(params), - params.config().OUTPUT_NAMES).localize_results(results) + details['locales'].localize_results(results) if details['dedupe'] and len(results) > 1: results = helpers.deduplicate_results(results, max_results) diff --git a/test/bdd/features/db/query/search_simple.feature b/test/bdd/features/db/query/search_simple.feature index 10a26ed0..e5e771f7 100644 --- a/test/bdd/features/db/query/search_simple.feature +++ b/test/bdd/features/db/query/search_simple.feature @@ -80,3 +80,23 @@ Feature: Searching of simple objects | Chicago | Illinois | IL | | Auburn | Alabama | AL | | New Orleans | Louisiana | LA | + + # github #3210 + Scenario: Country with alternate-language name does not dominate when locale differs + Given the 1.0 grid with origin DE + | 1 | | 2 | + | | 10 | | + | 4 | | 3 | + Given the places + | osm | class | type | admin | name+name | name+name:fi | name+name:de | country | geometry | + | R1 | boundary | administrative | 2 | Turgei | Turgi | Testland | de | (1,2,3,4,1) | + Given the places + | osm | class | type | name+name | geometry | + | N10 | place | village | Turgi | 10 | + When importing + And geocoding "Turgi" + | accept-language | + | de | + Then result 0 contains + | object | + | N10 |