mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-07 02:24:08 +00:00
Merge pull request #3995 from Itz-Agasta/exp
Add language-aware country penalty in forward geocoding
This commit is contained in:
@@ -170,11 +170,20 @@ class ForwardGeocoder:
|
|||||||
if qword not in words:
|
if qword not in words:
|
||||||
wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
|
wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
|
||||||
distance += len(qword) if wdist < 0.4 else 1
|
distance += len(qword) if wdist < 0.4 else 1
|
||||||
# Compensate for the fact that country names do not get a
|
# Countries with high importance can dominate results when matched
|
||||||
# match penalty yet by the tokenizer.
|
# via an alternate-language name. Apply a language-aware penalty
|
||||||
# Temporary hack that needs to be removed!
|
# to offset this.
|
||||||
if result.rank_address == 4:
|
if result.rank_address == 4:
|
||||||
distance *= 2
|
if self.params.locales and result.names:
|
||||||
|
loc_names = [result.names[t] for t in self.params.locales.name_tags
|
||||||
|
if t in result.names]
|
||||||
|
if loc_names:
|
||||||
|
norm_loc = self.query_analyzer.normalize_text(' '.join(loc_names))
|
||||||
|
loc_words = set(w for w in re.split('[-,: ]+', norm_loc) if w)
|
||||||
|
if loc_words and loc_words.isdisjoint(qwords):
|
||||||
|
result.accuracy += result.calculated_importance() * 0.5
|
||||||
|
else:
|
||||||
|
distance *= 2
|
||||||
result.accuracy += distance * 0.3 / sum(len(w) for w in qwords)
|
result.accuracy += distance * 0.3 / sum(len(w) for w in qwords)
|
||||||
|
|
||||||
async def lookup_pois(self, categories: List[Tuple[str, str]],
|
async def lookup_pois(self, categories: List[Tuple[str, str]],
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
Complex datatypes used by the Nominatim API.
|
Complex datatypes used by the Nominatim API.
|
||||||
"""
|
"""
|
||||||
from typing import Optional, Union, Tuple, NamedTuple, TypeVar, Type, Dict, \
|
from typing import Optional, Union, Tuple, NamedTuple, TypeVar, Type, Dict, \
|
||||||
Any, List, Sequence
|
Any, List, Sequence, TYPE_CHECKING
|
||||||
from collections import abc
|
from collections import abc
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
@@ -17,6 +17,8 @@ import math
|
|||||||
from struct import unpack
|
from struct import unpack
|
||||||
from binascii import unhexlify
|
from binascii import unhexlify
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from .localization import Locales
|
||||||
from .errors import UsageError
|
from .errors import UsageError
|
||||||
|
|
||||||
|
|
||||||
@@ -573,6 +575,13 @@ class SearchDetails(LookupDetails):
|
|||||||
|
|
||||||
viewbox_x2: Optional[Bbox] = None
|
viewbox_x2: Optional[Bbox] = None
|
||||||
|
|
||||||
|
locales: Optional['Locales'] = dataclasses.field(
|
||||||
|
default=None, metadata={'transform': lambda v: v})
|
||||||
|
""" Locale preferences of the caller.
|
||||||
|
Used during result re-ranking to prefer results that match the
|
||||||
|
caller's locale over results that only match in an alternate language.
|
||||||
|
"""
|
||||||
|
|
||||||
def __post_init__(self) -> None:
|
def __post_init__(self) -> None:
|
||||||
if self.viewbox is not None:
|
if self.viewbox is not None:
|
||||||
xext = (self.viewbox.maxlon - self.viewbox.minlon)/2
|
xext = (self.viewbox.maxlon - self.viewbox.minlon)/2
|
||||||
|
|||||||
@@ -334,6 +334,8 @@ async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
|
|||||||
details['layers'] = DataLayer.ADDRESS
|
details['layers'] = DataLayer.ADDRESS
|
||||||
else:
|
else:
|
||||||
details['layers'] = get_layers(params)
|
details['layers'] = get_layers(params)
|
||||||
|
details['locales'] = Locales.from_accept_languages(get_accepted_languages(params),
|
||||||
|
params.config().OUTPUT_NAMES)
|
||||||
|
|
||||||
# unstructured query parameters
|
# unstructured query parameters
|
||||||
query = params.get('q', None)
|
query = params.get('q', None)
|
||||||
@@ -359,8 +361,7 @@ async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
|
|||||||
except UsageError as err:
|
except UsageError as err:
|
||||||
params.raise_error(str(err))
|
params.raise_error(str(err))
|
||||||
|
|
||||||
Locales.from_accept_languages(get_accepted_languages(params),
|
details['locales'].localize_results(results)
|
||||||
params.config().OUTPUT_NAMES).localize_results(results)
|
|
||||||
|
|
||||||
if details['dedupe'] and len(results) > 1:
|
if details['dedupe'] and len(results) > 1:
|
||||||
results = helpers.deduplicate_results(results, max_results)
|
results = helpers.deduplicate_results(results, max_results)
|
||||||
|
|||||||
@@ -80,3 +80,23 @@ Feature: Searching of simple objects
|
|||||||
| Chicago | Illinois | IL |
|
| Chicago | Illinois | IL |
|
||||||
| Auburn | Alabama | AL |
|
| Auburn | Alabama | AL |
|
||||||
| New Orleans | Louisiana | LA |
|
| New Orleans | Louisiana | LA |
|
||||||
|
|
||||||
|
# github #3210
|
||||||
|
Scenario: Country with alternate-language name does not dominate when locale differs
|
||||||
|
Given the 1.0 grid with origin DE
|
||||||
|
| 1 | | 2 |
|
||||||
|
| | 10 | |
|
||||||
|
| 4 | | 3 |
|
||||||
|
Given the places
|
||||||
|
| osm | class | type | admin | name+name | name+name:fi | name+name:de | country | geometry |
|
||||||
|
| R1 | boundary | administrative | 2 | Turgei | Turgi | Testland | de | (1,2,3,4,1) |
|
||||||
|
Given the places
|
||||||
|
| osm | class | type | name+name | geometry |
|
||||||
|
| N10 | place | village | Turgi | 10 |
|
||||||
|
When importing
|
||||||
|
And geocoding "Turgi"
|
||||||
|
| accept-language |
|
||||||
|
| de |
|
||||||
|
Then result 0 contains
|
||||||
|
| object |
|
||||||
|
| N10 |
|
||||||
|
|||||||
Reference in New Issue
Block a user