From 831fccdaee7deeb1af8bbcd0d802eafe9d4dd92d Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 31 May 2025 09:45:28 +0200 Subject: [PATCH 1/5] add FAA codes (US version of IATA codes) for airports --- lib-lua/themes/nominatim/presets.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib-lua/themes/nominatim/presets.lua b/lib-lua/themes/nominatim/presets.lua index 00ff0f52..2bccc3af 100644 --- a/lib-lua/themes/nominatim/presets.lua +++ b/lib-lua/themes/nominatim/presets.lua @@ -332,7 +332,7 @@ module.NAME_TAGS.core = {main = {'name', 'name:*', } module.NAME_TAGS.address = {house = {'addr:housename'}} module.NAME_TAGS.poi = group_merge({main = {'brand'}, - extra = {'iata', 'icao'}}, + extra = {'iata', 'icao', 'faa'}}, module.NAME_TAGS.core) -- Address tagging From f2236f68f18c3084b8a5810dd3cfe7d642a2b51d Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 1 Jun 2025 11:53:23 +0200 Subject: [PATCH 2/5] when rematching only distinguish between perfect, somewhat and bad match --- src/nominatim_api/search/geocoder.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/nominatim_api/search/geocoder.py b/src/nominatim_api/search/geocoder.py index 5fefe5ea..dfc6bc52 100644 --- a/src/nominatim_api/search/geocoder.py +++ b/src/nominatim_api/search/geocoder.py @@ -153,11 +153,10 @@ class ForwardGeocoder: if not words: continue for qword in qwords: - wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words) - if wdist < 0.5: - distance += len(qword) - else: - distance += (1.0 - wdist) * len(qword) + # only add distance penalty if there is no perfect match + if qword not in words: + wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words) + distance += len(qword) if wdist < 0.4 else 1 # Compensate for the fact that country names do not get a # match penalty yet by the tokenizer. # Temporary hack that needs to be removed! From 10a7d1106d52ddb32598cef46b8c58bbf841566a Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 1 Jun 2025 11:54:21 +0200 Subject: [PATCH 3/5] reduce influence of query rematching a little bit --- src/nominatim_api/search/geocoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nominatim_api/search/geocoder.py b/src/nominatim_api/search/geocoder.py index dfc6bc52..999a4289 100644 --- a/src/nominatim_api/search/geocoder.py +++ b/src/nominatim_api/search/geocoder.py @@ -162,7 +162,7 @@ class ForwardGeocoder: # Temporary hack that needs to be removed! if result.rank_address == 4: distance *= 2 - result.accuracy += distance * 0.4 / sum(len(w) for w in qwords) + result.accuracy += distance * 0.3 / sum(len(w) for w in qwords) async def lookup_pois(self, categories: List[Tuple[str, str]], phrases: List[Phrase]) -> SearchResults: From 90050de717928d70305a55e8e4ba3793e918af38 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 1 Jun 2025 11:55:27 +0200 Subject: [PATCH 4/5] only rerank results if there is more than one With one result order is obvious. --- src/nominatim_api/search/geocoder.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/nominatim_api/search/geocoder.py b/src/nominatim_api/search/geocoder.py index 999a4289..abf1a992 100644 --- a/src/nominatim_api/search/geocoder.py +++ b/src/nominatim_api/search/geocoder.py @@ -210,9 +210,10 @@ class ForwardGeocoder: results = self.pre_filter_results(results) await add_result_details(self.conn, results, self.params) log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results)) - self.rerank_by_query(query, results) - log().result_dump('Results after reranking', ((r.accuracy, r) for r in results)) - results = self.sort_and_cut_results(results) + if len(results) > 1: + self.rerank_by_query(query, results) + log().result_dump('Results after reranking', ((r.accuracy, r) for r in results)) + results = self.sort_and_cut_results(results) log().result_dump('Final Results', ((r.accuracy, r) for r in results)) return results From 87a8c246a097cd9c218eb87871ba059d0435965a Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 1 Jun 2025 12:00:36 +0200 Subject: [PATCH 5/5] improve result cutting when a POI comes out with top importance --- src/nominatim_api/search/geocoder.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/nominatim_api/search/geocoder.py b/src/nominatim_api/search/geocoder.py index abf1a992..b48e96e8 100644 --- a/src/nominatim_api/search/geocoder.py +++ b/src/nominatim_api/search/geocoder.py @@ -118,17 +118,20 @@ class ForwardGeocoder: """ Remove badly matching results, sort by ranking and limit to the configured number of results. """ - if results: - results.sort(key=lambda r: (r.ranking, 0 if r.bbox is None else -r.bbox.area)) - min_rank = results[0].rank_search - min_ranking = results[0].ranking - results = SearchResults(r for r in results - if (r.ranking + 0.03 * (r.rank_search - min_rank) - < min_ranking + 0.5)) + results.sort(key=lambda r: (r.ranking, 0 if r.bbox is None else -r.bbox.area)) - results = SearchResults(results[:self.limit]) + final = SearchResults() + min_rank = results[0].rank_search + min_ranking = results[0].ranking - return results + for r in results: + if r.ranking + 0.03 * (r.rank_search - min_rank) < min_ranking + 0.5: + final.append(r) + min_rank = min(r.rank_search, min_rank) + if len(final) == self.limit: + break + + return final def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None: """ Adjust the accuracy of the localized result according to how well