diff --git a/src/nominatim_api/search/db_search_builder.py b/src/nominatim_api/search/db_search_builder.py index ae69e4ae..f90c6d7f 100644 --- a/src/nominatim_api/search/db_search_builder.py +++ b/src/nominatim_api/search/db_search_builder.py @@ -187,7 +187,7 @@ class SearchBuilder: dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny)] sdata.housenumbers = dbf.WeightedStrings([], []) - yield dbs.PlaceSearch(0.05, sdata, expected_count, True) + yield dbs.PlaceSearch(0.0, sdata, expected_count, True) def build_name_search(self, sdata: dbf.SearchData, name: qmod.TokenRange, address: List[qmod.TokenRange], @@ -342,7 +342,10 @@ class SearchBuilder: heapq.heappush(todo, (-tlist.end, tlist.end, rank.with_token(t, chgpenalty))) elif tlist.end == trange.end: - ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens) + chgpenalty = self.query.get_in_word_penalty( + qmod.TokenRange(pos, tlist.end)) + ranks.extend(rank.with_token(t, chgpenalty) + for t in tlist.tokens) if len(ranks) >= 10: # Too many variants, bail out and only add diff --git a/src/nominatim_api/search/geocoder.py b/src/nominatim_api/search/geocoder.py index 3fcb4101..3b039342 100644 --- a/src/nominatim_api/search/geocoder.py +++ b/src/nominatim_api/search/geocoder.py @@ -80,7 +80,7 @@ class ForwardGeocoder: qs = self.params.query_stats qs['search_min_penalty'] = round(searches[0].penalty, 2) - min_ranking = searches[0].penalty + 2.0 + min_ranking = searches[0].penalty + 1.5 prev_penalty = 0.0 for i, search in enumerate(searches): if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 15): @@ -103,7 +103,9 @@ class ForwardGeocoder: qs['search_min_result_penalty'] = spenalty qs['search_best_penalty_round'] = i results[rhash] = result - min_ranking = min(min_ranking, result.accuracy * 1.2, 2.0) + min_ranking = min(min_ranking, + search.penalty + 0.4, + result.accuracy + 0.1) log().result_dump('Results', ((r.accuracy, r) for r in lookup_results)) prev_penalty = search.penalty if self.timeout.is_elapsed(): diff --git a/src/nominatim_api/search/icu_tokenizer.py b/src/nominatim_api/search/icu_tokenizer.py index 4be15d67..4ab85fd3 100644 --- a/src/nominatim_api/search/icu_tokenizer.py +++ b/src/nominatim_api/search/icu_tokenizer.py @@ -202,7 +202,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): term = ' '.join(n.term_lookup for n in query.nodes[start + 1:end + 1]) query.add_token(qmod.TokenRange(start, end), qmod.TOKEN_POSTCODE, - ICUToken(penalty=0.1, token=0, count=1, addr_count=1, + ICUToken(penalty=0.0, token=0, count=1, addr_count=1, lookup_word=pc, word_token=term, info=None)) self.rerank_tokens(query) @@ -288,7 +288,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): if need_hnr and is_full_token \ and len(node.term_normalized) <= 4 and node.term_normalized.isdigit(): query.add_token(qmod.TokenRange(i-1, i), qmod.TOKEN_HOUSENUMBER, - ICUToken(penalty=0.5, token=0, + ICUToken(penalty=0.2, token=0, count=1, addr_count=1, lookup_word=node.term_lookup, word_token=node.term_lookup, info=None)) @@ -309,6 +309,9 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): len(query.nodes[end].term_lookup) > 4): for token in tokens: token.penalty += 0.39 + if (start + 1 == end): + if partial := query.nodes[start].partial: + partial.penalty += 0.39 # If it looks like a simple housenumber, prefer that. if qmod.TOKEN_HOUSENUMBER in tlist: @@ -319,6 +322,9 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): if ttype != qmod.TOKEN_HOUSENUMBER: for token in tokens: token.penalty += penalty + if (start + 1 == end): + if partial := query.nodes[start].partial: + partial.penalty += penalty # rerank tokens against the normalized form norm = ''.join(f"{n.term_normalized}{'' if n.btype == qmod.BREAK_TOKEN else ' '}" diff --git a/src/nominatim_api/search/token_assignment.py b/src/nominatim_api/search/token_assignment.py index 798ee546..159b36ac 100644 --- a/src/nominatim_api/search/token_assignment.py +++ b/src/nominatim_api/search/token_assignment.py @@ -380,17 +380,23 @@ class _TokenSequence: if base.postcode and base.postcode.start == 0: self.penalty += 0.1 + min_penalty = self.penalty + 2.0 + # Left-to-right reading of the address if self.direction != -1: - yield from self._get_assignments_address_forward(base, query) + for result in self._get_assignments_address_forward(base, query): + min_penalty = min(min_penalty, result.penalty) + yield result # Right-to-left reading of the address if self.direction != 1: - yield from self._get_assignments_address_backward(base, query) + for result in self._get_assignments_address_backward(base, query): + min_penalty = min(min_penalty, result.penalty) + yield result # variant for special housenumber searches if base.housenumber and not base.qualifier: - yield dataclasses.replace(base, penalty=self.penalty) + yield dataclasses.replace(base, penalty=min_penalty + 0.1) def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]: diff --git a/src/nominatim_db/data/place_info.py b/src/nominatim_db/data/place_info.py index 5fc6a48a..32f86c96 100644 --- a/src/nominatim_db/data/place_info.py +++ b/src/nominatim_db/data/place_info.py @@ -2,13 +2,13 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ Wrapper around place information the indexer gets from the database and hands to the tokenizer. """ -from typing import Optional, Mapping, Any, Tuple +from typing import Optional, Mapping, Any, Tuple, cast class PlaceInfo: @@ -56,7 +56,7 @@ class PlaceInfo: [1]: ../customize/Ranking.md#address-rank """ - return self._info.get('rank_address', 0) + return cast(int, self._info.get('rank_address', 0)) @property def centroid(self) -> Optional[Tuple[float, float]]: diff --git a/test/bdd/features/api/search/v1_geocodejson.feature b/test/bdd/features/api/search/v1_geocodejson.feature index 99fff0e4..785b45d1 100644 --- a/test/bdd/features/api/search/v1_geocodejson.feature +++ b/test/bdd/features/api/search/v1_geocodejson.feature @@ -23,8 +23,8 @@ Feature: Search API geocodejson output Scenario: Search geocodejson - Town street-level address with footway When sending v1/search with format geocodejson - | q | addressdetails | - | burg gutenberg 6000 jahre geschichte | 1 | + | q | addressdetails | + | 6000 jahre geschichte | 1 | Then a HTTP 200 is returned And the result is valid geocodejson And all results contain diff --git a/test/python/api/search/test_token_assignment.py b/test/python/api/search/test_token_assignment.py index e45352d7..0b2d7cb9 100644 --- a/test/python/api/search/test_token_assignment.py +++ b/test/python/api/search/test_token_assignment.py @@ -127,7 +127,7 @@ def test_housenumber_and_street(): check_assignments(yield_token_assignments(q), TokenAssignment(name=TokenRange(1, 2), housenumber=TokenRange(0, 1)), - TokenAssignment(address=[TokenRange(1, 2)], + TokenAssignment(penalty=0.1, address=[TokenRange(1, 2)], housenumber=TokenRange(0, 1))) @@ -138,7 +138,7 @@ def test_housenumber_and_street_backwards(): check_assignments(yield_token_assignments(q), TokenAssignment(name=TokenRange(0, 1), housenumber=TokenRange(1, 2)), - TokenAssignment(address=[TokenRange(0, 1)], + TokenAssignment(penalty=0.1, address=[TokenRange(0, 1)], housenumber=TokenRange(1, 2))) @@ -154,7 +154,7 @@ def test_housenumber_and_postcode(): housenumber=TokenRange(1, 2), address=[TokenRange(2, 3)], postcode=TokenRange(3, 4)), - TokenAssignment(penalty=pytest.approx(0.3), + TokenAssignment(penalty=pytest.approx(0.4), housenumber=TokenRange(1, 2), address=[TokenRange(0, 1), TokenRange(2, 3)], postcode=TokenRange(3, 4))) @@ -172,7 +172,7 @@ def test_postcode_and_housenumber(): housenumber=TokenRange(3, 4), address=[TokenRange(0, 1)], postcode=TokenRange(1, 2)), - TokenAssignment(penalty=pytest.approx(0.3), + TokenAssignment(penalty=pytest.approx(0.4), housenumber=TokenRange(3, 4), address=[TokenRange(0, 1), TokenRange(2, 3)], postcode=TokenRange(1, 2))) @@ -218,7 +218,7 @@ def test_housenumber_many_phrases(): housenumber=TokenRange(3, 4), address=[TokenRange(0, 1), TokenRange(1, 2), TokenRange(2, 3)]), - TokenAssignment(penalty=0.1, + TokenAssignment(penalty=0.2, housenumber=TokenRange(3, 4), address=[TokenRange(0, 1), TokenRange(1, 2), TokenRange(2, 3), TokenRange(4, 5)]))