mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-15 10:57:58 +00:00
Merge pull request #3840 from lonvia/normalize-penalties
Improve termination condition for forward search
This commit is contained in:
@@ -187,7 +187,7 @@ class SearchBuilder:
|
||||
dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny)]
|
||||
|
||||
sdata.housenumbers = dbf.WeightedStrings([], [])
|
||||
yield dbs.PlaceSearch(0.05, sdata, expected_count, True)
|
||||
yield dbs.PlaceSearch(0.0, sdata, expected_count, True)
|
||||
|
||||
def build_name_search(self, sdata: dbf.SearchData,
|
||||
name: qmod.TokenRange, address: List[qmod.TokenRange],
|
||||
@@ -342,7 +342,10 @@ class SearchBuilder:
|
||||
heapq.heappush(todo, (-tlist.end, tlist.end,
|
||||
rank.with_token(t, chgpenalty)))
|
||||
elif tlist.end == trange.end:
|
||||
ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)
|
||||
chgpenalty = self.query.get_in_word_penalty(
|
||||
qmod.TokenRange(pos, tlist.end))
|
||||
ranks.extend(rank.with_token(t, chgpenalty)
|
||||
for t in tlist.tokens)
|
||||
|
||||
if len(ranks) >= 10:
|
||||
# Too many variants, bail out and only add
|
||||
|
||||
@@ -80,7 +80,7 @@ class ForwardGeocoder:
|
||||
qs = self.params.query_stats
|
||||
|
||||
qs['search_min_penalty'] = round(searches[0].penalty, 2)
|
||||
min_ranking = searches[0].penalty + 2.0
|
||||
min_ranking = searches[0].penalty + 1.5
|
||||
prev_penalty = 0.0
|
||||
for i, search in enumerate(searches):
|
||||
if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 15):
|
||||
@@ -103,7 +103,9 @@ class ForwardGeocoder:
|
||||
qs['search_min_result_penalty'] = spenalty
|
||||
qs['search_best_penalty_round'] = i
|
||||
results[rhash] = result
|
||||
min_ranking = min(min_ranking, result.accuracy * 1.2, 2.0)
|
||||
min_ranking = min(min_ranking,
|
||||
search.penalty + 0.4,
|
||||
result.accuracy + 0.1)
|
||||
log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
|
||||
prev_penalty = search.penalty
|
||||
if self.timeout.is_elapsed():
|
||||
|
||||
@@ -202,7 +202,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
term = ' '.join(n.term_lookup for n in query.nodes[start + 1:end + 1])
|
||||
query.add_token(qmod.TokenRange(start, end),
|
||||
qmod.TOKEN_POSTCODE,
|
||||
ICUToken(penalty=0.1, token=0, count=1, addr_count=1,
|
||||
ICUToken(penalty=0.0, token=0, count=1, addr_count=1,
|
||||
lookup_word=pc, word_token=term,
|
||||
info=None))
|
||||
self.rerank_tokens(query)
|
||||
@@ -288,7 +288,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
if need_hnr and is_full_token \
|
||||
and len(node.term_normalized) <= 4 and node.term_normalized.isdigit():
|
||||
query.add_token(qmod.TokenRange(i-1, i), qmod.TOKEN_HOUSENUMBER,
|
||||
ICUToken(penalty=0.5, token=0,
|
||||
ICUToken(penalty=0.2, token=0,
|
||||
count=1, addr_count=1,
|
||||
lookup_word=node.term_lookup,
|
||||
word_token=node.term_lookup, info=None))
|
||||
@@ -309,6 +309,9 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
len(query.nodes[end].term_lookup) > 4):
|
||||
for token in tokens:
|
||||
token.penalty += 0.39
|
||||
if (start + 1 == end):
|
||||
if partial := query.nodes[start].partial:
|
||||
partial.penalty += 0.39
|
||||
|
||||
# If it looks like a simple housenumber, prefer that.
|
||||
if qmod.TOKEN_HOUSENUMBER in tlist:
|
||||
@@ -319,6 +322,9 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
if ttype != qmod.TOKEN_HOUSENUMBER:
|
||||
for token in tokens:
|
||||
token.penalty += penalty
|
||||
if (start + 1 == end):
|
||||
if partial := query.nodes[start].partial:
|
||||
partial.penalty += penalty
|
||||
|
||||
# rerank tokens against the normalized form
|
||||
norm = ''.join(f"{n.term_normalized}{'' if n.btype == qmod.BREAK_TOKEN else ' '}"
|
||||
|
||||
@@ -380,17 +380,23 @@ class _TokenSequence:
|
||||
if base.postcode and base.postcode.start == 0:
|
||||
self.penalty += 0.1
|
||||
|
||||
min_penalty = self.penalty + 2.0
|
||||
|
||||
# Left-to-right reading of the address
|
||||
if self.direction != -1:
|
||||
yield from self._get_assignments_address_forward(base, query)
|
||||
for result in self._get_assignments_address_forward(base, query):
|
||||
min_penalty = min(min_penalty, result.penalty)
|
||||
yield result
|
||||
|
||||
# Right-to-left reading of the address
|
||||
if self.direction != 1:
|
||||
yield from self._get_assignments_address_backward(base, query)
|
||||
for result in self._get_assignments_address_backward(base, query):
|
||||
min_penalty = min(min_penalty, result.penalty)
|
||||
yield result
|
||||
|
||||
# variant for special housenumber searches
|
||||
if base.housenumber and not base.qualifier:
|
||||
yield dataclasses.replace(base, penalty=self.penalty)
|
||||
yield dataclasses.replace(base, penalty=min_penalty + 0.1)
|
||||
|
||||
|
||||
def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
|
||||
|
||||
@@ -2,13 +2,13 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Wrapper around place information the indexer gets from the database and hands to
|
||||
the tokenizer.
|
||||
"""
|
||||
from typing import Optional, Mapping, Any, Tuple
|
||||
from typing import Optional, Mapping, Any, Tuple, cast
|
||||
|
||||
|
||||
class PlaceInfo:
|
||||
@@ -56,7 +56,7 @@ class PlaceInfo:
|
||||
|
||||
[1]: ../customize/Ranking.md#address-rank
|
||||
"""
|
||||
return self._info.get('rank_address', 0)
|
||||
return cast(int, self._info.get('rank_address', 0))
|
||||
|
||||
@property
|
||||
def centroid(self) -> Optional[Tuple[float, float]]:
|
||||
|
||||
@@ -23,8 +23,8 @@ Feature: Search API geocodejson output
|
||||
|
||||
Scenario: Search geocodejson - Town street-level address with footway
|
||||
When sending v1/search with format geocodejson
|
||||
| q | addressdetails |
|
||||
| burg gutenberg 6000 jahre geschichte | 1 |
|
||||
| q | addressdetails |
|
||||
| 6000 jahre geschichte | 1 |
|
||||
Then a HTTP 200 is returned
|
||||
And the result is valid geocodejson
|
||||
And all results contain
|
||||
|
||||
@@ -127,7 +127,7 @@ def test_housenumber_and_street():
|
||||
check_assignments(yield_token_assignments(q),
|
||||
TokenAssignment(name=TokenRange(1, 2),
|
||||
housenumber=TokenRange(0, 1)),
|
||||
TokenAssignment(address=[TokenRange(1, 2)],
|
||||
TokenAssignment(penalty=0.1, address=[TokenRange(1, 2)],
|
||||
housenumber=TokenRange(0, 1)))
|
||||
|
||||
|
||||
@@ -138,7 +138,7 @@ def test_housenumber_and_street_backwards():
|
||||
check_assignments(yield_token_assignments(q),
|
||||
TokenAssignment(name=TokenRange(0, 1),
|
||||
housenumber=TokenRange(1, 2)),
|
||||
TokenAssignment(address=[TokenRange(0, 1)],
|
||||
TokenAssignment(penalty=0.1, address=[TokenRange(0, 1)],
|
||||
housenumber=TokenRange(1, 2)))
|
||||
|
||||
|
||||
@@ -154,7 +154,7 @@ def test_housenumber_and_postcode():
|
||||
housenumber=TokenRange(1, 2),
|
||||
address=[TokenRange(2, 3)],
|
||||
postcode=TokenRange(3, 4)),
|
||||
TokenAssignment(penalty=pytest.approx(0.3),
|
||||
TokenAssignment(penalty=pytest.approx(0.4),
|
||||
housenumber=TokenRange(1, 2),
|
||||
address=[TokenRange(0, 1), TokenRange(2, 3)],
|
||||
postcode=TokenRange(3, 4)))
|
||||
@@ -172,7 +172,7 @@ def test_postcode_and_housenumber():
|
||||
housenumber=TokenRange(3, 4),
|
||||
address=[TokenRange(0, 1)],
|
||||
postcode=TokenRange(1, 2)),
|
||||
TokenAssignment(penalty=pytest.approx(0.3),
|
||||
TokenAssignment(penalty=pytest.approx(0.4),
|
||||
housenumber=TokenRange(3, 4),
|
||||
address=[TokenRange(0, 1), TokenRange(2, 3)],
|
||||
postcode=TokenRange(1, 2)))
|
||||
@@ -218,7 +218,7 @@ def test_housenumber_many_phrases():
|
||||
housenumber=TokenRange(3, 4),
|
||||
address=[TokenRange(0, 1), TokenRange(1, 2),
|
||||
TokenRange(2, 3)]),
|
||||
TokenAssignment(penalty=0.1,
|
||||
TokenAssignment(penalty=0.2,
|
||||
housenumber=TokenRange(3, 4),
|
||||
address=[TokenRange(0, 1), TokenRange(1, 2),
|
||||
TokenRange(2, 3), TokenRange(4, 5)]))
|
||||
|
||||
Reference in New Issue
Block a user