mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Merge pull request #3840 from lonvia/normalize-penalties
Improve termination condition for forward search
This commit is contained in:
@@ -187,7 +187,7 @@ class SearchBuilder:
|
|||||||
dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny)]
|
dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny)]
|
||||||
|
|
||||||
sdata.housenumbers = dbf.WeightedStrings([], [])
|
sdata.housenumbers = dbf.WeightedStrings([], [])
|
||||||
yield dbs.PlaceSearch(0.05, sdata, expected_count, True)
|
yield dbs.PlaceSearch(0.0, sdata, expected_count, True)
|
||||||
|
|
||||||
def build_name_search(self, sdata: dbf.SearchData,
|
def build_name_search(self, sdata: dbf.SearchData,
|
||||||
name: qmod.TokenRange, address: List[qmod.TokenRange],
|
name: qmod.TokenRange, address: List[qmod.TokenRange],
|
||||||
@@ -342,7 +342,10 @@ class SearchBuilder:
|
|||||||
heapq.heappush(todo, (-tlist.end, tlist.end,
|
heapq.heappush(todo, (-tlist.end, tlist.end,
|
||||||
rank.with_token(t, chgpenalty)))
|
rank.with_token(t, chgpenalty)))
|
||||||
elif tlist.end == trange.end:
|
elif tlist.end == trange.end:
|
||||||
ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)
|
chgpenalty = self.query.get_in_word_penalty(
|
||||||
|
qmod.TokenRange(pos, tlist.end))
|
||||||
|
ranks.extend(rank.with_token(t, chgpenalty)
|
||||||
|
for t in tlist.tokens)
|
||||||
|
|
||||||
if len(ranks) >= 10:
|
if len(ranks) >= 10:
|
||||||
# Too many variants, bail out and only add
|
# Too many variants, bail out and only add
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ class ForwardGeocoder:
|
|||||||
qs = self.params.query_stats
|
qs = self.params.query_stats
|
||||||
|
|
||||||
qs['search_min_penalty'] = round(searches[0].penalty, 2)
|
qs['search_min_penalty'] = round(searches[0].penalty, 2)
|
||||||
min_ranking = searches[0].penalty + 2.0
|
min_ranking = searches[0].penalty + 1.5
|
||||||
prev_penalty = 0.0
|
prev_penalty = 0.0
|
||||||
for i, search in enumerate(searches):
|
for i, search in enumerate(searches):
|
||||||
if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 15):
|
if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 15):
|
||||||
@@ -103,7 +103,9 @@ class ForwardGeocoder:
|
|||||||
qs['search_min_result_penalty'] = spenalty
|
qs['search_min_result_penalty'] = spenalty
|
||||||
qs['search_best_penalty_round'] = i
|
qs['search_best_penalty_round'] = i
|
||||||
results[rhash] = result
|
results[rhash] = result
|
||||||
min_ranking = min(min_ranking, result.accuracy * 1.2, 2.0)
|
min_ranking = min(min_ranking,
|
||||||
|
search.penalty + 0.4,
|
||||||
|
result.accuracy + 0.1)
|
||||||
log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
|
log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
|
||||||
prev_penalty = search.penalty
|
prev_penalty = search.penalty
|
||||||
if self.timeout.is_elapsed():
|
if self.timeout.is_elapsed():
|
||||||
|
|||||||
@@ -202,7 +202,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
|||||||
term = ' '.join(n.term_lookup for n in query.nodes[start + 1:end + 1])
|
term = ' '.join(n.term_lookup for n in query.nodes[start + 1:end + 1])
|
||||||
query.add_token(qmod.TokenRange(start, end),
|
query.add_token(qmod.TokenRange(start, end),
|
||||||
qmod.TOKEN_POSTCODE,
|
qmod.TOKEN_POSTCODE,
|
||||||
ICUToken(penalty=0.1, token=0, count=1, addr_count=1,
|
ICUToken(penalty=0.0, token=0, count=1, addr_count=1,
|
||||||
lookup_word=pc, word_token=term,
|
lookup_word=pc, word_token=term,
|
||||||
info=None))
|
info=None))
|
||||||
self.rerank_tokens(query)
|
self.rerank_tokens(query)
|
||||||
@@ -288,7 +288,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
|||||||
if need_hnr and is_full_token \
|
if need_hnr and is_full_token \
|
||||||
and len(node.term_normalized) <= 4 and node.term_normalized.isdigit():
|
and len(node.term_normalized) <= 4 and node.term_normalized.isdigit():
|
||||||
query.add_token(qmod.TokenRange(i-1, i), qmod.TOKEN_HOUSENUMBER,
|
query.add_token(qmod.TokenRange(i-1, i), qmod.TOKEN_HOUSENUMBER,
|
||||||
ICUToken(penalty=0.5, token=0,
|
ICUToken(penalty=0.2, token=0,
|
||||||
count=1, addr_count=1,
|
count=1, addr_count=1,
|
||||||
lookup_word=node.term_lookup,
|
lookup_word=node.term_lookup,
|
||||||
word_token=node.term_lookup, info=None))
|
word_token=node.term_lookup, info=None))
|
||||||
@@ -309,6 +309,9 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
|||||||
len(query.nodes[end].term_lookup) > 4):
|
len(query.nodes[end].term_lookup) > 4):
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
token.penalty += 0.39
|
token.penalty += 0.39
|
||||||
|
if (start + 1 == end):
|
||||||
|
if partial := query.nodes[start].partial:
|
||||||
|
partial.penalty += 0.39
|
||||||
|
|
||||||
# If it looks like a simple housenumber, prefer that.
|
# If it looks like a simple housenumber, prefer that.
|
||||||
if qmod.TOKEN_HOUSENUMBER in tlist:
|
if qmod.TOKEN_HOUSENUMBER in tlist:
|
||||||
@@ -319,6 +322,9 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
|||||||
if ttype != qmod.TOKEN_HOUSENUMBER:
|
if ttype != qmod.TOKEN_HOUSENUMBER:
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
token.penalty += penalty
|
token.penalty += penalty
|
||||||
|
if (start + 1 == end):
|
||||||
|
if partial := query.nodes[start].partial:
|
||||||
|
partial.penalty += penalty
|
||||||
|
|
||||||
# rerank tokens against the normalized form
|
# rerank tokens against the normalized form
|
||||||
norm = ''.join(f"{n.term_normalized}{'' if n.btype == qmod.BREAK_TOKEN else ' '}"
|
norm = ''.join(f"{n.term_normalized}{'' if n.btype == qmod.BREAK_TOKEN else ' '}"
|
||||||
|
|||||||
@@ -380,17 +380,23 @@ class _TokenSequence:
|
|||||||
if base.postcode and base.postcode.start == 0:
|
if base.postcode and base.postcode.start == 0:
|
||||||
self.penalty += 0.1
|
self.penalty += 0.1
|
||||||
|
|
||||||
|
min_penalty = self.penalty + 2.0
|
||||||
|
|
||||||
# Left-to-right reading of the address
|
# Left-to-right reading of the address
|
||||||
if self.direction != -1:
|
if self.direction != -1:
|
||||||
yield from self._get_assignments_address_forward(base, query)
|
for result in self._get_assignments_address_forward(base, query):
|
||||||
|
min_penalty = min(min_penalty, result.penalty)
|
||||||
|
yield result
|
||||||
|
|
||||||
# Right-to-left reading of the address
|
# Right-to-left reading of the address
|
||||||
if self.direction != 1:
|
if self.direction != 1:
|
||||||
yield from self._get_assignments_address_backward(base, query)
|
for result in self._get_assignments_address_backward(base, query):
|
||||||
|
min_penalty = min(min_penalty, result.penalty)
|
||||||
|
yield result
|
||||||
|
|
||||||
# variant for special housenumber searches
|
# variant for special housenumber searches
|
||||||
if base.housenumber and not base.qualifier:
|
if base.housenumber and not base.qualifier:
|
||||||
yield dataclasses.replace(base, penalty=self.penalty)
|
yield dataclasses.replace(base, penalty=min_penalty + 0.1)
|
||||||
|
|
||||||
|
|
||||||
def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
|
def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
|
||||||
|
|||||||
@@ -2,13 +2,13 @@
|
|||||||
#
|
#
|
||||||
# This file is part of Nominatim. (https://nominatim.org)
|
# This file is part of Nominatim. (https://nominatim.org)
|
||||||
#
|
#
|
||||||
# Copyright (C) 2024 by the Nominatim developer community.
|
# Copyright (C) 2025 by the Nominatim developer community.
|
||||||
# For a full list of authors see the git log.
|
# For a full list of authors see the git log.
|
||||||
"""
|
"""
|
||||||
Wrapper around place information the indexer gets from the database and hands to
|
Wrapper around place information the indexer gets from the database and hands to
|
||||||
the tokenizer.
|
the tokenizer.
|
||||||
"""
|
"""
|
||||||
from typing import Optional, Mapping, Any, Tuple
|
from typing import Optional, Mapping, Any, Tuple, cast
|
||||||
|
|
||||||
|
|
||||||
class PlaceInfo:
|
class PlaceInfo:
|
||||||
@@ -56,7 +56,7 @@ class PlaceInfo:
|
|||||||
|
|
||||||
[1]: ../customize/Ranking.md#address-rank
|
[1]: ../customize/Ranking.md#address-rank
|
||||||
"""
|
"""
|
||||||
return self._info.get('rank_address', 0)
|
return cast(int, self._info.get('rank_address', 0))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def centroid(self) -> Optional[Tuple[float, float]]:
|
def centroid(self) -> Optional[Tuple[float, float]]:
|
||||||
|
|||||||
@@ -23,8 +23,8 @@ Feature: Search API geocodejson output
|
|||||||
|
|
||||||
Scenario: Search geocodejson - Town street-level address with footway
|
Scenario: Search geocodejson - Town street-level address with footway
|
||||||
When sending v1/search with format geocodejson
|
When sending v1/search with format geocodejson
|
||||||
| q | addressdetails |
|
| q | addressdetails |
|
||||||
| burg gutenberg 6000 jahre geschichte | 1 |
|
| 6000 jahre geschichte | 1 |
|
||||||
Then a HTTP 200 is returned
|
Then a HTTP 200 is returned
|
||||||
And the result is valid geocodejson
|
And the result is valid geocodejson
|
||||||
And all results contain
|
And all results contain
|
||||||
|
|||||||
@@ -127,7 +127,7 @@ def test_housenumber_and_street():
|
|||||||
check_assignments(yield_token_assignments(q),
|
check_assignments(yield_token_assignments(q),
|
||||||
TokenAssignment(name=TokenRange(1, 2),
|
TokenAssignment(name=TokenRange(1, 2),
|
||||||
housenumber=TokenRange(0, 1)),
|
housenumber=TokenRange(0, 1)),
|
||||||
TokenAssignment(address=[TokenRange(1, 2)],
|
TokenAssignment(penalty=0.1, address=[TokenRange(1, 2)],
|
||||||
housenumber=TokenRange(0, 1)))
|
housenumber=TokenRange(0, 1)))
|
||||||
|
|
||||||
|
|
||||||
@@ -138,7 +138,7 @@ def test_housenumber_and_street_backwards():
|
|||||||
check_assignments(yield_token_assignments(q),
|
check_assignments(yield_token_assignments(q),
|
||||||
TokenAssignment(name=TokenRange(0, 1),
|
TokenAssignment(name=TokenRange(0, 1),
|
||||||
housenumber=TokenRange(1, 2)),
|
housenumber=TokenRange(1, 2)),
|
||||||
TokenAssignment(address=[TokenRange(0, 1)],
|
TokenAssignment(penalty=0.1, address=[TokenRange(0, 1)],
|
||||||
housenumber=TokenRange(1, 2)))
|
housenumber=TokenRange(1, 2)))
|
||||||
|
|
||||||
|
|
||||||
@@ -154,7 +154,7 @@ def test_housenumber_and_postcode():
|
|||||||
housenumber=TokenRange(1, 2),
|
housenumber=TokenRange(1, 2),
|
||||||
address=[TokenRange(2, 3)],
|
address=[TokenRange(2, 3)],
|
||||||
postcode=TokenRange(3, 4)),
|
postcode=TokenRange(3, 4)),
|
||||||
TokenAssignment(penalty=pytest.approx(0.3),
|
TokenAssignment(penalty=pytest.approx(0.4),
|
||||||
housenumber=TokenRange(1, 2),
|
housenumber=TokenRange(1, 2),
|
||||||
address=[TokenRange(0, 1), TokenRange(2, 3)],
|
address=[TokenRange(0, 1), TokenRange(2, 3)],
|
||||||
postcode=TokenRange(3, 4)))
|
postcode=TokenRange(3, 4)))
|
||||||
@@ -172,7 +172,7 @@ def test_postcode_and_housenumber():
|
|||||||
housenumber=TokenRange(3, 4),
|
housenumber=TokenRange(3, 4),
|
||||||
address=[TokenRange(0, 1)],
|
address=[TokenRange(0, 1)],
|
||||||
postcode=TokenRange(1, 2)),
|
postcode=TokenRange(1, 2)),
|
||||||
TokenAssignment(penalty=pytest.approx(0.3),
|
TokenAssignment(penalty=pytest.approx(0.4),
|
||||||
housenumber=TokenRange(3, 4),
|
housenumber=TokenRange(3, 4),
|
||||||
address=[TokenRange(0, 1), TokenRange(2, 3)],
|
address=[TokenRange(0, 1), TokenRange(2, 3)],
|
||||||
postcode=TokenRange(1, 2)))
|
postcode=TokenRange(1, 2)))
|
||||||
@@ -218,7 +218,7 @@ def test_housenumber_many_phrases():
|
|||||||
housenumber=TokenRange(3, 4),
|
housenumber=TokenRange(3, 4),
|
||||||
address=[TokenRange(0, 1), TokenRange(1, 2),
|
address=[TokenRange(0, 1), TokenRange(1, 2),
|
||||||
TokenRange(2, 3)]),
|
TokenRange(2, 3)]),
|
||||||
TokenAssignment(penalty=0.1,
|
TokenAssignment(penalty=0.2,
|
||||||
housenumber=TokenRange(3, 4),
|
housenumber=TokenRange(3, 4),
|
||||||
address=[TokenRange(0, 1), TokenRange(1, 2),
|
address=[TokenRange(0, 1), TokenRange(1, 2),
|
||||||
TokenRange(2, 3), TokenRange(4, 5)]))
|
TokenRange(2, 3), TokenRange(4, 5)]))
|
||||||
|
|||||||
Reference in New Issue
Block a user