mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
add direction penalties
Direction penalties are estimated by getting the name to address ratio usage for each partial term in the query and computing the linear regression of that ratio over the entire phrase. Or to put it in ither words: we try to determine if the terms at the beginning or the end of the query are more likely to constitute a name. Direction penalties are currently used only in classic name queries.
This commit is contained in:
@@ -286,8 +286,12 @@ class _TokenSequence:
|
||||
log().var_dump('skip forward', (base.postcode, first))
|
||||
return
|
||||
|
||||
penalty = self.penalty
|
||||
if self.direction == 1 and query.dir_penalty > 0:
|
||||
penalty += query.dir_penalty
|
||||
|
||||
log().comment('first word = name')
|
||||
yield dataclasses.replace(base, penalty=self.penalty,
|
||||
yield dataclasses.replace(base, penalty=penalty,
|
||||
name=first, address=base.address[1:])
|
||||
|
||||
# To paraphrase:
|
||||
@@ -300,14 +304,15 @@ class _TokenSequence:
|
||||
or (query.nodes[first.start].ptype != qmod.PHRASE_ANY):
|
||||
return
|
||||
|
||||
penalty = self.penalty
|
||||
|
||||
# Penalty for:
|
||||
# * <name>, <street>, <housenumber> , ...
|
||||
# * queries that are comma-separated
|
||||
if (base.housenumber and base.housenumber > first) or len(query.source) > 1:
|
||||
penalty += 0.25
|
||||
|
||||
if self.direction == 0 and query.dir_penalty > 0:
|
||||
penalty += query.dir_penalty
|
||||
|
||||
for i in range(first.start + 1, first.end):
|
||||
name, addr = first.split(i)
|
||||
log().comment(f'split first word = name ({i - first.start})')
|
||||
@@ -326,9 +331,13 @@ class _TokenSequence:
|
||||
log().var_dump('skip backward', (base.postcode, last))
|
||||
return
|
||||
|
||||
penalty = self.penalty
|
||||
if self.direction == -1 and query.dir_penalty < 0:
|
||||
penalty -= query.dir_penalty
|
||||
|
||||
if self.direction == -1 or len(base.address) > 1 or base.postcode:
|
||||
log().comment('last word = name')
|
||||
yield dataclasses.replace(base, penalty=self.penalty,
|
||||
yield dataclasses.replace(base, penalty=penalty,
|
||||
name=last, address=base.address[:-1])
|
||||
|
||||
# To paraphrase:
|
||||
@@ -341,12 +350,14 @@ class _TokenSequence:
|
||||
or (query.nodes[last.start].ptype != qmod.PHRASE_ANY):
|
||||
return
|
||||
|
||||
penalty = self.penalty
|
||||
if base.housenumber and base.housenumber < last:
|
||||
penalty += 0.4
|
||||
if len(query.source) > 1:
|
||||
penalty += 0.25
|
||||
|
||||
if self.direction == 0 and query.dir_penalty < 0:
|
||||
penalty -= query.dir_penalty
|
||||
|
||||
for i in range(last.start + 1, last.end):
|
||||
addr, name = last.split(i)
|
||||
log().comment(f'split last word = name ({i - last.start})')
|
||||
@@ -379,11 +390,11 @@ class _TokenSequence:
|
||||
if base.postcode and base.postcode.start == 0:
|
||||
self.penalty += 0.1
|
||||
|
||||
# Right-to-left reading of the address
|
||||
# Left-to-right reading of the address
|
||||
if self.direction != -1:
|
||||
yield from self._get_assignments_address_forward(base, query)
|
||||
|
||||
# Left-to-right reading of the address
|
||||
# Right-to-left reading of the address
|
||||
if self.direction != 1:
|
||||
yield from self._get_assignments_address_backward(base, query)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user