mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
Merge pull request #3678 from lonvia/search-tweaks
Some minor tweaks to postcode parsing in query
This commit is contained in:
@@ -193,10 +193,12 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
|||||||
|
|
||||||
self.add_extra_tokens(query)
|
self.add_extra_tokens(query)
|
||||||
for start, end, pc in self.postcode_parser.parse(query):
|
for start, end, pc in self.postcode_parser.parse(query):
|
||||||
|
term = ' '.join(n.term_lookup for n in query.nodes[start + 1:end + 1])
|
||||||
query.add_token(qmod.TokenRange(start, end),
|
query.add_token(qmod.TokenRange(start, end),
|
||||||
qmod.TOKEN_POSTCODE,
|
qmod.TOKEN_POSTCODE,
|
||||||
ICUToken(penalty=0.1, token=0, count=1, addr_count=1,
|
ICUToken(penalty=0.1, token=0, count=1, addr_count=1,
|
||||||
lookup_word=pc, word_token=pc, info=None))
|
lookup_word=pc, word_token=term,
|
||||||
|
info=None))
|
||||||
self.rerank_tokens(query)
|
self.rerank_tokens(query)
|
||||||
|
|
||||||
log().table_dump('Word tokens', _dump_word_tokens(query))
|
log().table_dump('Word tokens', _dump_word_tokens(query))
|
||||||
@@ -267,10 +269,10 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
|||||||
"""
|
"""
|
||||||
for i, node, tlist in query.iter_token_lists():
|
for i, node, tlist in query.iter_token_lists():
|
||||||
if tlist.ttype == qmod.TOKEN_POSTCODE:
|
if tlist.ttype == qmod.TOKEN_POSTCODE:
|
||||||
|
tlen = len(cast(ICUToken, tlist.tokens[0]).word_token)
|
||||||
for repl in node.starting:
|
for repl in node.starting:
|
||||||
if repl.end == tlist.end and repl.ttype != qmod.TOKEN_POSTCODE \
|
if repl.end == tlist.end and repl.ttype != qmod.TOKEN_POSTCODE \
|
||||||
and (repl.ttype != qmod.TOKEN_HOUSENUMBER
|
and (repl.ttype != qmod.TOKEN_HOUSENUMBER or tlen > 4):
|
||||||
or len(tlist.tokens[0].lookup_word) > 4):
|
|
||||||
repl.add_penalty(0.39)
|
repl.add_penalty(0.39)
|
||||||
elif (tlist.ttype == qmod.TOKEN_HOUSENUMBER
|
elif (tlist.ttype == qmod.TOKEN_HOUSENUMBER
|
||||||
and len(tlist.tokens[0].lookup_word) <= 3):
|
and len(tlist.tokens[0].lookup_word) <= 3):
|
||||||
|
|||||||
@@ -269,10 +269,9 @@ class _TokenSequence:
|
|||||||
# <address>,<postcode> should give preference to address search
|
# <address>,<postcode> should give preference to address search
|
||||||
if base.postcode.start == 0:
|
if base.postcode.start == 0:
|
||||||
penalty = self.penalty
|
penalty = self.penalty
|
||||||
self.direction = -1 # name searches are only possible backwards
|
|
||||||
else:
|
else:
|
||||||
penalty = self.penalty + 0.1
|
penalty = self.penalty + 0.1
|
||||||
self.direction = 1 # name searches are only possible forwards
|
penalty += 0.1 * max(0, len(base.address) - 1)
|
||||||
yield dataclasses.replace(base, penalty=penalty)
|
yield dataclasses.replace(base, penalty=penalty)
|
||||||
|
|
||||||
def _get_assignments_address_forward(self, base: TokenAssignment,
|
def _get_assignments_address_forward(self, base: TokenAssignment,
|
||||||
@@ -282,6 +281,11 @@ class _TokenSequence:
|
|||||||
"""
|
"""
|
||||||
first = base.address[0]
|
first = base.address[0]
|
||||||
|
|
||||||
|
# The postcode must come after the name.
|
||||||
|
if base.postcode and base.postcode < first:
|
||||||
|
log().var_dump('skip forward', (base.postcode, first))
|
||||||
|
return
|
||||||
|
|
||||||
log().comment('first word = name')
|
log().comment('first word = name')
|
||||||
yield dataclasses.replace(base, penalty=self.penalty,
|
yield dataclasses.replace(base, penalty=self.penalty,
|
||||||
name=first, address=base.address[1:])
|
name=first, address=base.address[1:])
|
||||||
@@ -317,7 +321,12 @@ class _TokenSequence:
|
|||||||
"""
|
"""
|
||||||
last = base.address[-1]
|
last = base.address[-1]
|
||||||
|
|
||||||
if self.direction == -1 or len(base.address) > 1:
|
# The postcode must come before the name for backward direction.
|
||||||
|
if base.postcode and base.postcode > last:
|
||||||
|
log().var_dump('skip backward', (base.postcode, last))
|
||||||
|
return
|
||||||
|
|
||||||
|
if self.direction == -1 or len(base.address) > 1 or base.postcode:
|
||||||
log().comment('last word = name')
|
log().comment('last word = name')
|
||||||
yield dataclasses.replace(base, penalty=self.penalty,
|
yield dataclasses.replace(base, penalty=self.penalty,
|
||||||
name=last, address=base.address[:-1])
|
name=last, address=base.address[:-1])
|
||||||
|
|||||||
Reference in New Issue
Block a user