mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Make query upper-case when parsing postcodes
The postcode patterns expect upper-case letters, so the query terms are converted to upper-case before they are matched against the patterns.
This commit is contained in:
@@ -34,14 +34,14 @@ class PostcodeParser:
|
|||||||
unique_patterns: Dict[str, Dict[str, List[str]]] = {}
|
unique_patterns: Dict[str, Dict[str, List[str]]] = {}
|
||||||
for cc, data in cdata.items():
|
for cc, data in cdata.items():
|
||||||
if data.get('postcode'):
|
if data.get('postcode'):
|
||||||
pat = data['postcode']['pattern'].replace('d', '[0-9]').replace('l', '[a-z]')
|
pat = data['postcode']['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
|
||||||
out = data['postcode'].get('output')
|
out = data['postcode'].get('output')
|
||||||
if pat not in unique_patterns:
|
if pat not in unique_patterns:
|
||||||
unique_patterns[pat] = defaultdict(list)
|
unique_patterns[pat] = defaultdict(list)
|
||||||
unique_patterns[pat][out].append(cc)
|
unique_patterns[pat][out].append(cc.upper())
|
||||||
|
|
||||||
self.global_pattern = re.compile(
|
self.global_pattern = re.compile(
|
||||||
'(?:(?P<cc>[a-z][a-z])(?P<space>[ -]?))?(?P<pc>(?:(?:'
|
'(?:(?P<cc>[A-Z][A-Z])(?P<space>[ -]?))?(?P<pc>(?:(?:'
|
||||||
+ ')|(?:'.join(unique_patterns) + '))[:, >].*)')
|
+ ')|(?:'.join(unique_patterns) + '))[:, >].*)')
|
||||||
|
|
||||||
self.local_patterns = [(re.compile(f"{pat}[:, >]"), list(info.items()))
|
self.local_patterns = [(re.compile(f"{pat}[:, >]"), list(info.items()))
|
||||||
@@ -57,25 +57,26 @@ class PostcodeParser:
|
|||||||
nodes = query.nodes
|
nodes = query.nodes
|
||||||
outcodes: Set[Tuple[int, int, str]] = set()
|
outcodes: Set[Tuple[int, int, str]] = set()
|
||||||
|
|
||||||
|
terms = [n.term_normalized.upper() + n.btype for n in nodes]
|
||||||
for i in range(query.num_token_slots()):
|
for i in range(query.num_token_slots()):
|
||||||
if nodes[i].btype in '<,: ' and nodes[i + 1].btype != '`' \
|
if nodes[i].btype in '<,: ' and nodes[i + 1].btype != '`' \
|
||||||
and (i == 0 or nodes[i - 1].ptype != qmod.PHRASE_POSTCODE):
|
and (i == 0 or nodes[i - 1].ptype != qmod.PHRASE_POSTCODE):
|
||||||
if nodes[i].ptype == qmod.PHRASE_ANY:
|
if nodes[i].ptype == qmod.PHRASE_ANY:
|
||||||
word = nodes[i + 1].term_normalized + nodes[i + 1].btype
|
word = terms[i + 1]
|
||||||
if word[-1] in ' -' and nodes[i + 2].btype != '`' \
|
if word[-1] in ' -' and nodes[i + 2].btype != '`' \
|
||||||
and nodes[i + 1].ptype == qmod.PHRASE_ANY:
|
and nodes[i + 1].ptype == qmod.PHRASE_ANY:
|
||||||
word += nodes[i + 2].term_normalized + nodes[i + 2].btype
|
word += terms[i + 2]
|
||||||
if word[-1] in ' -' and nodes[i + 3].btype != '`' \
|
if word[-1] in ' -' and nodes[i + 3].btype != '`' \
|
||||||
and nodes[i + 2].ptype == qmod.PHRASE_ANY:
|
and nodes[i + 2].ptype == qmod.PHRASE_ANY:
|
||||||
word += nodes[i + 3].term_normalized + nodes[i + 3].btype
|
word += terms[i + 3]
|
||||||
|
|
||||||
self._match_word(word, i, False, outcodes)
|
self._match_word(word, i, False, outcodes)
|
||||||
elif nodes[i].ptype == qmod.PHRASE_POSTCODE:
|
elif nodes[i].ptype == qmod.PHRASE_POSTCODE:
|
||||||
word = nodes[i + 1].term_normalized + nodes[i + 1].btype
|
word = terms[i + 1]
|
||||||
for j in range(i + 1, query.num_token_slots()):
|
for j in range(i + 1, query.num_token_slots()):
|
||||||
if nodes[j].ptype != qmod.PHRASE_POSTCODE:
|
if nodes[j].ptype != qmod.PHRASE_POSTCODE:
|
||||||
break
|
break
|
||||||
word += nodes[j + 1].term_normalized + nodes[j + 1].btype
|
word += terms[j + 1]
|
||||||
|
|
||||||
self._match_word(word, i, True, outcodes)
|
self._match_word(word, i, True, outcodes)
|
||||||
|
|
||||||
@@ -98,6 +99,6 @@ class PostcodeParser:
|
|||||||
for out, out_ccs in info:
|
for out, out_ccs in info:
|
||||||
if cc is None or cc in out_ccs:
|
if cc is None or cc in out_ccs:
|
||||||
if out:
|
if out:
|
||||||
outcodes.add((*trange, lm.expand(out).upper()))
|
outcodes.add((*trange, lm.expand(out)))
|
||||||
else:
|
else:
|
||||||
outcodes.add((*trange, lm.group(0)[:-1].upper()))
|
outcodes.add((*trange, lm.group(0)[:-1]))
|
||||||
|
|||||||
@@ -52,6 +52,12 @@ ky:
|
|||||||
postcode:
|
postcode:
|
||||||
pattern: "(d)-(dddd)"
|
pattern: "(d)-(dddd)"
|
||||||
output: KY\1-\2
|
output: KY\1-\2
|
||||||
|
|
||||||
|
gb:
|
||||||
|
postcode:
|
||||||
|
pattern: "(l?ld[A-Z0-9]?) ?(dll)"
|
||||||
|
output: \1 \2
|
||||||
|
|
||||||
""")
|
""")
|
||||||
|
|
||||||
return project_env
|
return project_env
|
||||||
@@ -83,6 +89,13 @@ def test_simple_postcode(pc_config, query, pos):
|
|||||||
assert result == {(pos, pos + 1, '45325'), (pos, pos + 1, '453 25')}
|
assert result == {(pos, pos + 1, '45325'), (pos, pos + 1, '453 25')}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('query', ['EC1R 3HF', 'ec1r 3hf'])
|
||||||
|
def test_postcode_matching_case_insensitive(pc_config, query):
|
||||||
|
parser = PostcodeParser(pc_config)
|
||||||
|
|
||||||
|
assert parser.parse(mk_query(query)) == {(0, 2, 'EC1R 3HF')}
|
||||||
|
|
||||||
|
|
||||||
def test_contained_postcode(pc_config):
|
def test_contained_postcode(pc_config):
|
||||||
parser = PostcodeParser(pc_config)
|
parser = PostcodeParser(pc_config)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user