mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
add support for country prefixes in postcodes
This commit is contained in:
@@ -7,7 +7,7 @@
|
|||||||
"""
|
"""
|
||||||
Handling of arbitrary postcode tokens in tokenized query string.
|
Handling of arbitrary postcode tokens in tokenized query string.
|
||||||
"""
|
"""
|
||||||
from typing import Tuple, Set
|
from typing import Tuple, Set, Dict, List
|
||||||
import re
|
import re
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
@@ -31,20 +31,21 @@ class PostcodeParser:
|
|||||||
cdata = yaml.safe_load(config.find_config_file('country_settings.yaml')
|
cdata = yaml.safe_load(config.find_config_file('country_settings.yaml')
|
||||||
.read_text(encoding='utf-8'))
|
.read_text(encoding='utf-8'))
|
||||||
|
|
||||||
unique_patterns = defaultdict(set)
|
unique_patterns: Dict[str, Dict[str, List[str]]] = {}
|
||||||
for cc, data in cdata.items():
|
for cc, data in cdata.items():
|
||||||
if data.get('postcode'):
|
if data.get('postcode'):
|
||||||
pat = data['postcode']['pattern']
|
pat = data['postcode']['pattern'].replace('d', '[0-9]').replace('l', '[a-z]')
|
||||||
out = data['postcode'].get('output')
|
out = data['postcode'].get('output')
|
||||||
unique_patterns[pat.replace('d', '[0-9]').replace('l', '[a-z]')].add(out)
|
if pat not in unique_patterns:
|
||||||
|
unique_patterns[pat] = defaultdict(list)
|
||||||
|
unique_patterns[pat][out].append(cc)
|
||||||
|
|
||||||
self.global_pattern = re.compile(
|
self.global_pattern = re.compile(
|
||||||
'(?:' +
|
'(?:(?P<cc>[a-z][a-z])(?P<space>[ -]?))?(?P<pc>(?:(?:'
|
||||||
'|'.join(f"(?:{k})" for k in unique_patterns)
|
+ ')|(?:'.join(unique_patterns) + '))[:, >].*)')
|
||||||
+ ')[:, >]')
|
|
||||||
|
|
||||||
self.local_patterns = [(re.compile(f"(?:{k})[:, >]"), v)
|
self.local_patterns = [(re.compile(f"{pat}[:, >]"), list(info.items()))
|
||||||
for k, v in unique_patterns.items()]
|
for pat, info in unique_patterns.items()]
|
||||||
|
|
||||||
def parse(self, query: qmod.QueryStruct) -> Set[Tuple[int, int, str]]:
|
def parse(self, query: qmod.QueryStruct) -> Set[Tuple[int, int, str]]:
|
||||||
""" Parse postcodes in the given list of query tokens taking into
|
""" Parse postcodes in the given list of query tokens taking into
|
||||||
@@ -64,18 +65,22 @@ class PostcodeParser:
|
|||||||
if word[-1] in ' -' and nodes[i + 3].btype != '`':
|
if word[-1] in ' -' and nodes[i + 3].btype != '`':
|
||||||
word += nodes[i + 3].term_normalized + nodes[i + 3].btype
|
word += nodes[i + 3].term_normalized + nodes[i + 3].btype
|
||||||
|
|
||||||
# Use global pattern to check for presence of any postocde.
|
# Use global pattern to check for presence of any postcode.
|
||||||
m = self.global_pattern.match(word)
|
m = self.global_pattern.fullmatch(word)
|
||||||
if m:
|
if m:
|
||||||
# If there was a match, check against each pattern separately
|
# If there was a match, check against each pattern separately
|
||||||
# because multiple patterns might be matching at the end.
|
# because multiple patterns might be matching at the end.
|
||||||
|
cc = m.group('cc')
|
||||||
|
pc_word = m.group('pc')
|
||||||
|
cc_spaces = len(m.group('space') or '')
|
||||||
for pattern, info in self.local_patterns:
|
for pattern, info in self.local_patterns:
|
||||||
lm = pattern.match(word)
|
lm = pattern.match(pc_word)
|
||||||
if lm:
|
if lm:
|
||||||
trange = (i, i + sum(c in ' ,-:>' for c in lm.group(0)))
|
trange = (i, i + cc_spaces + sum(c in ' ,-:>' for c in lm.group(0)))
|
||||||
for out in info:
|
for out, out_ccs in info:
|
||||||
if out:
|
if cc is None or cc in out_ccs:
|
||||||
outcodes.add((*trange, lm.expand(out).upper()))
|
if out:
|
||||||
else:
|
outcodes.add((*trange, lm.expand(out).upper()))
|
||||||
outcodes.add((*trange, lm.group(0)[:-1].upper()))
|
else:
|
||||||
|
outcodes.add((*trange, lm.group(0)[:-1].upper()))
|
||||||
return outcodes
|
return outcodes
|
||||||
|
|||||||
Reference in New Issue
Block a user