mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 18:37:58 +00:00
Properly validate postcodes with country code
Include the postcode pattern in the postcode normalisation regex, instead of removing the country code from the postcode pattern in the config. This properly handles postcode validation and normalisation when the country code is part of the postcode, e.g. for Isle of Man, Jersey, Anguilla, Andorra, Cayman Islands and more. Fixes #3227.
This commit is contained in:
committed by
Paweł Wroniszewski
parent
95c3181a35
commit
fbe40e005d
@@ -25,7 +25,7 @@ class CountryPostcodeMatcher:
|
||||
|
||||
pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
|
||||
|
||||
self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?(.*)\\s*')
|
||||
self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
|
||||
self.pattern = re.compile(pc_pattern)
|
||||
|
||||
self.output = config.get('output', r'\g<0>')
|
||||
|
||||
@@ -4,8 +4,7 @@ ad:
|
||||
languages: ca
|
||||
names: !include country-names/ad.yaml
|
||||
postcode:
|
||||
pattern: "(ddd)"
|
||||
output: AD\1
|
||||
pattern: "ADddd"
|
||||
|
||||
|
||||
# United Arab Emirates (الإمارات العربية المتحدة)
|
||||
@@ -39,8 +38,7 @@ ai:
|
||||
languages: en
|
||||
names: !include country-names/ai.yaml
|
||||
postcode:
|
||||
pattern: "2640"
|
||||
output: AI-2640
|
||||
pattern: "AI-2640"
|
||||
|
||||
|
||||
# Albania (Shqipëria)
|
||||
@@ -965,8 +963,7 @@ ky:
|
||||
languages: en
|
||||
names: !include country-names/ky.yaml
|
||||
postcode:
|
||||
pattern: "(d)-(dddd)"
|
||||
output: KY\1-\2
|
||||
pattern: "KYd-dddd"
|
||||
|
||||
|
||||
# Kazakhstan (Қазақстан)
|
||||
@@ -1002,7 +999,7 @@ lc:
|
||||
languages: en
|
||||
names: !include country-names/lc.yaml
|
||||
postcode:
|
||||
pattern: "(dd) ?(ddd)"
|
||||
pattern: "LC(dd) ?(ddd)"
|
||||
output: LC\1 \2
|
||||
|
||||
|
||||
@@ -1066,8 +1063,7 @@ lv:
|
||||
languages: lv
|
||||
names: !include country-names/lv.yaml
|
||||
postcode:
|
||||
pattern: "(dddd)"
|
||||
output: LV-\1
|
||||
pattern: "LV-dddd"
|
||||
|
||||
|
||||
# Libya (ليبيا)
|
||||
@@ -1102,8 +1098,7 @@ md:
|
||||
languages: ro, ru, uk
|
||||
names: !include country-names/md.yaml
|
||||
postcode:
|
||||
pattern: "(dddd)"
|
||||
output: MD-\1
|
||||
pattern: "MD-dddd"
|
||||
|
||||
|
||||
# Montenegro (Crna Gora / Црна Гора)
|
||||
@@ -1845,8 +1840,7 @@ vc:
|
||||
languages: en
|
||||
names: !include country-names/vc.yaml
|
||||
postcode:
|
||||
pattern: "(dddd)"
|
||||
output: VC\1
|
||||
pattern: "VCdddd"
|
||||
|
||||
|
||||
# Venezuela (Venezuela)
|
||||
@@ -1864,8 +1858,7 @@ vg:
|
||||
languages: en
|
||||
names: !include country-names/vg.yaml
|
||||
postcode:
|
||||
pattern: "(dddd)"
|
||||
output: VG\1
|
||||
pattern: "VGdddd"
|
||||
|
||||
|
||||
# Vietnam (Việt Nam)
|
||||
|
||||
@@ -89,6 +89,143 @@ def test_postcode_sweden_fail(sanitize, postcode):
|
||||
assert sanitize(country='se', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('AD123', 'AD AD123'))
|
||||
def test_postcode_andorra_pass(sanitize, postcode):
|
||||
assert sanitize(country='ad', postcode=postcode) == [('postcode', 'AD123')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('123', 'AD 123', 'AD-123', 'AD1234'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_andorra_fail(sanitize, postcode):
|
||||
assert sanitize(country='ad', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('AI-2640', 'AI AI-2640'))
|
||||
def test_postcode_anguilla_pass(sanitize, postcode):
|
||||
assert sanitize(country='ai', postcode=postcode) == [('postcode', 'AI-2640')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('2640', 'AI 2640', 'AI-2000', 'AI US-2640'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_anguilla_fail(sanitize, postcode):
|
||||
assert sanitize(country='ai', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('BN1111', 'BN 1111', 'BN BN1111', 'BN BN 1111'))
|
||||
def test_postcode_brunei_pass(sanitize, postcode):
|
||||
assert sanitize(country='bn', postcode=postcode) == [('postcode', 'BN1111')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('BN-1111', 'BNN1111'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_brunei_fail(sanitize, postcode):
|
||||
assert sanitize(country='bn', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('IM1 1AA', 'IM11AA', 'IM IM11AA'))
|
||||
def test_postcode_isle_of_man_pass(sanitize, postcode):
|
||||
assert sanitize(country='im', postcode=postcode) == [('postcode', 'IM1 1AA')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('IZ1 1AA', 'IM1 AA'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_isle_of_man_fail(sanitize, postcode):
|
||||
assert sanitize(country='im', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('JE5 0LA', 'JE50LA', 'JE JE50LA', 'je JE5 0LA'))
|
||||
def test_postcode_jersey_pass(sanitize, postcode):
|
||||
assert sanitize(country='je', postcode=postcode) == [('postcode', 'JE5 0LA')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('gb JE5 0LA', 'IM50LA', 'IM5 012'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_jersey_fail(sanitize, postcode):
|
||||
assert sanitize(country='je', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('KY1-1234', 'KY KY1-1234'))
|
||||
def test_postcode_cayman_islands_pass(sanitize, postcode):
|
||||
assert sanitize(country='ky', postcode=postcode) == [('postcode', 'KY1-1234')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('1-1234', 'KY-1234', 'KZ1-1234', 'KY1 1234', 'KY 1-1234', 'KY1-123'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_cayman_islands_fail(sanitize, postcode):
|
||||
assert sanitize(country='ky', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('LC11 222', 'LC LC11 222', 'LC LC11 222'))
|
||||
def test_postcode_saint_lucia_pass(sanitize, postcode):
|
||||
assert sanitize(country='lc', postcode=postcode) == [('postcode', 'LC11 222')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('11 222', '11222', 'LC 11 222'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_saint_lucia_fail(sanitize, postcode):
|
||||
assert sanitize(country='lc', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('LV-1111', 'LV LV-1111'))
|
||||
def test_postcode_latvia_pass(sanitize, postcode):
|
||||
assert sanitize(country='lv', postcode=postcode) == [('postcode', 'LV-1111')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('1111', 'LV 1111', 'LV1111', 'LV LV 1111'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_latvia_fail(sanitize, postcode):
|
||||
assert sanitize(country='lv', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('MD-1111', 'MD MD-1111'))
|
||||
def test_postcode_moldova_pass(sanitize, postcode):
|
||||
assert sanitize(country='md', postcode=postcode) == [('postcode', 'MD-1111')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('1111', 'MD 1111', 'MD1111'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_moldova_fail(sanitize, postcode):
|
||||
assert sanitize(country='md', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('VLT 1117', 'GDJ 1234', 'BZN 2222'))
|
||||
def test_postcode_malta_pass(sanitize, postcode):
|
||||
assert sanitize(country='mt', postcode=postcode) == [('postcode', postcode)]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('MTF 1111', 'MT MTF 1111', 'MTF1111', 'MT MTF1111'))
|
||||
def test_postcode_malta_mtarfa_pass(sanitize, postcode):
|
||||
assert sanitize(country='mt', postcode=postcode) == [('postcode', 'MTF 1111')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('1111', 'MTMT 1111'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_malta_fail(sanitize, postcode):
|
||||
assert sanitize(country='mt', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('VC1111', 'VC VC1111'))
|
||||
def test_postcode_saint_vincent_pass(sanitize, postcode):
|
||||
assert sanitize(country='vc', postcode=postcode) == [('postcode', 'VC1111')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('1111', 'VC-1111', 'VC 1111', 'VC11'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_saint_vincent_fail(sanitize, postcode):
|
||||
assert sanitize(country='vc', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('VG1111', 'VG VG1111'))
|
||||
def test_postcode_virgin_islands_pass(sanitize, postcode):
|
||||
assert sanitize(country='vg', postcode=postcode) == [('postcode', 'VG1111')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('1111', 'VG 1111', 'VG-1111'))
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False)
|
||||
def test_postcode_virgin_islands_fail(sanitize, postcode):
|
||||
assert sanitize(country='vg', postcode=postcode) == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('AB1', '123-456-7890', '1 as 44'))
|
||||
@pytest.mark.sanitizer_params(default_pattern='[A-Z0-9- ]{3,12}')
|
||||
def test_postcode_default_pattern_pass(sanitize, postcode):
|
||||
@@ -99,4 +236,3 @@ def test_postcode_default_pattern_pass(sanitize, postcode):
|
||||
@pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
|
||||
def test_postcode_default_pattern_fail(sanitize, postcode):
|
||||
assert sanitize(country='an', postcode=postcode) == []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user