postcodes: introduce a default pattern for countries without postcodes

This commit is contained in:
Sarah Hoffmann
2022-05-24 18:25:37 +02:00
parent 5ba75df507
commit 18864afa8a
3 changed files with 28 additions and 7 deletions

View File

@@ -75,6 +75,12 @@ class _PostcodeSanitizer:
else: else:
raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'") raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'")
default_pattern = config.get('default-pattern')
if default_pattern is not None and isinstance(default_pattern, str):
self.default_matcher = _PostcodeMatcher('', {'pattern': default_pattern})
else:
self.default_matcher = None
def __call__(self, obj): def __call__(self, obj):
if not obj.address: if not obj.address:
@@ -103,14 +109,16 @@ class _PostcodeSanitizer:
if country in self.country_without_postcode: if country in self.country_without_postcode:
return None return None
matcher = self.country_matcher.get(country) matcher = self.country_matcher.get(country, self.default_matcher)
if matcher is not None: if matcher is None:
match = matcher.match(postcode) return postcode.upper(), ''
if match is None:
return None match = matcher.match(postcode)
return matcher.normalize(match), ' '.join(match.groups()) if match is None:
return None
return matcher.normalize(match), ' '.join(match.groups())
return postcode.upper(), ''

View File

@@ -34,6 +34,7 @@ sanitizers:
- (\A|.*,)[^\d,]{3,}(,.*|\Z) - (\A|.*,)[^\d,]{3,}(,.*|\Z)
- step: clean-postcodes - step: clean-postcodes
convert-to-address: yes convert-to-address: yes
default-pattern: "[A-Z0-9- ]{3,12}"
- step: split-name-list - step: split-name-list
- step: strip-brace-terms - step: strip-brace-terms
- step: tag-analyzer-by-language - step: tag-analyzer-by-language

View File

@@ -88,3 +88,15 @@ def test_postcode_sweden_pass(sanitize, postcode):
def test_postcode_sweden_fail(sanitize, postcode): def test_postcode_sweden_fail(sanitize, postcode):
assert sanitize(country='se', postcode=postcode) == [] assert sanitize(country='se', postcode=postcode) == []
@pytest.mark.parametrize("postcode", ('AB1', '123-456-7890', '1 as 44'))
@pytest.mark.sanitizer_params(default_pattern='[A-Z0-9- ]{3,12}')
def test_postcode_default_pattern_pass(sanitize, postcode):
    """ With a default pattern configured, each parametrized postcode for
        country 'an' is expected back as a single upper-cased
        'postcode' entry.
    """
    result = sanitize(country='an', postcode=postcode)
    expected = [('postcode', postcode.upper())]

    assert result == expected
@pytest.mark.parametrize("postcode", ('C', '12', 'ABC123DEF 456', '1234,5678', '11223;11224'))
@pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
def test_postcode_default_pattern_fail(sanitize, postcode):
    """ With a default pattern configured and address conversion switched
        off, each parametrized postcode for country 'an' is expected to
        produce an empty result.
    """
    result = sanitize(country='an', postcode=postcode)

    assert result == []