mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-15 02:47:59 +00:00
initial postcode cleaner for simple patterns
Moves postcodes that are either in countries without a postcode system or do not match the local postcode pattern into a field for a normal address part. This keeps them searchable, but not as a special address. This has two consequences: they are no longer a skippable part of the address, and the postcodes cannot be searched for on their own.
This commit is contained in:
54
test/python/tokenizer/sanitizers/test_clean_postcodes.py
Normal file
54
test/python/tokenizer/sanitizers/test_clean_postcodes.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2022 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for the sanitizer that normalizes postcodes.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
|
||||
from nominatim.indexer.place_info import PlaceInfo
|
||||
from nominatim.tools import country_info
|
||||
|
||||
@pytest.fixture
def sanitize(def_config, request):
    """ Provide a callable that runs the 'clean-postcodes' sanitizer step.

        The country configuration is set up from `def_config` first.
        Keyword arguments of any `sanitizer_params` marker on the
        requesting test are added to the step configuration, with
        underscores in the option names replaced by hyphens.

        The returned callable takes an optional `country` and the address
        parts as keyword arguments. It returns the sorted list of
        (kind, name) tuples of the address items after sanitizing.
    """
    country_info.setup_country_config(def_config)

    step_config = {'step': 'clean-postcodes'}
    for marker in request.node.iter_markers(name="sanitizer_params"):
        # Python keywords cannot contain '-', so tests spell the
        # options with '_' instead.
        for option, value in marker.kwargs.items():
            step_config[option.replace('_', '-')] = value

    def _run(country=None, **kwargs):
        place = {'address': kwargs}
        # Leave out the country code completely when none is given.
        if country is not None:
            place['country_code'] = country

        sanitizer = PlaceSanitizer([step_config])
        _names, address = sanitizer.process_names(PlaceInfo(place))

        # Sort to make the result independent of the output order.
        items = [(item.kind, item.name) for item in address]
        items.sort()

        return items

    return _run
|
||||
|
||||
|
||||
@pytest.mark.parametrize("country", (None, 'ae'))
def test_postcode_no_country(sanitize, country):
    """ With the default configuration, a postcode for a place with no
        country or with country 'ae' comes back with the kind
        'unofficial_postcode'.
    """
    expected = [('unofficial_postcode', '23231')]

    result = sanitize(country=country, postcode='23231')

    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("country", (None, 'ae'))
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_no_country_drop(sanitize, country):
    """ With 'convert-to-address' set to False, a postcode for a place
        with no country or with country 'ae' leaves no address items.
    """
    result = sanitize(country=country, postcode='23231')

    assert result == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('12345', '  34009  '))
def test_postcode_pass_good_format(sanitize, postcode):
    """ Five-digit postcodes for country 'de' are kept with the kind
        'postcode' and surrounding whitespace stripped.
    """
    expected = [('postcode', postcode.strip())]

    result = sanitize(country='de', postcode=postcode)

    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("postcode", ('123456', '', '   ', '.....'))
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_drop_bad_format(sanitize, postcode):
    """ With 'convert-to-address' set to False, the listed postcodes
        (too long, empty, blank, dots only) for country 'de' leave no
        address items.
    """
    result = sanitize(country='de', postcode=postcode)

    assert result == []
|
||||
Reference in New Issue
Block a user