remove postcodes from countries that don't have them

The postcodes will only be removed as a 'computed postcode'; they
are still searchable for the given object.
This commit is contained in:
Sarah Hoffmann
2022-05-12 11:43:47 +02:00
parent 21fb501699
commit 8080625747
6 changed files with 45 additions and 6 deletions

View File

@@ -13,4 +13,4 @@ ignored-classes=NominatimArgs,closing
# 'too-many-ancestors' is triggered already by deriving from UserDict # 'too-many-ancestors' is triggered already by deriving from UserDict
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use
good-names=i,x,y,fd,db good-names=i,x,y,fd,db,cc

View File

@@ -44,6 +44,20 @@ class SanitizerConfig(UserDict):
return values return values
def get_bool(self, param, default=None):
    """ Extract a configuration parameter as a boolean.

        The parameter must be one of the yaml boolean values or a
        user error will be raised. If `default` is given, then the parameter
        may also be missing or empty.

        Returns the boolean value of the parameter, or `default` when the
        parameter is missing or empty.

        Raises a UsageError when the resulting value is not a boolean. This
        includes the case where the parameter is missing or empty and no
        boolean `default` was supplied.
    """
    value = self.data.get(param)

    # A parameter that is present but empty (None) is treated exactly like
    # a missing one, so that the documented fallback to `default` applies
    # in both cases.
    if value is None:
        value = default

    if not isinstance(value, bool):
        raise UsageError(f"Parameter '{param}' must be a boolean value ('yes' or 'no').")

    return value
def get_delimiter(self, default=',;'): def get_delimiter(self, default=',;'):
""" Return the 'delimiter' parameter in the configuration as a """ Return the 'delimiter' parameter in the configuration as a
compiled regular expression that can be used to split the names on the compiled regular expression that can be used to split the names on the

View File

@@ -48,8 +48,7 @@ class _AnalyzerByLanguage:
self.deflangs = {} self.deflangs = {}
if use_defaults in ('mono', 'all'): if use_defaults in ('mono', 'all'):
for ccode, prop in country_info.iterate(): for ccode, clangs in country_info.iterate('languages'):
clangs = prop['languages']
if len(clangs) == 1 or use_defaults == 'all': if len(clangs) == 1 or use_defaults == 'all':
if self.whitelist: if self.whitelist:
self.deflangs[ccode] = [l for l in clangs if l in self.whitelist] self.deflangs[ccode] = [l for l in clangs if l in self.whitelist]

View File

@@ -84,11 +84,21 @@ def setup_country_config(config):
_COUNTRY_INFO.load(config) _COUNTRY_INFO.load(config)
def iterate(): def iterate(prop=None):
""" Iterate over country code and properties. """ Iterate over country code and properties.
When `prop` is None, all countries are returned with their complete
set of properties.
If `prop` is given, then only countries are returned where the
given property is set. The second item of the tuple contains only
the content of the given property.
""" """
if prop is None:
return _COUNTRY_INFO.items() return _COUNTRY_INFO.items()
return ((c, p[prop]) for c, p in _COUNTRY_INFO.items() if prop in p)
def setup_country_tables(dsn, sql_dir, ignore_partitions=False): def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
""" Create and populate the tables with basic static data that provides """ Create and populate the tables with basic static data that provides

View File

@@ -32,6 +32,8 @@ sanitizers:
- streetnumber - streetnumber
convert-to-name: convert-to-name:
- (\A|.*,)[^\d,]{3,}(,.*|\Z) - (\A|.*,)[^\d,]{3,}(,.*|\Z)
- step: clean-postcodes
convert-to-address: yes
- step: split-name-list - step: split-name-list
- step: strip-brace-terms - step: strip-brace-terms
- step: tag-analyzer-by-language - step: tag-analyzer-by-language

View File

@@ -246,4 +246,18 @@ Feature: Import of postcodes
| 12 445 4 | ca | 25 | 11 | | 12 445 4 | ca | 25 | 11 |
| A1:BC10 | ca | 25 | 11 | | A1:BC10 | ca | 25 | 11 |
Scenario: Postcodes outside all countries are not added to the postcode and word table
Given the places
| osm | class | type | addr+postcode | addr+housenumber | addr+place | geometry |
| N34 | place | house | 01982 | 111 | Null Island | 0 0.00001 |
And the places
| osm | class | type | name | geometry |
| N1 | place | hamlet | Null Island | 0 0 |
When importing
Then location_postcode contains exactly
| country | postcode | geometry |
And there are no word tokens for postcodes 01982
When sending search query "111, 01982 Null Island"
Then results contain
| osm | display_name |
| N34 | 111, Null Island, 01982 |