ignore invalid coordinates in external postcodes

This commit is contained in:
Sarah Hoffmann
2021-05-13 12:19:20 +02:00
parent 8f2746fe24
commit f5977dac75
2 changed files with 40 additions and 1 deletions

View File

@@ -5,6 +5,7 @@ of artificial postcode centroids.
import csv
import gzip
import logging
from math import isfinite
from psycopg2.extras import execute_values
@@ -12,6 +13,17 @@ from nominatim.db.connection import connect
LOG = logging.getLogger()
def _to_float(num, max_value):
""" Convert the number in string into a float. The number is expected
to be in the range of [-max_value, max_value]. Otherwise rises a
ValueError.
"""
num = float(num)
if not isfinite(num) or num <= -max_value or num >= max_value:
raise ValueError()
return num
class _CountryPostcodesCollector:
""" Collector for postcodes of a single country.
"""
@@ -108,7 +120,8 @@ class _CountryPostcodesCollector:
postcode = analyzer.normalize_postcode(row['postcode'])
if postcode not in self.collected:
try:
self.collected[postcode] = (float(row['lon']), float(row['lat']))
self.collected[postcode] = (_to_float(row['lon'], 180),
_to_float(row['lat'], 90))
except ValueError:
LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
row['lat'], row['lon'], self.country)

View File

@@ -157,3 +157,29 @@ def test_import_postcodes_extern(dsn, placex_table, postcode_table, tmp_path,
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12),
('xx', 'CD 4511', -10, -5)}
def test_import_postcodes_extern_bad_column(dsn, placex_table, postcode_table,
                                            tmp_path, tokenizer):
    """ The external file has no 'postcode' column (the header is
        misspelled), so only the postcode coming from placex is expected
        in the postcode table.
    """
    placex_table.add(country='xx', geom='POINT(10 12)',
                     address={'postcode': 'AB 4511'})

    csv_path = tmp_path / 'xx_postcodes.csv'
    csv_path.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected_rows = {('xx', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected_rows
def test_import_postcodes_extern_bad_number(dsn, placex_table, postcode_table,
                                            tmp_path, tokenizer):
    """ The external file contains one row with a NaN longitude and one
        with a latitude of 200. Neither is expected in the postcode table,
        while the remaining well-formed row is.
    """
    placex_table.add(country='xx', geom='POINT(10 12)',
                     address={'postcode': 'AB 4511'})

    csv_path = tmp_path / 'xx_postcodes.csv'
    csv_path.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected_rows = {('xx', 'AB 4511', 10, 12),
                     ('xx', 'CD 4511', -10, -5)}
    assert postcode_table.row_set == expected_rows