forked from hans/Nominatim
Merge pull request #2360 from AntoJvlt/postcodes-place-table
Use place instead of placex to compute postcodes
This commit is contained in:
@@ -92,6 +92,14 @@ BEGIN
|
||||
-- Get the existing place_id
|
||||
select * from placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO existingplacex;
|
||||
|
||||
-- Pure postcodes are never queried from placex so we don't add them.
|
||||
-- location_postcodes is filled from the place table directly.
|
||||
IF NEW.class = 'place' AND NEW.type = 'postcode' THEN
|
||||
-- Remove old placex entry.
|
||||
DELETE FROM placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id;
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
|
||||
-- Handle a place changing type by removing the old data
|
||||
-- My generated 'place' types are causing havoc because they overlap with real keys
|
||||
-- TODO: move them to their own special purpose key/class to avoid collisions
|
||||
@@ -201,7 +209,7 @@ BEGIN
|
||||
where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type;
|
||||
|
||||
|
||||
IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
|
||||
IF NEW.class = 'boundary' AND NEW.type = 'postal_code' THEN
|
||||
IF NEW.address is NULL OR NOT NEW.address ? 'postcode' THEN
|
||||
-- postcode was deleted, so no longer retain it in placex
|
||||
DELETE FROM placex where place_id = existingplacex.place_id;
|
||||
|
||||
@@ -52,13 +52,17 @@ class UpdateRefresh:
|
||||
|
||||
|
||||
if args.postcodes:
|
||||
LOG.warning("Update postcodes centroid")
|
||||
tokenizer = self._get_tokenizer(args.config)
|
||||
postcodes.update_postcodes(args.config.get_libpq_dsn(),
|
||||
args.project_dir, tokenizer)
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
||||
args.threads or 1)
|
||||
indexer.index_postcodes()
|
||||
if postcodes.can_compute(args.config.get_libpq_dsn()):
|
||||
LOG.warning("Update postcodes centroid")
|
||||
tokenizer = self._get_tokenizer(args.config)
|
||||
postcodes.update_postcodes(args.config.get_libpq_dsn(),
|
||||
args.project_dir, tokenizer)
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
||||
args.threads or 1)
|
||||
indexer.index_postcodes()
|
||||
else:
|
||||
LOG.error("The place table doesn\'t exist. " \
|
||||
"Postcode updates on a frozen database is not possible.")
|
||||
|
||||
if args.word_counts:
|
||||
LOG.warning('Recompute frequency of full-word search terms')
|
||||
|
||||
@@ -199,7 +199,7 @@ def load_data(dsn, threads):
|
||||
conn.perform("""INSERT INTO placex ({0})
|
||||
SELECT {0} FROM place
|
||||
WHERE osm_id % {1} = {2}
|
||||
AND NOT (class='place' and type='houses')
|
||||
AND NOT (class='place' and (type='houses' or type='postcode'))
|
||||
AND ST_IsValid(geometry)
|
||||
""".format(_COPY_COLUMNS, place_threads, imod))
|
||||
sel.register(conn, selectors.EVENT_READ, conn)
|
||||
|
||||
@@ -163,17 +163,17 @@ def update_postcodes(dsn, project_dir, tokenizer):
|
||||
|
||||
# Recompute the list of valid postcodes from place (joined with placex).
|
||||
with conn.cursor(name="placex_postcodes") as cur:
|
||||
cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid)
|
||||
FROM (
|
||||
SELECT country_code,
|
||||
token_normalized_postcode(address->'postcode') as pc,
|
||||
ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
|
||||
FROM placex
|
||||
WHERE address ? 'postcode' and geometry IS NOT null
|
||||
and country_code is not null
|
||||
GROUP BY country_code, pc) xx
|
||||
WHERE pc is not null
|
||||
ORDER BY country_code, pc""")
|
||||
cur.execute("""
|
||||
SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
|
||||
FROM (SELECT
|
||||
COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc,
|
||||
token_normalized_postcode(pl.address->'postcode') as pc,
|
||||
ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid
|
||||
FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
|
||||
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null
|
||||
GROUP BY cc, pc) xx
|
||||
WHERE pc IS NOT null AND cc IS NOT null
|
||||
ORDER BY country_code, pc""")
|
||||
|
||||
collector = None
|
||||
|
||||
@@ -195,3 +195,11 @@ def update_postcodes(dsn, project_dir, tokenizer):
|
||||
conn.commit()
|
||||
|
||||
analyzer.update_postcodes_from_db()
|
||||
|
||||
def can_compute(dsn):
    """
        Tell whether postcodes can be computed for the database
        reachable through `dsn`. This is the case when the
        place table exists.
    """
    with connect(dsn) as conn:
        has_place_table = conn.table_exists('place')
        return has_place_table
|
||||
|
||||
@@ -320,7 +320,7 @@ class TestCliWithDb:
|
||||
assert func_mock.called == 1
|
||||
|
||||
|
||||
def test_refresh_postcodes(self, mock_func_factory):
|
||||
def test_refresh_postcodes(self, mock_func_factory, place_table):
|
||||
func_mock = mock_func_factory(nominatim.tools.postcodes, 'update_postcodes')
|
||||
idx_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_postcodes')
|
||||
|
||||
|
||||
@@ -26,6 +26,11 @@ class MockPostcodeTable:
|
||||
geometry GEOMETRY(Geometry, 4326))""")
|
||||
cur.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
|
||||
RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_country_code(place geometry)
|
||||
RETURNS TEXT AS $$ BEGIN
|
||||
RETURN null;
|
||||
END; $$ LANGUAGE plpgsql;
|
||||
""")
|
||||
conn.commit()
|
||||
|
||||
@@ -58,15 +63,16 @@ def postcode_table(temp_db_conn, placex_table, word_table):
|
||||
return MockPostcodeTable(temp_db_conn)
|
||||
|
||||
|
||||
def test_postcodes_empty(dsn, postcode_table, tmp_path, tokenizer):
|
||||
def test_postcodes_empty(dsn, postcode_table, place_table,
|
||||
tmp_path, tokenizer):
|
||||
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
|
||||
|
||||
assert not postcode_table.row_set
|
||||
|
||||
|
||||
def test_postcodes_add_new(dsn, placex_table, postcode_table, tmp_path, tokenizer):
|
||||
placex_table.add(country='xx', geom='POINT(10 12)',
|
||||
address=dict(postcode='9486'))
|
||||
def test_postcodes_add_new(dsn, postcode_table, tmp_path,
|
||||
insert_implicit_postcode, tokenizer):
|
||||
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='9486'))
|
||||
postcode_table.add('yy', '9486', 99, 34)
|
||||
|
||||
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
|
||||
@@ -74,10 +80,9 @@ def test_postcodes_add_new(dsn, placex_table, postcode_table, tmp_path, tokenize
|
||||
assert postcode_table.row_set == {('xx', '9486', 10, 12), }
|
||||
|
||||
|
||||
def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table,
|
||||
tmp_path, tokenizer):
|
||||
placex_table.add(country='xx', geom='POINT(10 12)',
|
||||
address=dict(postcode='AB 4511'))
|
||||
def test_postcodes_replace_coordinates(dsn, postcode_table, tmp_path,
|
||||
insert_implicit_postcode, tokenizer):
|
||||
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
|
||||
postcode_table.add('xx', 'AB 4511', 99, 34)
|
||||
|
||||
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
|
||||
@@ -85,10 +90,9 @@ def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table,
|
||||
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)}
|
||||
|
||||
|
||||
def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table,
|
||||
tmp_path, tokenizer):
|
||||
placex_table.add(country='xx', geom='POINT(10 12)',
|
||||
address=dict(postcode='AB 4511'))
|
||||
def test_postcodes_replace_coordinates_close(dsn, postcode_table, tmp_path,
|
||||
insert_implicit_postcode, tokenizer):
|
||||
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
|
||||
postcode_table.add('xx', 'AB 4511', 10, 11.99999)
|
||||
|
||||
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
|
||||
@@ -96,9 +100,9 @@ def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table,
|
||||
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 11.99999)}
|
||||
|
||||
|
||||
def test_postcodes_remove(dsn, placex_table, postcode_table, tmp_path, tokenizer):
|
||||
placex_table.add(country='xx', geom='POINT(10 12)',
|
||||
address=dict(postcode='AB 4511'))
|
||||
def test_postcodes_remove(dsn, postcode_table, tmp_path,
|
||||
insert_implicit_postcode, tokenizer):
|
||||
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
|
||||
postcode_table.add('xx', 'badname', 10, 12)
|
||||
|
||||
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
|
||||
@@ -106,32 +110,27 @@ def test_postcodes_remove(dsn, placex_table, postcode_table, tmp_path, tokenizer
|
||||
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)}
|
||||
|
||||
|
||||
def test_postcodes_ignore_empty_country(dsn, placex_table, postcode_table, tmp_path, tokenizer):
|
||||
placex_table.add(country=None, geom='POINT(10 12)',
|
||||
address=dict(postcode='AB 4511'))
|
||||
|
||||
def test_postcodes_ignore_empty_country(dsn, postcode_table, tmp_path,
|
||||
insert_implicit_postcode, tokenizer):
|
||||
insert_implicit_postcode(1, None, 'POINT(10 12)', dict(postcode='AB 4511'))
|
||||
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
|
||||
|
||||
assert not postcode_table.row_set
|
||||
|
||||
|
||||
def test_postcodes_remove_all(dsn, postcode_table, tmp_path, tokenizer):
|
||||
def test_postcodes_remove_all(dsn, postcode_table, place_table,
|
||||
tmp_path, tokenizer):
|
||||
postcode_table.add('ch', '5613', 10, 12)
|
||||
|
||||
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
|
||||
|
||||
assert not postcode_table.row_set
|
||||
|
||||
|
||||
def test_postcodes_multi_country(dsn, placex_table, postcode_table, tmp_path, tokenizer):
|
||||
placex_table.add(country='de', geom='POINT(10 12)',
|
||||
address=dict(postcode='54451'))
|
||||
placex_table.add(country='cc', geom='POINT(100 56)',
|
||||
address=dict(postcode='DD23 T'))
|
||||
placex_table.add(country='de', geom='POINT(10.3 11.0)',
|
||||
address=dict(postcode='54452'))
|
||||
placex_table.add(country='cc', geom='POINT(10.3 11.0)',
|
||||
address=dict(postcode='54452'))
|
||||
def test_postcodes_multi_country(dsn, postcode_table, tmp_path,
|
||||
insert_implicit_postcode, tokenizer):
|
||||
insert_implicit_postcode(1, 'de', 'POINT(10 12)', dict(postcode='54451'))
|
||||
insert_implicit_postcode(2, 'cc', 'POINT(100 56)', dict(postcode='DD23 T'))
|
||||
insert_implicit_postcode(3, 'de', 'POINT(10.3 11.0)', dict(postcode='54452'))
|
||||
insert_implicit_postcode(4, 'cc', 'POINT(10.3 11.0)', dict(postcode='54452'))
|
||||
|
||||
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
|
||||
|
||||
@@ -142,10 +141,9 @@ def test_postcodes_multi_country(dsn, placex_table, postcode_table, tmp_path, to
|
||||
|
||||
|
||||
@pytest.mark.parametrize("gzipped", [True, False])
|
||||
def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path,
|
||||
tokenizer, gzipped):
|
||||
placex_table.add(country='xx', geom='POINT(10 12)',
|
||||
address=dict(postcode='AB 4511'))
|
||||
def test_postcodes_extern(dsn, postcode_table, tmp_path,
|
||||
insert_implicit_postcode, tokenizer, gzipped):
|
||||
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
|
||||
|
||||
extfile = tmp_path / 'xx_postcodes.csv'
|
||||
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
|
||||
@@ -160,10 +158,9 @@ def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path,
|
||||
('xx', 'CD 4511', -10, -5)}
|
||||
|
||||
|
||||
def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table,
|
||||
tmp_path, tokenizer):
|
||||
placex_table.add(country='xx', geom='POINT(10 12)',
|
||||
address=dict(postcode='AB 4511'))
|
||||
def test_postcodes_extern_bad_column(dsn, postcode_table, tmp_path,
|
||||
insert_implicit_postcode, tokenizer):
|
||||
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
|
||||
|
||||
extfile = tmp_path / 'xx_postcodes.csv'
|
||||
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
|
||||
@@ -173,10 +170,9 @@ def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table,
|
||||
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)}
|
||||
|
||||
|
||||
def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table,
|
||||
tmp_path, tokenizer):
|
||||
placex_table.add(country='xx', geom='POINT(10 12)',
|
||||
address=dict(postcode='AB 4511'))
|
||||
def test_postcodes_extern_bad_number(dsn, insert_implicit_postcode,
|
||||
postcode_table, tmp_path, tokenizer):
|
||||
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
|
||||
|
||||
extfile = tmp_path / 'xx_postcodes.csv'
|
||||
extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")
|
||||
@@ -185,3 +181,33 @@ def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table,
|
||||
|
||||
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12),
|
||||
('xx', 'CD 4511', -10, -5)}
|
||||
|
||||
def test_can_compute(dsn, table_factory):
    """ can_compute() must be false before table_factory('place') has run
        and true afterwards.
    """
    before = postcodes.can_compute(dsn)
    assert not before

    table_factory('place')

    after = postcodes.can_compute(dsn)
    assert after
|
||||
|
||||
def test_no_placex_entry(dsn, tmp_path, temp_db_cursor, place_row, postcode_table, tokenizer):
    """ A postcode that is only inserted as a place row must end up in the
        postcode table with the country code from get_country_code().
    """
    # Replace get_country_code() with a stub returning a fixed country.
    # Seeing that country in the result shows the function was executed.
    country_stub_sql = """
        CREATE OR REPLACE FUNCTION get_country_code(place geometry)
        RETURNS TEXT AS $$ BEGIN
            RETURN 'fr';
        END; $$ LANGUAGE plpgsql;
    """
    temp_db_cursor.execute(country_stub_sql)

    place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511'))
    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected_rows = {('fr', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected_rows
|
||||
|
||||
@pytest.fixture
def insert_implicit_postcode(placex_table, place_row):
    """
        Provide a helper that adds one object to both the placex
        and the place table, so that a single postcode can be
        computed from it.
    """
    def _add_to_both_tables(osm_id, country, geometry, address):
        # placex gets the country and the geometry as given.
        placex_table.add(osm_id=osm_id, country=country, geom=geometry)

        # place gets the address and the SRID-prefixed geometry.
        srid_geometry = 'SRID=4326;' + geometry
        place_row(osm_id=osm_id, geom=srid_geometry, address=address)

    return _add_to_both_tables
|
||||
|
||||
Reference in New Issue
Block a user