Merge pull request #2360 from AntoJvlt/postcodes-place-table

Use place instead of placex to compute postcodes
This commit is contained in:
Sarah Hoffmann
2021-06-16 11:45:07 +02:00
committed by GitHub
6 changed files with 110 additions and 64 deletions

View File

@@ -92,6 +92,14 @@ BEGIN
-- Get the existing place_id
select * from placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO existingplacex;
-- Pure postcodes are never queried from placex so we don't add them.
-- location_postcodes is filled from the place table directly.
IF NEW.class = 'place' AND NEW.type = 'postcode' THEN
-- Remove old placex entry.
DELETE FROM placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id;
RETURN NEW;
END IF;
-- Handle a place changing type by removing the old data
-- My generated 'place' types are causing havok because they overlap with real keys
-- TODO: move them to their own special purpose key/class to avoid collisions
@@ -201,7 +209,7 @@ BEGIN
where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type;
IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
IF NEW.class = 'boundary' AND NEW.type = 'postal_code' THEN
IF NEW.address is NULL OR NOT NEW.address ? 'postcode' THEN
-- postcode was deleted, no longer retain in placex
DELETE FROM placex where place_id = existingplacex.place_id;

View File

@@ -52,13 +52,17 @@ class UpdateRefresh:
if args.postcodes:
LOG.warning("Update postcodes centroid")
tokenizer = self._get_tokenizer(args.config)
postcodes.update_postcodes(args.config.get_libpq_dsn(),
args.project_dir, tokenizer)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or 1)
indexer.index_postcodes()
if postcodes.can_compute(args.config.get_libpq_dsn()):
LOG.warning("Update postcodes centroid")
tokenizer = self._get_tokenizer(args.config)
postcodes.update_postcodes(args.config.get_libpq_dsn(),
args.project_dir, tokenizer)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or 1)
indexer.index_postcodes()
else:
LOG.error("The place table doesn\'t exist. " \
"Postcode updates on a frozen database is not possible.")
if args.word_counts:
LOG.warning('Recompute frequency of full-word search terms')

View File

@@ -199,7 +199,7 @@ def load_data(dsn, threads):
conn.perform("""INSERT INTO placex ({0})
SELECT {0} FROM place
WHERE osm_id % {1} = {2}
AND NOT (class='place' and type='houses')
AND NOT (class='place' and (type='houses' or type='postcode'))
AND ST_IsValid(geometry)
""".format(_COPY_COLUMNS, place_threads, imod))
sel.register(conn, selectors.EVENT_READ, conn)

View File

@@ -163,17 +163,17 @@ def update_postcodes(dsn, project_dir, tokenizer):
# Recompute the list of valid postcodes from placex.
with conn.cursor(name="placex_postcodes") as cur:
cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid)
FROM (
SELECT country_code,
token_normalized_postcode(address->'postcode') as pc,
ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
FROM placex
WHERE address ? 'postcode' and geometry IS NOT null
and country_code is not null
GROUP BY country_code, pc) xx
WHERE pc is not null
ORDER BY country_code, pc""")
cur.execute("""
SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
FROM (SELECT
COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc,
token_normalized_postcode(pl.address->'postcode') as pc,
ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid
FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null
GROUP BY cc, pc) xx
WHERE pc IS NOT null AND cc IS NOT null
ORDER BY country_code, pc""")
collector = None
@@ -195,3 +195,11 @@ def update_postcodes(dsn, project_dir, tokenizer):
conn.commit()
analyzer.update_postcodes_from_db()
def can_compute(dsn):
"""
Check that the place table exists so that
postcodes can be computed.
"""
with connect(dsn) as conn:
return conn.table_exists('place')

View File

@@ -320,7 +320,7 @@ class TestCliWithDb:
assert func_mock.called == 1
def test_refresh_postcodes(self, mock_func_factory):
def test_refresh_postcodes(self, mock_func_factory, place_table):
func_mock = mock_func_factory(nominatim.tools.postcodes, 'update_postcodes')
idx_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_postcodes')

View File

@@ -26,6 +26,11 @@ class MockPostcodeTable:
geometry GEOMETRY(Geometry, 4326))""")
cur.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION get_country_code(place geometry)
RETURNS TEXT AS $$ BEGIN
RETURN null;
END; $$ LANGUAGE plpgsql;
""")
conn.commit()
@@ -58,15 +63,16 @@ def postcode_table(temp_db_conn, placex_table, word_table):
return MockPostcodeTable(temp_db_conn)
def test_postcodes_empty(dsn, postcode_table, tmp_path, tokenizer):
def test_postcodes_empty(dsn, postcode_table, place_table,
tmp_path, tokenizer):
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
assert not postcode_table.row_set
def test_postcodes_add_new(dsn, placex_table, postcode_table, tmp_path, tokenizer):
placex_table.add(country='xx', geom='POINT(10 12)',
address=dict(postcode='9486'))
def test_postcodes_add_new(dsn, postcode_table, tmp_path,
insert_implicit_postcode, tokenizer):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='9486'))
postcode_table.add('yy', '9486', 99, 34)
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
@@ -74,10 +80,9 @@ def test_postcodes_add_new(dsn, placex_table, postcode_table, tmp_path, tokenize
assert postcode_table.row_set == {('xx', '9486', 10, 12), }
def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table,
tmp_path, tokenizer):
placex_table.add(country='xx', geom='POINT(10 12)',
address=dict(postcode='AB 4511'))
def test_postcodes_replace_coordinates(dsn, postcode_table, tmp_path,
insert_implicit_postcode, tokenizer):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
postcode_table.add('xx', 'AB 4511', 99, 34)
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
@@ -85,10 +90,9 @@ def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table,
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)}
def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table,
tmp_path, tokenizer):
placex_table.add(country='xx', geom='POINT(10 12)',
address=dict(postcode='AB 4511'))
def test_postcodes_replace_coordinates_close(dsn, postcode_table, tmp_path,
insert_implicit_postcode, tokenizer):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
postcode_table.add('xx', 'AB 4511', 10, 11.99999)
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
@@ -96,9 +100,9 @@ def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table,
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 11.99999)}
def test_postcodes_remove(dsn, placex_table, postcode_table, tmp_path, tokenizer):
placex_table.add(country='xx', geom='POINT(10 12)',
address=dict(postcode='AB 4511'))
def test_postcodes_remove(dsn, postcode_table, tmp_path,
insert_implicit_postcode, tokenizer):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
postcode_table.add('xx', 'badname', 10, 12)
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
@@ -106,32 +110,27 @@ def test_postcodes_remove(dsn, placex_table, postcode_table, tmp_path, tokenizer
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)}
def test_postcodes_ignore_empty_country(dsn, placex_table, postcode_table, tmp_path, tokenizer):
placex_table.add(country=None, geom='POINT(10 12)',
address=dict(postcode='AB 4511'))
def test_postcodes_ignore_empty_country(dsn, postcode_table, tmp_path,
insert_implicit_postcode, tokenizer):
insert_implicit_postcode(1, None, 'POINT(10 12)', dict(postcode='AB 4511'))
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
assert not postcode_table.row_set
def test_postcodes_remove_all(dsn, postcode_table, tmp_path, tokenizer):
def test_postcodes_remove_all(dsn, postcode_table, place_table,
tmp_path, tokenizer):
postcode_table.add('ch', '5613', 10, 12)
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
assert not postcode_table.row_set
def test_postcodes_multi_country(dsn, placex_table, postcode_table, tmp_path, tokenizer):
placex_table.add(country='de', geom='POINT(10 12)',
address=dict(postcode='54451'))
placex_table.add(country='cc', geom='POINT(100 56)',
address=dict(postcode='DD23 T'))
placex_table.add(country='de', geom='POINT(10.3 11.0)',
address=dict(postcode='54452'))
placex_table.add(country='cc', geom='POINT(10.3 11.0)',
address=dict(postcode='54452'))
def test_postcodes_multi_country(dsn, postcode_table, tmp_path,
insert_implicit_postcode, tokenizer):
insert_implicit_postcode(1, 'de', 'POINT(10 12)', dict(postcode='54451'))
insert_implicit_postcode(2, 'cc', 'POINT(100 56)', dict(postcode='DD23 T'))
insert_implicit_postcode(3, 'de', 'POINT(10.3 11.0)', dict(postcode='54452'))
insert_implicit_postcode(4, 'cc', 'POINT(10.3 11.0)', dict(postcode='54452'))
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
@@ -142,10 +141,9 @@ def test_postcodes_multi_country(dsn, placex_table, postcode_table, tmp_path, to
@pytest.mark.parametrize("gzipped", [True, False])
def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path,
tokenizer, gzipped):
placex_table.add(country='xx', geom='POINT(10 12)',
address=dict(postcode='AB 4511'))
def test_postcodes_extern(dsn, postcode_table, tmp_path,
insert_implicit_postcode, tokenizer, gzipped):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
@@ -160,10 +158,9 @@ def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path,
('xx', 'CD 4511', -10, -5)}
def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table,
tmp_path, tokenizer):
placex_table.add(country='xx', geom='POINT(10 12)',
address=dict(postcode='AB 4511'))
def test_postcodes_extern_bad_column(dsn, postcode_table, tmp_path,
insert_implicit_postcode, tokenizer):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
@@ -173,10 +170,9 @@ def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table,
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)}
def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table,
tmp_path, tokenizer):
placex_table.add(country='xx', geom='POINT(10 12)',
address=dict(postcode='AB 4511'))
def test_postcodes_extern_bad_number(dsn, insert_implicit_postcode,
postcode_table, tmp_path, tokenizer):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")
@@ -185,3 +181,33 @@ def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table,
assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12),
('xx', 'CD 4511', -10, -5)}
def test_can_compute(dsn, table_factory):
assert not postcodes.can_compute(dsn)
table_factory('place')
assert postcodes.can_compute(dsn)
def test_no_placex_entry(dsn, tmp_path, temp_db_cursor, place_row, postcode_table, tokenizer):
#Rewrite the get_country_code function to verify its execution.
temp_db_cursor.execute("""
CREATE OR REPLACE FUNCTION get_country_code(place geometry)
RETURNS TEXT AS $$ BEGIN
RETURN 'fr';
END; $$ LANGUAGE plpgsql;
""")
place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511'))
postcodes.update_postcodes(dsn, tmp_path, tokenizer)
assert postcode_table.row_set == {('fr', 'AB 4511', 10, 12)}
@pytest.fixture
def insert_implicit_postcode(placex_table, place_row):
"""
Inserts data into the placex and place table
which can then be used to compute one postcode.
"""
def _insert_implicit_postcode(osm_id, country, geometry, address):
placex_table.add(osm_id=osm_id, country=country, geom=geometry)
place_row(osm_id=osm_id, geom='SRID=4326;'+geometry, address=address)
return _insert_implicit_postcode