diff --git a/lib-sql/functions/place_triggers.sql b/lib-sql/functions/place_triggers.sql index 53163746..dd0f1662 100644 --- a/lib-sql/functions/place_triggers.sql +++ b/lib-sql/functions/place_triggers.sql @@ -92,6 +92,14 @@ BEGIN -- Get the existing place_id select * from placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO existingplacex; + -- Pure postcodes are never queried from placex so we don't add them. + -- location_postcodes is filled from the place table directly. + IF NEW.class = 'place' AND NEW.type = 'postcode' THEN + -- Remove old placex entry. + DELETE FROM placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id; + RETURN NEW; + END IF; + -- Handle a place changing type by removing the old data -- My generated 'place' types are causing havok because they overlap with real keys -- TODO: move them to their own special purpose key/class to avoid collisions @@ -201,7 +209,7 @@ BEGIN where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type; - IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN + IF NEW.class = 'boundary' AND NEW.type = 'postal_code' THEN IF NEW.address is NULL OR NOT NEW.address ? 'postcode' THEN -- postcode was deleted, no longer retain in placex DELETE FROM placex where place_id = existingplacex.place_id; diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index 386516d6..fbc23350 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -52,13 +52,17 @@ class UpdateRefresh: if args.postcodes: - LOG.warning("Update postcodes centroid") - tokenizer = self._get_tokenizer(args.config) - postcodes.update_postcodes(args.config.get_libpq_dsn(), - args.project_dir, tokenizer) - indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, - args.threads or 1) - indexer.index_postcodes() + if postcodes.can_compute(args.config.get_libpq_dsn()): + LOG.warning("Update postcodes centroid") + tokenizer = self._get_tokenizer(args.config) + postcodes.update_postcodes(args.config.get_libpq_dsn(), + args.project_dir, tokenizer) + indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, + args.threads or 1) + indexer.index_postcodes() + else: + LOG.error("The place table doesn\'t exist. " \ + "Postcode updates on a frozen database is not possible.") if args.word_counts: LOG.warning('Recompute frequency of full-word search terms') diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 664d3c6b..28a10ebe 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -199,7 +199,7 @@ def load_data(dsn, threads): conn.perform("""INSERT INTO placex ({0}) SELECT {0} FROM place WHERE osm_id % {1} = {2} - AND NOT (class='place' and type='houses') + AND NOT (class='place' and (type='houses' or type='postcode')) AND ST_IsValid(geometry) """.format(_COPY_COLUMNS, place_threads, imod)) sel.register(conn, selectors.EVENT_READ, conn) diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index 195d407e..cfd242e2 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -163,17 +163,17 @@ def update_postcodes(dsn, project_dir, tokenizer): # Recompute the list of valid postcodes from placex. with conn.cursor(name="placex_postcodes") as cur: - cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid) - FROM ( - SELECT country_code, - token_normalized_postcode(address->'postcode') as pc, - ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid - FROM placex - WHERE address ? 'postcode' and geometry IS NOT null - and country_code is not null - GROUP BY country_code, pc) xx - WHERE pc is not null - ORDER BY country_code, pc""") + cur.execute(""" + SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid) + FROM (SELECT + COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc, + token_normalized_postcode(pl.address->'postcode') as pc, + ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid + FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type + WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null + GROUP BY cc, pc) xx + WHERE pc IS NOT null AND cc IS NOT null + ORDER BY country_code, pc""") collector = None @@ -195,3 +195,11 @@ def update_postcodes(dsn, project_dir, tokenizer): conn.commit() analyzer.update_postcodes_from_db() + +def can_compute(dsn): + """ + Check that the place table exists so that + postcodes can be computed. + """ + with connect(dsn) as conn: + return conn.table_exists('place') diff --git a/test/python/test_cli.py b/test/python/test_cli.py index 1d775b1f..d9e01040 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -320,7 +320,7 @@ class TestCliWithDb: assert func_mock.called == 1 - def test_refresh_postcodes(self, mock_func_factory): + def test_refresh_postcodes(self, mock_func_factory, place_table): func_mock = mock_func_factory(nominatim.tools.postcodes, 'update_postcodes') idx_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_postcodes') diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py index 8f54ad41..a3415769 100644 --- a/test/python/test_tools_postcodes.py +++ b/test/python/test_tools_postcodes.py @@ -26,6 +26,11 @@ class MockPostcodeTable: geometry GEOMETRY(Geometry, 4326))""") cur.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT) RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION get_country_code(place geometry) + RETURNS TEXT AS $$ BEGIN + RETURN null; + END; $$ LANGUAGE plpgsql; """) conn.commit() @@ -58,15 +63,16 @@ def postcode_table(temp_db_conn, placex_table, word_table): return MockPostcodeTable(temp_db_conn) -def test_postcodes_empty(dsn, postcode_table, tmp_path, tokenizer): +def test_postcodes_empty(dsn, postcode_table, place_table, + tmp_path, tokenizer): postcodes.update_postcodes(dsn, tmp_path, tokenizer) assert not postcode_table.row_set -def test_postcodes_add_new(dsn, placex_table, postcode_table, tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='9486')) +def test_postcodes_add_new(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='9486')) postcode_table.add('yy', '9486', 99, 34) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -74,10 +80,9 @@ def test_postcodes_add_new(dsn, placex_table, postcode_table, tmp_path, tokenize assert postcode_table.row_set == {('xx', '9486', 10, 12), } -def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table, - tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) +def test_postcodes_replace_coordinates(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) postcode_table.add('xx', 'AB 4511', 99, 34) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -85,10 +90,9 @@ def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)} -def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table, - tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) +def test_postcodes_replace_coordinates_close(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) postcode_table.add('xx', 'AB 4511', 10, 11.99999) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -96,9 +100,9 @@ def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 11.99999)} -def test_postcodes_remove(dsn, placex_table, postcode_table, tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) +def test_postcodes_remove(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) postcode_table.add('xx', 'badname', 10, 12) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -106,32 +110,27 @@ def test_postcodes_remove(dsn, placex_table, postcode_table, tmp_path, tokenizer assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)} -def test_postcodes_ignore_empty_country(dsn, placex_table, postcode_table, tmp_path, tokenizer): - placex_table.add(country=None, geom='POINT(10 12)', - address=dict(postcode='AB 4511')) - +def test_postcodes_ignore_empty_country(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, None, 'POINT(10 12)', dict(postcode='AB 4511')) postcodes.update_postcodes(dsn, tmp_path, tokenizer) - assert not postcode_table.row_set -def test_postcodes_remove_all(dsn, postcode_table, tmp_path, tokenizer): +def test_postcodes_remove_all(dsn, postcode_table, place_table, + tmp_path, tokenizer): postcode_table.add('ch', '5613', 10, 12) - postcodes.update_postcodes(dsn, tmp_path, tokenizer) assert not postcode_table.row_set -def test_postcodes_multi_country(dsn, placex_table, postcode_table, tmp_path, tokenizer): - placex_table.add(country='de', geom='POINT(10 12)', - address=dict(postcode='54451')) - placex_table.add(country='cc', geom='POINT(100 56)', - address=dict(postcode='DD23 T')) - placex_table.add(country='de', geom='POINT(10.3 11.0)', - address=dict(postcode='54452')) - placex_table.add(country='cc', geom='POINT(10.3 11.0)', - address=dict(postcode='54452')) +def test_postcodes_multi_country(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'de', 'POINT(10 12)', dict(postcode='54451')) + insert_implicit_postcode(2, 'cc', 'POINT(100 56)', dict(postcode='DD23 T')) + insert_implicit_postcode(3, 'de', 'POINT(10.3 11.0)', dict(postcode='54452')) + insert_implicit_postcode(4, 'cc', 'POINT(10.3 11.0)', dict(postcode='54452')) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -142,10 +141,9 @@ def test_postcodes_multi_country(dsn, placex_table, postcode_table, tmp_path, to @pytest.mark.parametrize("gzipped", [True, False]) -def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path, - tokenizer, gzipped): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) +def test_postcodes_extern(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer, gzipped): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) extfile = tmp_path / 'xx_postcodes.csv' extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") @@ -160,10 +158,9 @@ def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path, ('xx', 'CD 4511', -10, -5)} -def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table, - tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) +def test_postcodes_extern_bad_column(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) extfile = tmp_path / 'xx_postcodes.csv' extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") @@ -173,10 +170,9 @@ def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)} -def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table, - tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) +def test_postcodes_extern_bad_number(dsn, insert_implicit_postcode, + postcode_table, tmp_path, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) extfile = tmp_path / 'xx_postcodes.csv' extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0") @@ -185,3 +181,33 @@ def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12), ('xx', 'CD 4511', -10, -5)} + +def test_can_compute(dsn, table_factory): + assert not postcodes.can_compute(dsn) + table_factory('place') + assert postcodes.can_compute(dsn) + +def test_no_placex_entry(dsn, tmp_path, temp_db_cursor, place_row, postcode_table, tokenizer): + #Rewrite the get_country_code function to verify its execution. + temp_db_cursor.execute(""" + CREATE OR REPLACE FUNCTION get_country_code(place geometry) + RETURNS TEXT AS $$ BEGIN + RETURN 'fr'; + END; $$ LANGUAGE plpgsql; + """) + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) + postcodes.update_postcodes(dsn, tmp_path, tokenizer) + + assert postcode_table.row_set == {('fr', 'AB 4511', 10, 12)} + +@pytest.fixture +def insert_implicit_postcode(placex_table, place_row): + """ + Inserts data into the placex and place table + which can then be used to compute one postcode. + """ + def _insert_implicit_postcode(osm_id, country, geometry, address): + placex_table.add(osm_id=osm_id, country=country, geom=geometry) + place_row(osm_id=osm_id, geom='SRID=4326;'+geometry, address=address) + + return _insert_implicit_postcode