From a4733eed90b2ee5e7b0aac2da3f8e71cffd340a6 Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Fri, 4 Jun 2021 21:26:13 +0200 Subject: [PATCH 1/7] Use place instead of placex to compute postcodes --- lib-sql/functions/place_triggers.sql | 6 +++++ nominatim/clicmd/refresh.py | 18 ++++++++------ nominatim/tools/database_import.py | 2 +- nominatim/tools/postcodes.py | 36 +++++++++++++++++++--------- 4 files changed, 43 insertions(+), 19 deletions(-) diff --git a/lib-sql/functions/place_triggers.sql b/lib-sql/functions/place_triggers.sql index 53163746..c19b2274 100644 --- a/lib-sql/functions/place_triggers.sql +++ b/lib-sql/functions/place_triggers.sql @@ -77,6 +77,12 @@ BEGIN ELSE -- insert to placex + -- Pure postcodes are never queried from placex so we don't add them. + -- location_postcodes is filled from the place table directly. + IF NEW.class = 'place' AND NEW.type = 'postcode' THEN + RETURN NEW; + END IF; + -- Patch in additional country names IF NEW.admin_level = 2 AND NEW.type = 'administrative' AND NEW.address is not NULL AND NEW.address ? 'country' THEN diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index 386516d6..c7142c5f 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -52,13 +52,17 @@ class UpdateRefresh: if args.postcodes: - LOG.warning("Update postcodes centroid") - tokenizer = self._get_tokenizer(args.config) - postcodes.update_postcodes(args.config.get_libpq_dsn(), - args.project_dir, tokenizer) - indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, - args.threads or 1) - indexer.index_postcodes() + if postcodes.can_compute(args.config.get_libpq_dsn()): + LOG.warning("Update postcodes centroid") + tokenizer = self._get_tokenizer(args.config) + postcodes.update_postcodes(args.config.get_libpq_dsn(), + args.project_dir, tokenizer) + indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, + args.threads or 1) + indexer.index_postcodes() + else: + LOG.error("The place table doesn\'t exists. " \ + "Postcode updates on a frozen database is not possible.") if args.word_counts: LOG.warning('Recompute frequency of full-word search terms') diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 664d3c6b..28a10ebe 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -199,7 +199,7 @@ def load_data(dsn, threads): conn.perform("""INSERT INTO placex ({0}) SELECT {0} FROM place WHERE osm_id % {1} = {2} - AND NOT (class='place' and type='houses') + AND NOT (class='place' and (type='houses' or type='postcode')) AND ST_IsValid(geometry) """.format(_COPY_COLUMNS, place_threads, imod)) sel.register(conn, selectors.EVENT_READ, conn) diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index 195d407e..fd355079 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -163,17 +163,19 @@ def update_postcodes(dsn, project_dir, tokenizer): # Recompute the list of valid postcodes from placex. with conn.cursor(name="placex_postcodes") as cur: - cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid) - FROM ( - SELECT country_code, - token_normalized_postcode(address->'postcode') as pc, - ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid - FROM placex - WHERE address ? 'postcode' and geometry IS NOT null - and country_code is not null - GROUP BY country_code, pc) xx - WHERE pc is not null - ORDER BY country_code, pc""") + cur.execute(""" + SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid) + FROM ( + SELECT + COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc, + token_normalized_postcode(pl.address->'postcode') as pc, + ST_Centroid(ST_Collect(ST_Centroid(pl.geometry))) as centroid + FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type + WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null + GROUP BY cc, pc + ) xx + WHERE pc IS NOT null AND cc IS NOT null + ORDER BY country_code, pc""") collector = None @@ -195,3 +197,15 @@ def update_postcodes(dsn, project_dir, tokenizer): conn.commit() analyzer.update_postcodes_from_db() + +def can_compute(dsn): + """ + Check that the place table exists so that + postcodes can be computed. + """ + with connect(dsn) as conn: + with conn.cursor() as cur: + cur.execute(""" + select exists(select 1 from information_schema.tables where table_name='place') + """) + return cur.fetchone()[0] From e879814e433d336f056980afd76bac980f1430da Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Mon, 7 Jun 2021 15:02:53 +0200 Subject: [PATCH 2/7] Update tests for postcodes --- test/python/test_cli.py | 2 +- test/python/test_tools_postcodes.py | 93 +++++++++++++++++------------ 2 files changed, 56 insertions(+), 39 deletions(-) diff --git a/test/python/test_cli.py b/test/python/test_cli.py index 1d775b1f..d9e01040 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -320,7 +320,7 @@ class TestCliWithDb: assert func_mock.called == 1 - def test_refresh_postcodes(self, mock_func_factory): + def test_refresh_postcodes(self, mock_func_factory, place_table): func_mock = mock_func_factory(nominatim.tools.postcodes, 'update_postcodes') idx_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_postcodes') diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py index 8f54ad41..d5c8ff74 100644 --- a/test/python/test_tools_postcodes.py +++ b/test/python/test_tools_postcodes.py @@ -26,6 +26,11 @@ class MockPostcodeTable: geometry GEOMETRY(Geometry, 4326))""") cur.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT) RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION get_country_code(place geometry) + RETURNS TEXT AS $$ BEGIN + RETURN (SELECT country_code FROM placex WHERE geometry = place LIMIT 1); + END; $$ LANGUAGE plpgsql; """) conn.commit() @@ -58,15 +63,17 @@ def postcode_table(temp_db_conn, placex_table, word_table): return MockPostcodeTable(temp_db_conn) -def test_postcodes_empty(dsn, postcode_table, tmp_path, tokenizer): +def test_postcodes_empty(dsn, postcode_table, place_table, + tmp_path, tokenizer): postcodes.update_postcodes(dsn, tmp_path, tokenizer) assert not postcode_table.row_set -def test_postcodes_add_new(dsn, placex_table, postcode_table, tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='9486')) +def test_postcodes_add_new(dsn, postcode_table, placex_table, place_row, + tmp_path, tokenizer): + placex_table.add(country='xx', geom='POINT(10 12)') + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='9486')) postcode_table.add('yy', '9486', 99, 34) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -75,9 +82,9 @@ def test_postcodes_add_new(dsn, placex_table, postcode_table, tmp_path, tokenize def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table, - tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) + place_row, tmp_path, tokenizer): + placex_table.add(country='xx', geom='POINT(10 12)') + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) postcode_table.add('xx', 'AB 4511', 99, 34) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -86,9 +93,9 @@ def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table, def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table, - tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) + place_row, tmp_path, tokenizer): + placex_table.add(country='xx', geom='POINT(10 12)') + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) postcode_table.add('xx', 'AB 4511', 10, 11.99999) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -96,9 +103,10 @@ def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 11.99999)} -def test_postcodes_remove(dsn, placex_table, postcode_table, tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) +def test_postcodes_remove(dsn, placex_table, postcode_table, + place_row, tmp_path, tokenizer): + placex_table.add(country='xx', geom='POINT(10 12)') + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) postcode_table.add('xx', 'badname', 10, 12) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -106,46 +114,50 @@ def test_postcodes_remove(dsn, placex_table, postcode_table, tmp_path, tokenizer assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)} -def test_postcodes_ignore_empty_country(dsn, placex_table, postcode_table, tmp_path, tokenizer): - placex_table.add(country=None, geom='POINT(10 12)', - address=dict(postcode='AB 4511')) - +def test_postcodes_ignore_empty_country(dsn, placex_table, postcode_table, + place_row, tmp_path, tokenizer): + placex_table.add(country=None, geom='POINT(10 12)') + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) postcodes.update_postcodes(dsn, tmp_path, tokenizer) assert not postcode_table.row_set -def test_postcodes_remove_all(dsn, postcode_table, tmp_path, tokenizer): +def test_postcodes_remove_all(dsn, postcode_table, place_table, + tmp_path, tokenizer): postcode_table.add('ch', '5613', 10, 12) - postcodes.update_postcodes(dsn, tmp_path, tokenizer) assert not postcode_table.row_set -def test_postcodes_multi_country(dsn, placex_table, postcode_table, tmp_path, tokenizer): - placex_table.add(country='de', geom='POINT(10 12)', - address=dict(postcode='54451')) - placex_table.add(country='cc', geom='POINT(100 56)', - address=dict(postcode='DD23 T')) - placex_table.add(country='de', geom='POINT(10.3 11.0)', - address=dict(postcode='54452')) - placex_table.add(country='cc', geom='POINT(10.3 11.0)', - address=dict(postcode='54452')) +def test_postcodes_multi_country(dsn, placex_table, postcode_table, + place_row, tmp_path, tokenizer): + placex_table.add(country='de', geom='POINT(10 12)') + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='54451')) + + placex_table.add(country='cc', geom='POINT(100 56)') + place_row(geom='SRID=4326;POINT(100 56)', address=dict(postcode='DD23 T')) + + placex_table.add(country='de', geom='POINT(10.3 11.0)') + place_row(geom='SRID=4326;POINT(10.3 11.0)', address=dict(postcode='54452')) + + placex_table.add(country='cc', geom='POINT(10.3 10.0)') + place_row(geom='SRID=4326;POINT(10.3 10.0)', address=dict(postcode='54452')) postcodes.update_postcodes(dsn, tmp_path, tokenizer) assert postcode_table.row_set == {('de', '54451', 10, 12), ('de', '54452', 10.3, 11.0), - ('cc', '54452', 10.3, 11.0), + ('cc', '54452', 10.3, 10.0), ('cc', 'DD23 T', 100, 56)} @pytest.mark.parametrize("gzipped", [True, False]) def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path, - tokenizer, gzipped): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) + place_row, tokenizer, gzipped): + placex_table.add(country='xx', geom='POINT(10 12)') + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) extfile = tmp_path / 'xx_postcodes.csv' extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") @@ -161,9 +173,9 @@ def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path, def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table, - tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) + place_row, tmp_path, tokenizer): + placex_table.add(country='xx', geom='POINT(10 12)') + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) extfile = tmp_path / 'xx_postcodes.csv' extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") @@ -174,9 +186,9 @@ def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table, def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table, - tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)', - address=dict(postcode='AB 4511')) + place_row, tmp_path, tokenizer): + placex_table.add(country='xx', geom='POINT(10 12)') + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) extfile = tmp_path / 'xx_postcodes.csv' extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0") @@ -185,3 +197,8 @@ def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12), ('xx', 'CD 4511', -10, -5)} + +def test_can_compute(dsn, temp_db_cursor): + assert not postcodes.can_compute(dsn) + temp_db_cursor.execute('CREATE TABLE place()') + assert postcodes.can_compute(dsn) From 47fb7cd3a8c24b232288a31c4ca89e4bc187b01f Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Tue, 8 Jun 2021 09:33:10 +0200 Subject: [PATCH 3/7] Use place_exists() into can_compute() for postcodes --- nominatim/tools/postcodes.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index fd355079..3ab59d59 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -9,7 +9,7 @@ from math import isfinite from psycopg2.extras import execute_values -from nominatim.db.connection import connect +from nominatim.db.connection import _Connection, connect LOG = logging.getLogger() @@ -203,9 +203,5 @@ def can_compute(dsn): Check that the place table exists so that postcodes can be computed. """ - with connect(dsn) as conn: - with conn.cursor() as cur: - cur.execute(""" - select exists(select 1 from information_schema.tables where table_name='place') - """) - return cur.fetchone()[0] + with _Connection(dsn) as conn: + return conn.table_exists('place') From 1c175e3a67fbfb94f29c47bdcd02fb28136e591b Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Tue, 8 Jun 2021 22:39:04 +0200 Subject: [PATCH 4/7] Clean and update tests for postcodes --- nominatim/tools/postcodes.py | 6 +- test/python/test_tools_postcodes.py | 110 +++++++++++++++------------- 2 files changed, 63 insertions(+), 53 deletions(-) diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index 3ab59d59..2be3ced6 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -9,7 +9,7 @@ from math import isfinite from psycopg2.extras import execute_values -from nominatim.db.connection import _Connection, connect +from nominatim.db.connection import connect LOG = logging.getLogger() @@ -169,7 +169,7 @@ def update_postcodes(dsn, project_dir, tokenizer): SELECT COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc, token_normalized_postcode(pl.address->'postcode') as pc, - ST_Centroid(ST_Collect(ST_Centroid(pl.geometry))) as centroid + COALESCE(ST_Centroid(ST_Collect(plx.centroid)), ST_Centroid(ST_Collect(ST_Centroid(pl.geometry)))) as centroid FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null GROUP BY cc, pc @@ -203,5 +203,5 @@ def can_compute(dsn): Check that the place table exists so that postcodes can be computed. """ - with _Connection(dsn) as conn: + with connect(dsn) as conn: return conn.table_exists('place') diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py index d5c8ff74..7d98c452 100644 --- a/test/python/test_tools_postcodes.py +++ b/test/python/test_tools_postcodes.py @@ -29,7 +29,7 @@ class MockPostcodeTable: CREATE OR REPLACE FUNCTION get_country_code(place geometry) RETURNS TEXT AS $$ BEGIN - RETURN (SELECT country_code FROM placex WHERE geometry = place LIMIT 1); + RETURN null; END; $$ LANGUAGE plpgsql; """) conn.commit() @@ -70,10 +70,9 @@ def test_postcodes_empty(dsn, postcode_table, place_table, assert not postcode_table.row_set -def test_postcodes_add_new(dsn, postcode_table, placex_table, place_row, - tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)') - place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='9486')) +def test_postcodes_add_new(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='9486')) postcode_table.add('yy', '9486', 99, 34) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -81,10 +80,9 @@ def test_postcodes_add_new(dsn, postcode_table, placex_table, place_row, assert postcode_table.row_set == {('xx', '9486', 10, 12), } -def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table, - place_row, tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)') - place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) +def test_postcodes_replace_coordinates(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) postcode_table.add('xx', 'AB 4511', 99, 34) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -92,10 +90,9 @@ def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)} -def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table, - place_row, tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)') - place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) +def test_postcodes_replace_coordinates_close(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) postcode_table.add('xx', 'AB 4511', 10, 11.99999) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -103,10 +100,9 @@ def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 11.99999)} -def test_postcodes_remove(dsn, placex_table, postcode_table, - place_row, tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)') - place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) +def test_postcodes_remove(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) postcode_table.add('xx', 'badname', 10, 12) postcodes.update_postcodes(dsn, tmp_path, tokenizer) @@ -114,12 +110,11 @@ def test_postcodes_remove(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)} -def test_postcodes_ignore_empty_country(dsn, placex_table, postcode_table, - place_row, tmp_path, tokenizer): - placex_table.add(country=None, geom='POINT(10 12)') - place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) +def test_postcodes_ignore_empty_country(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, None, 'POINT(10 12)', dict(postcode='AB 4511')) postcodes.update_postcodes(dsn, tmp_path, tokenizer) - + print(postcode_table.row_set) assert not postcode_table.row_set @@ -131,33 +126,25 @@ def test_postcodes_remove_all(dsn, postcode_table, place_table, assert not postcode_table.row_set -def test_postcodes_multi_country(dsn, placex_table, postcode_table, - place_row, tmp_path, tokenizer): - placex_table.add(country='de', geom='POINT(10 12)') - place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='54451')) - - placex_table.add(country='cc', geom='POINT(100 56)') - place_row(geom='SRID=4326;POINT(100 56)', address=dict(postcode='DD23 T')) - - placex_table.add(country='de', geom='POINT(10.3 11.0)') - place_row(geom='SRID=4326;POINT(10.3 11.0)', address=dict(postcode='54452')) - - placex_table.add(country='cc', geom='POINT(10.3 10.0)') - place_row(geom='SRID=4326;POINT(10.3 10.0)', address=dict(postcode='54452')) +def test_postcodes_multi_country(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'de', 'POINT(10 12)', dict(postcode='54451')) + insert_implicit_postcode(2, 'cc', 'POINT(100 56)', dict(postcode='DD23 T')) + insert_implicit_postcode(3, 'de', 'POINT(10.3 11.0)', dict(postcode='54452')) + insert_implicit_postcode(4, 'cc', 'POINT(10.3 11.0)', dict(postcode='54452')) postcodes.update_postcodes(dsn, tmp_path, tokenizer) assert postcode_table.row_set == {('de', '54451', 10, 12), ('de', '54452', 10.3, 11.0), - ('cc', '54452', 10.3, 10.0), + ('cc', '54452', 10.3, 11.0), ('cc', 'DD23 T', 100, 56)} @pytest.mark.parametrize("gzipped", [True, False]) -def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path, - place_row, tokenizer, gzipped): - placex_table.add(country='xx', geom='POINT(10 12)') - place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) +def test_postcodes_extern(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer, gzipped): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) extfile = tmp_path / 'xx_postcodes.csv' extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") @@ -172,10 +159,9 @@ def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path, ('xx', 'CD 4511', -10, -5)} -def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table, - place_row, tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)') - place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) +def test_postcodes_extern_bad_column(dsn, postcode_table, tmp_path, + insert_implicit_postcode, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) extfile = tmp_path / 'xx_postcodes.csv' extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") @@ -185,10 +171,9 @@ def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)} -def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table, - place_row, tmp_path, tokenizer): - placex_table.add(country='xx', geom='POINT(10 12)') - place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) +def test_postcodes_extern_bad_number(dsn, insert_implicit_postcode, + postcode_table, tmp_path, tokenizer): + insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511')) extfile = tmp_path / 'xx_postcodes.csv' extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0") @@ -198,7 +183,32 @@ def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12), ('xx', 'CD 4511', -10, -5)} -def test_can_compute(dsn, temp_db_cursor): +def test_can_compute(dsn, table_factory): assert not postcodes.can_compute(dsn) - temp_db_cursor.execute('CREATE TABLE place()') + table_factory('place') assert postcodes.can_compute(dsn) + +def test_no_placex_entry(dsn, tmp_path, temp_db_cursor, place_row, postcode_table, tokenizer): + #Rewrite the get_country_code function to verify its execution. + temp_db_cursor.execute(""" + CREATE OR REPLACE FUNCTION get_country_code(place geometry) + RETURNS TEXT AS $$ BEGIN + RETURN 'fr'; + END; $$ LANGUAGE plpgsql; + """) + place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511')) + postcodes.update_postcodes(dsn, tmp_path, tokenizer) + + assert postcode_table.row_set == {('fr', 'AB 4511', 10, 12)} + +@pytest.fixture +def insert_implicit_postcode(placex_table, place_row): + """ + Inserts data into the placex and place table + which can then be used to compute one postcode. + """ + def _insert_implicit_postcode(osm_id, country, geometry, address): + placex_table.add(osm_id=osm_id, country=country, geom=geometry) + place_row(osm_id=osm_id, geom='SRID=4326;'+geometry, address=address) + + return _insert_implicit_postcode From 9e07a197e982a111d33dd190f422b5b923f4197a Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Wed, 9 Jun 2021 09:24:25 +0200 Subject: [PATCH 5/7] Handle postcode type change in place insert trigger --- lib-sql/functions/place_triggers.sql | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/lib-sql/functions/place_triggers.sql b/lib-sql/functions/place_triggers.sql index c19b2274..43bae856 100644 --- a/lib-sql/functions/place_triggers.sql +++ b/lib-sql/functions/place_triggers.sql @@ -77,12 +77,6 @@ BEGIN ELSE -- insert to placex - -- Pure postcodes are never queried from placex so we don't add them. - -- location_postcodes is filled from the place table directly. - IF NEW.class = 'place' AND NEW.type = 'postcode' THEN - RETURN NEW; - END IF; - -- Patch in additional country names IF NEW.admin_level = 2 AND NEW.type = 'administrative' AND NEW.address is not NULL AND NEW.address ? 'country' THEN @@ -98,6 +92,16 @@ BEGIN -- Get the existing place_id select * from placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO existingplacex; + -- Pure postcodes are never queried from placex so we don't add them. + -- location_postcodes is filled from the place table directly. + IF NEW.class = 'place' AND NEW.type = 'postcode' THEN + -- Remove old placex entry if the type changed to postcode. + IF existingplacex.type IS NOT NULL AND existingplacex.type != 'postcode' THEN + DELETE FROM placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id; + END IF; + RETURN NEW; + END IF; + -- Handle a place changing type by removing the old data -- My generated 'place' types are causing havok because they overlap with real keys -- TODO: move them to their own special purpose key/class to avoid collisions @@ -207,7 +211,7 @@ BEGIN where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type; - IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN + IF NEW.class = 'boundary' AND NEW.type = 'postal_code' THEN IF NEW.address is NULL OR NOT NEW.address ? 'postcode' THEN -- postcode was deleted, no longer retain in placex DELETE FROM placex where place_id = existingplacex.place_id; From ddf866c4c77256f62fdfd2996ccaf7a6cdcb3074 Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Sat, 12 Jun 2021 15:35:51 +0200 Subject: [PATCH 6/7] Always delete old placex entry for type=postcode when inserting a new one into the place table --- lib-sql/functions/place_triggers.sql | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib-sql/functions/place_triggers.sql b/lib-sql/functions/place_triggers.sql index 43bae856..dd0f1662 100644 --- a/lib-sql/functions/place_triggers.sql +++ b/lib-sql/functions/place_triggers.sql @@ -95,10 +95,8 @@ BEGIN -- Pure postcodes are never queried from placex so we don't add them. -- location_postcodes is filled from the place table directly. IF NEW.class = 'place' AND NEW.type = 'postcode' THEN - -- Remove old placex entry if the type changed to postcode. - IF existingplacex.type IS NOT NULL AND existingplacex.type != 'postcode' THEN - DELETE FROM placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id; - END IF; + -- Remove old placex entry. + DELETE FROM placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id; RETURN NEW; END IF; From 3676310efea43835142118d821603d9af923f7df Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Sat, 12 Jun 2021 15:46:08 +0200 Subject: [PATCH 7/7] Improved performance of the postcodes query and some code cleaning --- nominatim/clicmd/refresh.py | 2 +- nominatim/tools/postcodes.py | 10 ++++------ test/python/test_tools_postcodes.py | 1 - 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index c7142c5f..fbc23350 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -61,7 +61,7 @@ class UpdateRefresh: args.threads or 1) indexer.index_postcodes() else: - LOG.error("The place table doesn\'t exists. " \ + LOG.error("The place table doesn\'t exist. " \ "Postcode updates on a frozen database is not possible.") if args.word_counts: diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index 2be3ced6..cfd242e2 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -165,15 +165,13 @@ def update_postcodes(dsn, project_dir, tokenizer): with conn.cursor(name="placex_postcodes") as cur: cur.execute(""" SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid) - FROM ( - SELECT + FROM (SELECT COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc, token_normalized_postcode(pl.address->'postcode') as pc, - COALESCE(ST_Centroid(ST_Collect(plx.centroid)), ST_Centroid(ST_Collect(ST_Centroid(pl.geometry)))) as centroid - FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type + ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid + FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null - GROUP BY cc, pc - ) xx + GROUP BY cc, pc) xx WHERE pc IS NOT null AND cc IS NOT null ORDER BY country_code, pc""") diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py index 7d98c452..a3415769 100644 --- a/test/python/test_tools_postcodes.py +++ b/test/python/test_tools_postcodes.py @@ -114,7 +114,6 @@ def test_postcodes_ignore_empty_country(dsn, postcode_table, tmp_path, insert_implicit_postcode, tokenizer): insert_implicit_postcode(1, None, 'POINT(10 12)', dict(postcode='AB 4511')) postcodes.update_postcodes(dsn, tmp_path, tokenizer) - print(postcode_table.row_set) assert not postcode_table.row_set