From 89821d01e09279bc3aacb2de2da69c077c432330 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 23 Dec 2025 12:13:19 +0100 Subject: [PATCH] reorganise layout of location_postcode table Also renames the table as this will make it easier to migrate. --- docs/develop/Database-Layout.md | 2 +- docs/develop/search-tables.plantuml | 7 ++-- lib-sql/functions.sql | 4 +-- lib-sql/functions/placex_triggers.sql | 2 +- lib-sql/functions/utils.sql | 37 ++++++++++++++-------- lib-sql/indices.sql | 7 ++-- lib-sql/table-triggers.sql | 4 +-- lib-sql/tables.sql | 26 +++++++++------ src/nominatim_api/sql/sqlalchemy_schema.py | 7 ++-- src/nominatim_db/indexer/indexer.py | 4 +-- src/nominatim_db/indexer/runners.py | 12 +++---- src/nominatim_db/tokenizer/base.py | 2 +- src/nominatim_db/tools/check_database.py | 4 +-- src/nominatim_db/tools/database_import.py | 4 +-- 14 files changed, 69 insertions(+), 53 deletions(-) diff --git a/docs/develop/Database-Layout.md b/docs/develop/Database-Layout.md index cca10896..ad2cdf0b 100644 --- a/docs/develop/Database-Layout.md +++ b/docs/develop/Database-Layout.md @@ -79,7 +79,7 @@ the placex table. Only three columns are special: Address interpolations are always ways in OSM, which is why there is no column `osm_type`. -The **location_postcode** table holds computed centroids of all postcodes that +The **location_postcodes** table holds computed centroids of all postcodes that can be found in the OSM data. The meaning of the columns is again the same as that of the placex table. diff --git a/docs/develop/search-tables.plantuml b/docs/develop/search-tables.plantuml index 4b8121c6..a452d143 100644 --- a/docs/develop/search-tables.plantuml +++ b/docs/develop/search-tables.plantuml @@ -74,15 +74,16 @@ map place_addressline { isaddress => BOOLEAN } -map location_postcode { +map location_postcodes { place_id => BIGINT + osm_id => BIGINT postcode => TEXT parent_place_id => BIGINT rank_search => SMALLINT - rank_address => SMALLINT indexed_status => SMALLINT indexed_date => TIMESTAMP geometry => GEOMETRY + centroid -> GEOMETRY } placex::place_id <-- search_name::place_id @@ -94,6 +95,6 @@ search_name::nameaddress_vector --> word::word_id place_addressline -[hidden]> location_property_osmline search_name -[hidden]> place_addressline -location_property_osmline -[hidden]-> location_postcode +location_property_osmline -[hidden]-> location_postcodes @enduml diff --git a/lib-sql/functions.sql b/lib-sql/functions.sql index 737a3f21..711ee71c 100644 --- a/lib-sql/functions.sql +++ b/lib-sql/functions.sql @@ -2,7 +2,7 @@ -- -- This file is part of Nominatim. (https://nominatim.org) -- --- Copyright (C) 2022 by the Nominatim developer community. +-- Copyright (C) 2025 by the Nominatim developer community. -- For a full list of authors see the git log. {% include('functions/utils.sql') %} @@ -18,7 +18,7 @@ {% include 'functions/placex_triggers.sql' %} {% endif %} -{% if 'location_postcode' in db.tables %} +{% if 'location_postcodes' in db.tables %} {% include 'functions/postcode_triggers.sql' %} {% endif %} diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql index 5ac5fdca..b8f40a57 100644 --- a/lib-sql/functions/placex_triggers.sql +++ b/lib-sql/functions/placex_triggers.sql @@ -1399,7 +1399,7 @@ BEGIN {% if debug %}RAISE WARNING 'placex_delete:12 % %',OLD.osm_type,OLD.osm_id;{% endif %} - UPDATE location_postcode SET indexed_status = 2 WHERE parent_place_id = OLD.place_id; + UPDATE location_postcodes SET indexed_status = 2 WHERE parent_place_id = OLD.place_id; RETURN OLD; diff --git a/lib-sql/functions/utils.sql b/lib-sql/functions/utils.sql index eeee4b0e..9a6228e4 100644 --- a/lib-sql/functions/utils.sql +++ b/lib-sql/functions/utils.sql @@ -139,37 +139,46 @@ $$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE; --- Find the nearest artificial postcode for the given geometry. --- TODO For areas there should not be more than two inside the geometry. +-- Find the best-matching postcode for the given geometry CREATE OR REPLACE FUNCTION get_nearest_postcode(country VARCHAR(2), geom GEOMETRY) RETURNS TEXT AS $$ DECLARE outcode TEXT; cnt INTEGER; + location RECORD; BEGIN -- If the geometry is an area then only one postcode must be within -- that area, otherwise consider the area as not having a postcode. IF ST_GeometryType(geom) in ('ST_Polygon','ST_MultiPolygon') THEN - SELECT min(postcode), count(*) FROM - (SELECT postcode FROM location_postcode - WHERE ST_Contains(geom, location_postcode.geometry) LIMIT 2) sub - INTO outcode, cnt; + SELECT min(postcode), count(*) FROM + (SELECT postcode FROM location_postcodes + WHERE geom && location_postcodes.geometry -- want to use the index + AND ST_Contains(geom, location_postcodes.centroid) + AND country_code = country + LIMIT 2) sub + INTO outcode, cnt; IF cnt = 1 THEN RETURN outcode; - ELSE - RETURN null; END IF; + + RETURN null; END IF; - SELECT postcode FROM location_postcode - WHERE ST_DWithin(geom, location_postcode.geometry, 0.05) - AND location_postcode.country_code = country - ORDER BY ST_Distance(geom, location_postcode.geometry) LIMIT 1 - INTO outcode; + -- Otherwise: be fully within the coverage area of a postcode + FOR location IN + SELECT postcode + FROM location_postcodes p + WHERE ST_Covers(p.geometry, geom) + AND p.country_code = country + ORDER BY osm_id is null, ST_Distance(p.centroid, geom) + LIMIT 1 + LOOP + RETURN location.postcode; + END LOOP; - RETURN outcode; + RETURN NULL; END; $$ LANGUAGE plpgsql STABLE PARALLEL SAFE; diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql index 0fdc2001..c23f07ae 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -48,9 +48,6 @@ CREATE INDEX IF NOT EXISTS idx_osmline_parent_place_id --- CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}}; ---- -CREATE INDEX IF NOT EXISTS idx_postcode_postcode - ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}}; {% if drop %} --- @@ -77,8 +74,8 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode deferred BOOLEAN ); --- - CREATE INDEX IF NOT EXISTS idx_location_postcode_parent_place_id - ON location_postcode USING BTREE (parent_place_id) {{db.tablespace.address_index}}; + CREATE INDEX IF NOT EXISTS idx_location_postcodes_parent_place_id + ON location_postcodes USING BTREE (parent_place_id) {{db.tablespace.address_index}}; {% endif %} -- Indices only needed for search. diff --git a/lib-sql/table-triggers.sql b/lib-sql/table-triggers.sql index d1a37316..e178e621 100644 --- a/lib-sql/table-triggers.sql +++ b/lib-sql/table-triggers.sql @@ -2,7 +2,7 @@ -- -- This file is part of Nominatim. (https://nominatim.org) -- --- Copyright (C) 2022 by the Nominatim developer community. +-- Copyright (C) 2025 by the Nominatim developer community. -- For a full list of authors see the git log. -- insert creates the location tables, creates location indexes if indexed == true @@ -25,5 +25,5 @@ CREATE TRIGGER place_before_delete BEFORE DELETE ON place CREATE TRIGGER place_before_insert BEFORE INSERT ON place FOR EACH ROW EXECUTE PROCEDURE place_insert(); -CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcode +CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcodes FOR EACH ROW EXECUTE PROCEDURE postcode_update(); diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql index 3a00acd1..38703265 100644 --- a/lib-sql/tables.sql +++ b/lib-sql/tables.sql @@ -227,21 +227,29 @@ GRANT SELECT ON planet_osm_rels to "{{config.DATABASE_WEBUSER}}" ; GRANT SELECT on location_area to "{{config.DATABASE_WEBUSER}}" ; -- Table for synthetic postcodes. -DROP TABLE IF EXISTS location_postcode; -CREATE TABLE location_postcode ( - place_id BIGINT, +DROP TABLE IF EXISTS location_postcodes; +CREATE TABLE location_postcodes ( + place_id BIGINT NOT NULL, parent_place_id BIGINT, + osm_id BIGINT, rank_search SMALLINT, - rank_address SMALLINT, indexed_status SMALLINT, indexed_date TIMESTAMP, country_code varchar(2), - postcode TEXT, - geometry GEOMETRY(Geometry, 4326) + postcode TEXT NOT NULL, + centroid GEOMETRY(Geometry, 4326) NOT NULL, + geometry GEOMETRY(Geometry, 4326) NOT NULL ); -CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id) {{db.tablespace.search_index}}; -CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry) {{db.tablespace.address_index}}; -GRANT SELECT ON location_postcode TO "{{config.DATABASE_WEBUSER}}" ; +CREATE UNIQUE INDEX idx_location_postcodes_id ON location_postcodes + USING BTREE (place_id) {{db.tablespace.search_index}}; +CREATE INDEX idx_location_postcodes_geometry ON location_postcodes + USING GIST (geometry) {{db.tablespace.search_index}}; +CREATE INDEX IF NOT EXISTS idx_location_postcodes_postcode + ON location_postcodes USING BTREE (postcode, country_code) + {{db.tablespace.search_index}}; +CREATE INDEX IF NOT EXISTS idx_location_postcodes_osmid + ON location_postcodes USING BTREE (osm_id) {{db.tablespace.search_index}}; +GRANT SELECT ON location_postcodes TO "{{config.DATABASE_WEBUSER}}" ; -- Table to store location of entrance nodes DROP TABLE IF EXISTS placex_entrance; diff --git a/src/nominatim_api/sql/sqlalchemy_schema.py b/src/nominatim_api/sql/sqlalchemy_schema.py index 283f18fb..f9e4c595 100644 --- a/src/nominatim_api/sql/sqlalchemy_schema.py +++ b/src/nominatim_api/sql/sqlalchemy_schema.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ SQLAlchemy definitions for all tables used by the frontend. @@ -67,15 +67,16 @@ class SearchTables: sa.Column('isaddress', sa.Boolean)) self.postcode = sa.Table( - 'location_postcode', meta, + 'location_postcodes', meta, sa.Column('place_id', sa.BigInteger), sa.Column('parent_place_id', sa.BigInteger), + sa.Column('osm_id', sa.BigInteger), sa.Column('rank_search', sa.SmallInteger), - sa.Column('rank_address', sa.SmallInteger), sa.Column('indexed_status', sa.SmallInteger), sa.Column('indexed_date', sa.DateTime), sa.Column('country_code', sa.String(2)), sa.Column('postcode', sa.Text), + sa.Column('centroid', Geometry), sa.Column('geometry', Geometry)) self.osmline = sa.Table( diff --git a/src/nominatim_db/indexer/indexer.py b/src/nominatim_db/indexer/indexer.py index d467efbd..69bc4a85 100644 --- a/src/nominatim_db/indexer/indexer.py +++ b/src/nominatim_db/indexer/indexer.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ Main work horse for indexing (computing addresses) the database. @@ -154,7 +154,7 @@ class Indexer: return total async def index_postcodes(self) -> int: - """Index the entries of the location_postcode table. + """Index the entries of the location_postcodes table. """ LOG.warning("Starting indexing postcodes using %s threads", self.num_threads) diff --git a/src/nominatim_db/indexer/runners.py b/src/nominatim_db/indexer/runners.py index e20cf20d..06479cc1 100644 --- a/src/nominatim_db/indexer/runners.py +++ b/src/nominatim_db/indexer/runners.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ Mix-ins that provide the actual commands for the indexer for various indexing @@ -143,22 +143,22 @@ class InterpolationRunner: class PostcodeRunner(Runner): - """ Provides the SQL commands for indexing the location_postcode table. + """ Provides the SQL commands for indexing the location_postcodes table. """ def name(self) -> str: - return "postcodes (location_postcode)" + return "postcodes (location_postcodes)" def sql_count_objects(self) -> Query: - return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0' + return 'SELECT count(*) FROM location_postcodes WHERE indexed_status > 0' def sql_get_objects(self) -> Query: - return """SELECT place_id FROM location_postcode + return """SELECT place_id FROM location_postcodes WHERE indexed_status > 0 ORDER BY country_code, postcode""" def index_places_query(self, batch_size: int) -> Query: - return pysql.SQL("""UPDATE location_postcode SET indexed_status = 0 + return pysql.SQL("""UPDATE location_postcodes SET indexed_status = 0 WHERE place_id IN ({})""")\ .format(pysql.SQL(',').join((pysql.Placeholder() for _ in range(batch_size)))) diff --git a/src/nominatim_db/tokenizer/base.py b/src/nominatim_db/tokenizer/base.py index 4295aaf4..e764f73f 100644 --- a/src/nominatim_db/tokenizer/base.py +++ b/src/nominatim_db/tokenizer/base.py @@ -71,7 +71,7 @@ class AbstractAnalyzer(ABC): @abstractmethod def update_postcodes_from_db(self) -> None: """ Update the tokenizer's postcode tokens from the current content - of the `location_postcode` table. + of the `location_postcodes` table. """ @abstractmethod diff --git a/src/nominatim_db/tools/check_database.py b/src/nominatim_db/tools/check_database.py index f4020803..96ff9a29 100644 --- a/src/nominatim_db/tools/check_database.py +++ b/src/nominatim_db/tools/check_database.py @@ -113,8 +113,8 @@ def _get_indexes(conn: Connection) -> List[str]: 'idx_placex_geometry_placenode', 'idx_osmline_parent_place_id', 'idx_osmline_parent_osm_id', - 'idx_postcode_id', - 'idx_postcode_postcode' + 'idx_location_postcodes_id', + 'idx_location_postcodes_postcode' ] # These won't exist if --reverse-only import was used diff --git a/src/nominatim_db/tools/database_import.py b/src/nominatim_db/tools/database_import.py index 809a8839..c92c3900 100644 --- a/src/nominatim_db/tools/database_import.py +++ b/src/nominatim_db/tools/database_import.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ Functions for setting up and importing a new Nominatim database. @@ -183,7 +183,7 @@ def truncate_data_tables(conn: Connection) -> None: cur.execute('TRUNCATE location_area_country') cur.execute('TRUNCATE location_property_tiger') cur.execute('TRUNCATE location_property_osmline') - cur.execute('TRUNCATE location_postcode') + cur.execute('TRUNCATE location_postcodes') if table_exists(conn, 'search_name'): cur.execute('TRUNCATE search_name') cur.execute('DROP SEQUENCE IF EXISTS seq_place')