diff --git a/lib-sql/functions/interpolation.sql b/lib-sql/functions/interpolation.sql index 96a105ae..fb822033 100644 --- a/lib-sql/functions/interpolation.sql +++ b/lib-sql/functions/interpolation.sql @@ -52,7 +52,9 @@ BEGIN IF parent_place_id is null THEN FOR location IN SELECT place_id FROM placex - WHERE ST_DWithin(geom, placex.geometry, 0.001) and placex.rank_search = 26 + WHERE ST_DWithin(geom, placex.geometry, 0.001) + and placex.rank_search = 26 + and placex.osm_type = 'W' -- needed for index selection ORDER BY CASE WHEN ST_GeometryType(geom) = 'ST_Line' THEN (ST_distance(placex.geometry, ST_LineInterpolatePoint(geom,0))+ ST_distance(placex.geometry, ST_LineInterpolatePoint(geom,0.5))+ diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql index 6c23fd67..a2276f07 100644 --- a/lib-sql/functions/placex_triggers.sql +++ b/lib-sql/functions/placex_triggers.sql @@ -197,6 +197,7 @@ BEGIN SELECT place_id FROM placex WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox)) AND rank_address between 5 and 25 + AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') ORDER BY rank_address desc LOOP RETURN location.place_id; @@ -212,6 +213,7 @@ BEGIN SELECT place_id FROM placex WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox)) AND rank_address between 5 and 25 + AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') ORDER BY rank_address desc LOOP RETURN location.place_id; @@ -275,7 +277,9 @@ BEGIN -- If extratags has a place tag, look for linked nodes by their place type. -- Area and node still have to have the same name. - IF bnd.extratags ? 'place' and bnd_name is not null THEN + IF bnd.extratags ? 'place' and bnd.extratags->'place' != 'postcode' + and bnd_name is not null + THEN FOR linked_placex IN SELECT * FROM placex WHERE (position(lower(name->'name') in bnd_name) > 0 @@ -284,7 +288,6 @@ BEGIN AND placex.osm_type = 'N' AND (placex.linked_place_id is null or placex.linked_place_id = bnd.place_id) AND placex.rank_search < 26 -- needed to select the right index - AND placex.type != 'postcode' AND ST_Covers(bnd.geometry, placex.geometry) LOOP {% if debug %}RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id;{% endif %} @@ -846,7 +849,8 @@ BEGIN FROM placex WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative' and admin_level < NEW.admin_level and admin_level > 3 - and rank_address > 0 + and rank_address between 1 and 25 -- for index selection + and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') -- for index selection and geometry && NEW.centroid and _ST_Covers(geometry, NEW.centroid) ORDER BY admin_level desc LIMIT 1 LOOP diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql index 9bbc7527..b1396034 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -10,62 +10,73 @@ CREATE INDEX IF NOT EXISTS idx_place_addressline_address_place_id ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}}; - +--- CREATE INDEX IF NOT EXISTS idx_placex_rank_search ON placex USING BTREE (rank_search) {{db.tablespace.search_index}}; - +--- CREATE INDEX IF NOT EXISTS idx_placex_rank_address ON placex USING BTREE (rank_address) {{db.tablespace.search_index}}; - +--- CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}} WHERE parent_place_id IS NOT NULL; - +--- +CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex + USING GIST (geometry) {{db.tablespace.search_index}}; +--- CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon ON placex USING gist (geometry) {{db.tablespace.search_index}} WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon') AND rank_address between 4 and 25 AND type != 'postcode' AND name is not null AND indexed_status = 0 AND linked_place_id is null; - +--- CREATE INDEX IF NOT EXISTS idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}} WHERE parent_place_id is not null; - +--- CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}}; - +--- CREATE INDEX IF NOT EXISTS idx_postcode_postcode ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}}; +{% if drop %} +--- + DROP INDEX IF EXISTS idx_placex_geometry_address_area_candidates; + DROP INDEX IF EXISTS idx_placex_geometry_buildings; + DROP INDEX IF EXISTS idx_placex_geometry_lower_rank_ways; + DROP INDEX IF EXISTS idx_placex_wikidata; + DROP INDEX IF EXISTS idx_placex_rank_address_sector; + DROP INDEX IF EXISTS idx_placex_rank_boundaries_sector; +{% else %} -- Indices only needed for updating. - -{% if not drop %} - CREATE INDEX IF NOT EXISTS idx_placex_pendingsector - ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}} - WHERE indexed_status > 0; - +--- CREATE INDEX IF NOT EXISTS idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}}; - +--- CREATE UNIQUE INDEX IF NOT EXISTS idx_place_osm_unique ON place USING btree(osm_id, osm_type, class, type) {{db.tablespace.address_index}}; {% endif %} -- Indices only needed for search. - {% if 'search_name' in db.tables %} +--- CREATE INDEX IF NOT EXISTS idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {{db.tablespace.search_index}}; +--- CREATE INDEX IF NOT EXISTS idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}}; +--- CREATE INDEX IF NOT EXISTS idx_search_name_centroid ON search_name USING GIST (centroid) {{db.tablespace.search_index}}; {% if postgres.has_index_non_key_column %} +--- CREATE INDEX IF NOT EXISTS idx_placex_housenumber ON placex USING btree (parent_place_id) INCLUDE (housenumber) {{db.tablespace.search_index}} WHERE housenumber is not null; +--- CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr ON location_property_osmline USING btree(parent_place_id) INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}} diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql index 2ad5243c..7ef74349 100644 --- a/lib-sql/tables.sql +++ b/lib-sql/tables.sql @@ -137,7 +137,9 @@ CREATE TABLE place_addressline ( ) {{db.tablespace.search_data}}; CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) {{db.tablespace.search_index}}; -drop table if exists placex; +--------- PLACEX - storage for all indexed places ----------------- + +DROP TABLE IF EXISTS placex; CREATE TABLE placex ( place_id BIGINT NOT NULL, parent_place_id BIGINT, @@ -157,24 +159,66 @@ CREATE TABLE placex ( postcode TEXT, centroid GEOMETRY(Geometry, 4326) ) {{db.tablespace.search_data}}; + CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {{db.tablespace.search_index}}; -CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {{db.tablespace.search_index}}; -CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}; -CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}}; +{% for osm_type in ('N', 'W', 'R') %} +CREATE INDEX idx_placex_osmid_{{osm_type | lower}} ON placex + USING BTREE (osm_id) {{db.tablespace.search_index}} + WHERE osm_type = '{{osm_type}}'; +{% endfor %} + +-- Usage: - removing linkage status on update +-- - lookup linked places for /details +CREATE INDEX idx_placex_linked_place_id ON placex + USING BTREE (linked_place_id) {{db.tablespace.address_index}} + WHERE linked_place_id IS NOT NULL; + +-- Usage: - check that admin boundaries do not overtake each other rank-wise +-- - check that place node in a admin boundary with the same address level +-- - boundary is not completely contained in a place area +-- - parenting of large-area or unparentable features CREATE INDEX idx_placex_geometry_address_area_candidates ON placex USING gist (geometry) {{db.tablespace.address_index}} WHERE rank_address between 1 and 25 and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon'); + +-- Usage: - POI is within building with housenumber CREATE INDEX idx_placex_geometry_buildings ON placex USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}} WHERE address is not null and rank_search = 30 and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon'); + +-- Usage: - linking of similar named places to boundaries +-- - linking of place nodes with same type to boundaries +-- - lookupPolygon() CREATE INDEX idx_placex_geometry_placenode ON placex USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}} WHERE osm_type = 'N' and rank_search < 26 - and class = 'place' and type != 'postcode' and linked_place_id is null; -CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26; + and class = 'place' and type != 'postcode'; + +-- Usage: - is node part of a way? +-- - find parent of interpolation spatially +CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex + USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}} + WHERE osm_type = 'W' and rank_search >= 26; + +-- Usage: - linking place nodes by wikidata tag to boundaries +CREATE INDEX idx_placex_wikidata on placex + USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} + WHERE extratags ? 'wikidata' and class = 'place' + and osm_type = 'N' and rank_search < 26; + +-- The following two indexes function as a todo list for indexing. + +CREATE INDEX idx_placex_rank_address_sector ON placex + USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}} + WHERE indexed_status > 0; + +CREATE INDEX idx_placex_rank_boundaries_sector ON placex + USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}} + WHERE class = 'boundary' and type = 'administrative' + and indexed_status > 0; + DROP SEQUENCE IF EXISTS seq_place; CREATE SEQUENCE seq_place start 1; diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 6ffa7afb..29724433 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -15,7 +15,7 @@ from pathlib import Path import psutil from nominatim.config import Configuration -from nominatim.db.connection import connect, Connection +from nominatim.db.connection import connect from nominatim.db import status, properties from nominatim.tokenizer.base import AbstractTokenizer from nominatim.version import version_str @@ -72,6 +72,8 @@ class SetupAll: from ..tools import database_import, refresh, postcodes, freeze from ..indexer.indexer import Indexer + num_threads = args.threads or psutil.cpu_count() or 1 + country_info.setup_country_config(args.config) if args.continue_at is None: @@ -109,8 +111,7 @@ class SetupAll: database_import.truncate_data_tables(conn) LOG.warning('Load data into placex table') - database_import.load_data(args.config.get_libpq_dsn(), - args.threads or psutil.cpu_count() or 1) + database_import.load_data(args.config.get_libpq_dsn(), num_threads) LOG.warning("Setting up tokenizer") tokenizer = self._get_tokenizer(args.continue_at, args.config) @@ -121,18 +122,15 @@ class SetupAll: args.project_dir, tokenizer) if args.continue_at is None or args.continue_at in ('load-data', 'indexing'): - if args.continue_at is not None and args.continue_at != 'load-data': - with connect(args.config.get_libpq_dsn()) as conn: - self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX) LOG.warning('Indexing places') - indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, - args.threads or psutil.cpu_count() or 1) + indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads) indexer.index_full(analyse=not args.index_noanalyse) LOG.warning('Post-process tables') with connect(args.config.get_libpq_dsn()) as conn: database_import.create_search_indices(conn, args.config, - drop=args.no_updates) + drop=args.no_updates, + threads=num_threads) LOG.warning('Create search index for default country names.') country_info.create_country_names(conn, tokenizer, args.config.get_str_list('LANGUAGES')) @@ -188,27 +186,6 @@ class SetupAll: return tokenizer_factory.get_tokenizer_for_db(config) - def _create_pending_index(self, conn: Connection, tablespace: str) -> None: - """ Add a supporting index for finding places still to be indexed. - - This index is normally created at the end of the import process - for later updates. When indexing was partially done, then this - index can greatly improve speed going through already indexed data. - """ - if conn.index_exists('idx_placex_pendingsector'): - return - - with conn.cursor() as cur: - LOG.warning('Creating support index') - if tablespace: - tablespace = 'TABLESPACE ' + tablespace - cur.execute(f"""CREATE INDEX idx_placex_pendingsector - ON placex USING BTREE (rank_address,geometry_sector) - {tablespace} WHERE indexed_status > 0 - """) - conn.commit() - - def _finalize_database(self, dsn: str, offline: bool) -> None: """ Determine the database date and set the status accordingly. """ diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py index b450422d..31b4a8c0 100644 --- a/nominatim/db/sql_preprocessor.py +++ b/nominatim/db/sql_preprocessor.py @@ -11,6 +11,7 @@ from typing import Set, Dict, Any import jinja2 from nominatim.db.connection import Connection +from nominatim.db.async_connection import WorkerPool from nominatim.config import Configuration def _get_partitions(conn: Connection) -> Set[int]: @@ -96,3 +97,21 @@ class SQLPreprocessor: with conn.cursor() as cur: cur.execute(sql) conn.commit() + + + def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1, + **kwargs: Any) -> None: + """ Execure the given SQL files using parallel asynchronous connections. + The keyword arguments may supply additional parameters for + preprocessing. + + After preprocessing the SQL code is cut at lines containing only + '---'. Each chunk is sent to one of the `num_threads` workers. + """ + sql = self.env.get_template(name).render(**kwargs) + + parts = sql.split('\n---\n') + + with WorkerPool(dsn, num_threads) as pool: + for part in parts: + pool.next_free_worker().perform(part) diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index 80358f20..437775db 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -114,9 +114,10 @@ def _get_indexes(conn: Connection) -> List[str]: indexes.extend(('idx_placex_housenumber', 'idx_osmline_parent_osm_id_with_hnr')) if conn.table_exists('place'): - indexes.extend(('idx_placex_pendingsector', - 'idx_location_area_country_place_id', - 'idx_place_osm_unique')) + indexes.extend(('idx_location_area_country_place_id', + 'idx_place_osm_unique', + 'idx_placex_rank_address_sector', + 'idx_placex_rank_boundaries_sector')) return indexes @@ -199,7 +200,7 @@ def check_tokenizer(_: Connection, config: Configuration) -> CheckResult: def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult: """ Checking for wikipedia/wikidata data """ - if not conn.table_exists('search_name'): + if not conn.table_exists('search_name') or not conn.table_exists('place'): return CheckState.NOT_APPLICABLE with conn.cursor() as cur: diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 447e90f1..f6ebe90d 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -225,7 +225,8 @@ def load_data(dsn: str, threads: int) -> None: cur.execute('ANALYSE') -def create_search_indices(conn: Connection, config: Configuration, drop: bool = False) -> None: +def create_search_indices(conn: Connection, config: Configuration, + drop: bool = False, threads: int = 1) -> None: """ Create tables that have explicit partitioning. """ @@ -243,4 +244,5 @@ def create_search_indices(conn: Connection, config: Configuration, drop: bool = sql = SQLPreprocessor(conn, config) - sql.run_sql_file(conn, 'indices.sql', drop=drop) + sql.run_parallel_sql_file(config.get_libpq_dsn(), + 'indices.sql', min(8, threads), drop=drop) diff --git a/test/python/cli/test_cmd_import.py b/test/python/cli/test_cmd_import.py index d545c760..737c4e5c 100644 --- a/test/python/cli/test_cmd_import.py +++ b/test/python/cli/test_cmd_import.py @@ -105,11 +105,8 @@ class TestCliImportWithDb: for mock in mocks: assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) - assert temp_db_conn.index_exists('idx_placex_pendingsector') - # Calling it again still works for the index assert self.call_nominatim('import', '--continue', 'indexing') == 0 - assert temp_db_conn.index_exists('idx_placex_pendingsector') def test_import_continue_postprocess(self, mock_func_factory):