consolidate indexes over geometry_sectors

The index over geometry_sectors are mainly used for ordering
the places which need indexing. That means they function effectively
as a TODO list. Consolodate them so that they always only contain
the places which are still to do. Also add the appropriate index
for the boundary indexing phase.
This commit is contained in:
Sarah Hoffmann
2022-09-21 10:38:58 +02:00
parent 860f3559a1
commit f4d3ae6f70
5 changed files with 25 additions and 37 deletions

View File

@@ -5,6 +5,13 @@
-- Copyright (C) 2022 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- The following indicies are only useful during imoprt when all of placex is processed.
{% if drop %}
DROP INDEX IF EXISTS idx_placex_rank_address_sector;
DROP INDEX IF EXISTS idx_placex_rank_boundaries_sector;
{% endif %}
-- Indices used only during search and update.
-- These indices are created only after the indexing process is done.
@@ -39,10 +46,6 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode
-- Indices only needed for updating.
{% if not drop %}
---
CREATE INDEX IF NOT EXISTS idx_placex_pendingsector
ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}}
WHERE indexed_status > 0;
---
CREATE INDEX IF NOT EXISTS idx_location_area_country_place_id
ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}};

View File

@@ -162,7 +162,6 @@ CREATE INDEX idx_placex_node_osmid ON placex USING BTREE (osm_id) {{db.tablespac
CREATE INDEX idx_placex_way_osmid ON placex USING BTREE (osm_id) {{db.tablespace.search_index}} WHERE osm_type = 'W';
CREATE INDEX idx_placex_relation_osmid ON placex USING BTREE (osm_id) {{db.tablespace.search_index}} WHERE osm_type = 'R';
CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL;
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}};
CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}};
CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
USING gist (geometry) {{db.tablespace.address_index}}
@@ -178,6 +177,18 @@ CREATE INDEX idx_placex_geometry_placenode ON placex
and class = 'place' and type != 'postcode' and linked_place_id is null;
CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26;
-- The following two indexes function as a todo list for indexing.
CREATE INDEX idx_placex_rank_address_sector ON placex
USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}}
WHERE indexed_status > 0;
CREATE INDEX idx_placex_rank_boundaries_sector ON placex
USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}
WHERE class = 'boundary' and type = 'administrative'
and indexed_status > 0;
DROP SEQUENCE IF EXISTS seq_place;
CREATE SEQUENCE seq_place start 1;
GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ;

View File

@@ -15,7 +15,7 @@ from pathlib import Path
import psutil
from nominatim.config import Configuration
from nominatim.db.connection import connect, Connection
from nominatim.db.connection import connect
from nominatim.db import status, properties
from nominatim.tokenizer.base import AbstractTokenizer
from nominatim.version import version_str
@@ -122,9 +122,6 @@ class SetupAll:
args.project_dir, tokenizer)
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
if args.continue_at is not None and args.continue_at != 'load-data':
with connect(args.config.get_libpq_dsn()) as conn:
self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
indexer.index_full(analyse=not args.index_noanalyse)
@@ -189,27 +186,6 @@ class SetupAll:
return tokenizer_factory.get_tokenizer_for_db(config)
def _create_pending_index(self, conn: Connection, tablespace: str) -> None:
""" Add a supporting index for finding places still to be indexed.
This index is normally created at the end of the import process
for later updates. When indexing was partially done, then this
index can greatly improve speed going through already indexed data.
"""
if conn.index_exists('idx_placex_pendingsector'):
return
with conn.cursor() as cur:
LOG.warning('Creating support index')
if tablespace:
tablespace = 'TABLESPACE ' + tablespace
cur.execute(f"""CREATE INDEX idx_placex_pendingsector
ON placex USING BTREE (rank_address,geometry_sector)
{tablespace} WHERE indexed_status > 0
""")
conn.commit()
def _finalize_database(self, dsn: str, offline: bool) -> None:
""" Determine the database date and set the status accordingly.
"""

View File

@@ -114,9 +114,10 @@ def _get_indexes(conn: Connection) -> List[str]:
indexes.extend(('idx_placex_housenumber',
'idx_osmline_parent_osm_id_with_hnr'))
if conn.table_exists('place'):
indexes.extend(('idx_placex_pendingsector',
'idx_location_area_country_place_id',
'idx_place_osm_unique'))
indexes.extend(('idx_location_area_country_place_id',
'idx_place_osm_unique',
'idx_placex_rank_address_sector',
'idx_placex_rank_boundaries_sector'))
return indexes
@@ -199,7 +200,7 @@ def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
""" Checking for wikipedia/wikidata data
"""
if not conn.table_exists('search_name'):
if not conn.table_exists('search_name') or not conn.table_exists('place'):
return CheckState.NOT_APPLICABLE
with conn.cursor() as cur:

View File

@@ -105,11 +105,8 @@ class TestCliImportWithDb:
for mock in mocks:
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
assert temp_db_conn.index_exists('idx_placex_pendingsector')
# Calling it again still works for the index
assert self.call_nominatim('import', '--continue', 'indexing') == 0
assert temp_db_conn.index_exists('idx_placex_pendingsector')
def test_import_continue_postprocess(self, mock_func_factory):