consolidate indexes over geometry_sectors

The indexes over geometry_sectors are mainly used for ordering
the places which need indexing. That means they function effectively
as a TODO list. Consolidate them so that they always only contain
the places which are still to do. Also add the appropriate index
for the boundary indexing phase.
This commit is contained in:
Sarah Hoffmann
2022-09-21 10:38:58 +02:00
parent 860f3559a1
commit f4d3ae6f70
5 changed files with 25 additions and 37 deletions

View File

@@ -15,7 +15,7 @@ from pathlib import Path
import psutil
from nominatim.config import Configuration
from nominatim.db.connection import connect, Connection
from nominatim.db.connection import connect
from nominatim.db import status, properties
from nominatim.tokenizer.base import AbstractTokenizer
from nominatim.version import version_str
@@ -122,9 +122,6 @@ class SetupAll:
args.project_dir, tokenizer)
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
if args.continue_at is not None and args.continue_at != 'load-data':
with connect(args.config.get_libpq_dsn()) as conn:
self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
indexer.index_full(analyse=not args.index_noanalyse)
@@ -189,27 +186,6 @@ class SetupAll:
return tokenizer_factory.get_tokenizer_for_db(config)
def _create_pending_index(self, conn: Connection, tablespace: str) -> None:
""" Add a supporting index for finding places still to be indexed.
This index is normally created at the end of the import process
for later updates. When indexing was partially done, then this
index can greatly improve speed going through already indexed data.

`conn` is the database connection used to check for and create the
index. `tablespace` is the name of the tablespace to put the index
in; when it is empty, no TABLESPACE clause is added.
"""
# Nothing to do when the index is already present.
if conn.index_exists('idx_placex_pendingsector'):
return
with conn.cursor() as cur:
LOG.warning('Creating support index')
# Turn a non-empty tablespace name into a full TABLESPACE clause;
# an empty name is interpolated below as-is and adds nothing.
if tablespace:
tablespace = 'TABLESPACE ' + tablespace
# Partial index: the WHERE clause restricts it to rows with
# indexed_status > 0.
cur.execute(f"""CREATE INDEX idx_placex_pendingsector
ON placex USING BTREE (rank_address,geometry_sector)
{tablespace} WHERE indexed_status > 0
""")
conn.commit()
def _finalize_database(self, dsn: str, offline: bool) -> None:
""" Determine the database date and set the status accordingly.
"""

View File

@@ -114,9 +114,10 @@ def _get_indexes(conn: Connection) -> List[str]:
indexes.extend(('idx_placex_housenumber',
'idx_osmline_parent_osm_id_with_hnr'))
if conn.table_exists('place'):
indexes.extend(('idx_placex_pendingsector',
'idx_location_area_country_place_id',
'idx_place_osm_unique'))
indexes.extend(('idx_location_area_country_place_id',
'idx_place_osm_unique',
'idx_placex_rank_address_sector',
'idx_placex_rank_boundaries_sector'))
return indexes
@@ -199,7 +200,7 @@ def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
""" Checking for wikipedia/wikidata data
"""
if not conn.table_exists('search_name'):
if not conn.table_exists('search_name') or not conn.table_exists('place'):
return CheckState.NOT_APPLICABLE
with conn.cursor() as cur: