mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
consolidate indexes over geometry_sectors
The index over geometry_sectors are mainly used for ordering the places which need indexing. That means they function effectively as a TODO list. Consolodate them so that they always only contain the places which are still to do. Also add the appropriate index for the boundary indexing phase.
This commit is contained in:
@@ -5,6 +5,13 @@
|
||||
-- Copyright (C) 2022 by the Nominatim developer community.
|
||||
-- For a full list of authors see the git log.
|
||||
|
||||
-- The following indicies are only useful during imoprt when all of placex is processed.
|
||||
|
||||
{% if drop %}
|
||||
DROP INDEX IF EXISTS idx_placex_rank_address_sector;
|
||||
DROP INDEX IF EXISTS idx_placex_rank_boundaries_sector;
|
||||
{% endif %}
|
||||
|
||||
-- Indices used only during search and update.
|
||||
-- These indices are created only after the indexing process is done.
|
||||
|
||||
@@ -39,10 +46,6 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode
|
||||
-- Indices only needed for updating.
|
||||
|
||||
{% if not drop %}
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_pendingsector
|
||||
ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}}
|
||||
WHERE indexed_status > 0;
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_location_area_country_place_id
|
||||
ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}};
|
||||
|
||||
@@ -162,7 +162,6 @@ CREATE INDEX idx_placex_node_osmid ON placex USING BTREE (osm_id) {{db.tablespac
|
||||
CREATE INDEX idx_placex_way_osmid ON placex USING BTREE (osm_id) {{db.tablespace.search_index}} WHERE osm_type = 'W';
|
||||
CREATE INDEX idx_placex_relation_osmid ON placex USING BTREE (osm_id) {{db.tablespace.search_index}} WHERE osm_type = 'R';
|
||||
CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL;
|
||||
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}};
|
||||
CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}};
|
||||
CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
|
||||
USING gist (geometry) {{db.tablespace.address_index}}
|
||||
@@ -178,6 +177,18 @@ CREATE INDEX idx_placex_geometry_placenode ON placex
|
||||
and class = 'place' and type != 'postcode' and linked_place_id is null;
|
||||
CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26;
|
||||
|
||||
-- The following two indexes function as a todo list for indexing.
|
||||
|
||||
CREATE INDEX idx_placex_rank_address_sector ON placex
|
||||
USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}}
|
||||
WHERE indexed_status > 0;
|
||||
|
||||
CREATE INDEX idx_placex_rank_boundaries_sector ON placex
|
||||
USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}
|
||||
WHERE class = 'boundary' and type = 'administrative'
|
||||
and indexed_status > 0;
|
||||
|
||||
|
||||
DROP SEQUENCE IF EXISTS seq_place;
|
||||
CREATE SEQUENCE seq_place start 1;
|
||||
GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ;
|
||||
|
||||
@@ -15,7 +15,7 @@ from pathlib import Path
|
||||
import psutil
|
||||
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.db.connection import connect, Connection
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.db import status, properties
|
||||
from nominatim.tokenizer.base import AbstractTokenizer
|
||||
from nominatim.version import version_str
|
||||
@@ -122,9 +122,6 @@ class SetupAll:
|
||||
args.project_dir, tokenizer)
|
||||
|
||||
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
|
||||
if args.continue_at is not None and args.continue_at != 'load-data':
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
|
||||
LOG.warning('Indexing places')
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
|
||||
indexer.index_full(analyse=not args.index_noanalyse)
|
||||
@@ -189,27 +186,6 @@ class SetupAll:
|
||||
return tokenizer_factory.get_tokenizer_for_db(config)
|
||||
|
||||
|
||||
def _create_pending_index(self, conn: Connection, tablespace: str) -> None:
|
||||
""" Add a supporting index for finding places still to be indexed.
|
||||
|
||||
This index is normally created at the end of the import process
|
||||
for later updates. When indexing was partially done, then this
|
||||
index can greatly improve speed going through already indexed data.
|
||||
"""
|
||||
if conn.index_exists('idx_placex_pendingsector'):
|
||||
return
|
||||
|
||||
with conn.cursor() as cur:
|
||||
LOG.warning('Creating support index')
|
||||
if tablespace:
|
||||
tablespace = 'TABLESPACE ' + tablespace
|
||||
cur.execute(f"""CREATE INDEX idx_placex_pendingsector
|
||||
ON placex USING BTREE (rank_address,geometry_sector)
|
||||
{tablespace} WHERE indexed_status > 0
|
||||
""")
|
||||
conn.commit()
|
||||
|
||||
|
||||
def _finalize_database(self, dsn: str, offline: bool) -> None:
|
||||
""" Determine the database date and set the status accordingly.
|
||||
"""
|
||||
|
||||
@@ -114,9 +114,10 @@ def _get_indexes(conn: Connection) -> List[str]:
|
||||
indexes.extend(('idx_placex_housenumber',
|
||||
'idx_osmline_parent_osm_id_with_hnr'))
|
||||
if conn.table_exists('place'):
|
||||
indexes.extend(('idx_placex_pendingsector',
|
||||
'idx_location_area_country_place_id',
|
||||
'idx_place_osm_unique'))
|
||||
indexes.extend(('idx_location_area_country_place_id',
|
||||
'idx_place_osm_unique',
|
||||
'idx_placex_rank_address_sector',
|
||||
'idx_placex_rank_boundaries_sector'))
|
||||
|
||||
return indexes
|
||||
|
||||
@@ -199,7 +200,7 @@ def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
|
||||
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
|
||||
""" Checking for wikipedia/wikidata data
|
||||
"""
|
||||
if not conn.table_exists('search_name'):
|
||||
if not conn.table_exists('search_name') or not conn.table_exists('place'):
|
||||
return CheckState.NOT_APPLICABLE
|
||||
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -105,11 +105,8 @@ class TestCliImportWithDb:
|
||||
for mock in mocks:
|
||||
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
||||
|
||||
assert temp_db_conn.index_exists('idx_placex_pendingsector')
|
||||
|
||||
# Calling it again still works for the index
|
||||
assert self.call_nominatim('import', '--continue', 'indexing') == 0
|
||||
assert temp_db_conn.index_exists('idx_placex_pendingsector')
|
||||
|
||||
|
||||
def test_import_continue_postprocess(self, mock_func_factory):
|
||||
|
||||
Reference in New Issue
Block a user