forked from hans/Nominatim
Merge pull request #2829 from lonvia/optimize-indexes
Further optimize indexes
This commit is contained in:
@@ -52,7 +52,9 @@ BEGIN
|
||||
|
||||
IF parent_place_id is null THEN
|
||||
FOR location IN SELECT place_id FROM placex
|
||||
WHERE ST_DWithin(geom, placex.geometry, 0.001) and placex.rank_search = 26
|
||||
WHERE ST_DWithin(geom, placex.geometry, 0.001)
|
||||
and placex.rank_search = 26
|
||||
and placex.osm_type = 'W' -- needed for index selection
|
||||
ORDER BY CASE WHEN ST_GeometryType(geom) = 'ST_Line' THEN
|
||||
(ST_distance(placex.geometry, ST_LineInterpolatePoint(geom,0))+
|
||||
ST_distance(placex.geometry, ST_LineInterpolatePoint(geom,0.5))+
|
||||
|
||||
@@ -197,6 +197,7 @@ BEGIN
|
||||
SELECT place_id FROM placex
|
||||
WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
|
||||
AND rank_address between 5 and 25
|
||||
AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
|
||||
ORDER BY rank_address desc
|
||||
LOOP
|
||||
RETURN location.place_id;
|
||||
@@ -212,6 +213,7 @@ BEGIN
|
||||
SELECT place_id FROM placex
|
||||
WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
|
||||
AND rank_address between 5 and 25
|
||||
AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
|
||||
ORDER BY rank_address desc
|
||||
LOOP
|
||||
RETURN location.place_id;
|
||||
@@ -275,7 +277,9 @@ BEGIN
|
||||
|
||||
-- If extratags has a place tag, look for linked nodes by their place type.
|
||||
-- Area and node still have to have the same name.
|
||||
IF bnd.extratags ? 'place' and bnd_name is not null THEN
|
||||
IF bnd.extratags ? 'place' and bnd.extratags->'place' != 'postcode'
|
||||
and bnd_name is not null
|
||||
THEN
|
||||
FOR linked_placex IN
|
||||
SELECT * FROM placex
|
||||
WHERE (position(lower(name->'name') in bnd_name) > 0
|
||||
@@ -284,7 +288,6 @@ BEGIN
|
||||
AND placex.osm_type = 'N'
|
||||
AND (placex.linked_place_id is null or placex.linked_place_id = bnd.place_id)
|
||||
AND placex.rank_search < 26 -- needed to select the right index
|
||||
AND placex.type != 'postcode'
|
||||
AND ST_Covers(bnd.geometry, placex.geometry)
|
||||
LOOP
|
||||
{% if debug %}RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id;{% endif %}
|
||||
@@ -846,7 +849,8 @@ BEGIN
|
||||
FROM placex
|
||||
WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
|
||||
and admin_level < NEW.admin_level and admin_level > 3
|
||||
and rank_address > 0
|
||||
and rank_address between 1 and 25 -- for index selection
|
||||
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') -- for index selection
|
||||
and geometry && NEW.centroid and _ST_Covers(geometry, NEW.centroid)
|
||||
ORDER BY admin_level desc LIMIT 1
|
||||
LOOP
|
||||
|
||||
@@ -10,62 +10,73 @@
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_place_addressline_address_place_id
|
||||
ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
|
||||
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_rank_search
|
||||
ON placex USING BTREE (rank_search) {{db.tablespace.search_index}};
|
||||
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_rank_address
|
||||
ON placex USING BTREE (rank_address) {{db.tablespace.search_index}};
|
||||
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
|
||||
ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}}
|
||||
WHERE parent_place_id IS NOT NULL;
|
||||
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex
|
||||
USING GIST (geometry) {{db.tablespace.search_index}};
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
|
||||
ON placex USING gist (geometry) {{db.tablespace.search_index}}
|
||||
WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
|
||||
AND rank_address between 4 and 25 AND type != 'postcode'
|
||||
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
|
||||
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_osmline_parent_place_id
|
||||
ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}}
|
||||
WHERE parent_place_id is not null;
|
||||
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id
|
||||
ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}};
|
||||
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_postcode_postcode
|
||||
ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}};
|
||||
|
||||
{% if drop %}
|
||||
---
|
||||
DROP INDEX IF EXISTS idx_placex_geometry_address_area_candidates;
|
||||
DROP INDEX IF EXISTS idx_placex_geometry_buildings;
|
||||
DROP INDEX IF EXISTS idx_placex_geometry_lower_rank_ways;
|
||||
DROP INDEX IF EXISTS idx_placex_wikidata;
|
||||
DROP INDEX IF EXISTS idx_placex_rank_address_sector;
|
||||
DROP INDEX IF EXISTS idx_placex_rank_boundaries_sector;
|
||||
{% else %}
|
||||
-- Indices only needed for updating.
|
||||
|
||||
{% if not drop %}
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_pendingsector
|
||||
ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}}
|
||||
WHERE indexed_status > 0;
|
||||
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_location_area_country_place_id
|
||||
ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}};
|
||||
|
||||
---
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_place_osm_unique
|
||||
ON place USING btree(osm_id, osm_type, class, type) {{db.tablespace.address_index}};
|
||||
{% endif %}
|
||||
|
||||
-- Indices only needed for search.
|
||||
|
||||
{% if 'search_name' in db.tables %}
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_search_name_nameaddress_vector
|
||||
ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_search_name_name_vector
|
||||
ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_search_name_centroid
|
||||
ON search_name USING GIST (centroid) {{db.tablespace.search_index}};
|
||||
|
||||
{% if postgres.has_index_non_key_column %}
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_housenumber
|
||||
ON placex USING btree (parent_place_id)
|
||||
INCLUDE (housenumber) {{db.tablespace.search_index}}
|
||||
WHERE housenumber is not null;
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
|
||||
ON location_property_osmline USING btree(parent_place_id)
|
||||
INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}}
|
||||
|
||||
@@ -137,7 +137,9 @@ CREATE TABLE place_addressline (
|
||||
) {{db.tablespace.search_data}};
|
||||
CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) {{db.tablespace.search_index}};
|
||||
|
||||
drop table if exists placex;
|
||||
--------- PLACEX - storage for all indexed places -----------------
|
||||
|
||||
DROP TABLE IF EXISTS placex;
|
||||
CREATE TABLE placex (
|
||||
place_id BIGINT NOT NULL,
|
||||
parent_place_id BIGINT,
|
||||
@@ -157,24 +159,66 @@ CREATE TABLE placex (
|
||||
postcode TEXT,
|
||||
centroid GEOMETRY(Geometry, 4326)
|
||||
) {{db.tablespace.search_data}};
|
||||
|
||||
CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {{db.tablespace.search_index}};
|
||||
CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {{db.tablespace.search_index}};
|
||||
CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL;
|
||||
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}};
|
||||
CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}};
|
||||
{% for osm_type in ('N', 'W', 'R') %}
|
||||
CREATE INDEX idx_placex_osmid_{{osm_type | lower}} ON placex
|
||||
USING BTREE (osm_id) {{db.tablespace.search_index}}
|
||||
WHERE osm_type = '{{osm_type}}';
|
||||
{% endfor %}
|
||||
|
||||
-- Usage: - removing linkage status on update
|
||||
-- - lookup linked places for /details
|
||||
CREATE INDEX idx_placex_linked_place_id ON placex
|
||||
USING BTREE (linked_place_id) {{db.tablespace.address_index}}
|
||||
WHERE linked_place_id IS NOT NULL;
|
||||
|
||||
-- Usage: - check that admin boundaries do not overtake each other rank-wise
|
||||
-- - check that place node in a admin boundary with the same address level
|
||||
-- - boundary is not completely contained in a place area
|
||||
-- - parenting of large-area or unparentable features
|
||||
CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
|
||||
USING gist (geometry) {{db.tablespace.address_index}}
|
||||
WHERE rank_address between 1 and 25
|
||||
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
|
||||
|
||||
-- Usage: - POI is within building with housenumber
|
||||
CREATE INDEX idx_placex_geometry_buildings ON placex
|
||||
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
|
||||
WHERE address is not null and rank_search = 30
|
||||
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
|
||||
|
||||
-- Usage: - linking of similar named places to boundaries
|
||||
-- - linking of place nodes with same type to boundaries
|
||||
-- - lookupPolygon()
|
||||
CREATE INDEX idx_placex_geometry_placenode ON placex
|
||||
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
|
||||
WHERE osm_type = 'N' and rank_search < 26
|
||||
and class = 'place' and type != 'postcode' and linked_place_id is null;
|
||||
CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26;
|
||||
and class = 'place' and type != 'postcode';
|
||||
|
||||
-- Usage: - is node part of a way?
|
||||
-- - find parent of interpolation spatially
|
||||
CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
|
||||
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
|
||||
WHERE osm_type = 'W' and rank_search >= 26;
|
||||
|
||||
-- Usage: - linking place nodes by wikidata tag to boundaries
|
||||
CREATE INDEX idx_placex_wikidata on placex
|
||||
USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}}
|
||||
WHERE extratags ? 'wikidata' and class = 'place'
|
||||
and osm_type = 'N' and rank_search < 26;
|
||||
|
||||
-- The following two indexes function as a todo list for indexing.
|
||||
|
||||
CREATE INDEX idx_placex_rank_address_sector ON placex
|
||||
USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}}
|
||||
WHERE indexed_status > 0;
|
||||
|
||||
CREATE INDEX idx_placex_rank_boundaries_sector ON placex
|
||||
USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}
|
||||
WHERE class = 'boundary' and type = 'administrative'
|
||||
and indexed_status > 0;
|
||||
|
||||
|
||||
DROP SEQUENCE IF EXISTS seq_place;
|
||||
CREATE SEQUENCE seq_place start 1;
|
||||
|
||||
@@ -15,7 +15,7 @@ from pathlib import Path
|
||||
import psutil
|
||||
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.db.connection import connect, Connection
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.db import status, properties
|
||||
from nominatim.tokenizer.base import AbstractTokenizer
|
||||
from nominatim.version import version_str
|
||||
@@ -72,6 +72,8 @@ class SetupAll:
|
||||
from ..tools import database_import, refresh, postcodes, freeze
|
||||
from ..indexer.indexer import Indexer
|
||||
|
||||
num_threads = args.threads or psutil.cpu_count() or 1
|
||||
|
||||
country_info.setup_country_config(args.config)
|
||||
|
||||
if args.continue_at is None:
|
||||
@@ -109,8 +111,7 @@ class SetupAll:
|
||||
database_import.truncate_data_tables(conn)
|
||||
|
||||
LOG.warning('Load data into placex table')
|
||||
database_import.load_data(args.config.get_libpq_dsn(),
|
||||
args.threads or psutil.cpu_count() or 1)
|
||||
database_import.load_data(args.config.get_libpq_dsn(), num_threads)
|
||||
|
||||
LOG.warning("Setting up tokenizer")
|
||||
tokenizer = self._get_tokenizer(args.continue_at, args.config)
|
||||
@@ -121,18 +122,15 @@ class SetupAll:
|
||||
args.project_dir, tokenizer)
|
||||
|
||||
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
|
||||
if args.continue_at is not None and args.continue_at != 'load-data':
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
|
||||
LOG.warning('Indexing places')
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
||||
args.threads or psutil.cpu_count() or 1)
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
|
||||
indexer.index_full(analyse=not args.index_noanalyse)
|
||||
|
||||
LOG.warning('Post-process tables')
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
database_import.create_search_indices(conn, args.config,
|
||||
drop=args.no_updates)
|
||||
drop=args.no_updates,
|
||||
threads=num_threads)
|
||||
LOG.warning('Create search index for default country names.')
|
||||
country_info.create_country_names(conn, tokenizer,
|
||||
args.config.get_str_list('LANGUAGES'))
|
||||
@@ -188,27 +186,6 @@ class SetupAll:
|
||||
return tokenizer_factory.get_tokenizer_for_db(config)
|
||||
|
||||
|
||||
def _create_pending_index(self, conn: Connection, tablespace: str) -> None:
|
||||
""" Add a supporting index for finding places still to be indexed.
|
||||
|
||||
This index is normally created at the end of the import process
|
||||
for later updates. When indexing was partially done, then this
|
||||
index can greatly improve speed going through already indexed data.
|
||||
"""
|
||||
if conn.index_exists('idx_placex_pendingsector'):
|
||||
return
|
||||
|
||||
with conn.cursor() as cur:
|
||||
LOG.warning('Creating support index')
|
||||
if tablespace:
|
||||
tablespace = 'TABLESPACE ' + tablespace
|
||||
cur.execute(f"""CREATE INDEX idx_placex_pendingsector
|
||||
ON placex USING BTREE (rank_address,geometry_sector)
|
||||
{tablespace} WHERE indexed_status > 0
|
||||
""")
|
||||
conn.commit()
|
||||
|
||||
|
||||
def _finalize_database(self, dsn: str, offline: bool) -> None:
|
||||
""" Determine the database date and set the status accordingly.
|
||||
"""
|
||||
|
||||
@@ -11,6 +11,7 @@ from typing import Set, Dict, Any
|
||||
import jinja2
|
||||
|
||||
from nominatim.db.connection import Connection
|
||||
from nominatim.db.async_connection import WorkerPool
|
||||
from nominatim.config import Configuration
|
||||
|
||||
def _get_partitions(conn: Connection) -> Set[int]:
|
||||
@@ -96,3 +97,21 @@ class SQLPreprocessor:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql)
|
||||
conn.commit()
|
||||
|
||||
|
||||
def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
|
||||
**kwargs: Any) -> None:
|
||||
""" Execure the given SQL files using parallel asynchronous connections.
|
||||
The keyword arguments may supply additional parameters for
|
||||
preprocessing.
|
||||
|
||||
After preprocessing the SQL code is cut at lines containing only
|
||||
'---'. Each chunk is sent to one of the `num_threads` workers.
|
||||
"""
|
||||
sql = self.env.get_template(name).render(**kwargs)
|
||||
|
||||
parts = sql.split('\n---\n')
|
||||
|
||||
with WorkerPool(dsn, num_threads) as pool:
|
||||
for part in parts:
|
||||
pool.next_free_worker().perform(part)
|
||||
|
||||
@@ -114,9 +114,10 @@ def _get_indexes(conn: Connection) -> List[str]:
|
||||
indexes.extend(('idx_placex_housenumber',
|
||||
'idx_osmline_parent_osm_id_with_hnr'))
|
||||
if conn.table_exists('place'):
|
||||
indexes.extend(('idx_placex_pendingsector',
|
||||
'idx_location_area_country_place_id',
|
||||
'idx_place_osm_unique'))
|
||||
indexes.extend(('idx_location_area_country_place_id',
|
||||
'idx_place_osm_unique',
|
||||
'idx_placex_rank_address_sector',
|
||||
'idx_placex_rank_boundaries_sector'))
|
||||
|
||||
return indexes
|
||||
|
||||
@@ -199,7 +200,7 @@ def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
|
||||
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
|
||||
""" Checking for wikipedia/wikidata data
|
||||
"""
|
||||
if not conn.table_exists('search_name'):
|
||||
if not conn.table_exists('search_name') or not conn.table_exists('place'):
|
||||
return CheckState.NOT_APPLICABLE
|
||||
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -225,7 +225,8 @@ def load_data(dsn: str, threads: int) -> None:
|
||||
cur.execute('ANALYSE')
|
||||
|
||||
|
||||
def create_search_indices(conn: Connection, config: Configuration, drop: bool = False) -> None:
|
||||
def create_search_indices(conn: Connection, config: Configuration,
|
||||
drop: bool = False, threads: int = 1) -> None:
|
||||
""" Create tables that have explicit partitioning.
|
||||
"""
|
||||
|
||||
@@ -243,4 +244,5 @@ def create_search_indices(conn: Connection, config: Configuration, drop: bool =
|
||||
|
||||
sql = SQLPreprocessor(conn, config)
|
||||
|
||||
sql.run_sql_file(conn, 'indices.sql', drop=drop)
|
||||
sql.run_parallel_sql_file(config.get_libpq_dsn(),
|
||||
'indices.sql', min(8, threads), drop=drop)
|
||||
|
||||
@@ -105,11 +105,8 @@ class TestCliImportWithDb:
|
||||
for mock in mocks:
|
||||
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
||||
|
||||
assert temp_db_conn.index_exists('idx_placex_pendingsector')
|
||||
|
||||
# Calling it again still works for the index
|
||||
assert self.call_nominatim('import', '--continue', 'indexing') == 0
|
||||
assert temp_db_conn.index_exists('idx_placex_pendingsector')
|
||||
|
||||
|
||||
def test_import_continue_postprocess(self, mock_func_factory):
|
||||
|
||||
Reference in New Issue
Block a user