run final index creation in parallel

This commit is contained in:
Sarah Hoffmann
2022-09-14 15:37:39 +02:00
parent bef1aebf1c
commit ed3dd81d04
3 changed files with 25 additions and 18 deletions

View File

@@ -72,6 +72,8 @@ class SetupAll:
from ..tools import database_import, refresh, postcodes, freeze
from ..indexer.indexer import Indexer
num_threads = args.threads or psutil.cpu_count() or 1
country_info.setup_country_config(args.config)
if args.continue_at is None:
@@ -109,8 +111,7 @@ class SetupAll:
database_import.truncate_data_tables(conn)
LOG.warning('Load data into placex table')
database_import.load_data(args.config.get_libpq_dsn(),
args.threads or psutil.cpu_count() or 1)
database_import.load_data(args.config.get_libpq_dsn(), num_threads)
LOG.warning("Setting up tokenizer")
tokenizer = self._get_tokenizer(args.continue_at, args.config)
@@ -125,14 +126,14 @@ class SetupAll:
with connect(args.config.get_libpq_dsn()) as conn:
self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or psutil.cpu_count() or 1)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
indexer.index_full(analyse=not args.index_noanalyse)
LOG.warning('Post-process tables')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.create_search_indices(conn, args.config,
drop=args.no_updates)
drop=args.no_updates,
threads=num_threads)
LOG.warning('Create search index for default country names.')
country_info.create_country_names(conn, tokenizer,
args.config.get_str_list('LANGUAGES'))

View File

@@ -225,7 +225,8 @@ def load_data(dsn: str, threads: int) -> None:
cur.execute('ANALYSE')
def create_search_indices(conn: Connection, config: Configuration, drop: bool = False) -> None:
def create_search_indices(conn: Connection, config: Configuration,
drop: bool = False, threads: int = 1) -> None:
""" Create tables that have explicit partitioning.
"""
@@ -243,4 +244,5 @@ def create_search_indices(conn: Connection, config: Configuration, drop: bool =
sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'indices.sql', drop=drop)
sql.run_parallel_sql_file(config.get_libpq_dsn(),
'indices.sql', min(8, threads), drop=drop)