port code to psycopg3

Sarah Hoffmann
2024-07-05 10:43:10 +02:00
parent 3742fa2929
commit 9659afbade
57 changed files with 800 additions and 1330 deletions
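psycopg 3 exposes its non-blocking API through coroutine-based connections and cursors, which is why the command-line code in this commit grows asyncio plumbing around every call into the database layer. As a reference point only (nothing in this sketch is part of the diff, and the DSN and query are placeholders), the basic async psycopg pattern the ported code builds on looks roughly like this:

import asyncio
import psycopg

async def count_places(dsn: str) -> int:
    # open an asynchronous connection and cursor, then await the query
    async with await psycopg.AsyncConnection.connect(dsn) as conn:
        async with conn.cursor() as cur:
            await cur.execute('SELECT count(*) FROM placex')
            row = await cur.fetchone()
            return row[0] if row else 0

print(asyncio.run(count_places('dbname=nominatim')))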

View File

@@ -10,6 +10,7 @@ Implementation of the 'add-data' subcommand.
from typing import cast
import argparse
import logging
+import asyncio
import psutil
@@ -64,15 +65,10 @@ class UpdateAddData:
def run(self, args: NominatimArgs) -> int:
-from ..tokenizer import factory as tokenizer_factory
-from ..tools import tiger_data, add_osm_data
+from ..tools import add_osm_data
if args.tiger_data:
-tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
-return tiger_data.add_tiger_data(args.tiger_data,
-args.config,
-args.threads or psutil.cpu_count() or 1,
-tokenizer)
+return asyncio.run(self._add_tiger_data(args))
osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
if args.file or args.diff:
@@ -99,3 +95,16 @@ class UpdateAddData:
osm2pgsql_params)
return 0
+async def _add_tiger_data(self, args: NominatimArgs) -> int:
+from ..tokenizer import factory as tokenizer_factory
+from ..tools import tiger_data
+assert args.tiger_data
+tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+return await tiger_data.add_tiger_data(args.tiger_data,
+args.config,
+args.threads or psutil.cpu_count() or 1,
+tokenizer)
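The change above sets the pattern used throughout this commit: the argparse-facing run() method stays synchronous and hands the real work to a private coroutine via asyncio.run(). A minimal stand-alone sketch of that shape (ExampleCommand is made up, not a Nominatim class):

import asyncio

class ExampleCommand:
    def run(self, path: str) -> int:
        # synchronous entry point expected by the CLI framework
        return asyncio.run(self._run_async(path))

    async def _run_async(self, path: str) -> int:
        await asyncio.sleep(0)      # placeholder for awaited database work
        print('processed', path)
        return 0

ExampleCommand().run('example.osm.pbf')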

View File

@@ -8,6 +8,7 @@
Implementation of the 'index' subcommand.
"""
import argparse
+import asyncio
import psutil
@@ -44,19 +45,7 @@ class UpdateIndex:
def run(self, args: NominatimArgs) -> int:
-from ..indexer.indexer import Indexer
-from ..tokenizer import factory as tokenizer_factory
-tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
-indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
-args.threads or psutil.cpu_count() or 1)
-if not args.no_boundaries:
-indexer.index_boundaries(args.minrank, args.maxrank)
-if not args.boundaries_only:
-indexer.index_by_rank(args.minrank, args.maxrank)
-indexer.index_postcodes()
+asyncio.run(self._do_index(args))
if not args.no_boundaries and not args.boundaries_only \
and args.minrank == 0 and args.maxrank == 30:
@@ -64,3 +53,22 @@ class UpdateIndex:
status.set_indexed(conn, True)
return 0
+async def _do_index(self, args: NominatimArgs) -> None:
+from ..tokenizer import factory as tokenizer_factory
+tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+from ..indexer.indexer import Indexer
+indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
+args.threads or psutil.cpu_count() or 1)
+has_pending = True # run at least once
+while has_pending:
+if not args.no_boundaries:
+await indexer.index_boundaries(args.minrank, args.maxrank)
+if not args.boundaries_only:
+await indexer.index_by_rank(args.minrank, args.maxrank)
+await indexer.index_postcodes()
+has_pending = indexer.has_pending()
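The new method keeps repeating the indexing passes as long as indexer.has_pending() reports leftover rows, running at least once. Stripped of the database, the control flow looks like this (the coroutine below is a stand-in for the real Indexer methods):

import asyncio

async def index_pass(queue: list) -> None:
    # stand-in for index_boundaries/index_by_rank/index_postcodes
    while queue:
        queue.pop()
        await asyncio.sleep(0)      # give other tasks a chance to run

async def index_until_done(queue: list) -> None:
    has_pending = True              # run at least once
    while has_pending:
        await index_pass(queue)
        has_pending = bool(queue)   # stand-in for indexer.has_pending()

asyncio.run(index_until_done(list(range(5))))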

View File

@@ -11,6 +11,7 @@ from typing import Tuple, Optional
import argparse
import logging
from pathlib import Path
+import asyncio
from ..config import Configuration
from ..db.connection import connect, table_exists
@@ -99,7 +100,7 @@ class UpdateRefresh:
args.project_dir, tokenizer)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or 1)
-indexer.index_postcodes()
+asyncio.run(indexer.index_postcodes())
else:
LOG.error("The place table doesn't exist. "
"Postcode updates on a frozen database is not possible.")

View File

@@ -13,6 +13,7 @@ import datetime as dt
import logging
import socket
import time
+import asyncio
from ..db import status
from ..db.connection import connect
@@ -123,7 +124,7 @@ class UpdateReplication:
return update_interval
-def _update(self, args: NominatimArgs) -> None:
+async def _update(self, args: NominatimArgs) -> None:
# pylint: disable=too-many-locals
from ..tools import replication
from ..indexer.indexer import Indexer
@@ -161,7 +162,7 @@ class UpdateReplication:
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
index_start = dt.datetime.now(dt.timezone.utc)
-indexer.index_full(analyse=False)
+await indexer.index_full(analyse=False)
with connect(dsn) as conn:
status.set_indexed(conn, True)
@@ -172,8 +173,7 @@ class UpdateReplication:
if state is replication.UpdateState.NO_CHANGES and \
args.catch_up or update_interval > 40*60:
-while indexer.has_pending():
-indexer.index_full(analyse=False)
+await indexer.index_full(analyse=False)
if LOG.isEnabledFor(logging.WARNING):
assert batchdate is not None
@@ -196,5 +196,5 @@ class UpdateReplication:
if args.check_for_updates:
return self._check_for_updates(args)
-self._update(args)
+asyncio.run(self._update(args))
return 0
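Note the split in these hunks: _update() is itself a coroutine now, so it awaits indexer.index_full() directly, while the synchronous run() wraps the whole thing in a single asyncio.run() call. That division matters because asyncio.run() cannot be called from code that is already running inside an event loop (it raises RuntimeError). A small illustration with stand-in names, not this module's code:

import asyncio

async def index_full() -> None:         # stand-in for Indexer.index_full
    await asyncio.sleep(0)

async def update_cycle() -> None:       # already async, like _update()
    await index_full()                  # inside a coroutine: just await

def run() -> int:                       # synchronous CLI entry point
    asyncio.run(update_cycle())         # the single place asyncio.run is used
    return 0

run()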

View File

@@ -11,6 +11,7 @@ from typing import Optional
import argparse
import logging
from pathlib import Path
+import asyncio
import psutil
@@ -71,14 +72,6 @@ class SetupAll:
def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches
-from ..data import country_info
-from ..tools import database_import, refresh, postcodes, freeze
-from ..indexer.indexer import Indexer
-num_threads = args.threads or psutil.cpu_count() or 1
-country_info.setup_country_config(args.config)
if args.osm_file is None and args.continue_at is None and not args.prepare_database:
raise UsageError("No input files (use --osm-file).")
@@ -90,6 +83,16 @@ class SetupAll:
"Cannot use --continue and --prepare-database together."
)
+return asyncio.run(self.async_run(args))
+async def async_run(self, args: NominatimArgs) -> int:
+from ..data import country_info
+from ..tools import database_import, refresh, postcodes, freeze
+from ..indexer.indexer import Indexer
+num_threads = args.threads or psutil.cpu_count() or 1
+country_info.setup_country_config(args.config)
if args.prepare_database or args.continue_at is None:
LOG.warning('Creating database')
@@ -99,39 +102,7 @@ class SetupAll:
return 0
if args.continue_at in (None, 'import-from-file'):
-files = args.get_osm_file_list()
-if not files:
-raise UsageError("No input files (use --osm-file).")
-if args.continue_at in ('import-from-file', None):
-# Check if the correct plugins are installed
-database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
-LOG.warning('Setting up country tables')
-country_info.setup_country_tables(args.config.get_libpq_dsn(),
-args.config.lib_dir.data,
-args.no_partitions)
-LOG.warning('Importing OSM data file')
-database_import.import_osm_data(files,
-args.osm2pgsql_options(0, 1),
-drop=args.no_updates,
-ignore_errors=args.ignore_errors)
-LOG.warning('Importing wikipedia importance data')
-data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
-if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
-data_path) > 0:
-LOG.error('Wikipedia importance dump file not found. '
-'Calculating importance values of locations will not '
-'use Wikipedia importance data.')
-LOG.warning('Importing secondary importance raster data')
-if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
-args.project_dir) != 0:
-LOG.error('Secondary importance file not imported. '
-'Falling back to default ranking.')
-self._setup_tables(args.config, args.reverse_only)
+self._base_import(args)
if args.continue_at in ('import-from-file', 'load-data', None):
LOG.warning('Initialise tables')
@@ -139,7 +110,7 @@ class SetupAll:
database_import.truncate_data_tables(conn)
LOG.warning('Load data into placex table')
-database_import.load_data(args.config.get_libpq_dsn(), num_threads)
+await database_import.load_data(args.config.get_libpq_dsn(), num_threads)
LOG.warning("Setting up tokenizer")
tokenizer = self._get_tokenizer(args.continue_at, args.config)
@@ -153,13 +124,13 @@ class SetupAll:
('import-from-file', 'load-data', 'indexing', None):
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
-indexer.index_full(analyse=not args.index_noanalyse)
+await indexer.index_full(analyse=not args.index_noanalyse)
LOG.warning('Post-process tables')
with connect(args.config.get_libpq_dsn()) as conn:
-database_import.create_search_indices(conn, args.config,
-drop=args.no_updates,
-threads=num_threads)
+await database_import.create_search_indices(conn, args.config,
+drop=args.no_updates,
+threads=num_threads)
LOG.warning('Create search index for default country names.')
country_info.create_country_names(conn, tokenizer,
args.config.get_str_list('LANGUAGES'))
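In async_run() the heavyweight steps are now awaited: load_data(), Indexer.index_full() and create_search_indices(), the last still receiving threads=num_threads. Purely as a hypothetical illustration of how such a threads= limit can be honoured on top of psycopg's async connections (this is not the actual database_import implementation), independent statements can be fanned out under a semaphore:

import asyncio
import psycopg

async def create_indices(dsn: str, statements: list, threads: int) -> None:
    sem = asyncio.Semaphore(threads)    # cap the number of parallel connections

    async def run_one(sql: str) -> None:
        async with sem:
            async with await psycopg.AsyncConnection.connect(dsn, autocommit=True) as conn:
                await conn.execute(sql)

    await asyncio.gather(*(run_one(sql) for sql in statements))

asyncio.run(create_indices('dbname=nominatim',
                           ['CREATE INDEX IF NOT EXISTS idx_example ON placex (rank_search)'],
                           threads=2))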
@@ -180,6 +151,45 @@ class SetupAll:
return 0
+def _base_import(self, args: NominatimArgs) -> None:
+from ..tools import database_import, refresh
+from ..data import country_info
+files = args.get_osm_file_list()
+if not files:
+raise UsageError("No input files (use --osm-file).")
+if args.continue_at in ('import-from-file', None):
+# Check if the correct plugins are installed
+database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
+LOG.warning('Setting up country tables')
+country_info.setup_country_tables(args.config.get_libpq_dsn(),
+args.config.lib_dir.data,
+args.no_partitions)
+LOG.warning('Importing OSM data file')
+database_import.import_osm_data(files,
+args.osm2pgsql_options(0, 1),
+drop=args.no_updates,
+ignore_errors=args.ignore_errors)
+LOG.warning('Importing wikipedia importance data')
+data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
+if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
+data_path) > 0:
+LOG.error('Wikipedia importance dump file not found. '
+'Calculating importance values of locations will not '
+'use Wikipedia importance data.')
+LOG.warning('Importing secondary importance raster data')
+if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
+args.project_dir) != 0:
+LOG.error('Secondary importance file not imported. '
+'Falling back to default ranking.')
+self._setup_tables(args.config, args.reverse_only)
def _setup_tables(self, config: Configuration, reverse_only: bool) -> None:
""" Set up the basic database layout: tables, indexes and functions.
"""