convert function creation to python

The new function always creates both normal and partitioned functions.
Also adds specialised connection and cursor classes that provide
frequently used helper functions.
This commit is contained in:
Sarah Hoffmann
2021-01-24 14:35:35 +01:00
parent 94fa7162be
commit 5b46fcad8e
9 changed files with 285 additions and 57 deletions

View File

@@ -8,10 +8,9 @@ import argparse
import logging
from pathlib import Path
import psycopg2
from .config import Configuration
from .tools.exec_utils import run_legacy_script, run_api_script
from .db.connection import connect
LOG = logging.getLogger()
@@ -370,27 +369,28 @@ class UpdateRefresh:
@staticmethod
def run(args):
import nominatim.tools.refresh
from .tools import refresh
conn = psycopg2.connect(args.config.get_libpq_dsn())
conn = connect(args.config.get_libpq_dsn())
if args.postcodes:
LOG.warning("Update postcodes centroid")
nominatim.tools.refresh.update_postcodes(conn, args.data_dir)
refresh.update_postcodes(conn, args.data_dir)
if args.word_counts:
LOG.warning('Recompute frequency of full-word search terms')
nominatim.tools.refresh.recompute_word_counts(conn, args.data_dir)
refresh.recompute_word_counts(conn, args.data_dir)
if args.address_levels:
cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
LOG.warning('Updating address levels from %s', cfg)
nominatim.tools.refresh.load_address_levels_from_file(conn, cfg)
refresh.load_address_levels_from_file(conn, cfg)
if args.functions:
params = ['setup.php', '--create-functions', '--create-partition-functions']
if args.diffs:
params.append('--enable-diff-updates')
if args.enable_debug_statements:
params.append('--enable-debug-statements')
run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
LOG.warning('Create functions')
refresh.create_functions(conn, args.config, args.data_dir,
args.diffs, args.enable_debug_statements)
if args.wiki_data:
run_legacy_script('setup.php', '--import-wikipedia-articles',
nominatim_env=args, throw_on_fail=True)

View File

@@ -20,6 +20,7 @@ class Configuration:
"""
def __init__(self, project_dir, config_dir):
self.project_dir = project_dir
self._config = dotenv_values(str((config_dir / 'env.defaults').resolve()))
if project_dir is not None:
self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
@@ -36,6 +37,13 @@ class Configuration:
return os.environ.get(name) or self._config[name]
def get_bool(self, name):
    """ Interpret the configuration parameter `name` as a boolean.

        The value is looked up through `__getattr__` and compared
        case-insensitively: '1', 'yes' and 'true' yield True, every
        other value yields False.
    """
    truthy_values = ('1', 'yes', 'true')
    value = self.__getattr__(name)

    return value.lower() in truthy_values
def get_libpq_dsn(self):
""" Get configured database DSN converted into the key/value format
understood by libpq and psycopg.

View File

@@ -2,6 +2,7 @@
Functions for bringing auxiliary data in the database up-to-date.
"""
import json
import re
from psycopg2.extras import execute_values
@@ -69,3 +70,100 @@ def load_address_levels_from_file(conn, config_file):
"""
with config_file.open('r') as fdesc:
load_address_levels(conn, 'address_levels', json.load(fdesc))
# SQL files with PL/pgSQL functions that are loaded unconditionally,
# in the order given here.
PLPGSQL_BASE_MODULES = (
    'utils.sql',
    'normalization.sql',
    'ranking.sql',
    'importance.sql',
    'address_lookup.sql',
    'interpolation.sql',
)

# Pairs of (table name, SQL file). A file is only loaded when the table
# it is paired with exists in the database.
PLPGSQL_TABLE_MODULES = (
    ('place', 'place_triggers.sql'),
    ('placex', 'placex_triggers.sql'),
    ('location_postcode', 'postcode_triggers.sql'),
)
def _get_standard_function_sql(conn, config, sql_dir, enable_diff_updates, enable_debug):
    """ Assemble the SQL source of the standard PL/pgSQL functions.

        Reads the applicable function files from the 'functions'
        subdirectory of `sql_dir`, substitutes the placeholders they
        contain and returns the result as a single string. The SQL is
        only built here, not executed.
    """
    function_dir = sql_dir / 'functions'

    # The base modules are always read.
    parts = [(function_dir / fname).read_text() for fname in PLPGSQL_BASE_MODULES]
    # The remaining files are only read when their table is present.
    parts.extend((function_dir / fname).read_text()
                 for table, fname in PLPGSQL_TABLE_MODULES
                 if conn.table_exists(table))
    sql = ''.join(parts)

    # An unset module path falls back to the 'module' directory of the project.
    module_path = config.DATABASE_MODULE_PATH
    if not module_path:
        module_path = str((config.project_dir / 'module').resolve())
    sql = sql.replace('{modulepath}', module_path)

    # Each entry: (apply?, marker to look for, text to put in its place).
    substitutions = (
        (enable_diff_updates, 'RETURN NEW; -- %DIFFUPDATES%', '--'),
        (enable_debug, '--DEBUG:', ''),
        (config.get_bool('LIMIT_REINDEXING'), '--LIMIT INDEXING:', ''),
        (not config.get_bool('USE_US_TIGER_DATA'), '-- %NOTIGERDATA% ', ''),
        (not config.get_bool('USE_AUX_LOCATION_DATA'), '-- %NOAUXDATA% ', ''),
    )
    for active, marker, replacement in substitutions:
        if active:
            sql = sql.replace(marker, replacement)

    # The reverse-only flag is 'true' exactly when there is no search_name table.
    has_search_name = conn.table_exists('search_name')

    return sql.replace('%REVERSE-ONLY%', 'false' if has_search_name else 'true')
def replace_partition_string(sql, partitions):
    """ Replace the partition templates in `sql` with the actual partition code.

        A template is the text between a line starting with '-- start' and
        the next line starting with '-- end'. It is replaced by one copy of
        itself per entry in `partitions`, where every occurrence of
        '-partition-' is substituted with the string form of the entry.
        The marker lines themselves are kept and text outside of the
        markers is never touched, even when it is identical to a template.

        `partitions` must be re-iterable (e.g. a list) when `sql` contains
        more than one template. Returns the expanded SQL string.
    """
    def _expand(match):
        template = match.group(1)
        expanded = ''.join(template.replace('-partition-', str(part))
                           for part in partitions)
        # Only the captured template is expanded; put the markers back.
        return '-- start' + expanded + '-- end'

    # Substituting in place (instead of a global str.replace() of the
    # template text) guarantees that only the matched span is changed.
    return re.sub(r'^-- start(.*?)^-- end', _expand, sql, flags=re.M | re.S)
def _get_partition_function_sql(conn, sql_dir):
    """ Build the SQL for the functions that work on partition tables.

        Collects the distinct partition values from the country_name
        table, always adding partition 0, and expands the templates in
        'partition-functions.src.sql' for them in ascending order.
        Returns the resulting SQL string.
    """
    partitions = {0}
    with conn.cursor() as cur:
        cur.execute('SELECT distinct partition FROM country_name')
        partitions.update(row[0] for row in cur)

    template = (sql_dir / 'partition-functions.src.sql').read_text()

    return replace_partition_string(template, sorted(partitions))
def create_functions(conn, config, data_dir,
                     enable_diff_updates=True, enable_debug=False):
    """ (Re)create the PL/pgSQL functions.

        The SQL for the standard functions and for the partition
        functions is read from the 'sql' subdirectory of `data_dir`,
        executed as one statement on `conn` and then committed.
    """
    sql_dir = data_dir / 'sql'

    # Standard functions come first, partition functions are appended.
    pieces = [
        _get_standard_function_sql(conn, config, sql_dir,
                                   enable_diff_updates, enable_debug),
        _get_partition_function_sql(conn, sql_dir),
    ]

    with conn.cursor() as cur:
        cur.execute(''.join(pieces))

    conn.commit()