introduce jinja2 for preprocessing SQL

Replaces the various hand-crafted placeholder substitutions, each
with its own format, with a single Jinja2 templating mechanism.
Templates get full access to the configuration if necessary.
This commit is contained in:
Sarah Hoffmann
2021-03-03 17:37:22 +01:00
parent 8ea7e04363
commit d2bd6aa78d
11 changed files with 320 additions and 307 deletions

View File

@@ -0,0 +1,62 @@
"""
Preprocessing of SQL files.
"""
import jinja2
def _get_partitions(conn):
    """ Get the set of partitions currently in use.

        Collects the distinct values of the `partition` column of the
        `country_name` table. Partition 0 is always part of the result,
        whether or not the table lists it.

        `conn` must offer `cursor()` as a context manager; the cursor
        is iterated for the result rows after `execute()`.
    """
    # Partition 0 is included unconditionally.
    partitions = {0}

    with conn.cursor() as cur:
        cur.execute('SELECT DISTINCT partition FROM country_name')
        # Each row is a one-column sequence holding the partition id.
        partitions.update(row[0] for row in cur)

    return partitions
def _get_tables(conn):
    """ Return the set of tables currently in use.

        The names are taken from `pg_tables`, restricted to the
        'public' schema.

        NOTE(review): the previous docstring ended with the unfinished
        sentence "Only includes non-partitioned"; the query itself
        applies no filter besides the schema name - confirm the intent.
    """
    with conn.cursor() as cur:
        cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
        # Iterate the cursor directly, no intermediate list is needed.
        return {row[0] for row in cur}
class SQLPreprocessor: # pylint: disable=too-few-public-methods
    """ An environment for preprocessing SQL files from the
        lib-sql directory.

        Templates are loaded from `sqllib_dir` and rendered with the
        jinja2 templating library, so they follow its syntax. The
        following global variables are available to every template:

        * `config` - the configuration object handed to the constructor
        * `db` - a dictionary with the keys `partitions`, `tables`
          and `reverse_only` (true when there is no `search_name` table)
        * `modulepath` - `config.DATABASE_MODULE_PATH` or, when that is
          empty, the resolved `module` directory of the project directory

        The variables may be overwritten when rendering an SQL file.
    """

    def __init__(self, conn, config, sqllib_dir):
        template_loader = jinja2.FileSystemLoader(str(sqllib_dir))
        self.env = jinja2.Environment(autoescape=False, loader=template_loader)

        # Snapshot of the database state, taken once at construction time.
        partitions = _get_partitions(conn)
        tables = _get_tables(conn)
        db_info = {
            'partitions': partitions,
            'tables': tables,
            'reverse_only': 'search_name' not in tables,
        }

        # An explicitly configured module path wins over the project default.
        module_path = config.DATABASE_MODULE_PATH
        if not module_path:
            module_path = str((config.project_dir / 'module').resolve())

        self.env.globals['config'] = config
        self.env.globals['db'] = db_info
        self.env.globals['modulepath'] = module_path

    def run_sql_file(self, conn, name, **kwargs):
        """ Render the SQL template `name` and execute the result on the
            given connection, committing afterwards. The keyword arguments
            are handed to the template as additional rendering parameters.
        """
        template = self.env.get_template(name)
        rendered_sql = template.render(**kwargs)

        with conn.cursor() as cur:
            cur.execute(rendered_sql)

        conn.commit()

View File

@@ -3,12 +3,12 @@ Functions for bringing auxiliary data in the database up-to-date.
"""
import json
import logging
import re
from textwrap import dedent
from psycopg2.extras import execute_values
from ..db.utils import execute_file
from ..db.sql_preprocessor import SQLPreprocessor
from ..version import NOMINATIM_VERSION
LOG = logging.getLogger()
@@ -76,100 +76,17 @@ def load_address_levels_from_file(conn, config_file):
with config_file.open('r') as fdesc:
load_address_levels(conn, 'address_levels', json.load(fdesc))
# SQL files with the basic set of functions that is always imported.
# The files are read in the order given here.
PLPGSQL_BASE_MODULES = (
    'utils.sql',
    'normalization.sql',
    'ranking.sql',
    'importance.sql',
    'address_lookup.sql',
    'interpolation.sql',
)

# (table name, SQL file) pairs: the file is only imported when
# the named table is present.
PLPGSQL_TABLE_MODULES = (
    ('place', 'place_triggers.sql'),
    ('placex', 'placex_triggers.sql'),
    ('location_postcode', 'postcode_triggers.sql'),
)
def _get_standard_function_sql(conn, config, sql_dir, enable_diff_updates, enable_debug):
    """ Read all applicable SQL files containing PL/pgSQL functions,
        replace the placeholders and return the resulting SQL as a
        single string. Nothing is executed here.

        The files are looked up in the `functions` subdirectory of
        `sql_dir`. Files from PLPGSQL_BASE_MODULES are always read;
        files from PLPGSQL_TABLE_MODULES only when `conn.table_exists()`
        reports their table as present.

        `enable_diff_updates` and `enable_debug` as well as the
        configuration settings LIMIT_REINDEXING, USE_US_TIGER_DATA and
        USE_AUX_LOCATION_DATA decide which markers are removed from
        the SQL.
    """
    sql_func_dir = sql_dir / 'functions'

    # Collect the file contents and join once at the end.
    parts = []

    # Get the basic set of functions that is always imported.
    for sql_file in PLPGSQL_BASE_MODULES:
        with (sql_func_dir / sql_file).open('r') as fdesc:
            parts.append(fdesc.read())

    # Some files require the presence of a certain table
    for table, fname in PLPGSQL_TABLE_MODULES:
        if conn.table_exists(table):
            with (sql_func_dir / fname).open('r') as fdesc:
                parts.append(fdesc.read())

    sql = ''.join(parts)

    # Replace placeholders.
    sql = sql.replace('{modulepath}',
                      config.DATABASE_MODULE_PATH
                      or str((config.project_dir / 'module').resolve()))

    if enable_diff_updates:
        # Drops the marked `RETURN NEW;` statements, leaving a bare comment.
        sql = sql.replace('RETURN NEW; -- %DIFFUPDATES%', '--')

    if enable_debug:
        # Removing the marker activates whatever follows it on the line.
        sql = sql.replace('--DEBUG:', '')

    if config.get_bool('LIMIT_REINDEXING'):
        sql = sql.replace('--LIMIT INDEXING:', '')

    # The following two markers are removed when the feature is switched OFF.
    if not config.get_bool('USE_US_TIGER_DATA'):
        sql = sql.replace('-- %NOTIGERDATA% ', '')

    if not config.get_bool('USE_AUX_LOCATION_DATA'):
        sql = sql.replace('-- %NOAUXDATA% ', '')

    # Without a search_name table, %REVERSE-ONLY% becomes 'true'.
    reverse_only = 'false' if conn.table_exists('search_name') else 'true'

    return sql.replace('%REVERSE-ONLY%', reverse_only)
def replace_partition_string(sql, partitions):
    """ Replace a partition template with the actual partition code.

        A template is the text between a line starting with `-- start`
        and the next line starting with `-- end`. The text between the
        two markers is repeated once for every entry of `partitions`,
        with each `-partition-` replaced by the string form of that
        entry. The marker lines themselves are kept.

        Only the text between the markers is rewritten; identical text
        elsewhere in `sql` is left alone. `partitions` may be any
        iterable, it is read exactly once.

        Returns the SQL with all templates expanded.
    """
    # Materialise once so that one-shot iterables serve every template.
    partition_ids = [str(part) for part in partitions]

    def _expand(match):
        # Re-emit the markers around the repeated template body.
        body = match.group(1)
        copies = (body.replace('-partition-', part) for part in partition_ids)
        return '-- start' + ''.join(copies) + '-- end'

    # A callable replacement is inserted verbatim (no backslash processing)
    # and confines each expansion to its own marker pair.
    return re.sub(r'^-- start(.*?)^-- end', _expand, sql, flags=re.M | re.S)
def _get_partition_function_sql(conn, sql_dir):
    """ Create functions that work on partition tables.

        Reads the template file `partition-functions.src.sql` from
        `sql_dir` and expands its partition templates for every
        partition found in the `country_name` table plus partition 0.
        The partitions are processed in sorted order.

        Returns the expanded SQL; nothing is executed here.
    """
    # Partition 0 is included unconditionally.
    partitions = {0}

    with conn.cursor() as cur:
        cur.execute('SELECT distinct partition FROM country_name')
        partitions.update(row[0] for row in cur)

    with (sql_dir / 'partition-functions.src.sql').open('r') as fdesc:
        sql = fdesc.read()

    return replace_partition_string(sql, sorted(partitions))
# NOTE(review): this span carries two `def create_functions(...)` header
# lines (parameter `sql_dir` vs. `sqllib_dir`) and statements of two
# different bodies mixed together - presumably the old and the new
# version of the function shown without diff markers. Confirm which
# lines belong to the current code before relying on it.
def create_functions(conn, config, sql_dir,
def create_functions(conn, config, sqllib_dir,
enable_diff_updates=True, enable_debug=False):
""" (Re)create the PL/pgSQL functions.
"""
# Variant using `sql_dir`: assemble the SQL text from the standard and
# the partition function helpers.
sql = _get_standard_function_sql(conn, config, sql_dir,
enable_diff_updates, enable_debug)
sql += _get_partition_function_sql(conn, sql_dir)
# Variant using `sqllib_dir`: here `sql` is a SQLPreprocessor instance,
# not an SQL string.
sql = SQLPreprocessor(conn, config, sqllib_dir)
# Presumably part of the `sql_dir` variant, where `sql` is a string.
with conn.cursor() as cur:
cur.execute(sql)
# Renders and runs the 'functions.sql' template; note the inverted flag:
# `disable_diff_update` is the negation of `enable_diff_updates`.
sql.run_sql_file(conn, 'functions.sql',
disable_diff_update=not enable_diff_updates,
debug=enable_debug)
conn.commit()
WEBSITE_SCRIPTS = (