mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 10:27:57 +00:00
convert functon creation to python
The new functions always creates normal and partitioned functions. Also adds specialised connection and cursor classes for adding frequently used helper functions.
This commit is contained in:
@@ -143,12 +143,7 @@ if ($aResult['init-updates']) {
|
||||
}
|
||||
|
||||
if (!$aResult['no-update-functions']) {
|
||||
// instantiate setupClass to use the function therein
|
||||
$cSetup = new SetupFunctions(array(
|
||||
'enable-diff-updates' => true,
|
||||
'verbose' => $aResult['verbose']
|
||||
));
|
||||
$cSetup->createFunctions();
|
||||
(clone($oNominatimCmd))->addParams('refresh', '--functions')->run();
|
||||
}
|
||||
|
||||
$sDatabaseDate = getDatabaseDate($oDB);
|
||||
|
||||
@@ -290,9 +290,7 @@ class SetupFunctions
|
||||
public function createPartitionFunctions()
|
||||
{
|
||||
info('Create Partition Functions');
|
||||
|
||||
$sTemplate = file_get_contents(CONST_DataDir.'/sql/partition-functions.src.sql');
|
||||
$this->pgsqlRunPartitionScript($sTemplate);
|
||||
$this->createSqlFunctions(); // also create partition functions
|
||||
}
|
||||
|
||||
public function importWikipediaArticles()
|
||||
@@ -788,43 +786,18 @@ class SetupFunctions
|
||||
|
||||
private function createSqlFunctions()
|
||||
{
|
||||
$sBasePath = CONST_DataDir.'/sql/functions/';
|
||||
$sTemplate = file_get_contents($sBasePath.'utils.sql');
|
||||
$sTemplate .= file_get_contents($sBasePath.'normalization.sql');
|
||||
$sTemplate .= file_get_contents($sBasePath.'ranking.sql');
|
||||
$sTemplate .= file_get_contents($sBasePath.'importance.sql');
|
||||
$sTemplate .= file_get_contents($sBasePath.'address_lookup.sql');
|
||||
$sTemplate .= file_get_contents($sBasePath.'interpolation.sql');
|
||||
if ($this->db()->tableExists('place')) {
|
||||
$sTemplate .= file_get_contents($sBasePath.'place_triggers.sql');
|
||||
}
|
||||
if ($this->db()->tableExists('placex')) {
|
||||
$sTemplate .= file_get_contents($sBasePath.'placex_triggers.sql');
|
||||
}
|
||||
if ($this->db()->tableExists('location_postcode')) {
|
||||
$sTemplate .= file_get_contents($sBasePath.'postcode_triggers.sql');
|
||||
}
|
||||
$sTemplate = str_replace('{modulepath}', $this->sModulePath, $sTemplate);
|
||||
if ($this->bEnableDiffUpdates) {
|
||||
$sTemplate = str_replace('RETURN NEW; -- %DIFFUPDATES%', '--', $sTemplate);
|
||||
$oCmd = (clone($this->oNominatimCmd))
|
||||
->addParams('refresh', '--functions');
|
||||
|
||||
if (!$this->bEnableDiffUpdates) {
|
||||
$oCmd->addParams('--no-diff-updates');
|
||||
}
|
||||
|
||||
if ($this->bEnableDebugStatements) {
|
||||
$sTemplate = str_replace('--DEBUG:', '', $sTemplate);
|
||||
}
|
||||
if (getSettingBool('LIMIT_REINDEXING')) {
|
||||
$sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate);
|
||||
}
|
||||
if (!getSettingBool('USE_US_TIGER_DATA')) {
|
||||
$sTemplate = str_replace('-- %NOTIGERDATA% ', '', $sTemplate);
|
||||
}
|
||||
if (!getSettingBool('USE_AUX_LOCATION_DATA')) {
|
||||
$sTemplate = str_replace('-- %NOAUXDATA% ', '', $sTemplate);
|
||||
$oCmd->addParams('--enable-debug-statements');
|
||||
}
|
||||
|
||||
$sReverseOnly = $this->dbReverseOnly() ? 'true' : 'false';
|
||||
$sTemplate = str_replace('%REVERSE-ONLY%', $sReverseOnly, $sTemplate);
|
||||
|
||||
$this->pgsqlRunScript($sTemplate);
|
||||
$oCmd->run();
|
||||
}
|
||||
|
||||
private function pgsqlRunPartitionScript($sTemplate)
|
||||
|
||||
@@ -8,10 +8,9 @@ import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg2
|
||||
|
||||
from .config import Configuration
|
||||
from .tools.exec_utils import run_legacy_script, run_api_script
|
||||
from .db.connection import connect
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@@ -370,27 +369,28 @@ class UpdateRefresh:
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
import nominatim.tools.refresh
|
||||
from .tools import refresh
|
||||
|
||||
conn = psycopg2.connect(args.config.get_libpq_dsn())
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
|
||||
if args.postcodes:
|
||||
LOG.warning("Update postcodes centroid")
|
||||
nominatim.tools.refresh.update_postcodes(conn, args.data_dir)
|
||||
refresh.update_postcodes(conn, args.data_dir)
|
||||
|
||||
if args.word_counts:
|
||||
LOG.warning('Recompute frequency of full-word search terms')
|
||||
nominatim.tools.refresh.recompute_word_counts(conn, args.data_dir)
|
||||
refresh.recompute_word_counts(conn, args.data_dir)
|
||||
|
||||
if args.address_levels:
|
||||
cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
|
||||
LOG.warning('Updating address levels from %s', cfg)
|
||||
nominatim.tools.refresh.load_address_levels_from_file(conn, cfg)
|
||||
refresh.load_address_levels_from_file(conn, cfg)
|
||||
|
||||
if args.functions:
|
||||
params = ['setup.php', '--create-functions', '--create-partition-functions']
|
||||
if args.diffs:
|
||||
params.append('--enable-diff-updates')
|
||||
if args.enable_debug_statements:
|
||||
params.append('--enable-debug-statements')
|
||||
run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
|
||||
LOG.warning('Create functions')
|
||||
refresh.create_functions(conn, args.config, args.data_dir,
|
||||
args.diffs, args.enable_debug_statements)
|
||||
|
||||
if args.wiki_data:
|
||||
run_legacy_script('setup.php', '--import-wikipedia-articles',
|
||||
nominatim_env=args, throw_on_fail=True)
|
||||
|
||||
@@ -20,6 +20,7 @@ class Configuration:
|
||||
"""
|
||||
|
||||
def __init__(self, project_dir, config_dir):
|
||||
self.project_dir = project_dir
|
||||
self._config = dotenv_values(str((config_dir / 'env.defaults').resolve()))
|
||||
if project_dir is not None:
|
||||
self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
|
||||
@@ -36,6 +37,13 @@ class Configuration:
|
||||
|
||||
return os.environ.get(name) or self._config[name]
|
||||
|
||||
def get_bool(self, name):
|
||||
""" Return the given configuration parameters as a boolean.
|
||||
Values of '1', 'yes' and 'true' are accepted as truthy values,
|
||||
everything else is interpreted as false.
|
||||
"""
|
||||
return self.__getattr__(name).lower() in ('1', 'yes', 'true')
|
||||
|
||||
def get_libpq_dsn(self):
|
||||
""" Get configured database DSN converted into the key/value format
|
||||
understood by libpq and psycopg.
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Functions for bringing auxiliary data in the database up-to-date.
|
||||
"""
|
||||
import json
|
||||
import re
|
||||
|
||||
from psycopg2.extras import execute_values
|
||||
|
||||
@@ -69,3 +70,100 @@ def load_address_levels_from_file(conn, config_file):
|
||||
"""
|
||||
with config_file.open('r') as fdesc:
|
||||
load_address_levels(conn, 'address_levels', json.load(fdesc))
|
||||
|
||||
PLPGSQL_BASE_MODULES = (
|
||||
'utils.sql',
|
||||
'normalization.sql',
|
||||
'ranking.sql',
|
||||
'importance.sql',
|
||||
'address_lookup.sql',
|
||||
'interpolation.sql'
|
||||
)
|
||||
|
||||
PLPGSQL_TABLE_MODULES = (
|
||||
('place', 'place_triggers.sql'),
|
||||
('placex', 'placex_triggers.sql'),
|
||||
('location_postcode', 'postcode_triggers.sql')
|
||||
)
|
||||
|
||||
def _get_standard_function_sql(conn, config, sql_dir, enable_diff_updates, enable_debug):
|
||||
""" Read all applicable SQLs containing PL/pgSQL functions, replace
|
||||
placefolders and execute them.
|
||||
"""
|
||||
sql_func_dir = sql_dir / 'functions'
|
||||
sql = ''
|
||||
|
||||
# Get the basic set of functions that is always imported.
|
||||
for sql_file in PLPGSQL_BASE_MODULES:
|
||||
with (sql_func_dir / sql_file).open('r') as fdesc:
|
||||
sql += fdesc.read()
|
||||
|
||||
# Some files require the presence of a certain table
|
||||
for table, fname in PLPGSQL_TABLE_MODULES:
|
||||
if conn.table_exists(table):
|
||||
with (sql_func_dir / fname).open('r') as fdesc:
|
||||
sql += fdesc.read()
|
||||
|
||||
# Replace placeholders.
|
||||
sql = sql.replace('{modulepath}',
|
||||
config.DATABASE_MODULE_PATH or str((config.project_dir / 'module').resolve()))
|
||||
|
||||
if enable_diff_updates:
|
||||
sql = sql.replace('RETURN NEW; -- %DIFFUPDATES%', '--')
|
||||
|
||||
if enable_debug:
|
||||
sql = sql.replace('--DEBUG:', '')
|
||||
|
||||
if config.get_bool('LIMIT_REINDEXING'):
|
||||
sql = sql.replace('--LIMIT INDEXING:', '')
|
||||
|
||||
if not config.get_bool('USE_US_TIGER_DATA'):
|
||||
sql = sql.replace('-- %NOTIGERDATA% ', '')
|
||||
|
||||
if not config.get_bool('USE_AUX_LOCATION_DATA'):
|
||||
sql = sql.replace('-- %NOAUXDATA% ', '')
|
||||
|
||||
reverse_only = 'false' if conn.table_exists('search_name') else 'true'
|
||||
|
||||
return sql.replace('%REVERSE-ONLY%', reverse_only)
|
||||
|
||||
|
||||
def replace_partition_string(sql, partitions):
|
||||
""" Replace a partition template with the actual partition code.
|
||||
"""
|
||||
for match in re.findall('^-- start(.*?)^-- end', sql, re.M | re.S):
|
||||
repl = ''
|
||||
for part in partitions:
|
||||
repl += match.replace('-partition-', str(part))
|
||||
sql = sql.replace(match, repl)
|
||||
|
||||
return sql
|
||||
|
||||
def _get_partition_function_sql(conn, sql_dir):
|
||||
""" Create functions that work on partition tables.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('SELECT distinct partition FROM country_name')
|
||||
partitions = set([0])
|
||||
for row in cur:
|
||||
partitions.add(row[0])
|
||||
|
||||
with (sql_dir / 'partition-functions.src.sql').open('r') as fdesc:
|
||||
sql = fdesc.read()
|
||||
|
||||
return replace_partition_string(sql, sorted(partitions))
|
||||
|
||||
def create_functions(conn, config, data_dir,
|
||||
enable_diff_updates=True, enable_debug=False):
|
||||
""" (Re)create the PL/pgSQL functions.
|
||||
"""
|
||||
sql_dir = data_dir / 'sql'
|
||||
|
||||
sql = _get_standard_function_sql(conn, config, sql_dir,
|
||||
enable_diff_updates, enable_debug)
|
||||
sql += _get_partition_function_sql(conn, sql_dir)
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql)
|
||||
|
||||
conn.commit()
|
||||
|
||||
@@ -98,7 +98,6 @@ def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("command,params", [
|
||||
('functions', ('setup.php',)),
|
||||
('wiki-data', ('setup.php', '--import-wikipedia-articles')),
|
||||
('importance', ('update.php', '--recompute-importance')),
|
||||
('website', ('setup.php', '--setup-website')),
|
||||
@@ -114,6 +113,7 @@ def test_refresh_legacy_command(mock_run_legacy, command, params):
|
||||
('postcodes', 'update_postcodes'),
|
||||
('word-counts', 'recompute_word_counts'),
|
||||
('address-levels', 'load_address_levels_from_file'),
|
||||
('functions', 'create_functions'),
|
||||
])
|
||||
def test_refresh_command(monkeypatch, command, func):
|
||||
func_mock = MockParamCapture()
|
||||
@@ -129,7 +129,6 @@ def test_refresh_importance_computed_after_wiki_import(mock_run_legacy):
|
||||
assert mock_run_legacy.called == 2
|
||||
assert mock_run_legacy.last_args == ('update.php', '--recompute-importance')
|
||||
|
||||
|
||||
@pytest.mark.parametrize("params", [
|
||||
('search', '--query', 'new'),
|
||||
('reverse', '--lat', '0', '--lon', '0'),
|
||||
|
||||
@@ -15,6 +15,7 @@ def test_no_project_dir():
|
||||
|
||||
assert config.DATABASE_WEBUSER == 'www-data'
|
||||
|
||||
|
||||
def test_prefer_project_setting_over_default():
|
||||
with tempfile.TemporaryDirectory() as project_dir:
|
||||
with open(project_dir + '/.env', 'w') as envfile:
|
||||
@@ -24,6 +25,7 @@ def test_prefer_project_setting_over_default():
|
||||
|
||||
assert config.DATABASE_WEBUSER == 'apache'
|
||||
|
||||
|
||||
def test_prefer_os_environ_over_project_setting(monkeypatch):
|
||||
with tempfile.TemporaryDirectory() as project_dir:
|
||||
with open(project_dir + '/.env', 'w') as envfile:
|
||||
@@ -35,6 +37,7 @@ def test_prefer_os_environ_over_project_setting(monkeypatch):
|
||||
|
||||
assert config.DATABASE_WEBUSER == 'nobody'
|
||||
|
||||
|
||||
def test_get_os_env_add_defaults(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
@@ -42,6 +45,7 @@ def test_get_os_env_add_defaults(monkeypatch):
|
||||
|
||||
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'www-data'
|
||||
|
||||
|
||||
def test_get_os_env_prefer_os_environ(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
@@ -49,11 +53,13 @@ def test_get_os_env_prefer_os_environ(monkeypatch):
|
||||
|
||||
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody'
|
||||
|
||||
|
||||
def test_get_libpq_dsn_convert_default():
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
assert config.get_libpq_dsn() == 'dbname=nominatim'
|
||||
|
||||
|
||||
def test_get_libpq_dsn_convert_php(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
@@ -62,6 +68,7 @@ def test_get_libpq_dsn_convert_php(monkeypatch):
|
||||
|
||||
assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost'
|
||||
|
||||
|
||||
def test_get_libpq_dsn_convert_libpq(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
@@ -69,3 +76,20 @@ def test_get_libpq_dsn_convert_libpq(monkeypatch):
|
||||
'host=localhost dbname=gis password=foo')
|
||||
|
||||
assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo'
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value,result",
|
||||
[(x, True) for x in ('1', 'true', 'True', 'yes', 'YES')] +
|
||||
[(x, False) for x in ('0', 'false', 'no', 'NO', 'x')])
|
||||
def test_get_bool(monkeypatch, value, result):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_FOOBAR', value)
|
||||
|
||||
assert config.get_bool('FOOBAR') == result
|
||||
|
||||
def test_get_bool_empty():
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
assert config.DATABASE_MODULE_PATH == ''
|
||||
assert config.get_bool('DATABASE_MODULE_PATH') == False
|
||||
|
||||
32
test/python/test_db_connection.py
Normal file
32
test/python/test_db_connection.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""
|
||||
Tests for specialised conenction and cursor classes.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from nominatim.db.connection import connect
|
||||
|
||||
@pytest.fixture
|
||||
def db(temp_db):
|
||||
conn = connect('dbname=' + temp_db)
|
||||
yield conn
|
||||
conn.close()
|
||||
|
||||
|
||||
def test_connection_table_exists(db, temp_db_cursor):
|
||||
assert db.table_exists('foobar') == False
|
||||
|
||||
temp_db_cursor.execute('CREATE TABLE foobar (id INT)')
|
||||
|
||||
assert db.table_exists('foobar') == True
|
||||
|
||||
|
||||
def test_cursor_scalar(db, temp_db_cursor):
|
||||
temp_db_cursor.execute('CREATE TABLE dummy (id INT)')
|
||||
|
||||
with db.cursor() as cur:
|
||||
assert cur.scalar('SELECT count(*) FROM dummy') == 0
|
||||
|
||||
def test_cursor_scalar_many_rows(db):
|
||||
with db.cursor() as cur:
|
||||
with pytest.raises(ValueError):
|
||||
cur.scalar('SELECT * FROM pg_tables')
|
||||
99
test/python/test_tools_refresh_create_functions.py
Normal file
99
test/python/test_tools_refresh_create_functions.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""
|
||||
Tests for creating PL/pgSQL functions for Nominatim.
|
||||
"""
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.tools.refresh import _get_standard_function_sql, _get_partition_function_sql
|
||||
|
||||
SQL_DIR = (Path(__file__) / '..' / '..' / '..' / 'sql').resolve()
|
||||
|
||||
@pytest.fixture
|
||||
def db(temp_db):
|
||||
conn = connect('dbname=' + temp_db)
|
||||
yield conn
|
||||
conn.close()
|
||||
|
||||
@pytest.fixture
|
||||
def db_with_tables(db):
|
||||
with db.cursor() as cur:
|
||||
for table in ('place', 'placex', 'location_postcode'):
|
||||
cur.execute('CREATE TABLE {} (place_id BIGINT)'.format(table))
|
||||
|
||||
return db
|
||||
|
||||
|
||||
def test_standard_functions_replace_module_default(db, def_config):
|
||||
def_config.project_dir = Path('.')
|
||||
sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False)
|
||||
|
||||
assert sql
|
||||
assert sql.find('{modulepath}') < 0
|
||||
assert sql.find("'{}'".format(Path('module/nominatim.so').resolve())) >= 0
|
||||
|
||||
|
||||
def test_standard_functions_replace_module_custom(monkeypatch, db, def_config):
|
||||
monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'custom')
|
||||
sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False)
|
||||
|
||||
assert sql
|
||||
assert sql.find('{modulepath}') < 0
|
||||
assert sql.find("'custom/nominatim.so'") >= 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", (True, False))
|
||||
def test_standard_functions_enable_diff(db_with_tables, def_config, enabled):
|
||||
def_config.project_dir = Path('.')
|
||||
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, enabled, False)
|
||||
|
||||
assert sql
|
||||
assert (sql.find('%DIFFUPDATES%') < 0) == enabled
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", (True, False))
|
||||
def test_standard_functions_enable_debug(db_with_tables, def_config, enabled):
|
||||
def_config.project_dir = Path('.')
|
||||
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, enabled)
|
||||
|
||||
assert sql
|
||||
assert (sql.find('--DEBUG') < 0) == enabled
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", (True, False))
|
||||
def test_standard_functions_enable_limit_reindexing(monkeypatch, db_with_tables, def_config, enabled):
|
||||
def_config.project_dir = Path('.')
|
||||
monkeypatch.setenv('NOMINATIM_LIMIT_REINDEXING', 'yes' if enabled else 'no')
|
||||
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
|
||||
|
||||
assert sql
|
||||
assert (sql.find('--LIMIT INDEXING') < 0) == enabled
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", (True, False))
|
||||
def test_standard_functions_enable_tiger(monkeypatch, db_with_tables, def_config, enabled):
|
||||
def_config.project_dir = Path('.')
|
||||
monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA', 'yes' if enabled else 'no')
|
||||
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
|
||||
|
||||
assert sql
|
||||
assert (sql.find('%NOTIGERDATA%') >= 0) == enabled
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", (True, False))
|
||||
def test_standard_functions_enable_aux(monkeypatch, db_with_tables, def_config, enabled):
|
||||
def_config.project_dir = Path('.')
|
||||
monkeypatch.setenv('NOMINATIM_USE_AUX_LOCATION_DATA', 'yes' if enabled else 'no')
|
||||
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
|
||||
|
||||
assert sql
|
||||
assert (sql.find('%NOAUXDATA%') >= 0) == enabled
|
||||
|
||||
|
||||
def test_partition_function(temp_db_cursor, db, def_config):
|
||||
temp_db_cursor.execute("CREATE TABLE country_name (partition SMALLINT)")
|
||||
|
||||
sql = _get_partition_function_sql(db, SQL_DIR)
|
||||
|
||||
assert sql
|
||||
assert sql.find('-partition-') < 0
|
||||
Reference in New Issue
Block a user