move update code for postcode and word count to Python

Also adds tests for the new function that executes an SQL script.
This commit is contained in:
Sarah Hoffmann
2021-01-22 23:25:37 +01:00
parent e6d9485c4a
commit e6c2842b66
6 changed files with 114 additions and 44 deletions

View File

@@ -104,14 +104,12 @@ if ($fPostgresVersion >= 11.0) {
); );
} }
$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
->addParams('index');
if ($aResult['quiet']) { if ($aResult['quiet']) {
$oIndexCmd->addParams('--quiet'); $oNominatimCmd->addParams('--quiet');
} }
if ($aResult['verbose']) { if ($aResult['verbose']) {
$oIndexCmd->addParams('--verbose'); $oNominatimCmd->addParams('--verbose');
} }
$sPyosmiumBin = getSetting('PYOSMIUM_BINARY'); $sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
@@ -220,9 +218,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
} }
if ($aResult['calculate-postcodes']) { if ($aResult['calculate-postcodes']) {
info('Update postcodes centroids'); (clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run();
$sTemplate = file_get_contents(CONST_DataDir.'/sql/update-postcodes.sql');
runSQLScript($sTemplate, true, true);
} }
$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc'; $sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
@@ -271,15 +267,11 @@ if ($bHaveDiff) {
} }
if ($aResult['recompute-word-counts']) { if ($aResult['recompute-word-counts']) {
info('Recompute frequency of full-word search terms'); (clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run();
$sTemplate = file_get_contents(CONST_DataDir.'/sql/words_from_search_name.sql');
runSQLScript($sTemplate, true, true);
} }
if ($aResult['index']) { if ($aResult['index']) {
$oCmd = (clone $oIndexCmd) (clone $oNominatimCmd)->addParams('index', '--minrank', $aResult['index-rank'])->run();
->addParams('--minrank', $aResult['index-rank']);
$oCmd->run();
} }
if ($aResult['update-address-levels']) { if ($aResult['update-address-levels']) {
@@ -421,7 +413,8 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
if (!$aResult['no-index']) { if (!$aResult['no-index']) {
$fCMDStartTime = time(); $fCMDStartTime = time();
$oThisIndexCmd = clone($oIndexCmd); $oThisIndexCmd = clone($oNominatimCmd);
$oThisIndexCmd->addParams('index');
echo $oThisIndexCmd->escapedCmd()."\n"; echo $oThisIndexCmd->escapedCmd()."\n";
$iErrorLevel = $oThisIndexCmd->run(); $iErrorLevel = $oThisIndexCmd->run();
if ($iErrorLevel) { if ($iErrorLevel) {

View File

@@ -8,9 +8,13 @@ import argparse
import logging import logging
from pathlib import Path from pathlib import Path
import psycopg2
from .config import Configuration from .config import Configuration
from .tools.exec_utils import run_legacy_script, run_api_script from .tools.exec_utils import run_legacy_script, run_api_script
LOG = logging.getLogger()
def _num_system_cpus(): def _num_system_cpus():
try: try:
cpus = len(os.sched_getaffinity(0)) cpus = len(os.sched_getaffinity(0))
@@ -366,32 +370,35 @@ class UpdateRefresh:
@staticmethod @staticmethod
def run(args): def run(args):
if args.postcodes: import nominatim.tools.refresh
run_legacy_script('update.php', '--calculate-postcodes',
nominatim_env=args, throw_on_fail=True) with psycopg2.connect(args.config.get_libpq_dsn()) as conn:
if args.word_counts: if args.postcodes:
run_legacy_script('update.php', '--recompute-word-counts', LOG.warning("Update postcodes centroid")
nominatim_env=args, throw_on_fail=True) nominatim.tools.refresh.update_postcodes(conn, args.data_dir)
if args.address_levels: if args.word_counts:
run_legacy_script('update.php', '--update-address-levels', LOG.warning('Recompute frequency of full-word search terms')
nominatim_env=args, throw_on_fail=True) nominatim.tools.refresh.recompute_word_counts(conn, args.data_dir)
if args.functions: if args.address_levels:
params = ['setup.php', '--create-functions', '--create-partition-functions'] run_legacy_script('update.php', '--update-address-levels',
if args.diffs: nominatim_env=args, throw_on_fail=True)
params.append('--enable-diff-updates') if args.functions:
if args.enable_debug_statements: params = ['setup.php', '--create-functions', '--create-partition-functions']
params.append('--enable-debug-statements') if args.diffs:
run_legacy_script(*params, nominatim_env=args, throw_on_fail=True) params.append('--enable-diff-updates')
if args.wiki_data: if args.enable_debug_statements:
run_legacy_script('setup.php', '--import-wikipedia-articles', params.append('--enable-debug-statements')
nominatim_env=args, throw_on_fail=True) run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
# Attention: importance MUST come after wiki data import. if args.wiki_data:
if args.importance: run_legacy_script('setup.php', '--import-wikipedia-articles',
run_legacy_script('update.php', '--recompute-importance', nominatim_env=args, throw_on_fail=True)
nominatim_env=args, throw_on_fail=True) # Attention: importance MUST come after wiki data import.
if args.website: if args.importance:
run_legacy_script('setup.php', '--setup-website', run_legacy_script('update.php', '--recompute-importance',
nominatim_env=args, throw_on_fail=True) nominatim_env=args, throw_on_fail=True)
if args.website:
run_legacy_script('setup.php', '--setup-website',
nominatim_env=args, throw_on_fail=True)
return 0 return 0

11
nominatim/db/utils.py Normal file
View File

@@ -0,0 +1,11 @@
"""
Helper functions for handling DB accesses.
"""
def execute_file(conn, fname):
    """ Read an SQL file and run its contents against the given connection.

        `conn` must provide a `cursor()` method returning a context manager
        whose result has an `execute()` method. `fname` must be a
        path object offering `read_text()` (e.g. `pathlib.Path`).

        The complete file content is handed to a single `execute()` call.
        No commit is issued here; transaction handling is left to the caller.

        Errors from reading the file (for example `FileNotFoundError` when
        the file does not exist) and errors raised by `execute()` are passed
        on to the caller unchanged.
    """
    # Decode explicitly as UTF-8 so that the result does not depend on the
    # locale of the process running the script.
    sql = fname.read_text(encoding='utf-8')

    with conn.cursor() as cur:
        cur.execute(sql)

View File

@@ -0,0 +1,16 @@
"""
Functions for bringing auxiliary data in the database up-to-date.
"""
from ..db.utils import execute_file
def update_postcodes(conn, datadir):
    """ Recalculate postcode centroids and add, remove and update entries in
        the location_postcode table. `conn` is an open connection to the
        database. The SQL script that is run is looked up in the `sql`
        subdirectory of `datadir`.
    """
    script = datadir / 'sql' / 'update-postcodes.sql'
    execute_file(conn, script)
def recompute_word_counts(conn, datadir):
    """ Compute the frequency of full-word search terms by running the
        `words_from_search_name.sql` script from the `sql` subdirectory of
        `datadir` on the connection `conn`.
    """
    script = datadir / 'sql' / 'words_from_search_name.sql'
    execute_file(conn, script)

View File

@@ -6,6 +6,7 @@ import pytest
import nominatim.cli import nominatim.cli
import nominatim.indexer.indexer import nominatim.indexer.indexer
import nominatim.tools.refresh
def call_nominatim(*args): def call_nominatim(*args):
return nominatim.cli.nominatim(module_dir='build/module', return nominatim.cli.nominatim(module_dir='build/module',
@@ -99,21 +100,30 @@ def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks):
@pytest.mark.parametrize("command,params", [ @pytest.mark.parametrize("command,params", [
('postcodes', ('update.php', '--calculate-postcodes')),
('word-counts', ('update.php', '--recompute-word-counts')),
('address-levels', ('update.php', '--update-address-levels')), ('address-levels', ('update.php', '--update-address-levels')),
('functions', ('setup.php',)), ('functions', ('setup.php',)),
('wiki-data', ('setup.php', '--import-wikipedia-articles')), ('wiki-data', ('setup.php', '--import-wikipedia-articles')),
('importance', ('update.php', '--recompute-importance')), ('importance', ('update.php', '--recompute-importance')),
('website', ('setup.php', '--setup-website')), ('website', ('setup.php', '--setup-website')),
]) ])
def test_refresh_command(mock_run_legacy, command, params): def test_refresh_legacy_command(mock_run_legacy, command, params):
assert 0 == call_nominatim('refresh', '--' + command) assert 0 == call_nominatim('refresh', '--' + command)
assert mock_run_legacy.called == 1 assert mock_run_legacy.called == 1
assert len(mock_run_legacy.last_args) >= len(params) assert len(mock_run_legacy.last_args) >= len(params)
assert mock_run_legacy.last_args[:len(params)] == params assert mock_run_legacy.last_args[:len(params)] == params
@pytest.mark.parametrize("command,func", [
('postcodes', 'update_postcodes'),
('word-counts', 'recompute_word_counts'),
])
def test_refresh_command(monkeypatch, command, func):
func_mock = MockParamCapture()
monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
assert 0 == call_nominatim('refresh', '--' + command)
assert func_mock.called == 1
def test_refresh_importance_computed_after_wiki_import(mock_run_legacy): def test_refresh_importance_computed_after_wiki_import(mock_run_legacy):
assert 0 == call_nominatim('refresh', '--importance', '--wiki-data') assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')

View File

@@ -0,0 +1,33 @@
"""
Tests for DB utility functions in db.utils
"""
import psycopg2
import pytest
import nominatim.db.utils as db_utils
def test_execute_file_success(temp_db, tmp_path):
    """ Running a valid SQL script creates the table and inserts the row.
    """
    script = tmp_path / 'test.sql'
    script.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')

    with psycopg2.connect('dbname=' + temp_db) as conn:
        db_utils.execute_file(conn, script)

        # Check the result through the same connection that ran the script.
        with conn.cursor() as cur:
            cur.execute('SELECT * FROM test')

            assert cur.rowcount == 1
            first_row = cur.fetchone()
            assert first_row[0] == 56
def test_execute_file_bad_file(temp_db, tmp_path):
    """ A path to a file that does not exist raises FileNotFoundError.
    """
    dsn = 'dbname=' + temp_db

    with psycopg2.connect(dsn) as conn, pytest.raises(FileNotFoundError):
        db_utils.execute_file(conn, tmp_path / 'test2.sql')
def test_execute_file_bad_sql(temp_db, tmp_path):
    """ A script with invalid SQL raises psycopg2.ProgrammingError.
    """
    script = tmp_path / 'test.sql'
    script.write_text('CREATE STABLE test (id INT)')
    dsn = 'dbname=' + temp_db

    with psycopg2.connect(dsn) as conn, pytest.raises(psycopg2.ProgrammingError):
        db_utils.execute_file(conn, script)