diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 8b20b55d..323d36d7 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -52,7 +52,7 @@ jobs: run: phpcs --report-width=120 . - name: Python linting - run: pylint nominatim + run: pylint --extension-pkg-whitelist=osmium nominatim - name: PHP unit tests run: phpunit ./ diff --git a/CMakeLists.txt b/CMakeLists.txt index c6f63a8b..d579bf1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -191,7 +191,7 @@ if (BUILD_TESTS) if (PYLINT) message(STATUS "Using pylint binary ${PYLINT}") add_test(NAME pylint - COMMAND ${PYLINT} nominatim + COMMAND ${PYLINT} --extension-pkg-whitelist=osmium nominatim WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) else() message(WARNING "pylint not found. Python linting tests disabled.") diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 552d1da1..6798c39d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -53,7 +53,7 @@ The coding style is enforced with PHPCS and pylint. It can be tested with: ``` phpcs --report-width=120 --colors . -pylint3 nominatim +pylint3 --extension-pkg-whitelist=osmium nominatim ``` ## Testing diff --git a/docs/admin/Update.md b/docs/admin/Update.md index 4b1a2be7..01980bd8 100644 --- a/docs/admin/Update.md +++ b/docs/admin/Update.md @@ -19,15 +19,6 @@ Run (as the same user who will later run the updates): pip3 install --user osmium ``` -Nominatim needs a tool called `pyosmium-get-changes` which comes with -Pyosmium. You need to tell Nominatim where to find it. Add the -following line to your `.env`: - - NOMINATIM_PYOSMIUM_BINARY=/home/user/.local/bin/pyosmium-get-changes - -The path above is fine if you used the `--user` parameter with pip. -Replace `user` with your user name. - #### Setting up the update process Next the update needs to be initialised. By default Nominatim is configured diff --git a/lib/admin/update.php b/lib/admin/update.php index fe9658b5..fba5300b 100644 --- a/lib/admin/update.php +++ b/lib/admin/update.php @@ -4,7 +4,6 @@ require_once(CONST_LibDir.'/init-cmd.php'); require_once(CONST_LibDir.'/setup_functions.php'); require_once(CONST_LibDir.'/setup/SetupClass.php'); -require_once(CONST_LibDir.'/setup/AddressLevelParser.php'); ini_set('memory_limit', '800M'); @@ -104,99 +103,27 @@ if ($fPostgresVersion >= 11.0) { ); } - -$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'))) - ->addParams('index'); +$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')); if ($aResult['quiet']) { - $oIndexCmd->addParams('--quiet'); + $oNominatimCmd->addParams('--quiet'); } if ($aResult['verbose']) { - $oIndexCmd->addParams('--verbose'); + $oNominatimCmd->addParams('--verbose'); } -$sPyosmiumBin = getSetting('PYOSMIUM_BINARY'); -$sBaseURL = getSetting('REPLICATION_URL'); - if ($aResult['init-updates']) { - // sanity check that the replication URL is correct - $sBaseState = file_get_contents($sBaseURL.'/state.txt'); - if ($sBaseState === false) { - echo "\nCannot find state.txt file at the configured replication URL.\n"; - echo "Does the URL point to a directory containing OSM update data?\n\n"; - fail('replication URL not reachable.'); - } - // sanity check for pyosmium-get-changes - if (!$sPyosmiumBin) { - echo "\nNOMINATIM_PYOSMIUM_BINARY not configured.\n"; - echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n"; - echo "in your local .env file.\n\n"; - fail('NOMINATIM_PYOSMIUM_BINARY not configured'); + $oCmd = (clone($oNominatimCmd))->addParams('replication', '--init'); + + if ($aResult['no-update-functions']) { + $oCmd->addParams('--no-update-functions'); } - $aOutput = 0; - $oCMD = new \Nominatim\Shell($sPyosmiumBin, '--help'); - exec($oCMD->escapedCmd(), $aOutput, $iRet); - - if ($iRet != 0) { - echo "Cannot execute pyosmium-get-changes.\n"; - echo "Make sure you have pyosmium installed correctly\n"; - echo "and have set up NOMINATIM_PYOSMIUM_BINARY to point to pyosmium-get-changes.\n"; - fail('pyosmium-get-changes not found or not usable'); - } - - if (!$aResult['no-update-functions']) { - // instantiate setupClass to use the function therein - $cSetup = new SetupFunctions(array( - 'enable-diff-updates' => true, - 'verbose' => $aResult['verbose'] - )); - $cSetup->createFunctions(); - } - - $sDatabaseDate = getDatabaseDate($oDB); - if (!$sDatabaseDate) { - fail('Cannot determine date of database.'); - } - $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60)); - - // get the appropriate state id - $aOutput = 0; - $oCMD = (new \Nominatim\Shell($sPyosmiumBin)) - ->addParams('--start-date', $sWindBack) - ->addParams('--server', $sBaseURL); - - exec($oCMD->escapedCmd(), $aOutput, $iRet); - if ($iRet != 0 || $aOutput[0] == 'None') { - fail('Error running pyosmium tools'); - } - - $oDB->exec('TRUNCATE import_status'); - $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('"; - $sSQL .= $sDatabaseDate."',".$aOutput[0].', true)'; - - try { - $oDB->exec($sSQL); - } catch (\Nominatim\DatabaseError $e) { - fail('Could not enter sequence into database.'); - } - - echo "Done. Database updates will start at sequence $aOutput[0] ($sWindBack)\n"; + $oCmd->run(); } if ($aResult['check-for-updates']) { - $aLastState = $oDB->getRow('SELECT sequence_id FROM import_status'); - - if (!$aLastState['sequence_id']) { - fail('Updates not set up. Please run ./utils/update.php --init-updates.'); - } - - $oCmd = (new \Nominatim\Shell(CONST_BinDir.'/check_server_for_updates.py')) - ->addParams($sBaseURL) - ->addParams($aLastState['sequence_id']); - $iRet = $oCmd->run(); - - exit($iRet); + exit((clone($oNominatimCmd))->addParams('replication', '--check-for-updates')->run()); } if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { @@ -220,9 +147,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { } if ($aResult['calculate-postcodes']) { - info('Update postcodes centroids'); - $sTemplate = file_get_contents(CONST_DataDir.'/sql/update-postcodes.sql'); - runSQLScript($sTemplate, true, true); + (clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run(); } $sTemporaryFile = CONST_InstallDir.'/osmosischange.osc'; @@ -271,22 +196,18 @@ if ($bHaveDiff) { } if ($aResult['recompute-word-counts']) { - info('Recompute frequency of full-word search terms'); - $sTemplate = file_get_contents(CONST_DataDir.'/sql/words_from_search_name.sql'); - runSQLScript($sTemplate, true, true); + (clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run(); } if ($aResult['index']) { - $oCmd = (clone $oIndexCmd) - ->addParams('--minrank', $aResult['index-rank']); - $oCmd->run(); + (clone $oNominatimCmd) + ->addParams('index', '--minrank', $aResult['index-rank']) + ->addParams('--threads', $aResult['index-instances']) + ->run(); } if ($aResult['update-address-levels']) { - $sAddressLevelConfig = getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json'); - echo 'Updating address levels from '.$sAddressLevelConfig.".\n"; - $oAlParser = new \Nominatim\Setup\AddressLevelParser($sAddressLevelConfig); - $oAlParser->createTable($oDB, 'address_levels'); + (clone($oNominatimCmd))->addParams('refresh', '--address-levels')->run(); } if ($aResult['recompute-importance']) { @@ -307,145 +228,17 @@ if ($aResult['recompute-importance']) { } if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { - // - if (strpos($sBaseURL, 'download.geofabrik.de') !== false && getSetting('REPLICATION_UPDATE_INTERVAL') < 86400) { - fail('Error: Update interval too low for download.geofabrik.de. ' . - "Please check install documentation (https://nominatim.org/release-docs/latest/admin/Import-and-Update#setting-up-the-update-process)\n"); + $oCmd = (clone($oNominatimCmd)) + ->addParams('replication') + ->addParams('--threads', $aResult['index-instances']); + + if (!$aResult['import-osmosis-all']) { + $oCmd->addParams('--once'); } - $sImportFile = CONST_InstallDir.'/osmosischange.osc'; - - $oCMDDownload = (new \Nominatim\Shell($sPyosmiumBin)) - ->addParams('--server', $sBaseURL) - ->addParams('--outfile', $sImportFile) - ->addParams('--size', getSetting('REPLICATION_MAX_DIFF')); - - $oCMDImport = (clone $oOsm2pgsqlCmd)->addParams($sImportFile); - - while (true) { - $fStartTime = time(); - $aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status'); - - if (!$aLastState['sequence_id']) { - echo "Updates not set up. Please run ./utils/update.php --init-updates.\n"; - exit(1); - } - - echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n"; - - $sBatchEnd = $aLastState['lastimportdate']; - $iEndSequence = $aLastState['sequence_id']; - - if ($aLastState['indexed']) { - // Sleep if the update interval has not yet been reached. - $fNextUpdate = $aLastState['unix_ts'] + getSetting('REPLICATION_UPDATE_INTERVAL'); - if ($fNextUpdate > $fStartTime) { - $iSleepTime = $fNextUpdate - $fStartTime; - echo "Waiting for next update for $iSleepTime sec."; - sleep($iSleepTime); - } - - // Download the next batch of changes. - do { - $fCMDStartTime = time(); - $iNextSeq = (int) $aLastState['sequence_id']; - unset($aOutput); - - $oCMD = (clone $oCMDDownload)->addParams('--start-id', $iNextSeq); - echo $oCMD->escapedCmd()."\n"; - if (file_exists($sImportFile)) { - unlink($sImportFile); - } - exec($oCMD->escapedCmd(), $aOutput, $iResult); - - if ($iResult == 3) { - $sSleep = getSetting('REPLICATION_RECHECK_INTERVAL'); - echo 'No new updates. Sleeping for '.$sSleep." sec.\n"; - sleep($sSleep); - } elseif ($iResult != 0) { - echo 'ERROR: updates failed.'; - exit($iResult); - } else { - $iEndSequence = (int)$aOutput[0]; - } - } while ($iResult); - - // get the newest object from the diff file - $sBatchEnd = 0; - $iRet = 0; - $oCMD = new \Nominatim\Shell(CONST_BinDir.'/osm_file_date.py', $sImportFile); - exec($oCMD->escapedCmd(), $sBatchEnd, $iRet); - if ($iRet == 5) { - echo "Diff file is empty. skipping import.\n"; - if (!$aResult['import-osmosis-all']) { - exit(0); - } else { - continue; - } - } - if ($iRet != 0) { - fail('Error getting date from diff file.'); - } - $sBatchEnd = $sBatchEnd[0]; - - // Import the file - $fCMDStartTime = time(); - - - echo $oCMDImport->escapedCmd()."\n"; - unset($sJunk); - $iErrorLevel = $oCMDImport->run(); - if ($iErrorLevel) { - echo "Error executing osm2pgsql: $iErrorLevel\n"; - exit($iErrorLevel); - } - - // write the update logs - $iFileSize = filesize($sImportFile); - $sSQL = 'INSERT INTO import_osmosis_log'; - $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)'; - $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'"; - $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','"; - $sSQL .= date('Y-m-d H:i:s')."','import')"; - var_Dump($sSQL); - $oDB->exec($sSQL); - - // update the status - $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence"; - var_Dump($sSQL); - $oDB->exec($sSQL); - echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; - } - - // Index file - if (!$aResult['no-index']) { - $fCMDStartTime = time(); - - $oThisIndexCmd = clone($oIndexCmd); - echo $oThisIndexCmd->escapedCmd()."\n"; - $iErrorLevel = $oThisIndexCmd->run(); - if ($iErrorLevel) { - echo "Error: $iErrorLevel\n"; - exit($iErrorLevel); - } - - $sSQL = 'INSERT INTO import_osmosis_log'; - $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)'; - $sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'"; - $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','"; - $sSQL .= date('Y-m-d H:i:s')."','index')"; - var_Dump($sSQL); - $oDB->exec($sSQL); - echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; - } else { - if ($aResult['import-osmosis-all']) { - echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n"; - exit(1); - } - } - - $fDuration = time() - $fStartTime; - echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n"; - if (!$aResult['import-osmosis-all']) exit(0); + if ($aResult['no-index']) { + $oCmd->addParams('--no-index'); } + + exit($oCmd->run()); } diff --git a/lib/setup/AddressLevelParser.php b/lib/setup/AddressLevelParser.php deleted file mode 100644 index a399c955..00000000 --- a/lib/setup/AddressLevelParser.php +++ /dev/null @@ -1,98 +0,0 @@ -aLevels = json_decode($sJson, true); - if (!$this->aLevels) { - switch (json_last_error()) { - case JSON_ERROR_NONE: - break; - case JSON_ERROR_DEPTH: - fail('JSON error - Maximum stack depth exceeded'); - break; - case JSON_ERROR_STATE_MISMATCH: - fail('JSON error - Underflow or the modes mismatch'); - break; - case JSON_ERROR_CTRL_CHAR: - fail('JSON error - Unexpected control character found'); - break; - case JSON_ERROR_SYNTAX: - fail('JSON error - Syntax error, malformed JSON'); - break; - case JSON_ERROR_UTF8: - fail('JSON error - Malformed UTF-8 characters, possibly incorrectly encoded'); - break; - default: - fail('JSON error - Unknown error'); - break; - } - } - } - - /** - * Dump the description into a database table. - * - * @param object $oDB Database conneciton to use. - * @param string $sTable Name of table to create. - * - * @return null - * - * A new table is created. Any previously existing table is dropped. - * The table has the following columns: - * country, class, type, rank_search, rank_address. - */ - public function createTable($oDB, $sTable) - { - $oDB->exec('DROP TABLE IF EXISTS '.$sTable); - $sSql = 'CREATE TABLE '.$sTable; - $sSql .= '(country_code varchar(2), class TEXT, type TEXT,'; - $sSql .= ' rank_search SMALLINT, rank_address SMALLINT)'; - $oDB->exec($sSql); - - $sSql = 'CREATE UNIQUE INDEX ON '.$sTable.' (country_code, class, type)'; - $oDB->exec($sSql); - - $sSql = 'INSERT INTO '.$sTable.' VALUES '; - foreach ($this->aLevels as $aLevel) { - $aCountries = array(); - if (isset($aLevel['countries'])) { - foreach ($aLevel['countries'] as $sCountry) { - $aCountries[$sCountry] = $oDB->getDBQuoted($sCountry); - } - } else { - $aCountries['NULL'] = 'NULL'; - } - foreach ($aLevel['tags'] as $sKey => $aValues) { - foreach ($aValues as $sValue => $mRanks) { - $aFields = array( - $oDB->getDBQuoted($sKey), - $sValue ? $oDB->getDBQuoted($sValue) : 'NULL' - ); - if (is_array($mRanks)) { - $aFields[] = (string) $mRanks[0]; - $aFields[] = (string) $mRanks[1]; - } else { - $aFields[] = (string) $mRanks; - $aFields[] = (string) $mRanks; - } - $sLine = ','.join(',', $aFields).'),'; - - foreach ($aCountries as $sCountries) { - $sSql .= '('.$sCountries.$sLine; - } - } - } - } - $oDB->exec(rtrim($sSql, ',')); - } -} diff --git a/lib/setup/SetupClass.php b/lib/setup/SetupClass.php index d17fdca7..a865b8f0 100755 --- a/lib/setup/SetupClass.php +++ b/lib/setup/SetupClass.php @@ -2,7 +2,6 @@ namespace Nominatim\Setup; -require_once(CONST_LibDir.'/setup/AddressLevelParser.php'); require_once(CONST_LibDir.'/Shell.php'); class SetupFunctions @@ -19,6 +18,7 @@ class SetupFunctions protected $bNoPartitions; protected $bDrop; protected $oDB = null; + protected $oNominatimCmd; public function __construct(array $aCMDResult) { @@ -81,6 +81,14 @@ class SetupFunctions } $this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop']; + + $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')); + if ($this->bQuiet) { + $this->oNominatimCmd->addParams('--quiet'); + } + if ($this->bVerbose) { + $this->oNominatimCmd->addParams('--verbose'); + } } public function createDB() @@ -256,8 +264,7 @@ class SetupFunctions $this->dropTable('search_name'); } - $oAlParser = new AddressLevelParser(getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json')); - $oAlParser->createTable($this->db(), 'address_levels'); + (clone($this->oNominatimCmd))->addParams('refresh', '--address-levels')->run(); } public function createTableTriggers() @@ -283,9 +290,7 @@ class SetupFunctions public function createPartitionFunctions() { info('Create Partition Functions'); - - $sTemplate = file_get_contents(CONST_DataDir.'/sql/partition-functions.src.sql'); - $this->pgsqlRunPartitionScript($sTemplate); + $this->createSqlFunctions(); // also create partition functions } public function importWikipediaArticles() @@ -549,19 +554,10 @@ class SetupFunctions { $this->checkModulePresence(); // raises exception on failure - $oBaseCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'))) - ->addParams('index'); - - if ($this->bQuiet) { - $oBaseCmd->addParams('-q'); - } - if ($this->bVerbose) { - $oBaseCmd->addParams('-v'); - } + $oBaseCmd = (clone $this->oNominatimCmd)->addParams('index'); info('Index ranks 0 - 4'); $oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4); - echo $oCmd->escapedCmd(); $iStatus = $oCmd->run(); if ($iStatus != 0) { @@ -790,43 +786,18 @@ class SetupFunctions private function createSqlFunctions() { - $sBasePath = CONST_DataDir.'/sql/functions/'; - $sTemplate = file_get_contents($sBasePath.'utils.sql'); - $sTemplate .= file_get_contents($sBasePath.'normalization.sql'); - $sTemplate .= file_get_contents($sBasePath.'ranking.sql'); - $sTemplate .= file_get_contents($sBasePath.'importance.sql'); - $sTemplate .= file_get_contents($sBasePath.'address_lookup.sql'); - $sTemplate .= file_get_contents($sBasePath.'interpolation.sql'); - if ($this->db()->tableExists('place')) { - $sTemplate .= file_get_contents($sBasePath.'place_triggers.sql'); - } - if ($this->db()->tableExists('placex')) { - $sTemplate .= file_get_contents($sBasePath.'placex_triggers.sql'); - } - if ($this->db()->tableExists('location_postcode')) { - $sTemplate .= file_get_contents($sBasePath.'postcode_triggers.sql'); - } - $sTemplate = str_replace('{modulepath}', $this->sModulePath, $sTemplate); - if ($this->bEnableDiffUpdates) { - $sTemplate = str_replace('RETURN NEW; -- %DIFFUPDATES%', '--', $sTemplate); + $oCmd = (clone($this->oNominatimCmd)) + ->addParams('refresh', '--functions'); + + if (!$this->bEnableDiffUpdates) { + $oCmd->addParams('--no-diff-updates'); } + if ($this->bEnableDebugStatements) { - $sTemplate = str_replace('--DEBUG:', '', $sTemplate); - } - if (getSettingBool('LIMIT_REINDEXING')) { - $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate); - } - if (!getSettingBool('USE_US_TIGER_DATA')) { - $sTemplate = str_replace('-- %NOTIGERDATA% ', '', $sTemplate); - } - if (!getSettingBool('USE_AUX_LOCATION_DATA')) { - $sTemplate = str_replace('-- %NOAUXDATA% ', '', $sTemplate); + $oCmd->addParams('--enable-debug-statements'); } - $sReverseOnly = $this->dbReverseOnly() ? 'true' : 'false'; - $sTemplate = str_replace('%REVERSE-ONLY%', $sReverseOnly, $sTemplate); - - $this->pgsqlRunScript($sTemplate); + $oCmd->run(); } private function pgsqlRunPartitionScript($sTemplate) diff --git a/nominatim/cli.py b/nominatim/cli.py index 6c110ce7..4873308d 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -2,16 +2,21 @@ Command-line interface to the Nominatim functions for import, update, database administration and querying. """ -import sys +import datetime as dt import os +import sys +import time import argparse import logging from pathlib import Path from .config import Configuration from .tools.exec_utils import run_legacy_script, run_api_script +from .db.connection import connect +from .db import status +from .errors import UsageError -from .indexer.indexer import Indexer +LOG = logging.getLogger() def _num_system_cpus(): try: @@ -85,7 +90,27 @@ class CommandlineParser: args.config = Configuration(args.project_dir, args.data_dir / 'settings') - return args.command.run(args) + try: + return args.command.run(args) + except UsageError as exception: + log = logging.getLogger() + if log.isEnabledFor(logging.DEBUG): + raise # use Python's exception printing + log.fatal('FATAL: %s', exception) + + # If we get here, then execution has failed in some way. + return 1 + + +def _osm2pgsql_options_from_args(args, default_cache, default_threads): + """ Set up the stanadrd osm2pgsql from the command line arguments. + """ + return dict(osm2pgsql=args.osm2pgsql_path, + osm2pgsql_cache=args.osm2pgsql_cache or default_cache, + osm2pgsql_style=args.config.get_import_style_file(), + threads=args.threads or default_threads, + dsn=args.config.get_libpq_dsn(), + flatnode_file=args.config.FLATNODE_FILE) ##### Subcommand classes # @@ -98,6 +123,8 @@ class CommandlineParser: # # No need to document the functions each time. # pylint: disable=C0111 +# Using non-top-level imports to make pyosmium optional for replication only. +# pylint: disable=E0012,C0415 class SetupAll: @@ -230,26 +257,123 @@ class UpdateReplication: group.add_argument('--no-index', action='store_false', dest='do_index', help="""Do not index the new data. Only applicable together with --once""") + group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, + help='Size of cache to be used by osm2pgsql (in MB)') + + @staticmethod + def _init_replication(args): + from .tools import replication, refresh + + LOG.warning("Initialising replication updates") + conn = connect(args.config.get_libpq_dsn()) + replication.init_replication(conn, base_url=args.config.REPLICATION_URL) + if args.update_functions: + LOG.warning("Create functions") + refresh.create_functions(conn, args.config, args.data_dir, + True, False) + conn.close() + return 0 + + + @staticmethod + def _check_for_updates(args): + from .tools import replication + + conn = connect(args.config.get_libpq_dsn()) + ret = replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL) + conn.close() + return ret + + @staticmethod + def _report_update(batchdate, start_import, start_index): + def round_time(delta): + return dt.timedelta(seconds=int(delta.total_seconds())) + + end = dt.datetime.now(dt.timezone.utc) + LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.", + round_time((start_index or end) - start_import), + "Indexing: {} ".format(round_time(end - start_index)) + if start_index else '', + round_time(end - start_import), + round_time(end - batchdate)) + + @staticmethod + def _update(args): + from .tools import replication + from .indexer.indexer import Indexer + + params = _osm2pgsql_options_from_args(args, 2000, 1) + params.update(base_url=args.config.REPLICATION_URL, + update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'), + import_file=args.project_dir / 'osmosischange.osc', + max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'), + indexed_only=not args.once) + + # Sanity check to not overwhelm the Geofabrik servers. + if 'download.geofabrik.de'in params['base_url']\ + and params['update_interval'] < 86400: + LOG.fatal("Update interval too low for download.geofabrik.de.\n" + "Please check install documentation " + "(https://nominatim.org/release-docs/latest/admin/Import-and-Update#" + "setting-up-the-update-process).") + raise UsageError("Invalid replication update interval setting.") + + if not args.once: + if not args.do_index: + LOG.fatal("Indexing cannot be disabled when running updates continuously.") + raise UsageError("Bad argument '--no-index'.") + recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL') + + while True: + conn = connect(args.config.get_libpq_dsn()) + start = dt.datetime.now(dt.timezone.utc) + state = replication.update(conn, params) + status.log_status(conn, start, 'import') + batchdate, _, _ = status.get_status(conn) + conn.close() + + if state is not replication.UpdateState.NO_CHANGES and args.do_index: + index_start = dt.datetime.now(dt.timezone.utc) + indexer = Indexer(args.config.get_libpq_dsn(), + args.threads or 1) + indexer.index_boundaries(0, 30) + indexer.index_by_rank(0, 30) + + conn = connect(args.config.get_libpq_dsn()) + status.set_indexed(conn, True) + status.log_status(conn, index_start, 'index') + conn.close() + else: + index_start = None + + if LOG.isEnabledFor(logging.WARNING): + UpdateReplication._report_update(batchdate, start, index_start) + + if args.once: + break + + if state is replication.UpdateState.NO_CHANGES: + LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval) + time.sleep(recheck_interval) + + return state.value @staticmethod def run(args): - params = ['update.php'] + try: + import osmium # pylint: disable=W0611 + except ModuleNotFoundError: + LOG.fatal("pyosmium not installed. Replication functions not available.\n" + "To install pyosmium via pip: pip3 install osmium") + return 1 + if args.init: - params.append('--init-updates') - if not args.update_functions: - params.append('--no-update-functions') - elif args.check_for_updates: - params.append('--check-for-updates') - else: - if args.once: - params.append('--import-osmosis') - else: - params.append('--import-osmosis-all') - if not args.do_index: - params.append('--no-index') + return UpdateReplication._init_replication(args) - return run_legacy_script(*params, nominatim_env=args) + if args.check_for_updates: + return UpdateReplication._check_for_updates(args) + return UpdateReplication._update(args) class UpdateAddData: """\ @@ -320,6 +444,8 @@ class UpdateIndex: @staticmethod def run(args): + from .indexer.indexer import Indexer + indexer = Indexer(args.config.get_libpq_dsn(), args.threads or _num_system_cpus() or 1) @@ -328,8 +454,11 @@ class UpdateIndex: if not args.boundaries_only: indexer.index_by_rank(args.minrank, args.maxrank) - if not args.no_boundaries and not args.boundaries_only: - indexer.update_status_table() + if not args.no_boundaries and not args.boundaries_only \ + and args.minrank == 0 and args.maxrank == 30: + conn = connect(args.config.get_libpq_dsn()) + status.set_indexed(conn, True) + conn.close() return 0 @@ -366,22 +495,34 @@ class UpdateRefresh: @staticmethod def run(args): + from .tools import refresh + if args.postcodes: - run_legacy_script('update.php', '--calculate-postcodes', - nominatim_env=args, throw_on_fail=True) + LOG.warning("Update postcodes centroid") + conn = connect(args.config.get_libpq_dsn()) + refresh.update_postcodes(conn, args.data_dir) + conn.close() + if args.word_counts: - run_legacy_script('update.php', '--recompute-word-counts', - nominatim_env=args, throw_on_fail=True) + LOG.warning('Recompute frequency of full-word search terms') + conn = connect(args.config.get_libpq_dsn()) + refresh.recompute_word_counts(conn, args.data_dir) + conn.close() + if args.address_levels: - run_legacy_script('update.php', '--update-address-levels', - nominatim_env=args, throw_on_fail=True) + cfg = Path(args.config.ADDRESS_LEVEL_CONFIG) + LOG.warning('Updating address levels from %s', cfg) + conn = connect(args.config.get_libpq_dsn()) + refresh.load_address_levels_from_file(conn, cfg) + conn.close() + if args.functions: - params = ['setup.php', '--create-functions', '--create-partition-functions'] - if args.diffs: - params.append('--enable-diff-updates') - if args.enable_debug_statements: - params.append('--enable-debug-statements') - run_legacy_script(*params, nominatim_env=args, throw_on_fail=True) + LOG.warning('Create functions') + conn = connect(args.config.get_libpq_dsn()) + refresh.create_functions(conn, args.config, args.data_dir, + args.diffs, args.enable_debug_statements) + conn.close() + if args.wiki_data: run_legacy_script('setup.php', '--import-wikipedia-articles', nominatim_env=args, throw_on_fail=True) @@ -392,6 +533,7 @@ class UpdateRefresh: if args.website: run_legacy_script('setup.php', '--setup-website', nominatim_env=args, throw_on_fail=True) + return 0 diff --git a/nominatim/config.py b/nominatim/config.py index 458c828f..4de2052e 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -1,10 +1,16 @@ """ Nominatim configuration accessor. """ +import logging import os +from pathlib import Path from dotenv import dotenv_values +from .errors import UsageError + +LOG = logging.getLogger() + class Configuration: """ Load and manage the project configuration. @@ -20,27 +26,75 @@ class Configuration: """ def __init__(self, project_dir, config_dir): + self.project_dir = project_dir + self.config_dir = config_dir self._config = dotenv_values(str((config_dir / 'env.defaults').resolve())) if project_dir is not None: self._config.update(dotenv_values(str((project_dir / '.env').resolve()))) + # Add defaults for variables that are left empty to set the default. + # They may still be overwritten by environment variables. + if not self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG']: + self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \ + str(config_dir / 'address-levels.json') + + def __getattr__(self, name): name = 'NOMINATIM_' + name return os.environ.get(name) or self._config[name] + def get_bool(self, name): + """ Return the given configuration parameter as a boolean. + Values of '1', 'yes' and 'true' are accepted as truthy values, + everything else is interpreted as false. + """ + return self.__getattr__(name).lower() in ('1', 'yes', 'true') + + + def get_int(self, name): + """ Return the given configuration parameter as an int. + """ + try: + return int(self.__getattr__(name)) + except ValueError: + LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name) + raise UsageError("Configuration error.") + + def get_libpq_dsn(self): """ Get configured database DSN converted into the key/value format understood by libpq and psycopg. """ dsn = self.DATABASE_DSN + def quote_param(param): + key, val = param.split('=') + val = val.replace('\\', '\\\\').replace("'", "\\'") + if ' ' in val: + val = "'" + val + "'" + return key + '=' + val + if dsn.startswith('pgsql:'): # Old PHP DSN format. Convert before returning. - return dsn[6:].replace(';', ' ') + return ' '.join([quote_param(p) for p in dsn[6:].split(';')]) return dsn + + def get_import_style_file(self): + """ Return the import style file as a path object. Translates the + name of the standard styles automatically into a file in the + config style. + """ + style = self.__getattr__('IMPORT_STYLE') + + if style in ('admin', 'street', 'address', 'full', 'extratags'): + return self.config_dir / 'import-{}.style'.format(style) + + return Path(style) + + def get_os_env(self): """ Return a copy of the OS environment with the Nominatim configuration merged in. diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py new file mode 100644 index 00000000..4d30151d --- /dev/null +++ b/nominatim/db/connection.py @@ -0,0 +1,58 @@ +""" +Specialised connection and cursor functions. +""" +import logging + +import psycopg2 +import psycopg2.extensions +import psycopg2.extras + +class _Cursor(psycopg2.extras.DictCursor): + """ A cursor returning dict-like objects and providing specialised + execution functions. + """ + + def execute(self, query, args=None): # pylint: disable=W0221 + """ Query execution that logs the SQL query when debugging is enabled. + """ + logger = logging.getLogger() + logger.debug(self.mogrify(query, args).decode('utf-8')) + + super().execute(query, args) + + def scalar(self, sql, args=None): + """ Execute query that returns a single value. The value is returned. + If the query yields more than one row, a ValueError is raised. + """ + self.execute(sql, args) + + if self.rowcount != 1: + raise RuntimeError("Query did not return a single row.") + + return self.fetchone()[0] + + +class _Connection(psycopg2.extensions.connection): + """ A connection that provides the specialised cursor by default and + adds convenience functions for administrating the database. + """ + + def cursor(self, cursor_factory=_Cursor, **kwargs): + """ Return a new cursor. By default the specialised cursor is returned. + """ + return super().cursor(cursor_factory=cursor_factory, **kwargs) + + def table_exists(self, table): + """ Check that a table with the given name exists in the database. + """ + with self.cursor() as cur: + num = cur.scalar("""SELECT count(*) FROM pg_tables + WHERE tablename = %s""", (table, )) + return num == 1 + + +def connect(dsn): + """ Open a connection to the database using the specialised connection + factory. + """ + return psycopg2.connect(dsn, connection_factory=_Connection) diff --git a/nominatim/db/status.py b/nominatim/db/status.py new file mode 100644 index 00000000..75da3c16 --- /dev/null +++ b/nominatim/db/status.py @@ -0,0 +1,82 @@ +""" +Access and helper functions for the status and status log table. +""" +import datetime as dt +import logging +import re + +from ..tools.exec_utils import get_url +from ..errors import UsageError + +LOG = logging.getLogger() + +def compute_database_date(conn): + """ Determine the date of the database from the newest object in the + data base. + """ + # First, find the node with the highest ID in the database + with conn.cursor() as cur: + osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'") + + if osmid is None: + LOG.fatal("No data found in the database.") + raise UsageError("No data found in the database.") + + LOG.info("Using node id %d for timestamp lookup", osmid) + # Get the node from the API to find the timestamp when it was created. + node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid) + data = get_url(node_url) + + match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data) + + if match is None: + LOG.fatal("The node data downloaded from the API does not contain valid data.\n" + "URL used: %s", node_url) + raise UsageError("Bad API data.") + + LOG.debug("Found timestamp %s", match[1]) + + return dt.datetime.fromisoformat(match[1]).replace(tzinfo=dt.timezone.utc) + + +def set_status(conn, date, seq=None, indexed=True): + """ Replace the current status with the given status. + """ + assert date.tzinfo == dt.timezone.utc + with conn.cursor() as cur: + cur.execute("TRUNCATE TABLE import_status") + cur.execute("""INSERT INTO import_status (lastimportdate, sequence_id, indexed) + VALUES (%s, %s, %s)""", (date, seq, indexed)) + + conn.commit() + + +def get_status(conn): + """ Return the current status as a triple of (date, sequence, indexed). + If status has not been set up yet, a triple of None is returned. + """ + with conn.cursor() as cur: + cur.execute("SELECT * FROM import_status LIMIT 1") + if cur.rowcount < 1: + return None, None, None + + row = cur.fetchone() + return row['lastimportdate'], row['sequence_id'], row['indexed'] + + +def set_indexed(conn, state): + """ Set the indexed flag in the status table to the given state. + """ + with conn.cursor() as cur: + cur.execute("UPDATE import_status SET indexed = %s", (state, )) + conn.commit() + + +def log_status(conn, start, event, batchsize=None): + """ Write a new status line to the `import_osmosis_log` table. + """ + with conn.cursor() as cur: + cur.execute("""INSERT INTO import_osmosis_log + (batchend, batchseq, batchsize, starttime, endtime, event) + SELECT lastimportdate, sequence_id, %s, %s, now(), %s FROM import_status""", + (batchsize, start, event)) diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py new file mode 100644 index 00000000..abd72519 --- /dev/null +++ b/nominatim/db/utils.py @@ -0,0 +1,12 @@ +""" +Helper functions for handling DB accesses. +""" + +def execute_file(conn, fname): + """ Read an SQL file and run its contents against the given connection. + """ + with fname.open('r') as fdesc: + sql = fdesc.read() + with conn.cursor() as cur: + cur.execute(sql) + conn.commit() diff --git a/nominatim/errors.py b/nominatim/errors.py new file mode 100644 index 00000000..e77f956a --- /dev/null +++ b/nominatim/errors.py @@ -0,0 +1,8 @@ +""" +Custom exception and error classes for Nominatim. +""" + +class UsageError(Exception): + """ An error raised because of bad user input. This error will usually + not cause a stack trace to be printed unless debugging is enabled. + """ diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index 9e16e293..03bed986 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -2,9 +2,17 @@ Helper functions for executing external programs. """ import logging +import os import subprocess +import urllib.request as urlrequest from urllib.parse import urlencode +from psycopg2.extensions import parse_dsn + +from ..version import NOMINATIM_VERSION + +LOG = logging.getLogger() + def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False): """ Run a Nominatim PHP script with the given arguments. @@ -80,3 +88,51 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None, print(result[content_start + 4:].replace('\\n', '\n')) return 0 + + +def run_osm2pgsql(options): + """ Run osm2pgsql with the given options. + """ + env = os.environ + cmd = [options['osm2pgsql'], + '--hstore', '--latlon', '--slim', + '--with-forward-dependencies', 'false', + '--log-progress', 'true', + '--number-processes', str(options['threads']), + '--cache', str(options['osm2pgsql_cache']), + '--output', 'gazetteer', + '--style', str(options['osm2pgsql_style']) + ] + if options['append']: + cmd.append('--append') + + if options['flatnode_file']: + cmd.extend(('--flat-nodes', options['flatnode_file'])) + + dsn = parse_dsn(options['dsn']) + if 'password' in dsn: + env['PGPASSWORD'] = dsn['password'] + if 'dbname' in dsn: + cmd.extend(('-d', dsn['dbname'])) + if 'user' in dsn: + cmd.extend(('--username', dsn['user'])) + for param in ('host', 'port'): + if param in dsn: + cmd.extend(('--' + param, dsn[param])) + + cmd.append(str(options['import_file'])) + + subprocess.run(cmd, cwd=options.get('cwd', '.'), env=env, check=True) + + +def get_url(url): + """ Get the contents from the given URL and return it as a UTF-8 string. + """ + headers = {"User-Agent" : "Nominatim/" + NOMINATIM_VERSION} + + try: + with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response: + return response.read().decode('utf-8') + except: + LOG.fatal('Failed to load URL: %s', url) + raise diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py new file mode 100644 index 00000000..5fbb07f8 --- /dev/null +++ b/nominatim/tools/refresh.py @@ -0,0 +1,169 @@ +""" +Functions for bringing auxiliary data in the database up-to-date. +""" +import json +import re + +from psycopg2.extras import execute_values + +from ..db.utils import execute_file + +def update_postcodes(conn, datadir): + """ Recalculate postcode centroids and add, remove and update entries in the + location_postcode table. `conn` is an opne connection to the database. + """ + execute_file(conn, datadir / 'sql' / 'update-postcodes.sql') + + +def recompute_word_counts(conn, datadir): + """ Compute the frequency of full-word search terms. + """ + execute_file(conn, datadir / 'sql' / 'words_from_search_name.sql') + + +def _add_address_level_rows_from_entry(rows, entry): + """ Converts a single entry from the JSON format for address rank + descriptions into a flat format suitable for inserting into a + PostgreSQL table and adds these lines to `rows`. + """ + countries = entry.get('countries') or (None, ) + for key, values in entry['tags'].items(): + for value, ranks in values.items(): + if isinstance(ranks, list): + rank_search, rank_address = ranks + else: + rank_search = rank_address = ranks + if not value: + value = None + for country in countries: + rows.append((country, key, value, rank_search, rank_address)) + +def load_address_levels(conn, table, levels): + """ Replace the `address_levels` table with the contents of `levels'. + + A new table is created any previously existing table is dropped. + The table has the following columns: + country, class, type, rank_search, rank_address + """ + rows = [] + for entry in levels: + _add_address_level_rows_from_entry(rows, entry) + + with conn.cursor() as cur: + cur.execute('DROP TABLE IF EXISTS {}'.format(table)) + + cur.execute("""CREATE TABLE {} (country_code varchar(2), + class TEXT, + type TEXT, + rank_search SMALLINT, + rank_address SMALLINT)""".format(table)) + + execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows) + + cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table)) + + conn.commit() + +def load_address_levels_from_file(conn, config_file): + """ Replace the `address_levels` table with the contents of the config + file. + """ + with config_file.open('r') as fdesc: + load_address_levels(conn, 'address_levels', json.load(fdesc)) + +PLPGSQL_BASE_MODULES = ( + 'utils.sql', + 'normalization.sql', + 'ranking.sql', + 'importance.sql', + 'address_lookup.sql', + 'interpolation.sql' +) + +PLPGSQL_TABLE_MODULES = ( + ('place', 'place_triggers.sql'), + ('placex', 'placex_triggers.sql'), + ('location_postcode', 'postcode_triggers.sql') +) + +def _get_standard_function_sql(conn, config, sql_dir, enable_diff_updates, enable_debug): + """ Read all applicable SQLs containing PL/pgSQL functions, replace + placefolders and execute them. + """ + sql_func_dir = sql_dir / 'functions' + sql = '' + + # Get the basic set of functions that is always imported. + for sql_file in PLPGSQL_BASE_MODULES: + with (sql_func_dir / sql_file).open('r') as fdesc: + sql += fdesc.read() + + # Some files require the presence of a certain table + for table, fname in PLPGSQL_TABLE_MODULES: + if conn.table_exists(table): + with (sql_func_dir / fname).open('r') as fdesc: + sql += fdesc.read() + + # Replace placeholders. + sql = sql.replace('{modulepath}', + config.DATABASE_MODULE_PATH or str((config.project_dir / 'module').resolve())) + + if enable_diff_updates: + sql = sql.replace('RETURN NEW; -- %DIFFUPDATES%', '--') + + if enable_debug: + sql = sql.replace('--DEBUG:', '') + + if config.get_bool('LIMIT_REINDEXING'): + sql = sql.replace('--LIMIT INDEXING:', '') + + if not config.get_bool('USE_US_TIGER_DATA'): + sql = sql.replace('-- %NOTIGERDATA% ', '') + + if not config.get_bool('USE_AUX_LOCATION_DATA'): + sql = sql.replace('-- %NOAUXDATA% ', '') + + reverse_only = 'false' if conn.table_exists('search_name') else 'true' + + return sql.replace('%REVERSE-ONLY%', reverse_only) + + +def replace_partition_string(sql, partitions): + """ Replace a partition template with the actual partition code. + """ + for match in re.findall('^-- start(.*?)^-- end', sql, re.M | re.S): + repl = '' + for part in partitions: + repl += match.replace('-partition-', str(part)) + sql = sql.replace(match, repl) + + return sql + +def _get_partition_function_sql(conn, sql_dir): + """ Create functions that work on partition tables. + """ + with conn.cursor() as cur: + cur.execute('SELECT distinct partition FROM country_name') + partitions = set([0]) + for row in cur: + partitions.add(row[0]) + + with (sql_dir / 'partition-functions.src.sql').open('r') as fdesc: + sql = fdesc.read() + + return replace_partition_string(sql, sorted(partitions)) + +def create_functions(conn, config, data_dir, + enable_diff_updates=True, enable_debug=False): + """ (Re)create the PL/pgSQL functions. + """ + sql_dir = data_dir / 'sql' + + sql = _get_standard_function_sql(conn, config, sql_dir, + enable_diff_updates, enable_debug) + sql += _get_partition_function_sql(conn, sql_dir) + + with conn.cursor() as cur: + cur.execute(sql) + + conn.commit() diff --git a/nominatim/tools/replication.py b/nominatim/tools/replication.py new file mode 100644 index 00000000..c7d0d3e5 --- /dev/null +++ b/nominatim/tools/replication.py @@ -0,0 +1,119 @@ +""" +Functions for updating a database from a replication source. +""" +import datetime as dt +from enum import Enum +import logging +import time + +from osmium.replication.server import ReplicationServer +from osmium import WriteHandler + +from ..db import status +from .exec_utils import run_osm2pgsql +from ..errors import UsageError + +LOG = logging.getLogger() + +def init_replication(conn, base_url): + """ Set up replication for the server at the given base URL. + """ + LOG.info("Using replication source: %s", base_url) + date = status.compute_database_date(conn) + + # margin of error to make sure we get all data + date -= dt.timedelta(hours=3) + + repl = ReplicationServer(base_url) + + seq = repl.timestamp_to_sequence(date) + + if seq is None: + LOG.fatal("Cannot reach the configured replication service '%s'.\n" + "Does the URL point to a directory containing OSM update data?", + base_url) + raise UsageError("Failed to reach replication service") + + status.set_status(conn, date=date, seq=seq) + + LOG.warning("Updates intialised at sequence %s (%s)", seq, date) + + +def check_for_updates(conn, base_url): + """ Check if new data is available from the replication service at the + given base URL. + """ + _, seq, _ = status.get_status(conn) + + if seq is None: + LOG.error("Replication not set up. " + "Please run 'nominatim replication --init' first.") + return 254 + + state = ReplicationServer(base_url).get_state_info() + + if state is None: + LOG.error("Cannot get state for URL %s.", base_url) + return 253 + + if state.sequence <= seq: + LOG.warning("Database is up to date.") + return 2 + + LOG.warning("New data available (%i => %i).", seq, state.sequence) + return 0 + +class UpdateState(Enum): + """ Possible states after an update has run. + """ + + UP_TO_DATE = 0 + MORE_PENDING = 2 + NO_CHANGES = 3 + + +def update(conn, options): + """ Update database from the next batch of data. Returns the state of + updates according to `UpdateState`. + """ + startdate, startseq, indexed = status.get_status(conn) + + if startseq is None: + LOG.error("Replication not set up. " + "Please run 'nominatim replication --init' first.") + raise UsageError("Replication not set up.") + + if not indexed and options['indexed_only']: + LOG.info("Skipping update. There is data that needs indexing.") + return UpdateState.MORE_PENDING + + last_since_update = dt.datetime.now(dt.timezone.utc) - startdate + update_interval = dt.timedelta(seconds=options['update_interval']) + if last_since_update < update_interval: + duration = (update_interval - last_since_update).seconds + LOG.warning("Sleeping for %s sec before next update.", duration) + time.sleep(duration) + + if options['import_file'].exists(): + options['import_file'].unlink() + + # Read updates into file. + repl = ReplicationServer(options['base_url']) + + outhandler = WriteHandler(str(options['import_file'])) + endseq = repl.apply_diffs(outhandler, startseq, + max_size=options['max_diff_size'] * 1024) + outhandler.close() + + if endseq is None: + return UpdateState.NO_CHANGES + + # Consume updates with osm2pgsql. + options['append'] = True + run_osm2pgsql(options) + + # Write the current status to the file + endstate = repl.get_state_info(endseq) + status.set_status(conn, endstate.timestamp, seq=endseq, indexed=False) + + return UpdateState.UP_TO_DATE diff --git a/nominatim/version.py b/nominatim/version.py new file mode 100644 index 00000000..a2ddc9fa --- /dev/null +++ b/nominatim/version.py @@ -0,0 +1,5 @@ +""" +Version information for Nominatim. +""" + +NOMINATIM_VERSION = "3.6.0" diff --git a/settings/env.defaults b/settings/env.defaults index fbad3e33..e2eda340 100644 --- a/settings/env.defaults +++ b/settings/env.defaults @@ -57,6 +57,9 @@ NOMINATIM_HTTP_PROXY_HOST=proxy.mydomain.com NOMINATIM_HTTP_PROXY_PORT=3128 NOMINATIM_HTTP_PROXY_LOGIN= NOMINATIM_HTTP_PROXY_PASSWORD= +# Also set these standard environment variables. +# HTTP_PROXY="http://user:pass@10.10.1.10:1080" +# HTTPS_PROXY="http://user:pass@10.10.1.10:1080" # Location of the osm2pgsql binary. # When empty, osm2pgsql is expected to reside in the osm2pgsql directory in @@ -64,10 +67,6 @@ NOMINATIM_HTTP_PROXY_PASSWORD= # EXPERT ONLY. You should usually use the supplied osm2pgsql. NOMINATIM_OSM2PGSQL_BINARY= -# Location of pyosmium-get-changes. -# Only needed when running updates. -NOMINATIM_PYOSMIUM_BINARY= - # Directory where to find US Tiger data files to import. # Used with setup.php --import-tiger-data. When unset, the data is expected # to be located under 'data/tiger' in the source tree. diff --git a/sql/tables.sql b/sql/tables.sql index 5686bcd2..8647e304 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -1,6 +1,6 @@ drop table if exists import_status; CREATE TABLE import_status ( - lastimportdate timestamp NOT NULL, + lastimportdate timestamp with time zone NOT NULL, sequence_id integer, indexed boolean ); diff --git a/test/python/conftest.py b/test/python/conftest.py index 1cc9ef9c..8b0ba145 100644 --- a/test/python/conftest.py +++ b/test/python/conftest.py @@ -1,26 +1,155 @@ +import itertools import sys from pathlib import Path import psycopg2 +import psycopg2.extras import pytest +SRC_DIR = Path(__file__) / '..' / '..' / '..' + # always test against the source -sys.path.insert(0, str((Path(__file__) / '..' / '..' / '..').resolve())) +sys.path.insert(0, str(SRC_DIR.resolve())) + +from nominatim.config import Configuration +from nominatim.db import connection + +class _TestingCursor(psycopg2.extras.DictCursor): + """ Extension to the DictCursor class that provides execution + short-cuts that simplify writing assertions. + """ + + def scalar(self, sql, params=None): + """ Execute a query with a single return value and return this value. + Raises an assertion when not exactly one row is returned. + """ + self.execute(sql, params) + assert self.rowcount == 1 + return self.fetchone()[0] + + def row_set(self, sql, params=None): + """ Execute a query and return the result as a set of tuples. + """ + self.execute(sql, params) + if self.rowcount == 1: + return set(tuple(self.fetchone())) + + return set((tuple(row) for row in self)) @pytest.fixture def temp_db(monkeypatch): + """ Create an empty database for the test. The database name is also + exported into NOMINATIM_DATABASE_DSN. + """ name = 'test_nominatim_python_unittest' - with psycopg2.connect(database='postgres') as conn: - conn.set_isolation_level(0) - with conn.cursor() as cur: - cur.execute('DROP DATABASE IF EXISTS {}'.format(name)) - cur.execute('CREATE DATABASE {}'.format(name)) + conn = psycopg2.connect(database='postgres') + + conn.set_isolation_level(0) + with conn.cursor() as cur: + cur.execute('DROP DATABASE IF EXISTS {}'.format(name)) + cur.execute('CREATE DATABASE {}'.format(name)) + + conn.close() monkeypatch.setenv('NOMINATIM_DATABASE_DSN' , 'dbname=' + name) yield name - with psycopg2.connect(database='postgres') as conn: - conn.set_isolation_level(0) - with conn.cursor() as cur: - cur.execute('DROP DATABASE IF EXISTS {}'.format(name)) + conn = psycopg2.connect(database='postgres') + + conn.set_isolation_level(0) + with conn.cursor() as cur: + cur.execute('DROP DATABASE IF EXISTS {}'.format(name)) + + conn.close() + +@pytest.fixture +def temp_db_with_extensions(temp_db): + conn = psycopg2.connect(database=temp_db) + with conn.cursor() as cur: + cur.execute('CREATE EXTENSION hstore; CREATE EXTENSION postgis;') + conn.commit() + conn.close() + + return temp_db + +@pytest.fixture +def temp_db_conn(temp_db): + """ Connection to the test database. + """ + conn = connection.connect('dbname=' + temp_db) + yield conn + conn.close() + + +@pytest.fixture +def temp_db_cursor(temp_db): + """ Connection and cursor towards the test database. The connection will + be in auto-commit mode. + """ + conn = psycopg2.connect('dbname=' + temp_db) + conn.set_isolation_level(0) + with conn.cursor(cursor_factory=_TestingCursor) as cur: + yield cur + conn.close() + + +@pytest.fixture +def def_config(): + return Configuration(None, SRC_DIR.resolve() / 'settings') + + +@pytest.fixture +def status_table(temp_db_conn): + """ Create an empty version of the status table and + the status logging table. + """ + with temp_db_conn.cursor() as cur: + cur.execute("""CREATE TABLE import_status ( + lastimportdate timestamp with time zone NOT NULL, + sequence_id integer, + indexed boolean + )""") + cur.execute("""CREATE TABLE import_osmosis_log ( + batchend timestamp, + batchseq integer, + batchsize bigint, + starttime timestamp, + endtime timestamp, + event text + )""") + temp_db_conn.commit() + + +@pytest.fixture +def place_table(temp_db_with_extensions, temp_db_conn): + """ Create an empty version of the place table. + """ + with temp_db_conn.cursor() as cur: + cur.execute("""CREATE TABLE place ( + osm_id int8 NOT NULL, + osm_type char(1) NOT NULL, + class text NOT NULL, + type text NOT NULL, + name hstore, + admin_level smallint, + address hstore, + extratags hstore, + geometry Geometry(Geometry,4326) NOT NULL)""") + temp_db_conn.commit() + + +@pytest.fixture +def place_row(place_table, temp_db_cursor): + """ A factory for rows in the place table. The table is created as a + prerequisite to the fixture. + """ + idseq = itertools.count(1001) + def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None, + admin_level=None, address=None, extratags=None, geom=None): + temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)", + (osm_id or next(idseq), osm_type, cls, typ, names, + admin_level, address, extratags, + geom or 'SRID=4326;POINT(0 0 )')) + + return _insert diff --git a/test/python/test_cli.py b/test/python/test_cli.py index 92b42372..cde84759 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -1,10 +1,21 @@ """ Tests for command line interface wrapper. + +These tests just check that the various command line parameters route to the +correct functionionality. They use a lot of monkeypatching to avoid executing +the actual functions. """ +import datetime as dt import psycopg2 import pytest +import time import nominatim.cli +import nominatim.indexer.indexer +import nominatim.tools.refresh +import nominatim.tools.replication +from nominatim.errors import UsageError +from nominatim.db import status def call_nominatim(*args): return nominatim.cli.nominatim(module_dir='build/module', @@ -18,9 +29,9 @@ class MockParamCapture: """ Mock that records the parameters with which a function was called as well as the number of calls. """ - def __init__(self): + def __init__(self, retval=0): self.called = 0 - self.return_value = 0 + self.return_value = retval def __call__(self, *args, **kwargs): self.called += 1 @@ -54,7 +65,6 @@ def test_cli_help(capsys): (('import', '--continue', 'load-data'), 'setup'), (('freeze',), 'setup'), (('special-phrases',), 'specialphrases'), - (('replication',), 'update'), (('add-data', '--tiger-data', 'tiger'), 'setup'), (('add-data', '--file', 'foo.osm'), 'update'), (('check-database',), 'check_import_finished'), @@ -82,14 +92,12 @@ def test_add_data_command(mock_run_legacy, name, oid): (['--boundaries-only'], 1, 0), (['--no-boundaries'], 0, 1), (['--boundaries-only', '--no-boundaries'], 0, 0)]) -def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks): - with psycopg2.connect(database=temp_db) as conn: - with conn.cursor() as cur: - cur.execute("CREATE TABLE import_status (indexed bool)") +def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks): + temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)") bnd_mock = MockParamCapture() - monkeypatch.setattr(nominatim.cli.Indexer, 'index_boundaries', bnd_mock) + monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock) rank_mock = MockParamCapture() - monkeypatch.setattr(nominatim.cli.Indexer, 'index_by_rank', rank_mock) + monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock) assert 0 == call_nominatim('index', *params) @@ -98,29 +106,115 @@ def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks): @pytest.mark.parametrize("command,params", [ - ('postcodes', ('update.php', '--calculate-postcodes')), - ('word-counts', ('update.php', '--recompute-word-counts')), - ('address-levels', ('update.php', '--update-address-levels')), - ('functions', ('setup.php',)), ('wiki-data', ('setup.php', '--import-wikipedia-articles')), ('importance', ('update.php', '--recompute-importance')), ('website', ('setup.php', '--setup-website')), ]) -def test_refresh_command(mock_run_legacy, command, params): +def test_refresh_legacy_command(mock_run_legacy, temp_db, command, params): assert 0 == call_nominatim('refresh', '--' + command) assert mock_run_legacy.called == 1 assert len(mock_run_legacy.last_args) >= len(params) assert mock_run_legacy.last_args[:len(params)] == params +@pytest.mark.parametrize("command,func", [ + ('postcodes', 'update_postcodes'), + ('word-counts', 'recompute_word_counts'), + ('address-levels', 'load_address_levels_from_file'), + ('functions', 'create_functions'), + ]) +def test_refresh_command(monkeypatch, temp_db, command, func): + func_mock = MockParamCapture() + monkeypatch.setattr(nominatim.tools.refresh, func, func_mock) -def test_refresh_importance_computed_after_wiki_import(mock_run_legacy): + assert 0 == call_nominatim('refresh', '--' + command) + assert func_mock.called == 1 + + +def test_refresh_importance_computed_after_wiki_import(mock_run_legacy, temp_db): assert 0 == call_nominatim('refresh', '--importance', '--wiki-data') assert mock_run_legacy.called == 2 assert mock_run_legacy.last_args == ('update.php', '--recompute-importance') +@pytest.mark.parametrize("params,func", [ + (('--init', '--no-update-functions'), 'init_replication'), + (('--check-for-updates',), 'check_for_updates') + ]) +def test_replication_command(monkeypatch, temp_db, params, func): + func_mock = MockParamCapture() + monkeypatch.setattr(nominatim.tools.replication, func, func_mock) + + assert 0 == call_nominatim('replication', *params) + assert func_mock.called == 1 + + +def test_replication_update_bad_interval(monkeypatch, temp_db): + monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx') + + assert call_nominatim('replication') == 1 + + +def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db): + monkeypatch.setenv('NOMINATIM_REPLICATION_URL', + 'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates') + + assert call_nominatim('replication') == 1 + + +@pytest.mark.parametrize("state, retval", [ + (nominatim.tools.replication.UpdateState.UP_TO_DATE, 0), + (nominatim.tools.replication.UpdateState.NO_CHANGES, 3) + ]) +def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn, + status_table, state, retval): + status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1) + func_mock = MockParamCapture(retval=state) + monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock) + + assert retval == call_nominatim('replication', '--once', '--no-index') + + +def test_replication_update_continuous(monkeypatch, temp_db_conn, status_table): + status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1) + states = [nominatim.tools.replication.UpdateState.UP_TO_DATE, + nominatim.tools.replication.UpdateState.UP_TO_DATE] + monkeypatch.setattr(nominatim.tools.replication, 'update', + lambda *args, **kwargs: states.pop()) + + index_mock = MockParamCapture() + monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock) + monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock) + + with pytest.raises(IndexError): + call_nominatim('replication') + + assert index_mock.called == 4 + + +def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, status_table): + status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1) + states = [nominatim.tools.replication.UpdateState.NO_CHANGES, + nominatim.tools.replication.UpdateState.UP_TO_DATE] + monkeypatch.setattr(nominatim.tools.replication, 'update', + lambda *args, **kwargs: states.pop()) + + index_mock = MockParamCapture() + monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock) + monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock) + + sleep_mock = MockParamCapture() + monkeypatch.setattr(time, 'sleep', sleep_mock) + + with pytest.raises(IndexError): + call_nominatim('replication') + + assert index_mock.called == 2 + assert sleep_mock.called == 1 + assert sleep_mock.last_args[0] == 60 + + @pytest.mark.parametrize("params", [ ('search', '--query', 'new'), ('reverse', '--lat', '0', '--lon', '0'), diff --git a/test/python/test_config.py b/test/python/test_config.py index bada9d86..4578be13 100644 --- a/test/python/test_config.py +++ b/test/python/test_config.py @@ -7,6 +7,7 @@ import tempfile import pytest from nominatim.config import Configuration +from nominatim.errors import UsageError DEFCFG_DIR = Path(__file__) / '..' / '..' / '..' / 'settings' @@ -15,6 +16,7 @@ def test_no_project_dir(): assert config.DATABASE_WEBUSER == 'www-data' + def test_prefer_project_setting_over_default(): with tempfile.TemporaryDirectory() as project_dir: with open(project_dir + '/.env', 'w') as envfile: @@ -24,6 +26,7 @@ def test_prefer_project_setting_over_default(): assert config.DATABASE_WEBUSER == 'apache' + def test_prefer_os_environ_over_project_setting(monkeypatch): with tempfile.TemporaryDirectory() as project_dir: with open(project_dir + '/.env', 'w') as envfile: @@ -35,6 +38,7 @@ def test_prefer_os_environ_over_project_setting(monkeypatch): assert config.DATABASE_WEBUSER == 'nobody' + def test_get_os_env_add_defaults(monkeypatch): config = Configuration(None, DEFCFG_DIR) @@ -42,6 +46,7 @@ def test_get_os_env_add_defaults(monkeypatch): assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'www-data' + def test_get_os_env_prefer_os_environ(monkeypatch): config = Configuration(None, DEFCFG_DIR) @@ -49,11 +54,13 @@ def test_get_os_env_prefer_os_environ(monkeypatch): assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody' + def test_get_libpq_dsn_convert_default(): config = Configuration(None, DEFCFG_DIR) assert config.get_libpq_dsn() == 'dbname=nominatim' + def test_get_libpq_dsn_convert_php(monkeypatch): config = Configuration(None, DEFCFG_DIR) @@ -62,6 +69,19 @@ def test_get_libpq_dsn_convert_php(monkeypatch): assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost' + +@pytest.mark.parametrize("val,expect", [('foo bar', "'foo bar'"), + ("xy'z", "xy\\'z"), + ]) +def test_get_libpq_dsn_convert_php_special_chars(monkeypatch, val, expect): + config = Configuration(None, DEFCFG_DIR) + + monkeypatch.setenv('NOMINATIM_DATABASE_DSN', + 'pgsql:dbname=gis;password={}'.format(val)) + + assert config.get_libpq_dsn() == "dbname=gis password={}".format(expect) + + def test_get_libpq_dsn_convert_libpq(monkeypatch): config = Configuration(None, DEFCFG_DIR) @@ -69,3 +89,68 @@ def test_get_libpq_dsn_convert_libpq(monkeypatch): 'host=localhost dbname=gis password=foo') assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo' + + +@pytest.mark.parametrize("value,result", + [(x, True) for x in ('1', 'true', 'True', 'yes', 'YES')] + + [(x, False) for x in ('0', 'false', 'no', 'NO', 'x')]) +def test_get_bool(monkeypatch, value, result): + config = Configuration(None, DEFCFG_DIR) + + monkeypatch.setenv('NOMINATIM_FOOBAR', value) + + assert config.get_bool('FOOBAR') == result + +def test_get_bool_empty(): + config = Configuration(None, DEFCFG_DIR) + + assert config.DATABASE_MODULE_PATH == '' + assert config.get_bool('DATABASE_MODULE_PATH') == False + + +@pytest.mark.parametrize("value,result", [('0', 0), ('1', 1), + ('85762513444', 85762513444)]) +def test_get_int_success(monkeypatch, value, result): + config = Configuration(None, DEFCFG_DIR) + + monkeypatch.setenv('NOMINATIM_FOOBAR', value) + + assert config.get_int('FOOBAR') == result + + +@pytest.mark.parametrize("value", ['1b', 'fg', '0x23']) +def test_get_int_bad_values(monkeypatch, value): + config = Configuration(None, DEFCFG_DIR) + + monkeypatch.setenv('NOMINATIM_FOOBAR', value) + + with pytest.raises(UsageError): + config.get_int('FOOBAR') + + +def test_get_int_empty(): + config = Configuration(None, DEFCFG_DIR) + + assert config.DATABASE_MODULE_PATH == '' + + with pytest.raises(UsageError): + config.get_int('DATABASE_MODULE_PATH') + + +def test_get_import_style_intern(monkeypatch): + config = Configuration(None, DEFCFG_DIR) + + monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'street') + + expected = DEFCFG_DIR / 'import-street.style' + + assert config.get_import_style_file() == expected + + +@pytest.mark.parametrize("value", ['custom', '/foo/bar.stye']) +def test_get_import_style_intern(monkeypatch, value): + config = Configuration(None, DEFCFG_DIR) + + monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', value) + + assert str(config.get_import_style_file()) == value diff --git a/test/python/test_db_connection.py b/test/python/test_db_connection.py new file mode 100644 index 00000000..ef1ae741 --- /dev/null +++ b/test/python/test_db_connection.py @@ -0,0 +1,32 @@ +""" +Tests for specialised conenction and cursor classes. +""" +import pytest + +from nominatim.db.connection import connect + +@pytest.fixture +def db(temp_db): + conn = connect('dbname=' + temp_db) + yield conn + conn.close() + + +def test_connection_table_exists(db, temp_db_cursor): + assert db.table_exists('foobar') == False + + temp_db_cursor.execute('CREATE TABLE foobar (id INT)') + + assert db.table_exists('foobar') == True + + +def test_cursor_scalar(db, temp_db_cursor): + temp_db_cursor.execute('CREATE TABLE dummy (id INT)') + + with db.cursor() as cur: + assert cur.scalar('SELECT count(*) FROM dummy') == 0 + +def test_cursor_scalar_many_rows(db): + with db.cursor() as cur: + with pytest.raises(RuntimeError): + cur.scalar('SELECT * FROM pg_tables') diff --git a/test/python/test_db_status.py b/test/python/test_db_status.py new file mode 100644 index 00000000..399a0036 --- /dev/null +++ b/test/python/test_db_status.py @@ -0,0 +1,114 @@ +""" +Tests for status table manipulation. +""" +import datetime as dt + +import pytest + +import nominatim.db.status +from nominatim.errors import UsageError + +def test_compute_database_date_place_empty(status_table, place_table, temp_db_conn): + with pytest.raises(UsageError): + nominatim.db.status.compute_database_date(temp_db_conn) + +OSM_NODE_DATA = """\ + + + + +""" + +def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_db_conn): + place_row(osm_type='N', osm_id=45673) + + requested_url = [] + def mock_url(url): + requested_url.append(url) + return OSM_NODE_DATA + + monkeypatch.setattr(nominatim.db.status, "get_url", mock_url) + + date = nominatim.db.status.compute_database_date(temp_db_conn) + + assert requested_url == ['https://www.openstreetmap.org/api/0.6/node/45673/1'] + assert date == dt.datetime.fromisoformat('2006-01-27T22:09:10').replace(tzinfo=dt.timezone.utc) + + +def test_compute_database_broken_api(monkeypatch, status_table, place_row, temp_db_conn): + place_row(osm_type='N', osm_id=45673) + + requested_url = [] + def mock_url(url): + requested_url.append(url) + return ' 0 + +def test_load_ranks_from_file(temp_db_conn, temp_db_cursor, tmp_path): + test_file = tmp_path / 'test_levels.json' + test_file.write_text('[{"tags":{"place":{"sea":2}}}]') + + load_address_levels_from_file(temp_db_conn, test_file) + + assert temp_db_cursor.scalar('SELECT count(*) FROM address_levels') > 0 + + +def test_load_ranks_from_broken_file(temp_db_conn, tmp_path): + test_file = tmp_path / 'test_levels.json' + test_file.write_text('[{"tags":"place":{"sea":2}}}]') + + with pytest.raises(json.decoder.JSONDecodeError): + load_address_levels_from_file(temp_db_conn, test_file) + + +def test_load_ranks_country(temp_db_conn, temp_db_cursor): + load_address_levels(temp_db_conn, 'levels', + [{"tags": {"place": {"village": 14}}}, + {"countries": ['de'], + "tags": {"place": {"village": 15}}}, + {"countries": ['uk', 'us' ], + "tags": {"place": {"village": 16}}} + ]) + + assert temp_db_cursor.row_set('SELECT * FROM levels') == \ + set([(None, 'place', 'village', 14, 14), + ('de', 'place', 'village', 15, 15), + ('uk', 'place', 'village', 16, 16), + ('us', 'place', 'village', 16, 16), + ]) + + +def test_load_ranks_default_value(temp_db_conn, temp_db_cursor): + load_address_levels(temp_db_conn, 'levels', + [{"tags": {"boundary": {"": 28}}}, + {"countries": ['hu'], + "tags": {"boundary": {"": 29}}} + ]) + + assert temp_db_cursor.row_set('SELECT * FROM levels') == \ + set([(None, 'boundary', None, 28, 28), + ('hu', 'boundary', None, 29, 29), + ]) + + +def test_load_ranks_multiple_keys(temp_db_conn, temp_db_cursor): + load_address_levels(temp_db_conn, 'levels', + [{"tags": + {"place": {"city": 14}, + "boundary": {"administrative2" : 4}} + }]) + + assert temp_db_cursor.row_set('SELECT * FROM levels') == \ + set([(None, 'place', 'city', 14, 14), + (None, 'boundary', 'administrative2', 4, 4), + ]) + + +def test_load_ranks_address(temp_db_conn, temp_db_cursor): + load_address_levels(temp_db_conn, 'levels', + [{"tags": + {"place": {"city": 14, + "town" : [14, 13]}} + }]) + + assert temp_db_cursor.row_set('SELECT * FROM levels') == \ + set([(None, 'place', 'city', 14, 14), + (None, 'place', 'town', 14, 13), + ]) diff --git a/test/python/test_tools_refresh_create_functions.py b/test/python/test_tools_refresh_create_functions.py new file mode 100644 index 00000000..4807e64f --- /dev/null +++ b/test/python/test_tools_refresh_create_functions.py @@ -0,0 +1,99 @@ +""" +Tests for creating PL/pgSQL functions for Nominatim. +""" +from pathlib import Path +import pytest + +from nominatim.db.connection import connect +from nominatim.tools.refresh import _get_standard_function_sql, _get_partition_function_sql + +SQL_DIR = (Path(__file__) / '..' / '..' / '..' / 'sql').resolve() + +@pytest.fixture +def db(temp_db): + conn = connect('dbname=' + temp_db) + yield conn + conn.close() + +@pytest.fixture +def db_with_tables(db): + with db.cursor() as cur: + for table in ('place', 'placex', 'location_postcode'): + cur.execute('CREATE TABLE {} (place_id BIGINT)'.format(table)) + + return db + + +def test_standard_functions_replace_module_default(db, def_config): + def_config.project_dir = Path('.') + sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False) + + assert sql + assert sql.find('{modulepath}') < 0 + assert sql.find("'{}'".format(Path('module/nominatim.so').resolve())) >= 0 + + +def test_standard_functions_replace_module_custom(monkeypatch, db, def_config): + monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'custom') + sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False) + + assert sql + assert sql.find('{modulepath}') < 0 + assert sql.find("'custom/nominatim.so'") >= 0 + + +@pytest.mark.parametrize("enabled", (True, False)) +def test_standard_functions_enable_diff(db_with_tables, def_config, enabled): + def_config.project_dir = Path('.') + sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, enabled, False) + + assert sql + assert (sql.find('%DIFFUPDATES%') < 0) == enabled + + +@pytest.mark.parametrize("enabled", (True, False)) +def test_standard_functions_enable_debug(db_with_tables, def_config, enabled): + def_config.project_dir = Path('.') + sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, enabled) + + assert sql + assert (sql.find('--DEBUG') < 0) == enabled + + +@pytest.mark.parametrize("enabled", (True, False)) +def test_standard_functions_enable_limit_reindexing(monkeypatch, db_with_tables, def_config, enabled): + def_config.project_dir = Path('.') + monkeypatch.setenv('NOMINATIM_LIMIT_REINDEXING', 'yes' if enabled else 'no') + sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False) + + assert sql + assert (sql.find('--LIMIT INDEXING') < 0) == enabled + + +@pytest.mark.parametrize("enabled", (True, False)) +def test_standard_functions_enable_tiger(monkeypatch, db_with_tables, def_config, enabled): + def_config.project_dir = Path('.') + monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA', 'yes' if enabled else 'no') + sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False) + + assert sql + assert (sql.find('%NOTIGERDATA%') >= 0) == enabled + + +@pytest.mark.parametrize("enabled", (True, False)) +def test_standard_functions_enable_aux(monkeypatch, db_with_tables, def_config, enabled): + def_config.project_dir = Path('.') + monkeypatch.setenv('NOMINATIM_USE_AUX_LOCATION_DATA', 'yes' if enabled else 'no') + sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False) + + assert sql + assert (sql.find('%NOAUXDATA%') >= 0) == enabled + + +def test_partition_function(temp_db_cursor, db, def_config): + temp_db_cursor.execute("CREATE TABLE country_name (partition SMALLINT)") + + sql = _get_partition_function_sql(db, SQL_DIR) + + assert sql + assert sql.find('-partition-') < 0 diff --git a/test/python/test_tools_replication.py b/test/python/test_tools_replication.py new file mode 100644 index 00000000..156385ad --- /dev/null +++ b/test/python/test_tools_replication.py @@ -0,0 +1,138 @@ +""" +Tests for replication functionality. +""" +import datetime as dt +import time + +import pytest +from osmium.replication.server import OsmosisState + +import nominatim.tools.replication +import nominatim.db.status as status +from nominatim.errors import UsageError + +OSM_NODE_DATA = """\ + + + + +""" + +### init replication + +def test_init_replication_bad_base_url(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor): + place_row(osm_type='N', osm_id=100) + + monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA) + + with pytest.raises(UsageError, match="Failed to reach replication service"): + nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io') + + +def test_init_replication_success(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor): + place_row(osm_type='N', osm_id=100) + + monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA) + monkeypatch.setattr(nominatim.tools.replication.ReplicationServer, + "timestamp_to_sequence", + lambda self, date: 234) + + nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io') + + temp_db_cursor.execute("SELECT * FROM import_status") + + expected_date = dt.datetime.fromisoformat('2006-01-27T19:09:10').replace(tzinfo=dt.timezone.utc) + assert temp_db_cursor.rowcount == 1 + assert temp_db_cursor.fetchone() == [expected_date, 234, True] + + +### checking for updates + +def test_check_for_updates_empty_status_table(status_table, temp_db_conn): + assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254 + + +def test_check_for_updates_seq_not_set(status_table, temp_db_conn): + status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc)) + + assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254 + + +def test_check_for_updates_no_state(monkeypatch, status_table, temp_db_conn): + status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=345) + + monkeypatch.setattr(nominatim.tools.replication.ReplicationServer, + "get_state_info", lambda self: None) + + assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 253 + + +@pytest.mark.parametrize("server_sequence,result", [(344, 2), (345, 2), (346, 0)]) +def test_check_for_updates_no_new_data(monkeypatch, status_table, temp_db_conn, + server_sequence, result): + date = dt.datetime.now(dt.timezone.utc) + status.set_status(temp_db_conn, date, seq=345) + + monkeypatch.setattr(nominatim.tools.replication.ReplicationServer, + "get_state_info", + lambda self: OsmosisState(server_sequence, date)) + + assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == result + + +### updating + +@pytest.fixture +def update_options(tmpdir): + return dict(base_url='https://test.io', + indexed_only=False, + update_interval=3600, + import_file=tmpdir / 'foo.osm', + max_diff_size=1) + +def test_update_empty_status_table(status_table, temp_db_conn): + with pytest.raises(UsageError): + nominatim.tools.replication.update(temp_db_conn, {}) + + +def test_update_already_indexed(status_table, temp_db_conn): + status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=34, indexed=False) + + assert nominatim.tools.replication.update(temp_db_conn, dict(indexed_only=True)) \ + == nominatim.tools.replication.UpdateState.MORE_PENDING + + +def test_update_no_data_no_sleep(monkeypatch, status_table, temp_db_conn, update_options): + date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1) + status.set_status(temp_db_conn, date, seq=34) + + monkeypatch.setattr(nominatim.tools.replication.ReplicationServer, + "apply_diffs", + lambda *args, **kwargs: None) + + sleeptime = [] + monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s)) + + assert nominatim.tools.replication.update(temp_db_conn, update_options) \ + == nominatim.tools.replication.UpdateState.NO_CHANGES + + assert not sleeptime + + +def test_update_no_data_sleep(monkeypatch, status_table, temp_db_conn, update_options): + date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(minutes=30) + status.set_status(temp_db_conn, date, seq=34) + + monkeypatch.setattr(nominatim.tools.replication.ReplicationServer, + "apply_diffs", + lambda *args, **kwargs: None) + + sleeptime = [] + monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s)) + + assert nominatim.tools.replication.update(temp_db_conn, update_options) \ + == nominatim.tools.replication.UpdateState.NO_CHANGES + + assert len(sleeptime) == 1 + assert sleeptime[0] < 3600 + assert sleeptime[0] > 0 diff --git a/utils/check_server_for_updates.py b/utils/check_server_for_updates.py deleted file mode 100755 index bcc9d0ba..00000000 --- a/utils/check_server_for_updates.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python3 - -import sys -from osmium.replication import server - -if __name__ == '__main__': - if len(sys.argv) != 3: - print("Usage: python check_server_for_updates.py ") - sys.exit(254) - - seqid = int(sys.argv[2]) - - state = server.ReplicationServer(sys.argv[1]).get_state_info() - - if state is None: - print("ERROR: Cannot get state from URL %s." % (sys.argv[1], )) - sys.exit(253) - - if state.sequence <= seqid: - print("Database up to date.") - sys.exit(1) - - print("New data available (%i => %i)." % (seqid, state.sequence)) - sys.exit(0) diff --git a/utils/osm_file_date.py b/utils/osm_file_date.py deleted file mode 100755 index 0443e6ac..00000000 --- a/utils/osm_file_date.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 - -import osmium -import sys -import datetime - - -class Datecounter(osmium.SimpleHandler): - - filedate = None - - def date(self, o): - ts = o.timestamp - if self.filedate is None or ts > self.filedate: - self.filedate = ts - - node = date - way = date - relation = date - - -if __name__ == '__main__': - if len(sys.argv) != 2: - print("Usage: python osm_file_date.py ") - sys.exit(-1) - - h = Datecounter() - - h.apply_file(sys.argv[1]) - - if h.filedate is None: - exit(5) - - print(h.filedate)