diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index 8b20b55d..323d36d7 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -52,7 +52,7 @@ jobs:
run: phpcs --report-width=120 .
- name: Python linting
- run: pylint nominatim
+ run: pylint --extension-pkg-whitelist=osmium nominatim
- name: PHP unit tests
run: phpunit ./
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c6f63a8b..d579bf1a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -191,7 +191,7 @@ if (BUILD_TESTS)
if (PYLINT)
message(STATUS "Using pylint binary ${PYLINT}")
add_test(NAME pylint
- COMMAND ${PYLINT} nominatim
+ COMMAND ${PYLINT} --extension-pkg-whitelist=osmium nominatim
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "pylint not found. Python linting tests disabled.")
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 552d1da1..6798c39d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -53,7 +53,7 @@ The coding style is enforced with PHPCS and pylint. It can be tested with:
```
phpcs --report-width=120 --colors .
-pylint3 nominatim
+pylint3 --extension-pkg-whitelist=osmium nominatim
```
## Testing
diff --git a/docs/admin/Update.md b/docs/admin/Update.md
index 4b1a2be7..01980bd8 100644
--- a/docs/admin/Update.md
+++ b/docs/admin/Update.md
@@ -19,15 +19,6 @@ Run (as the same user who will later run the updates):
pip3 install --user osmium
```
-Nominatim needs a tool called `pyosmium-get-changes` which comes with
-Pyosmium. You need to tell Nominatim where to find it. Add the
-following line to your `.env`:
-
- NOMINATIM_PYOSMIUM_BINARY=/home/user/.local/bin/pyosmium-get-changes
-
-The path above is fine if you used the `--user` parameter with pip.
-Replace `user` with your user name.
-
#### Setting up the update process
Next the update needs to be initialised. By default Nominatim is configured
diff --git a/lib/admin/update.php b/lib/admin/update.php
index fe9658b5..fba5300b 100644
--- a/lib/admin/update.php
+++ b/lib/admin/update.php
@@ -4,7 +4,6 @@
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/setup_functions.php');
require_once(CONST_LibDir.'/setup/SetupClass.php');
-require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
ini_set('memory_limit', '800M');
@@ -104,99 +103,27 @@ if ($fPostgresVersion >= 11.0) {
);
}
-
-$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
- ->addParams('index');
+$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
if ($aResult['quiet']) {
- $oIndexCmd->addParams('--quiet');
+ $oNominatimCmd->addParams('--quiet');
}
if ($aResult['verbose']) {
- $oIndexCmd->addParams('--verbose');
+ $oNominatimCmd->addParams('--verbose');
}
-$sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
-$sBaseURL = getSetting('REPLICATION_URL');
-
if ($aResult['init-updates']) {
- // sanity check that the replication URL is correct
- $sBaseState = file_get_contents($sBaseURL.'/state.txt');
- if ($sBaseState === false) {
- echo "\nCannot find state.txt file at the configured replication URL.\n";
- echo "Does the URL point to a directory containing OSM update data?\n\n";
- fail('replication URL not reachable.');
- }
- // sanity check for pyosmium-get-changes
- if (!$sPyosmiumBin) {
- echo "\nNOMINATIM_PYOSMIUM_BINARY not configured.\n";
- echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n";
- echo "in your local .env file.\n\n";
- fail('NOMINATIM_PYOSMIUM_BINARY not configured');
+ $oCmd = (clone($oNominatimCmd))->addParams('replication', '--init');
+
+ if ($aResult['no-update-functions']) {
+ $oCmd->addParams('--no-update-functions');
}
- $aOutput = 0;
- $oCMD = new \Nominatim\Shell($sPyosmiumBin, '--help');
- exec($oCMD->escapedCmd(), $aOutput, $iRet);
-
- if ($iRet != 0) {
- echo "Cannot execute pyosmium-get-changes.\n";
- echo "Make sure you have pyosmium installed correctly\n";
- echo "and have set up NOMINATIM_PYOSMIUM_BINARY to point to pyosmium-get-changes.\n";
- fail('pyosmium-get-changes not found or not usable');
- }
-
- if (!$aResult['no-update-functions']) {
- // instantiate setupClass to use the function therein
- $cSetup = new SetupFunctions(array(
- 'enable-diff-updates' => true,
- 'verbose' => $aResult['verbose']
- ));
- $cSetup->createFunctions();
- }
-
- $sDatabaseDate = getDatabaseDate($oDB);
- if (!$sDatabaseDate) {
- fail('Cannot determine date of database.');
- }
- $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60));
-
- // get the appropriate state id
- $aOutput = 0;
- $oCMD = (new \Nominatim\Shell($sPyosmiumBin))
- ->addParams('--start-date', $sWindBack)
- ->addParams('--server', $sBaseURL);
-
- exec($oCMD->escapedCmd(), $aOutput, $iRet);
- if ($iRet != 0 || $aOutput[0] == 'None') {
- fail('Error running pyosmium tools');
- }
-
- $oDB->exec('TRUNCATE import_status');
- $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
- $sSQL .= $sDatabaseDate."',".$aOutput[0].', true)';
-
- try {
- $oDB->exec($sSQL);
- } catch (\Nominatim\DatabaseError $e) {
- fail('Could not enter sequence into database.');
- }
-
- echo "Done. Database updates will start at sequence $aOutput[0] ($sWindBack)\n";
+ $oCmd->run();
}
if ($aResult['check-for-updates']) {
- $aLastState = $oDB->getRow('SELECT sequence_id FROM import_status');
-
- if (!$aLastState['sequence_id']) {
- fail('Updates not set up. Please run ./utils/update.php --init-updates.');
- }
-
- $oCmd = (new \Nominatim\Shell(CONST_BinDir.'/check_server_for_updates.py'))
- ->addParams($sBaseURL)
- ->addParams($aLastState['sequence_id']);
- $iRet = $oCmd->run();
-
- exit($iRet);
+ exit((clone($oNominatimCmd))->addParams('replication', '--check-for-updates')->run());
}
if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
@@ -220,9 +147,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
}
if ($aResult['calculate-postcodes']) {
- info('Update postcodes centroids');
- $sTemplate = file_get_contents(CONST_DataDir.'/sql/update-postcodes.sql');
- runSQLScript($sTemplate, true, true);
+ (clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run();
}
$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
@@ -271,22 +196,18 @@ if ($bHaveDiff) {
}
if ($aResult['recompute-word-counts']) {
- info('Recompute frequency of full-word search terms');
- $sTemplate = file_get_contents(CONST_DataDir.'/sql/words_from_search_name.sql');
- runSQLScript($sTemplate, true, true);
+ (clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run();
}
if ($aResult['index']) {
- $oCmd = (clone $oIndexCmd)
- ->addParams('--minrank', $aResult['index-rank']);
- $oCmd->run();
+ (clone $oNominatimCmd)
+ ->addParams('index', '--minrank', $aResult['index-rank'])
+ ->addParams('--threads', $aResult['index-instances'])
+ ->run();
}
if ($aResult['update-address-levels']) {
- $sAddressLevelConfig = getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json');
- echo 'Updating address levels from '.$sAddressLevelConfig.".\n";
- $oAlParser = new \Nominatim\Setup\AddressLevelParser($sAddressLevelConfig);
- $oAlParser->createTable($oDB, 'address_levels');
+ (clone($oNominatimCmd))->addParams('refresh', '--address-levels')->run();
}
if ($aResult['recompute-importance']) {
@@ -307,145 +228,17 @@ if ($aResult['recompute-importance']) {
}
if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
- //
- if (strpos($sBaseURL, 'download.geofabrik.de') !== false && getSetting('REPLICATION_UPDATE_INTERVAL') < 86400) {
- fail('Error: Update interval too low for download.geofabrik.de. ' .
- "Please check install documentation (https://nominatim.org/release-docs/latest/admin/Import-and-Update#setting-up-the-update-process)\n");
+ $oCmd = (clone($oNominatimCmd))
+ ->addParams('replication')
+ ->addParams('--threads', $aResult['index-instances']);
+
+ if (!$aResult['import-osmosis-all']) {
+ $oCmd->addParams('--once');
}
- $sImportFile = CONST_InstallDir.'/osmosischange.osc';
-
- $oCMDDownload = (new \Nominatim\Shell($sPyosmiumBin))
- ->addParams('--server', $sBaseURL)
- ->addParams('--outfile', $sImportFile)
- ->addParams('--size', getSetting('REPLICATION_MAX_DIFF'));
-
- $oCMDImport = (clone $oOsm2pgsqlCmd)->addParams($sImportFile);
-
- while (true) {
- $fStartTime = time();
- $aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
-
- if (!$aLastState['sequence_id']) {
- echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
- exit(1);
- }
-
- echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
-
- $sBatchEnd = $aLastState['lastimportdate'];
- $iEndSequence = $aLastState['sequence_id'];
-
- if ($aLastState['indexed']) {
- // Sleep if the update interval has not yet been reached.
- $fNextUpdate = $aLastState['unix_ts'] + getSetting('REPLICATION_UPDATE_INTERVAL');
- if ($fNextUpdate > $fStartTime) {
- $iSleepTime = $fNextUpdate - $fStartTime;
- echo "Waiting for next update for $iSleepTime sec.";
- sleep($iSleepTime);
- }
-
- // Download the next batch of changes.
- do {
- $fCMDStartTime = time();
- $iNextSeq = (int) $aLastState['sequence_id'];
- unset($aOutput);
-
- $oCMD = (clone $oCMDDownload)->addParams('--start-id', $iNextSeq);
- echo $oCMD->escapedCmd()."\n";
- if (file_exists($sImportFile)) {
- unlink($sImportFile);
- }
- exec($oCMD->escapedCmd(), $aOutput, $iResult);
-
- if ($iResult == 3) {
- $sSleep = getSetting('REPLICATION_RECHECK_INTERVAL');
- echo 'No new updates. Sleeping for '.$sSleep." sec.\n";
- sleep($sSleep);
- } elseif ($iResult != 0) {
- echo 'ERROR: updates failed.';
- exit($iResult);
- } else {
- $iEndSequence = (int)$aOutput[0];
- }
- } while ($iResult);
-
- // get the newest object from the diff file
- $sBatchEnd = 0;
- $iRet = 0;
- $oCMD = new \Nominatim\Shell(CONST_BinDir.'/osm_file_date.py', $sImportFile);
- exec($oCMD->escapedCmd(), $sBatchEnd, $iRet);
- if ($iRet == 5) {
- echo "Diff file is empty. skipping import.\n";
- if (!$aResult['import-osmosis-all']) {
- exit(0);
- } else {
- continue;
- }
- }
- if ($iRet != 0) {
- fail('Error getting date from diff file.');
- }
- $sBatchEnd = $sBatchEnd[0];
-
- // Import the file
- $fCMDStartTime = time();
-
-
- echo $oCMDImport->escapedCmd()."\n";
- unset($sJunk);
- $iErrorLevel = $oCMDImport->run();
- if ($iErrorLevel) {
- echo "Error executing osm2pgsql: $iErrorLevel\n";
- exit($iErrorLevel);
- }
-
- // write the update logs
- $iFileSize = filesize($sImportFile);
- $sSQL = 'INSERT INTO import_osmosis_log';
- $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
- $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
- $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
- $sSQL .= date('Y-m-d H:i:s')."','import')";
- var_Dump($sSQL);
- $oDB->exec($sSQL);
-
- // update the status
- $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
- var_Dump($sSQL);
- $oDB->exec($sSQL);
- echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
- }
-
- // Index file
- if (!$aResult['no-index']) {
- $fCMDStartTime = time();
-
- $oThisIndexCmd = clone($oIndexCmd);
- echo $oThisIndexCmd->escapedCmd()."\n";
- $iErrorLevel = $oThisIndexCmd->run();
- if ($iErrorLevel) {
- echo "Error: $iErrorLevel\n";
- exit($iErrorLevel);
- }
-
- $sSQL = 'INSERT INTO import_osmosis_log';
- $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
- $sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'";
- $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
- $sSQL .= date('Y-m-d H:i:s')."','index')";
- var_Dump($sSQL);
- $oDB->exec($sSQL);
- echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
- } else {
- if ($aResult['import-osmosis-all']) {
- echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
- exit(1);
- }
- }
-
- $fDuration = time() - $fStartTime;
- echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
- if (!$aResult['import-osmosis-all']) exit(0);
+ if ($aResult['no-index']) {
+ $oCmd->addParams('--no-index');
}
+
+ exit($oCmd->run());
}
diff --git a/lib/setup/AddressLevelParser.php b/lib/setup/AddressLevelParser.php
deleted file mode 100644
index a399c955..00000000
--- a/lib/setup/AddressLevelParser.php
+++ /dev/null
@@ -1,98 +0,0 @@
-aLevels = json_decode($sJson, true);
- if (!$this->aLevels) {
- switch (json_last_error()) {
- case JSON_ERROR_NONE:
- break;
- case JSON_ERROR_DEPTH:
- fail('JSON error - Maximum stack depth exceeded');
- break;
- case JSON_ERROR_STATE_MISMATCH:
- fail('JSON error - Underflow or the modes mismatch');
- break;
- case JSON_ERROR_CTRL_CHAR:
- fail('JSON error - Unexpected control character found');
- break;
- case JSON_ERROR_SYNTAX:
- fail('JSON error - Syntax error, malformed JSON');
- break;
- case JSON_ERROR_UTF8:
- fail('JSON error - Malformed UTF-8 characters, possibly incorrectly encoded');
- break;
- default:
- fail('JSON error - Unknown error');
- break;
- }
- }
- }
-
- /**
- * Dump the description into a database table.
- *
- * @param object $oDB Database conneciton to use.
- * @param string $sTable Name of table to create.
- *
- * @return null
- *
- * A new table is created. Any previously existing table is dropped.
- * The table has the following columns:
- * country, class, type, rank_search, rank_address.
- */
- public function createTable($oDB, $sTable)
- {
- $oDB->exec('DROP TABLE IF EXISTS '.$sTable);
- $sSql = 'CREATE TABLE '.$sTable;
- $sSql .= '(country_code varchar(2), class TEXT, type TEXT,';
- $sSql .= ' rank_search SMALLINT, rank_address SMALLINT)';
- $oDB->exec($sSql);
-
- $sSql = 'CREATE UNIQUE INDEX ON '.$sTable.' (country_code, class, type)';
- $oDB->exec($sSql);
-
- $sSql = 'INSERT INTO '.$sTable.' VALUES ';
- foreach ($this->aLevels as $aLevel) {
- $aCountries = array();
- if (isset($aLevel['countries'])) {
- foreach ($aLevel['countries'] as $sCountry) {
- $aCountries[$sCountry] = $oDB->getDBQuoted($sCountry);
- }
- } else {
- $aCountries['NULL'] = 'NULL';
- }
- foreach ($aLevel['tags'] as $sKey => $aValues) {
- foreach ($aValues as $sValue => $mRanks) {
- $aFields = array(
- $oDB->getDBQuoted($sKey),
- $sValue ? $oDB->getDBQuoted($sValue) : 'NULL'
- );
- if (is_array($mRanks)) {
- $aFields[] = (string) $mRanks[0];
- $aFields[] = (string) $mRanks[1];
- } else {
- $aFields[] = (string) $mRanks;
- $aFields[] = (string) $mRanks;
- }
- $sLine = ','.join(',', $aFields).'),';
-
- foreach ($aCountries as $sCountries) {
- $sSql .= '('.$sCountries.$sLine;
- }
- }
- }
- }
- $oDB->exec(rtrim($sSql, ','));
- }
-}
diff --git a/lib/setup/SetupClass.php b/lib/setup/SetupClass.php
index d17fdca7..a865b8f0 100755
--- a/lib/setup/SetupClass.php
+++ b/lib/setup/SetupClass.php
@@ -2,7 +2,6 @@
namespace Nominatim\Setup;
-require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
require_once(CONST_LibDir.'/Shell.php');
class SetupFunctions
@@ -19,6 +18,7 @@ class SetupFunctions
protected $bNoPartitions;
protected $bDrop;
protected $oDB = null;
+ protected $oNominatimCmd;
public function __construct(array $aCMDResult)
{
@@ -81,6 +81,14 @@ class SetupFunctions
}
$this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];
+
+ $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
+ if ($this->bQuiet) {
+ $this->oNominatimCmd->addParams('--quiet');
+ }
+ if ($this->bVerbose) {
+ $this->oNominatimCmd->addParams('--verbose');
+ }
}
public function createDB()
@@ -256,8 +264,7 @@ class SetupFunctions
$this->dropTable('search_name');
}
- $oAlParser = new AddressLevelParser(getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json'));
- $oAlParser->createTable($this->db(), 'address_levels');
+ (clone($this->oNominatimCmd))->addParams('refresh', '--address-levels')->run();
}
public function createTableTriggers()
@@ -283,9 +290,7 @@ class SetupFunctions
public function createPartitionFunctions()
{
info('Create Partition Functions');
-
- $sTemplate = file_get_contents(CONST_DataDir.'/sql/partition-functions.src.sql');
- $this->pgsqlRunPartitionScript($sTemplate);
+ $this->createSqlFunctions(); // also create partition functions
}
public function importWikipediaArticles()
@@ -549,19 +554,10 @@ class SetupFunctions
{
$this->checkModulePresence(); // raises exception on failure
- $oBaseCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
- ->addParams('index');
-
- if ($this->bQuiet) {
- $oBaseCmd->addParams('-q');
- }
- if ($this->bVerbose) {
- $oBaseCmd->addParams('-v');
- }
+ $oBaseCmd = (clone $this->oNominatimCmd)->addParams('index');
info('Index ranks 0 - 4');
$oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4);
- echo $oCmd->escapedCmd();
$iStatus = $oCmd->run();
if ($iStatus != 0) {
@@ -790,43 +786,18 @@ class SetupFunctions
private function createSqlFunctions()
{
- $sBasePath = CONST_DataDir.'/sql/functions/';
- $sTemplate = file_get_contents($sBasePath.'utils.sql');
- $sTemplate .= file_get_contents($sBasePath.'normalization.sql');
- $sTemplate .= file_get_contents($sBasePath.'ranking.sql');
- $sTemplate .= file_get_contents($sBasePath.'importance.sql');
- $sTemplate .= file_get_contents($sBasePath.'address_lookup.sql');
- $sTemplate .= file_get_contents($sBasePath.'interpolation.sql');
- if ($this->db()->tableExists('place')) {
- $sTemplate .= file_get_contents($sBasePath.'place_triggers.sql');
- }
- if ($this->db()->tableExists('placex')) {
- $sTemplate .= file_get_contents($sBasePath.'placex_triggers.sql');
- }
- if ($this->db()->tableExists('location_postcode')) {
- $sTemplate .= file_get_contents($sBasePath.'postcode_triggers.sql');
- }
- $sTemplate = str_replace('{modulepath}', $this->sModulePath, $sTemplate);
- if ($this->bEnableDiffUpdates) {
- $sTemplate = str_replace('RETURN NEW; -- %DIFFUPDATES%', '--', $sTemplate);
+ $oCmd = (clone($this->oNominatimCmd))
+ ->addParams('refresh', '--functions');
+
+ if (!$this->bEnableDiffUpdates) {
+ $oCmd->addParams('--no-diff-updates');
}
+
if ($this->bEnableDebugStatements) {
- $sTemplate = str_replace('--DEBUG:', '', $sTemplate);
- }
- if (getSettingBool('LIMIT_REINDEXING')) {
- $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate);
- }
- if (!getSettingBool('USE_US_TIGER_DATA')) {
- $sTemplate = str_replace('-- %NOTIGERDATA% ', '', $sTemplate);
- }
- if (!getSettingBool('USE_AUX_LOCATION_DATA')) {
- $sTemplate = str_replace('-- %NOAUXDATA% ', '', $sTemplate);
+ $oCmd->addParams('--enable-debug-statements');
}
- $sReverseOnly = $this->dbReverseOnly() ? 'true' : 'false';
- $sTemplate = str_replace('%REVERSE-ONLY%', $sReverseOnly, $sTemplate);
-
- $this->pgsqlRunScript($sTemplate);
+ $oCmd->run();
}
private function pgsqlRunPartitionScript($sTemplate)
diff --git a/nominatim/cli.py b/nominatim/cli.py
index 6c110ce7..4873308d 100644
--- a/nominatim/cli.py
+++ b/nominatim/cli.py
@@ -2,16 +2,21 @@
Command-line interface to the Nominatim functions for import, update,
database administration and querying.
"""
-import sys
+import datetime as dt
import os
+import sys
+import time
import argparse
import logging
from pathlib import Path
from .config import Configuration
from .tools.exec_utils import run_legacy_script, run_api_script
+from .db.connection import connect
+from .db import status
+from .errors import UsageError
-from .indexer.indexer import Indexer
+LOG = logging.getLogger()
def _num_system_cpus():
try:
@@ -85,7 +90,27 @@ class CommandlineParser:
args.config = Configuration(args.project_dir, args.data_dir / 'settings')
- return args.command.run(args)
+ try:
+ return args.command.run(args)
+ except UsageError as exception:
+ log = logging.getLogger()
+ if log.isEnabledFor(logging.DEBUG):
+ raise # use Python's exception printing
+ log.fatal('FATAL: %s', exception)
+
+ # If we get here, then execution has failed in some way.
+ return 1
+
+
+def _osm2pgsql_options_from_args(args, default_cache, default_threads):
+    """ Set up the standard osm2pgsql from the command line arguments.
+ """
+ return dict(osm2pgsql=args.osm2pgsql_path,
+ osm2pgsql_cache=args.osm2pgsql_cache or default_cache,
+ osm2pgsql_style=args.config.get_import_style_file(),
+ threads=args.threads or default_threads,
+ dsn=args.config.get_libpq_dsn(),
+ flatnode_file=args.config.FLATNODE_FILE)
##### Subcommand classes
#
@@ -98,6 +123,8 @@ class CommandlineParser:
#
# No need to document the functions each time.
# pylint: disable=C0111
+# Using non-top-level imports to make pyosmium optional for replication only.
+# pylint: disable=E0012,C0415
class SetupAll:
@@ -230,26 +257,123 @@ class UpdateReplication:
group.add_argument('--no-index', action='store_false', dest='do_index',
help="""Do not index the new data. Only applicable
together with --once""")
+ group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
+ help='Size of cache to be used by osm2pgsql (in MB)')
+
+ @staticmethod
+ def _init_replication(args):
+ from .tools import replication, refresh
+
+ LOG.warning("Initialising replication updates")
+ conn = connect(args.config.get_libpq_dsn())
+ replication.init_replication(conn, base_url=args.config.REPLICATION_URL)
+ if args.update_functions:
+ LOG.warning("Create functions")
+ refresh.create_functions(conn, args.config, args.data_dir,
+ True, False)
+ conn.close()
+ return 0
+
+
+ @staticmethod
+ def _check_for_updates(args):
+ from .tools import replication
+
+ conn = connect(args.config.get_libpq_dsn())
+ ret = replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL)
+ conn.close()
+ return ret
+
+ @staticmethod
+ def _report_update(batchdate, start_import, start_index):
+ def round_time(delta):
+ return dt.timedelta(seconds=int(delta.total_seconds()))
+
+ end = dt.datetime.now(dt.timezone.utc)
+ LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
+ round_time((start_index or end) - start_import),
+ "Indexing: {} ".format(round_time(end - start_index))
+ if start_index else '',
+ round_time(end - start_import),
+ round_time(end - batchdate))
+
+ @staticmethod
+ def _update(args):
+ from .tools import replication
+ from .indexer.indexer import Indexer
+
+ params = _osm2pgsql_options_from_args(args, 2000, 1)
+ params.update(base_url=args.config.REPLICATION_URL,
+ update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'),
+ import_file=args.project_dir / 'osmosischange.osc',
+ max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
+ indexed_only=not args.once)
+
+ # Sanity check to not overwhelm the Geofabrik servers.
+        if 'download.geofabrik.de' in params['base_url']\
+ and params['update_interval'] < 86400:
+ LOG.fatal("Update interval too low for download.geofabrik.de.\n"
+ "Please check install documentation "
+ "(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
+ "setting-up-the-update-process).")
+ raise UsageError("Invalid replication update interval setting.")
+
+ if not args.once:
+ if not args.do_index:
+ LOG.fatal("Indexing cannot be disabled when running updates continuously.")
+ raise UsageError("Bad argument '--no-index'.")
+ recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
+
+ while True:
+ conn = connect(args.config.get_libpq_dsn())
+ start = dt.datetime.now(dt.timezone.utc)
+ state = replication.update(conn, params)
+ status.log_status(conn, start, 'import')
+ batchdate, _, _ = status.get_status(conn)
+ conn.close()
+
+ if state is not replication.UpdateState.NO_CHANGES and args.do_index:
+ index_start = dt.datetime.now(dt.timezone.utc)
+ indexer = Indexer(args.config.get_libpq_dsn(),
+ args.threads or 1)
+ indexer.index_boundaries(0, 30)
+ indexer.index_by_rank(0, 30)
+
+ conn = connect(args.config.get_libpq_dsn())
+ status.set_indexed(conn, True)
+ status.log_status(conn, index_start, 'index')
+ conn.close()
+ else:
+ index_start = None
+
+ if LOG.isEnabledFor(logging.WARNING):
+ UpdateReplication._report_update(batchdate, start, index_start)
+
+ if args.once:
+ break
+
+ if state is replication.UpdateState.NO_CHANGES:
+ LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
+ time.sleep(recheck_interval)
+
+ return state.value
@staticmethod
def run(args):
- params = ['update.php']
+ try:
+ import osmium # pylint: disable=W0611
+ except ModuleNotFoundError:
+ LOG.fatal("pyosmium not installed. Replication functions not available.\n"
+ "To install pyosmium via pip: pip3 install osmium")
+ return 1
+
if args.init:
- params.append('--init-updates')
- if not args.update_functions:
- params.append('--no-update-functions')
- elif args.check_for_updates:
- params.append('--check-for-updates')
- else:
- if args.once:
- params.append('--import-osmosis')
- else:
- params.append('--import-osmosis-all')
- if not args.do_index:
- params.append('--no-index')
+ return UpdateReplication._init_replication(args)
- return run_legacy_script(*params, nominatim_env=args)
+ if args.check_for_updates:
+ return UpdateReplication._check_for_updates(args)
+ return UpdateReplication._update(args)
class UpdateAddData:
"""\
@@ -320,6 +444,8 @@ class UpdateIndex:
@staticmethod
def run(args):
+ from .indexer.indexer import Indexer
+
indexer = Indexer(args.config.get_libpq_dsn(),
args.threads or _num_system_cpus() or 1)
@@ -328,8 +454,11 @@ class UpdateIndex:
if not args.boundaries_only:
indexer.index_by_rank(args.minrank, args.maxrank)
- if not args.no_boundaries and not args.boundaries_only:
- indexer.update_status_table()
+ if not args.no_boundaries and not args.boundaries_only \
+ and args.minrank == 0 and args.maxrank == 30:
+ conn = connect(args.config.get_libpq_dsn())
+ status.set_indexed(conn, True)
+ conn.close()
return 0
@@ -366,22 +495,34 @@ class UpdateRefresh:
@staticmethod
def run(args):
+ from .tools import refresh
+
if args.postcodes:
- run_legacy_script('update.php', '--calculate-postcodes',
- nominatim_env=args, throw_on_fail=True)
+ LOG.warning("Update postcodes centroid")
+ conn = connect(args.config.get_libpq_dsn())
+ refresh.update_postcodes(conn, args.data_dir)
+ conn.close()
+
if args.word_counts:
- run_legacy_script('update.php', '--recompute-word-counts',
- nominatim_env=args, throw_on_fail=True)
+ LOG.warning('Recompute frequency of full-word search terms')
+ conn = connect(args.config.get_libpq_dsn())
+ refresh.recompute_word_counts(conn, args.data_dir)
+ conn.close()
+
if args.address_levels:
- run_legacy_script('update.php', '--update-address-levels',
- nominatim_env=args, throw_on_fail=True)
+ cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
+ LOG.warning('Updating address levels from %s', cfg)
+ conn = connect(args.config.get_libpq_dsn())
+ refresh.load_address_levels_from_file(conn, cfg)
+ conn.close()
+
if args.functions:
- params = ['setup.php', '--create-functions', '--create-partition-functions']
- if args.diffs:
- params.append('--enable-diff-updates')
- if args.enable_debug_statements:
- params.append('--enable-debug-statements')
- run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
+ LOG.warning('Create functions')
+ conn = connect(args.config.get_libpq_dsn())
+ refresh.create_functions(conn, args.config, args.data_dir,
+ args.diffs, args.enable_debug_statements)
+ conn.close()
+
if args.wiki_data:
run_legacy_script('setup.php', '--import-wikipedia-articles',
nominatim_env=args, throw_on_fail=True)
@@ -392,6 +533,7 @@ class UpdateRefresh:
if args.website:
run_legacy_script('setup.php', '--setup-website',
nominatim_env=args, throw_on_fail=True)
+
return 0
diff --git a/nominatim/config.py b/nominatim/config.py
index 458c828f..4de2052e 100644
--- a/nominatim/config.py
+++ b/nominatim/config.py
@@ -1,10 +1,16 @@
"""
Nominatim configuration accessor.
"""
+import logging
import os
+from pathlib import Path
from dotenv import dotenv_values
+from .errors import UsageError
+
+LOG = logging.getLogger()
+
class Configuration:
""" Load and manage the project configuration.
@@ -20,27 +26,75 @@ class Configuration:
"""
def __init__(self, project_dir, config_dir):
+ self.project_dir = project_dir
+ self.config_dir = config_dir
self._config = dotenv_values(str((config_dir / 'env.defaults').resolve()))
if project_dir is not None:
self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
+ # Add defaults for variables that are left empty to set the default.
+ # They may still be overwritten by environment variables.
+ if not self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG']:
+ self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \
+ str(config_dir / 'address-levels.json')
+
+
def __getattr__(self, name):
name = 'NOMINATIM_' + name
return os.environ.get(name) or self._config[name]
+ def get_bool(self, name):
+ """ Return the given configuration parameter as a boolean.
+ Values of '1', 'yes' and 'true' are accepted as truthy values,
+ everything else is interpreted as false.
+ """
+ return self.__getattr__(name).lower() in ('1', 'yes', 'true')
+
+
+ def get_int(self, name):
+ """ Return the given configuration parameter as an int.
+ """
+ try:
+ return int(self.__getattr__(name))
+ except ValueError:
+ LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
+ raise UsageError("Configuration error.")
+
+
def get_libpq_dsn(self):
""" Get configured database DSN converted into the key/value format
understood by libpq and psycopg.
"""
dsn = self.DATABASE_DSN
+ def quote_param(param):
+ key, val = param.split('=')
+ val = val.replace('\\', '\\\\').replace("'", "\\'")
+ if ' ' in val:
+ val = "'" + val + "'"
+ return key + '=' + val
+
if dsn.startswith('pgsql:'):
# Old PHP DSN format. Convert before returning.
- return dsn[6:].replace(';', ' ')
+ return ' '.join([quote_param(p) for p in dsn[6:].split(';')])
return dsn
+
+ def get_import_style_file(self):
+ """ Return the import style file as a path object. Translates the
+ name of the standard styles automatically into a file in the
+ config style.
+ """
+ style = self.__getattr__('IMPORT_STYLE')
+
+ if style in ('admin', 'street', 'address', 'full', 'extratags'):
+ return self.config_dir / 'import-{}.style'.format(style)
+
+ return Path(style)
+
+
def get_os_env(self):
""" Return a copy of the OS environment with the Nominatim configuration
merged in.
diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py
new file mode 100644
index 00000000..4d30151d
--- /dev/null
+++ b/nominatim/db/connection.py
@@ -0,0 +1,58 @@
+"""
+Specialised connection and cursor functions.
+"""
+import logging
+
+import psycopg2
+import psycopg2.extensions
+import psycopg2.extras
+
+class _Cursor(psycopg2.extras.DictCursor):
+ """ A cursor returning dict-like objects and providing specialised
+ execution functions.
+ """
+
+ def execute(self, query, args=None): # pylint: disable=W0221
+ """ Query execution that logs the SQL query when debugging is enabled.
+ """
+ logger = logging.getLogger()
+ logger.debug(self.mogrify(query, args).decode('utf-8'))
+
+ super().execute(query, args)
+
+ def scalar(self, sql, args=None):
+ """ Execute query that returns a single value. The value is returned.
+ If the query yields more than one row, a ValueError is raised.
+ """
+ self.execute(sql, args)
+
+ if self.rowcount != 1:
+ raise RuntimeError("Query did not return a single row.")
+
+ return self.fetchone()[0]
+
+
+class _Connection(psycopg2.extensions.connection):
+ """ A connection that provides the specialised cursor by default and
+ adds convenience functions for administrating the database.
+ """
+
+ def cursor(self, cursor_factory=_Cursor, **kwargs):
+ """ Return a new cursor. By default the specialised cursor is returned.
+ """
+ return super().cursor(cursor_factory=cursor_factory, **kwargs)
+
+ def table_exists(self, table):
+ """ Check that a table with the given name exists in the database.
+ """
+ with self.cursor() as cur:
+ num = cur.scalar("""SELECT count(*) FROM pg_tables
+ WHERE tablename = %s""", (table, ))
+ return num == 1
+
+
+def connect(dsn):
+ """ Open a connection to the database using the specialised connection
+ factory.
+ """
+ return psycopg2.connect(dsn, connection_factory=_Connection)
diff --git a/nominatim/db/status.py b/nominatim/db/status.py
new file mode 100644
index 00000000..75da3c16
--- /dev/null
+++ b/nominatim/db/status.py
@@ -0,0 +1,82 @@
+"""
+Access and helper functions for the status and status log table.
+"""
+import datetime as dt
+import logging
+import re
+
+from ..tools.exec_utils import get_url
+from ..errors import UsageError
+
+LOG = logging.getLogger()
+
+def compute_database_date(conn):
+ """ Determine the date of the database from the newest object in the
+        database.
+ """
+ # First, find the node with the highest ID in the database
+ with conn.cursor() as cur:
+ osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
+
+ if osmid is None:
+ LOG.fatal("No data found in the database.")
+ raise UsageError("No data found in the database.")
+
+ LOG.info("Using node id %d for timestamp lookup", osmid)
+ # Get the node from the API to find the timestamp when it was created.
+ node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
+ data = get_url(node_url)
+
+ match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
+
+ if match is None:
+ LOG.fatal("The node data downloaded from the API does not contain valid data.\n"
+ "URL used: %s", node_url)
+ raise UsageError("Bad API data.")
+
+ LOG.debug("Found timestamp %s", match[1])
+
+ return dt.datetime.fromisoformat(match[1]).replace(tzinfo=dt.timezone.utc)
+
+
+def set_status(conn, date, seq=None, indexed=True):
+ """ Replace the current status with the given status.
+ """
+ assert date.tzinfo == dt.timezone.utc
+ with conn.cursor() as cur:
+ cur.execute("TRUNCATE TABLE import_status")
+ cur.execute("""INSERT INTO import_status (lastimportdate, sequence_id, indexed)
+ VALUES (%s, %s, %s)""", (date, seq, indexed))
+
+ conn.commit()
+
+
+def get_status(conn):
+ """ Return the current status as a triple of (date, sequence, indexed).
+ If status has not been set up yet, a triple of None is returned.
+ """
+ with conn.cursor() as cur:
+ cur.execute("SELECT * FROM import_status LIMIT 1")
+ if cur.rowcount < 1:
+ return None, None, None
+
+ row = cur.fetchone()
+ return row['lastimportdate'], row['sequence_id'], row['indexed']
+
+
+def set_indexed(conn, state):
+ """ Set the indexed flag in the status table to the given state.
+ """
+ with conn.cursor() as cur:
+ cur.execute("UPDATE import_status SET indexed = %s", (state, ))
+ conn.commit()
+
+
+def log_status(conn, start, event, batchsize=None):
+ """ Write a new status line to the `import_osmosis_log` table.
+ """
+ with conn.cursor() as cur:
+ cur.execute("""INSERT INTO import_osmosis_log
+ (batchend, batchseq, batchsize, starttime, endtime, event)
+ SELECT lastimportdate, sequence_id, %s, %s, now(), %s FROM import_status""",
+ (batchsize, start, event))
diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py
new file mode 100644
index 00000000..abd72519
--- /dev/null
+++ b/nominatim/db/utils.py
@@ -0,0 +1,12 @@
+"""
+Helper functions for handling DB accesses.
+"""
+
+def execute_file(conn, fname):
+ """ Read an SQL file and run its contents against the given connection.
+ """
+ with fname.open('r') as fdesc:
+ sql = fdesc.read()
+ with conn.cursor() as cur:
+ cur.execute(sql)
+ conn.commit()
diff --git a/nominatim/errors.py b/nominatim/errors.py
new file mode 100644
index 00000000..e77f956a
--- /dev/null
+++ b/nominatim/errors.py
@@ -0,0 +1,8 @@
+"""
+Custom exception and error classes for Nominatim.
+"""
+
+class UsageError(Exception):
+ """ An error raised because of bad user input. This error will usually
+ not cause a stack trace to be printed unless debugging is enabled.
+ """
diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py
index 9e16e293..03bed986 100644
--- a/nominatim/tools/exec_utils.py
+++ b/nominatim/tools/exec_utils.py
@@ -2,9 +2,17 @@
Helper functions for executing external programs.
"""
import logging
+import os
import subprocess
+import urllib.request as urlrequest
from urllib.parse import urlencode
+from psycopg2.extensions import parse_dsn
+
+from ..version import NOMINATIM_VERSION
+
+LOG = logging.getLogger()
+
def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False):
""" Run a Nominatim PHP script with the given arguments.
@@ -80,3 +88,51 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
print(result[content_start + 4:].replace('\\n', '\n'))
return 0
+
+
+def run_osm2pgsql(options):
+ """ Run osm2pgsql with the given options.
+ """
+ env = os.environ
+ cmd = [options['osm2pgsql'],
+ '--hstore', '--latlon', '--slim',
+ '--with-forward-dependencies', 'false',
+ '--log-progress', 'true',
+ '--number-processes', str(options['threads']),
+ '--cache', str(options['osm2pgsql_cache']),
+ '--output', 'gazetteer',
+ '--style', str(options['osm2pgsql_style'])
+ ]
+ if options['append']:
+ cmd.append('--append')
+
+ if options['flatnode_file']:
+ cmd.extend(('--flat-nodes', options['flatnode_file']))
+
+ dsn = parse_dsn(options['dsn'])
+ if 'password' in dsn:
+ env['PGPASSWORD'] = dsn['password']
+ if 'dbname' in dsn:
+ cmd.extend(('-d', dsn['dbname']))
+ if 'user' in dsn:
+ cmd.extend(('--username', dsn['user']))
+ for param in ('host', 'port'):
+ if param in dsn:
+ cmd.extend(('--' + param, dsn[param]))
+
+ cmd.append(str(options['import_file']))
+
+ subprocess.run(cmd, cwd=options.get('cwd', '.'), env=env, check=True)
+
+
+def get_url(url):
+ """ Get the contents from the given URL and return it as a UTF-8 string.
+ """
+ headers = {"User-Agent" : "Nominatim/" + NOMINATIM_VERSION}
+
+ try:
+ with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
+ return response.read().decode('utf-8')
+ except:
+ LOG.fatal('Failed to load URL: %s', url)
+ raise
diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py
new file mode 100644
index 00000000..5fbb07f8
--- /dev/null
+++ b/nominatim/tools/refresh.py
@@ -0,0 +1,169 @@
+"""
+Functions for bringing auxiliary data in the database up-to-date.
+"""
+import json
+import re
+
+from psycopg2.extras import execute_values
+
+from ..db.utils import execute_file
+
+def update_postcodes(conn, datadir):
+ """ Recalculate postcode centroids and add, remove and update entries in the
+        location_postcode table. `conn` is an open connection to the database.
+ """
+ execute_file(conn, datadir / 'sql' / 'update-postcodes.sql')
+
+
+def recompute_word_counts(conn, datadir):
+ """ Compute the frequency of full-word search terms.
+ """
+ execute_file(conn, datadir / 'sql' / 'words_from_search_name.sql')
+
+
+def _add_address_level_rows_from_entry(rows, entry):
+ """ Converts a single entry from the JSON format for address rank
+ descriptions into a flat format suitable for inserting into a
+ PostgreSQL table and adds these lines to `rows`.
+ """
+ countries = entry.get('countries') or (None, )
+ for key, values in entry['tags'].items():
+ for value, ranks in values.items():
+ if isinstance(ranks, list):
+ rank_search, rank_address = ranks
+ else:
+ rank_search = rank_address = ranks
+ if not value:
+ value = None
+ for country in countries:
+ rows.append((country, key, value, rank_search, rank_address))
+
+def load_address_levels(conn, table, levels):
+    """ Replace the `address_levels` table with the contents of `levels`.
+
+        A new table is created; any previously existing table is dropped.
+ The table has the following columns:
+ country, class, type, rank_search, rank_address
+ """
+ rows = []
+ for entry in levels:
+ _add_address_level_rows_from_entry(rows, entry)
+
+ with conn.cursor() as cur:
+ cur.execute('DROP TABLE IF EXISTS {}'.format(table))
+
+ cur.execute("""CREATE TABLE {} (country_code varchar(2),
+ class TEXT,
+ type TEXT,
+ rank_search SMALLINT,
+ rank_address SMALLINT)""".format(table))
+
+ execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows)
+
+ cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
+
+ conn.commit()
+
+def load_address_levels_from_file(conn, config_file):
+ """ Replace the `address_levels` table with the contents of the config
+ file.
+ """
+ with config_file.open('r') as fdesc:
+ load_address_levels(conn, 'address_levels', json.load(fdesc))
+
+PLPGSQL_BASE_MODULES = (
+ 'utils.sql',
+ 'normalization.sql',
+ 'ranking.sql',
+ 'importance.sql',
+ 'address_lookup.sql',
+ 'interpolation.sql'
+)
+
+PLPGSQL_TABLE_MODULES = (
+ ('place', 'place_triggers.sql'),
+ ('placex', 'placex_triggers.sql'),
+ ('location_postcode', 'postcode_triggers.sql')
+)
+
+def _get_standard_function_sql(conn, config, sql_dir, enable_diff_updates, enable_debug):
+ """ Read all applicable SQLs containing PL/pgSQL functions, replace
+        placeholders and execute them.
+ """
+ sql_func_dir = sql_dir / 'functions'
+ sql = ''
+
+ # Get the basic set of functions that is always imported.
+ for sql_file in PLPGSQL_BASE_MODULES:
+ with (sql_func_dir / sql_file).open('r') as fdesc:
+ sql += fdesc.read()
+
+ # Some files require the presence of a certain table
+ for table, fname in PLPGSQL_TABLE_MODULES:
+ if conn.table_exists(table):
+ with (sql_func_dir / fname).open('r') as fdesc:
+ sql += fdesc.read()
+
+ # Replace placeholders.
+ sql = sql.replace('{modulepath}',
+ config.DATABASE_MODULE_PATH or str((config.project_dir / 'module').resolve()))
+
+ if enable_diff_updates:
+ sql = sql.replace('RETURN NEW; -- %DIFFUPDATES%', '--')
+
+ if enable_debug:
+ sql = sql.replace('--DEBUG:', '')
+
+ if config.get_bool('LIMIT_REINDEXING'):
+ sql = sql.replace('--LIMIT INDEXING:', '')
+
+ if not config.get_bool('USE_US_TIGER_DATA'):
+ sql = sql.replace('-- %NOTIGERDATA% ', '')
+
+ if not config.get_bool('USE_AUX_LOCATION_DATA'):
+ sql = sql.replace('-- %NOAUXDATA% ', '')
+
+ reverse_only = 'false' if conn.table_exists('search_name') else 'true'
+
+ return sql.replace('%REVERSE-ONLY%', reverse_only)
+
+
+def replace_partition_string(sql, partitions):
+ """ Replace a partition template with the actual partition code.
+ """
+ for match in re.findall('^-- start(.*?)^-- end', sql, re.M | re.S):
+ repl = ''
+ for part in partitions:
+ repl += match.replace('-partition-', str(part))
+ sql = sql.replace(match, repl)
+
+ return sql
+
+def _get_partition_function_sql(conn, sql_dir):
+ """ Create functions that work on partition tables.
+ """
+ with conn.cursor() as cur:
+ cur.execute('SELECT distinct partition FROM country_name')
+ partitions = set([0])
+ for row in cur:
+ partitions.add(row[0])
+
+ with (sql_dir / 'partition-functions.src.sql').open('r') as fdesc:
+ sql = fdesc.read()
+
+ return replace_partition_string(sql, sorted(partitions))
+
+def create_functions(conn, config, data_dir,
+ enable_diff_updates=True, enable_debug=False):
+ """ (Re)create the PL/pgSQL functions.
+ """
+ sql_dir = data_dir / 'sql'
+
+ sql = _get_standard_function_sql(conn, config, sql_dir,
+ enable_diff_updates, enable_debug)
+ sql += _get_partition_function_sql(conn, sql_dir)
+
+ with conn.cursor() as cur:
+ cur.execute(sql)
+
+ conn.commit()
diff --git a/nominatim/tools/replication.py b/nominatim/tools/replication.py
new file mode 100644
index 00000000..c7d0d3e5
--- /dev/null
+++ b/nominatim/tools/replication.py
@@ -0,0 +1,119 @@
+"""
+Functions for updating a database from a replication source.
+"""
+import datetime as dt
+from enum import Enum
+import logging
+import time
+
+from osmium.replication.server import ReplicationServer
+from osmium import WriteHandler
+
+from ..db import status
+from .exec_utils import run_osm2pgsql
+from ..errors import UsageError
+
+LOG = logging.getLogger()
+
+def init_replication(conn, base_url):
+ """ Set up replication for the server at the given base URL.
+ """
+ LOG.info("Using replication source: %s", base_url)
+ date = status.compute_database_date(conn)
+
+ # margin of error to make sure we get all data
+ date -= dt.timedelta(hours=3)
+
+ repl = ReplicationServer(base_url)
+
+ seq = repl.timestamp_to_sequence(date)
+
+ if seq is None:
+ LOG.fatal("Cannot reach the configured replication service '%s'.\n"
+ "Does the URL point to a directory containing OSM update data?",
+ base_url)
+ raise UsageError("Failed to reach replication service")
+
+ status.set_status(conn, date=date, seq=seq)
+
+ LOG.warning("Updates intialised at sequence %s (%s)", seq, date)
+
+
+def check_for_updates(conn, base_url):
+ """ Check if new data is available from the replication service at the
+ given base URL.
+ """
+ _, seq, _ = status.get_status(conn)
+
+ if seq is None:
+ LOG.error("Replication not set up. "
+ "Please run 'nominatim replication --init' first.")
+ return 254
+
+ state = ReplicationServer(base_url).get_state_info()
+
+ if state is None:
+ LOG.error("Cannot get state for URL %s.", base_url)
+ return 253
+
+ if state.sequence <= seq:
+ LOG.warning("Database is up to date.")
+ return 2
+
+ LOG.warning("New data available (%i => %i).", seq, state.sequence)
+ return 0
+
+class UpdateState(Enum):
+ """ Possible states after an update has run.
+ """
+
+ UP_TO_DATE = 0
+ MORE_PENDING = 2
+ NO_CHANGES = 3
+
+
+def update(conn, options):
+ """ Update database from the next batch of data. Returns the state of
+ updates according to `UpdateState`.
+ """
+ startdate, startseq, indexed = status.get_status(conn)
+
+ if startseq is None:
+ LOG.error("Replication not set up. "
+ "Please run 'nominatim replication --init' first.")
+ raise UsageError("Replication not set up.")
+
+ if not indexed and options['indexed_only']:
+ LOG.info("Skipping update. There is data that needs indexing.")
+ return UpdateState.MORE_PENDING
+
+ last_since_update = dt.datetime.now(dt.timezone.utc) - startdate
+ update_interval = dt.timedelta(seconds=options['update_interval'])
+ if last_since_update < update_interval:
+ duration = (update_interval - last_since_update).seconds
+ LOG.warning("Sleeping for %s sec before next update.", duration)
+ time.sleep(duration)
+
+ if options['import_file'].exists():
+ options['import_file'].unlink()
+
+ # Read updates into file.
+ repl = ReplicationServer(options['base_url'])
+
+ outhandler = WriteHandler(str(options['import_file']))
+ endseq = repl.apply_diffs(outhandler, startseq,
+ max_size=options['max_diff_size'] * 1024)
+ outhandler.close()
+
+ if endseq is None:
+ return UpdateState.NO_CHANGES
+
+ # Consume updates with osm2pgsql.
+ options['append'] = True
+ run_osm2pgsql(options)
+
+ # Write the current status to the file
+ endstate = repl.get_state_info(endseq)
+ status.set_status(conn, endstate.timestamp, seq=endseq, indexed=False)
+
+ return UpdateState.UP_TO_DATE
diff --git a/nominatim/version.py b/nominatim/version.py
new file mode 100644
index 00000000..a2ddc9fa
--- /dev/null
+++ b/nominatim/version.py
@@ -0,0 +1,5 @@
+"""
+Version information for Nominatim.
+"""
+
+NOMINATIM_VERSION = "3.6.0"
diff --git a/settings/env.defaults b/settings/env.defaults
index fbad3e33..e2eda340 100644
--- a/settings/env.defaults
+++ b/settings/env.defaults
@@ -57,6 +57,9 @@ NOMINATIM_HTTP_PROXY_HOST=proxy.mydomain.com
NOMINATIM_HTTP_PROXY_PORT=3128
NOMINATIM_HTTP_PROXY_LOGIN=
NOMINATIM_HTTP_PROXY_PASSWORD=
+# Also set these standard environment variables.
+# HTTP_PROXY="http://user:pass@10.10.1.10:1080"
+# HTTPS_PROXY="http://user:pass@10.10.1.10:1080"
# Location of the osm2pgsql binary.
# When empty, osm2pgsql is expected to reside in the osm2pgsql directory in
@@ -64,10 +67,6 @@ NOMINATIM_HTTP_PROXY_PASSWORD=
# EXPERT ONLY. You should usually use the supplied osm2pgsql.
NOMINATIM_OSM2PGSQL_BINARY=
-# Location of pyosmium-get-changes.
-# Only needed when running updates.
-NOMINATIM_PYOSMIUM_BINARY=
-
# Directory where to find US Tiger data files to import.
# Used with setup.php --import-tiger-data. When unset, the data is expected
# to be located under 'data/tiger' in the source tree.
diff --git a/sql/tables.sql b/sql/tables.sql
index 5686bcd2..8647e304 100644
--- a/sql/tables.sql
+++ b/sql/tables.sql
@@ -1,6 +1,6 @@
drop table if exists import_status;
CREATE TABLE import_status (
- lastimportdate timestamp NOT NULL,
+ lastimportdate timestamp with time zone NOT NULL,
sequence_id integer,
indexed boolean
);
diff --git a/test/python/conftest.py b/test/python/conftest.py
index 1cc9ef9c..8b0ba145 100644
--- a/test/python/conftest.py
+++ b/test/python/conftest.py
@@ -1,26 +1,155 @@
+import itertools
import sys
from pathlib import Path
import psycopg2
+import psycopg2.extras
import pytest
+SRC_DIR = Path(__file__) / '..' / '..' / '..'
+
# always test against the source
-sys.path.insert(0, str((Path(__file__) / '..' / '..' / '..').resolve()))
+sys.path.insert(0, str(SRC_DIR.resolve()))
+
+from nominatim.config import Configuration
+from nominatim.db import connection
+
+class _TestingCursor(psycopg2.extras.DictCursor):
+ """ Extension to the DictCursor class that provides execution
+ short-cuts that simplify writing assertions.
+ """
+
+ def scalar(self, sql, params=None):
+ """ Execute a query with a single return value and return this value.
+ Raises an assertion when not exactly one row is returned.
+ """
+ self.execute(sql, params)
+ assert self.rowcount == 1
+ return self.fetchone()[0]
+
+ def row_set(self, sql, params=None):
+ """ Execute a query and return the result as a set of tuples.
+ """
+ self.execute(sql, params)
+ if self.rowcount == 1:
+ return set(tuple(self.fetchone()))
+
+ return set((tuple(row) for row in self))
@pytest.fixture
def temp_db(monkeypatch):
+ """ Create an empty database for the test. The database name is also
+ exported into NOMINATIM_DATABASE_DSN.
+ """
name = 'test_nominatim_python_unittest'
- with psycopg2.connect(database='postgres') as conn:
- conn.set_isolation_level(0)
- with conn.cursor() as cur:
- cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
- cur.execute('CREATE DATABASE {}'.format(name))
+ conn = psycopg2.connect(database='postgres')
+
+ conn.set_isolation_level(0)
+ with conn.cursor() as cur:
+ cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
+ cur.execute('CREATE DATABASE {}'.format(name))
+
+ conn.close()
monkeypatch.setenv('NOMINATIM_DATABASE_DSN' , 'dbname=' + name)
yield name
- with psycopg2.connect(database='postgres') as conn:
- conn.set_isolation_level(0)
- with conn.cursor() as cur:
- cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
+ conn = psycopg2.connect(database='postgres')
+
+ conn.set_isolation_level(0)
+ with conn.cursor() as cur:
+ cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
+
+ conn.close()
+
+@pytest.fixture
+def temp_db_with_extensions(temp_db):
+ conn = psycopg2.connect(database=temp_db)
+ with conn.cursor() as cur:
+ cur.execute('CREATE EXTENSION hstore; CREATE EXTENSION postgis;')
+ conn.commit()
+ conn.close()
+
+ return temp_db
+
+@pytest.fixture
+def temp_db_conn(temp_db):
+ """ Connection to the test database.
+ """
+ conn = connection.connect('dbname=' + temp_db)
+ yield conn
+ conn.close()
+
+
+@pytest.fixture
+def temp_db_cursor(temp_db):
+ """ Connection and cursor towards the test database. The connection will
+ be in auto-commit mode.
+ """
+ conn = psycopg2.connect('dbname=' + temp_db)
+ conn.set_isolation_level(0)
+ with conn.cursor(cursor_factory=_TestingCursor) as cur:
+ yield cur
+ conn.close()
+
+
+@pytest.fixture
+def def_config():
+ return Configuration(None, SRC_DIR.resolve() / 'settings')
+
+
+@pytest.fixture
+def status_table(temp_db_conn):
+ """ Create an empty version of the status table and
+ the status logging table.
+ """
+ with temp_db_conn.cursor() as cur:
+ cur.execute("""CREATE TABLE import_status (
+ lastimportdate timestamp with time zone NOT NULL,
+ sequence_id integer,
+ indexed boolean
+ )""")
+ cur.execute("""CREATE TABLE import_osmosis_log (
+ batchend timestamp,
+ batchseq integer,
+ batchsize bigint,
+ starttime timestamp,
+ endtime timestamp,
+ event text
+ )""")
+ temp_db_conn.commit()
+
+
+@pytest.fixture
+def place_table(temp_db_with_extensions, temp_db_conn):
+ """ Create an empty version of the place table.
+ """
+ with temp_db_conn.cursor() as cur:
+ cur.execute("""CREATE TABLE place (
+ osm_id int8 NOT NULL,
+ osm_type char(1) NOT NULL,
+ class text NOT NULL,
+ type text NOT NULL,
+ name hstore,
+ admin_level smallint,
+ address hstore,
+ extratags hstore,
+ geometry Geometry(Geometry,4326) NOT NULL)""")
+ temp_db_conn.commit()
+
+
+@pytest.fixture
+def place_row(place_table, temp_db_cursor):
+ """ A factory for rows in the place table. The table is created as a
+ prerequisite to the fixture.
+ """
+ idseq = itertools.count(1001)
+ def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
+ admin_level=None, address=None, extratags=None, geom=None):
+ temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
+ (osm_id or next(idseq), osm_type, cls, typ, names,
+ admin_level, address, extratags,
+ geom or 'SRID=4326;POINT(0 0 )'))
+
+ return _insert
diff --git a/test/python/test_cli.py b/test/python/test_cli.py
index 92b42372..cde84759 100644
--- a/test/python/test_cli.py
+++ b/test/python/test_cli.py
@@ -1,10 +1,21 @@
"""
Tests for command line interface wrapper.
+
+These tests just check that the various command line parameters route to the
+correct functionality. They use a lot of monkeypatching to avoid executing
+the actual functions.
"""
+import datetime as dt
import psycopg2
import pytest
+import time
import nominatim.cli
+import nominatim.indexer.indexer
+import nominatim.tools.refresh
+import nominatim.tools.replication
+from nominatim.errors import UsageError
+from nominatim.db import status
def call_nominatim(*args):
return nominatim.cli.nominatim(module_dir='build/module',
@@ -18,9 +29,9 @@ class MockParamCapture:
""" Mock that records the parameters with which a function was called
as well as the number of calls.
"""
- def __init__(self):
+ def __init__(self, retval=0):
self.called = 0
- self.return_value = 0
+ self.return_value = retval
def __call__(self, *args, **kwargs):
self.called += 1
@@ -54,7 +65,6 @@ def test_cli_help(capsys):
(('import', '--continue', 'load-data'), 'setup'),
(('freeze',), 'setup'),
(('special-phrases',), 'specialphrases'),
- (('replication',), 'update'),
(('add-data', '--tiger-data', 'tiger'), 'setup'),
(('add-data', '--file', 'foo.osm'), 'update'),
(('check-database',), 'check_import_finished'),
@@ -82,14 +92,12 @@ def test_add_data_command(mock_run_legacy, name, oid):
(['--boundaries-only'], 1, 0),
(['--no-boundaries'], 0, 1),
(['--boundaries-only', '--no-boundaries'], 0, 0)])
-def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks):
- with psycopg2.connect(database=temp_db) as conn:
- with conn.cursor() as cur:
- cur.execute("CREATE TABLE import_status (indexed bool)")
+def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
+ temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
bnd_mock = MockParamCapture()
- monkeypatch.setattr(nominatim.cli.Indexer, 'index_boundaries', bnd_mock)
+ monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock)
rank_mock = MockParamCapture()
- monkeypatch.setattr(nominatim.cli.Indexer, 'index_by_rank', rank_mock)
+ monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock)
assert 0 == call_nominatim('index', *params)
@@ -98,29 +106,115 @@ def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks):
@pytest.mark.parametrize("command,params", [
- ('postcodes', ('update.php', '--calculate-postcodes')),
- ('word-counts', ('update.php', '--recompute-word-counts')),
- ('address-levels', ('update.php', '--update-address-levels')),
- ('functions', ('setup.php',)),
('wiki-data', ('setup.php', '--import-wikipedia-articles')),
('importance', ('update.php', '--recompute-importance')),
('website', ('setup.php', '--setup-website')),
])
-def test_refresh_command(mock_run_legacy, command, params):
+def test_refresh_legacy_command(mock_run_legacy, temp_db, command, params):
assert 0 == call_nominatim('refresh', '--' + command)
assert mock_run_legacy.called == 1
assert len(mock_run_legacy.last_args) >= len(params)
assert mock_run_legacy.last_args[:len(params)] == params
+@pytest.mark.parametrize("command,func", [
+ ('postcodes', 'update_postcodes'),
+ ('word-counts', 'recompute_word_counts'),
+ ('address-levels', 'load_address_levels_from_file'),
+ ('functions', 'create_functions'),
+ ])
+def test_refresh_command(monkeypatch, temp_db, command, func):
+ func_mock = MockParamCapture()
+ monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
-def test_refresh_importance_computed_after_wiki_import(mock_run_legacy):
+ assert 0 == call_nominatim('refresh', '--' + command)
+ assert func_mock.called == 1
+
+
+def test_refresh_importance_computed_after_wiki_import(mock_run_legacy, temp_db):
assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
assert mock_run_legacy.called == 2
assert mock_run_legacy.last_args == ('update.php', '--recompute-importance')
+@pytest.mark.parametrize("params,func", [
+ (('--init', '--no-update-functions'), 'init_replication'),
+ (('--check-for-updates',), 'check_for_updates')
+ ])
+def test_replication_command(monkeypatch, temp_db, params, func):
+ func_mock = MockParamCapture()
+ monkeypatch.setattr(nominatim.tools.replication, func, func_mock)
+
+ assert 0 == call_nominatim('replication', *params)
+ assert func_mock.called == 1
+
+
+def test_replication_update_bad_interval(monkeypatch, temp_db):
+ monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx')
+
+ assert call_nominatim('replication') == 1
+
+
+def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
+ monkeypatch.setenv('NOMINATIM_REPLICATION_URL',
+ 'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates')
+
+ assert call_nominatim('replication') == 1
+
+
+@pytest.mark.parametrize("state, retval", [
+ (nominatim.tools.replication.UpdateState.UP_TO_DATE, 0),
+ (nominatim.tools.replication.UpdateState.NO_CHANGES, 3)
+ ])
+def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn,
+ status_table, state, retval):
+ status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
+ func_mock = MockParamCapture(retval=state)
+ monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)
+
+ assert retval == call_nominatim('replication', '--once', '--no-index')
+
+
+def test_replication_update_continuous(monkeypatch, temp_db_conn, status_table):
+ status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
+ states = [nominatim.tools.replication.UpdateState.UP_TO_DATE,
+ nominatim.tools.replication.UpdateState.UP_TO_DATE]
+ monkeypatch.setattr(nominatim.tools.replication, 'update',
+ lambda *args, **kwargs: states.pop())
+
+ index_mock = MockParamCapture()
+ monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
+ monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
+
+ with pytest.raises(IndexError):
+ call_nominatim('replication')
+
+ assert index_mock.called == 4
+
+
+def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, status_table):
+ status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
+ states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
+ nominatim.tools.replication.UpdateState.UP_TO_DATE]
+ monkeypatch.setattr(nominatim.tools.replication, 'update',
+ lambda *args, **kwargs: states.pop())
+
+ index_mock = MockParamCapture()
+ monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
+ monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
+
+ sleep_mock = MockParamCapture()
+ monkeypatch.setattr(time, 'sleep', sleep_mock)
+
+ with pytest.raises(IndexError):
+ call_nominatim('replication')
+
+ assert index_mock.called == 2
+ assert sleep_mock.called == 1
+ assert sleep_mock.last_args[0] == 60
+
+
@pytest.mark.parametrize("params", [
('search', '--query', 'new'),
('reverse', '--lat', '0', '--lon', '0'),
diff --git a/test/python/test_config.py b/test/python/test_config.py
index bada9d86..4578be13 100644
--- a/test/python/test_config.py
+++ b/test/python/test_config.py
@@ -7,6 +7,7 @@ import tempfile
import pytest
from nominatim.config import Configuration
+from nominatim.errors import UsageError
DEFCFG_DIR = Path(__file__) / '..' / '..' / '..' / 'settings'
@@ -15,6 +16,7 @@ def test_no_project_dir():
assert config.DATABASE_WEBUSER == 'www-data'
+
def test_prefer_project_setting_over_default():
with tempfile.TemporaryDirectory() as project_dir:
with open(project_dir + '/.env', 'w') as envfile:
@@ -24,6 +26,7 @@ def test_prefer_project_setting_over_default():
assert config.DATABASE_WEBUSER == 'apache'
+
def test_prefer_os_environ_over_project_setting(monkeypatch):
with tempfile.TemporaryDirectory() as project_dir:
with open(project_dir + '/.env', 'w') as envfile:
@@ -35,6 +38,7 @@ def test_prefer_os_environ_over_project_setting(monkeypatch):
assert config.DATABASE_WEBUSER == 'nobody'
+
def test_get_os_env_add_defaults(monkeypatch):
config = Configuration(None, DEFCFG_DIR)
@@ -42,6 +46,7 @@ def test_get_os_env_add_defaults(monkeypatch):
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'www-data'
+
def test_get_os_env_prefer_os_environ(monkeypatch):
config = Configuration(None, DEFCFG_DIR)
@@ -49,11 +54,13 @@ def test_get_os_env_prefer_os_environ(monkeypatch):
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody'
+
def test_get_libpq_dsn_convert_default():
config = Configuration(None, DEFCFG_DIR)
assert config.get_libpq_dsn() == 'dbname=nominatim'
+
def test_get_libpq_dsn_convert_php(monkeypatch):
config = Configuration(None, DEFCFG_DIR)
@@ -62,6 +69,19 @@ def test_get_libpq_dsn_convert_php(monkeypatch):
assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost'
+
+@pytest.mark.parametrize("val,expect", [('foo bar', "'foo bar'"),
+ ("xy'z", "xy\\'z"),
+ ])
+def test_get_libpq_dsn_convert_php_special_chars(monkeypatch, val, expect):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_DATABASE_DSN',
+ 'pgsql:dbname=gis;password={}'.format(val))
+
+ assert config.get_libpq_dsn() == "dbname=gis password={}".format(expect)
+
+
def test_get_libpq_dsn_convert_libpq(monkeypatch):
config = Configuration(None, DEFCFG_DIR)
@@ -69,3 +89,68 @@ def test_get_libpq_dsn_convert_libpq(monkeypatch):
'host=localhost dbname=gis password=foo')
assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo'
+
+
+@pytest.mark.parametrize("value,result",
+ [(x, True) for x in ('1', 'true', 'True', 'yes', 'YES')] +
+ [(x, False) for x in ('0', 'false', 'no', 'NO', 'x')])
+def test_get_bool(monkeypatch, value, result):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_FOOBAR', value)
+
+ assert config.get_bool('FOOBAR') == result
+
+def test_get_bool_empty():
+ config = Configuration(None, DEFCFG_DIR)
+
+ assert config.DATABASE_MODULE_PATH == ''
+ assert config.get_bool('DATABASE_MODULE_PATH') == False
+
+
+@pytest.mark.parametrize("value,result", [('0', 0), ('1', 1),
+ ('85762513444', 85762513444)])
+def test_get_int_success(monkeypatch, value, result):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_FOOBAR', value)
+
+ assert config.get_int('FOOBAR') == result
+
+
+@pytest.mark.parametrize("value", ['1b', 'fg', '0x23'])
+def test_get_int_bad_values(monkeypatch, value):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_FOOBAR', value)
+
+ with pytest.raises(UsageError):
+ config.get_int('FOOBAR')
+
+
+def test_get_int_empty():
+ config = Configuration(None, DEFCFG_DIR)
+
+ assert config.DATABASE_MODULE_PATH == ''
+
+ with pytest.raises(UsageError):
+ config.get_int('DATABASE_MODULE_PATH')
+
+
+def test_get_import_style_intern(monkeypatch):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'street')
+
+ expected = DEFCFG_DIR / 'import-street.style'
+
+ assert config.get_import_style_file() == expected
+
+
+@pytest.mark.parametrize("value", ['custom', '/foo/bar.stye'])
+def test_get_import_style_intern(monkeypatch, value):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', value)
+
+ assert str(config.get_import_style_file()) == value
diff --git a/test/python/test_db_connection.py b/test/python/test_db_connection.py
new file mode 100644
index 00000000..ef1ae741
--- /dev/null
+++ b/test/python/test_db_connection.py
@@ -0,0 +1,32 @@
+"""
+Tests for specialised connection and cursor classes.
+"""
+import pytest
+
+from nominatim.db.connection import connect
+
+@pytest.fixture
+def db(temp_db):
+ conn = connect('dbname=' + temp_db)
+ yield conn
+ conn.close()
+
+
+def test_connection_table_exists(db, temp_db_cursor):
+    assert not db.table_exists('foobar')
+
+    temp_db_cursor.execute('CREATE TABLE foobar (id INT)')
+
+    assert db.table_exists('foobar')
+
+
+def test_cursor_scalar(db, temp_db_cursor):
+ temp_db_cursor.execute('CREATE TABLE dummy (id INT)')
+
+ with db.cursor() as cur:
+ assert cur.scalar('SELECT count(*) FROM dummy') == 0
+
+def test_cursor_scalar_many_rows(db):
+ with db.cursor() as cur:
+ with pytest.raises(RuntimeError):
+ cur.scalar('SELECT * FROM pg_tables')
diff --git a/test/python/test_db_status.py b/test/python/test_db_status.py
new file mode 100644
index 00000000..399a0036
--- /dev/null
+++ b/test/python/test_db_status.py
@@ -0,0 +1,114 @@
+"""
+Tests for status table manipulation.
+"""
+import datetime as dt
+
+import pytest
+
+import nominatim.db.status
+from nominatim.errors import UsageError
+
+def test_compute_database_date_place_empty(status_table, place_table, temp_db_conn):
+ with pytest.raises(UsageError):
+ nominatim.db.status.compute_database_date(temp_db_conn)
+
+OSM_NODE_DATA = """\
+<osm version="0.6" generator="OpenStreetMap server" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
+<node id="45673" visible="true" version="1" changeset="2047" timestamp="2006-01-27T22:09:10Z" user="Foo" uid="111" lat="48.7586670" lon="8.1343060">
+</node>
+</osm>
+"""
+
+def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_db_conn):
+ place_row(osm_type='N', osm_id=45673)
+
+ requested_url = []
+ def mock_url(url):
+ requested_url.append(url)
+ return OSM_NODE_DATA
+
+ monkeypatch.setattr(nominatim.db.status, "get_url", mock_url)
+
+ date = nominatim.db.status.compute_database_date(temp_db_conn)
+
+ assert requested_url == ['https://www.openstreetmap.org/api/0.6/node/45673/1']
+ assert date == dt.datetime.fromisoformat('2006-01-27T22:09:10').replace(tzinfo=dt.timezone.utc)
+
+
+def test_compute_database_broken_api(monkeypatch, status_table, place_row, temp_db_conn):
+ place_row(osm_type='N', osm_id=45673)
+
+ requested_url = []
+ def mock_url(url):
+ requested_url.append(url)
+ return ' 0
+
+def test_load_ranks_from_file(temp_db_conn, temp_db_cursor, tmp_path):
+ test_file = tmp_path / 'test_levels.json'
+ test_file.write_text('[{"tags":{"place":{"sea":2}}}]')
+
+ load_address_levels_from_file(temp_db_conn, test_file)
+
+ assert temp_db_cursor.scalar('SELECT count(*) FROM address_levels') > 0
+
+
+def test_load_ranks_from_broken_file(temp_db_conn, tmp_path):
+ test_file = tmp_path / 'test_levels.json'
+ test_file.write_text('[{"tags":"place":{"sea":2}}}]')
+
+ with pytest.raises(json.decoder.JSONDecodeError):
+ load_address_levels_from_file(temp_db_conn, test_file)
+
+
+def test_load_ranks_country(temp_db_conn, temp_db_cursor):
+ load_address_levels(temp_db_conn, 'levels',
+ [{"tags": {"place": {"village": 14}}},
+ {"countries": ['de'],
+ "tags": {"place": {"village": 15}}},
+ {"countries": ['uk', 'us' ],
+ "tags": {"place": {"village": 16}}}
+ ])
+
+ assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+ set([(None, 'place', 'village', 14, 14),
+ ('de', 'place', 'village', 15, 15),
+ ('uk', 'place', 'village', 16, 16),
+ ('us', 'place', 'village', 16, 16),
+ ])
+
+
+def test_load_ranks_default_value(temp_db_conn, temp_db_cursor):
+ load_address_levels(temp_db_conn, 'levels',
+ [{"tags": {"boundary": {"": 28}}},
+ {"countries": ['hu'],
+ "tags": {"boundary": {"": 29}}}
+ ])
+
+ assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+ set([(None, 'boundary', None, 28, 28),
+ ('hu', 'boundary', None, 29, 29),
+ ])
+
+
+def test_load_ranks_multiple_keys(temp_db_conn, temp_db_cursor):
+ load_address_levels(temp_db_conn, 'levels',
+ [{"tags":
+ {"place": {"city": 14},
+ "boundary": {"administrative2" : 4}}
+ }])
+
+ assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+ set([(None, 'place', 'city', 14, 14),
+ (None, 'boundary', 'administrative2', 4, 4),
+ ])
+
+
+def test_load_ranks_address(temp_db_conn, temp_db_cursor):
+ load_address_levels(temp_db_conn, 'levels',
+ [{"tags":
+ {"place": {"city": 14,
+ "town" : [14, 13]}}
+ }])
+
+ assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+ set([(None, 'place', 'city', 14, 14),
+ (None, 'place', 'town', 14, 13),
+ ])
diff --git a/test/python/test_tools_refresh_create_functions.py b/test/python/test_tools_refresh_create_functions.py
new file mode 100644
index 00000000..4807e64f
--- /dev/null
+++ b/test/python/test_tools_refresh_create_functions.py
@@ -0,0 +1,99 @@
+"""
+Tests for creating PL/pgSQL functions for Nominatim.
+"""
+from pathlib import Path
+import pytest
+
+from nominatim.db.connection import connect
+from nominatim.tools.refresh import _get_standard_function_sql, _get_partition_function_sql
+
+SQL_DIR = (Path(__file__) / '..' / '..' / '..' / 'sql').resolve()
+
+@pytest.fixture
+def db(temp_db):
+ conn = connect('dbname=' + temp_db)
+ yield conn
+ conn.close()
+
+@pytest.fixture
+def db_with_tables(db):
+ with db.cursor() as cur:
+ for table in ('place', 'placex', 'location_postcode'):
+ cur.execute('CREATE TABLE {} (place_id BIGINT)'.format(table))
+
+ return db
+
+
+def test_standard_functions_replace_module_default(db, def_config):
+ def_config.project_dir = Path('.')
+ sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False)
+
+ assert sql
+ assert sql.find('{modulepath}') < 0
+ assert sql.find("'{}'".format(Path('module/nominatim.so').resolve())) >= 0
+
+
+def test_standard_functions_replace_module_custom(monkeypatch, db, def_config):
+ monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'custom')
+ sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False)
+
+ assert sql
+ assert sql.find('{modulepath}') < 0
+ assert sql.find("'custom/nominatim.so'") >= 0
+
+
+@pytest.mark.parametrize("enabled", (True, False))
+def test_standard_functions_enable_diff(db_with_tables, def_config, enabled):
+ def_config.project_dir = Path('.')
+ sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, enabled, False)
+
+ assert sql
+ assert (sql.find('%DIFFUPDATES%') < 0) == enabled
+
+
+@pytest.mark.parametrize("enabled", (True, False))
+def test_standard_functions_enable_debug(db_with_tables, def_config, enabled):
+ def_config.project_dir = Path('.')
+ sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, enabled)
+
+ assert sql
+ assert (sql.find('--DEBUG') < 0) == enabled
+
+
+@pytest.mark.parametrize("enabled", (True, False))
+def test_standard_functions_enable_limit_reindexing(monkeypatch, db_with_tables, def_config, enabled):
+ def_config.project_dir = Path('.')
+ monkeypatch.setenv('NOMINATIM_LIMIT_REINDEXING', 'yes' if enabled else 'no')
+ sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
+
+ assert sql
+ assert (sql.find('--LIMIT INDEXING') < 0) == enabled
+
+
+@pytest.mark.parametrize("enabled", (True, False))
+def test_standard_functions_enable_tiger(monkeypatch, db_with_tables, def_config, enabled):
+ def_config.project_dir = Path('.')
+ monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA', 'yes' if enabled else 'no')
+ sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
+
+ assert sql
+ assert (sql.find('%NOTIGERDATA%') >= 0) == enabled
+
+
+@pytest.mark.parametrize("enabled", (True, False))
+def test_standard_functions_enable_aux(monkeypatch, db_with_tables, def_config, enabled):
+ def_config.project_dir = Path('.')
+ monkeypatch.setenv('NOMINATIM_USE_AUX_LOCATION_DATA', 'yes' if enabled else 'no')
+ sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
+
+ assert sql
+ assert (sql.find('%NOAUXDATA%') >= 0) == enabled
+
+
+def test_partition_function(temp_db_cursor, db, def_config):
+ temp_db_cursor.execute("CREATE TABLE country_name (partition SMALLINT)")
+
+ sql = _get_partition_function_sql(db, SQL_DIR)
+
+ assert sql
+ assert sql.find('-partition-') < 0
diff --git a/test/python/test_tools_replication.py b/test/python/test_tools_replication.py
new file mode 100644
index 00000000..156385ad
--- /dev/null
+++ b/test/python/test_tools_replication.py
@@ -0,0 +1,138 @@
+"""
+Tests for replication functionality.
+"""
+import datetime as dt
+import time
+
+import pytest
+from osmium.replication.server import OsmosisState
+
+import nominatim.tools.replication
+import nominatim.db.status as status
+from nominatim.errors import UsageError
+
+OSM_NODE_DATA = """\
+<osm version="0.6" generator="OpenStreetMap server" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
+<node id="100" visible="true" version="1" changeset="2047" timestamp="2006-01-27T22:09:10Z" user="Foo" uid="111" lat="48.7586670" lon="8.1343060">
+</node>
+</osm>
+"""
+
+### init replication
+
+def test_init_replication_bad_base_url(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
+ place_row(osm_type='N', osm_id=100)
+
+ monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA)
+
+ with pytest.raises(UsageError, match="Failed to reach replication service"):
+ nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io')
+
+
+def test_init_replication_success(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
+ place_row(osm_type='N', osm_id=100)
+
+ monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA)
+ monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+ "timestamp_to_sequence",
+ lambda self, date: 234)
+
+ nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io')
+
+ temp_db_cursor.execute("SELECT * FROM import_status")
+
+ expected_date = dt.datetime.fromisoformat('2006-01-27T19:09:10').replace(tzinfo=dt.timezone.utc)
+ assert temp_db_cursor.rowcount == 1
+ assert temp_db_cursor.fetchone() == [expected_date, 234, True]
+
+
+### checking for updates
+
+def test_check_for_updates_empty_status_table(status_table, temp_db_conn):
+ assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
+
+
+def test_check_for_updates_seq_not_set(status_table, temp_db_conn):
+ status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc))
+
+ assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
+
+
+def test_check_for_updates_no_state(monkeypatch, status_table, temp_db_conn):
+ status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=345)
+
+ monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+ "get_state_info", lambda self: None)
+
+ assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 253
+
+
+@pytest.mark.parametrize("server_sequence,result", [(344, 2), (345, 2), (346, 0)])
+def test_check_for_updates_no_new_data(monkeypatch, status_table, temp_db_conn,
+ server_sequence, result):
+ date = dt.datetime.now(dt.timezone.utc)
+ status.set_status(temp_db_conn, date, seq=345)
+
+ monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+ "get_state_info",
+ lambda self: OsmosisState(server_sequence, date))
+
+ assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == result
+
+
+### updating
+
+@pytest.fixture
+def update_options(tmpdir):
+ return dict(base_url='https://test.io',
+ indexed_only=False,
+ update_interval=3600,
+ import_file=tmpdir / 'foo.osm',
+ max_diff_size=1)
+
+def test_update_empty_status_table(status_table, temp_db_conn):
+ with pytest.raises(UsageError):
+ nominatim.tools.replication.update(temp_db_conn, {})
+
+
+def test_update_already_indexed(status_table, temp_db_conn):
+ status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=34, indexed=False)
+
+ assert nominatim.tools.replication.update(temp_db_conn, dict(indexed_only=True)) \
+ == nominatim.tools.replication.UpdateState.MORE_PENDING
+
+
+def test_update_no_data_no_sleep(monkeypatch, status_table, temp_db_conn, update_options):
+ date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1)
+ status.set_status(temp_db_conn, date, seq=34)
+
+ monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+ "apply_diffs",
+ lambda *args, **kwargs: None)
+
+ sleeptime = []
+ monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
+
+ assert nominatim.tools.replication.update(temp_db_conn, update_options) \
+ == nominatim.tools.replication.UpdateState.NO_CHANGES
+
+ assert not sleeptime
+
+
+def test_update_no_data_sleep(monkeypatch, status_table, temp_db_conn, update_options):
+ date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(minutes=30)
+ status.set_status(temp_db_conn, date, seq=34)
+
+ monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+ "apply_diffs",
+ lambda *args, **kwargs: None)
+
+ sleeptime = []
+ monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
+
+ assert nominatim.tools.replication.update(temp_db_conn, update_options) \
+ == nominatim.tools.replication.UpdateState.NO_CHANGES
+
+ assert len(sleeptime) == 1
+ assert sleeptime[0] < 3600
+ assert sleeptime[0] > 0
diff --git a/utils/check_server_for_updates.py b/utils/check_server_for_updates.py
deleted file mode 100755
index bcc9d0ba..00000000
--- a/utils/check_server_for_updates.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-from osmium.replication import server
-
-if __name__ == '__main__':
- if len(sys.argv) != 3:
- print("Usage: python check_server_for_updates.py ")
- sys.exit(254)
-
- seqid = int(sys.argv[2])
-
- state = server.ReplicationServer(sys.argv[1]).get_state_info()
-
- if state is None:
- print("ERROR: Cannot get state from URL %s." % (sys.argv[1], ))
- sys.exit(253)
-
- if state.sequence <= seqid:
- print("Database up to date.")
- sys.exit(1)
-
- print("New data available (%i => %i)." % (seqid, state.sequence))
- sys.exit(0)
diff --git a/utils/osm_file_date.py b/utils/osm_file_date.py
deleted file mode 100755
index 0443e6ac..00000000
--- a/utils/osm_file_date.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python3
-
-import osmium
-import sys
-import datetime
-
-
-class Datecounter(osmium.SimpleHandler):
-
- filedate = None
-
- def date(self, o):
- ts = o.timestamp
- if self.filedate is None or ts > self.filedate:
- self.filedate = ts
-
- node = date
- way = date
- relation = date
-
-
-if __name__ == '__main__':
- if len(sys.argv) != 2:
- print("Usage: python osm_file_date.py ")
- sys.exit(-1)
-
- h = Datecounter()
-
- h.apply_file(sys.argv[1])
-
- if h.filedate is None:
- exit(5)
-
- print(h.filedate)