Merge pull request #2186 from lonvia/port-import-to-python

Move setup procedure to Python
Sarah Hoffmann
2021-02-27 12:09:23 +01:00
committed by GitHub
48 changed files with 1746 additions and 793 deletions


@@ -6,7 +6,7 @@ runs:
steps:
- name: Install prerequisites
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium python3-dotenv
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil
shell: bash
- name: Download dependencies

.pylintrc (new file)

@@ -0,0 +1,11 @@
[MASTER]
extension-pkg-whitelist=osmium
[MESSAGES CONTROL]
[TYPECHECK]
# closing added here because it sometimes triggers a false positive with
# 'with' statements.
ignored-classes=NominatimArgs,closing
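
The `closing` whitelist entry matters because of a pattern this PR introduces: `connect()` in `nominatim/db/connection.py` (see below) hands out a `contextlib.closing` wrapper, and pylint's type checker cannot see through the wrapper to the connection's members. A minimal sketch of the pattern that would otherwise trigger false `no-member` warnings (the DSN is a placeholder):

```python
from contextlib import closing

import psycopg2

# pylint infers 'conn' as a 'closing' instance rather than the wrapped
# psycopg2 connection, so 'conn.cursor()' may be flagged as a missing
# member unless 'closing' is listed in ignored-classes.
with closing(psycopg2.connect('dbname=nominatim')) as conn:
    with conn.cursor() as cur:
        cur.execute('SELECT 1')
```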


@@ -177,7 +177,7 @@ if (BUILD_TESTS)
if (PYLINT)
message(STATUS "Using pylint binary ${PYLINT}")
add_test(NAME pylint
COMMAND ${PYLINT} --extension-pkg-whitelist=osmium nominatim
COMMAND ${PYLINT} nominatim
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "pylint not found. Python linting tests disabled.")


@@ -41,10 +41,11 @@ For running Nominatim:
* [Python 3](https://www.python.org/) (3.5+)
* [Psycopg2](https://www.psycopg.org) (2.7+)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
( PHP-cgi (for running queries from the command line)
* PHP-cgi (for running queries from the command line)
For running continuous updates:


@@ -48,7 +48,7 @@ class Shell
return join(' ', $aEscaped);
}
public function run()
public function run($bExitOnFail = false)
{
$sCmd = $this->escapedCmd();
// $aEnv does not need escaping, proc_open seems to handle it fine
@@ -67,6 +67,11 @@ class Shell
fclose($aPipes[0]); // no stdin
$iStat = proc_close($hProc);
if ($iStat != 0 && $bExitOnFail) {
exit($iStat);
}
return $iStat;
}


@@ -56,6 +56,29 @@ setupHTTPProxy();
$bDidSomething = false;
$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
// by default, use all but one processor, but never more than 15.
$iInstances = max(1, $aCMDResult['threads'] ?? (min(16, getProcessorCount()) - 1));
function run($oCmd)
{
global $iInstances;
global $aCMDResult;
$oCmd->addParams('--threads', $iInstances);
if ($aCMDResult['ignore-errors'] ?? false) {
$oCmd->addParams('--ignore-errors');
}
if ($aCMDResult['quiet'] ?? false) {
$oCmd->addParams('--quiet');
}
if ($aCMDResult['verbose'] ?? false) {
$oCmd->addParams('--verbose');
}
$oCmd->run(true);
}
//*******************************************************
// Run some sanity checks:
// Check if osm-file is set and points to a valid file
@@ -72,17 +95,30 @@ $oSetup = new SetupFunctions($aCMDResult);
// go through complete process if 'all' is selected or start selected functions
if ($aCMDResult['create-db'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->createDB();
run((clone($oNominatimCmd))->addParams('transition', '--create-db'));
}
if ($aCMDResult['setup-db'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->setupDB();
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--setup-db');
if ($aCMDResult['no-partitions'] ?? false) {
$oCmd->addParams('--no-partitions');
}
run($oCmd);
}
if ($aCMDResult['import-data'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->importData($aCMDResult['osm-file']);
$oCmd = (clone($oNominatimCmd))
->addParams('transition', '--import-data')
->addParams('--osm-file', $aCMDResult['osm-file']);
if ($aCMDResult['drop'] ?? false) {
$oCmd->addParams('--drop');
}
run($oCmd);
}
if ($aCMDResult['create-functions'] || $aCMDResult['all']) {
@@ -104,17 +140,18 @@ if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) {
if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->createPartitionFunctions();
$oSetup->createFunctions(); // also create partition functions
}
if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->importWikipediaArticles();
// ignore errors!
(clone($oNominatimCmd))->addParams('refresh', '--wiki-data')->run();
}
if ($aCMDResult['load-data'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->loadData($aCMDResult['disable-token-precalc']);
run((clone($oNominatimCmd))->addParams('transition', '--load-data'));
}
if ($aCMDResult['import-tiger-data']) {
@@ -130,12 +167,17 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) {
if ($aCMDResult['index'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->index($aCMDResult['index-noanalyse']);
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--index');
if ($aCMDResult['index-noanalyse'] ?? false) {
$oCmd->addParams('--no-analyse');
}
run($oCmd);
}
if ($aCMDResult['drop']) {
$bDidSomething = true;
$oSetup->drop($aCMDResult);
run((clone($oNominatimCmd))->addParams('freeze'));
}
if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) {
@@ -150,7 +192,7 @@ if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {
if ($aCMDResult['setup-website'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->setupWebsite();
run((clone($oNominatimCmd))->addParams('refresh', '--website'));
}
// ******************************************************


@@ -104,11 +104,17 @@ if ($fPostgresVersion >= 11.0) {
}
$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
if ($aResult['quiet']) {
$oNominatimCmd->addParams('--quiet');
}
if ($aResult['verbose']) {
$oNominatimCmd->addParams('--verbose');
function run($oCmd)
{
global $aResult;
if ($aResult['quiet'] ?? false) {
$oCmd->addParams('--quiet');
}
if ($aResult['verbose'] ?? false) {
$oCmd->addParams('--verbose');
}
$oCmd->run(true);
}
@@ -119,7 +125,7 @@ if ($aResult['init-updates']) {
$oCmd->addParams('--no-update-functions');
}
$oCmd->run();
run($oCmd);
}
if ($aResult['check-for-updates']) {
@@ -147,7 +153,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
}
if ($aResult['calculate-postcodes']) {
(clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run();
run((clone($oNominatimCmd))->addParams('refresh', '--postcodes'));
}
$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
@@ -196,35 +202,21 @@ if ($bHaveDiff) {
}
if ($aResult['recompute-word-counts']) {
(clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run();
run((clone($oNominatimCmd))->addParams('refresh', '--word-counts'));
}
if ($aResult['index']) {
(clone $oNominatimCmd)
run((clone $oNominatimCmd)
->addParams('index', '--minrank', $aResult['index-rank'])
->addParams('--threads', $aResult['index-instances'])
->run();
->addParams('--threads', $aResult['index-instances']));
}
if ($aResult['update-address-levels']) {
(clone($oNominatimCmd))->addParams('refresh', '--address-levels')->run();
run((clone($oNominatimCmd))->addParams('refresh', '--address-levels'));
}
if ($aResult['recompute-importance']) {
echo "Updating importance values for database.\n";
$oDB = new Nominatim\DB();
$oDB->connect();
$sSQL = 'ALTER TABLE placex DISABLE TRIGGER ALL;';
$sSQL .= 'UPDATE placex SET (wikipedia, importance) =';
$sSQL .= ' (SELECT wikipedia, importance';
$sSQL .= ' FROM compute_importance(extratags, country_code, osm_type, osm_id));';
$sSQL .= 'UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance';
$sSQL .= ' FROM placex d';
$sSQL .= ' WHERE s.place_id = d.linked_place_id and d.wikipedia is not null';
$sSQL .= ' and (s.wikipedia is null or s.importance < d.importance);';
$sSQL .= 'ALTER TABLE placex ENABLE TRIGGER ALL;';
$oDB->exec($sSQL);
run((clone($oNominatimCmd))->addParams('refresh', '--importance'));
}
if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
@@ -240,5 +232,5 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
$oCmd->addParams('--no-index');
}
exit($oCmd->run());
run($oCmd);
}


@@ -6,7 +6,6 @@ require_once(CONST_LibDir.'/Shell.php');
class SetupFunctions
{
protected $iCacheMemory;
protected $iInstances;
protected $aDSNInfo;
protected $bQuiet;
@@ -31,16 +30,6 @@ class SetupFunctions
warn('resetting threads to '.$this->iInstances);
}
if (isset($aCMDResult['osm2pgsql-cache'])) {
$this->iCacheMemory = $aCMDResult['osm2pgsql-cache'];
} elseif (getSetting('FLATNODE_FILE')) {
// When flatnode files are enabled then disable cache per default.
$this->iCacheMemory = 0;
} else {
// Otherwise: Assume we can steal all the cache memory in the box.
$this->iCacheMemory = getCacheMemoryMB();
}
// parse database string
$this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
if (!isset($this->aDSNInfo['port'])) {
@@ -84,156 +73,6 @@ class SetupFunctions
}
}
public function createDB()
{
info('Create DB');
$oDB = new \Nominatim\DB;
if ($oDB->checkConnection()) {
fail('database already exists ('.getSetting('DATABASE_DSN').')');
}
$oCmd = (new \Nominatim\Shell('createdb'))
->addParams('-E', 'UTF-8')
->addParams('-p', $this->aDSNInfo['port']);
if (isset($this->aDSNInfo['username'])) {
$oCmd->addParams('-U', $this->aDSNInfo['username']);
}
if (isset($this->aDSNInfo['password'])) {
$oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
}
if (isset($this->aDSNInfo['hostspec'])) {
$oCmd->addParams('-h', $this->aDSNInfo['hostspec']);
}
$oCmd->addParams($this->aDSNInfo['database']);
$result = $oCmd->run();
if ($result != 0) fail('Error executing external command: '.$oCmd->escapedCmd());
}
public function setupDB()
{
info('Setup DB');
$fPostgresVersion = $this->db()->getPostgresVersion();
echo 'Postgres version found: '.$fPostgresVersion."\n";
if ($fPostgresVersion < 9.03) {
fail('Minimum supported version of Postgresql is 9.3.');
}
$this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS hstore');
$this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS postgis');
$fPostgisVersion = $this->db()->getPostgisVersion();
echo 'Postgis version found: '.$fPostgisVersion."\n";
if ($fPostgisVersion < 2.2) {
echo "Minimum required Postgis version 2.2\n";
exit(1);
}
$sPgUser = getSetting('DATABASE_WEBUSER');
$i = $this->db()->getOne("select count(*) from pg_user where usename = '$sPgUser'");
if ($i == 0) {
echo "\nERROR: Web user '".$sPgUser."' does not exist. Create it with:\n";
echo "\n createuser ".$sPgUser."\n\n";
exit(1);
}
if (!getSetting('DATABASE_MODULE_PATH')) {
// If no custom module path is set then copy the module into the
// project directory, but only if it is not the same file already
// (aka we are running from the build dir).
$sDest = CONST_InstallDir.'/module';
if ($sDest != CONST_Default_ModulePath) {
if (!file_exists($sDest)) {
mkdir($sDest);
}
if (!copy(CONST_Default_ModulePath.'/nominatim.so', $sDest.'/nominatim.so')) {
echo "Failed to copy database module to $sDest.";
exit(1);
}
chmod($sDest.'/nominatim.so', 0755);
info("Database module installed at $sDest.");
} else {
info('Running from build directory. Leaving database module as is.');
}
} else {
info('Using database module from DATABASE_MODULE_PATH ('.getSetting('DATABASE_MODULE_PATH').').');
}
// Try accessing the C module, so we know early if something is wrong
$this->checkModulePresence(); // raises exception on failure
$this->pgsqlRunScriptFile(CONST_DataDir.'/country_name.sql');
$this->pgsqlRunScriptFile(CONST_DataDir.'/country_osm_grid.sql.gz');
if ($this->bNoPartitions) {
$this->pgsqlRunScript('update country_name set partition = 0');
}
}
public function importData($sOSMFile)
{
info('Import data');
if (!file_exists(getOsm2pgsqlBinary())) {
echo "Check NOMINATIM_OSM2PGSQL_BINARY in your local .env file.\n";
echo "Normally you should not need to set this manually.\n";
fail("osm2pgsql not found in '".getOsm2pgsqlBinary()."'");
}
$oCmd = new \Nominatim\Shell(getOsm2pgsqlBinary());
$oCmd->addParams('--style', getImportStyle());
if (getSetting('FLATNODE_FILE')) {
$oCmd->addParams('--flat-nodes', getSetting('FLATNODE_FILE'));
}
if (getSetting('TABLESPACE_OSM_DATA')) {
$oCmd->addParams('--tablespace-slim-data', getSetting('TABLESPACE_OSM_DATA'));
}
if (getSetting('TABLESPACE_OSM_INDEX')) {
$oCmd->addParams('--tablespace-slim-index', getSetting('TABLESPACE_OSM_INDEX'));
}
if (getSetting('TABLESPACE_PLACE_DATA')) {
$oCmd->addParams('--tablespace-main-data', getSetting('TABLESPACE_PLACE_DATA'));
}
if (getSetting('TABLESPACE_PLACE_INDEX')) {
$oCmd->addParams('--tablespace-main-index', getSetting('TABLESPACE_PLACE_INDEX'));
}
$oCmd->addParams('--latlong', '--slim', '--create');
$oCmd->addParams('--output', 'gazetteer');
$oCmd->addParams('--hstore');
$oCmd->addParams('--number-processes', 1);
$oCmd->addParams('--with-forward-dependencies', 'false');
$oCmd->addParams('--log-progress', 'true');
$oCmd->addParams('--cache', $this->iCacheMemory);
$oCmd->addParams('--port', $this->aDSNInfo['port']);
if (isset($this->aDSNInfo['username'])) {
$oCmd->addParams('--username', $this->aDSNInfo['username']);
}
if (isset($this->aDSNInfo['password'])) {
$oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
}
if (isset($this->aDSNInfo['hostspec'])) {
$oCmd->addParams('--host', $this->aDSNInfo['hostspec']);
}
$oCmd->addParams('--database', $this->aDSNInfo['database']);
$oCmd->addParams($sOSMFile);
$oCmd->run();
if (!$this->sIgnoreErrors && !$this->db()->getRow('select * from place limit 1')) {
fail('No Data');
}
if ($this->bDrop) {
$this->dropTable('planet_osm_nodes');
$this->removeFlatnodeFile();
}
}
public function createFunctions()
{
info('Create Functions');
@@ -280,153 +119,6 @@ class SetupFunctions
$this->pgsqlRunPartitionScript($sTemplate);
}
public function createPartitionFunctions()
{
info('Create Partition Functions');
$this->createSqlFunctions(); // also create partition functions
}
public function importWikipediaArticles()
{
$sWikiArticlePath = getSetting('WIKIPEDIA_DATA_PATH', CONST_InstallDir);
$sWikiArticlesFile = $sWikiArticlePath.'/wikimedia-importance.sql.gz';
if (file_exists($sWikiArticlesFile)) {
info('Importing wikipedia articles and redirects');
$this->dropTable('wikipedia_article');
$this->dropTable('wikipedia_redirect');
$this->pgsqlRunScriptFile($sWikiArticlesFile);
} else {
warn('wikipedia importance dump file not found - places will have default importance');
}
}
public function loadData($bDisableTokenPrecalc)
{
info('Drop old Data');
$oDB = $this->db();
$oDB->exec('TRUNCATE word');
echo '.';
$oDB->exec('TRUNCATE placex');
echo '.';
$oDB->exec('TRUNCATE location_property_osmline');
echo '.';
$oDB->exec('TRUNCATE place_addressline');
echo '.';
$oDB->exec('TRUNCATE location_area');
echo '.';
if (!$this->dbReverseOnly()) {
$oDB->exec('TRUNCATE search_name');
echo '.';
}
$oDB->exec('TRUNCATE search_name_blank');
echo '.';
$oDB->exec('DROP SEQUENCE seq_place');
echo '.';
$oDB->exec('CREATE SEQUENCE seq_place start 100000');
echo '.';
$sSQL = 'select distinct partition from country_name';
$aPartitions = $oDB->getCol($sSQL);
if (!$this->bNoPartitions) $aPartitions[] = 0;
foreach ($aPartitions as $sPartition) {
$oDB->exec('TRUNCATE location_road_'.$sPartition);
echo '.';
}
// used by getorcreate_word_id to ignore frequent partial words
$sSQL = 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS ';
$sSQL .= '$$ SELECT '.getSetting('MAX_WORD_FREQUENCY').' as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE';
$oDB->exec($sSQL);
echo ".\n";
// pre-create the word list
if (!$bDisableTokenPrecalc) {
info('Loading word list');
$this->pgsqlRunScriptFile(CONST_DataDir.'/words.sql');
}
info('Load Data');
$sColumns = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry';
$aDBInstances = array();
$iLoadThreads = max(1, $this->iInstances - 1);
for ($i = 0; $i < $iLoadThreads; $i++) {
// https://secure.php.net/manual/en/function.pg-connect.php
$DSN = getSetting('DATABASE_DSN');
$DSN = preg_replace('/^pgsql:/', '', $DSN);
$DSN = preg_replace('/;/', ' ', $DSN);
$aDBInstances[$i] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW);
pg_ping($aDBInstances[$i]);
}
for ($i = 0; $i < $iLoadThreads; $i++) {
$sSQL = "INSERT INTO placex ($sColumns) SELECT $sColumns FROM place WHERE osm_id % $iLoadThreads = $i";
$sSQL .= " and not (class='place' and type='houses' and osm_type='W'";
$sSQL .= " and ST_GeometryType(geometry) = 'ST_LineString')";
$sSQL .= ' and ST_IsValid(geometry)';
if ($this->bVerbose) echo "$sSQL\n";
if (!pg_send_query($aDBInstances[$i], $sSQL)) {
fail(pg_last_error($aDBInstances[$i]));
}
}
// last thread for interpolation lines
// https://secure.php.net/manual/en/function.pg-connect.php
$DSN = getSetting('DATABASE_DSN');
$DSN = preg_replace('/^pgsql:/', '', $DSN);
$DSN = preg_replace('/;/', ' ', $DSN);
$aDBInstances[$iLoadThreads] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW);
pg_ping($aDBInstances[$iLoadThreads]);
$sSQL = 'insert into location_property_osmline';
$sSQL .= ' (osm_id, address, linegeo)';
$sSQL .= ' SELECT osm_id, address, geometry from place where ';
$sSQL .= "class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'";
if ($this->bVerbose) echo "$sSQL\n";
if (!pg_send_query($aDBInstances[$iLoadThreads], $sSQL)) {
fail(pg_last_error($aDBInstances[$iLoadThreads]));
}
$bFailed = false;
for ($i = 0; $i <= $iLoadThreads; $i++) {
while (($hPGresult = pg_get_result($aDBInstances[$i])) !== false) {
$resultStatus = pg_result_status($hPGresult);
// PGSQL_EMPTY_QUERY, PGSQL_COMMAND_OK, PGSQL_TUPLES_OK,
// PGSQL_COPY_OUT, PGSQL_COPY_IN, PGSQL_BAD_RESPONSE,
// PGSQL_NONFATAL_ERROR and PGSQL_FATAL_ERROR
// echo 'Query result ' . $i . ' is: ' . $resultStatus . "\n";
if ($resultStatus != PGSQL_COMMAND_OK && $resultStatus != PGSQL_TUPLES_OK) {
$resultError = pg_result_error($hPGresult);
echo '-- error text ' . $i . ': ' . $resultError . "\n";
$bFailed = true;
}
}
}
if ($bFailed) {
fail('SQL errors loading placex and/or location_property_osmline tables');
}
for ($i = 0; $i < $this->iInstances; $i++) {
pg_close($aDBInstances[$i]);
}
echo "\n";
info('Reanalysing database');
$this->pgsqlRunScript('ANALYSE');
$sDatabaseDate = getDatabaseDate($oDB);
$oDB->exec('TRUNCATE import_status');
if (!$sDatabaseDate) {
warn('could not determine database date.');
} else {
$sSQL = "INSERT INTO import_status (lastimportdate) VALUES('".$sDatabaseDate."')";
$oDB->exec($sSQL);
echo "Latest data imported from $sDatabaseDate.\n";
}
}
public function importTigerData($sTigerPath)
{
info('Import Tiger data');
@@ -560,49 +252,6 @@ class SetupFunctions
$this->db()->exec($sSQL);
}
public function index($bIndexNoanalyse)
{
$this->checkModulePresence(); // raises exception on failure
$oBaseCmd = (clone $this->oNominatimCmd)->addParams('index');
info('Index ranks 0 - 4');
$oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4);
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');
}
if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
info('Index administrative boundaries');
$oCmd = (clone $oBaseCmd)->addParams('--boundaries-only');
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');
}
info('Index ranks 5 - 25');
$oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 5, '--maxrank', 25);
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');
}
if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
info('Index ranks 26 - 30');
$oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 26);
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');
}
info('Index postcodes');
$sSQL = 'UPDATE location_postcode SET indexed_status = 0';
$this->db()->exec($sSQL);
}
public function createSearchIndices()
{
info('Create Search indices');
@@ -655,21 +304,6 @@ class SetupFunctions
$this->pgsqlRunScript($sSQL);
}
public function drop()
{
(clone($this->oNominatimCmd))->addParams('freeze')->run();
}
/**
* Setup the directory for the API scripts.
*
* @return null
*/
public function setupWebsite()
{
(clone($this->oNominatimCmd))->addParams('refresh', '--website')->run();
}
/**
* Return the connection to the database.
*
@@ -688,15 +322,6 @@ class SetupFunctions
return $this->oDB;
}
private function removeFlatnodeFile()
{
$sFName = getSetting('FLATNODE_FILE');
if ($sFName && file_exists($sFName)) {
if ($this->bVerbose) echo 'Deleting '.$sFName."\n";
unlink($sFName);
}
}
private function pgsqlRunScript($sScript, $bfatal = true)
{
runSQLScript(
@@ -720,7 +345,7 @@ class SetupFunctions
$oCmd->addParams('--enable-debug-statements');
}
$oCmd->run();
$oCmd->run(!$this->sIgnoreErrors);
}
private function pgsqlRunPartitionScript($sTemplate)


@@ -12,6 +12,7 @@ from .config import Configuration
from .tools.exec_utils import run_legacy_script, run_php_server
from .errors import UsageError
from . import clicmd
from .clicmd.args import NominatimArgs
LOG = logging.getLogger()
@@ -62,7 +63,8 @@ class CommandlineParser:
""" Parse the command line arguments of the program and execute the
appropriate subcommand.
"""
args = self.parser.parse_args(args=kwargs.get('cli_args'))
args = NominatimArgs()
self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args)
if args.subcommand is None:
self.parser.print_help()
@@ -73,12 +75,14 @@ class CommandlineParser:
setattr(args, arg, Path(kwargs[arg]))
args.project_dir = Path(args.project_dir).resolve()
logging.basicConfig(stream=sys.stderr,
format='%(asctime)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
level=max(4 - args.verbose, 1) * 10)
if 'cli_args' not in kwargs:
logging.basicConfig(stream=sys.stderr,
format='%(asctime)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
level=max(4 - args.verbose, 1) * 10)
args.config = Configuration(args.project_dir, args.config_dir)
args.config = Configuration(args.project_dir, args.config_dir,
environ=kwargs.get('environ', os.environ))
log = logging.getLogger()
log.warning('Using project directory: %s', str(args.project_dir))
@@ -109,70 +113,6 @@ class CommandlineParser:
# pylint: disable=E0012,C0415
class SetupAll:
"""\
Create a new Nominatim database from an OSM file.
"""
@staticmethod
def add_args(parser):
group_name = parser.add_argument_group('Required arguments')
group = group_name.add_mutually_exclusive_group(required=True)
group.add_argument('--osm-file',
help='OSM file to be imported.')
group.add_argument('--continue', dest='continue_at',
choices=['load-data', 'indexing', 'db-postprocess'],
help='Continue an import that was interrupted')
group = parser.add_argument_group('Optional arguments')
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
group.add_argument('--reverse-only', action='store_true',
help='Do not create tables and indexes for searching')
group.add_argument('--enable-debug-statements', action='store_true',
help='Include debug warning statements in SQL code')
group.add_argument('--no-partitions', action='store_true',
help="""Do not partition search indices
(speeds up import of single country extracts)""")
group.add_argument('--no-updates', action='store_true',
help="""Do not keep tables that are only needed for
updating the database later""")
group = parser.add_argument_group('Expert options')
group.add_argument('--ignore-errors', action='store_true',
help='Continue import even when errors in SQL are present')
group.add_argument('--index-noanalyse', action='store_true',
help='Do not perform analyse operations during index')
@staticmethod
def run(args):
params = ['setup.php']
if args.osm_file:
params.extend(('--all', '--osm-file', args.osm_file))
else:
if args.continue_at == 'load-data':
params.append('--load-data')
if args.continue_at in ('load-data', 'indexing'):
params.append('--index')
params.extend(('--create-search-indices', '--create-country-names',
'--setup-website'))
if args.osm2pgsql_cache:
params.extend(('--osm2pgsql-cache', args.osm2pgsql_cache))
if args.reverse_only:
params.append('--reverse-only')
if args.enable_debug_statements:
params.append('--enable-debug-statements')
if args.no_partitions:
params.append('--no-partitions')
if args.no_updates:
params.append('--drop')
if args.ignore_errors:
params.append('--ignore-errors')
if args.index_noanalyse:
params.append('--index-noanalyse')
return run_legacy_script(*params, nominatim_env=args)
class SetupSpecialPhrases:
"""\
Maintain special phrases.
@@ -330,7 +270,7 @@ def nominatim(**kwargs):
"""
parser = CommandlineParser('nominatim', nominatim.__doc__)
parser.add_subcommand('import', SetupAll)
parser.add_subcommand('import', clicmd.SetupAll)
parser.add_subcommand('freeze', clicmd.SetupFreeze)
parser.add_subcommand('replication', clicmd.UpdateReplication)
@@ -354,4 +294,6 @@ def nominatim(**kwargs):
else:
parser.parser.epilog = 'php-cgi not found. Query commands not available.'
parser.add_subcommand('transition', clicmd.AdminTransition)
return parser.run(**kwargs)
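
The `cli_args` and `environ` keywords make the entry point scriptable without touching `sys.argv` or `os.environ`, which is what the test suite relies on. A hedged sketch of such an invocation; the path keyword names mirror what the generated `nominatim` wrapper script passes in and are illustrative only:

```python
from nominatim import cli

# Placeholder path settings normally injected by the generated wrapper.
paths = dict(module_dir='module', osm2pgsql_path='osm2pgsql',
             phplib_dir='lib-php', sqllib_dir='lib-sql',
             data_dir='data', config_dir='settings',
             phpcgi_path='php-cgi')

# cli_args bypasses sys.argv; environ substitutes for os.environ. Note
# that passing cli_args also skips logging.basicConfig(), leaving log
# configuration to the caller (see the diff above).
exit_code = cli.nominatim(cli_args=['import', '--osm-file', 'monaco.osm.pbf'],
                          environ={'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=nominatim'},
                          **paths)
```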


@@ -2,9 +2,11 @@
Subcommand definitions for the command-line tool.
"""
from .setup import SetupAll
from .replication import UpdateReplication
from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
from .index import UpdateIndex
from .refresh import UpdateRefresh
from .admin import AdminFuncs
from .freeze import SetupFreeze
from .transition import AdminTransition


@@ -54,9 +54,8 @@ class AdminFuncs:
if args.analyse_indexing:
LOG.warning('Analysing performance of indexing function')
from ..tools import admin
conn = connect(args.config.get_libpq_dsn())
admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
conn.close()
with connect(args.config.get_libpq_dsn()) as conn:
admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
return 0

nominatim/clicmd/args.py (new file)

@@ -0,0 +1,27 @@
"""
Provides custom functions over command-line arguments.
"""
class NominatimArgs: # pylint: disable=too-few-public-methods
""" Customized namespace class for the nominatim command line tool
to receive the command-line arguments.
"""
def osm2pgsql_options(self, default_cache, default_threads):
""" Return the standard osm2pgsql options that can be derived
from the command line arguments. The resulting dict can be
further customized and then used in `run_osm2pgsql()`.
"""
return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.osm2pgsql_path,
osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
osm2pgsql_style=self.config.get_import_style_file(),
threads=self.threads or default_threads,
dsn=self.config.get_libpq_dsn(),
flatnode_file=self.config.FLATNODE_FILE,
tablespaces=dict(slim_data=self.config.TABLESPACE_OSM_DATA,
slim_index=self.config.TABLESPACE_OSM_INDEX,
main_data=self.config.TABLESPACE_PLACE_DATA,
main_index=self.config.TABLESPACE_PLACE_INDEX
)
)
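
Because `CommandlineParser.run()` now parses into a `NominatimArgs` instance (see `cli.py` above), every subcommand receives an `args` object that carries this helper. A small sketch of the mechanism with a hypothetical stand-alone parser:

```python
import argparse

from nominatim.clicmd.args import NominatimArgs

parser = argparse.ArgumentParser()
parser.add_argument('--threads', type=int)
parser.add_argument('--osm2pgsql-cache', type=int, dest='osm2pgsql_cache')

# argparse fills the custom namespace instead of a plain Namespace, so
# the parsed object exposes osm2pgsql_options().
args = parser.parse_args(['--threads', '4'], namespace=NominatimArgs())
assert args.threads == 4
# Calling args.osm2pgsql_options(0, 1) additionally requires args.config
# and args.osm2pgsql_path to be set, as CommandlineParser.run() ensures.
```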


@@ -29,9 +29,8 @@ class SetupFreeze:
def run(args):
from ..tools import freeze
conn = connect(args.config.get_libpq_dsn())
freeze.drop_update_tables(conn)
with connect(args.config.get_libpq_dsn()) as conn:
freeze.drop_update_tables(conn)
freeze.drop_flatnode_file(args.config.FLATNODE_FILE)
conn.close()
return 0


@@ -1,7 +1,7 @@
"""
Implementation of the 'index' subcommand.
"""
import os
import psutil
from ..db import status
from ..db.connection import connect
@@ -11,14 +11,6 @@ from ..db.connection import connect
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415
def _num_system_cpus():
try:
cpus = len(os.sched_getaffinity(0))
except NotImplementedError:
cpus = None
return cpus or os.cpu_count()
class UpdateIndex:
"""\
@@ -42,7 +34,7 @@ class UpdateIndex:
from ..indexer.indexer import Indexer
indexer = Indexer(args.config.get_libpq_dsn(),
args.threads or _num_system_cpus() or 1)
args.threads or psutil.cpu_count() or 1)
if not args.no_boundaries:
indexer.index_boundaries(args.minrank, args.maxrank)
@@ -51,8 +43,7 @@ class UpdateIndex:
if not args.no_boundaries and not args.boundaries_only \
and args.minrank == 0 and args.maxrank == 30:
conn = connect(args.config.get_libpq_dsn())
status.set_indexed(conn, True)
conn.close()
with connect(args.config.get_libpq_dsn()) as conn:
status.set_indexed(conn, True)
return 0
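
`psutil.cpu_count()` replaces the removed helper wholesale; it can return `None` when the CPU count cannot be determined, hence the chained `or 1` fallback. A minimal sketch of the resulting default computation:

```python
import psutil

def default_threads(requested=None):
    # --threads wins; otherwise use the detected CPU count; if psutil
    # cannot determine it (returns None), fall back to one thread.
    return requested or psutil.cpu_count() or 1

assert default_threads(8) == 8
```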


@@ -5,7 +5,6 @@ import logging
from pathlib import Path
from ..db.connection import connect
from ..tools.exec_utils import run_legacy_script
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
@@ -50,37 +49,39 @@ class UpdateRefresh:
if args.postcodes:
LOG.warning("Update postcodes centroid")
conn = connect(args.config.get_libpq_dsn())
refresh.update_postcodes(conn, args.sqllib_dir)
conn.close()
refresh.update_postcodes(args.config.get_libpq_dsn(), args.sqllib_dir)
if args.word_counts:
LOG.warning('Recompute frequency of full-word search terms')
conn = connect(args.config.get_libpq_dsn())
refresh.recompute_word_counts(conn, args.sqllib_dir)
conn.close()
refresh.recompute_word_counts(args.config.get_libpq_dsn(), args.sqllib_dir)
if args.address_levels:
cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
LOG.warning('Updating address levels from %s', cfg)
conn = connect(args.config.get_libpq_dsn())
refresh.load_address_levels_from_file(conn, cfg)
conn.close()
with connect(args.config.get_libpq_dsn()) as conn:
refresh.load_address_levels_from_file(conn, cfg)
if args.functions:
LOG.warning('Create functions')
conn = connect(args.config.get_libpq_dsn())
refresh.create_functions(conn, args.config, args.sqllib_dir,
args.diffs, args.enable_debug_statements)
conn.close()
with connect(args.config.get_libpq_dsn()) as conn:
refresh.create_functions(conn, args.config, args.sqllib_dir,
args.diffs, args.enable_debug_statements)
if args.wiki_data:
run_legacy_script('setup.php', '--import-wikipedia-articles',
nominatim_env=args, throw_on_fail=True)
data_path = Path(args.config.WIKIPEDIA_DATA_PATH
or args.project_dir)
LOG.warning('Import wikipedia article importance from %s', data_path)
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
LOG.fatal('FATAL: Wikipedia importance dump file not found')
return 1
# Attention: importance MUST come after wiki data import.
if args.importance:
run_legacy_script('update.php', '--recompute-importance',
nominatim_env=args, throw_on_fail=True)
LOG.warning('Update importance values for database')
with connect(args.config.get_libpq_dsn()) as conn:
refresh.recompute_importance(conn)
if args.website:
webdir = args.project_dir / 'website'
LOG.warning('Setting up website directory at %s', webdir)


@@ -17,17 +17,6 @@ LOG = logging.getLogger()
# Using non-top-level imports to make pyosmium optional for replication only.
# pylint: disable=E0012,C0415
def _osm2pgsql_options_from_args(args, default_cache, default_threads):
""" Set up the standard osm2pgsql from the command line arguments.
"""
return dict(osm2pgsql=args.osm2pgsql_path,
osm2pgsql_cache=args.osm2pgsql_cache or default_cache,
osm2pgsql_style=args.config.get_import_style_file(),
threads=args.threads or default_threads,
dsn=args.config.get_libpq_dsn(),
flatnode_file=args.config.FLATNODE_FILE)
class UpdateReplication:
"""\
Update the database using an online replication service.
@@ -62,13 +51,12 @@ class UpdateReplication:
from ..tools import replication, refresh
LOG.warning("Initialising replication updates")
conn = connect(args.config.get_libpq_dsn())
replication.init_replication(conn, base_url=args.config.REPLICATION_URL)
if args.update_functions:
LOG.warning("Create functions")
refresh.create_functions(conn, args.config, args.sqllib_dir,
True, False)
conn.close()
with connect(args.config.get_libpq_dsn()) as conn:
replication.init_replication(conn, base_url=args.config.REPLICATION_URL)
if args.update_functions:
LOG.warning("Create functions")
refresh.create_functions(conn, args.config, args.sqllib_dir,
True, False)
return 0
@@ -76,10 +64,8 @@ class UpdateReplication:
def _check_for_updates(args):
from ..tools import replication
conn = connect(args.config.get_libpq_dsn())
ret = replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL)
conn.close()
return ret
with connect(args.config.get_libpq_dsn()) as conn:
return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL)
@staticmethod
def _report_update(batchdate, start_import, start_index):
@@ -99,7 +85,7 @@ class UpdateReplication:
from ..tools import replication
from ..indexer.indexer import Indexer
params = _osm2pgsql_options_from_args(args, 2000, 1)
params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
params.update(base_url=args.config.REPLICATION_URL,
update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'),
import_file=args.project_dir / 'osmosischange.osc',
@@ -122,13 +108,12 @@ class UpdateReplication:
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
while True:
conn = connect(args.config.get_libpq_dsn())
start = dt.datetime.now(dt.timezone.utc)
state = replication.update(conn, params)
if state is not replication.UpdateState.NO_CHANGES:
status.log_status(conn, start, 'import')
batchdate, _, _ = status.get_status(conn)
conn.close()
with connect(args.config.get_libpq_dsn()) as conn:
start = dt.datetime.now(dt.timezone.utc)
state = replication.update(conn, params)
if state is not replication.UpdateState.NO_CHANGES:
status.log_status(conn, start, 'import')
batchdate, _, _ = status.get_status(conn)
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
index_start = dt.datetime.now(dt.timezone.utc)
@@ -137,10 +122,9 @@ class UpdateReplication:
indexer.index_boundaries(0, 30)
indexer.index_by_rank(0, 30)
conn = connect(args.config.get_libpq_dsn())
status.set_indexed(conn, True)
status.log_status(conn, index_start, 'index')
conn.close()
with connect(args.config.get_libpq_dsn()) as conn:
status.set_indexed(conn, True)
status.log_status(conn, index_start, 'index')
else:
index_start = None

nominatim/clicmd/setup.py (new file)

@@ -0,0 +1,141 @@
"""
Implementation of the 'import' subcommand.
"""
import logging
from pathlib import Path
import psutil
from ..tools.exec_utils import run_legacy_script
from ..db.connection import connect
from ..db import status
from ..errors import UsageError
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
class SetupAll:
"""\
Create a new Nominatim database from an OSM file.
"""
@staticmethod
def add_args(parser):
group_name = parser.add_argument_group('Required arguments')
group = group_name.add_mutually_exclusive_group(required=True)
group.add_argument('--osm-file', metavar='FILE',
help='OSM file to be imported.')
group.add_argument('--continue', dest='continue_at',
choices=['load-data', 'indexing', 'db-postprocess'],
help='Continue an import that was interrupted')
group = parser.add_argument_group('Optional arguments')
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
group.add_argument('--reverse-only', action='store_true',
help='Do not create tables and indexes for searching')
group.add_argument('--no-partitions', action='store_true',
help="""Do not partition search indices
(speeds up import of single country extracts)""")
group.add_argument('--no-updates', action='store_true',
help="""Do not keep tables that are only needed for
updating the database later""")
group = parser.add_argument_group('Expert options')
group.add_argument('--ignore-errors', action='store_true',
help='Continue import even when errors in SQL are present')
group.add_argument('--index-noanalyse', action='store_true',
help='Do not perform analyse operations during index')
@staticmethod
def run(args): # pylint: disable=too-many-statements
from ..tools import database_import
from ..tools import refresh
from ..indexer.indexer import Indexer
if args.osm_file and not Path(args.osm_file).is_file():
LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
raise UsageError('Cannot access file.')
if args.continue_at is None:
database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
args.data_dir,
args.no_partitions,
rouser=args.config.DATABASE_WEBUSER)
LOG.warning('Installing database module')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.install_module(args.module_dir, args.project_dir,
args.config.DATABASE_MODULE_PATH,
conn=conn)
LOG.warning('Importing OSM data file')
database_import.import_osm_data(Path(args.osm_file),
args.osm2pgsql_options(0, 1),
drop=args.no_updates,
ignore_errors=args.ignore_errors)
LOG.warning('Create functions (1st pass)')
with connect(args.config.get_libpq_dsn()) as conn:
refresh.create_functions(conn, args.config, args.sqllib_dir,
False, False)
LOG.warning('Create tables')
params = ['setup.php', '--create-tables', '--create-partition-tables']
if args.reverse_only:
params.append('--reverse-only')
run_legacy_script(*params, nominatim_env=args)
LOG.warning('Create functions (2nd pass)')
with connect(args.config.get_libpq_dsn()) as conn:
refresh.create_functions(conn, args.config, args.sqllib_dir,
False, False)
LOG.warning('Importing wikipedia importance data')
data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
LOG.error('Wikipedia importance dump file not found. '
'Will be using default importances.')
LOG.warning('Initialise tables')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Load data into placex table')
database_import.load_data(args.config.get_libpq_dsn(),
args.data_dir,
args.threads or psutil.cpu_count() or 1)
LOG.warning('Calculate postcodes')
run_legacy_script('setup.php', '--calculate-postcodes', nominatim_env=args)
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(),
args.threads or psutil.cpu_count() or 1)
indexer.index_full(analyse=not args.index_noanalyse)
LOG.warning('Post-process tables')
params = ['setup.php', '--create-search-indices', '--create-country-names']
if args.no_updates:
params.append('--drop')
run_legacy_script(*params, nominatim_env=args)
webdir = args.project_dir / 'website'
LOG.warning('Setup website at %s', webdir)
refresh.setup_website(webdir, args.phplib_dir, args.config)
with connect(args.config.get_libpq_dsn()) as conn:
try:
dbdate = status.compute_database_date(conn)
status.set_status(conn, dbdate)
LOG.info('Database is at %s.', dbdate)
except Exception as exc: # pylint: disable=broad-except
LOG.error('Cannot determine date of database: %s', exc)
return 0
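
The `--continue` choices map directly onto the stage gates in `run()` above. An illustrative (non-authoritative) reduction of that gating logic:

```python
def stages_to_run(continue_at=None):
    # Mirrors the if-conditions in SetupAll.run(): a fresh import runs
    # every stage; '--continue X' resumes at stage X.
    stages = ['import', 'load-data', 'indexing', 'db-postprocess']
    start = 0 if continue_at is None else stages.index(continue_at)
    return stages[start:]

assert stages_to_run('indexing') == ['indexing', 'db-postprocess']
```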


@@ -0,0 +1,101 @@
"""
Implementation of the 'transition' subcommand.
This subcommand provides stand-ins for functions that were available
through the PHP scripts but are no longer directly accessible.
This module will be removed as soon as the transition phase is over.
"""
import logging
from pathlib import Path
from ..db.connection import connect
from ..db import status
from ..errors import UsageError
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
class AdminTransition:
"""\
Internal functions for code transition. Do not use.
"""
@staticmethod
def add_args(parser):
group = parser.add_argument_group('Sub-functions')
group.add_argument('--create-db', action='store_true',
help='Create nominatim db')
group.add_argument('--setup-db', action='store_true',
help='Build a blank nominatim db')
group.add_argument('--import-data', action='store_true',
help='Import an OSM file')
group.add_argument('--load-data', action='store_true',
help='Copy data to live tables from import table')
group.add_argument('--index', action='store_true',
help='Index the data')
group = parser.add_argument_group('Options')
group.add_argument('--no-partitions', action='store_true',
help='Do not partition search indices')
group.add_argument('--osm-file', metavar='FILE',
help='File to import')
group.add_argument('--drop', action='store_true',
help='Drop tables needed for updates, making the database readonly')
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
group.add_argument('--no-analyse', action='store_true',
help='Do not perform analyse operations during index')
group.add_argument('--ignore-errors', action='store_true',
help="Ignore certain erros on import.")
@staticmethod
def run(args):
from ..tools import database_import
if args.create_db:
LOG.warning('Create DB')
database_import.create_db(args.config.get_libpq_dsn())
if args.setup_db:
LOG.warning('Setup DB')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.setup_extensions(conn)
database_import.install_module(args.module_dir, args.project_dir,
args.config.DATABASE_MODULE_PATH,
conn=conn)
database_import.import_base_data(args.config.get_libpq_dsn(),
args.data_dir, args.no_partitions)
if args.import_data:
LOG.warning('Import data')
if not args.osm_file:
raise UsageError('Missing required --osm-file argument')
database_import.import_osm_data(Path(args.osm_file),
args.osm2pgsql_options(0, 1),
drop=args.drop,
ignore_errors=args.ignore_errors)
if args.load_data:
LOG.warning('Load data')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
database_import.load_data(args.config.get_libpq_dsn(),
args.data_dir,
args.threads or 1)
with connect(args.config.get_libpq_dsn()) as conn:
try:
status.set_status(conn, status.compute_database_date(conn))
except Exception as exc: # pylint: disable=broad-except
LOG.error('Cannot determine date of database: %s', exc)
if args.index:
LOG.warning('Indexing')
from ..indexer.indexer import Indexer
indexer = Indexer(args.config.get_libpq_dsn(), args.threads or 1)
indexer.index_full()


@@ -1,7 +1,9 @@
"""
Specialised connection and cursor functions.
"""
import contextlib
import logging
import os
import psycopg2
import psycopg2.extensions
@@ -9,6 +11,8 @@ import psycopg2.extras
from ..errors import UsageError
LOG = logging.getLogger()
class _Cursor(psycopg2.extras.DictCursor):
""" A cursor returning dict-like objects and providing specialised
execution functions.
@@ -17,8 +21,7 @@ class _Cursor(psycopg2.extras.DictCursor):
def execute(self, query, args=None): # pylint: disable=W0221
""" Query execution that logs the SQL query when debugging is enabled.
"""
logger = logging.getLogger()
logger.debug(self.mogrify(query, args).decode('utf-8'))
LOG.debug(self.mogrify(query, args).decode('utf-8'))
super().execute(query, args)
@@ -72,21 +75,98 @@ class _Connection(psycopg2.extensions.connection):
return True
def drop_table(self, name, if_exists=True):
""" Drop the table with the given name.
Set `if_exists` to False if a non-existent table should raise
an exception instead of just being ignored.
"""
with self.cursor() as cur:
cur.execute("""DROP TABLE {} "{}"
""".format('IF EXISTS' if if_exists else '', name))
self.commit()
def server_version_tuple(self):
""" Return the server version as a tuple of (major, minor).
Converts correctly for pre-10 and post-10 PostgreSQL versions.
"""
version = self.server_version
if version < 100000:
return (version / 10000, (version % 10000) / 100)
return (int(version / 10000), (version % 10000) / 100)
return (version / 10000, version % 10000)
return (int(version / 10000), version % 10000)
def postgis_version_tuple(self):
""" Return the postgis version installed in the database as a
tuple of (major, minor). Assumes that the PostGIS extension
has been installed already.
"""
with self.cursor() as cur:
version = cur.scalar('SELECT postgis_lib_version()')
return tuple((int(x) for x in version.split('.')[:2]))
def connect(dsn):
""" Open a connection to the database using the specialised connection
factory.
factory. The returned object may be used in conjunction with 'with'.
When used outside a context manager, use the `connection` attribute
to get the connection.
"""
try:
return psycopg2.connect(dsn, connection_factory=_Connection)
conn = psycopg2.connect(dsn, connection_factory=_Connection)
ctxmgr = contextlib.closing(conn)
ctxmgr.connection = conn
return ctxmgr
except psycopg2.OperationalError as err:
raise UsageError("Cannot connect to database: {}".format(err)) from err
# Translation from PG connection string parameters to PG environment variables.
# Derived from https://www.postgresql.org/docs/current/libpq-envars.html.
_PG_CONNECTION_STRINGS = {
'host': 'PGHOST',
'hostaddr': 'PGHOSTADDR',
'port': 'PGPORT',
'dbname': 'PGDATABASE',
'user': 'PGUSER',
'password': 'PGPASSWORD',
'passfile': 'PGPASSFILE',
'channel_binding': 'PGCHANNELBINDING',
'service': 'PGSERVICE',
'options': 'PGOPTIONS',
'application_name': 'PGAPPNAME',
'sslmode': 'PGSSLMODE',
'requiressl': 'PGREQUIRESSL',
'sslcompression': 'PGSSLCOMPRESSION',
'sslcert': 'PGSSLCERT',
'sslkey': 'PGSSLKEY',
'sslrootcert': 'PGSSLROOTCERT',
'sslcrl': 'PGSSLCRL',
'requirepeer': 'PGREQUIREPEER',
'ssl_min_protocol_version': 'PGSSLMINPROTOCOLVERSION',
'ssl_max_protocol_version': 'PGSSLMAXPROTOCOLVERSION',
'gssencmode': 'PGGSSENCMODE',
'krbsrvname': 'PGKRBSRVNAME',
'gsslib': 'PGGSSLIB',
'connect_timeout': 'PGCONNECT_TIMEOUT',
'target_session_attrs': 'PGTARGETSESSIONATTRS',
}
def get_pg_env(dsn, base_env=None):
""" Return a copy of `base_env` with the environment variables for
PostgreSQL set up from the given database connection string.
If `base_env` is None, then the OS environment is used as a base
environment.
"""
env = dict(base_env if base_env is not None else os.environ)
for param, value in psycopg2.extensions.parse_dsn(dsn).items():
if param in _PG_CONNECTION_STRINGS:
env[_PG_CONNECTION_STRINGS[param]] = value
else:
LOG.error("Unknown connection parameter '%s' ignored.", param)
return env
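
A short sketch of the intended use: decompose the DSN once and hand the resulting libpq environment to child processes such as `psql` (DSN and query are placeholders):

```python
import subprocess

from nominatim.db.connection import get_pg_env

# Yields a copy of os.environ extended with PGDATABASE=nominatim,
# PGHOST=localhost and PGUSER=osm.
env = get_pg_env('dbname=nominatim host=localhost user=osm')
subprocess.run(['psql', '-c', 'SELECT version()'], env=env, check=True)
```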


@@ -1,12 +1,59 @@
"""
Helper functions for handling DB accesses.
"""
import subprocess
import logging
import gzip
def execute_file(conn, fname):
""" Read an SQL file and run its contents against the given connection.
from .connection import get_pg_env
from ..errors import UsageError
LOG = logging.getLogger()
def _pipe_to_proc(proc, fdesc):
chunk = fdesc.read(2048)
while chunk and proc.poll() is None:
try:
proc.stdin.write(chunk)
except BrokenPipeError as exc:
raise UsageError("Failed to execute SQL file.") from exc
chunk = fdesc.read(2048)
return len(chunk)
def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None):
""" Read an SQL file and run its contents against the given database
using psql. Use `pre_code` and `post_code` to run extra commands
before or after executing the file. The commands are run within the
same session, so they may be used to wrap the file execution in a
transaction.
"""
with fname.open('r') as fdesc:
sql = fdesc.read()
with conn.cursor() as cur:
cur.execute(sql)
conn.commit()
cmd = ['psql']
if not ignore_errors:
cmd.extend(('-v', 'ON_ERROR_STOP=1'))
if not LOG.isEnabledFor(logging.INFO):
cmd.append('--quiet')
proc = subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE)
try:
if not LOG.isEnabledFor(logging.INFO):
proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
if pre_code:
proc.stdin.write((pre_code + ';').encode('utf-8'))
if fname.suffix == '.gz':
with gzip.open(str(fname), 'rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
else:
with fname.open('rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
if remain == 0 and post_code:
proc.stdin.write((';' + post_code).encode('utf-8'))
finally:
proc.stdin.close()
ret = proc.wait()
if ret != 0 or remain > 0:
raise UsageError("Failed to execute SQL file.")


@@ -61,8 +61,8 @@ class InterpolationRunner:
@staticmethod
def sql_index_place(ids):
return """UPDATE location_property_osmline
SET indexed_status = 0 WHERE place_id IN ({})"""\
.format(','.join((str(i) for i in ids)))
SET indexed_status = 0 WHERE place_id IN ({})
""".format(','.join((str(i) for i in ids)))
class BoundaryRunner:
""" Returns SQL commands for indexing the administrative boundaries
@@ -79,57 +79,171 @@ class BoundaryRunner:
return """SELECT count(*) FROM placex
WHERE indexed_status > 0
AND rank_search = {}
AND class = 'boundary' and type = 'administrative'""".format(self.rank)
AND class = 'boundary' and type = 'administrative'
""".format(self.rank)
def sql_get_objects(self):
return """SELECT place_id FROM placex
WHERE indexed_status > 0 and rank_search = {}
and class = 'boundary' and type = 'administrative'
ORDER BY partition, admin_level""".format(self.rank)
ORDER BY partition, admin_level
""".format(self.rank)
@staticmethod
def sql_index_place(ids):
return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
.format(','.join((str(i) for i in ids)))
class PostcodeRunner:
""" Provides the SQL commands for indexing the location_postcode table.
"""
@staticmethod
def name():
return "postcodes (location_postcode)"
@staticmethod
def sql_count_objects():
return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'
@staticmethod
def sql_get_objects():
return """SELECT place_id FROM location_postcode
WHERE indexed_status > 0
ORDER BY country_code, postcode"""
@staticmethod
def sql_index_place(ids):
return """UPDATE location_postcode SET indexed_status = 0
WHERE place_id IN ({})
""".format(','.join((str(i) for i in ids)))
def _analyse_db_if(conn, condition):
if condition:
with conn.cursor() as cur:
cur.execute('ANALYSE')
class Indexer:
""" Main indexing routine.
"""
def __init__(self, dsn, num_threads):
self.conn = psycopg2.connect(dsn)
self.threads = [DBConnection(dsn) for _ in range(num_threads)]
self.dsn = dsn
self.num_threads = num_threads
self.conn = None
self.threads = []
def _setup_connections(self):
self.conn = psycopg2.connect(self.dsn)
self.threads = [DBConnection(self.dsn) for _ in range(self.num_threads)]
def _close_connections(self):
if self.conn:
self.conn.close()
self.conn = None
for thread in self.threads:
thread.close()
self.threads = []
def index_full(self, analyse=True):
""" Index the complete database. This will first index boudnaries
followed by all other objects. When `analyse` is True, then the
database will be analysed at the appropriate places to
ensure that database statistics are updated.
"""
conn = psycopg2.connect(self.dsn)
try:
self.index_by_rank(0, 4)
_analyse_db_if(conn, analyse)
self.index_boundaries(0, 30)
_analyse_db_if(conn, analyse)
self.index_by_rank(5, 25)
_analyse_db_if(conn, analyse)
self.index_by_rank(26, 30)
_analyse_db_if(conn, analyse)
self.index_postcodes()
_analyse_db_if(conn, analyse)
finally:
conn.close()
def index_boundaries(self, minrank, maxrank):
""" Index only administrative boundaries within the given rank range.
"""
LOG.warning("Starting indexing boundaries using %s threads",
len(self.threads))
self.num_threads)
for rank in range(max(minrank, 4), min(maxrank, 26)):
self.index(BoundaryRunner(rank))
self._setup_connections()
try:
for rank in range(max(minrank, 4), min(maxrank, 26)):
self.index(BoundaryRunner(rank))
finally:
self._close_connections()
def index_by_rank(self, minrank, maxrank):
""" Run classic indexing by rank.
""" Index all entries of placex in the given rank range (inclusive)
in order of their address rank.
When rank 30 is requested then also interpolations and
places with address rank 0 will be indexed.
"""
maxrank = min(maxrank, 30)
LOG.warning("Starting indexing rank (%i to %i) using %i threads",
minrank, maxrank, len(self.threads))
minrank, maxrank, self.num_threads)
for rank in range(max(1, minrank), maxrank):
self.index(RankRunner(rank))
self._setup_connections()
if maxrank == 30:
self.index(RankRunner(0))
self.index(InterpolationRunner(), 20)
self.index(RankRunner(30), 20)
else:
self.index(RankRunner(maxrank))
try:
for rank in range(max(1, minrank), maxrank):
self.index(RankRunner(rank))
if maxrank == 30:
self.index(RankRunner(0))
self.index(InterpolationRunner(), 20)
self.index(RankRunner(30), 20)
else:
self.index(RankRunner(maxrank))
finally:
self._close_connections()
def index_postcodes(self):
"""Index the entries ofthe location_postcode table.
"""
LOG.warning("Starting indexing postcodes using %s threads", self.num_threads)
self._setup_connections()
try:
self.index(PostcodeRunner(), 20)
finally:
self._close_connections()
def update_status_table(self):
""" Update the status in the status table to 'indexed'.
"""
with self.conn.cursor() as cur:
cur.execute('UPDATE import_status SET indexed = true')
self.conn.commit()
conn = psycopg2.connect(self.dsn)
try:
with conn.cursor() as cur:
cur.execute('UPDATE import_status SET indexed = true')
conn.commit()
finally:
conn.close()
def index(self, obj, batch=1):
""" Index a single rank or table. `obj` describes the SQL to use


@@ -60,7 +60,7 @@ def check_database(config):
""" Run a number of checks on the database and return the status.
"""
try:
conn = connect(config.get_libpq_dsn())
conn = connect(config.get_libpq_dsn()).connection
except UsageError as err:
conn = _BadConnection(str(err))


@@ -0,0 +1,260 @@
"""
Functions for setting up and importing a new Nominatim database.
"""
import logging
import os
import selectors
import subprocess
import shutil
from pathlib import Path
import psutil
import psycopg2
from ..db.connection import connect, get_pg_env
from ..db import utils as db_utils
from ..db.async_connection import DBConnection
from .exec_utils import run_osm2pgsql
from ..errors import UsageError
from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
LOG = logging.getLogger()
def setup_database_skeleton(dsn, data_dir, no_partitions, rouser=None):
""" Create a new database for Nominatim and populate it with the
essential extensions and data.
"""
LOG.warning('Creating database')
create_db(dsn, rouser)
LOG.warning('Setting up database')
with connect(dsn) as conn:
setup_extensions(conn)
LOG.warning('Loading basic data')
import_base_data(dsn, data_dir, no_partitions)
def create_db(dsn, rouser=None):
""" Create a new database for the given DSN. Fails when the database
already exists or the PostgreSQL version is too old.
Uses `createdb` to create the database.
If 'rouser' is given, then the function also checks that the user
with that given name exists.
Requires superuser rights by the caller.
"""
proc = subprocess.run(['createdb'], env=get_pg_env(dsn), check=False)
if proc.returncode != 0:
raise UsageError('Creating new database failed.')
with connect(dsn) as conn:
postgres_version = conn.server_version_tuple()
if postgres_version < POSTGRESQL_REQUIRED_VERSION:
LOG.fatal('Minimum supported version of Postgresql is %d.%d. '
'Found version %d.%d.',
POSTGRESQL_REQUIRED_VERSION[0], POSTGRESQL_REQUIRED_VERSION[1],
postgres_version[0], postgres_version[1])
raise UsageError('PostgreSQL server is too old.')
if rouser is not None:
with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM pg_user where usename = %s',
(rouser, ))
if cnt == 0:
LOG.fatal("Web user '%s' does not exists. Create it with:\n"
"\n createuser %s", rouser, rouser)
raise UsageError('Missing read-only user.')
def setup_extensions(conn):
""" Set up all extensions needed for Nominatim. Also checks that the
versions of the extensions are sufficient.
"""
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
conn.commit()
postgis_version = conn.postgis_version_tuple()
if postgis_version < POSTGIS_REQUIRED_VERSION:
LOG.fatal('Minimum supported version of PostGIS is %d.%d. '
'Found version %d.%d.',
POSTGIS_REQUIRED_VERSION[0], POSTGIS_REQUIRED_VERSION[1],
postgis_version[0], postgis_version[1])
raise UsageError('PostGIS version is too old.')
def install_module(src_dir, project_dir, module_dir, conn=None):
""" Copy the normalization module from src_dir into the project
directory under the '/module' directory. If 'module_dir' is set, then
use the module from there instead and check that it is accessible
for Postgresql.
The function detects when the installation is run from the
build directory. It doesn't touch the module in that case.
If 'conn' is given, then the function also tests if the module
can be accessed via the given database.
"""
if not module_dir:
module_dir = project_dir / 'module'
if not module_dir.exists() or not src_dir.samefile(module_dir):
if not module_dir.exists():
module_dir.mkdir()
destfile = module_dir / 'nominatim.so'
shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
destfile.chmod(0o755)
LOG.info('Database module installed at %s', str(destfile))
else:
LOG.info('Running from build directory. Leaving database module as is.')
else:
LOG.info("Using custom path for database module at '%s'", module_dir)
if conn is not None:
with conn.cursor() as cur:
try:
cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
RETURNS text AS '{}/nominatim.so', 'transliteration'
LANGUAGE c IMMUTABLE STRICT;
DROP FUNCTION nominatim_test_import_func(text)
""".format(module_dir))
except psycopg2.DatabaseError as err:
LOG.fatal("Error accessing database module: %s", err)
raise UsageError("Database module cannot be accessed.") from err
def import_base_data(dsn, sql_dir, ignore_partitions=False):
""" Create and populate the tables with basic static data that provides
the background for geocoding. The data is assumed not to exist yet.
"""
db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
if ignore_partitions:
with connect(dsn) as conn:
with conn.cursor() as cur:
cur.execute('UPDATE country_name SET partition = 0')
conn.commit()
def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
""" Import the given OSM file. 'options' contains the list of
default settings for osm2pgsql.
"""
options['import_file'] = osm_file
options['append'] = False
options['threads'] = 1
if not options['flatnode_file'] and options['osm2pgsql_cache'] == 0:
# Make some educated guesses about cache size based on the size
# of the import file and the available memory.
mem = psutil.virtual_memory()
fsize = os.stat(str(osm_file)).st_size
options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
fsize * 2) / 1024 / 1024) + 1
run_osm2pgsql(options)
with connect(options['dsn']) as conn:
if not ignore_errors:
with conn.cursor() as cur:
cur.execute('SELECT * FROM place LIMIT 1')
if cur.rowcount == 0:
raise UsageError('No data imported by osm2pgsql.')
if drop:
conn.drop_table('planet_osm_nodes')
if drop:
if options['flatnode_file']:
Path(options['flatnode_file']).unlink()
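The cache heuristic in concrete numbers (illustrative only): with 8 GiB of available plus cached memory and a 1.5 GiB extract, the doubled file size wins the min() and osm2pgsql gets a cache of about 3 GiB:

mem_budget = 8 * 1024**3             # available + cached, in bytes
fsize = int(1.5 * 1024**3)           # size of the import file in bytes
cache_mb = int(min(mem_budget * 0.75, fsize * 2) / 1024 / 1024) + 1
assert cache_mb == 3073              # handed to osm2pgsql in MB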
def truncate_data_tables(conn, max_word_frequency=None):
""" Truncate all data tables to prepare for a fresh load.
"""
with conn.cursor() as cur:
cur.execute('TRUNCATE word')
cur.execute('TRUNCATE placex')
cur.execute('TRUNCATE place_addressline')
cur.execute('TRUNCATE location_area')
cur.execute('TRUNCATE location_area_country')
cur.execute('TRUNCATE location_property')
cur.execute('TRUNCATE location_property_tiger')
cur.execute('TRUNCATE location_property_osmline')
cur.execute('TRUNCATE location_postcode')
if conn.table_exists('search_name'):
cur.execute('TRUNCATE search_name')
cur.execute('DROP SEQUENCE IF EXISTS seq_place')
cur.execute('CREATE SEQUENCE seq_place start 100000')
cur.execute("""SELECT tablename FROM pg_tables
WHERE tablename LIKE 'location_road_%'""")
for table in [r[0] for r in list(cur)]:
cur.execute('TRUNCATE ' + table)
if max_word_frequency is not None:
# Used by getorcreate_word_id to ignore frequent partial words.
cur.execute("""CREATE OR REPLACE FUNCTION get_maxwordfreq()
RETURNS integer AS $$
SELECT {} as maxwordfreq;
$$ LANGUAGE SQL IMMUTABLE
""".format(max_word_frequency))
conn.commit()
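For a concrete picture, this is the SQL template above instantiated with an illustrative cutoff (a sketch, not part of the change):

template = """CREATE OR REPLACE FUNCTION get_maxwordfreq()
              RETURNS integer AS $$
              SELECT {} as maxwordfreq;
              $$ LANGUAGE SQL IMMUTABLE"""
print(template.format(50000))   # illustrative cutoff of 50000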
_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
def load_data(dsn, data_dir, threads):
""" Copy data into the word and placex table.
"""
# Pre-calculate the most important terms in the word list.
db_utils.execute_file(dsn, data_dir / 'words.sql')
sel = selectors.DefaultSelector()
# Then copy data from place to placex in <threads - 1> chunks.
place_threads = max(1, threads - 1)
for imod in range(place_threads):
conn = DBConnection(dsn)
conn.connect()
conn.perform("""INSERT INTO placex ({0})
SELECT {0} FROM place
WHERE osm_id % {1} = {2}
AND NOT (class='place' and type='houses')
AND ST_IsValid(geometry)
""".format(_COPY_COLUMNS, place_threads, imod))
sel.register(conn, selectors.EVENT_READ, conn)
# Address interpolations go into another table.
conn = DBConnection(dsn)
conn.connect()
conn.perform("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
SELECT osm_id, address, geometry FROM place
WHERE class='place' and type='houses' and osm_type='W'
and ST_GeometryType(geometry) = 'ST_LineString'
""")
sel.register(conn, selectors.EVENT_READ, conn)
# Now wait for all of them to finish.
todo = place_threads + 1
while todo > 0:
for key, _ in sel.select(1):
conn = key.data
sel.unregister(conn)
conn.wait()
conn.close()
todo -= 1
print('.', end='', flush=True)
print('\n')
with connect(dsn) as conn:
with conn.cursor() as cur:
cur.execute('ANALYSE')
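The modulo partitioning in concrete terms: with threads=3, two worker connections copy disjoint halves of the place table while one more statement handles the interpolations. A sketch of the generated predicates:

place_threads = max(1, 3 - 1)
for imod in range(place_threads):
    print('WHERE osm_id % {} = {}'.format(place_threads, imod))
# WHERE osm_id % 2 = 0
# WHERE osm_id % 2 = 1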

View File

@@ -2,14 +2,12 @@
Helper functions for executing external programs.
"""
import logging
import os
import subprocess
import urllib.request as urlrequest
from urllib.parse import urlencode
from psycopg2.extensions import parse_dsn
from ..version import NOMINATIM_VERSION
from ..db.connection import get_pg_env
LOG = logging.getLogger()
@@ -100,7 +98,7 @@ def run_php_server(server_address, base_dir):
def run_osm2pgsql(options):
""" Run osm2pgsql with the given options.
"""
env = os.environ
env = get_pg_env(options['dsn'])
cmd = [options['osm2pgsql'],
'--hstore', '--latlon', '--slim',
'--with-forward-dependencies', 'false',
@@ -112,20 +110,18 @@ def run_osm2pgsql(options):
]
if options['append']:
cmd.append('--append')
else:
cmd.append('--create')
if options['flatnode_file']:
cmd.extend(('--flat-nodes', options['flatnode_file']))
dsn = parse_dsn(options['dsn'])
if 'password' in dsn:
env['PGPASSWORD'] = dsn['password']
if 'dbname' in dsn:
cmd.extend(('-d', dsn['dbname']))
if 'user' in dsn:
cmd.extend(('--username', dsn['user']))
for param in ('host', 'port'):
if param in dsn:
cmd.extend(('--' + param, dsn[param]))
for key, param in (('slim_data', '--tablespace-slim-data'),
('slim_index', '--tablespace-slim-index'),
('main_data', '--tablespace-main-data'),
('main_index', '--tablespace-main-index')):
if options['tablespaces'][key]:
cmd.extend((param, options['tablespaces'][key]))
if options.get('disable_jit', False):
env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'
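The expected shape of the options dictionary can be read off the test fixture further down; an illustrative example:

options = dict(osm2pgsql='osm2pgsql',             # path to the binary
               osm2pgsql_cache=800,               # node cache size in MB
               osm2pgsql_style='import.style',
               threads=2,
               dsn='dbname=nominatim',
               flatnode_file='',                  # set to enable --flat-nodes
               append=False,                      # False selects --create
               import_file='extract.osm.pbf',
               tablespaces=dict(slim_data='', slim_index='',
                                main_data='', main_index=''))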

View File

@@ -12,17 +12,17 @@ from ..db.utils import execute_file
LOG = logging.getLogger()
def update_postcodes(conn, sql_dir):
def update_postcodes(dsn, sql_dir):
""" Recalculate postcode centroids and add, remove and update entries in the
location_postcode table. `dsn` is the connection string for the database.
"""
execute_file(conn, sql_dir / 'update-postcodes.sql')
execute_file(dsn, sql_dir / 'update-postcodes.sql')
def recompute_word_counts(conn, sql_dir):
def recompute_word_counts(dsn, sql_dir):
""" Compute the frequency of full-word search terms.
"""
execute_file(conn, sql_dir / 'words_from_search_name.sql')
execute_file(dsn, sql_dir / 'words_from_search_name.sql')
def _add_address_level_rows_from_entry(rows, entry):
@@ -200,6 +200,53 @@ PHP_CONST_DEFS = (
)
def import_wikipedia_articles(dsn, data_path, ignore_errors=False):
""" Replaces the wikipedia importance tables with new data.
The import is run in a single transaction so that the new data
is replaced seamlessly.
Returns 0 if all was well and 1 if the importance file could not
be found. Throws an exception if there was an error reading the file.
"""
datafile = data_path / 'wikimedia-importance.sql.gz'
if not datafile.exists():
return 1
pre_code = """BEGIN;
DROP TABLE IF EXISTS "wikipedia_article";
DROP TABLE IF EXISTS "wikipedia_redirect"
"""
post_code = "COMMIT"
execute_file(dsn, datafile, ignore_errors=ignore_errors,
pre_code=pre_code, post_code=post_code)
return 0
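Callers treat the result as a status code rather than an exception; a sketch (DSN and data path are illustrative):

from pathlib import Path
from nominatim.tools import refresh

if refresh.import_wikipedia_articles('dbname=nominatim', Path('data')) > 0:
    print('Wikipedia importance dump not found; skipping importance update.')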
def recompute_importance(conn):
""" Recompute wikipedia links and importance for all entries in placex.
This is a long-running operation that must not be executed in
parallel with updates.
"""
with conn.cursor() as cur:
cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL')
cur.execute("""
UPDATE placex SET (wikipedia, importance) =
(SELECT wikipedia, importance
FROM compute_importance(extratags, country_code, osm_type, osm_id))
""")
cur.execute("""
UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
FROM placex d
WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
and (s.wikipedia is null or s.importance < d.importance);
""")
cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')
conn.commit()
def setup_website(basedir, phplib_dir, config):
""" Create the website script stubs.
"""

View File

@@ -3,3 +3,6 @@ Version information for Nominatim.
"""
NOMINATIM_VERSION = "3.6.0"
POSTGRESQL_REQUIRED_VERSION = (9, 3)
POSTGIS_REQUIRED_VERSION = (2, 2)
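Keeping the requirements as (major, minor) tuples makes the version checks in database_import plain lexicographic comparisons:

# tuples compare element by element: major version first, then minor
assert (9, 2) < (9, 3) < (10, 0)
assert (2, 2) < (2, 5) < (3, 0)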

View File

@@ -10,5 +10,8 @@ bdd-no-test-db:
php:
cd php && phpunit ./
python:
pytest python
.PHONY: bdd php no-test-db

View File

@@ -7,6 +7,7 @@ import psycopg2.extras
sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve()))
from nominatim import cli
from nominatim.config import Configuration
from nominatim.tools import refresh
from steps.utils import run_script
@@ -88,18 +89,18 @@ class NominatimEnvironment:
self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
self.test_env['NOMINATIM_DATADIR'] = self.src_dir / 'data'
self.test_env['NOMINATIM_SQLDIR'] = self.src_dir / 'lib-sql'
self.test_env['NOMINATIM_CONFIGDIR'] = self.src_dir / 'settings'
self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = self.build_dir / 'module'
self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql'
self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim'
self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve())
self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve())
self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve())
self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())
if self.server_module_path:
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
else:
# avoid module being copied into the temporary environment
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module'
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = str((self.build_dir / 'module').resolve())
if self.website_dir is not None:
self.website_dir.cleanup()
@@ -182,9 +183,9 @@ class NominatimEnvironment:
self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
try:
self.run_setup_script('all', osm_file=self.api_test_file)
self.run_nominatim('import', '--osm-file', str(self.api_test_file))
self.run_setup_script('import-tiger-data')
self.run_setup_script('drop')
self.run_nominatim('freeze')
phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
@@ -249,12 +250,25 @@ class NominatimEnvironment:
"""
with db.cursor() as cur:
while True:
self.run_update_script('index')
self.run_nominatim('index')
cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
if cur.rowcount == 0:
return
def run_nominatim(self, *cmdline):
""" Run the nominatim command-line tool via the library.
"""
cli.nominatim(module_dir='',
osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
phplib_dir=str(self.src_dir / 'lib-php'),
sqllib_dir=str(self.src_dir / 'lib-sql'),
data_dir=str(self.src_dir / 'data'),
config_dir=str(self.src_dir / 'settings'),
cli_args=cmdline,
phpcgi_path='',
environ=self.test_env)
def run_setup_script(self, *args, **kwargs):
""" Run the Nominatim setup script with the given arguments.
"""
@@ -285,7 +299,7 @@ class NominatimEnvironment:
""" Copy data from place to the placex and location_property_osmline
tables invoking the appropriate triggers.
"""
self.run_setup_script('create-functions', 'create-partition-functions')
self.run_nominatim('refresh', '--functions', '--no-diff-updates')
with db.cursor() as cur:
cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type,

View File

@@ -5,6 +5,7 @@ import psycopg2.extras
from place_inserter import PlaceColumn
from table_compare import NominatimID, DBRow
from nominatim.indexer.indexer import Indexer
def check_database_integrity(context):
""" Check some generic constraints on the tables.
@@ -85,7 +86,12 @@ def import_and_index_data_from_place_table(context):
""" Import data previously set up in the place table.
"""
context.nominatim.copy_from_place(context.db)
context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse')
context.nominatim.run_setup_script('calculate-postcodes')
# Call directly as the refresh function does not include postcodes.
indexer = Indexer(context.nominatim.test_env['NOMINATIM_DATABASE_DSN'][6:], 1)
indexer.index_full(analyse=False)
check_database_integrity(context)
@when("updating places")
@@ -93,8 +99,7 @@ def update_place_table(context):
""" Update the place table with the given data. Also runs all triggers
related to updates and reindexes the new data.
"""
context.nominatim.run_setup_script(
'create-functions', 'create-partition-functions', 'enable-diff-updates')
context.nominatim.run_nominatim('refresh', '--functions')
with context.db.cursor() as cur:
for row in context.table:
PlaceColumn(context).add_row(row, False).db_insert(cur)
@@ -106,7 +111,7 @@ def update_place_table(context):
def update_postcodes(context):
""" Rerun the calculation of postcodes.
"""
context.nominatim.run_update_script('calculate-postcodes')
context.nominatim.run_nominatim('refresh', '--postcodes')
@when("marking for delete (?P<oids>.*)")
def delete_places(context, oids):
@@ -114,8 +119,7 @@ def delete_places(context, oids):
separated by commas. Also runs all triggers
related to updates and reindexes the new data.
"""
context.nominatim.run_setup_script(
'create-functions', 'create-partition-functions', 'enable-diff-updates')
context.nominatim.run_nominatim('refresh', '--functions')
with context.db.cursor() as cur:
for oid in oids.split(','):
NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')

View File

@@ -75,9 +75,8 @@ def update_from_osm_file(context):
The data is expected as attached text in OPL format.
"""
context.nominatim.copy_from_place(context.db)
context.nominatim.run_setup_script('index', 'index-noanalyse')
context.nominatim.run_setup_script('create-functions', 'create-partition-functions',
'enable-diff-updates')
context.nominatim.run_nominatim('index')
context.nominatim.run_nominatim('refresh', '--functions')
# create an OSM file and import it
fname = write_opl_file(context.text, context.osm)

View File

@@ -43,6 +43,11 @@ class _TestingCursor(psycopg2.extras.DictCursor):
WHERE tablename = %s""", (table, ))
return num == 1
def table_rows(self, table):
""" Return the number of rows in the given table.
"""
return self.scalar('SELECT count(*) FROM ' + table)
@pytest.fixture
def temp_db(monkeypatch):
@@ -71,6 +76,12 @@ def temp_db(monkeypatch):
conn.close()
@pytest.fixture
def dsn(temp_db):
return 'dbname=' + temp_db
@pytest.fixture
def temp_db_with_extensions(temp_db):
conn = psycopg2.connect(database=temp_db)
@@ -85,9 +96,8 @@ def temp_db_with_extensions(temp_db):
def temp_db_conn(temp_db):
""" Connection to the test database.
"""
conn = connection.connect('dbname=' + temp_db)
yield conn
conn.close()
with connection.connect('dbname=' + temp_db) as conn:
yield conn
@pytest.fixture
@@ -102,10 +112,25 @@ def temp_db_cursor(temp_db):
conn.close()
@pytest.fixture
def table_factory(temp_db_cursor):
def mk_table(name, definition='id INT', content=None):
temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition))
if content is not None:
if not isinstance(content, str):
content = '),('.join([str(x) for x in content])
temp_db_cursor.execute("INSERT INTO {} VALUES ({})".format(name, content))
return mk_table
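A sketch of how the factory reads in a test (names are illustrative):

def test_example(table_factory, temp_db_cursor):
    table_factory('dummy', 'id INT', content=(1, 2, 3))
    assert temp_db_cursor.scalar('SELECT count(*) FROM dummy') == 3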
@pytest.fixture
def def_config():
return Configuration(None, SRC_DIR.resolve() / 'settings')
@pytest.fixture
def src_dir():
return SRC_DIR.resolve()
@pytest.fixture
def status_table(temp_db_conn):
@@ -158,7 +183,7 @@ def place_row(place_table, temp_db_cursor):
temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
(osm_id or next(idseq), osm_type, cls, typ, names,
admin_level, address, extratags,
geom or 'SRID=4326;POINT(0 0 )'))
geom or 'SRID=4326;POINT(0 0)'))
return _insert
@@ -168,7 +193,7 @@ def placex_table(temp_db_with_extensions, temp_db_conn):
"""
with temp_db_conn.cursor() as cur:
cur.execute("""CREATE TABLE placex (
place_id BIGINT NOT NULL,
place_id BIGINT,
parent_place_id BIGINT,
linked_place_id BIGINT,
importance FLOAT,
@@ -191,9 +216,53 @@ def placex_table(temp_db_with_extensions, temp_db_conn):
country_code varchar(2),
housenumber TEXT,
postcode TEXT,
centroid GEOMETRY(Geometry, 4326))
""")
centroid GEOMETRY(Geometry, 4326))""")
temp_db_conn.commit()
@pytest.fixture
def osmline_table(temp_db_with_extensions, temp_db_conn):
with temp_db_conn.cursor() as cur:
cur.execute("""CREATE TABLE location_property_osmline (
place_id BIGINT,
osm_id BIGINT,
parent_place_id BIGINT,
geometry_sector INTEGER,
indexed_date TIMESTAMP,
startnumber INTEGER,
endnumber INTEGER,
partition SMALLINT,
indexed_status SMALLINT,
linegeo GEOMETRY,
interpolationtype TEXT,
address HSTORE,
postcode TEXT,
country_code VARCHAR(2))""")
temp_db_conn.commit()
@pytest.fixture
def word_table(temp_db, temp_db_conn):
with temp_db_conn.cursor() as cur:
cur.execute("""CREATE TABLE word (
word_id INTEGER,
word_token text,
word text,
class text,
type text,
country_code varchar(2),
search_name_count INTEGER,
operator TEXT)""")
temp_db_conn.commit()
@pytest.fixture
def osm2pgsql_options(temp_db):
return dict(osm2pgsql='echo',
osm2pgsql_cache=10,
osm2pgsql_style='style.file',
threads=1,
dsn='dbname=' + temp_db,
flatnode_file='',
tablespaces=dict(slim_data='', slim_index='',
main_data='', main_index=''))

18
test/python/mocks.py Normal file
View File

@@ -0,0 +1,18 @@
"""
Custom mocks for testing.
"""
class MockParamCapture:
""" Mock that records the parameters with which a function was called
as well as the number of calls.
"""
def __init__(self, retval=0):
self.called = 0
self.return_value = retval
def __call__(self, *args, **kwargs):
self.called += 1
self.last_args = args
self.last_kwargs = kwargs
return self.return_value
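A sketch of the intended use together with pytest's monkeypatch; the patched module and function are hypothetical:

def test_example(monkeypatch):
    mock = MockParamCapture(retval=0)
    monkeypatch.setattr(some_module, 'some_func', mock)  # hypothetical target
    assert some_module.some_func('x', flag=True) == 0
    assert mock.called == 1
    assert mock.last_args == ('x',)
    assert mock.last_kwargs == {'flag': True}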

View File

@@ -5,47 +5,36 @@ These tests just check that the various command line parameters route to the
correct functionality. They use a lot of monkeypatching to avoid executing
the actual functions.
"""
import datetime as dt
import psycopg2
from pathlib import Path
import pytest
import time
import nominatim.cli
import nominatim.clicmd.api
import nominatim.clicmd.refresh
import nominatim.clicmd.admin
import nominatim.clicmd.setup
import nominatim.indexer.indexer
import nominatim.tools.admin
import nominatim.tools.check_database
import nominatim.tools.database_import
import nominatim.tools.freeze
import nominatim.tools.refresh
import nominatim.tools.replication
from nominatim.errors import UsageError
from nominatim.db import status
from mocks import MockParamCapture
SRC_DIR = (Path(__file__) / '..' / '..' / '..').resolve()
def call_nominatim(*args):
return nominatim.cli.nominatim(module_dir='build/module',
osm2pgsql_path='build/osm2pgsql/osm2pgsql',
phplib_dir='lib-php',
data_dir='.',
phplib_dir=str(SRC_DIR / 'lib-php'),
data_dir=str(SRC_DIR / 'data'),
phpcgi_path='/usr/bin/php-cgi',
sqllib_dir='lib-sql',
config_dir='settings',
sqllib_dir=str(SRC_DIR / 'lib-sql'),
config_dir=str(SRC_DIR / 'settings'),
cli_args=args)
class MockParamCapture:
""" Mock that records the parameters with which a function was called
as well as the number of calls.
"""
def __init__(self, retval=0):
self.called = 0
self.return_value = retval
def __call__(self, *args, **kwargs):
self.called += 1
self.last_args = args
self.last_kwargs = kwargs
return self.return_value
@pytest.fixture
def mock_run_legacy(monkeypatch):
@@ -53,6 +42,7 @@ def mock_run_legacy(monkeypatch):
monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
return mock
@pytest.fixture
def mock_func_factory(monkeypatch):
def get_mock(module, func):
@@ -62,6 +52,7 @@ def mock_func_factory(monkeypatch):
return get_mock
def test_cli_help(capsys):
""" Running nominatim tool without arguments prints help.
"""
@@ -72,7 +63,6 @@ def test_cli_help(capsys):
@pytest.mark.parametrize("command,script", [
(('import', '--continue', 'load-data'), 'setup'),
(('special-phrases',), 'specialphrases'),
(('add-data', '--tiger-data', 'tiger'), 'setup'),
(('add-data', '--file', 'foo.osm'), 'update'),
@@ -85,6 +75,36 @@ def test_legacy_commands_simple(mock_run_legacy, command, script):
assert mock_run_legacy.last_args[0] == script + '.php'
def test_import_missing_file(temp_db):
assert 1 == call_nominatim('import', '--osm-file', 'sfsafegweweggdgw.reh.erh')
def test_import_bad_file(temp_db):
assert 1 == call_nominatim('import', '--osm-file', '.')
def test_import_full(temp_db, mock_func_factory):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
mock_func_factory(nominatim.tools.database_import, 'install_module'),
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
mock_func_factory(nominatim.tools.database_import, 'load_data'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
]
cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
mock_func_factory(nominatim.clicmd.setup, 'run_legacy_script')
assert 0 == call_nominatim('import', '--osm-file', __file__)
assert cf_mock.called > 1
for mock in mocks:
assert mock.called == 1
def test_freeze_command(mock_func_factory, temp_db):
mock_drop = mock_func_factory(nominatim.tools.freeze, 'drop_update_tables')
mock_flatnode = mock_func_factory(nominatim.tools.freeze, 'drop_flatnode_file')
@@ -146,24 +166,13 @@ def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ra
assert rank_mock.called == do_ranks
@pytest.mark.parametrize("command,params", [
('wiki-data', ('setup.php', '--import-wikipedia-articles')),
('importance', ('update.php', '--recompute-importance')),
])
def test_refresh_legacy_command(mock_func_factory, temp_db, command, params):
mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
assert 0 == call_nominatim('refresh', '--' + command)
assert mock_run_legacy.called == 1
assert len(mock_run_legacy.last_args) >= len(params)
assert mock_run_legacy.last_args[:len(params)] == params
@pytest.mark.parametrize("command,func", [
('postcodes', 'update_postcodes'),
('word-counts', 'recompute_word_counts'),
('address-levels', 'load_address_levels_from_file'),
('functions', 'create_functions'),
('wiki-data', 'import_wikipedia_articles'),
('importance', 'recompute_importance'),
('website', 'setup_website'),
])
def test_refresh_command(mock_func_factory, temp_db, command, func):
@@ -173,86 +182,16 @@ def test_refresh_command(mock_func_factory, temp_db, command, func):
assert func_mock.called == 1
def test_refresh_importance_computed_after_wiki_import(mock_func_factory, temp_db):
mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
calls = []
monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles',
lambda *args, **kwargs: calls.append('import') or 0)
monkeypatch.setattr(nominatim.tools.refresh, 'recompute_importance',
lambda *args, **kwargs: calls.append('update'))
assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
assert mock_run_legacy.called == 2
assert mock_run_legacy.last_args == ('update.php', '--recompute-importance')
@pytest.mark.parametrize("params,func", [
(('--init', '--no-update-functions'), 'init_replication'),
(('--check-for-updates',), 'check_for_updates')
])
def test_replication_command(mock_func_factory, temp_db, params, func):
func_mock = mock_func_factory(nominatim.tools.replication, func)
assert 0 == call_nominatim('replication', *params)
assert func_mock.called == 1
def test_replication_update_bad_interval(monkeypatch, temp_db):
monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx')
assert call_nominatim('replication') == 1
def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
monkeypatch.setenv('NOMINATIM_REPLICATION_URL',
'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates')
assert call_nominatim('replication') == 1
@pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE,
nominatim.tools.replication.UpdateState.NO_CHANGES])
def test_replication_update_once_no_index(mock_func_factory, temp_db, temp_db_conn,
status_table, state):
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
func_mock = mock_func_factory(nominatim.tools.replication, 'update')
assert 0 == call_nominatim('replication', '--once', '--no-index')
def test_replication_update_continuous(monkeypatch, temp_db_conn, status_table):
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
states = [nominatim.tools.replication.UpdateState.UP_TO_DATE,
nominatim.tools.replication.UpdateState.UP_TO_DATE]
monkeypatch.setattr(nominatim.tools.replication, 'update',
lambda *args, **kwargs: states.pop())
index_mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
with pytest.raises(IndexError):
call_nominatim('replication')
assert index_mock.called == 4
def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, status_table):
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
nominatim.tools.replication.UpdateState.UP_TO_DATE]
monkeypatch.setattr(nominatim.tools.replication, 'update',
lambda *args, **kwargs: states.pop())
index_mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
sleep_mock = MockParamCapture()
monkeypatch.setattr(time, 'sleep', sleep_mock)
with pytest.raises(IndexError):
call_nominatim('replication')
assert index_mock.called == 2
assert sleep_mock.called == 1
assert sleep_mock.last_args[0] == 60
assert calls == ['import', 'update']
def test_serve_command(mock_func_factory):

View File

@@ -0,0 +1,127 @@
"""
Tests for the replication command of the command-line interface wrapper.
"""
import datetime as dt
import time
from pathlib import Path
import pytest
import nominatim.cli
import nominatim.indexer.indexer
import nominatim.tools.replication
from nominatim.db import status
from mocks import MockParamCapture
SRC_DIR = (Path(__file__) / '..' / '..' / '..').resolve()
def call_nominatim(*args):
return nominatim.cli.nominatim(module_dir='build/module',
osm2pgsql_path='build/osm2pgsql/osm2pgsql',
phplib_dir=str(SRC_DIR / 'lib-php'),
data_dir=str(SRC_DIR / 'data'),
phpcgi_path='/usr/bin/php-cgi',
sqllib_dir=str(SRC_DIR / 'lib-sql'),
config_dir=str(SRC_DIR / 'settings'),
cli_args=['replication'] + list(args))
@pytest.fixture
def index_mock(monkeypatch):
mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock)
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock)
return mock
@pytest.fixture
def mock_func_factory(monkeypatch):
def get_mock(module, func):
mock = MockParamCapture()
monkeypatch.setattr(module, func, mock)
return mock
return get_mock
@pytest.fixture
def init_status(temp_db_conn, status_table):
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
return 1
@pytest.fixture
def update_mock(mock_func_factory, init_status):
return mock_func_factory(nominatim.tools.replication, 'update')
@pytest.mark.parametrize("params,func", [
(('--init', '--no-update-functions'), 'init_replication'),
(('--check-for-updates',), 'check_for_updates')
])
def test_replication_command(mock_func_factory, temp_db, params, func):
func_mock = mock_func_factory(nominatim.tools.replication, func)
assert 0 == call_nominatim(*params)
assert func_mock.called == 1
def test_replication_update_bad_interval(monkeypatch, temp_db):
monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx')
assert call_nominatim() == 1
def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
monkeypatch.setenv('NOMINATIM_REPLICATION_URL',
'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates')
assert call_nominatim() == 1
def test_replication_update_once_no_index(update_mock):
assert 0 == call_nominatim('--once', '--no-index')
assert str(update_mock.last_args[1]['osm2pgsql']) == 'build/osm2pgsql/osm2pgsql'
def test_replication_update_custom_osm2pgsql(monkeypatch, update_mock):
monkeypatch.setenv('NOMINATIM_OSM2PGSQL_BINARY', '/secret/osm2pgsql')
assert 0 == call_nominatim('--once', '--no-index')
assert str(update_mock.last_args[1]['osm2pgsql']) == '/secret/osm2pgsql'
def test_replication_update_custom_threads(update_mock):
assert 0 == call_nominatim('--once', '--no-index', '--threads', '4')
assert update_mock.last_args[1]['threads'] == 4
def test_replication_update_continuous(monkeypatch, init_status, index_mock):
states = [nominatim.tools.replication.UpdateState.UP_TO_DATE,
nominatim.tools.replication.UpdateState.UP_TO_DATE]
monkeypatch.setattr(nominatim.tools.replication, 'update',
lambda *args, **kwargs: states.pop())
with pytest.raises(IndexError):
call_nominatim()
assert index_mock.called == 4
def test_replication_update_continuous_no_change(monkeypatch, init_status, index_mock):
states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
nominatim.tools.replication.UpdateState.UP_TO_DATE]
monkeypatch.setattr(nominatim.tools.replication, 'update',
lambda *args, **kwargs: states.pop())
sleep_mock = MockParamCapture()
monkeypatch.setattr(time, 'sleep', sleep_mock)
with pytest.raises(IndexError):
call_nominatim()
assert index_mock.called == 2
assert sleep_mock.called == 1
assert sleep_mock.last_args[0] == 60

View File

@@ -2,20 +2,20 @@
Tests for specialised connection and cursor classes.
"""
import pytest
import psycopg2
from nominatim.db.connection import connect
from nominatim.db.connection import connect, get_pg_env
@pytest.fixture
def db(temp_db):
conn = connect('dbname=' + temp_db)
yield conn
conn.close()
with connect('dbname=' + temp_db) as conn:
yield conn
def test_connection_table_exists(db, temp_db_cursor):
def test_connection_table_exists(db, table_factory):
assert db.table_exists('foobar') == False
temp_db_cursor.execute('CREATE TABLE foobar (id INT)')
table_factory('foobar')
assert db.table_exists('foobar') == True
@@ -31,6 +31,22 @@ def test_connection_index_exists(db, temp_db_cursor):
assert db.index_exists('some_index', table='bar') == False
def test_drop_table_existing(db, table_factory):
table_factory('dummy')
assert db.table_exists('dummy')
db.drop_table('dummy')
assert not db.table_exists('dummy')
def test_drop_table_non_existing(db):
db.drop_table('dfkjgjriogjigjgjrdghehtre')
def test_drop_table_non_existing_force(db):
with pytest.raises(psycopg2.ProgrammingError, match='.*does not exist.*'):
db.drop_table('dfkjgjriogjigjgjrdghehtre', if_exists=False)
def test_connection_server_version_tuple(db):
ver = db.server_version_tuple()
@@ -38,8 +54,19 @@ def test_connection_server_version_tuple(db):
assert len(ver) == 2
assert ver[0] > 8
def test_cursor_scalar(db, temp_db_cursor):
temp_db_cursor.execute('CREATE TABLE dummy (id INT)')
def test_connection_postgis_version_tuple(db, temp_db_cursor):
temp_db_cursor.execute('CREATE EXTENSION postgis')
ver = db.postgis_version_tuple()
assert isinstance(ver, tuple)
assert len(ver) == 2
assert ver[0] >= 2
def test_cursor_scalar(db, table_factory):
table_factory('dummy')
with db.cursor() as cur:
assert cur.scalar('SELECT count(*) FROM dummy') == 0
@@ -49,3 +76,24 @@ def test_cursor_scalar_many_rows(db):
with db.cursor() as cur:
with pytest.raises(RuntimeError):
cur.scalar('SELECT * FROM pg_tables')
def test_get_pg_env_add_variable(monkeypatch):
monkeypatch.delenv('PGPASSWORD', raising=False)
env = get_pg_env('user=fooF')
assert env['PGUSER'] == 'fooF'
assert 'PGPASSWORD' not in env
def test_get_pg_env_overwrite_variable(monkeypatch):
monkeypatch.setenv('PGUSER', 'some default')
env = get_pg_env('user=overwriter')
assert env['PGUSER'] == 'overwriter'
def test_get_pg_env_ignore_unknown():
env = get_pg_env('tty=stuff', base_env={})
assert env == {}

View File

@@ -5,26 +5,58 @@ import psycopg2
import pytest
import nominatim.db.utils as db_utils
from nominatim.errors import UsageError
def test_execute_file_success(temp_db_conn, tmp_path):
def test_execute_file_success(dsn, temp_db_cursor, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')
db_utils.execute_file(temp_db_conn, tmpfile)
db_utils.execute_file(dsn, tmpfile)
with temp_db_conn.cursor() as cur:
cur.execute('SELECT * FROM test')
temp_db_cursor.execute('SELECT * FROM test')
assert cur.rowcount == 1
assert cur.fetchone()[0] == 56
assert temp_db_cursor.rowcount == 1
assert temp_db_cursor.fetchone()[0] == 56
def test_execute_file_bad_file(temp_db_conn, tmp_path):
def test_execute_file_bad_file(dsn, tmp_path):
with pytest.raises(FileNotFoundError):
db_utils.execute_file(temp_db_conn, tmp_path / 'test2.sql')
db_utils.execute_file(dsn, tmp_path / 'test2.sql')
def test_execute_file_bad_sql(temp_db_conn, tmp_path):
def test_execute_file_bad_sql(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)')
with pytest.raises(psycopg2.ProgrammingError):
db_utils.execute_file(temp_db_conn, tmpfile)
with pytest.raises(UsageError):
db_utils.execute_file(dsn, tmpfile)
def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)')
db_utils.execute_file(dsn, tmpfile, ignore_errors=True)
def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('INSERT INTO test VALUES(4)')
db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)')
temp_db_cursor.execute('SELECT * FROM test')
assert temp_db_cursor.rowcount == 1
assert temp_db_cursor.fetchone()[0] == 4
def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT)')
db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)')
temp_db_cursor.execute('SELECT * FROM test')
assert temp_db_cursor.rowcount == 1
assert temp_db_cursor.fetchone()[0] == 23

View File

@@ -12,6 +12,7 @@ class IndexerTestDB:
def __init__(self, conn):
self.placex_id = itertools.count(100000)
self.osmline_id = itertools.count(500000)
self.postcode_id = itertools.count(700000)
self.conn = conn
self.conn.set_isolation_level(0)
@@ -31,6 +32,12 @@ class IndexerTestDB:
indexed_status SMALLINT,
indexed_date TIMESTAMP,
geometry_sector INTEGER)""")
cur.execute("""CREATE TABLE location_postcode (
place_id BIGINT,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
country_code varchar(2),
postcode TEXT)""")
cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
AS $$
BEGIN
@@ -39,10 +46,10 @@ class IndexerTestDB:
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql;""")
cur.execute("""CREATE TRIGGER placex_update BEFORE UPDATE ON placex
FOR EACH ROW EXECUTE PROCEDURE date_update()""")
cur.execute("""CREATE TRIGGER osmline_update BEFORE UPDATE ON location_property_osmline
FOR EACH ROW EXECUTE PROCEDURE date_update()""")
for table in ('placex', 'location_property_osmline', 'location_postcode'):
cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
FOR EACH ROW EXECUTE PROCEDURE date_update()
""".format(table))
def scalar(self, query):
with self.conn.cursor() as cur:
@@ -74,6 +81,15 @@ class IndexerTestDB:
(next_id, sector))
return next_id
def add_postcode(self, country, postcode):
next_id = next(self.postcode_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO location_postcode
(place_id, indexed_status, country_code, postcode)
VALUES (%s, 1, %s, %s)""",
(next_id, country, postcode))
return next_id
def placex_unindexed(self):
return self.scalar('SELECT count(*) from placex where indexed_status > 0')
@@ -87,7 +103,7 @@ def test_db(temp_db_conn):
@pytest.mark.parametrize("threads", [1, 15])
def test_index_full(test_db, threads):
def test_index_all_by_rank(test_db, threads):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
@@ -184,3 +200,35 @@ def test_index_boundaries(test_db, threads):
assert 0 == test_db.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND class != 'boundary'""")
@pytest.mark.parametrize("threads", [1, 15])
def test_index_postcodes(test_db, threads):
for postcode in range(1000):
test_db.add_postcode('de', postcode)
for postcode in range(32000, 33000):
test_db.add_postcode('us', postcode)
idx = Indexer('dbname=test_nominatim_python_unittest', threads)
idx.index_postcodes()
assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
WHERE indexed_status != 0""")
def test_index_full(test_db):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
for postcode in range(1000):
test_db.add_postcode('de', postcode)
idx = Indexer('dbname=test_nominatim_python_unittest', 4)
idx.index_full()
assert 0 == test_db.placex_unindexed()
assert 0 == test_db.osmline_unindexed()
assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
WHERE indexed_status != 0""")

View File

@@ -9,9 +9,8 @@ from nominatim.tools import admin
@pytest.fixture
def db(temp_db, placex_table):
conn = connect('dbname=' + temp_db)
yield conn
conn.close()
with connect('dbname=' + temp_db) as conn:
yield conn
def test_analyse_indexing_no_objects(db):
with pytest.raises(UsageError):

View File

@@ -10,6 +10,10 @@ def test_check_database_unknown_db(def_config, monkeypatch):
assert 1 == chkdb.check_database(def_config)
def test_check_database_fatal_test(def_config, temp_db):
assert 1 == chkdb.check_database(def_config)
def test_check_conection_good(temp_db_conn, def_config):
assert chkdb.check_connection(temp_db_conn, def_config) == chkdb.CheckState.OK
@@ -59,6 +63,10 @@ def test_check_database_indexes_bad(temp_db_conn, def_config):
assert chkdb.check_database_indexes(temp_db_conn, def_config) == chkdb.CheckState.FAIL
def test_check_database_indexes_valid(temp_db_conn, def_config):
assert chkdb.check_database_index_valid(temp_db_conn, def_config) == chkdb.CheckState.OK
def test_check_tiger_table_disabled(temp_db_conn, def_config, monkeypatch):
monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA' , 'no')
assert chkdb.check_tiger_table(temp_db_conn, def_config) == chkdb.CheckState.NOT_APPLICABLE

View File

@@ -0,0 +1,202 @@
"""
Tests for functions to import a new database.
"""
import pytest
import psycopg2
import sys
from pathlib import Path
from nominatim.tools import database_import
from nominatim.errors import UsageError
@pytest.fixture
def nonexistant_db():
dbname = 'test_nominatim_python_unittest'
conn = psycopg2.connect(database='postgres')
conn.set_isolation_level(0)
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(dbname))
yield dbname
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(dbname))
@pytest.mark.parametrize("no_partitions", (True, False))
def test_setup_skeleton(src_dir, nonexistant_db, no_partitions):
database_import.setup_database_skeleton('dbname=' + nonexistant_db,
src_dir / 'data', no_partitions)
conn = psycopg2.connect(database=nonexistant_db)
try:
with conn.cursor() as cur:
cur.execute("SELECT distinct partition FROM country_name")
partitions = set([r[0] for r in list(cur)])
if no_partitions:
assert partitions == set([0])
else:
assert len(partitions) > 10
finally:
conn.close()
def test_create_db_success(nonexistant_db):
database_import.create_db('dbname=' + nonexistant_db, rouser='www-data')
conn = psycopg2.connect(database=nonexistant_db)
conn.close()
def test_create_db_already_exists(temp_db):
with pytest.raises(UsageError):
database_import.create_db('dbname=' + temp_db)
def test_create_db_unsupported_version(nonexistant_db, monkeypatch):
monkeypatch.setattr(database_import, 'POSTGRESQL_REQUIRED_VERSION', (100, 4))
with pytest.raises(UsageError, match='PostgreSQL server is too old.'):
database_import.create_db('dbname=' + nonexistant_db)
def test_create_db_missing_ro_user(nonexistant_db):
with pytest.raises(UsageError, match='Missing read-only user.'):
database_import.create_db('dbname=' + nonexistant_db, rouser='sdfwkjkjgdugu2;jgsafkljas;')
def test_setup_extensions(temp_db_conn, temp_db_cursor):
database_import.setup_extensions(temp_db_conn)
temp_db_cursor.execute('CREATE TABLE t (h HSTORE, geom GEOMETRY(Geometry, 4326))')
def test_setup_extensions_old_postgis(temp_db_conn, monkeypatch):
monkeypatch.setattr(database_import, 'POSTGIS_REQUIRED_VERSION', (50, 50))
with pytest.raises(UsageError, match='PostGIS version is too old.'):
database_import.setup_extensions(temp_db_conn)
def test_install_module(tmp_path):
src_dir = tmp_path / 'source'
src_dir.mkdir()
(src_dir / 'nominatim.so').write_text('TEST nominatim.so')
project_dir = tmp_path / 'project'
project_dir.mkdir()
database_import.install_module(src_dir, project_dir, '')
outfile = project_dir / 'module' / 'nominatim.so'
assert outfile.exists()
assert outfile.read_text() == 'TEST nominatim.so'
assert outfile.stat().st_mode == 33261
def test_install_module_custom(tmp_path):
(tmp_path / 'nominatim.so').write_text('TEST nominatim.so')
database_import.install_module(tmp_path, tmp_path, str(tmp_path.resolve()))
assert not (tmp_path / 'module').exists()
def test_install_module_fail_access(temp_db_conn, tmp_path):
(tmp_path / 'nominatim.so').write_text('TEST nominatim.so')
with pytest.raises(UsageError, match='.*module cannot be accessed.*'):
database_import.install_module(tmp_path, tmp_path, '',
conn=temp_db_conn)
def test_import_base_data(src_dir, temp_db, temp_db_cursor):
temp_db_cursor.execute('CREATE EXTENSION hstore')
temp_db_cursor.execute('CREATE EXTENSION postgis')
database_import.import_base_data('dbname=' + temp_db, src_dir / 'data')
assert temp_db_cursor.scalar('SELECT count(*) FROM country_name') > 0
def test_import_base_data_ignore_partitions(src_dir, temp_db, temp_db_cursor):
temp_db_cursor.execute('CREATE EXTENSION hstore')
temp_db_cursor.execute('CREATE EXTENSION postgis')
database_import.import_base_data('dbname=' + temp_db, src_dir / 'data',
ignore_partitions=True)
assert temp_db_cursor.scalar('SELECT count(*) FROM country_name') > 0
assert temp_db_cursor.scalar('SELECT count(*) FROM country_name WHERE partition != 0') == 0
def test_import_osm_data_simple(temp_db_cursor,osm2pgsql_options):
temp_db_cursor.execute('CREATE TABLE place (id INT)')
temp_db_cursor.execute('INSERT INTO place values (1)')
database_import.import_osm_data('file.pdf', osm2pgsql_options)
def test_import_osm_data_simple_no_data(temp_db_cursor,osm2pgsql_options):
temp_db_cursor.execute('CREATE TABLE place (id INT)')
with pytest.raises(UsageError, match='No data.*'):
database_import.import_osm_data('file.pdf', osm2pgsql_options)
def test_import_osm_data_drop(temp_db_conn, temp_db_cursor, tmp_path, osm2pgsql_options):
temp_db_cursor.execute('CREATE TABLE place (id INT)')
temp_db_cursor.execute('CREATE TABLE planet_osm_nodes (id INT)')
temp_db_cursor.execute('INSERT INTO place values (1)')
flatfile = tmp_path / 'flatfile'
flatfile.write_text('touch')
osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
database_import.import_osm_data('file.pdf', osm2pgsql_options, drop=True)
assert not flatfile.exists()
assert not temp_db_conn.table_exists('planet_osm_nodes')
def test_import_osm_data_default_cache(temp_db_cursor,osm2pgsql_options):
temp_db_cursor.execute('CREATE TABLE place (id INT)')
temp_db_cursor.execute('INSERT INTO place values (1)')
osm2pgsql_options['osm2pgsql_cache'] = 0
database_import.import_osm_data(Path(__file__), osm2pgsql_options)
def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
tables = ('word', 'placex', 'place_addressline', 'location_area',
'location_area_country', 'location_property',
'location_property_tiger', 'location_property_osmline',
'location_postcode', 'search_name', 'location_road_23')
for table in tables:
table_factory(table, content=(1, 2, 3))
database_import.truncate_data_tables(temp_db_conn, max_word_frequency=23)
for table in tables:
assert temp_db_cursor.table_rows(table) == 0
@pytest.mark.parametrize("threads", (1, 5))
def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
temp_db_cursor, threads):
for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'):
temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
""".format(func))
for oid in range(100, 130):
place_row(osm_id=oid)
place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
geom='SRID=4326;LINESTRING(0 0, 10 10)')
database_import.load_data(dsn, src_dir / 'data', threads)
assert temp_db_cursor.table_rows('placex') == 30
assert temp_db_cursor.table_rows('location_property_osmline') == 1

View File

@@ -105,8 +105,15 @@ def test_run_api_with_extra_env(tmp_project_dir):
### run_osm2pgsql
def test_run_osm2pgsql():
exec_utils.run_osm2pgsql(dict(osm2pgsql='echo', append=False, flatnode_file=None,
dsn='dbname=foobar', threads=1, osm2pgsql_cache=500,
osm2pgsql_style='./my.style',
import_file='foo.bar'))
def test_run_osm2pgsql(osm2pgsql_options):
osm2pgsql_options['append'] = False
osm2pgsql_options['import_file'] = 'foo.bar'
osm2pgsql_options['tablespaces']['osm_data'] = 'extra'
exec_utils.run_osm2pgsql(osm2pgsql_options)
def test_run_osm2pgsql_disable_jit(osm2pgsql_options):
osm2pgsql_options['append'] = True
osm2pgsql_options['import_file'] = 'foo.bar'
osm2pgsql_options['disable_jit'] = True
exec_utils.run_osm2pgsql(osm2pgsql_options)

View File

@@ -0,0 +1,26 @@
"""
Tests for various refresh functions.
"""
from pathlib import Path
import pytest
from nominatim.tools import refresh
TEST_DIR = (Path(__file__) / '..' / '..').resolve()
def test_refresh_import_wikipedia_not_existing(dsn):
assert 1 == refresh.import_wikipedia_articles(dsn, Path('.'))
@pytest.mark.parametrize("replace", (True, False))
def test_refresh_import_wikipedia(dsn, table_factory, temp_db_cursor, replace):
if replace:
table_factory('wikipedia_article')
table_factory('wikipedia_redirect')
# use the small wikipedia file for the API testdb
assert 0 == refresh.import_wikipedia_articles(dsn, TEST_DIR / 'testdb')
assert temp_db_cursor.scalar('SELECT count(*) FROM wikipedia_article') > 0
assert temp_db_cursor.scalar('SELECT count(*) FROM wikipedia_redirect') > 0

View File

@@ -2,9 +2,10 @@
Tests for functions for importing address ranks.
"""
import json
import pytest
from pathlib import Path
import pytest
from nominatim.tools.refresh import load_address_levels, load_address_levels_from_file
def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):

View File

@@ -11,9 +11,8 @@ SQL_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-sql').resolve()
@pytest.fixture
def db(temp_db):
conn = connect('dbname=' + temp_db)
yield conn
conn.close()
with connect('dbname=' + temp_db) as conn:
yield conn
@pytest.fixture
def db_with_tables(db):

View File

@@ -42,7 +42,7 @@
python3-pip python3-setuptools python3-devel \
expat-devel zlib-devel
pip3 install --user psycopg2 python-dotenv
pip3 install --user psycopg2 python-dotenv psutil
#

View File

@@ -35,7 +35,7 @@
python3-pip python3-setuptools python3-devel \
expat-devel zlib-devel
pip3 install --user psycopg2 python-dotenv
pip3 install --user psycopg2 python-dotenv psutil
#

View File

@@ -30,7 +30,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
postgresql-server-dev-10 postgresql-10-postgis-2.4 \
postgresql-contrib-10 postgresql-10-postgis-scripts \
php php-pgsql php-intl python3-pip \
python3-psycopg2 git
python3-psycopg2 python3-psutil git
# The python-dotenv package that comes with Ubuntu 18.04 is too old, so
# install the latest version from pip:

View File

@@ -33,7 +33,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
postgresql-server-dev-12 postgresql-12-postgis-3 \
postgresql-contrib-12 postgresql-12-postgis-3-scripts \
php php-pgsql php-intl python3-dotenv \
python3-psycopg2 git
python3-psycopg2 python3-psutil git
#
# System Configuration