Mirror of https://github.com/osm-search/Nominatim.git, synced 2026-02-14 18:37:58 +00:00

Merge pull request #2186 from lonvia/port-import-to-python

Move setup procedure to Python

2    .github/actions/build-nominatim/action.yml    (vendored)
@@ -6,7 +6,7 @@ runs:
steps:
- name: Install prerequisites
  run: |
    sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium python3-dotenv
    sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil
  shell: bash

- name: Download dependencies

11    .pylintrc    (Normal file)
@@ -0,0 +1,11 @@
[MASTER]

extension-pkg-whitelist=osmium

[MESSAGES CONTROL]

[TYPECHECK]

# closing added here because it sometimes triggers a false positive with
# 'with' statements.
ignored-classes=NominatimArgs,closing
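The whitelisting is needed because `connect()` in nominatim/db/connection.py (further down in this diff) returns a contextlib.closing wrapper with a dynamically attached `connection` attribute. A minimal sketch of the pattern that would otherwise trip pylint's no-member check (class names here are purely illustrative):

    import contextlib

    class Conn:
        def close(self):
            pass

    ctxmgr = contextlib.closing(Conn())
    ctxmgr.connection = Conn()     # attribute attached at runtime
    ctxmgr.connection.close()      # pylint cannot infer this member without the whitelist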

@@ -177,7 +177,7 @@ if (BUILD_TESTS)
if (PYLINT)
    message(STATUS "Using pylint binary ${PYLINT}")
    add_test(NAME pylint
             COMMAND ${PYLINT} --extension-pkg-whitelist=osmium nominatim
             COMMAND ${PYLINT} nominatim
             WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
    message(WARNING "pylint not found. Python linting tests disabled.")
@@ -41,10 +41,11 @@ For running Nominatim:
* [Python 3](https://www.python.org/) (3.5+)
* [Psycopg2](https://www.psycopg.org) (2.7+)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
( PHP-cgi (for running queries from the command line)
* PHP-cgi (for running queries from the command line)

For running continuous updates:
@@ -48,7 +48,7 @@ class Shell
        return join(' ', $aEscaped);
    }

    public function run()
    public function run($bExitOnFail = false)
    {
        $sCmd = $this->escapedCmd();
        // $aEnv does not need escaping, proc_open seems to handle it fine
@@ -67,6 +67,11 @@ class Shell
        fclose($aPipes[0]); // no stdin

        $iStat = proc_close($hProc);

        if ($iStat != 0 && $bExitOnFail) {
            exit($iStat);
        }

        return $iStat;
    }
@@ -56,6 +56,29 @@ setupHTTPProxy();

$bDidSomething = false;

$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));

// by default, use all but one processor, but never more than 15.
$iInstances = max(1, $aCMDResult['threads'] ?? (min(16, getProcessorCount()) - 1));

function run($oCmd)
{
    global $iInstances;
    global $aCMDResult;
    $oCmd->addParams('--threads', $iInstances);
    if ($aCMDResult['ignore-errors'] ?? false) {
        $oCmd->addParams('--ignore-errors');
    }
    if ($aCMDResult['quiet'] ?? false) {
        $oCmd->addParams('--quiet');
    }
    if ($aCMDResult['verbose'] ?? false) {
        $oCmd->addParams('--verbose');
    }
    $oCmd->run(true);
}


//*******************************************************
// Making some sanity checks:
// Check if osm-file is set and points to a valid file
@@ -72,17 +95,30 @@ $oSetup = new SetupFunctions($aCMDResult);
// go through complete process if 'all' is selected or start selected functions
if ($aCMDResult['create-db'] || $aCMDResult['all']) {
    $bDidSomething = true;
    $oSetup->createDB();
    run((clone($oNominatimCmd))->addParams('transition', '--create-db'));
}

if ($aCMDResult['setup-db'] || $aCMDResult['all']) {
    $bDidSomething = true;
    $oSetup->setupDB();
    $oCmd = (clone($oNominatimCmd))->addParams('transition', '--setup-db');

    if ($aCMDResult['no-partitions'] ?? false) {
        $oCmd->addParams('--no-partitions');
    }

    run($oCmd);
}

if ($aCMDResult['import-data'] || $aCMDResult['all']) {
    $bDidSomething = true;
    $oSetup->importData($aCMDResult['osm-file']);
    $oCmd = (clone($oNominatimCmd))
            ->addParams('transition', '--import-data')
            ->addParams('--osm-file', $aCMDResult['osm-file']);
    if ($aCMDResult['drop'] ?? false) {
        $oCmd->addParams('--drop');
    }

    run($oCmd);
}

if ($aCMDResult['create-functions'] || $aCMDResult['all']) {
@@ -104,17 +140,18 @@ if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) {

if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) {
    $bDidSomething = true;
    $oSetup->createPartitionFunctions();
    $oSetup->createFunctions(); // also create partition functions
}

if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) {
    $bDidSomething = true;
    $oSetup->importWikipediaArticles();
    // ignore errors!
    (clone($oNominatimCmd))->addParams('refresh', '--wiki-data')->run();
}

if ($aCMDResult['load-data'] || $aCMDResult['all']) {
    $bDidSomething = true;
    $oSetup->loadData($aCMDResult['disable-token-precalc']);
    run((clone($oNominatimCmd))->addParams('transition', '--load-data'));
}

if ($aCMDResult['import-tiger-data']) {
@@ -130,12 +167,17 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) {

if ($aCMDResult['index'] || $aCMDResult['all']) {
    $bDidSomething = true;
    $oSetup->index($aCMDResult['index-noanalyse']);
    $oCmd = (clone($oNominatimCmd))->addParams('transition', '--index');
    if ($aCMDResult['index-noanalyse'] ?? false) {
        $oCmd->addParams('--no-analyse');
    }

    run($oCmd);
}

if ($aCMDResult['drop']) {
    $bDidSomething = true;
    $oSetup->drop($aCMDResult);
    run((clone($oNominatimCmd))->addParams('freeze'));
}

if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) {
@@ -150,7 +192,7 @@ if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {

if ($aCMDResult['setup-website'] || $aCMDResult['all']) {
    $bDidSomething = true;
    $oSetup->setupWebsite();
    run((clone($oNominatimCmd))->addParams('refresh', '--website'));
}

// ******************************************************
@@ -104,11 +104,17 @@ if ($fPostgresVersion >= 11.0) {
}

$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
if ($aResult['quiet']) {
    $oNominatimCmd->addParams('--quiet');
}
if ($aResult['verbose']) {
    $oNominatimCmd->addParams('--verbose');

function run($oCmd)
{
    global $aCMDResult;
    if ($aCMDResult['quiet'] ?? false) {
        $oCmd->addParams('--quiet');
    }
    if ($aCMDResult['verbose'] ?? false) {
        $oCmd->addParams('--verbose');
    }
    $oCmd->run(true);
}


@@ -119,7 +125,7 @@ if ($aResult['init-updates']) {
        $oCmd->addParams('--no-update-functions');
    }

    $oCmd->run();
    run($oCmd);
}

if ($aResult['check-for-updates']) {
@@ -147,7 +153,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
}

if ($aResult['calculate-postcodes']) {
    (clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run();
    run((clone($oNominatimCmd))->addParams('refresh', '--postcodes'));
}

$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
@@ -196,35 +202,21 @@ if ($bHaveDiff) {
}

if ($aResult['recompute-word-counts']) {
    (clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run();
    run((clone($oNominatimCmd))->addParams('refresh', '--word-counts'));
}

if ($aResult['index']) {
    (clone $oNominatimCmd)
    run((clone $oNominatimCmd)
        ->addParams('index', '--minrank', $aResult['index-rank'])
        ->addParams('--threads', $aResult['index-instances'])
        ->run();
        ->addParams('--threads', $aResult['index-instances']));
}

if ($aResult['update-address-levels']) {
    (clone($oNominatimCmd))->addParams('refresh', '--address-levels')->run();
    run((clone($oNominatimCmd))->addParams('refresh', '--address-levels'));
}

if ($aResult['recompute-importance']) {
    echo "Updating importance values for database.\n";
    $oDB = new Nominatim\DB();
    $oDB->connect();

    $sSQL = 'ALTER TABLE placex DISABLE TRIGGER ALL;';
    $sSQL .= 'UPDATE placex SET (wikipedia, importance) =';
    $sSQL .= ' (SELECT wikipedia, importance';
    $sSQL .= ' FROM compute_importance(extratags, country_code, osm_type, osm_id));';
    $sSQL .= 'UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance';
    $sSQL .= ' FROM placex d';
    $sSQL .= ' WHERE s.place_id = d.linked_place_id and d.wikipedia is not null';
    $sSQL .= ' and (s.wikipedia is null or s.importance < d.importance);';
    $sSQL .= 'ALTER TABLE placex ENABLE TRIGGER ALL;';
    $oDB->exec($sSQL);
    run((clone($oNominatimCmd))->addParams('refresh', '--importance'));
}

if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
@@ -240,5 +232,5 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
        $oCmd->addParams('--no-index');
    }

    exit($oCmd->run());
    run($oCmd);
}
@@ -6,7 +6,6 @@ require_once(CONST_LibDir.'/Shell.php');

class SetupFunctions
{
    protected $iCacheMemory;
    protected $iInstances;
    protected $aDSNInfo;
    protected $bQuiet;
@@ -31,16 +30,6 @@ class SetupFunctions
            warn('resetting threads to '.$this->iInstances);
        }

        if (isset($aCMDResult['osm2pgsql-cache'])) {
            $this->iCacheMemory = $aCMDResult['osm2pgsql-cache'];
        } elseif (getSetting('FLATNODE_FILE')) {
            // When flatnode files are enabled then disable cache by default.
            $this->iCacheMemory = 0;
        } else {
            // Otherwise: Assume we can steal all the cache memory in the box.
            $this->iCacheMemory = getCacheMemoryMB();
        }

        // parse database string
        $this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
        if (!isset($this->aDSNInfo['port'])) {
@@ -84,156 +73,6 @@ class SetupFunctions
        }
    }

    public function createDB()
    {
        info('Create DB');
        $oDB = new \Nominatim\DB;

        if ($oDB->checkConnection()) {
            fail('database already exists ('.getSetting('DATABASE_DSN').')');
        }

        $oCmd = (new \Nominatim\Shell('createdb'))
                ->addParams('-E', 'UTF-8')
                ->addParams('-p', $this->aDSNInfo['port']);

        if (isset($this->aDSNInfo['username'])) {
            $oCmd->addParams('-U', $this->aDSNInfo['username']);
        }
        if (isset($this->aDSNInfo['password'])) {
            $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
        }
        if (isset($this->aDSNInfo['hostspec'])) {
            $oCmd->addParams('-h', $this->aDSNInfo['hostspec']);
        }
        $oCmd->addParams($this->aDSNInfo['database']);

        $result = $oCmd->run();
        if ($result != 0) fail('Error executing external command: '.$oCmd->escapedCmd());
    }

    public function setupDB()
    {
        info('Setup DB');

        $fPostgresVersion = $this->db()->getPostgresVersion();
        echo 'Postgres version found: '.$fPostgresVersion."\n";

        if ($fPostgresVersion < 9.03) {
            fail('Minimum supported version of PostgreSQL is 9.3.');
        }

        $this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS hstore');
        $this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS postgis');

        $fPostgisVersion = $this->db()->getPostgisVersion();
        echo 'Postgis version found: '.$fPostgisVersion."\n";

        if ($fPostgisVersion < 2.2) {
            echo "Minimum required Postgis version 2.2\n";
            exit(1);
        }

        $sPgUser = getSetting('DATABASE_WEBUSER');
        $i = $this->db()->getOne("select count(*) from pg_user where usename = '$sPgUser'");
        if ($i == 0) {
            echo "\nERROR: Web user '".$sPgUser."' does not exist. Create it with:\n";
            echo "\n createuser ".$sPgUser."\n\n";
            exit(1);
        }

        if (!getSetting('DATABASE_MODULE_PATH')) {
            // If no custom module path is set then copy the module into the
            // project directory, but only if it is not the same file already
            // (aka we are running from the build dir).
            $sDest = CONST_InstallDir.'/module';
            if ($sDest != CONST_Default_ModulePath) {
                if (!file_exists($sDest)) {
                    mkdir($sDest);
                }
                if (!copy(CONST_Default_ModulePath.'/nominatim.so', $sDest.'/nominatim.so')) {
                    echo "Failed to copy database module to $sDest.";
                    exit(1);
                }
                chmod($sDest.'/nominatim.so', 0755);
                info("Database module installed at $sDest.");
            } else {
                info('Running from build directory. Leaving database module as is.');
            }
        } else {
            info('Using database module from DATABASE_MODULE_PATH ('.getSetting('DATABASE_MODULE_PATH').').');
        }
        // Try accessing the C module, so we know early if something is wrong
        $this->checkModulePresence(); // raises exception on failure

        $this->pgsqlRunScriptFile(CONST_DataDir.'/country_name.sql');
        $this->pgsqlRunScriptFile(CONST_DataDir.'/country_osm_grid.sql.gz');

        if ($this->bNoPartitions) {
            $this->pgsqlRunScript('update country_name set partition = 0');
        }
    }

    public function importData($sOSMFile)
    {
        info('Import data');

        if (!file_exists(getOsm2pgsqlBinary())) {
            echo "Check NOMINATIM_OSM2PGSQL_BINARY in your local .env file.\n";
            echo "Normally you should not need to set this manually.\n";
            fail("osm2pgsql not found in '".getOsm2pgsqlBinary()."'");
        }

        $oCmd = new \Nominatim\Shell(getOsm2pgsqlBinary());
        $oCmd->addParams('--style', getImportStyle());

        if (getSetting('FLATNODE_FILE')) {
            $oCmd->addParams('--flat-nodes', getSetting('FLATNODE_FILE'));
        }
        if (getSetting('TABLESPACE_OSM_DATA')) {
            $oCmd->addParams('--tablespace-slim-data', getSetting('TABLESPACE_OSM_DATA'));
        }
        if (getSetting('TABLESPACE_OSM_INDEX')) {
            $oCmd->addParams('--tablespace-slim-index', getSetting('TABLESPACE_OSM_INDEX'));
        }
        if (getSetting('TABLESPACE_PLACE_DATA')) {
            $oCmd->addParams('--tablespace-main-data', getSetting('TABLESPACE_PLACE_DATA'));
        }
        if (getSetting('TABLESPACE_PLACE_INDEX')) {
            $oCmd->addParams('--tablespace-main-index', getSetting('TABLESPACE_PLACE_INDEX'));
        }
        $oCmd->addParams('--latlong', '--slim', '--create');
        $oCmd->addParams('--output', 'gazetteer');
        $oCmd->addParams('--hstore');
        $oCmd->addParams('--number-processes', 1);
        $oCmd->addParams('--with-forward-dependencies', 'false');
        $oCmd->addParams('--log-progress', 'true');
        $oCmd->addParams('--cache', $this->iCacheMemory);
        $oCmd->addParams('--port', $this->aDSNInfo['port']);

        if (isset($this->aDSNInfo['username'])) {
            $oCmd->addParams('--username', $this->aDSNInfo['username']);
        }
        if (isset($this->aDSNInfo['password'])) {
            $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
        }
        if (isset($this->aDSNInfo['hostspec'])) {
            $oCmd->addParams('--host', $this->aDSNInfo['hostspec']);
        }
        $oCmd->addParams('--database', $this->aDSNInfo['database']);
        $oCmd->addParams($sOSMFile);
        $oCmd->run();

        if (!$this->sIgnoreErrors && !$this->db()->getRow('select * from place limit 1')) {
            fail('No Data');
        }

        if ($this->bDrop) {
            $this->dropTable('planet_osm_nodes');
            $this->removeFlatnodeFile();
        }
    }

    public function createFunctions()
    {
        info('Create Functions');
@@ -280,153 +119,6 @@ class SetupFunctions
        $this->pgsqlRunPartitionScript($sTemplate);
    }

    public function createPartitionFunctions()
    {
        info('Create Partition Functions');
        $this->createSqlFunctions(); // also create partition functions
    }

    public function importWikipediaArticles()
    {
        $sWikiArticlePath = getSetting('WIKIPEDIA_DATA_PATH', CONST_InstallDir);
        $sWikiArticlesFile = $sWikiArticlePath.'/wikimedia-importance.sql.gz';
        if (file_exists($sWikiArticlesFile)) {
            info('Importing wikipedia articles and redirects');
            $this->dropTable('wikipedia_article');
            $this->dropTable('wikipedia_redirect');
            $this->pgsqlRunScriptFile($sWikiArticlesFile);
        } else {
            warn('wikipedia importance dump file not found - places will have default importance');
        }
    }

    public function loadData($bDisableTokenPrecalc)
    {
        info('Drop old Data');

        $oDB = $this->db();

        $oDB->exec('TRUNCATE word');
        echo '.';
        $oDB->exec('TRUNCATE placex');
        echo '.';
        $oDB->exec('TRUNCATE location_property_osmline');
        echo '.';
        $oDB->exec('TRUNCATE place_addressline');
        echo '.';
        $oDB->exec('TRUNCATE location_area');
        echo '.';
        if (!$this->dbReverseOnly()) {
            $oDB->exec('TRUNCATE search_name');
            echo '.';
        }
        $oDB->exec('TRUNCATE search_name_blank');
        echo '.';
        $oDB->exec('DROP SEQUENCE seq_place');
        echo '.';
        $oDB->exec('CREATE SEQUENCE seq_place start 100000');
        echo '.';

        $sSQL = 'select distinct partition from country_name';
        $aPartitions = $oDB->getCol($sSQL);

        if (!$this->bNoPartitions) $aPartitions[] = 0;
        foreach ($aPartitions as $sPartition) {
            $oDB->exec('TRUNCATE location_road_'.$sPartition);
            echo '.';
        }

        // used by getorcreate_word_id to ignore frequent partial words
        $sSQL = 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS ';
        $sSQL .= '$$ SELECT '.getSetting('MAX_WORD_FREQUENCY').' as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE';
        $oDB->exec($sSQL);
        echo ".\n";

        // pre-create the word list
        if (!$bDisableTokenPrecalc) {
            info('Loading word list');
            $this->pgsqlRunScriptFile(CONST_DataDir.'/words.sql');
        }

        info('Load Data');
        $sColumns = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry';

        $aDBInstances = array();
        $iLoadThreads = max(1, $this->iInstances - 1);
        for ($i = 0; $i < $iLoadThreads; $i++) {
            // https://secure.php.net/manual/en/function.pg-connect.php
            $DSN = getSetting('DATABASE_DSN');
            $DSN = preg_replace('/^pgsql:/', '', $DSN);
            $DSN = preg_replace('/;/', ' ', $DSN);
            $aDBInstances[$i] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW);
            pg_ping($aDBInstances[$i]);
        }

        for ($i = 0; $i < $iLoadThreads; $i++) {
            $sSQL = "INSERT INTO placex ($sColumns) SELECT $sColumns FROM place WHERE osm_id % $iLoadThreads = $i";
            $sSQL .= " and not (class='place' and type='houses' and osm_type='W'";
            $sSQL .= " and ST_GeometryType(geometry) = 'ST_LineString')";
            $sSQL .= ' and ST_IsValid(geometry)';
            if ($this->bVerbose) echo "$sSQL\n";
            if (!pg_send_query($aDBInstances[$i], $sSQL)) {
                fail(pg_last_error($aDBInstances[$i]));
            }
        }

        // last thread for interpolation lines
        // https://secure.php.net/manual/en/function.pg-connect.php
        $DSN = getSetting('DATABASE_DSN');
        $DSN = preg_replace('/^pgsql:/', '', $DSN);
        $DSN = preg_replace('/;/', ' ', $DSN);
        $aDBInstances[$iLoadThreads] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW);
        pg_ping($aDBInstances[$iLoadThreads]);
        $sSQL = 'insert into location_property_osmline';
        $sSQL .= ' (osm_id, address, linegeo)';
        $sSQL .= ' SELECT osm_id, address, geometry from place where ';
        $sSQL .= "class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'";
        if ($this->bVerbose) echo "$sSQL\n";
        if (!pg_send_query($aDBInstances[$iLoadThreads], $sSQL)) {
            fail(pg_last_error($aDBInstances[$iLoadThreads]));
        }

        $bFailed = false;
        for ($i = 0; $i <= $iLoadThreads; $i++) {
            while (($hPGresult = pg_get_result($aDBInstances[$i])) !== false) {
                $resultStatus = pg_result_status($hPGresult);
                // PGSQL_EMPTY_QUERY, PGSQL_COMMAND_OK, PGSQL_TUPLES_OK,
                // PGSQL_COPY_OUT, PGSQL_COPY_IN, PGSQL_BAD_RESPONSE,
                // PGSQL_NONFATAL_ERROR and PGSQL_FATAL_ERROR
                // echo 'Query result ' . $i . ' is: ' . $resultStatus . "\n";
                if ($resultStatus != PGSQL_COMMAND_OK && $resultStatus != PGSQL_TUPLES_OK) {
                    $resultError = pg_result_error($hPGresult);
                    echo '-- error text ' . $i . ': ' . $resultError . "\n";
                    $bFailed = true;
                }
            }
        }
        if ($bFailed) {
            fail('SQL errors loading placex and/or location_property_osmline tables');
        }

        for ($i = 0; $i < $this->iInstances; $i++) {
            pg_close($aDBInstances[$i]);
        }

        echo "\n";
        info('Reanalysing database');
        $this->pgsqlRunScript('ANALYSE');

        $sDatabaseDate = getDatabaseDate($oDB);
        $oDB->exec('TRUNCATE import_status');
        if (!$sDatabaseDate) {
            warn('could not determine database date.');
        } else {
            $sSQL = "INSERT INTO import_status (lastimportdate) VALUES('".$sDatabaseDate."')";
            $oDB->exec($sSQL);
            echo "Latest data imported from $sDatabaseDate.\n";
        }
    }

    public function importTigerData($sTigerPath)
    {
        info('Import Tiger data');
@@ -560,49 +252,6 @@
        $this->db()->exec($sSQL);
    }

    public function index($bIndexNoanalyse)
    {
        $this->checkModulePresence(); // raises exception on failure

        $oBaseCmd = (clone $this->oNominatimCmd)->addParams('index');

        info('Index ranks 0 - 4');
        $oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4);

        $iStatus = $oCmd->run();
        if ($iStatus != 0) {
            fail('error status ' . $iStatus . ' running nominatim!');
        }
        if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');

        info('Index administrative boundaries');
        $oCmd = (clone $oBaseCmd)->addParams('--boundaries-only');
        $iStatus = $oCmd->run();
        if ($iStatus != 0) {
            fail('error status ' . $iStatus . ' running nominatim!');
        }

        info('Index ranks 5 - 25');
        $oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 5, '--maxrank', 25);
        $iStatus = $oCmd->run();
        if ($iStatus != 0) {
            fail('error status ' . $iStatus . ' running nominatim!');
        }

        if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');

        info('Index ranks 26 - 30');
        $oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 26);
        $iStatus = $oCmd->run();
        if ($iStatus != 0) {
            fail('error status ' . $iStatus . ' running nominatim!');
        }

        info('Index postcodes');
        $sSQL = 'UPDATE location_postcode SET indexed_status = 0';
        $this->db()->exec($sSQL);
    }

    public function createSearchIndices()
    {
        info('Create Search indices');
@@ -655,21 +304,6 @@ class SetupFunctions
        $this->pgsqlRunScript($sSQL);
    }

    public function drop()
    {
        (clone($this->oNominatimCmd))->addParams('freeze')->run();
    }

    /**
     * Setup the directory for the API scripts.
     *
     * @return null
     */
    public function setupWebsite()
    {
        (clone($this->oNominatimCmd))->addParams('refresh', '--website')->run();
    }

    /**
     * Return the connection to the database.
     *
@@ -688,15 +322,6 @@ class SetupFunctions
        return $this->oDB;
    }

    private function removeFlatnodeFile()
    {
        $sFName = getSetting('FLATNODE_FILE');
        if ($sFName && file_exists($sFName)) {
            if ($this->bVerbose) echo 'Deleting '.$sFName."\n";
            unlink($sFName);
        }
    }

    private function pgsqlRunScript($sScript, $bfatal = true)
    {
        runSQLScript(
@@ -720,7 +345,7 @@ class SetupFunctions
            $oCmd->addParams('--enable-debug-statements');
        }

        $oCmd->run();
        $oCmd->run(!$this->sIgnoreErrors);
    }

    private function pgsqlRunPartitionScript($sTemplate)
@@ -12,6 +12,7 @@ from .config import Configuration
from .tools.exec_utils import run_legacy_script, run_php_server
from .errors import UsageError
from . import clicmd
from .clicmd.args import NominatimArgs

LOG = logging.getLogger()

@@ -62,7 +63,8 @@ class CommandlineParser:
        """ Parse the command line arguments of the program and execute the
            appropriate subcommand.
        """
        args = self.parser.parse_args(args=kwargs.get('cli_args'))
        args = NominatimArgs()
        self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args)

        if args.subcommand is None:
            self.parser.print_help()
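Parsing into a pre-constructed NominatimArgs object rather than the default argparse.Namespace lets helper methods live right next to the parsed attributes. A self-contained sketch of the mechanism (names invented for illustration):

    import argparse

    class Args:                                    # stand-in for NominatimArgs
        def describe(self):
            return 'threads={}'.format(self.threads)

    parser = argparse.ArgumentParser()
    parser.add_argument('--threads', type=int, default=1)

    args = Args()
    parser.parse_args(['--threads', '4'], namespace=args)
    print(args.describe())                         # prints: threads=4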
@@ -73,12 +75,14 @@ class CommandlineParser:
                setattr(args, arg, Path(kwargs[arg]))
        args.project_dir = Path(args.project_dir).resolve()

        logging.basicConfig(stream=sys.stderr,
                            format='%(asctime)s: %(message)s',
                            datefmt='%Y-%m-%d %H:%M:%S',
                            level=max(4 - args.verbose, 1) * 10)
        if 'cli_args' not in kwargs:
            logging.basicConfig(stream=sys.stderr,
                                format='%(asctime)s: %(message)s',
                                datefmt='%Y-%m-%d %H:%M:%S',
                                level=max(4 - args.verbose, 1) * 10)

        args.config = Configuration(args.project_dir, args.config_dir)
        args.config = Configuration(args.project_dir, args.config_dir,
                                    environ=kwargs.get('environ', os.environ))

        log = logging.getLogger()
        log.warning('Using project directory: %s', str(args.project_dir))
@@ -109,70 +113,6 @@ class CommandlineParser:
# pylint: disable=E0012,C0415


class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
    """

    @staticmethod
    def add_args(parser):
        group_name = parser.add_argument_group('Required arguments')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--osm-file',
                           help='OSM file to be imported.')
        group.add_argument('--continue', dest='continue_at',
                           choices=['load-data', 'indexing', 'db-postprocess'],
                           help='Continue an import that was interrupted')
        group = parser.add_argument_group('Optional arguments')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--reverse-only', action='store_true',
                           help='Do not create tables and indexes for searching')
        group.add_argument('--enable-debug-statements', action='store_true',
                           help='Include debug warning statements in SQL code')
        group.add_argument('--no-partitions', action='store_true',
                           help="""Do not partition search indices
                                   (speeds up import of single country extracts)""")
        group.add_argument('--no-updates', action='store_true',
                           help="""Do not keep tables that are only needed for
                                   updating the database later""")
        group = parser.add_argument_group('Expert options')
        group.add_argument('--ignore-errors', action='store_true',
                           help='Continue import even when errors in SQL are present')
        group.add_argument('--index-noanalyse', action='store_true',
                           help='Do not perform analyse operations during index')


    @staticmethod
    def run(args):
        params = ['setup.php']
        if args.osm_file:
            params.extend(('--all', '--osm-file', args.osm_file))
        else:
            if args.continue_at == 'load-data':
                params.append('--load-data')
            if args.continue_at in ('load-data', 'indexing'):
                params.append('--index')
            params.extend(('--create-search-indices', '--create-country-names',
                           '--setup-website'))
        if args.osm2pgsql_cache:
            params.extend(('--osm2pgsql-cache', args.osm2pgsql_cache))
        if args.reverse_only:
            params.append('--reverse-only')
        if args.enable_debug_statements:
            params.append('--enable-debug-statements')
        if args.no_partitions:
            params.append('--no-partitions')
        if args.no_updates:
            params.append('--drop')
        if args.ignore_errors:
            params.append('--ignore-errors')
        if args.index_noanalyse:
            params.append('--index-noanalyse')

        return run_legacy_script(*params, nominatim_env=args)


class SetupSpecialPhrases:
    """\
    Maintain special phrases.
@@ -330,7 +270,7 @@ def nominatim(**kwargs):
    """
    parser = CommandlineParser('nominatim', nominatim.__doc__)

    parser.add_subcommand('import', SetupAll)
    parser.add_subcommand('import', clicmd.SetupAll)
    parser.add_subcommand('freeze', clicmd.SetupFreeze)
    parser.add_subcommand('replication', clicmd.UpdateReplication)

@@ -354,4 +294,6 @@ def nominatim(**kwargs):
    else:
        parser.parser.epilog = 'php-cgi not found. Query commands not available.'

    parser.add_subcommand('transition', clicmd.AdminTransition)

    return parser.run(**kwargs)

@@ -2,9 +2,11 @@
"""
Subcommand definitions for the command-line tool.
"""

from .setup import SetupAll
from .replication import UpdateReplication
from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
from .index import UpdateIndex
from .refresh import UpdateRefresh
from .admin import AdminFuncs
from .freeze import SetupFreeze
from .transition import AdminTransition

@@ -54,9 +54,8 @@ class AdminFuncs:
        if args.analyse_indexing:
            LOG.warning('Analysing performance of indexing function')
            from ..tools import admin
            conn = connect(args.config.get_libpq_dsn())
            admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
            conn.close()
            with connect(args.config.get_libpq_dsn()) as conn:
                admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)

        return 0
27    nominatim/clicmd/args.py    (Normal file)
@@ -0,0 +1,27 @@
"""
Provides custom functions over command-line arguments.
"""


class NominatimArgs: # pylint: disable=too-few-public-methods
    """ Customized namespace class for the nominatim command line tool
        to receive the command-line arguments.
    """

    def osm2pgsql_options(self, default_cache, default_threads):
        """ Return the standard osm2pgsql options that can be derived
            from the command line arguments. The resulting dict can be
            further customized and then used in `run_osm2pgsql()`.
        """
        return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.osm2pgsql_path,
                    osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
                    osm2pgsql_style=self.config.get_import_style_file(),
                    threads=self.threads or default_threads,
                    dsn=self.config.get_libpq_dsn(),
                    flatnode_file=self.config.FLATNODE_FILE,
                    tablespaces=dict(slim_data=self.config.TABLESPACE_OSM_DATA,
                                     slim_index=self.config.TABLESPACE_OSM_INDEX,
                                     main_data=self.config.TABLESPACE_PLACE_DATA,
                                     main_index=self.config.TABLESPACE_PLACE_INDEX
                                     )
                    )
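Callers are expected to extend the returned dict before handing it to the osm2pgsql runner, as the replication command further down in this diff does. A runnable sketch of that calling pattern (the stub class and all values are assumptions for illustration):

    from pathlib import Path

    class FakeArgs:                     # minimal stand-in so the snippet runs standalone
        @staticmethod
        def osm2pgsql_options(default_cache, default_threads):
            return dict(osm2pgsql_cache=default_cache, threads=default_threads)

    params = FakeArgs().osm2pgsql_options(default_cache=2000, default_threads=1)
    params.update(base_url='https://planet.osm.org/replication/minute',
                  import_file=Path('osmosischange.osc'))
    print(params)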

@@ -29,9 +29,8 @@ class SetupFreeze
    def run(args):
        from ..tools import freeze

        conn = connect(args.config.get_libpq_dsn())
        freeze.drop_update_tables(conn)
        with connect(args.config.get_libpq_dsn()) as conn:
            freeze.drop_update_tables(conn)
        freeze.drop_flatnode_file(args.config.FLATNODE_FILE)
        conn.close()

        return 0

@@ -1,7 +1,7 @@
"""
Implementation of the 'index' subcommand.
"""
import os
import psutil

from ..db import status
from ..db.connection import connect
@@ -11,14 +11,6 @@ from ..db.connection import connect
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415

def _num_system_cpus():
    try:
        cpus = len(os.sched_getaffinity(0))
    except NotImplementedError:
        cpus = None

    return cpus or os.cpu_count()
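The removed helper honoured CPU pinning via os.sched_getaffinity(), while the psutil replacement simply reports the number of logical CPUs. A quick comparison of the two calls (the affinity call is not available on all platforms):

    import os
    import psutil

    print(psutil.cpu_count())              # logical CPUs; may return None
    print(len(os.sched_getaffinity(0)))    # CPUs this process may actually run on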

class UpdateIndex:
    """\
@@ -42,7 +34,7 @@ class UpdateIndex:
        from ..indexer.indexer import Indexer

        indexer = Indexer(args.config.get_libpq_dsn(),
                          args.threads or _num_system_cpus() or 1)
                          args.threads or psutil.cpu_count() or 1)

        if not args.no_boundaries:
            indexer.index_boundaries(args.minrank, args.maxrank)
@@ -51,8 +43,7 @@ class UpdateIndex:

        if not args.no_boundaries and not args.boundaries_only \
           and args.minrank == 0 and args.maxrank == 30:
            conn = connect(args.config.get_libpq_dsn())
            status.set_indexed(conn, True)
            conn.close()
            with connect(args.config.get_libpq_dsn()) as conn:
                status.set_indexed(conn, True)

        return 0
@@ -5,7 +5,6 @@ import logging
from pathlib import Path

from ..db.connection import connect
from ..tools.exec_utils import run_legacy_script

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
@@ -50,37 +49,39 @@ class UpdateRefresh:

        if args.postcodes:
            LOG.warning("Update postcodes centroid")
            conn = connect(args.config.get_libpq_dsn())
            refresh.update_postcodes(conn, args.sqllib_dir)
            conn.close()
            refresh.update_postcodes(args.config.get_libpq_dsn(), args.sqllib_dir)

        if args.word_counts:
            LOG.warning('Recompute frequency of full-word search terms')
            conn = connect(args.config.get_libpq_dsn())
            refresh.recompute_word_counts(conn, args.sqllib_dir)
            conn.close()
            refresh.recompute_word_counts(args.config.get_libpq_dsn(), args.sqllib_dir)

        if args.address_levels:
            cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
            LOG.warning('Updating address levels from %s', cfg)
            conn = connect(args.config.get_libpq_dsn())
            refresh.load_address_levels_from_file(conn, cfg)
            conn.close()
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.load_address_levels_from_file(conn, cfg)

        if args.functions:
            LOG.warning('Create functions')
            conn = connect(args.config.get_libpq_dsn())
            refresh.create_functions(conn, args.config, args.sqllib_dir,
                                     args.diffs, args.enable_debug_statements)
            conn.close()
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.create_functions(conn, args.config, args.sqllib_dir,
                                         args.diffs, args.enable_debug_statements)

        if args.wiki_data:
            run_legacy_script('setup.php', '--import-wikipedia-articles',
                              nominatim_env=args, throw_on_fail=True)
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH
                             or args.project_dir)
            LOG.warning('Import wikipedia article importance from %s', data_path)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.fatal('FATAL: Wikipedia importance dump file not found')
                return 1

        # Attention: importance MUST come after wiki data import.
        if args.importance:
            run_legacy_script('update.php', '--recompute-importance',
                              nominatim_env=args, throw_on_fail=True)
            LOG.warning('Update importance values for database')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.recompute_importance(conn)

        if args.website:
            webdir = args.project_dir / 'website'
            LOG.warning('Setting up website directory at %s', webdir)
@@ -17,17 +17,6 @@ LOG = logging.getLogger()
# Using non-top-level imports to make pyosmium optional for replication only.
# pylint: disable=E0012,C0415

def _osm2pgsql_options_from_args(args, default_cache, default_threads):
    """ Set up the standard osm2pgsql options from the command line arguments.
    """
    return dict(osm2pgsql=args.osm2pgsql_path,
                osm2pgsql_cache=args.osm2pgsql_cache or default_cache,
                osm2pgsql_style=args.config.get_import_style_file(),
                threads=args.threads or default_threads,
                dsn=args.config.get_libpq_dsn(),
                flatnode_file=args.config.FLATNODE_FILE)


class UpdateReplication:
    """\
    Update the database using an online replication service.
@@ -62,13 +51,12 @@ class UpdateReplication:
        from ..tools import replication, refresh

        LOG.warning("Initialising replication updates")
        conn = connect(args.config.get_libpq_dsn())
        replication.init_replication(conn, base_url=args.config.REPLICATION_URL)
        if args.update_functions:
            LOG.warning("Create functions")
            refresh.create_functions(conn, args.config, args.sqllib_dir,
                                     True, False)
        conn.close()
        with connect(args.config.get_libpq_dsn()) as conn:
            replication.init_replication(conn, base_url=args.config.REPLICATION_URL)
            if args.update_functions:
                LOG.warning("Create functions")
                refresh.create_functions(conn, args.config, args.sqllib_dir,
                                         True, False)
        return 0


@@ -76,10 +64,8 @@ class UpdateReplication:
    def _check_for_updates(args):
        from ..tools import replication

        conn = connect(args.config.get_libpq_dsn())
        ret = replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL)
        conn.close()
        return ret
        with connect(args.config.get_libpq_dsn()) as conn:
            return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL)

    @staticmethod
    def _report_update(batchdate, start_import, start_index):
@@ -99,7 +85,7 @@ class UpdateReplication:
        from ..tools import replication
        from ..indexer.indexer import Indexer

        params = _osm2pgsql_options_from_args(args, 2000, 1)
        params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
        params.update(base_url=args.config.REPLICATION_URL,
                      update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'),
                      import_file=args.project_dir / 'osmosischange.osc',
@@ -122,13 +108,12 @@ class UpdateReplication:
        recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')

        while True:
            conn = connect(args.config.get_libpq_dsn())
            start = dt.datetime.now(dt.timezone.utc)
            state = replication.update(conn, params)
            if state is not replication.UpdateState.NO_CHANGES:
                status.log_status(conn, start, 'import')
            batchdate, _, _ = status.get_status(conn)
            conn.close()
            with connect(args.config.get_libpq_dsn()) as conn:
                start = dt.datetime.now(dt.timezone.utc)
                state = replication.update(conn, params)
                if state is not replication.UpdateState.NO_CHANGES:
                    status.log_status(conn, start, 'import')
                batchdate, _, _ = status.get_status(conn)

            if state is not replication.UpdateState.NO_CHANGES and args.do_index:
                index_start = dt.datetime.now(dt.timezone.utc)
@@ -137,10 +122,9 @@ class UpdateReplication:
                indexer.index_boundaries(0, 30)
                indexer.index_by_rank(0, 30)

                conn = connect(args.config.get_libpq_dsn())
                status.set_indexed(conn, True)
                status.log_status(conn, index_start, 'index')
                conn.close()
                with connect(args.config.get_libpq_dsn()) as conn:
                    status.set_indexed(conn, True)
                    status.log_status(conn, index_start, 'index')
            else:
                index_start = None
141    nominatim/clicmd/setup.py    (Normal file)
@@ -0,0 +1,141 @@
"""
Implementation of the 'import' subcommand.
"""
import logging
from pathlib import Path

import psutil

from ..tools.exec_utils import run_legacy_script
from ..db.connection import connect
from ..db import status
from ..errors import UsageError

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415

LOG = logging.getLogger()

class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
    """

    @staticmethod
    def add_args(parser):
        group_name = parser.add_argument_group('Required arguments')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--osm-file', metavar='FILE',
                           help='OSM file to be imported.')
        group.add_argument('--continue', dest='continue_at',
                           choices=['load-data', 'indexing', 'db-postprocess'],
                           help='Continue an import that was interrupted')
        group = parser.add_argument_group('Optional arguments')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--reverse-only', action='store_true',
                           help='Do not create tables and indexes for searching')
        group.add_argument('--no-partitions', action='store_true',
                           help="""Do not partition search indices
                                   (speeds up import of single country extracts)""")
        group.add_argument('--no-updates', action='store_true',
                           help="""Do not keep tables that are only needed for
                                   updating the database later""")
        group = parser.add_argument_group('Expert options')
        group.add_argument('--ignore-errors', action='store_true',
                           help='Continue import even when errors in SQL are present')
        group.add_argument('--index-noanalyse', action='store_true',
                           help='Do not perform analyse operations during index')


    @staticmethod
    def run(args): # pylint: disable=too-many-statements
        from ..tools import database_import
        from ..tools import refresh
        from ..indexer.indexer import Indexer

        if args.osm_file and not Path(args.osm_file).is_file():
            LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
            raise UsageError('Cannot access file.')

        if args.continue_at is None:
            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    args.data_dir,
                                                    args.no_partitions,
                                                    rouser=args.config.DATABASE_WEBUSER)

            LOG.warning('Installing database module')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.install_module(args.module_dir, args.project_dir,
                                               args.config.DATABASE_MODULE_PATH,
                                               conn=conn)

            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(Path(args.osm_file),
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates,
                                            ignore_errors=args.ignore_errors)

            LOG.warning('Create functions (1st pass)')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.create_functions(conn, args.config, args.sqllib_dir,
                                         False, False)

            LOG.warning('Create tables')
            params = ['setup.php', '--create-tables', '--create-partition-tables']
            if args.reverse_only:
                params.append('--reverse-only')
            run_legacy_script(*params, nominatim_env=args)

            LOG.warning('Create functions (2nd pass)')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.create_functions(conn, args.config, args.sqllib_dir,
                                         False, False)

            LOG.warning('Importing wikipedia importance data')
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Will be using default importances.')

            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(),
                                      args.data_dir,
                                      args.threads or psutil.cpu_count() or 1)

            LOG.warning('Calculate postcodes')
            run_legacy_script('setup.php', '--calculate-postcodes', nominatim_env=args)

        if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(),
                              args.threads or psutil.cpu_count() or 1)
            indexer.index_full(analyse=not args.index_noanalyse)

        LOG.warning('Post-process tables')
        params = ['setup.php', '--create-search-indices', '--create-country-names']
        if args.no_updates:
            params.append('--drop')
        run_legacy_script(*params, nominatim_env=args)

        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        refresh.setup_website(webdir, args.phplib_dir, args.config)

        with connect(args.config.get_libpq_dsn()) as conn:
            try:
                dbdate = status.compute_database_date(conn)
                status.set_status(conn, dbdate)
                LOG.info('Database is at %s.', dbdate)
            except Exception as exc: # pylint: disable=broad-except
                LOG.error('Cannot determine date of database: %s', exc)

        return 0
101    nominatim/clicmd/transition.py    (Normal file)
@@ -0,0 +1,101 @@
"""
Implementation of the 'transition' subcommand.

This subcommand provides stand-ins for functions that were available
through the PHP scripts but are now no longer directly accessible.
This module will be removed as soon as the transition phase is over.
"""
import logging
from pathlib import Path

from ..db.connection import connect
from ..db import status
from ..errors import UsageError

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415

LOG = logging.getLogger()

class AdminTransition:
    """\
    Internal functions for code transition. Do not use.
    """

    @staticmethod
    def add_args(parser):
        group = parser.add_argument_group('Sub-functions')
        group.add_argument('--create-db', action='store_true',
                           help='Create nominatim db')
        group.add_argument('--setup-db', action='store_true',
                           help='Build a blank nominatim db')
        group.add_argument('--import-data', action='store_true',
                           help='Import an OSM file')
        group.add_argument('--load-data', action='store_true',
                           help='Copy data to live tables from import table')
        group.add_argument('--index', action='store_true',
                           help='Index the data')
        group = parser.add_argument_group('Options')
        group.add_argument('--no-partitions', action='store_true',
                           help='Do not partition search indices')
        group.add_argument('--osm-file', metavar='FILE',
                           help='File to import')
        group.add_argument('--drop', action='store_true',
                           help='Drop tables needed for updates, making the database readonly')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--no-analyse', action='store_true',
                           help='Do not perform analyse operations during index')
        group.add_argument('--ignore-errors', action='store_true',
                           help="Ignore certain errors on import.")

    @staticmethod
    def run(args):
        from ..tools import database_import

        if args.create_db:
            LOG.warning('Create DB')
            database_import.create_db(args.config.get_libpq_dsn())

        if args.setup_db:
            LOG.warning('Setup DB')

            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.setup_extensions(conn)
                database_import.install_module(args.module_dir, args.project_dir,
                                               args.config.DATABASE_MODULE_PATH,
                                               conn=conn)

            database_import.import_base_data(args.config.get_libpq_dsn(),
                                             args.data_dir, args.no_partitions)

        if args.import_data:
            LOG.warning('Import data')
            if not args.osm_file:
                raise UsageError('Missing required --osm-file argument')
            database_import.import_osm_data(Path(args.osm_file),
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.drop,
                                            ignore_errors=args.ignore_errors)

        if args.load_data:
            LOG.warning('Load data')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
            database_import.load_data(args.config.get_libpq_dsn(),
                                      args.data_dir,
                                      args.threads or 1)

            with connect(args.config.get_libpq_dsn()) as conn:
                try:
                    status.set_status(conn, status.compute_database_date(conn))
                except Exception as exc: # pylint: disable=broad-except
                    LOG.error('Cannot determine date of database: %s', exc)

        if args.index:
            LOG.warning('Indexing')
            from ..indexer.indexer import Indexer
            indexer = Indexer(args.config.get_libpq_dsn(), args.threads or 1)
            indexer.index_full()

@@ -1,7 +1,9 @@
"""
Specialised connection and cursor functions.
"""
import contextlib
import logging
import os

import psycopg2
import psycopg2.extensions
@@ -9,6 +11,8 @@ import psycopg2.extras

from ..errors import UsageError

LOG = logging.getLogger()

class _Cursor(psycopg2.extras.DictCursor):
    """ A cursor returning dict-like objects and providing specialised
        execution functions.
@@ -17,8 +21,7 @@ class _Cursor(psycopg2.extras.DictCursor):
    def execute(self, query, args=None): # pylint: disable=W0221
        """ Query execution that logs the SQL query when debugging is enabled.
        """
        logger = logging.getLogger()
        logger.debug(self.mogrify(query, args).decode('utf-8'))
        LOG.debug(self.mogrify(query, args).decode('utf-8'))

        super().execute(query, args)

@@ -72,21 +75,98 @@ class _Connection(psycopg2.extensions.connection):
        return True


    def drop_table(self, name, if_exists=True):
        """ Drop the table with the given name.
            Set `if_exists` to False if a non-existent table should raise
            an exception instead of just being ignored.
        """
        with self.cursor() as cur:
            cur.execute("""DROP TABLE {} "{}"
                        """.format('IF EXISTS' if if_exists else '', name))
        self.commit()


    def server_version_tuple(self):
        """ Return the server version as a tuple of (major, minor).
            Converts correctly for pre-10 and post-10 PostgreSQL versions.
        """
        version = self.server_version
        if version < 100000:
            return (version / 10000, (version % 10000) / 100)
            return (int(version / 10000), (version % 10000) / 100)

        return (int(version / 10000), version % 10000)
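A worked example of the arithmetic (version numbers chosen for illustration): psycopg2 exposes server_version as an integer, e.g. 90603 for PostgreSQL 9.6.3 and 120003 for PostgreSQL 12.3.

    version = 90603                                        # pre-10 scheme: MMmmpp
    print(int(version / 10000), (version % 10000) / 100)   # -> 9 6.03
    version = 120003                                       # post-10 scheme: MM00pp
    print(int(version / 10000), version % 10000)           # -> 12 3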

    def postgis_version_tuple(self):
        """ Return the postgis version installed in the database as a
            tuple of (major, minor). Assumes that the PostGIS extension
            has been installed already.
        """
        with self.cursor() as cur:
            version = cur.scalar('SELECT postgis_lib_version()')

        return tuple((int(x) for x in version.split('.')[:2]))

        return (version / 10000, version % 10000)

def connect(dsn):
    """ Open a connection to the database using the specialised connection
        factory.
        factory. The returned object may be used in conjunction with 'with'.
        When used outside a context manager, use the `connection` attribute
        to get the connection.
    """
    try:
        return psycopg2.connect(dsn, connection_factory=_Connection)
        conn = psycopg2.connect(dsn, connection_factory=_Connection)
        ctxmgr = contextlib.closing(conn)
        ctxmgr.connection = conn
        return ctxmgr
    except psycopg2.OperationalError as err:
        raise UsageError("Cannot connect to database: {}".format(err)) from err
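A short usage sketch of the new contract (the DSN is assumed): the returned wrapper works as a context manager and also exposes the raw connection for callers that manage the lifetime themselves.

    dsn = 'dbname=nominatim'               # illustrative DSN

    with connect(dsn) as conn:             # closed automatically on exit
        with conn.cursor() as cur:
            cur.execute('SELECT 1')

    conn = connect(dsn).connection         # caller must close this one explicitly
    conn.close()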

# Translation from PG connection string parameters to PG environment variables.
# Derived from https://www.postgresql.org/docs/current/libpq-envars.html.
_PG_CONNECTION_STRINGS = {
    'host': 'PGHOST',
    'hostaddr': 'PGHOSTADDR',
    'port': 'PGPORT',
    'dbname': 'PGDATABASE',
    'user': 'PGUSER',
    'password': 'PGPASSWORD',
    'passfile': 'PGPASSFILE',
    'channel_binding': 'PGCHANNELBINDING',
    'service': 'PGSERVICE',
    'options': 'PGOPTIONS',
    'application_name': 'PGAPPNAME',
    'sslmode': 'PGSSLMODE',
    'requiressl': 'PGREQUIRESSL',
    'sslcompression': 'PGSSLCOMPRESSION',
    'sslcert': 'PGSSLCERT',
    'sslkey': 'PGSSLKEY',
    'sslrootcert': 'PGSSLROOTCERT',
    'sslcrl': 'PGSSLCRL',
    'requirepeer': 'PGREQUIREPEER',
    'ssl_min_protocol_version': 'PGSSLMINPROTOCOLVERSION',
    'ssl_max_protocol_version': 'PGSSLMAXPROTOCOLVERSION',
    'gssencmode': 'PGGSSENCMODE',
    'krbsrvname': 'PGKRBSRVNAME',
    'gsslib': 'PGGSSLIB',
    'connect_timeout': 'PGCONNECT_TIMEOUT',
    'target_session_attrs': 'PGTARGETSESSIONATTRS',
}


def get_pg_env(dsn, base_env=None):
    """ Return a copy of `base_env` with the environment variables for
        PostgreSQL set up from the given database connection string.
        If `base_env` is None, then the OS environment is used as a base
        environment.
    """
    env = dict(base_env if base_env is not None else os.environ)

    for param, value in psycopg2.extensions.parse_dsn(dsn).items():
        if param in _PG_CONNECTION_STRINGS:
            env[_PG_CONNECTION_STRINGS[param]] = value
        else:
            LOG.error("Unknown connection parameter '%s' ignored.", param)

    return env
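An illustrative call (values assumed): libpq connection-string parameters are mapped onto the corresponding PG* variables on top of the inherited environment, ready to be passed to a child process such as psql.

    env = get_pg_env('dbname=nominatim host=localhost port=5433')
    # env now contains PGDATABASE=nominatim, PGHOST=localhost and PGPORT=5433
    # in addition to everything inherited from os.environ.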

@@ -1,12 +1,59 @@
"""
Helper functions for handling DB accesses.
"""
import subprocess
import logging
import gzip

def execute_file(conn, fname):
    """ Read an SQL file and run its contents against the given connection.
from .connection import get_pg_env
from ..errors import UsageError

LOG = logging.getLogger()

def _pipe_to_proc(proc, fdesc):
    chunk = fdesc.read(2048)
    while chunk and proc.poll() is None:
        try:
            proc.stdin.write(chunk)
        except BrokenPipeError as exc:
            raise UsageError("Failed to execute SQL file.") from exc
        chunk = fdesc.read(2048)

    return len(chunk)

def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None):
    """ Read an SQL file and run its contents against the given database
        using psql. Use `pre_code` and `post_code` to run extra commands
        before or after executing the file. The commands are run within the
        same session, so they may be used to wrap the file execution in a
        transaction.
    """
    with fname.open('r') as fdesc:
        sql = fdesc.read()
    with conn.cursor() as cur:
        cur.execute(sql)
    conn.commit()
    cmd = ['psql']
    if not ignore_errors:
        cmd.extend(('-v', 'ON_ERROR_STOP=1'))
    if not LOG.isEnabledFor(logging.INFO):
        cmd.append('--quiet')
    proc = subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE)

    try:
        if not LOG.isEnabledFor(logging.INFO):
            proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))

        if pre_code:
            proc.stdin.write((pre_code + ';').encode('utf-8'))

        if fname.suffix == '.gz':
            with gzip.open(str(fname), 'rb') as fdesc:
                remain = _pipe_to_proc(proc, fdesc)
        else:
            with fname.open('rb') as fdesc:
                remain = _pipe_to_proc(proc, fdesc)

        if remain == 0 and post_code:
            proc.stdin.write((';' + post_code).encode('utf-8'))
    finally:
        proc.stdin.close()
        ret = proc.wait()

    if ret != 0 or remain > 0:
        raise UsageError("Failed to execute SQL file.")
@@ -61,8 +61,8 @@ class InterpolationRunner:
    @staticmethod
    def sql_index_place(ids):
        return """UPDATE location_property_osmline
                  SET indexed_status = 0 WHERE place_id IN ({})"""\
               .format(','.join((str(i) for i in ids)))
                  SET indexed_status = 0 WHERE place_id IN ({})
               """.format(','.join((str(i) for i in ids)))

class BoundaryRunner:
    """ Returns SQL commands for indexing the administrative boundaries
@@ -79,57 +79,171 @@ class BoundaryRunner:
        return """SELECT count(*) FROM placex
                  WHERE indexed_status > 0
                    AND rank_search = {}
                    AND class = 'boundary' and type = 'administrative'""".format(self.rank)
                    AND class = 'boundary' and type = 'administrative'
               """.format(self.rank)

    def sql_get_objects(self):
        return """SELECT place_id FROM placex
                  WHERE indexed_status > 0 and rank_search = {}
                        and class = 'boundary' and type = 'administrative'
                  ORDER BY partition, admin_level""".format(self.rank)
                  ORDER BY partition, admin_level
               """.format(self.rank)

    @staticmethod
    def sql_index_place(ids):
        return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
               .format(','.join((str(i) for i in ids)))


class PostcodeRunner:
    """ Provides the SQL commands for indexing the location_postcode table.
    """

    @staticmethod
    def name():
        return "postcodes (location_postcode)"

    @staticmethod
    def sql_count_objects():
        return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'

    @staticmethod
    def sql_get_objects():
        return """SELECT place_id FROM location_postcode
                  WHERE indexed_status > 0
                  ORDER BY country_code, postcode"""

    @staticmethod
    def sql_index_place(ids):
        return """UPDATE location_postcode SET indexed_status = 0
                  WHERE place_id IN ({})
               """.format(','.join((str(i) for i in ids)))


def _analyse_db_if(conn, condition):
    if condition:
        with conn.cursor() as cur:
            cur.execute('ANALYSE')


class Indexer:
    """ Main indexing routine.
    """

    def __init__(self, dsn, num_threads):
        self.conn = psycopg2.connect(dsn)
        self.threads = [DBConnection(dsn) for _ in range(num_threads)]
        self.dsn = dsn
        self.num_threads = num_threads
        self.conn = None
        self.threads = []


    def _setup_connections(self):
        self.conn = psycopg2.connect(self.dsn)
        self.threads = [DBConnection(self.dsn) for _ in range(self.num_threads)]


    def _close_connections(self):
        if self.conn:
            self.conn.close()
            self.conn = None

        for thread in self.threads:
            thread.close()
        self.threads = []


    def index_full(self, analyse=True):
        """ Index the complete database. This will first index boundaries
            followed by all other objects. When `analyse` is True, then the
            database will be analysed at the appropriate places to
            ensure that database statistics are updated.
        """
        conn = psycopg2.connect(self.dsn)

        try:
            self.index_by_rank(0, 4)
            _analyse_db_if(conn, analyse)

            self.index_boundaries(0, 30)
            _analyse_db_if(conn, analyse)

            self.index_by_rank(5, 25)
            _analyse_db_if(conn, analyse)

            self.index_by_rank(26, 30)
            _analyse_db_if(conn, analyse)

            self.index_postcodes()
            _analyse_db_if(conn, analyse)
        finally:
            conn.close()
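Connections are now opened per operation instead of in the constructor, so creating an Indexer is cheap and the object can be reused across runs; a usage sketch (DSN is a placeholder):

    from nominatim.indexer.indexer import Indexer

    idx = Indexer('dbname=nominatim', num_threads=4)
    idx.index_full(analyse=True)   # boundaries, ranks, then postcodes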
    def index_boundaries(self, minrank, maxrank):
        """ Index only administrative boundaries within the given rank range.
        """
        LOG.warning("Starting indexing boundaries using %s threads",
                    len(self.threads))
                    self.num_threads)

        for rank in range(max(minrank, 4), min(maxrank, 26)):
            self.index(BoundaryRunner(rank))
        self._setup_connections()

        try:
            for rank in range(max(minrank, 4), min(maxrank, 26)):
                self.index(BoundaryRunner(rank))
        finally:
            self._close_connections()

    def index_by_rank(self, minrank, maxrank):
        """ Run classic indexing by rank.
        """ Index all entries of placex in the given rank range (inclusive)
            in order of their address rank.

            When rank 30 is requested then also interpolations and
            places with address rank 0 will be indexed.
        """
        maxrank = min(maxrank, 30)
        LOG.warning("Starting indexing rank (%i to %i) using %i threads",
                    minrank, maxrank, len(self.threads))
                    minrank, maxrank, self.num_threads)

        for rank in range(max(1, minrank), maxrank):
            self.index(RankRunner(rank))
        self._setup_connections()

        if maxrank == 30:
            self.index(RankRunner(0))
            self.index(InterpolationRunner(), 20)
            self.index(RankRunner(30), 20)
        else:
            self.index(RankRunner(maxrank))
        try:
            for rank in range(max(1, minrank), maxrank):
                self.index(RankRunner(rank))

            if maxrank == 30:
                self.index(RankRunner(0))
                self.index(InterpolationRunner(), 20)
                self.index(RankRunner(30), 20)
            else:
                self.index(RankRunner(maxrank))
        finally:
            self._close_connections()


    def index_postcodes(self):
        """Index the entries of the location_postcode table.
        """
        LOG.warning("Starting indexing postcodes using %s threads", self.num_threads)

        self._setup_connections()

        try:
            self.index(PostcodeRunner(), 20)
        finally:
            self._close_connections()

    def update_status_table(self):
        """ Update the status in the status table to 'indexed'.
        """
        with self.conn.cursor() as cur:
            cur.execute('UPDATE import_status SET indexed = true')
        self.conn.commit()
        conn = psycopg2.connect(self.dsn)

        try:
            with conn.cursor() as cur:
                cur.execute('UPDATE import_status SET indexed = true')

            conn.commit()
        finally:
            conn.close()

    def index(self, obj, batch=1):
        """ Index a single rank or table. `obj` describes the SQL to use
@@ -60,7 +60,7 @@ def check_database(config):
    """ Run a number of checks on the database and return the status.
    """
    try:
        conn = connect(config.get_libpq_dsn())
        conn = connect(config.get_libpq_dsn()).connection
    except UsageError as err:
        conn = _BadConnection(str(err))
260
nominatim/tools/database_import.py
Normal file
@@ -0,0 +1,260 @@
"""
Functions for setting up and importing a new Nominatim database.
"""
import logging
import os
import selectors
import subprocess
import shutil
from pathlib import Path

import psutil
import psycopg2

from ..db.connection import connect, get_pg_env
from ..db import utils as db_utils
from ..db.async_connection import DBConnection
from .exec_utils import run_osm2pgsql
from ..errors import UsageError
from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION

LOG = logging.getLogger()

def setup_database_skeleton(dsn, data_dir, no_partitions, rouser=None):
    """ Create a new database for Nominatim and populate it with the
        essential extensions and data.
    """
    LOG.warning('Creating database')
    create_db(dsn, rouser)

    LOG.warning('Setting up database')
    with connect(dsn) as conn:
        setup_extensions(conn)

    LOG.warning('Loading basic data')
    import_base_data(dsn, data_dir, no_partitions)


def create_db(dsn, rouser=None):
    """ Create a new database for the given DSN. Fails when the database
        already exists or the PostgreSQL version is too old.
        Uses `createdb` to create the database.

        If 'rouser' is given, then the function also checks that the user
        with that given name exists.

        Requires superuser rights by the caller.
    """
    proc = subprocess.run(['createdb'], env=get_pg_env(dsn), check=False)

    if proc.returncode != 0:
        raise UsageError('Creating new database failed.')

    with connect(dsn) as conn:
        postgres_version = conn.server_version_tuple()
        if postgres_version < POSTGRESQL_REQUIRED_VERSION:
            LOG.fatal('Minimum supported version of Postgresql is %d.%d. '
                      'Found version %d.%d.',
                      POSTGRESQL_REQUIRED_VERSION[0], POSTGRESQL_REQUIRED_VERSION[1],
                      postgres_version[0], postgres_version[1])
            raise UsageError('PostgreSQL server is too old.')

        if rouser is not None:
            with conn.cursor() as cur:
                cnt = cur.scalar('SELECT count(*) FROM pg_user where usename = %s',
                                 (rouser, ))
                if cnt == 0:
                    LOG.fatal("Web user '%s' does not exist. Create it with:\n"
                              "\n  createuser %s", rouser, rouser)
                    raise UsageError('Missing read-only user.')
def setup_extensions(conn):
    """ Set up all extensions needed for Nominatim. Also checks that the
        versions of the extensions are sufficient.
    """
    with conn.cursor() as cur:
        cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
        cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
    conn.commit()

    postgis_version = conn.postgis_version_tuple()
    if postgis_version < POSTGIS_REQUIRED_VERSION:
        LOG.fatal('Minimum supported version of PostGIS is %d.%d. '
                  'Found version %d.%d.',
                  POSTGIS_REQUIRED_VERSION[0], POSTGIS_REQUIRED_VERSION[1],
                  postgis_version[0], postgis_version[1])
        raise UsageError('PostGIS version is too old.')


def install_module(src_dir, project_dir, module_dir, conn=None):
    """ Copy the normalization module from src_dir into the project
        directory under the '/module' directory. If 'module_dir' is set, then
        use the module from there instead and check that it is accessible
        for Postgresql.

        The function detects when the installation is run from the
        build directory. It doesn't touch the module in that case.

        If 'conn' is given, then the function also tests if the module
        can be accessed via the given database.
    """
    if not module_dir:
        module_dir = project_dir / 'module'

        if not module_dir.exists() or not src_dir.samefile(module_dir):

            if not module_dir.exists():
                module_dir.mkdir()

            destfile = module_dir / 'nominatim.so'
            shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
            destfile.chmod(0o755)

            LOG.info('Database module installed at %s', str(destfile))
        else:
            LOG.info('Running from build directory. Leaving database module as is.')
    else:
        LOG.info("Using custom path for database module at '%s'", module_dir)

    if conn is not None:
        with conn.cursor() as cur:
            try:
                cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
                               RETURNS text AS '{}/nominatim.so', 'transliteration'
                               LANGUAGE c IMMUTABLE STRICT;
                               DROP FUNCTION nominatim_test_import_func(text)
                            """.format(module_dir))
            except psycopg2.DatabaseError as err:
                LOG.fatal("Error accessing database module: %s", err)
                raise UsageError("Database module cannot be accessed.") from err


def import_base_data(dsn, sql_dir, ignore_partitions=False):
    """ Create and populate the tables with basic static data that provides
        the background for geocoding. Data is assumed to not yet exist.
    """
    db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
    db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')

    if ignore_partitions:
        with connect(dsn) as conn:
            with conn.cursor() as cur:
                cur.execute('UPDATE country_name SET partition = 0')
            conn.commit()
def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
    """ Import the given OSM file. 'options' contains the list of
        default settings for osm2pgsql.
    """
    options['import_file'] = osm_file
    options['append'] = False
    options['threads'] = 1

    if not options['flatnode_file'] and options['osm2pgsql_cache'] == 0:
        # Make some educated guesses about cache size based on the size
        # of the import file and the available memory.
        mem = psutil.virtual_memory()
        fsize = os.stat(str(osm_file)).st_size
        options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
                                             fsize * 2) / 1024 / 1024) + 1

    run_osm2pgsql(options)

    with connect(options['dsn']) as conn:
        if not ignore_errors:
            with conn.cursor() as cur:
                cur.execute('SELECT * FROM place LIMIT 1')
                if cur.rowcount == 0:
                    raise UsageError('No data imported by osm2pgsql.')

        if drop:
            conn.drop_table('planet_osm_nodes')

    if drop:
        if options['flatnode_file']:
            Path(options['flatnode_file']).unlink()
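The heuristic caps the osm2pgsql node cache at twice the input file size or 75% of usable memory, whichever is smaller. A worked check of the arithmetic with illustrative numbers:

    mem_bytes = 8 * 1024**3       # 8 GiB available + cached
    fsize = 1 * 1024**3           # 1 GiB OSM extract
    cache_mb = int(min(mem_bytes * 0.75, fsize * 2) / 1024 / 1024) + 1
    assert cache_mb == 2049       # capped by twice the file size, in MB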
def truncate_data_tables(conn, max_word_frequency=None):
    """ Truncate all data tables to prepare for a fresh load.
    """
    with conn.cursor() as cur:
        cur.execute('TRUNCATE word')
        cur.execute('TRUNCATE placex')
        cur.execute('TRUNCATE place_addressline')
        cur.execute('TRUNCATE location_area')
        cur.execute('TRUNCATE location_area_country')
        cur.execute('TRUNCATE location_property')
        cur.execute('TRUNCATE location_property_tiger')
        cur.execute('TRUNCATE location_property_osmline')
        cur.execute('TRUNCATE location_postcode')
        if conn.table_exists('search_name'):
            cur.execute('TRUNCATE search_name')
        cur.execute('DROP SEQUENCE IF EXISTS seq_place')
        cur.execute('CREATE SEQUENCE seq_place start 100000')

        cur.execute("""SELECT tablename FROM pg_tables
                       WHERE tablename LIKE 'location_road_%'""")

        for table in [r[0] for r in list(cur)]:
            cur.execute('TRUNCATE ' + table)

        if max_word_frequency is not None:
            # Used by getorcreate_word_id to ignore frequent partial words.
            cur.execute("""CREATE OR REPLACE FUNCTION get_maxwordfreq()
                           RETURNS integer AS $$
                             SELECT {} as maxwordfreq;
                           $$ LANGUAGE SQL IMMUTABLE
                        """.format(max_word_frequency))
        conn.commit()

_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'

def load_data(dsn, data_dir, threads):
    """ Copy data into the word and placex table.
    """
    # Pre-calculate the most important terms in the word list.
    db_utils.execute_file(dsn, data_dir / 'words.sql')

    sel = selectors.DefaultSelector()
    # Then copy data from place to placex in <threads - 1> chunks.
    place_threads = max(1, threads - 1)
    for imod in range(place_threads):
        conn = DBConnection(dsn)
        conn.connect()
        conn.perform("""INSERT INTO placex ({0})
                        SELECT {0} FROM place
                        WHERE osm_id % {1} = {2}
                          AND NOT (class='place' and type='houses')
                          AND ST_IsValid(geometry)
                     """.format(_COPY_COLUMNS, place_threads, imod))
        sel.register(conn, selectors.EVENT_READ, conn)

    # Address interpolations go into another table.
    conn = DBConnection(dsn)
    conn.connect()
    conn.perform("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
                    SELECT osm_id, address, geometry FROM place
                    WHERE class='place' and type='houses' and osm_type='W'
                          and ST_GeometryType(geometry) = 'ST_LineString'
                 """)
    sel.register(conn, selectors.EVENT_READ, conn)

    # Now wait for all of them to finish.
    todo = place_threads + 1
    while todo > 0:
        for key, _ in sel.select(1):
            conn = key.data
            sel.unregister(conn)
            conn.wait()
            conn.close()
            todo -= 1
        print('.', end='', flush=True)
    print('\n')

    with connect(dsn) as conn:
        with conn.cursor() as cur:
            cur.execute('ANALYSE')
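Rows are spread over the worker connections by osm_id modulo the number of workers, so each place row is inserted by exactly one connection; a tiny illustration with hypothetical ids:

    place_threads = 3
    for osm_id in (10, 11, 12, 13):
        print(osm_id, '-> worker', osm_id % place_threads)
    # 10 -> worker 1, 11 -> worker 2, 12 -> worker 0, 13 -> worker 1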
@@ -2,14 +2,12 @@
Helper functions for executing external programs.
"""
import logging
import os
import subprocess
import urllib.request as urlrequest
from urllib.parse import urlencode

from psycopg2.extensions import parse_dsn

from ..version import NOMINATIM_VERSION
from ..db.connection import get_pg_env

LOG = logging.getLogger()

@@ -100,7 +98,7 @@ def run_php_server(server_address, base_dir):
def run_osm2pgsql(options):
    """ Run osm2pgsql with the given options.
    """
    env = os.environ
    env = get_pg_env(options['dsn'])
    cmd = [options['osm2pgsql'],
           '--hstore', '--latlon', '--slim',
           '--with-forward-dependencies', 'false',
@@ -112,20 +110,18 @@ def run_osm2pgsql(options):
          ]
    if options['append']:
        cmd.append('--append')
    else:
        cmd.append('--create')

    if options['flatnode_file']:
        cmd.extend(('--flat-nodes', options['flatnode_file']))

    dsn = parse_dsn(options['dsn'])
    if 'password' in dsn:
        env['PGPASSWORD'] = dsn['password']
    if 'dbname' in dsn:
        cmd.extend(('-d', dsn['dbname']))
    if 'user' in dsn:
        cmd.extend(('--username', dsn['user']))
    for param in ('host', 'port'):
        if param in dsn:
            cmd.extend(('--' + param, dsn[param]))
    for key, param in (('slim_data', '--tablespace-slim-data'),
                       ('slim_index', '--tablespace-slim-index'),
                       ('main_data', '--tablespace-main-data'),
                       ('main_index', '--tablespace-main-index')):
        if options['tablespaces'][key]:
            cmd.extend((param, options['tablespaces'][key]))

    if options.get('disable_jit', False):
        env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'
@@ -12,17 +12,17 @@ from ..db.utils import execute_file

LOG = logging.getLogger()

def update_postcodes(conn, sql_dir):
def update_postcodes(dsn, sql_dir):
    """ Recalculate postcode centroids and add, remove and update entries in the
        location_postcode table. `conn` is an open connection to the database.
    """
    execute_file(conn, sql_dir / 'update-postcodes.sql')
    execute_file(dsn, sql_dir / 'update-postcodes.sql')


def recompute_word_counts(conn, sql_dir):
def recompute_word_counts(dsn, sql_dir):
    """ Compute the frequency of full-word search terms.
    """
    execute_file(conn, sql_dir / 'words_from_search_name.sql')
    execute_file(dsn, sql_dir / 'words_from_search_name.sql')


def _add_address_level_rows_from_entry(rows, entry):
@@ -200,6 +200,53 @@ PHP_CONST_DEFS = (
)


def import_wikipedia_articles(dsn, data_path, ignore_errors=False):
    """ Replaces the wikipedia importance tables with new data.
        The import is run in a single transaction so that the new data
        is replaced seamlessly.

        Returns 0 if all was well and 1 if the importance file could not
        be found. Throws an exception if there was an error reading the file.
    """
    datafile = data_path / 'wikimedia-importance.sql.gz'

    if not datafile.exists():
        return 1

    pre_code = """BEGIN;
                  DROP TABLE IF EXISTS "wikipedia_article";
                  DROP TABLE IF EXISTS "wikipedia_redirect"
               """
    post_code = "COMMIT"
    execute_file(dsn, datafile, ignore_errors=ignore_errors,
                 pre_code=pre_code, post_code=post_code)

    return 0


def recompute_importance(conn):
    """ Recompute wikipedia links and importance for all entries in placex.
        This is a long-running operation that must not be executed in
        parallel with updates.
    """
    with conn.cursor() as cur:
        cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL')
        cur.execute("""
            UPDATE placex SET (wikipedia, importance) =
               (SELECT wikipedia, importance
                FROM compute_importance(extratags, country_code, osm_type, osm_id))
            """)
        cur.execute("""
            UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
             FROM placex d
             WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
                   and (s.wikipedia is null or s.importance < d.importance);
            """)

        cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')
    conn.commit()


def setup_website(basedir, phplib_dir, config):
    """ Create the website script stubs.
    """
@@ -3,3 +3,6 @@ Version information for Nominatim.
"""

NOMINATIM_VERSION = "3.6.0"

POSTGRESQL_REQUIRED_VERSION = (9, 3)
POSTGIS_REQUIRED_VERSION = (2, 2)
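Keeping the requirements as tuples means the version checks reduce to plain tuple comparison; for instance:

    assert (9, 6) >= POSTGRESQL_REQUIRED_VERSION   # PostgreSQL 9.6 passes
    assert (2, 1) < POSTGIS_REQUIRED_VERSION       # PostGIS 2.1 is rejected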
@@ -10,5 +10,8 @@ bdd-no-test-db:
php:
	cd php && phpunit ./

python:
	pytest python


.PHONY: bdd php no-test-db
@@ -7,6 +7,7 @@ import psycopg2.extras

sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve()))

from nominatim import cli
from nominatim.config import Configuration
from nominatim.tools import refresh
from steps.utils import run_script
@@ -88,18 +89,18 @@ class NominatimEnvironment:
        self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
        self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
        self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
        self.test_env['NOMINATIM_DATADIR'] = self.src_dir / 'data'
        self.test_env['NOMINATIM_SQLDIR'] = self.src_dir / 'lib-sql'
        self.test_env['NOMINATIM_CONFIGDIR'] = self.src_dir / 'settings'
        self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = self.build_dir / 'module'
        self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql'
        self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim'
        self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve())
        self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve())
        self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve())
        self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
        self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
        self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())

        if self.server_module_path:
            self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
        else:
            # avoid module being copied into the temporary environment
            self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module'
            self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = str((self.build_dir / 'module').resolve())

        if self.website_dir is not None:
            self.website_dir.cleanup()
@@ -182,9 +183,9 @@ class NominatimEnvironment:
        self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())

        try:
            self.run_setup_script('all', osm_file=self.api_test_file)
            self.run_nominatim('import', '--osm-file', str(self.api_test_file))
            self.run_setup_script('import-tiger-data')
            self.run_setup_script('drop')
            self.run_nominatim('freeze')

            phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
            run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
@@ -249,12 +250,25 @@ class NominatimEnvironment:
        """
        with db.cursor() as cur:
            while True:
                self.run_update_script('index')
                self.run_nominatim('index')

                cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
                if cur.rowcount == 0:
                    return

    def run_nominatim(self, *cmdline):
        """ Run the nominatim command-line tool via the library.
        """
        cli.nominatim(module_dir='',
                      osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
                      phplib_dir=str(self.src_dir / 'lib-php'),
                      sqllib_dir=str(self.src_dir / 'lib-sql'),
                      data_dir=str(self.src_dir / 'data'),
                      config_dir=str(self.src_dir / 'settings'),
                      cli_args=cmdline,
                      phpcgi_path='',
                      environ=self.test_env)

    def run_setup_script(self, *args, **kwargs):
        """ Run the Nominatim setup script with the given arguments.
        """
@@ -285,7 +299,7 @@ class NominatimEnvironment:
        """ Copy data from place to the placex and location_property_osmline
            tables invoking the appropriate triggers.
        """
        self.run_setup_script('create-functions', 'create-partition-functions')
        self.run_nominatim('refresh', '--functions', '--no-diff-updates')

        with db.cursor() as cur:
            cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type,
@@ -5,6 +5,7 @@ import psycopg2.extras
from place_inserter import PlaceColumn
from table_compare import NominatimID, DBRow

from nominatim.indexer.indexer import Indexer

def check_database_integrity(context):
    """ Check some generic constraints on the tables.
@@ -85,7 +86,12 @@ def import_and_index_data_from_place_table(context):
    """ Import data previously set up in the place table.
    """
    context.nominatim.copy_from_place(context.db)
    context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse')
    context.nominatim.run_setup_script('calculate-postcodes')

    # Call directly as the refresh function does not include postcodes.
    indexer = Indexer(context.nominatim.test_env['NOMINATIM_DATABASE_DSN'][6:], 1)
    indexer.index_full(analyse=False)

    check_database_integrity(context)

@when("updating places")
@@ -93,8 +99,7 @@ def update_place_table(context):
    """ Update the place table with the given data. Also runs all triggers
        related to updates and reindexes the new data.
    """
    context.nominatim.run_setup_script(
        'create-functions', 'create-partition-functions', 'enable-diff-updates')
    context.nominatim.run_nominatim('refresh', '--functions')
    with context.db.cursor() as cur:
        for row in context.table:
            PlaceColumn(context).add_row(row, False).db_insert(cur)
@@ -106,7 +111,7 @@ def update_place_table(context):
def update_postcodes(context):
    """ Rerun the calculation of postcodes.
    """
    context.nominatim.run_update_script('calculate-postcodes')
    context.nominatim.run_nominatim('refresh', '--postcodes')

@when("marking for delete (?P<oids>.*)")
def delete_places(context, oids):
@@ -114,8 +119,7 @@ def delete_places(context, oids):
        separated by commas. Also runs all triggers
        related to updates and reindexes the new data.
    """
    context.nominatim.run_setup_script(
        'create-functions', 'create-partition-functions', 'enable-diff-updates')
    context.nominatim.run_nominatim('refresh', '--functions')
    with context.db.cursor() as cur:
        for oid in oids.split(','):
            NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')

@@ -75,9 +75,8 @@ def update_from_osm_file(context):
    The data is expected as attached text in OPL format.
    """
    context.nominatim.copy_from_place(context.db)
    context.nominatim.run_setup_script('index', 'index-noanalyse')
    context.nominatim.run_setup_script('create-functions', 'create-partition-functions',
                                       'enable-diff-updates')
    context.nominatim.run_nominatim('index')
    context.nominatim.run_nominatim('refresh', '--functions')

    # create an OSM file and import it
    fname = write_opl_file(context.text, context.osm)
@@ -43,6 +43,11 @@ class _TestingCursor(psycopg2.extras.DictCursor):
                           WHERE tablename = %s""", (table, ))
        return num == 1

    def table_rows(self, table):
        """ Return the number of rows in the given table.
        """
        return self.scalar('SELECT count(*) FROM ' + table)


@pytest.fixture
def temp_db(monkeypatch):
@@ -71,6 +76,12 @@ def temp_db(monkeypatch):

    conn.close()


@pytest.fixture
def dsn(temp_db):
    return 'dbname=' + temp_db


@pytest.fixture
def temp_db_with_extensions(temp_db):
    conn = psycopg2.connect(database=temp_db)
@@ -85,9 +96,8 @@ def temp_db_with_extensions(temp_db):
def temp_db_conn(temp_db):
    """ Connection to the test database.
    """
    conn = connection.connect('dbname=' + temp_db)
    yield conn
    conn.close()
    with connection.connect('dbname=' + temp_db) as conn:
        yield conn


@pytest.fixture
@@ -102,10 +112,25 @@ def temp_db_cursor(temp_db):
    conn.close()


@pytest.fixture
def table_factory(temp_db_cursor):
    def mk_table(name, definition='id INT', content=None):
        temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition))
        if content is not None:
            if not isinstance(content, str):
                content = '),('.join([str(x) for x in content])
            temp_db_cursor.execute("INSERT INTO {} VALUES ({})".format(name, content))

    return mk_table
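The factory keeps table setup out of the individual tests; a usage sketch with illustrative names (single-column content items are formatted straight into the VALUES list):

    def test_example(table_factory, temp_db_cursor):
        table_factory('numbers', 'id INT', content=(1, 2, 3))
        assert temp_db_cursor.scalar('SELECT count(*) FROM numbers') == 3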
@pytest.fixture
def def_config():
    return Configuration(None, SRC_DIR.resolve() / 'settings')

@pytest.fixture
def src_dir():
    return SRC_DIR.resolve()

@pytest.fixture
def status_table(temp_db_conn):
@@ -158,7 +183,7 @@ def place_row(place_table, temp_db_cursor):
        temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                               (osm_id or next(idseq), osm_type, cls, typ, names,
                                admin_level, address, extratags,
                                geom or 'SRID=4326;POINT(0 0 )'))
                                geom or 'SRID=4326;POINT(0 0)'))

    return _insert

@@ -168,7 +193,7 @@ def placex_table(temp_db_with_extensions, temp_db_conn):
    """
    with temp_db_conn.cursor() as cur:
        cur.execute("""CREATE TABLE placex (
                           place_id BIGINT NOT NULL,
                           place_id BIGINT,
                           parent_place_id BIGINT,
                           linked_place_id BIGINT,
                           importance FLOAT,
@@ -191,9 +216,53 @@ def placex_table(temp_db_with_extensions, temp_db_conn):
                           country_code varchar(2),
                           housenumber TEXT,
                           postcode TEXT,
                           centroid GEOMETRY(Geometry, 4326))
                    """)
                           centroid GEOMETRY(Geometry, 4326))""")
    temp_db_conn.commit()


@pytest.fixture
def osmline_table(temp_db_with_extensions, temp_db_conn):
    with temp_db_conn.cursor() as cur:
        cur.execute("""CREATE TABLE location_property_osmline (
                           place_id BIGINT,
                           osm_id BIGINT,
                           parent_place_id BIGINT,
                           geometry_sector INTEGER,
                           indexed_date TIMESTAMP,
                           startnumber INTEGER,
                           endnumber INTEGER,
                           partition SMALLINT,
                           indexed_status SMALLINT,
                           linegeo GEOMETRY,
                           interpolationtype TEXT,
                           address HSTORE,
                           postcode TEXT,
                           country_code VARCHAR(2))""")
    temp_db_conn.commit()


@pytest.fixture
def word_table(temp_db, temp_db_conn):
    with temp_db_conn.cursor() as cur:
        cur.execute("""CREATE TABLE word (
                           word_id INTEGER,
                           word_token text,
                           word text,
                           class text,
                           type text,
                           country_code varchar(2),
                           search_name_count INTEGER,
                           operator TEXT)""")
    temp_db_conn.commit()


@pytest.fixture
def osm2pgsql_options(temp_db):
    return dict(osm2pgsql='echo',
                osm2pgsql_cache=10,
                osm2pgsql_style='style.file',
                threads=1,
                dsn='dbname=' + temp_db,
                flatnode_file='',
                tablespaces=dict(slim_data='', slim_index='',
                                 main_data='', main_index=''))
18
test/python/mocks.py
Normal file
18
test/python/mocks.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""
|
||||
Custom mocks for testing.
|
||||
"""
|
||||
|
||||
|
||||
class MockParamCapture:
|
||||
""" Mock that records the parameters with which a function was called
|
||||
as well as the number of calls.
|
||||
"""
|
||||
def __init__(self, retval=0):
|
||||
self.called = 0
|
||||
self.return_value = retval
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
self.called += 1
|
||||
self.last_args = args
|
||||
self.last_kwargs = kwargs
|
||||
return self.return_value
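Typical wiring in a test, via pytest's monkeypatch (module and argument names are illustrative):

    mock = MockParamCapture(retval=0)
    monkeypatch.setattr(nominatim.tools.replication, 'update', mock)

    run_code_under_test()          # ends up calling the patched function
    assert mock.called == 1
    assert mock.last_args          # positional arguments of the last call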
@@ -5,47 +5,36 @@ These tests just check that the various command line parameters route to the
correct functionality. They use a lot of monkeypatching to avoid executing
the actual functions.
"""
import datetime as dt
import psycopg2
from pathlib import Path

import pytest
import time

import nominatim.cli
import nominatim.clicmd.api
import nominatim.clicmd.refresh
import nominatim.clicmd.admin
import nominatim.clicmd.setup
import nominatim.indexer.indexer
import nominatim.tools.admin
import nominatim.tools.check_database
import nominatim.tools.database_import
import nominatim.tools.freeze
import nominatim.tools.refresh
import nominatim.tools.replication
from nominatim.errors import UsageError
from nominatim.db import status

from mocks import MockParamCapture

SRC_DIR = (Path(__file__) / '..' / '..' / '..').resolve()

def call_nominatim(*args):
    return nominatim.cli.nominatim(module_dir='build/module',
                                   osm2pgsql_path='build/osm2pgsql/osm2pgsql',
                                   phplib_dir='lib-php',
                                   data_dir='.',
                                   phplib_dir=str(SRC_DIR / 'lib-php'),
                                   data_dir=str(SRC_DIR / 'data'),
                                   phpcgi_path='/usr/bin/php-cgi',
                                   sqllib_dir='lib-sql',
                                   config_dir='settings',
                                   sqllib_dir=str(SRC_DIR / 'lib-sql'),
                                   config_dir=str(SRC_DIR / 'settings'),
                                   cli_args=args)

class MockParamCapture:
    """ Mock that records the parameters with which a function was called
        as well as the number of calls.
    """
    def __init__(self, retval=0):
        self.called = 0
        self.return_value = retval

    def __call__(self, *args, **kwargs):
        self.called += 1
        self.last_args = args
        self.last_kwargs = kwargs
        return self.return_value

@pytest.fixture
def mock_run_legacy(monkeypatch):
@@ -53,6 +42,7 @@ def mock_run_legacy(monkeypatch):
    monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
    return mock


@pytest.fixture
def mock_func_factory(monkeypatch):
    def get_mock(module, func):
@@ -62,6 +52,7 @@ def mock_func_factory(monkeypatch):

    return get_mock


def test_cli_help(capsys):
    """ Running nominatim tool without arguments prints help.
    """
@@ -72,7 +63,6 @@ def test_cli_help(capsys):


@pytest.mark.parametrize("command,script", [
    (('import', '--continue', 'load-data'), 'setup'),
    (('special-phrases',), 'specialphrases'),
    (('add-data', '--tiger-data', 'tiger'), 'setup'),
    (('add-data', '--file', 'foo.osm'), 'update'),
@@ -85,6 +75,36 @@ def test_legacy_commands_simple(mock_run_legacy, command, script):
    assert mock_run_legacy.last_args[0] == script + '.php'


def test_import_missing_file(temp_db):
    assert 1 == call_nominatim('import', '--osm-file', 'sfsafegweweggdgw.reh.erh')


def test_import_bad_file(temp_db):
    assert 1 == call_nominatim('import', '--osm-file', '.')


def test_import_full(temp_db, mock_func_factory):
    mocks = [
        mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
        mock_func_factory(nominatim.tools.database_import, 'install_module'),
        mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
        mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
        mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
        mock_func_factory(nominatim.tools.database_import, 'load_data'),
        mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
        mock_func_factory(nominatim.tools.refresh, 'setup_website'),
    ]

    cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
    mock_func_factory(nominatim.clicmd.setup, 'run_legacy_script')

    assert 0 == call_nominatim('import', '--osm-file', __file__)

    assert cf_mock.called > 1

    for mock in mocks:
        assert mock.called == 1

def test_freeze_command(mock_func_factory, temp_db):
    mock_drop = mock_func_factory(nominatim.tools.freeze, 'drop_update_tables')
    mock_flatnode = mock_func_factory(nominatim.tools.freeze, 'drop_flatnode_file')
@@ -146,24 +166,13 @@ def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
    assert rank_mock.called == do_ranks


@pytest.mark.parametrize("command,params", [
    ('wiki-data', ('setup.php', '--import-wikipedia-articles')),
    ('importance', ('update.php', '--recompute-importance')),
])
def test_refresh_legacy_command(mock_func_factory, temp_db, command, params):
    mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')

    assert 0 == call_nominatim('refresh', '--' + command)

    assert mock_run_legacy.called == 1
    assert len(mock_run_legacy.last_args) >= len(params)
    assert mock_run_legacy.last_args[:len(params)] == params

@pytest.mark.parametrize("command,func", [
    ('postcodes', 'update_postcodes'),
    ('word-counts', 'recompute_word_counts'),
    ('address-levels', 'load_address_levels_from_file'),
    ('functions', 'create_functions'),
    ('wiki-data', 'import_wikipedia_articles'),
    ('importance', 'recompute_importance'),
    ('website', 'setup_website'),
])
def test_refresh_command(mock_func_factory, temp_db, command, func):
@@ -173,86 +182,16 @@ def test_refresh_command(mock_func_factory, temp_db, command, func):
    assert func_mock.called == 1


def test_refresh_importance_computed_after_wiki_import(mock_func_factory, temp_db):
    mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
    calls = []
    monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles',
                        lambda *args, **kwargs: calls.append('import') or 0)
    monkeypatch.setattr(nominatim.tools.refresh, 'recompute_importance',
                        lambda *args, **kwargs: calls.append('update'))

    assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')

    assert mock_run_legacy.called == 2
    assert mock_run_legacy.last_args == ('update.php', '--recompute-importance')


@pytest.mark.parametrize("params,func", [
    (('--init', '--no-update-functions'), 'init_replication'),
    (('--check-for-updates',), 'check_for_updates')
])
def test_replication_command(mock_func_factory, temp_db, params, func):
    func_mock = mock_func_factory(nominatim.tools.replication, func)

    assert 0 == call_nominatim('replication', *params)
    assert func_mock.called == 1


def test_replication_update_bad_interval(monkeypatch, temp_db):
    monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx')

    assert call_nominatim('replication') == 1


def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
    monkeypatch.setenv('NOMINATIM_REPLICATION_URL',
                       'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates')

    assert call_nominatim('replication') == 1


@pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE,
                                   nominatim.tools.replication.UpdateState.NO_CHANGES])
def test_replication_update_once_no_index(mock_func_factory, temp_db, temp_db_conn,
                                          status_table, state):
    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
    func_mock = mock_func_factory(nominatim.tools.replication, 'update')

    assert 0 == call_nominatim('replication', '--once', '--no-index')


def test_replication_update_continuous(monkeypatch, temp_db_conn, status_table):
    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
    states = [nominatim.tools.replication.UpdateState.UP_TO_DATE,
              nominatim.tools.replication.UpdateState.UP_TO_DATE]
    monkeypatch.setattr(nominatim.tools.replication, 'update',
                        lambda *args, **kwargs: states.pop())

    index_mock = MockParamCapture()
    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)

    with pytest.raises(IndexError):
        call_nominatim('replication')

    assert index_mock.called == 4


def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, status_table):
    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
    states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
              nominatim.tools.replication.UpdateState.UP_TO_DATE]
    monkeypatch.setattr(nominatim.tools.replication, 'update',
                        lambda *args, **kwargs: states.pop())

    index_mock = MockParamCapture()
    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)

    sleep_mock = MockParamCapture()
    monkeypatch.setattr(time, 'sleep', sleep_mock)

    with pytest.raises(IndexError):
        call_nominatim('replication')

    assert index_mock.called == 2
    assert sleep_mock.called == 1
    assert sleep_mock.last_args[0] == 60
    assert calls == ['import', 'update']


def test_serve_command(mock_func_factory):
127
test/python/test_cli_replication.py
Normal file
@@ -0,0 +1,127 @@
"""
Tests for replication command of command-line interface wrapper.
"""
import datetime as dt
import time
from pathlib import Path

import pytest

import nominatim.cli
import nominatim.indexer.indexer
import nominatim.tools.replication
from nominatim.db import status

from mocks import MockParamCapture

SRC_DIR = (Path(__file__) / '..' / '..' / '..').resolve()

def call_nominatim(*args):
    return nominatim.cli.nominatim(module_dir='build/module',
                                   osm2pgsql_path='build/osm2pgsql/osm2pgsql',
                                   phplib_dir=str(SRC_DIR / 'lib-php'),
                                   data_dir=str(SRC_DIR / 'data'),
                                   phpcgi_path='/usr/bin/php-cgi',
                                   sqllib_dir=str(SRC_DIR / 'lib-sql'),
                                   config_dir=str(SRC_DIR / 'settings'),
                                   cli_args=['replication'] + list(args))

@pytest.fixture
def index_mock(monkeypatch):
    mock = MockParamCapture()
    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock)
    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock)

    return mock


@pytest.fixture
def mock_func_factory(monkeypatch):
    def get_mock(module, func):
        mock = MockParamCapture()
        monkeypatch.setattr(module, func, mock)
        return mock

    return get_mock


@pytest.fixture
def init_status(temp_db_conn, status_table):
    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
    return 1


@pytest.fixture
def update_mock(mock_func_factory, init_status):
    return mock_func_factory(nominatim.tools.replication, 'update')

@pytest.mark.parametrize("params,func", [
    (('--init', '--no-update-functions'), 'init_replication'),
    (('--check-for-updates',), 'check_for_updates')
])
def test_replication_command(mock_func_factory, temp_db, params, func):
    func_mock = mock_func_factory(nominatim.tools.replication, func)

    assert 0 == call_nominatim(*params)
    assert func_mock.called == 1


def test_replication_update_bad_interval(monkeypatch, temp_db):
    monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx')

    assert call_nominatim() == 1


def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
    monkeypatch.setenv('NOMINATIM_REPLICATION_URL',
                       'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates')

    assert call_nominatim() == 1


def test_replication_update_once_no_index(update_mock):
    assert 0 == call_nominatim('--once', '--no-index')

    assert str(update_mock.last_args[1]['osm2pgsql']) == 'build/osm2pgsql/osm2pgsql'


def test_replication_update_custom_osm2pgsql(monkeypatch, update_mock):
    monkeypatch.setenv('NOMINATIM_OSM2PGSQL_BINARY', '/secret/osm2pgsql')
    assert 0 == call_nominatim('--once', '--no-index')

    assert str(update_mock.last_args[1]['osm2pgsql']) == '/secret/osm2pgsql'


def test_replication_update_custom_threads(update_mock):
    assert 0 == call_nominatim('--once', '--no-index', '--threads', '4')

    assert update_mock.last_args[1]['threads'] == 4


def test_replication_update_continuous(monkeypatch, init_status, index_mock):
    states = [nominatim.tools.replication.UpdateState.UP_TO_DATE,
              nominatim.tools.replication.UpdateState.UP_TO_DATE]
    monkeypatch.setattr(nominatim.tools.replication, 'update',
                        lambda *args, **kwargs: states.pop())

    with pytest.raises(IndexError):
        call_nominatim()

    assert index_mock.called == 4


def test_replication_update_continuous_no_change(monkeypatch, init_status, index_mock):
    states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
              nominatim.tools.replication.UpdateState.UP_TO_DATE]
    monkeypatch.setattr(nominatim.tools.replication, 'update',
                        lambda *args, **kwargs: states.pop())

    sleep_mock = MockParamCapture()
    monkeypatch.setattr(time, 'sleep', sleep_mock)

    with pytest.raises(IndexError):
        call_nominatim()

    assert index_mock.called == 2
    assert sleep_mock.called == 1
    assert sleep_mock.last_args[0] == 60
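The states list is what terminates the otherwise endless replication loop: each call of the patched update() pops one element, and the IndexError from the exhausted list is caught by pytest.raises. In miniature:

    states = ['second', 'first']
    fake_update = lambda *args, **kwargs: states.pop()

    fake_update()   # returns 'first'
    fake_update()   # returns 'second'
    fake_update()   # raises IndexError -> breaks the loop under test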
@@ -2,20 +2,20 @@
Tests for specialised connection and cursor classes.
"""
import pytest
import psycopg2

from nominatim.db.connection import connect
from nominatim.db.connection import connect, get_pg_env

@pytest.fixture
def db(temp_db):
    conn = connect('dbname=' + temp_db)
    yield conn
    conn.close()
    with connect('dbname=' + temp_db) as conn:
        yield conn


def test_connection_table_exists(db, temp_db_cursor):
def test_connection_table_exists(db, table_factory):
    assert db.table_exists('foobar') == False

    temp_db_cursor.execute('CREATE TABLE foobar (id INT)')
    table_factory('foobar')

    assert db.table_exists('foobar') == True

@@ -31,6 +31,22 @@ def test_connection_index_exists(db, temp_db_cursor):
    assert db.index_exists('some_index', table='bar') == False


def test_drop_table_existing(db, table_factory):
    table_factory('dummy')
    assert db.table_exists('dummy')

    db.drop_table('dummy')
    assert not db.table_exists('dummy')


def test_drop_table_non_existing(db):
    db.drop_table('dfkjgjriogjigjgjrdghehtre')


def test_drop_table_non_existing_force(db):
    with pytest.raises(psycopg2.ProgrammingError, match='.*does not exist.*'):
        db.drop_table('dfkjgjriogjigjgjrdghehtre', if_exists=False)

def test_connection_server_version_tuple(db):
    ver = db.server_version_tuple()

@@ -38,8 +54,19 @@ def test_connection_server_version_tuple(db):
    assert len(ver) == 2
    assert ver[0] > 8

def test_cursor_scalar(db, temp_db_cursor):
    temp_db_cursor.execute('CREATE TABLE dummy (id INT)')

def test_connection_postgis_version_tuple(db, temp_db_cursor):
    temp_db_cursor.execute('CREATE EXTENSION postgis')

    ver = db.postgis_version_tuple()

    assert isinstance(ver, tuple)
    assert len(ver) == 2
    assert ver[0] >= 2


def test_cursor_scalar(db, table_factory):
    table_factory('dummy')

    with db.cursor() as cur:
        assert cur.scalar('SELECT count(*) FROM dummy') == 0
@@ -49,3 +76,24 @@ def test_cursor_scalar_many_rows(db):
    with db.cursor() as cur:
        with pytest.raises(RuntimeError):
            cur.scalar('SELECT * FROM pg_tables')


def test_get_pg_env_add_variable(monkeypatch):
    monkeypatch.delenv('PGPASSWORD', raising=False)
    env = get_pg_env('user=fooF')

    assert env['PGUSER'] == 'fooF'
    assert 'PGPASSWORD' not in env


def test_get_pg_env_overwrite_variable(monkeypatch):
    monkeypatch.setenv('PGUSER', 'some default')
    env = get_pg_env('user=overwriter')

    assert env['PGUSER'] == 'overwriter'


def test_get_pg_env_ignore_unknown():
    env = get_pg_env('tty=stuff', base_env={})

    assert env == {}
@@ -5,26 +5,58 @@ import psycopg2
import pytest

import nominatim.db.utils as db_utils
from nominatim.errors import UsageError

def test_execute_file_success(temp_db_conn, tmp_path):
def test_execute_file_success(dsn, temp_db_cursor, tmp_path):
    tmpfile = tmp_path / 'test.sql'
    tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')

    db_utils.execute_file(temp_db_conn, tmpfile)
    db_utils.execute_file(dsn, tmpfile)

    with temp_db_conn.cursor() as cur:
        cur.execute('SELECT * FROM test')
    temp_db_cursor.execute('SELECT * FROM test')

        assert cur.rowcount == 1
        assert cur.fetchone()[0] == 56
    assert temp_db_cursor.rowcount == 1
    assert temp_db_cursor.fetchone()[0] == 56

def test_execute_file_bad_file(temp_db_conn, tmp_path):
def test_execute_file_bad_file(dsn, tmp_path):
    with pytest.raises(FileNotFoundError):
        db_utils.execute_file(temp_db_conn, tmp_path / 'test2.sql')
        db_utils.execute_file(dsn, tmp_path / 'test2.sql')

def test_execute_file_bad_sql(temp_db_conn, tmp_path):

def test_execute_file_bad_sql(dsn, tmp_path):
    tmpfile = tmp_path / 'test.sql'
    tmpfile.write_text('CREATE STABLE test (id INT)')

    with pytest.raises(psycopg2.ProgrammingError):
        db_utils.execute_file(temp_db_conn, tmpfile)
    with pytest.raises(UsageError):
        db_utils.execute_file(dsn, tmpfile)


def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path):
    tmpfile = tmp_path / 'test.sql'
    tmpfile.write_text('CREATE STABLE test (id INT)')

    db_utils.execute_file(dsn, tmpfile, ignore_errors=True)


def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
    tmpfile = tmp_path / 'test.sql'
    tmpfile.write_text('INSERT INTO test VALUES(4)')

    db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)')

    temp_db_cursor.execute('SELECT * FROM test')

    assert temp_db_cursor.rowcount == 1
    assert temp_db_cursor.fetchone()[0] == 4


def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor):
    tmpfile = tmp_path / 'test.sql'
    tmpfile.write_text('CREATE TABLE test (id INT)')

    db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)')

    temp_db_cursor.execute('SELECT * FROM test')

    assert temp_db_cursor.rowcount == 1
    assert temp_db_cursor.fetchone()[0] == 23
@@ -12,6 +12,7 @@ class IndexerTestDB:
|
||||
def __init__(self, conn):
|
||||
self.placex_id = itertools.count(100000)
|
||||
self.osmline_id = itertools.count(500000)
|
||||
self.postcode_id = itertools.count(700000)
|
||||
|
||||
self.conn = conn
|
||||
self.conn.set_isolation_level(0)
|
||||
@@ -31,6 +32,12 @@ class IndexerTestDB:
|
||||
indexed_status SMALLINT,
|
||||
indexed_date TIMESTAMP,
|
||||
geometry_sector INTEGER)""")
|
||||
cur.execute("""CREATE TABLE location_postcode (
|
||||
place_id BIGINT,
|
||||
indexed_status SMALLINT,
|
||||
indexed_date TIMESTAMP,
|
||||
country_code varchar(2),
|
||||
postcode TEXT)""")
|
||||
cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
|
||||
AS $$
|
||||
BEGIN
|
||||
@@ -39,10 +46,10 @@ class IndexerTestDB:
|
||||
END IF;
|
||||
RETURN NEW;
|
||||
END; $$ LANGUAGE plpgsql;""")
|
||||
cur.execute("""CREATE TRIGGER placex_update BEFORE UPDATE ON placex
|
||||
FOR EACH ROW EXECUTE PROCEDURE date_update()""")
|
||||
cur.execute("""CREATE TRIGGER osmline_update BEFORE UPDATE ON location_property_osmline
|
||||
FOR EACH ROW EXECUTE PROCEDURE date_update()""")
|
||||
for table in ('placex', 'location_property_osmline', 'location_postcode'):
|
||||
cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
|
||||
FOR EACH ROW EXECUTE PROCEDURE date_update()
|
||||
""".format(table))
|
||||
|
||||
def scalar(self, query):
|
||||
with self.conn.cursor() as cur:
|
||||
@@ -74,6 +81,15 @@ class IndexerTestDB:
|
||||
(next_id, sector))
|
||||
return next_id
|
||||
|
||||
def add_postcode(self, country, postcode):
|
||||
next_id = next(self.postcode_id)
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute("""INSERT INTO location_postcode
|
||||
(place_id, indexed_status, country_code, postcode)
|
||||
VALUES (%s, 1, %s, %s)""",
|
||||
(next_id, country, postcode))
|
||||
return next_id
|
||||
|
||||
def placex_unindexed(self):
|
||||
return self.scalar('SELECT count(*) from placex where indexed_status > 0')
|
||||
|
||||
@@ -87,7 +103,7 @@ def test_db(temp_db_conn):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("threads", [1, 15])
|
||||
def test_index_full(test_db, threads):
|
||||
def test_index_all_by_rank(test_db, threads):
|
||||
for rank in range(31):
|
||||
test_db.add_place(rank_address=rank, rank_search=rank)
|
||||
test_db.add_osmline()
|
||||
@@ -184,3 +200,35 @@ def test_index_boundaries(test_db, threads):
|
||||
assert 0 == test_db.scalar("""
|
||||
SELECT count(*) FROM placex
|
||||
WHERE indexed_status = 0 AND class != 'boundary'""")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("threads", [1, 15])
|
||||
def test_index_postcodes(test_db, threads):
|
||||
for postcode in range(1000):
|
||||
test_db.add_postcode('de', postcode)
|
||||
for postcode in range(32000, 33000):
|
||||
test_db.add_postcode('us', postcode)
|
||||
|
||||
idx = Indexer('dbname=test_nominatim_python_unittest', threads)
|
||||
idx.index_postcodes()
|
||||
|
||||
assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
|
||||
WHERE indexed_status != 0""")
|
||||
|
||||
|
||||
def test_index_full(test_db):
|
||||
for rank in range(4, 10):
|
||||
test_db.add_admin(rank_address=rank, rank_search=rank)
|
||||
for rank in range(31):
|
||||
test_db.add_place(rank_address=rank, rank_search=rank)
|
||||
test_db.add_osmline()
|
||||
for postcode in range(1000):
|
||||
test_db.add_postcode('de', postcode)
|
||||
|
||||
idx = Indexer('dbname=test_nominatim_python_unittest', 4)
|
||||
idx.index_full()
|
||||
|
||||
assert 0 == test_db.placex_unindexed()
|
||||
assert 0 == test_db.osmline_unindexed()
|
||||
assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
|
||||
WHERE indexed_status != 0""")
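
The indexer tests now cover each phase separately as well as the full run. Schematically (a sketch; the module path is an assumption, the constructor arguments and method names are those used above):

    # Sketch only; module path assumed.
    from nominatim.indexer.indexer import Indexer

    idx = Indexer('dbname=test_nominatim_python_unittest', 4)  # DSN, thread count
    idx.index_postcodes()   # just the artificial postcode points
    idx.index_full()        # or: boundaries, ranked places and postcodes in one go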

@@ -9,9 +9,8 @@ from nominatim.tools import admin

 @pytest.fixture
 def db(temp_db, placex_table):
-    conn = connect('dbname=' + temp_db)
-    yield conn
-    conn.close()
+    with connect('dbname=' + temp_db) as conn:
+        yield conn

 def test_analyse_indexing_no_objects(db):
     with pytest.raises(UsageError):

@@ -10,6 +10,10 @@ def test_check_database_unknown_db(def_config, monkeypatch):
     assert 1 == chkdb.check_database(def_config)


+def test_check_database_fatal_test(def_config, temp_db):
+    assert 1 == chkdb.check_database(def_config)
+
+
 def test_check_conection_good(temp_db_conn, def_config):
     assert chkdb.check_connection(temp_db_conn, def_config) == chkdb.CheckState.OK

@@ -59,6 +63,10 @@ def test_check_database_indexes_bad(temp_db_conn, def_config):
     assert chkdb.check_database_indexes(temp_db_conn, def_config) == chkdb.CheckState.FAIL


+def test_check_database_indexes_valid(temp_db_conn, def_config):
+    assert chkdb.check_database_index_valid(temp_db_conn, def_config) == chkdb.CheckState.OK
+
+
 def test_check_tiger_table_disabled(temp_db_conn, def_config, monkeypatch):
     monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA' , 'no')
     assert chkdb.check_tiger_table(temp_db_conn, def_config) == chkdb.CheckState.NOT_APPLICABLE

202  test/python/test_tools_database_import.py  Normal file
@@ -0,0 +1,202 @@
+"""
+Tests for functions to import a new database.
+"""
+import pytest
+import psycopg2
+import sys
+from pathlib import Path
+
+from nominatim.tools import database_import
+from nominatim.errors import UsageError
+
+@pytest.fixture
+def nonexistant_db():
+    dbname = 'test_nominatim_python_unittest'
+
+    conn = psycopg2.connect(database='postgres')
+
+    conn.set_isolation_level(0)
+    with conn.cursor() as cur:
+        cur.execute('DROP DATABASE IF EXISTS {}'.format(dbname))
+
+    yield dbname
+
+    with conn.cursor() as cur:
+        cur.execute('DROP DATABASE IF EXISTS {}'.format(dbname))
+
+@pytest.mark.parametrize("no_partitions", (True, False))
+def test_setup_skeleton(src_dir, nonexistant_db, no_partitions):
+    database_import.setup_database_skeleton('dbname=' + nonexistant_db,
+                                            src_dir / 'data', no_partitions)
+
+    conn = psycopg2.connect(database=nonexistant_db)
+
+    try:
+        with conn.cursor() as cur:
+            cur.execute("SELECT distinct partition FROM country_name")
+            partitions = set([r[0] for r in list(cur)])
+            if no_partitions:
+                assert partitions == set([0])
+            else:
+                assert len(partitions) > 10
+    finally:
+        conn.close()
+
+
+def test_create_db_success(nonexistant_db):
+    database_import.create_db('dbname=' + nonexistant_db, rouser='www-data')
+
+    conn = psycopg2.connect(database=nonexistant_db)
+    conn.close()
+
+
+def test_create_db_already_exists(temp_db):
+    with pytest.raises(UsageError):
+        database_import.create_db('dbname=' + temp_db)
+
+
+def test_create_db_unsupported_version(nonexistant_db, monkeypatch):
+    monkeypatch.setattr(database_import, 'POSTGRESQL_REQUIRED_VERSION', (100, 4))
+
+    with pytest.raises(UsageError, match='PostgreSQL server is too old.'):
+        database_import.create_db('dbname=' + nonexistant_db)
+
+
+def test_create_db_missing_ro_user(nonexistant_db):
+    with pytest.raises(UsageError, match='Missing read-only user.'):
+        database_import.create_db('dbname=' + nonexistant_db, rouser='sdfwkjkjgdugu2;jgsafkljas;')
+
+
+def test_setup_extensions(temp_db_conn, temp_db_cursor):
+    database_import.setup_extensions(temp_db_conn)
+
+    temp_db_cursor.execute('CREATE TABLE t (h HSTORE, geom GEOMETRY(Geometry, 4326))')
+
+
+def test_setup_extensions_old_postgis(temp_db_conn, monkeypatch):
+    monkeypatch.setattr(database_import, 'POSTGIS_REQUIRED_VERSION', (50, 50))
+
+    with pytest.raises(UsageError, match='PostGIS version is too old.'):
+        database_import.setup_extensions(temp_db_conn)
+
+
+def test_install_module(tmp_path):
+    src_dir = tmp_path / 'source'
+    src_dir.mkdir()
+    (src_dir / 'nominatim.so').write_text('TEST nomiantim.so')
+
+    project_dir = tmp_path / 'project'
+    project_dir.mkdir()
+
+    database_import.install_module(src_dir, project_dir, '')
+
+    outfile = project_dir / 'module' / 'nominatim.so'
+
+    assert outfile.exists()
+    assert outfile.read_text() == 'TEST nomiantim.so'
+    assert outfile.stat().st_mode == 33261
+
+
+def test_install_module_custom(tmp_path):
+    (tmp_path / 'nominatim.so').write_text('TEST nomiantim.so')
+
+    database_import.install_module(tmp_path, tmp_path, str(tmp_path.resolve()))
+
+    assert not (tmp_path / 'module').exists()
+
+
+def test_install_module_fail_access(temp_db_conn, tmp_path):
+    (tmp_path / 'nominatim.so').write_text('TEST nomiantim.so')
+
+    with pytest.raises(UsageError, match='.*module cannot be accessed.*'):
+        database_import.install_module(tmp_path, tmp_path, '',
+                                       conn=temp_db_conn)
+
+
+def test_import_base_data(src_dir, temp_db, temp_db_cursor):
+    temp_db_cursor.execute('CREATE EXTENSION hstore')
+    temp_db_cursor.execute('CREATE EXTENSION postgis')
+    database_import.import_base_data('dbname=' + temp_db, src_dir / 'data')
+
+    assert temp_db_cursor.scalar('SELECT count(*) FROM country_name') > 0
+
+
+def test_import_base_data_ignore_partitions(src_dir, temp_db, temp_db_cursor):
+    temp_db_cursor.execute('CREATE EXTENSION hstore')
+    temp_db_cursor.execute('CREATE EXTENSION postgis')
+    database_import.import_base_data('dbname=' + temp_db, src_dir / 'data',
+                                     ignore_partitions=True)
+
+    assert temp_db_cursor.scalar('SELECT count(*) FROM country_name') > 0
+    assert temp_db_cursor.scalar('SELECT count(*) FROM country_name WHERE partition != 0') == 0
+
+
+def test_import_osm_data_simple(temp_db_cursor,osm2pgsql_options):
+    temp_db_cursor.execute('CREATE TABLE place (id INT)')
+    temp_db_cursor.execute('INSERT INTO place values (1)')
+
+    database_import.import_osm_data('file.pdf', osm2pgsql_options)
+
+
+def test_import_osm_data_simple_no_data(temp_db_cursor,osm2pgsql_options):
+    temp_db_cursor.execute('CREATE TABLE place (id INT)')
+
+    with pytest.raises(UsageError, match='No data.*'):
+        database_import.import_osm_data('file.pdf', osm2pgsql_options)
+
+
+def test_import_osm_data_drop(temp_db_conn, temp_db_cursor, tmp_path, osm2pgsql_options):
+    temp_db_cursor.execute('CREATE TABLE place (id INT)')
+    temp_db_cursor.execute('CREATE TABLE planet_osm_nodes (id INT)')
+    temp_db_cursor.execute('INSERT INTO place values (1)')
+
+    flatfile = tmp_path / 'flatfile'
+    flatfile.write_text('touch')
+
+    osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
+
+    database_import.import_osm_data('file.pdf', osm2pgsql_options, drop=True)
+
+    assert not flatfile.exists()
+    assert not temp_db_conn.table_exists('planet_osm_nodes')
+
+
+def test_import_osm_data_default_cache(temp_db_cursor,osm2pgsql_options):
+    temp_db_cursor.execute('CREATE TABLE place (id INT)')
+    temp_db_cursor.execute('INSERT INTO place values (1)')
+
+    osm2pgsql_options['osm2pgsql_cache'] = 0
+
+    database_import.import_osm_data(Path(__file__), osm2pgsql_options)
+
+
+def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
+    tables = ('word', 'placex', 'place_addressline', 'location_area',
+              'location_area_country', 'location_property',
+              'location_property_tiger', 'location_property_osmline',
+              'location_postcode', 'search_name', 'location_road_23')
+    for table in tables:
+        table_factory(table, content=(1, 2, 3))
+
+    database_import.truncate_data_tables(temp_db_conn, max_word_frequency=23)
+
+    for table in tables:
+        assert temp_db_cursor.table_rows(table) == 0
+
+
+@pytest.mark.parametrize("threads", (1, 5))
+def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
+                   temp_db_cursor, threads):
+    for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'):
+        temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
+                                  RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
+                               """.format(func))
+    for oid in range(100, 130):
+        place_row(osm_id=oid)
+    place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
+              geom='SRID=4326;LINESTRING(0 0, 10 10)')
+
+    database_import.load_data(dsn, src_dir / 'data', threads)
+
+    assert temp_db_cursor.table_rows('placex') == 30
+    assert temp_db_cursor.table_rows('location_property_osmline') == 1
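
The new test file exercises each stage of the import pipeline in isolation. Chained together, the helpers are meant to run in roughly this order (a sketch; paths and the DSN are placeholders, only the function names and keywords come from the tests above):

    # Sketch only; values are placeholders.
    from pathlib import Path
    from nominatim.tools import database_import

    dsn = 'dbname=nominatim'
    data_dir = Path('data')   # directory shipping country_name and friends

    # database, extensions, base data and partition layout in one step
    database_import.setup_database_skeleton(dsn, data_dir, False)
    # run osm2pgsql into the place table (needs an options dict as in the tests)
    # database_import.import_osm_data(Path('planet.osm.pbf'), osm2pgsql_options)
    # finally copy the places into the search tables, in parallel
    database_import.load_data(dsn, data_dir, 4)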

@@ -105,8 +105,15 @@ def test_run_api_with_extra_env(tmp_project_dir):

 ### run_osm2pgsql

-def test_run_osm2pgsql():
-    exec_utils.run_osm2pgsql(dict(osm2pgsql='echo', append=False, flatnode_file=None,
-                                  dsn='dbname=foobar', threads=1, osm2pgsql_cache=500,
-                                  osm2pgsql_style='./my.style',
-                                  import_file='foo.bar'))
+def test_run_osm2pgsql(osm2pgsql_options):
+    osm2pgsql_options['append'] = False
+    osm2pgsql_options['import_file'] = 'foo.bar'
+    osm2pgsql_options['tablespaces']['osm_data'] = 'extra'
+    exec_utils.run_osm2pgsql(osm2pgsql_options)
+
+
+def test_run_osm2pgsql_disable_jit(osm2pgsql_options):
+    osm2pgsql_options['append'] = True
+    osm2pgsql_options['import_file'] = 'foo.bar'
+    osm2pgsql_options['disable_jit'] = True
+    exec_utils.run_osm2pgsql(osm2pgsql_options)
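
The inline dict has moved into a shared osm2pgsql_options fixture. Judging purely from the keys the old and new test bodies touch, the fixture must provide a dictionary of roughly this shape (a sketch; only the keys are attested, all values are placeholders):

    # Sketch of the options dict run_osm2pgsql consumes; values illustrative.
    osm2pgsql_options = dict(osm2pgsql='osm2pgsql',         # binary to invoke
                             osm2pgsql_cache=500,           # node cache size
                             osm2pgsql_style='./my.style',
                             threads=1,
                             dsn='dbname=nominatim',
                             flatnode_file=None,
                             tablespaces={'osm_data': ''},  # plus further tablespace slots
                             append=False,                  # True for update runs
                             disable_jit=False,
                             import_file='planet.osm.pbf')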

26  test/python/test_tools_refresh.py  Normal file
@@ -0,0 +1,26 @@
+"""
+Test for various refresh functions.
+"""
+from pathlib import Path
+
+import pytest
+
+from nominatim.tools import refresh
+
+TEST_DIR = (Path(__file__) / '..' / '..').resolve()
+
+def test_refresh_import_wikipedia_not_existing(dsn):
+    assert 1 == refresh.import_wikipedia_articles(dsn, Path('.'))
+
+
+@pytest.mark.parametrize("replace", (True, False))
+def test_refresh_import_wikipedia(dsn, table_factory, temp_db_cursor, replace):
+    if replace:
+        table_factory('wikipedia_article')
+        table_factory('wikipedia_redirect')
+
+    # use the small wikipedia file for the API testdb
+    assert 0 == refresh.import_wikipedia_articles(dsn, TEST_DIR / 'testdb')
+
+    assert temp_db_cursor.scalar('SELECT count(*) FROM wikipedia_article') > 0
+    assert temp_db_cursor.scalar('SELECT count(*) FROM wikipedia_redirect') > 0

@@ -2,9 +2,10 @@
 Tests for function for importing address ranks.
 """
 import json
-import pytest
 from pathlib import Path

+import pytest
+
 from nominatim.tools.refresh import load_address_levels, load_address_levels_from_file

 def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):

@@ -11,9 +11,8 @@ SQL_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-sql').resolve()

 @pytest.fixture
 def db(temp_db):
-    conn = connect('dbname=' + temp_db)
-    yield conn
-    conn.close()
+    with connect('dbname=' + temp_db) as conn:
+        yield conn

 @pytest.fixture
 def db_with_tables(db):

@@ -42,7 +42,7 @@
                         python3-pip python3-setuptools python3-devel \
                         expat-devel zlib-devel

-    pip3 install --user psycopg2 python-dotenv
+    pip3 install --user psycopg2 python-dotenv psutil


 #

@@ -35,7 +35,7 @@
                         python3-pip python3-setuptools python3-devel \
                         expat-devel zlib-devel

-    pip3 install --user psycopg2 python-dotenv
+    pip3 install --user psycopg2 python-dotenv psutil


 #

@@ -30,7 +30,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
                         postgresql-server-dev-10 postgresql-10-postgis-2.4 \
                         postgresql-contrib-10 postgresql-10-postgis-scripts \
                         php php-pgsql php-intl python3-pip \
-                        python3-psycopg2 git
+                        python3-psycopg2 python3-psutil git

 # The python-dotenv package that comes with Ubuntu 18.04 is too old, so
 # install the latest version from pip:

@@ -33,7 +33,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
                        postgresql-server-dev-12 postgresql-12-postgis-3 \
                        postgresql-contrib-12 postgresql-12-postgis-3-scripts \
                        php php-pgsql php-intl python3-dotenv \
-                       python3-psycopg2 git
+                       python3-psycopg2 python3-psutil git

 #
 # System Configuration