From bd4b1b2d08e3428d43245ee8e3b4bb5aa9c2e5f6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 25 May 2017 16:26:09 +0200 Subject: [PATCH 1/2] switch from osmosis to pyosmium for updates Pyosmium comes with convenient functions for finding the right state and does not require external files for remembering the state. Updates can now conveniently be set up by simply running ./utils/update.php --init-updates and state is kept directly in the import_status table. This change requires an update in the database schema. Run the following to update: ALTER TABLE import_status ADD COLUMN sequence_id integer; ALTER TABLE import_status ADD COLUMN indexed boolean; ALTER TABLE import_osmosis_log ADD COLUMN batchseq integer; --- lib/lib.php | 12 +++ settings/defaults.php | 4 +- sql/tables.sql | 5 +- utils/setup.php | 84 ++-------------- utils/update.php | 229 ++++++++++++++++++++---------------------- 5 files changed, 133 insertions(+), 201 deletions(-) diff --git a/lib/lib.php b/lib/lib.php index a5351918..d47aca57 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -33,6 +33,18 @@ function getCacheMemoryMB() return (int)($aMatches[1]/1024); } +function getDatabaseDate(&$oDB) +{ + // Find the newest node in the DB + $iLastOSMID = $oDB->getOne("select max(osm_id) from place where osm_type = 'N'"); + // Lookup the timestamp that node was created + $sLastNodeURL = 'http://www.openstreetmap.org/api/0.6/node/'.$iLastOSMID."/1"; + $sLastNodeXML = file_get_contents($sLastNodeURL); + preg_match('#timestamp="(([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z)"#', $sLastNodeXML, $aLastNodeDate); + + return $aLastNodeDate[1]; +} + function bySearchRank($a, $b) { diff --git a/settings/defaults.php b/settings/defaults.php index a35c3cc8..41acd8e6 100644 --- a/settings/defaults.php +++ b/settings/defaults.php @@ -41,7 +41,7 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true); // Paths @define('CONST_ExtraDataPath', CONST_BasePath.'/data'); 
@define('CONST_Osm2pgsql_Binary', CONST_InstallPath.'/osm2pgsql/osm2pgsql'); -@define('CONST_Osmosis_Binary', '@OSMOSIS_PATH@'); +@define('CONST_Pyosmium_Binary', 'pyosmium-get-changes'); @define('CONST_Tiger_Data_Path', CONST_ExtraDataPath.'/tiger'); @define('CONST_Wikipedia_Data_Path', CONST_ExtraDataPath); @@ -67,7 +67,7 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true); // Replication settings @define('CONST_Replication_Url', 'http://planet.openstreetmap.org/replication/minute'); -@define('CONST_Replication_MaxInterval', '3600'); +@define('CONST_Replication_Max_Diff_size', '30'); // MB of update data to download per batch @define('CONST_Replication_Update_Interval', '60'); // How often upstream publishes diffs @define('CONST_Replication_Recheck_Interval', '60'); // How long to sleep if no update found yet diff --git a/sql/tables.sql b/sql/tables.sql index 6263025c..f3217d5a 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -1,12 +1,15 @@ drop table if exists import_status; CREATE TABLE import_status ( - lastimportdate timestamp NOT NULL + lastimportdate timestamp NOT NULL, + sequence_id integer, + indexed boolean ); GRANT SELECT ON import_status TO "{www-user}" ; drop table if exists import_osmosis_log; CREATE TABLE import_osmosis_log ( batchend timestamp, + batchseq integer, batchsize integer, starttime timestamp, endtime timestamp, diff --git a/utils/setup.php b/utils/setup.php index b0a3bf60..fe9926b6 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -404,6 +404,12 @@ if ($aCMDResult['load-data'] || $aCMDResult['all']) { echo "\n"; echo "Reanalysing database...\n"; pgsqlRunScript('ANALYSE'); + + $sDatabaseDate = getDatabaseDate($oDB); + pg_query($oDB->connection, 'TRUNCATE import_status'); + $sSQL = "INSERT INTO import_status (lastimportdate) VALUES('".$sDatabaseDate."')"; + pg_query($oDB->connection, $sSQL); + echo "Latest data imported from $sDatabaseDate.\n"; } if ($aCMDResult['import-tiger-data']) { @@ -500,83 +506,9 @@ 
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { } } -if ($aCMDResult['osmosis-init'] || ($aCMDResult['all'] && !$aCMDResult['drop'])) { // no use doing osmosis-init when dropping update tables +if ($aCMDResult['osmosis-init']) { $bDidSomething = true; - $oDB =& getDB(); - - if (!file_exists(CONST_Osmosis_Binary)) { - echo "Please download osmosis.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n"; - if (!$aCMDResult['all']) { - fail("osmosis not found in '".CONST_Osmosis_Binary."'"); - } - } else { - if (file_exists(CONST_InstallPath.'/settings/configuration.txt')) { - echo "settings/configuration.txt already exists\n"; - } else { - passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_InstallPath.'/settings'); - // update osmosis configuration.txt with our settings - passthru("sed -i 's!baseUrl=.*!baseUrl=".CONST_Replication_Url."!' ".CONST_InstallPath.'/settings/configuration.txt'); - passthru("sed -i 's:maxInterval = .*:maxInterval = ".CONST_Replication_MaxInterval.":' ".CONST_InstallPath.'/settings/configuration.txt'); - } - - // Find the last node in the DB - $iLastOSMID = $oDB->getOne("select max(osm_id) from place where osm_type = 'N'"); - - // Lookup the timestamp that node was created (less 3 hours for margin for changsets to be closed) - $sLastNodeURL = 'http://www.openstreetmap.org/api/0.6/node/'.$iLastOSMID."/1"; - $sLastNodeXML = file_get_contents($sLastNodeURL); - preg_match('#timestamp="(([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z)"#', $sLastNodeXML, $aLastNodeDate); - $iLastNodeTimestamp = strtotime($aLastNodeDate[1]) - (3*60*60); - - // Search for the correct state file - uses file timestamps so need to sort by date descending - $sRepURL = CONST_Replication_Url."/"; - $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1"); - // download.geofabrik.de: 000/26-Feb-2013 11:53 - // planet.openstreetmap.org: 273/ 2013-03-11 07:41 - - 
preg_match_all('#([0-9]{3}/)\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER); - if ($aRepMatches) { - $aPrevRepMatch = false; - foreach ($aRepMatches as $aRepMatch) { - if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break; - $aPrevRepMatch = $aRepMatch; - } - if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch; - - $sRepURL .= $aRepMatch[1]; - $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1"); - preg_match_all('#([0-9]{3}/)\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER); - $aPrevRepMatch = false; - foreach ($aRepMatches as $aRepMatch) { - if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break; - $aPrevRepMatch = $aRepMatch; - } - if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch; - - $sRepURL .= $aRepMatch[1]; - $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1"); - preg_match_all('#([0-9]{3}).state.txt\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER); - $aPrevRepMatch = false; - foreach ($aRepMatches as $aRepMatch) { - if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break; - $aPrevRepMatch = $aRepMatch; - } - if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch; - - $sRepURL .= $aRepMatch[1].'.state.txt'; - echo "Getting state file: $sRepURL\n"; - $sStateFile = file_get_contents($sRepURL); - if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file"); - file_put_contents(CONST_InstallPath.'/settings/state.txt', $sStateFile); - echo "Updating DB status\n"; - pg_query($oDB->connection, 'TRUNCATE import_status'); - $sSQL = "INSERT INTO import_status VALUES('".$aRepMatch[2]."')"; - pg_query($oDB->connection, $sSQL); - } else { - if (!$aCMDResult['all']) { - fail("Cannot read state file directory."); - } - } - } + echo "Command 'osmosis-init' no longer available, please use utils/update.php --init-updates.\n"; } if ($aCMDResult['index'] || $aCMDResult['all']) { diff --git a/utils/update.php b/utils/update.php index d8ac134a..b106c567 100755 --- 
a/utils/update.php +++ b/utils/update.php @@ -12,8 +12,9 @@ $aCMDOptions array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'), array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'), - array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import using osmosis'), - array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import using osmosis forever'), + array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'), + array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'), + array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'), array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolate)'), array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'), @@ -57,10 +58,39 @@ if (!is_null(CONST_Osm2pgsql_Flatnode_File)) { $sOsm2pgsqlCmd .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File; } +if ($aResult['init-updates']) { + $sSetup = CONST_InstallPath.'/utils/setup.php'; + $iRet = -1; + passthru($sSetup.' --create-functions --enable-diff-updates', $iRet); + if ($iRet != 0) { + fail('Error running setup script'); + } -if (isset($aResult['import-diff'])) { - // import diff directly (e.g. from osmosis --rri) - $sNextFile = $aResult['import-diff']; + $sDatabaseDate = getDatabaseDate($oDB); + $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', + strtotime($sDatabaseDate) - (3*60*60)); + + // get the appropriate state id + $aOutput = 0; + exec(CONST_Pyosmium_Get_Changes.' -D '.$sWindBack.' --server '.CONST_Replication_Url, + $aOutput, $iRet); + if ($iRet != 0) { + fail('Error running pyosmium tools'); + } + + pg_query($oDB->connection, 'TRUNCATE import_status'); + $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('"; + $sSQL .= $sDatabaseDate."',".$aOutput[0].", true)"; + if (!pg_query($oDB->connection, $sSQL)) { + fail("Could not enter sequence into database."); + } + + echo "Done. 
Database updates will start at sequence $aOutput[0] ($sWindBack)\n"; +} + +if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { + // import diffs and files directly (e.g. from osmosis --rri) + $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file']; if (!file_exists($sNextFile)) { fail("Cannot open $sNextFile\n"); } @@ -79,16 +109,6 @@ if (isset($aResult['import-diff'])) { $sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc'; $bHaveDiff = false; -if (isset($aResult['import-file']) && $aResult['import-file']) { - $bHaveDiff = true; - $sCMD = CONST_Osmosis_Binary.' --read-xml \''.$aResult['import-file'].'\' --read-empty --derive-change --write-xml-change '.$sTemporaryFile; - echo $sCMD."\n"; - exec($sCMD, $sJunk, $iErrorLevel); - if ($iErrorLevel) { - fail("Error converting osm to osc, osmosis returned: $iErrorLevel\n"); - } -} - $bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api']; $sContentURL = ''; if (isset($aResult['import-node']) && $aResult['import-node']) { @@ -116,33 +136,8 @@ if (isset($aResult['import-relation']) && $aResult['import-relation']) { } if ($sContentURL) { - $sModifyXMLstr = file_get_contents($sContentURL); + file_put_contents($sTemporaryFile, file_get_contents($sContentURL)); $bHaveDiff = true; - - $aSpec = array( - 0 => array("pipe", "r"), // stdin - 1 => array("pipe", "w"), // stdout - 2 => array("pipe", "w") // stderr - ); - $sCMD = CONST_Osmosis_Binary.' 
--read-xml - --read-empty --derive-change --write-xml-change '.$sTemporaryFile; - echo $sCMD."\n"; - $hProc = proc_open($sCMD, $aSpec, $aPipes); - if (!is_resource($hProc)) { - fail("Error converting osm to osc, osmosis failed\n"); - } - fwrite($aPipes[0], $sModifyXMLstr); - fclose($aPipes[0]); - $sOut = stream_get_contents($aPipes[1]); - if ($aResult['verbose']) echo $sOut; - fclose($aPipes[1]); - $sErrors = stream_get_contents($aPipes[2]); - if ($aResult['verbose']) echo $sErrors; - fclose($aPipes[2]); - if ($iError = proc_close($hProc)) { - echo $sOut; - echo $sErrors; - fail("Error converting osm to osc, osmosis returned: $iError\n"); - } } if ($bHaveDiff) { @@ -166,7 +161,7 @@ if ($aResult['deduplicate']) { $aPartitions = chksql($oDB->getCol($sSQL)); $aPartitions[] = 0; - // we don't care about empty search_name_* artitions, they can't contain mentions of duplicates + // we don't care about empty search_name_* partitions, they can't contain mentions of duplicates foreach ($aPartitions as $i => $sPartition) { $sSQL = "select count(*) from search_name_".$sPartition; $nEntries = chksql($oDB->getOne($sSQL)); @@ -236,10 +231,8 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { fail("Error: Update interval too low for download.geofabrik.de. Please check install documentation (http://wiki.openstreetmap.org/wiki/Nominatim/Installation#Updates)\n"); } - $sImportFile = CONST_BasePath.'/data/osmosischange.osc'; - $sOsmosisConfigDirectory = CONST_InstallPath.'/settings'; - $sCMDDownload = CONST_Osmosis_Binary.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile; - $sCMDCheckReplicationLag = CONST_Osmosis_Binary.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory; + $sImportFile = CONST_InstallPath.'/osmosischange.osc'; + $sCMDDownload = CONST_Pyosmium_Get_Changes.' --server '.CONST_Replication_Url.' -o '.$sImportFile.' 
-s '.CONST_Replication_Max_Diff_size; $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile; $sCMDIndex = CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances']; @@ -247,103 +240,95 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { $fStartTime = time(); $iFileSize = 1001; - if (!file_exists($sImportFile)) { - // First check if there are new updates published (except for minutelies - there's always new diffs to process) - if (CONST_Replication_Update_Interval > 60) { - unset($aReplicationLag); - exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel); - while ($iErrorLevel > 0 || $aReplicationLag[0] < 1) { - if ($iErrorLevel) { - echo "Error: $iErrorLevel. "; - echo "Re-trying: ".$sCMDCheckReplicationLag." in ".CONST_Replication_Recheck_Interval." secs\n"; - } else { - echo "."; - } + $aLastState = chksql($oDB->getRow('SELECT * FROM import_status')); + + if (!$aLastState['sequence_id']) { + echo "Updates not set up. Please run ./utils/update.php --init-updates.\n"; + exit(1); + } + + echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n"; + + $sBatchEnd = $aLastState['lastimportdate']; + $iEndSequence = $aLastState['sequence_id']; + + if ($aLastState['indexed'] == 't') { + // Sleep if the update interval has not yet been reached. + $fNextUpdate = $aLastState['lastimportdate'] + CONST_Replication_Update_Interval; + if ($fNextUpdate > $fStartTime) { + $iSleepTime = $fNextUpdate - $fStartTime; + echo "Waiting for next update for $iSleepTime sec."; + sleep($iSleepTime); + } + + // Download the next batch of changes. + unlink($sImportFile); + do { + $fCMDStartTime = time(); + $iNextSeq = (int) $aLastState['sequence_id'] + 1; + unset($aOutput); + echo "$sCMDDownload -I $iNextSeq\n"; + exec($sCMDDownload.' -I '.$iNextSeq, $aOutput, $iResult); + + if ($iResult == 3) { + echo 'No new updates. 
Sleeping for '.CONST_Replication_Recheck_Interval." sec.\n"; sleep(CONST_Replication_Recheck_Interval); - unset($aReplicationLag); - exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel); + } else if ($iResult != 0) { + echo 'ERROR: updates failed.'; + exit($iResult); + } else { + $iEndSequence = (int)$aOutput[0]; } - // There are new replication files - use osmosis to download the file - echo "\n".date('Y-m-d H:i:s')." Replication Delay is ".$aReplicationLag[0]."\n"; - } - $fStartTime = time(); + } while ($iResult); + + // Import the file $fCMDStartTime = time(); - echo $sCMDDownload."\n"; - exec($sCMDDownload, $sJunk, $iErrorLevel); - while ($iErrorLevel > 0) { - echo "Error: $iErrorLevel\n"; - sleep(60); - echo 'Re-trying: '.$sCMDDownload."\n"; - exec($sCMDDownload, $sJunk, $iErrorLevel); + echo $sCMDImport."\n"; + unset($sJunk); + exec($sCMDImport, $sJunk, $iErrorLevel); + if ($iErrorLevel) { + echo "Error executing osm2pgsql: $iErrorLevel\n"; + exit($iErrorLevel); } + + // write the update logs $iFileSize = filesize($sImportFile); - $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory); - $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','osmosis')"; + $sBatchEnd = getDatabaseDate($oDB); + $sSQL = "INSERT INTO import_osmosis_log (batchend, batchseq, batchsize, starttime, endtime, event) values ('$sBatchEnd',$iEndSequence,$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','import')"; var_Dump($sSQL); - $oDB->query($sSQL); - echo date('Y-m-d H:i:s')." Completed osmosis step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; + chksql($oDB->query($sSQL)); + + // update the status + $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence"; + var_Dump($sSQL); + chksql($oDB->query($sSQL)); + echo date('Y-m-d H:i:s')." 
Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; } - $iFileSize = filesize($sImportFile); - $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory); - - // Import the file - $fCMDStartTime = time(); - echo $sCMDImport."\n"; - exec($sCMDImport, $sJunk, $iErrorLevel); - if ($iErrorLevel) { - echo "Error: $iErrorLevel\n"; - exit($iErrorLevel); - } - $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','osm2pgsql')"; - var_Dump($sSQL); - $oDB->query($sSQL); - echo date('Y-m-d H:i:s')." Completed osm2pgsql step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; - - // Archive for debug? - unlink($sImportFile); - - $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory); - // Index file - $sThisIndexCmd = $sCMDIndex; - $fCMDStartTime = time(); - if (!$aResult['no-index']) { + $sThisIndexCmd = $sCMDIndex; + $fCMDStartTime = time(); + echo "$sThisIndexCmd\n"; exec($sThisIndexCmd, $sJunk, $iErrorLevel); if ($iErrorLevel) { echo "Error: $iErrorLevel\n"; exit($iErrorLevel); } + + $sSQL = "INSERT INTO import_osmosis_log (batchend, batchseq, batchsize, starttime, endtime, event) values ('$sBatchEnd',$iEndSequence,$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')"; + var_Dump($sSQL); + $oDB->query($sSQL); + echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; + + $sSQL = "update import_status set indexed = true"; + $oDB->query($sSQL); } - $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')"; - var_Dump($sSQL); - $oDB->query($sSQL); - echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." 
minutes\n"; - - $sSQL = "update import_status set lastimportdate = '$sBatchEnd'"; - $oDB->query($sSQL); - $fDuration = time() - $fStartTime; echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n"; if (!$aResult['import-osmosis-all']) exit(0); - - if (CONST_Replication_Update_Interval > 60) { - $iSleep = max(0, (strtotime($sBatchEnd)+CONST_Replication_Update_Interval-time())); - } else { - $iSleep = max(0, CONST_Replication_Update_Interval-$fDuration); - } - echo date('Y-m-d H:i:s')." Sleeping $iSleep seconds\n"; - sleep($iSleep); } } - -function getosmosistimestamp($sOsmosisConfigDirectory) -{ - $sStateFile = file_get_contents($sOsmosisConfigDirectory.'/state.txt'); - preg_match('#timestamp=(.+)#', $sStateFile, $aResult); - return str_replace('\:', ':', $aResult[1]); -} From 65afe13f0a9f6632a9de063e9df6202360a714d6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 25 May 2017 16:53:44 +0200 Subject: [PATCH 2/2] update docs for osmosis to pyosmium change --- CMakeLists.txt | 12 ++++----- docs/CMakeLists.txt | 1 + docs/Import_and_update.md | 45 +++++++++++++++------------------- docs/Installation.md | 2 +- settings/defaults.php | 2 +- vagrant/install-on-centos-7.sh | 2 +- 6 files changed, 30 insertions(+), 34 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cbbcb9db..d80c4e6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,13 +50,13 @@ find_package(PostgreSQL REQUIRED) include_directories(${PostgreSQL_INCLUDE_DIRS}) link_directories(${PostgreSQL_LIBRARY_DIRS}) -find_program(OSMOSIS osmosis) -if (NOT EXISTS "${OSMOSIS}") - set(OSMOSIS_PATH "/nonexistent") - message(WARNING "Osmosis not found (required for updates)") +find_program(PYOSMIUM pyosmium-get-changes) +if (NOT EXISTS "${PYOSMIUM}") + set(PYOSMIUM_PATH "/nonexistent") + message(WARNING "pyosmium-get-changes not found (required for updates)") else() - set(OSMOSIS_PATH "${OSMOSIS}") - message(STATUS "Using osmosis at ${OSMOSIS_PATH}") + 
set(PYOSMIUM_PATH "${PYOSMIUM}") + message(STATUS "Using pyosmium-get-changes at ${PYOSMIUM_PATH}") endif() diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index bf453a77..7da84491 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -21,6 +21,7 @@ endforeach() set (GENERALDOCFILES Installation.md Import_and_update.md + Faq.md ) foreach (df ${GENERALDOCFILES}) diff --git a/docs/Import_and_update.md b/docs/Import_and_update.md index d839a8a5..266a5d8b 100644 --- a/docs/Import_and_update.md +++ b/docs/Import_and_update.md @@ -118,21 +118,24 @@ Updates ======= There are many different possibilities to update your Nominatim database. -The following section describes how to keep it up-to-date with osmosis. +The following section describes how to keep it up-to-date with Pyosmium. For a list of other methods see the output of `./utils/update.php --help`. -Installing the newest version of osmosis ----------------------------------------- +Installing the newest version of Pyosmium +----------------------------------------- -The version of osmosis that comes with your distribution should be sufficient -in most cases. +It is recommended to install Pyosmium via pip: -If you want to install it by hand, get the latest version from the -[Osmosis website](http://wiki.openstreetmap.org/wiki/Osmosis). Then -tell Nominatim to use this version by adding the following line to -your `settings/local.php`: + pip install --user osmium - @define('CONST_Osmosis_Binary', '/usr/local/bin/osmosis'); +Nominatim needs a tool called `pyosmium-get-changes` that comes with +Pyosmium. You need to tell Nominatim where to find it. Add the +following line to your `settings/local.php`: + + @define('CONST_Pyosmium_Binary', '/home/user/.local/bin/pyosmium-get-changes'); + +The path above is fine if you used the `--user` parameter with pip. +Replace `user` with your user name. 
Setting up the update process ----------------------------- @@ -146,35 +149,27 @@ diffs for Ireland from geofabrik add the following: // base URL of the replication service @define('CONST_Replication_Url', 'http://download.geofabrik.de/europe/ireland-and-northern-ireland-updates'); - // Process each update separately, osmosis cannot merge multiple updates - @define('CONST_Replication_MaxInterval', '40000'); // How often upstream publishes diffs @define('CONST_Replication_Update_Interval', '86400'); // How long to sleep if no update found yet @define('CONST_Replication_Recheck_Interval', '900'); +To set up the update process now run the following command: -Delete any existing `settings/configuration.txt`, then run the following command -to create the osmosis configuration files: + ./utils/update.php --init-updates - ./utils/setup.php --osmosis-init +It outputs the date from which updates will start. Recheck that this date is +what you expect. -Enabling hierarchical updates ------------------------------ - -When a place is updated in the database, all places that contain this place -in their address need to be updated as well. These hierarchical updates are -disabled by default because they slow down the initial import. -Enable them with the following command: - - ./utils/setup.php --create-functions --enable-diff-updates +The --init-updates command needs to be rerun whenever the replication service +is changed. 
Updating Nominatim ------------------ The following command will keep your database constantly up to date: - ./utils/update.php --import-osmosis-all --no-npi + ./utils/update.php --import-osmosis-all If you have imported multiple country extracts and want to keep them up-to-date, have a look at the script in diff --git a/docs/Installation.md b/docs/Installation.md index c4678ed9..41f76df1 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -44,7 +44,7 @@ For running Nominatim: For running continuous updates: - * [osmosis](http://wiki.openstreetmap.org/wiki/Osmosis) + * [pyosmium](http://osmcode.org/pyosmium/) ### Hardware diff --git a/settings/defaults.php b/settings/defaults.php index 41acd8e6..16711542 100644 --- a/settings/defaults.php +++ b/settings/defaults.php @@ -41,7 +41,7 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true); // Paths @define('CONST_ExtraDataPath', CONST_BasePath.'/data'); @define('CONST_Osm2pgsql_Binary', CONST_InstallPath.'/osm2pgsql/osm2pgsql'); -@define('CONST_Pyosmium_Binary', 'pyosmium-get-changes'); +@define('CONST_Pyosmium_Binary', '@PYOSMIUM_PATH@'); @define('CONST_Tiger_Data_Path', CONST_ExtraDataPath.'/tiger'); @define('CONST_Wikipedia_Data_Path', CONST_ExtraDataPath); diff --git a/vagrant/install-on-centos-7.sh b/vagrant/install-on-centos-7.sh index 02c7d943..8aeedcc6 100755 --- a/vagrant/install-on-centos-7.sh +++ b/vagrant/install-on-centos-7.sh @@ -7,7 +7,7 @@ # ================================ # # These instructions expect that you have a freshly installed CentOS version 7. -# Make sure all packages are are up-to-date by running: +# Make sure all packages are up-to-date by running: # sudo yum update -y