Switch to sql.gz format for wikipedia data

The dump import is no longer needed.
This commit is contained in:
Sarah Hoffmann
2019-11-17 10:09:31 +01:00
parent e4555a208d
commit cd3ddec746
2 changed files with 6 additions and 24 deletions

View File

@@ -41,10 +41,9 @@ but it will improve the quality of the results if this is installed.
This data is available as a binary download: This data is available as a binary download:
cd $NOMINATIM_SOURCE_DIR/data cd $NOMINATIM_SOURCE_DIR/data
wget https://www.nominatim.org/data/wikimedia_importance.sql.gz wget https://www.nominatim.org/data/wikimedia-importance.sql.gz
The file is about 1GB and it adds around 10GB to the install The file is about 400MB and adds around 4GB to the Nominatim database.
size of Nominatim. They also increase the install time by an hour or so.
*NOTE:* if you forgot to download the wikipedia rankings, you can also add *NOTE:* if you forgot to download the wikipedia rankings, you can also add
them after the import by running `./utils/setup.php --import-wikipedia-articles` them after the import by running `./utils/setup.php --import-wikipedia-articles`

View File

@@ -323,10 +323,12 @@ class SetupFunctions
public function importWikipediaArticles() public function importWikipediaArticles()
{ {
$sWikiArticlesFile = CONST_Wikipedia_Data_Path.'/wikimedia_importance.sql.gz'; $this->pgExec('DROP TABLE wikipedia_article');
$this->pgExec('DROP TABLE wikipedia_redirect');
$sWikiArticlesFile = CONST_Wikipedia_Data_Path.'/wikimedia-importance.sql.gz';
if (file_exists($sWikiArticlesFile)) { if (file_exists($sWikiArticlesFile)) {
info('Importing wikipedia articles and redirects'); info('Importing wikipedia articles and redirects');
$this->pgsqlRunDropAndRestore($sWikiArticlesFile); $this->pgsqlRunScriptFile($sWikiArticlesFile);
} else { } else {
warn('wikipedia importance dump file not found - places will have default importance'); warn('wikipedia importance dump file not found - places will have default importance');
} }
@@ -737,25 +739,6 @@ class SetupFunctions
} }
} }
/**
 * Restore a custom-format PostgreSQL dump (-Fc) into the configured
 * database via pg_restore, using --clean so that existing objects are
 * dropped before being recreated.
 *
 * @param string $sDumpFile Path of the dump file passed to pg_restore.
 *
 * @return void
 */
private function pgsqlRunDropAndRestore($sDumpFile)
{
    // Collect the command pieces in order; they are joined with single
    // spaces at the end to form the final shell command.
    $aArgs = [
        'pg_restore',
        '-p '.escapeshellarg($this->aDSNInfo['port']),
        '-d '.escapeshellarg($this->aDSNInfo['database']),
        '--no-owner -Fc --clean '.escapeshellarg($sDumpFile),
    ];

    // --if-exists is only added when the reported server version is at
    // least 9.04 (presumably PostgreSQL 9.4 - confirm against
    // getPostgresVersion()).
    if ($this->oDB->getPostgresVersion() >= 9.04) {
        $aArgs[] = '--if-exists';
    }

    // Host and user are optional in the DSN; only pass what is set.
    foreach (['hostspec' => '-h', 'username' => '-U'] as $sKey => $sFlag) {
        if (isset($this->aDSNInfo[$sKey])) {
            $aArgs[] = $sFlag.' '.escapeshellarg($this->aDSNInfo[$sKey]);
        }
    }

    $this->runWithPgEnv(implode(' ', $aArgs));
}
private function pgsqlRunScript($sScript, $bfatal = true) private function pgsqlRunScript($sScript, $bfatal = true)
{ {
runSQLScript( runSQLScript(