DB schema changes: alter table placex add column calculated_country_code varchar(2);

function changes:
-----------------
Move to ST_PointOnSurface from ST_Centroid in various places to avoid looking up a point outside the polygon
Move to ST_Covers from ST_Contains to include points on admin boundaries
Re-order preference for get_country_code now that our data is better. country_osm_grid is now the preferred source.
Fix code to calculate country code in placex_insert, rank_search test was too early
Add extra field to placex 'calculated_country_code' to improve structure of code
Move split_geometry function out of add_location into its own function
Rewrite split_geometry to be more efficient.
Change place_insert to do more updates and fewer delete/inserts (delete is slow)
Include wikipedia links in details.php output
Cleanup no longer used geometry validation (adding overhead)
Include debug statements in functions.sql (--DEBUG: ) and add flag to setup.php to turn them on

setup.php:
----------
add flag --disable-token-precalc to speed up debugging
add flag --index-noanalyse to disable analysing the DB at rank 4 and 26 (previously removed, but on my local DB it seems to be required)
add flag --enable-diff-updates (modifier to --create-functions) to turn on the code required for diff updates without having to modify functions.sql
add flag --enable-debug-statements (modifier to --create-functions) to turn on debug warning statements

update.php:
-----------
added flag --no-index to import osmosis changes without indexing them
extend the hack to allow import of JOSM generated osm files

country_grid.sql - reference copy of the sql used to generate the country_osm_grid table, needs cleanup
This commit is contained in:
Brian Quinion
2012-05-22 16:27:42 +02:00
parent f17c5345ec
commit 8d221e2c82
11 changed files with 321 additions and 202 deletions

View File

@@ -20,17 +20,21 @@
array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
array('enable-diff-updates', '', 0, 1, 0, 0, 'bool', 'Turn on the code required to make diff updates work'),
array('enable-debug-statements', '', 0, 1, 0, 0, 'bool', 'Include debug warning statements in pgsql commands'),
array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'),
array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'),
array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'),
array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
array('create-roads', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Generate default osmosis configuration'),
array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse opertions during index (EXPERT)'),
array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
array('create-website', '', 0, 1, 1, 1, 'realpath', 'Create symlinks to setup web directory'),
@@ -137,7 +141,9 @@
$bDidSomething = true;
if (!file_exists(CONST_BasePath.'/module/nominatim.so')) fail("nominatim module not built");
$sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
$sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
$sTemplate = str_replace('{modulepath}', CONST_BasePath.'/module', $sTemplate);
if ($aCMDResult['enable-diff-updates']) $sTemplate = str_replace('RETURN NEW; -- @DIFFUPDATES@', '--', $sTemplate);
if ($aCMDResult['enable-debug-statements']) $sTemplate = str_replace('--DEBUG:', '', $sTemplate);
pgsqlRunScript($sTemplate);
}
@@ -265,12 +271,15 @@
echo '.';
// pre-create the word list
if (!pg_query($oDB->connection, 'select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;')) fail(pg_last_error($oDB->connection));
echo '.';
if (!pg_query($oDB->connection, 'select count(make_keywords(v)) from (select distinct postcode as v from place) as w where v is not null;')) fail(pg_last_error($oDB->connection));
echo '.';
if (!pg_query($oDB->connection, 'select count(getorcreate_housenumber_id(v)) from (select distinct housenumber as v from place where housenumber is not null) as w;')) fail(pg_last_error($oDB->connection));
echo '.';
if (!$aCMDResult['disable-token-precalc'])
{
if (!pg_query($oDB->connection, 'select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;')) fail(pg_last_error($oDB->connection));
echo '.';
if (!pg_query($oDB->connection, 'select count(make_keywords(v)) from (select distinct postcode as v from place) as w where v is not null;')) fail(pg_last_error($oDB->connection));
echo '.';
if (!pg_query($oDB->connection, 'select count(getorcreate_housenumber_id(v)) from (select distinct housenumber as v from place where housenumber is not null) as w;')) fail(pg_last_error($oDB->connection));
echo '.';
}
$aDBInstances = array();
for($i = 0; $i < $iInstances; $i++)
@@ -431,7 +440,11 @@
$sOutputFile = '';
if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.$aCMDResult['index-output'];
$sBaseCmd = CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$iInstances.$sOutputFile;
passthru($sBaseCmd);
passthru($sBaseCmd.' -R 4');
if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE');
passthru($sBaseCmd.' -r 5 -R 25');
if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE');
passthru($sBaseCmd.' -r 26');
}
if ($aCMDResult['create-search-indices'] || $aCMDResult['all'])

View File

@@ -15,6 +15,8 @@
array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import using osmosis'),
array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import using osmosis forever'),
array('no-npi', '', 0, 1, 0, 0, 'bool', 'Do not write npi index files'),
array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
array('import-npi-all', '', 0, 1, 0, 0, 'bool', 'Import npi pre-indexed files'),
@@ -35,7 +37,6 @@
array('index-estrate', '', 0, 1, 1, 1, 'int', 'Estimated indexed items per second (def:30)'),
array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
array('no-npi', '', 0, 1, 0, 0, 'bool', 'Do not write npi index files'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
@@ -148,6 +149,8 @@
// Hack into a modify request
$sModifyXML = str_replace('<osm version="0.6" generator="OpenStreetMap server">',
'<osmChange version="0.6" generator="OpenStreetMap server"><modify>', $sModifyXML);
$sModifyXML = str_replace('<osm version=\'0.6\' upload=\'true\' generator=\'JOSM\'>',
'<osmChange version="0.6" generator="OpenStreetMap server"><modify>', $sModifyXML);
$sModifyXML = str_replace('</osm>', '</modify></osmChange>', $sModifyXML);
// Outputing this is too verbose
@@ -360,10 +363,15 @@
// Archive for debug?
unlink($sImportFile);
// }
$sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
$sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
// Index file
// Index file
$sThisIndexCmd = $sCMDIndex;
if (!$aResult['no-npi'])
{
$fCMDStartTime = time();
$iFileID = $oDB->getOne('select nextval(\'file\')');
if (PEAR::isError($iFileID))
@@ -376,20 +384,20 @@
$sFileDir .= '/'.str_pad(floor($iFileID/1000) % 1000, 3, '0', STR_PAD_LEFT);
if (!is_dir($sFileDir)) mkdir($sFileDir, 0777, true);
$sThisIndexCmd = $sCMDIndex;
if (!$aResult['no-npi']) {
$sThisIndexCmd .= $sFileDir;
$sThisIndexCmd .= '/'.str_pad($iFileID % 1000, 3, '0', STR_PAD_LEFT);
$sThisIndexCmd .= ".npi.out";
$sThisIndexCmd .= $sFileDir;
$sThisIndexCmd .= '/'.str_pad($iFileID % 1000, 3, '0', STR_PAD_LEFT);
$sThisIndexCmd .= ".npi.out";
preg_match('#^([0-9]{4})-([0-9]{2})-([0-9]{2})#', $sBatchEnd, $aBatchMatch);
$sFileDir = CONST_BasePath.'/export/index/';
$sFileDir .= $aBatchMatch[1].'/'.$aBatchMatch[2];
preg_match('#^([0-9]{4})-([0-9]{2})-([0-9]{2})#', $sBatchEnd, $aBatchMatch);
$sFileDir = CONST_BasePath.'/export/index/';
$sFileDir .= $aBatchMatch[1].'/'.$aBatchMatch[2];
if (!is_dir($sFileDir)) mkdir($sFileDir, 0777, true);
file_put_contents($sFileDir.'/'.$aBatchMatch[3].'.idx', "$sBatchEnd\t$iFileID\n", FILE_APPEND);
}
if (!is_dir($sFileDir)) mkdir($sFileDir, 0777, true);
file_put_contents($sFileDir.'/'.$aBatchMatch[3].'.idx', "$sBatchEnd\t$iFileID\n", FILE_APPEND);
}
if (!$aResult['no-index'])
{
echo "$sThisIndexCmd\n";
exec($sThisIndexCmd, $sJunk, $iErrorLevel);
if ($iErrorLevel)
@@ -398,7 +406,8 @@
exit;
}
if (!$aResult['no-npi']) {
if (!$aResult['no-npi'])
{
$sFileDir = CONST_BasePath.'/export/diff/';
$sFileDir .= str_pad(floor($iFileID/1000000), 3, '0', STR_PAD_LEFT);
$sFileDir .= '/'.str_pad(floor($iFileID/1000) % 1000, 3, '0', STR_PAD_LEFT);
@@ -415,23 +424,23 @@
rename($sFileDir.'/'.str_pad($iFileID % 1000, 3, '0', STR_PAD_LEFT).".npi.out.bz2",
$sFileDir.'/'.str_pad($iFileID % 1000, 3, '0', STR_PAD_LEFT).".npi.bz2");
}
}
echo "Completed for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
$sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')";
$oDB->query($sSQL);
echo "Completed for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
$sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')";
$oDB->query($sSQL);
$sSQL = "update import_status set lastimportdate = '$sBatchEnd'";
$oDB->query($sSQL);
$sSQL = "update import_status set lastimportdate = '$sBatchEnd'";
$oDB->query($sSQL);
$fDuration = time() - $fStartTime;
echo "Completed for $sBatchEnd in ".round($fDuration/60,2)."\n";
if (!$aResult['import-osmosis-all']) exit;
$fDuration = time() - $fStartTime;
echo "Completed for $sBatchEnd in ".round($fDuration/60,2)."\n";
if (!$aResult['import-osmosis-all']) exit;
// }
echo "Sleeping ".max(0,60-$fDuration)." seconds\n";
sleep(max(0,60-$fDuration));
}
}
if ($aResult['import-npi-all'])