changed export.php to work with current master

This commit is contained in:
gemo1011
2018-06-27 14:17:08 +02:00
parent dfb9579a73
commit 073221d321

View File

@@ -1,79 +1,75 @@
#!/usr/bin/php -Cq #!/usr/bin/php -Cq
<?php <?php
# Script to extract structured city and street data // Script to extract structured city and street data
# from a running nominatim instance as CSV data // from a running nominatim instance as CSV data
require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php'); require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
ini_set('memory_limit', '800M'); require_once(CONST_BasePath.'/lib/init-cmd.php');
require_once(CONST_BasePath.'/lib/ParameterParser.php');
ini_set('memory_limit', '800M');
$aCMDOptions = array( $aCMDOptions = array(
"Export addresses as CSV file from a Nominatim database", 'Export addresses as CSV file from a Nominatim database',
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'), array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'), array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'), array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'), array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'), array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'), array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'), array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'), array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'), array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'), array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'), array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
"\nAddress ranks: continent, country, state, county, city, suburb, street, path", "\nAddress ranks: continent, country, state, county, city, suburb, street, path",
"Additional output types: postcode, placeid (placeid for each object)", 'Additional output types: postcode, placeid (placeid for each object)',
"\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks", "\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
"can be merged into one column by simply using a comma-separated list.", 'can be merged into one column by simply using a comma-separated list.',
"\nDefault output-type: street", "\nDefault output-type: street",
"Default output format: street;suburb;city;county;state;country" 'Default output format: street;suburb;city;county;state;country'
); );
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
$aRankmap = array( 'continent' => 1, $aRankmap = array(
'country' => 4, 'continent' => 1,
'state' => 8, 'country' => 4,
'county' => 12, 'state' => 8,
'city' => 16, 'county' => 12,
'suburb' => 20, 'city' => 16,
'street' => 26, 'suburb' => 20,
'path' => 27 'street' => 26,
); 'path' => 27
);
$oDB =& getDB(); $oDB =& getDB();
if (isset($aCMDResult['output-type'])) if (isset($aCMDResult['output-type'])) {
{
if (!isset($aRankmap[$aCMDResult['output-type']])) fail('unknown output-type: '.$aCMDResult['output-type']); if (!isset($aRankmap[$aCMDResult['output-type']])) fail('unknown output-type: '.$aCMDResult['output-type']);
$iOutputRank = $aRankmap[$aCMDResult['output-type']]; $iOutputRank = $aRankmap[$aCMDResult['output-type']];
} } else {
else
{
$iOutputRank = $aRankmap['street']; $iOutputRank = $aRankmap['street'];
} }
// Preferred language // Preferred language
$oParams = new Nominatim\ParameterParser();
if (!isset($aCMDResult['language'])) $aCMDResult['language'] = 'xx'; if (!isset($aCMDResult['language'])) $aCMDResult['language'] = 'xx';
$aLangPrefOrder = getPreferredLanguages($aCMDResult['language']); $aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
$sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]"; $sLanguagePrefArraySQL = 'ARRAY['.join(',', array_map('getDBQuoted', $aLangPrefOrder)).']';
// output formatting: build up a lookup table that maps address ranks to columns // output formatting: build up a lookup table that maps address ranks to columns
$aColumnMapping = array(); $aColumnMapping = array();
$iNumCol = 0; $iNumCol = 0;
If (!isset($aCMDResult['output-format'])) $aCMDResult['output-format'] = 'street;suburb;city;county;state;country'; if (!isset($aCMDResult['output-format'])) $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
foreach (preg_split('/\s*;\s*/',$aCMDResult['output-format']) as $sColumn) foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
{
$bHasData = false; $bHasData = false;
foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
{ if ($sRank == 'postcode' || $sRank == 'placeid') {
if ($sRank == 'postcode' || $sRank == 'placeid')
{
$aColumnMapping[$sRank] = $iNumCol; $aColumnMapping[$sRank] = $iNumCol;
$bHasData = true; $bHasData = true;
} } elseif (isset($aRankmap[$sRank])) {
elseif (isset($aRankmap[$sRank]))
{
$iRank = $aRankmap[$sRank]; $iRank = $aRankmap[$sRank];
if ($iRank <= $iOutputRank) { if ($iRank <= $iOutputRank) {
$aColumnMapping[(string)$iRank] = $iNumCol; $aColumnMapping[(string)$iRank] = $iNumCol;
@@ -87,39 +83,34 @@
// build the query for objects // build the query for objects
$sPlacexSQL = 'select min(place_id) as place_id, '; $sPlacexSQL = 'select min(place_id) as place_id, ';
$sPlacexSQL .= 'array_agg(place_id) as place_ids, '; $sPlacexSQL .= 'array_agg(place_id) as place_ids, ';
$sPlacexSQL .= 'calculated_country_code as cc, '; $sPlacexSQL .= 'country_code as cc, ';
// get the address places excluding postcodes // get the address places excluding postcodes
$sPlacexSQL .= 'array(select address_place_id from place_addressline a where a.place_id = placex.place_id and isaddress and address_place_id != placex.place_id and not cached_rank_address in (5,11) and cached_rank_address > 2 order by cached_rank_address) as address'; $sPlacexSQL .= 'array(select address_place_id from place_addressline a where a.place_id = placex.place_id and isaddress and address_place_id != placex.place_id and not cached_rank_address in (5,11) and cached_rank_address > 2 order by cached_rank_address) as address';
$sPlacexSQL .= " from placex where name is not null and linked_place_id is null"; $sPlacexSQL .= ' from placex where name is not null and linked_place_id is null';
$sPlacexSQL .= ' and rank_address = '.$iOutputRank; $sPlacexSQL .= ' and rank_address = '.$iOutputRank;
if (isset($aCMDResult['restrict-to-country'])) if (isset($aCMDResult['restrict-to-country'])) {
{ $sPlacexSQL .= ' and country_code = '.getDBQuoted($aCMDResult['restrict-to-country']);
$sPlacexSQL .= ' and calculated_country_code = '.getDBQuoted($aCMDResult['restrict-to-country']);
} }
// restriction to parent place id // restriction to parent place id
$sParentId = false; $sParentId = false;
$sOsmType = false; $sOsmType = false;
if (isset($aCMDResult['restrict-to-osm-node'])) if (isset($aCMDResult['restrict-to-osm-node'])) {
{
$sOsmType = 'N'; $sOsmType = 'N';
$sOsmId = $aCMDResult['restrict-to-osm-node']; $sOsmId = $aCMDResult['restrict-to-osm-node'];
} }
if (isset($aCMDResult['restrict-to-osm-way'])) if (isset($aCMDResult['restrict-to-osm-way'])) {
{
$sOsmType = 'W'; $sOsmType = 'W';
$sOsmId = $aCMDResult['restrict-to-osm-way']; $sOsmId = $aCMDResult['restrict-to-osm-way'];
} }
if (isset($aCMDResult['restrict-to-osm-relation'])) if (isset($aCMDResult['restrict-to-osm-relation'])) {
{
$sOsmType = 'R'; $sOsmType = 'R';
$sOsmId = $aCMDResult['restrict-to-osm-relation']; $sOsmId = $aCMDResult['restrict-to-osm-relation'];
} }
if ($sOsmType) if ($sOsmType) {
{
$sSQL = 'select place_id from placex where'; $sSQL = 'select place_id from placex where';
$sSQL .= ' osm_type = '.getDBQuoted($sOsmType); $sSQL .= ' osm_type = '.getDBQuoted($sOsmType);
$sSQL .= ' and osm_id = '.$sOsmId; $sSQL .= ' and osm_id = '.$sOsmId;
@@ -127,67 +118,55 @@
if (PEAR::isError($sParentId)) fail(pg_last_error($oDB->connection)); if (PEAR::isError($sParentId)) fail(pg_last_error($oDB->connection));
if (!$sParentId) fail('Could not find place '.$sOsmType.' '.$sOsmId); if (!$sParentId) fail('Could not find place '.$sOsmType.' '.$sOsmId);
} }
if ($sParentId) if ($sParentId) {
{
$sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)'; $sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
} }
$sPlacexSQL .= " group by name->'name', address, calculated_country_code"; $sPlacexSQL .= " group by name->'name', address, country_code, placex.place_id";
# Iterate over placeids // Iterate over placeids
# to get further hierarchical information // to get further hierarchical information
//var_dump($sPlacexSQL); //var_dump($sPlacexSQL);
$aRes =& $oDB->query($sPlacexSQL); $aRes =& $oDB->query($sPlacexSQL);
if (PEAR::isError($aRes)) fail(pg_last_error($oDB->connection)); if (PEAR::isError($aRes)) fail(pg_last_error($oDB->connection));
$fOutstream = fopen("php://output", 'w'); $fOutstream = fopen('php://output', 'w');
while ($aRes->fetchInto($aRow)) while ($aRes->fetchInto($aRow)) {
{ //var_dump($aRow);
//var_dump($aRow); $iPlaceID = $aRow['place_id'];
$iPlaceID = $aRow['place_id']; $sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID, -1)";
$sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID)"; $sSQL .= ' WHERE isaddress';
$sSQL .= " WHERE isaddress"; $sSQL .= ' order by rank_address desc,isaddress desc';
$sSQL .= " order by rank_address desc,isaddress desc"; $aAddressLines = $oDB->getAll($sSQL);
$aAddressLines = $oDB->getAll($sSQL);
if (PEAR::IsError($aAddressLines)) fail(pg_last_error($oDB->connection)); if (PEAR::IsError($aAddressLines)) fail(pg_last_error($oDB->connection));
$aOutput = array_fill(0, $iNumCol, ''); $aOutput = array_fill(0, $iNumCol, '');
# output address parts // output address parts
foreach ($aAddressLines as $aAddress) foreach ($aAddressLines as $aAddress) {
{ if (isset($aColumnMapping[$aAddress['rank_address']])) {
if (isset($aColumnMapping[$aAddress['rank_address']]))
{
$aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname']; $aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname'];
} }
} }
# output postcode // output postcode
if (isset($aColumnMapping['postcode'])) if (isset($aColumnMapping['postcode'])) {
{ if ($aCMDResult['output-all-postcodes']) {
if ($aCMDResult['output-all-postcodes']) $sSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa ';
{ } else {
$sSQL = "select array_agg(px.postcode) from placex px join place_addressline pa "; $sSQL = 'select px.postcode from placex px join place_addressline pa ';
} }
else $sSQL .= 'on px.place_id = pa.address_place_id ';
{ $sSQL .= 'where pa.cached_rank_address in (5,11) ';
$sSQL = "select px.postcode from placex px join place_addressline pa "; $sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in ('.substr($aRow['place_ids'], 1, -1).')) ';
} $sSQL .= 'group by postcode order by count(*) desc limit 1';
$sSQL .= "on px.place_id = pa.address_place_id ";
$sSQL .= "where pa.cached_rank_address in (5,11) ";
$sSQL .= "and pa.place_id in (select place_id from place_addressline where address_place_id in (".substr($aRow['place_ids'], 1, -1).")) ";
$sSQL .= "group by postcode order by count(*) desc limit 1";
$sRes = $oDB->getOne($sSQL); $sRes = $oDB->getOne($sSQL);
if (PEAR::IsError($sRes)) fail(pg_last_error($oDB->connection)); if (PEAR::IsError($sRes)) fail(pg_last_error($oDB->connection));
if ($aCMDResult['output-all-postcodes']) if ($aCMDResult['output-all-postcodes']) {
{
$aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1); $aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1);
} } else {
else
{
$aOutput[$aColumnMapping['postcode']] = $sRes; $aOutput[$aColumnMapping['postcode']] = $sRes;
} }
} }
if (isset($aColumnMapping['placeid'])) if (isset($aColumnMapping['placeid'])) {
{
$aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1); $aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1);
} }
fputcsv($fOutstream, $aOutput); fputcsv($fOutstream, $aOutput);