forked from hans/Nominatim
Merge pull request #2401 from lonvia/port-add-data-to-python
Port add-data functions from PHP to Python
This commit is contained in:
@@ -1,150 +0,0 @@
|
||||
<?php
|
||||
@define('CONST_LibDir', dirname(dirname(__FILE__)));

require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/setup_functions.php');

ini_set('memory_limit', '800M');

// Command line script that adds data to an existing database by running
// osm2pgsql in append mode. The data comes either from a local OSM/diff file
// or from a single object downloaded from the OSM API or Overpass.

// (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
$aCMDOptions
 = array(
    'Import / update / index osm data',
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),

    array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),

    array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
    array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
    array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),

    array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
    array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
    array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
    array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),

    array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
   );

getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);

// The parsed options are stored in $aResult (see the getCmdOpt() call above).
// Reading them from any other variable silently falls back to getcwd() and
// makes --project-dir a no-op.
loadSettings($aResult['project-dir'] ?? getcwd());
setupHTTPProxy();

date_default_timezone_set('Etc/UTC');

$oDB = new Nominatim\DB();
$oDB->connect();
$fPostgresVersion = $oDB->getPostgresVersion();

$aDSNInfo = Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) {
    // No port configured: fall back to 5432.
    $aDSNInfo['port'] = 5432;
}

// cache memory to be used by osm2pgsql, should not be more than the available memory
$iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
if ($iCacheMemory + 500 > getTotalMemoryMB()) {
    $iCacheMemory = getCacheMemoryMB();
    echo "WARNING: resetting cache memory to $iCacheMemory\n";
}

// Base osm2pgsql command shared by all import variants below. It is cloned
// before the input file is appended so the template itself stays unchanged.
$oOsm2pgsqlCmd = (new \Nominatim\Shell(getOsm2pgsqlBinary()))
                 ->addParams('--hstore')
                 ->addParams('--latlong')
                 ->addParams('--append')
                 ->addParams('--slim')
                 ->addParams('--with-forward-dependencies', 'false')
                 ->addParams('--log-progress', 'true')
                 ->addParams('--number-processes', 1)
                 ->addParams('--cache', $iCacheMemory)
                 ->addParams('--output', 'gazetteer')
                 ->addParams('--style', getImportStyle())
                 ->addParams('--database', $aDSNInfo['database'])
                 ->addParams('--port', $aDSNInfo['port']);

if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
    $oOsm2pgsqlCmd->addParams('--host', $aDSNInfo['hostspec']);
}
if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
    $oOsm2pgsqlCmd->addParams('--user', $aDSNInfo['username']);
}
if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
    // The password is passed through the environment, not the command line.
    $oOsm2pgsqlCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
}
if (getSetting('FLATNODE_FILE')) {
    $oOsm2pgsqlCmd->addParams('--flat-nodes', getSetting('FLATNODE_FILE'));
}
if ($fPostgresVersion >= 11.0) {
    // Switch off JIT and parallel workers for the osm2pgsql connection.
    $oOsm2pgsqlCmd->addEnvPair(
        'PGOPTIONS',
        '-c jit=off -c max_parallel_workers_per_gather=0'
    );
}

if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
    // import diffs and files directly (e.g. from osmosis --rri)
    $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];

    if (!file_exists($sNextFile)) {
        fail("Cannot open $sNextFile\n");
    }

    // Import the file
    $oCMD = (clone $oOsm2pgsqlCmd)->addParams($sNextFile);
    echo $oCMD->escapedCmd()."\n";
    $iRet = $oCMD->run();

    if ($iRet) {
        fail("Error from osm2pgsql, $iRet\n");
    }

    // Don't update the import status - we don't know what this file contains
}

$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
$bHaveDiff = false;
$bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
$sContentURL = '';

// Build the download URL for a single object. When several object options
// are given, the last matching block below wins.
if (isset($aResult['import-node']) && $aResult['import-node']) {
    if ($bUseOSMApi) {
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node'];
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;';
    }
}

if (isset($aResult['import-way']) && $aResult['import-way']) {
    if ($bUseOSMApi) {
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');%3E;);out%20meta;';
    }
}

if (isset($aResult['import-relation']) && $aResult['import-relation']) {
    if ($bUseOSMApi) {
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full';
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=(rel(id:'.$aResult['import-relation'].');%3E;);out%20meta;';
    }
}

if ($sContentURL) {
    // file_get_contents() returns false on failure. Writing that to disk
    // would produce an empty file and a confusing osm2pgsql error later on,
    // so stop right here instead.
    $sContent = file_get_contents($sContentURL);
    if ($sContent === false) {
        fail("Cannot download $sContentURL\n");
    }
    file_put_contents($sTemporaryFile, $sContent);
    $bHaveDiff = true;
}

if ($bHaveDiff) {
    // import generated change file

    $oCMD = (clone $oOsm2pgsqlCmd)->addParams($sTemporaryFile);
    echo $oCMD->escapedCmd()."\n";

    $iRet = $oCMD->run();
    if ($iRet) {
        fail("osm2pgsql exited with error level $iRet\n");
    }
}
|
||||
@@ -114,63 +114,6 @@ class CommandlineParser:
|
||||
#
|
||||
# No need to document the functions each time.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to make pyosmium optional for replication only.
|
||||
# pylint: disable=E0012,C0415
|
||||
class UpdateAddData:
    """\
    Add additional data from a file or an online source.

    Data is only imported, not indexed. You need to call `nominatim index`
    to complete the process.
    """

    @staticmethod
    def add_args(parser):
        # Exactly one data source must be chosen, hence the required
        # mutually exclusive group nested inside the 'Source' group.
        group_name = parser.add_argument_group('Source')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--file', metavar='FILE',
                           help='Import data from an OSM file')
        group.add_argument('--diff', metavar='FILE',
                           help='Import data from an OSM diff file')
        group.add_argument('--node', metavar='ID', type=int,
                           help='Import a single node from the API')
        group.add_argument('--way', metavar='ID', type=int,
                           help='Import a single way from the API')
        group.add_argument('--relation', metavar='ID', type=int,
                           help='Import a single relation from the API')
        group.add_argument('--tiger-data', metavar='DIR',
                           help='Add housenumbers from the US TIGER census database.')
        group = parser.add_argument_group('Extra arguments')
        group.add_argument('--use-main-api', action='store_true',
                           help='Use OSM API instead of Overpass to download objects')

    @staticmethod
    def run(args):
        from nominatim.tokenizer import factory as tokenizer_factory
        from nominatim.tools import tiger_data

        # TIGER data is handled in Python, everything else is delegated
        # to the legacy update.php script.
        if args.tiger_data:
            tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
            return tiger_data.add_tiger_data(args.tiger_data,
                                             args.config, args.threads or 1,
                                             tokenizer)

        params = ['update.php']
        if args.file:
            params.extend(('--import-file', args.file))
        elif args.diff:
            params.extend(('--import-diff', args.diff))
        elif args.node:
            params.extend(('--import-node', args.node))
        elif args.way:
            params.extend(('--import-way', args.way))
        elif args.relation:
            params.extend(('--import-relation', args.relation))
        if args.use_main_api:
            # update.php names this switch 'import-from-main-api'; it has
            # no 'use-main-api' option, so the CLI name must be translated.
            params.append('--import-from-main-api')
        return run_legacy_script(*params, nominatim_env=args)
|
||||
|
||||
|
||||
class QueryExport:
|
||||
"""\
|
||||
Export addresses as CSV file from the database.
|
||||
@@ -261,7 +204,7 @@ def get_set_parser(**kwargs):
|
||||
|
||||
parser.add_subcommand('special-phrases', clicmd.ImportSpecialPhrases)
|
||||
|
||||
parser.add_subcommand('add-data', UpdateAddData)
|
||||
parser.add_subcommand('add-data', clicmd.UpdateAddData)
|
||||
parser.add_subcommand('index', clicmd.UpdateIndex)
|
||||
parser.add_subcommand('refresh', clicmd.UpdateRefresh())
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ from nominatim.clicmd.replication import UpdateReplication
|
||||
from nominatim.clicmd.api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
|
||||
from nominatim.clicmd.index import UpdateIndex
|
||||
from nominatim.clicmd.refresh import UpdateRefresh
|
||||
from nominatim.clicmd.add_data import UpdateAddData
|
||||
from nominatim.clicmd.admin import AdminFuncs
|
||||
from nominatim.clicmd.freeze import SetupFreeze
|
||||
from nominatim.clicmd.special_phrases import ImportSpecialPhrases
|
||||
|
||||
76
nominatim/clicmd/add_data.py
Normal file
76
nominatim/clicmd/add_data.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
Implementation of the 'add-data' subcommand.
|
||||
"""
|
||||
import logging
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid eventually unused imports.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
class UpdateAddData:
    """\
    Add additional data from a file or an online source.

    Data is only imported, not indexed. You need to call `nominatim index`
    to complete the process.
    """

    @staticmethod
    def add_args(parser):
        # Data sources: exactly one of them has to be given.
        sources = parser.add_argument_group('Source') \
                        .add_mutually_exclusive_group(required=True)
        sources.add_argument('--file', metavar='FILE',
                             help='Import data from an OSM file or diff file')
        sources.add_argument('--diff', metavar='FILE',
                             help='Import data from an OSM diff file (deprecated: use --file)')
        sources.add_argument('--node', metavar='ID', type=int,
                             help='Import a single node from the API')
        sources.add_argument('--way', metavar='ID', type=int,
                             help='Import a single way from the API')
        sources.add_argument('--relation', metavar='ID', type=int,
                             help='Import a single relation from the API')
        sources.add_argument('--tiger-data', metavar='DIR',
                             help='Add housenumbers from the US TIGER census database.')

        # Optional tuning parameters.
        extras = parser.add_argument_group('Extra arguments')
        extras.add_argument('--use-main-api', action='store_true',
                            help='Use OSM API instead of Overpass to download objects')
        extras.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                            help='Size of cache to be used by osm2pgsql (in MB)')
        extras.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
                            help='Set timeout for file downloads.')

    @staticmethod
    def run(args):
        from nominatim.tokenizer import factory as tokenizer_factory
        from nominatim.tools import tiger_data, add_osm_data

        # TIGER import does not go through osm2pgsql at all.
        if args.tiger_data:
            tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
            return tiger_data.add_tiger_data(args.tiger_data, args.config,
                                             args.threads or 1, tokenizer)

        osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)

        # --file takes precedence; --diff is its deprecated alias.
        source_file = args.file or args.diff
        if source_file:
            return add_osm_data.add_data_from_file(source_file, osm2pgsql_params)

        # Single-object import: the first ID given in this order is used.
        for osm_type in ('node', 'way', 'relation'):
            osm_id = getattr(args, osm_type)
            if osm_id:
                return add_osm_data.add_osm_object(osm_type, osm_id,
                                                   args.use_main_api,
                                                   osm2pgsql_params)

        return 0
|
||||
46
nominatim/tools/add_osm_data.py
Normal file
46
nominatim/tools/add_osm_data.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""
|
||||
Function to add additional OSM data from a file or the API into the database.
|
||||
"""
|
||||
from pathlib import Path
|
||||
import logging
|
||||
import urllib
|
||||
|
||||
from nominatim.tools.exec_utils import run_osm2pgsql, get_url
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def add_data_from_file(fname, options):
    """ Import the contents of the OSM file `fname` into the database.
        Both plain OSM files and diff files are accepted, in any format
        supported by libosmium.

        `options` is the osm2pgsql option dictionary. It is modified in
        place: the import file is set and append mode is switched on.
        Always returns 0.
    """
    options.update(import_file=Path(fname), append=True)
    run_osm2pgsql(options)

    # No status update. We don't know where the file came from.
    return 0
|
||||
|
||||
|
||||
def add_osm_object(osm_type, osm_id, use_main_api, options):
    """ Add or update a single OSM object from the latest version of the
        API.

        `osm_type` is expected to be 'node', 'way' or 'relation'. For the
        Overpass query, any value other than 'node' and 'way' is treated
        as a relation. When `use_main_api` is set, the object is downloaded
        from the OSM main API, otherwise from Overpass. Ways and relations
        are fetched together with the objects they reference.

        `options` is the osm2pgsql option dictionary. It is modified in
        place: append mode is switched on and the downloaded data is
        stored under 'import_data' as UTF-8 encoded bytes.

        Returns 0, like add_data_from_file(), so that callers can use the
        result as the command status.
    """
    if use_main_api:
        base_url = f'https://www.openstreetmap.org/api/0.6/{osm_type}/{osm_id}'
        if osm_type in ('way', 'relation'):
            # '/full' also returns the members of the object.
            base_url += '/full'
    else:
        # use Overpass API
        if osm_type == 'node':
            data = f'node({osm_id});out meta;'
        elif osm_type == 'way':
            data = f'(way({osm_id});>;);out meta;'
        else:
            data = f'(rel(id:{osm_id});>;);out meta;'
        base_url = 'https://overpass-api.de/api/interpreter?' \
                   + urllib.parse.urlencode({'data': data})

    options['append'] = True
    options['import_data'] = get_url(base_url).encode('utf-8')

    run_osm2pgsql(options)

    return 0
|
||||
@@ -128,9 +128,14 @@ def run_osm2pgsql(options):
|
||||
if options.get('disable_jit', False):
|
||||
env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'
|
||||
|
||||
cmd.append(str(options['import_file']))
|
||||
if 'import_data' in options:
|
||||
cmd.extend(('-r', 'xml', '-'))
|
||||
else:
|
||||
cmd.append(str(options['import_file']))
|
||||
|
||||
subprocess.run(cmd, cwd=options.get('cwd', '.'), env=env, check=True)
|
||||
subprocess.run(cmd, cwd=options.get('cwd', '.'),
|
||||
input=options.get('import_data'),
|
||||
env=env, check=True)
|
||||
|
||||
|
||||
def get_url(url):
|
||||
|
||||
@@ -15,6 +15,7 @@ import nominatim.clicmd.admin
|
||||
import nominatim.clicmd.setup
|
||||
import nominatim.indexer.indexer
|
||||
import nominatim.tools.admin
|
||||
import nominatim.tools.add_osm_data
|
||||
import nominatim.tools.check_database
|
||||
import nominatim.tools.database_import
|
||||
import nominatim.tools.freeze
|
||||
@@ -60,7 +61,6 @@ class TestCli:
|
||||
|
||||
|
||||
@pytest.mark.parametrize("command,script", [
|
||||
(('add-data', '--file', 'foo.osm'), 'update'),
|
||||
(('export',), 'export')
|
||||
])
|
||||
def test_legacy_commands_simple(self, mock_run_legacy, command, script):
|
||||
@@ -88,13 +88,20 @@ class TestCli:
|
||||
assert mock.called == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'),
|
||||
('node', 12), ('way', 8), ('relation', 32)])
|
||||
def test_add_data_command(self, mock_run_legacy, name, oid):
|
||||
@pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc')])
|
||||
def test_add_data_file_command(self, mock_func_factory, name, oid):
|
||||
mock_run_legacy = mock_func_factory(nominatim.tools.add_osm_data, 'add_data_from_file')
|
||||
assert self.call_nominatim('add-data', '--' + name, str(oid)) == 0
|
||||
|
||||
assert mock_run_legacy.called == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name,oid", [('node', 12), ('way', 8), ('relation', 32)])
|
||||
def test_add_data_object_command(self, mock_func_factory, name, oid):
|
||||
mock_run_legacy = mock_func_factory(nominatim.tools.add_osm_data, 'add_osm_object')
|
||||
assert self.call_nominatim('add-data', '--' + name, str(oid)) == 0
|
||||
|
||||
assert mock_run_legacy.called == 1
|
||||
assert mock_run_legacy.last_args == ('update.php', '--import-' + name, oid)
|
||||
|
||||
|
||||
def test_serve_command(self, mock_func_factory):
|
||||
|
||||
Reference in New Issue
Block a user