port export function to Python

Some of the parameters have been renoved as they don't make sense
anymore.
This commit is contained in:
Sarah Hoffmann
2023-07-17 22:37:07 +02:00
parent faeee7528f
commit 0804cc0cff
7 changed files with 200 additions and 281 deletions

View File

@@ -16,6 +16,7 @@ import from this file, not from the source files directly.
from .core import (NominatimAPI as NominatimAPI,
NominatimAPIAsync as NominatimAPIAsync)
from .connection import (SearchConnection as SearchConnection)
from .status import (StatusResult as StatusResult)
from .types import (PlaceID as PlaceID,
OsmID as OsmID,

View File

@@ -2,13 +2,13 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Command-line interface to the Nominatim functions for import, update,
database administration and querying.
"""
from typing import Optional, Any, List, Union
from typing import Optional, Any
import importlib
import logging
import os
@@ -17,7 +17,7 @@ import argparse
from pathlib import Path
from nominatim.config import Configuration
from nominatim.tools.exec_utils import run_legacy_script, run_php_server
from nominatim.tools.exec_utils import run_php_server
from nominatim.errors import UsageError
from nominatim import clicmd
from nominatim import version
@@ -140,60 +140,6 @@ class CommandlineParser:
#
# No need to document the functions each time.
# pylint: disable=C0111
class QueryExport:
"""\
Export addresses as CSV file from the database.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Output arguments')
group.add_argument('--output-type', default='street',
choices=('continent', 'country', 'state', 'county',
'city', 'suburb', 'street', 'path'),
help='Type of places to output (default: street)')
group.add_argument('--output-format',
default='street;suburb;city;county;state;country',
help=("Semicolon-separated list of address types "
"(see --output-type). Multiple ranks can be "
"merged into one column by simply using a "
"comma-separated list."))
group.add_argument('--output-all-postcodes', action='store_true',
help=("List all postcodes for address instead of "
"just the most likely one"))
group.add_argument('--language',
help=("Preferred language for output "
"(use local name, if omitted)"))
group = parser.add_argument_group('Filter arguments')
group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
help='Export only objects within country')
group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
help='Export only children of this OSM node')
group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
help='Export only children of this OSM way')
group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
help='Export only children of this OSM relation')
def run(self, args: NominatimArgs) -> int:
params: List[Union[int, str]] = [
'--output-type', args.output_type,
'--output-format', args.output_format]
if args.output_all_postcodes:
params.append('--output-all-postcodes')
if args.language:
params.extend(('--language', args.language))
if args.restrict_to_country:
params.extend(('--restrict-to-country', args.restrict_to_country))
if args.restrict_to_osm_node:
params.extend(('--restrict-to-osm-node', args.restrict_to_osm_node))
if args.restrict_to_osm_way:
params.extend(('--restrict-to-osm-way', args.restrict_to_osm_way))
if args.restrict_to_osm_relation:
params.extend(('--restrict-to-osm-relation', args.restrict_to_osm_relation))
return run_legacy_script('export.php', *params, config=args.config)
class AdminServe:
"""\
Start a simple web server for serving the API.
@@ -260,7 +206,7 @@ def get_set_parser() -> CommandlineParser:
parser.add_subcommand('admin', clicmd.AdminFuncs())
parser.add_subcommand('export', QueryExport())
parser.add_subcommand('export', clicmd.QueryExport())
parser.add_subcommand('serve', AdminServe())
parser.add_subcommand('search', clicmd.APISearch())

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Subcommand definitions for the command-line tool.
@@ -24,3 +24,4 @@ from nominatim.clicmd.add_data import UpdateAddData as UpdateAddData
from nominatim.clicmd.admin import AdminFuncs as AdminFuncs
from nominatim.clicmd.freeze import SetupFreeze as SetupFreeze
from nominatim.clicmd.special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
from nominatim.clicmd.export import QueryExport as QueryExport

View File

@@ -100,9 +100,6 @@ class NominatimArgs:
output_all_postcodes: bool
language: Optional[str]
restrict_to_country: Optional[str]
restrict_to_osm_node: Optional[int]
restrict_to_osm_way: Optional[int]
restrict_to_osm_relation: Optional[int]
# Arguments to 'refresh'
postcodes: bool

193
nominatim/clicmd/export.py Normal file
View File

@@ -0,0 +1,193 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'export' subcommand.
"""
from typing import Optional, List, cast
import logging
import argparse
import asyncio
import csv
import sys
import sqlalchemy as sa
from nominatim.clicmd.args import NominatimArgs
import nominatim.api as napi
from nominatim.api.results import create_from_placex_row, ReverseResult, add_result_details
from nominatim.api.types import LookupDetails
from nominatim.errors import UsageError
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
# Needed for SQLAlchemy
# pylint: disable=singleton-comparison
LOG = logging.getLogger()
RANK_RANGE_MAP = {
'country': (4, 4),
'state': (5, 9),
'county': (10, 12),
'city': (13, 16),
'suburb': (17, 21),
'street': (26, 26),
'path': (27, 27)
}
RANK_TO_OUTPUT_MAP = {
4: 'country',
5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
10: 'county', 11: 'county', 12: 'county',
13: 'city', 14: 'city', 15: 'city', 16: 'city',
17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
26: 'street', 27: 'path'}
class QueryExport:
"""\
Export places as CSV file from the database.
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_argument_group('Output arguments')
group.add_argument('--output-type', default='street',
choices=('country', 'state', 'county',
'city', 'suburb', 'street', 'path'),
help='Type of places to output (default: street)')
group.add_argument('--output-format',
default='street;suburb;city;county;state;country',
help=("Semicolon-separated list of address types "
"(see --output-type)."))
group.add_argument('--language',
help=("Preferred language for output "
"(use local name, if omitted)"))
group = parser.add_argument_group('Filter arguments')
group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
help='Export only objects within country')
group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
dest='node',
help='Export only children of this OSM node')
group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
dest='way',
help='Export only children of this OSM way')
group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
dest='relation',
help='Export only children of this OSM relation')
def run(self, args: NominatimArgs) -> int:
return asyncio.run(export(args))
async def export(args: NominatimArgs) -> int:
""" The actual export as a asynchronous function.
"""
api = napi.NominatimAPIAsync(args.project_dir)
output_range = RANK_RANGE_MAP[args.output_type]
writer = init_csv_writer(args.output_format)
async with api.begin() as conn, api.begin() as detail_conn:
t = conn.t.placex
sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.admin_level,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
t.c.importance, t.c.wikipedia, t.c.indexed_date,
t.c.rank_address, t.c.rank_search,
t.c.centroid)\
.where(t.c.linked_place_id == None)\
.where(t.c.rank_address.between(*output_range))
parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
if parent_place_id:
taddr = conn.t.addressline
sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
.where(taddr.c.address_place_id == parent_place_id)\
.where(taddr.c.isaddress)
if args.restrict_to_country:
sql = sql.where(t.c.country_code == args.restrict_to_country.lower())
results = []
for row in await conn.execute(sql):
result = create_from_placex_row(row, ReverseResult)
if result is not None:
results.append(result)
if len(results) == 1000:
await dump_results(detail_conn, results, writer, args.language)
results = []
if results:
await dump_results(detail_conn, results, writer, args.language)
return 0
def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
fields = output_format.split(';')
writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
writer.writeheader()
return writer
async def dump_results(conn: napi.SearchConnection,
results: List[ReverseResult],
writer: 'csv.DictWriter[str]',
lang: Optional[str]) -> None:
await add_result_details(conn, results,
LookupDetails(address_details=True))
locale = napi.Locales([lang] if lang else None)
for result in results:
data = {'placeid': result.place_id,
'postcode': result.postcode}
result.localize(locale)
for line in (result.address_rows or []):
if line.isaddress and line.local_name\
and line.rank_address in RANK_TO_OUTPUT_MAP:
data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name
writer.writerow(data)
async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
way_id: Optional[int],
relation_id: Optional[int]) -> Optional[int]:
""" Get the place ID for the given OSM object.
"""
if node_id is not None:
osm_type, osm_id = 'N', node_id
elif way_id is not None:
osm_type, osm_id = 'W', way_id
elif relation_id is not None:
osm_type, osm_id = 'R', relation_id
else:
return None
t = conn.t.placex
sql = sa.select(t.c.place_id).limit(1)\
.where(t.c.osm_type == osm_type)\
.where(t.c.osm_id == osm_id)\
.where(t.c.rank_address > 0)\
.order_by(t.c.rank_address)
for result in await conn.execute(sql):
return cast(int, result[0])
raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')