Introduction of SPCsvLoader to load special phrases from a csv file

This commit is contained in:
AntoJvlt
2021-05-10 23:09:00 +02:00
parent 00959fac57
commit 9d83da830f
6 changed files with 91 additions and 27 deletions

View File

@@ -2,11 +2,12 @@
Implementation of the 'special-phrases' command.
"""
import logging
from nominatim.errors import UsageError
from pathlib import Path
from nominatim.tools import SPWikiLoader
from nominatim.tools import SPImporter
from nominatim.errors import UsageError
from nominatim.db.connection import connect
from nominatim.tools.special_phrases.sp_importer import SPImporter
from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader
from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
LOG = logging.getLogger()
@@ -24,23 +25,33 @@ class ImportSpecialPhrases:
group = parser.add_argument_group('Input arguments')
group.add_argument('--import-from-wiki', action='store_true',
help='Import special phrases from the OSM wiki to the database.')
group.add_argument('--csv-file', metavar='FILE',
help='CSV file containing phrases to import.')
group.add_argument('--import-from-csv', metavar='FILE',
help='Import special phrases from a CSV file.')
@staticmethod
def run(args):
from ..tokenizer import factory as tokenizer_factory
if args.import_from_wiki:
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
with connect(args.config.get_libpq_dsn()) as db_connection:
SPImporter(
args.config, args.phplib_dir, db_connection, SPWikiLoader(args.config)
).import_phrases(tokenizer)
ImportSpecialPhrases.start_import(args, SPWikiLoader(args.config))
if args.csv_file:
if not Path(args.csv_file).is_file():
LOG.fatal("CSV file '%s' does not exist.", args.csv_file)
if args.import_from_csv:
if not Path(args.import_from_csv).is_file():
LOG.fatal("CSV file '%s' does not exist.", args.import_from_csv)
raise UsageError('Cannot access file.')
ImportSpecialPhrases.start_import(args, SPCsvLoader(args.import_from_csv))
return 0
@staticmethod
def start_import(args, loader):
    """
    Run a special phrases import with the given loader.

    The tokenizer is looked up for the database described by
    ``args.config``; a database connection is then opened and an
    SPImporter built around ``loader`` imports the phrases.
    """
    from ..tokenizer import factory as tokenizer_factory

    phrase_tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
    dsn = args.config.get_libpq_dsn()

    with connect(dsn) as conn:
        importer = SPImporter(args.config, args.phplib_dir, conn, loader)
        importer.import_phrases(phrase_tokenizer)

View File

@@ -2,7 +2,3 @@
Module with functions for importing, updating Nominatim databases
as well as general maintenance helpers.
"""
from nominatim.tools.special_phrases.sp_importer import SPImporter
from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase

View File

@@ -0,0 +1,54 @@
"""
Module containing the SPCsvLoader class.
The class allows loading phrases from a csv file.
"""
import csv
import os
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
from nominatim.tools.special_phrases.sp_loader import SPLoader
from nominatim.errors import UsageError
class SPCsvLoader(SPLoader):
    """
    Special phrases loader reading the phrases from a csv file.

    The loader yields exactly one item: the set of all SpecialPhrase
    objects parsed from the file. Any further call to ``__next__``
    raises StopIteration.
    """

    def __init__(self, csv_path):
        super().__init__()
        self.csv_path = csv_path
        # Set once the file has been handed out, so iteration stops
        # after the first (and only) batch of phrases.
        self.has_been_read = False

    def __next__(self):
        """
        Return the set of phrases of the csv file on the first call
        and raise StopIteration on every following call.

        Raises UsageError when the file is not a valid csv file.
        """
        if self.has_been_read:
            raise StopIteration()

        self.has_been_read = True
        SPCsvLoader.check_csv_validity(self.csv_path)
        return SPCsvLoader.parse_csv(self.csv_path)

    @staticmethod
    def parse_csv(csv_path):
        """
        Open and parse the given csv file.
        Create the corresponding SpecialPhrases.

        The file must have a header row with the columns 'phrase',
        'class', 'type' and 'operator'. Returns the set of
        SpecialPhrase objects built from the data rows.

        Raises UsageError when one of the required columns is missing.
        """
        phrases = set()
        # newline='' is what the csv module expects for file objects;
        # the explicit encoding keeps non-ASCII phrases independent
        # of the system locale.
        with open(csv_path, newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file, delimiter=',')
            for row in reader:
                try:
                    phrase = SpecialPhrase(
                        row['phrase'], row['class'], row['type'], row['operator']
                    )
                except KeyError as err:
                    # Report a malformed header as a usage problem
                    # instead of an unexplained KeyError.
                    raise UsageError(
                        'The file {} is missing the column {}.'.format(csv_path, err)
                    ) from err
                phrases.add(phrase)
        return phrases

    @staticmethod
    def check_csv_validity(csv_path):
        """
        Check that the csv file has the right extension.

        The comparison ignores case, so '.CSV' is accepted as well.
        Raises UsageError when the extension is not '.csv'.
        """
        _, extension = os.path.splitext(csv_path)
        if extension.lower() != '.csv':
            raise UsageError('The file {} is not a csv file.'.format(csv_path))

View File

@@ -1,8 +1,8 @@
"""
Module containing the SPWikiLoader class.
"""
import logging
import re
import logging
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
from nominatim.tools.special_phrases.sp_loader import SPLoader
from nominatim.tools.exec_utils import get_url

View File

@@ -256,7 +256,7 @@ def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
assert rank_mock.called == do_ranks
def test_special_phrases_command(temp_db, mock_func_factory, tokenizer_mock):
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_from_wiki')
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
call_nominatim('special-phrases', '--import-from-wiki')

View File

@@ -2,14 +2,15 @@
Tests for import special phrases methods
of the class SPImporter.
"""
from nominatim.tools import SpecialPhrase
from nominatim.tools import SPWikiLoader
from nominatim.errors import UsageError
from pathlib import Path
import tempfile
from shutil import copyfile
import pytest
from nominatim.tools import SPImporter
from nominatim.tools.special_phrases.sp_importer import SPImporter
from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader
from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
TEST_BASE_DIR = Path(__file__) / '..' / '..'
@@ -187,7 +188,7 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases,
def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, sp_importer,
placex_table, tokenizer_mock):
"""
Check that the main import_from_wiki() method is well executed.
Check that the main import_phrases() method is well executed.
It should create the place_classtype table, the place_id and centroid indexes,
grant access to the web user and execute the SQL functions for amenities.
It should also update the database well by deleting or preserving existing entries
@@ -200,7 +201,9 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, sp_importer,
CREATE TABLE place_classtype_amenity_animal_shelter();
CREATE TABLE place_classtype_wrongclass_wrongtype();""")
monkeypatch.setattr('nominatim.tools.SPWikiLoader._get_wiki_content', mock_get_wiki_content)
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
mock_get_wiki_content)
tokenizer = tokenizer_mock()
sp_importer.import_phrases(tokenizer)
@@ -244,7 +247,7 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, sp_importer,
else:
assert not temp_db_cursor.fetchone()
def mock_get_wiki_content(self, lang):
def mock_get_wiki_content(lang):
"""
Mock the _get_wiki_content() method to return
static xml test file content.