replace NOMINATIM_PHRASE_CONFIG with command line option

This commit is contained in:
Sarah Hoffmann
2021-10-22 14:41:14 +02:00
parent cefae021db
commit c77df2d1eb
9 changed files with 43 additions and 146 deletions

View File

@@ -35,6 +35,13 @@ class ImportSpecialPhrases:
An example file can be found in the Nominatim sources at
'test/testdb/full_en_phrases_test.csv'.
The import can be further configured to ignore specific key/value pairs.
This is particularly useful when importing phrases from the wiki. The
default configuration excludes some very common tags like building=yes.
The configuration can be customized by putting a file `phrase-settings.json`
with custom rules into the project directory or by using the `--config`
option to point to another configuration file.
"""
@staticmethod
def add_args(parser):
@@ -45,6 +52,9 @@ class ImportSpecialPhrases:
help='Import special phrases from a CSV file')
group.add_argument('--no-replace', action='store_true',
help='Keep the old phrases and only add the new ones')
group.add_argument('--config', action='store',
help='Configuration file for black/white listing '
'(default: phrase-settings.json)')
@staticmethod
def run(args):
@@ -72,5 +82,5 @@ class ImportSpecialPhrases:
should_replace = not args.no_replace
with connect(args.config.get_libpq_dsn()) as db_connection:
SPImporter(
args.config, args.phplib_dir, db_connection, loader
args.config, db_connection, loader
).import_phrases(tokenizer, should_replace)

View File

@@ -4,6 +4,7 @@ Nominatim configuration accessor.
import logging
import os
from pathlib import Path
import json
import yaml
from dotenv import dotenv_values
@@ -161,14 +162,19 @@ class Configuration:
is loaded using this function and added at the position in the
configuration tree.
"""
assert Path(filename).suffix == '.yaml'
configfile = self.find_config_file(filename, config)
configfile = self._find_config_file(filename, config)
if configfile.suffix in ('.yaml', '.yml'):
return self._load_from_yaml(configfile)
return self._load_from_yaml(configfile)
if configfile.suffix == '.json':
with configfile.open('r') as cfg:
return json.load(cfg)
raise UsageError(f"Config file '{configfile}' has unknown format.")
def _find_config_file(self, filename, config=None):
def find_config_file(self, filename, config=None):
""" Resolve the location of a configuration file given a filename and
an optional configuration option with the file name.
Raises a UsageError when the file cannot be found or is not
@@ -221,7 +227,7 @@ class Configuration:
if Path(fname).is_absolute():
configfile = Path(fname)
else:
configfile = self._find_config_file(loader.construct_scalar(node))
configfile = self.find_config_file(loader.construct_scalar(node))
if configfile.suffix != '.yaml':
LOG.fatal("Format error while reading '%s': only YAML format supported.",

View File

@@ -8,15 +8,9 @@
valids anymore are removed.
"""
import logging
import os
from os.path import isfile
from pathlib import Path
import re
import subprocess
import json
from psycopg2.sql import Identifier, Literal, SQL
from nominatim.errors import UsageError
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
LOG = logging.getLogger()
@@ -33,9 +27,8 @@ class SPImporter():
Takes an sp loader that loads the phrases from an external source.
"""
def __init__(self, config, phplib_dir, db_connection, sp_loader) -> None:
def __init__(self, config, db_connection, sp_loader) -> None:
self.config = config
self.phplib_dir = phplib_dir
self.db_connection = db_connection
self.sp_loader = sp_loader
self.statistics_handler = SpecialPhrasesImporterStatistics()
@@ -101,13 +94,8 @@ class SPImporter():
"""
Load white and black lists from phrase-settings.json.
"""
settings_path = (self.config.config_dir / 'phrase-settings.json').resolve()
settings = self.config.load_sub_configuration('phrase-settings.json')
if self.config.PHRASE_CONFIG:
settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
with settings_path.open("r") as json_settings:
settings = json.load(json_settings)
return settings['blackList'], settings['whiteList']
def _check_sanity(self, phrase):
@@ -255,29 +243,3 @@ class SPImporter():
for table in self.table_phrases_to_delete:
self.statistics_handler.notify_one_table_deleted()
db_cursor.drop_table(table)
def _convert_php_settings_if_needed(self, file_path):
"""
Convert php settings file of special phrases to json file if it is still in php format.
"""
if not isfile(file_path):
raise UsageError(str(file_path) + ' is not a valid file.')
file, extension = os.path.splitext(file_path)
json_file_path = Path(file + '.json').resolve()
if extension not in ('.php', '.json'):
raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')
if extension == '.php' and not isfile(json_file_path):
try:
subprocess.run(['/usr/bin/env', 'php', '-Cq',
(self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(),
file_path], check=True)
LOG.warning('special_phrase configuration file has been converted to json.')
except subprocess.CalledProcessError:
LOG.error('Error while converting %s to json.', file_path)
raise
return json_file_path