Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.

This commit is contained in:
AntoJvlt
2021-03-22 00:07:55 +01:00
parent 1a93319093
commit 6d56cbb3e8
8 changed files with 246 additions and 39 deletions

View File

@@ -216,7 +216,7 @@ endif()
include(GNUInstallDirs)
set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME}/settings)
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
if (BUILD_IMPORTER)
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
@@ -257,9 +257,8 @@ if (BUILD_API)
endif()
install(FILES settings/env.defaults
settings/__init__.py
settings/address-levels.json
settings/phrase_settings.py
settings/phrase-settings.json
settings/import-admin.style
settings/import-street.style
settings/import-address.style

View File

@@ -0,0 +1,163 @@
<?php
/**
 * Command line script that prints, on standard output, the SQL statements
 * needed to import the special phrases listed on the OSM wiki:
 *   - one getorcreate_amenity()/getorcreate_amenityoperator() call per phrase,
 *   - one place_classtype_<class>_<type> table (plus indexes and grant) per
 *     distinct class/type pair found.
 *
 * Diagnostics are written to stderr so that they never end up in the SQL.
 */
@define('CONST_LibDir', dirname(dirname(__FILE__)));

require_once(CONST_LibDir.'/init-cmd.php');
ini_set('memory_limit', '800M');
ini_set('display_errors', 'stderr');

$aCMDOptions
 = array(
    'Import and export special phrases',
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
    array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
    array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
   );
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);

loadSettings($aCMDResult['project-dir'] ?? getcwd());
setupHTTPProxy();

// NOTE(review): the included settings file is expected to define
// $aTagsBlacklist and $aTagsWhitelist; they are read below but never
// assigned in this script.
include(getSettingConfig('PHRASE_CONFIG', 'phrase_settings.php'));

if ($aCMDResult['wiki-import']) {
    $oNormalizer = Transliterator::createFromRules(getSetting('TERM_NORMALIZATION'));
    // Distinct class/type pairs seen so far, keyed by "class|type".
    $aPairs = array();

    $sLanguageIn = getSetting(
        'LANGUAGES',
        'af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
        'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi'
    );

    foreach (explode(',', $sLanguageIn) as $sLanguage) {
        $sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
        $sWikiPageXML = file_get_contents($sURL);

        // A failed download must not be mistaken for an empty wiki page:
        // report it on stderr and go on with the next language.
        if ($sWikiPageXML === false) {
            fwrite(STDERR, "Could not download special phrases for language $sLanguage from $sURL\n");
            continue;
        }

        // Each wiki table row has the form:
        // | label || class || type || operator || plural flag
        if (!preg_match_all(
            '#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#',
            $sWikiPageXML,
            $aMatches,
            PREG_SET_ORDER
        )) {
            continue;
        }

        foreach ($aMatches as $aMatch) {
            $sLabel = trim($aMatch[1]);
            if ($oNormalizer !== null) {
                $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
            } else {
                $sTrans = null;
            }
            $sClass = trim($aMatch[2]);
            $sType = trim($aMatch[3]);
            // hack around a bug where building=yes was imported with
            // quotes into the wiki
            $sType = preg_replace('/(&quot;|")/', '', $sType);

            // sanity check, in case somebody added garbage in the wiki
            if (preg_match('/^\\w+$/', $sClass) < 1
                || preg_match('/^\\w+$/', $sType) < 1
            ) {
                trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
                // Non-zero status so that callers can detect the aborted,
                // incomplete SQL output.
                exit(1);
            }

            // blacklisting: disallow certain class/type combinations
            if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
                continue;
            }

            // whitelisting: if class is in whitelist, allow only tags in the list
            if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
                continue;
            }

            $aPairs[$sClass.'|'.$sType] = array($sClass, $sType);

            // 'near' and 'in' phrases carry their operator, everything else
            // is imported as a plain amenity phrase.
            $sOperator = trim($aMatch[4]);
            if ($sOperator === 'near' || $sOperator === 'in') {
                printf(
                    "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', '%s');\n",
                    pg_escape_string($sLabel),
                    $sTrans,
                    $sClass,
                    $sType,
                    $sOperator
                );
            } else {
                printf(
                    "SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n",
                    pg_escape_string($sLabel),
                    $sTrans,
                    $sClass,
                    $sType
                );
            }
        }
    }

    // Temporary index to speed up the creation of the class/type tables;
    // it is dropped again at the end of the generated script.
    echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);';

    // The tablespace clause is the same for every pair: compute it once.
    $sql_tablespace = getSetting('TABLESPACE_AUX_DATA');
    if ($sql_tablespace) {
        $sql_tablespace = ' TABLESPACE '.$sql_tablespace;
    }

    foreach ($aPairs as $aPair) {
        printf(
            'CREATE TABLE place_classtype_%s_%s'
            . $sql_tablespace
            . ' AS'
            . ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex'
            . " WHERE class = '%s' AND type = '%s'"
            . ";\n",
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1]),
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1])
        );

        printf(
            'CREATE INDEX idx_place_classtype_%s_%s_centroid'
            . ' ON place_classtype_%s_%s USING GIST (centroid)'
            . $sql_tablespace
            . ";\n",
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1]),
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1])
        );

        printf(
            'CREATE INDEX idx_place_classtype_%s_%s_place_id'
            . ' ON place_classtype_%s_%s USING btree(place_id)'
            . $sql_tablespace
            . ";\n",
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1]),
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1])
        );

        printf(
            'GRANT SELECT ON place_classtype_%s_%s TO "%s"'
            . ";\n",
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1]),
            getSetting('DATABASE_WEBUSER')
        );
    }

    echo 'DROP INDEX idx_placex_classtype;';
}

View File

@@ -1,9 +1,16 @@
<?php
// Converts a PHP phrase settings file into a JSON file.
// The path of the PHP settings file is taken from the first command line argument.
$phpPhraseSettingsFile = $argv[1];

// The JSON file is written next to the PHP file: same directory, same base
// name, with the '.php' extension replaced by '.json'.
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';

// Convert only when the PHP file exists and no JSON file has been written yet,
// so that an existing JSON file is never overwritten.
if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
    include $phpPhraseSettingsFile;

    $data = array();
@@ -16,4 +23,8 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)
    $jsonFile = fopen($jsonPhraseSettingsFile, 'w');
    fwrite($jsonFile, json_encode($data));
    fclose($jsonFile);
}

View File

@@ -25,5 +25,5 @@ class ImportSpecialPhrases:
if args.from_wiki:
LOG.warning('Special phrases importation starting')
with connect(args.config.get_libpq_dsn()) as db_connection:
import_from_wiki(args.config, db_connection)
import_from_wiki(args, db_connection)
return 0

View File

@@ -2,26 +2,32 @@
Functions to import special phrases into the database.
"""
import logging
import os
import re
import subprocess
import sys
import json
from os.path import isfile
from psycopg2.sql import Identifier, Literal, SQL
from settings.phrase_settings import BLACK_LIST, WHITE_LIST
from nominatim.tools.exec_utils import get_url
LOG = logging.getLogger()
def import_from_wiki(config, db_connection, languages=None):
def import_from_wiki(args, db_connection, languages=None):
# pylint: disable-msg=too-many-locals
"""
Iterate through all specified languages and
extract corresponding special phrases from the wiki.
"""
black_list, white_list = _load_white_and_black_lists(args)
#Compile the match regex to increase performance for the following loop.
occurence_pattern = re.compile(
r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
)
sanity_check_pattern = re.compile(r'^\w+$')
languages = _get_languages(config) if not languages else languages
languages = _get_languages(args.config) if not languages else languages
#array for pairs of class/type
pairs = dict()
@@ -43,10 +49,10 @@ def import_from_wiki(config, db_connection, languages=None):
_check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
#blacklisting: disallow certain class/type combinations
if phrase_class in BLACK_LIST.keys() and phrase_type in BLACK_LIST[phrase_class]:
if phrase_class in black_list.keys() and phrase_type in black_list[phrase_class]:
continue
#whitelisting: if class is in whitelist, allow only tags in the list
if phrase_class in WHITE_LIST.keys() and phrase_type not in WHITE_LIST[phrase_class]:
if phrase_class in white_list.keys() and phrase_type not in white_list[phrase_class]:
continue
#add class/type to the pairs dict
@@ -56,10 +62,23 @@ def import_from_wiki(config, db_connection, languages=None):
db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
)
_create_place_classtype_table_and_indexes(db_connection, config, pairs)
_create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
db_connection.commit()
LOG.warning('Import done.')
def _load_white_and_black_lists(args):
    """
    Load the black and white lists from the special phrases settings file.

    When `args.config.PHRASE_CONFIG` is set, that file is used (after being
    handed to `_convert_php_settings_if_needed()`). Otherwise the
    `phrase-settings.json` file of `args.config.config_dir` is read.

    Returns a tuple `(black_list, white_list)` holding the content of the
    `blackList` and `whiteList` entries of the settings file.
    Raises KeyError when one of the two entries is missing.
    """
    config = args.config

    if config.PHRASE_CONFIG:
        settings_path = _convert_php_settings_if_needed(args, config.PHRASE_CONFIG)
    else:
        settings_path = os.path.join(str(config.config_dir), 'phrase-settings.json')

    # Read as UTF-8 explicitly instead of relying on the default encoding
    # of the current locale.
    with open(settings_path, 'r', encoding='utf-8') as json_settings:
        settings = json.load(json_settings)

    return settings['blackList'], settings['whiteList']
def _get_languages(config):
"""
@@ -199,3 +218,22 @@ def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
Identifier(config.DATABASE_WEBUSER)))
def _convert_php_settings_if_needed(args, file_path):
    """
    Return the path of the json settings file that corresponds to `file_path`.

    The json path is `file_path` with its extension replaced by `.json`.
    If `file_path` is a php file and the json file does not exist yet, the
    php migration script is run on it first. A failing migration is logged
    and the subprocess.CalledProcessError is re-raised.
    """
    stem, suffix = os.path.splitext(file_path)
    json_path = stem + '.json'

    # Nothing to convert: either not a php file or already converted.
    if suffix != '.php' or isfile(json_path):
        return json_path

    migration_script = args.phplib_dir / 'migration/phraseSettingsToJson.php'
    command = ['/usr/bin/env', 'php', '-Cq', migration_script, file_path]

    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        LOG.error('Error while converting %s to json.', file_path)
        raise

    LOG.warning('special_phrase configuration file has been converted to json.')
    return json_path

View File

@@ -1,3 +0,0 @@
"""
Module for settings
"""

View File

@@ -0,0 +1,25 @@
{
"Comments": [
"The black list contains the class/type combinations to exclude.",
"If a class is in the white list then all types will",
"be ignored except the ones given in the list.",
"Also use this list to exclude an entire class from special phrases."
],
"blackList": {
"boundary": [
"administrative"
],
"place": [
"house",
"houses"
]
},
"whiteList": {
"highway": [
"bus_stop",
"rest_area",
"raceway"
],
"building": []
}
}

View File

@@ -1,26 +0,0 @@
"""
These settings control the import of special phrases from the wiki.
"""
# Class/type combinations to exclude from the special phrases.
# Maps a class to the list of its types that must be skipped.
BLACK_LIST = {
    'boundary': [
        'administrative'
    ],
    'place': [
        'house',
        'houses'
    ]
}

# If a class is in the white list then all of its types are ignored
# except the ones given in the list.
# Also use this list to exclude an entire class from special phrases
# (by giving it an empty list).
WHITE_LIST = {
    'highway': [
        'bus_stop',
        'rest_area',
        'raceway'
    ],
    'building': []
}