forked from hans/Nominatim
Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
This commit is contained in:
@@ -216,7 +216,7 @@ endif()
|
||||
include(GNUInstallDirs)
|
||||
set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
|
||||
set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
|
||||
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME}/settings)
|
||||
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
|
||||
|
||||
if (BUILD_IMPORTER)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
|
||||
@@ -257,9 +257,8 @@ if (BUILD_API)
|
||||
endif()
|
||||
|
||||
install(FILES settings/env.defaults
|
||||
settings/__init__.py
|
||||
settings/address-levels.json
|
||||
settings/phrase_settings.py
|
||||
settings/phrase-settings.json
|
||||
settings/import-admin.style
|
||||
settings/import-street.style
|
||||
settings/import-address.style
|
||||
|
||||
163
lib-php/admin/specialphrases.php
Normal file
163
lib-php/admin/specialphrases.php
Normal file
@@ -0,0 +1,163 @@
|
||||
<?php
/**
 * Command-line helper that prints, on standard output, the SQL statements
 * needed to import the special phrases maintained in the OSM wiki.
 *
 * With --wiki-import the wiki export page of every configured language is
 * downloaded and each matching table row is turned into a call to
 * getorcreate_amenity() or getorcreate_amenityoperator(). Afterwards, for
 * every class/type pair that was seen, the statements creating the
 * place_classtype_<class>_<type> table, its indexes and the grant for the
 * web user are printed.
 */
@define('CONST_LibDir', dirname(dirname(__FILE__)));

require_once(CONST_LibDir.'/init-cmd.php');
ini_set('memory_limit', '800M');
ini_set('display_errors', 'stderr');

$aCMDOptions = array(
    'Import and export special phrases',
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
    array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
    array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);

loadSettings($aCMDResult['project-dir'] ?? getcwd());
setupHTTPProxy();

// NOTE(review): the included settings file presumably defines
// $aTagsBlacklist and $aTagsWhitelist; both are only read through isset()
// below, so a settings file without them is tolerated - confirm.
include(getSettingConfig('PHRASE_CONFIG', 'phrase_settings.php'));

if ($aCMDResult['wiki-import']) {
    $oNormalizer = Transliterator::createFromRules(getSetting('TERM_NORMALIZATION'));
    // Unique class/type combinations, keyed by "class|type".
    $aPairs = array();

    $sLanguageIn = getSetting(
        'LANGUAGES',
        'af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
        'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi'
    );

    foreach (explode(',', $sLanguageIn) as $sLanguage) {
        $sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
        $sWikiPageXML = file_get_contents($sURL);

        // A failed download yields false; there is nothing to parse then.
        if ($sWikiPageXML === false) {
            continue;
        }

        // One match per wiki table row: label || class || type || operator || plural flag.
        if (!preg_match_all(
            '#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#',
            $sWikiPageXML,
            $aMatches,
            PREG_SET_ORDER
        )) {
            continue;
        }

        foreach ($aMatches as $aMatch) {
            $sLabel = trim($aMatch[1]);
            if ($oNormalizer !== null) {
                $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
            } else {
                $sTrans = null;
            }
            $sClass = trim($aMatch[2]);
            $sType = trim($aMatch[3]);
            // hack around a bug where building=yes was imported with
            // quotes into the wiki. The page is read from the XML export,
            // where such quotes show up entity-escaped, so both the entity
            // and the literal character have to be stripped.
            $sType = preg_replace('/(&quot;|")/', '', $sType);
            // sanity check, in case somebody added garbage in the wiki
            if (preg_match('/^\\w+$/', $sClass) < 1
                || preg_match('/^\\w+$/', $sType) < 1
            ) {
                trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
                exit;
            }
            // blacklisting: disallow certain class/type combinations
            if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
                continue;
            }
            // whitelisting: if class is in whitelist, allow only tags in the list
            if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
                continue;
            }
            $aPairs[$sClass.'|'.$sType] = array($sClass, $sType);

            // $sClass and $sType passed the \w+ check above and are
            // therefore safe to embed unescaped.
            $sLabelSql = pg_escape_string($sLabel);
            $sOperator = trim($aMatch[4]);
            if (in_array($sOperator, array('near', 'in'), true)) {
                printf(
                    "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', '%s');\n",
                    $sLabelSql,
                    $sTrans,
                    $sClass,
                    $sType,
                    $sOperator
                );
            } else {
                printf(
                    "SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n",
                    $sLabelSql,
                    $sTrans,
                    $sClass,
                    $sType
                );
            }
        }
    }

    // Temporary index on placex, dropped again at the end of the script.
    echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);';

    // Both settings are the same for every pair, so look them up once.
    $sql_tablespace = getSetting('TABLESPACE_AUX_DATA');
    if ($sql_tablespace) {
        $sql_tablespace = ' TABLESPACE '.$sql_tablespace;
    }
    $sWebUser = getSetting('DATABASE_WEBUSER');

    foreach ($aPairs as $aPair) {
        $sClassSql = pg_escape_string($aPair[0]);
        $sTypeSql = pg_escape_string($aPair[1]);

        printf(
            'CREATE TABLE place_classtype_%s_%s'
            . $sql_tablespace
            . ' AS'
            . ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex'
            . " WHERE class = '%s' AND type = '%s'"
            . ";\n",
            $sClassSql,
            $sTypeSql,
            $sClassSql,
            $sTypeSql
        );

        printf(
            'CREATE INDEX idx_place_classtype_%s_%s_centroid'
            . ' ON place_classtype_%s_%s USING GIST (centroid)'
            . $sql_tablespace
            . ";\n",
            $sClassSql,
            $sTypeSql,
            $sClassSql,
            $sTypeSql
        );

        printf(
            'CREATE INDEX idx_place_classtype_%s_%s_place_id'
            . ' ON place_classtype_%s_%s USING btree(place_id)'
            . $sql_tablespace
            . ";\n",
            $sClassSql,
            $sTypeSql,
            $sClassSql,
            $sTypeSql
        );

        printf(
            'GRANT SELECT ON place_classtype_%s_%s TO "%s"'
            . ";\n",
            $sClassSql,
            $sTypeSql,
            $sWebUser
        );
    }

    echo 'DROP INDEX idx_placex_classtype;';
}
|
||||
@@ -1,9 +1,16 @@
|
||||
<?php
|
||||
|
||||
$phpPhraseSettingsFile = $argv[1];
|
||||
<<<<<<< HEAD
|
||||
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
|
||||
|
||||
if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
|
||||
=======
|
||||
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile)."/".basename($phpPhraseSettingsFile, ".php").".json";
|
||||
|
||||
if(file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile))
|
||||
{
|
||||
>>>>>>> 3d939458... Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
|
||||
include $phpPhraseSettingsFile;
|
||||
|
||||
$data = array();
|
||||
@@ -16,4 +23,8 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)
|
||||
$jsonFile = fopen($jsonPhraseSettingsFile, 'w');
|
||||
fwrite($jsonFile, json_encode($data));
|
||||
fclose($jsonFile);
|
||||
<<<<<<< HEAD
|
||||
}
|
||||
=======
|
||||
}
|
||||
>>>>>>> 3d939458... Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
|
||||
|
||||
@@ -25,5 +25,5 @@ class ImportSpecialPhrases:
|
||||
if args.from_wiki:
|
||||
LOG.warning('Special phrases importation starting')
|
||||
with connect(args.config.get_libpq_dsn()) as db_connection:
|
||||
import_from_wiki(args.config, db_connection)
|
||||
import_from_wiki(args, db_connection)
|
||||
return 0
|
||||
|
||||
@@ -2,26 +2,32 @@
|
||||
Functions to import special phrases into the database.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import json
|
||||
from os.path import isfile
|
||||
from psycopg2.sql import Identifier, Literal, SQL
|
||||
from settings.phrase_settings import BLACK_LIST, WHITE_LIST
|
||||
from nominatim.tools.exec_utils import get_url
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def import_from_wiki(config, db_connection, languages=None):
|
||||
def import_from_wiki(args, db_connection, languages=None):
|
||||
# pylint: disable-msg=too-many-locals
|
||||
"""
|
||||
Iterate through all specified languages and
|
||||
extract corresponding special phrases from the wiki.
|
||||
"""
|
||||
black_list, white_list = _load_white_and_black_lists(args)
|
||||
|
||||
#Compile the match regex to increase performance for the following loop.
|
||||
occurence_pattern = re.compile(
|
||||
r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
|
||||
)
|
||||
sanity_check_pattern = re.compile(r'^\w+$')
|
||||
|
||||
languages = _get_languages(config) if not languages else languages
|
||||
languages = _get_languages(args.config) if not languages else languages
|
||||
|
||||
#array for pairs of class/type
|
||||
pairs = dict()
|
||||
@@ -43,10 +49,10 @@ def import_from_wiki(config, db_connection, languages=None):
|
||||
_check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
|
||||
|
||||
#blacklisting: disallow certain class/type combinations
|
||||
if phrase_class in BLACK_LIST.keys() and phrase_type in BLACK_LIST[phrase_class]:
|
||||
if phrase_class in black_list.keys() and phrase_type in black_list[phrase_class]:
|
||||
continue
|
||||
#whitelisting: if class is in whitelist, allow only tags in the list
|
||||
if phrase_class in WHITE_LIST.keys() and phrase_type not in WHITE_LIST[phrase_class]:
|
||||
if phrase_class in white_list.keys() and phrase_type not in white_list[phrase_class]:
|
||||
continue
|
||||
|
||||
#add class/type to the pairs dict
|
||||
@@ -56,10 +62,23 @@ def import_from_wiki(config, db_connection, languages=None):
|
||||
db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
|
||||
)
|
||||
|
||||
_create_place_classtype_table_and_indexes(db_connection, config, pairs)
|
||||
_create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
|
||||
db_connection.commit()
|
||||
LOG.warning('Import done.')
|
||||
|
||||
def _load_white_and_black_lists(args):
    """
        Load the black and white lists of class/type combinations
        from the special phrases settings file.

        By default `phrase-settings.json` in the configuration directory
        is read. When the PHRASE_CONFIG setting is set, the file it points
        to is used instead, after it has been converted from the legacy
        PHP format if necessary.

        Returns a tuple `(black_list, white_list)` taken from the
        `blackList` and `whiteList` entries of the settings file.
        Raises KeyError when one of the two entries is missing.
    """
    config = args.config
    settings_path = f'{config.config_dir}/phrase-settings.json'

    if config.PHRASE_CONFIG:
        settings_path = _convert_php_settings_if_needed(args, config.PHRASE_CONFIG)

    # JSON files are UTF-8 encoded; do not depend on the locale default.
    with open(settings_path, 'r', encoding='utf-8') as json_settings:
        settings = json.load(json_settings)

    return settings['blackList'], settings['whiteList']
|
||||
|
||||
def _get_languages(config):
|
||||
"""
|
||||
@@ -199,3 +218,22 @@ def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
|
||||
db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
|
||||
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
|
||||
Identifier(config.DATABASE_WEBUSER)))
|
||||
|
||||
def _convert_php_settings_if_needed(args, file_path):
    """
        Make sure a JSON version of the special phrases settings exists.

        `file_path` is the configured settings file. When it ends in
        `.php` and no file with the same base name and a `.json`
        extension exists yet, the PHP migration script from
        `args.phplib_dir` is run on it.

        Returns the path of the JSON settings file, i.e. `file_path` with
        its extension replaced by `.json`. A failing migration script is
        logged and the subprocess.CalledProcessError is re-raised.
    """
    base_path, suffix = os.path.splitext(file_path)
    json_file_path = f'{base_path}.json'

    needs_conversion = suffix == '.php' and not isfile(json_file_path)
    if not needs_conversion:
        return json_file_path

    command = ['/usr/bin/env', 'php', '-Cq',
               args.phplib_dir / 'migration/phraseSettingsToJson.php',
               file_path]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        LOG.error('Error while converting %s to json.', file_path)
        raise

    LOG.warning('special_phrase configuration file has been converted to json.')
    return json_file_path
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
"""
|
||||
Module for settings
|
||||
"""
|
||||
25
settings/phrase-settings.json
Normal file
25
settings/phrase-settings.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
    "Comments": [
        "The black list corresponds to class/type combinations to exclude",
        "If a class is in the white list then all types will",
        "be ignored except the ones given in the list.",
        "Also use this list to exclude an entire class from special phrases."
    ],
    "blackList": {
        "boundary": [
            "administrative"
        ],
        "place": [
            "house",
            "houses"
        ]
    },
    "whiteList": {
        "highway": [
            "bus_stop",
            "rest_area",
            "raceway"
        ],
        "building": []
    }
}
|
||||
@@ -1,26 +0,0 @@
|
||||
"""
|
||||
These settings control the import of special phrases from the wiki.
|
||||
"""
|
||||
#class/type combinations to exclude
|
||||
BLACK_LIST = {
|
||||
'bounday': [
|
||||
'administrative'
|
||||
],
|
||||
'place': [
|
||||
'house',
|
||||
'houses'
|
||||
]
|
||||
}
|
||||
|
||||
#If a class is in the white list then all types will
|
||||
#be ignored except the ones given in the list.
|
||||
#Also use this list to exclude an entire class from
|
||||
#special phrases.
|
||||
WHITE_LIST = {
|
||||
'highway': [
|
||||
'bus_stop',
|
||||
'rest_area',
|
||||
'raceway'
|
||||
],
|
||||
'building': []
|
||||
}
|
||||
Reference in New Issue
Block a user