mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
This commit is contained in:
@@ -216,7 +216,7 @@ endif()
|
|||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
|
set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
|
||||||
set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
|
set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
|
||||||
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME}/settings)
|
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
|
||||||
|
|
||||||
if (BUILD_IMPORTER)
|
if (BUILD_IMPORTER)
|
||||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
|
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
|
||||||
@@ -257,9 +257,8 @@ if (BUILD_API)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
install(FILES settings/env.defaults
|
install(FILES settings/env.defaults
|
||||||
settings/__init__.py
|
|
||||||
settings/address-levels.json
|
settings/address-levels.json
|
||||||
settings/phrase_settings.py
|
settings/phrase-settings.json
|
||||||
settings/import-admin.style
|
settings/import-admin.style
|
||||||
settings/import-street.style
|
settings/import-street.style
|
||||||
settings/import-address.style
|
settings/import-address.style
|
||||||
|
|||||||
163
lib-php/admin/specialphrases.php
Normal file
163
lib-php/admin/specialphrases.php
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
<?php
|
||||||
|
@define('CONST_LibDir', dirname(dirname(__FILE__)));
|
||||||
|
|
||||||
|
require_once(CONST_LibDir.'/init-cmd.php');
|
||||||
|
ini_set('memory_limit', '800M');
|
||||||
|
ini_set('display_errors', 'stderr');
|
||||||
|
|
||||||
|
$aCMDOptions
|
||||||
|
= array(
|
||||||
|
'Import and export special phrases',
|
||||||
|
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
|
||||||
|
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
|
||||||
|
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
|
||||||
|
array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
|
||||||
|
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
|
||||||
|
);
|
||||||
|
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
|
||||||
|
|
||||||
|
loadSettings($aCMDResult['project-dir'] ?? getcwd());
|
||||||
|
setupHTTPProxy();
|
||||||
|
|
||||||
|
include(getSettingConfig('PHRASE_CONFIG', 'phrase_settings.php'));
|
||||||
|
|
||||||
|
if ($aCMDResult['wiki-import']) {
|
||||||
|
$oNormalizer = Transliterator::createFromRules(getSetting('TERM_NORMALIZATION'));
|
||||||
|
$aPairs = array();
|
||||||
|
|
||||||
|
$sLanguageIn = getSetting(
|
||||||
|
'LANGUAGES',
|
||||||
|
'af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
|
||||||
|
'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi'
|
||||||
|
);
|
||||||
|
|
||||||
|
foreach (explode(',', $sLanguageIn) as $sLanguage) {
|
||||||
|
$sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
|
||||||
|
$sWikiPageXML = file_get_contents($sURL);
|
||||||
|
|
||||||
|
if (!preg_match_all(
|
||||||
|
'#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#',
|
||||||
|
$sWikiPageXML,
|
||||||
|
$aMatches,
|
||||||
|
PREG_SET_ORDER
|
||||||
|
)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach ($aMatches as $aMatch) {
|
||||||
|
$sLabel = trim($aMatch[1]);
|
||||||
|
if ($oNormalizer !== null) {
|
||||||
|
$sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
|
||||||
|
} else {
|
||||||
|
$sTrans = null;
|
||||||
|
}
|
||||||
|
$sClass = trim($aMatch[2]);
|
||||||
|
$sType = trim($aMatch[3]);
|
||||||
|
// hack around a bug where building=yes was imported with
|
||||||
|
// quotes into the wiki
|
||||||
|
$sType = preg_replace('/(&quot;|")/', '', $sType);
|
||||||
|
// sanity check, in case somebody added garbage in the wiki
|
||||||
|
if (preg_match('/^\\w+$/', $sClass) < 1
|
||||||
|
|| preg_match('/^\\w+$/', $sType) < 1
|
||||||
|
) {
|
||||||
|
trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
|
||||||
|
exit;
|
||||||
|
}
|
||||||
|
// blacklisting: disallow certain class/type combinations
|
||||||
|
if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
|
||||||
|
// fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// whitelisting: if class is in whitelist, allow only tags in the list
|
||||||
|
if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
|
||||||
|
// fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$aPairs[$sClass.'|'.$sType] = array($sClass, $sType);
|
||||||
|
|
||||||
|
switch (trim($aMatch[4])) {
|
||||||
|
case 'near':
|
||||||
|
printf(
|
||||||
|
"SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'near');\n",
|
||||||
|
pg_escape_string($sLabel),
|
||||||
|
$sTrans,
|
||||||
|
$sClass,
|
||||||
|
$sType
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
case 'in':
|
||||||
|
printf(
|
||||||
|
"SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'in');\n",
|
||||||
|
pg_escape_string($sLabel),
|
||||||
|
$sTrans,
|
||||||
|
$sClass,
|
||||||
|
$sType
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
printf(
|
||||||
|
"SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n",
|
||||||
|
pg_escape_string($sLabel),
|
||||||
|
$sTrans,
|
||||||
|
$sClass,
|
||||||
|
$sType
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);';
|
||||||
|
|
||||||
|
foreach ($aPairs as $aPair) {
|
||||||
|
$sql_tablespace = getSetting('TABLESPACE_AUX_DATA');
|
||||||
|
if ($sql_tablespace) {
|
||||||
|
$sql_tablespace = ' TABLESPACE '.$sql_tablespace;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf(
|
||||||
|
'CREATE TABLE place_classtype_%s_%s'
|
||||||
|
. $sql_tablespace
|
||||||
|
. ' AS'
|
||||||
|
. ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex'
|
||||||
|
. " WHERE class = '%s' AND type = '%s'"
|
||||||
|
. ";\n",
|
||||||
|
pg_escape_string($aPair[0]),
|
||||||
|
pg_escape_string($aPair[1]),
|
||||||
|
pg_escape_string($aPair[0]),
|
||||||
|
pg_escape_string($aPair[1])
|
||||||
|
);
|
||||||
|
|
||||||
|
printf(
|
||||||
|
'CREATE INDEX idx_place_classtype_%s_%s_centroid'
|
||||||
|
. ' ON place_classtype_%s_%s USING GIST (centroid)'
|
||||||
|
. $sql_tablespace
|
||||||
|
. ";\n",
|
||||||
|
pg_escape_string($aPair[0]),
|
||||||
|
pg_escape_string($aPair[1]),
|
||||||
|
pg_escape_string($aPair[0]),
|
||||||
|
pg_escape_string($aPair[1])
|
||||||
|
);
|
||||||
|
|
||||||
|
printf(
|
||||||
|
'CREATE INDEX idx_place_classtype_%s_%s_place_id'
|
||||||
|
. ' ON place_classtype_%s_%s USING btree(place_id)'
|
||||||
|
. $sql_tablespace
|
||||||
|
. ";\n",
|
||||||
|
pg_escape_string($aPair[0]),
|
||||||
|
pg_escape_string($aPair[1]),
|
||||||
|
pg_escape_string($aPair[0]),
|
||||||
|
pg_escape_string($aPair[1])
|
||||||
|
);
|
||||||
|
|
||||||
|
printf(
|
||||||
|
'GRANT SELECT ON place_classtype_%s_%s TO "%s"'
|
||||||
|
. ";\n",
|
||||||
|
pg_escape_string($aPair[0]),
|
||||||
|
pg_escape_string($aPair[1]),
|
||||||
|
getSetting('DATABASE_WEBUSER')
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
echo 'DROP INDEX idx_placex_classtype;';
|
||||||
|
}
|
||||||
@@ -1,9 +1,16 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
$phpPhraseSettingsFile = $argv[1];
|
$phpPhraseSettingsFile = $argv[1];
|
||||||
|
<<<<<<< HEAD
|
||||||
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
|
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
|
||||||
|
|
||||||
if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
|
if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
|
||||||
|
=======
|
||||||
|
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile)."/".basename($phpPhraseSettingsFile, ".php").".json";
|
||||||
|
|
||||||
|
if(file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile))
|
||||||
|
{
|
||||||
|
>>>>>>> 3d939458... Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
|
||||||
include $phpPhraseSettingsFile;
|
include $phpPhraseSettingsFile;
|
||||||
|
|
||||||
$data = array();
|
$data = array();
|
||||||
@@ -16,4 +23,8 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)
|
|||||||
$jsonFile = fopen($jsonPhraseSettingsFile, 'w');
|
$jsonFile = fopen($jsonPhraseSettingsFile, 'w');
|
||||||
fwrite($jsonFile, json_encode($data));
|
fwrite($jsonFile, json_encode($data));
|
||||||
fclose($jsonFile);
|
fclose($jsonFile);
|
||||||
|
<<<<<<< HEAD
|
||||||
}
|
}
|
||||||
|
=======
|
||||||
|
}
|
||||||
|
>>>>>>> 3d939458... Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
|
||||||
|
|||||||
@@ -25,5 +25,5 @@ class ImportSpecialPhrases:
|
|||||||
if args.from_wiki:
|
if args.from_wiki:
|
||||||
LOG.warning('Special phrases importation starting')
|
LOG.warning('Special phrases importation starting')
|
||||||
with connect(args.config.get_libpq_dsn()) as db_connection:
|
with connect(args.config.get_libpq_dsn()) as db_connection:
|
||||||
import_from_wiki(args.config, db_connection)
|
import_from_wiki(args, db_connection)
|
||||||
return 0
|
return 0
|
||||||
|
|||||||
@@ -2,26 +2,32 @@
|
|||||||
Functions to import special phrases into the database.
|
Functions to import special phrases into the database.
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import json
|
||||||
|
from os.path import isfile
|
||||||
from psycopg2.sql import Identifier, Literal, SQL
|
from psycopg2.sql import Identifier, Literal, SQL
|
||||||
from settings.phrase_settings import BLACK_LIST, WHITE_LIST
|
|
||||||
from nominatim.tools.exec_utils import get_url
|
from nominatim.tools.exec_utils import get_url
|
||||||
|
|
||||||
LOG = logging.getLogger()
|
LOG = logging.getLogger()
|
||||||
|
|
||||||
def import_from_wiki(config, db_connection, languages=None):
|
def import_from_wiki(args, db_connection, languages=None):
|
||||||
|
# pylint: disable-msg=too-many-locals
|
||||||
"""
|
"""
|
||||||
Iterate through all specified languages and
|
Iterate through all specified languages and
|
||||||
extract corresponding special phrases from the wiki.
|
extract corresponding special phrases from the wiki.
|
||||||
"""
|
"""
|
||||||
|
black_list, white_list = _load_white_and_black_lists(args)
|
||||||
|
|
||||||
#Compile the match regex to increase performance for the following loop.
|
#Compile the match regex to increase performance for the following loop.
|
||||||
occurence_pattern = re.compile(
|
occurence_pattern = re.compile(
|
||||||
r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
|
r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
|
||||||
)
|
)
|
||||||
sanity_check_pattern = re.compile(r'^\w+$')
|
sanity_check_pattern = re.compile(r'^\w+$')
|
||||||
|
|
||||||
languages = _get_languages(config) if not languages else languages
|
languages = _get_languages(args.config) if not languages else languages
|
||||||
|
|
||||||
#array for pairs of class/type
|
#array for pairs of class/type
|
||||||
pairs = dict()
|
pairs = dict()
|
||||||
@@ -43,10 +49,10 @@ def import_from_wiki(config, db_connection, languages=None):
|
|||||||
_check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
|
_check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
|
||||||
|
|
||||||
#blacklisting: disallow certain class/type combinations
|
#blacklisting: disallow certain class/type combinations
|
||||||
if phrase_class in BLACK_LIST.keys() and phrase_type in BLACK_LIST[phrase_class]:
|
if phrase_class in black_list.keys() and phrase_type in black_list[phrase_class]:
|
||||||
continue
|
continue
|
||||||
#whitelisting: if class is in whitelist, allow only tags in the list
|
#whitelisting: if class is in whitelist, allow only tags in the list
|
||||||
if phrase_class in WHITE_LIST.keys() and phrase_type not in WHITE_LIST[phrase_class]:
|
if phrase_class in white_list.keys() and phrase_type not in white_list[phrase_class]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
#add class/type to the pairs dict
|
#add class/type to the pairs dict
|
||||||
@@ -56,10 +62,23 @@ def import_from_wiki(config, db_connection, languages=None):
|
|||||||
db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
|
db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
|
||||||
)
|
)
|
||||||
|
|
||||||
_create_place_classtype_table_and_indexes(db_connection, config, pairs)
|
_create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
|
||||||
db_connection.commit()
|
db_connection.commit()
|
||||||
LOG.warning('Import done.')
|
LOG.warning('Import done.')
|
||||||
|
|
||||||
|
def _load_white_and_black_lists(args):
|
||||||
|
"""
|
||||||
|
Load white and black lists from phrase-settings.json.
|
||||||
|
"""
|
||||||
|
config = args.config
|
||||||
|
settings_path = str(config.config_dir)+'/phrase-settings.json'
|
||||||
|
|
||||||
|
if config.PHRASE_CONFIG:
|
||||||
|
settings_path = _convert_php_settings_if_needed(args, config.PHRASE_CONFIG)
|
||||||
|
|
||||||
|
with open(settings_path, "r") as json_settings:
|
||||||
|
settings = json.load(json_settings)
|
||||||
|
return settings['blackList'], settings['whiteList']
|
||||||
|
|
||||||
def _get_languages(config):
|
def _get_languages(config):
|
||||||
"""
|
"""
|
||||||
@@ -199,3 +218,22 @@ def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
|
|||||||
db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
|
db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
|
||||||
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
|
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
|
||||||
Identifier(config.DATABASE_WEBUSER)))
|
Identifier(config.DATABASE_WEBUSER)))
|
||||||
|
|
||||||
|
def _convert_php_settings_if_needed(args, file_path):
|
||||||
|
"""
|
||||||
|
Convert php settings file of special phrases to json file if it is still in php format.
|
||||||
|
"""
|
||||||
|
file, extension = os.path.splitext(file_path)
|
||||||
|
json_file_path = f'{file}.json'
|
||||||
|
if extension == '.php' and not isfile(json_file_path):
|
||||||
|
try:
|
||||||
|
subprocess.run(['/usr/bin/env', 'php', '-Cq',
|
||||||
|
args.phplib_dir / 'migration/phraseSettingsToJson.php',
|
||||||
|
file_path], check=True)
|
||||||
|
LOG.warning('special_phrase configuration file has been converted to json.')
|
||||||
|
return json_file_path
|
||||||
|
except subprocess.CalledProcessError:
|
||||||
|
LOG.error('Error while converting %s to json.', file_path)
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
return json_file_path
|
||||||
|
|||||||
@@ -1,3 +0,0 @@
|
|||||||
"""
|
|
||||||
Module for settings
|
|
||||||
"""
|
|
||||||
25
settings/phrase-settings.json
Normal file
25
settings/phrase-settings.json
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"Comments": [
|
||||||
|
"The black list contains the class/type combinations to exclude.",
|
||||||
|
"If a class is in the white list then all types will",
|
||||||
|
"be ignored except the ones given in the list.",
|
||||||
|
"Also use this list to exclude an entire class from special phrases."
|
||||||
|
],
|
||||||
|
"blackList": {
|
||||||
|
"boundary": [
|
||||||
|
"administrative"
|
||||||
|
],
|
||||||
|
"place": [
|
||||||
|
"house",
|
||||||
|
"houses"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"whiteList": {
|
||||||
|
"highway": [
|
||||||
|
"bus_stop",
|
||||||
|
"rest_area",
|
||||||
|
"raceway"
|
||||||
|
],
|
||||||
|
"building": []
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
"""
|
|
||||||
These settings control the import of special phrases from the wiki.
|
|
||||||
"""
|
|
||||||
#class/type combinations to exclude
|
|
||||||
BLACK_LIST = {
|
|
||||||
'bounday': [
|
|
||||||
'administrative'
|
|
||||||
],
|
|
||||||
'place': [
|
|
||||||
'house',
|
|
||||||
'houses'
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
#If a class is in the white list then all types will
|
|
||||||
#be ignored except the ones given in the list.
|
|
||||||
#Also use this list to exclude an entire class from
|
|
||||||
#special phrases.
|
|
||||||
WHITE_LIST = {
|
|
||||||
'highway': [
|
|
||||||
'bus_stop',
|
|
||||||
'rest_area',
|
|
||||||
'raceway'
|
|
||||||
],
|
|
||||||
'building': []
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user