Mirror of https://github.com/osm-search/Nominatim.git (synced 2026-02-14 10:27:57 +00:00)
Merge pull request #2228 from AntoJvlt/import-special-phrases-porting-python
Import special phrases porting python
2  .github/actions/build-nominatim/action.yml  (vendored)
@@ -6,7 +6,7 @@ runs:
    steps:
        - name: Install prerequisites
          run: |
            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2
            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu
          shell: bash

        - name: Download dependencies
2  .github/workflows/ci-tests.yml  (vendored)
@@ -120,7 +120,7 @@ jobs:
          working-directory: data-env

        - name: Import special phrases
          run: nominatim special-phrases --from-wiki | psql -d nominatim
          run: nominatim special-phrases --import-from-wiki
          working-directory: data-env

        - name: Check import
1  .gitignore  (vendored)
@@ -9,3 +9,4 @@ data/wiki_specialphrases.sql
data/osmosischange.osc

.vagrant
data/country_osm_grid.sql.gz
@@ -1,6 +1,7 @@
[MASTER]

extension-pkg-whitelist=osmium
ignored-modules=icu

[MESSAGES CONTROL]
@@ -114,7 +114,6 @@ if (BUILD_IMPORTER)
        export.php
        query.php
        setup.php
        specialphrases.php
        update.php
        warm.php
       )
@@ -259,7 +258,7 @@ endif()

install(FILES settings/env.defaults
              settings/address-levels.json
              settings/phrase_settings.php
              settings/phrase-settings.json
              settings/import-admin.style
              settings/import-street.style
              settings/import-address.style
@@ -268,10 +268,9 @@ running this function.

If you want to be able to search for places by their type through
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to enable these key phrases like this:
you also need to import these key phrases like this:

    nominatim special-phrases --from-wiki > specialphrases.sql
    psql -d nominatim -f specialphrases.sql

    nominatim special-phrases --import-from-wiki

Note that this command downloads the phrases from the wiki link above. You
need internet access for the step.
@@ -30,6 +30,7 @@ For compiling:
* [proj](https://proj.org/)
* [bzip2](http://www.bzip.org/)
* [zlib](https://www.zlib.net/)
* [ICU](http://site.icu-project.org/)
* [Boost libraries](https://www.boost.org/), including system and filesystem
* PostgreSQL client libraries
* a recent C++ compiler (gcc 5+ or Clang 3.8+)

@@ -43,6 +44,7 @@ For running Nominatim:
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
* [PyICU](https://pypi.org/project/PyICU/)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
@@ -1,163 +1,11 @@

<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));

require_once(CONST_LibDir.'/init-cmd.php');
ini_set('memory_limit', '800M');
ini_set('display_errors', 'stderr');

$aCMDOptions
 = array(
    'Import and export special phrases',
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
    array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
    array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
   );
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
loadSettings(getcwd());

loadSettings($aCMDResult['project-dir'] ?? getcwd());
setupHTTPProxy();

include(getSettingConfig('PHRASE_CONFIG', 'phrase_settings.php'));

if ($aCMDResult['wiki-import']) {
    $oNormalizer = Transliterator::createFromRules(getSetting('TERM_NORMALIZATION'));
    $aPairs = array();

    $sLanguageIn = getSetting(
        'LANGUAGES',
        'af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
        'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi'
    );

    foreach (explode(',', $sLanguageIn) as $sLanguage) {
        $sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
        $sWikiPageXML = file_get_contents($sURL);

        if (!preg_match_all(
            '#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#',
            $sWikiPageXML,
            $aMatches,
            PREG_SET_ORDER
        )) {
            continue;
        }

        foreach ($aMatches as $aMatch) {
            $sLabel = trim($aMatch[1]);
            if ($oNormalizer !== null) {
                $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
            } else {
                $sTrans = null;
            }
            $sClass = trim($aMatch[2]);
            $sType = trim($aMatch[3]);
            // hack around a bug where building=yes was imported with
            // quotes into the wiki
            $sType = preg_replace('/(&quot;|")/', '', $sType);
            // sanity check, in case somebody added garbage in the wiki
            if (preg_match('/^\\w+$/', $sClass) < 1
                || preg_match('/^\\w+$/', $sType) < 1
            ) {
                trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
                exit;
            }
            // blacklisting: disallow certain class/type combinations
            if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
                // fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n");
                continue;
            }
            // whitelisting: if class is in whitelist, allow only tags in the list
            if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
                // fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n");
                continue;
            }
            $aPairs[$sClass.'|'.$sType] = array($sClass, $sType);

            switch (trim($aMatch[4])) {
                case 'near':
                    printf(
                        "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'near');\n",
                        pg_escape_string($sLabel),
                        $sTrans,
                        $sClass,
                        $sType
                    );
                    break;
                case 'in':
                    printf(
                        "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'in');\n",
                        pg_escape_string($sLabel),
                        $sTrans,
                        $sClass,
                        $sType
                    );
                    break;
                default:
                    printf(
                        "SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n",
                        pg_escape_string($sLabel),
                        $sTrans,
                        $sClass,
                        $sType
                    );
                    break;
            }
        }
    }

    echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);';

    foreach ($aPairs as $aPair) {
        $sql_tablespace = getSetting('TABLESPACE_AUX_DATA');
        if ($sql_tablespace) {
            $sql_tablespace = ' TABLESPACE '.$sql_tablespace;
        }

        printf(
            'CREATE TABLE place_classtype_%s_%s'
            . $sql_tablespace
            . ' AS'
            . ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex'
            . " WHERE class = '%s' AND type = '%s'"
            . ";\n",
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1]),
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1])
        );

        printf(
            'CREATE INDEX idx_place_classtype_%s_%s_centroid'
            . ' ON place_classtype_%s_%s USING GIST (centroid)'
            . $sql_tablespace
            . ";\n",
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1]),
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1])
        );

        printf(
            'CREATE INDEX idx_place_classtype_%s_%s_place_id'
            . ' ON place_classtype_%s_%s USING btree(place_id)'
            . $sql_tablespace
            . ";\n",
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1]),
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1])
        );

        printf(
            'GRANT SELECT ON place_classtype_%s_%s TO "%s"'
            . ";\n",
            pg_escape_string($aPair[0]),
            pg_escape_string($aPair[1]),
            getSetting('DATABASE_WEBUSER')
        );
    }

    echo 'DROP INDEX idx_placex_classtype;';
}
(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
    ->addParams('special-phrases', '--import-from-wiki')
    ->run();
19  lib-php/migration/PhraseSettingsToJson.php  (new file)
@@ -0,0 +1,19 @@
<?php

$phpPhraseSettingsFile = $argv[1];
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';

if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
    include $phpPhraseSettingsFile;

    $data = array();

    if (isset($aTagsBlacklist))
        $data['blackList'] = $aTagsBlacklist;
    if (isset($aTagsWhitelist))
        $data['whiteList'] = $aTagsWhitelist;

    $jsonFile = fopen($jsonPhraseSettingsFile, 'w');
    fwrite($jsonFile, json_encode($data));
    fclose($jsonFile);
}
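The script above is the bridge from the legacy PHP phrase configuration to the new JSON format: whatever $aTagsBlacklist and $aTagsWhitelist contain is written out under the blackList and whiteList keys, next to the original file. A minimal sketch of reading that output from Python (the file path is a placeholder; the key names are the ones the script writes and that the new importer reads):

    import json

    # Placeholder path: the .json file PhraseSettingsToJson.php writes next to the .php input.
    with open('phrase_settings.json', 'r') as json_settings:
        settings = json.load(json_settings)

    # Same keys that SpecialPhrasesImporter._load_white_and_black_lists() expects.
    black_list = settings.get('blackList', {})
    white_list = settings.get('whiteList', {})
    print(sorted(black_list), sorted(white_list))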
@@ -112,30 +112,6 @@ class CommandlineParser:
# pylint: disable=C0111
# Using non-top-level imports to make pyosmium optional for replication only.
# pylint: disable=E0012,C0415


class SetupSpecialPhrases:
    """\
    Maintain special phrases.
    """

    @staticmethod
    def add_args(parser):
        group = parser.add_argument_group('Input arguments')
        group.add_argument('--from-wiki', action='store_true',
                           help='Pull special phrases from the OSM wiki.')
        group = parser.add_argument_group('Output arguments')
        group.add_argument('-o', '--output', default='-',
                           help="""File to write the preprocessed phrases to.
                                   If omitted, it will be written to stdout.""")

    @staticmethod
    def run(args):
        if args.output != '-':
            raise NotImplementedError('Only output to stdout is currently implemented.')
        return run_legacy_script('specialphrases.php', '--wiki-import', nominatim_env=args)


class UpdateAddData:
    """\
    Add additional data from a file or an online source.

@@ -278,7 +254,7 @@ def nominatim(**kwargs):
    parser.add_subcommand('freeze', clicmd.SetupFreeze)
    parser.add_subcommand('replication', clicmd.UpdateReplication)

    parser.add_subcommand('special-phrases', SetupSpecialPhrases)
    parser.add_subcommand('special-phrases', clicmd.ImportSpecialPhrases)

    parser.add_subcommand('add-data', UpdateAddData)
    parser.add_subcommand('index', clicmd.UpdateIndex)
@@ -10,3 +10,4 @@ from .refresh import UpdateRefresh
from .admin import AdminFuncs
from .freeze import SetupFreeze
from .transition import AdminTransition
from .special_phrases import ImportSpecialPhrases
31  nominatim/clicmd/special_phrases.py  (new file)
@@ -0,0 +1,31 @@
"""
Implementation of the 'special-phrases' command.
"""
import logging
from nominatim.tools.special_phrases import SpecialPhrasesImporter
from nominatim.db.connection import connect

LOG = logging.getLogger()

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111

class ImportSpecialPhrases:
    """\
    Import special phrases.
    """
    @staticmethod
    def add_args(parser):
        group = parser.add_argument_group('Input arguments')
        group.add_argument('--import-from-wiki', action='store_true',
                           help='Import special phrases from the OSM wiki to the database.')

    @staticmethod
    def run(args):
        if args.import_from_wiki:
            LOG.warning('Special phrases importation starting')
            with connect(args.config.get_libpq_dsn()) as db_connection:
                SpecialPhrasesImporter(
                    args.config, args.phplib_dir, db_connection
                ).import_from_wiki()
        return 0
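The subcommand above is a thin wrapper: it opens a connection from the configured DSN and hands everything to SpecialPhrasesImporter. A minimal sketch of the equivalent programmatic call, where config and phplib_dir are placeholders for what the CLI normally supplies through args:

    from nominatim.db.connection import connect
    from nominatim.tools.special_phrases import SpecialPhrasesImporter

    def import_special_phrases(config, phplib_dir, languages=None):
        # Mirrors ImportSpecialPhrases.run(): connect, then import from the wiki.
        with connect(config.get_libpq_dsn()) as db_connection:
            SpecialPhrasesImporter(config, phplib_dir, db_connection) \
                .import_from_wiki(languages)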
278  nominatim/tools/special_phrases.py  (new file)
@@ -0,0 +1,278 @@
"""
    Functions to import special phrases into the database.
"""
import logging
import os
from pathlib import Path
import re
import subprocess
import json
from os.path import isfile
from icu import Transliterator
from psycopg2.sql import Identifier, Literal, SQL
from nominatim.tools.exec_utils import get_url
from nominatim.errors import UsageError

LOG = logging.getLogger()
class SpecialPhrasesImporter():
    # pylint: disable-msg=too-many-instance-attributes
    # pylint: disable-msg=too-few-public-methods
    """
        Class handling the process of special phrases importations.
    """
    def __init__(self, config, phplib_dir, db_connection) -> None:
        self.db_connection = db_connection
        self.config = config
        self.phplib_dir = phplib_dir
        self.black_list, self.white_list = self._load_white_and_black_lists()
        #Compile the regex here to increase performances.
        self.occurence_pattern = re.compile(
            r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
        )
        self.sanity_check_pattern = re.compile(r'^\w+$')
        self.transliterator = Transliterator.createFromRules("special-phrases normalizer",
                                                             self.config.TERM_NORMALIZATION)

    def import_from_wiki(self, languages=None):
        """
            Iterate through all specified languages and
            extract corresponding special phrases from the wiki.
        """
        if languages is not None and not isinstance(languages, list):
            raise TypeError('The \'languages\' argument should be of type list.')

        #Get all languages to process.
        languages = self._load_languages() if not languages else languages

        #Store pairs of class/type for further processing
        class_type_pairs = set()

        for lang in languages:
            LOG.warning('Import phrases for lang: %s', lang)
            wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
            class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))

        self._create_place_classtype_table_and_indexes(class_type_pairs)
        self.db_connection.commit()
        LOG.warning('Import done.')

    def _load_white_and_black_lists(self):
        """
            Load white and black lists from phrases-settings.json.
        """
        settings_path = (self.config.config_dir / 'phrase-settings.json').resolve()

        if self.config.PHRASE_CONFIG:
            settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)

        with open(settings_path, "r") as json_settings:
            settings = json.load(json_settings)
        return settings['blackList'], settings['whiteList']

    def _load_languages(self):
        """
            Get list of all languages from env config file
            or default if there is no languages configured.
            The system will extract special phrases only from all specified languages.
        """
        default_languages = [
            'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
            'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
            'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
            'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
        return self.config.LANGUAGES or default_languages

    @staticmethod
    def _get_wiki_content(lang):
        """
            Request and return the wiki page's content
            corresponding to special phrases for a given lang.
            Requested URL Example :
                https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
        """
        url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
        return get_url(url)

    def _check_sanity(self, lang, phrase_class, phrase_type):
        """
            Check sanity of given inputs in case somebody added garbage in the wiki.
            If a bad class/type is detected the system will exit with an error.
        """
        type_matchs = self.sanity_check_pattern.findall(phrase_type)
        class_matchs = self.sanity_check_pattern.findall(phrase_class)

        if len(class_matchs) < 1 or len(type_matchs) < 1:
            raise UsageError("Bad class/type for language {}: {}={}".format(
                lang, phrase_class, phrase_type))

    def _process_xml_content(self, xml_content, lang):
        """
            Process given xml content by extracting matching patterns.
            Matching patterns are processed there and returned in a
            set of class/type pairs.
        """
        #One match will be of format [label, class, type, operator, plural]
        matches = self.occurence_pattern.findall(xml_content)
        #Store pairs of class/type for further processing
        class_type_pairs = set()

        for match in matches:
            phrase_label = match[0].strip()
            normalized_label = self.transliterator.transliterate(phrase_label)
            phrase_class = match[1].strip()
            phrase_type = match[2].strip()
            phrase_operator = match[3].strip()
            #hack around a bug where building=yes was imported with quotes into the wiki
            phrase_type = re.sub(r'\"|&quot;', '', phrase_type)

            #sanity check, in case somebody added garbage in the wiki
            self._check_sanity(lang, phrase_class, phrase_type)

            #blacklisting: disallow certain class/type combinations
            if (
                    phrase_class in self.black_list.keys() and
                    phrase_type in self.black_list[phrase_class]
            ):
                continue
            #whitelisting: if class is in whitelist, allow only tags in the list
            if (
                    phrase_class in self.white_list.keys() and
                    phrase_type not in self.white_list[phrase_class]
            ):
                continue

            #add class/type to the pairs dict
            class_type_pairs.add((phrase_class, phrase_type))

            self._process_amenity(
                phrase_label, normalized_label, phrase_class,
                phrase_type, phrase_operator
            )

        return class_type_pairs

    def _process_amenity(self, phrase_label, normalized_label,
                         phrase_class, phrase_type, phrase_operator):
        # pylint: disable-msg=too-many-arguments
        """
            Add phrase lookup and corresponding class and
            type to the word table based on the operator.
        """
        with self.db_connection.cursor() as db_cursor:
            if phrase_operator == 'near':
                db_cursor.execute("""SELECT getorcreate_amenityoperator(
                                  make_standard_name(%s), %s, %s, %s, 'near')""",
                                  (phrase_label, normalized_label, phrase_class, phrase_type))
            elif phrase_operator == 'in':
                db_cursor.execute("""SELECT getorcreate_amenityoperator(
                                  make_standard_name(%s), %s, %s, %s, 'in')""",
                                  (phrase_label, normalized_label, phrase_class, phrase_type))
            else:
                db_cursor.execute("""SELECT getorcreate_amenity(
                                  make_standard_name(%s), %s, %s, %s)""",
                                  (phrase_label, normalized_label, phrase_class, phrase_type))


    def _create_place_classtype_table_and_indexes(self, class_type_pairs):
        """
            Create table place_classtype for each given pair.
            Also create indexes on place_id and centroid.
        """
        LOG.warning('Create tables and indexes...')

        sql_tablespace = self.config.TABLESPACE_AUX_DATA
        if sql_tablespace:
            sql_tablespace = ' TABLESPACE '+sql_tablespace

        with self.db_connection.cursor() as db_cursor:
            db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")

        for pair in class_type_pairs:
            phrase_class = pair[0]
            phrase_type = pair[1]

            #Table creation
            self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)

            #Indexes creation
            self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type)

            #Grant access on read to the web user.
            self._grant_access_to_webuser(phrase_class, phrase_type)

        with self.db_connection.cursor() as db_cursor:
            db_cursor.execute("DROP INDEX idx_placex_classtype")


    def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
        """
            Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
        """
        table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
        with self.db_connection.cursor() as db_cursor:
            db_cursor.execute(SQL("""
                    CREATE TABLE IF NOT EXISTS {{}} {}
                    AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
                    WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
                              .format(Identifier(table_name), Literal(phrase_class),
                                      Literal(phrase_type)))


    def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
        """
            Create indexes on centroid and place_id for the place_classtype table.
        """
        index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
        base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
        #Index on centroid
        if not self.db_connection.index_exists(index_prefix + 'centroid'):
            with self.db_connection.cursor() as db_cursor:
                db_cursor.execute(SQL("""
                    CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
                                  .format(Identifier(index_prefix + 'centroid'),
                                          Identifier(base_table)), sql_tablespace)

        #Index on place_id
        if not self.db_connection.index_exists(index_prefix + 'place_id'):
            with self.db_connection.cursor() as db_cursor:
                db_cursor.execute(SQL(
                    """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
                                  .format(Identifier(index_prefix + 'place_id'),
                                          Identifier(base_table)))


    def _grant_access_to_webuser(self, phrase_class, phrase_type):
        """
            Grant access on read to the table place_classtype for the webuser.
        """
        table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
        with self.db_connection.cursor() as db_cursor:
            db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
                              .format(Identifier(table_name),
                                      Identifier(self.config.DATABASE_WEBUSER)))

    def _convert_php_settings_if_needed(self, file_path):
        """
            Convert php settings file of special phrases to json file if it is still in php format.
        """
        if not isfile(file_path):
            raise UsageError(str(file_path) + ' is not a valid file.')

        file, extension = os.path.splitext(file_path)
        json_file_path = Path(file + '.json').resolve()

        if extension not in('.php', '.json'):
            raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')

        if extension == '.php' and not isfile(json_file_path):
            try:
                subprocess.run(['/usr/bin/env', 'php', '-Cq',
                                (self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(),
                                file_path], check=True)
                LOG.warning('special_phrase configuration file has been converted to json.')
                return json_file_path
            except subprocess.CalledProcessError:
                LOG.error('Error while converting %s to json.', file_path)
                raise
        else:
            return json_file_path
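The compiled occurence_pattern above does the actual extraction from the wiki markup: every table row yields a (label, class, type, operator, plural) tuple, which is then stripped and filtered. A standalone sketch, with the pattern copied from the class and a sample row taken from the test fixture further down:

    import re

    # Pattern copied from SpecialPhrasesImporter.__init__(); one match is
    # (label, class, type, operator, plural).
    OCCURENCE_PATTERN = re.compile(
        r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])')

    # Sample row as it appears in the wiki markup (and in the test data below).
    row = '| Zip Line near || aerialway || zip_line || near || N '

    label, phrase_class, phrase_type, operator, plural = (
        part.strip() for part in OCCURENCE_PATTERN.findall(row)[0])
    print(label, phrase_class, phrase_type, operator, plural)
    # -> Zip Line near aerialway zip_line near N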
@@ -77,7 +77,7 @@ NOMINATIM_TIGER_DATA_PATH=
NOMINATIM_WIKIPEDIA_DATA_PATH=

# Configuration file for special phrase import.
# When unset, the internal default settings from 'settings/phrase_settings.php'
# When unset, the internal default settings from 'settings/phrase-settings.json'
# are used.
NOMINATIM_PHRASE_CONFIG=
25  settings/phrase-settings.json  (new file)
@@ -0,0 +1,25 @@
{
    "Comments": [
        "Black list correspond to class/type combinations to exclude",
        "If a class is in the white list then all types will",
        "be ignored except the ones given in the list.",
        "Also use this list to exclude an entire class from special phrases."
    ],
    "blackList": {
        "bounday": [
            "administrative"
        ],
        "place": [
            "house",
            "houses"
        ]
    },
    "whiteList": {
        "highway": [
            "bus_stop",
            "rest_area",
            "raceway'"
        ],
        "building": []
    }
}
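The Comments block above describes the filtering rule that _process_xml_content() applies to each extracted pair. A small self-contained sketch of that decision, with the settings inlined from the file above:

    # Settings inlined from settings/phrase-settings.json for illustration.
    black_list = {"bounday": ["administrative"], "place": ["house", "houses"]}
    white_list = {"highway": ["bus_stop", "rest_area", "raceway'"], "building": []}

    def keep_pair(phrase_class, phrase_type):
        # Blacklisting: disallow the listed class/type combinations.
        if phrase_class in black_list and phrase_type in black_list[phrase_class]:
            return False
        # Whitelisting: if the class is listed, only the listed types are kept,
        # so an empty list (e.g. "building") drops the whole class.
        if phrase_class in white_list and phrase_type not in white_list[phrase_class]:
            return False
        return True

    assert keep_pair('aerialway', 'zip_line')    # not listed anywhere: kept
    assert not keep_pair('place', 'house')       # blacklisted combination
    assert not keep_pair('building', 'yes')      # whitelisted class with empty list
    assert keep_pair('highway', 'bus_stop')      # explicitly whitelisted type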
@@ -5,6 +5,7 @@ from pathlib import Path
import psycopg2
import psycopg2.extras
import pytest
import tempfile

SRC_DIR = Path(__file__) / '..' / '..' / '..'

@@ -133,6 +134,13 @@ def def_config():
def src_dir():
    return SRC_DIR.resolve()

@pytest.fixture
def tmp_phplib_dir():
    with tempfile.TemporaryDirectory() as phpdir:
        (Path(phpdir) / 'admin').mkdir()

        yield Path(phpdir)

@pytest.fixture
def status_table(temp_db_conn):
    """ Create an empty version of the status table and
BIN  test/python/sample.tar.gz  (new binary file, not shown)
@@ -64,7 +64,6 @@ def test_cli_help(capsys):


@pytest.mark.parametrize("command,script", [
                         (('special-phrases',), 'specialphrases'),
                         (('add-data', '--file', 'foo.osm'), 'update'),
                         (('export',), 'export')
                         ])

@@ -172,6 +171,12 @@ def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ra
    assert bnd_mock.called == do_bnds
    assert rank_mock.called == do_ranks

def test_special_phrases_command(temp_db, mock_func_factory):
    func = mock_func_factory(nominatim.clicmd.special_phrases.SpecialPhrasesImporter, 'import_from_wiki')

    call_nominatim('special-phrases', '--import-from-wiki')

    assert func.called == 1

@pytest.mark.parametrize("command,func", [
                         ('postcodes', 'update_postcodes'),
@@ -9,13 +9,6 @@ import pytest

import nominatim.tools.exec_utils as exec_utils

@pytest.fixture
def tmp_phplib_dir():
    with tempfile.TemporaryDirectory() as phpdir:
        (Path(phpdir) / 'admin').mkdir()

        yield Path(phpdir)

@pytest.fixture
def nominatim_env(tmp_phplib_dir, def_config):
    class _NominatimEnv:
346  test/python/test_tools_import_special_phrases.py  (new file)
@@ -0,0 +1,346 @@
"""
    Tests for import special phrases methods
    of the class SpecialPhrasesImporter.
"""
from nominatim.errors import UsageError
from pathlib import Path
import tempfile
from shutil import copyfile
import pytest
from nominatim.tools.special_phrases import SpecialPhrasesImporter

TEST_BASE_DIR = Path(__file__) / '..' / '..'

def test_check_sanity_class(special_phrases_importer):
    """
        Check for _check_sanity() method.
        If a wrong class or type is given, an UsageError should raise.
        If a good class and type are given, nothing special happens.
    """
    with pytest.raises(UsageError) as wrong_class:
        special_phrases_importer._check_sanity('en', '', 'type')

    with pytest.raises(UsageError) as wrong_type:
        special_phrases_importer._check_sanity('en', 'class', '')

    special_phrases_importer._check_sanity('en', 'class', 'type')

    assert wrong_class and wrong_type

def test_load_white_and_black_lists(special_phrases_importer):
    """
        Test that _load_white_and_black_lists() well return
        black list and white list and that they are of dict type.
    """
    black_list, white_list = special_phrases_importer._load_white_and_black_lists()

    assert isinstance(black_list, dict) and isinstance(white_list, dict)

def test_convert_php_settings(special_phrases_importer):
    """
        Test that _convert_php_settings_if_needed() convert the given
        php file to a json file.
    """
    php_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.php').resolve()

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_settings = (Path(temp_dir) / 'phrase_settings.php').resolve()
        copyfile(php_file, temp_settings)
        special_phrases_importer._convert_php_settings_if_needed(temp_settings)

        assert (Path(temp_dir) / 'phrase_settings.json').is_file()

def test_convert_settings_wrong_file(special_phrases_importer):
    """
        Test that _convert_php_settings_if_needed() raise an exception
        if the given file is not a valid file.
    """
    with pytest.raises(UsageError) as exceptioninfos:
        special_phrases_importer._convert_php_settings_if_needed('random_file')

    assert str(exceptioninfos.value) == 'random_file is not a valid file.'

def test_convert_settings_json_already_exist(special_phrases_importer):
    """
        Test that if we give to '_convert_php_settings_if_needed' a php file path
        and that a the corresponding json file already exists, it is returned.
    """
    php_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.php').resolve()
    json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve()

    returned = special_phrases_importer._convert_php_settings_if_needed(php_file)

    assert returned == json_file

def test_convert_settings_giving_json(special_phrases_importer):
    """
        Test that if we give to '_convert_php_settings_if_needed' a json file path
        the same path is directly returned
    """
    json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase-settings.json').resolve()

    returned = special_phrases_importer._convert_php_settings_if_needed(json_file)

    assert returned == json_file

def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
                                       word_table, temp_db_conn):
    """
        Test that _process_amenity() execute well the
        getorcreate_amenityoperator() SQL function and that
        the 2 differents operators are well handled.
    """
    special_phrases_importer._process_amenity('', '', '', '', 'near')
    special_phrases_importer._process_amenity('', '', '', '', 'in')

    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("SELECT * FROM temp_with_operator WHERE op='near' OR op='in'")
        results = temp_db_cursor.fetchall()

        assert len(results) == 2

def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs,
                                          temp_db_conn):
    """
        Test that _process_amenity() execute well the
        getorcreate_amenity() SQL function.
    """
    special_phrases_importer._process_amenity('', '', '', '', '')

    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("SELECT * FROM temp_without_operator WHERE op='no_operator'")
        result = temp_db_cursor.fetchone()

        assert result

def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
    """
        Test that _create_place_classtype_indexes() create the
        place_id index and centroid index on the right place_class_type table.
    """
    phrase_class = 'class'
    phrase_type = 'type'
    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)

    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("CREATE EXTENSION postgis;")
        temp_db_cursor.execute('CREATE TABLE {}(place_id BIGINT, centroid GEOMETRY)'.format(table_name))

    special_phrases_importer._create_place_classtype_indexes('', phrase_class, phrase_type)

    assert check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type)

def test_create_place_classtype_table(temp_db_conn, placex_table, special_phrases_importer):
    """
        Test that _create_place_classtype_table() create
        the right place_classtype table.
    """
    phrase_class = 'class'
    phrase_type = 'type'
    special_phrases_importer._create_place_classtype_table('', phrase_class, phrase_type)

    assert check_table_exist(temp_db_conn, phrase_class, phrase_type)

def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_importer):
    """
        Test that _grant_access_to_webuser() give
        right access to the web user.
    """
    phrase_class = 'class'
    phrase_type = 'type'
    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)

    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute('CREATE TABLE {}()'.format(table_name))

    special_phrases_importer._grant_access_to_webuser(phrase_class, phrase_type)

    assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, phrase_class, phrase_type)

def test_create_place_classtype_table_and_indexes(
        temp_db_conn, def_config, placex_table, getorcreate_amenity_funcs,
        getorcreate_amenityoperator_funcs, special_phrases_importer):
    """
        Test that _create_place_classtype_table_and_indexes()
        create the right place_classtype tables and place_id indexes
        and centroid indexes and grant access to the web user
        for the given set of pairs.
    """
    pairs = set([('class1', 'type1'), ('class2', 'type2')])

    special_phrases_importer._create_place_classtype_table_and_indexes(pairs)

    for pair in pairs:
        assert check_table_exist(temp_db_conn, pair[0], pair[1])
        assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1])
        assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1])

def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer,
                             getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
    """
        Test that _process_xml_content() process the given xml content right
        by executing the right SQL functions for amenities and
        by returning the right set of pairs.
    """
    class_test = 'aerialway'
    type_test = 'zip_line'

    #Converted output set to a dict for easy assert further.
    results = dict(special_phrases_importer._process_xml_content(get_test_xml_wiki_content(), 'en'))

    assert check_amenities_with_op(temp_db_conn)
    assert check_amenities_without_op(temp_db_conn)
    assert results[class_test] and type_test in results.values()

def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table,
                          getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
    """
        Check that the main import_from_wiki() method is well executed.
        It should create the place_classtype table, the place_id and centroid indexes,
        grand access to the web user and executing the SQL functions for amenities.
    """
    monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
    special_phrases_importer.import_from_wiki(['en'])

    class_test = 'aerialway'
    type_test = 'zip_line'

    assert check_table_exist(temp_db_conn, class_test, type_test)
    assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
    assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
    assert check_amenities_with_op(temp_db_conn)
    assert check_amenities_without_op(temp_db_conn)

def mock_get_wiki_content(lang):
    """
        Mock the _get_wiki_content() method to return
        static xml test file content.
    """
    return get_test_xml_wiki_content()

def get_test_xml_wiki_content():
    """
        return the content of the static xml test file.
    """
    xml_test_content_path = (TEST_BASE_DIR / 'testdata' / 'special_phrases_test_content.txt').resolve()
    with open(xml_test_content_path) as xml_content_reader:
        return xml_content_reader.read()

def check_table_exist(temp_db_conn, phrase_class, phrase_type):
    """
        Verify that the place_classtype table exists for the given
        phrase_class and phrase_type.
    """
    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)

    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("""
            SELECT *
            FROM information_schema.tables
            WHERE table_type='BASE TABLE'
            AND table_name='{}'""".format(table_name))
        return temp_db_cursor.fetchone()

def check_grant_access(temp_db_conn, user, phrase_class, phrase_type):
    """
        Check that the web user has been granted right access to the
        place_classtype table of the given phrase_class and phrase_type.
    """
    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)

    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("""
                SELECT * FROM information_schema.role_table_grants
                WHERE table_name='{}'
                AND grantee='{}'
                AND privilege_type='SELECT'""".format(table_name, user))
        return temp_db_cursor.fetchone()

def check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type):
    """
        Check that the place_id index and centroid index exist for the
        place_classtype table of the given phrase_class and phrase_type.
    """
    index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)

    return (
        temp_db_conn.index_exists(index_prefix + 'centroid')
        and
        temp_db_conn.index_exists(index_prefix + 'place_id')
    )

def check_amenities_with_op(temp_db_conn):
    """
        Check that the test table for the SQL function getorcreate_amenityoperator()
        contains more than one value (so that the SQL function was call more than one time).
    """
    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("SELECT * FROM temp_with_operator")
        return len(temp_db_cursor.fetchall()) > 1

def check_amenities_without_op(temp_db_conn):
    """
        Check that the test table for the SQL function getorcreate_amenity()
        contains more than one value (so that the SQL function was call more than one time).
    """
    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("SELECT * FROM temp_without_operator")
        return len(temp_db_cursor.fetchall()) > 1

@pytest.fixture
def special_phrases_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
    """
        Return an instance of SpecialPhrasesImporter.
    """
    return SpecialPhrasesImporter(def_config, temp_phplib_dir_with_migration, temp_db_conn)

@pytest.fixture
def temp_phplib_dir_with_migration():
    """
        Return temporary phpdir with migration subdirectory and
        PhraseSettingsToJson.php script inside.
    """
    migration_file = (TEST_BASE_DIR / '..' / 'lib-php' / 'migration'
                      / 'PhraseSettingsToJson.php').resolve()
    with tempfile.TemporaryDirectory() as phpdir:
        (Path(phpdir) / 'migration').mkdir()
        migration_dest_path = (Path(phpdir) / 'migration' / 'PhraseSettingsToJson.php').resolve()
        copyfile(migration_file, migration_dest_path)

        yield Path(phpdir)

@pytest.fixture
def make_strandard_name_func(temp_db_cursor):
    temp_db_cursor.execute("""
        CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
        BEGIN
        RETURN trim(name); --Basically return only the trimed name for the tests
        END;
        $$ LANGUAGE plpgsql IMMUTABLE;""")

@pytest.fixture
def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
    temp_db_cursor.execute("""
        CREATE TABLE temp_without_operator(op TEXT);

        CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
                                                       lookup_class text, lookup_type text)
        RETURNS void as $$
        BEGIN
            INSERT INTO temp_without_operator VALUES('no_operator');
        END;
        $$ LANGUAGE plpgsql""")

@pytest.fixture
def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
    temp_db_cursor.execute("""
        CREATE TABLE temp_with_operator(op TEXT);

        CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
                                                               lookup_class text, lookup_type text, op text)
        RETURNS void as $$
        BEGIN
            INSERT INTO temp_with_operator VALUES(op);
        END;
        $$ LANGUAGE plpgsql""")
78  test/testdata/special_phrases_test_content.txt  (vendored, new file)
@@ -0,0 +1,78 @@
<mediawiki xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.11/ http://www.mediawiki.org/xml/export-0.11.xsd" version="0.11" xml:lang="en">
  <siteinfo>
    <sitename>OpenStreetMap Wiki</sitename>
    <dbname>wiki</dbname>
    <base>https://wiki.openstreetmap.org/wiki/Main_Page</base>
    <generator>MediaWiki 1.35.1</generator>
    <case>first-letter</case>
    <namespaces>
      <namespace key="-2" case="first-letter">Media</namespace>
      <namespace key="-1" case="first-letter">Special</namespace>
      <namespace key="0" case="first-letter"/>
      <namespace key="1" case="first-letter">Talk</namespace>
      <namespace key="2" case="first-letter">User</namespace>
      <namespace key="3" case="first-letter">User talk</namespace>
      <namespace key="4" case="first-letter">Wiki</namespace>
      <namespace key="5" case="first-letter">Wiki talk</namespace>
      <namespace key="6" case="first-letter">File</namespace>
      <namespace key="7" case="first-letter">File talk</namespace>
      <namespace key="8" case="first-letter">MediaWiki</namespace>
      <namespace key="9" case="first-letter">MediaWiki talk</namespace>
      <namespace key="10" case="first-letter">Template</namespace>
      <namespace key="11" case="first-letter">Template talk</namespace>
      <namespace key="12" case="first-letter">Help</namespace>
      <namespace key="13" case="first-letter">Help talk</namespace>
      <namespace key="14" case="first-letter">Category</namespace>
      <namespace key="15" case="first-letter">Category talk</namespace>
      <namespace key="120" case="first-letter">Item</namespace>
      <namespace key="121" case="first-letter">Item talk</namespace>
      <namespace key="122" case="first-letter">Property</namespace>
      <namespace key="123" case="first-letter">Property talk</namespace>
      <namespace key="200" case="first-letter">DE</namespace>
      <namespace key="201" case="first-letter">DE talk</namespace>
      <namespace key="202" case="first-letter">FR</namespace>
      <namespace key="203" case="first-letter">FR talk</namespace>
      <namespace key="204" case="first-letter">ES</namespace>
      <namespace key="205" case="first-letter">ES talk</namespace>
      <namespace key="206" case="first-letter">IT</namespace>
      <namespace key="207" case="first-letter">IT talk</namespace>
      <namespace key="208" case="first-letter">NL</namespace>
      <namespace key="209" case="first-letter">NL talk</namespace>
      <namespace key="210" case="first-letter">RU</namespace>
      <namespace key="211" case="first-letter">RU talk</namespace>
      <namespace key="212" case="first-letter">JA</namespace>
      <namespace key="213" case="first-letter">JA talk</namespace>
      <namespace key="710" case="first-letter">TimedText</namespace>
      <namespace key="711" case="first-letter">TimedText talk</namespace>
      <namespace key="828" case="first-letter">Module</namespace>
      <namespace key="829" case="first-letter">Module talk</namespace>
      <namespace key="2300" case="first-letter">Gadget</namespace>
      <namespace key="2301" case="first-letter">Gadget talk</namespace>
      <namespace key="2302" case="case-sensitive">Gadget definition</namespace>
      <namespace key="2303" case="case-sensitive">Gadget definition talk</namespace>
    </namespaces>
  </siteinfo>
  <page>
    <title>Nominatim/Special Phrases/EN</title>
    <ns>0</ns>
    <id>67365</id>
    <revision>
      <id>2100424</id>
      <parentid>2100422</parentid>
      <timestamp>2021-01-27T20:29:53Z</timestamp>
      <contributor>
        <username>Violaine Do</username>
        <id>88152</id>
      </contributor>
      <minor/>
      <comment>/* en */ add coworking amenity</comment>
      <origin>2100424</origin>
      <model>wikitext</model>
      <format>text/x-wiki</format>
      <text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve">
== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Zip Lines near || aerialway || zip_line || near || Y |- | Zip Wire || aerialway || zip_line || - || N |- | Zip Wires || aerialway || zip_line || - || Y |- | Zip Wire in || aerialway || zip_line || in || N |- | Zip Wires in || aerialway || zip_line || in || Y |- | Zip Wire near || aerialway || zip_line || near || N |} [[Category:Word list]]
      </text>
      <sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1>
    </revision>
  </page>
</mediawiki>
0  test/testfiles/phrase-settings.json  (new file, empty)
0  test/testfiles/random_file.html  (new file, empty)
@@ -40,9 +40,9 @@
        php-pgsql php php-intl libpqxx-devel \
        proj-epsg bzip2-devel proj-devel boost-devel \
        python3-pip python3-setuptools python3-devel \
        expat-devel zlib-devel
        expat-devel zlib-devel libicu-dev

    pip3 install --user psycopg2 python-dotenv psutil Jinja2
    pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU


#

@@ -33,9 +33,9 @@
        php-pgsql php php-intl php-json libpq-devel \
        bzip2-devel proj-devel boost-devel \
        python3-pip python3-setuptools python3-devel \
        expat-devel zlib-devel
        expat-devel zlib-devel libicu-dev

    pip3 install --user psycopg2 python-dotenv psutil Jinja2
    pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU


#

@@ -29,8 +29,8 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
        libbz2-dev libpq-dev libproj-dev \
        postgresql-server-dev-10 postgresql-10-postgis-2.4 \
        postgresql-contrib-10 postgresql-10-postgis-scripts \
        php php-pgsql php-intl python3-pip \
        python3-psycopg2 python3-psutil python3-jinja2 git
        php php-pgsql php-intl libicu-dev python3-pip \
        python3-psycopg2 python3-psutil python3-jinja2 python3-icu git

# The python-dotenv package that comes with Ubuntu 18.04 is too old, so
# install the latest version from pip:

@@ -32,8 +32,8 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
        libbz2-dev libpq-dev libproj-dev \
        postgresql-server-dev-12 postgresql-12-postgis-3 \
        postgresql-contrib-12 postgresql-12-postgis-3-scripts \
        php php-pgsql php-intl python3-dotenv \
        python3-psycopg2 python3-psutil python3-jinja2 git
        php php-pgsql php-intl libicu-dev python3-dotenv \
        python3-psycopg2 python3-psutil python3-jinja2 python3-icu git

#
# System Configuration