Merge pull request #2228 from AntoJvlt/import-special-phrases-porting-python

Import special phrases porting python
This commit is contained in:
Sarah Hoffmann
2021-03-29 09:49:35 +02:00
committed by GitHub
28 changed files with 816 additions and 206 deletions

View File

@@ -6,7 +6,7 @@ runs:
steps:
- name: Install prerequisites
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu
shell: bash
- name: Download dependencies

View File

@@ -120,7 +120,7 @@ jobs:
working-directory: data-env
- name: Import special phrases
run: nominatim special-phrases --from-wiki | psql -d nominatim
run: nominatim special-phrases --import-from-wiki
working-directory: data-env
- name: Check import

1
.gitignore vendored
View File

@@ -9,3 +9,4 @@ data/wiki_specialphrases.sql
data/osmosischange.osc
.vagrant
data/country_osm_grid.sql.gz

View File

@@ -1,6 +1,7 @@
[MASTER]
extension-pkg-whitelist=osmium
ignored-modules=icu
[MESSAGES CONTROL]

View File

@@ -114,7 +114,6 @@ if (BUILD_IMPORTER)
export.php
query.php
setup.php
specialphrases.php
update.php
warm.php
)
@@ -259,7 +258,7 @@ endif()
install(FILES settings/env.defaults
settings/address-levels.json
settings/phrase_settings.php
settings/phrase-settings.json
settings/import-admin.style
settings/import-street.style
settings/import-address.style

View File

@@ -268,10 +268,9 @@ running this function.
If you want to be able to search for places by their type through
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to enable these key phrases like this:
you also need to import these key phrases like this:
nominatim special-phrases --from-wiki > specialphrases.sql
psql -d nominatim -f specialphrases.sql
nominatim special-phrases --import-from-wiki
Note that this command downloads the phrases from the wiki link above. You
need internet access for this step.

View File

@@ -30,6 +30,7 @@ For compiling:
* [proj](https://proj.org/)
* [bzip2](http://www.bzip.org/)
* [zlib](https://www.zlib.net/)
* [ICU](http://site.icu-project.org/)
* [Boost libraries](https://www.boost.org/), including system and filesystem
* PostgreSQL client libraries
* a recent C++ compiler (gcc 5+ or Clang 3.8+)
@@ -43,6 +44,7 @@ For running Nominatim:
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
* [PyICU](https://pypi.org/project/PyICU/)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)

View File

@@ -1,163 +1,11 @@
<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_LibDir.'/init-cmd.php');
ini_set('memory_limit', '800M');
ini_set('display_errors', 'stderr');
$aCMDOptions
= array(
'Import and export special phrases',
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
loadSettings(getcwd());
loadSettings($aCMDResult['project-dir'] ?? getcwd());
setupHTTPProxy();
include(getSettingConfig('PHRASE_CONFIG', 'phrase_settings.php'));
if ($aCMDResult['wiki-import']) {
$oNormalizer = Transliterator::createFromRules(getSetting('TERM_NORMALIZATION'));
$aPairs = array();
$sLanguageIn = getSetting(
'LANGUAGES',
'af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi'
);
foreach (explode(',', $sLanguageIn) as $sLanguage) {
$sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
$sWikiPageXML = file_get_contents($sURL);
if (!preg_match_all(
'#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#',
$sWikiPageXML,
$aMatches,
PREG_SET_ORDER
)) {
continue;
}
foreach ($aMatches as $aMatch) {
$sLabel = trim($aMatch[1]);
if ($oNormalizer !== null) {
$sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
} else {
$sTrans = null;
}
$sClass = trim($aMatch[2]);
$sType = trim($aMatch[3]);
// hack around a bug where building=yes was imported with
// quotes into the wiki
$sType = preg_replace('/(&quot;|")/', '', $sType);
// sanity check, in case somebody added garbage in the wiki
if (preg_match('/^\\w+$/', $sClass) < 1
|| preg_match('/^\\w+$/', $sType) < 1
) {
trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
exit;
}
// blacklisting: disallow certain class/type combinations
if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
// fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n");
continue;
}
// whitelisting: if class is in whitelist, allow only tags in the list
if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
// fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n");
continue;
}
$aPairs[$sClass.'|'.$sType] = array($sClass, $sType);
switch (trim($aMatch[4])) {
case 'near':
printf(
"SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'near');\n",
pg_escape_string($sLabel),
$sTrans,
$sClass,
$sType
);
break;
case 'in':
printf(
"SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'in');\n",
pg_escape_string($sLabel),
$sTrans,
$sClass,
$sType
);
break;
default:
printf(
"SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n",
pg_escape_string($sLabel),
$sTrans,
$sClass,
$sType
);
break;
}
}
}
echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);';
foreach ($aPairs as $aPair) {
$sql_tablespace = getSetting('TABLESPACE_AUX_DATA');
if ($sql_tablespace) {
$sql_tablespace = ' TABLESPACE '.$sql_tablespace;
}
printf(
'CREATE TABLE place_classtype_%s_%s'
. $sql_tablespace
. ' AS'
. ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex'
. " WHERE class = '%s' AND type = '%s'"
. ";\n",
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1]),
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1])
);
printf(
'CREATE INDEX idx_place_classtype_%s_%s_centroid'
. ' ON place_classtype_%s_%s USING GIST (centroid)'
. $sql_tablespace
. ";\n",
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1]),
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1])
);
printf(
'CREATE INDEX idx_place_classtype_%s_%s_place_id'
. ' ON place_classtype_%s_%s USING btree(place_id)'
. $sql_tablespace
. ";\n",
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1]),
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1])
);
printf(
'GRANT SELECT ON place_classtype_%s_%s TO "%s"'
. ";\n",
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1]),
getSetting('DATABASE_WEBUSER')
);
}
echo 'DROP INDEX idx_placex_classtype;';
}
(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
->addParams('special-phrases', '--import-from-wiki')
->run();

View File

@@ -0,0 +1,19 @@
<?php
// Migration helper: converts a legacy PHP phrase settings file into the
// JSON format read by the Python special phrases importer.
//
// Usage: php PhraseSettingsToJson.php <path/to/phrase_settings.php>
//
// The JSON file is written next to the PHP file, with the same base name
// and a '.json' extension. An existing JSON file is never overwritten.
// The script exits with a non-zero status when the conversion fails so
// that the calling process can detect the error.

if (!isset($argv[1])) {
    fwrite(STDERR, "Usage: php PhraseSettingsToJson.php <phrase_settings.php>\n");
    exit(1);
}

$phpPhraseSettingsFile = $argv[1];
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';

if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
    // The legacy file is expected to define $aTagsBlacklist and/or $aTagsWhitelist.
    include $phpPhraseSettingsFile;

    $data = array();

    if (isset($aTagsBlacklist)) {
        $data['blackList'] = $aTagsBlacklist;
    }
    if (isset($aTagsWhitelist)) {
        $data['whiteList'] = $aTagsWhitelist;
    }

    $sJson = json_encode($data);
    if ($sJson === false) {
        fwrite(STDERR, 'Cannot encode phrase settings as JSON: '.json_last_error_msg()."\n");
        exit(1);
    }

    // file_put_contents() returns false when the file cannot be opened or written.
    if (file_put_contents($jsonPhraseSettingsFile, $sJson) === false) {
        fwrite(STDERR, 'Cannot write '.$jsonPhraseSettingsFile."\n");
        exit(1);
    }
}

View File

@@ -112,30 +112,6 @@ class CommandlineParser:
# pylint: disable=C0111
# Using non-top-level imports to make pyosmium optional for replication only.
# pylint: disable=E0012,C0415
class SetupSpecialPhrases:
"""\
Maintain special phrases.
"""
@staticmethod
def add_args(parser):
group = parser.add_argument_group('Input arguments')
group.add_argument('--from-wiki', action='store_true',
help='Pull special phrases from the OSM wiki.')
group = parser.add_argument_group('Output arguments')
group.add_argument('-o', '--output', default='-',
help="""File to write the preprocessed phrases to.
If omitted, it will be written to stdout.""")
@staticmethod
def run(args):
if args.output != '-':
raise NotImplementedError('Only output to stdout is currently implemented.')
return run_legacy_script('specialphrases.php', '--wiki-import', nominatim_env=args)
class UpdateAddData:
"""\
Add additional data from a file or an online source.
@@ -278,7 +254,7 @@ def nominatim(**kwargs):
parser.add_subcommand('freeze', clicmd.SetupFreeze)
parser.add_subcommand('replication', clicmd.UpdateReplication)
parser.add_subcommand('special-phrases', SetupSpecialPhrases)
parser.add_subcommand('special-phrases', clicmd.ImportSpecialPhrases)
parser.add_subcommand('add-data', UpdateAddData)
parser.add_subcommand('index', clicmd.UpdateIndex)

View File

@@ -10,3 +10,4 @@ from .refresh import UpdateRefresh
from .admin import AdminFuncs
from .freeze import SetupFreeze
from .transition import AdminTransition
from .special_phrases import ImportSpecialPhrases

View File

@@ -0,0 +1,31 @@
"""
Implementation of the 'special-phrases' command.
"""
import logging
from nominatim.tools.special_phrases import SpecialPhrasesImporter
from nominatim.db.connection import connect
LOG = logging.getLogger()
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
class ImportSpecialPhrases:
    """\
    Import special phrases.
    """

    @staticmethod
    def add_args(parser):
        # Register the switches understood by the 'special-phrases' subcommand.
        input_group = parser.add_argument_group('Input arguments')
        input_group.add_argument(
            '--import-from-wiki', action='store_true',
            help='Import special phrases from the OSM wiki to the database.')

    @staticmethod
    def run(args):
        # Nothing to do unless the wiki import was explicitly requested.
        if not args.import_from_wiki:
            return 0

        LOG.warning('Special phrases importation starting')
        with connect(args.config.get_libpq_dsn()) as db_connection:
            importer = SpecialPhrasesImporter(args.config, args.phplib_dir, db_connection)
            importer.import_from_wiki()

        return 0

View File

@@ -0,0 +1,278 @@
"""
Functions to import special phrases into the database.
"""
import logging
import os
from pathlib import Path
import re
import subprocess
import json
from os.path import isfile
from icu import Transliterator
from psycopg2.sql import Identifier, Literal, SQL
from nominatim.tools.exec_utils import get_url
from nominatim.errors import UsageError
LOG = logging.getLogger()
class SpecialPhrasesImporter():
    # pylint: disable-msg=too-many-instance-attributes
    # pylint: disable-msg=too-few-public-methods
    """
        Class handling the process of special phrases importations.

        The importer downloads the special phrases pages from the OSM wiki,
        registers every accepted phrase through the getorcreate_amenity*()
        SQL functions and creates one place_classtype_<class>_<type> table
        (with indexes and a read grant) per class/type pair.
    """
    def __init__(self, config, phplib_dir, db_connection) -> None:
        self.db_connection = db_connection
        self.config = config
        self.phplib_dir = phplib_dir
        self.black_list, self.white_list = self._load_white_and_black_lists()
        # Compile the regexes once, they are applied to every phrase.
        self.occurence_pattern = re.compile(
            r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
        )
        self.sanity_check_pattern = re.compile(r'^\w+$')
        self.transliterator = Transliterator.createFromRules("special-phrases normalizer",
                                                             self.config.TERM_NORMALIZATION)

    def import_from_wiki(self, languages=None):
        """
            Iterate through all specified languages and
            extract corresponding special phrases from the wiki.

            `languages` is an optional list of language codes. When it is
            None or empty, the configured (or default) languages are used.
            Raises TypeError when `languages` is neither None nor a list.
            The changes are committed on the connection at the end.
        """
        if languages is not None and not isinstance(languages, list):
            raise TypeError('The \'languages\' argument should be of type list.')

        # Get all languages to process.
        languages = languages or self._load_languages()

        # Store pairs of class/type for further processing.
        class_type_pairs = set()

        for lang in languages:
            LOG.warning('Import phrases for lang: %s', lang)
            # Called through the class (not self) so that the static method
            # can be replaced by a plain function.
            wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
            class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))

        self._create_place_classtype_table_and_indexes(class_type_pairs)
        self.db_connection.commit()
        LOG.warning('Import done.')

    def _load_white_and_black_lists(self):
        """
            Load white and black lists from phrases-settings.json.

            Returns a tuple (black_list, white_list) of dictionaries mapping
            a class to a list of types. A list that is missing or empty in
            the settings file is returned as an empty dictionary.
        """
        settings_path = (self.config.config_dir / 'phrase-settings.json').resolve()

        if self.config.PHRASE_CONFIG:
            settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)

        with open(settings_path, "r", encoding='utf-8') as json_settings:
            settings = json.load(json_settings)

        # A converted legacy file without any list is encoded as an empty
        # JSON array, and empty PHP arrays are encoded as [] as well.
        if not isinstance(settings, dict):
            settings = {}

        return settings.get('blackList') or {}, settings.get('whiteList') or {}

    def _load_languages(self):
        """
            Get list of all languages from env config file
            or default if there is no languages configured.
            The system will extract special phrases only from all specified languages.

            The configured value is a comma-separated string and is always
            returned as a list of language codes.
        """
        default_languages = [
            'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
            'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
            'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
            'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']

        configured = self.config.LANGUAGES
        if not configured:
            return default_languages

        if isinstance(configured, str):
            # Iterating over the raw string would yield single characters.
            return [lang.strip() for lang in configured.split(',') if lang.strip()]

        return list(configured)

    @staticmethod
    def _get_wiki_content(lang):
        """
            Request and return the wiki page's content
            corresponding to special phrases for a given lang.
            Requested URL Example :
                https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
        """
        url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
        return get_url(url)

    def _check_sanity(self, lang, phrase_class, phrase_type):
        """
            Check sanity of given inputs in case somebody added garbage in the wiki.
            If a bad class/type is detected a UsageError is raised.
        """
        class_is_sane = self.sanity_check_pattern.match(phrase_class)
        type_is_sane = self.sanity_check_pattern.match(phrase_type)

        if not class_is_sane or not type_is_sane:
            raise UsageError("Bad class/type for language {}: {}={}".format(
                lang, phrase_class, phrase_type))

    def _process_xml_content(self, xml_content, lang):
        """
            Process given xml content by extracting matching patterns.
            Matching patterns are processed there and returned in a
            set of class/type pairs.
        """
        # One match will be of format [label, class, type, operator, plural]
        matches = self.occurence_pattern.findall(xml_content)
        # Store pairs of class/type for further processing
        class_type_pairs = set()

        for match in matches:
            phrase_label = match[0].strip()
            normalized_label = self.transliterator.transliterate(phrase_label)
            phrase_class = match[1].strip()
            phrase_type = match[2].strip()
            phrase_operator = match[3].strip()
            # hack around a bug where building=yes was imported with quotes into the wiki
            phrase_type = re.sub(r'\"|&quot;', '', phrase_type)

            # sanity check, in case somebody added garbage in the wiki
            self._check_sanity(lang, phrase_class, phrase_type)

            # blacklisting: disallow certain class/type combinations
            if phrase_class in self.black_list \
               and phrase_type in self.black_list[phrase_class]:
                continue

            # whitelisting: if class is in whitelist, allow only tags in the list
            if phrase_class in self.white_list \
               and phrase_type not in self.white_list[phrase_class]:
                continue

            # add class/type to the pairs set
            class_type_pairs.add((phrase_class, phrase_type))

            self._process_amenity(
                phrase_label, normalized_label, phrase_class,
                phrase_type, phrase_operator
            )

        return class_type_pairs

    def _process_amenity(self, phrase_label, normalized_label,
                         phrase_class, phrase_type, phrase_operator):
        # pylint: disable-msg=too-many-arguments
        """
            Add phrase lookup and corresponding class and
            type to the word table based on the operator.

            The operators 'near' and 'in' are forwarded to
            getorcreate_amenityoperator(), any other value results in a
            call to getorcreate_amenity().
        """
        with self.db_connection.cursor() as db_cursor:
            if phrase_operator in ('near', 'in'):
                db_cursor.execute("""SELECT getorcreate_amenityoperator(
                                  make_standard_name(%s), %s, %s, %s, %s)""",
                                  (phrase_label, normalized_label, phrase_class,
                                   phrase_type, phrase_operator))
            else:
                db_cursor.execute("""SELECT getorcreate_amenity(
                                  make_standard_name(%s), %s, %s, %s)""",
                                  (phrase_label, normalized_label, phrase_class, phrase_type))

    def _create_place_classtype_table_and_indexes(self, class_type_pairs):
        """
            Create table place_classtype for each given pair.
            Also create indexes on place_id and centroid.
        """
        LOG.warning('Create tables and indexes...')

        # An unset tablespace must yield an empty clause, never the text 'None'.
        configured_tablespace = self.config.TABLESPACE_AUX_DATA
        sql_tablespace = ' TABLESPACE ' + configured_tablespace if configured_tablespace else ''

        # Temporary index on placex (class, type), dropped again at the end.
        with self.db_connection.cursor() as db_cursor:
            db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")

        for phrase_class, phrase_type in class_type_pairs:
            # Table creation
            self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)

            # Indexes creation
            self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type)

            # Grant access on read to the web user.
            self._grant_access_to_webuser(phrase_class, phrase_type)

        with self.db_connection.cursor() as db_cursor:
            db_cursor.execute("DROP INDEX idx_placex_classtype")

    def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
        """
            Create table place_classtype of the given phrase_class/phrase_type
            if it doesn't exist.

            `sql_tablespace` is either an empty string or a complete
            ' TABLESPACE <name>' clause.
        """
        table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
        with self.db_connection.cursor() as db_cursor:
            db_cursor.execute(SQL("""
                    CREATE TABLE IF NOT EXISTS {} {}
                    AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
                    WHERE class = {} AND type = {}""")
                              .format(Identifier(table_name), SQL(sql_tablespace or ''),
                                      Literal(phrase_class), Literal(phrase_type)))

    def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
        """
            Create indexes on centroid and place_id for the place_classtype table.
            An index that already exists is left untouched.

            `sql_tablespace` is either an empty string or a complete
            ' TABLESPACE <name>' clause.
        """
        index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
        base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
        tablespace_clause = SQL(sql_tablespace or '')

        index_definitions = (
            ('centroid', SQL('GIST (centroid)')),
            ('place_id', SQL('btree(place_id)')),
        )

        for suffix, index_method in index_definitions:
            index_name = index_prefix + suffix
            if self.db_connection.index_exists(index_name):
                continue

            with self.db_connection.cursor() as db_cursor:
                db_cursor.execute(SQL("CREATE INDEX {} ON {} USING {} {}")
                                  .format(Identifier(index_name), Identifier(base_table),
                                          index_method, tablespace_clause))

    def _grant_access_to_webuser(self, phrase_class, phrase_type):
        """
            Grant access on read to the table place_classtype for the webuser.
        """
        table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
        with self.db_connection.cursor() as db_cursor:
            db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
                              .format(Identifier(table_name),
                                      Identifier(self.config.DATABASE_WEBUSER)))

    def _convert_php_settings_if_needed(self, file_path):
        """
            Convert php settings file of special phrases to json file if it is still in php format.

            Returns the resolved path of the json settings file. Raises a
            UsageError when `file_path` is not an existing file or has an
            extension other than '.php' or '.json'. A failure of the
            conversion script is logged and re-raised.
        """
        if not isfile(file_path):
            raise UsageError(str(file_path) + ' is not a valid file.')

        file, extension = os.path.splitext(file_path)
        json_file_path = Path(file + '.json').resolve()

        if extension not in ('.php', '.json'):
            raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')

        # Only convert when no json file sits next to the php file yet.
        if extension == '.php' and not isfile(json_file_path):
            migration_script = (self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve()
            try:
                subprocess.run(['/usr/bin/env', 'php', '-Cq',
                                str(migration_script), str(file_path)], check=True)
            except subprocess.CalledProcessError:
                LOG.error('Error while converting %s to json.', file_path)
                raise
            LOG.warning('special_phrase configuration file has been converted to json.')

        return json_file_path

View File

@@ -77,7 +77,7 @@ NOMINATIM_TIGER_DATA_PATH=
NOMINATIM_WIKIPEDIA_DATA_PATH=
# Configuration file for special phrase import.
# When unset, the internal default settings from 'settings/phrase_settings.php'
# When unset, the internal default settings from 'settings/phrase-settings.json'
# are used.
NOMINATIM_PHRASE_CONFIG=

View File

@@ -0,0 +1,25 @@
{
"Comments": [
"The black list contains the class/type combinations to exclude.",
"If a class is in the white list then all types will",
"be ignored except the ones given in the list.",
"Also use this list to exclude an entire class from special phrases."
],
"blackList": {
"boundary": [
"administrative"
],
"place": [
"house",
"houses"
]
},
"whiteList": {
"highway": [
"bus_stop",
"rest_area",
"raceway"
],
"building": []
}
}

View File

@@ -5,6 +5,7 @@ from pathlib import Path
import psycopg2
import psycopg2.extras
import pytest
import tempfile
SRC_DIR = Path(__file__) / '..' / '..' / '..'
@@ -133,6 +134,13 @@ def def_config():
def src_dir():
return SRC_DIR.resolve()
@pytest.fixture
def tmp_phplib_dir():
    """ Yield a temporary directory that contains an empty 'admin'
        subdirectory. The directory is removed once the test is done.
    """
    with tempfile.TemporaryDirectory() as phpdir:
        php_root = Path(phpdir)
        (php_root / 'admin').mkdir()

        yield php_root
@pytest.fixture
def status_table(temp_db_conn):
""" Create an empty version of the status table and

BIN
test/python/sample.tar.gz Normal file

Binary file not shown.

View File

@@ -64,7 +64,6 @@ def test_cli_help(capsys):
@pytest.mark.parametrize("command,script", [
(('special-phrases',), 'specialphrases'),
(('add-data', '--file', 'foo.osm'), 'update'),
(('export',), 'export')
])
@@ -172,6 +171,12 @@ def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ra
assert bnd_mock.called == do_bnds
assert rank_mock.called == do_ranks
def test_special_phrases_command(temp_db, mock_func_factory):
    # The importer method is mocked, so only the command dispatch is checked.
    importer_class = nominatim.clicmd.special_phrases.SpecialPhrasesImporter
    func = mock_func_factory(importer_class, 'import_from_wiki')

    call_nominatim('special-phrases', '--import-from-wiki')

    assert func.called == 1
@pytest.mark.parametrize("command,func", [
('postcodes', 'update_postcodes'),

View File

@@ -9,13 +9,6 @@ import pytest
import nominatim.tools.exec_utils as exec_utils
@pytest.fixture
def tmp_phplib_dir():
with tempfile.TemporaryDirectory() as phpdir:
(Path(phpdir) / 'admin').mkdir()
yield Path(phpdir)
@pytest.fixture
def nominatim_env(tmp_phplib_dir, def_config):
class _NominatimEnv:

View File

@@ -0,0 +1,346 @@
"""
Tests for import special phrases methods
of the class SpecialPhrasesImporter.
"""
from nominatim.errors import UsageError
from pathlib import Path
import tempfile
from shutil import copyfile
import pytest
from nominatim.tools.special_phrases import SpecialPhrasesImporter
TEST_BASE_DIR = Path(__file__) / '..' / '..'
def test_check_sanity_class(special_phrases_importer):
    """
        Check the _check_sanity() method.
        An empty class or an empty type must raise a UsageError.
        A well-formed class and type must pass without any error.
    """
    raised = []
    for bad_class, bad_type in (('', 'type'), ('class', '')):
        with pytest.raises(UsageError) as exception_info:
            special_phrases_importer._check_sanity('en', bad_class, bad_type)
        raised.append(exception_info)

    special_phrases_importer._check_sanity('en', 'class', 'type')

    assert all(raised)
def test_load_white_and_black_lists(special_phrases_importer):
    """
        Test that _load_white_and_black_lists() returns the black list
        and the white list and that both are dictionaries.
    """
    loaded_lists = special_phrases_importer._load_white_and_black_lists()
    black_list, white_list = loaded_lists

    assert isinstance(black_list, dict)
    assert isinstance(white_list, dict)
def test_convert_php_settings(special_phrases_importer):
    """
        Test that _convert_php_settings_if_needed() converts the given
        php file into a json file located in the same directory.
    """
    source_php = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.php').resolve()

    with tempfile.TemporaryDirectory() as temp_dir:
        work_dir = Path(temp_dir)
        # Work on a copy so that the json file is created in the temp dir.
        php_copy = (work_dir / 'phrase_settings.php').resolve()
        copyfile(source_php, php_copy)

        special_phrases_importer._convert_php_settings_if_needed(php_copy)

        assert (work_dir / 'phrase_settings.json').is_file()
def test_convert_settings_wrong_file(special_phrases_importer):
    """
        Test that _convert_php_settings_if_needed() raises a UsageError
        when the given path does not point to an existing file.
    """
    missing_path = 'random_file'

    with pytest.raises(UsageError) as exceptioninfos:
        special_phrases_importer._convert_php_settings_if_needed(missing_path)

    message = str(exceptioninfos.value)
    assert message == 'random_file is not a valid file.'
def test_convert_settings_json_already_exist(special_phrases_importer):
    """
        Test that '_convert_php_settings_if_needed' returns the path of the
        json file when it is given a php file path for which the
        corresponding json file already exists.
    """
    testfiles_dir = TEST_BASE_DIR / 'testfiles'
    php_file = (testfiles_dir / 'phrase_settings.php').resolve()
    json_file = (testfiles_dir / 'phrase_settings.json').resolve()

    returned = special_phrases_importer._convert_php_settings_if_needed(php_file)

    assert returned == json_file
def test_convert_settings_giving_json(special_phrases_importer):
    """
        Test that '_convert_php_settings_if_needed' directly returns the
        same path when it is given the path of a json file.
    """
    json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase-settings.json').resolve()

    result_path = special_phrases_importer._convert_php_settings_if_needed(json_file)

    assert result_path == json_file
def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
                                       word_table, temp_db_conn):
    """
        Test that _process_amenity() calls the
        getorcreate_amenityoperator() SQL function and that
        the two different operators are both handled.
    """
    for operator in ('near', 'in'):
        special_phrases_importer._process_amenity('', '', '', '', operator)

    with temp_db_conn.cursor() as cursor:
        cursor.execute("SELECT * FROM temp_with_operator WHERE op='near' OR op='in'")
        rows = cursor.fetchall()

    assert len(rows) == 2
def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs,
                                          temp_db_conn):
    """
        Test that _process_amenity() calls the
        getorcreate_amenity() SQL function when no operator is given.
    """
    special_phrases_importer._process_amenity('', '', '', '', '')

    with temp_db_conn.cursor() as cursor:
        cursor.execute("SELECT * FROM temp_without_operator WHERE op='no_operator'")
        row = cursor.fetchone()

    assert row
def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
    """
        Test that _create_place_classtype_indexes() creates the
        place_id index and centroid index on the right place_classtype table.
    """
    phrase_class = 'class'
    phrase_type = 'type'
    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)

    with temp_db_conn.cursor() as temp_db_cursor:
        # IF NOT EXISTS keeps the test working when the extension has
        # already been installed in the test database.
        temp_db_cursor.execute("CREATE EXTENSION IF NOT EXISTS postgis;")
        temp_db_cursor.execute(
            'CREATE TABLE {}(place_id BIGINT, centroid GEOMETRY)'.format(table_name))

    special_phrases_importer._create_place_classtype_indexes('', phrase_class, phrase_type)

    assert check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type)
def test_create_place_classtype_table(temp_db_conn, placex_table, special_phrases_importer):
    """
        Test that _create_place_classtype_table() creates
        the place_classtype table of the given class and type.
    """
    class_name, type_name = 'class', 'type'

    special_phrases_importer._create_place_classtype_table('', class_name, type_name)

    assert check_table_exist(temp_db_conn, class_name, type_name)
def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_importer):
    """
        Test that _grant_access_to_webuser() gives
        read access on the table to the web user.
    """
    class_name, type_name = 'class', 'type'
    table_name = 'place_classtype_{}_{}'.format(class_name, type_name)

    # The grant needs an existing table, an empty one is sufficient.
    with temp_db_conn.cursor() as cursor:
        cursor.execute('CREATE TABLE {}()'.format(table_name))

    special_phrases_importer._grant_access_to_webuser(class_name, type_name)

    web_user = def_config.DATABASE_WEBUSER
    assert check_grant_access(temp_db_conn, web_user, class_name, type_name)
def test_create_place_classtype_table_and_indexes(
        temp_db_conn, def_config, placex_table, getorcreate_amenity_funcs,
        getorcreate_amenityoperator_funcs, special_phrases_importer):
    """
        Test that _create_place_classtype_table_and_indexes()
        creates, for every pair of the given set, the place_classtype
        table, the place_id and centroid indexes and grants read access
        to the web user.
    """
    pairs = {('class1', 'type1'), ('class2', 'type2')}

    special_phrases_importer._create_place_classtype_table_and_indexes(pairs)

    for phrase_class, phrase_type in pairs:
        assert check_table_exist(temp_db_conn, phrase_class, phrase_type)
        assert check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type)
        assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER,
                                  phrase_class, phrase_type)
def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer,
                             getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
    """
        Test that _process_xml_content() processes the given xml content
        by executing the SQL functions for amenities and
        by returning the expected set of class/type pairs.
    """
    class_test = 'aerialway'
    type_test = 'zip_line'

    xml_content = get_test_xml_wiki_content()
    returned_pairs = special_phrases_importer._process_xml_content(xml_content, 'en')
    # Convert the returned set of pairs into a dict to simplify the lookups.
    results = dict(returned_pairs)

    assert check_amenities_with_op(temp_db_conn)
    assert check_amenities_without_op(temp_db_conn)
    assert results[class_test] and type_test in results.values()
def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table,
                          getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
    """
        Check the main import_from_wiki() method.
        It should create the place_classtype table, the place_id and centroid
        indexes, grant access to the web user and execute the SQL functions
        for amenities.
    """
    # Serve the static test file instead of downloading the wiki page.
    monkeypatch.setattr(
        'nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content',
        mock_get_wiki_content)

    special_phrases_importer.import_from_wiki(['en'])

    class_test = 'aerialway'
    type_test = 'zip_line'
    web_user = def_config.DATABASE_WEBUSER

    assert check_table_exist(temp_db_conn, class_test, type_test)
    assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
    assert check_grant_access(temp_db_conn, web_user, class_test, type_test)
    assert check_amenities_with_op(temp_db_conn)
    assert check_amenities_without_op(temp_db_conn)
def mock_get_wiki_content(lang):
    """
        Replacement for the _get_wiki_content() method which returns the
        content of the static xml test file, whatever the language is.
    """
    static_content = get_test_xml_wiki_content()
    return static_content
def get_test_xml_wiki_content():
    """
        Read the static xml test file and return its content.
    """
    testdata_dir = TEST_BASE_DIR / 'testdata'
    xml_test_content_path = (testdata_dir / 'special_phrases_test_content.txt').resolve()

    with open(xml_test_content_path) as xml_file:
        content = xml_file.read()

    return content
def check_table_exist(temp_db_conn, phrase_class, phrase_type):
    """
        Look up the place_classtype table of the given phrase_class and
        phrase_type in the information schema. Returns the row that was
        found, or None when the table does not exist.
    """
    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
    query = """
        SELECT *
        FROM information_schema.tables
        WHERE table_type='BASE TABLE'
        AND table_name='{}'""".format(table_name)

    with temp_db_conn.cursor() as cursor:
        cursor.execute(query)
        return cursor.fetchone()
def check_grant_access(temp_db_conn, user, phrase_class, phrase_type):
    """
        Look up the SELECT privilege of the given user on the
        place_classtype table of the given phrase_class and phrase_type.
        Returns the grant row that was found, or None when there is none.
    """
    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
    query = """
        SELECT * FROM information_schema.role_table_grants
        WHERE table_name='{}'
        AND grantee='{}'
        AND privilege_type='SELECT'""".format(table_name, user)

    with temp_db_conn.cursor() as cursor:
        cursor.execute(query)
        return cursor.fetchone()
def check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type):
    """
    Tell whether both the centroid index and the place_id index are
    present for the place_classtype table of the given phrase_class
    and phrase_type.
    """
    table_part = '{}_{}'.format(phrase_class, phrase_type)
    centroid_index = 'idx_place_classtype_' + table_part + '_centroid'
    place_id_index = 'idx_place_classtype_' + table_part + '_place_id'

    # The centroid index is looked up first; the place_id index is only
    # queried when the centroid index was found.
    centroid_found = temp_db_conn.index_exists(centroid_index)
    if not centroid_found:
        return centroid_found

    return temp_db_conn.index_exists(place_id_index)
def check_amenities_with_op(temp_db_conn):
    """
    Tell whether the temp_with_operator test table holds more than one
    row, i.e. whether the stubbed SQL function getorcreate_amenityoperator()
    has been called more than once.
    """
    with temp_db_conn.cursor() as cursor:
        cursor.execute("SELECT * FROM temp_with_operator")
        recorded_calls = cursor.fetchall()

    return len(recorded_calls) > 1
def check_amenities_without_op(temp_db_conn):
    """
    Tell whether the temp_without_operator test table holds more than one
    row, i.e. whether the stubbed SQL function getorcreate_amenity()
    has been called more than once.
    """
    with temp_db_conn.cursor() as cursor:
        cursor.execute("SELECT * FROM temp_without_operator")
        recorded_calls = cursor.fetchall()

    return len(recorded_calls) > 1
@pytest.fixture
def special_phrases_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
    """
    Build the SpecialPhrasesImporter under test from the def_config,
    temp_phplib_dir_with_migration and temp_db_conn fixtures.
    """
    importer = SpecialPhrasesImporter(def_config,
                                      temp_phplib_dir_with_migration,
                                      temp_db_conn)
    return importer
@pytest.fixture
def temp_phplib_dir_with_migration():
    """
    Provide a temporary PHP library directory that contains a
    'migration' subdirectory holding a copy of the
    PhraseSettingsToJson.php script.
    """
    script_name = 'PhraseSettingsToJson.php'
    original_script = (TEST_BASE_DIR / '..' / 'lib-php' / 'migration' / script_name).resolve()

    with tempfile.TemporaryDirectory() as phpdir:
        migration_dir = Path(phpdir) / 'migration'
        migration_dir.mkdir()
        copyfile(original_script, (migration_dir / script_name).resolve())

        # The temporary directory stays alive while the fixture is in use.
        yield Path(phpdir)
@pytest.fixture
def make_strandard_name_func(temp_db_cursor):
    """
    Install a simplified make_standard_name() SQL function that only
    trims the name it is given.
    """
    create_function_sql = """
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
BEGIN
RETURN trim(name); --Basically return only the trimed name for the tests
END;
$$ LANGUAGE plpgsql IMMUTABLE;"""
    temp_db_cursor.execute(create_function_sql)
@pytest.fixture
def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
    """
    Create the temp_without_operator table together with a stub
    getorcreate_amenity() SQL function that inserts a 'no_operator'
    row into that table on every call.
    """
    setup_sql = """
CREATE TABLE temp_without_operator(op TEXT);
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
lookup_class text, lookup_type text)
RETURNS void as $$
BEGIN
INSERT INTO temp_without_operator VALUES('no_operator');
END;
$$ LANGUAGE plpgsql"""
    temp_db_cursor.execute(setup_sql)
@pytest.fixture
def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
    """
    Create the temp_with_operator table together with a stub
    getorcreate_amenityoperator() SQL function that inserts the
    operator it receives into that table on every call.
    """
    setup_sql = """
CREATE TABLE temp_with_operator(op TEXT);
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
lookup_class text, lookup_type text, op text)
RETURNS void as $$
BEGIN
INSERT INTO temp_with_operator VALUES(op);
END;
$$ LANGUAGE plpgsql"""
    temp_db_cursor.execute(setup_sql)

View File

@@ -0,0 +1,78 @@
<mediawiki xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.11/ http://www.mediawiki.org/xml/export-0.11.xsd" version="0.11" xml:lang="en">
<siteinfo>
<sitename>OpenStreetMap Wiki</sitename>
<dbname>wiki</dbname>
<base>https://wiki.openstreetmap.org/wiki/Main_Page</base>
<generator>MediaWiki 1.35.1</generator>
<case>first-letter</case>
<namespaces>
<namespace key="-2" case="first-letter">Media</namespace>
<namespace key="-1" case="first-letter">Special</namespace>
<namespace key="0" case="first-letter"/>
<namespace key="1" case="first-letter">Talk</namespace>
<namespace key="2" case="first-letter">User</namespace>
<namespace key="3" case="first-letter">User talk</namespace>
<namespace key="4" case="first-letter">Wiki</namespace>
<namespace key="5" case="first-letter">Wiki talk</namespace>
<namespace key="6" case="first-letter">File</namespace>
<namespace key="7" case="first-letter">File talk</namespace>
<namespace key="8" case="first-letter">MediaWiki</namespace>
<namespace key="9" case="first-letter">MediaWiki talk</namespace>
<namespace key="10" case="first-letter">Template</namespace>
<namespace key="11" case="first-letter">Template talk</namespace>
<namespace key="12" case="first-letter">Help</namespace>
<namespace key="13" case="first-letter">Help talk</namespace>
<namespace key="14" case="first-letter">Category</namespace>
<namespace key="15" case="first-letter">Category talk</namespace>
<namespace key="120" case="first-letter">Item</namespace>
<namespace key="121" case="first-letter">Item talk</namespace>
<namespace key="122" case="first-letter">Property</namespace>
<namespace key="123" case="first-letter">Property talk</namespace>
<namespace key="200" case="first-letter">DE</namespace>
<namespace key="201" case="first-letter">DE talk</namespace>
<namespace key="202" case="first-letter">FR</namespace>
<namespace key="203" case="first-letter">FR talk</namespace>
<namespace key="204" case="first-letter">ES</namespace>
<namespace key="205" case="first-letter">ES talk</namespace>
<namespace key="206" case="first-letter">IT</namespace>
<namespace key="207" case="first-letter">IT talk</namespace>
<namespace key="208" case="first-letter">NL</namespace>
<namespace key="209" case="first-letter">NL talk</namespace>
<namespace key="210" case="first-letter">RU</namespace>
<namespace key="211" case="first-letter">RU talk</namespace>
<namespace key="212" case="first-letter">JA</namespace>
<namespace key="213" case="first-letter">JA talk</namespace>
<namespace key="710" case="first-letter">TimedText</namespace>
<namespace key="711" case="first-letter">TimedText talk</namespace>
<namespace key="828" case="first-letter">Module</namespace>
<namespace key="829" case="first-letter">Module talk</namespace>
<namespace key="2300" case="first-letter">Gadget</namespace>
<namespace key="2301" case="first-letter">Gadget talk</namespace>
<namespace key="2302" case="case-sensitive">Gadget definition</namespace>
<namespace key="2303" case="case-sensitive">Gadget definition talk</namespace>
</namespaces>
</siteinfo>
<page>
<title>Nominatim/Special Phrases/EN</title>
<ns>0</ns>
<id>67365</id>
<revision>
<id>2100424</id>
<parentid>2100422</parentid>
<timestamp>2021-01-27T20:29:53Z</timestamp>
<contributor>
<username>Violaine Do</username>
<id>88152</id>
</contributor>
<minor/>
<comment>/* en */ add coworking amenity</comment>
<origin>2100424</origin>
<model>wikitext</model>
<format>text/x-wiki</format>
<text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve">
== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Zip Lines near || aerialway || zip_line || near || Y |- | Zip Wire || aerialway || zip_line || - || N |- | Zip Wires || aerialway || zip_line || - || Y |- | Zip Wire in || aerialway || zip_line || in || N |- | Zip Wires in || aerialway || zip_line || in || Y |- | Zip Wire near || aerialway || zip_line || near || N |} [[Category:Word list]]
</text>
<sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1>
</revision>
</page>
</mediawiki>

View File

View File

View File

@@ -40,9 +40,9 @@
php-pgsql php php-intl libpqxx-devel \
proj-epsg bzip2-devel proj-devel boost-devel \
python3-pip python3-setuptools python3-devel \
expat-devel zlib-devel
expat-devel zlib-devel libicu-devel
pip3 install --user psycopg2 python-dotenv psutil Jinja2
pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU
#

View File

@@ -33,9 +33,9 @@
php-pgsql php php-intl php-json libpq-devel \
bzip2-devel proj-devel boost-devel \
python3-pip python3-setuptools python3-devel \
expat-devel zlib-devel
expat-devel zlib-devel libicu-devel
pip3 install --user psycopg2 python-dotenv psutil Jinja2
pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU
#

View File

@@ -29,8 +29,8 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
libbz2-dev libpq-dev libproj-dev \
postgresql-server-dev-10 postgresql-10-postgis-2.4 \
postgresql-contrib-10 postgresql-10-postgis-scripts \
php php-pgsql php-intl python3-pip \
python3-psycopg2 python3-psutil python3-jinja2 git
php php-pgsql php-intl libicu-dev python3-pip \
python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
# The python-dotenv package that comes with Ubuntu 18.04 is too old, so
# install the latest version from pip:

View File

@@ -32,8 +32,8 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
libbz2-dev libpq-dev libproj-dev \
postgresql-server-dev-12 postgresql-12-postgis-3 \
postgresql-contrib-12 postgresql-12-postgis-3-scripts \
php php-pgsql php-intl python3-dotenv \
python3-psycopg2 python3-psutil python3-jinja2 git
php php-pgsql php-intl libicu-dev python3-dotenv \
python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
#
# System Configuration