Errors fixes, Cleaning code, Improvement and addition of tests

This commit is contained in:
AntoJvlt
2021-03-26 01:53:33 +01:00
parent 2c19bd5ea3
commit cde9389e75
7 changed files with 309 additions and 48 deletions

View File

@@ -3,6 +3,7 @@
""" """
import logging import logging
import os import os
from pathlib import Path
import re import re
import subprocess import subprocess
import json import json
@@ -10,6 +11,7 @@ from os.path import isfile
from icu import Transliterator from icu import Transliterator
from psycopg2.sql import Identifier, Literal, SQL from psycopg2.sql import Identifier, Literal, SQL
from nominatim.tools.exec_utils import get_url from nominatim.tools.exec_utils import get_url
from nominatim.errors import UsageError
LOG = logging.getLogger() LOG = logging.getLogger()
class SpecialPhrasesImporter(): class SpecialPhrasesImporter():
@@ -37,18 +39,18 @@ class SpecialPhrasesImporter():
extract corresponding special phrases from the wiki. extract corresponding special phrases from the wiki.
""" """
if languages is not None and not isinstance(languages, list): if languages is not None and not isinstance(languages, list):
raise TypeError('languages argument should be of type list') raise TypeError('The \'languages\' argument should be of type list.')
#Get all languages to process. #Get all languages to process.
languages = self._load_languages() if not languages else languages languages = self._load_languages() if not languages else languages
#array for pairs of class/type #Store pairs of class/type for further processing
class_type_pairs = set() class_type_pairs = set()
for lang in languages: for lang in languages:
LOG.warning('Import phrases for lang: %s', lang) LOG.warning('Import phrases for lang: %s', lang)
wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang) wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
self._process_xml_content(wiki_page_xml_content, lang) class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
self._create_place_classtype_table_and_indexes(class_type_pairs) self._create_place_classtype_table_and_indexes(class_type_pairs)
self.db_connection.commit() self.db_connection.commit()
@@ -58,7 +60,7 @@ class SpecialPhrasesImporter():
""" """
Load white and black lists from phrases-settings.json. Load white and black lists from phrases-settings.json.
""" """
settings_path = str(self.config.config_dir)+'/phrase-settings.json' settings_path = (self.config.config_dir / 'phrase-settings.json').resolve()
if self.config.PHRASE_CONFIG: if self.config.PHRASE_CONFIG:
settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG) settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
@@ -100,11 +102,18 @@ class SpecialPhrasesImporter():
class_matchs = self.sanity_check_pattern.findall(phrase_class) class_matchs = self.sanity_check_pattern.findall(phrase_class)
if len(class_matchs) < 1 or len(type_matchs) < 1: if len(class_matchs) < 1 or len(type_matchs) < 1:
LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type) raise UsageError("Bad class/type for language {}: {}={}".format(
lang, phrase_class, phrase_type))
def _process_xml_content(self, xml_content, lang): def _process_xml_content(self, xml_content, lang):
"""
Process given xml content by extracting matching patterns.
Matching patterns are processed there and returned in a
set of class/type pairs.
"""
#One match will be of format [label, class, type, operator, plural] #One match will be of format [label, class, type, operator, plural]
matches = self.occurence_pattern.findall(xml_content) matches = self.occurence_pattern.findall(xml_content)
#Store pairs of class/type for further processing
class_type_pairs = set() class_type_pairs = set()
for match in matches: for match in matches:
@@ -246,12 +255,19 @@ class SpecialPhrasesImporter():
""" """
Convert php settings file of special phrases to json file if it is still in php format. Convert php settings file of special phrases to json file if it is still in php format.
""" """
if not isfile(file_path):
raise UsageError(str(file_path) + ' is not a valid file.')
file, extension = os.path.splitext(file_path) file, extension = os.path.splitext(file_path)
json_file_path = file + '.json' json_file_path = Path(file + '.json').resolve()
if extension not in('.php', '.json'):
raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')
if extension == '.php' and not isfile(json_file_path): if extension == '.php' and not isfile(json_file_path):
try: try:
subprocess.run(['/usr/bin/env', 'php', '-Cq', subprocess.run(['/usr/bin/env', 'php', '-Cq',
self.phplib_dir / 'migration/phraseSettingsToJson.php', (self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(),
file_path], check=True) file_path], check=True)
LOG.warning('special_phrase configuration file has been converted to json.') LOG.warning('special_phrase configuration file has been converted to json.')
return json_file_path return json_file_path

View File

@@ -1,24 +1,127 @@
""" """
Tests for import special phrases functions Tests for import special phrases methods
of the class SpecialPhrasesImporter.
""" """
from nominatim.errors import UsageError
from pathlib import Path from pathlib import Path
import tempfile
from shutil import copyfile
import pytest import pytest
from nominatim.tools.special_phrases import SpecialPhrasesImporter from nominatim.tools.special_phrases import SpecialPhrasesImporter
TEST_BASE_DIR = Path(__file__) / '..' / '..' TEST_BASE_DIR = Path(__file__) / '..' / '..'
def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs): def test_check_sanity_class(special_phrases_importer):
"""
Check for _check_sanity() method.
If a wrong class or type is given, an UsageError should raise.
If a good class and type are given, nothing special happens.
"""
with pytest.raises(UsageError) as wrong_class:
special_phrases_importer._check_sanity('en', '', 'type')
with pytest.raises(UsageError) as wrong_type:
special_phrases_importer._check_sanity('en', 'class', '')
special_phrases_importer._check_sanity('en', 'class', 'type')
assert wrong_class and wrong_type
def test_load_white_and_black_lists(special_phrases_importer):
"""
Test that _load_white_and_black_lists() well return
black list and white list and that they are of dict type.
"""
black_list, white_list = special_phrases_importer._load_white_and_black_lists()
assert isinstance(black_list, dict) and isinstance(white_list, dict)
def test_convert_php_settings(special_phrases_importer):
"""
Test that _convert_php_settings_if_needed() convert the given
php file to a json file.
"""
php_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.php').resolve()
with tempfile.TemporaryDirectory() as temp_dir:
temp_settings = (Path(temp_dir) / 'phrase_settings.php').resolve()
copyfile(php_file, temp_settings)
special_phrases_importer._convert_php_settings_if_needed(temp_settings)
assert (Path(temp_dir) / 'phrase_settings.json').is_file()
def test_convert_settings_wrong_file(special_phrases_importer):
"""
Test that _convert_php_settings_if_needed() raise an exception
if the given file is not a valid file.
"""
with pytest.raises(UsageError) as exceptioninfos:
special_phrases_importer._convert_php_settings_if_needed('random_file')
assert str(exceptioninfos.value) == 'random_file is not a valid file.'
def test_convert_settings_json_already_exist(special_phrases_importer):
"""
Test that if we give to '_convert_php_settings_if_needed' a php file path
and that a the corresponding json file already exists, it is returned.
"""
php_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.php').resolve()
json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve()
returned = special_phrases_importer._convert_php_settings_if_needed(php_file)
assert returned == json_file
def test_convert_settings_giving_json(special_phrases_importer):
"""
Test that if we give to '_convert_php_settings_if_needed' a json file path
the same path is directly returned
"""
json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase-settings.json').resolve()
returned = special_phrases_importer._convert_php_settings_if_needed(json_file)
assert returned == json_file
def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
word_table, temp_db_conn):
"""
Test that _process_amenity() execute well the
getorcreate_amenityoperator() SQL function and that
the 2 differents operators are well handled.
"""
special_phrases_importer._process_amenity('', '', '', '', 'near') special_phrases_importer._process_amenity('', '', '', '', 'near')
special_phrases_importer._process_amenity('', '', '', '', 'in') special_phrases_importer._process_amenity('', '', '', '', 'in')
def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs): with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute("SELECT * FROM temp_with_operator WHERE op='near' OR op='in'")
results = temp_db_cursor.fetchall()
assert len(results) == 2
def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs,
temp_db_conn):
"""
Test that _process_amenity() execute well the
getorcreate_amenity() SQL function.
"""
special_phrases_importer._process_amenity('', '', '', '', '') special_phrases_importer._process_amenity('', '', '', '', '')
with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute("SELECT * FROM temp_without_operator WHERE op='no_operator'")
result = temp_db_cursor.fetchone()
assert result
def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer): def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
"""
Test that _create_place_classtype_indexes() create the
place_id index and centroid index on the right place_class_type table.
"""
phrase_class = 'class' phrase_class = 'class'
phrase_type = 'type' phrase_type = 'type'
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
with temp_db_conn.cursor() as temp_db_cursor: with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute("CREATE EXTENSION postgis;") temp_db_cursor.execute("CREATE EXTENSION postgis;")
@@ -26,26 +129,24 @@ def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
special_phrases_importer._create_place_classtype_indexes('', phrase_class, phrase_type) special_phrases_importer._create_place_classtype_indexes('', phrase_class, phrase_type)
centroid_index_exists = temp_db_conn.index_exists(index_prefix + 'centroid') assert check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type)
place_id_index_exists = temp_db_conn.index_exists(index_prefix + 'place_id')
assert centroid_index_exists and place_id_index_exists
def test_create_place_classtype_table(temp_db_conn, placex_table, special_phrases_importer): def test_create_place_classtype_table(temp_db_conn, placex_table, special_phrases_importer):
"""
Test that _create_place_classtype_table() create
the right place_classtype table.
"""
phrase_class = 'class' phrase_class = 'class'
phrase_type = 'type' phrase_type = 'type'
special_phrases_importer._create_place_classtype_table('', phrase_class, phrase_type) special_phrases_importer._create_place_classtype_table('', phrase_class, phrase_type)
with temp_db_conn.cursor() as temp_db_cursor: assert check_table_exist(temp_db_conn, phrase_class, phrase_type)
temp_db_cursor.execute(f"""
SELECT *
FROM information_schema.tables
WHERE table_type='BASE TABLE'
AND table_name='place_classtype_{phrase_class}_{phrase_type}'""")
result = temp_db_cursor.fetchone()
assert result
def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_importer): def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_importer):
"""
Test that _grant_access_to_webuser() give
right access to the web user.
"""
phrase_class = 'class' phrase_class = 'class'
phrase_type = 'type' phrase_type = 'type'
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
@@ -55,47 +156,163 @@ def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_impo
special_phrases_importer._grant_access_to_webuser(phrase_class, phrase_type) special_phrases_importer._grant_access_to_webuser(phrase_class, phrase_type)
with temp_db_conn.cursor() as temp_db_cursor: assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, phrase_class, phrase_type)
temp_db_cursor.execute(f"""
SELECT * FROM information_schema.role_table_grants
WHERE table_name='{table_name}'
AND grantee='{def_config.DATABASE_WEBUSER}'
AND privilege_type='SELECT'""")
result = temp_db_cursor.fetchone()
assert result
def test_create_place_classtype_table_and_indexes( def test_create_place_classtype_table_and_indexes(
placex_table, getorcreate_amenity_funcs, temp_db_conn, def_config, placex_table, getorcreate_amenity_funcs,
getorcreate_amenityoperator_funcs, special_phrases_importer): getorcreate_amenityoperator_funcs, special_phrases_importer):
pairs = {('class1', 'type1'), ('class2', 'type2')} """
Test that _create_place_classtype_table_and_indexes()
create the right place_classtype tables and place_id indexes
and centroid indexes and grant access to the web user
for the given set of pairs.
"""
pairs = set([('class1', 'type1'), ('class2', 'type2')])
special_phrases_importer._create_place_classtype_table_and_indexes(pairs) special_phrases_importer._create_place_classtype_table_and_indexes(pairs)
def test_process_xml_content(special_phrases_importer, getorcreate_amenity_funcs, for pair in pairs:
getorcreate_amenityoperator_funcs): assert check_table_exist(temp_db_conn, pair[0], pair[1])
special_phrases_importer._process_xml_content(get_test_xml_wiki_content(), 'en') assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1])
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1])
def mock_get_wiki_content(lang): def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer,
return get_test_xml_wiki_content() getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
"""
Test that _process_xml_content() process the given xml content right
by executing the right SQL functions for amenities and
by returning the right set of pairs.
"""
class_test = 'aerialway'
type_test = 'zip_line'
def test_import_from_wiki(monkeypatch, special_phrases_importer, placex_table, #Converted output set to a dict for easy assert further.
results = dict(special_phrases_importer._process_xml_content(get_test_xml_wiki_content(), 'en'))
assert check_amenities_with_op(temp_db_conn)
assert check_amenities_without_op(temp_db_conn)
assert results[class_test] and type_test in results.values()
def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table,
getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs): getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
#mocker.patch.object(special_phrases_importer, '_get_wiki_content', new=mock_get_wiki_content) """
Check that the main import_from_wiki() method is well executed.
It should create the place_classtype table, the place_id and centroid indexes,
grand access to the web user and executing the SQL functions for amenities.
"""
monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content) monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
special_phrases_importer.import_from_wiki(['en']) special_phrases_importer.import_from_wiki(['en'])
class_test = 'aerialway'
type_test = 'zip_line'
assert check_table_exist(temp_db_conn, class_test, type_test)
assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
assert check_amenities_with_op(temp_db_conn)
assert check_amenities_without_op(temp_db_conn)
def mock_get_wiki_content(lang):
"""
Mock the _get_wiki_content() method to return
static xml test file content.
"""
return get_test_xml_wiki_content()
def get_test_xml_wiki_content(): def get_test_xml_wiki_content():
"""
return the content of the static xml test file.
"""
xml_test_content_path = (TEST_BASE_DIR / 'testdata' / 'special_phrases_test_content.txt').resolve() xml_test_content_path = (TEST_BASE_DIR / 'testdata' / 'special_phrases_test_content.txt').resolve()
with open(xml_test_content_path) as xml_content_reader: with open(xml_test_content_path) as xml_content_reader:
return xml_content_reader.read() return xml_content_reader.read()
def check_table_exist(temp_db_conn, phrase_class, phrase_type):
"""
Verify that the place_classtype table exists for the given
phrase_class and phrase_type.
"""
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute("""
SELECT *
FROM information_schema.tables
WHERE table_type='BASE TABLE'
AND table_name='{}'""".format(table_name))
return temp_db_cursor.fetchone()
def check_grant_access(temp_db_conn, user, phrase_class, phrase_type):
"""
Check that the web user has been granted right access to the
place_classtype table of the given phrase_class and phrase_type.
"""
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute("""
SELECT * FROM information_schema.role_table_grants
WHERE table_name='{}'
AND grantee='{}'
AND privilege_type='SELECT'""".format(table_name, user))
return temp_db_cursor.fetchone()
def check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type):
"""
Check that the place_id index and centroid index exist for the
place_classtype table of the given phrase_class and phrase_type.
"""
index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
return (
temp_db_conn.index_exists(index_prefix + 'centroid')
and
temp_db_conn.index_exists(index_prefix + 'place_id')
)
def check_amenities_with_op(temp_db_conn):
"""
Check that the test table for the SQL function getorcreate_amenityoperator()
contains more than one value (so that the SQL function was call more than one time).
"""
with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute("SELECT * FROM temp_with_operator")
return len(temp_db_cursor.fetchall()) > 1
def check_amenities_without_op(temp_db_conn):
"""
Check that the test table for the SQL function getorcreate_amenity()
contains more than one value (so that the SQL function was call more than one time).
"""
with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute("SELECT * FROM temp_without_operator")
return len(temp_db_cursor.fetchall()) > 1
@pytest.fixture @pytest.fixture
def special_phrases_importer(temp_db_conn, def_config, tmp_phplib_dir): def special_phrases_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
return SpecialPhrasesImporter(def_config, tmp_phplib_dir, temp_db_conn) """
Return an instance of SpecialPhrasesImporter.
"""
return SpecialPhrasesImporter(def_config, temp_phplib_dir_with_migration, temp_db_conn)
@pytest.fixture
def temp_phplib_dir_with_migration():
"""
Return temporary phpdir with migration subdirectory and
PhraseSettingsToJson.php script inside.
"""
migration_file = (TEST_BASE_DIR / '..' / 'lib-php' / 'migration'
/ 'PhraseSettingsToJson.php').resolve()
with tempfile.TemporaryDirectory() as phpdir:
(Path(phpdir) / 'migration').mkdir()
migration_dest_path = (Path(phpdir) / 'migration' / 'PhraseSettingsToJson.php').resolve()
copyfile(migration_file, migration_dest_path)
yield Path(phpdir)
@pytest.fixture @pytest.fixture
def make_strandard_name_func(temp_db_cursor): def make_strandard_name_func(temp_db_cursor):
temp_db_cursor.execute(f""" temp_db_cursor.execute("""
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$ CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
BEGIN BEGIN
RETURN trim(name); --Basically return only the trimed name for the tests RETURN trim(name); --Basically return only the trimed name for the tests
@@ -104,18 +321,26 @@ def make_strandard_name_func(temp_db_cursor):
@pytest.fixture @pytest.fixture
def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func): def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
temp_db_cursor.execute(f""" temp_db_cursor.execute("""
CREATE TABLE temp_without_operator(op TEXT);
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
lookup_class text, lookup_type text) lookup_class text, lookup_type text)
RETURNS void as $$ RETURNS void as $$
BEGIN END; BEGIN
INSERT INTO temp_without_operator VALUES('no_operator');
END;
$$ LANGUAGE plpgsql""") $$ LANGUAGE plpgsql""")
@pytest.fixture @pytest.fixture
def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func): def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
temp_db_cursor.execute(f""" temp_db_cursor.execute("""
CREATE TABLE temp_with_operator(op TEXT);
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
lookup_class text, lookup_type text, op text) lookup_class text, lookup_type text, op text)
RETURNS void as $$ RETURNS void as $$
BEGIN END; BEGIN
INSERT INTO temp_with_operator VALUES(op);
END;
$$ LANGUAGE plpgsql""") $$ LANGUAGE plpgsql""")

File diff suppressed because one or more lines are too long

View File

View File

@@ -0,0 +1,20 @@
<?php
// These settings control the import of special phrases from the wiki.
// class/type combinations to exclude
$aTagsBlacklist
= array(
'boundary' => array('administrative'),
'place' => array('house', 'houses'),
);
// If a class is in the white list then all types will
// be ignored except the ones given in the list.
// Also use this list to exclude an entire class from
// special phrases.
$aTagsWhitelist
= array(
'highway' => array('bus_stop', 'rest_area', 'raceway'),
'building' => array(),
);

View File