From 1b68152fb28f5fc146fa7e379efe8f3a5e511b26 Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Wed, 21 Apr 2021 17:11:57 +0200 Subject: [PATCH 1/3] reorganization of folder/file for the special phrases importer --- nominatim/clicmd/special_phrases.py | 2 +- nominatim/tools/__init__.py | 2 ++ .../special_phrases_importer.py} | 0 test/python/test_tools_import_special_phrases.py | 5 ++--- 4 files changed, 5 insertions(+), 4 deletions(-) rename nominatim/tools/{special_phrases.py => special_phrases/special_phrases_importer.py} (100%) diff --git a/nominatim/clicmd/special_phrases.py b/nominatim/clicmd/special_phrases.py index 99e82592..1eb25bdc 100644 --- a/nominatim/clicmd/special_phrases.py +++ b/nominatim/clicmd/special_phrases.py @@ -2,7 +2,7 @@ Implementation of the 'special-phrases' command. """ import logging -from nominatim.tools.special_phrases import SpecialPhrasesImporter +from nominatim.tools import SpecialPhrasesImporter from nominatim.db.connection import connect LOG = logging.getLogger() diff --git a/nominatim/tools/__init__.py b/nominatim/tools/__init__.py index cab6fb8b..cc5d3e9b 100644 --- a/nominatim/tools/__init__.py +++ b/nominatim/tools/__init__.py @@ -2,3 +2,5 @@ Module with functions for importing, updating Nominatim databases as well as general maintenance helpers. """ + +from nominatim.tools.special_phrases.special_phrases_importer import SpecialPhrasesImporter diff --git a/nominatim/tools/special_phrases.py b/nominatim/tools/special_phrases/special_phrases_importer.py similarity index 100% rename from nominatim/tools/special_phrases.py rename to nominatim/tools/special_phrases/special_phrases_importer.py diff --git a/test/python/test_tools_import_special_phrases.py b/test/python/test_tools_import_special_phrases.py index 4890e0b2..cb82f6b7 100644 --- a/test/python/test_tools_import_special_phrases.py +++ b/test/python/test_tools_import_special_phrases.py @@ -2,13 +2,12 @@ Tests for import special phrases methods of the class SpecialPhrasesImporter. """ -from mocks import MockParamCapture from nominatim.errors import UsageError from pathlib import Path import tempfile from shutil import copyfile import pytest -from nominatim.tools.special_phrases import SpecialPhrasesImporter +from nominatim.tools import SpecialPhrasesImporter TEST_BASE_DIR = Path(__file__) / '..' / '..' @@ -304,7 +303,7 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases CREATE TABLE place_classtype_amenity_animal_shelter(); CREATE TABLE place_classtype_wrongclass_wrongtype();""") - monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content) + monkeypatch.setattr('nominatim.tools.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content) special_phrases_importer.import_from_wiki(['en']) class_test = 'aerialway' From c5ecb9bae08ae00e95d3c6f687415514779402d0 Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Thu, 22 Apr 2021 17:34:35 +0200 Subject: [PATCH 2/3] Implemented statistics for the import of special phrases through the SpecialPhrasesImporterStatistics class --- nominatim/tools/special_phrases/__init__.py | 0 .../special_phrases/importer_statistics.py | 130 ++++++++++++++++++ .../special_phrases_importer.py | 14 +- 3 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 nominatim/tools/special_phrases/__init__.py create mode 100644 nominatim/tools/special_phrases/importer_statistics.py diff --git a/nominatim/tools/special_phrases/__init__.py b/nominatim/tools/special_phrases/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nominatim/tools/special_phrases/importer_statistics.py b/nominatim/tools/special_phrases/importer_statistics.py new file mode 100644 index 00000000..520c77d6 --- /dev/null +++ b/nominatim/tools/special_phrases/importer_statistics.py @@ -0,0 +1,130 @@ +""" + Contain the class which handle statistics for the + import of special phrases. +""" +import logging +LOG = logging.getLogger() + +class SpecialPhrasesImporterStatistics(): + # pylint: disable-msg=too-many-instance-attributes + """ + Class handling statistics of the import + process of special phrases. + """ + def __init__(self): + self._set_lang_values_to_0() + self._set_global_values_to_0() + + def _set_global_values_to_0(self): + """ + Set all counts for the global + import to 0. + """ + self.tables_created = 0 + self.tables_deleted = 0 + self.tables_ignored = 0 + self.global_phrases_invalid = 0 + self.global_phrases_added = 0 + self.global_phrases_ignored = 0 + self.global_phrases_deleted = 0 + + def _set_lang_values_to_0(self): + """ + Set all counts for the current + lang to 0. + """ + self.lang_phrases_invalid = 0 + self.lang_phrases_added = 0 + self.lang_phrases_ignored = 0 + + def notify_one_phrase_invalid(self): + """ + Add +1 to the count of invalid entries + fetched from the wiki. + """ + self.lang_phrases_invalid += 1 + self.global_phrases_invalid += 1 + + def notify_one_phrase_added(self): + """ + Add +1 to the count of entries + added to the db. + """ + self.lang_phrases_added += 1 + self.global_phrases_added += 1 + + def notify_one_phrase_ignored(self): + """ + Add +1 to the count of ignored + entries as it was already in the db. + """ + self.lang_phrases_ignored += 1 + self.global_phrases_ignored += 1 + + def notify_one_phrase_deleted(self): + """ + Add +1 to the count of phrases deleted + from the database. + """ + self.global_phrases_deleted += 1 + + def notify_one_table_created(self): + """ + Add +1 to the count of created tables. + """ + self.tables_created += 1 + + def notify_one_table_deleted(self): + """ + Add +1 to the count of deleted tables. + """ + self.tables_deleted += 1 + + def notify_one_table_ignored(self): + """ + Add +1 to the count of ignored tables. + """ + self.tables_ignored += 1 + + + def notify_import_done(self): + """ + Print stats for the whole import process + and reset all values. + """ + LOG.warning('====================================================================') + LOG.warning('Final statistics of the import:') + LOG.warning('- %s phrases were invalid.', self.global_phrases_invalid) + if self.global_phrases_invalid > 0: + LOG.warning(' Those invalid phrases have been skipped.') + LOG.warning('- %s phrases were ignored as they are already in the database', + self.global_phrases_ignored) + LOG.warning('- %s phrases were added to the database', self.global_phrases_added) + LOG.warning('- %s phrases were deleted from the database', self.global_phrases_deleted) + if self.global_phrases_deleted > 0: + LOG.warning(' They were deleted as they are not valid anymore.') + LOG.warning('- %s tables were ignored as they already exist on the database', + self.tables_ignored) + LOG.warning('- %s tables were created', self.tables_created) + LOG.warning('- %s tables were deleted from the database', self.tables_deleted) + if self.tables_deleted > 0: + LOG.warning(' They were deleted as they are not valid anymore.') + + self._set_global_values_to_0() + + def notify_current_lang_done(self, lang): + """ + Print stats for the current lang + and then reset lang values. + """ + LOG.warning('====================================================================') + LOG.warning('Statistics for the import of %s:', lang) + LOG.warning('- %s phrases were invalid.', self.lang_phrases_invalid) + if self.lang_phrases_invalid > 0: + LOG.warning(' Those invalid phrases have been skipped.') + LOG.warning('- %s phrases were ignored as they are already in the database', + self.lang_phrases_ignored) + LOG.warning('- %s phrases were added to the database', self.lang_phrases_added) + LOG.warning('====================================================================') + + self._set_lang_values_to_0() diff --git a/nominatim/tools/special_phrases/special_phrases_importer.py b/nominatim/tools/special_phrases/special_phrases_importer.py index f4eec260..a8e780c3 100644 --- a/nominatim/tools/special_phrases/special_phrases_importer.py +++ b/nominatim/tools/special_phrases/special_phrases_importer.py @@ -14,6 +14,7 @@ from psycopg2.sql import Identifier, Literal, SQL from nominatim.tools.exec_utils import get_url from nominatim.errors import UsageError +from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics LOG = logging.getLogger() class SpecialPhrasesImporter(): @@ -22,6 +23,7 @@ class SpecialPhrasesImporter(): Class handling the process of special phrases importations. """ def __init__(self, config, phplib_dir, db_connection) -> None: + self.statistics_handler = SpecialPhrasesImporterStatistics() self.db_connection = db_connection self.config = config self.phplib_dir = phplib_dir @@ -63,14 +65,16 @@ class SpecialPhrasesImporter(): class_type_pairs = set() for lang in languages: - LOG.warning('Import phrases for lang: %s', lang) + LOG.warning('Importing phrases for lang: %s...', lang) wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang) class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang)) + self.statistics_handler.notify_current_lang_done(lang) self._create_place_classtype_table_and_indexes(class_type_pairs) self._remove_non_existent_phrases_from_db() self.db_connection.commit() LOG.warning('Import done.') + self.statistics_handler.notify_import_done() def _fetch_existing_words_phrases(self): """ @@ -204,11 +208,13 @@ class SpecialPhrasesImporter(): (normalized_label, phrase_class, phrase_type, phrase_operator) ) class_type_pairs.add((phrase_class, phrase_type)) + self.statistics_handler.notify_one_phrase_ignored() #Dont need to add this phrase as it already exists in the word table. continue #sanity check, in case somebody added garbage in the wiki if not self._check_sanity(lang, phrase_class, phrase_type): + self.statistics_handler.notify_one_phrase_invalid() continue class_type_pairs.add((phrase_class, phrase_type)) @@ -217,6 +223,7 @@ class SpecialPhrasesImporter(): phrase_label, normalized_label, phrase_class, phrase_type, phrase_operator ) + self.statistics_handler.notify_one_phrase_added() return class_type_pairs @@ -263,6 +270,7 @@ class SpecialPhrasesImporter(): table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) if table_name in self.table_phrases_to_delete: + self.statistics_handler.notify_one_table_ignored() #Remove this table from the ones to delete as it match a class/type #still existing on the special phrases of the wiki. self.table_phrases_to_delete.remove(table_name) @@ -278,6 +286,8 @@ class SpecialPhrasesImporter(): #Grant access on read to the web user. self._grant_access_to_webuser(phrase_class, phrase_type) + self.statistics_handler.notify_one_table_created() + with self.db_connection.cursor() as db_cursor: db_cursor.execute("DROP INDEX idx_placex_classtype") @@ -341,6 +351,7 @@ class SpecialPhrasesImporter(): #Delete phrases from the word table which are not on the wiki anymore. for phrase_to_delete in self.words_phrases_to_delete: + self.statistics_handler.notify_one_phrase_deleted() if phrase_to_delete[3] == '-': query = """ DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null @@ -357,6 +368,7 @@ class SpecialPhrasesImporter(): #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore for table in self.table_phrases_to_delete: + self.statistics_handler.notify_one_table_deleted() query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table)) queries_parameters.append((query, ())) From abb3d56b2053beb8b582c6ed7f5a3b918b4fae08 Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Sun, 25 Apr 2021 17:56:12 +0200 Subject: [PATCH 3/3] Switching to log info and only send warning for invalid phrases --- .../special_phrases/importer_statistics.py | 54 +++++++++++-------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/nominatim/tools/special_phrases/importer_statistics.py b/nominatim/tools/special_phrases/importer_statistics.py index 520c77d6..46965c4b 100644 --- a/nominatim/tools/special_phrases/importer_statistics.py +++ b/nominatim/tools/special_phrases/importer_statistics.py @@ -1,5 +1,5 @@ """ - Contain the class which handle statistics for the + Contains the class which handles statistics for the import of special phrases. """ import logging @@ -92,23 +92,27 @@ class SpecialPhrasesImporterStatistics(): Print stats for the whole import process and reset all values. """ - LOG.warning('====================================================================') - LOG.warning('Final statistics of the import:') - LOG.warning('- %s phrases were invalid.', self.global_phrases_invalid) + LOG.info('====================================================================') + LOG.info('Final statistics of the import:') + LOG.info('- %s phrases were invalid.', self.global_phrases_invalid) if self.global_phrases_invalid > 0: - LOG.warning(' Those invalid phrases have been skipped.') - LOG.warning('- %s phrases were ignored as they are already in the database', - self.global_phrases_ignored) - LOG.warning('- %s phrases were added to the database', self.global_phrases_added) - LOG.warning('- %s phrases were deleted from the database', self.global_phrases_deleted) + LOG.info(' Those invalid phrases have been skipped.') + LOG.info('- %s phrases were ignored as they are already in the database', + self.global_phrases_ignored) + LOG.info('- %s phrases were added to the database', self.global_phrases_added) + LOG.info('- %s phrases were deleted from the database', self.global_phrases_deleted) if self.global_phrases_deleted > 0: - LOG.warning(' They were deleted as they are not valid anymore.') - LOG.warning('- %s tables were ignored as they already exist on the database', - self.tables_ignored) - LOG.warning('- %s tables were created', self.tables_created) - LOG.warning('- %s tables were deleted from the database', self.tables_deleted) + LOG.info(' They were deleted as they are not valid anymore.') + LOG.info('- %s tables were ignored as they already exist on the database', + self.tables_ignored) + LOG.info('- %s tables were created', self.tables_created) + LOG.info('- %s tables were deleted from the database', self.tables_deleted) if self.tables_deleted > 0: - LOG.warning(' They were deleted as they are not valid anymore.') + LOG.info(' They were deleted as they are not valid anymore.') + + if self.global_phrases_invalid > 0: + LOG.warning('%s phrases were invalid and have been skipped during the whole process.', + self.global_phrases_invalid) self._set_global_values_to_0() @@ -117,14 +121,18 @@ class SpecialPhrasesImporterStatistics(): Print stats for the current lang and then reset lang values. """ - LOG.warning('====================================================================') - LOG.warning('Statistics for the import of %s:', lang) - LOG.warning('- %s phrases were invalid.', self.lang_phrases_invalid) + LOG.info('====================================================================') + LOG.info('Statistics for the import of %s:', lang) + LOG.info('- %s phrases were invalid.', self.lang_phrases_invalid) if self.lang_phrases_invalid > 0: - LOG.warning(' Those invalid phrases have been skipped.') - LOG.warning('- %s phrases were ignored as they are already in the database', - self.lang_phrases_ignored) - LOG.warning('- %s phrases were added to the database', self.lang_phrases_added) - LOG.warning('====================================================================') + LOG.info(' Those invalid phrases have been skipped.') + LOG.info('- %s phrases were ignored as they are already in the database', + self.lang_phrases_ignored) + LOG.info('- %s phrases were added to the database', self.lang_phrases_added) + LOG.info('====================================================================') + + if self.lang_phrases_invalid > 0: + LOG.warning('%s phrases were invalid and have been skipped for the import of lang %s.', + self.lang_phrases_invalid, lang) self._set_lang_values_to_0()