add type annotations to special phrase importer

This commit is contained in:
Sarah Hoffmann
2022-07-17 10:46:59 +02:00
parent 459ab3bbdc
commit 9963261d8d
9 changed files with 77 additions and 60 deletions

View File

@@ -12,15 +12,14 @@ import logging
LOG = logging.getLogger()
class SpecialPhrasesImporterStatistics():
# pylint: disable-msg=too-many-instance-attributes
"""
Class handling statistics of the import
process of special phrases.
"""
def __init__(self):
def __init__(self) -> None:
self._intialize_values()
def _intialize_values(self):
def _intialize_values(self) -> None:
"""
Set all counts for the global
import to 0.
@@ -30,32 +29,32 @@ class SpecialPhrasesImporterStatistics():
self.tables_ignored = 0
self.invalids = 0
def notify_one_phrase_invalid(self) -> None:
    """ Record one invalid entry fetched from the wiki.

        Increments the `invalids` counter by one.
    """
    self.invalids += 1
def notify_one_table_created(self) -> None:
    """ Record the creation of one table.

        Increments the `tables_created` counter by one.
    """
    self.tables_created += 1
def notify_one_table_deleted(self) -> None:
    """ Record the deletion of one table.

        Increments the `tables_deleted` counter by one.
    """
    self.tables_deleted += 1
def notify_one_table_ignored(self) -> None:
    """ Record that one table was ignored.

        Increments the `tables_ignored` counter by one.
    """
    self.tables_ignored += 1
def notify_import_done(self):
def notify_import_done(self) -> None:
"""
Print stats for the whole import process
and reset all values.

View File

@@ -9,6 +9,7 @@
The class allows loading phrases from a csv file.
"""
from typing import Iterable
import csv
import os
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
@@ -18,12 +19,11 @@ class SPCsvLoader:
"""
Handles loading of special phrases from external csv file.
"""
def __init__(self, csv_path: str) -> None:
    """ Remember which csv file the special phrases are loaded from.

        csv_path: path of the external csv file.
    """
    self.csv_path = csv_path
def generate_phrases(self):
def generate_phrases(self) -> Iterable[SpecialPhrase]:
""" Open and parse the given csv file.
Create the corresponding SpecialPhrases.
"""
@@ -35,7 +35,7 @@ class SPCsvLoader:
yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
def _check_csv_validity(self):
def _check_csv_validity(self) -> None:
"""
Check that the csv file has the right extension.
"""

View File

@@ -13,19 +13,37 @@
The phrases already present in the database which are not
valid anymore are removed.
"""
from typing import Iterable, Tuple, Mapping, Sequence, Optional, Set
import logging
import re
from typing_extensions import Protocol
from psycopg2.sql import Identifier, SQL
from nominatim.config import Configuration
from nominatim.db.connection import Connection
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
from nominatim.tokenizer.base import AbstractTokenizer
LOG = logging.getLogger()
def _classtype_table(phrase_class: str, phrase_type: str) -> str:
    """ Return the name of the table for the given class and type.

        The name has the form `place_classtype_<class>_<type>`.
    """
    return f'place_classtype_{phrase_class}_{phrase_type}'
class SpecialPhraseLoader(Protocol):
    """ Protocol for classes implementing a loader for special phrases.

        A class conforms structurally: it only needs to provide a
        matching `generate_phrases()` method.
    """

    def generate_phrases(self) -> Iterable[SpecialPhrase]:
        """ Generates all special phrase terms this loader can produce.
        """
class SPImporter():
# pylint: disable-msg=too-many-instance-attributes
"""
@@ -33,21 +51,22 @@ class SPImporter():
Takes an sp loader which loads the phrases from an external source.
"""
def __init__(self, config: Configuration, conn: Connection,
             sp_loader: SpecialPhraseLoader) -> None:
    """ Set up the importer.

        config: configuration object, kept as `self.config`.
        conn: database connection, kept as `self.db_connection`.
        sp_loader: loader that supplies the special phrases.
    """
    self.config = config
    self.db_connection = conn
    self.sp_loader = sp_loader
    self.statistics_handler = SpecialPhrasesImporterStatistics()
    self.black_list, self.white_list = self._load_white_and_black_lists()
    # Matches a non-empty run of word characters.
    self.sanity_check_pattern = re.compile(r'^\w+$')
    # This set will contain all existing phrases to be added.
    # It contains tuples with the following format: (label, class, type, operator)
    self.word_phrases: Set[Tuple[str, str, str, str]] = set()
    # This set will contain all existing place_classtype tables which don't match any
    # special phrases class/type on the wiki.
    self.table_phrases_to_delete: Set[str] = set()
def import_phrases(self, tokenizer, should_replace):
def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool) -> None:
"""
Iterate through all SpecialPhrases extracted from the
loader and import them into the database.
@@ -67,7 +86,7 @@ class SPImporter():
if result:
class_type_pairs.add(result)
self._create_place_classtype_table_and_indexes(class_type_pairs)
self._create_classtype_table_and_indexes(class_type_pairs)
if should_replace:
self._remove_non_existent_tables_from_db()
self.db_connection.commit()
@@ -79,7 +98,7 @@ class SPImporter():
self.statistics_handler.notify_import_done()
def _fetch_existing_place_classtype_tables(self):
def _fetch_existing_place_classtype_tables(self) -> None:
"""
Fetch existing place_classtype tables.
Fill the table_phrases_to_delete set of the class.
@@ -95,7 +114,8 @@ class SPImporter():
for row in db_cursor:
self.table_phrases_to_delete.add(row[0])
def _load_white_and_black_lists(self):
def _load_white_and_black_lists(self) \
-> Tuple[Mapping[str, Sequence[str]], Mapping[str, Sequence[str]]]:
"""
Load white and black lists from phrases-settings.json.
"""
@@ -103,7 +123,7 @@ class SPImporter():
return settings['blackList'], settings['whiteList']
def _check_sanity(self, phrase):
def _check_sanity(self, phrase: SpecialPhrase) -> bool:
"""
Check sanity of given inputs in case somebody added garbage in the wiki.
If a bad class/type is detected the system will exit with an error.
@@ -117,7 +137,7 @@ class SPImporter():
return False
return True
def _process_phrase(self, phrase):
def _process_phrase(self, phrase: SpecialPhrase) -> Optional[Tuple[str, str]]:
"""
Processes the given phrase by checking black and white list
and sanity.
@@ -145,7 +165,8 @@ class SPImporter():
return (phrase.p_class, phrase.p_type)
def _create_place_classtype_table_and_indexes(self, class_type_pairs):
def _create_classtype_table_and_indexes(self,
class_type_pairs: Iterable[Tuple[str, str]]) -> None:
"""
Create table place_classtype for each given pair.
Also create indexes on place_id and centroid.
@@ -188,7 +209,8 @@ class SPImporter():
db_cursor.execute("DROP INDEX idx_placex_classtype")
def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
def _create_place_classtype_table(self, sql_tablespace: str,
phrase_class: str, phrase_type: str) -> None:
"""
Create table place_classtype of the given phrase_class/phrase_type
if it doesn't exist.
@@ -204,7 +226,8 @@ class SPImporter():
(phrase_class, phrase_type))
def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
def _create_place_classtype_indexes(self, sql_tablespace: str,
phrase_class: str, phrase_type: str) -> None:
"""
Create indexes on centroid and place_id for the place_classtype table.
"""
@@ -227,7 +250,7 @@ class SPImporter():
SQL(sql_tablespace)))
def _grant_access_to_webuser(self, phrase_class, phrase_type):
def _grant_access_to_webuser(self, phrase_class: str, phrase_type: str) -> None:
"""
Grant access on read to the table place_classtype for the webuser.
"""
@@ -237,7 +260,7 @@ class SPImporter():
.format(Identifier(table_name),
Identifier(self.config.DATABASE_WEBUSER)))
def _remove_non_existent_tables_from_db(self):
def _remove_non_existent_tables_from_db(self) -> None:
"""
Remove special phrases which don't exist on the wiki anymore.
Delete the place_classtype tables.

View File

@@ -7,14 +7,17 @@
"""
Module containing the SPWikiLoader class.
"""
from typing import Iterable
import re
import logging
from nominatim.config import Configuration
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
from nominatim.tools.exec_utils import get_url
LOG = logging.getLogger()
def _get_wiki_content(lang):
def _get_wiki_content(lang: str) -> str:
"""
Request and return the wiki page's content
corresponding to special phrases for a given lang.
@@ -30,8 +33,7 @@ class SPWikiLoader:
"""
Handles loading of special phrases from the wiki.
"""
def __init__(self, config):
super().__init__()
def __init__(self, config: Configuration) -> None:
self.config = config
# Compile the regex here to increase performance.
self.occurence_pattern = re.compile(
@@ -39,10 +41,15 @@ class SPWikiLoader:
)
# Hack around a bug where building=yes was imported with quotes into the wiki
self.type_fix_pattern = re.compile(r'\"|"')
self._load_languages()
self.languages = self.config.get_str_list('LANGUAGES') or \
['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
def generate_phrases(self):
def generate_phrases(self) -> Iterable[SpecialPhrase]:
""" Download the wiki pages for the configured languages
and extract the phrases from the page.
"""
@@ -58,19 +65,3 @@ class SPWikiLoader:
match[1],
self.type_fix_pattern.sub('', match[2]),
match[3])
def _load_languages(self) -> None:
    """ Set `self.languages` to the languages to extract phrases for.

        When the `LANGUAGES` setting of the configuration is set, the
        list is read from it with `get_str_list()`. Otherwise a built-in
        default list of language codes is used.
    """
    if self.config.LANGUAGES:
        self.languages = self.config.get_str_list('LANGUAGES')
        return

    # Nothing configured: fall back to the default language list.
    self.languages = [
        'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
        'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
        'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
        'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']

View File

@@ -10,20 +10,21 @@
This class is a model used to transfer a special phrase through
the process of loading and importing.
"""
from typing import Any
class SpecialPhrase:
"""
Model representing a special phrase.
"""
def __init__(self, p_label: str, p_class: str, p_type: str, p_operator: str) -> None:
    """ Create a special phrase from its raw field values.

        Surrounding whitespace is stripped from every field. The operator
        is lower-cased and replaced by '-' unless it is 'near' or 'in'.
    """
    self.p_label = p_label.strip()
    self.p_class = p_class.strip()
    # Only whitespace is trimmed here; quote characters are left untouched.
    self.p_type = p_type.strip()
    # Needed if some operators in the wiki are not written in English.
    p_operator = p_operator.strip().lower()
    self.p_operator = p_operator if p_operator in ('near', 'in') else '-'
def __eq__(self, other):
def __eq__(self, other: Any) -> bool:
if not isinstance(other, SpecialPhrase):
return False
@@ -32,5 +33,5 @@ class SpecialPhrase:
and self.p_type == other.p_type \
and self.p_operator == other.p_operator
def __hash__(self) -> int:
    """ Return a hash built from the label, class, type and operator fields.
    """
    return hash((self.p_label, self.p_class, self.p_type, self.p_operator))