mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-12 05:44:06 +00:00
add type annotations to special phrase importer
This commit is contained in:
@@ -12,15 +12,14 @@ import logging
|
||||
LOG = logging.getLogger()
|
||||
|
||||
class SpecialPhrasesImporterStatistics():
|
||||
# pylint: disable-msg=too-many-instance-attributes
|
||||
"""
|
||||
Class handling statistics of the import
|
||||
process of special phrases.
|
||||
"""
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
self._intialize_values()
|
||||
|
||||
def _intialize_values(self):
|
||||
def _intialize_values(self) -> None:
|
||||
"""
|
||||
Set all counts for the global
|
||||
import to 0.
|
||||
@@ -30,32 +29,32 @@ class SpecialPhrasesImporterStatistics():
|
||||
self.tables_ignored = 0
|
||||
self.invalids = 0
|
||||
|
||||
def notify_one_phrase_invalid(self):
|
||||
def notify_one_phrase_invalid(self) -> None:
|
||||
"""
|
||||
Add +1 to the count of invalid entries
|
||||
fetched from the wiki.
|
||||
"""
|
||||
self.invalids += 1
|
||||
|
||||
def notify_one_table_created(self):
|
||||
def notify_one_table_created(self) -> None:
|
||||
"""
|
||||
Add +1 to the count of created tables.
|
||||
"""
|
||||
self.tables_created += 1
|
||||
|
||||
def notify_one_table_deleted(self):
|
||||
def notify_one_table_deleted(self) -> None:
|
||||
"""
|
||||
Add +1 to the count of deleted tables.
|
||||
"""
|
||||
self.tables_deleted += 1
|
||||
|
||||
def notify_one_table_ignored(self):
|
||||
def notify_one_table_ignored(self) -> None:
|
||||
"""
|
||||
Add +1 to the count of ignored tables.
|
||||
"""
|
||||
self.tables_ignored += 1
|
||||
|
||||
def notify_import_done(self):
|
||||
def notify_import_done(self) -> None:
|
||||
"""
|
||||
Print stats for the whole import process
|
||||
and reset all values.
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
The class allows loading phrases from a CSV file.
|
||||
"""
|
||||
from typing import Iterable
|
||||
import csv
|
||||
import os
|
||||
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
||||
@@ -18,12 +19,11 @@ class SPCsvLoader:
|
||||
"""
|
||||
Handles loading of special phrases from external csv file.
|
||||
"""
|
||||
def __init__(self, csv_path):
|
||||
super().__init__()
|
||||
def __init__(self, csv_path: str) -> None:
|
||||
self.csv_path = csv_path
|
||||
|
||||
|
||||
def generate_phrases(self):
|
||||
def generate_phrases(self) -> Iterable[SpecialPhrase]:
|
||||
""" Open and parse the given csv file.
|
||||
Create the corresponding SpecialPhrases.
|
||||
"""
|
||||
@@ -35,7 +35,7 @@ class SPCsvLoader:
|
||||
yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
|
||||
|
||||
|
||||
def _check_csv_validity(self):
|
||||
def _check_csv_validity(self) -> None:
|
||||
"""
|
||||
Check that the csv file has the right extension.
|
||||
"""
|
||||
|
||||
@@ -13,19 +13,37 @@
|
||||
The phrases already present in the database which are not
|
||||
valid anymore are removed.
|
||||
"""
|
||||
from typing import Iterable, Tuple, Mapping, Sequence, Optional, Set
|
||||
import logging
|
||||
import re
|
||||
|
||||
from typing_extensions import Protocol
|
||||
|
||||
from psycopg2.sql import Identifier, SQL
|
||||
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.db.connection import Connection
|
||||
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
|
||||
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
||||
from nominatim.tokenizer.base import AbstractTokenizer
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def _classtype_table(phrase_class, phrase_type):
|
||||
def _classtype_table(phrase_class: str, phrase_type: str) -> str:
|
||||
""" Return the name of the table for the given class and type.
|
||||
"""
|
||||
return f'place_classtype_{phrase_class}_{phrase_type}'
|
||||
|
||||
|
||||
class SpecialPhraseLoader(Protocol):
|
||||
""" Protocol for classes implementing a loader for special phrases.
|
||||
"""
|
||||
|
||||
def generate_phrases(self) -> Iterable[SpecialPhrase]:
|
||||
""" Generates all special phrase terms this loader can produce.
|
||||
"""
|
||||
|
||||
|
||||
class SPImporter():
|
||||
# pylint: disable-msg=too-many-instance-attributes
|
||||
"""
|
||||
@@ -33,21 +51,22 @@ class SPImporter():
|
||||
|
||||
Takes an SP loader which loads the phrases from an external source.
|
||||
"""
|
||||
def __init__(self, config, db_connection, sp_loader):
|
||||
def __init__(self, config: Configuration, conn: Connection,
|
||||
sp_loader: SpecialPhraseLoader) -> None:
|
||||
self.config = config
|
||||
self.db_connection = db_connection
|
||||
self.db_connection = conn
|
||||
self.sp_loader = sp_loader
|
||||
self.statistics_handler = SpecialPhrasesImporterStatistics()
|
||||
self.black_list, self.white_list = self._load_white_and_black_lists()
|
||||
self.sanity_check_pattern = re.compile(r'^\w+$')
|
||||
# This set will contain all existing phrases to be added.
|
||||
# It contains tuples with the following format: (label, class, type, operator)
|
||||
self.word_phrases = set()
|
||||
self.word_phrases: Set[Tuple[str, str, str, str]] = set()
|
||||
# This set will contain all existing place_classtype tables which don't match any
|
||||
# special phrases class/type on the wiki.
|
||||
self.table_phrases_to_delete = set()
|
||||
self.table_phrases_to_delete: Set[str] = set()
|
||||
|
||||
def import_phrases(self, tokenizer, should_replace):
|
||||
def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool) -> None:
|
||||
"""
|
||||
Iterate through all SpecialPhrases extracted from the
|
||||
loader and import them into the database.
|
||||
@@ -67,7 +86,7 @@ class SPImporter():
|
||||
if result:
|
||||
class_type_pairs.add(result)
|
||||
|
||||
self._create_place_classtype_table_and_indexes(class_type_pairs)
|
||||
self._create_classtype_table_and_indexes(class_type_pairs)
|
||||
if should_replace:
|
||||
self._remove_non_existent_tables_from_db()
|
||||
self.db_connection.commit()
|
||||
@@ -79,7 +98,7 @@ class SPImporter():
|
||||
self.statistics_handler.notify_import_done()
|
||||
|
||||
|
||||
def _fetch_existing_place_classtype_tables(self):
|
||||
def _fetch_existing_place_classtype_tables(self) -> None:
|
||||
"""
|
||||
Fetch existing place_classtype tables.
|
||||
Fill the table_phrases_to_delete set of the class.
|
||||
@@ -95,7 +114,8 @@ class SPImporter():
|
||||
for row in db_cursor:
|
||||
self.table_phrases_to_delete.add(row[0])
|
||||
|
||||
def _load_white_and_black_lists(self):
|
||||
def _load_white_and_black_lists(self) \
|
||||
-> Tuple[Mapping[str, Sequence[str]], Mapping[str, Sequence[str]]]:
|
||||
"""
|
||||
Load white and black lists from phrases-settings.json.
|
||||
"""
|
||||
@@ -103,7 +123,7 @@ class SPImporter():
|
||||
|
||||
return settings['blackList'], settings['whiteList']
|
||||
|
||||
def _check_sanity(self, phrase):
|
||||
def _check_sanity(self, phrase: SpecialPhrase) -> bool:
|
||||
"""
|
||||
Check sanity of given inputs in case somebody added garbage in the wiki.
|
||||
If a bad class/type is detected the system will exit with an error.
|
||||
@@ -117,7 +137,7 @@ class SPImporter():
|
||||
return False
|
||||
return True
|
||||
|
||||
def _process_phrase(self, phrase):
|
||||
def _process_phrase(self, phrase: SpecialPhrase) -> Optional[Tuple[str, str]]:
|
||||
"""
|
||||
Processes the given phrase by checking black and white list
|
||||
and sanity.
|
||||
@@ -145,7 +165,8 @@ class SPImporter():
|
||||
return (phrase.p_class, phrase.p_type)
|
||||
|
||||
|
||||
def _create_place_classtype_table_and_indexes(self, class_type_pairs):
|
||||
def _create_classtype_table_and_indexes(self,
|
||||
class_type_pairs: Iterable[Tuple[str, str]]) -> None:
|
||||
"""
|
||||
Create table place_classtype for each given pair.
|
||||
Also create indexes on place_id and centroid.
|
||||
@@ -188,7 +209,8 @@ class SPImporter():
|
||||
db_cursor.execute("DROP INDEX idx_placex_classtype")
|
||||
|
||||
|
||||
def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
|
||||
def _create_place_classtype_table(self, sql_tablespace: str,
|
||||
phrase_class: str, phrase_type: str) -> None:
|
||||
"""
|
||||
Create table place_classtype of the given phrase_class/phrase_type
|
||||
if it doesn't exist.
|
||||
@@ -204,7 +226,8 @@ class SPImporter():
|
||||
(phrase_class, phrase_type))
|
||||
|
||||
|
||||
def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
|
||||
def _create_place_classtype_indexes(self, sql_tablespace: str,
|
||||
phrase_class: str, phrase_type: str) -> None:
|
||||
"""
|
||||
Create indexes on centroid and place_id for the place_classtype table.
|
||||
"""
|
||||
@@ -227,7 +250,7 @@ class SPImporter():
|
||||
SQL(sql_tablespace)))
|
||||
|
||||
|
||||
def _grant_access_to_webuser(self, phrase_class, phrase_type):
|
||||
def _grant_access_to_webuser(self, phrase_class: str, phrase_type: str) -> None:
|
||||
"""
|
||||
Grant read access on the place_classtype table to the webuser.
|
||||
"""
|
||||
@@ -237,7 +260,7 @@ class SPImporter():
|
||||
.format(Identifier(table_name),
|
||||
Identifier(self.config.DATABASE_WEBUSER)))
|
||||
|
||||
def _remove_non_existent_tables_from_db(self):
|
||||
def _remove_non_existent_tables_from_db(self) -> None:
|
||||
"""
|
||||
Remove special phrases which don't exist on the wiki anymore.
|
||||
Delete the place_classtype tables.
|
||||
|
||||
@@ -7,14 +7,17 @@
|
||||
"""
|
||||
Module containing the SPWikiLoader class.
|
||||
"""
|
||||
from typing import Iterable
|
||||
import re
|
||||
import logging
|
||||
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
||||
from nominatim.tools.exec_utils import get_url
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def _get_wiki_content(lang):
|
||||
def _get_wiki_content(lang: str) -> str:
|
||||
"""
|
||||
Request and return the wiki page's content
|
||||
corresponding to special phrases for a given lang.
|
||||
@@ -30,8 +33,7 @@ class SPWikiLoader:
|
||||
"""
|
||||
Handles loading of special phrases from the wiki.
|
||||
"""
|
||||
def __init__(self, config):
|
||||
super().__init__()
|
||||
def __init__(self, config: Configuration) -> None:
|
||||
self.config = config
|
||||
# Compile the regex here to improve performance.
|
||||
self.occurence_pattern = re.compile(
|
||||
@@ -39,10 +41,15 @@ class SPWikiLoader:
|
||||
)
|
||||
# Hack around a bug where building=yes was imported with quotes into the wiki
|
||||
self.type_fix_pattern = re.compile(r'\"|&quot;')
|
||||
self._load_languages()
|
||||
|
||||
self.languages = self.config.get_str_list('LANGUAGES') or \
|
||||
['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
|
||||
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
|
||||
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
|
||||
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
|
||||
|
||||
|
||||
def generate_phrases(self):
|
||||
def generate_phrases(self) -> Iterable[SpecialPhrase]:
|
||||
""" Download the wiki pages for the configured languages
|
||||
and extract the phrases from the page.
|
||||
"""
|
||||
@@ -58,19 +65,3 @@ class SPWikiLoader:
|
||||
match[1],
|
||||
self.type_fix_pattern.sub('', match[2]),
|
||||
match[3])
|
||||
|
||||
|
||||
def _load_languages(self):
|
||||
"""
|
||||
Get list of all languages from env config file
|
||||
or default if there are no languages configured.
|
||||
The system will extract special phrases only from the specified languages.
|
||||
"""
|
||||
if self.config.LANGUAGES:
|
||||
self.languages = self.config.get_str_list('LANGUAGES')
|
||||
else:
|
||||
self.languages = [
|
||||
'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
|
||||
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
|
||||
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
|
||||
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
|
||||
|
||||
@@ -10,20 +10,21 @@
|
||||
This class is a model used to transfer a special phrase through
|
||||
the loading and import process.
|
||||
"""
|
||||
from typing import Any
|
||||
|
||||
class SpecialPhrase:
|
||||
"""
|
||||
Model representing a special phrase.
|
||||
"""
|
||||
def __init__(self, p_label, p_class, p_type, p_operator):
|
||||
def __init__(self, p_label: str, p_class: str, p_type: str, p_operator: str) -> None:
|
||||
self.p_label = p_label.strip()
|
||||
self.p_class = p_class.strip()
|
||||
# Only whitespace is trimmed here; quotes wrongly imported into the wiki
# (building=yes bug) are removed by the wiki loader before the phrase is built.
|
||||
self.p_type = p_type.strip()
|
||||
# Needed if some operators in the wiki are not written in English
|
||||
p_operator = p_operator.strip().lower()
|
||||
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
|
||||
|
||||
def __eq__(self, other):
|
||||
def __eq__(self, other: Any) -> bool:
|
||||
if not isinstance(other, SpecialPhrase):
|
||||
return False
|
||||
|
||||
@@ -32,5 +33,5 @@ class SpecialPhrase:
|
||||
and self.p_type == other.p_type \
|
||||
and self.p_operator == other.p_operator
|
||||
|
||||
def __hash__(self):
|
||||
def __hash__(self) -> int:
|
||||
return hash((self.p_label, self.p_class, self.p_type, self.p_operator))
|
||||
|
||||
Reference in New Issue
Block a user