mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
fix style issue found by flake8
This commit is contained in:
@@ -18,6 +18,7 @@ from .exec_utils import run_osm2pgsql
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
def _run_osm2pgsql(dsn: str, options: MutableMapping[str, Any]) -> None:
|
||||
run_osm2pgsql(options)
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ from ..data.place_info import PlaceInfo
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
def _get_place_info(cursor: Cursor, osm_id: Optional[str],
|
||||
place_id: Optional[int]) -> DictCursorResult:
|
||||
sql = """SELECT place_id, extra.*
|
||||
|
||||
@@ -12,7 +12,7 @@ from enum import Enum
|
||||
from textwrap import dedent
|
||||
|
||||
from ..config import Configuration
|
||||
from ..db.connection import connect, Connection, server_version_tuple,\
|
||||
from ..db.connection import connect, Connection, server_version_tuple, \
|
||||
index_exists, table_exists, execute_scalar
|
||||
from ..db import properties
|
||||
from ..errors import UsageError
|
||||
@@ -22,6 +22,7 @@ from ..version import NOMINATIM_VERSION, parse_version
|
||||
|
||||
CHECKLIST = []
|
||||
|
||||
|
||||
class CheckState(Enum):
|
||||
""" Possible states of a check. FATAL stops check execution entirely.
|
||||
"""
|
||||
@@ -31,9 +32,11 @@ class CheckState(Enum):
|
||||
NOT_APPLICABLE = 3
|
||||
WARN = 4
|
||||
|
||||
|
||||
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
|
||||
CheckFunc = Callable[[Connection, Configuration], CheckResult]
|
||||
|
||||
|
||||
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
|
||||
""" Decorator for checks. It adds the function to the list of
|
||||
checks to execute and adds the code for printing progress messages.
|
||||
@@ -68,6 +71,7 @@ def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
class _BadConnection:
|
||||
|
||||
def __init__(self, msg: str) -> None:
|
||||
@@ -77,13 +81,14 @@ class _BadConnection:
|
||||
""" Dummy function to provide the implementation.
|
||||
"""
|
||||
|
||||
|
||||
def check_database(config: Configuration) -> int:
|
||||
""" Run a number of checks on the database and return the status.
|
||||
"""
|
||||
try:
|
||||
conn = connect(config.get_libpq_dsn())
|
||||
except UsageError as err:
|
||||
conn = _BadConnection(str(err)) # type: ignore[assignment]
|
||||
conn = _BadConnection(str(err)) # type: ignore[assignment]
|
||||
|
||||
overall_result = 0
|
||||
for check in CHECKLIST:
|
||||
@@ -110,7 +115,7 @@ def _get_indexes(conn: Connection) -> List[str]:
|
||||
'idx_osmline_parent_osm_id',
|
||||
'idx_postcode_id',
|
||||
'idx_postcode_postcode'
|
||||
]
|
||||
]
|
||||
|
||||
# These won't exist if --reverse-only import was used
|
||||
if table_exists(conn, 'search_name'):
|
||||
@@ -154,6 +159,7 @@ def check_connection(conn: Any, config: Configuration) -> CheckResult:
|
||||
|
||||
return CheckState.OK
|
||||
|
||||
|
||||
@_check(hint="""\
|
||||
Database version ({db_version}) doesn't match Nominatim version ({nom_version})
|
||||
|
||||
@@ -195,6 +201,7 @@ def check_database_version(conn: Connection, config: Configuration) -> CheckResu
|
||||
instruction=instruction,
|
||||
config=config)
|
||||
|
||||
|
||||
@_check(hint="""\
|
||||
placex table not found
|
||||
|
||||
@@ -274,7 +281,7 @@ def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
|
||||
return CheckState.OK
|
||||
|
||||
if freeze.is_frozen(conn):
|
||||
index_cmd="""\
|
||||
index_cmd = """\
|
||||
Database is marked frozen, it cannot be updated.
|
||||
Low counts of unindexed places are fine."""
|
||||
return CheckState.WARN, dict(count=cnt, index_cmd=index_cmd)
|
||||
|
||||
@@ -132,8 +132,8 @@ def report_system_information(config: Configuration) -> None:
|
||||
- PostgreSQL version: {postgresql_ver}
|
||||
- PostGIS version: {postgis_ver}
|
||||
- OS: {os_name_info()}
|
||||
|
||||
|
||||
|
||||
|
||||
**Hardware Configuration:**
|
||||
- RAM: {friendly_memory_string(psutil.virtual_memory().total)}
|
||||
- number of CPUs: {psutil.cpu_count(logical=False)}
|
||||
@@ -144,13 +144,13 @@ def report_system_information(config: Configuration) -> None:
|
||||
```
|
||||
{run_command(["df", "-h"])}
|
||||
```
|
||||
|
||||
|
||||
**lsblk - list block devices: **
|
||||
```
|
||||
{run_command("lsblk")}
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
**Postgresql Configuration:**
|
||||
```
|
||||
{postgresql_config}
|
||||
|
||||
@@ -21,6 +21,7 @@ from nominatim_api.sql.sqlalchemy_types import Geometry, IntArray
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
async def convert(project_dir: Optional[Union[str, Path]],
|
||||
outfile: Path, options: Set[str]) -> None:
|
||||
""" Export an existing database to sqlite. The resulting database
|
||||
@@ -53,7 +54,6 @@ class SqliteWriter:
|
||||
self.dest = dest
|
||||
self.options = options
|
||||
|
||||
|
||||
async def write(self) -> None:
|
||||
""" Create the database structure and copy the data from
|
||||
the source database to the destination.
|
||||
@@ -67,7 +67,6 @@ class SqliteWriter:
|
||||
await self.create_word_table()
|
||||
await self.create_indexes()
|
||||
|
||||
|
||||
async def create_tables(self) -> None:
|
||||
""" Set up the database tables.
|
||||
"""
|
||||
@@ -87,7 +86,6 @@ class SqliteWriter:
|
||||
sa.func.RecoverGeometryColumn(table.name, col.name, 4326,
|
||||
col.type.subtype.upper(), 'XY')))
|
||||
|
||||
|
||||
async def create_class_tables(self) -> None:
|
||||
""" Set up the table that serve class/type-specific geometries.
|
||||
"""
|
||||
@@ -99,7 +97,6 @@ class SqliteWriter:
|
||||
sa.Column('place_id', sa.BigInteger),
|
||||
sa.Column('centroid', Geometry))
|
||||
|
||||
|
||||
async def create_word_table(self) -> None:
|
||||
""" Create the word table.
|
||||
This table needs the property information to determine the
|
||||
@@ -122,7 +119,6 @@ class SqliteWriter:
|
||||
|
||||
await self.dest.connection.run_sync(sa.Index('idx_word_woken', dest.c.word_token).create)
|
||||
|
||||
|
||||
async def copy_data(self) -> None:
|
||||
""" Copy data for all registered tables.
|
||||
"""
|
||||
@@ -151,7 +147,6 @@ class SqliteWriter:
|
||||
data = [{'tablename': t} for t in self.dest.t.meta.tables]
|
||||
await self.dest.execute(pg_tables.insert().values(data))
|
||||
|
||||
|
||||
async def create_indexes(self) -> None:
|
||||
""" Add indexes necessary for the frontend.
|
||||
"""
|
||||
@@ -197,14 +192,12 @@ class SqliteWriter:
|
||||
await self.dest.execute(sa.select(
|
||||
sa.func.CreateSpatialIndex(t, 'centroid')))
|
||||
|
||||
|
||||
async def create_spatial_index(self, table: str, column: str) -> None:
|
||||
""" Create a spatial index on the given table and column.
|
||||
"""
|
||||
await self.dest.execute(sa.select(
|
||||
sa.func.CreateSpatialIndex(getattr(self.dest.t, table).name, column)))
|
||||
|
||||
|
||||
async def create_index(self, table_name: str, column: str) -> None:
|
||||
""" Create a simple index on the given table and column.
|
||||
"""
|
||||
@@ -212,7 +205,6 @@ class SqliteWriter:
|
||||
await self.dest.connection.run_sync(
|
||||
sa.Index(f"idx_{table}_{column}", getattr(table.c, column)).create)
|
||||
|
||||
|
||||
async def create_search_index(self) -> None:
|
||||
""" Create the tables and indexes needed for word lookup.
|
||||
"""
|
||||
@@ -242,7 +234,6 @@ class SqliteWriter:
|
||||
await self.dest.connection.run_sync(
|
||||
sa.Index('idx_reverse_search_name_word', rsn.c.word).create)
|
||||
|
||||
|
||||
def select_from(self, table: str) -> SaSelect:
|
||||
""" Create the SQL statement to select the source columns and rows.
|
||||
"""
|
||||
@@ -258,9 +249,9 @@ class SqliteWriter:
|
||||
columns.geometry),
|
||||
else_=sa.func.ST_SimplifyPreserveTopology(
|
||||
columns.geometry, 0.0001)
|
||||
)).label('geometry'))
|
||||
)).label('geometry'))
|
||||
|
||||
sql = sa.select(*(sa.func.ST_AsText(c).label(c.name)
|
||||
if isinstance(c.type, Geometry) else c for c in columns))
|
||||
if isinstance(c.type, Geometry) else c for c in columns))
|
||||
|
||||
return sql
|
||||
|
||||
@@ -20,7 +20,7 @@ from psycopg import sql as pysql
|
||||
|
||||
from ..errors import UsageError
|
||||
from ..config import Configuration
|
||||
from ..db.connection import connect, get_pg_env, Connection, server_version_tuple,\
|
||||
from ..db.connection import connect, get_pg_env, Connection, server_version_tuple, \
|
||||
postgis_version_tuple, drop_tables, table_exists, execute_scalar
|
||||
from ..db.sql_preprocessor import SQLPreprocessor
|
||||
from ..db.query_pool import QueryPool
|
||||
@@ -29,6 +29,7 @@ from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
def _require_version(module: str, actual: Tuple[int, int], expected: Tuple[int, int]) -> None:
|
||||
""" Compares the version for the given module and raises an exception
|
||||
if the actual version is too old.
|
||||
@@ -251,7 +252,7 @@ async def _progress_print() -> None:
|
||||
|
||||
|
||||
async def create_search_indices(conn: Connection, config: Configuration,
|
||||
drop: bool = False, threads: int = 1) -> None:
|
||||
drop: bool = False, threads: int = 1) -> None:
|
||||
""" Create tables that have explicit partitioning.
|
||||
"""
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
|
||||
'--number-processes', '1' if options['append'] else str(options['threads']),
|
||||
'--cache', str(options['osm2pgsql_cache']),
|
||||
'--style', str(options['osm2pgsql_style'])
|
||||
]
|
||||
]
|
||||
|
||||
if str(options['osm2pgsql_style']).endswith('.lua'):
|
||||
env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / '?.lua'),
|
||||
@@ -50,7 +50,6 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
|
||||
cmd.extend(('--output', 'gazetteer', '--hstore', '--latlon'))
|
||||
cmd.extend(_mk_tablespace_options('main', options))
|
||||
|
||||
|
||||
if options['flatnode_file']:
|
||||
cmd.extend(('--flat-nodes', options['flatnode_file']))
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ UPDATE_TABLES = [
|
||||
'wikipedia_%'
|
||||
]
|
||||
|
||||
|
||||
def drop_update_tables(conn: Connection) -> None:
|
||||
""" Drop all tables only necessary for updating the database from
|
||||
OSM replication data.
|
||||
@@ -49,8 +50,8 @@ def drop_flatnode_file(fpath: Optional[Path]) -> None:
|
||||
if fpath and fpath.exists():
|
||||
fpath.unlink()
|
||||
|
||||
|
||||
def is_frozen(conn: Connection) -> bool:
|
||||
""" Returns true if database is in a frozen state
|
||||
"""
|
||||
|
||||
return table_exists(conn, 'place') is False
|
||||
|
||||
@@ -13,7 +13,7 @@ import logging
|
||||
from ..errors import UsageError
|
||||
from ..config import Configuration
|
||||
from ..db import properties
|
||||
from ..db.connection import connect, Connection,\
|
||||
from ..db.connection import connect, Connection, \
|
||||
table_exists, register_hstore
|
||||
from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
@@ -21,7 +21,8 @@ from . import refresh
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
_MIGRATION_FUNCTIONS : List[Tuple[NominatimVersion, Callable[..., None]]] = []
|
||||
_MIGRATION_FUNCTIONS: List[Tuple[NominatimVersion, Callable[..., None]]] = []
|
||||
|
||||
|
||||
def migrate(config: Configuration, paths: Any) -> int:
|
||||
""" Check for the current database version and execute migrations,
|
||||
|
||||
@@ -25,6 +25,7 @@ from ..tokenizer.base import AbstractAnalyzer, AbstractTokenizer
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
def _to_float(numstr: str, max_value: float) -> float:
|
||||
""" Convert the number in string into a float. The number is expected
|
||||
to be in the range of [-max_value, max_value]. Otherwise rises a
|
||||
@@ -36,6 +37,7 @@ def _to_float(numstr: str, max_value: float) -> float:
|
||||
|
||||
return num
|
||||
|
||||
|
||||
class _PostcodeCollector:
|
||||
""" Collector for postcodes of a single country.
|
||||
"""
|
||||
@@ -46,7 +48,6 @@ class _PostcodeCollector:
|
||||
self.collected: Dict[str, PointsCentroid] = defaultdict(PointsCentroid)
|
||||
self.normalization_cache: Optional[Tuple[str, Optional[str]]] = None
|
||||
|
||||
|
||||
def add(self, postcode: str, x: float, y: float) -> None:
|
||||
""" Add the given postcode to the collection cache. If the postcode
|
||||
already existed, it is overwritten with the new centroid.
|
||||
@@ -63,7 +64,6 @@ class _PostcodeCollector:
|
||||
if normalized:
|
||||
self.collected[normalized] += (x, y)
|
||||
|
||||
|
||||
def commit(self, conn: Connection, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
|
||||
""" Update postcodes for the country from the postcodes selected so far
|
||||
as well as any externally supplied postcodes.
|
||||
@@ -97,9 +97,9 @@ class _PostcodeCollector:
|
||||
""").format(pysql.Literal(self.country)),
|
||||
to_update)
|
||||
|
||||
|
||||
def _compute_changes(self, conn: Connection) \
|
||||
-> Tuple[List[Tuple[str, float, float]], List[str], List[Tuple[float, float, str]]]:
|
||||
def _compute_changes(
|
||||
self, conn: Connection
|
||||
) -> Tuple[List[Tuple[str, float, float]], List[str], List[Tuple[float, float, str]]]:
|
||||
""" Compute which postcodes from the collected postcodes have to be
|
||||
added or modified and which from the location_postcode table
|
||||
have to be deleted.
|
||||
@@ -125,7 +125,6 @@ class _PostcodeCollector:
|
||||
|
||||
return to_add, to_delete, to_update
|
||||
|
||||
|
||||
def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
|
||||
""" Look for an external postcode file for the active country in
|
||||
the project directory and add missing postcodes when found.
|
||||
@@ -155,7 +154,6 @@ class _PostcodeCollector:
|
||||
finally:
|
||||
csvfile.close()
|
||||
|
||||
|
||||
def _open_external(self, project_dir: Path) -> Optional[TextIO]:
|
||||
fname = project_dir / f'{self.country}_postcodes.csv'
|
||||
|
||||
@@ -225,6 +223,7 @@ def update_postcodes(dsn: str, project_dir: Path, tokenizer: AbstractTokenizer)
|
||||
|
||||
analyzer.update_postcodes_from_db()
|
||||
|
||||
|
||||
def can_compute(dsn: str) -> bool:
|
||||
"""
|
||||
Check that the place table exists so that
|
||||
|
||||
@@ -16,7 +16,7 @@ from pathlib import Path
|
||||
from psycopg import sql as pysql
|
||||
|
||||
from ..config import Configuration
|
||||
from ..db.connection import Connection, connect, postgis_version_tuple,\
|
||||
from ..db.connection import Connection, connect, postgis_version_tuple, \
|
||||
drop_tables
|
||||
from ..db.utils import execute_file
|
||||
from ..db.sql_preprocessor import SQLPreprocessor
|
||||
@@ -25,6 +25,7 @@ LOG = logging.getLogger()
|
||||
|
||||
OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'}
|
||||
|
||||
|
||||
def _add_address_level_rows_from_entry(rows: MutableSequence[Tuple[Any, ...]],
|
||||
entry: Mapping[str, Any]) -> None:
|
||||
""" Converts a single entry from the JSON format for address rank
|
||||
@@ -51,7 +52,7 @@ def load_address_levels(conn: Connection, table: str, levels: Sequence[Mapping[s
|
||||
The table has the following columns:
|
||||
country, class, type, rank_search, rank_address
|
||||
"""
|
||||
rows: List[Tuple[Any, ...]] = []
|
||||
rows: List[Tuple[Any, ...]] = []
|
||||
for entry in levels:
|
||||
_add_address_level_rows_from_entry(rows, entry)
|
||||
|
||||
@@ -199,6 +200,7 @@ def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool =
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def recompute_importance(conn: Connection) -> None:
|
||||
""" Recompute wikipedia links and importance for all entries in placex.
|
||||
This is a long-running operations that must not be executed in
|
||||
|
||||
@@ -33,6 +33,7 @@ except ModuleNotFoundError as exc:
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
def init_replication(conn: Connection, base_url: str,
|
||||
socket_timeout: int = 60) -> None:
|
||||
""" Set up replication for the server at the given base URL.
|
||||
@@ -83,6 +84,7 @@ def check_for_updates(conn: Connection, base_url: str,
|
||||
LOG.warning("New data available (%i => %i).", seq, state.sequence)
|
||||
return 0
|
||||
|
||||
|
||||
class UpdateState(Enum):
|
||||
""" Possible states after an update has run.
|
||||
"""
|
||||
@@ -176,12 +178,12 @@ def _make_replication_server(url: str, timeout: int) -> ContextManager[Replicati
|
||||
""" Download a resource from the given URL and return a byte sequence
|
||||
of the content.
|
||||
"""
|
||||
headers = {"User-Agent" : f"Nominatim (pyosmium/{pyo_version.pyosmium_release})"}
|
||||
headers = {"User-Agent": f"Nominatim (pyosmium/{pyo_version.pyosmium_release})"}
|
||||
|
||||
if self.session is not None:
|
||||
return self.session.get(url.get_full_url(),
|
||||
headers=headers, timeout=timeout or None,
|
||||
stream=True)
|
||||
headers=headers, timeout=timeout or None,
|
||||
stream=True)
|
||||
|
||||
@contextmanager
|
||||
def _get_url_with_session() -> Iterator[requests.Response]:
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
import logging
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
class SpecialPhrasesImporterStatistics():
|
||||
"""
|
||||
Class handling statistics of the import
|
||||
|
||||
@@ -16,6 +16,7 @@ import os
|
||||
from ...errors import UsageError
|
||||
from .special_phrase import SpecialPhrase
|
||||
|
||||
|
||||
class SPCsvLoader:
|
||||
"""
|
||||
Handles loading of special phrases from external csv file.
|
||||
@@ -23,7 +24,6 @@ class SPCsvLoader:
|
||||
def __init__(self, csv_path: str) -> None:
|
||||
self.csv_path = csv_path
|
||||
|
||||
|
||||
def generate_phrases(self) -> Iterable[SpecialPhrase]:
|
||||
""" Open and parse the given csv file.
|
||||
Create the corresponding SpecialPhrases.
|
||||
@@ -35,7 +35,6 @@ class SPCsvLoader:
|
||||
for row in reader:
|
||||
yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
|
||||
|
||||
|
||||
def _check_csv_validity(self) -> None:
|
||||
"""
|
||||
Check that the csv file has the right extension.
|
||||
|
||||
@@ -28,6 +28,7 @@ from ...tokenizer.base import AbstractTokenizer
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
def _classtype_table(phrase_class: str, phrase_type: str) -> str:
|
||||
""" Return the name of the table for the given class and type.
|
||||
"""
|
||||
@@ -96,7 +97,6 @@ class SPImporter():
|
||||
LOG.warning('Import done.')
|
||||
self.statistics_handler.notify_import_done()
|
||||
|
||||
|
||||
def _fetch_existing_place_classtype_tables(self) -> None:
|
||||
"""
|
||||
Fetch existing place_classtype tables.
|
||||
@@ -114,7 +114,7 @@ class SPImporter():
|
||||
self.table_phrases_to_delete.add(row[0])
|
||||
|
||||
def _load_white_and_black_lists(self) \
|
||||
-> Tuple[Mapping[str, Sequence[str]], Mapping[str, Sequence[str]]]:
|
||||
-> Tuple[Mapping[str, Sequence[str]], Mapping[str, Sequence[str]]]:
|
||||
"""
|
||||
Load white and black lists from phrases-settings.json.
|
||||
"""
|
||||
@@ -163,7 +163,6 @@ class SPImporter():
|
||||
|
||||
return (phrase.p_class, phrase.p_type)
|
||||
|
||||
|
||||
def _create_classtype_table_and_indexes(self,
|
||||
class_type_pairs: Iterable[Tuple[str, str]]) -> None:
|
||||
"""
|
||||
@@ -207,7 +206,6 @@ class SPImporter():
|
||||
with self.db_connection.cursor() as db_cursor:
|
||||
db_cursor.execute("DROP INDEX idx_placex_classtype")
|
||||
|
||||
|
||||
def _create_place_classtype_table(self, sql_tablespace: str,
|
||||
phrase_class: str, phrase_type: str) -> None:
|
||||
"""
|
||||
@@ -224,7 +222,6 @@ class SPImporter():
|
||||
""").format(Identifier(table_name), SQL(sql_tablespace)),
|
||||
(phrase_class, phrase_type))
|
||||
|
||||
|
||||
def _create_place_classtype_indexes(self, sql_tablespace: str,
|
||||
phrase_class: str, phrase_type: str) -> None:
|
||||
"""
|
||||
@@ -248,7 +245,6 @@ class SPImporter():
|
||||
Identifier(base_table),
|
||||
SQL(sql_tablespace)))
|
||||
|
||||
|
||||
def _grant_access_to_webuser(self, phrase_class: str, phrase_type: str) -> None:
|
||||
"""
|
||||
Grant access on read to the table place_classtype for the webuser.
|
||||
@@ -259,7 +255,6 @@ class SPImporter():
|
||||
.format(Identifier(table_name),
|
||||
Identifier(self.config.DATABASE_WEBUSER)))
|
||||
|
||||
|
||||
def _remove_non_existent_tables_from_db(self) -> None:
|
||||
"""
|
||||
Remove special phrases which doesn't exist on the wiki anymore.
|
||||
|
||||
@@ -17,6 +17,7 @@ from .special_phrase import SpecialPhrase
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
def _get_wiki_content(lang: str) -> str:
|
||||
"""
|
||||
Request and return the wiki page's content
|
||||
@@ -43,12 +44,11 @@ class SPWikiLoader:
|
||||
self.type_fix_pattern = re.compile(r'\"|"')
|
||||
|
||||
self.languages = self.config.get_str_list('LANGUAGES') or \
|
||||
['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
|
||||
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
|
||||
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
|
||||
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi',
|
||||
'lv', 'tr']
|
||||
|
||||
['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
|
||||
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
|
||||
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
|
||||
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi',
|
||||
'lv', 'tr']
|
||||
|
||||
def generate_phrases(self) -> Iterable[SpecialPhrase]:
|
||||
""" Download the wiki pages for the configured languages
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
"""
|
||||
from typing import Any
|
||||
|
||||
|
||||
class SpecialPhrase:
|
||||
"""
|
||||
Model representing a special phrase.
|
||||
@@ -29,9 +30,9 @@ class SpecialPhrase:
|
||||
return False
|
||||
|
||||
return self.p_label == other.p_label \
|
||||
and self.p_class == other.p_class \
|
||||
and self.p_type == other.p_type \
|
||||
and self.p_operator == other.p_operator
|
||||
and self.p_class == other.p_class \
|
||||
and self.p_type == other.p_type \
|
||||
and self.p_operator == other.p_operator
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return hash((self.p_label, self.p_class, self.p_type, self.p_operator))
|
||||
|
||||
@@ -27,6 +27,7 @@ from . import freeze
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
class TigerInput:
|
||||
""" Context manager that goes through Tiger input files which may
|
||||
either be in a directory or gzipped together in a tar file.
|
||||
@@ -38,7 +39,7 @@ class TigerInput:
|
||||
|
||||
if data_dir.endswith('.tar.gz'):
|
||||
try:
|
||||
self.tar_handle = tarfile.open(data_dir) # pylint: disable=consider-using-with
|
||||
self.tar_handle = tarfile.open(data_dir)
|
||||
except tarfile.ReadError as err:
|
||||
LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
|
||||
raise UsageError("Cannot open Tiger data file.") from err
|
||||
@@ -53,11 +54,9 @@ class TigerInput:
|
||||
if not self.files:
|
||||
LOG.warning("Tiger data import selected but no files found at %s", data_dir)
|
||||
|
||||
|
||||
def __enter__(self) -> 'TigerInput':
|
||||
return self
|
||||
|
||||
|
||||
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
||||
if self.tar_handle:
|
||||
self.tar_handle.close()
|
||||
@@ -77,7 +76,6 @@ class TigerInput:
|
||||
|
||||
return open(cast(str, fname), encoding='utf-8')
|
||||
|
||||
|
||||
def __iter__(self) -> Iterator[Dict[str, Any]]:
|
||||
""" Iterate over the lines in each file.
|
||||
"""
|
||||
@@ -87,7 +85,7 @@ class TigerInput:
|
||||
|
||||
|
||||
async def add_tiger_data(data_dir: str, config: Configuration, threads: int,
|
||||
tokenizer: AbstractTokenizer) -> int:
|
||||
tokenizer: AbstractTokenizer) -> int:
|
||||
""" Import tiger data from directory or tar file `data dir`.
|
||||
"""
|
||||
dsn = config.get_libpq_dsn()
|
||||
|
||||
Reference in New Issue
Block a user