diff --git a/.pylintrc b/.pylintrc index d5c4514f..fef53872 100644 --- a/.pylintrc +++ b/.pylintrc @@ -11,6 +11,6 @@ ignored-modules=icu,datrie # 'with' statements. ignored-classes=NominatimArgs,closing # 'too-many-ancestors' is triggered already by deriving from UserDict -disable=too-few-public-methods,duplicate-code,too-many-ancestors +disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use good-names=i,x,y,fd,db diff --git a/nominatim/cli.py b/nominatim/cli.py index 01eb6119..f911023b 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -60,9 +60,9 @@ class CommandlineParser: def nominatim_version_text(): """ Program name and version number as string """ - text = 'Nominatim version %s.%s.%s.%s' % version.NOMINATIM_VERSION + text = f'Nominatim version {version.version_str()}' if version.GIT_COMMIT_HASH is not None: - text += ' (%s)' % version.GIT_COMMIT_HASH + text += f' ({version.GIT_COMMIT_HASH})' return text def add_subcommand(self, name, cmd): diff --git a/nominatim/clicmd/replication.py b/nominatim/clicmd/replication.py index 849a0e49..9d946304 100644 --- a/nominatim/clicmd/replication.py +++ b/nominatim/clicmd/replication.py @@ -21,7 +21,7 @@ LOG = logging.getLogger() # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 # Using non-top-level imports to make pyosmium optional for replication only. -# pylint: disable=E0012,C0415 +# pylint: disable=C0415 class UpdateReplication: """\ @@ -96,8 +96,7 @@ class UpdateReplication: end = dt.datetime.now(dt.timezone.utc) LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.", round_time((start_index or end) - start_import), - "Indexing: {} ".format(round_time(end - start_index)) - if start_index else '', + f"Indexing: {round_time(end - start_index)} " if start_index else '', round_time(end - start_import), round_time(end - batchdate)) diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index e822cbe5..b643c5ba 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -14,12 +14,12 @@ import psutil from nominatim.db.connection import connect from nominatim.db import status, properties -from nominatim.version import NOMINATIM_VERSION +from nominatim.version import version_str # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 # Using non-top-level imports to avoid eventually unused imports. -# pylint: disable=E0012,C0415 +# pylint: disable=C0415 LOG = logging.getLogger() @@ -194,10 +194,10 @@ class SetupAll: LOG.warning('Creating support index') if tablespace: tablespace = 'TABLESPACE ' + tablespace - cur.execute("""CREATE INDEX idx_placex_pendingsector - ON placex USING BTREE (rank_address,geometry_sector) - {} WHERE indexed_status > 0 - """.format(tablespace)) + cur.execute(f"""CREATE INDEX idx_placex_pendingsector + ON placex USING BTREE (rank_address,geometry_sector) + {tablespace} WHERE indexed_status > 0 + """) conn.commit() @@ -213,5 +213,4 @@ class SetupAll: except Exception as exc: # pylint: disable=broad-except LOG.error('Cannot determine date of database: %s', exc) - properties.set_property(conn, 'database_version', - '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION)) + properties.set_property(conn, 'database_version', version_str()) diff --git a/nominatim/config.py b/nominatim/config.py index a3f91055..ef261079 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -144,7 +144,7 @@ class Configuration: style = self.__getattr__('IMPORT_STYLE') if style in ('admin', 'street', 'address', 'full', 'extratags'): - return self.config_dir / 'import-{}.style'.format(style) + return self.config_dir / f'import-{style}.style' return self.find_config_file('', 'IMPORT_STYLE') diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index 45bc173d..c60bcfdd 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -25,7 +25,8 @@ class _Cursor(psycopg2.extras.DictCursor): execution functions. """ - def execute(self, query, args=None): # pylint: disable=W0221 + # pylint: disable=arguments-renamed,arguments-differ + def execute(self, query, args=None): """ Query execution that logs the SQL query when debugging is enabled. """ LOG.debug(self.mogrify(query, args).decode('utf-8')) @@ -163,7 +164,7 @@ def connect(dsn): ctxmgr.connection = conn return ctxmgr except psycopg2.OperationalError as err: - raise UsageError("Cannot connect to database: {}".format(err)) from err + raise UsageError(f"Cannot connect to database: {err}") from err # Translation from PG connection string parameters to PG environment variables. diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py index 10d93666..4de53886 100644 --- a/nominatim/db/sql_preprocessor.py +++ b/nominatim/db/sql_preprocessor.py @@ -39,10 +39,10 @@ def _setup_tablespace_sql(config): out = {} for subset in ('ADDRESS', 'SEARCH', 'AUX'): for kind in ('DATA', 'INDEX'): - tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind)) + tspace = getattr(config, f'TABLESPACE_{subset}_{kind}') if tspace: - tspace = 'TABLESPACE "{}"'.format(tspace) - out['{}_{}'.format(subset.lower(), kind.lower())] = tspace + tspace = f'TABLESPACE "{tspace}"' + out[f'{subset.lower()}_{kind.lower()}'] = tspace return out diff --git a/nominatim/db/status.py b/nominatim/db/status.py index 12b24a83..d31196b3 100644 --- a/nominatim/db/status.py +++ b/nominatim/db/status.py @@ -34,7 +34,7 @@ def compute_database_date(conn): LOG.info("Using node id %d for timestamp lookup", osmid) # Get the node from the API to find the timestamp when it was created. - node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid) + node_url = f'https://www.openstreetmap.org/api/0.6/node/{osmid}/1' data = get_url(node_url) match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data) diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py index 87b0545f..b859afa8 100644 --- a/nominatim/db/utils.py +++ b/nominatim/db/utils.py @@ -40,36 +40,36 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None) cmd.extend(('-v', 'ON_ERROR_STOP=1')) if not LOG.isEnabledFor(logging.INFO): cmd.append('--quiet') - proc = subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) - try: - if not LOG.isEnabledFor(logging.INFO): - proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8')) + with subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) as proc: + try: + if not LOG.isEnabledFor(logging.INFO): + proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8')) - if pre_code: - proc.stdin.write((pre_code + ';').encode('utf-8')) + if pre_code: + proc.stdin.write((pre_code + ';').encode('utf-8')) - if fname.suffix == '.gz': - with gzip.open(str(fname), 'rb') as fdesc: - remain = _pipe_to_proc(proc, fdesc) - else: - with fname.open('rb') as fdesc: - remain = _pipe_to_proc(proc, fdesc) + if fname.suffix == '.gz': + with gzip.open(str(fname), 'rb') as fdesc: + remain = _pipe_to_proc(proc, fdesc) + else: + with fname.open('rb') as fdesc: + remain = _pipe_to_proc(proc, fdesc) - if remain == 0 and post_code: - proc.stdin.write((';' + post_code).encode('utf-8')) - finally: - proc.stdin.close() - ret = proc.wait() + if remain == 0 and post_code: + proc.stdin.write((';' + post_code).encode('utf-8')) + finally: + proc.stdin.close() + ret = proc.wait() if ret != 0 or remain > 0: raise UsageError("Failed to execute SQL file.") # List of characters that need to be quoted for the copy command. -_SQL_TRANSLATION = {ord(u'\\'): u'\\\\', - ord(u'\t'): u'\\t', - ord(u'\n'): u'\\n'} +_SQL_TRANSLATION = {ord('\\'): '\\\\', + ord('\t'): '\\t', + ord('\n'): '\\n'} class CopyBuffer: diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py index ac7a0015..9a30ffe6 100644 --- a/nominatim/indexer/runners.py +++ b/nominatim/indexer/runners.py @@ -66,7 +66,7 @@ class RankRunner(AbstractPlacexRunner): """ def name(self): - return "rank {}".format(self.rank) + return f"rank {self.rank}" def sql_count_objects(self): return pysql.SQL("""SELECT count(*) FROM placex @@ -86,7 +86,7 @@ class BoundaryRunner(AbstractPlacexRunner): """ def name(self): - return "boundaries rank {}".format(self.rank) + return f"boundaries rank {self.rank}" def sql_count_objects(self): return pysql.SQL("""SELECT count(*) FROM placex diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index 9c7138ce..bf5544ed 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -278,8 +278,7 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer): + [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()] - @staticmethod - def normalize_postcode(postcode): + def normalize_postcode(self, postcode): """ Convert the postcode to a standardized form. This function must yield exactly the same result as the SQL function diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index 97ce6d16..7b78b22a 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -74,10 +74,10 @@ def _check_module(module_dir, conn): with conn.cursor() as cur: try: cur.execute("""CREATE FUNCTION nominatim_test_import_func(text) - RETURNS text AS '{}/nominatim.so', 'transliteration' + RETURNS text AS %s, 'transliteration' LANGUAGE c IMMUTABLE STRICT; DROP FUNCTION nominatim_test_import_func(text) - """.format(module_dir)) + """, (f'{module_dir}/nominatim.so', )) except psycopg2.DatabaseError as err: LOG.fatal("Error accessing database module: %s", err) raise UsageError("Database module cannot be accessed.") from err @@ -250,12 +250,12 @@ class LegacyTokenizer(AbstractTokenizer): php_file = self.data_dir / "tokenizer.php" if not php_file.exists() or overwrite: - php_file.write_text(dedent("""\ + php_file.write_text(dedent(f"""\ ', rule) if len(parts) != 4: - raise UsageError("Syntax error in variant rule: " + rule) + raise UsageError(f"Syntax error in variant rule: {rule}") decompose = parts[1] is None src_terms = [self._parse_variant_word(t) for t in parts[0].split(',')] @@ -89,7 +89,7 @@ class _VariantMaker: name = name.strip() match = re.fullmatch(r'([~^]?)([^~$^]*)([~$]?)', name) if match is None or (match.group(1) == '~' and match.group(3) == '~'): - raise UsageError("Invalid variant word descriptor '{}'".format(name)) + raise UsageError(f"Invalid variant word descriptor '{name}'") norm_name = self.norm.transliterate(match.group(2)).strip() if not norm_name: return None diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index caec9035..50938c19 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -234,7 +234,7 @@ def create_search_indices(conn, config, drop=False): bad_indices = [row[0] for row in list(cur)] for idx in bad_indices: LOG.info("Drop invalid index %s.", idx) - cur.execute('DROP INDEX "{}"'.format(idx)) + cur.execute(pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx))) conn.commit() sql = SQLPreprocessor(conn, config) diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index b06b8533..a81a8d6b 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -12,7 +12,7 @@ import subprocess import urllib.request as urlrequest from urllib.parse import urlencode -from nominatim.version import NOMINATIM_VERSION +from nominatim.version import version_str from nominatim.db.connection import get_pg_env LOG = logging.getLogger() @@ -55,10 +55,10 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None, query_string = urlencode(params or {}) env = dict(QUERY_STRING=query_string, - SCRIPT_NAME='/{}.php'.format(endpoint), - REQUEST_URI='/{}.php?{}'.format(endpoint, query_string), + SCRIPT_NAME=f'/{endpoint}.php', + REQUEST_URI=f'/{endpoint}.php?{query_string}', CONTEXT_DOCUMENT_ROOT=webdir, - SCRIPT_FILENAME='{}/{}.php'.format(webdir, endpoint), + SCRIPT_FILENAME=f'{webdir}/{endpoint}.php', HTTP_HOST='localhost', HTTP_USER_AGENT='nominatim-tool', REMOTE_ADDR='0.0.0.0', @@ -150,7 +150,7 @@ def run_osm2pgsql(options): def get_url(url): """ Get the contents from the given URL and return it as a UTF-8 string. """ - headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)} + headers = {"User-Agent": f"Nominatim/{version_str()}"} try: with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response: diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index 76726e8c..28a14455 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -9,9 +9,11 @@ Functions for database migration to newer software versions. """ import logging +from psycopg2 import sql as pysql + from nominatim.db import properties from nominatim.db.connection import connect -from nominatim.version import NOMINATIM_VERSION +from nominatim.version import NOMINATIM_VERSION, version_str from nominatim.tools import refresh from nominatim.tokenizer import factory as tokenizer_factory from nominatim.errors import UsageError @@ -47,7 +49,7 @@ def migrate(config, paths): for version, func in _MIGRATION_FUNCTIONS: if db_version <= version: LOG.warning("Runnning: %s (%s)", func.__doc__.split('\n', 1)[0], - '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version)) + version_str(version)) kwargs = dict(conn=conn, config=config, paths=paths) func(**kwargs) conn.commit() @@ -59,8 +61,7 @@ def migrate(config, paths): tokenizer = tokenizer_factory.get_tokenizer_for_db(config) tokenizer.update_sql_functions(config) - properties.set_property(conn, 'database_version', - '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION)) + properties.set_property(conn, 'database_version', version_str()) conn.commit() @@ -125,11 +126,11 @@ def add_nominatim_property_table(conn, config, **_): """ if not conn.table_exists('nominatim_properties'): with conn.cursor() as cur: - cur.execute("""CREATE TABLE nominatim_properties ( - property TEXT, - value TEXT); - GRANT SELECT ON TABLE nominatim_properties TO "{}"; - """.format(config.DATABASE_WEBUSER)) + cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties ( + property TEXT, + value TEXT); + GRANT SELECT ON TABLE nominatim_properties TO {}; + """).format(pysql.Identifier(config.DATABASE_WEBUSER))) @_migration(3, 6, 0, 0) def change_housenumber_transliteration(conn, **_): @@ -194,7 +195,8 @@ def install_legacy_tokenizer(conn, config, **_): and column_name = 'token_info'""", (table, )) if has_column == 0: - cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table)) + cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB') + .format(pysql.Identifier(table))) tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False, module_name='legacy') diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index adc58ec5..2b7027e7 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -36,7 +36,7 @@ class _CountryPostcodesCollector: def __init__(self, country): self.country = country - self.collected = dict() + self.collected = {} def add(self, postcode, x, y): @@ -136,13 +136,13 @@ class _CountryPostcodesCollector: def _open_external(self, project_dir): - fname = project_dir / '{}_postcodes.csv'.format(self.country) + fname = project_dir / f'{self.country}_postcodes.csv' if fname.is_file(): LOG.info("Using external postcode file '%s'.", fname) - return open(fname, 'r') + return open(fname, 'r', encoding='utf-8') - fname = project_dir / '{}_postcodes.csv.gz'.format(self.country) + fname = project_dir / f'{self.country}_postcodes.csv.gz' if fname.is_file(): LOG.info("Using external postcode file '%s'.", fname) diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index aacc622b..561bcf83 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -15,7 +15,7 @@ from psycopg2 import sql as pysql from nominatim.db.utils import execute_file from nominatim.db.sql_preprocessor import SQLPreprocessor -from nominatim.version import NOMINATIM_VERSION +from nominatim.version import version_str LOG = logging.getLogger() @@ -52,16 +52,19 @@ def load_address_levels(conn, table, levels): with conn.cursor() as cur: cur.drop_table(table) - cur.execute("""CREATE TABLE {} (country_code varchar(2), + cur.execute(pysql.SQL("""CREATE TABLE {} ( + country_code varchar(2), class TEXT, type TEXT, rank_search SMALLINT, - rank_address SMALLINT)""".format(table)) + rank_address SMALLINT) + """).format(pysql.Identifier(table))) cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s") .format(pysql.Identifier(table)), rows) - cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table)) + cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)') + .format(pysql.Identifier(table))) conn.commit() @@ -186,16 +189,15 @@ def setup_website(basedir, config, conn): LOG.info('Creating website directory.') basedir.mkdir() - template = dedent("""\ + template = dedent(f"""\ chunks. - place_threads = max(1, threads - 1) + # Reading files and then for each file line handling + # sql_query in chunks. + place_threads = max(1, threads - 1) - with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool: - with tokenizer.name_analyzer() as analyzer: - for fname in files: - if not tar: - fd = open(fname) - else: - fd = io.TextIOWrapper(tar.extractfile(fname)) + with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool: + with tokenizer.name_analyzer() as analyzer: + while tar: + with tar.next_file() as fd: + handle_threaded_sql_statements(pool, fd, analyzer) - handle_threaded_sql_statements(pool, fd, analyzer) + print('\n') - fd.close() - - if tar: - tar.close() - print('\n') LOG.warning("Creating indexes on Tiger data") with connect(dsn) as conn: sql = SQLPreprocessor(conn, config) diff --git a/nominatim/version.py b/nominatim/version.py index 47fe3b30..88d42af9 100644 --- a/nominatim/version.py +++ b/nominatim/version.py @@ -34,3 +34,11 @@ POSTGIS_REQUIRED_VERSION = (2, 2) # cmake/tool-installed.tmpl is used to build the binary 'nominatim'. Inside # there is a call to set the variable value below. GIT_COMMIT_HASH = None + + +# pylint: disable=consider-using-f-string +def version_str(version=NOMINATIM_VERSION): + """ + Return a human-readable string of the version. + """ + return '{}.{}.{}-{}'.format(*version)