Merge pull request #2393 from lonvia/fix-flake8-issues

Fix flake8 issues
This commit is contained in:
Sarah Hoffmann
2021-07-13 16:46:12 +02:00
committed by GitHub
24 changed files with 173 additions and 133 deletions

View File

@@ -103,7 +103,7 @@ class CommandlineParser:
return 1 return 1
##### Subcommand classes # Subcommand classes
# #
# Each class needs to implement two functions: add_args() adds the CLI parameters # Each class needs to implement two functions: add_args() adds the CLI parameters
# for the subfunction, run() executes the subcommand. # for the subfunction, run() executes the subcommand.

View File

@@ -90,7 +90,7 @@ class APISearch:
if args.query: if args.query:
params = dict(q=args.query) params = dict(q=args.query)
else: else:
params = {k : getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)} params = {k: getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)}
for param, _ in EXTRADATA_PARAMS: for param, _ in EXTRADATA_PARAMS:
if getattr(args, param): if getattr(args, param):

View File

@@ -24,4 +24,4 @@ class NominatimArgs:
main_data=self.config.TABLESPACE_PLACE_DATA, main_data=self.config.TABLESPACE_PLACE_DATA,
main_index=self.config.TABLESPACE_PLACE_INDEX main_index=self.config.TABLESPACE_PLACE_INDEX
) )
) )

View File

@@ -61,7 +61,7 @@ class UpdateRefresh:
args.threads or 1) args.threads or 1)
indexer.index_postcodes() indexer.index_postcodes()
else: else:
LOG.error("The place table doesn\'t exist. " \ LOG.error("The place table doesn't exist. "
"Postcode updates on a frozen database is not possible.") "Postcode updates on a frozen database is not possible.")
if args.word_counts: if args.word_counts:

View File

@@ -93,7 +93,7 @@ class UpdateReplication:
indexed_only=not args.once) indexed_only=not args.once)
# Sanity check to not overwhelm the Geofabrik servers. # Sanity check to not overwhelm the Geofabrik servers.
if 'download.geofabrik.de'in params['base_url']\ if 'download.geofabrik.de' in params['base_url']\
and params['update_interval'] < 86400: and params['update_interval'] < 86400:
LOG.fatal("Update interval too low for download.geofabrik.de.\n" LOG.fatal("Update interval too low for download.geofabrik.de.\n"
"Please check install documentation " "Please check install documentation "

View File

@@ -85,7 +85,7 @@ class DBConnection:
# Use a dict to hand in the parameters because async is a reserved # Use a dict to hand in the parameters because async is a reserved
# word in Python3. # word in Python3.
self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True}) self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True})
self.wait() self.wait()
self.cursor = self.conn.cursor(cursor_factory=cursor_factory) self.cursor = self.conn.cursor(cursor_factory=cursor_factory)

View File

@@ -8,6 +8,7 @@ import os
import psycopg2 import psycopg2
import psycopg2.extensions import psycopg2.extensions
import psycopg2.extras import psycopg2.extras
from psycopg2 import sql as pysql
from nominatim.errors import UsageError from nominatim.errors import UsageError
@@ -25,6 +26,16 @@ class _Cursor(psycopg2.extras.DictCursor):
super().execute(query, args) super().execute(query, args)
def execute_values(self, sql, argslist, template=None):
""" Wrapper for the psycopg2 convenience function to execute
SQL for a list of values.
"""
LOG.debug("SQL execute_values(%s, %s)", sql, argslist)
psycopg2.extras.execute_values(self, sql, argslist, template=template)
def scalar(self, sql, args=None): def scalar(self, sql, args=None):
""" Execute query that returns a single value. The value is returned. """ Execute query that returns a single value. The value is returned.
If the query yields more than one row, a ValueError is raised. If the query yields more than one row, a ValueError is raised.
@@ -37,6 +48,22 @@ class _Cursor(psycopg2.extras.DictCursor):
return self.fetchone()[0] return self.fetchone()[0]
def drop_table(self, name, if_exists=True, cascade=False):
""" Drop the table with the given name.
Set `if_exists` to False if a non-existant table should raise
an exception instead of just being ignored. If 'cascade' is set
to True then all dependent tables are deleted as well.
"""
sql = 'DROP TABLE '
if if_exists:
sql += 'IF EXISTS '
sql += '{}'
if cascade:
sql += ' CASCADE'
self.execute(pysql.SQL(sql).format(pysql.Identifier(name)))
class _Connection(psycopg2.extensions.connection): class _Connection(psycopg2.extensions.connection):
""" A connection that provides the specialised cursor by default and """ A connection that provides the specialised cursor by default and
adds convenience functions for administrating the database. adds convenience functions for administrating the database.
@@ -75,14 +102,13 @@ class _Connection(psycopg2.extensions.connection):
return True return True
def drop_table(self, name, if_exists=True): def drop_table(self, name, if_exists=True, cascade=False):
""" Drop the table with the given name. """ Drop the table with the given name.
Set `if_exists` to False if a non-existant table should raise Set `if_exists` to False if a non-existant table should raise
an exception instead of just being ignored. an exception instead of just being ignored.
""" """
with self.cursor() as cur: with self.cursor() as cur:
cur.execute("""DROP TABLE {} "{}" cur.drop_table(name, if_exists, cascade)
""".format('IF EXISTS' if if_exists else '', name))
self.commit() self.commit()

View File

@@ -61,7 +61,7 @@ def _setup_postgresql_features(conn):
""" """
pg_version = conn.server_version_tuple() pg_version = conn.server_version_tuple()
return { return {
'has_index_non_key_column' : pg_version >= (11, 0, 0) 'has_index_non_key_column': pg_version >= (11, 0, 0)
} }
class SQLPreprocessor: class SQLPreprocessor:

View File

@@ -61,9 +61,9 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)
# List of characters that need to be quoted for the copy command. # List of characters that need to be quoted for the copy command.
_SQL_TRANSLATION = {ord(u'\\') : u'\\\\', _SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
ord(u'\t') : u'\\t', ord(u'\t'): u'\\t',
ord(u'\n') : u'\\n'} ord(u'\n'): u'\\n'}
class CopyBuffer: class CopyBuffer:
""" Data collector for the copy_from command. """ Data collector for the copy_from command.

View File

@@ -203,7 +203,7 @@ class Indexer:
# And insert the curent batch # And insert the curent batch
for idx in range(0, len(places), batch): for idx in range(0, len(places), batch):
part = places[idx:idx+batch] part = places[idx:idx + batch]
LOG.debug("Processing places: %s", str(part)) LOG.debug("Processing places: %s", str(part))
runner.index_places(pool.next_free_worker(), part) runner.index_places(pool.next_free_worker(), part)
progress.add(len(part)) progress.add(len(part))

View File

@@ -63,7 +63,7 @@ class ProgressLogger:
places_per_sec = self.done_places places_per_sec = self.done_places
else: else:
diff_seconds = (rank_end_time - self.rank_start_time).total_seconds() diff_seconds = (rank_end_time - self.rank_start_time).total_seconds()
places_per_sec = self.done_places/diff_seconds places_per_sec = self.done_places / diff_seconds
LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n", LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n",
self.done_places, self.total_places, int(diff_seconds), self.done_places, self.total_places, int(diff_seconds),

View File

@@ -5,13 +5,17 @@ tasks.
import functools import functools
import psycopg2.extras import psycopg2.extras
from psycopg2 import sql as pysql
# pylint: disable=C0111 # pylint: disable=C0111
def _mk_valuelist(template, num):
return pysql.SQL(',').join([pysql.SQL(template)] * num)
class AbstractPlacexRunner: class AbstractPlacexRunner:
""" Returns SQL commands for indexing of the placex table. """ Returns SQL commands for indexing of the placex table.
""" """
SELECT_SQL = 'SELECT place_id FROM placex' SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
def __init__(self, rank, analyzer): def __init__(self, rank, analyzer):
self.rank = rank self.rank = rank
@@ -21,11 +25,12 @@ class AbstractPlacexRunner:
@staticmethod @staticmethod
@functools.lru_cache(maxsize=1) @functools.lru_cache(maxsize=1)
def _index_sql(num_places): def _index_sql(num_places):
return """ UPDATE placex return pysql.SQL(
SET indexed_status = 0, address = v.addr, token_info = v.ti """ UPDATE placex
FROM (VALUES {}) as v(id, addr, ti) SET indexed_status = 0, address = v.addr, token_info = v.ti
WHERE place_id = v.id FROM (VALUES {}) as v(id, addr, ti)
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) WHERE place_id = v.id
""").format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
@staticmethod @staticmethod
@@ -52,14 +57,15 @@ class RankRunner(AbstractPlacexRunner):
return "rank {}".format(self.rank) return "rank {}".format(self.rank)
def sql_count_objects(self): def sql_count_objects(self):
return """SELECT count(*) FROM placex return pysql.SQL("""SELECT count(*) FROM placex
WHERE rank_address = {} and indexed_status > 0 WHERE rank_address = {} and indexed_status > 0
""".format(self.rank) """).format(pysql.Literal(self.rank))
def sql_get_objects(self): def sql_get_objects(self):
return """{} WHERE indexed_status > 0 and rank_address = {} return self.SELECT_SQL + pysql.SQL(
ORDER BY geometry_sector """WHERE indexed_status > 0 and rank_address = {}
""".format(self.SELECT_SQL, self.rank) ORDER BY geometry_sector
""").format(pysql.Literal(self.rank))
class BoundaryRunner(AbstractPlacexRunner): class BoundaryRunner(AbstractPlacexRunner):
@@ -71,17 +77,18 @@ class BoundaryRunner(AbstractPlacexRunner):
return "boundaries rank {}".format(self.rank) return "boundaries rank {}".format(self.rank)
def sql_count_objects(self): def sql_count_objects(self):
return """SELECT count(*) FROM placex return pysql.SQL("""SELECT count(*) FROM placex
WHERE indexed_status > 0 WHERE indexed_status > 0
AND rank_search = {} AND rank_search = {}
AND class = 'boundary' and type = 'administrative' AND class = 'boundary' and type = 'administrative'
""".format(self.rank) """).format(pysql.Literal(self.rank))
def sql_get_objects(self): def sql_get_objects(self):
return """{} WHERE indexed_status > 0 and rank_search = {} return self.SELECT_SQL + pysql.SQL(
and class = 'boundary' and type = 'administrative' """WHERE indexed_status > 0 and rank_search = {}
ORDER BY partition, admin_level and class = 'boundary' and type = 'administrative'
""".format(self.SELECT_SQL, self.rank) ORDER BY partition, admin_level
""").format(pysql.Literal(self.rank))
class InterpolationRunner: class InterpolationRunner:
@@ -120,11 +127,11 @@ class InterpolationRunner:
@staticmethod @staticmethod
@functools.lru_cache(maxsize=1) @functools.lru_cache(maxsize=1)
def _index_sql(num_places): def _index_sql(num_places):
return """ UPDATE location_property_osmline return pysql.SQL("""UPDATE location_property_osmline
SET indexed_status = 0, address = v.addr, token_info = v.ti SET indexed_status = 0, address = v.addr, token_info = v.ti
FROM (VALUES {}) as v(id, addr, ti) FROM (VALUES {}) as v(id, addr, ti)
WHERE place_id = v.id WHERE place_id = v.id
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
def index_places(self, worker, places): def index_places(self, worker, places):
@@ -157,6 +164,6 @@ class PostcodeRunner:
@staticmethod @staticmethod
def index_places(worker, ids): def index_places(worker, ids):
worker.perform(""" UPDATE location_postcode SET indexed_status = 0 worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
WHERE place_id IN ({}) WHERE place_id IN ({})""")
""".format(','.join((str(i[0]) for i in ids)))) .format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in ids))))

View File

@@ -9,8 +9,6 @@ import re
from textwrap import dedent from textwrap import dedent
from pathlib import Path from pathlib import Path
import psycopg2.extras
from nominatim.db.connection import connect from nominatim.db.connection import connect
from nominatim.db.properties import set_property, get_property from nominatim.db.properties import set_property, get_property
from nominatim.db.utils import CopyBuffer from nominatim.db.utils import CopyBuffer
@@ -341,7 +339,7 @@ class LegacyICUNameAnalyzer:
term = self.name_processor.get_search_normalized(word) term = self.name_processor.get_search_normalized(word)
if term: if term:
copystr.add(word, ' ' + term, cls, typ, copystr.add(word, ' ' + term, cls, typ,
oper if oper in ('in', 'near') else None, 0) oper if oper in ('in', 'near') else None, 0)
added += 1 added += 1
copystr.copy_out(cursor, 'word', copystr.copy_out(cursor, 'word',
@@ -359,8 +357,7 @@ class LegacyICUNameAnalyzer:
to_delete = existing_phrases - new_phrases to_delete = existing_phrases - new_phrases
if to_delete: if to_delete:
psycopg2.extras.execute_values( cursor.execute_values(
cursor,
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op) """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
WHERE word = name and class = in_class and type = in_type WHERE word = name and class = in_class and type = in_type
and ((op = '-' and operator is null) or op = operator)""", and ((op = '-' and operator is null) or op = operator)""",
@@ -386,9 +383,9 @@ class LegacyICUNameAnalyzer:
if word_tokens: if word_tokens:
cur.execute("""INSERT INTO word (word_id, word_token, country_code, cur.execute("""INSERT INTO word (word_id, word_token, country_code,
search_name_count) search_name_count)
(SELECT nextval('seq_word'), token, '{}', 0 (SELECT nextval('seq_word'), token, %s, 0
FROM unnest(%s) as token) FROM unnest(%s) as token)
""".format(country_code), (list(word_tokens),)) """, (country_code, list(word_tokens)))
def process_place(self, place): def process_place(self, place):

View File

@@ -370,8 +370,7 @@ class LegacyNameAnalyzer:
to_delete = existing_phrases - norm_phrases to_delete = existing_phrases - norm_phrases
if to_add: if to_add:
psycopg2.extras.execute_values( cur.execute_values(
cur,
""" INSERT INTO word (word_id, word_token, word, class, type, """ INSERT INTO word (word_id, word_token, word, class, type,
search_name_count, operator) search_name_count, operator)
(SELECT nextval('seq_word'), ' ' || make_standard_name(name), name, (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
@@ -381,8 +380,7 @@ class LegacyNameAnalyzer:
to_add) to_add)
if to_delete and should_replace: if to_delete and should_replace:
psycopg2.extras.execute_values( cur.execute_values(
cur,
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op) """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
WHERE word = name and class = in_class and type = in_type WHERE word = name and class = in_class and type = in_type
and ((op = '-' and operator is null) or op = operator)""", and ((op = '-' and operator is null) or op = operator)""",
@@ -582,7 +580,7 @@ class _TokenCache:
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
FROM generate_series(1, 100) as i""") FROM generate_series(1, 100) as i""")
self._cached_housenumbers = {str(r[0]) : r[1] for r in cur} self._cached_housenumbers = {str(r[0]): r[1] for r in cur}
# For postcodes remember the ones that have already been added # For postcodes remember the ones that have already been added
self.postcodes = set() self.postcodes = set()

View File

@@ -24,6 +24,7 @@ def _check(hint=None):
""" """
def decorator(func): def decorator(func):
title = func.__doc__.split('\n', 1)[0].strip() title = func.__doc__.split('\n', 1)[0].strip()
def run_check(conn, config): def run_check(conn, config):
print(title, end=' ... ') print(title, end=' ... ')
ret = func(conn, config) ret = func(conn, config)
@@ -98,13 +99,12 @@ def _get_indexes(conn):
if conn.table_exists('place'): if conn.table_exists('place'):
indexes.extend(('idx_placex_pendingsector', indexes.extend(('idx_placex_pendingsector',
'idx_location_area_country_place_id', 'idx_location_area_country_place_id',
'idx_place_osm_unique' 'idx_place_osm_unique'))
))
return indexes return indexes
### CHECK FUNCTIONS # CHECK FUNCTIONS
# #
# Functions are exectured in the order they appear here. # Functions are exectured in the order they appear here.

View File

@@ -9,6 +9,7 @@ from pathlib import Path
import psutil import psutil
import psycopg2.extras import psycopg2.extras
from psycopg2 import sql as pysql
from nominatim.db.connection import connect, get_pg_env from nominatim.db.connection import connect, get_pg_env
from nominatim.db import utils as db_utils from nominatim.db import utils as db_utils
@@ -184,7 +185,12 @@ def truncate_data_tables(conn):
conn.commit() conn.commit()
_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier,
('osm_type', 'osm_id', 'class', 'type',
'name', 'admin_level', 'address',
'extratags', 'geometry')))
def load_data(dsn, threads): def load_data(dsn, threads):
""" Copy data into the word and placex table. """ Copy data into the word and placex table.
@@ -195,12 +201,15 @@ def load_data(dsn, threads):
for imod in range(place_threads): for imod in range(place_threads):
conn = DBConnection(dsn) conn = DBConnection(dsn)
conn.connect() conn.connect()
conn.perform("""INSERT INTO placex ({0}) conn.perform(
SELECT {0} FROM place pysql.SQL("""INSERT INTO placex ({columns})
WHERE osm_id % {1} = {2} SELECT {columns} FROM place
AND NOT (class='place' and (type='houses' or type='postcode')) WHERE osm_id % {total} = {mod}
AND ST_IsValid(geometry) AND NOT (class='place' and (type='houses' or type='postcode'))
""".format(_COPY_COLUMNS, place_threads, imod)) AND ST_IsValid(geometry)
""").format(columns=_COPY_COLUMNS,
total=pysql.Literal(place_threads),
mod=pysql.Literal(imod)))
sel.register(conn, selectors.EVENT_READ, conn) sel.register(conn, selectors.EVENT_READ, conn)
# Address interpolations go into another table. # Address interpolations go into another table.
@@ -250,6 +259,7 @@ def create_search_indices(conn, config, drop=False):
sql.run_sql_file(conn, 'indices.sql', drop=drop) sql.run_sql_file(conn, 'indices.sql', drop=drop)
def create_country_names(conn, tokenizer, languages=None): def create_country_names(conn, tokenizer, languages=None):
""" Add default country names to search index. `languages` is a comma- """ Add default country names to search index. `languages` is a comma-
separated list of language codes as used in OSM. If `languages` is not separated list of language codes as used in OSM. If `languages` is not
@@ -261,8 +271,7 @@ def create_country_names(conn, tokenizer, languages=None):
def _include_key(key): def _include_key(key):
return key == 'name' or \ return key == 'name' or \
(key.startswith('name:') \ (key.startswith('name:') and (not languages or key[5:] in languages))
and (not languages or key[5:] in languages))
with conn.cursor() as cur: with conn.cursor() as cur:
psycopg2.extras.register_hstore(cur) psycopg2.extras.register_hstore(cur)
@@ -271,7 +280,7 @@ def create_country_names(conn, tokenizer, languages=None):
with tokenizer.name_analyzer() as analyzer: with tokenizer.name_analyzer() as analyzer:
for code, name in cur: for code, name in cur:
names = {'countrycode' : code} names = {'countrycode': code}
if code == 'gb': if code == 'gb':
names['short_name'] = 'UK' names['short_name'] = 'UK'
if code == 'us': if code == 'us':

View File

@@ -136,11 +136,11 @@ def run_osm2pgsql(options):
def get_url(url): def get_url(url):
""" Get the contents from the given URL and return it as a UTF-8 string. """ Get the contents from the given URL and return it as a UTF-8 string.
""" """
headers = {"User-Agent" : "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)} headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
try: try:
with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response: with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
return response.read().decode('utf-8') return response.read().decode('utf-8')
except: except Exception:
LOG.fatal('Failed to load URL: %s', url) LOG.fatal('Failed to load URL: %s', url)
raise raise

View File

@@ -3,6 +3,8 @@ Functions for removing unnecessary data from the database.
""" """
from pathlib import Path from pathlib import Path
from psycopg2 import sql as pysql
UPDATE_TABLES = [ UPDATE_TABLES = [
'address_levels', 'address_levels',
'gb_postcode', 'gb_postcode',
@@ -21,15 +23,15 @@ def drop_update_tables(conn):
""" Drop all tables only necessary for updating the database from """ Drop all tables only necessary for updating the database from
OSM replication data. OSM replication data.
""" """
parts = (pysql.SQL("(tablename LIKE {})").format(pysql.Literal(t)) for t in UPDATE_TABLES)
where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES])
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute("SELECT tablename FROM pg_tables WHERE " + where) cur.execute(pysql.SQL("SELECT tablename FROM pg_tables WHERE ")
+ pysql.SQL(' or ').join(parts))
tables = [r[0] for r in cur] tables = [r[0] for r in cur]
for table in tables: for table in tables:
cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'.format(table)) cur.drop_table(table, cascade=True)
conn.commit() conn.commit()

View File

@@ -142,7 +142,8 @@ def change_housenumber_transliteration(conn, **_):
BEGIN BEGIN
SELECT array_to_string(array_agg(trans), ';') SELECT array_to_string(array_agg(trans), ';')
INTO normtext INTO normtext
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) FROM (SELECT lookup_word as trans,
getorcreate_housenumber_id(lookup_word)
FROM (SELECT make_standard_name(h) as lookup_word FROM (SELECT make_standard_name(h) as lookup_word
FROM regexp_split_to_table(housenumber, '[,;]') h) x) y; FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
return normtext; return normtext;

View File

@@ -7,7 +7,7 @@ import gzip
import logging import logging
from math import isfinite from math import isfinite
from psycopg2.extras import execute_values from psycopg2 import sql as pysql
from nominatim.db.connection import connect from nominatim.db.connection import connect
@@ -52,27 +52,26 @@ class _CountryPostcodesCollector:
with conn.cursor() as cur: with conn.cursor() as cur:
if to_add: if to_add:
execute_values(cur, cur.execute_values(
"""INSERT INTO location_postcode """INSERT INTO location_postcode
(place_id, indexed_status, country_code, (place_id, indexed_status, country_code,
postcode, geometry) VALUES %s""", postcode, geometry) VALUES %s""",
to_add, to_add,
template="""(nextval('seq_place'), 1, '{}', template=pysql.SQL("""(nextval('seq_place'), 1, {},
%s, 'SRID=4326;POINT(%s %s)') %s, 'SRID=4326;POINT(%s %s)')
""".format(self.country)) """).format(pysql.Literal(self.country)))
if to_delete: if to_delete:
cur.execute("""DELETE FROM location_postcode cur.execute("""DELETE FROM location_postcode
WHERE country_code = %s and postcode = any(%s) WHERE country_code = %s and postcode = any(%s)
""", (self.country, to_delete)) """, (self.country, to_delete))
if to_update: if to_update:
execute_values(cur, cur.execute_values(
"""UPDATE location_postcode pysql.SQL("""UPDATE location_postcode
SET indexed_status = 2, SET indexed_status = 2,
geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326) geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
FROM (VALUES %s) AS v (pc, x, y) FROM (VALUES %s) AS v (pc, x, y)
WHERE country_code = '{}' and postcode = pc WHERE country_code = {} and postcode = pc
""".format(self.country), """).format(pysql.Literal(self.country)), to_update)
to_update)
def _compute_changes(self, conn): def _compute_changes(self, conn):
@@ -165,11 +164,14 @@ def update_postcodes(dsn, project_dir, tokenizer):
with conn.cursor(name="placex_postcodes") as cur: with conn.cursor(name="placex_postcodes") as cur:
cur.execute(""" cur.execute("""
SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid) SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
FROM (SELECT FROM (SELECT
COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc, COALESCE(plx.country_code,
get_country_code(ST_Centroid(pl.geometry))) as cc,
token_normalized_postcode(pl.address->'postcode') as pc, token_normalized_postcode(pl.address->'postcode') as pc,
ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid ST_Centroid(ST_Collect(COALESCE(plx.centroid,
FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type ST_Centroid(pl.geometry)))) as centroid
FROM place AS pl LEFT OUTER JOIN placex AS plx
ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null
GROUP BY cc, pc) xx GROUP BY cc, pc) xx
WHERE pc IS NOT null AND cc IS NOT null WHERE pc IS NOT null AND cc IS NOT null

View File

@@ -5,7 +5,7 @@ import json
import logging import logging
from textwrap import dedent from textwrap import dedent
from psycopg2.extras import execute_values from psycopg2 import sql as pysql
from nominatim.db.utils import execute_file from nominatim.db.utils import execute_file
from nominatim.db.sql_preprocessor import SQLPreprocessor from nominatim.db.sql_preprocessor import SQLPreprocessor
@@ -49,7 +49,7 @@ def load_address_levels(conn, table, levels):
_add_address_level_rows_from_entry(rows, entry) _add_address_level_rows_from_entry(rows, entry)
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute('DROP TABLE IF EXISTS {}'.format(table)) cur.drop_table(table)
cur.execute("""CREATE TABLE {} (country_code varchar(2), cur.execute("""CREATE TABLE {} (country_code varchar(2),
class TEXT, class TEXT,
@@ -57,7 +57,8 @@ def load_address_levels(conn, table, levels):
rank_search SMALLINT, rank_search SMALLINT,
rank_address SMALLINT)""".format(table)) rank_address SMALLINT)""".format(table))
execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows) cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
.format(pysql.Identifier(table)), rows)
cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table)) cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))

View File

@@ -44,8 +44,8 @@ class SPImporter():
# This set will contain all existing phrases to be added. # This set will contain all existing phrases to be added.
# It contains tuples with the following format: (lable, class, type, operator) # It contains tuples with the following format: (lable, class, type, operator)
self.word_phrases = set() self.word_phrases = set()
#This set will contain all existing place_classtype tables which doesn't match any # This set will contain all existing place_classtype tables which doesn't match any
#special phrases class/type on the wiki. # special phrases class/type on the wiki.
self.table_phrases_to_delete = set() self.table_phrases_to_delete = set()
def import_phrases(self, tokenizer, should_replace): def import_phrases(self, tokenizer, should_replace):
@@ -60,7 +60,7 @@ class SPImporter():
LOG.warning('Special phrases importation starting') LOG.warning('Special phrases importation starting')
self._fetch_existing_place_classtype_tables() self._fetch_existing_place_classtype_tables()
#Store pairs of class/type for further processing # Store pairs of class/type for further processing
class_type_pairs = set() class_type_pairs = set()
for loaded_phrases in self.sp_loader: for loaded_phrases in self.sp_loader:
@@ -131,17 +131,17 @@ class SPImporter():
Return the class/type pair corresponding to the phrase. Return the class/type pair corresponding to the phrase.
""" """
#blacklisting: disallow certain class/type combinations # blacklisting: disallow certain class/type combinations
if phrase.p_class in self.black_list.keys() \ if phrase.p_class in self.black_list.keys() \
and phrase.p_type in self.black_list[phrase.p_class]: and phrase.p_type in self.black_list[phrase.p_class]:
return None return None
#whitelisting: if class is in whitelist, allow only tags in the list # whitelisting: if class is in whitelist, allow only tags in the list
if phrase.p_class in self.white_list.keys() \ if phrase.p_class in self.white_list.keys() \
and phrase.p_type not in self.white_list[phrase.p_class]: and phrase.p_type not in self.white_list[phrase.p_class]:
return None return None
#sanity check, in case somebody added garbage in the wiki # sanity check, in case somebody added garbage in the wiki
if not self._check_sanity(phrase): if not self._check_sanity(phrase):
self.statistics_handler.notify_one_phrase_invalid() self.statistics_handler.notify_one_phrase_invalid()
return None return None
@@ -161,7 +161,7 @@ class SPImporter():
sql_tablespace = self.config.TABLESPACE_AUX_DATA sql_tablespace = self.config.TABLESPACE_AUX_DATA
if sql_tablespace: if sql_tablespace:
sql_tablespace = ' TABLESPACE '+sql_tablespace sql_tablespace = ' TABLESPACE ' + sql_tablespace
with self.db_connection.cursor() as db_cursor: with self.db_connection.cursor() as db_cursor:
db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)") db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
@@ -174,19 +174,19 @@ class SPImporter():
if table_name in self.table_phrases_to_delete: if table_name in self.table_phrases_to_delete:
self.statistics_handler.notify_one_table_ignored() self.statistics_handler.notify_one_table_ignored()
#Remove this table from the ones to delete as it match a class/type # Remove this table from the ones to delete as it match a
#still existing on the special phrases of the wiki. # class/type still existing on the special phrases of the wiki.
self.table_phrases_to_delete.remove(table_name) self.table_phrases_to_delete.remove(table_name)
#So dont need to create the table and indexes. # So don't need to create the table and indexes.
continue continue
#Table creation # Table creation
self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type) self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)
#Indexes creation # Indexes creation
self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type) self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type)
#Grant access on read to the web user. # Grant access on read to the web user.
self._grant_access_to_webuser(phrase_class, phrase_type) self._grant_access_to_webuser(phrase_class, phrase_type)
self.statistics_handler.notify_one_table_created() self.statistics_handler.notify_one_table_created()
@@ -202,8 +202,8 @@ class SPImporter():
table_name = _classtype_table(phrase_class, phrase_type) table_name = _classtype_table(phrase_class, phrase_type)
with self.db_connection.cursor() as db_cursor: with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL(""" db_cursor.execute(SQL("""
CREATE TABLE IF NOT EXISTS {{}} {} CREATE TABLE IF NOT EXISTS {{}} {}
AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
WHERE class = {{}} AND type = {{}}""".format(sql_tablespace)) WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
.format(Identifier(table_name), Literal(phrase_class), .format(Identifier(table_name), Literal(phrase_class),
Literal(phrase_type))) Literal(phrase_type)))
@@ -215,7 +215,7 @@ class SPImporter():
""" """
index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type) index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
base_table = _classtype_table(phrase_class, phrase_type) base_table = _classtype_table(phrase_class, phrase_type)
#Index on centroid # Index on centroid
if not self.db_connection.index_exists(index_prefix + 'centroid'): if not self.db_connection.index_exists(index_prefix + 'centroid'):
with self.db_connection.cursor() as db_cursor: with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL(""" db_cursor.execute(SQL("""
@@ -223,7 +223,7 @@ class SPImporter():
.format(Identifier(index_prefix + 'centroid'), .format(Identifier(index_prefix + 'centroid'),
Identifier(base_table)), sql_tablespace) Identifier(base_table)), sql_tablespace)
#Index on place_id # Index on place_id
if not self.db_connection.index_exists(index_prefix + 'place_id'): if not self.db_connection.index_exists(index_prefix + 'place_id'):
with self.db_connection.cursor() as db_cursor: with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL( db_cursor.execute(SQL(
@@ -248,18 +248,14 @@ class SPImporter():
Delete the place_classtype tables. Delete the place_classtype tables.
""" """
LOG.warning('Cleaning database...') LOG.warning('Cleaning database...')
#Array containing all queries to execute. Contain tuples of format (query, parameters)
queries_parameters = []
#Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
for table in self.table_phrases_to_delete:
self.statistics_handler.notify_one_table_deleted()
query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
queries_parameters.append((query, ()))
# Delete place_classtype tables corresponding to class/type which
# are not on the wiki anymore.
with self.db_connection.cursor() as db_cursor: with self.db_connection.cursor() as db_cursor:
for query, parameters in queries_parameters: for table in self.table_phrases_to_delete:
db_cursor.execute(query, parameters) self.statistics_handler.notify_one_table_deleted()
db_cursor.drop_table(table)
def _convert_php_settings_if_needed(self, file_path): def _convert_php_settings_if_needed(self, file_path):
""" """
@@ -271,7 +267,7 @@ class SPImporter():
file, extension = os.path.splitext(file_path) file, extension = os.path.splitext(file_path)
json_file_path = Path(file + '.json').resolve() json_file_path = Path(file + '.json').resolve()
if extension not in('.php', '.json'): if extension not in ('.php', '.json'):
raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.') raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')
if extension == '.php' and not isfile(json_file_path): if extension == '.php' and not isfile(json_file_path):

View File

@@ -15,7 +15,7 @@ class SPWikiLoader(Iterator):
def __init__(self, config, languages=None): def __init__(self, config, languages=None):
super().__init__() super().__init__()
self.config = config self.config = config
#Compile the regex here to increase performances. # Compile the regex here to increase performances.
self.occurence_pattern = re.compile( self.occurence_pattern = re.compile(
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
) )
@@ -35,7 +35,7 @@ class SPWikiLoader(Iterator):
Parses XML content and extracts special phrases from it. Parses XML content and extracts special phrases from it.
Return a list of SpecialPhrase. Return a list of SpecialPhrase.
""" """
#One match will be of format [label, class, type, operator, plural] # One match will be of format [label, class, type, operator, plural]
matches = self.occurence_pattern.findall(xml) matches = self.occurence_pattern.findall(xml)
returned_phrases = set() returned_phrases = set()
for match in matches: for match in matches:
@@ -65,5 +65,6 @@ class SPWikiLoader(Iterator):
Requested URL Example : Requested URL Example :
https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
""" """
url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
+ lang.upper()
return get_url(url) return get_url(url)

View File

@@ -13,7 +13,7 @@ class SpecialPhrase():
def __init__(self, p_label, p_class, p_type, p_operator): def __init__(self, p_label, p_class, p_type, p_operator):
self.p_label = p_label.strip() self.p_label = p_label.strip()
self.p_class = p_class.strip() self.p_class = p_class.strip()
#Hack around a bug where building=yes was imported with quotes into the wiki # Hack around a bug where building=yes was imported with quotes into the wiki
self.p_type = re.sub(r'\"|&quot;', '', p_type.strip()) self.p_type = re.sub(r'\"|&quot;', '', p_type.strip())
#Needed if some operator in the wiki are not written in english # Needed if some operator in the wiki are not written in english
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator