use psycopg's SQL quoting where possible

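Use the SQL composition helpers supplied with psycopg (the `psycopg2.sql`
module: `SQL`, `Identifier`, `Literal`) instead of plain string formatting
whenever the query needs to be put together from snippets.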
This commit is contained in:
Sarah Hoffmann
2021-07-12 22:05:22 +02:00
parent 6f6681ce67
commit 14f777da18
5 changed files with 58 additions and 42 deletions

View File

@@ -5,13 +5,17 @@ tasks.
import functools import functools
import psycopg2.extras import psycopg2.extras
from psycopg2 import sql as pysql
# pylint: disable=C0111 # pylint: disable=C0111
def _mk_valuelist(template, num):
return pysql.SQL(',').join([pysql.SQL(template)] * num)
class AbstractPlacexRunner: class AbstractPlacexRunner:
""" Returns SQL commands for indexing of the placex table. """ Returns SQL commands for indexing of the placex table.
""" """
SELECT_SQL = 'SELECT place_id FROM placex' SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
def __init__(self, rank, analyzer): def __init__(self, rank, analyzer):
self.rank = rank self.rank = rank
@@ -21,11 +25,12 @@ class AbstractPlacexRunner:
@staticmethod @staticmethod
@functools.lru_cache(maxsize=1) @functools.lru_cache(maxsize=1)
def _index_sql(num_places): def _index_sql(num_places):
return """ UPDATE placex return pysql.SQL(
SET indexed_status = 0, address = v.addr, token_info = v.ti """ UPDATE placex
FROM (VALUES {}) as v(id, addr, ti) SET indexed_status = 0, address = v.addr, token_info = v.ti
WHERE place_id = v.id FROM (VALUES {}) as v(id, addr, ti)
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) WHERE place_id = v.id
""").format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
@staticmethod @staticmethod
@@ -52,14 +57,15 @@ class RankRunner(AbstractPlacexRunner):
return "rank {}".format(self.rank) return "rank {}".format(self.rank)
def sql_count_objects(self): def sql_count_objects(self):
return """SELECT count(*) FROM placex return pysql.SQL("""SELECT count(*) FROM placex
WHERE rank_address = {} and indexed_status > 0 WHERE rank_address = {} and indexed_status > 0
""".format(self.rank) """).format(pysql.Literal(self.rank))
def sql_get_objects(self): def sql_get_objects(self):
return """{} WHERE indexed_status > 0 and rank_address = {} return self.SELECT_SQL + pysql.SQL(
ORDER BY geometry_sector """WHERE indexed_status > 0 and rank_address = {}
""".format(self.SELECT_SQL, self.rank) ORDER BY geometry_sector
""").format(pysql.Literal(self.rank))
class BoundaryRunner(AbstractPlacexRunner): class BoundaryRunner(AbstractPlacexRunner):
@@ -71,17 +77,18 @@ class BoundaryRunner(AbstractPlacexRunner):
return "boundaries rank {}".format(self.rank) return "boundaries rank {}".format(self.rank)
def sql_count_objects(self): def sql_count_objects(self):
return """SELECT count(*) FROM placex return pysql.SQL("""SELECT count(*) FROM placex
WHERE indexed_status > 0 WHERE indexed_status > 0
AND rank_search = {} AND rank_search = {}
AND class = 'boundary' and type = 'administrative' AND class = 'boundary' and type = 'administrative'
""".format(self.rank) """).format(pysql.Literal(self.rank))
def sql_get_objects(self): def sql_get_objects(self):
return """{} WHERE indexed_status > 0 and rank_search = {} return self.SELECT_SQL + pysql.SQL(
and class = 'boundary' and type = 'administrative' """WHERE indexed_status > 0 and rank_search = {}
ORDER BY partition, admin_level and class = 'boundary' and type = 'administrative'
""".format(self.SELECT_SQL, self.rank) ORDER BY partition, admin_level
""").format(pysql.Literal(self.rank))
class InterpolationRunner: class InterpolationRunner:
@@ -120,11 +127,11 @@ class InterpolationRunner:
@staticmethod @staticmethod
@functools.lru_cache(maxsize=1) @functools.lru_cache(maxsize=1)
def _index_sql(num_places): def _index_sql(num_places):
return """ UPDATE location_property_osmline return pysql.SQL("""UPDATE location_property_osmline
SET indexed_status = 0, address = v.addr, token_info = v.ti SET indexed_status = 0, address = v.addr, token_info = v.ti
FROM (VALUES {}) as v(id, addr, ti) FROM (VALUES {}) as v(id, addr, ti)
WHERE place_id = v.id WHERE place_id = v.id
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
def index_places(self, worker, places): def index_places(self, worker, places):
@@ -157,6 +164,6 @@ class PostcodeRunner:
@staticmethod @staticmethod
def index_places(worker, ids): def index_places(worker, ids):
worker.perform(""" UPDATE location_postcode SET indexed_status = 0 worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
WHERE place_id IN ({}) WHERE place_id IN ({})""")
""".format(','.join((str(i[0]) for i in ids)))) .format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in ids))))

View File

@@ -383,9 +383,9 @@ class LegacyICUNameAnalyzer:
if word_tokens: if word_tokens:
cur.execute("""INSERT INTO word (word_id, word_token, country_code, cur.execute("""INSERT INTO word (word_id, word_token, country_code,
search_name_count) search_name_count)
(SELECT nextval('seq_word'), token, '{}', 0 (SELECT nextval('seq_word'), token, %s, 0
FROM unnest(%s) as token) FROM unnest(%s) as token)
""".format(country_code), (list(word_tokens),)) """, (country_code, list(word_tokens)))
def process_place(self, place): def process_place(self, place):

View File

@@ -9,6 +9,7 @@ from pathlib import Path
import psutil import psutil
import psycopg2.extras import psycopg2.extras
from psycopg2 import sql as pysql
from nominatim.db.connection import connect, get_pg_env from nominatim.db.connection import connect, get_pg_env
from nominatim.db import utils as db_utils from nominatim.db import utils as db_utils
@@ -185,7 +186,10 @@ def truncate_data_tables(conn):
conn.commit() conn.commit()
_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry' _COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier,
('osm_type', 'osm_id', 'class', 'type',
'name', 'admin_level', 'address',
'extratags', 'geometry')))
def load_data(dsn, threads): def load_data(dsn, threads):
@@ -197,12 +201,15 @@ def load_data(dsn, threads):
for imod in range(place_threads): for imod in range(place_threads):
conn = DBConnection(dsn) conn = DBConnection(dsn)
conn.connect() conn.connect()
conn.perform("""INSERT INTO placex ({0}) conn.perform(
SELECT {0} FROM place pysql.SQL("""INSERT INTO placex ({columns})
WHERE osm_id % {1} = {2} SELECT {columns} FROM place
AND NOT (class='place' and (type='houses' or type='postcode')) WHERE osm_id % {total} = {mod}
AND ST_IsValid(geometry) AND NOT (class='place' and (type='houses' or type='postcode'))
""".format(_COPY_COLUMNS, place_threads, imod)) AND ST_IsValid(geometry)
""").format(columns=_COPY_COLUMNS,
total=pysql.Literal(place_threads),
mod=pysql.Literal(imod)))
sel.register(conn, selectors.EVENT_READ, conn) sel.register(conn, selectors.EVENT_READ, conn)
# Address interpolations go into another table. # Address interpolations go into another table.

View File

@@ -3,6 +3,8 @@ Functions for removing unnecessary data from the database.
""" """
from pathlib import Path from pathlib import Path
from psycopg2 import sql as pysql
UPDATE_TABLES = [ UPDATE_TABLES = [
'address_levels', 'address_levels',
'gb_postcode', 'gb_postcode',
@@ -21,11 +23,11 @@ def drop_update_tables(conn):
""" Drop all tables only necessary for updating the database from """ Drop all tables only necessary for updating the database from
OSM replication data. OSM replication data.
""" """
parts = (pysql.SQL("(tablename LIKE {})").format(pysql.Literal(t)) for t in UPDATE_TABLES)
where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES])
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute("SELECT tablename FROM pg_tables WHERE " + where) cur.execute(pysql.SQL("SELECT tablename FROM pg_tables WHERE ")
+ pysql.SQL(' or ').join(parts))
tables = [r[0] for r in cur] tables = [r[0] for r in cur]
for table in tables: for table in tables:

View File

@@ -58,7 +58,7 @@ def load_address_levels(conn, table, levels):
rank_address SMALLINT)""".format(table)) rank_address SMALLINT)""".format(table))
cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s") cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
.format(pysql.Identifier(table)), rows) .format(pysql.Identifier(table)), rows)
cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table)) cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))