use psycopg's SQL quoting where possible

Use the SQL formatting supplied with psycopg whenever the
query needs to be put together from snippets.
This commit is contained in:
Sarah Hoffmann
2021-07-12 22:05:22 +02:00
parent 6f6681ce67
commit 14f777da18
5 changed files with 58 additions and 42 deletions

View File

@@ -5,13 +5,17 @@ tasks.
import functools
import psycopg2.extras
from psycopg2 import sql as pysql
# pylint: disable=C0111
def _mk_valuelist(template, num):
return pysql.SQL(',').join([pysql.SQL(template)] * num)
class AbstractPlacexRunner:
""" Returns SQL commands for indexing of the placex table.
"""
SELECT_SQL = 'SELECT place_id FROM placex'
SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
def __init__(self, rank, analyzer):
self.rank = rank
@@ -21,11 +25,12 @@ class AbstractPlacexRunner:
@staticmethod
@functools.lru_cache(maxsize=1)
def _index_sql(num_places):
return """ UPDATE placex
SET indexed_status = 0, address = v.addr, token_info = v.ti
FROM (VALUES {}) as v(id, addr, ti)
WHERE place_id = v.id
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places))
return pysql.SQL(
""" UPDATE placex
SET indexed_status = 0, address = v.addr, token_info = v.ti
FROM (VALUES {}) as v(id, addr, ti)
WHERE place_id = v.id
""").format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
@staticmethod
@@ -52,14 +57,15 @@ class RankRunner(AbstractPlacexRunner):
return "rank {}".format(self.rank)
def sql_count_objects(self):
return """SELECT count(*) FROM placex
WHERE rank_address = {} and indexed_status > 0
""".format(self.rank)
return pysql.SQL("""SELECT count(*) FROM placex
WHERE rank_address = {} and indexed_status > 0
""").format(pysql.Literal(self.rank))
def sql_get_objects(self):
return """{} WHERE indexed_status > 0 and rank_address = {}
ORDER BY geometry_sector
""".format(self.SELECT_SQL, self.rank)
return self.SELECT_SQL + pysql.SQL(
"""WHERE indexed_status > 0 and rank_address = {}
ORDER BY geometry_sector
""").format(pysql.Literal(self.rank))
class BoundaryRunner(AbstractPlacexRunner):
@@ -71,17 +77,18 @@ class BoundaryRunner(AbstractPlacexRunner):
return "boundaries rank {}".format(self.rank)
def sql_count_objects(self):
return """SELECT count(*) FROM placex
WHERE indexed_status > 0
AND rank_search = {}
AND class = 'boundary' and type = 'administrative'
""".format(self.rank)
return pysql.SQL("""SELECT count(*) FROM placex
WHERE indexed_status > 0
AND rank_search = {}
AND class = 'boundary' and type = 'administrative'
""").format(pysql.Literal(self.rank))
def sql_get_objects(self):
return """{} WHERE indexed_status > 0 and rank_search = {}
and class = 'boundary' and type = 'administrative'
ORDER BY partition, admin_level
""".format(self.SELECT_SQL, self.rank)
return self.SELECT_SQL + pysql.SQL(
"""WHERE indexed_status > 0 and rank_search = {}
and class = 'boundary' and type = 'administrative'
ORDER BY partition, admin_level
""").format(pysql.Literal(self.rank))
class InterpolationRunner:
@@ -120,11 +127,11 @@ class InterpolationRunner:
@staticmethod
@functools.lru_cache(maxsize=1)
def _index_sql(num_places):
return """ UPDATE location_property_osmline
SET indexed_status = 0, address = v.addr, token_info = v.ti
FROM (VALUES {}) as v(id, addr, ti)
WHERE place_id = v.id
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places))
return pysql.SQL("""UPDATE location_property_osmline
SET indexed_status = 0, address = v.addr, token_info = v.ti
FROM (VALUES {}) as v(id, addr, ti)
WHERE place_id = v.id
""").format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
def index_places(self, worker, places):
@@ -157,6 +164,6 @@ class PostcodeRunner:
@staticmethod
def index_places(worker, ids):
worker.perform(""" UPDATE location_postcode SET indexed_status = 0
WHERE place_id IN ({})
""".format(','.join((str(i[0]) for i in ids))))
worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
WHERE place_id IN ({})""")
.format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in ids))))

View File

@@ -383,9 +383,9 @@ class LegacyICUNameAnalyzer:
if word_tokens:
cur.execute("""INSERT INTO word (word_id, word_token, country_code,
search_name_count)
(SELECT nextval('seq_word'), token, '{}', 0
(SELECT nextval('seq_word'), token, %s, 0
FROM unnest(%s) as token)
""".format(country_code), (list(word_tokens),))
""", (country_code, list(word_tokens)))
def process_place(self, place):

View File

@@ -9,6 +9,7 @@ from pathlib import Path
import psutil
import psycopg2.extras
from psycopg2 import sql as pysql
from nominatim.db.connection import connect, get_pg_env
from nominatim.db import utils as db_utils
@@ -185,7 +186,10 @@ def truncate_data_tables(conn):
conn.commit()
_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier,
('osm_type', 'osm_id', 'class', 'type',
'name', 'admin_level', 'address',
'extratags', 'geometry')))
def load_data(dsn, threads):
@@ -197,12 +201,15 @@ def load_data(dsn, threads):
for imod in range(place_threads):
conn = DBConnection(dsn)
conn.connect()
conn.perform("""INSERT INTO placex ({0})
SELECT {0} FROM place
WHERE osm_id % {1} = {2}
AND NOT (class='place' and (type='houses' or type='postcode'))
AND ST_IsValid(geometry)
""".format(_COPY_COLUMNS, place_threads, imod))
conn.perform(
pysql.SQL("""INSERT INTO placex ({columns})
SELECT {columns} FROM place
WHERE osm_id % {total} = {mod}
AND NOT (class='place' and (type='houses' or type='postcode'))
AND ST_IsValid(geometry)
""").format(columns=_COPY_COLUMNS,
total=pysql.Literal(place_threads),
mod=pysql.Literal(imod)))
sel.register(conn, selectors.EVENT_READ, conn)
# Address interpolations go into another table.

View File

@@ -3,6 +3,8 @@ Functions for removing unnecessary data from the database.
"""
from pathlib import Path
from psycopg2 import sql as pysql
UPDATE_TABLES = [
'address_levels',
'gb_postcode',
@@ -21,11 +23,11 @@ def drop_update_tables(conn):
""" Drop all tables only necessary for updating the database from
OSM replication data.
"""
where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES])
parts = (pysql.SQL("(tablename LIKE {})").format(pysql.Literal(t)) for t in UPDATE_TABLES)
with conn.cursor() as cur:
cur.execute("SELECT tablename FROM pg_tables WHERE " + where)
cur.execute(pysql.SQL("SELECT tablename FROM pg_tables WHERE ")
+ pysql.SQL(' or ').join(parts))
tables = [r[0] for r in cur]
for table in tables:

View File

@@ -58,7 +58,7 @@ def load_address_levels(conn, table, levels):
rank_address SMALLINT)""".format(table))
cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
.format(pysql.Identifier(table)), rows)
.format(pysql.Identifier(table)), rows)
cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))