port replication initialisation to Python

This commit is contained in:
Sarah Hoffmann
2021-01-26 22:45:24 +01:00
parent 5b46fcad8e
commit d78f0ba804
13 changed files with 402 additions and 75 deletions

View File

@@ -0,0 +1,58 @@
"""
Specialised connection and cursor functions.
"""
import logging
import psycopg2
import psycopg2.extensions
import psycopg2.extras
class _Cursor(psycopg2.extras.DictCursor):
""" A cursor returning dict-like objects and providing specialised
execution functions.
"""
def execute(self, query, args=None): # pylint: disable=W0221
""" Query execution that logs the SQL query when debugging is enabled.
"""
logger = logging.getLogger()
logger.debug(self.mogrify(query, args).decode('utf-8'))
super().execute(query, args)
def scalar(self, sql, args=None):
""" Execute query that returns a single value. The value is returned.
If the query yields more than one row, a ValueError is raised.
"""
self.execute(sql, args)
if self.rowcount != 1:
raise ValueError("Query did not return a single row.")
return self.fetchone()[0]
class _Connection(psycopg2.extensions.connection):
""" A connection that provides the specialised cursor by default and
adds convenience functions for administrating the database.
"""
def cursor(self, cursor_factory=_Cursor, **kwargs):
""" Return a new cursor. By default the specialised cursor is returned.
"""
return super().cursor(cursor_factory=cursor_factory, **kwargs)
def table_exists(self, table):
""" Check that a table with the given name exists in the database.
"""
with self.cursor() as cur:
num = cur.scalar("""SELECT count(*) FROM pg_tables
WHERE tablename = %s""", (table, ))
return num == 1
def connect(dsn):
""" Open a connection to the database using the specialised connection
factory.
"""
return psycopg2.connect(dsn, connection_factory=_Connection)

50
nominatim/db/status.py Normal file
View File

@@ -0,0 +1,50 @@
"""
Access and helper functions for the status table.
"""
import datetime as dt
import logging
import re
from ..tools.exec_utils import get_url
LOG = logging.getLogger()
def compute_database_date(conn):
""" Determine the date of the database from the newest object in the
data base.
"""
# First, find the node with the highest ID in the database
with conn.cursor() as cur:
osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
if osmid is None:
LOG.fatal("No data found in the database.")
raise RuntimeError("No data found in the database.")
LOG.info("Using node id %d for timestamp lookup", osmid)
# Get the node from the API to find the timestamp when it was created.
node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
data = get_url(node_url)
match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
if match is None:
LOG.fatal("The node data downloaded from the API does not contain valid data.\n"
"URL used: %s", node_url)
raise RuntimeError("Bad API data.")
LOG.debug("Found timestamp %s", match[1])
return dt.datetime.fromisoformat(match[1]).replace(tzinfo=dt.timezone.utc)
def set_status(conn, date, seq=None, indexed=True):
""" Replace the current status with the given status.
"""
assert date.tzinfo == dt.timezone.utc
with conn.cursor() as cur:
cur.execute("TRUNCATE TABLE import_status")
cur.execute("""INSERT INTO import_status (lastimportdate, sequence_id, indexed)
VALUES (%s, %s, %s)""", (date, seq, indexed))
conn.commit()