port replication initialisation to Python

This commit is contained in:
Sarah Hoffmann
2021-01-26 22:45:24 +01:00
parent 5b46fcad8e
commit d78f0ba804
13 changed files with 402 additions and 75 deletions

View File

@@ -234,12 +234,29 @@ class UpdateReplication:
@staticmethod
def run(args):
try:
import osmium # pylint: disable=W0611
except ModuleNotFoundError:
LOG.fatal("pyosmium not installed. Replication functions not available.\n"
"To install pyosmium via pip: pip3 install osmium")
return 1
from .tools import replication, refresh
conn = connect(args.config.get_libpq_dsn())
params = ['update.php']
if args.init:
params.append('--init-updates')
if not args.update_functions:
params.append('--no-update-functions')
elif args.check_for_updates:
LOG.warning("Initialising replication updates")
replication.init_replication(conn, args.config.REPLICATION_URL)
if args.update_functions:
LOG.warning("Create functions")
refresh.create_functions(conn, args.config, args.data_dir,
True, False)
conn.close()
return 0
if args.check_for_updates:
params.append('--check-for-updates')
else:
if args.once:

View File

@@ -0,0 +1,58 @@
"""
Specialised connection and cursor functions.
"""
import logging
import psycopg2
import psycopg2.extensions
import psycopg2.extras
class _Cursor(psycopg2.extras.DictCursor):
    """ A cursor returning dict-like objects and providing specialised
        execution functions.
    """

    def execute(self, query, args=None): # pylint: disable=W0221
        """ Query execution that logs the SQL query when debugging is enabled.

            `query` and `args` are passed on unchanged to the execute()
            function of the parent cursor class.
        """
        logger = logging.getLogger()
        # Rendering the statement with mogrify() costs an extra formatting
        # pass and a decode, so only do it when the message will
        # actually be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(self.mogrify(query, args).decode('utf-8'))

        super().execute(query, args)

    def scalar(self, sql, args=None):
        """ Execute query that returns a single value. The value is returned.
            If the query does not yield exactly one row (according to the
            cursor's rowcount), a ValueError is raised.
        """
        self.execute(sql, args)

        if self.rowcount != 1:
            raise ValueError("Query did not return a single row.")

        # First column of the only row.
        return self.fetchone()[0]
class _Connection(psycopg2.extensions.connection):
    """ Database connection that hands out the specialised cursor unless
        told otherwise and offers helpers for administrating the database.
    """

    def cursor(self, cursor_factory=_Cursor, **kwargs):
        """ Create a cursor on this connection. The specialised cursor is
            used unless a different `cursor_factory` is requested. All
            other keyword arguments are forwarded to the parent class.
        """
        kwargs['cursor_factory'] = cursor_factory
        return super().cursor(**kwargs)

    def table_exists(self, table):
        """ Return True when pg_tables lists exactly one table with the
            given name, False otherwise.
        """
        count_query = ("SELECT count(*) FROM pg_tables\n"
                       "WHERE tablename = %s")

        with self.cursor() as cur:
            matches = cur.scalar(count_query, (table, ))

        return matches == 1
def connect(dsn):
    """ Connect to the database described by `dsn` and return the
        connection. The connection object is created through the
        specialised connection factory.
    """
    connection = psycopg2.connect(dsn, connection_factory=_Connection)
    return connection

50
nominatim/db/status.py Normal file
View File

@@ -0,0 +1,50 @@
"""
Access and helper functions for the status table.
"""
import datetime as dt
import logging
import re
from ..tools.exec_utils import get_url
LOG = logging.getLogger()
def compute_database_date(conn):
    """ Work out the date of the database from the newest node it contains.

        The node with the highest ID in the place table is looked up in
        the OSM API and the timestamp found in the response is returned
        as a datetime with the timezone set to UTC. A RuntimeError is
        raised when the place table has no nodes or when the API response
        contains no timestamp in the expected format.
    """
    # The node with the highest ID is taken as the newest object.
    with conn.cursor() as cur:
        max_node_id = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")

    if max_node_id is None:
        LOG.fatal("No data found in the database.")
        raise RuntimeError("No data found in the database.")

    LOG.info("Using node id %d for timestamp lookup", max_node_id)

    # Version 1 of the node carries the timestamp of its creation.
    node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(max_node_id)
    api_response = get_url(node_url)

    timestamp_pattern = r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"'
    found = re.search(timestamp_pattern, api_response)

    if found is None:
        LOG.fatal("The node data downloaded from the API does not contain valid data.\n"
                  "URL used: %s", node_url)
        raise RuntimeError("Bad API data.")

    # Group 1 is the complete timestamp without the trailing 'Z'.
    timestamp = found.group(1)
    LOG.debug("Found timestamp %s", timestamp)

    naive_date = dt.datetime.fromisoformat(timestamp)
    return naive_date.replace(tzinfo=dt.timezone.utc)
def set_status(conn, date, seq=None, indexed=True):
    """ Drop whatever status is currently recorded and store the given one.

        `date` must be a datetime whose tzinfo is UTC. The import_status
        table is emptied and refilled with a single row made up of
        `date`, `seq` and `indexed`; afterwards the change is committed
        on the connection.
    """
    assert date.tzinfo == dt.timezone.utc

    insert_sql = ("INSERT INTO import_status (lastimportdate, sequence_id, indexed)\n"
                  "VALUES (%s, %s, %s)")
    new_row = (date, seq, indexed)

    with conn.cursor() as cur:
        cur.execute("TRUNCATE TABLE import_status")
        cur.execute(insert_sql, new_row)

    conn.commit()

View File

@@ -3,8 +3,13 @@ Helper functions for executing external programs.
"""
import logging
import subprocess
import urllib.request as urlrequest
from urllib.parse import urlencode
from ..version import NOMINATIM_VERSION
LOG = logging.getLogger()
def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False):
""" Run a Nominatim PHP script with the given arguments.
@@ -80,3 +85,16 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
print(result[content_start + 4:].replace('\\n', '\n'))
return 0
def get_url(url):
    """ Get the contents from the given URL and return them as a string
        decoded from UTF-8.

        The request is sent with a User-Agent header of the form
        'Nominatim/<version>'. When opening, reading or decoding fails,
        the URL is logged and the original exception is re-raised.
    """
    headers = {"User-Agent" : "Nominatim/" + NOMINATIM_VERSION}

    try:
        with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
            return response.read().decode('utf-8')
    except Exception:
        # Not a bare 'except:': KeyboardInterrupt and SystemExit derive from
        # BaseException and must not be reported as a failed download.
        LOG.fatal('Failed to load URL: %s', url)
        raise

View File

@@ -0,0 +1,34 @@
"""
Functions for updating a database from a replication source.
"""
import datetime
import logging
from osmium.replication.server import ReplicationServer
from ..db import status
LOG = logging.getLogger()
def init_replication(conn, base_url):
    """ Set up replication for the server at the given base URL.

        The date of the database is computed via the status module and
        moved back by three hours as a safety margin. The replication
        server at `base_url` is then asked for the sequence number that
        corresponds to that date, and date and sequence are saved as the
        new status. A RuntimeError is raised when no sequence number
        can be obtained from the server.
    """
    LOG.info("Using replication source: %s", base_url)

    date = status.compute_database_date(conn)

    # margin of error to make sure we get all data
    date -= datetime.timedelta(hours=3)

    repl = ReplicationServer(base_url)

    seq = repl.timestamp_to_sequence(date)

    if seq is None:
        LOG.fatal("Cannot reach the configured replication service '%s'.\n"
                  "Does the URL point to a directory containing OSM update data?",
                  base_url)
        raise RuntimeError("Failed to reach replication service")

    # Note that the stored date is the one with the margin already applied.
    status.set_status(conn, date=date, seq=seq)

    LOG.warning("Updates initialised at sequence %s (%s)", seq, date)

5
nominatim/version.py Normal file
View File

@@ -0,0 +1,5 @@
"""
Version information for Nominatim.
"""
# Version of Nominatim as a plain string.
NOMINATIM_VERSION = "3.6.0"