From 8b8aa1b4e611a4f2bfce2d64a283e14e84403c05 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 9 Feb 2020 13:57:36 +0100 Subject: [PATCH 1/4] regularly close connection while indexing Postgres sooner or later runs out of memory when the connection is used for too long. --- nominatim/nominatim.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/nominatim/nominatim.py b/nominatim/nominatim.py index 54d9b208..848846ae 100755 --- a/nominatim/nominatim.py +++ b/nominatim/nominatim.py @@ -104,14 +104,22 @@ class DBConnection(object): """ def __init__(self, options): + self.current_query = None + self.current_params = None + + self.conn = None + self.connect() + + def connect(self): + if self.conn is not None: + self.cursor.close() + self.conn.close() + self.conn = make_connection(options, asynchronous=True) self.wait() self.cursor = self.conn.cursor() - self.current_query = None - self.current_params = None - def wait(self): """ Block until any pending operation is done. """ @@ -257,13 +265,25 @@ class Indexer(object): sending a query. """ ready = self.threads + command_stat = 0 while True: for thread in ready: if thread.is_done(): + command_stat += 1 yield thread - ready, _, _ = select.select(self.threads, [], []) + # refresh the connections occasionally to avoid potential + # memory leaks in Postgresql. 
+ if command_stat > 100000: + for t in self.threads: + while not t.is_done(): + wait_select(t.conn) + t.connect() + command_stat = 0 + ready = self.threads + else: + ready, _, _ = select.select(self.threads, [], []) assert(False, "Unreachable code") From 882f496e0a1093239a26cdbf29c9af5c5fe9977c Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 11 Feb 2020 22:00:24 +0100 Subject: [PATCH 2/4] nominatim.py: also catch deadlocks on final wait --- nominatim/nominatim.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/nominatim/nominatim.py b/nominatim/nominatim.py index 848846ae..ac6d23ff 100755 --- a/nominatim/nominatim.py +++ b/nominatim/nominatim.py @@ -123,8 +123,20 @@ class DBConnection(object): def wait(self): """ Block until any pending operation is done. """ - wait_select(self.conn) - self.current_query = None + while True: + try: + wait_select(self.conn) + self.current_query = None + return + except psycopg2.extensions.TransactionRollbackError as e: + if e.pgcode == '40P01': + log.info("Deadlock detected (params = {}), retry." + .format(self.current_params)) + self.cursor.execute(self.current_query, self.current_params) + else: + raise + except psycopg2.errors.DeadlockDetected: + self.cursor.execute(self.current_query, self.current_params) def perform(self, sql, args=None): """ Send SQL query to the server. Returns immediately without @@ -158,6 +170,8 @@ class DBConnection(object): self.cursor.execute(self.current_query, self.current_params) else: raise + except psycopg2.errors.DeadlockDetected: + self.cursor.execute(self.current_query, self.current_params) return False From d1eeaa59a60362cc566498049ad02017ee9a2d17 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 11 Feb 2020 22:08:04 +0100 Subject: [PATCH 3/4] nominatim.py: use async in connect() function The async_ parameter name is only supported since psycopg 2.7. However, async is a keyword in Python >= 3.7, so using it directly as a keyword argument gives us a syntax error. 
Work around this by defining the parameters in a dict and passing that dict to the connect function. --- nominatim/nominatim.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/nominatim/nominatim.py b/nominatim/nominatim.py index ac6d23ff..14643770 100755 --- a/nominatim/nominatim.py +++ b/nominatim/nominatim.py @@ -35,9 +35,14 @@ import select log = logging.getLogger() def make_connection(options, asynchronous=False): - return psycopg2.connect(dbname=options.dbname, user=options.user, - password=options.password, host=options.host, - port=options.port, async_=asynchronous) + params = {'dbname' : options.dbname, + 'user' : options.user, + 'password' : options.password, + 'host' : options.host, + 'port' : options.port, + 'async' : asynchronous} + + return psycopg2.connect(**params) class RankRunner(object): From 960409c7014d167cd6e0d5820d943a4ca04034cf Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 11 Feb 2020 22:49:03 +0100 Subject: [PATCH 4/4] psycopg 2.6 is now usable on ubuntu 16 --- vagrant/Install-on-Ubuntu-16.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/vagrant/Install-on-Ubuntu-16.sh b/vagrant/Install-on-Ubuntu-16.sh index 3539851f..393cd0b1 100755 --- a/vagrant/Install-on-Ubuntu-16.sh +++ b/vagrant/Install-on-Ubuntu-16.sh @@ -31,10 +31,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: postgresql-contrib-9.5 \ apache2 php php-pgsql libapache2-mod-php \ php-intl python3-setuptools python3-dev python3-pip \ - python3-tidylib git - - # python3-psycopg2 apt package is too old (2.6), we want at least 2.7 - pip3 install --user psycopg2 + python3-tidylib python3-psycopg2 git # If you want to run the test suite, you need to install the following # additional packages: