From c6bd2bb7fba6c6c7e6b730b693d4666002832a70 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 19 Apr 2021 17:20:31 +0200 Subject: [PATCH 1/5] indexer: move runner into separate file --- nominatim/indexer/indexer.py | 123 +++-------------------------------- nominatim/indexer/runners.py | 113 ++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 115 deletions(-) create mode 100644 nominatim/indexer/runners.py diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 4f4de218..fa40334b 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -1,124 +1,17 @@ """ Main work horse for indexing (computing addresses) the database. """ -# pylint: disable=C0111 import logging import select import psycopg2 from nominatim.indexer.progress import ProgressLogger +from nominatim.indexer import runners from nominatim.db.async_connection import DBConnection LOG = logging.getLogger() -class RankRunner: - """ Returns SQL commands for indexing one rank within the placex table. - """ - - def __init__(self, rank): - self.rank = rank - - def name(self): - return "rank {}".format(self.rank) - - def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE rank_address = {} and indexed_status > 0 - """.format(self.rank) - - def sql_get_objects(self): - return """SELECT place_id FROM placex - WHERE indexed_status > 0 and rank_address = {} - ORDER BY geometry_sector""".format(self.rank) - - @staticmethod - def sql_index_place(ids): - return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ - .format(','.join((str(i) for i in ids))) - - -class InterpolationRunner: - """ Returns SQL commands for indexing the address interpolation table - location_property_osmline. 
- """ - - @staticmethod - def name(): - return "interpolation lines (location_property_osmline)" - - @staticmethod - def sql_count_objects(): - return """SELECT count(*) FROM location_property_osmline - WHERE indexed_status > 0""" - - @staticmethod - def sql_get_objects(): - return """SELECT place_id FROM location_property_osmline - WHERE indexed_status > 0 - ORDER BY geometry_sector""" - - @staticmethod - def sql_index_place(ids): - return """UPDATE location_property_osmline - SET indexed_status = 0 WHERE place_id IN ({}) - """.format(','.join((str(i) for i in ids))) - -class BoundaryRunner: - """ Returns SQL commands for indexing the administrative boundaries - of a certain rank. - """ - - def __init__(self, rank): - self.rank = rank - - def name(self): - return "boundaries rank {}".format(self.rank) - - def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE indexed_status > 0 - AND rank_search = {} - AND class = 'boundary' and type = 'administrative' - """.format(self.rank) - - def sql_get_objects(self): - return """SELECT place_id FROM placex - WHERE indexed_status > 0 and rank_search = {} - and class = 'boundary' and type = 'administrative' - ORDER BY partition, admin_level - """.format(self.rank) - - @staticmethod - def sql_index_place(ids): - return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ - .format(','.join((str(i) for i in ids))) - - -class PostcodeRunner: - """ Provides the SQL commands for indexing the location_postcode table. 
- """ - - @staticmethod - def name(): - return "postcodes (location_postcode)" - - @staticmethod - def sql_count_objects(): - return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0' - - @staticmethod - def sql_get_objects(): - return """SELECT place_id FROM location_postcode - WHERE indexed_status > 0 - ORDER BY country_code, postcode""" - - @staticmethod - def sql_index_place(ids): - return """UPDATE location_postcode SET indexed_status = 0 - WHERE place_id IN ({}) - """.format(','.join((str(i) for i in ids))) - def _analyse_db_if(conn, condition): if condition: @@ -190,7 +83,7 @@ class Indexer: try: for rank in range(max(minrank, 4), min(maxrank, 26)): - self.index(BoundaryRunner(rank)) + self.index(runners.BoundaryRunner(rank)) finally: self._close_connections() @@ -209,14 +102,14 @@ class Indexer: try: for rank in range(max(1, minrank), maxrank): - self.index(RankRunner(rank)) + self.index(runners.RankRunner(rank)) if maxrank == 30: - self.index(RankRunner(0)) - self.index(InterpolationRunner(), 20) - self.index(RankRunner(30), 20) + self.index(runners.RankRunner(0)) + self.index(runners.InterpolationRunner(), 20) + self.index(runners.RankRunner(30), 20) else: - self.index(RankRunner(maxrank)) + self.index(runners.RankRunner(maxrank)) finally: self._close_connections() @@ -229,7 +122,7 @@ class Indexer: self._setup_connections() try: - self.index(PostcodeRunner(), 20) + self.index(runners.PostcodeRunner(), 20) finally: self._close_connections() diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py new file mode 100644 index 00000000..3c853cd0 --- /dev/null +++ b/nominatim/indexer/runners.py @@ -0,0 +1,113 @@ +""" +Mix-ins that provide the actual commands for the indexer for various indexing +tasks. +""" +# pylint: disable=C0111 + +class RankRunner: + """ Returns SQL commands for indexing one rank within the placex table. 
+ """ + + def __init__(self, rank): + self.rank = rank + + def name(self): + return "rank {}".format(self.rank) + + def sql_count_objects(self): + return """SELECT count(*) FROM placex + WHERE rank_address = {} and indexed_status > 0 + """.format(self.rank) + + def sql_get_objects(self): + return """SELECT place_id FROM placex + WHERE indexed_status > 0 and rank_address = {} + ORDER BY geometry_sector""".format(self.rank) + + @staticmethod + def sql_index_place(ids): + return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ + .format(','.join((str(i) for i in ids))) + + +class BoundaryRunner: + """ Returns SQL commands for indexing the administrative boundaries + of a certain rank. + """ + + def __init__(self, rank): + self.rank = rank + + def name(self): + return "boundaries rank {}".format(self.rank) + + def sql_count_objects(self): + return """SELECT count(*) FROM placex + WHERE indexed_status > 0 + AND rank_search = {} + AND class = 'boundary' and type = 'administrative' + """.format(self.rank) + + def sql_get_objects(self): + return """SELECT place_id FROM placex + WHERE indexed_status > 0 and rank_search = {} + and class = 'boundary' and type = 'administrative' + ORDER BY partition, admin_level + """.format(self.rank) + + @staticmethod + def sql_index_place(ids): + return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ + .format(','.join((str(i) for i in ids))) + + +class InterpolationRunner: + """ Returns SQL commands for indexing the address interpolation table + location_property_osmline. 
+ """ + + @staticmethod + def name(): + return "interpolation lines (location_property_osmline)" + + @staticmethod + def sql_count_objects(): + return """SELECT count(*) FROM location_property_osmline + WHERE indexed_status > 0""" + + @staticmethod + def sql_get_objects(): + return """SELECT place_id FROM location_property_osmline + WHERE indexed_status > 0 + ORDER BY geometry_sector""" + + @staticmethod + def sql_index_place(ids): + return """UPDATE location_property_osmline + SET indexed_status = 0 WHERE place_id IN ({}) + """.format(','.join((str(i) for i in ids))) + + +class PostcodeRunner: + """ Provides the SQL commands for indexing the location_postcode table. + """ + + @staticmethod + def name(): + return "postcodes (location_postcode)" + + @staticmethod + def sql_count_objects(): + return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0' + + @staticmethod + def sql_get_objects(): + return """SELECT place_id FROM location_postcode + WHERE indexed_status > 0 + ORDER BY country_code, postcode""" + + @staticmethod + def sql_index_place(ids): + return """UPDATE location_postcode SET indexed_status = 0 + WHERE place_id IN ({}) + """.format(','.join((str(i) for i in ids))) From 18705b3f18f61daea3897db11a860997bf4bd014 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 19 Apr 2021 17:34:26 +0200 Subject: [PATCH 2/5] move analyse function into indexing function --- nominatim/indexer/indexer.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index fa40334b..ebc98038 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -13,12 +13,6 @@ from nominatim.db.async_connection import DBConnection LOG = logging.getLogger() -def _analyse_db_if(conn, condition): - if condition: - with conn.cursor() as cur: - cur.execute('ANALYSE') - - class Indexer: """ Main indexing routine. 
""" @@ -51,26 +45,31 @@ class Indexer: database will be analysed at the appropriate places to ensure that database statistics are updated. """ - conn = psycopg2.connect(self.dsn) - conn.autocommit = True + with psycopg2.connect(self.dsn) as conn: + conn.autocommit = True + + if analyse: + def _analyse(): + with conn.cursor() as cur: + cur.execute('ANALYSE') + else: + def _analyse(): + pass - try: self.index_by_rank(0, 4) - _analyse_db_if(conn, analyse) + _analyse() self.index_boundaries(0, 30) - _analyse_db_if(conn, analyse) + _analyse() self.index_by_rank(5, 25) - _analyse_db_if(conn, analyse) + _analyse() self.index_by_rank(26, 30) - _analyse_db_if(conn, analyse) + _analyse() self.index_postcodes() - _analyse_db_if(conn, analyse) - finally: - conn.close() + _analyse() def index_boundaries(self, minrank, maxrank): From 6430371d7d033f7b4562a1dda6055f9887534db0 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 19 Apr 2021 18:00:28 +0200 Subject: [PATCH 3/5] make index() function private --- nominatim/indexer/indexer.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index ebc98038..7b826d96 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -82,7 +82,7 @@ class Indexer: try: for rank in range(max(minrank, 4), min(maxrank, 26)): - self.index(runners.BoundaryRunner(rank)) + self._index(runners.BoundaryRunner(rank)) finally: self._close_connections() @@ -101,14 +101,14 @@ class Indexer: try: for rank in range(max(1, minrank), maxrank): - self.index(runners.RankRunner(rank)) + self._index(runners.RankRunner(rank)) if maxrank == 30: - self.index(runners.RankRunner(0)) - self.index(runners.InterpolationRunner(), 20) - self.index(runners.RankRunner(30), 20) + self._index(runners.RankRunner(0)) + self._index(runners.InterpolationRunner(), 20) + self._index(runners.RankRunner(30), 20) else: - self.index(runners.RankRunner(maxrank)) 
+ self._index(runners.RankRunner(maxrank)) finally: self._close_connections() @@ -121,7 +121,7 @@ class Indexer: self._setup_connections() try: - self.index(runners.PostcodeRunner(), 20) + self._index(runners.PostcodeRunner(), 20) finally: self._close_connections() @@ -138,26 +138,26 @@ class Indexer: finally: conn.close() - def index(self, obj, batch=1): - """ Index a single rank or table. `obj` describes the SQL to use + def _index(self, runner, batch=1): + """ Index a single rank or table. `runner` describes the SQL to use for indexing. `batch` describes the number of objects that should be processed with a single SQL statement """ - LOG.warning("Starting %s (using batch size %s)", obj.name(), batch) + LOG.warning("Starting %s (using batch size %s)", runner.name(), batch) cur = self.conn.cursor() - cur.execute(obj.sql_count_objects()) + cur.execute(runner.sql_count_objects()) total_tuples = cur.fetchone()[0] LOG.debug("Total number of rows: %i", total_tuples) cur.close() - progress = ProgressLogger(obj.name(), total_tuples) + progress = ProgressLogger(runner.name(), total_tuples) if total_tuples > 0: cur = self.conn.cursor(name='places') - cur.execute(obj.sql_get_objects()) + cur.execute(runner.sql_get_objects()) next_thread = self.find_free_thread() while True: @@ -168,7 +168,7 @@ class Indexer: LOG.debug("Processing places: %s", str(places)) thread = next(next_thread) - thread.perform(obj.sql_index_place(places)) + thread.perform(runner.sql_index_place(places)) progress.add(len(places)) cur.close() From 26a81654a87ffe772da683cc7a1f29e47c00c7eb Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 19 Apr 2021 18:15:09 +0200 Subject: [PATCH 4/5] indexer: make self.conn function-local Also switches to our internal connect function which gives us a cursor with a scalar() function. 
--- nominatim/indexer/indexer.py | 60 +++++++++++++++--------------------- 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 7b826d96..aa1fb8ef 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -4,11 +4,10 @@ Main work horse for indexing (computing addresses) the database. import logging import select -import psycopg2 - from nominatim.indexer.progress import ProgressLogger from nominatim.indexer import runners from nominatim.db.async_connection import DBConnection +from nominatim.db.connection import connect LOG = logging.getLogger() @@ -20,20 +19,14 @@ class Indexer: def __init__(self, dsn, num_threads): self.dsn = dsn self.num_threads = num_threads - self.conn = None self.threads = [] def _setup_connections(self): - self.conn = psycopg2.connect(self.dsn) self.threads = [DBConnection(self.dsn) for _ in range(self.num_threads)] def _close_connections(self): - if self.conn: - self.conn.close() - self.conn = None - for thread in self.threads: thread.close() self.threads = [] @@ -45,7 +38,7 @@ class Indexer: database will be analysed at the appropriate places to ensure that database statistics are updated. """ - with psycopg2.connect(self.dsn) as conn: + with connect(self.dsn) as conn: conn.autocommit = True if analyse: @@ -128,15 +121,11 @@ class Indexer: def update_status_table(self): """ Update the status in the status table to 'indexed'. """ - conn = psycopg2.connect(self.dsn) - - try: + with connect(self.dsn) as conn: with conn.cursor() as cur: cur.execute('UPDATE import_status SET indexed = true') conn.commit() - finally: - conn.close() def _index(self, runner, batch=1): """ Index a single rank or table. 
`runner` describes the SQL to use @@ -145,36 +134,35 @@ class Indexer: """ LOG.warning("Starting %s (using batch size %s)", runner.name(), batch) - cur = self.conn.cursor() - cur.execute(runner.sql_count_objects()) + with connect(self.dsn) as conn: + with conn.cursor() as cur: + total_tuples = cur.scalar(runner.sql_count_objects()) + LOG.debug("Total number of rows: %i", total_tuples) - total_tuples = cur.fetchone()[0] - LOG.debug("Total number of rows: %i", total_tuples) + conn.commit() - cur.close() + progress = ProgressLogger(runner.name(), total_tuples) - progress = ProgressLogger(runner.name(), total_tuples) + if total_tuples > 0: + with conn.cursor(name='places') as cur: + cur.execute(runner.sql_get_objects()) - if total_tuples > 0: - cur = self.conn.cursor(name='places') - cur.execute(runner.sql_get_objects()) + next_thread = self.find_free_thread() + while True: + places = [p[0] for p in cur.fetchmany(batch)] + if not places: + break - next_thread = self.find_free_thread() - while True: - places = [p[0] for p in cur.fetchmany(batch)] - if not places: - break + LOG.debug("Processing places: %s", str(places)) + thread = next(next_thread) - LOG.debug("Processing places: %s", str(places)) - thread = next(next_thread) + thread.perform(runner.sql_index_place(places)) + progress.add(len(places)) - thread.perform(runner.sql_index_place(places)) - progress.add(len(places)) + conn.commit() - cur.close() - - for thread in self.threads: - thread.wait() + for thread in self.threads: + thread.wait() progress.done() From 50b6d7298cbf061e2b93cf9e152a25212f28d119 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 20 Apr 2021 11:16:12 +0200 Subject: [PATCH 5/5] factor out async connection handling into separate class Also adds a test for reconnecting regularly while indexing. 
--- nominatim/indexer/indexer.py | 175 ++++++++++++++++++----------------- test/python/test_indexing.py | 32 +++++-- 2 files changed, 115 insertions(+), 92 deletions(-) diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index aa1fb8ef..a064b285 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -11,6 +11,68 @@ from nominatim.db.connection import connect LOG = logging.getLogger() +class WorkerPool: + """ A pool of asynchronous database connections. + + The pool may be used as a context manager. + """ + REOPEN_CONNECTIONS_AFTER = 100000 + + def __init__(self, dsn, pool_size): + self.threads = [DBConnection(dsn) for _ in range(pool_size)] + self.free_workers = self._yield_free_worker() + + + def finish_all(self): + """ Wait for all connection to finish. + """ + for thread in self.threads: + while not thread.is_done(): + thread.wait() + + self.free_workers = self._yield_free_worker() + + def close(self): + """ Close all connections and clear the pool. + """ + for thread in self.threads: + thread.close() + self.threads = [] + self.free_workers = None + + + def next_free_worker(self): + """ Get the next free connection. + """ + return next(self.free_workers) + + + def _yield_free_worker(self): + ready = self.threads + command_stat = 0 + while True: + for thread in ready: + if thread.is_done(): + command_stat += 1 + yield thread + + if command_stat > self.REOPEN_CONNECTIONS_AFTER: + for thread in self.threads: + while not thread.is_done(): + thread.wait() + thread.connect() + ready = self.threads + else: + _, ready, _ = select.select([], self.threads, []) + + + def __enter__(self): + return self + + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + class Indexer: """ Main indexing routine. 
@@ -19,17 +81,6 @@ class Indexer: def __init__(self, dsn, num_threads): self.dsn = dsn self.num_threads = num_threads - self.threads = [] - - - def _setup_connections(self): - self.threads = [DBConnection(self.dsn) for _ in range(self.num_threads)] - - - def _close_connections(self): - for thread in self.threads: - thread.close() - self.threads = [] def index_full(self, analyse=True): @@ -42,27 +93,27 @@ class Indexer: conn.autocommit = True if analyse: - def _analyse(): + def _analyze(): with conn.cursor() as cur: - cur.execute('ANALYSE') + cur.execute('ANALYZE') else: - def _analyse(): + def _analyze(): pass self.index_by_rank(0, 4) - _analyse() + _analyze() self.index_boundaries(0, 30) - _analyse() + _analyze() self.index_by_rank(5, 25) - _analyse() + _analyze() self.index_by_rank(26, 30) - _analyse() + _analyze() self.index_postcodes() - _analyse() + _analyze() def index_boundaries(self, minrank, maxrank): @@ -71,13 +122,8 @@ class Indexer: LOG.warning("Starting indexing boundaries using %s threads", self.num_threads) - self._setup_connections() - - try: - for rank in range(max(minrank, 4), min(maxrank, 26)): - self._index(runners.BoundaryRunner(rank)) - finally: - self._close_connections() + for rank in range(max(minrank, 4), min(maxrank, 26)): + self._index(runners.BoundaryRunner(rank)) def index_by_rank(self, minrank, maxrank): """ Index all entries of placex in the given rank range (inclusive) @@ -90,20 +136,15 @@ class Indexer: LOG.warning("Starting indexing rank (%i to %i) using %i threads", minrank, maxrank, self.num_threads) - self._setup_connections() + for rank in range(max(1, minrank), maxrank): + self._index(runners.RankRunner(rank)) - try: - for rank in range(max(1, minrank), maxrank): - self._index(runners.RankRunner(rank)) - - if maxrank == 30: - self._index(runners.RankRunner(0)) - self._index(runners.InterpolationRunner(), 20) - self._index(runners.RankRunner(30), 20) - else: - self._index(runners.RankRunner(maxrank)) - finally: - 
self._close_connections() + if maxrank == 30: + self._index(runners.RankRunner(0)) + self._index(runners.InterpolationRunner(), 20) + self._index(runners.RankRunner(30), 20) + else: + self._index(runners.RankRunner(maxrank)) def index_postcodes(self): @@ -111,12 +152,8 @@ class Indexer: """ LOG.warning("Starting indexing postcodes using %s threads", self.num_threads) - self._setup_connections() + self._index(runners.PostcodeRunner(), 20) - try: - self._index(runners.PostcodeRunner(), 20) - finally: - self._close_connections() def update_status_table(self): """ Update the status in the status table to 'indexed'. @@ -147,48 +184,20 @@ class Indexer: with conn.cursor(name='places') as cur: cur.execute(runner.sql_get_objects()) - next_thread = self.find_free_thread() - while True: - places = [p[0] for p in cur.fetchmany(batch)] - if not places: - break + with WorkerPool(self.dsn, self.num_threads) as pool: + while True: + places = [p[0] for p in cur.fetchmany(batch)] + if not places: + break - LOG.debug("Processing places: %s", str(places)) - thread = next(next_thread) + LOG.debug("Processing places: %s", str(places)) + worker = pool.next_free_worker() - thread.perform(runner.sql_index_place(places)) - progress.add(len(places)) + worker.perform(runner.sql_index_place(places)) + progress.add(len(places)) - conn.commit() + pool.finish_all() - for thread in self.threads: - thread.wait() + conn.commit() progress.done() - - def find_free_thread(self): - """ Generator that returns the next connection that is free for - sending a query. - """ - ready = self.threads - command_stat = 0 - - while True: - for thread in ready: - if thread.is_done(): - command_stat += 1 - yield thread - - # refresh the connections occasionaly to avoid potential - # memory leaks in Postgresql. 
- if command_stat > 100000: - for thread in self.threads: - while not thread.is_done(): - thread.wait() - thread.connect() - command_stat = 0 - ready = self.threads - else: - ready, _, _ = select.select(self.threads, [], []) - - assert False, "Unreachable code" diff --git a/test/python/test_indexing.py b/test/python/test_indexing.py index ee9c6c7e..6692eba6 100644 --- a/test/python/test_indexing.py +++ b/test/python/test_indexing.py @@ -5,7 +5,7 @@ import itertools import psycopg2 import pytest -from nominatim.indexer.indexer import Indexer +from nominatim.indexer import indexer class IndexerTestDB: @@ -111,7 +111,7 @@ def test_index_all_by_rank(test_db, threads): assert 31 == test_db.placex_unindexed() assert 1 == test_db.osmline_unindexed() - idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads) idx.index_by_rank(0, 30) assert 0 == test_db.placex_unindexed() @@ -150,7 +150,7 @@ def test_index_partial_without_30(test_db, threads): assert 31 == test_db.placex_unindexed() assert 1 == test_db.osmline_unindexed() - idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads) idx.index_by_rank(4, 15) assert 19 == test_db.placex_unindexed() @@ -170,7 +170,7 @@ def test_index_partial_with_30(test_db, threads): assert 31 == test_db.placex_unindexed() assert 1 == test_db.osmline_unindexed() - idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads) idx.index_by_rank(28, 30) assert 27 == test_db.placex_unindexed() @@ -191,7 +191,7 @@ def test_index_boundaries(test_db, threads): assert 37 == test_db.placex_unindexed() assert 1 == test_db.osmline_unindexed() - idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads) idx.index_boundaries(0, 30) assert 31 == 
test_db.placex_unindexed() @@ -209,14 +209,15 @@ def test_index_postcodes(test_db, threads): for postcode in range(32000, 33000): test_db.add_postcode('us', postcode) - idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads) idx.index_postcodes() assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode WHERE indexed_status != 0""") -def test_index_full(test_db): +@pytest.mark.parametrize("analyse", [True, False]) +def test_index_full(test_db, analyse): for rank in range(4, 10): test_db.add_admin(rank_address=rank, rank_search=rank) for rank in range(31): @@ -225,10 +226,23 @@ def test_index_full(test_db): for postcode in range(1000): test_db.add_postcode('de', postcode) - idx = Indexer('dbname=test_nominatim_python_unittest', 4) - idx.index_full() + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', 4) + idx.index_full(analyse=analyse) assert 0 == test_db.placex_unindexed() assert 0 == test_db.osmline_unindexed() assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode WHERE indexed_status != 0""") + + +@pytest.mark.parametrize("threads", [1, 15]) +def test_index_reopen_connection(test_db, threads, monkeypatch): + monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15) + + for _ in range(1000): + test_db.add_place(rank_address=30, rank_search=30) + + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads) + idx.index_by_rank(28, 30) + + assert 0 == test_db.placex_unindexed()