make DB helper functions free functions

Also changes the drop function so that it can drop multiple tables
at once.
Sarah Hoffmann
2024-07-02 15:15:50 +02:00
parent 71249bd94a
commit 3742fa2929
30 changed files with 347 additions and 364 deletions
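The free functions themselves are defined in the db.connection module, whose diff is not part of this excerpt. As a rough orientation for the hunks below, a minimal sketch of the three most-used helpers could look like this (signatures inferred from the call sites; not the actual implementation):

```python
# Rough sketch only -- signatures inferred from the call sites below,
# not the actual implementation in nominatim's db.connection module.
from typing import Any

from psycopg2 import sql as pysql

Connection = Any  # stand-in for the project's connection wrapper


def execute_scalar(conn: Connection, sql: str, args: Any = None) -> Any:
    """ Run a query that returns a single value and return that value.
        Replaces the former cursor method cur.scalar().
    """
    with conn.cursor() as cur:
        cur.execute(sql, args)
        row = cur.fetchone()

    return row[0] if row is not None else None


def table_exists(conn: Connection, table: str) -> bool:
    """ Check that a table with the given name is present in the database.
        Replaces the former connection method conn.table_exists().
    """
    num = execute_scalar(
        conn,
        'SELECT count(*) FROM pg_tables WHERE tablename = %s',
        (table, ))
    return num == 1


def drop_tables(conn: Connection, *names: str) -> None:
    """ Drop all tables with the given names, if they exist. Unlike the
        former cur.drop_table(), this accepts multiple tables per call.
    """
    with conn.cursor() as cur:
        for name in names:
            cur.execute(pysql.SQL('DROP TABLE IF EXISTS {} CASCADE')
                        .format(pysql.Identifier(name)))
```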

View File

@@ -16,7 +16,8 @@ import logging
 from pathlib import Path
 from textwrap import dedent
 
-from ..db.connection import connect, Connection, Cursor
+from ..db.connection import connect, Connection, Cursor, server_version_tuple,\
+                            drop_tables, table_exists, execute_scalar
 from ..config import Configuration
 from ..db.utils import CopyBuffer
 from ..db.sql_preprocessor import SQLPreprocessor
@@ -108,7 +109,7 @@ class ICUTokenizer(AbstractTokenizer):
         """ Recompute frequencies for all name words.
         """
         with connect(self.dsn) as conn:
-            if not conn.table_exists('search_name'):
+            if not table_exists(conn, 'search_name'):
                 return
 
             with conn.cursor() as cur:
@@ -117,10 +118,9 @@ class ICUTokenizer(AbstractTokenizer):
                 cur.execute('SET max_parallel_workers_per_gather TO %s',
                             (min(threads, 6),))
 
-                if conn.server_version_tuple() < (12, 0):
+                if server_version_tuple(conn) < (12, 0):
                     LOG.info('Computing word frequencies')
-                    cur.drop_table('word_frequencies')
-                    cur.drop_table('addressword_frequencies')
+                    drop_tables(conn, 'word_frequencies', 'addressword_frequencies')
                     cur.execute("""CREATE TEMP TABLE word_frequencies AS
                                      SELECT unnest(name_vector) as id, count(*)
                                      FROM search_name GROUP BY id""")
@@ -152,17 +152,16 @@ class ICUTokenizer(AbstractTokenizer):
                                    $$ LANGUAGE plpgsql IMMUTABLE;
                                """)
                     LOG.info('Update word table with recomputed frequencies')
-                    cur.drop_table('tmp_word')
+                    drop_tables(conn, 'tmp_word')
                     cur.execute("""CREATE TABLE tmp_word AS
                                     SELECT word_id, word_token, type, word,
                                            word_freq_update(word_id, info) as info
                                     FROM word
                                 """)
-                    cur.drop_table('word_frequencies')
-                    cur.drop_table('addressword_frequencies')
+                    drop_tables(conn, 'word_frequencies', 'addressword_frequencies')
                 else:
                     LOG.info('Computing word frequencies')
-                    cur.drop_table('word_frequencies')
+                    drop_tables(conn, 'word_frequencies')
                     cur.execute("""
                       CREATE TEMP TABLE word_frequencies AS
                       WITH word_freq AS MATERIALIZED (
@@ -182,7 +181,7 @@ class ICUTokenizer(AbstractTokenizer):
                 cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
                 cur.execute('ANALYSE word_frequencies')
                 LOG.info('Update word table with recomputed frequencies')
-                cur.drop_table('tmp_word')
+                drop_tables(conn, 'tmp_word')
                 cur.execute("""CREATE TABLE tmp_word AS
                                 SELECT word_id, word_token, type, word,
                                        (CASE WHEN wf.info is null THEN word.info
@@ -191,7 +190,7 @@ class ICUTokenizer(AbstractTokenizer):
                                FROM word LEFT JOIN word_frequencies wf
                                     ON word.word_id = wf.id
                                """)
-                cur.drop_table('word_frequencies')
+                drop_tables(conn, 'word_frequencies')
 
             with conn.cursor() as cur:
                 cur.execute('SET max_parallel_workers_per_gather TO 0')
@@ -210,7 +209,7 @@ class ICUTokenizer(AbstractTokenizer):
         """ Remove unused house numbers.
         """
         with connect(self.dsn) as conn:
-            if not conn.table_exists('search_name'):
+            if not table_exists(conn, 'search_name'):
                 return
             with conn.cursor(name="hnr_counter") as cur:
                 cur.execute("""SELECT DISTINCT word_id, coalesce(info->>'lookup', word_token)
@@ -311,8 +310,7 @@ class ICUTokenizer(AbstractTokenizer):
             frequencies.
         """
         with connect(self.dsn) as conn:
-            with conn.cursor() as cur:
-                cur.drop_table('word')
+            drop_tables(conn, 'word')
             sqlp = SQLPreprocessor(conn, config)
             sqlp.run_string(conn, """
                 CREATE TABLE word (
@@ -370,8 +368,8 @@ class ICUTokenizer(AbstractTokenizer):
         """ Rename all tables and indexes used by the tokenizer.
         """
         with connect(self.dsn) as conn:
+            drop_tables(conn, 'word')
             with conn.cursor() as cur:
-                cur.drop_table('word')
                 cur.execute(f"ALTER TABLE {old} RENAME TO word")
                 for idx in ('word_token', 'word_id'):
                     cur.execute(f"""ALTER INDEX idx_{old}_{idx}
@@ -733,11 +731,10 @@ class ICUNameAnalyzer(AbstractAnalyzer):
             if norm_name:
                 result = self._cache.housenumbers.get(norm_name, result)
                 if result[0] is None:
-                    with self.conn.cursor() as cur:
-                        hid = cur.scalar("SELECT getorcreate_hnr_id(%s)", (norm_name, ))
+                    hid = execute_scalar(self.conn, "SELECT getorcreate_hnr_id(%s)", (norm_name, ))
 
-                        result = hid, norm_name
-                        self._cache.housenumbers[norm_name] = result
+                    result = hid, norm_name
+                    self._cache.housenumbers[norm_name] = result
         else:
             # Otherwise use the analyzer to determine the canonical name.
             # Per convention we use the first variant as the 'lookup name', the
@@ -748,11 +745,10 @@ class ICUNameAnalyzer(AbstractAnalyzer):
             if result[0] is None:
                 variants = analyzer.compute_variants(word_id)
                 if variants:
-                    with self.conn.cursor() as cur:
-                        hid = cur.scalar("SELECT create_analyzed_hnr_id(%s, %s)",
+                    hid = execute_scalar(self.conn, "SELECT create_analyzed_hnr_id(%s, %s)",
                                          (word_id, list(variants)))
-                        result = hid, variants[0]
-                        self._cache.housenumbers[word_id] = result
+                    result = hid, variants[0]
+                    self._cache.housenumbers[word_id] = result
 
         return result
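The recurring pattern in this file: a cursor that existed only to fetch a single value disappears, and execute_scalar() manages the cursor itself. Using the getorcreate_hnr_id lookup from the hunk above:

```python
# Before: the caller opens a cursor just to read back one value.
with self.conn.cursor() as cur:
    hid = cur.scalar("SELECT getorcreate_hnr_id(%s)", (norm_name, ))

# After: the free function opens and closes the cursor internally.
hid = execute_scalar(self.conn, "SELECT getorcreate_hnr_id(%s)", (norm_name, ))
```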

View File

@@ -18,10 +18,10 @@ from textwrap import dedent
 
 from icu import Transliterator
 import psycopg2
-import psycopg2.extras
 
 from ..errors import UsageError
-from ..db.connection import connect, Connection
+from ..db.connection import connect, Connection, drop_tables, table_exists,\
+                            execute_scalar, register_hstore
 from ..config import Configuration
 from ..db import properties
 from ..db import utils as db_utils
@@ -179,11 +179,10 @@ class LegacyTokenizer(AbstractTokenizer):
              * Can nominatim.so be accessed by the database user?
              """
         with connect(self.dsn) as conn:
-            with conn.cursor() as cur:
-                try:
-                    out = cur.scalar("SELECT make_standard_name('a')")
-                except psycopg2.Error as err:
-                    return hint.format(error=str(err))
+            try:
+                out = execute_scalar(conn, "SELECT make_standard_name('a')")
+            except psycopg2.Error as err:
+                return hint.format(error=str(err))
 
         if out != 'a':
             return hint.format(error='Unexpected result for make_standard_name()')
@@ -214,9 +213,9 @@ class LegacyTokenizer(AbstractTokenizer):
         """ Recompute the frequency of full words.
         """
         with connect(self.dsn) as conn:
-            if conn.table_exists('search_name'):
+            if table_exists(conn, 'search_name'):
+                drop_tables(conn, "word_frequencies")
                 with conn.cursor() as cur:
-                    cur.drop_table("word_frequencies")
                     LOG.info("Computing word frequencies")
                     cur.execute("""CREATE TEMP TABLE word_frequencies AS
                                      SELECT unnest(name_vector) as id, count(*)
@@ -226,7 +225,7 @@ class LegacyTokenizer(AbstractTokenizer):
                     cur.execute("""UPDATE word SET search_name_count = count
                                    FROM word_frequencies
                                    WHERE word_token like ' %' and word_id = id""")
-                    cur.drop_table("word_frequencies")
+                drop_tables(conn, "word_frequencies")
 
             conn.commit()
@@ -316,7 +315,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
         self.conn: Optional[Connection] = connect(dsn).connection
         self.conn.autocommit = True
         self.normalizer = normalizer
-        psycopg2.extras.register_hstore(self.conn)
+        register_hstore(self.conn)
 
         self._cache = _TokenCache(self.conn)
@@ -536,9 +535,8 @@ class _TokenInfo:
     def add_names(self, conn: Connection, names: Mapping[str, str]) -> None:
         """ Add token information for the names of the place.
         """
-        with conn.cursor() as cur:
-            # Create the token IDs for all names.
-            self.data['names'] = cur.scalar("SELECT make_keywords(%s)::text",
+        # Create the token IDs for all names.
+        self.data['names'] = execute_scalar(conn, "SELECT make_keywords(%s)::text",
                                             (names, ))
@@ -576,9 +574,8 @@
         """ Add addr:street match terms.
         """
         def _get_street(name: str) -> Optional[str]:
-            with conn.cursor() as cur:
-                return cast(Optional[str],
-                            cur.scalar("SELECT word_ids_from_name(%s)::text", (name, )))
+            return cast(Optional[str],
+                        execute_scalar(conn, "SELECT word_ids_from_name(%s)::text", (name, )))
 
         tokens = self.cache.streets.get(street, _get_street)
         self.data['street'] = tokens or '{}'
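register_hstore() is likewise imported from db.connection now. Its definition is outside this excerpt; judging from the call-site change above it is presumably little more than a wrapper around the psycopg2 helper:

```python
# Presumed shape of the wrapper -- the real definition is not in this diff.
import psycopg2.extras


def register_hstore(conn):
    """ Make hstore columns round-trip as Python dicts on this connection. """
    psycopg2.extras.register_hstore(conn)
```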