port code to psycopg3

2024-07-05 10:43:10 +02:00
parent 3742fa2929
commit 9659afbade
57 changed files with 800 additions and 1330 deletions
--- a/src/nominatim_db/tokenizer/icu_tokenizer.py
+++ b/src/nominatim_db/tokenizer/icu_tokenizer.py
@@ -11,15 +11,16 @@ libICU instead of the PostgreSQL module.
 from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, \
                   Dict, Set, Iterable
 import itertools
-import json
 import logging
 from pathlib import Path
 from textwrap import dedent

+from psycopg.types.json import Jsonb
+from psycopg import sql as pysql
+
 from ..db.connection import connect, Connection, Cursor, server_version_tuple,\
                            drop_tables, table_exists, execute_scalar
 from ..config import Configuration
-from ..db.utils import CopyBuffer
 from ..db.sql_preprocessor import SQLPreprocessor
 from ..data.place_info import PlaceInfo
 from ..data.place_name import PlaceName
@@ -115,8 +116,8 @@ class ICUTokenizer(AbstractTokenizer):
            with conn.cursor() as cur:
                cur.execute('ANALYSE search_name')
                if threads > 1:
-                    cur.execute('SET max_parallel_workers_per_gather TO %s',
-                                (min(threads, 6),))
+                    cur.execute(pysql.SQL('SET max_parallel_workers_per_gather TO {}')
+                                     .format(pysql.Literal(min(threads, 6),)))

                if server_version_tuple(conn) < (12, 0):
                    LOG.info('Computing word frequencies')
@@ -391,7 +392,7 @@ class ICUNameAnalyzer(AbstractAnalyzer):

    def __init__(self, dsn: str, sanitizer: PlaceSanitizer,
                 token_analysis: ICUTokenAnalysis) -> None:
-        self.conn: Optional[Connection] = connect(dsn).connection
+        self.conn: Optional[Connection] = connect(dsn)
        self.conn.autocommit = True
        self.sanitizer = sanitizer
        self.token_analysis = token_analysis
@@ -533,9 +534,7 @@ class ICUNameAnalyzer(AbstractAnalyzer):

        if terms:
            with self.conn.cursor() as cur:
-                cur.execute_values("""SELECT create_postcode_word(pc, var)
-                                      FROM (VALUES %s) AS v(pc, var)""",
-                                   terms)
+                cur.executemany("""SELECT create_postcode_word(%s, %s)""", terms)



@@ -578,18 +577,15 @@ class ICUNameAnalyzer(AbstractAnalyzer):
        to_add = new_phrases - existing_phrases

        added = 0
-        with CopyBuffer() as copystr:
+        with cursor.copy('COPY word(word_token, type, word, info) FROM STDIN') as copy:
            for word, cls, typ, oper in to_add:
                term = self._search_normalized(word)
                if term:
-                    copystr.add(term, 'S', word,
-                                json.dumps({'class': cls, 'type': typ,
-                                            'op': oper if oper in ('in', 'near') else None}))
+                    copy.write_row((term, 'S', word,
+                                    Jsonb({'class': cls, 'type': typ,
+                                           'op': oper if oper in ('in', 'near') else None})))
                    added += 1

-            copystr.copy_out(cursor, 'word',
-                             columns=['word_token', 'type', 'word', 'info'])
-
        return added


@@ -602,11 +598,11 @@ class ICUNameAnalyzer(AbstractAnalyzer):
        to_delete = existing_phrases - new_phrases

        if to_delete:
-            cursor.execute_values(
-                """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
-                    WHERE type = 'S' and word = name
-                          and info->>'class' = in_class and info->>'type' = in_type
-                          and ((op = '-' and info->>'op' is null) or op = info->>'op')
+            cursor.executemany(
+                """ DELETE FROM word
+                      WHERE type = 'S' and word = %s
+                            and info->>'class' = %s and info->>'type' = %s
+                            and %s = coalesce(info->>'op', '-')
                """, to_delete)

        return len(to_delete)
@@ -653,7 +649,7 @@ class ICUNameAnalyzer(AbstractAnalyzer):
                gone_tokens.update(existing_tokens[False] & word_tokens)
            if gone_tokens:
                cur.execute("""DELETE FROM word
-                               USING unnest(%s) as token
+                               USING unnest(%s::text[]) as token
                               WHERE type = 'C' and word = %s
                                     and word_token = token""",
                            (list(gone_tokens), country_code))
@@ -666,12 +662,12 @@ class ICUNameAnalyzer(AbstractAnalyzer):
                if internal:
                    sql = """INSERT INTO word (word_token, type, word, info)
                               (SELECT token, 'C', %s, '{"internal": "yes"}'
-                                  FROM unnest(%s) as token)
+                                  FROM unnest(%s::text[]) as token)
                           """
                else:
                    sql = """INSERT INTO word (word_token, type, word)
                                   (SELECT token, 'C', %s
-                                    FROM unnest(%s) as token)
+                                    FROM unnest(%s::text[]) as token)
                          """
                cur.execute(sql, (country_code, list(new_tokens)))

--- a/src/nominatim_db/tokenizer/legacy_tokenizer.py
+++ b/src/nominatim_db/tokenizer/legacy_tokenizer.py
@@ -17,7 +17,8 @@ import shutil
 from textwrap import dedent

 from icu import Transliterator
-import psycopg2
+import psycopg
+from psycopg import sql as pysql

 from ..errors import UsageError
 from ..db.connection import connect, Connection, drop_tables, table_exists,\
@@ -78,12 +79,12 @@ def _check_module(module_dir: str, conn: Connection) -> None:
    """
    with conn.cursor() as cur:
        try:
-            cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
-                           RETURNS text AS %s, 'transliteration'
-                           LANGUAGE c IMMUTABLE STRICT;
-                           DROP FUNCTION nominatim_test_import_func(text)
-                        """, (f'{module_dir}/nominatim.so', ))
-        except psycopg2.DatabaseError as err:
+            cur.execute(pysql.SQL("""CREATE FUNCTION nominatim_test_import_func(text)
+                                     RETURNS text AS {}, 'transliteration'
+                                     LANGUAGE c IMMUTABLE STRICT;
+                                     DROP FUNCTION nominatim_test_import_func(text)
+                                 """).format(pysql.Literal(f'{module_dir}/nominatim.so')))
+        except psycopg.DatabaseError as err:
            LOG.fatal("Error accessing database module: %s", err)
            raise UsageError("Database module cannot be accessed.") from err

@@ -181,7 +182,7 @@ class LegacyTokenizer(AbstractTokenizer):
        with connect(self.dsn) as conn:
            try:
                out = execute_scalar(conn, "SELECT make_standard_name('a')")
-            except psycopg2.Error as err:
+            except psycopg.Error as err:
                return hint.format(error=str(err))

        if out != 'a':
@@ -312,7 +313,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
    """

    def __init__(self, dsn: str, normalizer: Any):
-        self.conn: Optional[Connection] = connect(dsn).connection
+        self.conn: Optional[Connection] = connect(dsn)
        self.conn.autocommit = True
        self.normalizer = normalizer
        register_hstore(self.conn)
@@ -405,7 +406,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
                            """, (to_delete, ))
            if to_add:
                cur.execute("""SELECT count(create_postcode_id(pc))
-                               FROM unnest(%s) as pc
+                               FROM unnest(%s::text[]) as pc
                            """, (to_add, ))


@@ -422,7 +423,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
        with self.conn.cursor() as cur:
            # Get the old phrases.
            existing_phrases = set()
-            cur.execute("""SELECT word, class, type, operator FROM word
+            cur.execute("""SELECT word, class as cls, type, operator FROM word
                           WHERE class != 'place'
                                 OR (type != 'house' AND type != 'postcode')""")
            for label, cls, typ, oper in cur:
@@ -432,18 +433,19 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
            to_delete = existing_phrases - norm_phrases

            if to_add:
-                cur.execute_values(
+                cur.executemany(
                    """ INSERT INTO word (word_id, word_token, word, class, type,
                                          search_name_count, operator)
                        (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
                                class, type, 0,
                                CASE WHEN op in ('in', 'near') THEN op ELSE null END
-                           FROM (VALUES %s) as v(name, class, type, op))""",
+                           FROM (VALUES (%s, %s, %s, %s)) as v(name, class, type, op))""",
                    to_add)

            if to_delete and should_replace:
-                cur.execute_values(
-                    """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
+                cur.executemany(
+                    """ DELETE FROM word
+                          USING (VALUES (%s, %s, %s, %s)) as v(name, in_class, in_type, op)
                        WHERE word = name and class = in_class and type = in_type
                              and ((op = '-' and operator is null) or op = operator)""",
                    to_delete)
@@ -462,7 +464,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
                """INSERT INTO word (word_id, word_token, country_code)
                   (SELECT nextval('seq_word'), lookup_token, %s
                      FROM (SELECT DISTINCT ' ' || make_standard_name(n) as lookup_token
-                            FROM unnest(%s)n) y
+                            FROM unnest(%s::TEXT[])n) y
                      WHERE NOT EXISTS(SELECT * FROM word
                                       WHERE word_token = lookup_token and country_code = %s))
                """, (country_code, list(names.values()), country_code))