convert word info column to json before copying

This commit is contained in:
Sarah Hoffmann
2021-07-21 11:37:14 +02:00
parent 6ad35aca4a
commit eb6814d74e
3 changed files with 43 additions and 4 deletions

View File

@@ -65,6 +65,7 @@ _SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
ord(u'\t'): u'\\t', ord(u'\t'): u'\\t',
ord(u'\n'): u'\\n'} ord(u'\n'): u'\\n'}
class CopyBuffer: class CopyBuffer:
""" Data collector for the copy_from command. """ Data collector for the copy_from command.
""" """

View File

@@ -4,6 +4,7 @@ libICU instead of the PostgreSQL module.
""" """
from collections import Counter from collections import Counter
import itertools import itertools
import json
import logging import logging
import re import re
from textwrap import dedent from textwrap import dedent
@@ -173,7 +174,7 @@ class LegacyICUTokenizer:
# copy them back into the word table # copy them back into the word table
with CopyBuffer() as copystr: with CopyBuffer() as copystr:
for k, v in words.items(): for k, v in words.items():
copystr.add('w', k, {'count': v}) copystr.add('w', k, json.dumps({'count': v}))
with conn.cursor() as cur: with conn.cursor() as cur:
copystr.copy_out(cur, 'word', copystr.copy_out(cur, 'word',
@@ -287,7 +288,7 @@ class LegacyICUNameAnalyzer:
to_delete.append(word) to_delete.append(word)
else: else:
copystr.add(self.name_processor.get_search_normalized(postcode), copystr.add(self.name_processor.get_search_normalized(postcode),
'P', {'postcode': postcode}) 'P', json.dumps({'postcode': postcode}))
if to_delete: if to_delete:
cur.execute("""DELETE FROM WORD cur.execute("""DELETE FROM WORD
@@ -337,8 +338,8 @@ class LegacyICUNameAnalyzer:
term = self.name_processor.get_search_normalized(word) term = self.name_processor.get_search_normalized(word)
if term: if term:
copystr.add(term, 'S', copystr.add(term, 'S',
{'word': word, 'class': cls, 'type': typ, json.dumps({'word': word, 'class': cls, 'type': typ,
'op': oper if oper in ('in', 'near') else None}) 'op': oper if oper in ('in', 'near') else None}))
added += 1 added += 1
copystr.copy_out(cursor, 'word', copystr.copy_out(cursor, 'word',

View File

@@ -1,6 +1,8 @@
""" """
Tests for DB utility functions in db.utils Tests for DB utility functions in db.utils
""" """
import json
import pytest import pytest
import nominatim.db.utils as db_utils import nominatim.db.utils as db_utils
@@ -115,3 +117,38 @@ class TestCopyBuffer:
class TestCopyBufferJson:
    """ Tests for CopyBuffer when one of the columns receives a
        JSON-encoded string that is copied into a JSONB column.
    """
    TABLE_NAME = 'copytable'


    @pytest.fixture(autouse=True)
    def setup_test_table(self, table_factory):
        """ Create the target table (one INT and one JSONB column)
            before each test of this class.
        """
        table_factory(self.TABLE_NAME, 'colA INT, colB JSONB')


    def table_rows(self, cursor):
        """ Read the complete test table and return its content as a
            dictionary that maps the first column to the second.
        """
        cursor.execute('SELECT * FROM ' + self.TABLE_NAME)
        rows = dict(cursor)

        # Fewer dictionary entries than rows would mean that the first
        # column contained duplicates which have overwritten each other.
        assert len(rows) == cursor.rowcount

        return rows


    def test_json_object(self, temp_db_cursor):
        """ A plain JSON object must come back unchanged from the table.
        """
        payload = {'test': 'value', 'number': 1}

        with db_utils.CopyBuffer() as buf:
            buf.add(1, json.dumps(payload))

            buf.copy_out(temp_db_cursor, self.TABLE_NAME)

        assert self.table_rows(temp_db_cursor) == {1: payload}


    def test_json_object_special_chras(self, temp_db_cursor):
        """ Tabs, newlines, double quotes and null values inside the
            JSON object must come back unchanged from the table.
        """
        payload = {'te\tst': 'va\nlue', 'nu"mber': None}

        with db_utils.CopyBuffer() as buf:
            buf.add(1, json.dumps(payload))

            buf.copy_out(temp_db_cursor, self.TABLE_NAME)

        assert self.table_rows(temp_db_cursor) == {1: payload}