Convert word info column to JSON before copying

This commit is contained in:
Sarah Hoffmann
2021-07-21 11:37:14 +02:00
parent 6ad35aca4a
commit eb6814d74e
3 changed files with 43 additions and 4 deletions

View File

@@ -65,6 +65,7 @@ _SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
ord(u'\t'): u'\\t',
ord(u'\n'): u'\\n'}
class CopyBuffer:
""" Data collector for the copy_from command.
"""

View File

@@ -4,6 +4,7 @@ libICU instead of the PostgreSQL module.
"""
from collections import Counter
import itertools
import json
import logging
import re
from textwrap import dedent
@@ -173,7 +174,7 @@ class LegacyICUTokenizer:
# copy them back into the word table
with CopyBuffer() as copystr:
for k, v in words.items():
copystr.add('w', k, {'count': v})
copystr.add('w', k, json.dumps({'count': v}))
with conn.cursor() as cur:
copystr.copy_out(cur, 'word',
@@ -287,7 +288,7 @@ class LegacyICUNameAnalyzer:
to_delete.append(word)
else:
copystr.add(self.name_processor.get_search_normalized(postcode),
'P', {'postcode': postcode})
'P', json.dumps({'postcode': postcode}))
if to_delete:
cur.execute("""DELETE FROM WORD
@@ -337,8 +338,8 @@ class LegacyICUNameAnalyzer:
term = self.name_processor.get_search_normalized(word)
if term:
copystr.add(term, 'S',
{'word': word, 'class': cls, 'type': typ,
'op': oper if oper in ('in', 'near') else None})
json.dumps({'word': word, 'class': cls, 'type': typ,
'op': oper if oper in ('in', 'near') else None}))
added += 1
copystr.copy_out(cursor, 'word',