adapt unit test for new word table

Requires a second wrapper class for the word table with the new
layout. This class is interface-compatible with the existing wrapper,
so that later, when the ICU tokenizer becomes the default, all tests
that depend on the behaviour of the default tokenizer can be switched
to the other wrapper.
This commit is contained in:
Sarah Hoffmann
2021-07-22 17:24:43 +02:00
parent eb6814d74e
commit e42878eeda
7 changed files with 225 additions and 125 deletions

View File

@@ -1,5 +1,5 @@
DROP TABLE IF EXISTS word;
CREATE TABLE word_icu (
CREATE TABLE word (
word_id INTEGER,
word_token text NOT NULL,
type text NOT NULL,

View File

@@ -102,8 +102,8 @@ BEGIN
IF full_token IS NULL THEN
full_token := nextval('seq_word');
INSERT INTO word (word_id, word_token, info)
SELECT full_token, lookup_term,
INSERT INTO word (word_id, word_token, type, info)
SELECT full_token, lookup_term, 'W',
json_build_object('word', norm_term, 'count', 0)
FROM unnest(lookup_terms) as lookup_term;
END IF;
@@ -123,8 +123,8 @@ BEGIN
IF term_id IS NULL THEN
term_id := nextval('seq_word');
term_count := 0;
INSERT INTO word (word_id, word_token, info)
VALUES (term_id, term, json_build_object('count', term_count));
INSERT INTO word (word_id, word_token, type, info)
VALUES (term_id, term, 'w', json_build_object('count', term_count));
END IF;
IF term_count < {{ max_word_freq }} THEN