simplify token precomputation

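Rename make_keywords() to precompute_words() to reflect that it is only
used for precomputation. The token IDs are not really needed by the caller,
so don't bother to compute the array of tokens; the function now returns a
constant INTEGER instead of an INTEGER[].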
This commit is contained in:
Sarah Hoffmann
2021-04-19 16:54:22 +02:00
parent d68b02d36a
commit b88b952f56
3 changed files with 6 additions and 26 deletions

View File

@@ -29787,7 +29787,7 @@ st 5557484
-- prefill word table -- prefill word table
select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null; select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w; select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
-- copy the word frequencies -- copy the word frequencies

View File

@@ -377,40 +377,26 @@ $$
LANGUAGE plpgsql; LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION make_keywords(src TEXT) CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
RETURNS INTEGER[] RETURNS INTEGER
AS $$ AS $$
DECLARE DECLARE
result INTEGER[];
s TEXT; s TEXT;
w INTEGER; w INTEGER;
words TEXT[]; words TEXT[];
i INTEGER; i INTEGER;
j INTEGER; j INTEGER;
BEGIN BEGIN
result := '{}'::INTEGER[];
s := make_standard_name(src); s := make_standard_name(src);
w := getorcreate_name_id(s, src); w := getorcreate_name_id(s, src);
IF NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
w := getorcreate_word_id(s); w := getorcreate_word_id(s);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
words := string_to_array(s, ' '); words := string_to_array(s, ' ');
IF array_upper(words, 1) IS NOT NULL THEN IF array_upper(words, 1) IS NOT NULL THEN
FOR j IN 1..array_upper(words, 1) LOOP FOR j IN 1..array_upper(words, 1) LOOP
IF (words[j] != '') THEN IF (words[j] != '') THEN
w = getorcreate_word_id(words[j]); w := getorcreate_word_id(words[j]);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
END IF; END IF;
END LOOP; END LOOP;
END IF; END IF;
@@ -421,9 +407,6 @@ BEGIN
s := make_standard_name(words[j]); s := make_standard_name(words[j]);
IF s != '' THEN IF s != '' THEN
w := getorcreate_word_id(s); w := getorcreate_word_id(s);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
END IF; END IF;
END LOOP; END LOOP;
END IF; END IF;
@@ -433,13 +416,10 @@ BEGIN
s := make_standard_name(s); s := make_standard_name(s);
IF s != '' THEN IF s != '' THEN
w := getorcreate_name_id(s, src); w := getorcreate_name_id(s, src);
IF NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
END IF; END IF;
END IF; END IF;
RETURN result; RETURN 1;
END; END;
$$ $$
LANGUAGE plpgsql; LANGUAGE plpgsql;

View File

@@ -187,7 +187,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
@pytest.mark.parametrize("threads", (1, 5)) @pytest.mark.parametrize("threads", (1, 5))
def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table, def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
temp_db_cursor, threads): temp_db_cursor, threads):
for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'): for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT) temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
""".format(func)) """.format(func))