Merge pull request #2284 from lonvia/cleanup-word-frequency-computation

Rename and simplify function for word pre-computation
This commit is contained in:
Sarah Hoffmann
2021-04-19 18:28:04 +02:00
committed by GitHub
4 changed files with 6 additions and 40 deletions

View File

@@ -29787,7 +29787,7 @@ st 5557484
-- prefill word table
select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
-- copy the word frequencies

View File

@@ -377,40 +377,26 @@ $$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION make_keywords(src TEXT)
RETURNS INTEGER[]
CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
RETURNS INTEGER
AS $$
DECLARE
result INTEGER[];
s TEXT;
w INTEGER;
words TEXT[];
i INTEGER;
j INTEGER;
BEGIN
result := '{}'::INTEGER[];
s := make_standard_name(src);
w := getorcreate_name_id(s, src);
IF NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
w := getorcreate_word_id(s);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
words := string_to_array(s, ' ');
IF array_upper(words, 1) IS NOT NULL THEN
FOR j IN 1..array_upper(words, 1) LOOP
IF (words[j] != '') THEN
w = getorcreate_word_id(words[j]);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
w := getorcreate_word_id(words[j]);
END IF;
END LOOP;
END IF;
@@ -421,9 +407,6 @@ BEGIN
s := make_standard_name(words[j]);
IF s != '' THEN
w := getorcreate_word_id(s);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
END IF;
END LOOP;
END IF;
@@ -433,13 +416,10 @@ BEGIN
s := make_standard_name(s);
IF s != '' THEN
w := getorcreate_name_id(s, src);
IF NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
END IF;
END IF;
RETURN result;
RETURN 1;
END;
$$
LANGUAGE plpgsql;

View File

@@ -1,14 +0,0 @@
CREATE TABLE word_frequencies AS
(SELECT unnest(make_keywords(v)) as id, sum(count) as count
FROM (select svals(name) as v, count(*)from place group by v) cnt
WHERE v is not null
GROUP BY id);
select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null;
select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
-- copy the word frequencies
update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id;
-- and drop the temporary frequency table again
drop table word_frequencies;

View File

@@ -187,7 +187,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
@pytest.mark.parametrize("threads", (1, 5))
def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
temp_db_cursor, threads):
for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'):
for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
""".format(func))