forked from hans/Nominatim
Merge pull request #2284 from lonvia/cleanup-word-frequency-computation
Rename and simplify function for word pre-computation
This commit is contained in:
@@ -29787,7 +29787,7 @@ st 5557484
|
||||
|
||||
-- prefill word table
|
||||
|
||||
select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
|
||||
select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
|
||||
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
|
||||
|
||||
-- copy the word frequencies
|
||||
|
||||
@@ -377,40 +377,26 @@ $$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION make_keywords(src TEXT)
|
||||
RETURNS INTEGER[]
|
||||
CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
result INTEGER[];
|
||||
s TEXT;
|
||||
w INTEGER;
|
||||
words TEXT[];
|
||||
i INTEGER;
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
result := '{}'::INTEGER[];
|
||||
|
||||
s := make_standard_name(src);
|
||||
w := getorcreate_name_id(s, src);
|
||||
|
||||
IF NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
|
||||
w := getorcreate_word_id(s);
|
||||
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
|
||||
words := string_to_array(s, ' ');
|
||||
IF array_upper(words, 1) IS NOT NULL THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
IF (words[j] != '') THEN
|
||||
w = getorcreate_word_id(words[j]);
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
w := getorcreate_word_id(words[j]);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
@@ -421,9 +407,6 @@ BEGIN
|
||||
s := make_standard_name(words[j]);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_word_id(s);
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
@@ -433,13 +416,10 @@ BEGIN
|
||||
s := make_standard_name(s);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_name_id(s, src);
|
||||
IF NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN result;
|
||||
RETURN 1;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
CREATE TABLE word_frequencies AS
|
||||
(SELECT unnest(make_keywords(v)) as id, sum(count) as count
|
||||
FROM (select svals(name) as v, count(*)from place group by v) cnt
|
||||
WHERE v is not null
|
||||
GROUP BY id);
|
||||
|
||||
select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null;
|
||||
select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
|
||||
|
||||
-- copy the word frequencies
|
||||
update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id;
|
||||
|
||||
-- and drop the temporary frequency table again
|
||||
drop table word_frequencies;
|
||||
@@ -187,7 +187,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
|
||||
@pytest.mark.parametrize("threads", (1, 5))
|
||||
def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
|
||||
temp_db_cursor, threads):
|
||||
for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'):
|
||||
for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
|
||||
temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
|
||||
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
|
||||
""".format(func))
|
||||
|
||||
Reference in New Issue
Block a user