forked from hans/Nominatim
Merge pull request #2284 from lonvia/cleanup-word-frequency-computation
Rename and simplify function for word pre-computation
This commit is contained in:
@@ -29787,7 +29787,7 @@ st 5557484
|
|||||||
|
|
||||||
-- prefill word table
|
-- prefill word table
|
||||||
|
|
||||||
select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
|
select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
|
||||||
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
|
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
|
||||||
|
|
||||||
-- copy the word frequencies
|
-- copy the word frequencies
|
||||||
|
|||||||
@@ -377,40 +377,26 @@ $$
|
|||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION make_keywords(src TEXT)
|
CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
|
||||||
RETURNS INTEGER[]
|
RETURNS INTEGER
|
||||||
AS $$
|
AS $$
|
||||||
DECLARE
|
DECLARE
|
||||||
result INTEGER[];
|
|
||||||
s TEXT;
|
s TEXT;
|
||||||
w INTEGER;
|
w INTEGER;
|
||||||
words TEXT[];
|
words TEXT[];
|
||||||
i INTEGER;
|
i INTEGER;
|
||||||
j INTEGER;
|
j INTEGER;
|
||||||
BEGIN
|
BEGIN
|
||||||
result := '{}'::INTEGER[];
|
|
||||||
|
|
||||||
s := make_standard_name(src);
|
s := make_standard_name(src);
|
||||||
w := getorcreate_name_id(s, src);
|
w := getorcreate_name_id(s, src);
|
||||||
|
|
||||||
IF NOT (ARRAY[w] <@ result) THEN
|
|
||||||
result := result || w;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
w := getorcreate_word_id(s);
|
w := getorcreate_word_id(s);
|
||||||
|
|
||||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
|
||||||
result := result || w;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
words := string_to_array(s, ' ');
|
words := string_to_array(s, ' ');
|
||||||
IF array_upper(words, 1) IS NOT NULL THEN
|
IF array_upper(words, 1) IS NOT NULL THEN
|
||||||
FOR j IN 1..array_upper(words, 1) LOOP
|
FOR j IN 1..array_upper(words, 1) LOOP
|
||||||
IF (words[j] != '') THEN
|
IF (words[j] != '') THEN
|
||||||
w = getorcreate_word_id(words[j]);
|
w := getorcreate_word_id(words[j]);
|
||||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
|
||||||
result := result || w;
|
|
||||||
END IF;
|
|
||||||
END IF;
|
END IF;
|
||||||
END LOOP;
|
END LOOP;
|
||||||
END IF;
|
END IF;
|
||||||
@@ -421,9 +407,6 @@ BEGIN
|
|||||||
s := make_standard_name(words[j]);
|
s := make_standard_name(words[j]);
|
||||||
IF s != '' THEN
|
IF s != '' THEN
|
||||||
w := getorcreate_word_id(s);
|
w := getorcreate_word_id(s);
|
||||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
|
||||||
result := result || w;
|
|
||||||
END IF;
|
|
||||||
END IF;
|
END IF;
|
||||||
END LOOP;
|
END LOOP;
|
||||||
END IF;
|
END IF;
|
||||||
@@ -433,13 +416,10 @@ BEGIN
|
|||||||
s := make_standard_name(s);
|
s := make_standard_name(s);
|
||||||
IF s != '' THEN
|
IF s != '' THEN
|
||||||
w := getorcreate_name_id(s, src);
|
w := getorcreate_name_id(s, src);
|
||||||
IF NOT (ARRAY[w] <@ result) THEN
|
|
||||||
result := result || w;
|
|
||||||
END IF;
|
|
||||||
END IF;
|
END IF;
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
RETURN result;
|
RETURN 1;
|
||||||
END;
|
END;
|
||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|||||||
@@ -1,14 +0,0 @@
|
|||||||
CREATE TABLE word_frequencies AS
|
|
||||||
(SELECT unnest(make_keywords(v)) as id, sum(count) as count
|
|
||||||
FROM (select svals(name) as v, count(*)from place group by v) cnt
|
|
||||||
WHERE v is not null
|
|
||||||
GROUP BY id);
|
|
||||||
|
|
||||||
select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null;
|
|
||||||
select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
|
|
||||||
|
|
||||||
-- copy the word frequencies
|
|
||||||
update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id;
|
|
||||||
|
|
||||||
-- and drop the temporary frequency table again
|
|
||||||
drop table word_frequencies;
|
|
||||||
@@ -187,7 +187,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
|
|||||||
@pytest.mark.parametrize("threads", (1, 5))
|
@pytest.mark.parametrize("threads", (1, 5))
|
||||||
def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
|
def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
|
||||||
temp_db_cursor, threads):
|
temp_db_cursor, threads):
|
||||||
for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'):
|
for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
|
||||||
temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
|
temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
|
||||||
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
|
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
|
||||||
""".format(func))
|
""".format(func))
|
||||||
|
|||||||
Reference in New Issue
Block a user