forked from hans/Nominatim
do not save word counts of 1
A count of 1 is the default value, which is assumed whenever the count is missing.
This commit is contained in:
@@ -121,10 +121,10 @@ class ICUTokenizer(AbstractTokenizer):
|
|||||||
SELECT unnest(nameaddress_vector) as id, count(*)
|
SELECT unnest(nameaddress_vector) as id, count(*)
|
||||||
FROM search_name GROUP BY id)
|
FROM search_name GROUP BY id)
|
||||||
SELECT coalesce(a.id, w.id) as id,
|
SELECT coalesce(a.id, w.id) as id,
|
||||||
(CASE WHEN w.count is null THEN '{}'::JSONB
|
(CASE WHEN w.count is null or w.count <= 1 THEN '{}'::JSONB
|
||||||
ELSE jsonb_build_object('count', w.count) END
|
ELSE jsonb_build_object('count', w.count) END
|
||||||
||
|
||
|
||||||
CASE WHEN a.count is null THEN '{}'::JSONB
|
CASE WHEN a.count is null or a.count <= 1 THEN '{}'::JSONB
|
||||||
ELSE jsonb_build_object('addr_count', a.count) END) as info
|
ELSE jsonb_build_object('addr_count', a.count) END) as info
|
||||||
FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
|
FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
|
||||||
""")
|
""")
|
||||||
@@ -134,9 +134,10 @@ class ICUTokenizer(AbstractTokenizer):
|
|||||||
drop_tables(conn, 'tmp_word')
|
drop_tables(conn, 'tmp_word')
|
||||||
cur.execute("""CREATE TABLE tmp_word AS
|
cur.execute("""CREATE TABLE tmp_word AS
|
||||||
SELECT word_id, word_token, type, word,
|
SELECT word_id, word_token, type, word,
|
||||||
(CASE WHEN wf.info is null THEN word.info
|
coalesce(word.info, '{}'::jsonb)
|
||||||
ELSE coalesce(word.info, '{}'::jsonb) || wf.info
|
- 'count' - 'addr_count' ||
|
||||||
END) as info
|
coalesce(wf.info, '{}'::jsonb)
|
||||||
|
as info
|
||||||
FROM word LEFT JOIN word_frequencies wf
|
FROM word LEFT JOIN word_frequencies wf
|
||||||
ON word.word_id = wf.id
|
ON word.word_id = wf.id
|
||||||
""")
|
""")
|
||||||
|
|||||||
Reference in New Issue
Block a user