mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-12 05:44:06 +00:00
adapt housenumber cleanup to new word table structure
This commit is contained in:
@@ -119,12 +119,12 @@ class LegacyICUTokenizer(AbstractTokenizer):
|
|||||||
if not conn.table_exists('search_name'):
|
if not conn.table_exists('search_name'):
|
||||||
return
|
return
|
||||||
with conn.cursor(name="hnr_counter") as cur:
|
with conn.cursor(name="hnr_counter") as cur:
|
||||||
cur.execute("""SELECT word_id, word_token FROM word
|
cur.execute("""SELECT DISTINCT word_id, coalesce(info->>'lookup', word_token) FROM word
|
||||||
WHERE type = 'H'
|
WHERE type = 'H'
|
||||||
AND NOT EXISTS(SELECT * FROM search_name
|
AND NOT EXISTS(SELECT * FROM search_name
|
||||||
WHERE ARRAY[word.word_id] && name_vector)
|
WHERE ARRAY[word.word_id] && name_vector)
|
||||||
AND (char_length(word_token) > 6
|
AND (char_length(coalesce(word, word_token)) > 6
|
||||||
OR word_token not similar to '\\d+')
|
OR coalesce(word, word_token) not similar to '\\d+')
|
||||||
""")
|
""")
|
||||||
candidates = {token: wid for wid, token in cur}
|
candidates = {token: wid for wid, token in cur}
|
||||||
with conn.cursor(name="hnr_counter") as cur:
|
with conn.cursor(name="hnr_counter") as cur:
|
||||||
@@ -137,6 +137,7 @@ class LegacyICUTokenizer(AbstractTokenizer):
|
|||||||
for hnr in row[0].split(';'):
|
for hnr in row[0].split(';'):
|
||||||
candidates.pop(hnr, None)
|
candidates.pop(hnr, None)
|
||||||
LOG.info("There are %s outdated housenumbers.", len(candidates))
|
LOG.info("There are %s outdated housenumbers.", len(candidates))
|
||||||
|
LOG.debug("Outdated housenumbers: %s", candidates.keys())
|
||||||
if candidates:
|
if candidates:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute("""DELETE FROM word WHERE word_id = any(%s)""",
|
cur.execute("""DELETE FROM word WHERE word_id = any(%s)""",
|
||||||
|
|||||||
Reference in New Issue
Block a user