bdd: run full import on tests

This uncovered a couple of outdated and incorrect tests, which have
been fixed as well.
Sarah Hoffmann
2022-02-24 11:35:21 +01:00
parent a9e3329c39
commit f74228830d
10 changed files with 115 additions and 137 deletions


@@ -390,17 +390,18 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
     def add_country_names(self, country_code, names):
-        """ Add names for the given country to the search index.
+        """ Add default names for the given country to the search index.
         """
         # Make sure any name preprocessing for country names applies.
         info = PlaceInfo({'name': names, 'country_code': country_code,
                           'rank_address': 4, 'class': 'boundary',
                           'type': 'administrative'})
         self._add_country_full_names(country_code,
-                                     self.sanitizer.process_names(info)[0])
+                                     self.sanitizer.process_names(info)[0],
+                                     internal=True)


-    def _add_country_full_names(self, country_code, names):
+    def _add_country_full_names(self, country_code, names, internal=False):
         """ Add names for the given country from an already sanitized
             name list.
         """
@@ -412,21 +413,18 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
         with self.conn.cursor() as cur:
             # Get existing names
-            cur.execute("""SELECT word_token FROM word
-                            WHERE type = 'C' and word = %s""",
+            cur.execute("""SELECT word_token, coalesce(info ? 'internal', false) as is_internal
+                             FROM word
+                             WHERE type = 'C' and word = %s""",
                         (country_code, ))
-            existing_tokens = {t[0] for t in cur}
-
-            # Only add those names that are not yet in the list.
-            new_tokens = word_tokens - existing_tokens
-            if new_tokens:
-                cur.execute("""INSERT INTO word (word_token, type, word)
-                               (SELECT token, 'C', %s
-                                FROM unnest(%s) as token)
-                            """, (country_code, list(new_tokens)))
+            existing_tokens = {True: set(), False: set()}  # internal/external names
+            for word in cur:
+                existing_tokens[word[1]].add(word[0])

             # Delete names that no longer exist.
-            gone_tokens = existing_tokens - word_tokens
+            gone_tokens = existing_tokens[internal] - word_tokens
+            if internal:
+                gone_tokens.update(existing_tokens[False] & word_tokens)
             if gone_tokens:
                 cur.execute("""DELETE FROM word
                                USING unnest(%s) as token
@@ -434,6 +432,23 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
                                  and word_token = token""",
                             (list(gone_tokens), country_code))
+            # Only add those names that are not yet in the list.
+            new_tokens = word_tokens - existing_tokens[True]
+            if not internal:
+                new_tokens -= existing_tokens[False]
+
+            if new_tokens:
+                if internal:
+                    sql = """INSERT INTO word (word_token, type, word, info)
+                               (SELECT token, 'C', %s, '{"internal": "yes"}'
+                                  FROM unnest(%s) as token)
+                           """
+                else:
+                    sql = """INSERT INTO word (word_token, type, word)
+                               (SELECT token, 'C', %s
+                                  FROM unnest(%s) as token)
+                           """
+                cur.execute(sql, (country_code, list(new_tokens)))


     def process_place(self, place):
         """ Determine tokenizer information about the given place.