Merge pull request #3975 from kad-link/fix/utf8-encoding-clean

Fix: Enforce explicit UTF-8 encoding in file I/O
This commit is contained in:
Sarah Hoffmann
2026-02-10 09:32:06 +01:00
committed by GitHub
22 changed files with 79 additions and 71 deletions

View File

@@ -159,7 +159,7 @@ class _PostcodeCollector:
if fname.is_file():
LOG.info("Using external postcode file '%s'.", fname)
-return gzip.open(fname, 'rt')
+return gzip.open(fname, 'rt', encoding='utf-8')
return None

View File

@@ -141,7 +141,9 @@ def import_importance_csv(dsn: str, data_file: Path) -> int:
copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata)
FROM STDIN"""
-with gzip.open(str(data_file), 'rt') as fd, cur.copy(copy_cmd) as copy:
+with gzip.open(
+str(data_file), 'rt', encoding='utf-8') as fd, \
+cur.copy(copy_cmd) as copy:
for row in csv.DictReader(fd, delimiter='\t', quotechar='|'):
wd_id = int(row['wikidata_id'][1:])
copy.write_row((row['language'],