Merge pull request #3975 from kad-link/fix/utf8-encoding-clean

Fix: Enforce explicit UTF-8 encoding in file I/O
This commit is contained in:
Sarah Hoffmann
2026-02-10 09:32:06 +01:00
committed by GitHub
22 changed files with 79 additions and 71 deletions

View File

@@ -13,7 +13,8 @@ for infile in VAGRANT_PATH.glob('Install-on-*.sh'):
outfile = f"admin/{infile.stem}.md"
title = infile.stem.replace('-', ' ')
with mkdocs_gen_files.open(outfile, "w") as outfd, infile.open() as infd:
with mkdocs_gen_files.open(outfile, "w", encoding='utf-8') as outfd, \
infile.open(encoding='utf-8') as infd:
print("#", title, file=outfd)
has_empty = False
for line in infd:

View File

@@ -159,7 +159,7 @@ class _PostcodeCollector:
if fname.is_file():
LOG.info("Using external postcode file '%s'.", fname)
return gzip.open(fname, 'rt')
return gzip.open(fname, 'rt', encoding='utf-8')
return None

View File

@@ -141,7 +141,9 @@ def import_importance_csv(dsn: str, data_file: Path) -> int:
copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata)
FROM STDIN"""
with gzip.open(str(data_file), 'rt') as fd, cur.copy(copy_cmd) as copy:
with gzip.open(
str(data_file), 'rt', encoding='utf-8') as fd, \
cur.copy(copy_cmd) as copy:
for row in csv.DictReader(fd, delimiter='\t', quotechar='|'):
wd_id = int(row['wikidata_id'][1:])
copy.write_row((row['language'],

View File

@@ -43,7 +43,7 @@ def opl_writer(tmp_path, node_grid):
def _write(data):
fname = tmp_path / f"test_osm_{nr[0]}.opl"
nr[0] += 1
with fname.open('wt') as fd:
with fname.open('wt', encoding='utf-8') as fd:
for line in data.split('\n'):
if line.startswith('n') and ' x' not in line:
coord = node_grid.get(line[1:].split(' ')[0]) \
@@ -59,7 +59,7 @@ def opl_writer(tmp_path, node_grid):
@given('the lua style file', target_fixture='osm2pgsql_options')
def set_lua_style_file(osm2pgsql_options, docstring, tmp_path):
style = tmp_path / 'custom.lua'
style.write_text(docstring)
style.write_text(docstring, encoding='utf-8')
osm2pgsql_options['osm2pgsql_style'] = str(style)
return osm2pgsql_options

View File

@@ -58,7 +58,7 @@ gb:
pattern: "(l?ld[A-Z0-9]?) ?(dll)"
output: \1 \2
""")
""", encoding='utf-8')
return project_env

View File

@@ -48,7 +48,7 @@ def test_no_project_dir(make_config):
@pytest.mark.parametrize("val", ('apache', '"apache"'))
def test_prefer_project_setting_over_default(make_config, val, tmp_path):
envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val))
envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val), encoding='utf-8')
config = make_config(tmp_path)
@@ -57,7 +57,7 @@ def test_prefer_project_setting_over_default(make_config, val, tmp_path):
def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_path):
envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n')
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody')
@@ -68,7 +68,7 @@ def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_pa
def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path):
envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n')
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', '')
@@ -232,7 +232,7 @@ def test_get_import_style_intern(make_config, src_dir, monkeypatch):
def test_get_import_style_extern_relative(make_config_path, monkeypatch):
config = make_config_path()
(config.project_dir / 'custom.style').write_text('x')
(config.project_dir / 'custom.style').write_text('x', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'custom.style')
@@ -243,7 +243,7 @@ def test_get_import_style_extern_absolute(make_config, tmp_path, monkeypatch):
config = make_config()
cfgfile = tmp_path / 'test.style'
cfgfile.write_text('x')
cfgfile.write_text('x', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', str(cfgfile))
@@ -254,10 +254,10 @@ def test_load_subconf_from_project_dir(make_config_path):
config = make_config_path()
testfile = config.project_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n')
testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: miau\ncat: muh\n')
testfile.write_text('cow: miau\ncat: muh\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml')
@@ -268,7 +268,7 @@ def test_load_subconf_from_settings_dir(make_config_path):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n')
testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml')
@@ -280,7 +280,7 @@ def test_load_subconf_empty_env_conf(make_config_path, monkeypatch):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n')
testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -291,8 +291,8 @@ def test_load_subconf_env_absolute_found(make_config_path, monkeypatch, tmp_path
monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
(tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n')
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
(tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -303,7 +303,7 @@ def test_load_subconf_env_absolute_not_found(make_config_path, monkeypatch, tmp_
monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -314,8 +314,8 @@ def test_load_subconf_env_relative_found(make_config_path, monkeypatch, location
monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
(getattr(config, location) / 'other.yaml').write_text('dog: bark\n')
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
(getattr(config, location) / 'other.yaml').write_text('dog: bark\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -326,7 +326,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -335,7 +335,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
def test_load_subconf_json(make_config_path):
config = make_config_path()
(config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}')
(config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}', encoding='utf-8')
rules = config.load_sub_configuration('test.json')
@@ -352,7 +352,7 @@ def test_load_subconf_not_found(make_config_path):
def test_load_subconf_env_unknown_format(make_config_path):
config = make_config_path()
(config.project_dir / 'test.xml').write_text('<html></html>')
(config.project_dir / 'test.xml').write_text('<html></html>', encoding='utf-8')
with pytest.raises(UsageError, match='unknown format'):
config.load_sub_configuration('test.xml')
@@ -362,8 +362,8 @@ def test_load_subconf_include_absolute(make_config_path, tmp_path):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n')
(tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n')
testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n', encoding='utf-8')
(tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml')
@@ -375,8 +375,8 @@ def test_load_subconf_include_relative(make_config_path, tmp_path, location):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.yaml\n')
(getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n')
testfile.write_text('base: !include inc.yaml\n', encoding='utf-8')
(getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml')
@@ -387,8 +387,8 @@ def test_load_subconf_include_bad_format(make_config_path):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.txt\n')
(config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n')
testfile.write_text('base: !include inc.txt\n', encoding='utf-8')
(config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
with pytest.raises(UsageError, match='Cannot handle config file format.'):
config.load_sub_configuration('test.yaml')
@@ -398,7 +398,7 @@ def test_load_subconf_include_not_found(make_config_path):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.txt\n')
testfile.write_text('base: !include inc.txt\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml')
@@ -408,9 +408,9 @@ def test_load_subconf_include_recursive(make_config_path):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.yaml\n')
(config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n')
(config.config_dir / 'more.yaml').write_text('- the end\n')
testfile.write_text('base: !include inc.yaml\n', encoding='utf-8')
(config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n', encoding='utf-8')
(config.config_dir / 'more.yaml').write_text('- the end\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml')

View File

@@ -41,7 +41,7 @@ def test_load_default_module_with_hyphen(test_config):
def test_load_plugin_module(test_config, tmp_path):
(tmp_path / 'project' / 'testpath').mkdir()
(tmp_path / 'project' / 'testpath' / 'mymod.py')\
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'")
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8')
module = test_config.load_plugin_module('testpath/mymod.py', 'private.something')
@@ -49,7 +49,7 @@ def test_load_plugin_module(test_config, tmp_path):
# also test reloading module
(tmp_path / 'project' / 'testpath' / 'mymod.py')\
.write_text("def my_test_function():\n return 'hjothjorhj'")
.write_text("def my_test_function():\n return 'hjothjorhj'", encoding='utf-8')
module = test_config.load_plugin_module('testpath/mymod.py', 'private.something')
@@ -61,9 +61,9 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch):
pythonpath = tmp_path / 'priv-python'
pythonpath.mkdir()
(pythonpath / MODULE_NAME).mkdir()
(pythonpath / MODULE_NAME / '__init__.py').write_text('')
(pythonpath / MODULE_NAME / '__init__.py').write_text('', encoding='utf-8')
(pythonpath / MODULE_NAME / 'tester.py')\
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'")
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8')
monkeypatch.syspath_prepend(pythonpath)
@@ -73,7 +73,7 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch):
# also test reloading module
(pythonpath / MODULE_NAME / 'tester.py')\
.write_text("def my_test_function():\n return 'dfigjreigj'")
.write_text("def my_test_function():\n return 'dfigjreigj'", encoding='utf-8')
module = test_config.load_plugin_module(f'{MODULE_NAME}.tester', 'private.something')

View File

@@ -22,7 +22,8 @@ def loaded_country(def_config):
def env_with_country_config(project_env):
def _mk_config(cfg):
(project_env.project_dir / 'country_settings.yaml').write_text(dedent(cfg))
(project_env.project_dir / 'country_settings.yaml').write_text(
dedent(cfg), encoding='utf-8')
return project_env

View File

@@ -22,7 +22,7 @@ def sql_factory(tmp_path):
BEGIN
{}
END;
$$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body))
$$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body), encoding='utf-8')
return 'test.sql'
return _mk_sql
@@ -63,7 +63,7 @@ def test_load_file_with_params(sql_preprocessor, sql_factory, temp_db_conn, temp
async def test_load_parallel_file(dsn, sql_preprocessor, tmp_path, temp_db_cursor):
(tmp_path / 'test.sql').write_text("""
CREATE TABLE foo (a TEXT);
CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);")
CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);", encoding='utf-8')
await sql_preprocessor.run_parallel_sql_file(dsn, 'test.sql', num_threads=4)

View File

@@ -15,7 +15,8 @@ from nominatim_db.errors import UsageError
def test_execute_file_success(dsn, temp_db_cursor, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')
tmpfile.write_text(
'CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile)
@@ -29,7 +30,7 @@ def test_execute_file_bad_file(dsn, tmp_path):
def test_execute_file_bad_sql(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)')
tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8')
with pytest.raises(UsageError):
db_utils.execute_file(dsn, tmpfile)
@@ -37,14 +38,14 @@ def test_execute_file_bad_sql(dsn, tmp_path):
def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)')
tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, ignore_errors=True)
def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('INSERT INTO test VALUES(4)')
tmpfile.write_text('INSERT INTO test VALUES(4)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)')
@@ -53,7 +54,7 @@ def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT)')
tmpfile.write_text('CREATE TABLE test (id INT)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)')

View File

@@ -31,7 +31,7 @@ def test_config(project_env, tmp_path):
sqldir = tmp_path / 'sql'
sqldir.mkdir()
(sqldir / 'tokenizer').mkdir()
(sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'")
(sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'", encoding='utf-8')
project_env.lib_dir.sql = sqldir
@@ -58,7 +58,7 @@ def db_prop(temp_db_conn):
def analyzer(tokenizer_factory, test_config, monkeypatch,
temp_db_with_extensions, tmp_path):
sql = tmp_path / 'sql' / 'tokenizer' / 'icu_tokenizer.sql'
sql.write_text("SELECT 'a';")
sql.write_text("SELECT 'a';", encoding='utf-8')
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
tok = tokenizer_factory()
@@ -80,7 +80,8 @@ def analyzer(tokenizer_factory, test_config, monkeypatch,
if with_postcode:
cfgstr['token-analysis'].append({'id': '@postcode',
'analyzer': 'postcodes'})
(test_config.project_dir / 'icu_tokenizer.yaml').write_text(yaml.dump(cfgstr))
(test_config.project_dir / 'icu_tokenizer.yaml').write_text(
yaml.dump(cfgstr), encoding='utf-8')
tok.loader = nominatim_db.tokenizer.icu_rule_loader.ICURuleLoader(test_config)
return tok.name_analyzer()
@@ -190,7 +191,7 @@ def test_update_sql_functions(db_prop, temp_db_cursor,
table_factory('test', 'txt TEXT')
func_file = test_config.lib_dir.sql / 'tokenizer' / 'icu_tokenizer.sql'
func_file.write_text("""INSERT INTO test VALUES (1133)""")
func_file.write_text("""INSERT INTO test VALUES (1133)""", encoding='utf-8')
tok.update_sql_functions(test_config)

View File

@@ -27,7 +27,8 @@ class TestIcuRuleLoader:
self.project_env = project_env
def write_config(self, content):
(self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(dedent(content))
(self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(
dedent(content), encoding='utf-8')
def config_rules(self, *variants):
content = dedent("""\
@@ -119,7 +120,7 @@ class TestIcuRuleLoader:
variants:
""")
transpath = self.project_env.project_dir / ('transliteration.yaml')
transpath.write_text('- "x > y"')
transpath.write_text('- "x > y"', encoding='utf-8')
loader = ICURuleLoader(self.project_env)
rules = loader.get_transliteration_rules()

View File

@@ -21,7 +21,7 @@ if [ "$*" = "--version" ]; then
else
echo "$@"
fi
""")
""", encoding='utf-8')
osm2pgsql_exec.chmod(0o777)
return dict(osm2pgsql=str(osm2pgsql_exec),

View File

@@ -96,7 +96,7 @@ def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options, c
files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm']
for f in files:
f.write_text('test')
f.write_text('test', encoding='utf-8')
database_import.import_osm_data(files, osm2pgsql_options)
captured = capfd.readouterr()
@@ -124,7 +124,7 @@ def test_import_osm_data_drop(table_factory, temp_db_cursor, tmp_path, osm2pgsql
table_factory('planet_osm_nodes')
flatfile = tmp_path / 'flatfile'
flatfile.write_text('touch')
flatfile.write_text('touch', encoding='utf-8')
osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
@@ -193,7 +193,7 @@ class TestSetupSQL:
self.config = def_config
def write_sql(self, fname, content):
(self.config.lib_dir.sql / fname).write_text(content)
(self.config.lib_dir.sql / fname).write_text(content, encoding='utf-8')
@pytest.mark.parametrize("reverse", [True, False])
def test_create_tables(self, temp_db_conn, temp_db_cursor, reverse):

View File

@@ -54,7 +54,7 @@ def test_drop_flatnode_file_file_already_gone(tmp_path):
def test_drop_flatnode_file_delete(tmp_path):
flatfile = tmp_path / 'flatnode.store'
flatfile.write_text('Some content')
flatfile.write_text('Some content', encoding="utf-8")
freeze.drop_flatnode_file(flatfile)

View File

@@ -30,7 +30,7 @@ def xml_wiki_content(src_dir):
return the content of the static xml test file.
"""
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text()
return xml_test_content.read_text(encoding='utf-8')
@pytest.fixture

View File

@@ -245,7 +245,7 @@ def test_postcodes_extern(postcode_update, postcode_table, tmp_path,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')
if gzipped:
subprocess.run(['gzip', str(extfile)])
@@ -262,7 +262,7 @@ def test_postcodes_extern_bad_column(postcode_update, postcode_table, tmp_path,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')
postcode_update(tmp_path)
@@ -274,7 +274,8 @@ def test_postcodes_extern_bad_number(postcode_update, insert_implicit_postcode,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")
extfile.write_text(
"postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0", encoding='utf-8')
postcode_update(tmp_path)

View File

@@ -22,7 +22,7 @@ def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):
def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
test_file = project_env.project_dir / 'address-levels.json'
test_file.write_text('[{"tags":{"place":{"sea":2}}}]')
test_file.write_text('[{"tags":{"place":{"sea":2}}}]', encoding='utf-8')
load_address_levels_from_config(temp_db_conn, project_env)
@@ -31,7 +31,7 @@ def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
def test_load_ranks_from_broken_file(project_env, temp_db_conn):
test_file = project_env.project_dir / 'address-levels.json'
test_file.write_text('[{"tags":"place":{"sea":2}}}]')
test_file.write_text('[{"tags":"place":{"sea":2}}}]', encoding='utf-8')
with pytest.raises(json.decoder.JSONDecodeError):
load_address_levels_from_config(temp_db_conn, project_env)

View File

@@ -21,7 +21,7 @@ class TestCreateFunctions:
def write_functions(self, content):
sqlfile = self.config.lib_dir.sql / 'functions.sql'
sqlfile.write_text(content)
sqlfile.write_text(content, encoding='utf-8')
def test_create_functions(self, temp_db_cursor):
self.write_functions("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER

View File

@@ -20,7 +20,7 @@ from nominatim_db.tools.refresh import (import_wikipedia_articles,
@pytest.fixture
def wiki_csv(tmp_path, sql_preprocessor):
def _import(data):
with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt') as fd:
with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt', encoding='utf-8') as fd:
writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title',
'importance', 'wikidata_id'],
delimiter='\t', quotechar='|')

View File

@@ -21,7 +21,7 @@ def sp_wiki_loader(src_dir, monkeypatch, def_config):
def _mock_wiki_content(lang):
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text()
return xml_test_content.read_text(encoding='utf-8')
monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content',
_mock_wiki_content)

View File

@@ -57,11 +57,11 @@ def tiger_table(def_config, temp_db_conn, sql_preprocessor,
RETURNS INTEGER AS $$
INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode)
RETURNING 1
$$ LANGUAGE SQL;""")
$$ LANGUAGE SQL;""", encoding='utf-8')
(def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text(
"""DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
in_endnumber INTEGER, interpolationtype TEXT,
token_info JSONB, in_postcode TEXT);""")
token_info JSONB, in_postcode TEXT);""", encoding='utf-8')
return MockTigerTable(temp_db_conn)
@@ -75,7 +75,7 @@ def csv_factory(tmp_path):
from;to;interpolation;street;city;state;postcode;geometry
{};{};{};{};{};{};{};{}
""".format(hnr_from, hnr_to, interpol, street, city, state,
postcode, geometry)))
postcode, geometry)), encoding='utf-8')
return _mk_file
@@ -129,7 +129,7 @@ async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
tmp_path):
sqlfile = tmp_path / '1010.csv'
sqlfile.write_text("""Random text""")
sqlfile.write_text("""Random text""", encoding='utf-8')
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
@@ -167,7 +167,7 @@ async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path):
tarfile = tmp_path / 'sample.tar.gz'
tarfile.write_text("""Random text""")
tarfile.write_text("""Random text""", encoding='utf-8')
with pytest.raises(UsageError):
await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())