Merge pull request #3975 from kad-link/fix/utf8-encoding-clean

Fix: Enforce explicit UTF-8 encoding in file I/O
This commit is contained in:
Sarah Hoffmann
2026-02-10 09:32:06 +01:00
committed by GitHub
22 changed files with 79 additions and 71 deletions

View File

@@ -13,7 +13,8 @@ for infile in VAGRANT_PATH.glob('Install-on-*.sh'):
outfile = f"admin/{infile.stem}.md" outfile = f"admin/{infile.stem}.md"
title = infile.stem.replace('-', ' ') title = infile.stem.replace('-', ' ')
with mkdocs_gen_files.open(outfile, "w") as outfd, infile.open() as infd: with mkdocs_gen_files.open(outfile, "w", encoding='utf-8') as outfd, \
infile.open(encoding='utf-8') as infd:
print("#", title, file=outfd) print("#", title, file=outfd)
has_empty = False has_empty = False
for line in infd: for line in infd:

View File

@@ -159,7 +159,7 @@ class _PostcodeCollector:
if fname.is_file(): if fname.is_file():
LOG.info("Using external postcode file '%s'.", fname) LOG.info("Using external postcode file '%s'.", fname)
return gzip.open(fname, 'rt') return gzip.open(fname, 'rt', encoding='utf-8')
return None return None

View File

@@ -141,7 +141,9 @@ def import_importance_csv(dsn: str, data_file: Path) -> int:
copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata) copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata)
FROM STDIN""" FROM STDIN"""
with gzip.open(str(data_file), 'rt') as fd, cur.copy(copy_cmd) as copy: with gzip.open(
str(data_file), 'rt', encoding='utf-8') as fd, \
cur.copy(copy_cmd) as copy:
for row in csv.DictReader(fd, delimiter='\t', quotechar='|'): for row in csv.DictReader(fd, delimiter='\t', quotechar='|'):
wd_id = int(row['wikidata_id'][1:]) wd_id = int(row['wikidata_id'][1:])
copy.write_row((row['language'], copy.write_row((row['language'],

View File

@@ -43,7 +43,7 @@ def opl_writer(tmp_path, node_grid):
def _write(data): def _write(data):
fname = tmp_path / f"test_osm_{nr[0]}.opl" fname = tmp_path / f"test_osm_{nr[0]}.opl"
nr[0] += 1 nr[0] += 1
with fname.open('wt') as fd: with fname.open('wt', encoding='utf-8') as fd:
for line in data.split('\n'): for line in data.split('\n'):
if line.startswith('n') and ' x' not in line: if line.startswith('n') and ' x' not in line:
coord = node_grid.get(line[1:].split(' ')[0]) \ coord = node_grid.get(line[1:].split(' ')[0]) \
@@ -59,7 +59,7 @@ def opl_writer(tmp_path, node_grid):
@given('the lua style file', target_fixture='osm2pgsql_options') @given('the lua style file', target_fixture='osm2pgsql_options')
def set_lua_style_file(osm2pgsql_options, docstring, tmp_path): def set_lua_style_file(osm2pgsql_options, docstring, tmp_path):
style = tmp_path / 'custom.lua' style = tmp_path / 'custom.lua'
style.write_text(docstring) style.write_text(docstring, encoding='utf-8')
osm2pgsql_options['osm2pgsql_style'] = str(style) osm2pgsql_options['osm2pgsql_style'] = str(style)
return osm2pgsql_options return osm2pgsql_options

View File

@@ -58,7 +58,7 @@ gb:
pattern: "(l?ld[A-Z0-9]?) ?(dll)" pattern: "(l?ld[A-Z0-9]?) ?(dll)"
output: \1 \2 output: \1 \2
""") """, encoding='utf-8')
return project_env return project_env

View File

@@ -48,7 +48,7 @@ def test_no_project_dir(make_config):
@pytest.mark.parametrize("val", ('apache', '"apache"')) @pytest.mark.parametrize("val", ('apache', '"apache"'))
def test_prefer_project_setting_over_default(make_config, val, tmp_path): def test_prefer_project_setting_over_default(make_config, val, tmp_path):
envfile = tmp_path / '.env' envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val)) envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val), encoding='utf-8')
config = make_config(tmp_path) config = make_config(tmp_path)
@@ -57,7 +57,7 @@ def test_prefer_project_setting_over_default(make_config, val, tmp_path):
def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_path): def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_path):
envfile = tmp_path / '.env' envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n') envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody') monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody')
@@ -68,7 +68,7 @@ def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_pa
def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path): def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path):
envfile = tmp_path / '.env' envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n') envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', '') monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', '')
@@ -232,7 +232,7 @@ def test_get_import_style_intern(make_config, src_dir, monkeypatch):
def test_get_import_style_extern_relative(make_config_path, monkeypatch): def test_get_import_style_extern_relative(make_config_path, monkeypatch):
config = make_config_path() config = make_config_path()
(config.project_dir / 'custom.style').write_text('x') (config.project_dir / 'custom.style').write_text('x', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'custom.style') monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'custom.style')
@@ -243,7 +243,7 @@ def test_get_import_style_extern_absolute(make_config, tmp_path, monkeypatch):
config = make_config() config = make_config()
cfgfile = tmp_path / 'test.style' cfgfile = tmp_path / 'test.style'
cfgfile.write_text('x') cfgfile.write_text('x', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', str(cfgfile)) monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', str(cfgfile))
@@ -254,10 +254,10 @@ def test_load_subconf_from_project_dir(make_config_path):
config = make_config_path() config = make_config_path()
testfile = config.project_dir / 'test.yaml' testfile = config.project_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n') testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: miau\ncat: muh\n') testfile.write_text('cow: miau\ncat: muh\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml') rules = config.load_sub_configuration('test.yaml')
@@ -268,7 +268,7 @@ def test_load_subconf_from_settings_dir(make_config_path):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n') testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml') rules = config.load_sub_configuration('test.yaml')
@@ -280,7 +280,7 @@ def test_load_subconf_empty_env_conf(make_config_path, monkeypatch):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n') testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -291,8 +291,8 @@ def test_load_subconf_env_absolute_found(make_config_path, monkeypatch, tmp_path
monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml')) monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
config = make_config_path() config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
(tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n') (tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -303,7 +303,7 @@ def test_load_subconf_env_absolute_not_found(make_config_path, monkeypatch, tmp_
monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml')) monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
config = make_config_path() config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'): with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml', config='MY_CONFIG') config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -314,8 +314,8 @@ def test_load_subconf_env_relative_found(make_config_path, monkeypatch, location
monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml') monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
config = make_config_path() config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
(getattr(config, location) / 'other.yaml').write_text('dog: bark\n') (getattr(config, location) / 'other.yaml').write_text('dog: bark\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -326,7 +326,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml') monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
config = make_config_path() config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'): with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml', config='MY_CONFIG') config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -335,7 +335,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
def test_load_subconf_json(make_config_path): def test_load_subconf_json(make_config_path):
config = make_config_path() config = make_config_path()
(config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}') (config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}', encoding='utf-8')
rules = config.load_sub_configuration('test.json') rules = config.load_sub_configuration('test.json')
@@ -352,7 +352,7 @@ def test_load_subconf_not_found(make_config_path):
def test_load_subconf_env_unknown_format(make_config_path): def test_load_subconf_env_unknown_format(make_config_path):
config = make_config_path() config = make_config_path()
(config.project_dir / 'test.xml').write_text('<html></html>') (config.project_dir / 'test.xml').write_text('<html></html>', encoding='utf-8')
with pytest.raises(UsageError, match='unknown format'): with pytest.raises(UsageError, match='unknown format'):
config.load_sub_configuration('test.xml') config.load_sub_configuration('test.xml')
@@ -362,8 +362,8 @@ def test_load_subconf_include_absolute(make_config_path, tmp_path):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n') testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n', encoding='utf-8')
(tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n') (tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml') rules = config.load_sub_configuration('test.yaml')
@@ -375,8 +375,8 @@ def test_load_subconf_include_relative(make_config_path, tmp_path, location):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.yaml\n') testfile.write_text('base: !include inc.yaml\n', encoding='utf-8')
(getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n') (getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml') rules = config.load_sub_configuration('test.yaml')
@@ -387,8 +387,8 @@ def test_load_subconf_include_bad_format(make_config_path):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.txt\n') testfile.write_text('base: !include inc.txt\n', encoding='utf-8')
(config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n') (config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
with pytest.raises(UsageError, match='Cannot handle config file format.'): with pytest.raises(UsageError, match='Cannot handle config file format.'):
config.load_sub_configuration('test.yaml') config.load_sub_configuration('test.yaml')
@@ -398,7 +398,7 @@ def test_load_subconf_include_not_found(make_config_path):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.txt\n') testfile.write_text('base: !include inc.txt\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'): with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml') config.load_sub_configuration('test.yaml')
@@ -408,9 +408,9 @@ def test_load_subconf_include_recursive(make_config_path):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.yaml\n') testfile.write_text('base: !include inc.yaml\n', encoding='utf-8')
(config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n') (config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n', encoding='utf-8')
(config.config_dir / 'more.yaml').write_text('- the end\n') (config.config_dir / 'more.yaml').write_text('- the end\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml') rules = config.load_sub_configuration('test.yaml')

View File

@@ -41,7 +41,7 @@ def test_load_default_module_with_hyphen(test_config):
def test_load_plugin_module(test_config, tmp_path): def test_load_plugin_module(test_config, tmp_path):
(tmp_path / 'project' / 'testpath').mkdir() (tmp_path / 'project' / 'testpath').mkdir()
(tmp_path / 'project' / 'testpath' / 'mymod.py')\ (tmp_path / 'project' / 'testpath' / 'mymod.py')\
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'") .write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8')
module = test_config.load_plugin_module('testpath/mymod.py', 'private.something') module = test_config.load_plugin_module('testpath/mymod.py', 'private.something')
@@ -49,7 +49,7 @@ def test_load_plugin_module(test_config, tmp_path):
# also test reloading module # also test reloading module
(tmp_path / 'project' / 'testpath' / 'mymod.py')\ (tmp_path / 'project' / 'testpath' / 'mymod.py')\
.write_text("def my_test_function():\n return 'hjothjorhj'") .write_text("def my_test_function():\n return 'hjothjorhj'", encoding='utf-8')
module = test_config.load_plugin_module('testpath/mymod.py', 'private.something') module = test_config.load_plugin_module('testpath/mymod.py', 'private.something')
@@ -61,9 +61,9 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch):
pythonpath = tmp_path / 'priv-python' pythonpath = tmp_path / 'priv-python'
pythonpath.mkdir() pythonpath.mkdir()
(pythonpath / MODULE_NAME).mkdir() (pythonpath / MODULE_NAME).mkdir()
(pythonpath / MODULE_NAME / '__init__.py').write_text('') (pythonpath / MODULE_NAME / '__init__.py').write_text('', encoding='utf-8')
(pythonpath / MODULE_NAME / 'tester.py')\ (pythonpath / MODULE_NAME / 'tester.py')\
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'") .write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8')
monkeypatch.syspath_prepend(pythonpath) monkeypatch.syspath_prepend(pythonpath)
@@ -73,7 +73,7 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch):
# also test reloading module # also test reloading module
(pythonpath / MODULE_NAME / 'tester.py')\ (pythonpath / MODULE_NAME / 'tester.py')\
.write_text("def my_test_function():\n return 'dfigjreigj'") .write_text("def my_test_function():\n return 'dfigjreigj'", encoding='utf-8')
module = test_config.load_plugin_module(f'{MODULE_NAME}.tester', 'private.something') module = test_config.load_plugin_module(f'{MODULE_NAME}.tester', 'private.something')

View File

@@ -22,7 +22,8 @@ def loaded_country(def_config):
def env_with_country_config(project_env): def env_with_country_config(project_env):
def _mk_config(cfg): def _mk_config(cfg):
(project_env.project_dir / 'country_settings.yaml').write_text(dedent(cfg)) (project_env.project_dir / 'country_settings.yaml').write_text(
dedent(cfg), encoding='utf-8')
return project_env return project_env

View File

@@ -22,7 +22,7 @@ def sql_factory(tmp_path):
BEGIN BEGIN
{} {}
END; END;
$$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body)) $$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body), encoding='utf-8')
return 'test.sql' return 'test.sql'
return _mk_sql return _mk_sql
@@ -63,7 +63,7 @@ def test_load_file_with_params(sql_preprocessor, sql_factory, temp_db_conn, temp
async def test_load_parallel_file(dsn, sql_preprocessor, tmp_path, temp_db_cursor): async def test_load_parallel_file(dsn, sql_preprocessor, tmp_path, temp_db_cursor):
(tmp_path / 'test.sql').write_text(""" (tmp_path / 'test.sql').write_text("""
CREATE TABLE foo (a TEXT); CREATE TABLE foo (a TEXT);
CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);") CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);", encoding='utf-8')
await sql_preprocessor.run_parallel_sql_file(dsn, 'test.sql', num_threads=4) await sql_preprocessor.run_parallel_sql_file(dsn, 'test.sql', num_threads=4)

View File

@@ -15,7 +15,8 @@ from nominatim_db.errors import UsageError
def test_execute_file_success(dsn, temp_db_cursor, tmp_path): def test_execute_file_success(dsn, temp_db_cursor, tmp_path):
tmpfile = tmp_path / 'test.sql' tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);') tmpfile.write_text(
'CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile) db_utils.execute_file(dsn, tmpfile)
@@ -29,7 +30,7 @@ def test_execute_file_bad_file(dsn, tmp_path):
def test_execute_file_bad_sql(dsn, tmp_path): def test_execute_file_bad_sql(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql' tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)') tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8')
with pytest.raises(UsageError): with pytest.raises(UsageError):
db_utils.execute_file(dsn, tmpfile) db_utils.execute_file(dsn, tmpfile)
@@ -37,14 +38,14 @@ def test_execute_file_bad_sql(dsn, tmp_path):
def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path): def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql' tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)') tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, ignore_errors=True) db_utils.execute_file(dsn, tmpfile, ignore_errors=True)
def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor): def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql' tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('INSERT INTO test VALUES(4)') tmpfile.write_text('INSERT INTO test VALUES(4)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)') db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)')
@@ -53,7 +54,7 @@ def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor): def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql' tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT)') tmpfile.write_text('CREATE TABLE test (id INT)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)') db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)')

View File

@@ -31,7 +31,7 @@ def test_config(project_env, tmp_path):
sqldir = tmp_path / 'sql' sqldir = tmp_path / 'sql'
sqldir.mkdir() sqldir.mkdir()
(sqldir / 'tokenizer').mkdir() (sqldir / 'tokenizer').mkdir()
(sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'") (sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'", encoding='utf-8')
project_env.lib_dir.sql = sqldir project_env.lib_dir.sql = sqldir
@@ -58,7 +58,7 @@ def db_prop(temp_db_conn):
def analyzer(tokenizer_factory, test_config, monkeypatch, def analyzer(tokenizer_factory, test_config, monkeypatch,
temp_db_with_extensions, tmp_path): temp_db_with_extensions, tmp_path):
sql = tmp_path / 'sql' / 'tokenizer' / 'icu_tokenizer.sql' sql = tmp_path / 'sql' / 'tokenizer' / 'icu_tokenizer.sql'
sql.write_text("SELECT 'a';") sql.write_text("SELECT 'a';", encoding='utf-8')
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();') monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
tok = tokenizer_factory() tok = tokenizer_factory()
@@ -80,7 +80,8 @@ def analyzer(tokenizer_factory, test_config, monkeypatch,
if with_postcode: if with_postcode:
cfgstr['token-analysis'].append({'id': '@postcode', cfgstr['token-analysis'].append({'id': '@postcode',
'analyzer': 'postcodes'}) 'analyzer': 'postcodes'})
(test_config.project_dir / 'icu_tokenizer.yaml').write_text(yaml.dump(cfgstr)) (test_config.project_dir / 'icu_tokenizer.yaml').write_text(
yaml.dump(cfgstr), encoding='utf-8')
tok.loader = nominatim_db.tokenizer.icu_rule_loader.ICURuleLoader(test_config) tok.loader = nominatim_db.tokenizer.icu_rule_loader.ICURuleLoader(test_config)
return tok.name_analyzer() return tok.name_analyzer()
@@ -190,7 +191,7 @@ def test_update_sql_functions(db_prop, temp_db_cursor,
table_factory('test', 'txt TEXT') table_factory('test', 'txt TEXT')
func_file = test_config.lib_dir.sql / 'tokenizer' / 'icu_tokenizer.sql' func_file = test_config.lib_dir.sql / 'tokenizer' / 'icu_tokenizer.sql'
func_file.write_text("""INSERT INTO test VALUES (1133)""") func_file.write_text("""INSERT INTO test VALUES (1133)""", encoding='utf-8')
tok.update_sql_functions(test_config) tok.update_sql_functions(test_config)

View File

@@ -27,7 +27,8 @@ class TestIcuRuleLoader:
self.project_env = project_env self.project_env = project_env
def write_config(self, content): def write_config(self, content):
(self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(dedent(content)) (self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(
dedent(content), encoding='utf-8')
def config_rules(self, *variants): def config_rules(self, *variants):
content = dedent("""\ content = dedent("""\
@@ -119,7 +120,7 @@ class TestIcuRuleLoader:
variants: variants:
""") """)
transpath = self.project_env.project_dir / ('transliteration.yaml') transpath = self.project_env.project_dir / ('transliteration.yaml')
transpath.write_text('- "x > y"') transpath.write_text('- "x > y"', encoding='utf-8')
loader = ICURuleLoader(self.project_env) loader = ICURuleLoader(self.project_env)
rules = loader.get_transliteration_rules() rules = loader.get_transliteration_rules()

View File

@@ -21,7 +21,7 @@ if [ "$*" = "--version" ]; then
else else
echo "$@" echo "$@"
fi fi
""") """, encoding='utf-8')
osm2pgsql_exec.chmod(0o777) osm2pgsql_exec.chmod(0o777)
return dict(osm2pgsql=str(osm2pgsql_exec), return dict(osm2pgsql=str(osm2pgsql_exec),

View File

@@ -96,7 +96,7 @@ def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options, c
files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm'] files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm']
for f in files: for f in files:
f.write_text('test') f.write_text('test', encoding='utf-8')
database_import.import_osm_data(files, osm2pgsql_options) database_import.import_osm_data(files, osm2pgsql_options)
captured = capfd.readouterr() captured = capfd.readouterr()
@@ -124,7 +124,7 @@ def test_import_osm_data_drop(table_factory, temp_db_cursor, tmp_path, osm2pgsql
table_factory('planet_osm_nodes') table_factory('planet_osm_nodes')
flatfile = tmp_path / 'flatfile' flatfile = tmp_path / 'flatfile'
flatfile.write_text('touch') flatfile.write_text('touch', encoding='utf-8')
osm2pgsql_options['flatnode_file'] = str(flatfile.resolve()) osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
@@ -193,7 +193,7 @@ class TestSetupSQL:
self.config = def_config self.config = def_config
def write_sql(self, fname, content): def write_sql(self, fname, content):
(self.config.lib_dir.sql / fname).write_text(content) (self.config.lib_dir.sql / fname).write_text(content, encoding='utf-8')
@pytest.mark.parametrize("reverse", [True, False]) @pytest.mark.parametrize("reverse", [True, False])
def test_create_tables(self, temp_db_conn, temp_db_cursor, reverse): def test_create_tables(self, temp_db_conn, temp_db_cursor, reverse):

View File

@@ -54,7 +54,7 @@ def test_drop_flatnode_file_file_already_gone(tmp_path):
def test_drop_flatnode_file_delete(tmp_path): def test_drop_flatnode_file_delete(tmp_path):
flatfile = tmp_path / 'flatnode.store' flatfile = tmp_path / 'flatnode.store'
flatfile.write_text('Some content') flatfile.write_text('Some content', encoding="utf-8")
freeze.drop_flatnode_file(flatfile) freeze.drop_flatnode_file(flatfile)

View File

@@ -30,7 +30,7 @@ def xml_wiki_content(src_dir):
return the content of the static xml test file. return the content of the static xml test file.
""" """
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt' xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text() return xml_test_content.read_text(encoding='utf-8')
@pytest.fixture @pytest.fixture

View File

@@ -245,7 +245,7 @@ def test_postcodes_extern(postcode_update, postcode_table, tmp_path,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511') insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv' extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')
if gzipped: if gzipped:
subprocess.run(['gzip', str(extfile)]) subprocess.run(['gzip', str(extfile)])
@@ -262,7 +262,7 @@ def test_postcodes_extern_bad_column(postcode_update, postcode_table, tmp_path,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511') insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv' extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')
postcode_update(tmp_path) postcode_update(tmp_path)
@@ -274,7 +274,8 @@ def test_postcodes_extern_bad_number(postcode_update, insert_implicit_postcode,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511') insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv' extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0") extfile.write_text(
"postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0", encoding='utf-8')
postcode_update(tmp_path) postcode_update(tmp_path)

View File

@@ -22,7 +22,7 @@ def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):
def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor): def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
test_file = project_env.project_dir / 'address-levels.json' test_file = project_env.project_dir / 'address-levels.json'
test_file.write_text('[{"tags":{"place":{"sea":2}}}]') test_file.write_text('[{"tags":{"place":{"sea":2}}}]', encoding='utf-8')
load_address_levels_from_config(temp_db_conn, project_env) load_address_levels_from_config(temp_db_conn, project_env)
@@ -31,7 +31,7 @@ def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
def test_load_ranks_from_broken_file(project_env, temp_db_conn): def test_load_ranks_from_broken_file(project_env, temp_db_conn):
test_file = project_env.project_dir / 'address-levels.json' test_file = project_env.project_dir / 'address-levels.json'
test_file.write_text('[{"tags":"place":{"sea":2}}}]') test_file.write_text('[{"tags":"place":{"sea":2}}}]', encoding='utf-8')
with pytest.raises(json.decoder.JSONDecodeError): with pytest.raises(json.decoder.JSONDecodeError):
load_address_levels_from_config(temp_db_conn, project_env) load_address_levels_from_config(temp_db_conn, project_env)

View File

@@ -21,7 +21,7 @@ class TestCreateFunctions:
def write_functions(self, content): def write_functions(self, content):
sqlfile = self.config.lib_dir.sql / 'functions.sql' sqlfile = self.config.lib_dir.sql / 'functions.sql'
sqlfile.write_text(content) sqlfile.write_text(content, encoding='utf-8')
def test_create_functions(self, temp_db_cursor): def test_create_functions(self, temp_db_cursor):
self.write_functions("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER self.write_functions("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER

View File

@@ -20,7 +20,7 @@ from nominatim_db.tools.refresh import (import_wikipedia_articles,
@pytest.fixture @pytest.fixture
def wiki_csv(tmp_path, sql_preprocessor): def wiki_csv(tmp_path, sql_preprocessor):
def _import(data): def _import(data):
with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt') as fd: with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt', encoding='utf-8') as fd:
writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title', writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title',
'importance', 'wikidata_id'], 'importance', 'wikidata_id'],
delimiter='\t', quotechar='|') delimiter='\t', quotechar='|')

View File

@@ -21,7 +21,7 @@ def sp_wiki_loader(src_dir, monkeypatch, def_config):
def _mock_wiki_content(lang): def _mock_wiki_content(lang):
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt' xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text() return xml_test_content.read_text(encoding='utf-8')
monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content', monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content',
_mock_wiki_content) _mock_wiki_content)

View File

@@ -57,11 +57,11 @@ def tiger_table(def_config, temp_db_conn, sql_preprocessor,
RETURNS INTEGER AS $$ RETURNS INTEGER AS $$
INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode) INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode)
RETURNING 1 RETURNING 1
$$ LANGUAGE SQL;""") $$ LANGUAGE SQL;""", encoding='utf-8')
(def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text( (def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text(
"""DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER, """DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
in_endnumber INTEGER, interpolationtype TEXT, in_endnumber INTEGER, interpolationtype TEXT,
token_info JSONB, in_postcode TEXT);""") token_info JSONB, in_postcode TEXT);""", encoding='utf-8')
return MockTigerTable(temp_db_conn) return MockTigerTable(temp_db_conn)
@@ -75,7 +75,7 @@ def csv_factory(tmp_path):
from;to;interpolation;street;city;state;postcode;geometry from;to;interpolation;street;city;state;postcode;geometry
{};{};{};{};{};{};{};{} {};{};{};{};{};{};{};{}
""".format(hnr_from, hnr_to, interpol, street, city, state, """.format(hnr_from, hnr_to, interpol, street, city, state,
postcode, geometry))) postcode, geometry)), encoding='utf-8')
return _mk_file return _mk_file
@@ -129,7 +129,7 @@ async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock, async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
tmp_path): tmp_path):
sqlfile = tmp_path / '1010.csv' sqlfile = tmp_path / '1010.csv'
sqlfile.write_text("""Random text""") sqlfile.write_text("""Random text""", encoding='utf-8')
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock()) await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
@@ -167,7 +167,7 @@ async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock, async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path): tmp_path):
tarfile = tmp_path / 'sample.tar.gz' tarfile = tmp_path / 'sample.tar.gz'
tarfile.write_text("""Random text""") tarfile.write_text("""Random text""", encoding='utf-8')
with pytest.raises(UsageError): with pytest.raises(UsageError):
await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock()) await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())