diff --git a/docs/mk_install_instructions.py b/docs/mk_install_instructions.py index f8edc89c..378f4c19 100644 --- a/docs/mk_install_instructions.py +++ b/docs/mk_install_instructions.py @@ -13,7 +13,8 @@ for infile in VAGRANT_PATH.glob('Install-on-*.sh'): outfile = f"admin/{infile.stem}.md" title = infile.stem.replace('-', ' ') - with mkdocs_gen_files.open(outfile, "w") as outfd, infile.open() as infd: + with mkdocs_gen_files.open(outfile, "w", encoding='utf-8') as outfd, \ + infile.open(encoding='utf-8') as infd: print("#", title, file=outfd) has_empty = False for line in infd: diff --git a/src/nominatim_db/tools/postcodes.py b/src/nominatim_db/tools/postcodes.py index f1d561a0..d2b8b07e 100644 --- a/src/nominatim_db/tools/postcodes.py +++ b/src/nominatim_db/tools/postcodes.py @@ -159,7 +159,7 @@ class _PostcodeCollector: if fname.is_file(): LOG.info("Using external postcode file '%s'.", fname) - return gzip.open(fname, 'rt') + return gzip.open(fname, 'rt', encoding='utf-8') return None diff --git a/src/nominatim_db/tools/refresh.py b/src/nominatim_db/tools/refresh.py index 84d40256..7fc2e951 100644 --- a/src/nominatim_db/tools/refresh.py +++ b/src/nominatim_db/tools/refresh.py @@ -141,7 +141,9 @@ def import_importance_csv(dsn: str, data_file: Path) -> int: copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata) FROM STDIN""" - with gzip.open(str(data_file), 'rt') as fd, cur.copy(copy_cmd) as copy: + with gzip.open( + str(data_file), 'rt', encoding='utf-8') as fd, \ + cur.copy(copy_cmd) as copy: for row in csv.DictReader(fd, delimiter='\t', quotechar='|'): wd_id = int(row['wikidata_id'][1:]) copy.write_row((row['language'], diff --git a/test/bdd/test_osm2pgsql.py b/test/bdd/test_osm2pgsql.py index 75d26f17..144a0f9c 100644 --- a/test/bdd/test_osm2pgsql.py +++ b/test/bdd/test_osm2pgsql.py @@ -43,7 +43,7 @@ def opl_writer(tmp_path, node_grid): def _write(data): fname = tmp_path / f"test_osm_{nr[0]}.opl" nr[0] += 1 - with fname.open('wt') as fd: + with fname.open('wt', encoding='utf-8') as fd: for line in data.split('\n'): if line.startswith('n') and ' x' not in line: coord = node_grid.get(line[1:].split(' ')[0]) \ @@ -59,7 +59,7 @@ def opl_writer(tmp_path, node_grid): @given('the lua style file', target_fixture='osm2pgsql_options') def set_lua_style_file(osm2pgsql_options, docstring, tmp_path): style = tmp_path / 'custom.lua' - style.write_text(docstring) + style.write_text(docstring, encoding='utf-8') osm2pgsql_options['osm2pgsql_style'] = str(style) return osm2pgsql_options diff --git a/test/python/api/search/test_postcode_parser.py b/test/python/api/search/test_postcode_parser.py index 8c159467..6faa6d3e 100644 --- a/test/python/api/search/test_postcode_parser.py +++ b/test/python/api/search/test_postcode_parser.py @@ -58,7 +58,7 @@ gb: pattern: "(l?ld[A-Z0-9]?) ?(dll)" output: \1 \2 - """) + """, encoding='utf-8') return project_env diff --git a/test/python/config/test_config.py b/test/python/config/test_config.py index a0dbf476..34e7acd7 100644 --- a/test/python/config/test_config.py +++ b/test/python/config/test_config.py @@ -48,7 +48,7 @@ def test_no_project_dir(make_config): @pytest.mark.parametrize("val", ('apache', '"apache"')) def test_prefer_project_setting_over_default(make_config, val, tmp_path): envfile = tmp_path / '.env' - envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val)) + envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val), encoding='utf-8') config = make_config(tmp_path) @@ -57,7 +57,7 @@ def test_prefer_project_setting_over_default(make_config, val, tmp_path): def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_path): envfile = tmp_path / '.env' - envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n') + envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n', encoding='utf-8') monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody') @@ -68,7 +68,7 @@ def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_pa def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path): envfile = tmp_path / '.env' - envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n') + envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n', encoding='utf-8') monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', '') @@ -232,7 +232,7 @@ def test_get_import_style_intern(make_config, src_dir, monkeypatch): def test_get_import_style_extern_relative(make_config_path, monkeypatch): config = make_config_path() - (config.project_dir / 'custom.style').write_text('x') + (config.project_dir / 'custom.style').write_text('x', encoding='utf-8') monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'custom.style') @@ -243,7 +243,7 @@ def test_get_import_style_extern_absolute(make_config, tmp_path, monkeypatch): config = make_config() cfgfile = tmp_path / 'test.style' - cfgfile.write_text('x') + cfgfile.write_text('x', encoding='utf-8') monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', str(cfgfile)) @@ -254,10 +254,10 @@ def test_load_subconf_from_project_dir(make_config_path): config = make_config_path() testfile = config.project_dir / 'test.yaml' - testfile.write_text('cow: muh\ncat: miau\n') + testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8') testfile = config.config_dir / 'test.yaml' - testfile.write_text('cow: miau\ncat: muh\n') + testfile.write_text('cow: miau\ncat: muh\n', encoding='utf-8') rules = config.load_sub_configuration('test.yaml') @@ -268,7 +268,7 @@ def test_load_subconf_from_settings_dir(make_config_path): config = make_config_path() testfile = config.config_dir / 'test.yaml' - testfile.write_text('cow: muh\ncat: miau\n') + testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8') rules = config.load_sub_configuration('test.yaml') @@ -280,7 +280,7 @@ def test_load_subconf_empty_env_conf(make_config_path, monkeypatch): config = make_config_path() testfile = config.config_dir / 'test.yaml' - testfile.write_text('cow: muh\ncat: miau\n') + testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8') rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') @@ -291,8 +291,8 @@ def test_load_subconf_env_absolute_found(make_config_path, monkeypatch, tmp_path monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml')) config = make_config_path() - (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') - (tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n') + (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8') + (tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n', encoding='utf-8') rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') @@ -303,7 +303,7 @@ def test_load_subconf_env_absolute_not_found(make_config_path, monkeypatch, tmp_ monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml')) config = make_config_path() - (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') + (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8') with pytest.raises(UsageError, match='Config file not found.'): config.load_sub_configuration('test.yaml', config='MY_CONFIG') @@ -314,8 +314,8 @@ def test_load_subconf_env_relative_found(make_config_path, monkeypatch, location monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml') config = make_config_path() - (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') - (getattr(config, location) / 'other.yaml').write_text('dog: bark\n') + (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8') + (getattr(config, location) / 'other.yaml').write_text('dog: bark\n', encoding='utf-8') rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') @@ -326,7 +326,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch): monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml') config = make_config_path() - (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') + (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8') with pytest.raises(UsageError, match='Config file not found.'): config.load_sub_configuration('test.yaml', config='MY_CONFIG') @@ -335,7 +335,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch): def test_load_subconf_json(make_config_path): config = make_config_path() - (config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}') + (config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}', encoding='utf-8') rules = config.load_sub_configuration('test.json') @@ -352,7 +352,7 @@ def test_load_subconf_not_found(make_config_path): def test_load_subconf_env_unknown_format(make_config_path): config = make_config_path() - (config.project_dir / 'test.xml').write_text('') + (config.project_dir / 'test.xml').write_text('', encoding='utf-8') with pytest.raises(UsageError, match='unknown format'): config.load_sub_configuration('test.xml') @@ -362,8 +362,8 @@ def test_load_subconf_include_absolute(make_config_path, tmp_path): config = make_config_path() testfile = config.config_dir / 'test.yaml' - testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n') - (tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n') + testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n', encoding='utf-8') + (tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8') rules = config.load_sub_configuration('test.yaml') @@ -375,8 +375,8 @@ def test_load_subconf_include_relative(make_config_path, tmp_path, location): config = make_config_path() testfile = config.config_dir / 'test.yaml' - testfile.write_text('base: !include inc.yaml\n') - (getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n') + testfile.write_text('base: !include inc.yaml\n', encoding='utf-8') + (getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8') rules = config.load_sub_configuration('test.yaml') @@ -387,8 +387,8 @@ def test_load_subconf_include_bad_format(make_config_path): config = make_config_path() testfile = config.config_dir / 'test.yaml' - testfile.write_text('base: !include inc.txt\n') - (config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n') + testfile.write_text('base: !include inc.txt\n', encoding='utf-8') + (config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n', encoding='utf-8') with pytest.raises(UsageError, match='Cannot handle config file format.'): config.load_sub_configuration('test.yaml') @@ -398,7 +398,7 @@ def test_load_subconf_include_not_found(make_config_path): config = make_config_path() testfile = config.config_dir / 'test.yaml' - testfile.write_text('base: !include inc.txt\n') + testfile.write_text('base: !include inc.txt\n', encoding='utf-8') with pytest.raises(UsageError, match='Config file not found.'): config.load_sub_configuration('test.yaml') @@ -408,9 +408,9 @@ def test_load_subconf_include_recursive(make_config_path): config = make_config_path() testfile = config.config_dir / 'test.yaml' - testfile.write_text('base: !include inc.yaml\n') - (config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n') - (config.config_dir / 'more.yaml').write_text('- the end\n') + testfile.write_text('base: !include inc.yaml\n', encoding='utf-8') + (config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n', encoding='utf-8') + (config.config_dir / 'more.yaml').write_text('- the end\n', encoding='utf-8') rules = config.load_sub_configuration('test.yaml') diff --git a/test/python/config/test_config_load_module.py b/test/python/config/test_config_load_module.py index 309bd1fc..04a114cf 100644 --- a/test/python/config/test_config_load_module.py +++ b/test/python/config/test_config_load_module.py @@ -41,7 +41,7 @@ def test_load_default_module_with_hyphen(test_config): def test_load_plugin_module(test_config, tmp_path): (tmp_path / 'project' / 'testpath').mkdir() (tmp_path / 'project' / 'testpath' / 'mymod.py')\ - .write_text("def my_test_function():\n return 'gjwitlsSG42TG%'") + .write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8') module = test_config.load_plugin_module('testpath/mymod.py', 'private.something') @@ -49,7 +49,7 @@ def test_load_plugin_module(test_config, tmp_path): # also test reloading module (tmp_path / 'project' / 'testpath' / 'mymod.py')\ - .write_text("def my_test_function():\n return 'hjothjorhj'") + .write_text("def my_test_function():\n return 'hjothjorhj'", encoding='utf-8') module = test_config.load_plugin_module('testpath/mymod.py', 'private.something') @@ -61,9 +61,9 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch): pythonpath = tmp_path / 'priv-python' pythonpath.mkdir() (pythonpath / MODULE_NAME).mkdir() - (pythonpath / MODULE_NAME / '__init__.py').write_text('') + (pythonpath / MODULE_NAME / '__init__.py').write_text('', encoding='utf-8') (pythonpath / MODULE_NAME / 'tester.py')\ - .write_text("def my_test_function():\n return 'gjwitlsSG42TG%'") + .write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8') monkeypatch.syspath_prepend(pythonpath) @@ -73,7 +73,7 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch): # also test reloading module (pythonpath / MODULE_NAME / 'tester.py')\ - .write_text("def my_test_function():\n return 'dfigjreigj'") + .write_text("def my_test_function():\n return 'dfigjreigj'", encoding='utf-8') module = test_config.load_plugin_module(f'{MODULE_NAME}.tester', 'private.something') diff --git a/test/python/data/test_country_info.py b/test/python/data/test_country_info.py index a85b7bf9..ea7caa3e 100644 --- a/test/python/data/test_country_info.py +++ b/test/python/data/test_country_info.py @@ -22,7 +22,8 @@ def loaded_country(def_config): def env_with_country_config(project_env): def _mk_config(cfg): - (project_env.project_dir / 'country_settings.yaml').write_text(dedent(cfg)) + (project_env.project_dir / 'country_settings.yaml').write_text( + dedent(cfg), encoding='utf-8') return project_env diff --git a/test/python/db/test_sql_preprocessor.py b/test/python/db/test_sql_preprocessor.py index f2fbbb2a..ad719a3f 100644 --- a/test/python/db/test_sql_preprocessor.py +++ b/test/python/db/test_sql_preprocessor.py @@ -22,7 +22,7 @@ def sql_factory(tmp_path): BEGIN {} END; - $$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body)) + $$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body), encoding='utf-8') return 'test.sql' return _mk_sql @@ -63,7 +63,7 @@ def test_load_file_with_params(sql_preprocessor, sql_factory, temp_db_conn, temp async def test_load_parallel_file(dsn, sql_preprocessor, tmp_path, temp_db_cursor): (tmp_path / 'test.sql').write_text(""" CREATE TABLE foo (a TEXT); - CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);") + CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);", encoding='utf-8') await sql_preprocessor.run_parallel_sql_file(dsn, 'test.sql', num_threads=4) diff --git a/test/python/db/test_utils.py b/test/python/db/test_utils.py index 2bc7d959..22029ada 100644 --- a/test/python/db/test_utils.py +++ b/test/python/db/test_utils.py @@ -15,7 +15,8 @@ from nominatim_db.errors import UsageError def test_execute_file_success(dsn, temp_db_cursor, tmp_path): tmpfile = tmp_path / 'test.sql' - tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);') + tmpfile.write_text( + 'CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);', encoding='utf-8') db_utils.execute_file(dsn, tmpfile) @@ -29,7 +30,7 @@ def test_execute_file_bad_file(dsn, tmp_path): def test_execute_file_bad_sql(dsn, tmp_path): tmpfile = tmp_path / 'test.sql' - tmpfile.write_text('CREATE STABLE test (id INT)') + tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8') with pytest.raises(UsageError): db_utils.execute_file(dsn, tmpfile) @@ -37,14 +38,14 @@ def test_execute_file_bad_sql(dsn, tmp_path): def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path): tmpfile = tmp_path / 'test.sql' - tmpfile.write_text('CREATE STABLE test (id INT)') + tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8') db_utils.execute_file(dsn, tmpfile, ignore_errors=True) def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor): tmpfile = tmp_path / 'test.sql' - tmpfile.write_text('INSERT INTO test VALUES(4)') + tmpfile.write_text('INSERT INTO test VALUES(4)', encoding='utf-8') db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)') @@ -53,7 +54,7 @@ def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor): def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor): tmpfile = tmp_path / 'test.sql' - tmpfile.write_text('CREATE TABLE test (id INT)') + tmpfile.write_text('CREATE TABLE test (id INT)', encoding='utf-8') db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)') diff --git a/test/python/tokenizer/test_icu.py b/test/python/tokenizer/test_icu.py index cf4140c9..67c2b84b 100644 --- a/test/python/tokenizer/test_icu.py +++ b/test/python/tokenizer/test_icu.py @@ -31,7 +31,7 @@ def test_config(project_env, tmp_path): sqldir = tmp_path / 'sql' sqldir.mkdir() (sqldir / 'tokenizer').mkdir() - (sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'") + (sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'", encoding='utf-8') project_env.lib_dir.sql = sqldir @@ -58,7 +58,7 @@ def db_prop(temp_db_conn): def analyzer(tokenizer_factory, test_config, monkeypatch, temp_db_with_extensions, tmp_path): sql = tmp_path / 'sql' / 'tokenizer' / 'icu_tokenizer.sql' - sql.write_text("SELECT 'a';") + sql.write_text("SELECT 'a';", encoding='utf-8') monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();') tok = tokenizer_factory() @@ -80,7 +80,8 @@ def analyzer(tokenizer_factory, test_config, monkeypatch, if with_postcode: cfgstr['token-analysis'].append({'id': '@postcode', 'analyzer': 'postcodes'}) - (test_config.project_dir / 'icu_tokenizer.yaml').write_text(yaml.dump(cfgstr)) + (test_config.project_dir / 'icu_tokenizer.yaml').write_text( + yaml.dump(cfgstr), encoding='utf-8') tok.loader = nominatim_db.tokenizer.icu_rule_loader.ICURuleLoader(test_config) return tok.name_analyzer() @@ -190,7 +191,7 @@ def test_update_sql_functions(db_prop, temp_db_cursor, table_factory('test', 'txt TEXT') func_file = test_config.lib_dir.sql / 'tokenizer' / 'icu_tokenizer.sql' - func_file.write_text("""INSERT INTO test VALUES (1133)""") + func_file.write_text("""INSERT INTO test VALUES (1133)""", encoding='utf-8') tok.update_sql_functions(test_config) diff --git a/test/python/tokenizer/test_icu_rule_loader.py b/test/python/tokenizer/test_icu_rule_loader.py index f26b84c2..39b548e2 100644 --- a/test/python/tokenizer/test_icu_rule_loader.py +++ b/test/python/tokenizer/test_icu_rule_loader.py @@ -27,7 +27,8 @@ class TestIcuRuleLoader: self.project_env = project_env def write_config(self, content): - (self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(dedent(content)) + (self.project_env.project_dir / 'icu_tokenizer.yaml').write_text( + dedent(content), encoding='utf-8') def config_rules(self, *variants): content = dedent("""\ @@ -119,7 +120,7 @@ class TestIcuRuleLoader: variants: """) transpath = self.project_env.project_dir / ('transliteration.yaml') - transpath.write_text('- "x > y"') + transpath.write_text('- "x > y"', encoding='utf-8') loader = ICURuleLoader(self.project_env) rules = loader.get_transliteration_rules() diff --git a/test/python/tools/conftest.py b/test/python/tools/conftest.py index c5d67757..1479dffe 100644 --- a/test/python/tools/conftest.py +++ b/test/python/tools/conftest.py @@ -21,7 +21,7 @@ if [ "$*" = "--version" ]; then else echo "$@" fi - """) + """, encoding='utf-8') osm2pgsql_exec.chmod(0o777) return dict(osm2pgsql=str(osm2pgsql_exec), diff --git a/test/python/tools/test_database_import.py b/test/python/tools/test_database_import.py index f3d388da..a00e23ec 100644 --- a/test/python/tools/test_database_import.py +++ b/test/python/tools/test_database_import.py @@ -96,7 +96,7 @@ def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options, c files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm'] for f in files: - f.write_text('test') + f.write_text('test', encoding='utf-8') database_import.import_osm_data(files, osm2pgsql_options) captured = capfd.readouterr() @@ -124,7 +124,7 @@ def test_import_osm_data_drop(table_factory, temp_db_cursor, tmp_path, osm2pgsql table_factory('planet_osm_nodes') flatfile = tmp_path / 'flatfile' - flatfile.write_text('touch') + flatfile.write_text('touch', encoding='utf-8') osm2pgsql_options['flatnode_file'] = str(flatfile.resolve()) @@ -193,7 +193,7 @@ class TestSetupSQL: self.config = def_config def write_sql(self, fname, content): - (self.config.lib_dir.sql / fname).write_text(content) + (self.config.lib_dir.sql / fname).write_text(content, encoding='utf-8') @pytest.mark.parametrize("reverse", [True, False]) def test_create_tables(self, temp_db_conn, temp_db_cursor, reverse): diff --git a/test/python/tools/test_freeze.py b/test/python/tools/test_freeze.py index 6d1a73bc..d2d5e9bf 100644 --- a/test/python/tools/test_freeze.py +++ b/test/python/tools/test_freeze.py @@ -54,7 +54,7 @@ def test_drop_flatnode_file_file_already_gone(tmp_path): def test_drop_flatnode_file_delete(tmp_path): flatfile = tmp_path / 'flatnode.store' - flatfile.write_text('Some content') + flatfile.write_text('Some content', encoding="utf-8") freeze.drop_flatnode_file(flatfile) diff --git a/test/python/tools/test_import_special_phrases.py b/test/python/tools/test_import_special_phrases.py index 2df1c682..db2d02e2 100644 --- a/test/python/tools/test_import_special_phrases.py +++ b/test/python/tools/test_import_special_phrases.py @@ -30,7 +30,7 @@ def xml_wiki_content(src_dir): return the content of the static xml test file. """ xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt' - return xml_test_content.read_text() + return xml_test_content.read_text(encoding='utf-8') @pytest.fixture diff --git a/test/python/tools/test_postcodes.py b/test/python/tools/test_postcodes.py index cbc85b08..02cd2a1a 100644 --- a/test/python/tools/test_postcodes.py +++ b/test/python/tools/test_postcodes.py @@ -245,7 +245,7 @@ def test_postcodes_extern(postcode_update, postcode_table, tmp_path, insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511') extfile = tmp_path / 'xx_postcodes.csv' - extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") + extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8') if gzipped: subprocess.run(['gzip', str(extfile)]) @@ -262,7 +262,7 @@ def test_postcodes_extern_bad_column(postcode_update, postcode_table, tmp_path, insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511') extfile = tmp_path / 'xx_postcodes.csv' - extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") + extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8') postcode_update(tmp_path) @@ -274,7 +274,8 @@ def test_postcodes_extern_bad_number(postcode_update, insert_implicit_postcode, insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511') extfile = tmp_path / 'xx_postcodes.csv' - extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0") + extfile.write_text( + "postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0", encoding='utf-8') postcode_update(tmp_path) diff --git a/test/python/tools/test_refresh_address_levels.py b/test/python/tools/test_refresh_address_levels.py index f2bfdea6..17b29cd8 100644 --- a/test/python/tools/test_refresh_address_levels.py +++ b/test/python/tools/test_refresh_address_levels.py @@ -22,7 +22,7 @@ def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config): def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor): test_file = project_env.project_dir / 'address-levels.json' - test_file.write_text('[{"tags":{"place":{"sea":2}}}]') + test_file.write_text('[{"tags":{"place":{"sea":2}}}]', encoding='utf-8') load_address_levels_from_config(temp_db_conn, project_env) @@ -31,7 +31,7 @@ def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor): def test_load_ranks_from_broken_file(project_env, temp_db_conn): test_file = project_env.project_dir / 'address-levels.json' - test_file.write_text('[{"tags":"place":{"sea":2}}}]') + test_file.write_text('[{"tags":"place":{"sea":2}}}]', encoding='utf-8') with pytest.raises(json.decoder.JSONDecodeError): load_address_levels_from_config(temp_db_conn, project_env) diff --git a/test/python/tools/test_refresh_create_functions.py b/test/python/tools/test_refresh_create_functions.py index bd8724d6..f01ebd30 100644 --- a/test/python/tools/test_refresh_create_functions.py +++ b/test/python/tools/test_refresh_create_functions.py @@ -21,7 +21,7 @@ class TestCreateFunctions: def write_functions(self, content): sqlfile = self.config.lib_dir.sql / 'functions.sql' - sqlfile.write_text(content) + sqlfile.write_text(content, encoding='utf-8') def test_create_functions(self, temp_db_cursor): self.write_functions("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER diff --git a/test/python/tools/test_refresh_wiki_data.py b/test/python/tools/test_refresh_wiki_data.py index b7542fdb..6d4a727d 100644 --- a/test/python/tools/test_refresh_wiki_data.py +++ b/test/python/tools/test_refresh_wiki_data.py @@ -20,7 +20,7 @@ from nominatim_db.tools.refresh import (import_wikipedia_articles, @pytest.fixture def wiki_csv(tmp_path, sql_preprocessor): def _import(data): - with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt') as fd: + with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt', encoding='utf-8') as fd: writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title', 'importance', 'wikidata_id'], delimiter='\t', quotechar='|') diff --git a/test/python/tools/test_sp_wiki_loader.py b/test/python/tools/test_sp_wiki_loader.py index 9b937112..c51c71e9 100644 --- a/test/python/tools/test_sp_wiki_loader.py +++ b/test/python/tools/test_sp_wiki_loader.py @@ -21,7 +21,7 @@ def sp_wiki_loader(src_dir, monkeypatch, def_config): def _mock_wiki_content(lang): xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt' - return xml_test_content.read_text() + return xml_test_content.read_text(encoding='utf-8') monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content', _mock_wiki_content) diff --git a/test/python/tools/test_tiger_data.py b/test/python/tools/test_tiger_data.py index 9623e21b..65c4e929 100644 --- a/test/python/tools/test_tiger_data.py +++ b/test/python/tools/test_tiger_data.py @@ -57,11 +57,11 @@ def tiger_table(def_config, temp_db_conn, sql_preprocessor, RETURNS INTEGER AS $$ INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode) RETURNING 1 - $$ LANGUAGE SQL;""") + $$ LANGUAGE SQL;""", encoding='utf-8') (def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text( """DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER, in_endnumber INTEGER, interpolationtype TEXT, - token_info JSONB, in_postcode TEXT);""") + token_info JSONB, in_postcode TEXT);""", encoding='utf-8') return MockTigerTable(temp_db_conn) @@ -75,7 +75,7 @@ def csv_factory(tmp_path): from;to;interpolation;street;city;state;postcode;geometry {};{};{};{};{};{};{};{} """.format(hnr_from, hnr_to, interpol, street, city, state, - postcode, geometry))) + postcode, geometry)), encoding='utf-8') return _mk_file @@ -129,7 +129,7 @@ async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock, async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock, tmp_path): sqlfile = tmp_path / '1010.csv' - sqlfile.write_text("""Random text""") + sqlfile.write_text("""Random text""", encoding='utf-8') await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock()) @@ -167,7 +167,7 @@ async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock, async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock, tmp_path): tarfile = tmp_path / 'sample.tar.gz' - tarfile.write_text("""Random text""") + tarfile.write_text("""Random text""", encoding='utf-8') with pytest.raises(UsageError): await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())