fix: specify explicit utf-8 encoding when reading and writing files

2026-02-15 02:47:59 +00:00 · 2026-02-08 19:14:57 +05:30
parent 67ecf5f6a0
commit f84b279540
22 changed files with 79 additions and 71 deletions
--- a/test/python/tools/conftest.py
+++ b/test/python/tools/conftest.py
@@ -21,7 +21,7 @@ if [ "$*" = "--version" ]; then
 else
  echo "$@"
 fi
-    """)
+    """, encoding='utf-8')
    osm2pgsql_exec.chmod(0o777)

    return dict(osm2pgsql=str(osm2pgsql_exec),
--- a/test/python/tools/test_database_import.py
+++ b/test/python/tools/test_database_import.py
@@ -96,7 +96,7 @@ def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options, c

    files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm']
    for f in files:
-        f.write_text('test')
+        f.write_text('test', encoding='utf-8')

    database_import.import_osm_data(files, osm2pgsql_options)
    captured = capfd.readouterr()
@@ -124,7 +124,7 @@ def test_import_osm_data_drop(table_factory, temp_db_cursor, tmp_path, osm2pgsql
    table_factory('planet_osm_nodes')

    flatfile = tmp_path / 'flatfile'
-    flatfile.write_text('touch')
+    flatfile.write_text('touch', encoding='utf-8')

    osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())

@@ -193,7 +193,7 @@ class TestSetupSQL:
        self.config = def_config

    def write_sql(self, fname, content):
-        (self.config.lib_dir.sql / fname).write_text(content)
+        (self.config.lib_dir.sql / fname).write_text(content, encoding='utf-8')

    @pytest.mark.parametrize("reverse", [True, False])
    def test_create_tables(self, temp_db_conn, temp_db_cursor, reverse):
--- a/test/python/tools/test_freeze.py
+++ b/test/python/tools/test_freeze.py
@@ -54,7 +54,7 @@ def test_drop_flatnode_file_file_already_gone(tmp_path):

 def test_drop_flatnode_file_delete(tmp_path):
    flatfile = tmp_path / 'flatnode.store'
-    flatfile.write_text('Some content')
+    flatfile.write_text('Some content', encoding="utf-8")

    freeze.drop_flatnode_file(flatfile)

--- a/test/python/tools/test_import_special_phrases.py
+++ b/test/python/tools/test_import_special_phrases.py
@@ -30,7 +30,7 @@ def xml_wiki_content(src_dir):
        return the content of the static xml test file.
    """
    xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
-    return xml_test_content.read_text()
+    return xml_test_content.read_text(encoding='utf-8')


@pytest.fixture
--- a/test/python/tools/test_postcodes.py
+++ b/test/python/tools/test_postcodes.py
@@ -245,7 +245,7 @@ def test_postcodes_extern(postcode_update, postcode_table, tmp_path,
    insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')

    extfile = tmp_path / 'xx_postcodes.csv'
-    extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
+    extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')

    if gzipped:
        subprocess.run(['gzip', str(extfile)])
@@ -262,7 +262,7 @@ def test_postcodes_extern_bad_column(postcode_update, postcode_table, tmp_path,
    insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')

    extfile = tmp_path / 'xx_postcodes.csv'
-    extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
+    extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')

    postcode_update(tmp_path)

@@ -274,7 +274,8 @@ def test_postcodes_extern_bad_number(postcode_update, insert_implicit_postcode,
    insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')

    extfile = tmp_path / 'xx_postcodes.csv'
-    extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")
+    extfile.write_text(
+        "postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0", encoding='utf-8')

    postcode_update(tmp_path)

--- a/test/python/tools/test_refresh_address_levels.py
+++ b/test/python/tools/test_refresh_address_levels.py
@@ -22,7 +22,7 @@ def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):

 def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
    test_file = project_env.project_dir / 'address-levels.json'
-    test_file.write_text('[{"tags":{"place":{"sea":2}}}]')
+    test_file.write_text('[{"tags":{"place":{"sea":2}}}]', encoding='utf-8')

    load_address_levels_from_config(temp_db_conn, project_env)

@@ -31,7 +31,7 @@ def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):

 def test_load_ranks_from_broken_file(project_env, temp_db_conn):
    test_file = project_env.project_dir / 'address-levels.json'
-    test_file.write_text('[{"tags":"place":{"sea":2}}}]')
+    test_file.write_text('[{"tags":"place":{"sea":2}}}]', encoding='utf-8')

    with pytest.raises(json.decoder.JSONDecodeError):
        load_address_levels_from_config(temp_db_conn, project_env)
--- a/test/python/tools/test_refresh_create_functions.py
+++ b/test/python/tools/test_refresh_create_functions.py
@@ -21,7 +21,7 @@ class TestCreateFunctions:

    def write_functions(self, content):
        sqlfile = self.config.lib_dir.sql / 'functions.sql'
-        sqlfile.write_text(content)
+        sqlfile.write_text(content, encoding='utf-8')

    def test_create_functions(self, temp_db_cursor):
        self.write_functions("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER
--- a/test/python/tools/test_refresh_wiki_data.py
+++ b/test/python/tools/test_refresh_wiki_data.py
@@ -20,7 +20,7 @@ from nominatim_db.tools.refresh import (import_wikipedia_articles,
@pytest.fixture
 def wiki_csv(tmp_path, sql_preprocessor):
    def _import(data):
-        with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt') as fd:
+        with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt', encoding='utf-8') as fd:
            writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title',
                                                    'importance', 'wikidata_id'],
                                    delimiter='\t', quotechar='|')
--- a/test/python/tools/test_sp_wiki_loader.py
+++ b/test/python/tools/test_sp_wiki_loader.py
@@ -21,7 +21,7 @@ def sp_wiki_loader(src_dir, monkeypatch, def_config):

    def _mock_wiki_content(lang):
        xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
-        return xml_test_content.read_text()
+        return xml_test_content.read_text(encoding='utf-8')

    monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content',
                        _mock_wiki_content)
--- a/test/python/tools/test_tiger_data.py
+++ b/test/python/tools/test_tiger_data.py
@@ -57,11 +57,11 @@ def tiger_table(def_config, temp_db_conn, sql_preprocessor,
           RETURNS INTEGER AS $$
            INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode)
            RETURNING 1
-           $$ LANGUAGE SQL;""")
+           $$ LANGUAGE SQL;""", encoding='utf-8')
    (def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text(
        """DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
                                 in_endnumber INTEGER, interpolationtype TEXT,
-                                 token_info JSONB, in_postcode TEXT);""")
+                                 token_info JSONB, in_postcode TEXT);""", encoding='utf-8')

    return MockTigerTable(temp_db_conn)

@@ -75,7 +75,7 @@ def csv_factory(tmp_path):
        from;to;interpolation;street;city;state;postcode;geometry
        {};{};{};{};{};{};{};{}
        """.format(hnr_from, hnr_to, interpol, street, city, state,
-                   postcode, geometry)))
+                   postcode, geometry)), encoding='utf-8')

    return _mk_file

@@ -129,7 +129,7 @@ async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
 async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
                                       tmp_path):
    sqlfile = tmp_path / '1010.csv'
-    sqlfile.write_text("""Random text""")
+    sqlfile.write_text("""Random text""", encoding='utf-8')

    await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())

@@ -167,7 +167,7 @@ async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
 async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
                                          tmp_path):
    tarfile = tmp_path / 'sample.tar.gz'
-    tarfile.write_text("""Random text""")
+    tarfile.write_text("""Random text""", encoding='utf-8')

    with pytest.raises(UsageError):
        await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())