fix: specify utf-8 encoding explicitly when reading and writing files

This commit is contained in:
Sri CHaRan
2026-02-08 19:14:57 +05:30
parent 67ecf5f6a0
commit f84b279540
22 changed files with 79 additions and 71 deletions

View File

@@ -21,7 +21,7 @@ if [ "$*" = "--version" ]; then
else
echo "$@"
fi
""")
""", encoding='utf-8')
osm2pgsql_exec.chmod(0o777)
return dict(osm2pgsql=str(osm2pgsql_exec),

View File

@@ -96,7 +96,7 @@ def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options, c
files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm']
for f in files:
f.write_text('test')
f.write_text('test', encoding='utf-8')
database_import.import_osm_data(files, osm2pgsql_options)
captured = capfd.readouterr()
@@ -124,7 +124,7 @@ def test_import_osm_data_drop(table_factory, temp_db_cursor, tmp_path, osm2pgsql
table_factory('planet_osm_nodes')
flatfile = tmp_path / 'flatfile'
flatfile.write_text('touch')
flatfile.write_text('touch', encoding='utf-8')
osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
@@ -193,7 +193,7 @@ class TestSetupSQL:
self.config = def_config
def write_sql(self, fname, content):
(self.config.lib_dir.sql / fname).write_text(content)
(self.config.lib_dir.sql / fname).write_text(content, encoding='utf-8')
@pytest.mark.parametrize("reverse", [True, False])
def test_create_tables(self, temp_db_conn, temp_db_cursor, reverse):

View File

@@ -54,7 +54,7 @@ def test_drop_flatnode_file_file_already_gone(tmp_path):
def test_drop_flatnode_file_delete(tmp_path):
flatfile = tmp_path / 'flatnode.store'
flatfile.write_text('Some content')
flatfile.write_text('Some content', encoding="utf-8")
freeze.drop_flatnode_file(flatfile)

View File

@@ -30,7 +30,7 @@ def xml_wiki_content(src_dir):
return the content of the static xml test file.
"""
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text()
return xml_test_content.read_text(encoding='utf-8')
@pytest.fixture

View File

@@ -245,7 +245,7 @@ def test_postcodes_extern(postcode_update, postcode_table, tmp_path,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')
if gzipped:
subprocess.run(['gzip', str(extfile)])
@@ -262,7 +262,7 @@ def test_postcodes_extern_bad_column(postcode_update, postcode_table, tmp_path,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')
postcode_update(tmp_path)
@@ -274,7 +274,8 @@ def test_postcodes_extern_bad_number(postcode_update, insert_implicit_postcode,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")
extfile.write_text(
"postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0", encoding='utf-8')
postcode_update(tmp_path)

View File

@@ -22,7 +22,7 @@ def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):
def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
test_file = project_env.project_dir / 'address-levels.json'
test_file.write_text('[{"tags":{"place":{"sea":2}}}]')
test_file.write_text('[{"tags":{"place":{"sea":2}}}]', encoding='utf-8')
load_address_levels_from_config(temp_db_conn, project_env)
@@ -31,7 +31,7 @@ def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
def test_load_ranks_from_broken_file(project_env, temp_db_conn):
test_file = project_env.project_dir / 'address-levels.json'
test_file.write_text('[{"tags":"place":{"sea":2}}}]')
test_file.write_text('[{"tags":"place":{"sea":2}}}]', encoding='utf-8')
with pytest.raises(json.decoder.JSONDecodeError):
load_address_levels_from_config(temp_db_conn, project_env)

View File

@@ -21,7 +21,7 @@ class TestCreateFunctions:
def write_functions(self, content):
sqlfile = self.config.lib_dir.sql / 'functions.sql'
sqlfile.write_text(content)
sqlfile.write_text(content, encoding='utf-8')
def test_create_functions(self, temp_db_cursor):
self.write_functions("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER

View File

@@ -20,7 +20,7 @@ from nominatim_db.tools.refresh import (import_wikipedia_articles,
@pytest.fixture
def wiki_csv(tmp_path, sql_preprocessor):
def _import(data):
with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt') as fd:
with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt', encoding='utf-8') as fd:
writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title',
'importance', 'wikidata_id'],
delimiter='\t', quotechar='|')

View File

@@ -21,7 +21,7 @@ def sp_wiki_loader(src_dir, monkeypatch, def_config):
def _mock_wiki_content(lang):
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text()
return xml_test_content.read_text(encoding='utf-8')
monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content',
_mock_wiki_content)

View File

@@ -57,11 +57,11 @@ def tiger_table(def_config, temp_db_conn, sql_preprocessor,
RETURNS INTEGER AS $$
INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode)
RETURNING 1
$$ LANGUAGE SQL;""")
$$ LANGUAGE SQL;""", encoding='utf-8')
(def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text(
"""DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
in_endnumber INTEGER, interpolationtype TEXT,
token_info JSONB, in_postcode TEXT);""")
token_info JSONB, in_postcode TEXT);""", encoding='utf-8')
return MockTigerTable(temp_db_conn)
@@ -75,7 +75,7 @@ def csv_factory(tmp_path):
from;to;interpolation;street;city;state;postcode;geometry
{};{};{};{};{};{};{};{}
""".format(hnr_from, hnr_to, interpol, street, city, state,
postcode, geometry)))
postcode, geometry)), encoding='utf-8')
return _mk_file
@@ -129,7 +129,7 @@ async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
tmp_path):
sqlfile = tmp_path / '1010.csv'
sqlfile.write_text("""Random text""")
sqlfile.write_text("""Random text""", encoding='utf-8')
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
@@ -167,7 +167,7 @@ async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path):
tarfile = tmp_path / 'sample.tar.gz'
tarfile.write_text("""Random text""")
tarfile.write_text("""Random text""", encoding='utf-8')
with pytest.raises(UsageError):
await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())