Compare commits

...

10 Commits

Author SHA1 Message Date
Sarah Hoffmann
986d303c95 Merge pull request #3980 from lonvia/security-smells
Improve SQL query assembly
2026-02-10 15:26:34 +01:00
Sarah Hoffmann
7a3ea55f3d ignore tables with odd names in SQLPreprocessor 2026-02-10 11:40:52 +01:00
Sarah Hoffmann
d10d70944d avoid f-strings in SQL creation in tests 2026-02-10 11:39:19 +01:00
Sarah Hoffmann
73590baf15 use psycopg.sql for SQL building in tokenizer 2026-02-10 11:39:19 +01:00
Sarah Hoffmann
e17d0cb5cf only allow alphanumeric and dash in DATABASE_WEBUSER
This variable is used a lot in raw SQL. Avoid injection issues.
2026-02-10 11:39:17 +01:00
Sarah Hoffmann
7a62c7d812 sanity check class names before inserting into classtype tables
The subsequent INSERT is done on an unquoted table name, making in
theory an SQL injection through an OSM value possible. In practice
this cannot happen because we check for the existence of the table.
During the creation of the classtype tables there is a sanity
check in place to disallow any table names that consist of anything
other than alphanumeric characters.
2026-02-10 11:38:26 +01:00
Sarah Hoffmann
615804b1b3 Merge pull request #3978 from jayaddison/issue-2714-prep/index-boundaries-method-signature-nitpick
Refactor: add default params to Indexer.index_boundaries
2026-02-10 09:45:29 +01:00
Sarah Hoffmann
79bbdfd55c Merge pull request #3975 from kad-link/fix/utf8-encoding-clean
Fix: Enforce explicit UTF-8 encoding in file I/O
2026-02-10 09:32:06 +01:00
James Addison
509f59b193 Refactor: add default params to index_boundaries 2026-02-09 21:36:30 +00:00
Sri CHaRan
f84b279540 fix: add utf-8 encoding in read-write files 2026-02-10 00:38:40 +05:30
29 changed files with 126 additions and 101 deletions

View File

@@ -13,7 +13,8 @@ for infile in VAGRANT_PATH.glob('Install-on-*.sh'):
outfile = f"admin/{infile.stem}.md"
title = infile.stem.replace('-', ' ')
with mkdocs_gen_files.open(outfile, "w") as outfd, infile.open() as infd:
with mkdocs_gen_files.open(outfile, "w", encoding='utf-8') as outfd, \
infile.open(encoding='utf-8') as infd:
print("#", title, file=outfd)
has_empty = False
for line in infd:

View File

@@ -672,7 +672,7 @@ CREATE OR REPLACE FUNCTION placex_insert()
AS $$
DECLARE
postcode TEXT;
result BOOLEAN;
result INT;
is_area BOOLEAN;
country_code VARCHAR(2);
diameter FLOAT;
@@ -777,11 +777,12 @@ BEGIN
-- add to tables for special search
-- Note: won't work on initial import because the classtype tables
-- do not yet exist. It won't hurt either.
classtable := 'place_classtype_' || NEW.class || '_' || NEW.type;
SELECT count(*)>0 FROM pg_tables WHERE tablename = classtable and schemaname = current_schema() INTO result;
IF result THEN
SELECT count(*) INTO result
FROM pg_tables
WHERE classtable NOT SIMILAR TO '%\W%'
AND tablename = classtable and schemaname = current_schema();
IF result > 0 THEN
EXECUTE 'INSERT INTO ' || classtable::regclass || ' (place_id, centroid) VALUES ($1,$2)'
USING NEW.place_id, NEW.centroid;
END IF;
@@ -1337,6 +1338,7 @@ CREATE OR REPLACE FUNCTION placex_delete()
AS $$
DECLARE
b BOOLEAN;
result INT;
classtable TEXT;
BEGIN
-- RAISE WARNING 'placex_delete % %',OLD.osm_type,OLD.osm_id;
@@ -1395,8 +1397,12 @@ BEGIN
-- remove from tables for special search
classtable := 'place_classtype_' || OLD.class || '_' || OLD.type;
SELECT count(*)>0 FROM pg_tables WHERE tablename = classtable and schemaname = current_schema() INTO b;
IF b THEN
SELECT count(*) INTO result
FROM pg_tables
WHERE classtable NOT SIMILAR TO '%\W%'
AND tablename = classtable and schemaname = current_schema();
IF result > 0 THEN
EXECUTE 'DELETE FROM ' || classtable::regclass || ' WHERE place_id = $1' USING OLD.place_id;
END IF;

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Nominatim configuration accessor.
@@ -12,6 +12,7 @@ import importlib.util
import logging
import os
import sys
import re
from pathlib import Path
import json
import yaml
@@ -80,6 +81,10 @@ class Configuration:
self.lib_dir = _LibDirs()
self._private_plugins: Dict[str, object] = {}
if re.fullmatch(r'[\w-]+', self.DATABASE_WEBUSER) is None:
raise UsageError("Misconfigured DATABASE_WEBUSER. "
"Only alphnumberic characters, - and _ are allowed.")
def set_libdirs(self, **kwargs: StrPath) -> None:
""" Set paths to library functions and data.
"""

View File

@@ -2,12 +2,13 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Preprocessing of SQL files.
"""
from typing import Set, Dict, Any, cast
import re
import jinja2
@@ -34,7 +35,9 @@ def _get_tables(conn: Connection) -> Set[str]:
with conn.cursor() as cur:
cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
return set((row[0] for row in list(cur)))
# paranoia check: make sure we don't get table names that cause
# an SQL injection later
return {row[0] for row in list(cur) if re.fullmatch(r'\w+', row[0])}
def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:

View File

@@ -59,7 +59,7 @@ class Indexer:
if await self.index_by_rank(0, 4) > 0:
_analyze()
if await self.index_boundaries(0, 30) > 100:
if await self.index_boundaries() > 100:
_analyze()
if await self.index_by_rank(5, 25) > 100:
@@ -74,7 +74,7 @@ class Indexer:
if not self.has_pending():
break
async def index_boundaries(self, minrank: int, maxrank: int) -> int:
async def index_boundaries(self, minrank: int = 0, maxrank: int = 30) -> int:
""" Index only administrative boundaries within the given rank range.
"""
total = 0

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tokenizer implementing normalisation as used before Nominatim 4 but using
@@ -294,13 +294,12 @@ class ICUTokenizer(AbstractTokenizer):
with connect(self.dsn) as conn:
drop_tables(conn, 'word')
with conn.cursor() as cur:
cur.execute(f"ALTER TABLE {old} RENAME TO word")
for idx in ('word_token', 'word_id'):
cur.execute(f"""ALTER INDEX idx_{old}_{idx}
RENAME TO idx_word_{idx}""")
for name, _ in WORD_TYPES:
cur.execute(f"""ALTER INDEX idx_{old}_{name}
RENAME TO idx_word_{name}""")
cur.execute(pysql.SQL("ALTER TABLE {} RENAME TO word")
.format(pysql.Identifier(old)))
for idx in ['word_token', 'word_id'] + [n[0] for n in WORD_TYPES]:
cur.execute(pysql.SQL("ALTER INDEX {} RENAME TO {}")
.format(pysql.Identifier(f"idx_{old}_{idx}"),
pysql.Identifier(f"idx_word_{idx}")))
conn.commit()

View File

@@ -159,7 +159,7 @@ class _PostcodeCollector:
if fname.is_file():
LOG.info("Using external postcode file '%s'.", fname)
return gzip.open(fname, 'rt')
return gzip.open(fname, 'rt', encoding='utf-8')
return None

View File

@@ -141,7 +141,9 @@ def import_importance_csv(dsn: str, data_file: Path) -> int:
copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata)
FROM STDIN"""
with gzip.open(str(data_file), 'rt') as fd, cur.copy(copy_cmd) as copy:
with gzip.open(
str(data_file), 'rt', encoding='utf-8') as fd, \
cur.copy(copy_cmd) as copy:
for row in csv.DictReader(fd, delimiter='\t', quotechar='|'):
wd_id = int(row['wikidata_id'][1:])
copy.write_row((row['language'],

View File

@@ -43,7 +43,7 @@ def opl_writer(tmp_path, node_grid):
def _write(data):
fname = tmp_path / f"test_osm_{nr[0]}.opl"
nr[0] += 1
with fname.open('wt') as fd:
with fname.open('wt', encoding='utf-8') as fd:
for line in data.split('\n'):
if line.startswith('n') and ' x' not in line:
coord = node_grid.get(line[1:].split(' ')[0]) \
@@ -59,7 +59,7 @@ def opl_writer(tmp_path, node_grid):
@given('the lua style file', target_fixture='osm2pgsql_options')
def set_lua_style_file(osm2pgsql_options, docstring, tmp_path):
style = tmp_path / 'custom.lua'
style.write_text(docstring)
style.write_text(docstring, encoding='utf-8')
osm2pgsql_options['osm2pgsql_style'] = str(style)
return osm2pgsql_options

View File

@@ -58,7 +58,7 @@ gb:
pattern: "(l?ld[A-Z0-9]?) ?(dll)"
output: \1 \2
""")
""", encoding='utf-8')
return project_env

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Test for loading dotenv configuration.
@@ -48,7 +48,7 @@ def test_no_project_dir(make_config):
@pytest.mark.parametrize("val", ('apache', '"apache"'))
def test_prefer_project_setting_over_default(make_config, val, tmp_path):
envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val))
envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val), encoding='utf-8')
config = make_config(tmp_path)
@@ -57,7 +57,7 @@ def test_prefer_project_setting_over_default(make_config, val, tmp_path):
def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_path):
envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n')
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody')
@@ -68,13 +68,13 @@ def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_pa
def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path):
envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n')
envfile.write_text('NOMINATIM_OSM2PGSQL_BINARY=osm2pgsql\n', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', '')
monkeypatch.setenv('NOMINATIM_OSM2PGSQL_BINARY', '')
config = make_config(tmp_path)
assert config.DATABASE_WEBUSER == ''
assert config.OSM2PGSQL_BINARY == ''
def test_get_os_env_add_defaults(make_config, monkeypatch):
@@ -232,7 +232,7 @@ def test_get_import_style_intern(make_config, src_dir, monkeypatch):
def test_get_import_style_extern_relative(make_config_path, monkeypatch):
config = make_config_path()
(config.project_dir / 'custom.style').write_text('x')
(config.project_dir / 'custom.style').write_text('x', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'custom.style')
@@ -243,7 +243,7 @@ def test_get_import_style_extern_absolute(make_config, tmp_path, monkeypatch):
config = make_config()
cfgfile = tmp_path / 'test.style'
cfgfile.write_text('x')
cfgfile.write_text('x', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', str(cfgfile))
@@ -254,10 +254,10 @@ def test_load_subconf_from_project_dir(make_config_path):
config = make_config_path()
testfile = config.project_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n')
testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: miau\ncat: muh\n')
testfile.write_text('cow: miau\ncat: muh\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml')
@@ -268,7 +268,7 @@ def test_load_subconf_from_settings_dir(make_config_path):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n')
testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml')
@@ -280,7 +280,7 @@ def test_load_subconf_empty_env_conf(make_config_path, monkeypatch):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n')
testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -291,8 +291,8 @@ def test_load_subconf_env_absolute_found(make_config_path, monkeypatch, tmp_path
monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
(tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n')
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
(tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -303,7 +303,7 @@ def test_load_subconf_env_absolute_not_found(make_config_path, monkeypatch, tmp_
monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -314,8 +314,8 @@ def test_load_subconf_env_relative_found(make_config_path, monkeypatch, location
monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
(getattr(config, location) / 'other.yaml').write_text('dog: bark\n')
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
(getattr(config, location) / 'other.yaml').write_text('dog: bark\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -326,7 +326,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -335,7 +335,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
def test_load_subconf_json(make_config_path):
config = make_config_path()
(config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}')
(config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}', encoding='utf-8')
rules = config.load_sub_configuration('test.json')
@@ -352,7 +352,7 @@ def test_load_subconf_not_found(make_config_path):
def test_load_subconf_env_unknown_format(make_config_path):
config = make_config_path()
(config.project_dir / 'test.xml').write_text('<html></html>')
(config.project_dir / 'test.xml').write_text('<html></html>', encoding='utf-8')
with pytest.raises(UsageError, match='unknown format'):
config.load_sub_configuration('test.xml')
@@ -362,8 +362,8 @@ def test_load_subconf_include_absolute(make_config_path, tmp_path):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n')
(tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n')
testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n', encoding='utf-8')
(tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml')
@@ -375,8 +375,8 @@ def test_load_subconf_include_relative(make_config_path, tmp_path, location):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.yaml\n')
(getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n')
testfile.write_text('base: !include inc.yaml\n', encoding='utf-8')
(getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml')
@@ -387,8 +387,8 @@ def test_load_subconf_include_bad_format(make_config_path):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.txt\n')
(config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n')
testfile.write_text('base: !include inc.txt\n', encoding='utf-8')
(config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
with pytest.raises(UsageError, match='Cannot handle config file format.'):
config.load_sub_configuration('test.yaml')
@@ -398,7 +398,7 @@ def test_load_subconf_include_not_found(make_config_path):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.txt\n')
testfile.write_text('base: !include inc.txt\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml')
@@ -408,9 +408,9 @@ def test_load_subconf_include_recursive(make_config_path):
config = make_config_path()
testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.yaml\n')
(config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n')
(config.config_dir / 'more.yaml').write_text('- the end\n')
testfile.write_text('base: !include inc.yaml\n', encoding='utf-8')
(config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n', encoding='utf-8')
(config.config_dir / 'more.yaml').write_text('- the end\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml')

View File

@@ -41,7 +41,7 @@ def test_load_default_module_with_hyphen(test_config):
def test_load_plugin_module(test_config, tmp_path):
(tmp_path / 'project' / 'testpath').mkdir()
(tmp_path / 'project' / 'testpath' / 'mymod.py')\
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'")
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8')
module = test_config.load_plugin_module('testpath/mymod.py', 'private.something')
@@ -49,7 +49,7 @@ def test_load_plugin_module(test_config, tmp_path):
# also test reloading module
(tmp_path / 'project' / 'testpath' / 'mymod.py')\
.write_text("def my_test_function():\n return 'hjothjorhj'")
.write_text("def my_test_function():\n return 'hjothjorhj'", encoding='utf-8')
module = test_config.load_plugin_module('testpath/mymod.py', 'private.something')
@@ -61,9 +61,9 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch):
pythonpath = tmp_path / 'priv-python'
pythonpath.mkdir()
(pythonpath / MODULE_NAME).mkdir()
(pythonpath / MODULE_NAME / '__init__.py').write_text('')
(pythonpath / MODULE_NAME / '__init__.py').write_text('', encoding='utf-8')
(pythonpath / MODULE_NAME / 'tester.py')\
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'")
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8')
monkeypatch.syspath_prepend(pythonpath)
@@ -73,7 +73,7 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch):
# also test reloading module
(pythonpath / MODULE_NAME / 'tester.py')\
.write_text("def my_test_function():\n return 'dfigjreigj'")
.write_text("def my_test_function():\n return 'dfigjreigj'", encoding='utf-8')
module = test_config.load_plugin_module(f'{MODULE_NAME}.tester', 'private.something')

View File

@@ -60,7 +60,7 @@ def temp_db(monkeypatch):
with psycopg.connect(dbname='postgres', autocommit=True) as conn:
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
cur.execute(pysql.SQL('DROP DATABASE IF EXISTS') + pysql.Identifier(name))
@pytest.fixture
@@ -104,7 +104,9 @@ def table_factory(temp_db_conn):
"""
def mk_table(name, definition='id INT', content=None):
with psycopg.ClientCursor(temp_db_conn) as cur:
cur.execute('CREATE TABLE {} ({})'.format(name, definition))
cur.execute(pysql.SQL("CREATE TABLE {} ({})")
.format(pysql.Identifier(name),
pysql.SQL(definition)))
if content:
sql = pysql.SQL("INSERT INTO {} VALUES ({})")\
.format(pysql.Identifier(name),

View File

@@ -22,7 +22,8 @@ def loaded_country(def_config):
def env_with_country_config(project_env):
def _mk_config(cfg):
(project_env.project_dir / 'country_settings.yaml').write_text(dedent(cfg))
(project_env.project_dir / 'country_settings.yaml').write_text(
dedent(cfg), encoding='utf-8')
return project_env

View File

@@ -22,7 +22,7 @@ def sql_factory(tmp_path):
BEGIN
{}
END;
$$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body))
$$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body), encoding='utf-8')
return 'test.sql'
return _mk_sql
@@ -63,7 +63,7 @@ def test_load_file_with_params(sql_preprocessor, sql_factory, temp_db_conn, temp
async def test_load_parallel_file(dsn, sql_preprocessor, tmp_path, temp_db_cursor):
(tmp_path / 'test.sql').write_text("""
CREATE TABLE foo (a TEXT);
CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);")
CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);", encoding='utf-8')
await sql_preprocessor.run_parallel_sql_file(dsn, 'test.sql', num_threads=4)

View File

@@ -15,7 +15,8 @@ from nominatim_db.errors import UsageError
def test_execute_file_success(dsn, temp_db_cursor, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')
tmpfile.write_text(
'CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile)
@@ -29,7 +30,7 @@ def test_execute_file_bad_file(dsn, tmp_path):
def test_execute_file_bad_sql(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)')
tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8')
with pytest.raises(UsageError):
db_utils.execute_file(dsn, tmpfile)
@@ -37,14 +38,14 @@ def test_execute_file_bad_sql(dsn, tmp_path):
def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)')
tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, ignore_errors=True)
def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('INSERT INTO test VALUES(4)')
tmpfile.write_text('INSERT INTO test VALUES(4)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)')
@@ -53,7 +54,7 @@ def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT)')
tmpfile.write_text('CREATE TABLE test (id INT)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)')

View File

@@ -247,7 +247,7 @@ async def test_index_boundaries(test_db, threads, test_tokenizer):
assert test_db.osmline_unindexed() == 1
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
await idx.index_boundaries(0, 30)
await idx.index_boundaries()
assert test_db.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1

View File

@@ -31,7 +31,7 @@ def test_config(project_env, tmp_path):
sqldir = tmp_path / 'sql'
sqldir.mkdir()
(sqldir / 'tokenizer').mkdir()
(sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'")
(sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'", encoding='utf-8')
project_env.lib_dir.sql = sqldir
@@ -58,7 +58,7 @@ def db_prop(temp_db_conn):
def analyzer(tokenizer_factory, test_config, monkeypatch,
temp_db_with_extensions, tmp_path):
sql = tmp_path / 'sql' / 'tokenizer' / 'icu_tokenizer.sql'
sql.write_text("SELECT 'a';")
sql.write_text("SELECT 'a';", encoding='utf-8')
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
tok = tokenizer_factory()
@@ -80,7 +80,8 @@ def analyzer(tokenizer_factory, test_config, monkeypatch,
if with_postcode:
cfgstr['token-analysis'].append({'id': '@postcode',
'analyzer': 'postcodes'})
(test_config.project_dir / 'icu_tokenizer.yaml').write_text(yaml.dump(cfgstr))
(test_config.project_dir / 'icu_tokenizer.yaml').write_text(
yaml.dump(cfgstr), encoding='utf-8')
tok.loader = nominatim_db.tokenizer.icu_rule_loader.ICURuleLoader(test_config)
return tok.name_analyzer()
@@ -190,7 +191,7 @@ def test_update_sql_functions(db_prop, temp_db_cursor,
table_factory('test', 'txt TEXT')
func_file = test_config.lib_dir.sql / 'tokenizer' / 'icu_tokenizer.sql'
func_file.write_text("""INSERT INTO test VALUES (1133)""")
func_file.write_text("""INSERT INTO test VALUES (1133)""", encoding='utf-8')
tok.update_sql_functions(test_config)

View File

@@ -27,7 +27,8 @@ class TestIcuRuleLoader:
self.project_env = project_env
def write_config(self, content):
(self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(dedent(content))
(self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(
dedent(content), encoding='utf-8')
def config_rules(self, *variants):
content = dedent("""\
@@ -119,7 +120,7 @@ class TestIcuRuleLoader:
variants:
""")
transpath = self.project_env.project_dir / ('transliteration.yaml')
transpath.write_text('- "x > y"')
transpath.write_text('- "x > y"', encoding='utf-8')
loader = ICURuleLoader(self.project_env)
rules = loader.get_transliteration_rules()

View File

@@ -21,7 +21,7 @@ if [ "$*" = "--version" ]; then
else
echo "$@"
fi
""")
""", encoding='utf-8')
osm2pgsql_exec.chmod(0o777)
return dict(osm2pgsql=str(osm2pgsql_exec),

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for functions to import a new database.
@@ -25,12 +25,14 @@ class TestDatabaseSetup:
def setup_nonexistant_db(self):
with psycopg.connect(dbname='postgres', autocommit=True) as conn:
with conn.cursor() as cur:
cur.execute(f'DROP DATABASE IF EXISTS {self.DBNAME}')
cur.execute(pysql.SQL('DROP DATABASE IF EXISTS ')
+ pysql.Identifier(self.DBNAME))
yield True
with conn.cursor() as cur:
cur.execute(f'DROP DATABASE IF EXISTS {self.DBNAME}')
cur.execute(pysql.SQL('DROP DATABASE IF EXISTS ')
+ pysql.Identifier(self.DBNAME))
@pytest.fixture
def cursor(self):
@@ -62,7 +64,7 @@ class TestDatabaseSetup:
def test_create_db_missing_ro_user(self):
with pytest.raises(UsageError, match='Missing read-only user.'):
database_import.setup_database_skeleton(f'dbname={self.DBNAME}',
rouser='sdfwkjkjgdugu2;jgsafkljas;')
rouser='sdfwkjkjgdugu2jgsafkljas')
def test_setup_extensions_old_postgis(self, monkeypatch):
monkeypatch.setattr(database_import, 'POSTGIS_REQUIRED_VERSION', (50, 50))
@@ -96,7 +98,7 @@ def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options, c
files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm']
for f in files:
f.write_text('test')
f.write_text('test', encoding='utf-8')
database_import.import_osm_data(files, osm2pgsql_options)
captured = capfd.readouterr()
@@ -124,7 +126,7 @@ def test_import_osm_data_drop(table_factory, temp_db_cursor, tmp_path, osm2pgsql
table_factory('planet_osm_nodes')
flatfile = tmp_path / 'flatfile'
flatfile.write_text('touch')
flatfile.write_text('touch', encoding='utf-8')
osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
@@ -193,7 +195,7 @@ class TestSetupSQL:
self.config = def_config
def write_sql(self, fname, content):
(self.config.lib_dir.sql / fname).write_text(content)
(self.config.lib_dir.sql / fname).write_text(content, encoding='utf-8')
@pytest.mark.parametrize("reverse", [True, False])
def test_create_tables(self, temp_db_conn, temp_db_cursor, reverse):

View File

@@ -54,7 +54,7 @@ def test_drop_flatnode_file_file_already_gone(tmp_path):
def test_drop_flatnode_file_delete(tmp_path):
flatfile = tmp_path / 'flatnode.store'
flatfile.write_text('Some content')
flatfile.write_text('Some content', encoding="utf-8")
freeze.drop_flatnode_file(flatfile)

View File

@@ -30,7 +30,7 @@ def xml_wiki_content(src_dir):
return the content of the static xml test file.
"""
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text()
return xml_test_content.read_text(encoding='utf-8')
@pytest.fixture

View File

@@ -245,7 +245,7 @@ def test_postcodes_extern(postcode_update, postcode_table, tmp_path,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')
if gzipped:
subprocess.run(['gzip', str(extfile)])
@@ -262,7 +262,7 @@ def test_postcodes_extern_bad_column(postcode_update, postcode_table, tmp_path,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')
postcode_update(tmp_path)
@@ -274,7 +274,8 @@ def test_postcodes_extern_bad_number(postcode_update, insert_implicit_postcode,
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")
extfile.write_text(
"postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0", encoding='utf-8')
postcode_update(tmp_path)

View File

@@ -22,7 +22,7 @@ def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):
def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
test_file = project_env.project_dir / 'address-levels.json'
test_file.write_text('[{"tags":{"place":{"sea":2}}}]')
test_file.write_text('[{"tags":{"place":{"sea":2}}}]', encoding='utf-8')
load_address_levels_from_config(temp_db_conn, project_env)
@@ -31,7 +31,7 @@ def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
def test_load_ranks_from_broken_file(project_env, temp_db_conn):
test_file = project_env.project_dir / 'address-levels.json'
test_file.write_text('[{"tags":"place":{"sea":2}}}]')
test_file.write_text('[{"tags":"place":{"sea":2}}}]', encoding='utf-8')
with pytest.raises(json.decoder.JSONDecodeError):
load_address_levels_from_config(temp_db_conn, project_env)

View File

@@ -21,7 +21,7 @@ class TestCreateFunctions:
def write_functions(self, content):
sqlfile = self.config.lib_dir.sql / 'functions.sql'
sqlfile.write_text(content)
sqlfile.write_text(content, encoding='utf-8')
def test_create_functions(self, temp_db_cursor):
self.write_functions("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER

View File

@@ -20,7 +20,7 @@ from nominatim_db.tools.refresh import (import_wikipedia_articles,
@pytest.fixture
def wiki_csv(tmp_path, sql_preprocessor):
def _import(data):
with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt') as fd:
with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt', encoding='utf-8') as fd:
writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title',
'importance', 'wikidata_id'],
delimiter='\t', quotechar='|')

View File

@@ -21,7 +21,7 @@ def sp_wiki_loader(src_dir, monkeypatch, def_config):
def _mock_wiki_content(lang):
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text()
return xml_test_content.read_text(encoding='utf-8')
monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content',
_mock_wiki_content)

View File

@@ -57,11 +57,11 @@ def tiger_table(def_config, temp_db_conn, sql_preprocessor,
RETURNS INTEGER AS $$
INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode)
RETURNING 1
$$ LANGUAGE SQL;""")
$$ LANGUAGE SQL;""", encoding='utf-8')
(def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text(
"""DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
in_endnumber INTEGER, interpolationtype TEXT,
token_info JSONB, in_postcode TEXT);""")
token_info JSONB, in_postcode TEXT);""", encoding='utf-8')
return MockTigerTable(temp_db_conn)
@@ -75,7 +75,7 @@ def csv_factory(tmp_path):
from;to;interpolation;street;city;state;postcode;geometry
{};{};{};{};{};{};{};{}
""".format(hnr_from, hnr_to, interpol, street, city, state,
postcode, geometry)))
postcode, geometry)), encoding='utf-8')
return _mk_file
@@ -129,7 +129,7 @@ async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
tmp_path):
sqlfile = tmp_path / '1010.csv'
sqlfile.write_text("""Random text""")
sqlfile.write_text("""Random text""", encoding='utf-8')
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
@@ -167,7 +167,7 @@ async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path):
tarfile = tmp_path / 'sample.tar.gz'
tarfile.write_text("""Random text""")
tarfile.write_text("""Random text""", encoding='utf-8')
with pytest.raises(UsageError):
await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())