mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
move module installation to legacy tokenizer
This commit is contained in:
@@ -68,12 +68,6 @@ class SetupAll:
|
|||||||
args.no_partitions,
|
args.no_partitions,
|
||||||
rouser=args.config.DATABASE_WEBUSER)
|
rouser=args.config.DATABASE_WEBUSER)
|
||||||
|
|
||||||
LOG.warning('Installing database module')
|
|
||||||
with connect(args.config.get_libpq_dsn()) as conn:
|
|
||||||
database_import.install_module(args.module_dir, args.project_dir,
|
|
||||||
args.config.DATABASE_MODULE_PATH,
|
|
||||||
conn=conn)
|
|
||||||
|
|
||||||
LOG.warning('Importing OSM data file')
|
LOG.warning('Importing OSM data file')
|
||||||
database_import.import_osm_data(Path(args.osm_file),
|
database_import.import_osm_data(Path(args.osm_file),
|
||||||
args.osm2pgsql_options(0, 1),
|
args.osm2pgsql_options(0, 1),
|
||||||
|
|||||||
@@ -1,16 +1,61 @@
|
|||||||
"""
|
"""
|
||||||
Tokenizer implementing normalisation as used before Nominatim 4.
|
Tokenizer implementing normalisation as used before Nominatim 4.
|
||||||
"""
|
"""
|
||||||
|
import logging
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
|
||||||
from nominatim.db.connection import connect
|
from nominatim.db.connection import connect
|
||||||
from nominatim.db import properties
|
from nominatim.db import properties
|
||||||
|
from nominatim.errors import UsageError
|
||||||
|
|
||||||
DBCFG_NORMALIZATION = "tokenizer_normalization"
|
DBCFG_NORMALIZATION = "tokenizer_normalization"
|
||||||
|
|
||||||
|
LOG = logging.getLogger()
|
||||||
|
|
||||||
def create(dsn, data_dir):
    """ Factory entry point of this tokenizer module.

        Builds a LegacyTokenizer from the given database DSN and data
        directory and hands it back to the caller.
    """
    tokenizer = LegacyTokenizer(dsn, data_dir)
    return tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
def _install_module(src_dir, module_dir):
    """ Put a copy of the PostgreSQL normalisation module 'nominatim.so'
        from 'src_dir' into 'module_dir'.

        The target directory is created when it does not exist yet. The
        copied file is made executable (mode 0755).

        When 'module_dir' already exists and refers to the same location
        as 'src_dir' (i.e. the installation runs from the build
        directory), nothing is copied and the module is left untouched.
    """
    if module_dir.exists():
        # Source and target are the very same directory: copying the file
        # onto itself would be pointless.
        if src_dir.samefile(module_dir):
            LOG.info('Running from build directory. Leaving database module as is.')
            return
    else:
        module_dir.mkdir()

    installed = module_dir / 'nominatim.so'
    shutil.copy(str(src_dir / 'nominatim.so'), str(installed))
    installed.chmod(0o755)

    LOG.info('Database module installed at %s', str(installed))
|
||||||
|
|
||||||
|
|
||||||
|
def _check_module(module_dir, conn):
    """ Verify that the database can load 'nominatim.so' from 'module_dir'.

        A throw-away C function backed by the module is created and
        dropped again through the given connection. Any database error
        raised while doing so is logged and re-raised as a UsageError.
    """
    with conn.cursor() as cur:
        try:
            cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
                           RETURNS text AS '{}/nominatim.so', 'transliteration'
                           LANGUAGE c IMMUTABLE STRICT;
                           DROP FUNCTION nominatim_test_import_func(text)
                        """.format(module_dir))
        except psycopg2.DatabaseError as exc:
            LOG.fatal("Error accessing database module: %s", exc)
            raise UsageError("Database module cannot be accessed.") from exc
|
||||||
|
|
||||||
|
|
||||||
class LegacyTokenizer:
|
class LegacyTokenizer:
|
||||||
""" The legacy tokenizer uses a special PostgreSQL module to normalize
|
""" The legacy tokenizer uses a special PostgreSQL module to normalize
|
||||||
names and queries. The tokenizer thus implements normalization through
|
names and queries. The tokenizer thus implements normalization through
|
||||||
@@ -29,12 +74,24 @@ class LegacyTokenizer:
|
|||||||
This copies all necessary data in the project directory to make
|
This copies all necessary data in the project directory to make
|
||||||
sure the tokenizer remains stable even over updates.
|
sure the tokenizer remains stable even over updates.
|
||||||
"""
|
"""
|
||||||
|
# Find and optionally install the PostgreSQL normalization module.
|
||||||
|
if config.DATABASE_MODULE_PATH:
|
||||||
|
LOG.info("Using custom path for database module at '%s'",
|
||||||
|
config.DATABASE_MODULE_PATH)
|
||||||
|
module_dir = config.DATABASE_MODULE_PATH
|
||||||
|
else:
|
||||||
|
_install_module(config.lib_dir.module, config.project_dir / 'module')
|
||||||
|
module_dir = config.project_dir / 'module'
|
||||||
|
|
||||||
self.normalization = config.TERM_NORMALIZATION
|
self.normalization = config.TERM_NORMALIZATION
|
||||||
|
|
||||||
# Stable configuration is saved in the database.
|
|
||||||
with connect(self.dsn) as conn:
|
with connect(self.dsn) as conn:
|
||||||
properties.set_property(conn, DBCFG_NORMALIZATION,
|
_check_module(module_dir, conn)
|
||||||
self.normalization)
|
|
||||||
|
# Stable configuration is saved in the database.
|
||||||
|
properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
def init_from_project(self):
|
def init_from_project(self):
|
||||||
|
|||||||
@@ -5,11 +5,9 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import selectors
|
import selectors
|
||||||
import subprocess
|
import subprocess
|
||||||
import shutil
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import psutil
|
import psutil
|
||||||
import psycopg2
|
|
||||||
|
|
||||||
from nominatim.db.connection import connect, get_pg_env
|
from nominatim.db.connection import connect, get_pg_env
|
||||||
from nominatim.db import utils as db_utils
|
from nominatim.db import utils as db_utils
|
||||||
@@ -89,49 +87,6 @@ def setup_extensions(conn):
|
|||||||
raise UsageError('PostGIS version is too old.')
|
raise UsageError('PostGIS version is too old.')
|
||||||
|
|
||||||
|
|
||||||
def install_module(src_dir, project_dir, module_dir, conn=None):
    """ Make the normalization module available to the database.

        Without an explicit 'module_dir', the file 'nominatim.so' is copied
        from 'src_dir' into the 'module' subdirectory of 'project_dir'
        (created on demand) and made executable. If that subdirectory
        turns out to be the same location as 'src_dir', the installation
        runs from the build directory and the module is left alone.

        With a non-empty 'module_dir', nothing is copied; the module at
        that custom location is used as is.

        If 'conn' is given, the function additionally tests that the module
        can be accessed via that database connection and raises a
        UsageError when it cannot.
    """
    if module_dir:
        LOG.info("Using custom path for database module at '%s'", module_dir)
    else:
        module_dir = project_dir / 'module'

        if module_dir.exists() and src_dir.samefile(module_dir):
            LOG.info('Running from build directory. Leaving database module as is.')
        else:
            if not module_dir.exists():
                module_dir.mkdir()

            installed = module_dir / 'nominatim.so'
            shutil.copy(str(src_dir / 'nominatim.so'), str(installed))
            installed.chmod(0o755)

            LOG.info('Database module installed at %s', str(installed))

    if conn is not None:
        # Creating and dropping a function backed by the module proves that
        # the database server is able to load it.
        with conn.cursor() as cur:
            try:
                cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
                               RETURNS text AS '{}/nominatim.so', 'transliteration'
                               LANGUAGE c IMMUTABLE STRICT;
                               DROP FUNCTION nominatim_test_import_func(text)
                            """.format(module_dir))
            except psycopg2.DatabaseError as exc:
                LOG.fatal("Error accessing database module: %s", exc)
                raise UsageError("Database module cannot be accessed.") from exc
|
|
||||||
|
|
||||||
|
|
||||||
def import_base_data(dsn, sql_dir, ignore_partitions=False):
|
def import_base_data(dsn, sql_dir, ignore_partitions=False):
|
||||||
""" Create and populate the tables with basic static data that provides
|
""" Create and populate the tables with basic static data that provides
|
||||||
the background for geocoding. Data is assumed to not yet exist.
|
the background for geocoding. Data is assumed to not yet exist.
|
||||||
|
|||||||
@@ -88,7 +88,6 @@ def test_import_bad_file(temp_db):
|
|||||||
def test_import_full(temp_db, mock_func_factory):
|
def test_import_full(temp_db, mock_func_factory):
|
||||||
mocks = [
|
mocks = [
|
||||||
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
|
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'install_module'),
|
|
||||||
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
|
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
|
||||||
mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
|
mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
|
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
|
||||||
|
|||||||
@@ -5,24 +5,78 @@ import pytest
|
|||||||
|
|
||||||
from nominatim.tokenizer import legacy_tokenizer
|
from nominatim.tokenizer import legacy_tokenizer
|
||||||
from nominatim.db import properties
|
from nominatim.db import properties
|
||||||
|
from nominatim.errors import UsageError
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def tokenizer(dsn, tmp_path, def_config, property_table):
|
def test_config(def_config, tmp_path):
|
||||||
tok = legacy_tokenizer.create(dsn, tmp_path)
|
def_config.project_dir = tmp_path / 'project'
|
||||||
tok.init_new_db(def_config)
|
def_config.project_dir.mkdir()
|
||||||
|
|
||||||
return tok
|
module_dir = tmp_path / 'module_src'
|
||||||
|
module_dir.mkdir()
|
||||||
|
(module_dir / 'nominatim.so').write_text('TEST nomiantim.so')
|
||||||
|
|
||||||
def test_init_new(dsn, tmp_path, def_config, property_table, monkeypatch, temp_db_conn):
|
def_config.lib_dir.module = module_dir
|
||||||
|
|
||||||
|
return def_config
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def tokenizer_factory(dsn, tmp_path, monkeypatch):
    """ Fixture returning a zero-argument callable. Each call creates a new
        legacy tokenizer for the test database, using the 'tokenizer'
        subdirectory of the temporary path as data directory.
    """
    data_dir = tmp_path / 'tokenizer'

    def _build():
        return legacy_tokenizer.create(dsn, data_dir)

    return _build
|
||||||
|
|
||||||
|
@pytest.fixture
def tokenizer_setup(tokenizer_factory, test_config, property_table, monkeypatch):
    """ Fixture that initialises a new tokenizer database setup from the
        test configuration, with the module access check stubbed out.
    """
    # Replace the module check with a no-op for the duration of the test.
    monkeypatch.setattr(legacy_tokenizer, '_check_module',
                        lambda _module_dir, _conn: None)

    tokenizer_factory().init_new_db(test_config)
|
||||||
|
|
||||||
|
|
||||||
|
def test_init_new(tokenizer_factory, test_config, property_table, monkeypatch, temp_db_conn):
|
||||||
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', 'xxvv')
|
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', 'xxvv')
|
||||||
|
monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
|
||||||
|
|
||||||
tok = legacy_tokenizer.create(dsn, tmp_path)
|
tok = tokenizer_factory()
|
||||||
tok.init_new_db(def_config)
|
tok.init_new_db(test_config)
|
||||||
|
|
||||||
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) == 'xxvv'
|
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) == 'xxvv'
|
||||||
|
|
||||||
|
outfile = test_config.project_dir / 'module' / 'nominatim.so'
|
||||||
|
|
||||||
def test_init_from_project(tokenizer):
|
assert outfile.exists()
|
||||||
tokenizer.init_from_project()
|
assert outfile.read_text() == 'TEST nomiantim.so'
|
||||||
|
assert outfile.stat().st_mode == 33261
|
||||||
|
|
||||||
assert tokenizer.normalization is not None
|
|
||||||
|
def test_init_module_load_failed(tokenizer_factory, test_config, property_table,
                                 monkeypatch, temp_db_conn):
    """ With the real module check in place, setting up a new database
        is expected to fail with a UsageError.
    """
    tokenizer = tokenizer_factory()

    with pytest.raises(UsageError):
        tokenizer.init_new_db(test_config)
|
||||||
|
|
||||||
|
|
||||||
|
def test_init_module_custom(tokenizer_factory, test_config, property_table,
                            monkeypatch, tmp_path):
    """ When a custom module path is configured, no 'module' directory
        must be created inside the project directory.
    """
    custom_dir = (tmp_path / 'custom').resolve()
    custom_dir.mkdir()
    custom_module = custom_dir / 'nominatim.so'
    custom_module.write_text('CUSTOM nomiantim.so')

    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', str(custom_dir))
    # The module access check is stubbed out with a no-op.
    monkeypatch.setattr(legacy_tokenizer, '_check_module',
                        lambda _module_dir, _conn: None)

    tokenizer_factory().init_new_db(test_config)

    project_module_dir = test_config.project_dir / 'module'
    assert not project_module_dir.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_init_from_project(tokenizer_setup, tokenizer_factory):
    """ A fresh tokenizer initialised from an already set-up project
        must end up with a normalization setting.
    """
    tokenizer = tokenizer_factory()
    tokenizer.init_from_project()

    assert tokenizer.normalization is not None
|
||||||
|
|||||||
@@ -80,39 +80,6 @@ def test_setup_extensions_old_postgis(temp_db_conn, monkeypatch):
|
|||||||
database_import.setup_extensions(temp_db_conn)
|
database_import.setup_extensions(temp_db_conn)
|
||||||
|
|
||||||
|
|
||||||
def test_install_module(tmp_path):
    """ Installing without a custom module path copies the module into
        '<project>/module' with identical content and mode 0755.
    """
    project_dir = tmp_path / 'project'
    project_dir.mkdir()

    source_dir = tmp_path / 'source'
    source_dir.mkdir()
    (source_dir / 'nominatim.so').write_text('TEST nomiantim.so')

    database_import.install_module(source_dir, project_dir, '')

    installed = project_dir / 'module' / 'nominatim.so'

    assert installed.exists()
    assert installed.read_text() == 'TEST nomiantim.so'
    # 33261 == 0o100755: regular file with permissions rwxr-xr-x.
    assert installed.stat().st_mode == 33261
|
|
||||||
|
|
||||||
|
|
||||||
def test_install_module_custom(tmp_path):
    """ With a custom module path given, no 'module' directory is
        created in the project directory.
    """
    module_file = tmp_path / 'nominatim.so'
    module_file.write_text('TEST nomiantim.so')

    custom_path = str(tmp_path.resolve())
    database_import.install_module(tmp_path, tmp_path, custom_path)

    assert not (tmp_path / 'module').exists()
|
|
||||||
|
|
||||||
|
|
||||||
def test_install_module_fail_access(temp_db_conn, tmp_path):
    """ A module file that the database cannot load must result in a
        UsageError reporting that the module cannot be accessed.
    """
    module_file = tmp_path / 'nominatim.so'
    module_file.write_text('TEST nomiantim.so')

    with pytest.raises(UsageError, match='.*module cannot be accessed.*'):
        database_import.install_module(tmp_path, tmp_path, '',
                                       conn=temp_db_conn)
|
|
||||||
|
|
||||||
|
|
||||||
def test_import_base_data(src_dir, temp_db, temp_db_cursor):
|
def test_import_base_data(src_dir, temp_db, temp_db_cursor):
|
||||||
temp_db_cursor.execute('CREATE EXTENSION hstore')
|
temp_db_cursor.execute('CREATE EXTENSION hstore')
|
||||||
temp_db_cursor.execute('CREATE EXTENSION postgis')
|
temp_db_cursor.execute('CREATE EXTENSION postgis')
|
||||||
|
|||||||
Reference in New Issue
Block a user