mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-11 05:14:07 +00:00
add migration for configurable tokenizer
Adds a migration that initialises a legacy tokenizer for an existing database. The migration is not active yet as it will need completion when more functionality is added to the legacy tokenizer.
This commit is contained in:
@@ -33,12 +33,15 @@ def _import_tokenizer(name):
|
||||
raise UsageError('Tokenizer not found') from exp
|
||||
|
||||
|
||||
def create_tokenizer(config):
|
||||
def create_tokenizer(config, init_db=True, module_name=None):
|
||||
""" Create a new tokenizer as defined by the given configuration.
|
||||
|
||||
The tokenizer data and code are copied into the 'tokenizer' directory
|
||||
of the project directory and the tokenizer loaded from its new location.
|
||||
"""
|
||||
if module_name is None:
|
||||
module_name = config.TOKENIZER
|
||||
|
||||
# Create the directory for the tokenizer data
|
||||
basedir = config.project_dir / 'tokenizer'
|
||||
if not basedir.exists():
|
||||
@@ -47,13 +50,15 @@ def create_tokenizer(config):
|
||||
LOG.fatal("Tokenizer directory '%s' cannot be created.", basedir)
|
||||
raise UsageError("Tokenizer setup failed.")
|
||||
|
||||
tokenizer_module = _import_tokenizer(config.TOKENIZER)
|
||||
# Import and initialize the tokenizer.
|
||||
tokenizer_module = _import_tokenizer(module_name)
|
||||
|
||||
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
|
||||
tokenizer.init_new_db(config)
|
||||
if init_db:
|
||||
tokenizer.init_new_db(config)
|
||||
|
||||
with connect(config.get_libpq_dsn()) as conn:
|
||||
properties.set_property(conn, 'tokenizer', config.TOKENIZER)
|
||||
properties.set_property(conn, 'tokenizer', module_name)
|
||||
|
||||
return tokenizer
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ def create(dsn, data_dir):
|
||||
return LegacyTokenizer(dsn, data_dir)
|
||||
|
||||
|
||||
def _install_module(src_dir, module_dir):
|
||||
def _install_module(config_module_path, src_dir, module_dir):
|
||||
""" Copies the PostgreSQL normalisation module into the project
|
||||
directory if necessary. For historical reasons the module is
|
||||
saved in the '/module' subdirectory and not with the other tokenizer
|
||||
@@ -29,10 +29,17 @@ def _install_module(src_dir, module_dir):
|
||||
The function detects when the installation is run from the
|
||||
build directory. It doesn't touch the module in that case.
|
||||
"""
|
||||
# Custom module locations are simply used as is.
|
||||
if config_module_path:
|
||||
LOG.info("Using custom path for database module at '%s'", config_module_path)
|
||||
return config_module_path
|
||||
|
||||
# Compatibility mode for builddir installations.
|
||||
if module_dir.exists() and src_dir.samefile(module_dir):
|
||||
LOG.info('Running from build directory. Leaving database module as is.')
|
||||
return
|
||||
return module_dir
|
||||
|
||||
# In any other case install the module in the project directory.
|
||||
if not module_dir.exists():
|
||||
module_dir.mkdir()
|
||||
|
||||
@@ -42,6 +49,8 @@ def _install_module(src_dir, module_dir):
|
||||
|
||||
LOG.info('Database module installed at %s', str(destfile))
|
||||
|
||||
return module_dir
|
||||
|
||||
|
||||
def _check_module(module_dir, conn):
|
||||
with conn.cursor() as cur:
|
||||
@@ -74,24 +83,15 @@ class LegacyTokenizer:
|
||||
This copies all necessary data in the project directory to make
|
||||
sure the tokenizer remains stable even over updates.
|
||||
"""
|
||||
# Find and optionally install the PostgreSQL normalization module.
|
||||
if config.DATABASE_MODULE_PATH:
|
||||
LOG.info("Using custom path for database module at '%s'",
|
||||
config.DATABASE_MODULE_PATH)
|
||||
module_dir = config.DATABASE_MODULE_PATH
|
||||
else:
|
||||
_install_module(config.lib_dir.module, config.project_dir / 'module')
|
||||
module_dir = config.project_dir / 'module'
|
||||
module_dir = _install_module(config.DATABASE_MODULE_PATH,
|
||||
config.lib_dir.module,
|
||||
config.project_dir / 'module')
|
||||
|
||||
self.normalization = config.TERM_NORMALIZATION
|
||||
|
||||
with connect(self.dsn) as conn:
|
||||
_check_module(module_dir, conn)
|
||||
|
||||
# Stable configuration is saved in the database.
|
||||
properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
|
||||
|
||||
conn.commit()
|
||||
self._save_config(conn)
|
||||
|
||||
|
||||
def init_from_project(self):
|
||||
@@ -99,3 +99,26 @@ class LegacyTokenizer:
|
||||
"""
|
||||
with connect(self.dsn) as conn:
|
||||
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
|
||||
|
||||
|
||||
def migrate_database(self, config):
|
||||
""" Initialise the project directory of an existing database for
|
||||
use with this tokenizer.
|
||||
|
||||
This is a special migration function for updating existing databases
|
||||
to new software versions.
|
||||
"""
|
||||
module_dir = _install_module(config.DATABASE_MODULE_PATH,
|
||||
config.lib_dir.module,
|
||||
config.project_dir / 'module')
|
||||
|
||||
with connect(self.dsn) as conn:
|
||||
_check_module(module_dir, conn)
|
||||
self._save_config(conn)
|
||||
|
||||
|
||||
def _save_config(self, conn):
|
||||
""" Save the configuration that needs to remain stable for the given
|
||||
database as database properties.
|
||||
"""
|
||||
properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
|
||||
|
||||
Reference in New Issue
Block a user