restore the tokenizer directory when missing

Automatically repopulate the tokenizer/ directory with the PHP stub
and the postgresql module, when the directory is missing. This allows
switching working directories and, in particular, running the service
from a different machine than the one where it was installed.
Users still need to make sure that .env files are set up correctly
or they will shoot themselves in the foot.

See #2515.
This commit is contained in:
Sarah Hoffmann
2022-03-20 11:31:42 +01:00
parent e65913d376
commit a0ed80d821
6 changed files with 39 additions and 24 deletions

View File

@@ -78,8 +78,8 @@ def get_tokenizer_for_db(config):
"""
basedir = config.project_dir / 'tokenizer'
if not basedir.is_dir():
LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
raise UsageError('Cannot initialize tokenizer.')
# Directory will be repopulated by tokenizer below.
basedir.mkdir()
with connect(config.get_libpq_dsn()) as conn:
name = properties.get_property(conn, 'tokenizer')

View File

@@ -51,7 +51,7 @@ class LegacyICUTokenizer(AbstractTokenizer):
"""
self.loader = ICURuleLoader(config)
self._install_php(config.lib_dir.php)
self._install_php(config.lib_dir.php, overwrite=True)
self._save_config()
if init_db:
@@ -67,6 +67,8 @@ class LegacyICUTokenizer(AbstractTokenizer):
with connect(self.dsn) as conn:
self.loader.load_config_from_db(conn)
self._install_php(config.lib_dir.php, overwrite=False)
def finalize_import(self, config):
""" Do any required postprocessing to make the tokenizer data ready
@@ -174,16 +176,18 @@ class LegacyICUTokenizer(AbstractTokenizer):
self.loader.make_token_analysis())
def _install_php(self, phpdir):
def _install_php(self, phpdir, overwrite=True):
""" Install the php script for the tokenizer.
"""
php_file = self.data_dir / "tokenizer.php"
php_file.write_text(dedent(f"""\
<?php
@define('CONST_Max_Word_Frequency', 10000000);
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
if not php_file.exists() or overwrite:
php_file.write_text(dedent(f"""\
<?php
@define('CONST_Max_Word_Frequency', 10000000);
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
def _save_config(self):

View File

@@ -107,7 +107,7 @@ class LegacyTokenizer(AbstractTokenizer):
self.normalization = config.TERM_NORMALIZATION
self._install_php(config)
self._install_php(config, overwrite=True)
with connect(self.dsn) as conn:
_check_module(module_dir, conn)
@@ -119,12 +119,18 @@ class LegacyTokenizer(AbstractTokenizer):
self._init_db_tables(config)
def init_from_project(self, _):
def init_from_project(self, config):
""" Initialise the tokenizer from the project directory.
"""
with connect(self.dsn) as conn:
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
if not (config.project_dir / 'module' / 'nominatim.so').exists():
_install_module(config.DATABASE_MODULE_PATH,
config.lib_dir.module,
config.project_dir / 'module')
self._install_php(config, overwrite=False)
def finalize_import(self, config):
""" Do any required postprocessing to make the tokenizer data ready
@@ -238,16 +244,18 @@ class LegacyTokenizer(AbstractTokenizer):
return LegacyNameAnalyzer(self.dsn, normalizer)
def _install_php(self, config):
def _install_php(self, config, overwrite=True):
""" Install the php script for the tokenizer.
"""
php_file = self.data_dir / "tokenizer.php"
php_file.write_text(dedent("""\
<?php
@define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
@define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
""".format(config)))
if not php_file.exists() or overwrite:
php_file.write_text(dedent("""\
<?php
@define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
@define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
""".format(config)))
def _init_db_tables(self, config):