forked from hans/Nominatim
restore the tokenizer directory when missing
Automatically repopulate the tokenizer/ directory with the PHP stub and the postgresql module, when the directory is missing. This allows to switch working directories and in particular run the service from a different maschine then where it was installed. Users still need to make sure that .env files are set up correctly or they will shoot themselves in the foot. See #2515.
This commit is contained in:
@@ -78,8 +78,8 @@ def get_tokenizer_for_db(config):
|
||||
"""
|
||||
basedir = config.project_dir / 'tokenizer'
|
||||
if not basedir.is_dir():
|
||||
LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
|
||||
raise UsageError('Cannot initialize tokenizer.')
|
||||
# Directory will be repopulated by tokenizer below.
|
||||
basedir.mkdir()
|
||||
|
||||
with connect(config.get_libpq_dsn()) as conn:
|
||||
name = properties.get_property(conn, 'tokenizer')
|
||||
|
||||
@@ -51,7 +51,7 @@ class LegacyICUTokenizer(AbstractTokenizer):
|
||||
"""
|
||||
self.loader = ICURuleLoader(config)
|
||||
|
||||
self._install_php(config.lib_dir.php)
|
||||
self._install_php(config.lib_dir.php, overwrite=True)
|
||||
self._save_config()
|
||||
|
||||
if init_db:
|
||||
@@ -67,6 +67,8 @@ class LegacyICUTokenizer(AbstractTokenizer):
|
||||
with connect(self.dsn) as conn:
|
||||
self.loader.load_config_from_db(conn)
|
||||
|
||||
self._install_php(config.lib_dir.php, overwrite=False)
|
||||
|
||||
|
||||
def finalize_import(self, config):
|
||||
""" Do any required postprocessing to make the tokenizer data ready
|
||||
@@ -174,16 +176,18 @@ class LegacyICUTokenizer(AbstractTokenizer):
|
||||
self.loader.make_token_analysis())
|
||||
|
||||
|
||||
def _install_php(self, phpdir):
|
||||
def _install_php(self, phpdir, overwrite=True):
|
||||
""" Install the php script for the tokenizer.
|
||||
"""
|
||||
php_file = self.data_dir / "tokenizer.php"
|
||||
php_file.write_text(dedent(f"""\
|
||||
<?php
|
||||
@define('CONST_Max_Word_Frequency', 10000000);
|
||||
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
|
||||
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
|
||||
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
|
||||
|
||||
if not php_file.exists() or overwrite:
|
||||
php_file.write_text(dedent(f"""\
|
||||
<?php
|
||||
@define('CONST_Max_Word_Frequency', 10000000);
|
||||
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
|
||||
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
|
||||
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
|
||||
|
||||
|
||||
def _save_config(self):
|
||||
|
||||
@@ -107,7 +107,7 @@ class LegacyTokenizer(AbstractTokenizer):
|
||||
|
||||
self.normalization = config.TERM_NORMALIZATION
|
||||
|
||||
self._install_php(config)
|
||||
self._install_php(config, overwrite=True)
|
||||
|
||||
with connect(self.dsn) as conn:
|
||||
_check_module(module_dir, conn)
|
||||
@@ -119,12 +119,18 @@ class LegacyTokenizer(AbstractTokenizer):
|
||||
self._init_db_tables(config)
|
||||
|
||||
|
||||
def init_from_project(self, _):
|
||||
def init_from_project(self, config):
|
||||
""" Initialise the tokenizer from the project directory.
|
||||
"""
|
||||
with connect(self.dsn) as conn:
|
||||
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
|
||||
|
||||
if not (config.project_dir / 'module' / 'nominatim.so').exists():
|
||||
_install_module(config.DATABASE_MODULE_PATH,
|
||||
config.lib_dir.module,
|
||||
config.project_dir / 'module')
|
||||
|
||||
self._install_php(config, overwrite=False)
|
||||
|
||||
def finalize_import(self, config):
|
||||
""" Do any required postprocessing to make the tokenizer data ready
|
||||
@@ -238,16 +244,18 @@ class LegacyTokenizer(AbstractTokenizer):
|
||||
return LegacyNameAnalyzer(self.dsn, normalizer)
|
||||
|
||||
|
||||
def _install_php(self, config):
|
||||
def _install_php(self, config, overwrite=True):
|
||||
""" Install the php script for the tokenizer.
|
||||
"""
|
||||
php_file = self.data_dir / "tokenizer.php"
|
||||
php_file.write_text(dedent("""\
|
||||
<?php
|
||||
@define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
|
||||
@define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
|
||||
require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
|
||||
""".format(config)))
|
||||
|
||||
if not php_file.exists() or overwrite:
|
||||
php_file.write_text(dedent("""\
|
||||
<?php
|
||||
@define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
|
||||
@define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
|
||||
require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
|
||||
""".format(config)))
|
||||
|
||||
|
||||
def _init_db_tables(self, config):
|
||||
|
||||
Reference in New Issue
Block a user