restore the tokenizer directory when missing

Automatically repopulate the tokenizer/ directory with the PHP stub
and the PostgreSQL module when the directory is missing. This makes
it possible to switch working directories and, in particular, to run
the service from a different machine than the one it was installed on.
Users still need to make sure that the .env files are set up correctly
or they will shoot themselves in the foot.

See #2515.
Sarah Hoffmann
2022-03-20 11:31:42 +01:00
parent e65913d376
commit a0ed80d821
6 changed files with 39 additions and 24 deletions
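
Pieced together, the changes below implement the following recovery path in
the tokenizer factory (a simplified sketch: _import_tokenizer stands in for
the factory's lookup of the tokenizer module named in the database, the
create() call mirrors the module-level factory function each tokenizer
provides, and error handling is omitted):

    # Sketch of get_tokenizer_for_db() after this change.
    basedir = config.project_dir / 'tokenizer'
    if not basedir.is_dir():
        basedir.mkdir()                   # empty dir, repopulated below

    with connect(config.get_libpq_dsn()) as conn:
        name = properties.get_property(conn, 'tokenizer')

    tokenizer = _import_tokenizer(name).create(config.get_libpq_dsn(), basedir)
    tokenizer.init_from_project(config)   # rewrites missing PHP stub (and module)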


@@ -27,6 +27,9 @@ def get_property(conn, name):
     """ Return the current value of the given propery or None if the property
         is not set.
     """
+    if not conn.table_exists('nominatim_properties'):
+        return None
+
     with conn.cursor() as cur:
         cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
                     (name, ))
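
The new guard exists because get_property() can now be called against a
database where the import has not (yet) created any tables; returning None
keeps the "property not set" contract instead of surfacing a PostgreSQL
undefined-table error. A minimal sketch of such an existence check (a
hypothetical helper, not the project's Connection.table_exists):

    def table_exists(cur, table):
        # Probe the catalog rather than the table itself, so a missing
        # table yields False instead of an UndefinedTable error.
        cur.execute('SELECT 1 FROM pg_tables WHERE tablename = %s', (table, ))
        return cur.rowcount > 0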


@@ -78,8 +78,8 @@ def get_tokenizer_for_db(config):
     """
     basedir = config.project_dir / 'tokenizer'
     if not basedir.is_dir():
-        LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
-        raise UsageError('Cannot initialize tokenizer.')
+        # Directory will be repopulated by tokenizer below.
+        basedir.mkdir()

     with connect(config.get_libpq_dsn()) as conn:
         name = properties.get_property(conn, 'tokenizer')
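
Note that mkdir() is called without parents=True here, so only the tokenizer/
subdirectory is created; the project directory itself must already exist.
Illustrated (the path is hypothetical):

    from pathlib import Path

    basedir = Path('/srv/nominatim-project') / 'tokenizer'  # hypothetical path
    if not basedir.is_dir():
        basedir.mkdir()  # raises FileNotFoundError if the parent is missing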


@@ -51,7 +51,7 @@ class LegacyICUTokenizer(AbstractTokenizer):
         """
         self.loader = ICURuleLoader(config)

-        self._install_php(config.lib_dir.php)
+        self._install_php(config.lib_dir.php, overwrite=True)
         self._save_config()

         if init_db:
@@ -67,6 +67,8 @@ class LegacyICUTokenizer(AbstractTokenizer):
         with connect(self.dsn) as conn:
             self.loader.load_config_from_db(conn)

+        self._install_php(config.lib_dir.php, overwrite=False)
+

     def finalize_import(self, config):
         """ Do any required postprocessing to make the tokenizer data ready
@@ -174,16 +176,18 @@ class LegacyICUTokenizer(AbstractTokenizer):
                                      self.loader.make_token_analysis())


-    def _install_php(self, phpdir):
+    def _install_php(self, phpdir, overwrite=True):
         """ Install the php script for the tokenizer.
         """
         php_file = self.data_dir / "tokenizer.php"
-        php_file.write_text(dedent(f"""\
-            <?php
-            @define('CONST_Max_Word_Frequency', 10000000);
-            @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
-            @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
-            require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
+
+        if not php_file.exists() or overwrite:
+            php_file.write_text(dedent(f"""\
+                <?php
+                @define('CONST_Max_Word_Frequency', 10000000);
+                @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
+                @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
+                require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))


     def _save_config(self):
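
Both tokenizers now follow the same write-if-needed convention: overwrite=True
regenerates the stub during a fresh setup, while init_from_project() passes
overwrite=False so an existing stub is left untouched and only a missing one
is restored. The pattern in isolation (a generic sketch, not the project's
code):

    from pathlib import Path

    def install_generated_file(path: Path, content: str,
                               overwrite: bool = True) -> None:
        # Write only when regeneration is requested or the file is gone,
        # so restoring a project directory never clobbers existing files.
        if overwrite or not path.exists():
            path.write_text(content)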


@@ -107,7 +107,7 @@ class LegacyTokenizer(AbstractTokenizer):
         self.normalization = config.TERM_NORMALIZATION

-        self._install_php(config)
+        self._install_php(config, overwrite=True)

         with connect(self.dsn) as conn:
             _check_module(module_dir, conn)
@@ -119,12 +119,18 @@ class LegacyTokenizer(AbstractTokenizer):
             self._init_db_tables(config)


-    def init_from_project(self, _):
+    def init_from_project(self, config):
         """ Initialise the tokenizer from the project directory.
         """
         with connect(self.dsn) as conn:
             self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)

+        if not (config.project_dir / 'module' / 'nominatim.so').exists():
+            _install_module(config.DATABASE_MODULE_PATH,
+                            config.lib_dir.module,
+                            config.project_dir / 'module')
+
+        self._install_php(config, overwrite=False)

     def finalize_import(self, config):
         """ Do any required postprocessing to make the tokenizer data ready
@@ -238,16 +244,18 @@ class LegacyTokenizer(AbstractTokenizer):
         return LegacyNameAnalyzer(self.dsn, normalizer)


-    def _install_php(self, config):
+    def _install_php(self, config, overwrite=True):
         """ Install the php script for the tokenizer.
         """
         php_file = self.data_dir / "tokenizer.php"
-        php_file.write_text(dedent("""\
-            <?php
-            @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
-            @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
-            require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-            """.format(config)))
+
+        if not php_file.exists() or overwrite:
+            php_file.write_text(dedent("""\
+                <?php
+                @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
+                @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
+                require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+                """.format(config)))


     def _init_db_tables(self, config):


@@ -217,7 +217,7 @@ class NominatimEnvironment:
                 self.db_drop_database(self.api_test_db)
                 raise

-        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
+        tokenizer_factory.get_tokenizer_for_db(self.get_test_config())


    def setup_unknown_db(self):


@@ -63,13 +63,13 @@ class TestFactory:
         assert tokenizer.init_state == "loaded"


-    def test_load_no_tokenizer_dir(self):
+    def test_load_repopulate_tokenizer_dir(self):
         factory.create_tokenizer(self.config)

-        self.config.project_dir = self.config.project_dir / 'foo'
+        self.config.project_dir = self.config.project_dir

-        with pytest.raises(UsageError):
-            factory.get_tokenizer_for_db(self.config)
+        factory.get_tokenizer_for_db(self.config)
+        assert (self.config.project_dir / 'tokenizer').exists()


    def test_load_missing_property(self, temp_db_cursor):