mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-09 19:44:07 +00:00
restore the tokenizer directory when missing
Automatically repopulate the tokenizer/ directory with the PHP stub and the postgresql module, when the directory is missing. This allows switching working directories and in particular running the service from a different machine than the one where it was installed. Users still need to make sure that .env files are set up correctly or they will shoot themselves in the foot. See #2515.
This commit is contained in:
@@ -27,6 +27,9 @@ def get_property(conn, name):
|
|||||||
""" Return the current value of the given propery or None if the property
|
""" Return the current value of the given propery or None if the property
|
||||||
is not set.
|
is not set.
|
||||||
"""
|
"""
|
||||||
|
if not conn.table_exists('nominatim_properties'):
|
||||||
|
return None
|
||||||
|
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
|
cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
|
||||||
(name, ))
|
(name, ))
|
||||||
|
|||||||
@@ -78,8 +78,8 @@ def get_tokenizer_for_db(config):
|
|||||||
"""
|
"""
|
||||||
basedir = config.project_dir / 'tokenizer'
|
basedir = config.project_dir / 'tokenizer'
|
||||||
if not basedir.is_dir():
|
if not basedir.is_dir():
|
||||||
LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
|
# Directory will be repopulated by tokenizer below.
|
||||||
raise UsageError('Cannot initialize tokenizer.')
|
basedir.mkdir()
|
||||||
|
|
||||||
with connect(config.get_libpq_dsn()) as conn:
|
with connect(config.get_libpq_dsn()) as conn:
|
||||||
name = properties.get_property(conn, 'tokenizer')
|
name = properties.get_property(conn, 'tokenizer')
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ class LegacyICUTokenizer(AbstractTokenizer):
|
|||||||
"""
|
"""
|
||||||
self.loader = ICURuleLoader(config)
|
self.loader = ICURuleLoader(config)
|
||||||
|
|
||||||
self._install_php(config.lib_dir.php)
|
self._install_php(config.lib_dir.php, overwrite=True)
|
||||||
self._save_config()
|
self._save_config()
|
||||||
|
|
||||||
if init_db:
|
if init_db:
|
||||||
@@ -67,6 +67,8 @@ class LegacyICUTokenizer(AbstractTokenizer):
|
|||||||
with connect(self.dsn) as conn:
|
with connect(self.dsn) as conn:
|
||||||
self.loader.load_config_from_db(conn)
|
self.loader.load_config_from_db(conn)
|
||||||
|
|
||||||
|
self._install_php(config.lib_dir.php, overwrite=False)
|
||||||
|
|
||||||
|
|
||||||
def finalize_import(self, config):
|
def finalize_import(self, config):
|
||||||
""" Do any required postprocessing to make the tokenizer data ready
|
""" Do any required postprocessing to make the tokenizer data ready
|
||||||
@@ -174,16 +176,18 @@ class LegacyICUTokenizer(AbstractTokenizer):
|
|||||||
self.loader.make_token_analysis())
|
self.loader.make_token_analysis())
|
||||||
|
|
||||||
|
|
||||||
def _install_php(self, phpdir):
|
def _install_php(self, phpdir, overwrite=True):
|
||||||
""" Install the php script for the tokenizer.
|
""" Install the php script for the tokenizer.
|
||||||
"""
|
"""
|
||||||
php_file = self.data_dir / "tokenizer.php"
|
php_file = self.data_dir / "tokenizer.php"
|
||||||
php_file.write_text(dedent(f"""\
|
|
||||||
<?php
|
if not php_file.exists() or overwrite:
|
||||||
@define('CONST_Max_Word_Frequency', 10000000);
|
php_file.write_text(dedent(f"""\
|
||||||
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
|
<?php
|
||||||
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
|
@define('CONST_Max_Word_Frequency', 10000000);
|
||||||
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
|
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
|
||||||
|
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
|
||||||
|
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
|
||||||
|
|
||||||
|
|
||||||
def _save_config(self):
|
def _save_config(self):
|
||||||
|
|||||||
@@ -107,7 +107,7 @@ class LegacyTokenizer(AbstractTokenizer):
|
|||||||
|
|
||||||
self.normalization = config.TERM_NORMALIZATION
|
self.normalization = config.TERM_NORMALIZATION
|
||||||
|
|
||||||
self._install_php(config)
|
self._install_php(config, overwrite=True)
|
||||||
|
|
||||||
with connect(self.dsn) as conn:
|
with connect(self.dsn) as conn:
|
||||||
_check_module(module_dir, conn)
|
_check_module(module_dir, conn)
|
||||||
@@ -119,12 +119,18 @@ class LegacyTokenizer(AbstractTokenizer):
|
|||||||
self._init_db_tables(config)
|
self._init_db_tables(config)
|
||||||
|
|
||||||
|
|
||||||
def init_from_project(self, _):
|
def init_from_project(self, config):
|
||||||
""" Initialise the tokenizer from the project directory.
|
""" Initialise the tokenizer from the project directory.
|
||||||
"""
|
"""
|
||||||
with connect(self.dsn) as conn:
|
with connect(self.dsn) as conn:
|
||||||
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
|
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
|
||||||
|
|
||||||
|
if not (config.project_dir / 'module' / 'nominatim.so').exists():
|
||||||
|
_install_module(config.DATABASE_MODULE_PATH,
|
||||||
|
config.lib_dir.module,
|
||||||
|
config.project_dir / 'module')
|
||||||
|
|
||||||
|
self._install_php(config, overwrite=False)
|
||||||
|
|
||||||
def finalize_import(self, config):
|
def finalize_import(self, config):
|
||||||
""" Do any required postprocessing to make the tokenizer data ready
|
""" Do any required postprocessing to make the tokenizer data ready
|
||||||
@@ -238,16 +244,18 @@ class LegacyTokenizer(AbstractTokenizer):
|
|||||||
return LegacyNameAnalyzer(self.dsn, normalizer)
|
return LegacyNameAnalyzer(self.dsn, normalizer)
|
||||||
|
|
||||||
|
|
||||||
def _install_php(self, config):
|
def _install_php(self, config, overwrite=True):
|
||||||
""" Install the php script for the tokenizer.
|
""" Install the php script for the tokenizer.
|
||||||
"""
|
"""
|
||||||
php_file = self.data_dir / "tokenizer.php"
|
php_file = self.data_dir / "tokenizer.php"
|
||||||
php_file.write_text(dedent("""\
|
|
||||||
<?php
|
if not php_file.exists() or overwrite:
|
||||||
@define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
|
php_file.write_text(dedent("""\
|
||||||
@define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
|
<?php
|
||||||
require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
|
@define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
|
||||||
""".format(config)))
|
@define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
|
||||||
|
require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
|
||||||
|
""".format(config)))
|
||||||
|
|
||||||
|
|
||||||
def _init_db_tables(self, config):
|
def _init_db_tables(self, config):
|
||||||
|
|||||||
@@ -217,7 +217,7 @@ class NominatimEnvironment:
|
|||||||
self.db_drop_database(self.api_test_db)
|
self.db_drop_database(self.api_test_db)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
|
tokenizer_factory.get_tokenizer_for_db(self.get_test_config())
|
||||||
|
|
||||||
|
|
||||||
def setup_unknown_db(self):
|
def setup_unknown_db(self):
|
||||||
|
|||||||
@@ -63,13 +63,13 @@ class TestFactory:
|
|||||||
assert tokenizer.init_state == "loaded"
|
assert tokenizer.init_state == "loaded"
|
||||||
|
|
||||||
|
|
||||||
def test_load_no_tokenizer_dir(self):
|
def test_load_repopulate_tokenizer_dir(self):
|
||||||
factory.create_tokenizer(self.config)
|
factory.create_tokenizer(self.config)
|
||||||
|
|
||||||
self.config.project_dir = self.config.project_dir / 'foo'
|
self.config.project_dir = self.config.project_dir
|
||||||
|
|
||||||
with pytest.raises(UsageError):
|
factory.get_tokenizer_for_db(self.config)
|
||||||
factory.get_tokenizer_for_db(self.config)
|
assert (self.config.project_dir / 'tokenizer').exists()
|
||||||
|
|
||||||
|
|
||||||
def test_load_missing_property(self, temp_db_cursor):
|
def test_load_missing_property(self, temp_db_cursor):
|
||||||
|
|||||||
Reference in New Issue
Block a user