mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
move index creation for word table to tokenizer
This introduces a finalization routine for the tokenizer where it can post-process the import if necessary.
This commit is contained in:
@@ -1,9 +1,6 @@
|
|||||||
-- Indices used only during search and update.
|
-- Indices used only during search and update.
|
||||||
-- These indices are created only after the indexing process is done.
|
-- These indices are created only after the indexing process is done.
|
||||||
|
|
||||||
CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
|
|
||||||
ON word USING BTREE (word_id) {{db.tablespace.search_index}};
|
|
||||||
|
|
||||||
CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id
|
CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id
|
||||||
ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
|
ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
|
||||||
|
|
||||||
|
|||||||
2
lib-sql/tokenizer/legacy_tokenizer_indices.sql
Normal file
2
lib-sql/tokenizer/legacy_tokenizer_indices.sql
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
|
||||||
|
ON word USING BTREE (word_id) {{db.tablespace.search_index}};
|
||||||
@@ -135,6 +135,7 @@ class SetupAll:
|
|||||||
LOG.warning('Create search index for default country names.')
|
LOG.warning('Create search index for default country names.')
|
||||||
database_import.create_country_names(conn, tokenizer,
|
database_import.create_country_names(conn, tokenizer,
|
||||||
args.config.LANGUAGES)
|
args.config.LANGUAGES)
|
||||||
|
tokenizer.finalize_import(args.config)
|
||||||
|
|
||||||
webdir = args.project_dir / 'website'
|
webdir = args.project_dir / 'website'
|
||||||
LOG.warning('Setup website at %s', webdir)
|
LOG.warning('Setup website at %s', webdir)
|
||||||
|
|||||||
@@ -119,6 +119,15 @@ class LegacyTokenizer:
|
|||||||
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
|
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
|
||||||
|
|
||||||
|
|
||||||
|
def finalize_import(self, config):
|
||||||
|
""" Do any required postprocessing to make the tokenizer data ready
|
||||||
|
for use.
|
||||||
|
"""
|
||||||
|
with connect(self.dsn) as conn:
|
||||||
|
sqlp = SQLPreprocessor(conn, config)
|
||||||
|
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
|
||||||
|
|
||||||
|
|
||||||
def update_sql_functions(self, config):
|
def update_sql_functions(self, config):
|
||||||
""" Reimport the SQL functions for this tokenizer.
|
""" Reimport the SQL functions for this tokenizer.
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -26,6 +26,10 @@ class DummyTokenizer:
|
|||||||
self.init_state = "loaded"
|
self.init_state = "loaded"
|
||||||
|
|
||||||
|
|
||||||
|
def finalize_import(self, _):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def name_analyzer(self):
|
def name_analyzer(self):
|
||||||
return DummyNameAnalyzer(self.analyser_cache)
|
return DummyNameAnalyzer(self.analyser_cache)
|
||||||
|
|
||||||
|
|||||||
@@ -62,13 +62,19 @@ def tokenizer_mock(monkeypatch):
|
|||||||
class DummyTokenizer:
|
class DummyTokenizer:
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
self.update_sql_functions_called = False
|
self.update_sql_functions_called = False
|
||||||
|
self.finalize_import_called = False
|
||||||
|
|
||||||
def update_sql_functions(self, *args):
|
def update_sql_functions(self, *args):
|
||||||
self.update_sql_functions_called = True
|
self.update_sql_functions_called = True
|
||||||
|
|
||||||
|
def finalize_import(self, *args):
|
||||||
|
self.finalize_import_called = True
|
||||||
|
|
||||||
tok = DummyTokenizer()
|
tok = DummyTokenizer()
|
||||||
monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' ,
|
monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' ,
|
||||||
lambda *args: tok)
|
lambda *args: tok)
|
||||||
|
monkeypatch.setattr(nominatim.tokenizer.factory, 'create_tokenizer' ,
|
||||||
|
lambda *args: tok)
|
||||||
|
|
||||||
return tok
|
return tok
|
||||||
|
|
||||||
@@ -101,7 +107,7 @@ def test_import_bad_file(temp_db):
|
|||||||
assert 1 == call_nominatim('import', '--osm-file', '.')
|
assert 1 == call_nominatim('import', '--osm-file', '.')
|
||||||
|
|
||||||
|
|
||||||
def test_import_full(temp_db, mock_func_factory):
|
def test_import_full(temp_db, mock_func_factory, tokenizer_mock):
|
||||||
mocks = [
|
mocks = [
|
||||||
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
|
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
|
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
|
||||||
@@ -113,7 +119,6 @@ def test_import_full(temp_db, mock_func_factory):
|
|||||||
mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'),
|
mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
||||||
mock_func_factory(nominatim.tokenizer.factory, 'create_tokenizer'),
|
|
||||||
mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'),
|
mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'),
|
||||||
mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
|
mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
|
||||||
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
||||||
@@ -124,6 +129,7 @@ def test_import_full(temp_db, mock_func_factory):
|
|||||||
cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
|
cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
|
||||||
|
|
||||||
assert 0 == call_nominatim('import', '--osm-file', __file__)
|
assert 0 == call_nominatim('import', '--osm-file', __file__)
|
||||||
|
assert tokenizer_mock.finalize_import_called
|
||||||
|
|
||||||
assert cf_mock.called > 1
|
assert cf_mock.called > 1
|
||||||
|
|
||||||
@@ -131,13 +137,12 @@ def test_import_full(temp_db, mock_func_factory):
|
|||||||
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
||||||
|
|
||||||
|
|
||||||
def test_import_continue_load_data(temp_db, mock_func_factory):
|
def test_import_continue_load_data(temp_db, mock_func_factory, tokenizer_mock):
|
||||||
mocks = [
|
mocks = [
|
||||||
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
|
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'load_data'),
|
mock_func_factory(nominatim.tools.database_import, 'load_data'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
||||||
mock_func_factory(nominatim.tokenizer.factory, 'create_tokenizer'),
|
|
||||||
mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
|
mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
|
||||||
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
||||||
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
||||||
@@ -145,17 +150,18 @@ def test_import_continue_load_data(temp_db, mock_func_factory):
|
|||||||
]
|
]
|
||||||
|
|
||||||
assert 0 == call_nominatim('import', '--continue', 'load-data')
|
assert 0 == call_nominatim('import', '--continue', 'load-data')
|
||||||
|
assert tokenizer_mock.finalize_import_called
|
||||||
|
|
||||||
for mock in mocks:
|
for mock in mocks:
|
||||||
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
||||||
|
|
||||||
|
|
||||||
def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn):
|
def test_import_continue_indexing(temp_db, mock_func_factory, placex_table,
|
||||||
|
temp_db_conn, tokenizer_mock):
|
||||||
mocks = [
|
mocks = [
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
||||||
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
||||||
mock_func_factory(nominatim.tokenizer.factory, 'get_tokenizer_for_db'),
|
|
||||||
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
||||||
mock_func_factory(nominatim.db.properties, 'set_property')
|
mock_func_factory(nominatim.db.properties, 'set_property')
|
||||||
]
|
]
|
||||||
@@ -172,17 +178,18 @@ def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp
|
|||||||
assert temp_db_conn.index_exists('idx_placex_pendingsector')
|
assert temp_db_conn.index_exists('idx_placex_pendingsector')
|
||||||
|
|
||||||
|
|
||||||
def test_import_continue_postprocess(temp_db, mock_func_factory):
|
def test_import_continue_postprocess(temp_db, mock_func_factory, tokenizer_mock):
|
||||||
mocks = [
|
mocks = [
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
||||||
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
||||||
mock_func_factory(nominatim.tokenizer.factory, 'get_tokenizer_for_db'),
|
|
||||||
mock_func_factory(nominatim.db.properties, 'set_property')
|
mock_func_factory(nominatim.db.properties, 'set_property')
|
||||||
]
|
]
|
||||||
|
|
||||||
assert 0 == call_nominatim('import', '--continue', 'db-postprocess')
|
assert 0 == call_nominatim('import', '--continue', 'db-postprocess')
|
||||||
|
|
||||||
|
assert tokenizer_mock.finalize_import_called
|
||||||
|
|
||||||
for mock in mocks:
|
for mock in mocks:
|
||||||
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user