Added --no-replace option for special phrases import and added corresponding tests

2026-03-07 10:34:08 +00:00 · 2021-05-17 12:40:50 +02:00
parent 06aab389ed
commit 8b8dfc46eb
11 changed files with 76 additions and 31 deletions
--- a/nominatim/clicmd/special_phrases.py
+++ b/nominatim/clicmd/special_phrases.py
@@ -27,6 +27,8 @@ class ImportSpecialPhrases:
                           help='Import special phrases from the OSM wiki to the database.')
        group.add_argument('--import-from-csv', metavar='FILE',
                           help='Import special phrases from a CSV file.')
        group.add_argument('--no-replace', action='store_true',
                           help='Keep the old phrases and only add the new ones.')
    @staticmethod
    def run(args):
@@ -51,7 +53,8 @@ class ImportSpecialPhrases:
        from ..tokenizer import factory as tokenizer_factory
        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
        should_replace = not args.no_replace
        with connect(args.config.get_libpq_dsn()) as db_connection:
            SPImporter(
                args.config, args.phplib_dir, db_connection, loader
-            ).import_phrases(tokenizer)
+            ).import_phrases(tokenizer, should_replace)
--- a/nominatim/tokenizer/legacy_icu_tokenizer.py
+++ b/nominatim/tokenizer/legacy_icu_tokenizer.py
@@ -306,7 +306,7 @@ class LegacyICUNameAnalyzer:
            #                WHERE word_id is null and type = 'postcode'""")
-    def update_special_phrases(self, phrases):
+    def update_special_phrases(self, phrases, should_replace):
        """ Replace the search index for special phrases with the new phrases.
        """
        norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
@@ -345,7 +345,7 @@ class LegacyICUNameAnalyzer:
                              columns=['word', 'word_token', 'class', 'type',
                                       'operator', 'search_name_count'])
-            if to_delete:
+            if to_delete and should_replace:
                psycopg2.extras.execute_values(
                    cur,
                    """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@ -314,7 +314,7 @@ class LegacyNameAnalyzer:
                                 FROM location_postcode) x""")
-    def update_special_phrases(self, phrases):
+    def update_special_phrases(self, phrases, should_replace):
        """ Replace the search index for special phrases with the new phrases.
        """
        norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
@@ -343,7 +343,7 @@ class LegacyNameAnalyzer:
                           FROM (VALUES %s) as v(name, class, type, op))""",
                    to_add)
-            if to_delete:
+            if to_delete and should_replace:
                psycopg2.extras.execute_values(
                    cur,
                    """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
--- a/nominatim/tools/special_phrases/sp_importer.py
+++ b/nominatim/tools/special_phrases/sp_importer.py
@@ -23,7 +23,7 @@ LOG = logging.getLogger()
 class SPImporter():
    # pylint: disable-msg=too-many-instance-attributes
    """
-        Class handling the process of special phrases importations into the database.
+        Class handling the process of special phrases importation into the database.
        Take a sp loader which load the phrases from an external source.
    """
@@ -42,10 +42,14 @@ class SPImporter():
        #special phrases class/type on the wiki.
        self.table_phrases_to_delete = set()
-    def import_phrases(self, tokenizer):
+    def import_phrases(self, tokenizer, should_replace):
        """
-            Iterate through all specified languages and
+            Iterate through all SpecialPhrases extracted from the
-            extract corresponding special phrases from the wiki.
+            loader and import them into the database.
            If should_replace is set to True only the loaded phrases
            will be kept into the database. All other phrases already
            in the database will be removed.
        """
        LOG.warning('Special phrases importation starting')
        self._fetch_existing_place_classtype_tables()
@@ -60,11 +64,12 @@ class SPImporter():
                    class_type_pairs.update(result)
        self._create_place_classtype_table_and_indexes(class_type_pairs)
-        self._remove_non_existent_tables_from_db()
+        if should_replace:
            self._remove_non_existent_tables_from_db()
        self.db_connection.commit()
        with tokenizer.name_analyzer() as analyzer:
-            analyzer.update_special_phrases(self.word_phrases)
+            analyzer.update_special_phrases(self.word_phrases, should_replace)
        LOG.warning('Import done.')
        self.statistics_handler.notify_import_done()
--- a/test/python/dummy_tokenizer.py
+++ b/test/python/dummy_tokenizer.py
@@ -54,7 +54,7 @@ class DummyNameAnalyzer:
    def add_postcodes_from_db(self):
        pass
-    def update_special_phrases(self, phrases):
+    def update_special_phrases(self, phrases, should_replace):
        self.analyser_cache['special_phrases'] = phrases
    def add_country_names(self, code, names):
--- a/test/python/test_cli.py
+++ b/test/python/test_cli.py
@@ -255,18 +255,27 @@ def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
    assert bnd_mock.called == do_bnds
    assert rank_mock.called == do_ranks
-def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock):
+@pytest.mark.parametrize("no_replace", [(True), (False)])
 def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
    func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
-    call_nominatim('special-phrases', '--import-from-wiki')
+    if no_replace:
        call_nominatim('special-phrases', '--import-from-wiki', '--no-replace')
    else:
        call_nominatim('special-phrases', '--import-from-wiki')
    assert func.called == 1
-def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock):
+@pytest.mark.parametrize("no_replace", [(True), (False)])
 def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
    func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
-    testdata = Path('__file__') / '..' / '..' / 'testdb'
+    testdata = SRC_DIR / 'test' / 'testdb'
    csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
-    call_nominatim('special-phrases', '--import-from-csv', csv_path)
+
    if no_replace:
        call_nominatim('special-phrases', '--import-from-csv', csv_path, '--no-replace')
    else:
        call_nominatim('special-phrases', '--import-from-csv', csv_path)
    assert func.called == 1
--- a/test/python/test_tokenizer_legacy.py
+++ b/test/python/test_tokenizer_legacy.py
@@ -209,7 +209,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor,
        ("König bei", "amenity", "royal", "near"),
        ("Könige", "amenity", "royal", "-"),
        ("strasse", "highway", "primary", "in")
-    ])
+    ], True)
    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
                                     FROM word WHERE class != 'place'""") \
@@ -226,11 +226,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor,
    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
-    analyzer.update_special_phrases([])
+    analyzer.update_special_phrases([], True)
    assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
 def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,
                                          make_standard_name):
    """ Check that update_special_phrases() keeps the rows already present
        in the word table when it is called with an empty phrase list and
        its second argument (should_replace) set to False.
    """
    # Seed the word table with two rows whose class is not 'place'.
    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
                              VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
                                     (' bar', 'bar', 'highway', 'road', null)""")
    # Sanity check: both seeded rows are visible before the update.
    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
    # No new phrases and replacing disabled: nothing should be deleted.
    analyzer.update_special_phrases([], False)
    # The two pre-existing rows must still be there.
    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
 def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
                                      make_standard_name):
    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
@@ -243,7 +256,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
      ('prison', 'amenity', 'prison', 'in'),
      ('bar', 'highway', 'road', '-'),
      ('garden', 'leisure', 'garden', 'near')
-    ])
+    ], True)
    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
                                     FROM word WHERE class != 'place'""") \
--- a/test/python/test_tokenizer_legacy_icu.py
+++ b/test/python/test_tokenizer_legacy_icu.py
@@ -159,7 +159,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor)
            ("König bei", "amenity", "royal", "near"),
            ("Könige", "amenity", "royal", "-"),
            ("street", "highway", "primary", "in")
-        ])
+        ], True)
    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
                                     FROM word WHERE class != 'place'""") \
@@ -176,11 +176,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor):
    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
    with analyzer() as a:
-        a.update_special_phrases([])
+        a.update_special_phrases([], True)
    assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
 def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,):
    """ Check that update_special_phrases() keeps the rows already present
        in the word table when it is called with an empty phrase list and
        its second argument (should_replace) set to False.
    """
    # Seed the word table with two rows whose class is not 'place'.
    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
                              VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
                                     (' BAR', 'bar', 'highway', 'road', null)""")
    # Sanity check: both seeded rows are visible before the update.
    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
    # The analyzer fixture is called to obtain a context manager around the
    # analyzer; the update runs inside that context.
    with analyzer() as a:
        # No new phrases and replacing disabled: nothing should be deleted.
        a.update_special_phrases([], False)
    # The two pre-existing rows must still be there.
    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
 def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
                              VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
@@ -193,7 +206,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
          ('prison', 'amenity', 'prison', 'in'),
          ('bar', 'highway', 'road', '-'),
          ('garden', 'leisure', 'garden', 'near')
-        ])
+        ], True)
    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
                                     FROM word WHERE class != 'place'""") \
--- a/test/python/test_tools_import_special_phrases.py
+++ b/test/python/test_tools_import_special_phrases.py
@@ -185,8 +185,9 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases,
            tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
        )
@pytest.mark.parametrize("should_replace", [(True), (False)])
 def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
-                          placex_table, tokenizer_mock):
+                        placex_table, tokenizer_mock, should_replace):
    """
        Check that the main import_phrases() method is well executed.
        It should create the place_classtype table, the place_id and centroid indexes,
@@ -202,10 +203,10 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
            CREATE TABLE place_classtype_wrongclass_wrongtype();""")
    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
-                    mock_get_wiki_content)
+                        mock_get_wiki_content)
    tokenizer = tokenizer_mock()
-    sp_importer.import_phrases(tokenizer)
+    sp_importer.import_phrases(tokenizer, should_replace)
    assert len(tokenizer.analyser_cache['special_phrases']) == 18
@@ -216,7 +217,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
    assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
    assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
    assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
-    assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
+    if should_replace:
        assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
    #Format (query, should_return_something_bool) use to easily execute all asserts
    queries_tests = set()
@@ -237,7 +239,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
        WHERE table_schema='public'
        AND table_name = 'place_classtype_wrongclass_wrongtype';
    """
-    queries_tests.add((query_wrong_table, False))
+    if should_replace:
        queries_tests.add((query_wrong_table, False))
    with temp_db_conn.cursor() as temp_db_cursor:
        for query in queries_tests:
@@ -247,7 +250,7 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
            else:
                assert not temp_db_cursor.fetchone()
-def mock_get_wiki_content(lang):
+def mock_get_wiki_content(self, lang):
    """
        Mock the _get_wiki_content() method to return
        static xml test file content.
--- a/test/python/test_tools_sp_csv_loader.py
+++ b/test/python/test_tools_sp_csv_loader.py
@@ -16,7 +16,6 @@ def test_parse_csv(sp_csv_loader):
    phrases = sp_csv_loader.parse_csv()
    assert check_phrases_content(phrases)
 def test_next(sp_csv_loader):
    """
        Test objects returned from the next() method.
--- a/test/python/test_tools_sp_wiki_loader.py
+++ b/test/python/test_tools_sp_wiki_loader.py
@@ -47,7 +47,7 @@ def sp_wiki_loader(monkeypatch, def_config):
                        mock_get_wiki_content)
    return loader
-def mock_get_wiki_content(lang):
+def mock_get_wiki_content(self, lang):
    """
        Mock the _get_wiki_content() method to return
        static xml test file content.