move default country name creation to tokenizer

The new function is also used when a country is updated. All SQL
functions related to country names have been removed.
Sarah Hoffmann
2021-04-27 11:37:18 +02:00
parent dc700c25b6
commit bef300305e
8 changed files with 105 additions and 134 deletions
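
In outline, default country names are now generated through the tokenizer's name analyzer, both at import time and when a country is updated, rather than through PL/pgSQL helpers such as getorcreate_country (removed in the test diff below). A rough sketch of the implied interface; the class name and body here are illustrative assumptions, only name_analyzer() and add_country_names() appear in the diffs:

    class SketchNameAnalyzer:
        """ Illustrative stand-in for a real tokenizer's analyzer. """

        def add_country_names(self, country_code, names):
            # A real tokenizer would create word-table entries here; this
            # single entry point replaces the removed SQL functions.
            raise NotImplementedError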

View File

@@ -121,9 +121,8 @@ def table_factory(temp_db_cursor):
     def mk_table(name, definition='id INT', content=None):
         temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition))
         if content is not None:
-            if not isinstance(content, str):
-                content = '),('.join([str(x) for x in content])
-            temp_db_cursor.execute("INSERT INTO {} VALUES ({})".format(name, content))
+            psycopg2.extras.execute_values(
+                temp_db_cursor, "INSERT INTO {} VALUES %s".format(name), content)
     return mk_table
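
For reference, psycopg2.extras.execute_values expands an iterable of row tuples into a single multi-row INSERT, which is why callers below now pass ((0, ), (1, ), (2, )) instead of a flat (0, 1, 2). A minimal standalone example (connection DSN assumed):

    import psycopg2
    import psycopg2.extras

    conn = psycopg2.connect('dbname=test_nominatim')    # assumed DSN
    with conn.cursor() as cur:
        psycopg2.extras.execute_values(
            cur, "INSERT INTO country_name VALUES %s",
            [(0, ), (1, ), (2, )])                      # one tuple per row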
@@ -290,7 +289,7 @@ def osm2pgsql_options(temp_db):
 @pytest.fixture
 def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
-    table_factory('country_name', 'partition INT', (0, 1, 2))
+    table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
     cfg = Configuration(None, SRC_DIR.resolve() / 'settings')
     cfg.set_libdirs(module='.', osm2pgsql='.', php=SRC_DIR / 'lib-php',
                     sql=tmp_path, data=SRC_DIR / 'data')
@@ -299,9 +298,10 @@ def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
 @pytest.fixture
-def tokenizer_mock(monkeypatch, property_table, temp_db_conn, dsn):
+def tokenizer_mock(monkeypatch, property_table, temp_db_conn, tmp_path):
     """ Sets up the configuration so that the test dummy tokenizer will be
-        loaded.
+        loaded when the tokenizer factory is used. Also returns a factory
+        with which a new dummy tokenizer may be created.
     """
     monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
@@ -310,3 +310,8 @@ def tokenizer_mock(monkeypatch, property_table, temp_db_conn, dsn):
     monkeypatch.setattr(importlib, "import_module", _import_dummy)
     properties.set_property(temp_db_conn, 'tokenizer', 'dummy')
+
+    def _create_tokenizer():
+        return dummy_tokenizer.DummyTokenizer(None, None)
+
+    return _create_tokenizer
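
The fixture now yields a factory instead of patching in a ready-made tokenizer, so tests create instances on demand, as test_create_country_names does further below. A minimal usage sketch (test name hypothetical):

    def test_uses_dummy_tokenizer(tokenizer_mock):
        tokenizer = tokenizer_mock()    # fresh DummyTokenizer per call
        analyzer = tokenizer.name_analyzer()
        # ... exercise the code under test, then inspect tokenizer.analyser_cache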

View File

@@ -13,6 +13,7 @@ class DummyTokenizer:
         self.dsn = dsn
         self.data_dir = data_dir
         self.init_state = None
+        self.analyser_cache = {}

     def init_new_db(self, config):
@@ -26,7 +27,7 @@ class DummyTokenizer:
     def name_analyzer(self):
-        return DummyNameAnalyzer()
+        return DummyNameAnalyzer(self.analyser_cache)


 class DummyNameAnalyzer:
@@ -38,18 +39,20 @@ class DummyNameAnalyzer:
         self.close()

+    def __init__(self, cache):
+        self.analyser_cache = cache
+        cache['countries'] = []
+
     def close(self):
         """ Free all resources used by the analyzer.
         """
-        pass

     def add_postcodes_from_db(self):
         pass

-    def process_place(self, place):
-        """ Determine tokenizer information about the given place.
-            Returns a JSON-serialisable structure that will be handed into
-            the database via the token_info field.
-        """
+    def add_country_names(self, code, names):
+        self.analyser_cache['countries'].append((code, names))
+
+    def process_place(self, place):
+        return {}
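
Because every analyzer created by a DummyTokenizer shares the tokenizer's analyser_cache, tests can observe calls after the fact. A small illustration (assuming, as the self.close() context line suggests, that the analyzer is usable as a context manager):

    tokenizer = DummyTokenizer(None, None)

    with tokenizer.name_analyzer() as analyzer:
        analyzer.add_country_names('de', ['Deutschland', 'Germany'])

    # The call was recorded in the shared cache.
    assert tokenizer.analyser_cache['countries'] == [('de', ['Deutschland', 'Germany'])]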

View File

@@ -143,7 +143,8 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
               'location_property_tiger', 'location_property_osmline',
               'location_postcode', 'search_name', 'location_road_23')

     for table in tables:
-        table_factory(table, content=(1, 2, 3))
+        table_factory(table, content=((1, ), (2, ), (3, )))
+
         assert temp_db_cursor.table_rows(table) == 3

     database_import.truncate_data_tables(temp_db_conn)
@@ -168,31 +169,28 @@ def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_ta
     assert temp_db_cursor.table_rows('placex') == 30
     assert temp_db_cursor.table_rows('location_property_osmline') == 1

-@pytest.mark.parametrize("languages", (False, True))
-def test_create_country_names(temp_db_conn, temp_db_cursor, def_config,
-                              temp_db_with_extensions, monkeypatch, languages):
-    if languages:
-        monkeypatch.setenv('NOMINATIM_LANGUAGES', 'fr,en')
-    temp_db_cursor.execute("""CREATE FUNCTION make_standard_name (name TEXT)
-                              RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
-                           """)
-    temp_db_cursor.execute('CREATE TABLE country_name (country_code varchar(2), name hstore)')
-    temp_db_cursor.execute('CREATE TABLE word (code varchar(2))')
-    temp_db_cursor.execute("""INSERT INTO country_name VALUES ('us',
-                              '"name"=>"us","name:af"=>"us"')""")
-    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT,
-                                                                             lookup_country_code varchar(2))
-                              RETURNS INTEGER
-                              AS $$
-                              BEGIN
-                                INSERT INTO word VALUES (lookup_country_code);
-                                RETURN 5;
-                              END;
-                              $$
-                              LANGUAGE plpgsql;
-                           """)
-    database_import.create_country_names(temp_db_conn, def_config)
+@pytest.mark.parametrize("languages", (None, ' fr,en'))
+def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
+                              table_factory, tokenizer_mock, languages):
+    table_factory('country_name', 'country_code varchar(2), name hstore',
+                  content=(('us', '"name"=>"us1","name:af"=>"us2"'),
+                           ('fr', '"name"=>"Fra", "name:en"=>"Fren"')))
+
+    assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2
+
+    tokenizer = tokenizer_mock()
+
+    database_import.create_country_names(temp_db_conn, tokenizer, languages)
+
+    assert len(tokenizer.analyser_cache['countries']) == 2
+
+    result_set = {k: set(v) for k, v in tokenizer.analyser_cache['countries']}
+
     if languages:
-        assert temp_db_cursor.table_rows('word') == 4
+        assert result_set == {'us' : set(('us', 'us1', 'United States')),
+                              'fr' : set(('fr', 'Fra', 'Fren'))}
     else:
-        assert temp_db_cursor.table_rows('word') == 5
+        assert result_set == {'us' : set(('us', 'us1', 'us2', 'United States')),
+                              'fr' : set(('fr', 'Fra', 'Fren'))}
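
The rewritten test pins down the contract of database_import.create_country_names: read the country_name table, filter name:XX entries by the optional language list, always include the country code itself, and hand everything to the tokenizer's analyzer. A sketch consistent with those expectations, assuming hstore handling via psycopg2 and a hardcoded default ('United States') implied by the expected result set; this is not necessarily the commit's actual implementation:

    import psycopg2.extras

    def create_country_names(conn, tokenizer, languages=None):
        # Optional comma-separated language filter, e.g. ' fr,en' as in the test.
        if languages:
            languages = [lang.strip() for lang in languages.split(',')]

        def _include_key(key):
            # 'name' always counts; 'name:xx' only if xx passes the filter.
            return key == 'name' or \
                   (key.startswith('name:') and (not languages or key[5:] in languages))

        with conn.cursor() as cur:
            psycopg2.extras.register_hstore(cur)    # read the hstore column as a dict
            cur.execute('SELECT country_code, name FROM country_name')

            with tokenizer.name_analyzer() as analyzer:
                for code, name in cur:
                    names = [code]
                    if code == 'us':                # default name implied by the expected set
                        names.append('United States')
                    names.extend(v for k, v in (name or {}).items() if _include_key(k))
                    analyzer.add_country_names(code, names)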

View File

@@ -11,9 +11,7 @@ def sql_tmp_path(tmp_path, def_config):
     return tmp_path

 @pytest.fixture
-def conn(temp_db_conn, table_factory, monkeypatch):
-    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.')
-    table_factory('country_name', 'partition INT', (0, 1, 2))
+def conn(sql_preprocessor, temp_db_conn):
     return temp_db_conn