From 61d813bfef810fbc6f5a0ecd2c5d43aaa8bf5823 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Sun, 29 May 2022 13:53:50 +0200
Subject: [PATCH 1/6] add get_str_list() for config

Converts a config value written as a comma-sparated list into
a Python list of strings.
---
 nominatim/clicmd/setup.py              |  2 +-
 nominatim/config.py                    | 11 +++++++++++
 nominatim/tools/country_info.py        |  3 ---
 test/python/config/test_config.py      | 17 +++++++++++++++++
 test/python/tools/test_country_info.py |  2 +-
 5 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py
index 7b5f3797..f0ec358b 100644
--- a/nominatim/clicmd/setup.py
+++ b/nominatim/clicmd/setup.py
@@ -128,7 +128,7 @@ class SetupAll:
                                                   drop=args.no_updates)
             LOG.warning('Create search index for default country names.')
             country_info.create_country_names(conn, tokenizer,
-                                              args.config.LANGUAGES)
+                                              args.config.get_str_list('LANGUAGES'))
             if args.no_updates:
                 freeze.drop_update_tables(conn)
         tokenizer.finalize_import(args.config)
diff --git a/nominatim/config.py b/nominatim/config.py
index ef261079..700af328 100644
--- a/nominatim/config.py
+++ b/nominatim/config.py
@@ -99,6 +99,17 @@ class Configuration:
             raise UsageError("Configuration error.") from exp
 
 
+    def get_str_list(self, name):
+        """ Return the given configuration parameter as a list of strings.
+            The values are assumed to be given as a comma-sparated list and
+            will be stripped before returning them. On empty values None
+            is returned.
+        """
+        raw = self.__getattr__(name)
+
+        return [v.strip() for v in raw.split(',')] if raw else None
+
+
     def get_path(self, name):
         """ Return the given configuration parameter as a Path.
             If a relative path is configured, then the function converts this
diff --git a/nominatim/tools/country_info.py b/nominatim/tools/country_info.py
index ed04c2d5..0ad00171 100644
--- a/nominatim/tools/country_info.py
+++ b/nominatim/tools/country_info.py
@@ -131,9 +131,6 @@ def create_country_names(conn, tokenizer, languages=None):
         empty then only name translations for the given languages are added
         to the index.
     """
-    if languages:
-        languages = languages.split(',')
-
     def _include_key(key):
         return ':' not in key or not languages or \
                key[key.index(':') + 1:] in languages
diff --git a/test/python/config/test_config.py b/test/python/config/test_config.py
index 9f9ca880..a9cbb48d 100644
--- a/test/python/config/test_config.py
+++ b/test/python/config/test_config.py
@@ -173,6 +173,23 @@ def test_get_int_empty(make_config):
         config.get_int('DATABASE_MODULE_PATH')
 
 
+@pytest.mark.parametrize("value,outlist", [('sd', ['sd']),
+                                           ('dd,rr', ['dd', 'rr']),
+                                           (' a , b ', ['a', 'b'])])
+def test_get_str_list_success(make_config, monkeypatch, value, outlist):
+    config = make_config()
+
+    monkeypatch.setenv('NOMINATIM_MYLIST', value)
+
+    assert config.get_str_list('MYLIST') == outlist
+
+
+def test_get_str_list_empty(make_config):
+    config = make_config()
+
+    assert config.get_str_list('LANGUAGES') is None
+
+
 def test_get_path_empty(make_config):
     config = make_config()
 
diff --git a/test/python/tools/test_country_info.py b/test/python/tools/test_country_info.py
index 3c20b3e0..3f00d54e 100644
--- a/test/python/tools/test_country_info.py
+++ b/test/python/tools/test_country_info.py
@@ -49,7 +49,7 @@ def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cur
     assert temp_db_cursor.table_rows('country_osm_grid') > 100
 
 
-@pytest.mark.parametrize("languages", (None, ' fr,en'))
+@pytest.mark.parametrize("languages", (None, ['fr', 'en']))
 def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
                               table_factory, tokenizer_mock, languages, loaded_country):
 

From 042e31458917e83713cc60f73a0dc4c171db78b1 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Sun, 29 May 2022 14:06:05 +0200
Subject: [PATCH 2/6] remove the language parameter in the SPWikiLoader

Languages must always be configured through config or environment.
Also use monkeypatched environment in tests.
---
 nominatim/tools/special_phrases/sp_wiki_loader.py | 10 ++++++----
 test/python/tools/test_import_special_phrases.py  |  5 +++--
 test/python/tools/test_sp_wiki_loader.py          |  3 ++-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py
index 2f698092..b5f8db83 100644
--- a/nominatim/tools/special_phrases/sp_wiki_loader.py
+++ b/nominatim/tools/special_phrases/sp_wiki_loader.py
@@ -18,14 +18,14 @@ class SPWikiLoader(Iterator):
     """
         Handles loading of special phrases from the wiki.
     """
-    def __init__(self, config, languages=None):
+    def __init__(self, config):
         super().__init__()
         self.config = config
         # Compile the regex here to increase performances.
         self.occurence_pattern = re.compile(
             r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
         )
-        self.languages = self._load_languages() if not languages else list(languages)
+        self._load_languages()
 
     def __next__(self):
         if not self.languages:
@@ -56,12 +56,14 @@ class SPWikiLoader(Iterator):
             or default if there is no languages configured.
             The system will extract special phrases only from all specified languages.
         """
-        default_languages = [
+        if self.config.LANGUAGES:
+            self.languages = self.config.get_str_list('LANGUAGES')
+        else:
+            self.languages = [
             'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
             'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
             'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
             'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
-        return self.config.LANGUAGES.split(',') if self.config.LANGUAGES else default_languages
 
     @staticmethod
     def _get_wiki_content(lang):
diff --git a/test/python/tools/test_import_special_phrases.py b/test/python/tools/test_import_special_phrases.py
index 41017694..57664586 100644
--- a/test/python/tools/test_import_special_phrases.py
+++ b/test/python/tools/test_import_special_phrases.py
@@ -23,11 +23,12 @@ def testfile_dir(src_dir):
 
 
 @pytest.fixture
-def sp_importer(temp_db_conn, def_config):
+def sp_importer(temp_db_conn, def_config, monkeypatch):
     """
         Return an instance of SPImporter.
     """
-    loader = SPWikiLoader(def_config, ['en'])
+    monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
+    loader = SPWikiLoader(def_config)
     return SPImporter(def_config, temp_db_conn, loader)
 
 
diff --git a/test/python/tools/test_sp_wiki_loader.py b/test/python/tools/test_sp_wiki_loader.py
index bfe93c57..0a64cd56 100644
--- a/test/python/tools/test_sp_wiki_loader.py
+++ b/test/python/tools/test_sp_wiki_loader.py
@@ -24,7 +24,8 @@ def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content):
     """
         Return an instance of SPWikiLoader.
     """
-    loader = SPWikiLoader(def_config, ['en'])
+    monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
+    loader = SPWikiLoader(def_config)
     monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
                         lambda self, lang: xml_wiki_content)
     return loader

From cce0e5ea38fe3466e157651e789554d99fbdc8fe Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 30 May 2022 14:12:46 +0200
Subject: [PATCH 3/6] convert special phrase loaders to generators

Generators simplify the code quite a bit compared to the previous
Iterator approach.
---
 .../tools/special_phrases/sp_csv_loader.py    | 26 ++-----
 .../tools/special_phrases/sp_importer.py      |  9 +--
 .../tools/special_phrases/sp_wiki_loader.py   | 61 +++++++--------
 .../tools/test_import_special_phrases.py      |  4 +-
 test/python/tools/test_sp_csv_loader.py       | 76 ++++++++-----------
 test/python/tools/test_sp_wiki_loader.py      | 17 +----
 6 files changed, 76 insertions(+), 117 deletions(-)

diff --git a/nominatim/tools/special_phrases/sp_csv_loader.py b/nominatim/tools/special_phrases/sp_csv_loader.py
index 55a9d8d0..0bd93c00 100644
--- a/nominatim/tools/special_phrases/sp_csv_loader.py
+++ b/nominatim/tools/special_phrases/sp_csv_loader.py
@@ -11,43 +11,31 @@
 """
 import csv
 import os
-from collections.abc import Iterator
 from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
 from nominatim.errors import UsageError
 
-class SPCsvLoader(Iterator):
+class SPCsvLoader:
     """
         Handles loading of special phrases from external csv file.
     """
     def __init__(self, csv_path):
         super().__init__()
         self.csv_path = csv_path
-        self.has_been_read = False
 
-    def __next__(self):
-        if self.has_been_read:
-            raise StopIteration()
 
-        self.has_been_read = True
-        self.check_csv_validity()
-        return self.parse_csv()
-
-    def parse_csv(self):
-        """
-            Open and parse the given csv file.
+    def generate_phrases(self):
+        """ Open and parse the given csv file.
             Create the corresponding SpecialPhrases.
         """
-        phrases = set()
+        self._check_csv_validity()
 
         with open(self.csv_path, encoding='utf-8') as fd:
             reader = csv.DictReader(fd, delimiter=',')
             for row in reader:
-                phrases.add(
-                    SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
-                )
-        return phrases
+                yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
 
-    def check_csv_validity(self):
+
+    def _check_csv_validity(self):
         """
             Check that the csv file has the right extension.
         """
diff --git a/nominatim/tools/special_phrases/sp_importer.py b/nominatim/tools/special_phrases/sp_importer.py
index 8137142b..31bbc355 100644
--- a/nominatim/tools/special_phrases/sp_importer.py
+++ b/nominatim/tools/special_phrases/sp_importer.py
@@ -62,11 +62,10 @@ class SPImporter():
         # Store pairs of class/type for further processing
         class_type_pairs = set()
 
-        for loaded_phrases in self.sp_loader:
-            for phrase in loaded_phrases:
-                result = self._process_phrase(phrase)
-                if result:
-                    class_type_pairs.add(result)
+        for phrase in self.sp_loader.generate_phrases():
+            result = self._process_phrase(phrase)
+            if result:
+                class_type_pairs.add(result)
 
         self._create_place_classtype_table_and_indexes(class_type_pairs)
         if should_replace:
diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py
index b5f8db83..6093fa45 100644
--- a/nominatim/tools/special_phrases/sp_wiki_loader.py
+++ b/nominatim/tools/special_phrases/sp_wiki_loader.py
@@ -9,12 +9,24 @@
 """
 import re
 import logging
-from collections.abc import Iterator
 from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
 from nominatim.tools.exec_utils import get_url
 
 LOG = logging.getLogger()
-class SPWikiLoader(Iterator):
+
+def _get_wiki_content(lang):
+    """
+        Request and return the wiki page's content
+        corresponding to special phrases for a given lang.
+        Requested URL Example :
+            https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
+    """
+    url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
+          + lang.upper()
+    return get_url(url)
+
+
+class SPWikiLoader:
     """
         Handles loading of special phrases from the wiki.
     """
@@ -27,28 +39,21 @@ class SPWikiLoader(Iterator):
         )
         self._load_languages()
 
-    def __next__(self):
-        if not self.languages:
-            raise StopIteration
 
-        lang = self.languages.pop(0)
-        loaded_xml = self._get_wiki_content(lang)
-        LOG.warning('Importing phrases for lang: %s...', lang)
-        return self.parse_xml(loaded_xml)
+    def generate_phrases(self):
+        """ Download the wiki pages for the configured languages
+            and extract the phrases from the page.
+        """
+        for lang in self.languages:
+            LOG.warning('Importing phrases for lang: %s...', lang)
+            loaded_xml = _get_wiki_content(lang)
+
+            # One match will be of format [label, class, type, operator, plural]
+            matches = self.occurence_pattern.findall(loaded_xml)
+
+            for match in matches:
+                yield SpecialPhrase(match[0], match[1], match[2], match[3])
 
-    def parse_xml(self, xml):
-        """
-            Parses XML content and extracts special phrases from it.
-            Return a list of SpecialPhrase.
-        """
-        # One match will be of format [label, class, type, operator, plural]
-        matches = self.occurence_pattern.findall(xml)
-        returned_phrases = set()
-        for match in matches:
-            returned_phrases.add(
-                SpecialPhrase(match[0], match[1], match[2], match[3])
-            )
-        return returned_phrases
 
     def _load_languages(self):
         """
@@ -64,15 +69,3 @@ class SPWikiLoader(Iterator):
             'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
             'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
             'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
-
-    @staticmethod
-    def _get_wiki_content(lang):
-        """
-            Request and return the wiki page's content
-            corresponding to special phrases for a given lang.
-            Requested URL Example :
-                https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
-        """
-        url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
-              + lang.upper()
-        return get_url(url)
diff --git a/test/python/tools/test_import_special_phrases.py b/test/python/tools/test_import_special_phrases.py
index 57664586..7026a549 100644
--- a/test/python/tools/test_import_special_phrases.py
+++ b/test/python/tools/test_import_special_phrases.py
@@ -187,8 +187,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
     table_factory('place_classtype_amenity_animal_shelter')
     table_factory('place_classtype_wrongclass_wrongtype')
 
-    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
-                        lambda self, lang: xml_wiki_content)
+    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
+                        lambda lang: xml_wiki_content)
 
     tokenizer = tokenizer_mock()
     sp_importer.import_phrases(tokenizer, should_replace)
diff --git a/test/python/tools/test_sp_csv_loader.py b/test/python/tools/test_sp_csv_loader.py
index 6f5670f0..b5069a52 100644
--- a/test/python/tools/test_sp_csv_loader.py
+++ b/test/python/tools/test_sp_csv_loader.py
@@ -12,50 +12,6 @@ import pytest
 from nominatim.errors import UsageError
 from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
 
-def test_parse_csv(sp_csv_loader):
-    """
-        Test method parse_csv()
-        Should return the right SpecialPhrase objects.
-    """
-    phrases = sp_csv_loader.parse_csv()
-    assert check_phrases_content(phrases)
-
-def test_next(sp_csv_loader):
-    """
-        Test objects returned from the next() method.
-        It should return all SpecialPhrases objects of
-        the sp_csv_test.csv special phrases.
-    """
-    phrases = next(sp_csv_loader)
-    assert check_phrases_content(phrases)
-
-def test_check_csv_validity(sp_csv_loader):
-    """
-        Test method check_csv_validity()
-        It should raise an exception when file with a
-        different exception than .csv is given.
-    """
-    sp_csv_loader.csv_path = 'test.csv'
-    sp_csv_loader.check_csv_validity()
-    sp_csv_loader.csv_path = 'test.wrong'
-    with pytest.raises(UsageError):
-        assert sp_csv_loader.check_csv_validity()
-
-def check_phrases_content(phrases):
-    """
-        Asserts that the given phrases list contains
-        the right phrases of the sp_csv_test.csv special phrases.
-    """
-    return  len(phrases) > 1 \
-            and any(p.p_label == 'Billboard'
-                    and p.p_class == 'advertising'
-                    and p.p_type == 'billboard'
-                    and p.p_operator == '-' for p in phrases) \
-            and any(p.p_label == 'Zip Lines'
-                    and p.p_class == 'aerialway'
-                    and p.p_type == 'zip_line'
-                    and p.p_operator == '-' for p in phrases)
-
 @pytest.fixture
 def sp_csv_loader(src_dir):
     """
@@ -64,3 +20,35 @@ def sp_csv_loader(src_dir):
     csv_path = (src_dir / 'test' / 'testdata' / 'sp_csv_test.csv').resolve()
     loader = SPCsvLoader(csv_path)
     return loader
+
+
+def test_generate_phrases(sp_csv_loader):
+    """
+        Test method parse_csv()
+        Should return the right SpecialPhrase objects.
+    """
+    phrases = list(sp_csv_loader.generate_phrases())
+
+    assert len(phrases) == 41
+    assert len(set(phrases)) == 41
+
+    assert any(p.p_label == 'Billboard'
+               and p.p_class == 'advertising'
+               and p.p_type == 'billboard'
+               and p.p_operator == '-' for p in phrases)
+    assert any(p.p_label == 'Zip Lines'
+               and p.p_class == 'aerialway'
+               and p.p_type == 'zip_line'
+               and p.p_operator == '-' for p in phrases)
+
+
+def test_invalid_cvs_file():
+    """
+        Test method check_csv_validity()
+        It should raise an exception when file with a
+        different exception than .csv is given.
+    """
+    loader = SPCsvLoader('test.wrong')
+
+    with pytest.raises(UsageError, match='not a csv file'):
+        next(loader.generate_phrases())
diff --git a/test/python/tools/test_sp_wiki_loader.py b/test/python/tools/test_sp_wiki_loader.py
index 0a64cd56..5bd45de3 100644
--- a/test/python/tools/test_sp_wiki_loader.py
+++ b/test/python/tools/test_sp_wiki_loader.py
@@ -26,27 +26,18 @@ def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content):
     """
     monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
     loader = SPWikiLoader(def_config)
-    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
-                        lambda self, lang: xml_wiki_content)
+    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
+                        lambda lang: xml_wiki_content)
     return loader
 
 
-def test_parse_xml(sp_wiki_loader, xml_wiki_content):
-    """
-        Test method parse_xml()
-        Should return the right SpecialPhrase objects.
-    """
-    phrases = sp_wiki_loader.parse_xml(xml_wiki_content)
-    check_phrases_content(phrases)
-
-
-def test_next(sp_wiki_loader):
+def test_generate_phrases(sp_wiki_loader):
     """
         Test objects returned from the next() method.
         It should return all SpecialPhrases objects of
         the 'en' special phrases.
     """
-    phrases = next(sp_wiki_loader)
+    phrases = list(sp_wiki_loader.generate_phrases())
     check_phrases_content(phrases)
 
 def check_phrases_content(phrases):

From e828d0d3f79400b3f3541b38d3a7d4de5d9cfc35 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 30 May 2022 14:32:36 +0200
Subject: [PATCH 4/6] move quoting hack to wiki loader

The bad quotes around the type for special phrases
specifically occure in the Wiki pages, so it should be
removed by the loader and not in the generic SpecialPhrase
object.
---
 .../tools/special_phrases/sp_wiki_loader.py   |  7 +++++-
 .../tools/special_phrases/special_phrase.py   |  4 +---
 test/python/tools/test_sp_wiki_loader.py      | 23 ++++++-------------
 .../testdata/special_phrases_test_content.txt |  2 +-
 4 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py
index 6093fa45..ca4758ac 100644
--- a/nominatim/tools/special_phrases/sp_wiki_loader.py
+++ b/nominatim/tools/special_phrases/sp_wiki_loader.py
@@ -37,6 +37,8 @@ class SPWikiLoader:
         self.occurence_pattern = re.compile(
             r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
         )
+        # Hack around a bug where building=yes was imported with quotes into the wiki
+        self.type_fix_pattern = re.compile(r'\"|&quot;')
         self._load_languages()
 
 
@@ -52,7 +54,10 @@ class SPWikiLoader:
             matches = self.occurence_pattern.findall(loaded_xml)
 
             for match in matches:
-                yield SpecialPhrase(match[0], match[1], match[2], match[3])
+                yield SpecialPhrase(match[0],
+                                    match[1],
+                                    self.type_fix_pattern.sub('', match[2]),
+                                    match[3])
 
 
     def _load_languages(self):
diff --git a/nominatim/tools/special_phrases/special_phrase.py b/nominatim/tools/special_phrases/special_phrase.py
index dc7f69fe..d9bf9e58 100644
--- a/nominatim/tools/special_phrases/special_phrase.py
+++ b/nominatim/tools/special_phrases/special_phrase.py
@@ -10,8 +10,6 @@
     This class is a model used to transfer a special phrase through
     the process of load and importation.
 """
-import re
-
 class SpecialPhrase():
     """
         Model representing a special phrase.
@@ -20,7 +18,7 @@ class SpecialPhrase():
         self.p_label = p_label.strip()
         self.p_class = p_class.strip()
         # Hack around a bug where building=yes was imported with quotes into the wiki
-        self.p_type = re.sub(r'\"|&quot;', '', p_type.strip())
+        self.p_type = p_type.strip()
         # Needed if some operator in the wiki are not written in english
         p_operator = p_operator.strip().lower()
         self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
diff --git a/test/python/tools/test_sp_wiki_loader.py b/test/python/tools/test_sp_wiki_loader.py
index 5bd45de3..2f47734e 100644
--- a/test/python/tools/test_sp_wiki_loader.py
+++ b/test/python/tools/test_sp_wiki_loader.py
@@ -10,24 +10,21 @@
 import pytest
 from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader
 
-@pytest.fixture
-def xml_wiki_content(src_dir):
-    """
-        return the content of the static xml test file.
-    """
-    xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
-    return xml_test_content.read_text()
-
 
 @pytest.fixture
-def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content):
+def sp_wiki_loader(src_dir, monkeypatch, def_config):
     """
         Return an instance of SPWikiLoader.
     """
     monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
     loader = SPWikiLoader(def_config)
+
+    def _mock_wiki_content(lang):
+        xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
+        return xml_test_content.read_text()
+
     monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
-                        lambda lang: xml_wiki_content)
+                        _mock_wiki_content)
     return loader
 
 
@@ -38,13 +35,7 @@ def test_generate_phrases(sp_wiki_loader):
         the 'en' special phrases.
     """
     phrases = list(sp_wiki_loader.generate_phrases())
-    check_phrases_content(phrases)
 
-def check_phrases_content(phrases):
-    """
-        Asserts that the given phrases list contains
-        the right phrases of the 'en' special phrases.
-    """
     assert set((p.p_label, p.p_class, p.p_type, p.p_operator) for p in phrases) ==\
               {('Zip Line', 'aerialway', 'zip_line', '-'),
                ('Zip Lines', 'aerialway', 'zip_line', '-'),
diff --git a/test/testdata/special_phrases_test_content.txt b/test/testdata/special_phrases_test_content.txt
index e790ca58..e5f340b9 100644
--- a/test/testdata/special_phrases_test_content.txt
+++ b/test/testdata/special_phrases_test_content.txt
@@ -70,7 +70,7 @@
 <model>wikitext</model>
 <format>text/x-wiki</format>
 <text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve">
-== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]]
+== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || "embassy" || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]]
 </text>
 <sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1>
 </revision>

From 46689df668f7c074e3a5ad661bac9b50bc52a0b5 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 30 May 2022 16:30:41 +0200
Subject: [PATCH 5/6] custom comparison for SpecialPhrase

Duplicate elemination only works when a custom hash/equal function
is implemented that is based on the members.
---
 nominatim/tools/special_phrases/special_phrase.py | 14 +++++++++++++-
 test/python/tools/test_import_special_phrases.py  |  5 -----
 test/python/tools/test_sp_csv_loader.py           | 13 ++++---------
 test/testdata/sp_csv_test.csv                     |  1 +
 4 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/nominatim/tools/special_phrases/special_phrase.py b/nominatim/tools/special_phrases/special_phrase.py
index d9bf9e58..16935ccf 100644
--- a/nominatim/tools/special_phrases/special_phrase.py
+++ b/nominatim/tools/special_phrases/special_phrase.py
@@ -10,7 +10,7 @@
     This class is a model used to transfer a special phrase through
     the process of load and importation.
 """
-class SpecialPhrase():
+class SpecialPhrase:
     """
         Model representing a special phrase.
     """
@@ -22,3 +22,15 @@ class SpecialPhrase():
         # Needed if some operator in the wiki are not written in english
         p_operator = p_operator.strip().lower()
         self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
+
+    def __eq__(self, other):
+        if not isinstance(other, SpecialPhrase):
+            return False
+
+        return self.p_label == other.p_label \
+               and self.p_class == other.p_class \
+               and self.p_type == other.p_type \
+               and self.p_operator == other.p_operator
+
+    def __hash__(self):
+        return hash((self.p_label, self.p_class, self.p_type, self.p_operator))
diff --git a/test/python/tools/test_import_special_phrases.py b/test/python/tools/test_import_special_phrases.py
index 7026a549..0dcf549c 100644
--- a/test/python/tools/test_import_special_phrases.py
+++ b/test/python/tools/test_import_special_phrases.py
@@ -17,11 +17,6 @@ from nominatim.errors import UsageError
 
 from cursor import CursorForTesting
 
-@pytest.fixture
-def testfile_dir(src_dir):
-    return src_dir / 'test' / 'testfiles'
-
-
 @pytest.fixture
 def sp_importer(temp_db_conn, def_config, monkeypatch):
     """
diff --git a/test/python/tools/test_sp_csv_loader.py b/test/python/tools/test_sp_csv_loader.py
index b5069a52..49d5a853 100644
--- a/test/python/tools/test_sp_csv_loader.py
+++ b/test/python/tools/test_sp_csv_loader.py
@@ -11,6 +11,7 @@ import pytest
 
 from nominatim.errors import UsageError
 from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
+from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
 
 @pytest.fixture
 def sp_csv_loader(src_dir):
@@ -29,17 +30,11 @@ def test_generate_phrases(sp_csv_loader):
     """
     phrases = list(sp_csv_loader.generate_phrases())
 
-    assert len(phrases) == 41
+    assert len(phrases) == 42
     assert len(set(phrases)) == 41
 
-    assert any(p.p_label == 'Billboard'
-               and p.p_class == 'advertising'
-               and p.p_type == 'billboard'
-               and p.p_operator == '-' for p in phrases)
-    assert any(p.p_label == 'Zip Lines'
-               and p.p_class == 'aerialway'
-               and p.p_type == 'zip_line'
-               and p.p_operator == '-' for p in phrases)
+    assert SpecialPhrase('Billboard', 'advertising', 'billboard', '-') in phrases
+    assert SpecialPhrase('Zip Lines', 'aerialway', 'zip_line', '-') in phrases
 
 
 def test_invalid_cvs_file():
diff --git a/test/testdata/sp_csv_test.csv b/test/testdata/sp_csv_test.csv
index 3dab967b..147526de 100644
--- a/test/testdata/sp_csv_test.csv
+++ b/test/testdata/sp_csv_test.csv
@@ -18,6 +18,7 @@ Zipline near,aerialway,zip_line,near,N
 Ziplines near,aerialway,zip_line,near,Y 
 Zipwire,aerialway,zip_line,-,N 
 Zipwires,aerialway,zip_line,-,Y 
+Zipwires,aerialway,zip_line,name,Y 
 Zipwire in,aerialway,zip_line,in,N 
 Zipwires in,aerialway,zip_line,in,Y 
 Zipwire near,aerialway,zip_line,near,N 

From b5ac5462752bae0209f9fb255fe98b82e6016873 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 31 May 2022 09:12:26 +0200
Subject: [PATCH 6/6] CI: always use the latest version of pylint

This makes it easier to reproduce issues locally.
---
 .github/workflows/ci-tests.yml          | 7 +++++--
 docs/develop/Development-Environment.md | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index a08a995f..4ce14f93 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -81,13 +81,16 @@ jobs:
                   ubuntu: ${{ matrix.ubuntu }}
 
             - name: Install test prerequsites
-              run: sudo apt-get install -y -qq pylint python3-pytest python3-behave
+              run: sudo apt-get install -y -qq python3-pytest python3-behave
               if: matrix.ubuntu == 20
 
             - name: Install test prerequsites
-              run: pip3 install pylint==2.6.0 pytest behave==1.2.6
+              run: pip3 install pytest behave==1.2.6
               if: matrix.ubuntu == 18
 
+            - name: Install latest pylint
+              run: pip3 install pylint
+
             - name: PHP linting
               run: phpcs --report-width=120 .
               working-directory: Nominatim
diff --git a/docs/develop/Development-Environment.md b/docs/develop/Development-Environment.md
index eba87c09..3cda610e 100644
--- a/docs/develop/Development-Environment.md
+++ b/docs/develop/Development-Environment.md
@@ -32,7 +32,7 @@ It has the following additional requirements:
 * [behave test framework](https://behave.readthedocs.io) >= 1.2.6
 * [phpunit](https://phpunit.de) (9.5 is known to work)
 * [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
-* [Pylint](https://pylint.org/) (2.6.0 is used for the CI)
+* [Pylint](https://pylint.org/) (CI always runs the latest version from pip)
 * [pytest](https://pytest.org)
 
 The documentation is built with mkdocs: