diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py index 6093fa45..ca4758ac 100644 --- a/nominatim/tools/special_phrases/sp_wiki_loader.py +++ b/nominatim/tools/special_phrases/sp_wiki_loader.py @@ -37,6 +37,8 @@ class SPWikiLoader: self.occurence_pattern = re.compile( r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) + # Hack around a bug where building=yes was imported with quotes into the wiki + self.type_fix_pattern = re.compile(r'\"|"') self._load_languages() @@ -52,7 +54,10 @@ class SPWikiLoader: matches = self.occurence_pattern.findall(loaded_xml) for match in matches: - yield SpecialPhrase(match[0], match[1], match[2], match[3]) + yield SpecialPhrase(match[0], + match[1], + self.type_fix_pattern.sub('', match[2]), + match[3]) def _load_languages(self): diff --git a/nominatim/tools/special_phrases/special_phrase.py b/nominatim/tools/special_phrases/special_phrase.py index dc7f69fe..d9bf9e58 100644 --- a/nominatim/tools/special_phrases/special_phrase.py +++ b/nominatim/tools/special_phrases/special_phrase.py @@ -10,8 +10,6 @@ This class is a model used to transfer a special phrase through the process of load and importation. """ -import re - class SpecialPhrase(): """ Model representing a special phrase. @@ -20,7 +18,7 @@ class SpecialPhrase(): self.p_label = p_label.strip() self.p_class = p_class.strip() # Hack around a bug where building=yes was imported with quotes into the wiki - self.p_type = re.sub(r'\"|"', '', p_type.strip()) + self.p_type = p_type.strip() # Needed if some operator in the wiki are not written in english p_operator = p_operator.strip().lower() self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator diff --git a/test/python/tools/test_sp_wiki_loader.py b/test/python/tools/test_sp_wiki_loader.py index 5bd45de3..2f47734e 100644 --- a/test/python/tools/test_sp_wiki_loader.py +++ b/test/python/tools/test_sp_wiki_loader.py @@ -10,24 +10,21 @@ import pytest from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader -@pytest.fixture -def xml_wiki_content(src_dir): - """ - return the content of the static xml test file. - """ - xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt' - return xml_test_content.read_text() - @pytest.fixture -def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content): +def sp_wiki_loader(src_dir, monkeypatch, def_config): """ Return an instance of SPWikiLoader. """ monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en') loader = SPWikiLoader(def_config) + + def _mock_wiki_content(lang): + xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt' + return xml_test_content.read_text() + monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content', - lambda lang: xml_wiki_content) + _mock_wiki_content) return loader @@ -38,13 +35,7 @@ def test_generate_phrases(sp_wiki_loader): the 'en' special phrases. """ phrases = list(sp_wiki_loader.generate_phrases()) - check_phrases_content(phrases) -def check_phrases_content(phrases): - """ - Asserts that the given phrases list contains - the right phrases of the 'en' special phrases. - """ assert set((p.p_label, p.p_class, p.p_type, p.p_operator) for p in phrases) ==\ {('Zip Line', 'aerialway', 'zip_line', '-'), ('Zip Lines', 'aerialway', 'zip_line', '-'), diff --git a/test/testdata/special_phrases_test_content.txt b/test/testdata/special_phrases_test_content.txt index e790ca58..e5f340b9 100644 --- a/test/testdata/special_phrases_test_content.txt +++ b/test/testdata/special_phrases_test_content.txt @@ -70,7 +70,7 @@ wikitext text/x-wiki -== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]] +== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || "embassy" || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]] cst5x7tt58izti1pxzgljf27tx8qjcj