move quoting hack to wiki loader

The bad quotes around the type for special phrases
specifically occur in the Wiki pages, so they should be
removed by the loader and not in the generic SpecialPhrase
object.
This commit is contained in:
Sarah Hoffmann
2022-05-30 14:32:36 +02:00
parent cce0e5ea38
commit e828d0d3f7
4 changed files with 15 additions and 21 deletions

View File

@@ -37,6 +37,8 @@ class SPWikiLoader:
self.occurence_pattern = re.compile( self.occurence_pattern = re.compile(
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
) )
# Hack around a bug where building=yes was imported with quotes into the wiki
self.type_fix_pattern = re.compile(r'\"|"')
self._load_languages() self._load_languages()
@@ -52,7 +54,10 @@ class SPWikiLoader:
matches = self.occurence_pattern.findall(loaded_xml) matches = self.occurence_pattern.findall(loaded_xml)
for match in matches: for match in matches:
yield SpecialPhrase(match[0], match[1], match[2], match[3]) yield SpecialPhrase(match[0],
match[1],
self.type_fix_pattern.sub('', match[2]),
match[3])
def _load_languages(self): def _load_languages(self):

View File

@@ -10,8 +10,6 @@
This class is a model used to transfer a special phrase through This class is a model used to transfer a special phrase through
the process of load and importation. the process of load and importation.
""" """
import re
class SpecialPhrase(): class SpecialPhrase():
""" """
Model representing a special phrase. Model representing a special phrase.
@@ -20,7 +18,7 @@ class SpecialPhrase():
self.p_label = p_label.strip() self.p_label = p_label.strip()
self.p_class = p_class.strip() self.p_class = p_class.strip()
# Hack around a bug where building=yes was imported with quotes into the wiki # Hack around a bug where building=yes was imported with quotes into the wiki
self.p_type = re.sub(r'\"|"', '', p_type.strip()) self.p_type = p_type.strip()
# Needed if some operator in the wiki are not written in english # Needed if some operator in the wiki are not written in english
p_operator = p_operator.strip().lower() p_operator = p_operator.strip().lower()
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator

View File

@@ -10,24 +10,21 @@
import pytest import pytest
from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader
@pytest.fixture
def xml_wiki_content(src_dir):
"""
return the content of the static xml test file.
"""
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text()
@pytest.fixture @pytest.fixture
def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content): def sp_wiki_loader(src_dir, monkeypatch, def_config):
""" """
Return an instance of SPWikiLoader. Return an instance of SPWikiLoader.
""" """
monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en') monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
loader = SPWikiLoader(def_config) loader = SPWikiLoader(def_config)
def _mock_wiki_content(lang):
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text()
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content', monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
lambda lang: xml_wiki_content) _mock_wiki_content)
return loader return loader
@@ -38,13 +35,7 @@ def test_generate_phrases(sp_wiki_loader):
the 'en' special phrases. the 'en' special phrases.
""" """
phrases = list(sp_wiki_loader.generate_phrases()) phrases = list(sp_wiki_loader.generate_phrases())
check_phrases_content(phrases)
def check_phrases_content(phrases):
"""
Asserts that the given phrases list contains
the right phrases of the 'en' special phrases.
"""
assert set((p.p_label, p.p_class, p.p_type, p.p_operator) for p in phrases) ==\ assert set((p.p_label, p.p_class, p.p_type, p.p_operator) for p in phrases) ==\
{('Zip Line', 'aerialway', 'zip_line', '-'), {('Zip Line', 'aerialway', 'zip_line', '-'),
('Zip Lines', 'aerialway', 'zip_line', '-'), ('Zip Lines', 'aerialway', 'zip_line', '-'),

View File

@@ -70,7 +70,7 @@
<model>wikitext</model> <model>wikitext</model>
<format>text/x-wiki</format> <format>text/x-wiki</format>
<text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve"> <text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve">
== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]] == en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! 
Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || "embassy" || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]]
</text> </text>
<sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1> <sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1>
</revision> </revision>