diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index a08a995f..4ce14f93 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -81,13 +81,16 @@ jobs:
ubuntu: ${{ matrix.ubuntu }}
- name: Install test prerequsites
- run: sudo apt-get install -y -qq pylint python3-pytest python3-behave
+ run: sudo apt-get install -y -qq python3-pytest python3-behave
if: matrix.ubuntu == 20
- name: Install test prerequsites
- run: pip3 install pylint==2.6.0 pytest behave==1.2.6
+ run: pip3 install pytest behave==1.2.6
if: matrix.ubuntu == 18
+ - name: Install latest pylint
+ run: pip3 install pylint
+
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
diff --git a/docs/develop/Development-Environment.md b/docs/develop/Development-Environment.md
index eba87c09..3cda610e 100644
--- a/docs/develop/Development-Environment.md
+++ b/docs/develop/Development-Environment.md
@@ -32,7 +32,7 @@ It has the following additional requirements:
* [behave test framework](https://behave.readthedocs.io) >= 1.2.6
* [phpunit](https://phpunit.de) (9.5 is known to work)
* [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
-* [Pylint](https://pylint.org/) (2.6.0 is used for the CI)
+* [Pylint](https://pylint.org/) (CI always runs the latest version from pip)
* [pytest](https://pytest.org)
The documentation is built with mkdocs:
diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py
index 7b5f3797..f0ec358b 100644
--- a/nominatim/clicmd/setup.py
+++ b/nominatim/clicmd/setup.py
@@ -128,7 +128,7 @@ class SetupAll:
drop=args.no_updates)
LOG.warning('Create search index for default country names.')
country_info.create_country_names(conn, tokenizer,
- args.config.LANGUAGES)
+ args.config.get_str_list('LANGUAGES'))
if args.no_updates:
freeze.drop_update_tables(conn)
tokenizer.finalize_import(args.config)
diff --git a/nominatim/config.py b/nominatim/config.py
index ef261079..700af328 100644
--- a/nominatim/config.py
+++ b/nominatim/config.py
@@ -99,6 +99,17 @@ class Configuration:
raise UsageError("Configuration error.") from exp
+ def get_str_list(self, name):
+ """ Return the given configuration parameter as a list of strings.
+ The values are assumed to be given as a comma-sparated list and
+ will be stripped before returning them. On empty values None
+ is returned.
+ """
+ raw = self.__getattr__(name)
+
+ return [v.strip() for v in raw.split(',')] if raw else None
+
+
def get_path(self, name):
""" Return the given configuration parameter as a Path.
If a relative path is configured, then the function converts this
diff --git a/nominatim/tools/country_info.py b/nominatim/tools/country_info.py
index ed04c2d5..0ad00171 100644
--- a/nominatim/tools/country_info.py
+++ b/nominatim/tools/country_info.py
@@ -131,9 +131,6 @@ def create_country_names(conn, tokenizer, languages=None):
empty then only name translations for the given languages are added
to the index.
"""
- if languages:
- languages = languages.split(',')
-
def _include_key(key):
return ':' not in key or not languages or \
key[key.index(':') + 1:] in languages
diff --git a/nominatim/tools/special_phrases/sp_csv_loader.py b/nominatim/tools/special_phrases/sp_csv_loader.py
index 55a9d8d0..0bd93c00 100644
--- a/nominatim/tools/special_phrases/sp_csv_loader.py
+++ b/nominatim/tools/special_phrases/sp_csv_loader.py
@@ -11,43 +11,31 @@
"""
import csv
import os
-from collections.abc import Iterator
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
from nominatim.errors import UsageError
-class SPCsvLoader(Iterator):
+class SPCsvLoader:
"""
Handles loading of special phrases from external csv file.
"""
def __init__(self, csv_path):
super().__init__()
self.csv_path = csv_path
- self.has_been_read = False
- def __next__(self):
- if self.has_been_read:
- raise StopIteration()
- self.has_been_read = True
- self.check_csv_validity()
- return self.parse_csv()
-
- def parse_csv(self):
- """
- Open and parse the given csv file.
+ def generate_phrases(self):
+ """ Open and parse the given csv file.
Create the corresponding SpecialPhrases.
"""
- phrases = set()
+ self._check_csv_validity()
with open(self.csv_path, encoding='utf-8') as fd:
reader = csv.DictReader(fd, delimiter=',')
for row in reader:
- phrases.add(
- SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
- )
- return phrases
+ yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
- def check_csv_validity(self):
+
+ def _check_csv_validity(self):
"""
Check that the csv file has the right extension.
"""
diff --git a/nominatim/tools/special_phrases/sp_importer.py b/nominatim/tools/special_phrases/sp_importer.py
index 8137142b..31bbc355 100644
--- a/nominatim/tools/special_phrases/sp_importer.py
+++ b/nominatim/tools/special_phrases/sp_importer.py
@@ -62,11 +62,10 @@ class SPImporter():
# Store pairs of class/type for further processing
class_type_pairs = set()
- for loaded_phrases in self.sp_loader:
- for phrase in loaded_phrases:
- result = self._process_phrase(phrase)
- if result:
- class_type_pairs.add(result)
+ for phrase in self.sp_loader.generate_phrases():
+ result = self._process_phrase(phrase)
+ if result:
+ class_type_pairs.add(result)
self._create_place_classtype_table_and_indexes(class_type_pairs)
if should_replace:
diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py
index 2f698092..ca4758ac 100644
--- a/nominatim/tools/special_phrases/sp_wiki_loader.py
+++ b/nominatim/tools/special_phrases/sp_wiki_loader.py
@@ -9,46 +9,56 @@
"""
import re
import logging
-from collections.abc import Iterator
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
from nominatim.tools.exec_utils import get_url
LOG = logging.getLogger()
-class SPWikiLoader(Iterator):
+
+def _get_wiki_content(lang):
+ """
+ Request and return the wiki page's content
+ corresponding to special phrases for a given lang.
+ Requested URL Example :
+ https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
+ """
+ url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
+ + lang.upper()
+ return get_url(url)
+
+
+class SPWikiLoader:
"""
Handles loading of special phrases from the wiki.
"""
- def __init__(self, config, languages=None):
+ def __init__(self, config):
super().__init__()
self.config = config
# Compile the regex here to increase performances.
self.occurence_pattern = re.compile(
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
)
- self.languages = self._load_languages() if not languages else list(languages)
+ # Hack around a bug where building=yes was imported with quotes into the wiki
+ self.type_fix_pattern = re.compile(r'\"|"')
+ self._load_languages()
- def __next__(self):
- if not self.languages:
- raise StopIteration
- lang = self.languages.pop(0)
- loaded_xml = self._get_wiki_content(lang)
- LOG.warning('Importing phrases for lang: %s...', lang)
- return self.parse_xml(loaded_xml)
-
- def parse_xml(self, xml):
+ def generate_phrases(self):
+ """ Download the wiki pages for the configured languages
+ and extract the phrases from the page.
"""
- Parses XML content and extracts special phrases from it.
- Return a list of SpecialPhrase.
- """
- # One match will be of format [label, class, type, operator, plural]
- matches = self.occurence_pattern.findall(xml)
- returned_phrases = set()
- for match in matches:
- returned_phrases.add(
- SpecialPhrase(match[0], match[1], match[2], match[3])
- )
- return returned_phrases
+ for lang in self.languages:
+ LOG.warning('Importing phrases for lang: %s...', lang)
+ loaded_xml = _get_wiki_content(lang)
+
+ # One match will be of format [label, class, type, operator, plural]
+ matches = self.occurence_pattern.findall(loaded_xml)
+
+ for match in matches:
+ yield SpecialPhrase(match[0],
+ match[1],
+ self.type_fix_pattern.sub('', match[2]),
+ match[3])
+
def _load_languages(self):
"""
@@ -56,21 +66,11 @@ class SPWikiLoader(Iterator):
or default if there is no languages configured.
The system will extract special phrases only from all specified languages.
"""
- default_languages = [
+ if self.config.LANGUAGES:
+ self.languages = self.config.get_str_list('LANGUAGES')
+ else:
+ self.languages = [
'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
- return self.config.LANGUAGES.split(',') if self.config.LANGUAGES else default_languages
-
- @staticmethod
- def _get_wiki_content(lang):
- """
- Request and return the wiki page's content
- corresponding to special phrases for a given lang.
- Requested URL Example :
- https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
- """
- url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
- + lang.upper()
- return get_url(url)
diff --git a/nominatim/tools/special_phrases/special_phrase.py b/nominatim/tools/special_phrases/special_phrase.py
index dc7f69fe..16935ccf 100644
--- a/nominatim/tools/special_phrases/special_phrase.py
+++ b/nominatim/tools/special_phrases/special_phrase.py
@@ -10,9 +10,7 @@
This class is a model used to transfer a special phrase through
the process of load and importation.
"""
-import re
-
-class SpecialPhrase():
+class SpecialPhrase:
"""
Model representing a special phrase.
"""
@@ -20,7 +18,19 @@ class SpecialPhrase():
self.p_label = p_label.strip()
self.p_class = p_class.strip()
# Hack around a bug where building=yes was imported with quotes into the wiki
- self.p_type = re.sub(r'\"|"', '', p_type.strip())
+ self.p_type = p_type.strip()
# Needed if some operator in the wiki are not written in english
p_operator = p_operator.strip().lower()
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
+
+ def __eq__(self, other):
+ if not isinstance(other, SpecialPhrase):
+ return False
+
+ return self.p_label == other.p_label \
+ and self.p_class == other.p_class \
+ and self.p_type == other.p_type \
+ and self.p_operator == other.p_operator
+
+ def __hash__(self):
+ return hash((self.p_label, self.p_class, self.p_type, self.p_operator))
diff --git a/test/python/config/test_config.py b/test/python/config/test_config.py
index 9f9ca880..a9cbb48d 100644
--- a/test/python/config/test_config.py
+++ b/test/python/config/test_config.py
@@ -173,6 +173,23 @@ def test_get_int_empty(make_config):
config.get_int('DATABASE_MODULE_PATH')
+@pytest.mark.parametrize("value,outlist", [('sd', ['sd']),
+ ('dd,rr', ['dd', 'rr']),
+ (' a , b ', ['a', 'b'])])
+def test_get_str_list_success(make_config, monkeypatch, value, outlist):
+ config = make_config()
+
+ monkeypatch.setenv('NOMINATIM_MYLIST', value)
+
+ assert config.get_str_list('MYLIST') == outlist
+
+
+def test_get_str_list_empty(make_config):
+ config = make_config()
+
+ assert config.get_str_list('LANGUAGES') is None
+
+
def test_get_path_empty(make_config):
config = make_config()
diff --git a/test/python/tools/test_country_info.py b/test/python/tools/test_country_info.py
index 3c20b3e0..3f00d54e 100644
--- a/test/python/tools/test_country_info.py
+++ b/test/python/tools/test_country_info.py
@@ -49,7 +49,7 @@ def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cur
assert temp_db_cursor.table_rows('country_osm_grid') > 100
-@pytest.mark.parametrize("languages", (None, ' fr,en'))
+@pytest.mark.parametrize("languages", (None, ['fr', 'en']))
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
table_factory, tokenizer_mock, languages, loaded_country):
diff --git a/test/python/tools/test_import_special_phrases.py b/test/python/tools/test_import_special_phrases.py
index 41017694..0dcf549c 100644
--- a/test/python/tools/test_import_special_phrases.py
+++ b/test/python/tools/test_import_special_phrases.py
@@ -18,16 +18,12 @@ from nominatim.errors import UsageError
from cursor import CursorForTesting
@pytest.fixture
-def testfile_dir(src_dir):
- return src_dir / 'test' / 'testfiles'
-
-
-@pytest.fixture
-def sp_importer(temp_db_conn, def_config):
+def sp_importer(temp_db_conn, def_config, monkeypatch):
"""
Return an instance of SPImporter.
"""
- loader = SPWikiLoader(def_config, ['en'])
+ monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
+ loader = SPWikiLoader(def_config)
return SPImporter(def_config, temp_db_conn, loader)
@@ -186,8 +182,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
table_factory('place_classtype_amenity_animal_shelter')
table_factory('place_classtype_wrongclass_wrongtype')
- monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
- lambda self, lang: xml_wiki_content)
+ monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
+ lambda lang: xml_wiki_content)
tokenizer = tokenizer_mock()
sp_importer.import_phrases(tokenizer, should_replace)
diff --git a/test/python/tools/test_sp_csv_loader.py b/test/python/tools/test_sp_csv_loader.py
index 6f5670f0..49d5a853 100644
--- a/test/python/tools/test_sp_csv_loader.py
+++ b/test/python/tools/test_sp_csv_loader.py
@@ -11,50 +11,7 @@ import pytest
from nominatim.errors import UsageError
from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
-
-def test_parse_csv(sp_csv_loader):
- """
- Test method parse_csv()
- Should return the right SpecialPhrase objects.
- """
- phrases = sp_csv_loader.parse_csv()
- assert check_phrases_content(phrases)
-
-def test_next(sp_csv_loader):
- """
- Test objects returned from the next() method.
- It should return all SpecialPhrases objects of
- the sp_csv_test.csv special phrases.
- """
- phrases = next(sp_csv_loader)
- assert check_phrases_content(phrases)
-
-def test_check_csv_validity(sp_csv_loader):
- """
- Test method check_csv_validity()
- It should raise an exception when file with a
- different exception than .csv is given.
- """
- sp_csv_loader.csv_path = 'test.csv'
- sp_csv_loader.check_csv_validity()
- sp_csv_loader.csv_path = 'test.wrong'
- with pytest.raises(UsageError):
- assert sp_csv_loader.check_csv_validity()
-
-def check_phrases_content(phrases):
- """
- Asserts that the given phrases list contains
- the right phrases of the sp_csv_test.csv special phrases.
- """
- return len(phrases) > 1 \
- and any(p.p_label == 'Billboard'
- and p.p_class == 'advertising'
- and p.p_type == 'billboard'
- and p.p_operator == '-' for p in phrases) \
- and any(p.p_label == 'Zip Lines'
- and p.p_class == 'aerialway'
- and p.p_type == 'zip_line'
- and p.p_operator == '-' for p in phrases)
+from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
@pytest.fixture
def sp_csv_loader(src_dir):
@@ -64,3 +21,29 @@ def sp_csv_loader(src_dir):
csv_path = (src_dir / 'test' / 'testdata' / 'sp_csv_test.csv').resolve()
loader = SPCsvLoader(csv_path)
return loader
+
+
+def test_generate_phrases(sp_csv_loader):
+ """
+ Test method parse_csv()
+ Should return the right SpecialPhrase objects.
+ """
+ phrases = list(sp_csv_loader.generate_phrases())
+
+ assert len(phrases) == 42
+ assert len(set(phrases)) == 41
+
+ assert SpecialPhrase('Billboard', 'advertising', 'billboard', '-') in phrases
+ assert SpecialPhrase('Zip Lines', 'aerialway', 'zip_line', '-') in phrases
+
+
+def test_invalid_cvs_file():
+ """
+ Test method check_csv_validity()
+ It should raise an exception when file with a
+ different exception than .csv is given.
+ """
+ loader = SPCsvLoader('test.wrong')
+
+ with pytest.raises(UsageError, match='not a csv file'):
+ next(loader.generate_phrases())
diff --git a/test/python/tools/test_sp_wiki_loader.py b/test/python/tools/test_sp_wiki_loader.py
index bfe93c57..2f47734e 100644
--- a/test/python/tools/test_sp_wiki_loader.py
+++ b/test/python/tools/test_sp_wiki_loader.py
@@ -10,49 +10,32 @@
import pytest
from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader
-@pytest.fixture
-def xml_wiki_content(src_dir):
- """
- return the content of the static xml test file.
- """
- xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
- return xml_test_content.read_text()
-
@pytest.fixture
-def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content):
+def sp_wiki_loader(src_dir, monkeypatch, def_config):
"""
Return an instance of SPWikiLoader.
"""
- loader = SPWikiLoader(def_config, ['en'])
- monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
- lambda self, lang: xml_wiki_content)
+ monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
+ loader = SPWikiLoader(def_config)
+
+ def _mock_wiki_content(lang):
+ xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
+ return xml_test_content.read_text()
+
+ monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
+ _mock_wiki_content)
return loader
-def test_parse_xml(sp_wiki_loader, xml_wiki_content):
- """
- Test method parse_xml()
- Should return the right SpecialPhrase objects.
- """
- phrases = sp_wiki_loader.parse_xml(xml_wiki_content)
- check_phrases_content(phrases)
-
-
-def test_next(sp_wiki_loader):
+def test_generate_phrases(sp_wiki_loader):
"""
Test objects returned from the next() method.
It should return all SpecialPhrases objects of
the 'en' special phrases.
"""
- phrases = next(sp_wiki_loader)
- check_phrases_content(phrases)
+ phrases = list(sp_wiki_loader.generate_phrases())
-def check_phrases_content(phrases):
- """
- Asserts that the given phrases list contains
- the right phrases of the 'en' special phrases.
- """
assert set((p.p_label, p.p_class, p.p_type, p.p_operator) for p in phrases) ==\
{('Zip Line', 'aerialway', 'zip_line', '-'),
('Zip Lines', 'aerialway', 'zip_line', '-'),
diff --git a/test/testdata/sp_csv_test.csv b/test/testdata/sp_csv_test.csv
index 3dab967b..147526de 100644
--- a/test/testdata/sp_csv_test.csv
+++ b/test/testdata/sp_csv_test.csv
@@ -18,6 +18,7 @@ Zipline near,aerialway,zip_line,near,N
Ziplines near,aerialway,zip_line,near,Y
Zipwire,aerialway,zip_line,-,N
Zipwires,aerialway,zip_line,-,Y
+Zipwires,aerialway,zip_line,name,Y
Zipwire in,aerialway,zip_line,in,N
Zipwires in,aerialway,zip_line,in,Y
Zipwire near,aerialway,zip_line,near,N
diff --git a/test/testdata/special_phrases_test_content.txt b/test/testdata/special_phrases_test_content.txt
index e790ca58..e5f340b9 100644
--- a/test/testdata/special_phrases_test_content.txt
+++ b/test/testdata/special_phrases_test_content.txt
@@ -70,7 +70,7 @@
wikitext
text/x-wiki
-== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]]
+== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || "embassy" || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]]
cst5x7tt58izti1pxzgljf27tx8qjcj