mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Merge pull request #2731 from lonvia/cleanup-special-phrases
Minor code reorganisation around special phrase parsing
This commit is contained in:
7
.github/workflows/ci-tests.yml
vendored
7
.github/workflows/ci-tests.yml
vendored
@@ -81,13 +81,16 @@ jobs:
|
|||||||
ubuntu: ${{ matrix.ubuntu }}
|
ubuntu: ${{ matrix.ubuntu }}
|
||||||
|
|
||||||
- name: Install test prerequsites
|
- name: Install test prerequsites
|
||||||
run: sudo apt-get install -y -qq pylint python3-pytest python3-behave
|
run: sudo apt-get install -y -qq python3-pytest python3-behave
|
||||||
if: matrix.ubuntu == 20
|
if: matrix.ubuntu == 20
|
||||||
|
|
||||||
- name: Install test prerequsites
|
- name: Install test prerequsites
|
||||||
run: pip3 install pylint==2.6.0 pytest behave==1.2.6
|
run: pip3 install pytest behave==1.2.6
|
||||||
if: matrix.ubuntu == 18
|
if: matrix.ubuntu == 18
|
||||||
|
|
||||||
|
- name: Install latest pylint
|
||||||
|
run: pip3 install pylint
|
||||||
|
|
||||||
- name: PHP linting
|
- name: PHP linting
|
||||||
run: phpcs --report-width=120 .
|
run: phpcs --report-width=120 .
|
||||||
working-directory: Nominatim
|
working-directory: Nominatim
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ It has the following additional requirements:
|
|||||||
* [behave test framework](https://behave.readthedocs.io) >= 1.2.6
|
* [behave test framework](https://behave.readthedocs.io) >= 1.2.6
|
||||||
* [phpunit](https://phpunit.de) (9.5 is known to work)
|
* [phpunit](https://phpunit.de) (9.5 is known to work)
|
||||||
* [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
|
* [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
|
||||||
* [Pylint](https://pylint.org/) (2.6.0 is used for the CI)
|
* [Pylint](https://pylint.org/) (CI always runs the latest version from pip)
|
||||||
* [pytest](https://pytest.org)
|
* [pytest](https://pytest.org)
|
||||||
|
|
||||||
The documentation is built with mkdocs:
|
The documentation is built with mkdocs:
|
||||||
|
|||||||
@@ -128,7 +128,7 @@ class SetupAll:
|
|||||||
drop=args.no_updates)
|
drop=args.no_updates)
|
||||||
LOG.warning('Create search index for default country names.')
|
LOG.warning('Create search index for default country names.')
|
||||||
country_info.create_country_names(conn, tokenizer,
|
country_info.create_country_names(conn, tokenizer,
|
||||||
args.config.LANGUAGES)
|
args.config.get_str_list('LANGUAGES'))
|
||||||
if args.no_updates:
|
if args.no_updates:
|
||||||
freeze.drop_update_tables(conn)
|
freeze.drop_update_tables(conn)
|
||||||
tokenizer.finalize_import(args.config)
|
tokenizer.finalize_import(args.config)
|
||||||
|
|||||||
@@ -99,6 +99,17 @@ class Configuration:
|
|||||||
raise UsageError("Configuration error.") from exp
|
raise UsageError("Configuration error.") from exp
|
||||||
|
|
||||||
|
|
||||||
|
def get_str_list(self, name):
|
||||||
|
""" Return the given configuration parameter as a list of strings.
|
||||||
|
The values are assumed to be given as a comma-sparated list and
|
||||||
|
will be stripped before returning them. On empty values None
|
||||||
|
is returned.
|
||||||
|
"""
|
||||||
|
raw = self.__getattr__(name)
|
||||||
|
|
||||||
|
return [v.strip() for v in raw.split(',')] if raw else None
|
||||||
|
|
||||||
|
|
||||||
def get_path(self, name):
|
def get_path(self, name):
|
||||||
""" Return the given configuration parameter as a Path.
|
""" Return the given configuration parameter as a Path.
|
||||||
If a relative path is configured, then the function converts this
|
If a relative path is configured, then the function converts this
|
||||||
|
|||||||
@@ -131,9 +131,6 @@ def create_country_names(conn, tokenizer, languages=None):
|
|||||||
empty then only name translations for the given languages are added
|
empty then only name translations for the given languages are added
|
||||||
to the index.
|
to the index.
|
||||||
"""
|
"""
|
||||||
if languages:
|
|
||||||
languages = languages.split(',')
|
|
||||||
|
|
||||||
def _include_key(key):
|
def _include_key(key):
|
||||||
return ':' not in key or not languages or \
|
return ':' not in key or not languages or \
|
||||||
key[key.index(':') + 1:] in languages
|
key[key.index(':') + 1:] in languages
|
||||||
|
|||||||
@@ -11,43 +11,31 @@
|
|||||||
"""
|
"""
|
||||||
import csv
|
import csv
|
||||||
import os
|
import os
|
||||||
from collections.abc import Iterator
|
|
||||||
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
|
|
||||||
class SPCsvLoader(Iterator):
|
class SPCsvLoader:
|
||||||
"""
|
"""
|
||||||
Handles loading of special phrases from external csv file.
|
Handles loading of special phrases from external csv file.
|
||||||
"""
|
"""
|
||||||
def __init__(self, csv_path):
|
def __init__(self, csv_path):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.csv_path = csv_path
|
self.csv_path = csv_path
|
||||||
self.has_been_read = False
|
|
||||||
|
|
||||||
def __next__(self):
|
|
||||||
if self.has_been_read:
|
|
||||||
raise StopIteration()
|
|
||||||
|
|
||||||
self.has_been_read = True
|
def generate_phrases(self):
|
||||||
self.check_csv_validity()
|
""" Open and parse the given csv file.
|
||||||
return self.parse_csv()
|
|
||||||
|
|
||||||
def parse_csv(self):
|
|
||||||
"""
|
|
||||||
Open and parse the given csv file.
|
|
||||||
Create the corresponding SpecialPhrases.
|
Create the corresponding SpecialPhrases.
|
||||||
"""
|
"""
|
||||||
phrases = set()
|
self._check_csv_validity()
|
||||||
|
|
||||||
with open(self.csv_path, encoding='utf-8') as fd:
|
with open(self.csv_path, encoding='utf-8') as fd:
|
||||||
reader = csv.DictReader(fd, delimiter=',')
|
reader = csv.DictReader(fd, delimiter=',')
|
||||||
for row in reader:
|
for row in reader:
|
||||||
phrases.add(
|
yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
|
||||||
SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
|
|
||||||
)
|
|
||||||
return phrases
|
|
||||||
|
|
||||||
def check_csv_validity(self):
|
|
||||||
|
def _check_csv_validity(self):
|
||||||
"""
|
"""
|
||||||
Check that the csv file has the right extension.
|
Check that the csv file has the right extension.
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -62,11 +62,10 @@ class SPImporter():
|
|||||||
# Store pairs of class/type for further processing
|
# Store pairs of class/type for further processing
|
||||||
class_type_pairs = set()
|
class_type_pairs = set()
|
||||||
|
|
||||||
for loaded_phrases in self.sp_loader:
|
for phrase in self.sp_loader.generate_phrases():
|
||||||
for phrase in loaded_phrases:
|
result = self._process_phrase(phrase)
|
||||||
result = self._process_phrase(phrase)
|
if result:
|
||||||
if result:
|
class_type_pairs.add(result)
|
||||||
class_type_pairs.add(result)
|
|
||||||
|
|
||||||
self._create_place_classtype_table_and_indexes(class_type_pairs)
|
self._create_place_classtype_table_and_indexes(class_type_pairs)
|
||||||
if should_replace:
|
if should_replace:
|
||||||
|
|||||||
@@ -9,46 +9,56 @@
|
|||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
from collections.abc import Iterator
|
|
||||||
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
||||||
from nominatim.tools.exec_utils import get_url
|
from nominatim.tools.exec_utils import get_url
|
||||||
|
|
||||||
LOG = logging.getLogger()
|
LOG = logging.getLogger()
|
||||||
class SPWikiLoader(Iterator):
|
|
||||||
|
def _get_wiki_content(lang):
|
||||||
|
"""
|
||||||
|
Request and return the wiki page's content
|
||||||
|
corresponding to special phrases for a given lang.
|
||||||
|
Requested URL Example :
|
||||||
|
https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
|
||||||
|
"""
|
||||||
|
url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
|
||||||
|
+ lang.upper()
|
||||||
|
return get_url(url)
|
||||||
|
|
||||||
|
|
||||||
|
class SPWikiLoader:
|
||||||
"""
|
"""
|
||||||
Handles loading of special phrases from the wiki.
|
Handles loading of special phrases from the wiki.
|
||||||
"""
|
"""
|
||||||
def __init__(self, config, languages=None):
|
def __init__(self, config):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.config = config
|
self.config = config
|
||||||
# Compile the regex here to increase performances.
|
# Compile the regex here to increase performances.
|
||||||
self.occurence_pattern = re.compile(
|
self.occurence_pattern = re.compile(
|
||||||
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
|
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
|
||||||
)
|
)
|
||||||
self.languages = self._load_languages() if not languages else list(languages)
|
# Hack around a bug where building=yes was imported with quotes into the wiki
|
||||||
|
self.type_fix_pattern = re.compile(r'\"|"')
|
||||||
|
self._load_languages()
|
||||||
|
|
||||||
def __next__(self):
|
|
||||||
if not self.languages:
|
|
||||||
raise StopIteration
|
|
||||||
|
|
||||||
lang = self.languages.pop(0)
|
def generate_phrases(self):
|
||||||
loaded_xml = self._get_wiki_content(lang)
|
""" Download the wiki pages for the configured languages
|
||||||
LOG.warning('Importing phrases for lang: %s...', lang)
|
and extract the phrases from the page.
|
||||||
return self.parse_xml(loaded_xml)
|
|
||||||
|
|
||||||
def parse_xml(self, xml):
|
|
||||||
"""
|
"""
|
||||||
Parses XML content and extracts special phrases from it.
|
for lang in self.languages:
|
||||||
Return a list of SpecialPhrase.
|
LOG.warning('Importing phrases for lang: %s...', lang)
|
||||||
"""
|
loaded_xml = _get_wiki_content(lang)
|
||||||
# One match will be of format [label, class, type, operator, plural]
|
|
||||||
matches = self.occurence_pattern.findall(xml)
|
# One match will be of format [label, class, type, operator, plural]
|
||||||
returned_phrases = set()
|
matches = self.occurence_pattern.findall(loaded_xml)
|
||||||
for match in matches:
|
|
||||||
returned_phrases.add(
|
for match in matches:
|
||||||
SpecialPhrase(match[0], match[1], match[2], match[3])
|
yield SpecialPhrase(match[0],
|
||||||
)
|
match[1],
|
||||||
return returned_phrases
|
self.type_fix_pattern.sub('', match[2]),
|
||||||
|
match[3])
|
||||||
|
|
||||||
|
|
||||||
def _load_languages(self):
|
def _load_languages(self):
|
||||||
"""
|
"""
|
||||||
@@ -56,21 +66,11 @@ class SPWikiLoader(Iterator):
|
|||||||
or default if there is no languages configured.
|
or default if there is no languages configured.
|
||||||
The system will extract special phrases only from all specified languages.
|
The system will extract special phrases only from all specified languages.
|
||||||
"""
|
"""
|
||||||
default_languages = [
|
if self.config.LANGUAGES:
|
||||||
|
self.languages = self.config.get_str_list('LANGUAGES')
|
||||||
|
else:
|
||||||
|
self.languages = [
|
||||||
'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
|
'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
|
||||||
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
|
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
|
||||||
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
|
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
|
||||||
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
|
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
|
||||||
return self.config.LANGUAGES.split(',') if self.config.LANGUAGES else default_languages
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _get_wiki_content(lang):
|
|
||||||
"""
|
|
||||||
Request and return the wiki page's content
|
|
||||||
corresponding to special phrases for a given lang.
|
|
||||||
Requested URL Example :
|
|
||||||
https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
|
|
||||||
"""
|
|
||||||
url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
|
|
||||||
+ lang.upper()
|
|
||||||
return get_url(url)
|
|
||||||
|
|||||||
@@ -10,9 +10,7 @@
|
|||||||
This class is a model used to transfer a special phrase through
|
This class is a model used to transfer a special phrase through
|
||||||
the process of load and importation.
|
the process of load and importation.
|
||||||
"""
|
"""
|
||||||
import re
|
class SpecialPhrase:
|
||||||
|
|
||||||
class SpecialPhrase():
|
|
||||||
"""
|
"""
|
||||||
Model representing a special phrase.
|
Model representing a special phrase.
|
||||||
"""
|
"""
|
||||||
@@ -20,7 +18,19 @@ class SpecialPhrase():
|
|||||||
self.p_label = p_label.strip()
|
self.p_label = p_label.strip()
|
||||||
self.p_class = p_class.strip()
|
self.p_class = p_class.strip()
|
||||||
# Hack around a bug where building=yes was imported with quotes into the wiki
|
# Hack around a bug where building=yes was imported with quotes into the wiki
|
||||||
self.p_type = re.sub(r'\"|"', '', p_type.strip())
|
self.p_type = p_type.strip()
|
||||||
# Needed if some operator in the wiki are not written in english
|
# Needed if some operator in the wiki are not written in english
|
||||||
p_operator = p_operator.strip().lower()
|
p_operator = p_operator.strip().lower()
|
||||||
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
|
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
if not isinstance(other, SpecialPhrase):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return self.p_label == other.p_label \
|
||||||
|
and self.p_class == other.p_class \
|
||||||
|
and self.p_type == other.p_type \
|
||||||
|
and self.p_operator == other.p_operator
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash((self.p_label, self.p_class, self.p_type, self.p_operator))
|
||||||
|
|||||||
@@ -173,6 +173,23 @@ def test_get_int_empty(make_config):
|
|||||||
config.get_int('DATABASE_MODULE_PATH')
|
config.get_int('DATABASE_MODULE_PATH')
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("value,outlist", [('sd', ['sd']),
|
||||||
|
('dd,rr', ['dd', 'rr']),
|
||||||
|
(' a , b ', ['a', 'b'])])
|
||||||
|
def test_get_str_list_success(make_config, monkeypatch, value, outlist):
|
||||||
|
config = make_config()
|
||||||
|
|
||||||
|
monkeypatch.setenv('NOMINATIM_MYLIST', value)
|
||||||
|
|
||||||
|
assert config.get_str_list('MYLIST') == outlist
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_str_list_empty(make_config):
|
||||||
|
config = make_config()
|
||||||
|
|
||||||
|
assert config.get_str_list('LANGUAGES') is None
|
||||||
|
|
||||||
|
|
||||||
def test_get_path_empty(make_config):
|
def test_get_path_empty(make_config):
|
||||||
config = make_config()
|
config = make_config()
|
||||||
|
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cur
|
|||||||
assert temp_db_cursor.table_rows('country_osm_grid') > 100
|
assert temp_db_cursor.table_rows('country_osm_grid') > 100
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("languages", (None, ' fr,en'))
|
@pytest.mark.parametrize("languages", (None, ['fr', 'en']))
|
||||||
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
|
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
|
||||||
table_factory, tokenizer_mock, languages, loaded_country):
|
table_factory, tokenizer_mock, languages, loaded_country):
|
||||||
|
|
||||||
|
|||||||
@@ -18,16 +18,12 @@ from nominatim.errors import UsageError
|
|||||||
from cursor import CursorForTesting
|
from cursor import CursorForTesting
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def testfile_dir(src_dir):
|
def sp_importer(temp_db_conn, def_config, monkeypatch):
|
||||||
return src_dir / 'test' / 'testfiles'
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def sp_importer(temp_db_conn, def_config):
|
|
||||||
"""
|
"""
|
||||||
Return an instance of SPImporter.
|
Return an instance of SPImporter.
|
||||||
"""
|
"""
|
||||||
loader = SPWikiLoader(def_config, ['en'])
|
monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
|
||||||
|
loader = SPWikiLoader(def_config)
|
||||||
return SPImporter(def_config, temp_db_conn, loader)
|
return SPImporter(def_config, temp_db_conn, loader)
|
||||||
|
|
||||||
|
|
||||||
@@ -186,8 +182,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
|||||||
table_factory('place_classtype_amenity_animal_shelter')
|
table_factory('place_classtype_amenity_animal_shelter')
|
||||||
table_factory('place_classtype_wrongclass_wrongtype')
|
table_factory('place_classtype_wrongclass_wrongtype')
|
||||||
|
|
||||||
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
|
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
|
||||||
lambda self, lang: xml_wiki_content)
|
lambda lang: xml_wiki_content)
|
||||||
|
|
||||||
tokenizer = tokenizer_mock()
|
tokenizer = tokenizer_mock()
|
||||||
sp_importer.import_phrases(tokenizer, should_replace)
|
sp_importer.import_phrases(tokenizer, should_replace)
|
||||||
|
|||||||
@@ -11,50 +11,7 @@ import pytest
|
|||||||
|
|
||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
|
from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
|
||||||
|
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
||||||
def test_parse_csv(sp_csv_loader):
|
|
||||||
"""
|
|
||||||
Test method parse_csv()
|
|
||||||
Should return the right SpecialPhrase objects.
|
|
||||||
"""
|
|
||||||
phrases = sp_csv_loader.parse_csv()
|
|
||||||
assert check_phrases_content(phrases)
|
|
||||||
|
|
||||||
def test_next(sp_csv_loader):
|
|
||||||
"""
|
|
||||||
Test objects returned from the next() method.
|
|
||||||
It should return all SpecialPhrases objects of
|
|
||||||
the sp_csv_test.csv special phrases.
|
|
||||||
"""
|
|
||||||
phrases = next(sp_csv_loader)
|
|
||||||
assert check_phrases_content(phrases)
|
|
||||||
|
|
||||||
def test_check_csv_validity(sp_csv_loader):
|
|
||||||
"""
|
|
||||||
Test method check_csv_validity()
|
|
||||||
It should raise an exception when file with a
|
|
||||||
different exception than .csv is given.
|
|
||||||
"""
|
|
||||||
sp_csv_loader.csv_path = 'test.csv'
|
|
||||||
sp_csv_loader.check_csv_validity()
|
|
||||||
sp_csv_loader.csv_path = 'test.wrong'
|
|
||||||
with pytest.raises(UsageError):
|
|
||||||
assert sp_csv_loader.check_csv_validity()
|
|
||||||
|
|
||||||
def check_phrases_content(phrases):
|
|
||||||
"""
|
|
||||||
Asserts that the given phrases list contains
|
|
||||||
the right phrases of the sp_csv_test.csv special phrases.
|
|
||||||
"""
|
|
||||||
return len(phrases) > 1 \
|
|
||||||
and any(p.p_label == 'Billboard'
|
|
||||||
and p.p_class == 'advertising'
|
|
||||||
and p.p_type == 'billboard'
|
|
||||||
and p.p_operator == '-' for p in phrases) \
|
|
||||||
and any(p.p_label == 'Zip Lines'
|
|
||||||
and p.p_class == 'aerialway'
|
|
||||||
and p.p_type == 'zip_line'
|
|
||||||
and p.p_operator == '-' for p in phrases)
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sp_csv_loader(src_dir):
|
def sp_csv_loader(src_dir):
|
||||||
@@ -64,3 +21,29 @@ def sp_csv_loader(src_dir):
|
|||||||
csv_path = (src_dir / 'test' / 'testdata' / 'sp_csv_test.csv').resolve()
|
csv_path = (src_dir / 'test' / 'testdata' / 'sp_csv_test.csv').resolve()
|
||||||
loader = SPCsvLoader(csv_path)
|
loader = SPCsvLoader(csv_path)
|
||||||
return loader
|
return loader
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_phrases(sp_csv_loader):
|
||||||
|
"""
|
||||||
|
Test method parse_csv()
|
||||||
|
Should return the right SpecialPhrase objects.
|
||||||
|
"""
|
||||||
|
phrases = list(sp_csv_loader.generate_phrases())
|
||||||
|
|
||||||
|
assert len(phrases) == 42
|
||||||
|
assert len(set(phrases)) == 41
|
||||||
|
|
||||||
|
assert SpecialPhrase('Billboard', 'advertising', 'billboard', '-') in phrases
|
||||||
|
assert SpecialPhrase('Zip Lines', 'aerialway', 'zip_line', '-') in phrases
|
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_cvs_file():
|
||||||
|
"""
|
||||||
|
Test method check_csv_validity()
|
||||||
|
It should raise an exception when file with a
|
||||||
|
different exception than .csv is given.
|
||||||
|
"""
|
||||||
|
loader = SPCsvLoader('test.wrong')
|
||||||
|
|
||||||
|
with pytest.raises(UsageError, match='not a csv file'):
|
||||||
|
next(loader.generate_phrases())
|
||||||
|
|||||||
@@ -10,49 +10,32 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader
|
from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def xml_wiki_content(src_dir):
|
|
||||||
"""
|
|
||||||
return the content of the static xml test file.
|
|
||||||
"""
|
|
||||||
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
|
|
||||||
return xml_test_content.read_text()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content):
|
def sp_wiki_loader(src_dir, monkeypatch, def_config):
|
||||||
"""
|
"""
|
||||||
Return an instance of SPWikiLoader.
|
Return an instance of SPWikiLoader.
|
||||||
"""
|
"""
|
||||||
loader = SPWikiLoader(def_config, ['en'])
|
monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
|
||||||
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
|
loader = SPWikiLoader(def_config)
|
||||||
lambda self, lang: xml_wiki_content)
|
|
||||||
|
def _mock_wiki_content(lang):
|
||||||
|
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
|
||||||
|
return xml_test_content.read_text()
|
||||||
|
|
||||||
|
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
|
||||||
|
_mock_wiki_content)
|
||||||
return loader
|
return loader
|
||||||
|
|
||||||
|
|
||||||
def test_parse_xml(sp_wiki_loader, xml_wiki_content):
|
def test_generate_phrases(sp_wiki_loader):
|
||||||
"""
|
|
||||||
Test method parse_xml()
|
|
||||||
Should return the right SpecialPhrase objects.
|
|
||||||
"""
|
|
||||||
phrases = sp_wiki_loader.parse_xml(xml_wiki_content)
|
|
||||||
check_phrases_content(phrases)
|
|
||||||
|
|
||||||
|
|
||||||
def test_next(sp_wiki_loader):
|
|
||||||
"""
|
"""
|
||||||
Test objects returned from the next() method.
|
Test objects returned from the next() method.
|
||||||
It should return all SpecialPhrases objects of
|
It should return all SpecialPhrases objects of
|
||||||
the 'en' special phrases.
|
the 'en' special phrases.
|
||||||
"""
|
"""
|
||||||
phrases = next(sp_wiki_loader)
|
phrases = list(sp_wiki_loader.generate_phrases())
|
||||||
check_phrases_content(phrases)
|
|
||||||
|
|
||||||
def check_phrases_content(phrases):
|
|
||||||
"""
|
|
||||||
Asserts that the given phrases list contains
|
|
||||||
the right phrases of the 'en' special phrases.
|
|
||||||
"""
|
|
||||||
assert set((p.p_label, p.p_class, p.p_type, p.p_operator) for p in phrases) ==\
|
assert set((p.p_label, p.p_class, p.p_type, p.p_operator) for p in phrases) ==\
|
||||||
{('Zip Line', 'aerialway', 'zip_line', '-'),
|
{('Zip Line', 'aerialway', 'zip_line', '-'),
|
||||||
('Zip Lines', 'aerialway', 'zip_line', '-'),
|
('Zip Lines', 'aerialway', 'zip_line', '-'),
|
||||||
|
|||||||
1
test/testdata/sp_csv_test.csv
vendored
1
test/testdata/sp_csv_test.csv
vendored
@@ -18,6 +18,7 @@ Zipline near,aerialway,zip_line,near,N
|
|||||||
Ziplines near,aerialway,zip_line,near,Y
|
Ziplines near,aerialway,zip_line,near,Y
|
||||||
Zipwire,aerialway,zip_line,-,N
|
Zipwire,aerialway,zip_line,-,N
|
||||||
Zipwires,aerialway,zip_line,-,Y
|
Zipwires,aerialway,zip_line,-,Y
|
||||||
|
Zipwires,aerialway,zip_line,name,Y
|
||||||
Zipwire in,aerialway,zip_line,in,N
|
Zipwire in,aerialway,zip_line,in,N
|
||||||
Zipwires in,aerialway,zip_line,in,Y
|
Zipwires in,aerialway,zip_line,in,Y
|
||||||
Zipwire near,aerialway,zip_line,near,N
|
Zipwire near,aerialway,zip_line,near,N
|
||||||
|
|||||||
|
@@ -70,7 +70,7 @@
|
|||||||
<model>wikitext</model>
|
<model>wikitext</model>
|
||||||
<format>text/x-wiki</format>
|
<format>text/x-wiki</format>
|
||||||
<text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve">
|
<text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve">
|
||||||
== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]]
|
== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || "embassy" || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]]
|
||||||
</text>
|
</text>
|
||||||
<sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1>
|
<sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1>
|
||||||
</revision>
|
</revision>
|
||||||
|
|||||||
Reference in New Issue
Block a user