forked from hans/Nominatim
add unit tests for new sanitizer functions
This commit is contained in:
65
test/python/tokenizer/sanitizers/test_split_name_list.py
Normal file
65
test/python/tokenizer/sanitizers/test_split_name_list.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""
|
||||
Tests for the sanitizer that splits multi-value lists.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
|
||||
from nominatim.indexer.place_info import PlaceInfo
|
||||
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
def run_sanitizer_on(**kwargs):
    """Run the 'split-name-list' sanitizer over the given name tags.

    The keyword arguments are used as the 'name' dictionary of the place.
    Returns a sorted list of (name, kind, suffix) tuples, one per
    resulting name.
    """
    info = PlaceInfo({'name': kwargs})
    steps = PlaceSanitizer([{'step': 'split-name-list'}])
    names, _ = steps.process_names(info)

    result = [(item.name, item.kind, item.suffix) for item in names]
    result.sort()
    return result
|
||||
|
||||
|
||||
def sanitize_with_delimiter(delimiter, name):
    """Run 'split-name-list' with a custom 'delimiters' setting on one name.

    `name` is stored under the 'name' tag of the place. Returns the
    sorted list of the resulting name strings.
    """
    info = PlaceInfo({'name': {'name': name}})
    rules = [{'step': 'split-name-list', 'delimiters': delimiter}]
    names, _ = PlaceSanitizer(rules).process_names(info)

    return sorted(item.name for item in names)
|
||||
|
||||
|
||||
def test_simple():
    """A value without delimiters passes through unchanged, even when empty."""
    for value in ('ABC', ''):
        assert run_sanitizer_on(name=value) == [(value, 'name', None)]
|
||||
|
||||
|
||||
def test_splits():
    """Values are split at ';' and ',' and surrounding whitespace is removed."""
    semicolon_parts = run_sanitizer_on(name='A;B;C')
    assert semicolon_parts == [('A', 'name', None),
                               ('B', 'name', None),
                               ('C', 'name', None)]

    comma_parts = run_sanitizer_on(short_name=' House, boat ')
    assert comma_parts == [('House', 'short_name', None),
                           ('boat', 'short_name', None)]
|
||||
|
||||
|
||||
def test_empty_fields():
    """Empty and whitespace-only parts of a list do not show up in the result."""
    cases = (
        ('A;;B', ['A', 'B']),
        ('A; ,B', ['A', 'B']),
        (' ;B', ['B']),
        ('B,', ['B']),
    )
    for raw, parts in cases:
        expected = [(part, 'name', None) for part in parts]
        assert run_sanitizer_on(name=raw) == expected
|
||||
|
||||
|
||||
def test_custom_delimiters():
    """Only the configured delimiter characters are used for splitting."""
    cases = (
        (':', '12:45,3', ['12', '45,3']),
        ('\\', 'a;\\b!#@ \\', ['a;', 'b!#@']),
        ('[]', 'foo[to]be', ['be', 'foo', 'to']),
        (' ', 'morning sun', ['morning', 'sun']),
    )
    for delimiters, raw, expected in cases:
        assert sanitize_with_delimiter(delimiters, raw) == expected
|
||||
|
||||
|
||||
def test_empty_delimiter_set():
    """An empty 'delimiters' setting is rejected with a UsageError."""
    no_delimiters = ''
    with pytest.raises(UsageError):
        sanitize_with_delimiter(no_delimiters, 'abc')
|
||||
|
||||
|
||||
def test_no_name_list():
    """A place with address tags only yields no names and one address entry."""
    address_only = PlaceInfo({'address': {'housenumber': '3'}})
    steps = PlaceSanitizer([{'step': 'split-name-list'}])

    names, addresses = steps.process_names(address_only)

    assert not names
    assert len(addresses) == 1
|
||||
44
test/python/tokenizer/sanitizers/test_strip_brace_terms.py
Normal file
44
test/python/tokenizer/sanitizers/test_strip_brace_terms.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
Tests for the sanitizer that handles braced suffixes.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
|
||||
from nominatim.indexer.place_info import PlaceInfo
|
||||
|
||||
def run_sanitizer_on(**kwargs):
    """Run the 'strip-brace-terms' sanitizer over the given name tags.

    The keyword arguments are used as the 'name' dictionary of the place.
    Returns a sorted list of (name, kind, suffix) tuples, one per
    resulting name.
    """
    info = PlaceInfo({'name': kwargs})
    steps = PlaceSanitizer([{'step': 'strip-brace-terms'}])
    names, _ = steps.process_names(info)

    return sorted((item.name, item.kind, item.suffix) for item in names)
|
||||
|
||||
|
||||
def test_no_braces():
    """Names without brackets are returned unchanged."""
    result = run_sanitizer_on(name='foo', ref='23')

    assert result == [('23', 'ref', None), ('foo', 'name', None)]
|
||||
|
||||
|
||||
def test_simple_braces():
    """A bracketed tail adds a variant without it; the full name is kept."""
    closed = run_sanitizer_on(name='Halle (Saale)', ref='3')
    assert closed == [('3', 'ref', None),
                      ('Halle', 'name', None),
                      ('Halle (Saale)', 'name', None)]

    # An opening bracket without its closing counterpart is treated the same.
    unclosed = run_sanitizer_on(name='ack ( bar')
    assert unclosed == [('ack', 'name', None),
                        ('ack ( bar', 'name', None)]
|
||||
|
||||
|
||||
def test_only_braces():
    """A name that is entirely in brackets gets no additional variant."""
    result = run_sanitizer_on(name='(maybe)')

    assert result == [('(maybe)', 'name', None)]
|
||||
|
||||
|
||||
def test_double_braces():
    """Nested or repeated bracket terms are cut at the first opening bracket."""
    for full_name in ('a((b))', 'a (b) (c)'):
        expected = [('a', 'name', None), (full_name, 'name', None)]
        assert run_sanitizer_on(name=full_name) == expected
|
||||
|
||||
|
||||
def test_no_names():
    """A place with address tags only yields no names and one address entry."""
    address_only = PlaceInfo({'address': {'housenumber': '3'}})
    steps = PlaceSanitizer([{'step': 'strip-brace-terms'}])

    names, addresses = steps.process_names(address_only)

    assert not names
    assert len(addresses) == 1
|
||||
71
test/python/tokenizer/test_place_sanitizer.py
Normal file
71
test/python/tokenizer/test_place_sanitizer.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
Tests for execution of the sanitization step.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from nominatim.errors import UsageError
|
||||
import nominatim.tokenizer.place_sanitizer as sanitizer
|
||||
from nominatim.indexer.place_info import PlaceInfo
|
||||
|
||||
|
||||
def test_placeinfo_clone_new_name():
    """clone(name=...) changes the name of the copy only; kind and suffix carry over."""
    original = sanitizer.PlaceName('foo', 'ki', 'su')

    copy = original.clone(name='bar')

    assert original.name == 'foo'
    assert (copy.name, copy.kind, copy.suffix) == ('bar', 'ki', 'su')
|
||||
|
||||
|
||||
def test_placeinfo_clone_merge_attr():
    """clone(attr=...) merges into the copy's attributes, not the original's."""
    original = sanitizer.PlaceName('foo', 'ki', 'su')
    for key, value in (('a1', 'v1'), ('a2', 'v2')):
        original.set_attr(key, value)

    copy = original.clone(attr={'a2': 'new', 'b2': 'foo'})

    # The source object must be unaffected by the clone.
    assert original.get_attr('a2') == 'v2'
    assert original.get_attr('b2') is None

    # The copy inherits 'a1', overrides 'a2' and gains 'b2'.
    for key, value in (('a1', 'v1'), ('a2', 'new'), ('b2', 'foo')):
        assert copy.get_attr(key) == value
|
||||
|
||||
|
||||
def test_placeinfo_has_attr():
    """has_attr() is true for attributes that were set, false for others."""
    item = sanitizer.PlaceName('foo', 'ki', 'su')
    item.set_attr('a1', 'v1')

    known = item.has_attr('a1')
    unknown = item.has_attr('whatever')

    assert known
    assert not unknown
|
||||
|
||||
|
||||
def test_sanitizer_default():
    """process_names() returns PlaceName lists for both names and address."""
    steps = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}])
    place = PlaceInfo({'name': {'name:de:de': '1;2;3'},
                       'address': {'street': 'Bald'}})

    names, addresses = steps.process_names(place)

    # The key 'name:de:de' is expected as kind 'name' with suffix 'de:de'.
    assert len(names) == 3
    for item in names:
        assert isinstance(item, sanitizer.PlaceName)
    for item in names:
        assert item.kind == 'name'
    for item in names:
        assert item.suffix == 'de:de'

    assert len(addresses) == 1
    for item in addresses:
        assert isinstance(item, sanitizer.PlaceName)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('rules', [None, []])
def test_sanitizer_empty_list(rules):
    """Without any rules the name value comes back as a single PlaceName."""
    steps = sanitizer.PlaceSanitizer(rules)
    place = PlaceInfo({'name': {'name:de:de': '1;2;3'}})

    names, _ = steps.process_names(place)

    assert len(names) == 1
    for item in names:
        assert isinstance(item, sanitizer.PlaceName)
|
||||
|
||||
|
||||
def test_sanitizer_missing_step_definition():
    """A rule without a 'step' key makes PlaceSanitizer raise a UsageError."""
    # The constructor call itself is expected to raise, so the result is
    # deliberately not bound to a name.
    with pytest.raises(UsageError):
        sanitizer.PlaceSanitizer([{'id': 'split-name-list'}])
|
||||
Reference in New Issue
Block a user