add unit tests for new sanitizer functions

This commit is contained in:
Sarah Hoffmann
2021-10-01 09:50:17 +02:00
parent 8171fe4571
commit 732cd27d2e
5 changed files with 191 additions and 4 deletions

View File

@@ -0,0 +1,65 @@
"""
Tests for the sanitizer that splits multivalue lists.
"""
import pytest
from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim.indexer.place_info import PlaceInfo
from nominatim.errors import UsageError
def run_sanitizer_on(**kwargs):
    """Run the 'split-name-list' sanitizer step over the given names.

    The keyword arguments are used as the 'name' dictionary of the place.
    Returns the processed names as a sorted list of
    (name, kind, suffix) tuples.
    """
    place = PlaceInfo({'name': kwargs})
    sanitizer = PlaceSanitizer([{'step': 'split-name-list'}])
    names, _ = sanitizer.process_names(place)

    result = [(item.name, item.kind, item.suffix) for item in names]
    # Sort so that assertions do not depend on the output order.
    result.sort()
    return result
def sanitize_with_delimiter(delimiter, name):
    """Run 'split-name-list' with a custom 'delimiters' setting.

    `name` is used as the value of the place's 'name' entry and
    `delimiter` is passed as the step's 'delimiters' option.
    Returns the sorted list of resulting name strings.
    """
    place = PlaceInfo({'name': {'name': name}})
    step_config = {'step': 'split-name-list', 'delimiters': delimiter}
    names, _ = PlaceSanitizer([step_config]).process_names(place)

    return sorted(item.name for item in names)
def test_simple():
    """Names without a delimiter come back unchanged, including ''."""
    for value in ('ABC', ''):
        assert run_sanitizer_on(name=value) == [(value, 'name', None)]
def test_splits():
    """Values are split at ';' and ',' and the parts are stripped."""
    semicolon_parts = run_sanitizer_on(name='A;B;C')
    expected_semicolon = [
        ('A', 'name', None),
        ('B', 'name', None),
        ('C', 'name', None),
    ]
    assert semicolon_parts == expected_semicolon

    # The kind of the original entry is kept for each part.
    comma_parts = run_sanitizer_on(short_name=' House, boat ')
    expected_comma = [
        ('House', 'short_name', None),
        ('boat', 'short_name', None),
    ]
    assert comma_parts == expected_comma
def test_empty_fields():
    """Empty or whitespace-only parts are dropped from the result."""
    cases = (
        ('A;;B', ['A', 'B']),
        ('A; ,B', ['A', 'B']),
        (' ;B', ['B']),
        ('B,', ['B']),
    )
    for raw_value, parts in cases:
        expected = [(part, 'name', None) for part in parts]
        assert run_sanitizer_on(name=raw_value) == expected
def test_custom_delimiters():
    """Only the configured delimiter characters are used for splitting."""
    cases = (
        (':', '12:45,3', ['12', '45,3']),
        ('\\', 'a;\\b!#@ \\', ['a;', 'b!#@']),
        ('[]', 'foo[to]be', ['be', 'foo', 'to']),
        (' ', 'morning sun', ['morning', 'sun']),
    )
    for delimiter, raw_value, expected in cases:
        assert sanitize_with_delimiter(delimiter, raw_value) == expected
def test_empty_delimiter_set():
    """An empty 'delimiters' setting must raise a UsageError."""
    with pytest.raises(UsageError):
        sanitize_with_delimiter(delimiter='', name='abc')
def test_no_name_list():
    """A place with only address data yields no names and one address item."""
    place = PlaceInfo({'address': {'housenumber': '3'}})
    sanitizer = PlaceSanitizer([{'step': 'split-name-list'}])

    names, address_items = sanitizer.process_names(place)

    assert not names
    assert len(address_items) == 1

View File

@@ -0,0 +1,44 @@
"""
Tests for the sanitizer that handles braced suffixes.
"""
import pytest
from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim.indexer.place_info import PlaceInfo
def run_sanitizer_on(**kwargs):
    """Run the 'strip-brace-terms' sanitizer step over the given names.

    The keyword arguments are used as the 'name' dictionary of the place.
    Returns the processed names as a sorted list of
    (name, kind, suffix) tuples.
    """
    place = PlaceInfo({'name': kwargs})
    sanitizer = PlaceSanitizer([{'step': 'strip-brace-terms'}])
    names, _ = sanitizer.process_names(place)

    result = [(item.name, item.kind, item.suffix) for item in names]
    # Sort so that assertions do not depend on the output order.
    result.sort()
    return result
def test_no_braces():
    """Names without any brace are returned unchanged."""
    result = run_sanitizer_on(name='foo', ref='23')
    expected = [('23', 'ref', None), ('foo', 'name', None)]
    assert result == expected
def test_simple_braces():
    """A trailing braced term adds a name variant without that term.

    Checked for a properly closed brace and for an opening brace that
    is never closed.
    """
    closed = run_sanitizer_on(name='Halle (Saale)', ref='3')
    assert closed == [
        ('3', 'ref', None),
        ('Halle', 'name', None),
        ('Halle (Saale)', 'name', None),
    ]

    unclosed = run_sanitizer_on(name='ack ( bar')
    assert unclosed == [
        ('ack', 'name', None),
        ('ack ( bar', 'name', None),
    ]
def test_only_braces():
    """A name consisting only of a braced term gets no extra variant."""
    braced_name = '(maybe)'
    result = run_sanitizer_on(name=braced_name)
    assert result == [(braced_name, 'name', None)]
def test_double_braces():
    """Nested or repeated braced terms are stripped from the first brace on."""
    for full_name in ('a((b))', 'a (b) (c)'):
        expected = [('a', 'name', None), (full_name, 'name', None)]
        assert run_sanitizer_on(name=full_name) == expected
def test_no_names():
    """A place with only address data yields no names and one address item."""
    place = PlaceInfo({'address': {'housenumber': '3'}})
    sanitizer = PlaceSanitizer([{'step': 'strip-brace-terms'}])

    names, address_items = sanitizer.process_names(place)

    assert not names
    assert len(address_items) == 1