add unit tests for new sanitizer functions

This commit is contained in:
Sarah Hoffmann
2021-10-01 09:50:17 +02:00
parent 8171fe4571
commit 732cd27d2e
5 changed files with 191 additions and 4 deletions

View File

@@ -3,13 +3,19 @@ Name processor that splits name values with multiple values into their component
"""
import re
from nominatim.errors import UsageError
def create(func):
""" Create a name processing function that splits name values with
multiple values into their components. The optional parameter
'delimiters' can be used to define the characters that should be used
for splitting. The default is ',;'.
"""
regexp = re.compile('[{}]'.format(func.get('delimiters', ',;')))
delimiter_set = set(func.get('delimiters', ',;'))
if not delimiter_set:
raise UsageError("Set of delimiters in split-name-list sanitizer is empty.")
regexp = re.compile('\\s*[{}]\\s*'.format(''.join('\\' + d for d in delimiter_set)))
def _process(obj):
if not obj.names:
@@ -18,10 +24,11 @@ def create(func):
new_names = []
for name in obj.names:
split_names = regexp.split(name.name)
print(split_names)
if len(split_names) == 1:
new_names.append(name)
else:
new_names.extend(name.clone(name=n) for n in split_names)
new_names.extend(name.clone(name=n) for n in split_names if n)
obj.names = new_names

View File

@@ -10,13 +10,13 @@ def create(_):
def _process(obj):
""" Add variants for names that have a bracket extension.
"""
new_names = []
if obj.names:
new_names = []
for name in (n for n in obj.names if '(' in n.name):
new_name = name.name.split('(')[0].strip()
if new_name:
new_names.append(name.clone(name=new_name))
obj.names.extend(new_names)
obj.names.extend(new_names)
return _process