mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-10 03:54:06 +00:00
add unit tests for new sanatizer functions
This commit is contained in:
@@ -3,13 +3,19 @@ Name processor that splits name values with multiple values into their component
|
||||
"""
|
||||
import re
|
||||
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
def create(func):
|
||||
""" Create a name processing function that splits name values with
|
||||
multiple values into their components. The optional parameter
|
||||
'delimiters' can be used to define the characters that should be used
|
||||
for splitting. The default is ',;'.
|
||||
"""
|
||||
regexp = re.compile('[{}]'.format(func.get('delimiters', ',;')))
|
||||
delimiter_set = set(func.get('delimiters', ',;'))
|
||||
if not delimiter_set:
|
||||
raise UsageError("Set of delimiters in split-name-list sanitizer is empty.")
|
||||
|
||||
regexp = re.compile('\\s*[{}]\\s*'.format(''.join('\\' + d for d in delimiter_set)))
|
||||
|
||||
def _process(obj):
|
||||
if not obj.names:
|
||||
@@ -18,10 +24,11 @@ def create(func):
|
||||
new_names = []
|
||||
for name in obj.names:
|
||||
split_names = regexp.split(name.name)
|
||||
print(split_names)
|
||||
if len(split_names) == 1:
|
||||
new_names.append(name)
|
||||
else:
|
||||
new_names.extend(name.clone(name=n) for n in split_names)
|
||||
new_names.extend(name.clone(name=n) for n in split_names if n)
|
||||
|
||||
obj.names = new_names
|
||||
|
||||
|
||||
@@ -10,13 +10,13 @@ def create(_):
|
||||
def _process(obj):
|
||||
""" Add variants for names that have a bracket extension.
|
||||
"""
|
||||
new_names = []
|
||||
if obj.names:
|
||||
new_names = []
|
||||
for name in (n for n in obj.names if '(' in n.name):
|
||||
new_name = name.name.split('(')[0].strip()
|
||||
if new_name:
|
||||
new_names.append(name.clone(name=new_name))
|
||||
|
||||
obj.names.extend(new_names)
|
||||
obj.names.extend(new_names)
|
||||
|
||||
return _process
|
||||
|
||||
Reference in New Issue
Block a user