mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Merge pull request #2602 from lonvia/filter-bad-housenumbers
Handle mistagged housenumbers like names
This commit is contained in:
@@ -10,6 +10,7 @@ ignored-modules=icu,datrie
|
|||||||
# closing added here because it sometimes triggers a false positive with
|
# closing added here because it sometimes triggers a false positive with
|
||||||
# 'with' statements.
|
# 'with' statements.
|
||||||
ignored-classes=NominatimArgs,closing
|
ignored-classes=NominatimArgs,closing
|
||||||
disable=too-few-public-methods,duplicate-code
|
# 'too-many-ancestors' is triggered already by deriving from UserDict
|
||||||
|
disable=too-few-public-methods,duplicate-code,too-many-ancestors
|
||||||
|
|
||||||
good-names=i,x,y,fd,db
|
good-names=i,x,y,fd,db
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ class ICURuleLoader:
|
|||||||
rules = config.load_sub_configuration('icu_tokenizer.yaml',
|
rules = config.load_sub_configuration('icu_tokenizer.yaml',
|
||||||
config='TOKENIZER_CONFIG')
|
config='TOKENIZER_CONFIG')
|
||||||
|
|
||||||
# Make sure country information is available to analyzers and sanatizers.
|
# Make sure country information is available to analyzers and sanitizers.
|
||||||
nominatim.tools.country_info.setup_country_config(config)
|
nominatim.tools.country_info.setup_country_config(config)
|
||||||
|
|
||||||
self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
|
self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ is handed to the token analysis.
|
|||||||
import importlib
|
import importlib
|
||||||
|
|
||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
|
from nominatim.tokenizer.sanitizers.config import SanitizerConfig
|
||||||
|
|
||||||
class PlaceName:
|
class PlaceName:
|
||||||
""" A searchable name for a place together with properties.
|
""" A searchable name for a place together with properties.
|
||||||
@@ -117,7 +118,7 @@ class PlaceSanitizer:
|
|||||||
raise UsageError("Sanitizer rule is missing the 'step' attribute.")
|
raise UsageError("Sanitizer rule is missing the 'step' attribute.")
|
||||||
module_name = 'nominatim.tokenizer.sanitizers.' + func['step'].replace('-', '_')
|
module_name = 'nominatim.tokenizer.sanitizers.' + func['step'].replace('-', '_')
|
||||||
handler_module = importlib.import_module(module_name)
|
handler_module = importlib.import_module(module_name)
|
||||||
self.handlers.append(handler_module.create(func))
|
self.handlers.append(handler_module.create(SanitizerConfig(func)))
|
||||||
|
|
||||||
|
|
||||||
def process_names(self, place):
|
def process_names(self, place):
|
||||||
|
|||||||
@@ -19,15 +19,22 @@ Arguments:
|
|||||||
where each string is a regular expression. An address item
|
where each string is a regular expression. An address item
|
||||||
is considered a house number if the 'kind' fully matches any
|
is considered a house number if the 'kind' fully matches any
|
||||||
of the given regular expressions. (default: 'housenumber')
|
of the given regular expressions. (default: 'housenumber')
|
||||||
|
convert-to-name: Define house numbers that should be treated as a name
|
||||||
|
instead of a house number. Either takes a single string
|
||||||
|
or a list of strings, where each string is a regular
|
||||||
|
expression that must match the full house number value.
|
||||||
"""
|
"""
|
||||||
from nominatim.tokenizer.sanitizers.helpers import create_split_regex, create_kind_filter
|
import re
|
||||||
|
|
||||||
class _HousenumberSanitizer:
|
class _HousenumberSanitizer:
|
||||||
|
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
self.filter_kind = create_kind_filter(config, 'housenumber')
|
self.filter_kind = config.get_filter_kind('housenumber')
|
||||||
self.split_regexp = create_split_regex(config)
|
self.split_regexp = config.get_delimiter()
|
||||||
|
|
||||||
|
nameregexps = config.get_string_list('convert-to-name', [])
|
||||||
|
self.is_name_regexp = [re.compile(r) for r in nameregexps]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, obj):
|
def __call__(self, obj):
|
||||||
@@ -37,8 +44,11 @@ class _HousenumberSanitizer:
|
|||||||
new_address = []
|
new_address = []
|
||||||
for item in obj.address:
|
for item in obj.address:
|
||||||
if self.filter_kind(item):
|
if self.filter_kind(item):
|
||||||
new_address.extend(item.clone(kind='housenumber', name=n)
|
if self._treat_as_name(item.name):
|
||||||
for n in self.sanitize(item.name))
|
obj.names.append(item.clone(kind='housenumber'))
|
||||||
|
else:
|
||||||
|
new_address.extend(item.clone(kind='housenumber', name=n)
|
||||||
|
for n in self.sanitize(item.name))
|
||||||
else:
|
else:
|
||||||
# Don't touch other address items.
|
# Don't touch other address items.
|
||||||
new_address.append(item)
|
new_address.append(item)
|
||||||
@@ -62,6 +72,10 @@ class _HousenumberSanitizer:
|
|||||||
yield hnr
|
yield hnr
|
||||||
|
|
||||||
|
|
||||||
|
def _treat_as_name(self, housenumber):
|
||||||
|
return any(r.fullmatch(housenumber) is not None for r in self.is_name_regexp)
|
||||||
|
|
||||||
|
|
||||||
def create(config):
|
def create(config):
|
||||||
""" Create a housenumber processing function.
|
""" Create a housenumber processing function.
|
||||||
"""
|
"""
|
||||||
|
|||||||
82
nominatim/tokenizer/sanitizers/config.py
Normal file
82
nominatim/tokenizer/sanitizers/config.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
# SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
#
|
||||||
|
# This file is part of Nominatim. (https://nominatim.org)
|
||||||
|
#
|
||||||
|
# Copyright (C) 2022 by the Nominatim developer community.
|
||||||
|
# For a full list of authors see the git log.
|
||||||
|
"""
|
||||||
|
Configuration for Sanitizers.
|
||||||
|
"""
|
||||||
|
from collections import UserDict
|
||||||
|
import re
|
||||||
|
|
||||||
|
from nominatim.errors import UsageError
|
||||||
|
|
||||||
|
class SanitizerConfig(UserDict):
    """ Dictionary with configuration options for a sanitizer.

        In addition to the usual dictionary functions, the class provides
        accessors to standard sanitizer options that are used by many of the
        sanitizers.
    """

    def get_string_list(self, param, default=tuple()):
        """ Extract a configuration parameter as a string list.

            If the parameter value is a simple string, it is returned as a
            one-item list (an empty string yields an empty list). If the
            parameter value does not exist, a list copy of the given default
            is returned, or None when the default is None. If the parameter
            value is a list or tuple, it is checked to contain only strings
            and returned as a new list.

            Raises a UsageError when the value is neither a string nor a
            list/tuple of strings.
        """
        values = self.data.get(param, None)

        if values is None:
            return None if default is None else list(default)

        if isinstance(values, str):
            return [values] if values else []

        if not isinstance(values, (list, tuple)):
            raise UsageError(f"Parameter '{param}' must be string or list of strings.")

        if any(not isinstance(value, str) for value in values):
            raise UsageError(f"Parameter '{param}' must be string or list of strings.")

        # Always hand out a fresh list: tuples are normalised to the
        # documented return type and callers cannot modify the stored
        # configuration through the returned object.
        return list(values)


    def get_delimiter(self, default=',;'):
        """ Return the 'delimiters' parameter in the configuration as a
            compiled regular expression that can be used to split the names on
            the delimiters. The regular expression makes sure that the
            resulting names are stripped and that repeated delimiters
            are ignored but it will still create empty fields on occasion. The
            code needs to filter those.

            The 'default' parameter defines the delimiter set to be used when
            not explicitly configured.

            Raises a UsageError when the delimiter set is empty.
        """
        delimiter_set = set(self.data.get('delimiters', default))
        if not delimiter_set:
            raise UsageError("Empty 'delimiters' parameter not allowed for sanitizer.")

        # re.escape() quotes only what needs quoting. Blindly prefixing each
        # character with a backslash would turn delimiters such as 'd', 's'
        # or 'n' into the regex escapes \d, \s and \n.
        return re.compile('\\s*[{}]+\\s*'.format(''.join(re.escape(d) for d in delimiter_set)))


    def get_filter_kind(self, *default):
        """ Return a filter function for the name kind from the 'filter-kind'
            config parameter. The filter function takes a name item and
            returns True when the item passes the filter.

            If the parameter is empty, the filter lets all items pass. If the
            parameter is a string, it is interpreted as a single regular
            expression that must match the full kind string. If the parameter
            is a list then any of the regular expressions in the list must
            match to pass.

            The positional arguments are the regular expressions to use
            when 'filter-kind' is not configured.
        """
        filters = self.get_string_list('filter-kind', default)

        if not filters:
            return lambda _: True

        regexes = [re.compile(regex) for regex in filters]

        return lambda name: any(regex.fullmatch(name.kind) for regex in regexes)
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
# SPDX-License-Identifier: GPL-2.0-only
|
|
||||||
#
|
|
||||||
# This file is part of Nominatim. (https://nominatim.org)
|
|
||||||
#
|
|
||||||
# Copyright (C) 2022 by the Nominatim developer community.
|
|
||||||
# For a full list of authors see the git log.
|
|
||||||
"""
|
|
||||||
Helper functions for sanitizers.
|
|
||||||
"""
|
|
||||||
import re
|
|
||||||
|
|
||||||
from nominatim.errors import UsageError
|
|
||||||
|
|
||||||
def create_split_regex(config, default=',;'):
    """ Converts the 'delimiters' parameter in the configuration into a
        compiled regular expression that can be used to split the names on the
        delimiters. The regular expression makes sure that the resulting names
        are stripped and that repeated delimiters
        are ignored but it will still create empty fields on occasion. The
        code needs to filter those.

        The 'default' parameter defines the delimiter set to be used when
        not explicitly configured.

        Raises a UsageError when the delimiter set is empty.
    """
    delimiter_set = set(config.get('delimiters', default))
    if not delimiter_set:
        raise UsageError("Empty 'delimiters' parameter not allowed for sanitizer.")

    # re.escape() quotes only what needs quoting. Blindly prefixing each
    # character with a backslash would turn delimiters such as 'd', 's'
    # or 'n' into the regex escapes \d, \s and \n.
    return re.compile('\\s*[{}]+\\s*'.format(''.join(re.escape(d) for d in delimiter_set)))
|
|
||||||
|
|
||||||
|
|
||||||
def create_kind_filter(config, default=None):
    """ Create a filter function for the name kind from the 'filter-kind'
        config parameter. The filter function takes a name item and returns
        True when the item passes the filter, False otherwise.

        If the parameter is empty, the filter lets all items pass. If the
        parameter is a string, it is interpreted as a single regular expression
        that must match the full kind string. If the parameter is a list then
        any of the regular expressions in the list must match to pass.

        The 'default' parameter is used when 'filter-kind' is not configured.
    """
    filters = config.get('filter-kind', default)

    if not filters:
        return lambda _: True

    if isinstance(filters, str):
        # A single string is shorthand for a one-element list. Handling it
        # through the common path makes the filter return a real boolean
        # instead of a match object or None.
        filters = [filters]

    regexes = [re.compile(regex) for regex in filters]

    return lambda name: any(regex.fullmatch(name.kind) for regex in regexes)
|
|
||||||
@@ -11,13 +11,11 @@ Arguments:
|
|||||||
delimiters: Define the set of characters to be used for
|
delimiters: Define the set of characters to be used for
|
||||||
splitting the list. (default: ',;')
|
splitting the list. (default: ',;')
|
||||||
"""
|
"""
|
||||||
from nominatim.tokenizer.sanitizers.helpers import create_split_regex
|
def create(config):
|
||||||
|
|
||||||
def create(func):
|
|
||||||
""" Create a name processing function that splits name values with
|
""" Create a name processing function that splits name values with
|
||||||
multiple values into their components.
|
multiple values into their components.
|
||||||
"""
|
"""
|
||||||
regexp = create_split_regex(func)
|
regexp = config.get_delimiter()
|
||||||
|
|
||||||
def _process(obj):
|
def _process(obj):
|
||||||
if not obj.names:
|
if not obj.names:
|
||||||
|
|||||||
@@ -31,21 +31,20 @@ Arguments:
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
from nominatim.tools import country_info
|
from nominatim.tools import country_info
|
||||||
from nominatim.tokenizer.sanitizers.helpers import create_kind_filter
|
|
||||||
|
|
||||||
class _AnalyzerByLanguage:
|
class _AnalyzerByLanguage:
|
||||||
""" Processor for tagging the language of names in a place.
|
""" Processor for tagging the language of names in a place.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
self.filter_kind = create_kind_filter(config)
|
self.filter_kind = config.get_filter_kind()
|
||||||
self.replace = config.get('mode', 'replace') != 'append'
|
self.replace = config.get('mode', 'replace') != 'append'
|
||||||
self.whitelist = config.get('whitelist')
|
self.whitelist = config.get('whitelist')
|
||||||
|
|
||||||
self.__compute_default_languages(config.get('use-defaults', 'no'))
|
self._compute_default_languages(config.get('use-defaults', 'no'))
|
||||||
|
|
||||||
|
|
||||||
def __compute_default_languages(self, use_defaults):
|
def _compute_default_languages(self, use_defaults):
|
||||||
self.deflangs = {}
|
self.deflangs = {}
|
||||||
|
|
||||||
if use_defaults in ('mono', 'all'):
|
if use_defaults in ('mono', 'all'):
|
||||||
|
|||||||
@@ -25,13 +25,15 @@ transliteration:
|
|||||||
- "[^a-z0-9[:Space:]] >"
|
- "[^a-z0-9[:Space:]] >"
|
||||||
- ":: NFC ()"
|
- ":: NFC ()"
|
||||||
sanitizers:
|
sanitizers:
|
||||||
- step: split-name-list
|
|
||||||
- step: strip-brace-terms
|
|
||||||
- step: clean-housenumbers
|
- step: clean-housenumbers
|
||||||
filter-kind:
|
filter-kind:
|
||||||
- housenumber
|
- housenumber
|
||||||
- conscriptionnumber
|
- conscriptionnumber
|
||||||
- streetnumber
|
- streetnumber
|
||||||
|
convert-to-name:
|
||||||
|
- (\A|.*,)[^\d,]{3,}(,.*|\Z)
|
||||||
|
- step: split-name-list
|
||||||
|
- step: strip-brace-terms
|
||||||
- step: tag-analyzer-by-language
|
- step: tag-analyzer-by-language
|
||||||
filter-kind: [".*name.*"]
|
filter-kind: [".*name.*"]
|
||||||
whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,no,pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
|
whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,no,pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
|
||||||
|
|||||||
@@ -53,3 +53,17 @@ Feature: Searching of house numbers
|
|||||||
| 2;4;12 |
|
| 2;4;12 |
|
||||||
| 2,4,12 |
|
| 2,4,12 |
|
||||||
| 2, 4, 12 |
|
| 2, 4, 12 |
|
||||||
|
|
||||||
|
|
||||||
|
Scenario: A name mapped as a housenumber is found
|
||||||
|
Given the places
|
||||||
|
| osm | class | type | housenr | geometry |
|
||||||
|
| N1 | building | yes | Warring | 9 |
|
||||||
|
And the places
|
||||||
|
| osm | class | type | name | geometry |
|
||||||
|
| W10 | highway | path | Chester St | 1,2,3 |
|
||||||
|
When importing
|
||||||
|
When sending search query "Chester St Warring"
|
||||||
|
Then results contain
|
||||||
|
| osm |
|
||||||
|
| N1 |
|
||||||
|
|||||||
@@ -42,3 +42,27 @@ def test_housenumber_lists(sanitize, number):
|
|||||||
def test_filter_kind(sanitize):
|
def test_filter_kind(sanitize):
|
||||||
assert sanitize(housenumber='34', number='4', badnumber='65') == \
|
assert sanitize(housenumber='34', number='4', badnumber='65') == \
|
||||||
[('badnumber', '65'), ('housenumber', '34'), ('housenumber', '4')]
|
[('badnumber', '65'), ('housenumber', '34'), ('housenumber', '4')]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('number', ('6523', 'n/a', '4'))
|
||||||
|
def test_convert_to_name_converted(number):
|
||||||
|
sanitizer_args = {'step': 'clean-housenumbers',
|
||||||
|
'convert-to-name': (r'\d+', 'n/a')}
|
||||||
|
|
||||||
|
place = PlaceInfo({'address': {'housenumber': number}})
|
||||||
|
names, address = PlaceSanitizer([sanitizer_args]).process_names(place)
|
||||||
|
|
||||||
|
assert ('housenumber', number) in set((p.kind, p.name) for p in names)
|
||||||
|
assert 'housenumber' not in set(p.kind for p in address)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('number', ('a54', 'n.a', 'bow'))
|
||||||
|
def test_convert_to_name_unconverted(number):
|
||||||
|
sanitizer_args = {'step': 'clean-housenumbers',
|
||||||
|
'convert-to-name': (r'\d+', 'n/a')}
|
||||||
|
|
||||||
|
place = PlaceInfo({'address': {'housenumber': number}})
|
||||||
|
names, address = PlaceSanitizer([sanitizer_args]).process_names(place)
|
||||||
|
|
||||||
|
assert 'housenumber' not in set(p.kind for p in names)
|
||||||
|
assert ('housenumber', number) in set((p.kind, p.name) for p in address)
|
||||||
|
|||||||
@@ -5,17 +5,51 @@
|
|||||||
# Copyright (C) 2022 by the Nominatim developer community.
|
# Copyright (C) 2022 by the Nominatim developer community.
|
||||||
# For a full list of authors see the git log.
|
# For a full list of authors see the git log.
|
||||||
"""
|
"""
|
||||||
Tests for sanitizer helper functions.
|
Tests for sanitizer configuration helper functions.
|
||||||
"""
|
"""
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
from nominatim.tokenizer.place_sanitizer import PlaceName
|
from nominatim.tokenizer.place_sanitizer import PlaceName
|
||||||
import nominatim.tokenizer.sanitizers.helpers as helpers
|
from nominatim.tokenizer.sanitizers.config import SanitizerConfig
|
||||||
|
|
||||||
|
def test_string_list_default_empty():
|
||||||
|
assert SanitizerConfig().get_string_list('op') == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_list_default_none():
|
||||||
|
assert SanitizerConfig().get_string_list('op', default=None) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_list_default_something():
|
||||||
|
assert SanitizerConfig().get_string_list('op', default=['a', 'b']) == ['a', 'b']
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_list_value_string():
|
||||||
|
assert SanitizerConfig({'op': 't'}).get_string_list('op', default=['a', 'b']) == ['t']
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_list_value_list():
|
||||||
|
assert SanitizerConfig({'op': ['1', '2']}).get_string_list('op') == ['1', '2']
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_list_value_empty():
|
||||||
|
assert SanitizerConfig({'op': ''}).get_string_list('op', default=['a', 'b']) == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_list_value_dict():
|
||||||
|
with pytest.raises(UsageError):
|
||||||
|
SanitizerConfig({'op': {'1': 'a'}}).get_string_list('op')
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_list_value_int_list():
|
||||||
|
with pytest.raises(UsageError):
|
||||||
|
SanitizerConfig({'op': [1, 2]}).get_string_list('op')
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('inp', ('fg34', 'f\\f', 'morning [glory]', '56.78'))
|
@pytest.mark.parametrize('inp', ('fg34', 'f\\f', 'morning [glory]', '56.78'))
|
||||||
def test_create_split_regex_no_params_unsplit(inp):
|
def test_create_split_regex_no_params_unsplit(inp):
|
||||||
regex = helpers.create_split_regex({})
|
regex = SanitizerConfig().get_delimiter()
|
||||||
|
|
||||||
assert list(regex.split(inp)) == [inp]
|
assert list(regex.split(inp)) == [inp]
|
||||||
|
|
||||||
@@ -26,14 +60,14 @@ def test_create_split_regex_no_params_unsplit(inp):
|
|||||||
('1, 3 ,5', ['1', '3', '5'])
|
('1, 3 ,5', ['1', '3', '5'])
|
||||||
])
|
])
|
||||||
def test_create_split_regex_no_params_split(inp, outp):
|
def test_create_split_regex_no_params_split(inp, outp):
|
||||||
regex = helpers.create_split_regex({})
|
regex = SanitizerConfig().get_delimiter()
|
||||||
|
|
||||||
assert list(regex.split(inp)) == outp
|
assert list(regex.split(inp)) == outp
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('delimiter', ['.', '\\', '[]', ' ', '/.*+'])
|
@pytest.mark.parametrize('delimiter', ['.', '\\', '[]', ' ', '/.*+'])
|
||||||
def test_create_split_regex_custom(delimiter):
|
def test_create_split_regex_custom(delimiter):
|
||||||
regex = helpers.create_split_regex({'delimiters': delimiter})
|
regex = SanitizerConfig({'delimiters': delimiter}).get_delimiter()
|
||||||
|
|
||||||
assert list(regex.split(f'out{delimiter}house')) == ['out', 'house']
|
assert list(regex.split(f'out{delimiter}house')) == ['out', 'house']
|
||||||
assert list(regex.split('out,house')) == ['out,house']
|
assert list(regex.split('out,house')) == ['out,house']
|
||||||
@@ -41,39 +75,39 @@ def test_create_split_regex_custom(delimiter):
|
|||||||
|
|
||||||
def test_create_split_regex_empty_delimiter():
|
def test_create_split_regex_empty_delimiter():
|
||||||
with pytest.raises(UsageError):
|
with pytest.raises(UsageError):
|
||||||
regex = helpers.create_split_regex({'delimiters': ''})
|
regex = SanitizerConfig({'delimiters': ''}).get_delimiter()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('inp', ('name', 'name:de', 'na\\me', '.*'))
|
@pytest.mark.parametrize('inp', ('name', 'name:de', 'na\\me', '.*'))
|
||||||
def test_create_kind_filter_no_params(inp):
|
def test_create_kind_filter_no_params(inp):
|
||||||
filt = helpers.create_kind_filter({})
|
filt = SanitizerConfig().get_filter_kind()
|
||||||
|
|
||||||
assert filt(PlaceName('something', inp, ''))
|
assert filt(PlaceName('something', inp, ''))
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('kind', ('de', 'name:de', 'ende'))
|
@pytest.mark.parametrize('kind', ('de', 'name:de', 'ende'))
|
||||||
def test_create_kind_filter_custom_regex_positive(kind):
|
def test_create_kind_filter_custom_regex_positive(kind):
|
||||||
filt = helpers.create_kind_filter({'filter-kind': '.*de'})
|
filt = SanitizerConfig({'filter-kind': '.*de'}).get_filter_kind()
|
||||||
|
|
||||||
assert filt(PlaceName('something', kind, ''))
|
assert filt(PlaceName('something', kind, ''))
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('kind', ('de ', '123', '', 'bedece'))
|
@pytest.mark.parametrize('kind', ('de ', '123', '', 'bedece'))
|
||||||
def test_create_kind_filter_custom_regex_negative(kind):
|
def test_create_kind_filter_custom_regex_negative(kind):
|
||||||
filt = helpers.create_kind_filter({'filter-kind': '.*de'})
|
filt = SanitizerConfig({'filter-kind': '.*de'}).get_filter_kind()
|
||||||
|
|
||||||
assert not filt(PlaceName('something', kind, ''))
|
assert not filt(PlaceName('something', kind, ''))
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('kind', ('name', 'fr', 'name:fr', 'frfr', '34'))
|
@pytest.mark.parametrize('kind', ('name', 'fr', 'name:fr', 'frfr', '34'))
|
||||||
def test_create_kind_filter_many_positive(kind):
|
def test_create_kind_filter_many_positive(kind):
|
||||||
filt = helpers.create_kind_filter({'filter-kind': ['.*fr', 'name', r'\d+']})
|
filt = SanitizerConfig({'filter-kind': ['.*fr', 'name', r'\d+']}).get_filter_kind()
|
||||||
|
|
||||||
assert filt(PlaceName('something', kind, ''))
|
assert filt(PlaceName('something', kind, ''))
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('kind', ('name:de', 'fridge', 'a34', '.*', '\\'))
|
@pytest.mark.parametrize('kind', ('name:de', 'fridge', 'a34', '.*', '\\'))
|
||||||
def test_create_kind_filter_many_negative(kind):
|
def test_create_kind_filter_many_negative(kind):
|
||||||
filt = helpers.create_kind_filter({'filter-kind': ['.*fr', 'name', r'\d+']})
|
filt = SanitizerConfig({'filter-kind': ['.*fr', 'name', r'\d+']}).get_filter_kind()
|
||||||
|
|
||||||
assert not filt(PlaceName('something', kind, ''))
|
assert not filt(PlaceName('something', kind, ''))
|
||||||
Reference in New Issue
Block a user