enable flake for Python tests

This commit is contained in:
Sarah Hoffmann
2025-03-09 15:33:24 +01:00
parent 5a245e33e0
commit 4cc788f69e
93 changed files with 949 additions and 1191 deletions

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for the sanitizer that normalizes housenumbers.
@@ -12,11 +12,12 @@ import pytest
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim_db.data.place_info import PlaceInfo
@pytest.fixture
def sanitize(request, def_config):
sanitizer_args = {'step': 'clean-housenumbers'}
for mark in request.node.iter_markers(name="sanitizer_params"):
sanitizer_args.update({k.replace('_', '-') : v for k,v in mark.kwargs.items()})
sanitizer_args.update({k.replace('_', '-'): v for k, v in mark.kwargs.items()})
def _run(**kwargs):
place = PlaceInfo({'address': kwargs})

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for the sanitizer that normalizes postcodes.
@@ -13,12 +13,13 @@ from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim_db.data.place_info import PlaceInfo
from nominatim_db.data import country_info
@pytest.fixture
def sanitize(def_config, request):
country_info.setup_country_config(def_config)
sanitizer_args = {'step': 'clean-postcodes'}
for mark in request.node.iter_markers(name="sanitizer_params"):
sanitizer_args.update({k.replace('_', '-') : v for k,v in mark.kwargs.items()})
sanitizer_args.update({k.replace('_', '-'): v for k, v in mark.kwargs.items()})
def _run(country=None, **kwargs):
pi = {'address': kwargs}

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for sanitizer that clean up TIGER tags.
@@ -12,16 +12,17 @@ import pytest
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim_db.data.place_info import PlaceInfo
class TestCleanTigerTags:
@pytest.fixture(autouse=True)
def setup_country(self, def_config):
self.config = def_config
def run_sanitizer_on(self, addr):
place = PlaceInfo({'address': addr})
_, outaddr = PlaceSanitizer([{'step': 'clean-tiger-tags'}], self.config).process_names(place)
_, outaddr = PlaceSanitizer([{'step': 'clean-tiger-tags'}],
self.config).process_names(place)
return sorted([(p.name, p.kind, p.suffix) for p in outaddr])
@@ -31,13 +32,11 @@ class TestCleanTigerTags:
assert self.run_sanitizer_on({'tiger:county': inname})\
== [(outname, 'county', 'tiger')]
@pytest.mark.parametrize('name', ('Hamilton', 'Big, Road', ''))
def test_badly_formatted(self, name):
assert self.run_sanitizer_on({'tiger:county': name})\
== [(name, 'county', 'tiger')]
def test_unmatched(self):
assert self.run_sanitizer_on({'tiger:country': 'US'})\
== [('US', 'tiger', 'country')]

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for the sanitizer that normalizes housenumbers.
@@ -22,18 +22,15 @@ class TestWithDefault:
def run_sanitizer_on(self, type, **kwargs):
place = PlaceInfo({type: {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': 'de', 'rank_address': 30})
'country_code': 'de', 'rank_address': 30})
sanitizer_args = {'step': 'delete-tags'}
name, address = PlaceSanitizer([sanitizer_args],
self.config).process_names(place)
return {
'name': sorted([(p.name, p.kind, p.suffix or '') for p in name]),
'address': sorted([(p.name, p.kind, p.suffix or '') for p in address])
}
self.config).process_names(place)
return {'name': sorted([(p.name, p.kind, p.suffix or '') for p in name]),
'address': sorted([(p.name, p.kind, p.suffix or '') for p in address])}
def test_on_name(self):
res = self.run_sanitizer_on('name', name='foo', ref='bar', ref_abc='baz')
@@ -44,7 +41,7 @@ class TestWithDefault:
res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')
assert res.get('address') == [('bar', 'ref', ''), ('baz', 'ref', 'abc'),
('foo', 'name', '')]
('foo', 'name', '')]
class TestTypeField:
@@ -56,15 +53,13 @@ class TestTypeField:
def run_sanitizer_on(self, type, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': 'de', 'rank_address': 30})
'country_code': 'de', 'rank_address': 30})
sanitizer_args = {
'step': 'delete-tags',
'type': type,
}
sanitizer_args = {'step': 'delete-tags',
'type': type}
name, _ = PlaceSanitizer([sanitizer_args],
self.config).process_names(place)
self.config).process_names(place)
return sorted([(p.name, p.kind, p.suffix or '') for p in name])
@@ -77,7 +72,8 @@ class TestTypeField:
res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')
assert res == [('bar', 'ref', ''), ('baz', 'ref', 'abc'),
('foo', 'name', '')]
('foo', 'name', '')]
class TestFilterKind:
@@ -88,15 +84,13 @@ class TestFilterKind:
def run_sanitizer_on(self, filt, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': 'de', 'rank_address': 30})
'country_code': 'de', 'rank_address': 30})
sanitizer_args = {
'step': 'delete-tags',
'filter-kind': filt,
}
sanitizer_args = {'step': 'delete-tags',
'filter-kind': filt}
name, _ = PlaceSanitizer([sanitizer_args],
self.config).process_names(place)
self.config).process_names(place)
return sorted([(p.name, p.kind, p.suffix or '') for p in name])
@@ -106,7 +100,6 @@ class TestFilterKind:
assert res == [('bar', 'ref', 'abc'), ('foo', 'ref', '')]
def test_single_pattern(self):
res = self.run_sanitizer_on(['.*name'],
name_fr='foo', ref_fr='foo', namexx_fr='bar',
@@ -114,7 +107,6 @@ class TestFilterKind:
assert res == [('bar', 'namexx', 'fr'), ('foo', 'ref', 'fr')]
def test_multiple_patterns(self):
res = self.run_sanitizer_on(['.*name', 'ref'],
name_fr='foo', ref_fr='foo', oldref_fr='foo',
@@ -132,19 +124,16 @@ class TestRankAddress:
def run_sanitizer_on(self, rank_addr, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': 'de', 'rank_address': 30})
'country_code': 'de', 'rank_address': 30})
sanitizer_args = {
'step': 'delete-tags',
'rank_address': rank_addr
}
sanitizer_args = {'step': 'delete-tags',
'rank_address': rank_addr}
name, _ = PlaceSanitizer([sanitizer_args],
self.config).process_names(place)
self.config).process_names(place)
return sorted([(p.name, p.kind, p.suffix or '') for p in name])
def test_single_rank(self):
res = self.run_sanitizer_on('30', name='foo', ref='bar')
@@ -185,33 +174,29 @@ class TestSuffix:
def run_sanitizer_on(self, suffix, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': 'de', 'rank_address': 30})
'country_code': 'de', 'rank_address': 30})
sanitizer_args = {
'step': 'delete-tags',
'suffix': suffix,
}
sanitizer_args = {'step': 'delete-tags',
'suffix': suffix}
name, _ = PlaceSanitizer([sanitizer_args],
self.config).process_names(place)
self.config).process_names(place)
return sorted([(p.name, p.kind, p.suffix or '') for p in name])
def test_single_suffix(self):
res = self.run_sanitizer_on('abc', name='foo', name_abc='foo',
name_pqr='bar', ref='bar', ref_abc='baz')
name_pqr='bar', ref='bar', ref_abc='baz')
assert res == [('bar', 'name', 'pqr'), ('bar', 'ref', ''), ('foo', 'name', '')]
def test_multiple_suffix(self):
res = self.run_sanitizer_on(['abc.*', 'pqr'], name='foo', name_abcxx='foo',
ref_pqr='bar', name_pqrxx='baz')
ref_pqr='bar', name_pqrxx='baz')
assert res == [('baz', 'name', 'pqrxx'), ('foo', 'name', '')]
class TestCountryCodes:
@pytest.fixture(autouse=True)
@@ -221,19 +206,16 @@ class TestCountryCodes:
def run_sanitizer_on(self, country_code, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': 'de', 'rank_address': 30})
'country_code': 'de', 'rank_address': 30})
sanitizer_args = {
'step': 'delete-tags',
'country_code': country_code,
}
sanitizer_args = {'step': 'delete-tags',
'country_code': country_code}
name, _ = PlaceSanitizer([sanitizer_args],
self.config).process_names(place)
self.config).process_names(place)
return sorted([(p.name, p.kind) for p in name])
def test_single_country_code_pass(self):
res = self.run_sanitizer_on('de', name='foo', ref='bar')
@@ -259,6 +241,7 @@ class TestCountryCodes:
assert res == [('bar', 'ref'), ('foo', 'name')]
class TestAllParameters:
@pytest.fixture(autouse=True)
@@ -268,7 +251,7 @@ class TestAllParameters:
def run_sanitizer_on(self, country_code, rank_addr, suffix, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': 'de', 'rank_address': 30})
'country_code': 'de', 'rank_address': 30})
sanitizer_args = {
'step': 'delete-tags',
@@ -281,11 +264,10 @@ class TestAllParameters:
}
name, _ = PlaceSanitizer([sanitizer_args],
self.config).process_names(place)
self.config).process_names(place)
return sorted([(p.name, p.kind, p.suffix or '') for p in name])
def test_string_arguments_pass(self):
res = self.run_sanitizer_on('de', '25-30', r'[\s\S]*',
name='foo', ref='foo', name_abc='bar', ref_abc='baz')

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for sanitizer configuration helper functions.
@@ -12,6 +12,7 @@ import pytest
from nominatim_db.errors import UsageError
from nominatim_db.tokenizer.sanitizers.config import SanitizerConfig
def test_string_list_default_empty():
assert SanitizerConfig().get_string_list('op') == []
@@ -53,7 +54,7 @@ def test_create_split_regex_no_params_unsplit(inp):
('ying;;yang', ['ying', 'yang']),
(';a; ;c;d,', ['', 'a', '', 'c', 'd', '']),
('1, 3 ,5', ['1', '3', '5'])
])
])
def test_create_split_regex_no_params_split(inp, outp):
regex = SanitizerConfig().get_delimiter()
@@ -70,7 +71,7 @@ def test_create_split_regex_custom(delimiter):
def test_create_split_regex_empty_delimiter():
with pytest.raises(UsageError):
regex = SanitizerConfig({'delimiters': ''}).get_delimiter()
SanitizerConfig({'delimiters': ''}).get_delimiter()
@pytest.mark.parametrize('inp', ('name', 'name:de', 'na\\me', '.*', ''))
@@ -96,12 +97,12 @@ def test_create_name_filter_no_param_default_fail_all(inp):
def test_create_name_filter_no_param_default_invalid_string():
with pytest.raises(ValueError):
filt = SanitizerConfig().get_filter('name', 'abc')
SanitizerConfig().get_filter('name', 'abc')
def test_create_name_filter_no_param_default_empty_list():
with pytest.raises(ValueError):
filt = SanitizerConfig().get_filter('name', [])
SanitizerConfig().get_filter('name', [])
@pytest.mark.parametrize('kind', ('de', 'name:de', 'ende'))
@@ -121,7 +122,7 @@ def test_create_kind_filter_default_negetive(kind):
@pytest.mark.parametrize('kind', ('lang', 'lang:de', 'langxx'))
def test_create_kind_filter_custom_regex_positive(kind):
filt = SanitizerConfig({'filter-kind': 'lang.*'}
).get_filter('filter-kind', ['.*fr'])
).get_filter('filter-kind', ['.*fr'])
assert filt(kind)
@@ -136,7 +137,7 @@ def test_create_kind_filter_custom_regex_negative(kind):
@pytest.mark.parametrize('kind', ('name', 'fr', 'name:fr', 'frfr', '34'))
def test_create_kind_filter_many_positive(kind):
filt = SanitizerConfig({'filter-kind': ['.*fr', 'name', r'\d+']}
).get_filter('filter-kind')
).get_filter('filter-kind')
assert filt(kind)
@@ -144,6 +145,6 @@ def test_create_kind_filter_many_positive(kind):
@pytest.mark.parametrize('kind', ('name:de', 'fridge', 'a34', '.*', '\\'))
def test_create_kind_filter_many_negative(kind):
filt = SanitizerConfig({'filter-kind': ['.*fr', 'name', r'\d+']}
).get_filter('filter-kind')
).get_filter('filter-kind')
assert not filt(kind)

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for the sanitizer that splits multivalue lists.
@@ -14,20 +14,19 @@ from nominatim_db.data.place_info import PlaceInfo
from nominatim_db.errors import UsageError
class TestSplitName:
@pytest.fixture(autouse=True)
def setup_country(self, def_config):
self.config = def_config
def run_sanitizer_on(self, **kwargs):
place = PlaceInfo({'name': kwargs})
name, _ = PlaceSanitizer([{'step': 'split-name-list'}], self.config).process_names(place)
return sorted([(p.name, p.kind, p.suffix) for p in name])
def sanitize_with_delimiter(self, delimiter, name):
place = PlaceInfo({'name': {'name': name}})
san = PlaceSanitizer([{'step': 'split-name-list', 'delimiters': delimiter}],
@@ -36,12 +35,10 @@ class TestSplitName:
return sorted([p.name for p in name])
def test_simple(self):
assert self.run_sanitizer_on(name='ABC') == [('ABC', 'name', None)]
assert self.run_sanitizer_on(name='') == [('', 'name', None)]
def test_splits(self):
assert self.run_sanitizer_on(name='A;B;C') == [('A', 'name', None),
('B', 'name', None),
@@ -49,7 +46,6 @@ class TestSplitName:
assert self.run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None),
('boat', 'short_name', None)]
def test_empty_fields(self):
assert self.run_sanitizer_on(name='A;;B') == [('A', 'name', None),
('B', 'name', None)]
@@ -58,14 +54,12 @@ class TestSplitName:
assert self.run_sanitizer_on(name=' ;B') == [('B', 'name', None)]
assert self.run_sanitizer_on(name='B,') == [('B', 'name', None)]
def test_custom_delimiters(self):
assert self.sanitize_with_delimiter(':', '12:45,3') == ['12', '45,3']
assert self.sanitize_with_delimiter('\\', 'a;\\b!#@ \\') == ['a;', 'b!#@']
assert self.sanitize_with_delimiter('[]', 'foo[to]be') == ['be', 'foo', 'to']
assert self.sanitize_with_delimiter(' ', 'morning sun') == ['morning', 'sun']
def test_empty_delimiter_set(self):
with pytest.raises(UsageError):
self.sanitize_with_delimiter('', 'abc')

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for the sanitizer that handles braced suffixes.
@@ -12,6 +12,7 @@ import pytest
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim_db.data.place_info import PlaceInfo
class TestStripBrace:
@pytest.fixture(autouse=True)
@@ -24,23 +25,19 @@ class TestStripBrace:
return sorted([(p.name, p.kind, p.suffix) for p in name])
def test_no_braces(self):
assert self.run_sanitizer_on(name='foo', ref='23') == [('23', 'ref', None),
('foo', 'name', None)]
def test_simple_braces(self):
assert self.run_sanitizer_on(name='Halle (Saale)', ref='3')\
== [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)]
assert self.run_sanitizer_on(name='ack ( bar')\
== [('ack', 'name', None), ('ack ( bar', 'name', None)]
assert self.run_sanitizer_on(name='Halle (Saale)', ref='3') \
== [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)]
assert self.run_sanitizer_on(name='ack ( bar') \
== [('ack', 'name', None), ('ack ( bar', 'name', None)]
def test_only_braces(self):
assert self.run_sanitizer_on(name='(maybe)') == [('(maybe)', 'name', None)]
def test_double_braces(self):
assert self.run_sanitizer_on(name='a((b))') == [('a', 'name', None),
('a((b))', 'name', None)]

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for the sanitizer that enables language-dependent analyzers.
@@ -13,13 +13,13 @@ from nominatim_db.data.place_info import PlaceInfo
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim_db.data.country_info import setup_country_config
class TestWithDefaults:
@pytest.fixture(autouse=True)
def setup_country(self, def_config):
self.config = def_config
def run_sanitizer_on(self, country, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': country})
@@ -28,19 +28,16 @@ class TestWithDefaults:
return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name])
def test_no_names(self):
assert self.run_sanitizer_on('de') == []
def test_simple(self):
res = self.run_sanitizer_on('fr', name='Foo',name_de='Zoo', ref_abc='M')
res = self.run_sanitizer_on('fr', name='Foo', name_de='Zoo', ref_abc='M')
assert res == [('Foo', 'name', None, {}),
('M', 'ref', 'abc', {'analyzer': 'abc'}),
('Zoo', 'name', 'de', {'analyzer': 'de'})]
@pytest.mark.parametrize('suffix', ['DE', 'asbc'])
def test_illegal_suffix(self, suffix):
assert self.run_sanitizer_on('fr', **{'name_' + suffix: 'Foo'}) \
@@ -53,7 +50,6 @@ class TestFilterKind:
def setup_country(self, def_config):
self.config = def_config
def run_sanitizer_on(self, filt, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': 'de'})
@@ -63,17 +59,15 @@ class TestFilterKind:
return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name])
def test_single_exact_name(self):
res = self.run_sanitizer_on(['name'], name_fr='A', ref_fr='12',
shortname_fr='C', name='D')
shortname_fr='C', name='D')
assert res == [('12', 'ref', 'fr', {}),
('A', 'name', 'fr', {'analyzer': 'fr'}),
('C', 'shortname', 'fr', {}),
('D', 'name', None, {})]
def test_single_pattern(self):
res = self.run_sanitizer_on(['.*name'],
name_fr='A', ref_fr='12', namexx_fr='B',
@@ -85,7 +79,6 @@ class TestFilterKind:
('C', 'shortname', 'fr', {'analyzer': 'fr'}),
('D', 'name', None, {})]
def test_multiple_patterns(self):
res = self.run_sanitizer_on(['.*name', 'ref'],
name_fr='A', ref_fr='12', oldref_fr='X',
@@ -106,7 +99,6 @@ class TestDefaultCountry:
setup_country_config(def_config)
self.config = def_config
def run_sanitizer_append(self, mode, country, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': country})
@@ -122,7 +114,6 @@ class TestDefaultCountry:
return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
def run_sanitizer_replace(self, mode, country, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': country})
@@ -138,7 +129,6 @@ class TestDefaultCountry:
return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
def test_missing_country(self):
place = PlaceInfo({'name': {'name': 'something'}})
name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
@@ -151,59 +141,50 @@ class TestDefaultCountry:
assert name[0].suffix is None
assert 'analyzer' not in name[0].attr
def test_mono_unknown_country(self):
expect = [('XX', '')]
assert self.run_sanitizer_replace('mono', 'xx', name='XX') == expect
assert self.run_sanitizer_append('mono', 'xx', name='XX') == expect
def test_mono_monoling_replace(self):
res = self.run_sanitizer_replace('mono', 'de', name='Foo')
assert res == [('Foo', 'de')]
def test_mono_monoling_append(self):
res = self.run_sanitizer_append('mono', 'de', name='Foo')
assert res == [('Foo', ''), ('Foo', 'de')]
def test_mono_multiling(self):
expect = [('XX', '')]
assert self.run_sanitizer_replace('mono', 'ch', name='XX') == expect
assert self.run_sanitizer_append('mono', 'ch', name='XX') == expect
def test_all_unknown_country(self):
expect = [('XX', '')]
assert self.run_sanitizer_replace('all', 'xx', name='XX') == expect
assert self.run_sanitizer_append('all', 'xx', name='XX') == expect
def test_all_monoling_replace(self):
res = self.run_sanitizer_replace('all', 'de', name='Foo')
assert res == [('Foo', 'de')]
def test_all_monoling_append(self):
res = self.run_sanitizer_append('all', 'de', name='Foo')
assert res == [('Foo', ''), ('Foo', 'de')]
def test_all_multiling_append(self):
res = self.run_sanitizer_append('all', 'ch', name='XX')
assert res == [('XX', ''),
('XX', 'de'), ('XX', 'fr'), ('XX', 'it'), ('XX', 'rm')]
def test_all_multiling_replace(self):
res = self.run_sanitizer_replace('all', 'ch', name='XX')
@@ -216,7 +197,6 @@ class TestCountryWithWhitelist:
def setup_country(self, def_config):
self.config = def_config
def run_sanitizer_on(self, mode, country, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
'country_code': country})
@@ -233,21 +213,17 @@ class TestCountryWithWhitelist:
return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
def test_mono_monoling(self):
assert self.run_sanitizer_on('mono', 'de', name='Foo') == [('Foo', 'de')]
assert self.run_sanitizer_on('mono', 'pt', name='Foo') == [('Foo', '')]
def test_mono_multiling(self):
assert self.run_sanitizer_on('mono', 'ca', name='Foo') == [('Foo', '')]
def test_all_monoling(self):
assert self.run_sanitizer_on('all', 'de', name='Foo') == [('Foo', 'de')]
assert self.run_sanitizer_on('all', 'pt', name='Foo') == [('Foo', '')]
def test_all_multiling(self):
assert self.run_sanitizer_on('all', 'ca', name='Foo') == [('Foo', 'fr')]
assert self.run_sanitizer_on('all', 'ch', name='Foo') \
@@ -260,7 +236,6 @@ class TestWhiteList:
def setup_country(self, def_config):
self.config = def_config
def run_sanitizer_on(self, whitelist, **kwargs):
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}})
name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
@@ -275,14 +250,11 @@ class TestWhiteList:
return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
def test_in_whitelist(self):
assert self.run_sanitizer_on(['de', 'xx'], ref_xx='123') == [('123', 'xx')]
def test_not_in_whitelist(self):
assert self.run_sanitizer_on(['de', 'xx'], ref_yy='123') == [('123', '')]
def test_empty_whitelist(self):
assert self.run_sanitizer_on([], ref_yy='123') == [('123', '')]

View File

@@ -2,86 +2,86 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
from typing import Mapping, Optional, List
import pytest
from nominatim_db.data.place_info import PlaceInfo
from nominatim_db.data.place_name import PlaceName
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
class TestTagJapanese:
@pytest.fixture(autouse=True)
def setup_country(self, def_config):
self.config = def_config
def run_sanitizer_on(self,type, **kwargs):
def run_sanitizer_on(self, type, **kwargs):
place = PlaceInfo({
'address': kwargs,
'country_code': 'jp'
})
sanitizer_args = {'step': 'tag-japanese'}
_, address = PlaceSanitizer([sanitizer_args], self.config).process_names(place)
tmp_list = [(p.name,p.kind) for p in address]
tmp_list = [(p.name, p.kind) for p in address]
return sorted(tmp_list)
def test_on_address(self):
res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')
assert res == [('bar','ref'),('baz','ref_abc'),('foo','name')]
assert res == [('bar', 'ref'), ('baz', 'ref_abc'), ('foo', 'name')]
def test_housenumber(self):
res = self.run_sanitizer_on('address', housenumber='2')
assert res == [('2','housenumber')]
assert res == [('2', 'housenumber')]
def test_blocknumber(self):
res = self.run_sanitizer_on('address', block_number='6')
assert res == [('6','housenumber')]
assert res == [('6', 'housenumber')]
def test_neighbourhood(self):
res = self.run_sanitizer_on('address', neighbourhood='8')
assert res == [('8','place')]
assert res == [('8', 'place')]
def test_quarter(self):
res = self.run_sanitizer_on('address', quarter='kase')
assert res==[('kase','place')]
assert res == [('kase', 'place')]
def test_housenumber_blocknumber(self):
res = self.run_sanitizer_on('address', housenumber='2', block_number='6')
assert res == [('6-2','housenumber')]
assert res == [('6-2', 'housenumber')]
def test_quarter_neighbourhood(self):
res = self.run_sanitizer_on('address', quarter='kase', neighbourhood='8')
assert res == [('kase8','place')]
assert res == [('kase8', 'place')]
def test_blocknumber_housenumber_quarter(self):
res = self.run_sanitizer_on('address', block_number='6', housenumber='2', quarter='kase')
assert res == [('6-2','housenumber'),('kase','place')]
assert res == [('6-2', 'housenumber'), ('kase', 'place')]
def test_blocknumber_housenumber_quarter_neighbourhood(self):
res = self.run_sanitizer_on('address', block_number='6', housenumber='2', neighbourhood='8')
assert res == [('6-2','housenumber'),('8','place')]
assert res == [('6-2', 'housenumber'), ('8', 'place')]
def test_blocknumber_quarter_neighbourhood(self):
res = self.run_sanitizer_on('address',block_number='6', quarter='kase', neighbourhood='8')
assert res == [('6','housenumber'),('kase8','place')]
res = self.run_sanitizer_on('address', block_number='6', quarter='kase', neighbourhood='8')
assert res == [('6', 'housenumber'), ('kase8', 'place')]
def test_blocknumber_quarter(self):
res = self.run_sanitizer_on('address',block_number='6', quarter='kase')
assert res == [('6','housenumber'),('kase','place')]
res = self.run_sanitizer_on('address', block_number='6', quarter='kase')
assert res == [('6', 'housenumber'), ('kase', 'place')]
def test_blocknumber_neighbourhood(self):
res = self.run_sanitizer_on('address',block_number='6', neighbourhood='8')
assert res == [('6','housenumber'),('8','place')]
res = self.run_sanitizer_on('address', block_number='6', neighbourhood='8')
assert res == [('6', 'housenumber'), ('8', 'place')]
def test_housenumber_quarter_neighbourhood(self):
res = self.run_sanitizer_on('address',housenumber='2', quarter='kase', neighbourhood='8')
assert res == [('2','housenumber'),('kase8','place')]
res = self.run_sanitizer_on('address', housenumber='2', quarter='kase', neighbourhood='8')
assert res == [('2', 'housenumber'), ('kase8', 'place')]
def test_housenumber_quarter(self):
res = self.run_sanitizer_on('address',housenumber='2', quarter='kase')
assert res == [('2','housenumber'),('kase','place')]
res = self.run_sanitizer_on('address', housenumber='2', quarter='kase')
assert res == [('2', 'housenumber'), ('kase', 'place')]
def test_housenumber_blocknumber_neighbourhood_quarter(self):
res = self.run_sanitizer_on('address', block_number='6', housenumber='2', quarter='kase', neighbourhood='8')
assert res == [('6-2','housenumber'),('kase8','place')]
res = self.run_sanitizer_on('address', block_number='6', housenumber='2',
quarter='kase', neighbourhood='8')
assert res == [('6-2', 'housenumber'), ('kase8', 'place')]