mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 05:18:00 +00:00
enable flake for Python tests
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for the sanitizer that normalizes housenumbers.
|
||||
@@ -12,11 +12,12 @@ import pytest
|
||||
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
|
||||
from nominatim_db.data.place_info import PlaceInfo
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sanitize(request, def_config):
|
||||
sanitizer_args = {'step': 'clean-housenumbers'}
|
||||
for mark in request.node.iter_markers(name="sanitizer_params"):
|
||||
sanitizer_args.update({k.replace('_', '-') : v for k,v in mark.kwargs.items()})
|
||||
sanitizer_args.update({k.replace('_', '-'): v for k, v in mark.kwargs.items()})
|
||||
|
||||
def _run(**kwargs):
|
||||
place = PlaceInfo({'address': kwargs})
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for the sanitizer that normalizes postcodes.
|
||||
@@ -13,12 +13,13 @@ from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
|
||||
from nominatim_db.data.place_info import PlaceInfo
|
||||
from nominatim_db.data import country_info
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sanitize(def_config, request):
|
||||
country_info.setup_country_config(def_config)
|
||||
sanitizer_args = {'step': 'clean-postcodes'}
|
||||
for mark in request.node.iter_markers(name="sanitizer_params"):
|
||||
sanitizer_args.update({k.replace('_', '-') : v for k,v in mark.kwargs.items()})
|
||||
sanitizer_args.update({k.replace('_', '-'): v for k, v in mark.kwargs.items()})
|
||||
|
||||
def _run(country=None, **kwargs):
|
||||
pi = {'address': kwargs}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for sanitizer that clean up TIGER tags.
|
||||
@@ -12,16 +12,17 @@ import pytest
|
||||
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
|
||||
from nominatim_db.data.place_info import PlaceInfo
|
||||
|
||||
|
||||
class TestCleanTigerTags:
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_country(self, def_config):
|
||||
self.config = def_config
|
||||
|
||||
|
||||
def run_sanitizer_on(self, addr):
|
||||
place = PlaceInfo({'address': addr})
|
||||
_, outaddr = PlaceSanitizer([{'step': 'clean-tiger-tags'}], self.config).process_names(place)
|
||||
_, outaddr = PlaceSanitizer([{'step': 'clean-tiger-tags'}],
|
||||
self.config).process_names(place)
|
||||
|
||||
return sorted([(p.name, p.kind, p.suffix) for p in outaddr])
|
||||
|
||||
@@ -31,13 +32,11 @@ class TestCleanTigerTags:
|
||||
assert self.run_sanitizer_on({'tiger:county': inname})\
|
||||
== [(outname, 'county', 'tiger')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('name', ('Hamilton', 'Big, Road', ''))
|
||||
def test_badly_formatted(self, name):
|
||||
assert self.run_sanitizer_on({'tiger:county': name})\
|
||||
== [(name, 'county', 'tiger')]
|
||||
|
||||
|
||||
def test_unmatched(self):
|
||||
assert self.run_sanitizer_on({'tiger:country': 'US'})\
|
||||
== [('US', 'tiger', 'country')]
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for the sanitizer that normalizes housenumbers.
|
||||
@@ -22,18 +22,15 @@ class TestWithDefault:
|
||||
def run_sanitizer_on(self, type, **kwargs):
|
||||
|
||||
place = PlaceInfo({type: {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
|
||||
sanitizer_args = {'step': 'delete-tags'}
|
||||
|
||||
name, address = PlaceSanitizer([sanitizer_args],
|
||||
self.config).process_names(place)
|
||||
|
||||
return {
|
||||
'name': sorted([(p.name, p.kind, p.suffix or '') for p in name]),
|
||||
'address': sorted([(p.name, p.kind, p.suffix or '') for p in address])
|
||||
}
|
||||
self.config).process_names(place)
|
||||
|
||||
return {'name': sorted([(p.name, p.kind, p.suffix or '') for p in name]),
|
||||
'address': sorted([(p.name, p.kind, p.suffix or '') for p in address])}
|
||||
|
||||
def test_on_name(self):
|
||||
res = self.run_sanitizer_on('name', name='foo', ref='bar', ref_abc='baz')
|
||||
@@ -44,7 +41,7 @@ class TestWithDefault:
|
||||
res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')
|
||||
|
||||
assert res.get('address') == [('bar', 'ref', ''), ('baz', 'ref', 'abc'),
|
||||
('foo', 'name', '')]
|
||||
('foo', 'name', '')]
|
||||
|
||||
|
||||
class TestTypeField:
|
||||
@@ -56,15 +53,13 @@ class TestTypeField:
|
||||
def run_sanitizer_on(self, type, **kwargs):
|
||||
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
|
||||
sanitizer_args = {
|
||||
'step': 'delete-tags',
|
||||
'type': type,
|
||||
}
|
||||
sanitizer_args = {'step': 'delete-tags',
|
||||
'type': type}
|
||||
|
||||
name, _ = PlaceSanitizer([sanitizer_args],
|
||||
self.config).process_names(place)
|
||||
self.config).process_names(place)
|
||||
|
||||
return sorted([(p.name, p.kind, p.suffix or '') for p in name])
|
||||
|
||||
@@ -77,7 +72,8 @@ class TestTypeField:
|
||||
res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')
|
||||
|
||||
assert res == [('bar', 'ref', ''), ('baz', 'ref', 'abc'),
|
||||
('foo', 'name', '')]
|
||||
('foo', 'name', '')]
|
||||
|
||||
|
||||
class TestFilterKind:
|
||||
|
||||
@@ -88,15 +84,13 @@ class TestFilterKind:
|
||||
def run_sanitizer_on(self, filt, **kwargs):
|
||||
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
|
||||
sanitizer_args = {
|
||||
'step': 'delete-tags',
|
||||
'filter-kind': filt,
|
||||
}
|
||||
sanitizer_args = {'step': 'delete-tags',
|
||||
'filter-kind': filt}
|
||||
|
||||
name, _ = PlaceSanitizer([sanitizer_args],
|
||||
self.config).process_names(place)
|
||||
self.config).process_names(place)
|
||||
|
||||
return sorted([(p.name, p.kind, p.suffix or '') for p in name])
|
||||
|
||||
@@ -106,7 +100,6 @@ class TestFilterKind:
|
||||
|
||||
assert res == [('bar', 'ref', 'abc'), ('foo', 'ref', '')]
|
||||
|
||||
|
||||
def test_single_pattern(self):
|
||||
res = self.run_sanitizer_on(['.*name'],
|
||||
name_fr='foo', ref_fr='foo', namexx_fr='bar',
|
||||
@@ -114,7 +107,6 @@ class TestFilterKind:
|
||||
|
||||
assert res == [('bar', 'namexx', 'fr'), ('foo', 'ref', 'fr')]
|
||||
|
||||
|
||||
def test_multiple_patterns(self):
|
||||
res = self.run_sanitizer_on(['.*name', 'ref'],
|
||||
name_fr='foo', ref_fr='foo', oldref_fr='foo',
|
||||
@@ -132,19 +124,16 @@ class TestRankAddress:
|
||||
def run_sanitizer_on(self, rank_addr, **kwargs):
|
||||
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
|
||||
sanitizer_args = {
|
||||
'step': 'delete-tags',
|
||||
'rank_address': rank_addr
|
||||
}
|
||||
sanitizer_args = {'step': 'delete-tags',
|
||||
'rank_address': rank_addr}
|
||||
|
||||
name, _ = PlaceSanitizer([sanitizer_args],
|
||||
self.config).process_names(place)
|
||||
self.config).process_names(place)
|
||||
|
||||
return sorted([(p.name, p.kind, p.suffix or '') for p in name])
|
||||
|
||||
|
||||
def test_single_rank(self):
|
||||
res = self.run_sanitizer_on('30', name='foo', ref='bar')
|
||||
|
||||
@@ -185,33 +174,29 @@ class TestSuffix:
|
||||
def run_sanitizer_on(self, suffix, **kwargs):
|
||||
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
|
||||
sanitizer_args = {
|
||||
'step': 'delete-tags',
|
||||
'suffix': suffix,
|
||||
}
|
||||
sanitizer_args = {'step': 'delete-tags',
|
||||
'suffix': suffix}
|
||||
|
||||
name, _ = PlaceSanitizer([sanitizer_args],
|
||||
self.config).process_names(place)
|
||||
self.config).process_names(place)
|
||||
|
||||
return sorted([(p.name, p.kind, p.suffix or '') for p in name])
|
||||
|
||||
|
||||
def test_single_suffix(self):
|
||||
res = self.run_sanitizer_on('abc', name='foo', name_abc='foo',
|
||||
name_pqr='bar', ref='bar', ref_abc='baz')
|
||||
name_pqr='bar', ref='bar', ref_abc='baz')
|
||||
|
||||
assert res == [('bar', 'name', 'pqr'), ('bar', 'ref', ''), ('foo', 'name', '')]
|
||||
|
||||
def test_multiple_suffix(self):
|
||||
res = self.run_sanitizer_on(['abc.*', 'pqr'], name='foo', name_abcxx='foo',
|
||||
ref_pqr='bar', name_pqrxx='baz')
|
||||
ref_pqr='bar', name_pqrxx='baz')
|
||||
|
||||
assert res == [('baz', 'name', 'pqrxx'), ('foo', 'name', '')]
|
||||
|
||||
|
||||
|
||||
class TestCountryCodes:
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
@@ -221,19 +206,16 @@ class TestCountryCodes:
|
||||
def run_sanitizer_on(self, country_code, **kwargs):
|
||||
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
|
||||
sanitizer_args = {
|
||||
'step': 'delete-tags',
|
||||
'country_code': country_code,
|
||||
}
|
||||
sanitizer_args = {'step': 'delete-tags',
|
||||
'country_code': country_code}
|
||||
|
||||
name, _ = PlaceSanitizer([sanitizer_args],
|
||||
self.config).process_names(place)
|
||||
self.config).process_names(place)
|
||||
|
||||
return sorted([(p.name, p.kind) for p in name])
|
||||
|
||||
|
||||
def test_single_country_code_pass(self):
|
||||
res = self.run_sanitizer_on('de', name='foo', ref='bar')
|
||||
|
||||
@@ -259,6 +241,7 @@ class TestCountryCodes:
|
||||
|
||||
assert res == [('bar', 'ref'), ('foo', 'name')]
|
||||
|
||||
|
||||
class TestAllParameters:
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
@@ -268,7 +251,7 @@ class TestAllParameters:
|
||||
def run_sanitizer_on(self, country_code, rank_addr, suffix, **kwargs):
|
||||
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
'country_code': 'de', 'rank_address': 30})
|
||||
|
||||
sanitizer_args = {
|
||||
'step': 'delete-tags',
|
||||
@@ -281,11 +264,10 @@ class TestAllParameters:
|
||||
}
|
||||
|
||||
name, _ = PlaceSanitizer([sanitizer_args],
|
||||
self.config).process_names(place)
|
||||
self.config).process_names(place)
|
||||
|
||||
return sorted([(p.name, p.kind, p.suffix or '') for p in name])
|
||||
|
||||
|
||||
def test_string_arguments_pass(self):
|
||||
res = self.run_sanitizer_on('de', '25-30', r'[\s\S]*',
|
||||
name='foo', ref='foo', name_abc='bar', ref_abc='baz')
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for sanitizer configuration helper functions.
|
||||
@@ -12,6 +12,7 @@ import pytest
|
||||
from nominatim_db.errors import UsageError
|
||||
from nominatim_db.tokenizer.sanitizers.config import SanitizerConfig
|
||||
|
||||
|
||||
def test_string_list_default_empty():
|
||||
assert SanitizerConfig().get_string_list('op') == []
|
||||
|
||||
@@ -53,7 +54,7 @@ def test_create_split_regex_no_params_unsplit(inp):
|
||||
('ying;;yang', ['ying', 'yang']),
|
||||
(';a; ;c;d,', ['', 'a', '', 'c', 'd', '']),
|
||||
('1, 3 ,5', ['1', '3', '5'])
|
||||
])
|
||||
])
|
||||
def test_create_split_regex_no_params_split(inp, outp):
|
||||
regex = SanitizerConfig().get_delimiter()
|
||||
|
||||
@@ -70,7 +71,7 @@ def test_create_split_regex_custom(delimiter):
|
||||
|
||||
def test_create_split_regex_empty_delimiter():
|
||||
with pytest.raises(UsageError):
|
||||
regex = SanitizerConfig({'delimiters': ''}).get_delimiter()
|
||||
SanitizerConfig({'delimiters': ''}).get_delimiter()
|
||||
|
||||
|
||||
@pytest.mark.parametrize('inp', ('name', 'name:de', 'na\\me', '.*', ''))
|
||||
@@ -96,12 +97,12 @@ def test_create_name_filter_no_param_default_fail_all(inp):
|
||||
|
||||
def test_create_name_filter_no_param_default_invalid_string():
|
||||
with pytest.raises(ValueError):
|
||||
filt = SanitizerConfig().get_filter('name', 'abc')
|
||||
SanitizerConfig().get_filter('name', 'abc')
|
||||
|
||||
|
||||
def test_create_name_filter_no_param_default_empty_list():
|
||||
with pytest.raises(ValueError):
|
||||
filt = SanitizerConfig().get_filter('name', [])
|
||||
SanitizerConfig().get_filter('name', [])
|
||||
|
||||
|
||||
@pytest.mark.parametrize('kind', ('de', 'name:de', 'ende'))
|
||||
@@ -121,7 +122,7 @@ def test_create_kind_filter_default_negetive(kind):
|
||||
@pytest.mark.parametrize('kind', ('lang', 'lang:de', 'langxx'))
|
||||
def test_create_kind_filter_custom_regex_positive(kind):
|
||||
filt = SanitizerConfig({'filter-kind': 'lang.*'}
|
||||
).get_filter('filter-kind', ['.*fr'])
|
||||
).get_filter('filter-kind', ['.*fr'])
|
||||
|
||||
assert filt(kind)
|
||||
|
||||
@@ -136,7 +137,7 @@ def test_create_kind_filter_custom_regex_negative(kind):
|
||||
@pytest.mark.parametrize('kind', ('name', 'fr', 'name:fr', 'frfr', '34'))
|
||||
def test_create_kind_filter_many_positive(kind):
|
||||
filt = SanitizerConfig({'filter-kind': ['.*fr', 'name', r'\d+']}
|
||||
).get_filter('filter-kind')
|
||||
).get_filter('filter-kind')
|
||||
|
||||
assert filt(kind)
|
||||
|
||||
@@ -144,6 +145,6 @@ def test_create_kind_filter_many_positive(kind):
|
||||
@pytest.mark.parametrize('kind', ('name:de', 'fridge', 'a34', '.*', '\\'))
|
||||
def test_create_kind_filter_many_negative(kind):
|
||||
filt = SanitizerConfig({'filter-kind': ['.*fr', 'name', r'\d+']}
|
||||
).get_filter('filter-kind')
|
||||
).get_filter('filter-kind')
|
||||
|
||||
assert not filt(kind)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for the sanitizer that splits multivalue lists.
|
||||
@@ -14,20 +14,19 @@ from nominatim_db.data.place_info import PlaceInfo
|
||||
|
||||
from nominatim_db.errors import UsageError
|
||||
|
||||
|
||||
class TestSplitName:
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_country(self, def_config):
|
||||
self.config = def_config
|
||||
|
||||
|
||||
def run_sanitizer_on(self, **kwargs):
|
||||
place = PlaceInfo({'name': kwargs})
|
||||
name, _ = PlaceSanitizer([{'step': 'split-name-list'}], self.config).process_names(place)
|
||||
|
||||
return sorted([(p.name, p.kind, p.suffix) for p in name])
|
||||
|
||||
|
||||
def sanitize_with_delimiter(self, delimiter, name):
|
||||
place = PlaceInfo({'name': {'name': name}})
|
||||
san = PlaceSanitizer([{'step': 'split-name-list', 'delimiters': delimiter}],
|
||||
@@ -36,12 +35,10 @@ class TestSplitName:
|
||||
|
||||
return sorted([p.name for p in name])
|
||||
|
||||
|
||||
def test_simple(self):
|
||||
assert self.run_sanitizer_on(name='ABC') == [('ABC', 'name', None)]
|
||||
assert self.run_sanitizer_on(name='') == [('', 'name', None)]
|
||||
|
||||
|
||||
def test_splits(self):
|
||||
assert self.run_sanitizer_on(name='A;B;C') == [('A', 'name', None),
|
||||
('B', 'name', None),
|
||||
@@ -49,7 +46,6 @@ class TestSplitName:
|
||||
assert self.run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None),
|
||||
('boat', 'short_name', None)]
|
||||
|
||||
|
||||
def test_empty_fields(self):
|
||||
assert self.run_sanitizer_on(name='A;;B') == [('A', 'name', None),
|
||||
('B', 'name', None)]
|
||||
@@ -58,14 +54,12 @@ class TestSplitName:
|
||||
assert self.run_sanitizer_on(name=' ;B') == [('B', 'name', None)]
|
||||
assert self.run_sanitizer_on(name='B,') == [('B', 'name', None)]
|
||||
|
||||
|
||||
def test_custom_delimiters(self):
|
||||
assert self.sanitize_with_delimiter(':', '12:45,3') == ['12', '45,3']
|
||||
assert self.sanitize_with_delimiter('\\', 'a;\\b!#@ \\') == ['a;', 'b!#@']
|
||||
assert self.sanitize_with_delimiter('[]', 'foo[to]be') == ['be', 'foo', 'to']
|
||||
assert self.sanitize_with_delimiter(' ', 'morning sun') == ['morning', 'sun']
|
||||
|
||||
|
||||
def test_empty_delimiter_set(self):
|
||||
with pytest.raises(UsageError):
|
||||
self.sanitize_with_delimiter('', 'abc')
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for the sanitizer that handles braced suffixes.
|
||||
@@ -12,6 +12,7 @@ import pytest
|
||||
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
|
||||
from nominatim_db.data.place_info import PlaceInfo
|
||||
|
||||
|
||||
class TestStripBrace:
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
@@ -24,23 +25,19 @@ class TestStripBrace:
|
||||
|
||||
return sorted([(p.name, p.kind, p.suffix) for p in name])
|
||||
|
||||
|
||||
def test_no_braces(self):
|
||||
assert self.run_sanitizer_on(name='foo', ref='23') == [('23', 'ref', None),
|
||||
('foo', 'name', None)]
|
||||
|
||||
|
||||
def test_simple_braces(self):
|
||||
assert self.run_sanitizer_on(name='Halle (Saale)', ref='3')\
|
||||
== [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)]
|
||||
assert self.run_sanitizer_on(name='ack ( bar')\
|
||||
== [('ack', 'name', None), ('ack ( bar', 'name', None)]
|
||||
|
||||
assert self.run_sanitizer_on(name='Halle (Saale)', ref='3') \
|
||||
== [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)]
|
||||
assert self.run_sanitizer_on(name='ack ( bar') \
|
||||
== [('ack', 'name', None), ('ack ( bar', 'name', None)]
|
||||
|
||||
def test_only_braces(self):
|
||||
assert self.run_sanitizer_on(name='(maybe)') == [('(maybe)', 'name', None)]
|
||||
|
||||
|
||||
def test_double_braces(self):
|
||||
assert self.run_sanitizer_on(name='a((b))') == [('a', 'name', None),
|
||||
('a((b))', 'name', None)]
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for the sanitizer that enables language-dependent analyzers.
|
||||
@@ -13,13 +13,13 @@ from nominatim_db.data.place_info import PlaceInfo
|
||||
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
|
||||
from nominatim_db.data.country_info import setup_country_config
|
||||
|
||||
|
||||
class TestWithDefaults:
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_country(self, def_config):
|
||||
self.config = def_config
|
||||
|
||||
|
||||
def run_sanitizer_on(self, country, **kwargs):
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': country})
|
||||
@@ -28,19 +28,16 @@ class TestWithDefaults:
|
||||
|
||||
return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name])
|
||||
|
||||
|
||||
def test_no_names(self):
|
||||
assert self.run_sanitizer_on('de') == []
|
||||
|
||||
|
||||
def test_simple(self):
|
||||
res = self.run_sanitizer_on('fr', name='Foo',name_de='Zoo', ref_abc='M')
|
||||
res = self.run_sanitizer_on('fr', name='Foo', name_de='Zoo', ref_abc='M')
|
||||
|
||||
assert res == [('Foo', 'name', None, {}),
|
||||
('M', 'ref', 'abc', {'analyzer': 'abc'}),
|
||||
('Zoo', 'name', 'de', {'analyzer': 'de'})]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('suffix', ['DE', 'asbc'])
|
||||
def test_illegal_suffix(self, suffix):
|
||||
assert self.run_sanitizer_on('fr', **{'name_' + suffix: 'Foo'}) \
|
||||
@@ -53,7 +50,6 @@ class TestFilterKind:
|
||||
def setup_country(self, def_config):
|
||||
self.config = def_config
|
||||
|
||||
|
||||
def run_sanitizer_on(self, filt, **kwargs):
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': 'de'})
|
||||
@@ -63,17 +59,15 @@ class TestFilterKind:
|
||||
|
||||
return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name])
|
||||
|
||||
|
||||
def test_single_exact_name(self):
|
||||
res = self.run_sanitizer_on(['name'], name_fr='A', ref_fr='12',
|
||||
shortname_fr='C', name='D')
|
||||
shortname_fr='C', name='D')
|
||||
|
||||
assert res == [('12', 'ref', 'fr', {}),
|
||||
('A', 'name', 'fr', {'analyzer': 'fr'}),
|
||||
('C', 'shortname', 'fr', {}),
|
||||
('D', 'name', None, {})]
|
||||
|
||||
|
||||
def test_single_pattern(self):
|
||||
res = self.run_sanitizer_on(['.*name'],
|
||||
name_fr='A', ref_fr='12', namexx_fr='B',
|
||||
@@ -85,7 +79,6 @@ class TestFilterKind:
|
||||
('C', 'shortname', 'fr', {'analyzer': 'fr'}),
|
||||
('D', 'name', None, {})]
|
||||
|
||||
|
||||
def test_multiple_patterns(self):
|
||||
res = self.run_sanitizer_on(['.*name', 'ref'],
|
||||
name_fr='A', ref_fr='12', oldref_fr='X',
|
||||
@@ -106,7 +99,6 @@ class TestDefaultCountry:
|
||||
setup_country_config(def_config)
|
||||
self.config = def_config
|
||||
|
||||
|
||||
def run_sanitizer_append(self, mode, country, **kwargs):
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': country})
|
||||
@@ -122,7 +114,6 @@ class TestDefaultCountry:
|
||||
|
||||
return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
|
||||
|
||||
|
||||
def run_sanitizer_replace(self, mode, country, **kwargs):
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': country})
|
||||
@@ -138,7 +129,6 @@ class TestDefaultCountry:
|
||||
|
||||
return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
|
||||
|
||||
|
||||
def test_missing_country(self):
|
||||
place = PlaceInfo({'name': {'name': 'something'}})
|
||||
name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
|
||||
@@ -151,59 +141,50 @@ class TestDefaultCountry:
|
||||
assert name[0].suffix is None
|
||||
assert 'analyzer' not in name[0].attr
|
||||
|
||||
|
||||
def test_mono_unknown_country(self):
|
||||
expect = [('XX', '')]
|
||||
|
||||
assert self.run_sanitizer_replace('mono', 'xx', name='XX') == expect
|
||||
assert self.run_sanitizer_append('mono', 'xx', name='XX') == expect
|
||||
|
||||
|
||||
def test_mono_monoling_replace(self):
|
||||
res = self.run_sanitizer_replace('mono', 'de', name='Foo')
|
||||
|
||||
assert res == [('Foo', 'de')]
|
||||
|
||||
|
||||
def test_mono_monoling_append(self):
|
||||
res = self.run_sanitizer_append('mono', 'de', name='Foo')
|
||||
|
||||
assert res == [('Foo', ''), ('Foo', 'de')]
|
||||
|
||||
|
||||
def test_mono_multiling(self):
|
||||
expect = [('XX', '')]
|
||||
|
||||
assert self.run_sanitizer_replace('mono', 'ch', name='XX') == expect
|
||||
assert self.run_sanitizer_append('mono', 'ch', name='XX') == expect
|
||||
|
||||
|
||||
def test_all_unknown_country(self):
|
||||
expect = [('XX', '')]
|
||||
|
||||
assert self.run_sanitizer_replace('all', 'xx', name='XX') == expect
|
||||
assert self.run_sanitizer_append('all', 'xx', name='XX') == expect
|
||||
|
||||
|
||||
def test_all_monoling_replace(self):
|
||||
res = self.run_sanitizer_replace('all', 'de', name='Foo')
|
||||
|
||||
assert res == [('Foo', 'de')]
|
||||
|
||||
|
||||
def test_all_monoling_append(self):
|
||||
res = self.run_sanitizer_append('all', 'de', name='Foo')
|
||||
|
||||
assert res == [('Foo', ''), ('Foo', 'de')]
|
||||
|
||||
|
||||
def test_all_multiling_append(self):
|
||||
res = self.run_sanitizer_append('all', 'ch', name='XX')
|
||||
|
||||
assert res == [('XX', ''),
|
||||
('XX', 'de'), ('XX', 'fr'), ('XX', 'it'), ('XX', 'rm')]
|
||||
|
||||
|
||||
def test_all_multiling_replace(self):
|
||||
res = self.run_sanitizer_replace('all', 'ch', name='XX')
|
||||
|
||||
@@ -216,7 +197,6 @@ class TestCountryWithWhitelist:
|
||||
def setup_country(self, def_config):
|
||||
self.config = def_config
|
||||
|
||||
|
||||
def run_sanitizer_on(self, mode, country, **kwargs):
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
|
||||
'country_code': country})
|
||||
@@ -233,21 +213,17 @@ class TestCountryWithWhitelist:
|
||||
|
||||
return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
|
||||
|
||||
|
||||
def test_mono_monoling(self):
|
||||
assert self.run_sanitizer_on('mono', 'de', name='Foo') == [('Foo', 'de')]
|
||||
assert self.run_sanitizer_on('mono', 'pt', name='Foo') == [('Foo', '')]
|
||||
|
||||
|
||||
def test_mono_multiling(self):
|
||||
assert self.run_sanitizer_on('mono', 'ca', name='Foo') == [('Foo', '')]
|
||||
|
||||
|
||||
def test_all_monoling(self):
|
||||
assert self.run_sanitizer_on('all', 'de', name='Foo') == [('Foo', 'de')]
|
||||
assert self.run_sanitizer_on('all', 'pt', name='Foo') == [('Foo', '')]
|
||||
|
||||
|
||||
def test_all_multiling(self):
|
||||
assert self.run_sanitizer_on('all', 'ca', name='Foo') == [('Foo', 'fr')]
|
||||
assert self.run_sanitizer_on('all', 'ch', name='Foo') \
|
||||
@@ -260,7 +236,6 @@ class TestWhiteList:
|
||||
def setup_country(self, def_config):
|
||||
self.config = def_config
|
||||
|
||||
|
||||
def run_sanitizer_on(self, whitelist, **kwargs):
|
||||
place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}})
|
||||
name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
|
||||
@@ -275,14 +250,11 @@ class TestWhiteList:
|
||||
|
||||
return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
|
||||
|
||||
|
||||
def test_in_whitelist(self):
|
||||
assert self.run_sanitizer_on(['de', 'xx'], ref_xx='123') == [('123', 'xx')]
|
||||
|
||||
|
||||
def test_not_in_whitelist(self):
|
||||
assert self.run_sanitizer_on(['de', 'xx'], ref_yy='123') == [('123', '')]
|
||||
|
||||
|
||||
def test_empty_whitelist(self):
|
||||
assert self.run_sanitizer_on([], ref_yy='123') == [('123', '')]
|
||||
|
||||
@@ -2,86 +2,86 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
from typing import Mapping, Optional, List
|
||||
import pytest
|
||||
|
||||
from nominatim_db.data.place_info import PlaceInfo
|
||||
from nominatim_db.data.place_name import PlaceName
|
||||
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
|
||||
|
||||
|
||||
class TestTagJapanese:
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_country(self, def_config):
|
||||
self.config = def_config
|
||||
|
||||
def run_sanitizer_on(self,type, **kwargs):
|
||||
def run_sanitizer_on(self, type, **kwargs):
|
||||
place = PlaceInfo({
|
||||
'address': kwargs,
|
||||
'country_code': 'jp'
|
||||
})
|
||||
sanitizer_args = {'step': 'tag-japanese'}
|
||||
_, address = PlaceSanitizer([sanitizer_args], self.config).process_names(place)
|
||||
tmp_list = [(p.name,p.kind) for p in address]
|
||||
tmp_list = [(p.name, p.kind) for p in address]
|
||||
return sorted(tmp_list)
|
||||
|
||||
def test_on_address(self):
|
||||
res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')
|
||||
assert res == [('bar','ref'),('baz','ref_abc'),('foo','name')]
|
||||
assert res == [('bar', 'ref'), ('baz', 'ref_abc'), ('foo', 'name')]
|
||||
|
||||
def test_housenumber(self):
|
||||
res = self.run_sanitizer_on('address', housenumber='2')
|
||||
assert res == [('2','housenumber')]
|
||||
assert res == [('2', 'housenumber')]
|
||||
|
||||
def test_blocknumber(self):
|
||||
res = self.run_sanitizer_on('address', block_number='6')
|
||||
assert res == [('6','housenumber')]
|
||||
assert res == [('6', 'housenumber')]
|
||||
|
||||
def test_neighbourhood(self):
|
||||
res = self.run_sanitizer_on('address', neighbourhood='8')
|
||||
assert res == [('8','place')]
|
||||
assert res == [('8', 'place')]
|
||||
|
||||
def test_quarter(self):
|
||||
res = self.run_sanitizer_on('address', quarter='kase')
|
||||
assert res==[('kase','place')]
|
||||
assert res == [('kase', 'place')]
|
||||
|
||||
def test_housenumber_blocknumber(self):
|
||||
res = self.run_sanitizer_on('address', housenumber='2', block_number='6')
|
||||
assert res == [('6-2','housenumber')]
|
||||
assert res == [('6-2', 'housenumber')]
|
||||
|
||||
def test_quarter_neighbourhood(self):
|
||||
res = self.run_sanitizer_on('address', quarter='kase', neighbourhood='8')
|
||||
assert res == [('kase8','place')]
|
||||
assert res == [('kase8', 'place')]
|
||||
|
||||
def test_blocknumber_housenumber_quarter(self):
|
||||
res = self.run_sanitizer_on('address', block_number='6', housenumber='2', quarter='kase')
|
||||
assert res == [('6-2','housenumber'),('kase','place')]
|
||||
assert res == [('6-2', 'housenumber'), ('kase', 'place')]
|
||||
|
||||
def test_blocknumber_housenumber_quarter_neighbourhood(self):
|
||||
res = self.run_sanitizer_on('address', block_number='6', housenumber='2', neighbourhood='8')
|
||||
assert res == [('6-2','housenumber'),('8','place')]
|
||||
assert res == [('6-2', 'housenumber'), ('8', 'place')]
|
||||
|
||||
def test_blocknumber_quarter_neighbourhood(self):
|
||||
res = self.run_sanitizer_on('address',block_number='6', quarter='kase', neighbourhood='8')
|
||||
assert res == [('6','housenumber'),('kase8','place')]
|
||||
res = self.run_sanitizer_on('address', block_number='6', quarter='kase', neighbourhood='8')
|
||||
assert res == [('6', 'housenumber'), ('kase8', 'place')]
|
||||
|
||||
def test_blocknumber_quarter(self):
|
||||
res = self.run_sanitizer_on('address',block_number='6', quarter='kase')
|
||||
assert res == [('6','housenumber'),('kase','place')]
|
||||
res = self.run_sanitizer_on('address', block_number='6', quarter='kase')
|
||||
assert res == [('6', 'housenumber'), ('kase', 'place')]
|
||||
|
||||
def test_blocknumber_neighbourhood(self):
|
||||
res = self.run_sanitizer_on('address',block_number='6', neighbourhood='8')
|
||||
assert res == [('6','housenumber'),('8','place')]
|
||||
res = self.run_sanitizer_on('address', block_number='6', neighbourhood='8')
|
||||
assert res == [('6', 'housenumber'), ('8', 'place')]
|
||||
|
||||
def test_housenumber_quarter_neighbourhood(self):
|
||||
res = self.run_sanitizer_on('address',housenumber='2', quarter='kase', neighbourhood='8')
|
||||
assert res == [('2','housenumber'),('kase8','place')]
|
||||
res = self.run_sanitizer_on('address', housenumber='2', quarter='kase', neighbourhood='8')
|
||||
assert res == [('2', 'housenumber'), ('kase8', 'place')]
|
||||
|
||||
def test_housenumber_quarter(self):
|
||||
res = self.run_sanitizer_on('address',housenumber='2', quarter='kase')
|
||||
assert res == [('2','housenumber'),('kase','place')]
|
||||
res = self.run_sanitizer_on('address', housenumber='2', quarter='kase')
|
||||
assert res == [('2', 'housenumber'), ('kase', 'place')]
|
||||
|
||||
def test_housenumber_blocknumber_neighbourhood_quarter(self):
|
||||
res = self.run_sanitizer_on('address', block_number='6', housenumber='2', quarter='kase', neighbourhood='8')
|
||||
assert res == [('6-2','housenumber'),('kase8','place')]
|
||||
res = self.run_sanitizer_on('address', block_number='6', housenumber='2',
|
||||
quarter='kase', neighbourhood='8')
|
||||
assert res == [('6-2', 'housenumber'), ('kase8', 'place')]
|
||||
|
||||
Reference in New Issue
Block a user