# Mirror of https://github.com/osm-search/Nominatim.git
# (synced 2026-02-15 10:57:58 +00:00; 62 lines, 2.2 KiB, Python)
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for special postcode analysis and variant generation.
"""
|
|
import pytest
|
|
|
|
from icu import Transliterator
|
|
|
|
import nominatim_db.tokenizer.token_analysis.postcodes as module
|
|
from nominatim_db.data.place_name import PlaceName
|
|
|
|
DEFAULT_NORMALIZATION = """ :: NFD ();
|
|
'🜳' > ' ';
|
|
[[:Nonspacing Mark:] [:Cf:]] >;
|
|
:: lower ();
|
|
[[:Punctuation:][:Space:]]+ > ' ';
|
|
:: NFC ();
|
|
"""
|
|
|
|
DEFAULT_TRANSLITERATION = """ :: Latin ();
|
|
'🜵' > ' ';
|
|
"""
|
|
|
|
|
|
@pytest.fixture
def analyser():
    """Return a postcode analyser created from the default test rules.

    The analyser module is configured for the 'postcodes' analyzer with
    DEFAULT_NORMALIZATION and then instantiated with freshly compiled
    normalization and transliteration ICU transliterators.
    """
    settings = module.configure({'analyzer': 'postcodes'}, DEFAULT_NORMALIZATION)

    normalizer = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
    transliterator = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)

    return module.create(normalizer, transliterator, settings)
|
|
|
|
|
|
def get_normalized_variants(proc, name):
    """Return the variants `proc` computes for the normalized form of `name`.

    `name` is run through a transliterator compiled from
    DEFAULT_NORMALIZATION and stripped of surrounding whitespace before it
    is passed to `proc.compute_variants()`.
    """
    normalizer = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
    normalized_name = normalizer.transliterate(name).strip()

    return proc.compute_variants(normalized_name)
|
|
|
|
|
|
@pytest.mark.parametrize('name,norm', [
    ('12', '12'),
    ('A 34 ', 'A 34'),
    ('34-av', '34-AV'),
])
def test_get_canonical_id(analyser, name, norm):
    """Check the canonical ID the analyser reports for each listed name."""
    place_name = PlaceName(name=name, kind='', suffix='')

    assert analyser.get_canonical_id(place_name) == norm
|
|
|
|
|
|
@pytest.mark.parametrize('postcode,variants', [
    ('12345', {'12345'}),
    ('AB-998', {'ab 998', 'ab998'}),
    ('23 FGH D3', {'23 fgh d3', '23fgh d3', '23 fghd3', '23fghd3'}),
])
def test_compute_variants(analyser, postcode, variants):
    """Check that compute_variants() returns the expected set, free of duplicates."""
    computed = analyser.compute_variants(postcode)
    unique = set(computed)

    # Equal sizes mean no variant was returned more than once.
    assert len(computed) == len(unique)
    assert unique == variants
|