mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-15 10:57:58 +00:00
make compound decomposition pure import feature
Compound decomposition now creates a full name variant on import, just like abbreviations. This simplifies query-time normalization and opens a path for changing the abbreviation and compound decomposition lists for an existing database.
This commit is contained in:
@@ -48,9 +48,10 @@ def test_simple_variants(cfgfile):
|
||||
proc = ICUNameProcessor(rules)
|
||||
|
||||
assert set(get_normalized_variants(proc, "Bauwegstraße")) \
|
||||
== {'bauweg straße', 'bauweg str'}
|
||||
assert get_normalized_variants(proc, "Bauwegstr") == ['bauweg str']
|
||||
assert get_normalized_variants(proc, "holzweg") == ['holz weg']
|
||||
== {'bauweg straße', 'bauweg str', 'bauwegstraße', 'bauwegstr'}
|
||||
assert get_normalized_variants(proc, "Bauwegstr") == ['bauwegstr']
|
||||
assert set(get_normalized_variants(proc, "holzweg")) \
|
||||
== {'holz weg', 'holzweg'}
|
||||
assert get_normalized_variants(proc, "hallo") == ['hallo']
|
||||
|
||||
|
||||
@@ -82,6 +83,6 @@ def test_search_normalized(cfgfile):
|
||||
rules = ICUNameProcessorRules(loader=ICURuleLoader(fpath))
|
||||
proc = ICUNameProcessor(rules)
|
||||
|
||||
assert proc.get_search_normalized('Master Street') == 'master street'
|
||||
assert proc.get_search_normalized('Earnes St') == 'earne s st'
|
||||
assert proc.get_search_normalized('Nostreet') == 'no street'
|
||||
assert proc.get_search_normalized('Master Street') == 'master street'
|
||||
assert proc.get_search_normalized('Earnes St') == 'earnes st'
|
||||
assert proc.get_search_normalized('Nostreet') == 'nostreet'
|
||||
|
||||
@@ -91,10 +91,10 @@ def test_get_search_rules(cfgfile):
|
||||
trans = Transliterator.createFromRules("test", rules)
|
||||
|
||||
assert trans.transliterate(" Baum straße ") == " baum straße "
|
||||
assert trans.transliterate(" Baumstraße ") == " baum straße "
|
||||
assert trans.transliterate(" Baumstrasse ") == " baum strasse "
|
||||
assert trans.transliterate(" Baumstr ") == " baum str "
|
||||
assert trans.transliterate(" Baumwegstr ") == " baumweg str "
|
||||
assert trans.transliterate(" Baumstraße ") == " baumstraße "
|
||||
assert trans.transliterate(" Baumstrasse ") == " baumstrasse "
|
||||
assert trans.transliterate(" Baumstr ") == " baumstr "
|
||||
assert trans.transliterate(" Baumwegstr ") == " baumwegstr "
|
||||
assert trans.transliterate(" Αθήνα ") == " athēna "
|
||||
assert trans.transliterate(" проспект ") == " prospekt "
|
||||
|
||||
@@ -128,11 +128,10 @@ def test_get_replacement_pairs_multi_to(cfgfile):
|
||||
repl = ICURuleLoader(fpath).get_replacement_pairs()
|
||||
|
||||
assert [(a, sorted(b)) for a, b in repl] == \
|
||||
[(' strasse ', [' st ', ' str ', ' strasse ']),
|
||||
('strasse ', [' st ', ' str ', ' strasse ']),
|
||||
('pfad ', [' pfad ']),
|
||||
('str ' , [' str ']),
|
||||
('st ' , [' st '])]
|
||||
[(' strasse ', [' st ', ' str ', ' strasse ', 'st ', 'str ', 'strasse ']),
|
||||
('strasse ', [' st ', ' str ', ' strasse ', 'st ', 'str ', 'strasse ']),
|
||||
(' pfad ', [' pfad ', 'pfad ']),
|
||||
('pfad ', [' pfad ', 'pfad '])]
|
||||
|
||||
|
||||
def test_get_replacement_pairs_multi_from(cfgfile):
|
||||
|
||||
@@ -151,8 +151,9 @@ def test_init_word_table(tokenizer_factory, test_config, place_row, word_table):
|
||||
tok = tokenizer_factory()
|
||||
tok.init_new_db(test_config)
|
||||
|
||||
assert word_table.get_partial_words() == {('te', 1), ('st', 1), ('52', 1),
|
||||
assert word_table.get_partial_words() == {('test', 1), ('52', 1),
|
||||
('no', 1), ('area', 2),
|
||||
('holzstrasse', 1), ('holzstr', 1),
|
||||
('holz', 1), ('strasse', 1),
|
||||
('str', 1)}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user