mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-15 02:47:59 +00:00
add wrapper class for place data passed to tokenizer
This is mostly for convenience and documentation purposes.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""
|
||||
Tokenizer for testing.
|
||||
"""
|
||||
from nominatim.indexer.place_info import PlaceInfo
|
||||
|
||||
def create(dsn, data_dir):
|
||||
""" Create a new instance of the tokenizer provided by this module.
|
||||
@@ -68,4 +69,5 @@ class DummyNameAnalyzer:
|
||||
|
||||
@staticmethod
|
||||
def process_place(place):
|
||||
assert isinstance(place, PlaceInfo)
|
||||
return {}
|
||||
|
||||
@@ -11,6 +11,7 @@ from nominatim.tokenizer.icu_name_processor import ICUNameProcessorRules
|
||||
from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
|
||||
from nominatim.db import properties
|
||||
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
||||
from nominatim.indexer.place_info import PlaceInfo
|
||||
|
||||
from mock_icu_word_table import MockIcuWordTable
|
||||
|
||||
@@ -322,30 +323,37 @@ class TestPlaceNames:
|
||||
assert eval(info['names']) == set((t[2] for t in tokens))
|
||||
|
||||
|
||||
def process_named_place(self, names, country_feature=None):
|
||||
place = {'name': names}
|
||||
if country_feature:
|
||||
place['country_feature'] = country_feature
|
||||
|
||||
return self.analyzer.process_place(PlaceInfo(place))
|
||||
|
||||
|
||||
def test_simple_names(self):
|
||||
info = self.analyzer.process_place({'name': {'name': 'Soft bAr', 'ref': '34'}})
|
||||
info = self.process_named_place({'name': 'Soft bAr', 'ref': '34'})
|
||||
|
||||
self.expect_name_terms(info, '#Soft bAr', '#34', 'Soft', 'bAr', '34')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sep', [',' , ';'])
|
||||
def test_names_with_separator(self, sep):
|
||||
info = self.analyzer.process_place({'name': {'name': sep.join(('New York', 'Big Apple'))}})
|
||||
info = self.process_named_place({'name': sep.join(('New York', 'Big Apple'))})
|
||||
|
||||
self.expect_name_terms(info, '#New York', '#Big Apple',
|
||||
'new', 'york', 'big', 'apple')
|
||||
|
||||
|
||||
def test_full_names_with_bracket(self):
|
||||
info = self.analyzer.process_place({'name': {'name': 'Houseboat (left)'}})
|
||||
info = self.process_named_place({'name': 'Houseboat (left)'})
|
||||
|
||||
self.expect_name_terms(info, '#Houseboat (left)', '#Houseboat',
|
||||
'houseboat', 'left')
|
||||
|
||||
|
||||
def test_country_name(self, word_table):
|
||||
info = self.analyzer.process_place({'name': {'name': 'Norge'},
|
||||
'country_feature': 'no'})
|
||||
info = self.process_named_place({'name': 'Norge'}, country_feature='no')
|
||||
|
||||
self.expect_name_terms(info, '#norge', 'norge')
|
||||
assert word_table.get_country() == {('no', 'NORGE')}
|
||||
@@ -361,7 +369,7 @@ class TestPlaceAddress:
|
||||
|
||||
|
||||
def process_address(self, **kwargs):
|
||||
return self.analyzer.process_place({'address': kwargs})
|
||||
return self.analyzer.process_place(PlaceInfo({'address': kwargs}))
|
||||
|
||||
|
||||
def name_token_set(self, *expected_terms):
|
||||
|
||||
@@ -5,6 +5,7 @@ import shutil
|
||||
|
||||
import pytest
|
||||
|
||||
from nominatim.indexer.place_info import PlaceInfo
|
||||
from nominatim.tokenizer import legacy_tokenizer
|
||||
from nominatim.db import properties
|
||||
from nominatim.errors import UsageError
|
||||
@@ -284,21 +285,21 @@ def test_add_more_country_names(analyzer, word_table, make_standard_name):
|
||||
|
||||
|
||||
def test_process_place_names(analyzer, make_keywords):
|
||||
info = analyzer.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}})
|
||||
info = analyzer.process_place(PlaceInfo({'name' : {'name' : 'Soft bAr', 'ref': '34'}}))
|
||||
|
||||
assert info['names'] == '{1,2,3}'
|
||||
|
||||
|
||||
@pytest.mark.parametrize('pcode', ['12345', 'AB 123', '34-345'])
|
||||
def test_process_place_postcode(analyzer, create_postcode_id, word_table, pcode):
|
||||
analyzer.process_place({'address': {'postcode' : pcode}})
|
||||
analyzer.process_place(PlaceInfo({'address': {'postcode' : pcode}}))
|
||||
|
||||
assert word_table.get_postcodes() == {pcode, }
|
||||
|
||||
|
||||
@pytest.mark.parametrize('pcode', ['12:23', 'ab;cd;f', '123;836'])
|
||||
def test_process_place_bad_postcode(analyzer, create_postcode_id, word_table, pcode):
|
||||
analyzer.process_place({'address': {'postcode' : pcode}})
|
||||
analyzer.process_place(PlaceInfo({'address': {'postcode' : pcode}}))
|
||||
|
||||
assert not word_table.get_postcodes()
|
||||
|
||||
@@ -319,7 +320,7 @@ class TestHousenumberName:
|
||||
@staticmethod
|
||||
@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
|
||||
def test_process_place_housenumbers_simple(analyzer, hnr):
|
||||
info = analyzer.process_place({'address': {'housenumber' : hnr}})
|
||||
info = analyzer.process_place(PlaceInfo({'address': {'housenumber' : hnr}}))
|
||||
|
||||
assert info['hnr'] == hnr
|
||||
assert info['hnr_tokens'].startswith("{")
|
||||
@@ -327,15 +328,15 @@ class TestHousenumberName:
|
||||
|
||||
@staticmethod
|
||||
def test_process_place_housenumbers_lists(analyzer):
|
||||
info = analyzer.process_place({'address': {'conscriptionnumber' : '1; 2;3'}})
|
||||
info = analyzer.process_place(PlaceInfo({'address': {'conscriptionnumber' : '1; 2;3'}}))
|
||||
|
||||
assert set(info['hnr'].split(';')) == set(('1', '2', '3'))
|
||||
|
||||
|
||||
@staticmethod
|
||||
def test_process_place_housenumbers_duplicates(analyzer):
|
||||
info = analyzer.process_place({'address': {'housenumber' : '134',
|
||||
info = analyzer.process_place(PlaceInfo({'address': {'housenumber' : '134',
|
||||
'conscriptionnumber' : '134',
|
||||
'streetnumber' : '99a'}})
|
||||
'streetnumber' : '99a'}}))
|
||||
|
||||
assert set(info['hnr'].split(';')) == set(('134', '99a'))
|
||||
|
||||
Reference in New Issue
Block a user