forked from hans/Nominatim
172 lines
4.7 KiB
Python
172 lines
4.7 KiB
Python
|
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
#
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
#
|
|
# Copyright (C) 2025 by the Nominatim developer community.
|
|
# For a full list of authors see the git log.
|
|
"""
|
|
Test for parsing of postcodes in queries.
|
|
"""
|
|
import re
|
|
from itertools import zip_longest
|
|
|
|
import pytest
|
|
|
|
from nominatim_api.search.postcode_parser import PostcodeParser
|
|
from nominatim_api.search.query import QueryStruct, PHRASE_ANY, PHRASE_POSTCODE, PHRASE_STREET
|
|
|
|
|
|
@pytest.fixture
def pc_config(project_env):
    """Provide a project environment with custom postcode settings.

    Writes a 'country_settings.yaml' containing a small set of
    per-country postcode patterns into the project directory and
    returns the project environment.
    """
    settings = r"""
ab:
    postcode:
      pattern: "ddddd ll"
ba:
    postcode:
      pattern: "ddddd"
de:
    postcode:
      pattern: "ddddd"
gr:
    postcode:
      pattern: "(ddd) ?(dd)"
      output: \1 \2
in:
    postcode:
      pattern: "(ddd) ?(ddd)"
      output: \1\2
mc:
    postcode:
      pattern: "980dd"
mz:
    postcode:
      pattern: "(dddd)(?:-dd)?"
bn:
    postcode:
      pattern: "(ll) ?(dddd)"
      output: \1\2
ky:
    postcode:
      pattern: "(d)-(dddd)"
      output: KY\1-\2

gb:
    postcode:
      pattern: "(l?ld[A-Z0-9]?) ?(dll)"
      output: \1 \2

"""
    (project_env.project_dir / 'country_settings.yaml').write_text(settings)

    return project_env
|
|
|
|
|
|
def mk_query(inp):
    """Build a QueryStruct from a plain string for use in the tests.

    The input is split at space, comma, colon, apostrophe and hyphen.
    Each word becomes one node whose break type is the separator that
    followed it; the final word gets the break type '>'.
    """
    # With a capturing group, re.split() alternates word, separator,
    # word, ... and always ends on a word.
    pieces = re.split(r"([ ,:'-])", inp)
    words = pieces[0::2]
    separators = pieces[1::2]

    query = QueryStruct([])
    for word, breakchar in zip_longest(words, separators, fillvalue='>'):
        query.add_node(breakchar, PHRASE_ANY, 0.1, word, word)

    return query
|
|
|
|
|
|
@pytest.mark.parametrize('query,pos', [
    ('45325 Berlin', 0),
    ('45325:Berlin', 0),
    ('45325,Berlin', 0),
    ('Berlin 45325', 1),
    ('Berlin,45325', 1),
    ('Berlin:45325', 1),
    ('Hansastr,45325 Berlin', 1),
    ('Hansastr 45325 Berlin', 1),
])
def test_simple_postcode(pc_config, query, pos):
    """A five-digit term delimited by space, comma or colon is reported
    at its node position, both as '45325' and as '453 25'.
    """
    expected = {(pos, pos + 1, code) for code in ('45325', '453 25')}

    found = PostcodeParser(pc_config).parse(mk_query(query))

    assert found == expected
|
|
|
|
|
|
@pytest.mark.parametrize('query', ['EC1R 3HF', 'ec1r 3hf'])
def test_postcode_matching_case_insensitive(pc_config, query):
    """Upper- and lower-case input both yield the upper-case postcode."""
    found = PostcodeParser(pc_config).parse(mk_query(query))

    assert found == {(0, 2, 'EC1R 3HF')}
|
|
|
|
|
|
def test_contained_postcode(pc_config):
    """A short postcode contained in a longer one is reported alongside
    the longer match.
    """
    expected = {
        (0, 1, '12345'),
        (0, 1, '123 45'),
        (0, 2, '12345 DX'),
    }

    found = PostcodeParser(pc_config).parse(mk_query('12345 dx'))

    assert found == expected
|
|
|
|
|
|
@pytest.mark.parametrize('query,frm,to', [
    ('345987', 0, 1),
    ('345 987', 0, 2),
    ('Aina 345 987', 1, 3),
    ('Aina 23 345 987 ff', 2, 4),
])
def test_postcode_with_space(pc_config, query, frm, to):
    """A six-digit postcode written with or without an inner space is
    reported in its normalised form '345987' over the nodes it covers.
    """
    found = PostcodeParser(pc_config).parse(mk_query(query))

    assert found == {(frm, to, '345987')}
|
|
|
|
|
|
def test_overlapping_postcode(pc_config):
    """Two postcodes sharing the middle term are both reported."""
    expected = {(0, 2, '123456'), (1, 3, '456 78')}

    found = PostcodeParser(pc_config).parse(mk_query('123 456 78'))

    assert found == expected
|
|
|
|
|
|
# NOTE: every entry must be followed by a comma. Adjacent string literals
# are concatenated implicitly and would silently merge two cases into one.
@pytest.mark.parametrize('query', [
    '45325-Berlin',
    "45325'Berlin",
    'Berlin-45325',
    "Berlin'45325",
    '45325Berlin',
    '345-987',
    "345'987",
    '345,987',
    '345:987',
])
def test_not_a_postcode(pc_config, query):
    """Inputs where digits are joined to other text by a hyphen or an
    apostrophe, glued to it directly, or split by a non-space separator
    must not yield any postcode.
    """
    parser = PostcodeParser(pc_config)

    assert not parser.parse(mk_query(query))
|
|
|
|
|
|
@pytest.mark.parametrize('query', ['ba 12233', 'ba-12233'])
def test_postcode_with_country_prefix(pc_config, query):
    """A country code separated from the postcode by a space or hyphen
    gives a result spanning both nodes, with the bare postcode as value.
    """
    found = PostcodeParser(pc_config).parse(mk_query(query))

    assert (0, 2, '12233') in found
|
|
|
|
|
|
def test_postcode_with_joined_country_prefix(pc_config):
    """A country code written directly in front of the postcode is
    stripped from the reported postcode.
    """
    found = PostcodeParser(pc_config).parse(mk_query('ba12233'))

    assert found == {(0, 1, '12233')}
|
|
|
|
|
|
def test_postcode_with_non_matching_country_prefix(pc_config):
    """The input 'ky12233' yields no postcode at all."""
    found = PostcodeParser(pc_config).parse(mk_query('ky12233'))

    assert not found
|
|
|
|
|
|
def test_postcode_inside_postcode_phrase(pc_config):
    """With a street phrase followed by a two-term postcode phrase, only
    the last term '4444' is reported as a postcode.
    """
    parser = PostcodeParser(pc_config)

    # (break type, phrase type, term) for each node appended to the query.
    node_specs = (
        (',', PHRASE_STREET, '12345'),
        (',', PHRASE_POSTCODE, 'xz'),
        ('>', PHRASE_POSTCODE, '4444'),
    )

    query = QueryStruct([])
    query.nodes[-1].ptype = PHRASE_STREET
    for breakchar, ptype, term in node_specs:
        query.add_node(breakchar, ptype, 0.1, term, term)

    assert parser.parse(query) == {(2, 3, '4444')}
|
|
|
|
|
|
def test_partial_postcode_in_postcode_phrase(pc_config):
    """A postcode phrase made of the two terms '2224' and '12345' yields
    no postcode result.
    """
    parser = PostcodeParser(pc_config)

    query = QueryStruct([])
    query.nodes[-1].ptype = PHRASE_POSTCODE
    for breakchar, term in ((' ', '2224'), ('>', '12345')):
        query.add_node(breakchar, PHRASE_POSTCODE, 0.1, term, term)

    found = parser.parse(query)

    assert not found
|