mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
add support for expanding interpolations on housenumbers
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# Copyright (C) 2026 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Sanitizer that preprocesses address tags for house numbers. The sanitizer
|
||||
@@ -10,6 +10,7 @@ allows to
|
||||
|
||||
* define which tags are to be considered house numbers (see 'filter-kind')
|
||||
* split house number lists into individual numbers (see 'delimiters')
|
||||
* expand interpolated house numbers
|
||||
|
||||
Arguments:
|
||||
delimiters: Define the set of characters to be used for
|
||||
@@ -23,13 +24,19 @@ Arguments:
|
||||
instead of a house number. Either takes a single string
|
||||
or a list of strings, where each string is a regular
|
||||
expression that must match the full house number value.
|
||||
expand-interpolations: When true, expand house number ranges to separate numbers
|
||||
when an 'interpolation' is present. (default: true)
|
||||
|
||||
"""
|
||||
from typing import Callable, Iterator, List
|
||||
from typing import Callable, Iterator, Iterable, Union
|
||||
import re
|
||||
|
||||
from ...data.place_name import PlaceName
|
||||
from .base import ProcessInfo
|
||||
from .config import SanitizerConfig
|
||||
|
||||
# Pattern for a house number range: two runs of digits joined by a single
# hyphen (e.g. '1-5'). It is applied with fullmatch(), so values with extra
# characters or more than one hyphen (e.g. '23-', '1-4-5') do not qualify.
RANGE_REGEX = re.compile(r'\d+-\d+')
|
||||
|
||||
|
||||
class _HousenumberSanitizer:
|
||||
|
||||
@@ -38,21 +45,40 @@ class _HousenumberSanitizer:
|
||||
self.split_regexp = config.get_delimiter()
|
||||
|
||||
self.filter_name = config.get_filter('convert-to-name', 'FAIL_ALL')
|
||||
self.expand_interpolations = config.get_bool('expand-interpolations', True)
|
||||
|
||||
def __call__(self, obj: ProcessInfo) -> None:
    """ Rewrite the house number items in the address of `obj`.

        When 'expand-interpolations' is enabled and the address has an item
        of kind 'interpolation', its value selects how house number ranges
        (see RANGE_REGEX) are expanded:

          * 'all' - every number in the range
          * '1' to '9' - every n-th number starting with the first
          * 'odd' / 'even' - only the odd or even numbers

        Any other interpolation value disables the expansion. Items that
        are not expanded are either moved to `obj.names` (when the value
        matches 'convert-to-name') or replaced by the results of
        `self.sanitize()`. Items of kind 'interpolation' are always dropped
        from the address, all remaining items are kept unchanged.
    """
    if not obj.address:
        return

    itype: Union[int, str, None] = None
    if self.expand_interpolations:
        itype = next((i.name for i in obj.address if i.kind == 'interpolation'), None)
        if itype is not None:
            if itype == 'all':
                itype = 1
            elif len(itype) == 1 and '1' <= itype <= '9':
                # Restrict to ASCII 1-9: str.isdigit() would also accept '0'
                # (a step of zero) and characters like '²', which int()
                # cannot convert.
                itype = int(itype)
            elif itype not in ('odd', 'even'):
                itype = None

    new_address: list[PlaceName] = []
    for item in obj.address:
        if self.filter_kind(item.kind):
            if itype is not None and RANGE_REGEX.fullmatch(item.name):
                hnrs = self._expand_range(itype, item.name)
                if hnrs:
                    new_address.extend(item.clone(kind='housenumber', name=str(hnr))
                                       for hnr in hnrs)
                    continue

            # Not expandable: fall back to the regular house number handling.
            if self.filter_name(item.name):
                obj.names.append(item.clone(kind='housenumber'))
            else:
                new_address.extend(item.clone(kind='housenumber', name=n)
                                   for n in self.sanitize(item.name))
        elif item.kind != 'interpolation':
            # Ignore interpolation, otherwise don't touch other address items.
            new_address.append(item)

    obj.address = new_address
|
||||
@@ -70,6 +96,22 @@ class _HousenumberSanitizer:
|
||||
def _regularize(self, hnr: str) -> Iterator[str]:
|
||||
yield hnr
|
||||
|
||||
def _expand_range(self, itype: Union[str, int], hnr: str) -> Iterable[int]:
|
||||
first, last = (int(i) for i in hnr.split('-'))
|
||||
|
||||
if isinstance(itype, int):
|
||||
step = itype
|
||||
else:
|
||||
step = 2
|
||||
if (itype == 'even' and first % 2 == 1)\
|
||||
or (itype == 'odd' and first % 2 == 0):
|
||||
first += 1
|
||||
|
||||
if (last + 1 - first) / step < 10:
|
||||
return range(first, last + 1, step)
|
||||
|
||||
return []
|
||||
|
||||
|
||||
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
|
||||
""" Create a housenumber processing function.
|
||||
|
||||
@@ -318,3 +318,64 @@ Feature: Searching of house numbers
|
||||
Then the result set contains
|
||||
| object |
|
||||
| W20 |
|
||||
|
||||
Scenario: A housenumber with interpolation is found
|
||||
Given the places
|
||||
| osm | class | type | housenr | addr+interpolation | geometry |
|
||||
| N1 | building | yes | 1-5 | odd | 9 |
|
||||
And the places
|
||||
| osm | class | type | name | geometry |
|
||||
| W10 | highway | path | Rue Paris | 1,2,3 |
|
||||
When importing
|
||||
When geocoding "Rue Paris 1"
|
||||
Then the result set contains
|
||||
| object | address+house_number |
|
||||
| N1 | 1-5 |
|
||||
When geocoding "Rue Paris 3"
|
||||
Then the result set contains
|
||||
| object | address+house_number |
|
||||
| N1 | 1-5 |
|
||||
When geocoding "Rue Paris 5"
|
||||
Then the result set contains
|
||||
| object | address+house_number |
|
||||
| N1 | 1-5 |
|
||||
When geocoding "Rue Paris 2"
|
||||
Then the result set contains
|
||||
| object |
|
||||
| W10 |
|
||||
|
||||
Scenario: A housenumber with bad interpolation is ignored
|
||||
Given the places
|
||||
| osm | class | type | housenr | addr+interpolation | geometry |
|
||||
| N1 | building | yes | 1-5 | bad | 9 |
|
||||
And the places
|
||||
| osm | class | type | name | geometry |
|
||||
| W10 | highway | path | Rue Paris | 1,2,3 |
|
||||
When importing
|
||||
When geocoding "Rue Paris 1-5"
|
||||
Then the result set contains
|
||||
| object | address+house_number |
|
||||
| N1 | 1-5 |
|
||||
When geocoding "Rue Paris 3"
|
||||
Then the result set contains
|
||||
| object |
|
||||
| W10 |
|
||||
|
||||
|
||||
Scenario: A bad housenumber with a good interpolation is just a housenumber
|
||||
Given the places
|
||||
| osm | class | type | housenr | addr+interpolation | geometry |
|
||||
| N1 | building | yes | 1-100 | all | 9 |
|
||||
And the places
|
||||
| osm | class | type | name | geometry |
|
||||
| W10 | highway | path | Rue Paris | 1,2,3 |
|
||||
When importing
|
||||
When geocoding "Rue Paris 1-100"
|
||||
Then the result set contains
|
||||
| object | address+house_number |
|
||||
| N1 | 1-100 |
|
||||
When geocoding "Rue Paris 3"
|
||||
Then the result set contains
|
||||
| object |
|
||||
| W10 |
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# Copyright (C) 2026 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for the sanitizer that normalizes housenumbers.
|
||||
@@ -67,3 +67,25 @@ def test_convert_to_name_unconverted(def_config, number):
|
||||
|
||||
assert 'housenumber' not in set(p.kind for p in names)
|
||||
assert ('housenumber', number) in set((p.kind, p.name) for p in address)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('hnr,itype,out', [
    ('1-5', 'all', (1, 2, 3, 4, 5)),
    ('1-5', 'odd', (1, 3, 5)),
    ('1-5', 'even', (2, 4)),
    ('6-9', '1', (6, 7, 8, 9)),
    ('6-9', '2', (6, 8)),
    ('6-9', '3', (6, 9)),
    ('6-9', '5', (6,)),
    ('6-9', 'odd', (7, 9)),
    ('6-9', 'even', (6, 8)),
    ('6-22', 'even', (6, 8, 10, 12, 14, 16, 18, 20, 22))
])
def test_convert_interpolations(sanitize, hnr, itype, out):
    """ A house number range with a valid interpolation type is replaced
        by one 'housenumber' entry per expected number.
    """
    result = sanitize(housenumber=hnr, interpolation=itype)
    expected = {('housenumber', str(number)) for number in out}

    assert set(result) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize('hnr', ('23', '23-', '3z-f', '1-10', '5-1', '1-4-5'))
def test_ignore_interpolation_with_bad_housenumber(sanitize, hnr):
    """ House numbers that cannot be expanded are returned as a single,
        unchanged 'housenumber' entry even when an interpolation is given.
    """
    result = sanitize(housenumber=hnr, interpolation='all')

    assert result == [('housenumber', hnr)]
|
||||
|
||||
Reference in New Issue
Block a user