mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Merge pull request #3991 from lonvia/interpolation-on-addresses
Add support for addr:interpolation on housenumbers
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
#
|
#
|
||||||
# This file is part of Nominatim. (https://nominatim.org)
|
# This file is part of Nominatim. (https://nominatim.org)
|
||||||
#
|
#
|
||||||
# Copyright (C) 2024 by the Nominatim developer community.
|
# Copyright (C) 2026 by the Nominatim developer community.
|
||||||
# For a full list of authors see the git log.
|
# For a full list of authors see the git log.
|
||||||
"""
|
"""
|
||||||
Sanitizer that preprocesses address tags for house numbers. The sanitizer
|
Sanitizer that preprocesses address tags for house numbers. The sanitizer
|
||||||
@@ -10,6 +10,7 @@ allows to
|
|||||||
|
|
||||||
* define which tags are to be considered house numbers (see 'filter-kind')
|
* define which tags are to be considered house numbers (see 'filter-kind')
|
||||||
* split house number lists into individual numbers (see 'delimiters')
|
* split house number lists into individual numbers (see 'delimiters')
|
||||||
|
* expand interpolated house numbers
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
delimiters: Define the set of characters to be used for
|
delimiters: Define the set of characters to be used for
|
||||||
@@ -23,13 +24,19 @@ Arguments:
|
|||||||
instead of a house number. Either takes a single string
|
instead of a house number. Either takes a single string
|
||||||
or a list of strings, where each string is a regular
|
or a list of strings, where each string is a regular
|
||||||
expression that must match the full house number value.
|
expression that must match the full house number value.
|
||||||
|
expand-interpolations: When true, expand house number ranges to separate numbers
|
||||||
|
when an 'interpolation' is present. (default: true)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from typing import Callable, Iterator, List
|
from typing import Callable, Iterator, Iterable, Union
|
||||||
|
import re
|
||||||
|
|
||||||
from ...data.place_name import PlaceName
|
from ...data.place_name import PlaceName
|
||||||
from .base import ProcessInfo
|
from .base import ProcessInfo
|
||||||
from .config import SanitizerConfig
|
from .config import SanitizerConfig
|
||||||
|
|
||||||
|
RANGE_REGEX = re.compile(r'\d+-\d+')
|
||||||
|
|
||||||
|
|
||||||
class _HousenumberSanitizer:
|
class _HousenumberSanitizer:
|
||||||
|
|
||||||
@@ -38,21 +45,40 @@ class _HousenumberSanitizer:
|
|||||||
self.split_regexp = config.get_delimiter()
|
self.split_regexp = config.get_delimiter()
|
||||||
|
|
||||||
self.filter_name = config.get_filter('convert-to-name', 'FAIL_ALL')
|
self.filter_name = config.get_filter('convert-to-name', 'FAIL_ALL')
|
||||||
|
self.expand_interpolations = config.get_bool('expand-interpolations', True)
|
||||||
|
|
||||||
def __call__(self, obj: ProcessInfo) -> None:
    """ Rewrite the address part of `obj` so that house numbers are
        normalised and, where requested, house number ranges are expanded.

        When 'expand-interpolations' is enabled and the address carries an
        'interpolation' item, its value selects how ranges of the form
        '<first>-<last>' are expanded: 'all' (every number), 'odd', 'even'
        or a single digit from 1 to 9 giving the step width. Any other
        value disables the expansion for this object.

        Items of kind 'interpolation' are never copied to the resulting
        address. Items that are not house numbers are passed through
        unchanged.
    """
    if not obj.address:
        return

    itype: Union[int, str, None] = None
    if self.expand_interpolations:
        itype = next((i.name for i in obj.address if i.kind == 'interpolation'), None)
        if itype is not None:
            if itype == 'all':
                itype = 1
            elif len(itype) == 1 and itype.isdecimal() and int(itype) > 0:
                # isdecimal() instead of isdigit(): isdigit() also accepts
                # characters like superscript digits which int() cannot
                # convert. A step of 0 is rejected because a range cannot
                # be built with it.
                itype = int(itype)
            elif itype not in ('odd', 'even'):
                itype = None

    new_address: list[PlaceName] = []
    for item in obj.address:
        if self.filter_kind(item.kind):
            if itype is not None and RANGE_REGEX.fullmatch(item.name):
                hnrs = self._expand_range(itype, item.name)
                if hnrs:
                    new_address.extend(item.clone(kind='housenumber', name=str(hnr))
                                       for hnr in hnrs)
                    continue
                # An empty expansion falls through to the regular
                # house number handling below.

            if self.filter_name(item.name):
                obj.names.append(item.clone(kind='housenumber'))
            else:
                new_address.extend(item.clone(kind='housenumber', name=n)
                                   for n in self.sanitize(item.name))
        elif item.kind != 'interpolation':
            # Ignore interpolation, otherwise don't touch other address items.
            new_address.append(item)

    obj.address = new_address
|
||||||
@@ -70,6 +96,22 @@ class _HousenumberSanitizer:
|
|||||||
def _regularize(self, hnr: str) -> Iterator[str]:
|
def _regularize(self, hnr: str) -> Iterator[str]:
|
||||||
yield hnr
|
yield hnr
|
||||||
|
|
||||||
|
def _expand_range(self, itype: Union[str, int], hnr: str) -> Iterable[int]:
|
||||||
|
first, last = (int(i) for i in hnr.split('-'))
|
||||||
|
|
||||||
|
if isinstance(itype, int):
|
||||||
|
step = itype
|
||||||
|
else:
|
||||||
|
step = 2
|
||||||
|
if (itype == 'even' and first % 2 == 1)\
|
||||||
|
or (itype == 'odd' and first % 2 == 0):
|
||||||
|
first += 1
|
||||||
|
|
||||||
|
if (last + 1 - first) / step < 10:
|
||||||
|
return range(first, last + 1, step)
|
||||||
|
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
|
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
|
||||||
""" Create a housenumber processing function.
|
""" Create a housenumber processing function.
|
||||||
|
|||||||
@@ -318,3 +318,64 @@ Feature: Searching of house numbers
|
|||||||
Then the result set contains
|
Then the result set contains
|
||||||
| object |
|
| object |
|
||||||
| W20 |
|
| W20 |
|
||||||
|
|
||||||
|
Scenario: A housenumber with interpolation is found
|
||||||
|
Given the places
|
||||||
|
| osm | class | type | housenr | addr+interpolation | geometry |
|
||||||
|
| N1 | building | yes | 1-5 | odd | 9 |
|
||||||
|
And the places
|
||||||
|
| osm | class | type | name | geometry |
|
||||||
|
| W10 | highway | path | Rue Paris | 1,2,3 |
|
||||||
|
When importing
|
||||||
|
When geocoding "Rue Paris 1"
|
||||||
|
Then the result set contains
|
||||||
|
| object | address+house_number |
|
||||||
|
| N1 | 1-5 |
|
||||||
|
When geocoding "Rue Paris 3"
|
||||||
|
Then the result set contains
|
||||||
|
| object | address+house_number |
|
||||||
|
| N1 | 1-5 |
|
||||||
|
When geocoding "Rue Paris 5"
|
||||||
|
Then the result set contains
|
||||||
|
| object | address+house_number |
|
||||||
|
| N1 | 1-5 |
|
||||||
|
When geocoding "Rue Paris 2"
|
||||||
|
Then the result set contains
|
||||||
|
| object |
|
||||||
|
| W10 |
|
||||||
|
|
||||||
|
Scenario: A housenumber with bad interpolation is ignored
|
||||||
|
Given the places
|
||||||
|
| osm | class | type | housenr | addr+interpolation | geometry |
|
||||||
|
| N1 | building | yes | 1-5 | bad | 9 |
|
||||||
|
And the places
|
||||||
|
| osm | class | type | name | geometry |
|
||||||
|
| W10 | highway | path | Rue Paris | 1,2,3 |
|
||||||
|
When importing
|
||||||
|
When geocoding "Rue Paris 1-5"
|
||||||
|
Then the result set contains
|
||||||
|
| object | address+house_number |
|
||||||
|
| N1 | 1-5 |
|
||||||
|
When geocoding "Rue Paris 3"
|
||||||
|
Then the result set contains
|
||||||
|
| object |
|
||||||
|
| W10 |
|
||||||
|
|
||||||
|
|
||||||
|
Scenario: A bad housenumber with a good interpolation is just a housenumber
|
||||||
|
Given the places
|
||||||
|
| osm | class | type | housenr | addr+interpolation | geometry |
|
||||||
|
| N1 | building | yes | 1-100 | all | 9 |
|
||||||
|
And the places
|
||||||
|
| osm | class | type | name | geometry |
|
||||||
|
| W10 | highway | path | Rue Paris | 1,2,3 |
|
||||||
|
When importing
|
||||||
|
When geocoding "Rue Paris 1-100"
|
||||||
|
Then the result set contains
|
||||||
|
| object | address+house_number |
|
||||||
|
| N1 | 1-100 |
|
||||||
|
When geocoding "Rue Paris 3"
|
||||||
|
Then the result set contains
|
||||||
|
| object |
|
||||||
|
| W10 |
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
#
|
#
|
||||||
# This file is part of Nominatim. (https://nominatim.org)
|
# This file is part of Nominatim. (https://nominatim.org)
|
||||||
#
|
#
|
||||||
# Copyright (C) 2025 by the Nominatim developer community.
|
# Copyright (C) 2026 by the Nominatim developer community.
|
||||||
# For a full list of authors see the git log.
|
# For a full list of authors see the git log.
|
||||||
"""
|
"""
|
||||||
Tests for the sanitizer that normalizes housenumbers.
|
Tests for the sanitizer that normalizes housenumbers.
|
||||||
@@ -67,3 +67,25 @@ def test_convert_to_name_unconverted(def_config, number):
|
|||||||
|
|
||||||
assert 'housenumber' not in set(p.kind for p in names)
|
assert 'housenumber' not in set(p.kind for p in names)
|
||||||
assert ('housenumber', number) in set((p.kind, p.name) for p in address)
|
assert ('housenumber', number) in set((p.kind, p.name) for p in address)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('hnr,itype,out', [
    ('1-5', 'all', (1, 2, 3, 4, 5)),
    ('1-5', 'odd', (1, 3, 5)),
    ('1-5', 'even', (2, 4)),
    ('6-9', '1', (6, 7, 8, 9)),
    ('6-9', '2', (6, 8)),
    ('6-9', '3', (6, 9)),
    ('6-9', '5', (6,)),
    ('6-9', 'odd', (7, 9)),
    ('6-9', 'even', (6, 8)),
    ('6-22', 'even', (6, 8, 10, 12, 14, 16, 18, 20, 22)),
])
def test_convert_interpolations(sanitize, hnr, itype, out):
    """ A valid range together with a valid interpolation type is
        replaced by one house number per expanded value.
    """
    result = set(sanitize(housenumber=hnr, interpolation=itype))
    expected = {('housenumber', str(num)) for num in out}

    assert result == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('hnr', ('23', '23-', '3z-f', '1-10', '5-1', '1-4-5'))
def test_ignore_interpolation_with_bad_housenumber(sanitize, hnr):
    """ House numbers that cannot be expanded are kept as they are,
        even when an interpolation type is given.
    """
    result = sanitize(housenumber=hnr, interpolation='all')

    assert result == [('housenumber', hnr)]
|
||||||
|
|||||||
Reference in New Issue
Block a user