From 9ac56c207867b9d212c93f3aff53d4fc7cd73249 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann
Date: Wed, 18 Feb 2026 11:39:48 +0100
Subject: [PATCH] add support for expanding interpolations on housenumbers

---
 .../sanitizers/clean_housenumbers.py          | 52 ++++++++++++++--
 .../features/db/query/housenumbers.feature    | 61 +++++++++++++++++++
 .../sanitizers/test_clean_housenumbers.py     | 24 +++++++-
 3 files changed, 131 insertions(+), 6 deletions(-)

diff --git a/src/nominatim_db/tokenizer/sanitizers/clean_housenumbers.py b/src/nominatim_db/tokenizer/sanitizers/clean_housenumbers.py
index a74ca249..359f6cfd 100644
--- a/src/nominatim_db/tokenizer/sanitizers/clean_housenumbers.py
+++ b/src/nominatim_db/tokenizer/sanitizers/clean_housenumbers.py
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2024 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Sanitizer that preprocesses address tags for house numbers. The sanitizer
@@ -10,6 +10,7 @@ allows to
 
 * define which tags are to be considered house numbers (see 'filter-kind')
 * split house number lists into individual numbers (see 'delimiters')
+* expand interpolated house numbers
 
 Arguments:
     delimiters: Define the set of characters to be used for
@@ -23,13 +24,19 @@ Arguments:
                      instead of a house number. Either takes a single string
                      or a list of strings, where each string is a regular
                      expression that must match the full house number value.
+    expand-interpolations: When true, expand house number ranges to separate numbers
+                           when an 'interpolation' is present. (default: true)
+
 """
-from typing import Callable, Iterator, List
+from typing import Callable, Iterator, Iterable, Union
+import re
 
 from ...data.place_name import PlaceName
 from .base import ProcessInfo
 from .config import SanitizerConfig
 
+RANGE_REGEX = re.compile(r'\d+-\d+')
+
 
 class _HousenumberSanitizer:
 
@@ -38,21 +45,40 @@ class _HousenumberSanitizer:
         self.split_regexp = config.get_delimiter()
 
         self.filter_name = config.get_filter('convert-to-name', 'FAIL_ALL')
+        self.expand_interpolations = config.get_bool('expand-interpolations', True)
 
     def __call__(self, obj: ProcessInfo) -> None:
         if not obj.address:
             return
 
-        new_address: List[PlaceName] = []
+        itype: Union[int, str, None] = None
+        if self.expand_interpolations:
+            itype = next((i.name for i in obj.address if i.kind == 'interpolation'), None)
+            if itype is not None:
+                if itype == 'all':
+                    itype = 1
+                elif itype in ('1', '2', '3', '4', '5', '6', '7', '8', '9'):
+                    itype = int(itype)
+                elif itype not in ('odd', 'even'):
+                    itype = None
+
+        new_address: list[PlaceName] = []
         for item in obj.address:
             if self.filter_kind(item.kind):
+                if itype is not None and RANGE_REGEX.fullmatch(item.name):
+                    hnrs = self._expand_range(itype, item.name)
+                    if hnrs:
+                        new_address.extend(item.clone(kind='housenumber', name=str(hnr))
+                                           for hnr in hnrs)
+                        continue
+
                 if self.filter_name(item.name):
                     obj.names.append(item.clone(kind='housenumber'))
                 else:
                     new_address.extend(item.clone(kind='housenumber', name=n)
                                        for n in self.sanitize(item.name))
-            else:
-                # Don't touch other address items.
+            elif item.kind != 'interpolation':
+                # Ignore interpolation, otherwise don't touch other address items.
                 new_address.append(item)
 
         obj.address = new_address
@@ -70,6 +96,22 @@ class _HousenumberSanitizer:
     def _regularize(self, hnr: str) -> Iterator[str]:
         yield hnr
 
+    def _expand_range(self, itype: Union[str, int], hnr: str) -> Iterable[int]:
+        first, last = (int(i) for i in hnr.split('-'))
+
+        if isinstance(itype, int):
+            step = itype
+        else:
+            step = 2
+            if (itype == 'even' and first % 2 == 1)\
+                    or (itype == 'odd' and first % 2 == 0):
+                first += 1
+
+        if (last + 1 - first) / step < 10:
+            return range(first, last + 1, step)
+
+        return []
+
 
 def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
     """ Create a housenumber processing function.
diff --git a/test/bdd/features/db/query/housenumbers.feature b/test/bdd/features/db/query/housenumbers.feature
index 6ed6284b..75d77b18 100644
--- a/test/bdd/features/db/query/housenumbers.feature
+++ b/test/bdd/features/db/query/housenumbers.feature
@@ -318,3 +318,64 @@ Feature: Searching of house numbers
         Then the result set contains
          | object |
          | W20 |
+
+    Scenario: A housenumber with interpolation is found
+        Given the places
+         | osm | class | type | housenr | addr+interpolation | geometry |
+         | N1 | building | yes | 1-5 | odd | 9 |
+        And the places
+         | osm | class | type | name | geometry |
+         | W10 | highway | path | Rue Paris | 1,2,3 |
+        When importing
+        When geocoding "Rue Paris 1"
+        Then the result set contains
+         | object | address+house_number |
+         | N1 | 1-5 |
+        When geocoding "Rue Paris 3"
+        Then the result set contains
+         | object | address+house_number |
+         | N1 | 1-5 |
+        When geocoding "Rue Paris 5"
+        Then the result set contains
+         | object | address+house_number |
+         | N1 | 1-5 |
+        When geocoding "Rue Paris 2"
+        Then the result set contains
+         | object |
+         | W10 |
+
+    Scenario: A housenumber with bad interpolation is ignored
+        Given the places
+         | osm | class | type | housenr | addr+interpolation | geometry |
+         | N1 | building | yes | 1-5 | bad | 9 |
+        And the places
+         | osm | class | type | name | geometry |
+         | W10 | highway | path | Rue Paris | 1,2,3 |
+        When importing
+        When geocoding "Rue Paris 1-5"
+        Then the result set contains
+         | object | address+house_number |
+         | N1 | 1-5 |
+        When geocoding "Rue Paris 3"
+        Then the result set contains
+         | object |
+         | W10 |
+
+
+    Scenario: A bad housenumber with a good interpolation is just a housenumber
+        Given the places
+         | osm | class | type | housenr | addr+interpolation | geometry |
+         | N1 | building | yes | 1-100 | all | 9 |
+        And the places
+         | osm | class | type | name | geometry |
+         | W10 | highway | path | Rue Paris | 1,2,3 |
+        When importing
+        When geocoding "Rue Paris 1-100"
+        Then the result set contains
+         | object | address+house_number |
+         | N1 | 1-100 |
+        When geocoding "Rue Paris 3"
+        Then the result set contains
+         | object |
+         | W10 |
+
diff --git a/test/python/tokenizer/sanitizers/test_clean_housenumbers.py b/test/python/tokenizer/sanitizers/test_clean_housenumbers.py
index 2dd10c56..2e9f4016 100644
--- a/test/python/tokenizer/sanitizers/test_clean_housenumbers.py
+++ b/test/python/tokenizer/sanitizers/test_clean_housenumbers.py
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Tests for the sanitizer that normalizes housenumbers.
@@ -67,3 +67,25 @@ def test_convert_to_name_unconverted(def_config, number):
 
     assert 'housenumber' not in set(p.kind for p in names)
     assert ('housenumber', number) in set((p.kind, p.name) for p in address)
+
+
+@pytest.mark.parametrize('hnr,itype,out', [
+    ('1-5', 'all', (1, 2, 3, 4, 5)),
+    ('1-5', 'odd', (1, 3, 5)),
+    ('1-5', 'even', (2, 4)),
+    ('6-9', '1', (6, 7, 8, 9)),
+    ('6-9', '2', (6, 8)),
+    ('6-9', '3', (6, 9)),
+    ('6-9', '5', (6,)),
+    ('6-9', 'odd', (7, 9)),
+    ('6-9', 'even', (6, 8)),
+    ('6-22', 'even', (6, 8, 10, 12, 14, 16, 18, 20, 22))
+    ])
+def test_convert_interpolations(sanitize, hnr, itype, out):
+    assert set(sanitize(housenumber=hnr, interpolation=itype)) \
+        == {('housenumber', str(i)) for i in out}
+
+
+@pytest.mark.parametrize('hnr', ('23', '23-', '3z-f', '1-10', '5-1', '1-4-5'))
+def test_ignore_interpolation_with_bad_housenumber(sanitize, hnr):
+    assert sanitize(housenumber=hnr, interpolation='all') == [('housenumber', hnr)]