implement is-a-name option for housenumbers

This commit is contained in:
Sarah Hoffmann
2022-02-05 15:13:45 +01:00
parent 39ede26b5c
commit a79a3210e6
3 changed files with 31 additions and 4 deletions

View File

@@ -29,6 +29,10 @@ class _HousenumberSanitizer:
self.filter_kind = create_kind_filter(config, 'housenumber')
self.split_regexp = create_split_regex(config)
nameregexps = config.get('is-a-name', [])
self.is_name_regexp = [re.compile(r) for r in nameregexps]
def __call__(self, obj):
if not obj.address:
@@ -37,8 +41,11 @@ class _HousenumberSanitizer:
new_address = []
for item in obj.address:
if self.filter_kind(item):
new_address.extend(item.clone(kind='housenumber', name=n)
for n in self.sanitize(item.name))
# Housenumber values matching one of the configured 'is-a-name' patterns
# are appended to the names of the object; all other values are sanitized
# and kept as housenumber address items.
# Note: the helper is defined as `_treat_as_name` (leading underscore).
if self._treat_as_name(item.name):
    obj.names.append(item.clone(kind='housenumber'))
else:
    new_address.extend(item.clone(kind='housenumber', name=n)
                       for n in self.sanitize(item.name))
else:
# Don't touch other address items.
new_address.append(item)
@@ -62,6 +69,10 @@ class _HousenumberSanitizer:
yield hnr
def _treat_as_name(self, housenumber):
    """ Check if the given housenumber string should be handled as a name.

        Returns True when at least one of the compiled patterns in
        `self.is_name_regexp` matches the complete string, False otherwise
        (including when no patterns are configured).
    """
    for pattern in self.is_name_regexp:
        # fullmatch() only succeeds when the entire string matches.
        if pattern.fullmatch(housenumber) is not None:
            return True

    return False
def create(config):
""" Create a housenumber processing function.
"""

View File

@@ -25,13 +25,15 @@ transliteration:
- "[^a-z0-9[:Space:]] >"
- ":: NFC ()"
sanitizers:
- step: split-name-list
- step: strip-brace-terms
- step: clean-housenumbers
filter-kind:
- housenumber
- conscriptionnumber
- streetnumber
is-a-name:
- (\A|.*,)[^\d,]{3,}(,.*|\Z)
- step: split-name-list
- step: strip-brace-terms
- step: tag-analyzer-by-language
filter-kind: [".*name.*"]
whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,no,pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]

View File

@@ -53,3 +53,17 @@ Feature: Searching of house numbers
| 2;4;12 |
| 2,4,12 |
| 2, 4, 12 |
Scenario: A name mapped as a housenumber is found
Given the places
| osm | class | type | housenr | geometry |
| N1 | building | yes | Warring | 9 |
And the places
| osm | class | type | name | geometry |
| W10 | highway | path | Chester St | 1,2,3 |
When importing
When sending search query "Chester St Warring"
Then results contain
| osm |
| N1 |