mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
introduce and use analyzer for postcodes
This commit is contained in:
54
nominatim/tokenizer/token_analysis/postcodes.py
Normal file
54
nominatim/tokenizer/token_analysis/postcodes.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2022 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Specialized processor for postcodes. Supports a 'lookup' variant of the
|
||||
token, which produces variants with optional spaces.
|
||||
"""
|
||||
|
||||
from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
|
||||
|
||||
### Configuration section
|
||||
|
||||
def configure(rules, normalization_rules): # pylint: disable=W0613
|
||||
""" All behaviour is currently hard-coded.
|
||||
"""
|
||||
return None
|
||||
|
||||
### Analysis section
|
||||
|
||||
def create(normalizer, transliterator, config): # pylint: disable=W0613
|
||||
""" Create a new token analysis instance for this module.
|
||||
"""
|
||||
return PostcodeTokenAnalysis(normalizer, transliterator)
|
||||
|
||||
class PostcodeTokenAnalysis:
|
||||
""" Detects common housenumber patterns and normalizes them.
|
||||
"""
|
||||
def __init__(self, norm, trans):
|
||||
self.norm = norm
|
||||
self.trans = trans
|
||||
|
||||
self.mutator = MutationVariantGenerator(' ', (' ', ''))
|
||||
|
||||
|
||||
def normalize(self, name):
|
||||
""" Return the standard form of the postcode.
|
||||
"""
|
||||
return name.strip().upper()
|
||||
|
||||
|
||||
def get_variants_ascii(self, norm_name):
|
||||
""" Compute the spelling variants for the given normalized postcode.
|
||||
|
||||
The official form creates one variant. If a 'lookup version' is
|
||||
given, then it will create variants with optional spaces.
|
||||
"""
|
||||
# Postcodes follow their own transliteration rules.
|
||||
# Make sure at this point, that the terms are normalized in a way
|
||||
# that they are searchable with the standard transliteration rules.
|
||||
return [self.trans.transliterate(term) for term in
|
||||
self.mutator.generate([self.norm.transliterate(norm_name)])]
|
||||
Reference in New Issue
Block a user