mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
This gives the analyzer more flexibility in choosing the normalized form. In particular, an analyzer creating different variants can choose the variant that will be used as the canonical form.
30 lines
1.2 KiB
Python
30 lines
1.2 KiB
Python
# SPDX-License-Identifier: GPL-2.0-only
|
|
#
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
#
|
|
# Copyright (C) 2022 by the Nominatim developer community.
|
|
# For a full list of authors see the git log.
|
|
"""
|
|
Container class collecting all components required to transform an OSM name
|
|
into a Nominatim token.
|
|
"""
|
|
|
|
from icu import Transliterator
|
|
|
|
class ICUTokenAnalysis:
    """ Holder for the ICU transliterators and the token analysis modules
        that belong to one NameAnalyser instance.

        Attributes set by the constructor:
          normalizer - transliterator compiled from the normalization rules
          to_ascii   - transliterator compiled from the transliteration rules
          search     - transliterator compiled from both rule sets joined
          analysis   - dictionary mapping each analysis name to the object
                       returned by the `create()` call of its rule set
    """

    def __init__(self, norm_rules, trans_rules, analysis_rules):
        """ Compile the transliterators and instantiate the analysers.

            `norm_rules` and `trans_rules` are ICU rule strings.
            `analysis_rules` is a mapping of name to an object that offers
            a `create(normalizer, to_ascii, config)` function and a
            `config` attribute.
        """
        create_from_rules = Transliterator.createFromRules

        self.normalizer = create_from_rules("icu_normalization", norm_rules)

        # Extend the transliteration rules with a final rule that replaces
        # every run of space characters with one single blank.
        ascii_rules = trans_rules + ";[:Space:]+ > ' '"
        self.to_ascii = create_from_rules("icu_to_ascii", ascii_rules)

        # The search transliterator runs on the concatenation of the
        # normalization rules and the extended transliteration rules.
        self.search = create_from_rules("icu_search", norm_rules + ascii_rules)

        analysers = {}
        for name, arules in analysis_rules.items():
            analysers[name] = arules.create(self.normalizer, self.to_ascii,
                                            arules.config)
        self.analysis = analysers
|