mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
unify ICUNameProcessorRules and ICURuleLoader
There is no need for the additional layer of indirection that the ICUNameProcessorRules class adds. The ICURuleLoader can fill the database properties directly.
This commit is contained in:
@@ -8,67 +8,25 @@ import itertools
|
||||
from icu import Transliterator
|
||||
import datrie
|
||||
|
||||
from nominatim.db.properties import set_property, get_property
|
||||
from nominatim.tokenizer import icu_variants as variants
|
||||
|
||||
DBCFG_IMPORT_NORM_RULES = "tokenizer_import_normalisation"
|
||||
DBCFG_IMPORT_TRANS_RULES = "tokenizer_import_transliteration"
|
||||
DBCFG_IMPORT_REPLACEMENTS = "tokenizer_import_replacements"
|
||||
DBCFG_SEARCH_STD_RULES = "tokenizer_search_standardization"
|
||||
|
||||
|
||||
class ICUNameProcessorRules:
|
||||
""" Data object that saves the rules needed for the name processor.
|
||||
|
||||
The rules can either be initialised through an ICURuleLoader or
|
||||
be loaded from a database when a connection is given.
|
||||
"""
|
||||
def __init__(self, loader=None, conn=None):
|
||||
if loader is not None:
|
||||
self.norm_rules = loader.get_normalization_rules()
|
||||
self.trans_rules = loader.get_transliteration_rules()
|
||||
self.replacements = loader.get_replacement_pairs()
|
||||
self.search_rules = loader.get_search_rules()
|
||||
elif conn is not None:
|
||||
self.norm_rules = get_property(conn, DBCFG_IMPORT_NORM_RULES)
|
||||
self.trans_rules = get_property(conn, DBCFG_IMPORT_TRANS_RULES)
|
||||
self.replacements = \
|
||||
variants.unpickle_variant_set(get_property(conn, DBCFG_IMPORT_REPLACEMENTS))
|
||||
self.search_rules = get_property(conn, DBCFG_SEARCH_STD_RULES)
|
||||
else:
|
||||
assert False, "Parameter loader or conn required."
|
||||
|
||||
|
||||
def save_rules(self, conn):
|
||||
""" Save the rules in the property table of the given database.
|
||||
the rules can be loaded again by handing in a connection into
|
||||
the constructor of the class.
|
||||
"""
|
||||
set_property(conn, DBCFG_IMPORT_NORM_RULES, self.norm_rules)
|
||||
set_property(conn, DBCFG_IMPORT_TRANS_RULES, self.trans_rules)
|
||||
set_property(conn, DBCFG_IMPORT_REPLACEMENTS,
|
||||
variants.pickle_variant_set(self.replacements))
|
||||
set_property(conn, DBCFG_SEARCH_STD_RULES, self.search_rules)
|
||||
|
||||
|
||||
class ICUNameProcessor:
|
||||
""" Collects the different transformation rules for normalisation of names
|
||||
and provides the functions to aply the transformations.
|
||||
and provides the functions to apply the transformations.
|
||||
"""
|
||||
|
||||
def __init__(self, rules):
|
||||
def __init__(self, norm_rules, trans_rules, replacements):
|
||||
self.normalizer = Transliterator.createFromRules("icu_normalization",
|
||||
rules.norm_rules)
|
||||
norm_rules)
|
||||
self.to_ascii = Transliterator.createFromRules("icu_to_ascii",
|
||||
rules.trans_rules +
|
||||
trans_rules +
|
||||
";[:Space:]+ > ' '")
|
||||
self.search = Transliterator.createFromRules("icu_search",
|
||||
rules.search_rules)
|
||||
norm_rules + trans_rules)
|
||||
|
||||
# Intermediate reorder by source. Also compute required character set.
|
||||
immediate = defaultdict(list)
|
||||
chars = set()
|
||||
for variant in rules.replacements:
|
||||
for variant in replacements:
|
||||
if variant.source[-1] == ' ' and variant.replacement[-1] == ' ':
|
||||
replstr = variant.replacement[:-1]
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user