remove support for properties on variants

Those are not going to be used in the near future, so no need to
carry that code around just now.
This commit is contained in:
Sarah Hoffmann
2021-10-05 10:29:36 +02:00
parent 9ba2019470
commit 92f6ec2328
2 changed files with 7 additions and 42 deletions

View File

@@ -1,25 +0,0 @@
"""
Data structures for saving variant expansions for ICU tokenizer.
"""
from collections import namedtuple
# Names of the fields carried by ICUVariantProperties.
# NOTE: the identifier is misspelled ("PORPERTY"); the spelling is kept
# as-is so that the module namespace does not change.
_ICU_VARIANT_PORPERTY_FIELDS = ['lang']


class ICUVariantProperties(namedtuple('_ICUVariantProperties',
                                      _ICU_VARIANT_PORPERTY_FIELDS)):
    """ Data container for saving properties that describe when a variant
        should be applied.

        Property instances are immutable and hashable, so equal property
        sets compare equal and can be used in sets or as dictionary keys.
    """

    # Keep instances as light as the underlying tuple: without an empty
    # __slots__ every instance of the subclass gets its own __dict__.
    __slots__ = ()

    @classmethod
    def from_rules(cls, _):
        """ Create a new property type from a generic dictionary.

            The parameter is accepted for interface compatibility only.
            None of its content is evaluated at the moment: the returned
            instance always has `lang` set to None.
        """
        return cls(lang=None)


# A single variant expansion: the `source` term, its `replacement` and the
# `properties` object stored alongside them.
ICUVariant = namedtuple('ICUVariant', ['source', 'replacement', 'properties'])

View File

@@ -1,7 +1,7 @@
""" """
Generic processor for names that creates abbreviation variants. Generic processor for names that creates abbreviation variants.
""" """
from collections import defaultdict from collections import defaultdict, namedtuple
import itertools import itertools
import re import re
@@ -10,10 +10,11 @@ import datrie
from nominatim.config import flatten_config_list from nominatim.config import flatten_config_list
from nominatim.errors import UsageError from nominatim.errors import UsageError
import nominatim.tokenizer.icu_variants as variants
### Configuration section ### Configuration section
ICUVariant = namedtuple('ICUVariant', ['source', 'replacement'])
def configure(rules, normalization_rules): def configure(rules, normalization_rules):
""" Extract and preprocess the configuration for this module. """ Extract and preprocess the configuration for this module.
""" """
@@ -27,20 +28,9 @@ def configure(rules, normalization_rules):
vmaker = _VariantMaker(normalization_rules) vmaker = _VariantMaker(normalization_rules)
properties = []
for section in rules: for section in rules:
# Create the property field and deduplicate against existing
# instances.
props = variants.ICUVariantProperties.from_rules(section)
for existing in properties:
if existing == props:
props = existing
break
else:
properties.append(props)
for rule in (section.get('words') or []): for rule in (section.get('words') or []):
vset.update(vmaker.compute(rule, props)) vset.update(vmaker.compute(rule))
# Intermediate reorder by source. Also compute required character set. # Intermediate reorder by source. Also compute required character set.
for variant in vset: for variant in vset:
@@ -66,7 +56,7 @@ class _VariantMaker:
norm_rules) norm_rules)
def compute(self, rule, props): def compute(self, rule):
""" Generator for all ICUVariant tuples from a single variant rule. """ Generator for all ICUVariant tuples from a single variant rule.
""" """
parts = re.split(r'(\|)?([=-])>', rule) parts = re.split(r'(\|)?([=-])>', rule)
@@ -82,12 +72,12 @@ class _VariantMaker:
for src in src_terms: for src in src_terms:
if src: if src:
for froms, tos in _create_variants(*src, src[0], decompose): for froms, tos in _create_variants(*src, src[0], decompose):
yield variants.ICUVariant(froms, tos, props) yield ICUVariant(froms, tos)
for src, repl in itertools.product(src_terms, repl_terms): for src, repl in itertools.product(src_terms, repl_terms):
if src and repl: if src and repl:
for froms, tos in _create_variants(*src, repl, decompose): for froms, tos in _create_variants(*src, repl, decompose):
yield variants.ICUVariant(froms, tos, props) yield ICUVariant(froms, tos)
def _parse_variant_word(self, name): def _parse_variant_word(self, name):