mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
53 lines
1.8 KiB
Python
53 lines
1.8 KiB
Python
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
#
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
#
|
|
# Copyright (C) 2025 by the Nominatim developer community.
|
|
# For a full list of authors see the git log.
|
|
"""
|
|
This preprocessor replaces values in a given input based on pre-defined regex rules.
|
|
|
|
Arguments:
|
|
pattern: Regex pattern to be applied on the input
|
|
replace: The string that it is to be replaced with
|
|
"""
|
|
from typing import List
|
|
import re
|
|
|
|
from .config import QueryConfig
|
|
from .base import QueryProcessingFunc
|
|
from ..search.query import Phrase
|
|
|
|
|
|
class _GenericPreprocessing:
|
|
"""Perform replacements to input phrases using custom regex patterns."""
|
|
|
|
def __init__(self, config: QueryConfig) -> None:
|
|
"""Initialise the _GenericPreprocessing class with patterns from the ICU config file."""
|
|
self.config = config
|
|
|
|
match_patterns = self.config.get('replacements', 'Key not found')
|
|
self.compiled_patterns = [
|
|
(re.compile(item['pattern']), item['replace']) for item in match_patterns
|
|
]
|
|
|
|
def split_phrase(self, phrase: Phrase) -> Phrase:
|
|
"""This function performs replacements on the given text using regex patterns."""
|
|
for item in self.compiled_patterns:
|
|
phrase.text = item[0].sub(item[1], phrase.text)
|
|
|
|
return phrase
|
|
|
|
def __call__(self, phrases: List[Phrase]) -> List[Phrase]:
|
|
"""
|
|
Return the final Phrase list.
|
|
Returns an empty list if there is nothing left after split_phrase.
|
|
"""
|
|
result = [p for p in map(self.split_phrase, phrases) if p.text.strip()]
|
|
return result
|
|
|
|
|
|
def create(config: QueryConfig) -> QueryProcessingFunc:
|
|
""" Create a function for generic preprocessing."""
|
|
return _GenericPreprocessing(config)
|