Remove unnecessary assert statement, Fix regex_replace docstring and simplify regex_replace

This commit is contained in:
TuringVerified
2025-04-01 01:27:45 +05:30
parent 6d5a4a20c5
commit 2eeec46040
3 changed files with 18 additions and 12 deletions

View File

@@ -67,8 +67,9 @@ Here is an example configuration file:
``` yaml
query-preprocessing:
- step: split_japanese_phrases
- step: regex_replace
replacements:
replacements:
- pattern: https?://[^\s]* # Filter URLs starting with http or https
replace: ''
- step: normalize
@@ -111,6 +112,8 @@ The following is a list of preprocessors that are shipped with Nominatim.
heading_level: 6
docstring_section_style: spacy
##### regex-replace
::: nominatim_api.query_preprocessing.regex_replace
options:
members: False

View File

@@ -5,7 +5,11 @@
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
This file replaces values based on pre-defined regex rules:
This preprocessor replaces values in a given input based on pre-defined regex rules.
Arguments:
pattern: Regex pattern to be applied on the input
replace: The string that each match of the pattern is replaced with
"""
from typing import List
import re
@@ -16,8 +20,10 @@ from ..search.query import Phrase
class _GenericPreprocessing:
"""Perform replacements to input phrases using custom regex patterns."""
def __init__(self, config: QueryConfig) -> None:
"""Initialise the _GenericPreprocessing class with patterns from the ICU config file."""
self.config = config
match_patterns = self.config.get('replacements', 'Key not found')
@@ -26,22 +32,21 @@ class _GenericPreprocessing:
]
def split_phrase(self, phrase: Phrase) -> Phrase:
"""
This function performs replacements on the given text using regex patterns.
"""
"""This function performs replacements on the given text using regex patterns."""
for item in self.compiled_patterns:
phrase.text = item[0].sub(item[1], phrase.text)
return phrase
def __call__(self, phrases: List[Phrase]) -> List[Phrase]:
"""Apply regex replacements to the given addresses.
"""
Return the final Phrase list.
Returns an empty list if there is nothing left after split_phrase.
"""
result = [p for p in map(self.split_phrase, phrases) if p.text.strip()]
return result if result else []
return result
def create(config: QueryConfig) -> QueryProcessingFunc:
""" Create a function for generic preprocessing.
"""
""" Create a function for generic preprocessing."""
return _GenericPreprocessing(config)

View File

@@ -46,6 +46,4 @@ def test_split_phrases(inp, outp):
query = [qmod.Phrase(qmod.PHRASE_ANY, text) for text in inp]
out = run_preprocessor_on(query)
expected_out = [qmod.Phrase(qmod.PHRASE_ANY, text) for text in outp]
assert out == expected_out, f"Expected {expected_out}, but got {out}"
assert out == [qmod.Phrase(qmod.PHRASE_ANY, text) for text in outp]