mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-25 18:48:15 +00:00
Remove unnecessary assert statement, Fix regex_replace docstring and simplify regex_replace
This commit is contained in:
@@ -67,8 +67,9 @@ Here is an example configuration file:
|
|||||||
|
|
||||||
``` yaml
|
``` yaml
|
||||||
query-preprocessing:
|
query-preprocessing:
|
||||||
|
- step: split_japanese_phrases
|
||||||
- step: regex_replace
|
- step: regex_replace
|
||||||
replacements:
|
replacements:
|
||||||
- pattern: https?://[^\s]* # Filter URLs starting with http or https
|
- pattern: https?://[^\s]* # Filter URLs starting with http or https
|
||||||
replace: ''
|
replace: ''
|
||||||
- step: normalize
|
- step: normalize
|
||||||
@@ -111,6 +112,8 @@ The following is a list of preprocessors that are shipped with Nominatim.
|
|||||||
heading_level: 6
|
heading_level: 6
|
||||||
docstring_section_style: spacy
|
docstring_section_style: spacy
|
||||||
|
|
||||||
|
##### regex-replace
|
||||||
|
|
||||||
::: nominatim_api.query_preprocessing.regex_replace
|
::: nominatim_api.query_preprocessing.regex_replace
|
||||||
options:
|
options:
|
||||||
members: False
|
members: False
|
||||||
|
|||||||
@@ -5,7 +5,11 @@
|
|||||||
# Copyright (C) 2025 by the Nominatim developer community.
|
# Copyright (C) 2025 by the Nominatim developer community.
|
||||||
# For a full list of authors see the git log.
|
# For a full list of authors see the git log.
|
||||||
"""
|
"""
|
||||||
This file replaces values based on pre-defined regex rules:
|
This preprocessor replaces values in a given input based on pre-defined regex rules.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
pattern: Regex pattern to be applied on the input
|
||||||
|
replace: The string that each match of the pattern is replaced with
|
||||||
"""
|
"""
|
||||||
from typing import List
|
from typing import List
|
||||||
import re
|
import re
|
||||||
@@ -16,8 +20,10 @@ from ..search.query import Phrase
|
|||||||
|
|
||||||
|
|
||||||
class _GenericPreprocessing:
|
class _GenericPreprocessing:
|
||||||
|
"""Perform replacements to input phrases using custom regex patterns."""
|
||||||
|
|
||||||
def __init__(self, config: QueryConfig) -> None:
|
def __init__(self, config: QueryConfig) -> None:
|
||||||
|
"""Initialise the _GenericPreprocessing class with patterns from the ICU config file."""
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
match_patterns = self.config.get('replacements', 'Key not found')
|
match_patterns = self.config.get('replacements', 'Key not found')
|
||||||
@@ -26,22 +32,21 @@ class _GenericPreprocessing:
|
|||||||
]
|
]
|
||||||
|
|
||||||
def split_phrase(self, phrase: Phrase) -> Phrase:
|
def split_phrase(self, phrase: Phrase) -> Phrase:
|
||||||
"""
|
"""This function performs replacements on the given text using regex patterns."""
|
||||||
This function performs replacements on the given text using regex patterns.
|
|
||||||
"""
|
|
||||||
for item in self.compiled_patterns:
|
for item in self.compiled_patterns:
|
||||||
phrase.text = item[0].sub(item[1], phrase.text)
|
phrase.text = item[0].sub(item[1], phrase.text)
|
||||||
|
|
||||||
return phrase
|
return phrase
|
||||||
|
|
||||||
def __call__(self, phrases: List[Phrase]) -> List[Phrase]:
|
def __call__(self, phrases: List[Phrase]) -> List[Phrase]:
|
||||||
"""Apply regex replacements to the given addresses.
|
"""
|
||||||
|
Return the final Phrase list.
|
||||||
|
Returns an empty list if there is nothing left after split_phrase.
|
||||||
"""
|
"""
|
||||||
result = [p for p in map(self.split_phrase, phrases) if p.text.strip()]
|
result = [p for p in map(self.split_phrase, phrases) if p.text.strip()]
|
||||||
return result if result else []
|
return result
|
||||||
|
|
||||||
|
|
||||||
def create(config: QueryConfig) -> QueryProcessingFunc:
|
def create(config: QueryConfig) -> QueryProcessingFunc:
|
||||||
""" Create a function for generic preprocessing.
|
""" Create a function for generic preprocessing."""
|
||||||
"""
|
|
||||||
return _GenericPreprocessing(config)
|
return _GenericPreprocessing(config)
|
||||||
|
|||||||
@@ -46,6 +46,4 @@ def test_split_phrases(inp, outp):
|
|||||||
query = [qmod.Phrase(qmod.PHRASE_ANY, text) for text in inp]
|
query = [qmod.Phrase(qmod.PHRASE_ANY, text) for text in inp]
|
||||||
|
|
||||||
out = run_preprocessor_on(query)
|
out = run_preprocessor_on(query)
|
||||||
expected_out = [qmod.Phrase(qmod.PHRASE_ANY, text) for text in outp]
|
assert out == [qmod.Phrase(qmod.PHRASE_ANY, text) for text in outp]
|
||||||
|
|
||||||
assert out == expected_out, f"Expected {expected_out}, but got {out}"
|
|
||||||
|
|||||||
Reference in New Issue
Block a user