Add generic preprocessor

This commit is contained in:
TuringVerified
2025-03-13 20:01:21 +05:30
parent 32728d6c89
commit 4665ea3e77
2 changed files with 54 additions and 0 deletions

View File

@@ -1,5 +1,13 @@
# Preprocessing steps for incoming queries, given as a sequence of `step`
# entries (presumably executed in the order listed -- confirm with the consumer).
query-preprocessing:
- step: split_japanese_phrases
- step: regex_replace
  # Each entry pairs a regex `pattern` with its `replace` text; the empty
  # replacement used below drops the matched text from the query.
  # Patterns are single-quoted so YAML can never read ' #' or ': ' inside a
  # regex as a comment or a mapping, and backslashes stay literal.
  replacements:
  # IPv4 addresses: four dot-separated octets, each in the range 0-255.
  - pattern: '\b(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(?:\.(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}\b'
    replace: ''
  # IPv6 addresses, including '::'-compressed forms such as '::1' and
  # '2001:db8::1'. At least two colons are required so that ordinary text
  # like '12:30' or 'cafe:bar' is left alone. The lookarounds stop a match
  # from starting or ending inside a longer word or colon run.
  # NOTE(review): assumes the regex engine supports lookbehind/lookahead
  # (Python `re` and PCRE do) -- confirm for the engine used by this step.
  - pattern: '(?<![\w:])(?:[A-Fa-f0-9]{0,4}:){2,8}[A-Fa-f0-9]{0,4}(?![\w:])'
    replace: ''
  # URLs starting with http:// or https://, up to the next whitespace.
  - pattern: 'https?://[^\s]*'
    replace: ''
- step: normalize
normalization:
- ":: lower ()"