Update documentation, optimise regex_replace, add tests

This commit is contained in:
TuringVerified
2025-03-20 20:00:34 +05:30
parent 4665ea3e77
commit 6d5a4a20c5
4 changed files with 79 additions and 19 deletions

View File

@@ -1,13 +1,5 @@
query-preprocessing:
- step: split_japanese_phrases
# regex_replace step: each entry pairs a regex `pattern` with a `replace`
# string. Every `replace` here is the empty string, so matched text is
# presumably deleted from the query (confirm against the step implementation).
#
# Patterns are single-quoted so that backslashes stay literal and so that a
# ` #` or `: ` inside a future pattern cannot be read as a YAML comment or
# mapping indicator.
- step: regex_replace
  replacements:
  # IPv4 addresses: four dot-separated octets, each alternative limited to
  # 250-255, 200-249, or 0-199, bounded by word boundaries.
  - pattern: '\b(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(?:\.(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}\b'
    replace: ''
  # IPv6 addresses: one to seven colon-terminated groups of 1-4 hex digits
  # (or a single colon), optionally followed by one more hex group.
  # NOTE(review): as written this also appears to match short hex/colon
  # tokens such as `12:30`, and to match only part of `::`-compressed
  # addresses such as `2001:db8::1` -- confirm this is intended.
  - pattern: '\b(?:(?:[A-Fa-f0-9]{1,4}:){1,7}|:)(?:[A-Fa-f0-9]{1,4})?\b'
    replace: ''
  # URLs starting with http:// or https://, up to the next whitespace.
  - pattern: 'https?://[^\s]*'
    replace: ''
- step: normalize
normalization:
- ":: lower ()"