From bc8e7ec972d3fcff6e8b209b4f1f0b186297f9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laurent=20Qu=C3=A9rel?= Date: Thu, 19 Sep 2024 07:58:14 -0700 Subject: [PATCH] feat(forge): Add `regex_replace` filter to support replacing text using regex. (#380) --- CHANGELOG.md | 1 + crates/weaver_forge/README.md | 3 ++ crates/weaver_forge/src/extensions/util.rs | 50 ++++++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76a40371..64912d62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ What's changed * Add `enforce_trailing_dots` into the `comment_formats` configuration. ([#XXX](...) by lquerel). * Add support for `indent_type` in both the comment filter and the `comment_formats` configuration. ([#XXX](...) by lquerel). +* Add `regex_replace` filter to support replacing text using regex. ([#XXX](...) by lquerel). ## [0.9.2] - 2024-09-09 diff --git a/crates/weaver_forge/README.md b/crates/weaver_forge/README.md index fab37a18..88d75acb 100644 --- a/crates/weaver_forge/README.md +++ b/crates/weaver_forge/README.md @@ -486,6 +486,9 @@ The following filters are available: - `acronym`: Replaces acronyms in the input string with the full name defined in the `acronyms` section of the `weaver.yaml` configuration file. - `split_id`: Splits a string by '.' creating a list of nested ids. +- `regex_replace`: Replace all occurrences of a regex pattern (1st parameter) in the input string with the replacement + string (2nd parameter). Under the hood, this filter uses the `regex` crate (see + [regex](https://docs.rs/regex/latest/regex/index.html#traits) for more details) - `comment_with_prefix(prefix)`: Outputs a multiline comment with the given prefix. This filter is deprecated, please use the more general `comment` filter. - `comment`: A generic comment formatter that uses the `comment_formats` section of the `weaver.yaml` configuration file (more details [here](#comment-filter)). 
- `flatten`: Converts a List of Lists into a single list with all elements.
diff --git a/crates/weaver_forge/src/extensions/util.rs b/crates/weaver_forge/src/extensions/util.rs
index 7110d96c..ffab931a 100644
--- a/crates/weaver_forge/src/extensions/util.rs
+++ b/crates/weaver_forge/src/extensions/util.rs
@@ -6,6 +6,7 @@ use crate::config::WeaverConfig;
 use minijinja::value::Rest;
 use minijinja::{Environment, ErrorKind, Value};
 use regex::Regex;
+use std::borrow::Cow;
 use std::collections::HashMap;
 use std::sync::OnceLock;
 
@@ -17,6 +18,7 @@ pub(crate) fn add_filters(env: &mut Environment<'_>, target_config: &WeaverConfi
     );
     env.add_filter("flatten", flatten);
     env.add_filter("split_id", split_id);
+    env.add_filter("regex_replace", regex_replace);
 }
 
 /// Add utility functions to the environment.
@@ -66,6 +68,24 @@ fn split_id(value: Value) -> Result, minijinja::Error> {
     }
 }
 
+/// Replace all matches of the regex `pattern` (1st parameter) in `input` with `replacement`
+/// (2nd parameter). Fails with `ErrorKind::InvalidOperation` if `pattern` is not a valid regex.
+fn regex_replace(
+    input: Cow<'_, str>,
+    pattern: Cow<'_, str>,
+    replacement: Cow<'_, str>,
+) -> Result<String, minijinja::Error> {
+    let re = Regex::new(pattern.as_ref()).map_err(|e| {
+        minijinja::Error::new(
+            ErrorKind::InvalidOperation,
+            format!("Invalid regex pattern: {}", e),
+        )
+    })?;
+    Ok(re
+        .replace_all(input.as_ref(), replacement.as_ref())
+        .to_string())
+}
+
 /// Create a filter that replaces acronyms in the input string with the full
 /// name defined in the `acronyms` list.
 ///
@@ -110,3 +130,33 @@ pub fn acronym(acronyms: Vec) -> impl Fn(&str) -> String {
             .collect()
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::extensions::util::add_filters;
+    use minijinja::Environment;
+
+    #[test]
+    fn test_regex_replace() {
+        let mut env = Environment::new();
+        let ctx = serde_json::Value::Null;
+        let config = crate::config::WeaverConfig::default();
+
+        add_filters(&mut env, &config);
+
+        assert_eq!(
+            env.render_str("{{ 'Hello World!' | regex_replace('!','?') }}", &ctx)
+                .unwrap(),
+            "Hello World?"
+        );
+
+        assert_eq!(
+            env.render_str(
+                "{{ \"This a test with multiple a's\" | regex_replace('a','A') }}",
+                &ctx
+            )
+            .unwrap(),
+            "This A test with multiple A's"
+        );
+    }
+}