From 2d0a1ef2b802a4a466f27133629182b784029f9f Mon Sep 17 00:00:00 2001 From: hdwalters Date: Wed, 11 Sep 2024 20:28:56 +0100 Subject: [PATCH] Optionally use sed extended regex syntax (#453) * Optionally use sed extended regex syntax. * Update src/std/text.ab Co-authored-by: Phoenix Himself * Add comment stating that sed extended regex may fail on some older Linux variants. * Change regex used to detect GNU sed. * Remove duplicate unsafe from stdlib function. --------- Co-authored-by: Phoenix Himself --- src/std/text.ab | 19 +++++++++++++++---- src/tests/stdlib/replace_regex.ab | 8 -------- src/tests/stdlib/replace_regex_basic.ab | 8 ++++++++ src/tests/stdlib/replace_regex_ext.ab | 10 ++++++++++ 4 files changed, 33 insertions(+), 12 deletions(-) delete mode 100644 src/tests/stdlib/replace_regex.ab create mode 100644 src/tests/stdlib/replace_regex_basic.ab create mode 100644 src/tests/stdlib/replace_regex_ext.ab diff --git a/src/std/text.ab b/src/std/text.ab index 681b45dec..7a96bc120 100644 --- a/src/std/text.ab +++ b/src/std/text.ab @@ -11,11 +11,22 @@ pub fun replace(source, pattern, replacement) { /// Replaces all occurences of a regex pattern in the content with the provided replacement text. /// /// Function uses `sed` -pub fun replace_regex(source: Text, pattern: Text, replacement: Text): Text { - return unsafe $echo "{source}" | sed -e "s/{pattern}/{replacement}/g"$ +pub fun replace_regex(source: Text, pattern: Text, replacement: Text, extended: Bool = false): Text { + unsafe { + if extended { + // GNU sed versions 4.0 through 4.2 support extended regex syntax, + // but only via the "-r" option; use that if "sed --version" prints the + // Free Software Foundation copyright notice (i.e. this is GNU sed). 
+ $re='\bCopyright\b.+\bFree Software Foundation\b'; [[ \$(sed --version 2>/dev/null) =~ \$re ]]$ + let flag = status == 0 then "-r" else "-E" + return $echo "{source}" | sed {flag} -e "s/{pattern}/{replacement}/g"$ + } else { + return $echo "{source}" | sed -e "s/{pattern}/{replacement}/g"$ + } + } } -/// Splits the input `text` into an array of substrings using the specified `delimiter`. +/// Splits the input `text` into an array of substrings using the specified `delimiter`. pub fun split(text: Text, delimiter: Text): [Text] { let result = [Text] unsafe $IFS="{delimiter}" read -rd '' -a {nameof result} < <(printf %s "\${nameof text}")$ @@ -29,7 +40,7 @@ pub fun lines(text: Text): [Text] { /// Splits a `text` into an array of substrings based on space character. pub fun words(text: Text): [Text] { - return split(text, " ") + return split(text, " ") } /// Merges text using the delimeter specified. diff --git a/src/tests/stdlib/replace_regex.ab b/src/tests/stdlib/replace_regex.ab deleted file mode 100644 index ef3068c28..000000000 --- a/src/tests/stdlib/replace_regex.ab +++ /dev/null @@ -1,8 +0,0 @@ -import * from "std/text" - -// Output -// abc456def - -main { - echo replace_regex("abc123def", "[0-9][0-9]*", "456") -} diff --git a/src/tests/stdlib/replace_regex_basic.ab b/src/tests/stdlib/replace_regex_basic.ab new file mode 100644 index 000000000..7569fb0e4 --- /dev/null +++ b/src/tests/stdlib/replace_regex_basic.ab @@ -0,0 +1,8 @@ +import * from "std/text" + +// Output +// abc[123]def + +main { + echo replace_regex("abc123def", "\([0-9][0-9]*\)", "[\1]") +} diff --git a/src/tests/stdlib/replace_regex_ext.ab b/src/tests/stdlib/replace_regex_ext.ab new file mode 100644 index 000000000..50c3c88e5 --- /dev/null +++ b/src/tests/stdlib/replace_regex_ext.ab @@ -0,0 +1,10 @@ +import * from "std/text" + +// Output +// abc[123]def + +main { + // This will fail on any system where sed does not support extended + // regex syntax, via "-r" on GNU sed and "-E" on all other 
versions. + echo replace_regex("abc123def", "([0-9]+)", "[\1]", true) +}