From 22aaa350af0ab6dbcdf77bdf0f50e849bbec1cff Mon Sep 17 00:00:00 2001 From: Carlo Dapor Date: Sun, 26 May 2024 23:27:38 +0200 Subject: [PATCH 1/4] Support for replace with regexp This PR addresses issues #346 and #347. --- .../spt/data/jslt/impl/BuiltinFunctions.java | 27 +++++++++++++++++++ core/src/test/resources/function-tests.json | 10 +++++++ 2 files changed, 37 insertions(+) diff --git a/core/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java b/core/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java index b903cbe..a66a706 100644 --- a/core/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java +++ b/core/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java @@ -104,6 +104,7 @@ public class BuiltinFunctions { functions.put("from-json", new BuiltinFunctions.FromJson()); functions.put("to-json", new BuiltinFunctions.ToJson()); functions.put("replace", new BuiltinFunctions.Replace()); + functions.put("replace-regexp", new BuiltinFunctions.ReplaceRegexp()); functions.put("trim", new BuiltinFunctions.Trim()); functions.put("uuid", new BuiltinFunctions.Uuid()); @@ -961,6 +962,32 @@ else if (pos < string.length()) } } + // ===== REPLACE-REGEXP + + public static class ReplaceRegexp extends AbstractRegexpFunction { + + public ReplaceRegexp() { + super("replace-regexp", 3, 3); + } + + public JsonNode call(JsonNode input, JsonNode[] arguments) { + int args = null == arguments ? 0 : arguments.length; + if (args != 3) { + throw new JsltException("ReplaceRegexp requires 3 arguments, only " + args + " provided!"); + } + + String string = NodeUtils.toString(arguments[0], true); + if (string == null) + return NullNode.instance; + + String regexp = NodeUtils.toString(arguments[1], false); + String replacement = NodeUtils.toString(arguments[2], false); + String result = !string.matches(regexp) ? 
string : string.replaceAll(regexp, replacement); + + return new TextNode(result); + } + } + // ===== TRIM public static class Trim extends AbstractFunction { diff --git a/core/src/test/resources/function-tests.json b/core/src/test/resources/function-tests.json index bea7f7b..49f2827 100644 --- a/core/src/test/resources/function-tests.json +++ b/core/src/test/resources/function-tests.json @@ -1127,6 +1127,16 @@ "input" : "\"some text\"", "output": "\"\"" }, + { + "query": "replace-regexp(., \"([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])\", \"$2/$3/$1\")", + "input" : "\"2019-12-31\"", + "output": "\"12/31/2019\"" + }, + { + "query": "replace-regexp(., \"(?[0-9][0-9][0-9][0-9])-(?[0-9][0-9])-(?[0-9][0-9])\", \"${day}.${month}.${year}\")", + "input" : "\"2019-12-31\"", + "output": "\"31.12.2019\"" + }, { "query": "trim(.)", "input" : "\"some text\"", From 1eebe416219f0e1c8a692f3057b293de4e25cea3 Mon Sep 17 00:00:00 2001 From: Carlo Dapor Date: Sun, 26 May 2024 23:41:49 +0200 Subject: [PATCH 2/4] Support for replace with regexp This PR addresses issues #346 and #347. Added documentation. --- functions.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/functions.md b/functions.md index 3f3accc..eb38f1c 100644 --- a/functions.md +++ b/functions.md @@ -510,6 +510,24 @@ replace("abc def ghi", "[a-z]", "x") => "xxx xxx xxx" replace("abc def ghi", "[a-z]+", "x") => "x x x" ``` +### _replace-regexp(value, regexp, out) -> string_ + +Replaces the string in `value` that matches `regexp` with `out`. +If `value` is not a string, it's converted to a string, except +if it is `null`. `regexp` and `out` must be strings. + +It is an error for `regexp` ever to match an empty string. + +If the `regexp` does not match the input,`out` corresponds to `value`. 
+ +Examples: + +``` +replace-regexp("2019-12-31", "([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])", "$2/$3/$1") => "12/31/2019" +replace-regexp("2019-12-31", "(?[0-9][0-9][0-9][0-9])-(?[0-9][0-9])-(?[0-9][0-9])", "$3.$2.$1") => "31.12.2019" +replace-regexp("2019-12-31", "([a-z]+)", "$1") => "2019-12-31" +``` + ### _trim(string) -> string_ Removes leading and trailing whitespace in the input string. If the From d84fa16a6be7f7c0cfcb377f0e5c1947946917e1 Mon Sep 17 00:00:00 2001 From: Carlo Dapor Date: Sun, 26 May 2024 23:43:59 +0200 Subject: [PATCH 3/4] Support for replace with regexp This PR addresses issues #346 and #347. Added documentation. --- functions.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/functions.md b/functions.md index eb38f1c..e41042a 100644 --- a/functions.md +++ b/functions.md @@ -523,9 +523,12 @@ If the `regexp` does not match the input,`out` corresponds to `value`. Examples: ``` -replace-regexp("2019-12-31", "([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])", "$2/$3/$1") => "12/31/2019" -replace-regexp("2019-12-31", "(?[0-9][0-9][0-9][0-9])-(?[0-9][0-9])-(?[0-9][0-9])", "$3.$2.$1") => "31.12.2019" -replace-regexp("2019-12-31", "([a-z]+)", "$1") => "2019-12-31" +replace-regexp("2019-12-31", "([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])", "$2/$3/$1") + => "12/31/2019" +replace-regexp("2019-12-31", "(?[0-9][0-9][0-9][0-9])-(?[0-9][0-9])-(?[0-9][0-9])", + => "${day}.${month}.${year}") => "31.12.2019" +replace-regexp("2019-12-31", "([a-z]+)", "$1") + => "2019-12-31" ``` ### _trim(string) -> string_ From 087a2df8e92efa1268e49e04ceb646dd239ad802 Mon Sep 17 00:00:00 2001 From: Carlo Dapor Date: Mon, 27 May 2024 00:27:11 +0200 Subject: [PATCH 4/4] Support for replace with regexp This PR addresses issues #346 and #347. Added support for Regexp pattern "Predefined character classes". 
--- .../com/schibsted/spt/data/jslt/parser/ParserImpl.java | 8 +++++++- core/src/test/resources/function-tests.json | 9 +++++++-- core/src/test/resources/json-parse-error-tests.json | 1 - functions.md | 6 +++--- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java b/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java index f5acc00..bc4ab16 100644 --- a/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java +++ b/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java @@ -503,7 +503,13 @@ private static String makeString(ParseContext ctx, Token literal) { result[pos++] = ch; else { ch = string.charAt(++ix); - + // special Regexp characters, s. https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html, + // "Predefined character classes". + if ("dDhHsSvVwW".contains(String.valueOf(ch))) { + result[pos++] = '\\'; + result[pos++] = ch; + continue; + } switch (ch) { case '\\': result[pos++] = ch; break; case '"': result[pos++] = ch; break; diff --git a/core/src/test/resources/function-tests.json b/core/src/test/resources/function-tests.json index 49f2827..3408c03 100644 --- a/core/src/test/resources/function-tests.json +++ b/core/src/test/resources/function-tests.json @@ -1128,15 +1128,20 @@ "output": "\"\"" }, { - "query": "replace-regexp(., \"([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])\", \"$2/$3/$1\")", + "query": "replace-regexp(., \"(\\d{4})-(\\d{2})-(\\d{2})\", \"$2/$3/$1\")", "input" : "\"2019-12-31\"", "output": "\"12/31/2019\"" }, { - "query": "replace-regexp(., \"(?[0-9][0-9][0-9][0-9])-(?[0-9][0-9])-(?[0-9][0-9])\", \"${day}.${month}.${year}\")", + "query": "replace-regexp(., \"(?\\d{4})-(?\\d{2})-(?\\d{2})\", \"${day}.${month}.${year}\")", "input" : "\"2019-12-31\"", "output": "\"31.12.2019\"" }, + { + "query": "replace-regexp(., \"([a-z]+)\", \"$1\")", + "input" : "\"2019-12-31\"", + "output" : "\"2019-12-31\"" + 
}, { "query": "trim(.)", "input" : "\"some text\"", diff --git a/core/src/test/resources/json-parse-error-tests.json b/core/src/test/resources/json-parse-error-tests.json index 9a98f99..487245f 100644 --- a/core/src/test/resources/json-parse-error-tests.json +++ b/core/src/test/resources/json-parse-error-tests.json @@ -1,7 +1,6 @@ { "description" : "Tests that should cause the JSLT parser to declare JSON syntax error.", "tests" : [ - "\" \\d \"", "\"\\u\"", "\"\\u0\"", "\"\\u00\"", diff --git a/functions.md b/functions.md index e41042a..8fbc219 100644 --- a/functions.md +++ b/functions.md @@ -523,10 +523,10 @@ If the `regexp` does not match the input,`out` corresponds to `value`. Examples: ``` -replace-regexp("2019-12-31", "([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])", "$2/$3/$1") +replace-regexp("2019-12-31", "(\\d{4})-(\\d{2})-(\\d{2})", "$2/$3/$1") => "12/31/2019" -replace-regexp("2019-12-31", "(?[0-9][0-9][0-9][0-9])-(?[0-9][0-9])-(?[0-9][0-9])", - => "${day}.${month}.${year}") => "31.12.2019" +replace-regexp("2019-12-31", "(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})", "${day}.${month}.${year}") + => "31.12.2019" replace-regexp("2019-12-31", "([a-z]+)", "$1") => "2019-12-31" ```