From aaf0f82c36976de18e3a71089ac5d92aa3e2772c Mon Sep 17 00:00:00 2001 From: Jirka Marsik Date: Tue, 3 Dec 2024 01:01:43 +0100 Subject: [PATCH] Implement RegExp.escape --- CHANGELOG.md | 1 + .../external/test262/Test262Runnable.java | 3 +- .../js/builtins/RegExpFunctionBuiltins.java | 160 ++++++++++++++++++ .../oracle/truffle/js/runtime/Boundaries.java | 10 ++ .../oracle/truffle/js/runtime/JSRuntime.java | 2 +- .../oracle/truffle/js/runtime/Strings.java | 20 +++ .../truffle/js/runtime/builtins/JSRegExp.java | 5 +- .../js/runtime/util/StringBuilderProfile.java | 10 +- 8 files changed, 206 insertions(+), 5 deletions(-) create mode 100644 graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/builtins/RegExpFunctionBuiltins.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 45aa736d614..ca1ac66127b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ See [release calendar](https://www.graalvm.org/release-calendar/) for release da * Made option `js.locale` stable and allowed in `SandboxPolicy.UNTRUSTED`. Its value, if non-empty, must be a well-formed Unicode BCP 47 locale identifier and is now validated. * Added an experimental `java.util.concurrent.Executor` that can be used to post tasks into the event loop thread in `graal-nodejs`. It is available as `require('node:graal').eventLoopExecutor`. * Implemented the `TextDecoder` and `TextEncoder` APIs of the [WHATWG Encoding Standard](https://encoding.spec.whatwg.org/). They are available behind the experimental option (`--js.text-encoding`). +* Implemented the [`RegExp.escape`](https://github.com/tc39/proposal-regex-escaping) proposal. It is available in ECMAScript staging mode (`--js.ecmascript-version=staging`). ## Version 24.1.0 * ECMAScript 2024 mode/features enabled by default. diff --git a/graal-js/src/com.oracle.truffle.js.test.external/src/com/oracle/truffle/js/test/external/test262/Test262Runnable.java b/graal-js/src/com.oracle.truffle.js.test.external/src/com/oracle/truffle/js/test/external/test262/Test262Runnable.java index d9ce2669512..c3f0f252353 100644 --- a/graal-js/src/com.oracle.truffle.js.test.external/src/com/oracle/truffle/js/test/external/test262/Test262Runnable.java +++ b/graal-js/src/com.oracle.truffle.js.test.external/src/com/oracle/truffle/js/test/external/test262/Test262Runnable.java @@ -154,6 +154,7 @@ public class Test262Runnable extends TestRunnable { "Reflect.construct", "Reflect.set", "Reflect.setPrototypeOf", + "RegExp.escape", "Set", "ShadowRealm", "SharedArrayBuffer", @@ -279,7 +280,6 @@ public class Test262Runnable extends TestRunnable { "Intl.DurationFormat", "IsHTMLDDA", "Math.sumPrecise", - "RegExp.escape", "explicit-resource-management", "regexp-modifiers", "tail-call-optimization", @@ -291,6 +291,7 @@ public class Test262Runnable extends TestRunnable { "FinalizationRegistry.prototype.cleanupSome", "Float16Array", "Intl.Locale-info", + "RegExp.escape", "ShadowRealm", "decorators", "json-parse-with-source", diff --git a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/builtins/RegExpFunctionBuiltins.java b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/builtins/RegExpFunctionBuiltins.java new file mode 100644 index 00000000000..99cb3d4711e --- /dev/null +++ b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/builtins/RegExpFunctionBuiltins.java @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2024, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package com.oracle.truffle.js.builtins; + +import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.dsl.Fallback; +import com.oracle.truffle.api.dsl.Specialization; +import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF16; +import com.oracle.truffle.js.nodes.function.JSBuiltin; +import com.oracle.truffle.js.nodes.function.JSBuiltinNode; +import com.oracle.truffle.js.runtime.Boundaries; +import com.oracle.truffle.js.runtime.Errors; +import com.oracle.truffle.js.runtime.JSConfig; +import com.oracle.truffle.js.runtime.JSContext; +import com.oracle.truffle.js.runtime.JSRuntime; +import com.oracle.truffle.js.runtime.Strings; +import com.oracle.truffle.js.runtime.builtins.BuiltinEnum; +import com.oracle.truffle.js.runtime.builtins.JSRegExp; +import com.oracle.truffle.js.runtime.util.StringBuilderProfile; + +public class RegExpFunctionBuiltins extends JSBuiltinsContainer.SwitchEnum { + + public static final RegExpFunctionBuiltins BUILTINS = new RegExpFunctionBuiltins(); + + protected RegExpFunctionBuiltins() { + super(JSRegExp.CLASS_NAME, RegExpFunction.class); + } + + public enum RegExpFunction implements BuiltinEnum { + escape; + + @Override + public int getLength() { + return 1; + } + + @Override + public int getECMAScriptVersion() { + return JSConfig.StagingECMAScriptVersion; + } + + @Override + public Object createNode(JSContext context, JSBuiltin builtin, boolean construct, boolean newTarget) { + return RegExpFunctionBuiltinsFactory.JSRegExpEscapeNodeGen.create(context, builtin, args().fixedArgs(1).createArgumentNodes(context)); + } + } + + abstract static class JSRegExpEscapeNode extends JSBuiltinNode { + + JSRegExpEscapeNode(JSContext context, JSBuiltin builtin) { + super(context, builtin); + } + + @Specialization + TruffleString escapeString(TruffleString input, + @Cached(parameters = "getContext().getStringLengthLimit()") StringBuilderProfile builderProfile, + @Cached TruffleString.ByteLengthOfCodePointNode lengthOfCodePointNode, + @Cached TruffleString.CodePointAtByteIndexNode codePointAtNode, + @Cached TruffleString.ReadCharUTF16Node readCharNode, + @Cached TruffleString.ByteIndexOfCodePointNode indexOfCodePointNode, + @Cached TruffleStringBuilder.AppendJavaStringUTF16Node appendJavaStringNode, + @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + @Cached TruffleStringBuilder.ToStringNode toStringNode) { + int length = Strings.length(input); + int initialCapacity = Math.min(Math.max(length + 16, length + (length >> 1)), getContext().getStringLengthLimit()); + TruffleStringBuilderUTF16 escaped = builderProfile.newStringBuilder(initialCapacity); + for (int index = 0; index < length; index += Strings.lengthOfCodePointAt(lengthOfCodePointNode, input, index)) { + int cp = Strings.codePointAt(codePointAtNode, input, index); + if (StringBuilderProfile.length(escaped) == 0 && ((cp >= '0' && cp <= '9') || (cp >= 'a' && cp <= 'z') || (cp >= 'A' && cp <= 'Z'))) { + builderProfile.append(appendJavaStringNode, escaped, "\\x"); + builderProfile.append(appendJavaStringNode, escaped, Boundaries.integerToString(cp, 16)); + } else if (Strings.indexOf(indexOfCodePointNode, Strings.REGEXP_SYNTAX_CHARS_WITH_SOLIDUS, cp) >= 0) { + // SyntaxCharacter or U+002F (SOLIDUS) + builderProfile.append(appendCodePointNode, escaped, '\\'); + builderProfile.append(appendCodePointNode, escaped, cp); + } else if (cp == '\t') { + builderProfile.append(appendJavaStringNode, escaped, "\\t"); + } else if (cp == '\n') { + builderProfile.append(appendJavaStringNode, escaped, "\\n"); + } else if (cp == 0x0b) { + builderProfile.append(appendJavaStringNode, escaped, "\\v"); + } else if (cp == '\f') { + builderProfile.append(appendJavaStringNode, escaped, "\\f"); + } else if (cp == '\r') { + builderProfile.append(appendJavaStringNode, escaped, "\\r"); + } else if (Strings.indexOf(indexOfCodePointNode, Strings.REGEXP_OTHER_PUNCTUATORS, cp) >= 0 || JSRuntime.isWhiteSpaceOrLineTerminator(cp) || (cp >= 0xd800 && cp <= 0xdfff)) { + if (cp <= 0xff) { + builderProfile.append(appendJavaStringNode, escaped, "\\x"); + leftPad(builderProfile, appendCodePointNode, appendJavaStringNode, escaped, Boundaries.integerToString(cp, 16), 2); + } else { + int numCodeUnits = Strings.lengthOfCodePointAt(lengthOfCodePointNode, input, index); + for (int i = index; i < index + numCodeUnits; i++) { + char cu = Strings.charAt(readCharNode, input, i); + builderProfile.append(appendJavaStringNode, escaped, "\\u"); + leftPad(builderProfile, appendCodePointNode, appendJavaStringNode, escaped, Boundaries.integerToString(cu, 16), 4); + } + } + } else { + builderProfile.append(appendCodePointNode, escaped, cp); + } + } + return StringBuilderProfile.toString(toStringNode, escaped); + } + + private static void leftPad(StringBuilderProfile builderProfile, TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + TruffleStringBuilder.AppendJavaStringUTF16Node appendJavaStringNode, + TruffleStringBuilderUTF16 sb, String str, int padSize) { + int padding = padSize - str.length(); + if (padding > 0) { + builderProfile.repeat(appendCodePointNode, sb, '0', padding); + } + builderProfile.append(appendJavaStringNode, sb, str); + } + + @Fallback + TruffleString escapeNotString(Object input) { + throw Errors.createTypeErrorNotAString(input); + } + } +} diff --git a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/Boundaries.java b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/Boundaries.java index 76dd93680bf..5e8cee0f254 100644 --- a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/Boundaries.java +++ b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/Boundaries.java @@ -73,6 +73,16 @@ public static boolean characterIsUpperCase(char ch) { return Character.isUpperCase(ch); } + @TruffleBoundary + public static boolean characterIsSpaceChar(int cp) { + return Character.isSpaceChar(cp); + } + + @TruffleBoundary + public static String integerToString(int i, int radix) { + return Integer.toString(i, radix); + } + @TruffleBoundary public static boolean equals(Object a, Object b) { return a.equals(b); diff --git a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/JSRuntime.java b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/JSRuntime.java index 2945b1b87a3..4b0af7c9cf5 100644 --- a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/JSRuntime.java +++ b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/JSRuntime.java @@ -1726,7 +1726,7 @@ public static int lastNonWhitespaceIndex(TruffleString string, TruffleString.Rea /** * Union of WhiteSpace and LineTerminator (StrWhiteSpaceChar). Used by TrimString. */ - public static boolean isWhiteSpaceOrLineTerminator(char cp) { + public static boolean isWhiteSpaceOrLineTerminator(int cp) { return switch (cp) { // @formatter:off case 0x0009, 0x000B, 0x000C, 0x0020, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F, 0x205F, 0x3000, 0xFEFF, diff --git a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/Strings.java b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/Strings.java index a8a7b1efc41..8b60b802178 100644 --- a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/Strings.java +++ b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/Strings.java @@ -506,6 +506,10 @@ private Strings() { public static final TruffleString TIME_ZONE = Strings.constant("timeZone"); public static final TruffleString TIME_ZONE_NAME = Strings.constant("timeZoneName"); + /* RegExp.escape constants */ + public static final TruffleString REGEXP_SYNTAX_CHARS_WITH_SOLIDUS = Strings.constant("^$\\.*+?()[]{}|/"); + public static final TruffleString REGEXP_OTHER_PUNCTUATORS = Strings.constant(",-=<>#&!%:;@~'`\""); + /* end of constants */ public static boolean isTString(Object string) { @@ -562,6 +566,10 @@ public static int codePointAt(TruffleString.CodePointAtByteIndexNode node, Truff return node.execute(s, i << 1, TruffleString.Encoding.UTF_16); } + public static int lengthOfCodePointAt(TruffleString.ByteLengthOfCodePointNode node, TruffleString s, int i) { + return node.execute(s, i << 1, TruffleString.Encoding.UTF_16) >> 1; + } + public static TruffleString concat(TruffleString s1, TruffleString s2) { return concat(TruffleString.ConcatNode.getUncached(), s1, s2); } @@ -938,6 +946,14 @@ public static void builderAppend(TruffleStringBuilder.AppendCharUTF16Node node, node.execute(sb, chr); } + public static void builderAppend(TruffleStringBuilder.AppendCodePointNode node, TruffleStringBuilderUTF16 sb, int cp) { + node.execute(sb, cp); + } + + public static void builderAppend(TruffleStringBuilder.AppendCodePointNode node, TruffleStringBuilderUTF16 sb, int cp, int repeat) { + node.execute(sb, cp, repeat); + } + public static void builderAppend(TruffleStringBuilderUTF16 sb, int i) { TruffleStringBuilder.AppendIntNumberNode.getUncached().execute(sb, i); } @@ -958,6 +974,10 @@ public static void builderAppend(TruffleStringBuilderUTF16 sb, String str) { TruffleStringBuilder.AppendJavaStringUTF16Node.getUncached().execute(sb, str, 0, str.length()); } + public static void builderAppend(TruffleStringBuilder.AppendJavaStringUTF16Node node, TruffleStringBuilderUTF16 sb, String str) { + node.execute(sb, str); + } + public static void builderAppend(TruffleStringBuilderUTF16 sb, TruffleString str) { builderAppendLen(sb, str, 0, length(str)); } diff --git a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/builtins/JSRegExp.java b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/builtins/JSRegExp.java index ee17f44a5b3..ca3dedd6b71 100644 --- a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/builtins/JSRegExp.java +++ b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/builtins/JSRegExp.java @@ -55,6 +55,7 @@ import com.oracle.truffle.api.object.Shape; import com.oracle.truffle.api.source.Source; import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.js.builtins.RegExpFunctionBuiltins; import com.oracle.truffle.js.builtins.RegExpPrototypeBuiltins; import com.oracle.truffle.js.lang.JavaScriptLanguage; import com.oracle.truffle.js.runtime.JSConfig; @@ -77,7 +78,7 @@ import com.oracle.truffle.js.runtime.util.TRegexUtil.InvokeGetGroupBoundariesMethodNode; import com.oracle.truffle.js.runtime.util.TRegexUtil.TRegexMaterializeResult; -public final class JSRegExp extends JSNonProxy implements JSConstructorFactory.Default, PrototypeSupplier { +public final class JSRegExp extends JSNonProxy implements JSConstructorFactory.Default.WithFunctions, PrototypeSupplier { static final TruffleString BRACKET_REG_EXP_SPC = Strings.constant("[RegExp "); @@ -389,7 +390,7 @@ public void fillConstructor(JSRealm realm, JSDynamicObject constructor) { } public static JSConstructor createConstructor(JSRealm realm) { - return INSTANCE.createConstructorAndPrototype(realm); + return INSTANCE.createConstructorAndPrototype(realm, RegExpFunctionBuiltins.BUILTINS); } @Override diff --git a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/util/StringBuilderProfile.java b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/util/StringBuilderProfile.java index 7194d9a8db0..44ffb717fa6 100644 --- a/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/util/StringBuilderProfile.java +++ b/graal-js/src/com.oracle.truffle.js/src/com/oracle/truffle/js/runtime/util/StringBuilderProfile.java @@ -88,6 +88,14 @@ public static TruffleString toString(TruffleStringBuilder.ToStringNode node, Tru return Strings.builderToString(node, builder); } + public void append(TruffleStringBuilder.AppendJavaStringUTF16Node node, TruffleStringBuilderUTF16 builder, String str) { + if ((Strings.builderLength(builder) + str.length()) > stringLengthLimit) { + errorBranch.enter(); + throw Errors.createRangeErrorInvalidStringLength(); + } + Strings.builderAppend(node, builder, str); + } + public void append(TruffleStringBuilder.AppendStringNode node, TruffleStringBuilderUTF16 builder, TruffleString str) { if ((Strings.builderLength(builder) + Strings.length(str)) > stringLengthLimit) { errorBranch.enter(); @@ -114,7 +122,7 @@ public void repeat(TruffleStringBuilder.AppendCodePointNode node, TruffleStringB errorBranch.enter(); throw Errors.createRangeErrorInvalidStringLength(); } - node.execute(builder, codePoint, repeat); + Strings.builderAppend(node, builder, codePoint, repeat); } public void append(TruffleStringBuilder.AppendIntNumberNode node, TruffleStringBuilderUTF16 builder, int intValue) {