Skip to content

Commit

Permalink
Implement RegExp.escape
Browse files Browse the repository at this point in the history
  • Loading branch information
jirkamarsik committed Dec 4, 2024
1 parent c9b289a commit aaf0f82
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ See [release calendar](https://www.graalvm.org/release-calendar/) for release da
* Made option `js.locale` stable and allowed in `SandboxPolicy.UNTRUSTED`. Its value, if non-empty, must be a well-formed Unicode BCP 47 locale identifier and is now validated.
* Added an experimental `java.util.concurrent.Executor` that can be used to post tasks into the event loop thread in `graal-nodejs`. It is available as `require('node:graal').eventLoopExecutor`.
* Implemented the `TextDecoder` and `TextEncoder` APIs of the [WHATWG Encoding Standard](https://encoding.spec.whatwg.org/). They are available behind the experimental option (`--js.text-encoding`).
* Implemented the [`RegExp.escape`](https://github.com/tc39/proposal-regex-escaping) proposal. It is available in ECMAScript staging mode (`--js.ecmascript-version=staging`).

## Version 24.1.0
* ECMAScript 2024 mode/features enabled by default.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ public class Test262Runnable extends TestRunnable {
"Reflect.construct",
"Reflect.set",
"Reflect.setPrototypeOf",
"RegExp.escape",
"Set",
"ShadowRealm",
"SharedArrayBuffer",
Expand Down Expand Up @@ -279,7 +280,6 @@ public class Test262Runnable extends TestRunnable {
"Intl.DurationFormat",
"IsHTMLDDA",
"Math.sumPrecise",
"RegExp.escape",
"explicit-resource-management",
"regexp-modifiers",
"tail-call-optimization",
Expand All @@ -291,6 +291,7 @@ public class Test262Runnable extends TestRunnable {
"FinalizationRegistry.prototype.cleanupSome",
"Float16Array",
"Intl.Locale-info",
"RegExp.escape",
"ShadowRealm",
"decorators",
"json-parse-with-source",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/*
* Copyright (c) 2024, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
*
* Subject to the condition set forth below, permission is hereby granted to any
* person obtaining a copy of this software, associated documentation and/or
* data (collectively the "Software"), free of charge and under any and all
* copyright rights in the Software, and any and all patent rights owned or
* freely licensable by each licensor hereunder covering either (i) the
* unmodified Software as contributed to or provided by such licensor, or (ii)
* the Larger Works (as defined below), to deal in both
*
* (a) the Software, and
*
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
* one is included with the Software each a "Larger Work" to which the Software
* is contributed by such licensors),
*
* without restriction, including without limitation the rights to copy, create
* derivative works of, display, perform, and distribute the Software and make,
* use, sell, offer for sale, import, export, have made, and have sold the
* Software and the Larger Work(s), and to sublicense the foregoing rights on
* either these or other terms.
*
* This license is subject to the following condition:
*
* The above copyright notice and either this complete permission notice or at a
* minimum a reference to the UPL must be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oracle.truffle.js.builtins;

import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.strings.TruffleString;
import com.oracle.truffle.api.strings.TruffleStringBuilder;
import com.oracle.truffle.api.strings.TruffleStringBuilderUTF16;
import com.oracle.truffle.js.nodes.function.JSBuiltin;
import com.oracle.truffle.js.nodes.function.JSBuiltinNode;
import com.oracle.truffle.js.runtime.Boundaries;
import com.oracle.truffle.js.runtime.Errors;
import com.oracle.truffle.js.runtime.JSConfig;
import com.oracle.truffle.js.runtime.JSContext;
import com.oracle.truffle.js.runtime.JSRuntime;
import com.oracle.truffle.js.runtime.Strings;
import com.oracle.truffle.js.runtime.builtins.BuiltinEnum;
import com.oracle.truffle.js.runtime.builtins.JSRegExp;
import com.oracle.truffle.js.runtime.util.StringBuilderProfile;

/**
 * Builtin functions registered under {@link JSRegExp#CLASS_NAME}. The only member at present is
 * {@code escape}, which is gated on the staging ECMAScript version.
 */
public class RegExpFunctionBuiltins extends JSBuiltinsContainer.SwitchEnum<RegExpFunctionBuiltins.RegExpFunction> {

    /** Shared container instance. */
    public static final RegExpFunctionBuiltins BUILTINS = new RegExpFunctionBuiltins();

    protected RegExpFunctionBuiltins() {
        super(JSRegExp.CLASS_NAME, RegExpFunction.class);
    }

    /** Enumerates the functions provided by this container. */
    public enum RegExpFunction implements BuiltinEnum<RegExpFunction> {
        escape;

        /** Returns 1 for every member of this enum. */
        @Override
        public int getLength() {
            return 1;
        }

        /** Every member is reported as belonging to the staging ECMAScript version. */
        @Override
        public int getECMAScriptVersion() {
            return JSConfig.StagingECMAScriptVersion;
        }

        /** Creates the escape node with exactly one fixed argument node. */
        @Override
        public Object createNode(JSContext context, JSBuiltin builtin, boolean construct, boolean newTarget) {
            return RegExpFunctionBuiltinsFactory.JSRegExpEscapeNodeGen.create(
                            context,
                            builtin,
                            args().fixedArgs(1).createArgumentNodes(context));
        }
    }

    /**
     * Node behind {@code escape}: rewrites a string so that characters with a special meaning in a
     * regular expression pattern are replaced by escape sequences. Non-string arguments are rejected
     * with a TypeError.
     */
    abstract static class JSRegExpEscapeNode extends JSBuiltinNode {

        JSRegExpEscapeNode(JSContext context, JSBuiltin builtin) {
            super(context, builtin);
        }

        /**
         * Walks {@code input} code point by code point and appends, for each one:
         * <ul>
         * <li>{@code \xHH} if nothing has been emitted yet and the code point is an ASCII digit or
         * letter;</li>
         * <li>a backslash followed by the code point itself if it is a syntax character or
         * {@code /};</li>
         * <li>{@code \t}, {@code \n}, {@code \v}, {@code \f} or {@code \r} for the matching control
         * character;</li>
         * <li>{@code \xHH} (values up to 0xff) or one {@code \\uHHHH} per UTF-16 code unit for other
         * punctuators, white space, line terminators and surrogates;</li>
         * <li>the code point unchanged otherwise.</li>
         * </ul>
         * All appends go through {@code builderProfile}, which enforces the string length limit.
         */
        @Specialization
        TruffleString escapeString(TruffleString input,
                        @Cached(parameters = "getContext().getStringLengthLimit()") StringBuilderProfile builderProfile,
                        @Cached TruffleString.ByteLengthOfCodePointNode lengthOfCodePointNode,
                        @Cached TruffleString.CodePointAtByteIndexNode codePointAtNode,
                        @Cached TruffleString.ReadCharUTF16Node readCharNode,
                        @Cached TruffleString.ByteIndexOfCodePointNode indexOfCodePointNode,
                        @Cached TruffleStringBuilder.AppendJavaStringUTF16Node appendJavaStringNode,
                        @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode,
                        @Cached TruffleStringBuilder.ToStringNode toStringNode) {
            int inputLength = Strings.length(input);
            // Reserve room for some escapes up front: at least 16 extra units, or 50% more.
            int grownLength = Math.max(inputLength + 16, inputLength + (inputLength >> 1));
            int initialCapacity = Math.min(grownLength, getContext().getStringLengthLimit());
            TruffleStringBuilderUTF16 result = builderProfile.newStringBuilder(initialCapacity);

            int position = 0;
            while (position < inputLength) {
                int codePoint = Strings.codePointAt(codePointAtNode, input, position);
                boolean nothingEmittedYet = StringBuilderProfile.length(result) == 0;
                if (nothingEmittedYet && isAsciiAlphanumeric(codePoint)) {
                    // A leading digit or letter is always written as a hex escape.
                    builderProfile.append(appendJavaStringNode, result, "\\x");
                    builderProfile.append(appendJavaStringNode, result, Boundaries.integerToString(codePoint, 16));
                } else if (isSyntaxCharacterOrSolidus(indexOfCodePointNode, codePoint)) {
                    // SyntaxCharacter or U+002F (SOLIDUS)
                    builderProfile.append(appendCodePointNode, result, '\\');
                    builderProfile.append(appendCodePointNode, result, codePoint);
                } else {
                    switch (codePoint) {
                        case '\t' -> builderProfile.append(appendJavaStringNode, result, "\\t");
                        case '\n' -> builderProfile.append(appendJavaStringNode, result, "\\n");
                        case 0x0b -> builderProfile.append(appendJavaStringNode, result, "\\v");
                        case '\f' -> builderProfile.append(appendJavaStringNode, result, "\\f");
                        case '\r' -> builderProfile.append(appendJavaStringNode, result, "\\r");
                        default -> {
                            if (requiresNumericEscape(indexOfCodePointNode, codePoint)) {
                                if (codePoint > 0xff) {
                                    // One \\uHHHH escape per UTF-16 code unit of this code point.
                                    int numCodeUnits = Strings.lengthOfCodePointAt(lengthOfCodePointNode, input, position);
                                    int end = position + numCodeUnits;
                                    for (int unitIndex = position; unitIndex < end; unitIndex++) {
                                        char codeUnit = Strings.charAt(readCharNode, input, unitIndex);
                                        builderProfile.append(appendJavaStringNode, result, "\\u");
                                        leftPad(builderProfile, appendCodePointNode, appendJavaStringNode, result, Boundaries.integerToString(codeUnit, 16), 4);
                                    }
                                } else {
                                    builderProfile.append(appendJavaStringNode, result, "\\x");
                                    leftPad(builderProfile, appendCodePointNode, appendJavaStringNode, result, Boundaries.integerToString(codePoint, 16), 2);
                                }
                            } else {
                                builderProfile.append(appendCodePointNode, result, codePoint);
                            }
                        }
                    }
                }
                position += Strings.lengthOfCodePointAt(lengthOfCodePointNode, input, position);
            }
            return StringBuilderProfile.toString(toStringNode, result);
        }

        /** True for {@code 0-9}, {@code a-z} and {@code A-Z}. */
        private static boolean isAsciiAlphanumeric(int codePoint) {
            if (codePoint >= '0' && codePoint <= '9') {
                return true;
            }
            if (codePoint >= 'a' && codePoint <= 'z') {
                return true;
            }
            return codePoint >= 'A' && codePoint <= 'Z';
        }

        /** True if the code point occurs in {@link Strings#REGEXP_SYNTAX_CHARS_WITH_SOLIDUS}. */
        private static boolean isSyntaxCharacterOrSolidus(TruffleString.ByteIndexOfCodePointNode indexOfCodePointNode, int codePoint) {
            return Strings.indexOf(indexOfCodePointNode, Strings.REGEXP_SYNTAX_CHARS_WITH_SOLIDUS, codePoint) >= 0;
        }

        /**
         * True if the code point occurs in {@link Strings#REGEXP_OTHER_PUNCTUATORS}, is white space or
         * a line terminator, or lies in the surrogate range 0xd800-0xdfff.
         */
        private static boolean requiresNumericEscape(TruffleString.ByteIndexOfCodePointNode indexOfCodePointNode, int codePoint) {
            if (Strings.indexOf(indexOfCodePointNode, Strings.REGEXP_OTHER_PUNCTUATORS, codePoint) >= 0) {
                return true;
            }
            if (JSRuntime.isWhiteSpaceOrLineTerminator(codePoint)) {
                return true;
            }
            return codePoint >= 0xd800 && codePoint <= 0xdfff;
        }

        /**
         * Appends {@code str} to {@code sb}, preceded by as many {@code '0'} characters as are needed
         * to reach {@code padSize}; strings that are already long enough are appended as they are.
         */
        private static void leftPad(StringBuilderProfile builderProfile, TruffleStringBuilder.AppendCodePointNode appendCodePointNode,
                        TruffleStringBuilder.AppendJavaStringUTF16Node appendJavaStringNode,
                        TruffleStringBuilderUTF16 sb, String str, int padSize) {
            int missingZeros = padSize - str.length();
            if (missingZeros > 0) {
                builderProfile.repeat(appendCodePointNode, sb, '0', missingZeros);
            }
            builderProfile.append(appendJavaStringNode, sb, str);
        }

        /** Any argument that is not a {@link TruffleString} results in a TypeError. */
        @Fallback
        TruffleString escapeNotString(Object input) {
            throw Errors.createTypeErrorNotAString(input);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@ public static boolean characterIsUpperCase(char ch) {
return Character.isUpperCase(ch);
}

/**
 * Wrapper around {@link Character#isSpaceChar(int)}, annotated with {@code @TruffleBoundary}.
 *
 * @param cp the code point to test
 * @return whatever {@code Character.isSpaceChar(cp)} returns
 */
@TruffleBoundary
public static boolean characterIsSpaceChar(int cp) {
return Character.isSpaceChar(cp);
}

/**
 * Wrapper around {@link Integer#toString(int, int)}, annotated with {@code @TruffleBoundary}.
 *
 * @param i the value to format
 * @param radix the radix passed through to {@code Integer.toString}
 * @return the textual representation of {@code i} in the given radix (no padding is applied here)
 */
@TruffleBoundary
public static String integerToString(int i, int radix) {
return Integer.toString(i, radix);
}

@TruffleBoundary
public static boolean equals(Object a, Object b) {
return a.equals(b);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1726,7 +1726,7 @@ public static int lastNonWhitespaceIndex(TruffleString string, TruffleString.Rea
/**
* Union of WhiteSpace and LineTerminator (StrWhiteSpaceChar). Used by TrimString.
*/
public static boolean isWhiteSpaceOrLineTerminator(char cp) {
public static boolean isWhiteSpaceOrLineTerminator(int cp) {
return switch (cp) {
// @formatter:off
case 0x0009, 0x000B, 0x000C, 0x0020, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F, 0x205F, 0x3000, 0xFEFF,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,10 @@ private Strings() {
public static final TruffleString TIME_ZONE = Strings.constant("timeZone");
public static final TruffleString TIME_ZONE_NAME = Strings.constant("timeZoneName");

/* RegExp.escape constants */
public static final TruffleString REGEXP_SYNTAX_CHARS_WITH_SOLIDUS = Strings.constant("^$\\.*+?()[]{}|/");
public static final TruffleString REGEXP_OTHER_PUNCTUATORS = Strings.constant(",-=<>#&!%:;@~'`\"");

/* end of constants */

public static boolean isTString(Object string) {
Expand Down Expand Up @@ -562,6 +566,10 @@ public static int codePointAt(TruffleString.CodePointAtByteIndexNode node, Truff
return node.execute(s, i << 1, TruffleString.Encoding.UTF_16);
}

/**
 * Returns the length of the code point that starts at index {@code i} of {@code s}. The index is
 * doubled to obtain the byte index handed to {@code node}, and the byte length reported by the node
 * is halved again before it is returned, so both the argument and the result are expressed in
 * two-byte (UTF-16) units.
 */
public static int lengthOfCodePointAt(TruffleString.ByteLengthOfCodePointNode node, TruffleString s, int i) {
    final int byteIndex = i << 1;
    final int byteLength = node.execute(s, byteIndex, TruffleString.Encoding.UTF_16);
    return byteLength >> 1;
}

/**
 * Concatenates {@code s1} and {@code s2} by delegating to the node-taking {@code concat} overload
 * with the uncached {@link TruffleString.ConcatNode}.
 */
public static TruffleString concat(TruffleString s1, TruffleString s2) {
    TruffleString.ConcatNode uncachedConcat = TruffleString.ConcatNode.getUncached();
    return concat(uncachedConcat, s1, s2);
}
Expand Down Expand Up @@ -938,6 +946,14 @@ public static void builderAppend(TruffleStringBuilder.AppendCharUTF16Node node,
node.execute(sb, chr);
}

/**
 * Appends the code point {@code cp} to {@code sb} by delegating to {@code node.execute(sb, cp)}.
 * No length-limit check is performed here.
 */
public static void builderAppend(TruffleStringBuilder.AppendCodePointNode node, TruffleStringBuilderUTF16 sb, int cp) {
node.execute(sb, cp);
}

/**
 * Delegates to {@code node.execute(sb, cp, repeat)}; {@code repeat} is passed through unchanged as
 * the repetition count for the code point {@code cp}. No length-limit check is performed here.
 */
public static void builderAppend(TruffleStringBuilder.AppendCodePointNode node, TruffleStringBuilderUTF16 sb, int cp, int repeat) {
node.execute(sb, cp, repeat);
}

/**
 * Appends the int {@code i} to {@code sb} using the uncached
 * {@link TruffleStringBuilder.AppendIntNumberNode}.
 */
public static void builderAppend(TruffleStringBuilderUTF16 sb, int i) {
TruffleStringBuilder.AppendIntNumberNode.getUncached().execute(sb, i);
}
Expand All @@ -958,6 +974,10 @@ public static void builderAppend(TruffleStringBuilderUTF16 sb, String str) {
TruffleStringBuilder.AppendJavaStringUTF16Node.getUncached().execute(sb, str, 0, str.length());
}

/**
 * Appends the Java string {@code str} to {@code sb} through the supplied {@code node}, by
 * delegating to {@code node.execute(sb, str)}. No length-limit check is performed here.
 */
public static void builderAppend(TruffleStringBuilder.AppendJavaStringUTF16Node node, TruffleStringBuilderUTF16 sb, String str) {
node.execute(sb, str);
}

/**
 * Appends {@code str} to {@code sb} by delegating to {@code builderAppendLen} with a start of
 * {@code 0} and a length of {@code length(str)}.
 */
public static void builderAppend(TruffleStringBuilderUTF16 sb, TruffleString str) {
builderAppendLen(sb, str, 0, length(str));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import com.oracle.truffle.api.object.Shape;
import com.oracle.truffle.api.source.Source;
import com.oracle.truffle.api.strings.TruffleString;
import com.oracle.truffle.js.builtins.RegExpFunctionBuiltins;
import com.oracle.truffle.js.builtins.RegExpPrototypeBuiltins;
import com.oracle.truffle.js.lang.JavaScriptLanguage;
import com.oracle.truffle.js.runtime.JSConfig;
Expand All @@ -77,7 +78,7 @@
import com.oracle.truffle.js.runtime.util.TRegexUtil.InvokeGetGroupBoundariesMethodNode;
import com.oracle.truffle.js.runtime.util.TRegexUtil.TRegexMaterializeResult;

public final class JSRegExp extends JSNonProxy implements JSConstructorFactory.Default, PrototypeSupplier {
public final class JSRegExp extends JSNonProxy implements JSConstructorFactory.Default.WithFunctions, PrototypeSupplier {

static final TruffleString BRACKET_REG_EXP_SPC = Strings.constant("[RegExp ");

Expand Down Expand Up @@ -389,7 +390,7 @@ public void fillConstructor(JSRealm realm, JSDynamicObject constructor) {
}

public static JSConstructor createConstructor(JSRealm realm) {
return INSTANCE.createConstructorAndPrototype(realm);
return INSTANCE.createConstructorAndPrototype(realm, RegExpFunctionBuiltins.BUILTINS);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@ public static TruffleString toString(TruffleStringBuilder.ToStringNode node, Tru
return Strings.builderToString(node, builder);
}

/**
 * Appends the Java string {@code str} to {@code builder} unless doing so would push the builder's
 * length beyond {@code stringLengthLimit}.
 *
 * @throws RuntimeException the range error created by
 *             {@code Errors.createRangeErrorInvalidStringLength()} when the limit would be
 *             exceeded; the error branch is entered first and nothing is appended
 */
public void append(TruffleStringBuilder.AppendJavaStringUTF16Node node, TruffleStringBuilderUTF16 builder, String str) {
    final int lengthAfterAppend = Strings.builderLength(builder) + str.length();
    if (lengthAfterAppend <= stringLengthLimit) {
        Strings.builderAppend(node, builder, str);
        return;
    }
    errorBranch.enter();
    throw Errors.createRangeErrorInvalidStringLength();
}

public void append(TruffleStringBuilder.AppendStringNode node, TruffleStringBuilderUTF16 builder, TruffleString str) {
if ((Strings.builderLength(builder) + Strings.length(str)) > stringLengthLimit) {
errorBranch.enter();
Expand All @@ -114,7 +122,7 @@ public void repeat(TruffleStringBuilder.AppendCodePointNode node, TruffleStringB
errorBranch.enter();
throw Errors.createRangeErrorInvalidStringLength();
}
node.execute(builder, codePoint, repeat);
Strings.builderAppend(node, builder, codePoint, repeat);
}

public void append(TruffleStringBuilder.AppendIntNumberNode node, TruffleStringBuilderUTF16 builder, int intValue) {
Expand Down

0 comments on commit aaf0f82

Please sign in to comment.