diff --git a/deps/v8/src/unicode-decoder.h b/deps/v8/src/unicode-decoder.h index 35d23a2ac7c085..c03084116603c5 100644 --- a/deps/v8/src/unicode-decoder.h +++ b/deps/v8/src/unicode-decoder.h @@ -7,11 +7,10 @@ #include #include "src/globals.h" -#include "src/utils.h" namespace unibrow { -class V8_EXPORT_PRIVATE Utf8DecoderBase { +class Utf8DecoderBase { public: // Initialization done in subclass. inline Utf8DecoderBase(); diff --git a/deps/v8/src/unicode.cc b/deps/v8/src/unicode.cc index 015f8a27f23d1b..fa4afc59965d68 100644 --- a/deps/v8/src/unicode.cc +++ b/deps/v8/src/unicode.cc @@ -228,52 +228,80 @@ static inline bool IsContinuationCharacter(byte chr) { // This method decodes an UTF-8 value according to RFC 3629. uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) { size_t length = NonASCIISequenceLength(str[0]); - - // Check continuation characters. - size_t max_count = std::min(length, max_length); - size_t count = 1; - while (count < max_count && IsContinuationCharacter(str[count])) { - count++; + if (length == 0 || max_length < length) { + *cursor += 1; + return kBadChar; } - *cursor += count; - - // There must be enough continuation characters. - if (count != length) return kBadChar; - - // Check overly long sequences & other conditions. - if (length == 3) { - if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) { - // Overlong three-byte sequence? + if (length == 2) { + if (!IsContinuationCharacter(str[1])) { + *cursor += 1; return kBadChar; - } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) { - // High and low surrogate halves? + } + *cursor += 2; + return ((str[0] << 6) + str[1]) - 0x00003080; + } + if (length == 3) { + switch (str[0]) { + case 0xE0: + // Overlong three-byte sequence. + if (str[1] < 0xA0 || str[1] > 0xBF) { + *cursor += 1; + return kBadChar; + } + break; + case 0xED: + // High and low surrogate halves. + if (str[1] < 0x80 || str[1] > 0x9F) { + *cursor += 1; + return kBadChar; + } + break; + default: + if (!IsContinuationCharacter(str[1])) { + *cursor += 1; + return kBadChar; + } + } + if (!IsContinuationCharacter(str[2])) { + *cursor += 1; return kBadChar; } - } else if (length == 4) { - if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) { + *cursor += 3; + return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080; + } + DCHECK(length == 4); + switch (str[0]) { + case 0xF0: // Overlong four-byte sequence. - return kBadChar; - } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) { + if (str[1] < 0x90 || str[1] > 0xBF) { + *cursor += 1; + return kBadChar; + } + break; + case 0xF4: // Code points outside of the unicode range. - return kBadChar; - } + if (str[1] < 0x80 || str[1] > 0x8F) { + *cursor += 1; + return kBadChar; + } + break; + default: + if (!IsContinuationCharacter(str[1])) { + *cursor += 1; + return kBadChar; + } } - - // All errors have been handled, so we only have to assemble the result. - switch (length) { - case 1: - return str[0]; - case 2: - return ((str[0] << 6) + str[1]) - 0x00003080; - case 3: - return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080; - case 4: - return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) - - 0x03C82080; + if (!IsContinuationCharacter(str[2])) { + *cursor += 1; + return kBadChar; } - - UNREACHABLE(); - return kBadChar; + if (!IsContinuationCharacter(str[3])) { + *cursor += 1; + return kBadChar; + } + *cursor += 4; + return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) - + 0x03C82080; } uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) { @@ -295,10 +323,9 @@ uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) { // with one shift. uint8_t mask = 0x7f >> kind; - // Store the kind in the top nibble, and kind - 1 (i.e., remaining bytes) - // in 2nd nibble, and the value in the bottom three. The 2nd nibble is - // intended as a counter about how many bytes are still needed. - *buffer = kind << 28 | (kind - 1) << 24 | (next & mask); + // Store the kind - 1 (i.e., remaining bytes) in the top byte, value + // in the bottom three. + *buffer = (kind - 1) << 24 | (next & mask); return kIncomplete; } else { // No buffer, and not the start of a 1-byte char (handled at the @@ -327,19 +354,15 @@ uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) { // We're inside of a character, as described by buffer. // How many bytes (excluding this one) do we still expect? - uint8_t bytes_expected = *buffer >> 28; - uint8_t bytes_left = (*buffer >> 24) & 0x0f; - bytes_left--; + uint8_t count = (*buffer >> 24) - 1; // Update the value. uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F); - if (bytes_left) { - *buffer = (bytes_expected << 28 | bytes_left << 24 | value); + if (count) { + *buffer = count << 24 | value; return kIncomplete; } else { *buffer = 0; - bool sequence_was_too_long = (bytes_expected == 2 && value < 0x80) || - (bytes_expected == 3 && value < 0x800); - return sequence_was_too_long ? kBadChar : value; + return value; } } else { // Within a character, but not a continuation character? Then the diff --git a/deps/v8/test/cctest/test-parsing.cc b/deps/v8/test/cctest/test-parsing.cc index 921cebcad621a4..3bce6ccb32cb98 100644 --- a/deps/v8/test/cctest/test-parsing.cc +++ b/deps/v8/test/cctest/test-parsing.cc @@ -684,26 +684,74 @@ TEST(RegExpScanning) { TestScanRegExp("/=?/", "=?"); } -static int Ucs2CharLength(unibrow::uchar c) { - if (c == unibrow::Utf8::kIncomplete || c == unibrow::Utf8::kBufferEmpty) { - return 0; - } else if (c < 0xffff) { - return 1; - } else { - return 2; - } -} static int Utf8LengthHelper(const char* s) { - unibrow::Utf8::Utf8IncrementalBuffer buffer(unibrow::Utf8::kBufferEmpty); - int length = 0; - for (; *s != '\0'; s++) { - unibrow::uchar tmp = unibrow::Utf8::ValueOfIncremental(*s, &buffer); - length += Ucs2CharLength(tmp); + int len = i::StrLength(s); + int character_length = len; + for (int i = 0; i < len; i++) { + unsigned char c = s[i]; + int input_offset = 0; + int output_adjust = 0; + if (c > 0x7f) { + if (c < 0xc0) continue; + if (c >= 0xf0) { + if (c >= 0xf8) { + // 5 and 6 byte UTF-8 sequences turn into a kBadChar for each UTF-8 + // byte. + continue; // Handle first UTF-8 byte. + } + if ((c & 7) == 0 && ((s[i + 1] & 0x30) == 0)) { + // This 4 byte sequence could have been coded as a 3 byte sequence. + // Record a single kBadChar for the first byte and continue. + continue; + } + input_offset = 3; + // 4 bytes of UTF-8 turn into 2 UTF-16 code units. + character_length -= 2; + } else if (c >= 0xe0) { + if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) { + // This 3 byte sequence could have been coded as a 2 byte sequence. + // Record a single kBadChar for the first byte and continue. + continue; + } + if (c == 0xed) { + unsigned char d = s[i + 1]; + if ((d < 0x80) || (d > 0x9f)) { + // This 3 byte sequence is part of a surrogate pair which is not + // supported by UTF-8. Record a single kBadChar for the first byte + // and continue. + continue; + } + } + input_offset = 2; + // 3 bytes of UTF-8 turn into 1 UTF-16 code unit. + output_adjust = 2; + } else { + if ((c & 0x1e) == 0) { + // This 2 byte sequence could have been coded as a 1 byte sequence. + // Record a single kBadChar for the first byte and continue. + continue; + } + input_offset = 1; + // 2 bytes of UTF-8 turn into 1 UTF-16 code unit. + output_adjust = 1; + } + bool bad = false; + for (int j = 1; j <= input_offset; j++) { + if ((s[i + j] & 0xc0) != 0x80) { + // Bad UTF-8 sequence turns the first in the sequence into kBadChar, + // which is a single UTF-16 code unit. + bad = true; + break; + } + } + if (!bad) { + i += input_offset; + character_length -= output_adjust; + } + } } - unibrow::uchar tmp = unibrow::Utf8::ValueOfIncrementalFinish(&buffer); - length += Ucs2CharLength(tmp); - return length; + return character_length; } @@ -933,206 +981,169 @@ TEST(ScopePositions) { }; const SourceData source_data[] = { - {" with ({}) ", "{ block; }", " more;", i::WITH_SCOPE, i::SLOPPY}, - {" with ({}) ", "{ block; }", "; more;", i::WITH_SCOPE, i::SLOPPY}, - {" with ({}) ", - "{\n" - " block;\n" - " }", - "\n" - " more;", - i::WITH_SCOPE, i::SLOPPY}, - {" with ({}) ", "statement;", " more;", i::WITH_SCOPE, i::SLOPPY}, - {" with ({}) ", "statement", - "\n" - " more;", - i::WITH_SCOPE, i::SLOPPY}, - {" with ({})\n" - " ", - "statement;", - "\n" - " more;", - i::WITH_SCOPE, i::SLOPPY}, - {" try {} catch ", "(e) { block; }", " more;", i::CATCH_SCOPE, - i::SLOPPY}, - {" try {} catch ", "(e) { block; }", "; more;", i::CATCH_SCOPE, - i::SLOPPY}, - {" try {} catch ", - "(e) {\n" - " block;\n" - " }", - "\n" - " more;", - i::CATCH_SCOPE, i::SLOPPY}, - {" try {} catch ", "(e) { block; }", " finally { block; } more;", - i::CATCH_SCOPE, i::SLOPPY}, - {" start;\n" - " ", - "{ let block; }", " more;", i::BLOCK_SCOPE, i::STRICT}, - {" start;\n" - " ", - "{ let block; }", "; more;", i::BLOCK_SCOPE, i::STRICT}, - {" start;\n" - " ", - "{\n" - " let block;\n" - " }", - "\n" - " more;", - i::BLOCK_SCOPE, i::STRICT}, - {" start;\n" - " function fun", - "(a,b) { infunction; }", " more;", i::FUNCTION_SCOPE, i::SLOPPY}, - {" start;\n" - " function fun", - "(a,b) {\n" - " infunction;\n" - " }", - "\n" - " more;", - i::FUNCTION_SCOPE, i::SLOPPY}, - {" start;\n", "(a,b) => a + b", "; more;", i::FUNCTION_SCOPE, i::SLOPPY}, - {" start;\n", "(a,b) => { return a+b; }", "\nmore;", i::FUNCTION_SCOPE, - i::SLOPPY}, - {" start;\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - {" for ", "(let x = 1 ; x < 10; ++ x) { block; }", " more;", - i::BLOCK_SCOPE, i::STRICT}, - {" for ", "(let x = 1 ; x < 10; ++ x) { block; }", "; more;", - i::BLOCK_SCOPE, i::STRICT}, - {" for ", - "(let x = 1 ; x < 10; ++ x) {\n" - " block;\n" - " }", - "\n" - " more;", - i::BLOCK_SCOPE, i::STRICT}, - {" for ", "(let x = 1 ; x < 10; ++ x) statement;", " more;", - i::BLOCK_SCOPE, i::STRICT}, - {" for ", "(let x = 1 ; x < 10; ++ x) statement", - "\n" - " more;", - i::BLOCK_SCOPE, i::STRICT}, - {" for ", - "(let x = 1 ; x < 10; ++ x)\n" - " statement;", - "\n" - " more;", - i::BLOCK_SCOPE, i::STRICT}, - {" for ", "(let x in {}) { block; }", " more;", i::BLOCK_SCOPE, - i::STRICT}, - {" for ", "(let x in {}) { block; }", "; more;", i::BLOCK_SCOPE, - i::STRICT}, - {" for ", - "(let x in {}) {\n" - " block;\n" - " }", - "\n" - " more;", - i::BLOCK_SCOPE, i::STRICT}, - {" for ", "(let x in {}) statement;", " more;", i::BLOCK_SCOPE, - i::STRICT}, - {" for ", "(let x in {}) statement", - "\n" - " more;", - i::BLOCK_SCOPE, i::STRICT}, - {" for ", - "(let x in {})\n" - " statement;", - "\n" - " more;", - i::BLOCK_SCOPE, i::STRICT}, - // Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw - // the preparser off in terms of byte offsets. - // 2 surrogates, encode a character that doesn't need a surrogate. - {" 'foo\355\240\201\355\260\211';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // 4 byte encoding. - {" 'foo\360\220\220\212';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // 3 byte encoding of \u0fff. - {" 'foo\340\277\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // 3 byte surrogate, followed by broken 2-byte surrogate w/ impossible 2nd - // byte and last byte missing. - {" 'foo\355\240\201\355\211';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Broken 3 byte encoding of \u0fff with missing last byte. - {" 'foo\340\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Broken 3 byte encoding of \u0fff with missing 2 last bytes. - {" 'foo\340';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Broken 3 byte encoding of \u00ff should be a 2 byte encoding. - {" 'foo\340\203\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Broken 3 byte encoding of \u007f should be a 2 byte encoding. - {" 'foo\340\201\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Unpaired lead surrogate. - {" 'foo\355\240\201';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Unpaired lead surrogate where following code point is a 3 byte - // sequence. - {" 'foo\355\240\201\340\277\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Unpaired lead surrogate where following code point is a 4 byte encoding - // of a trail surrogate. - {" 'foo\355\240\201\360\215\260\211';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Unpaired trail surrogate. - {" 'foo\355\260\211';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // 2 byte encoding of \u00ff. - {" 'foo\303\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Broken 2 byte encoding of \u00ff with missing last byte. - {" 'foo\303';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Broken 2 byte encoding of \u007f should be a 1 byte encoding. - {" 'foo\301\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Illegal 5 byte encoding. - {" 'foo\370\277\277\277\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Illegal 6 byte encoding. - {" 'foo\374\277\277\277\277\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Illegal 0xfe byte - {" 'foo\376\277\277\277\277\277\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - // Illegal 0xff byte - {" 'foo\377\277\277\277\277\277\277\277';\n" - " (function fun", - "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, - {" 'foo';\n" - " (function fun", - "(a,b) { 'bar\355\240\201\355\260\213'; }", ")();", i::FUNCTION_SCOPE, - i::SLOPPY}, - {" 'foo';\n" - " (function fun", - "(a,b) { 'bar\360\220\220\214'; }", ")();", i::FUNCTION_SCOPE, - i::SLOPPY}, - {NULL, NULL, NULL, i::EVAL_SCOPE, i::SLOPPY}}; + { " with ({}) ", "{ block; }", " more;", i::WITH_SCOPE, i::SLOPPY }, + { " with ({}) ", "{ block; }", "; more;", i::WITH_SCOPE, i::SLOPPY }, + { " with ({}) ", "{\n" + " block;\n" + " }", "\n" + " more;", i::WITH_SCOPE, i::SLOPPY }, + { " with ({}) ", "statement;", " more;", i::WITH_SCOPE, i::SLOPPY }, + { " with ({}) ", "statement", "\n" + " more;", i::WITH_SCOPE, i::SLOPPY }, + { " with ({})\n" + " ", "statement;", "\n" + " more;", i::WITH_SCOPE, i::SLOPPY }, + { " try {} catch ", "(e) { block; }", " more;", + i::CATCH_SCOPE, i::SLOPPY }, + { " try {} catch ", "(e) { block; }", "; more;", + i::CATCH_SCOPE, i::SLOPPY }, + { " try {} catch ", "(e) {\n" + " block;\n" + " }", "\n" + " more;", i::CATCH_SCOPE, i::SLOPPY }, + { " try {} catch ", "(e) { block; }", " finally { block; } more;", + i::CATCH_SCOPE, i::SLOPPY }, + { " start;\n" + " ", "{ let block; }", " more;", i::BLOCK_SCOPE, i::STRICT }, + { " start;\n" + " ", "{ let block; }", "; more;", i::BLOCK_SCOPE, i::STRICT }, + { " start;\n" + " ", "{\n" + " let block;\n" + " }", "\n" + " more;", i::BLOCK_SCOPE, i::STRICT }, + { " start;\n" + " function fun", "(a,b) { infunction; }", " more;", + i::FUNCTION_SCOPE, i::SLOPPY }, + { " start;\n" + " function fun", "(a,b) {\n" + " infunction;\n" + " }", "\n" + " more;", i::FUNCTION_SCOPE, i::SLOPPY }, + { " start;\n", "(a,b) => a + b", "; more;", + i::FUNCTION_SCOPE, i::SLOPPY }, + { " start;\n", "(a,b) => { return a+b; }", "\nmore;", + i::FUNCTION_SCOPE, i::SLOPPY }, + { " start;\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + { " for ", "(let x = 1 ; x < 10; ++ x) { block; }", " more;", + i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x = 1 ; x < 10; ++ x) { block; }", "; more;", + i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x = 1 ; x < 10; ++ x) {\n" + " block;\n" + " }", "\n" + " more;", i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x = 1 ; x < 10; ++ x) statement;", " more;", + i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x = 1 ; x < 10; ++ x) statement", "\n" + " more;", i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x = 1 ; x < 10; ++ x)\n" + " statement;", "\n" + " more;", i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x in {}) { block; }", " more;", + i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x in {}) { block; }", "; more;", + i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x in {}) {\n" + " block;\n" + " }", "\n" + " more;", i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x in {}) statement;", " more;", + i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x in {}) statement", "\n" + " more;", i::BLOCK_SCOPE, i::STRICT }, + { " for ", "(let x in {})\n" + " statement;", "\n" + " more;", i::BLOCK_SCOPE, i::STRICT }, + // Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw + // the preparser off in terms of byte offsets. + // 6 byte encoding. + { " 'foo\355\240\201\355\260\211';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // 4 byte encoding. + { " 'foo\360\220\220\212';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // 3 byte encoding of \u0fff. + { " 'foo\340\277\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Broken 6 byte encoding with missing last byte. + { " 'foo\355\240\201\355\211';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Broken 3 byte encoding of \u0fff with missing last byte. + { " 'foo\340\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Broken 3 byte encoding of \u0fff with missing 2 last bytes. + { " 'foo\340';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Broken 3 byte encoding of \u00ff should be a 2 byte encoding. + { " 'foo\340\203\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Broken 3 byte encoding of \u007f should be a 2 byte encoding. + { " 'foo\340\201\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Unpaired lead surrogate. + { " 'foo\355\240\201';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Unpaired lead surrogate where following code point is a 3 byte sequence. + { " 'foo\355\240\201\340\277\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Unpaired lead surrogate where following code point is a 4 byte encoding + // of a trail surrogate. + { " 'foo\355\240\201\360\215\260\211';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Unpaired trail surrogate. + { " 'foo\355\260\211';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // 2 byte encoding of \u00ff. + { " 'foo\303\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Broken 2 byte encoding of \u00ff with missing last byte. + { " 'foo\303';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Broken 2 byte encoding of \u007f should be a 1 byte encoding. + { " 'foo\301\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Illegal 5 byte encoding. + { " 'foo\370\277\277\277\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Illegal 6 byte encoding. + { " 'foo\374\277\277\277\277\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Illegal 0xfe byte + { " 'foo\376\277\277\277\277\277\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + // Illegal 0xff byte + { " 'foo\377\277\277\277\277\277\277\277';\n" + " (function fun", "(a,b) { infunction; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + { " 'foo';\n" + " (function fun", "(a,b) { 'bar\355\240\201\355\260\213'; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + { " 'foo';\n" + " (function fun", "(a,b) { 'bar\360\220\220\214'; }", ")();", + i::FUNCTION_SCOPE, i::SLOPPY }, + { NULL, NULL, NULL, i::EVAL_SCOPE, i::SLOPPY } + }; i::Isolate* isolate = CcTest::i_isolate(); i::Factory* factory = isolate->factory(); diff --git a/deps/v8/test/unittests/BUILD.gn b/deps/v8/test/unittests/BUILD.gn index ebee73db757fc1..286b5319ee88a6 100644 --- a/deps/v8/test/unittests/BUILD.gn +++ b/deps/v8/test/unittests/BUILD.gn @@ -117,7 +117,6 @@ v8_executable("unittests") { "source-position-table-unittest.cc", "test-utils.cc", "test-utils.h", - "unicode-unittest.cc", "value-serializer-unittest.cc", "wasm/asm-types-unittest.cc", "wasm/ast-decoder-unittest.cc", diff --git a/deps/v8/test/unittests/unicode-unittest.cc b/deps/v8/test/unittests/unicode-unittest.cc deleted file mode 100644 index 67edfb7331705c..00000000000000 --- a/deps/v8/test/unittests/unicode-unittest.cc +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2016 the V8 project authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include -#include - -#include "src/unicode-decoder.h" -#include "testing/gtest/include/gtest/gtest.h" - -namespace v8 { -namespace internal { - -namespace { - -using Utf8Decoder = unibrow::Utf8Decoder<512>; - -void Decode(Utf8Decoder* decoder, const std::string& str) { - // Put the string in its own buffer on the heap to make sure that - // AddressSanitizer's heap-buffer-overflow logic can see what's going on. - std::unique_ptr buffer(new char[str.length()]); - memcpy(buffer.get(), str.data(), str.length()); - decoder->Reset(buffer.get(), str.length()); -} - -} // namespace - -TEST(UnicodeTest, ReadOffEndOfUtf8String) { - Utf8Decoder decoder; - - // Not enough continuation bytes before string ends. - Decode(&decoder, "\xE0"); - Decode(&decoder, "\xED"); - Decode(&decoder, "\xF0"); - Decode(&decoder, "\xF4"); -} - -} // namespace internal -} // namespace v8 diff --git a/deps/v8/test/unittests/unittests.gyp b/deps/v8/test/unittests/unittests.gyp index e2b9f26347e93e..6a7c3707405121 100644 --- a/deps/v8/test/unittests/unittests.gyp +++ b/deps/v8/test/unittests/unittests.gyp @@ -115,7 +115,6 @@ 'source-position-table-unittest.cc', 'test-utils.h', 'test-utils.cc', - 'unicode-unittest.cc', 'value-serializer-unittest.cc', 'wasm/asm-types-unittest.cc', 'wasm/ast-decoder-unittest.cc',