diff --git a/src/prism.c b/src/prism.c index be6b52f5b1b..75f14321612 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9658,7 +9658,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG); } else if (hexadecimal_length == 0) { // there are not hexadecimal characters - pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM); return; } @@ -9707,10 +9708,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre } } - if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) { - pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); - } - return; } case 'c': { @@ -9733,6 +9730,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre return; } parser->current.end++; + + if (match(parser, 'u') || match(parser, 'U')) { + pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + return; + } + escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL); return; case ' ': @@ -9760,7 +9763,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre case 'C': { parser->current.end++; if (peek(parser) != '-') { - pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL); + size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); + pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL); return; } @@ -9783,6 +9787,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre return; } parser->current.end++; + + if (match(parser, 'u') || match(parser, 'U')) { + pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + return; + } + escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL); return; case ' ': @@ -9797,7 +9807,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre return; default: { if (!char_is_ascii_printable(peeked)) { - pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL); + size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); + pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL); return; } @@ -9810,7 +9821,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre case 'M': { parser->current.end++; if (peek(parser) != '-') { - pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META); + size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); + pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); return; } @@ -9828,6 +9840,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre return; } parser->current.end++; + + if (match(parser, 'u') || match(parser, 'U')) { + pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + return; + } + escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META); return; case ' ': @@ -9842,7 +9860,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre return; default: if (!char_is_ascii_printable(peeked)) { - pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META); + size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); + pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); return; } diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index eab3dd26bff..d9e195e08f3 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -167,7 +167,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = { "invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = { "invalid Unicode escape sequence; Multiple codepoints at single character literal are disallowed", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "invalid Unicode escape sequence; needs closing `}`", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "unterminated Unicode escape", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_ARGUMENT] = { "expected an argument", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = { "expected an expression after `&&=`", PM_ERROR_LEVEL_SYNTAX }, @@ -330,12 +330,12 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_STATEMENT_PREEXE_BEGIN] = { "unexpected a `BEGIN` at a non-statement position", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_STATEMENT_UNDEF] = { "unexpected an `undef` at a non-statement position", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_STRING_CONCATENATION] = { "expected a string for concatenation", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_STRING_INTERPOLATED_TERM] = { "expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_STRING_INTERPOLATED_TERM] = { "unterminated string; expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_STRING_LITERAL_EOF] = { "unterminated string meets end of file", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_STRING_LITERAL_TERM] = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, // TODO expected symbol? prism.c ~9719 [PM_ERR_SYMBOL_TERM_DYNAMIC] = { "unterminated quoted string; expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "unterminated symbol; expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_TERNARY_COLON] = { "expected a `:` after the true expression of a ternary operator", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_TERNARY_EXPRESSION_FALSE] = { "expected an expression after `:` in the ternary operator", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_TERNARY_EXPRESSION_TRUE] = { "expected an expression after `?` in the ternary operator", PM_ERROR_LEVEL_SYNTAX }, diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index 1357c97cf97..3670b90dd76 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -216,7 +216,7 @@ def test_unterminated_argument_expression def test_unterminated_interpolated_symbol assert_error_messages ":\"#", [ - "expected a closing delimiter for the interpolated symbol" + "unterminated symbol; expected a closing delimiter for the interpolated symbol" ] end @@ -715,12 +715,13 @@ def test_do_not_allow_characters_other_than_0_9_a_f_and_A_F_in_u_Unicode_charact assert_errors expected, '"\u{000z}"', [ ["invalid Unicode escape sequence", 7..7], + ["unterminated Unicode escape", 7..7] ] end def test_unterminated_unicode_brackets_should_be_a_syntax_error assert_errors expression('?\\u{3'), '?\\u{3', [ - ["invalid Unicode escape sequence; needs closing `}`", 1..5], + ["unterminated Unicode escape", 1..5], ] end