Skip to content

Commit

Permalink
Merge pull request #2818 from ruby/more-escape-messages
Browse files Browse the repository at this point in the history
Update more escape error messages to match CRuby
  • Loading branch information
kddnewton authored May 16, 2024
2 parents 7b5e75e + ab43b3a commit 1c8d08b
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 14 deletions.
37 changes: 28 additions & 9 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -9658,7 +9658,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
} else if (hexadecimal_length == 0) {
// there are not hexadecimal characters
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
return;
}

Expand Down Expand Up @@ -9707,10 +9708,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
}
}

if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
}

return;
}
case 'c': {
Expand All @@ -9733,6 +9730,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
}
parser->current.end++;

if (match(parser, 'u') || match(parser, 'U')) {
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}

escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
return;
case ' ':
Expand Down Expand Up @@ -9760,7 +9763,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
case 'C': {
parser->current.end++;
if (peek(parser) != '-') {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
return;
}

Expand All @@ -9783,6 +9787,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
}
parser->current.end++;

if (match(parser, 'u') || match(parser, 'U')) {
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}

escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
return;
case ' ':
Expand All @@ -9797,7 +9807,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
default: {
if (!char_is_ascii_printable(peeked)) {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
return;
}

Expand All @@ -9810,7 +9821,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
case 'M': {
parser->current.end++;
if (peek(parser) != '-') {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
return;
}

Expand All @@ -9828,6 +9840,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
}
parser->current.end++;

if (match(parser, 'u') || match(parser, 'U')) {
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}

escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
return;
case ' ':
Expand All @@ -9842,7 +9860,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
default:
if (!char_is_ascii_printable(peeked)) {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
return;
}

Expand Down
6 changes: 3 additions & 3 deletions templates/src/diagnostic.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = { "invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = { "invalid Unicode escape sequence; Multiple codepoints at single character literal are disallowed", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "invalid Unicode escape sequence; needs closing `}`", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "unterminated Unicode escape", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EXPECT_ARGUMENT] = { "expected an argument", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = { "expected an expression after `&&=`", PM_ERROR_LEVEL_SYNTAX },
Expand Down Expand Up @@ -330,12 +330,12 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_STATEMENT_PREEXE_BEGIN] = { "unexpected a `BEGIN` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_STATEMENT_UNDEF] = { "unexpected an `undef` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_STRING_CONCATENATION] = { "expected a string for concatenation", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_STRING_INTERPOLATED_TERM] = { "expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_STRING_INTERPOLATED_TERM] = { "unterminated string; expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_STRING_LITERAL_EOF] = { "unterminated string meets end of file", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_STRING_LITERAL_TERM] = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, // TODO expected symbol? prism.c ~9719
[PM_ERR_SYMBOL_TERM_DYNAMIC] = { "unterminated quoted string; expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "unterminated symbol; expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_TERNARY_COLON] = { "expected a `:` after the true expression of a ternary operator", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_TERNARY_EXPRESSION_FALSE] = { "expected an expression after `:` in the ternary operator", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_TERNARY_EXPRESSION_TRUE] = { "expected an expression after `?` in the ternary operator", PM_ERROR_LEVEL_SYNTAX },
Expand Down
5 changes: 3 additions & 2 deletions test/prism/errors_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def test_unterminated_argument_expression

def test_unterminated_interpolated_symbol
assert_error_messages ":\"#", [
"expected a closing delimiter for the interpolated symbol"
"unterminated symbol; expected a closing delimiter for the interpolated symbol"
]
end

Expand Down Expand Up @@ -715,12 +715,13 @@ def test_do_not_allow_characters_other_than_0_9_a_f_and_A_F_in_u_Unicode_charact

assert_errors expected, '"\u{000z}"', [
["invalid Unicode escape sequence", 7..7],
["unterminated Unicode escape", 7..7]
]
end

def test_unterminated_unicode_brackets_should_be_a_syntax_error
assert_errors expression('?\\u{3'), '?\\u{3', [
["invalid Unicode escape sequence; needs closing `}`", 1..5],
["unterminated Unicode escape", 1..5],
]
end

Expand Down

0 comments on commit 1c8d08b

Please sign in to comment.