Skip to content

Commit

Permalink
Merge pull request ruby#1701 from eileencodes/implement-regex-flags
Browse files Browse the repository at this point in the history
Ensure last encoding flag wins
  • Loading branch information
kddnewton authored and HParker committed Oct 18, 2023
2 parents fbd101d + 4182c98 commit 9fde1b1
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 9 deletions.
13 changes: 8 additions & 5 deletions bin/lex
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ else
source = File.read(filepath)
end

pattern = "%-70s %-70s"
pattern = "%-70s %-70s %-70s"

ripper =
begin
Expand All @@ -30,6 +30,7 @@ ripper =
end

prism = Prism.lex_compat(source, filepath)
prism_new = Prism.lex(source, filepath)
if prism.errors.any?
puts "Errors lexing:"
prism.errors.map do |error|
Expand All @@ -40,18 +41,20 @@ if prism.errors.any?
puts "\n"
end

puts pattern % ["Ripper lex", "Prism lex"]
puts pattern % ["-" * 70, "-" * 70]
puts pattern % ["Ripper lex", "Prism compat lex", "Prism Lex"]
puts pattern % ["-" * 70, "-" * 70, "-" * 70]

prism_value = prism.value
prism_new_value = prism_new.value

[prism_value.length, ripper.length].max.times do |index|
[prism_value.length, ripper.length, prism_new_value.length].max.times do |index|
left = ripper[index]
right = prism_value[index]
new = prism_new_value[index]

color = left == right ? "38;5;102" : "1;31"

if ENV["VERBOSE"] || (left != right)
puts "\033[#{color}m#{pattern}\033[0m" % [left.inspect, right.inspect]
# The loop runs up to the longest of the three token lists, so `new` is nil
# once the plain Prism lex output is exhausted; guard before dereferencing it.
new_summary = new && [new[0].type, [new[0].location.start_offset, new[0].location.length]]
puts "\033[#{color}m#{pattern}\033[0m" % [left.inspect, right.inspect, new_summary.inspect]
end
end
11 changes: 7 additions & 4 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -779,18 +779,21 @@ parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *e
static inline pm_node_flags_t
pm_regular_expression_flags_create(const pm_token_t *closing) {
pm_node_flags_t flags = 0;
pm_node_flags_t mask = (uint16_t) 0xFF0F;

if (closing->type == PM_TOKEN_REGEXP_END) {
for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
switch (*flag) {
case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
case 'e': flags |= PM_REGULAR_EXPRESSION_FLAGS_EUC_JP; break;
case 'n': flags |= PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT; break;
case 's': flags |= PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J; break;
case 'u': flags |= PM_REGULAR_EXPRESSION_FLAGS_UTF_8; break;
case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;

case 'e': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_EUC_JP; break;
case 'n': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT; break;
case 's': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J; break;
case 'u': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_UTF_8; break;

default: assert(false && "unreachable");
}
}
Expand Down
12 changes: 12 additions & 0 deletions test/prism/regexp_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,18 @@ def test_flag_combined
assert_equal(value, options("mix"))
end

# When a regexp literal carries several encoding flags, the options reported
# for the node must reflect only the last one written.
def test_last_encoding_option_wins
  # "u" is written last, so the fixed-encoding option is expected.
  u_last = Prism.parse("/foo/nu").value.statements.body.first.options
  assert_equal Regexp::FIXEDENCODING, u_last

  # "n" is written last, so the no-encoding option is expected.
  n_last = Prism.parse("/foo/un").value.statements.body.first.options
  assert_equal Regexp::NOENCODING, n_last
end

private

def named_captures(source)
Expand Down

0 comments on commit 9fde1b1

Please sign in to comment.