From 1dbbbedf5542cf98e7cb8418c2cd65a8d2a8c9e5 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 20 May 2024 15:20:44 -0400 Subject: [PATCH 1/2] Fix up regexp escapes with control/meta and x --- src/prism.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/prism.c b/src/prism.c index 22c19f56b57..057ce64f4c7 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9396,7 +9396,7 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) { */ static inline uint8_t escape_byte(uint8_t value, const uint8_t flags) { - if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f; + if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f; if (flags & PM_ESCAPE_FLAG_META) value |= 0x80; return value; } @@ -9632,11 +9632,16 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre parser->current.end++; } + value = escape_byte(value, flags); if (flags & PM_ESCAPE_FLAG_REGEXP) { - pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); + if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) { + pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value); + } else { + pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); + } } - escape_write_byte_encoded(parser, buffer, escape_byte(value, flags)); + escape_write_byte_encoded(parser, buffer, value); } else { pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL); } From 8a7afa69881047f8c5af3948baf8ed9aa37a33a4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 20 May 2024 15:29:06 -0400 Subject: [PATCH 2/2] Fix up regexp escapes with control/meta --- src/prism.c | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/src/prism.c b/src/prism.c index 057ce64f4c7..a382d070e57 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9496,22 +9496,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) { static inline void escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) { if (flags & PM_ESCAPE_FLAG_REGEXP) { - pm_buffer_append_bytes(regular_expression_buffer, (const uint8_t *) "\\x", 2); - - uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF); - uint8_t byte2 = (uint8_t) (byte & 0xF); - - if (byte1 >= 0xA) { - pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A')); - } else { - pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0')); - } - - if (byte2 >= 0xA) { - pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A')); - } else { - pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0')); - } + pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte); } escape_write_byte_encoded(parser, buffer, byte); @@ -9546,57 +9531,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre switch (peek(parser)) { case '\\': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte('\\', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags)); return; } case '\'': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte('\'', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags)); return; } case 'a': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte('\a', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags)); return; } case 'b': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte('\b', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags)); return; } case 'e': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte('\033', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags)); return; } case 'f': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte('\f', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags)); return; } case 'n': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags)); return; } case 'r': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte('\r', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags)); return; } case 's': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte(' ', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags)); return; } case 't': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte('\t', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags)); return; } case 'v': { parser->current.end++; - escape_write_byte_encoded(parser, buffer, escape_byte('\v', flags)); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags)); return; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { @@ -9613,7 +9598,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre } } - escape_write_byte_encoded(parser, buffer, value); + escape_write_byte(parser, buffer, regular_expression_buffer, flags, value); return; } case 'x': {