Skip to content

Commit

Permalink
feat(regular_expression): Support RegExp Modifiers (#6410)
Browse files Browse the repository at this point in the history
Fixes #6354
  • Loading branch information
leaysgur committed Oct 10, 2024
1 parent 9dc4ee9 commit b5b0af9
Show file tree
Hide file tree
Showing 16 changed files with 885 additions and 113 deletions.
48 changes: 29 additions & 19 deletions crates/oxc_ast/src/generated/assert_layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1522,18 +1522,23 @@ const _: () = {
assert!(offset_of!(CapturingGroup, name) == 8usize);
assert!(offset_of!(CapturingGroup, body) == 24usize);

assert!(size_of::<IgnoreGroup>() == 56usize);
assert!(size_of::<IgnoreGroup>() == 64usize);
assert!(align_of::<IgnoreGroup>() == 8usize);
assert!(offset_of!(IgnoreGroup, span) == 0usize);
assert!(offset_of!(IgnoreGroup, enabling_modifiers) == 8usize);
assert!(offset_of!(IgnoreGroup, disabling_modifiers) == 11usize);
assert!(offset_of!(IgnoreGroup, body) == 16usize);
assert!(offset_of!(IgnoreGroup, modifiers) == 8usize);
assert!(offset_of!(IgnoreGroup, body) == 24usize);

assert!(size_of::<ModifierFlags>() == 3usize);
assert!(align_of::<ModifierFlags>() == 1usize);
assert!(offset_of!(ModifierFlags, ignore_case) == 0usize);
assert!(offset_of!(ModifierFlags, sticky) == 1usize);
assert!(offset_of!(ModifierFlags, multiline) == 2usize);
assert!(size_of::<Modifiers>() == 16usize);
assert!(align_of::<Modifiers>() == 4usize);
assert!(offset_of!(Modifiers, span) == 0usize);
assert!(offset_of!(Modifiers, enabling) == 8usize);
assert!(offset_of!(Modifiers, disabling) == 11usize);

assert!(size_of::<Modifier>() == 3usize);
assert!(align_of::<Modifier>() == 1usize);
assert!(offset_of!(Modifier, ignore_case) == 0usize);
assert!(offset_of!(Modifier, multiline) == 1usize);
assert!(offset_of!(Modifier, sticky) == 2usize);

assert!(size_of::<IndexedReference>() == 12usize);
assert!(align_of::<IndexedReference>() == 4usize);
Expand Down Expand Up @@ -3059,18 +3064,23 @@ const _: () = {
assert!(offset_of!(CapturingGroup, name) == 8usize);
assert!(offset_of!(CapturingGroup, body) == 16usize);

assert!(size_of::<IgnoreGroup>() == 40usize);
assert!(size_of::<IgnoreGroup>() == 48usize);
assert!(align_of::<IgnoreGroup>() == 4usize);
assert!(offset_of!(IgnoreGroup, span) == 0usize);
assert!(offset_of!(IgnoreGroup, enabling_modifiers) == 8usize);
assert!(offset_of!(IgnoreGroup, disabling_modifiers) == 11usize);
assert!(offset_of!(IgnoreGroup, body) == 16usize);

assert!(size_of::<ModifierFlags>() == 3usize);
assert!(align_of::<ModifierFlags>() == 1usize);
assert!(offset_of!(ModifierFlags, ignore_case) == 0usize);
assert!(offset_of!(ModifierFlags, sticky) == 1usize);
assert!(offset_of!(ModifierFlags, multiline) == 2usize);
assert!(offset_of!(IgnoreGroup, modifiers) == 8usize);
assert!(offset_of!(IgnoreGroup, body) == 24usize);

assert!(size_of::<Modifiers>() == 16usize);
assert!(align_of::<Modifiers>() == 4usize);
assert!(offset_of!(Modifiers, span) == 0usize);
assert!(offset_of!(Modifiers, enabling) == 8usize);
assert!(offset_of!(Modifiers, disabling) == 11usize);

assert!(size_of::<Modifier>() == 3usize);
assert!(align_of::<Modifier>() == 1usize);
assert!(offset_of!(Modifier, ignore_case) == 0usize);
assert!(offset_of!(Modifier, multiline) == 1usize);
assert!(offset_of!(Modifier, sticky) == 2usize);

assert!(size_of::<IndexedReference>() == 12usize);
assert!(align_of::<IndexedReference>() == 4usize);
Expand Down
23 changes: 17 additions & 6 deletions crates/oxc_regular_expression/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,21 +351,32 @@ pub struct CapturingGroup<'a> {
pub struct IgnoreGroup<'a> {
#[serde(flatten)]
pub span: Span,
pub enabling_modifiers: Option<ModifierFlags>,
pub disabling_modifiers: Option<ModifierFlags>,
pub modifiers: Option<Modifiers>,
pub body: Disjunction<'a>,
}

/// Pattern modifiers in [`IgnoreGroup`].
/// e.g. `(?i:...)`, `(?-s:...)`
/// Modifiers in [`IgnoreGroup`].
/// e.g. `i` in `(?i:...)`, `-s` in `(?-s:...)`
#[ast]
#[derive(Debug)]
#[generate_derive(CloneIn, ContentEq, ContentHash)]
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
pub struct ModifierFlags {
pub struct Modifiers {
#[serde(flatten)]
pub span: Span,
pub enabling: Option<Modifier>,
pub disabling: Option<Modifier>,
}

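/// A set of modifier flags, used as either the enabling or the disabling side of [`Modifiers`].
// NOTE(review): `Display` for `IgnoreGroup` prints the `sticky` field as `s`, which is the
// dotAll flag letter in ECMAScript (sticky is `y`) — consider renaming the field; TODO confirm.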
#[ast]
#[derive(Debug)]
#[generate_derive(CloneIn, ContentEq, ContentHash)]
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
pub struct Modifier {
pub ignore_case: bool,
pub sticky: bool,
pub multiline: bool,
pub sticky: bool,
}

/// Backreference by index.
Expand Down
35 changes: 20 additions & 15 deletions crates/oxc_regular_expression/src/ast_impl/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,30 +258,29 @@ impl<'a> Display for CapturingGroup<'a> {

impl<'a> Display for IgnoreGroup<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn write_flags(
f: &mut fmt::Formatter<'_>,
prefix: char,
flags: &ModifierFlags,
) -> fmt::Result {
fn write_flags(f: &mut fmt::Formatter<'_>, flags: &Modifier) -> fmt::Result {
if flags.ignore_case {
write!(f, "{prefix}i")?;
}
if flags.sticky {
write!(f, "{prefix}y")?;
write!(f, "i")?;
}
if flags.multiline {
write!(f, "{prefix}m")?;
write!(f, "m")?;
}
if flags.sticky {
write!(f, "s")?;
}
Ok(())
}

write!(f, "(?")?;

if let Some(enabling) = &self.enabling_modifiers {
write_flags(f, '\0', enabling)?;
}
if let Some(disabling) = &self.disabling_modifiers {
write_flags(f, '-', disabling)?;
if let Some(modifiers) = &self.modifiers {
if let Some(enabling) = &modifiers.enabling {
write_flags(f, enabling)?;
}
if let Some(disabling) = &modifiers.disabling {
write!(f, "-")?;
write_flags(f, disabling)?;
}
}

write!(f, ":{})", self.body)
Expand Down Expand Up @@ -541,6 +540,12 @@ mod test {
(r"/[\-]/u", None),
(r"/[\-]/v", None),
(r"/([\-a-z]{0,31})/iu", None),
// ES2025 ---
(r"/(?i:.)/", None),
(r"/(?-s:.)/", None),
(r"/(?im-s:.)/u", None),
(r"/(?m-is:.)/v", None),
(r"/(?smi:.)/v", Some(r"/(?ims:.)/v")),
];

#[test]
Expand Down
15 changes: 10 additions & 5 deletions crates/oxc_regular_expression/src/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,6 @@ pub fn character_class_contents_invalid_operands(span: Span) -> OxcDiagnostic {
.with_label(span)
}

#[cold]
pub fn missing_capturing_group_name(span: Span) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Missing capturing group name")).with_label(span)
}

#[cold]
pub fn too_large_number_in_braced_quantifier(span: Span) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Number is too large in braced quantifier"))
Expand Down Expand Up @@ -152,3 +147,13 @@ pub fn invalid_unicode_escape_sequence(span: Span) -> OxcDiagnostic {
pub fn invalid_surrogate_pair(span: Span) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Invalid surrogate pair")).with_label(span)
}

#[cold]
pub fn invalid_modifiers(span: Span) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Invalid modifiers")).with_label(span)
}

#[cold]
pub fn unknown_modifiers(span: Span) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Unknown modifiers")).with_label(span)
}
22 changes: 16 additions & 6 deletions crates/oxc_regular_expression/src/generated/derive_clone_in.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,20 +296,30 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for IgnoreGroup<'old_alloc> {
fn clone_in(&self, allocator: &'new_alloc Allocator) -> Self::Cloned {
IgnoreGroup {
span: CloneIn::clone_in(&self.span, allocator),
enabling_modifiers: CloneIn::clone_in(&self.enabling_modifiers, allocator),
disabling_modifiers: CloneIn::clone_in(&self.disabling_modifiers, allocator),
modifiers: CloneIn::clone_in(&self.modifiers, allocator),
body: CloneIn::clone_in(&self.body, allocator),
}
}
}

impl<'alloc> CloneIn<'alloc> for ModifierFlags {
type Cloned = ModifierFlags;
impl<'alloc> CloneIn<'alloc> for Modifiers {
type Cloned = Modifiers;
fn clone_in(&self, allocator: &'alloc Allocator) -> Self::Cloned {
ModifierFlags {
Modifiers {
span: CloneIn::clone_in(&self.span, allocator),
enabling: CloneIn::clone_in(&self.enabling, allocator),
disabling: CloneIn::clone_in(&self.disabling, allocator),
}
}
}

impl<'alloc> CloneIn<'alloc> for Modifier {
type Cloned = Modifier;
fn clone_in(&self, allocator: &'alloc Allocator) -> Self::Cloned {
Modifier {
ignore_case: CloneIn::clone_in(&self.ignore_case, allocator),
sticky: CloneIn::clone_in(&self.sticky, allocator),
multiline: CloneIn::clone_in(&self.multiline, allocator),
sticky: CloneIn::clone_in(&self.sticky, allocator),
}
}
}
Expand Down
14 changes: 10 additions & 4 deletions crates/oxc_regular_expression/src/generated/derive_content_eq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,17 +230,23 @@ impl<'a> ContentEq for CapturingGroup<'a> {

impl<'a> ContentEq for IgnoreGroup<'a> {
fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.enabling_modifiers, &other.enabling_modifiers)
&& ContentEq::content_eq(&self.disabling_modifiers, &other.disabling_modifiers)
ContentEq::content_eq(&self.modifiers, &other.modifiers)
&& ContentEq::content_eq(&self.body, &other.body)
}
}

impl ContentEq for ModifierFlags {
impl ContentEq for Modifiers {
fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.enabling, &other.enabling)
&& ContentEq::content_eq(&self.disabling, &other.disabling)
}
}

impl ContentEq for Modifier {
fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.ignore_case, &other.ignore_case)
&& ContentEq::content_eq(&self.sticky, &other.sticky)
&& ContentEq::content_eq(&self.multiline, &other.multiline)
&& ContentEq::content_eq(&self.sticky, &other.sticky)
}
}

Expand Down
14 changes: 10 additions & 4 deletions crates/oxc_regular_expression/src/generated/derive_content_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,17 +179,23 @@ impl<'a> ContentHash for CapturingGroup<'a> {

impl<'a> ContentHash for IgnoreGroup<'a> {
fn content_hash<H: Hasher>(&self, state: &mut H) {
ContentHash::content_hash(&self.enabling_modifiers, state);
ContentHash::content_hash(&self.disabling_modifiers, state);
ContentHash::content_hash(&self.modifiers, state);
ContentHash::content_hash(&self.body, state);
}
}

impl ContentHash for ModifierFlags {
impl ContentHash for Modifiers {
fn content_hash<H: Hasher>(&self, state: &mut H) {
ContentHash::content_hash(&self.enabling, state);
ContentHash::content_hash(&self.disabling, state);
}
}

impl ContentHash for Modifier {
fn content_hash<H: Hasher>(&self, state: &mut H) {
ContentHash::content_hash(&self.ignore_case, state);
ContentHash::content_hash(&self.sticky, state);
ContentHash::content_hash(&self.multiline, state);
ContentHash::content_hash(&self.sticky, state);
}
}

Expand Down
40 changes: 35 additions & 5 deletions crates/oxc_regular_expression/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,21 @@ mod test {
(r"\1()", default()),
(r"\1()", with_unicode_mode()),
(r"(?<n1>..)(?<n2>..)", default()),
// TODO: ES2025 Duplicate named capturing groups
// ES2025 ---
// TODO: Duplicate named capturing groups
// (r"(?<n1>..)|(?<n1>..)", default()),
// (r"(?<year>[0-9]{4})-[0-9]{2}|[0-9]{2}-(?<year>[0-9]{4})", default()),
// (r"(?:(?<a>x)|(?<a>y))\k<a>", default()),
// Modifiers
(r"(?:.)", default()),
(r"(?s:.)", default()),
(r"(?ism:.)", default()),
(r"(?-s:.)", default()),
(r"(?-smi:.)", default()),
(r"(?s-im:.)", default()),
(r"(?si-m:.)", default()),
(r"(?im-s:.)", with_unicode_sets_mode()),
(r"(?ims-:.)", default()),
] {
let res = Parser::new(&allocator, source_text, *options).parse();
if let Err(err) = res {
Expand Down Expand Up @@ -161,6 +172,7 @@ mod test {
("a(?:", default()),
("(a", default()),
("(?<a>", default()),
("(?<", default()),
(r"(?<a\>.)", default()),
(r"(?<a\>.)", with_unicode_mode()),
(r"(?<\>.)", default()),
Expand All @@ -184,13 +196,26 @@ mod test {
(r"[a--b&&c]", with_unicode_sets_mode()),
(r"[\q{]", with_unicode_sets_mode()),
(r"[\q{\a}]", with_unicode_sets_mode()),
// TODO: ES2025 Duplicate named capturing groups
// ES2025 ---
// TODO: Duplicate named capturing groups
(r"(?<n>..)|(?<n>..)", default()), // This will be valid
// (r"(?<a>|(?<a>))", default()), // Nested, still invalid
// (r"(?<a>|(?<a>))", default()), // Nested, still invalid
// Modifiers
(r"(?a:.)", default()),
(r"(?-S:.)", default()),
(r"(?-:.)", default()),
(r"(?iM:.)", default()),
(r"(?imms:.)", default()),
(r"(?-sI:.)", default()),
(r"(?ii-s:.)", default()),
(r"(?i-msm:.)", default()),
(r"(?i", default()),
(r"(?i-", default()),
(r"(?i-s", default()),
] {
assert!(
Parser::new(&allocator, source_text, *options).parse().is_err(),
"{source_text} should fail to parse with {options:?}!"
"{source_text} should fail to parse with {options:?}, but passed!"
);
}
}
Expand Down Expand Up @@ -234,11 +259,16 @@ mod test {
(r"[[z-a]]", with_unicode_sets_mode(), true),
(r"[[[[[^[[[[\q{ng}]]]]]]]]]", with_unicode_sets_mode(), true),
(r"[^[[[[[[[[[[[[[[[[\q{ng}]]]]]]]]]]]]]]]]]", with_unicode_sets_mode(), true),
// ES2025 ---
// Modifiers
(r"(?ii:.)", default(), true),
(r"(?-ss:.)", default(), true),
(r"(?im-im:.)", default(), true),
] {
assert_eq!(
Parser::new(&allocator, source_text, *options).parse().is_err(),
*is_err,
"{source_text} should early error with {options:?}!"
"{source_text} should fail with early error with {options:?}, but passed!"
);
}
}
Expand Down
Loading

0 comments on commit b5b0af9

Please sign in to comment.