diff --git a/docs/content/parsers/regexp.md b/docs/content/parsers/regexp.md index 235a17f..873275e 100644 --- a/docs/content/parsers/regexp.md +++ b/docs/content/parsers/regexp.md @@ -9,7 +9,7 @@ description: 'regexp parses a string that matches a provided regular expression. ## Signature ```ts -function regexp(re: RegExp, expected: string): Parser +function regexp(rs: RegExp, expected: string): Parser ``` ## Description @@ -18,9 +18,13 @@ function regexp(re: RegExp, expected: string): Parser ## Implementation notes +::: warning +If the `g` flag is missing, it will be injected automatically. It is still better to always provide it, both to avoid a small performance penalty and to clearly document the intention. +::: + The regular expression must obey two simple rules: -- It *does* use `g` flag. Flags like `u` and `i` are allowed and can be added if needed. +- It *does* use the `g` flag. Flags like `u` and `i` are allowed and can be added if needed. - It *doesn't* use `^` and `$` to match at the beginning or at the end of the text. 
## Usage diff --git a/src/__tests__/parsers/regexp.spec.ts b/src/__tests__/parsers/regexp.spec.ts index aa28091..3af73a7 100644 --- a/src/__tests__/parsers/regexp.spec.ts +++ b/src/__tests__/parsers/regexp.spec.ts @@ -17,6 +17,35 @@ describe('regexp', () => { should.matchState(actualMatchGroups, expectedMatchGroups) }) + it('should succeed if given matching input without Global flag', () => { + const actualDigit = run(regexp(/\d/, 'digit'), '0') + const expectedDigit = result(true, '0') + + const actualDigits = run(regexp(/\d+/, 'digits'), '9000') + const expectedDigits = result(true, '9000') + + const actualMatchGroups = run(regexp(/\((\s)+\)/, 'match-groups'), '( )') + const expectedMatchGroups = result(true, '( )') + + should.matchState(actualDigit, expectedDigit) + should.matchState(actualDigits, expectedDigits) + should.matchState(actualMatchGroups, expectedMatchGroups) + }) + + it('should succeed if matches the beginning of input', () => { + const actualDigits = run(regexp(/\d{2,3}/g, 'first-digits'), '90000') + const expectedDigits = result(true, '900') + + should.matchState(actualDigits, expectedDigits) + }) + + it('should succeed if matches the beginning of input without Global flag', () => { + const actualDigits = run(regexp(/\d{2,3}/, 'first-digits'), '90000') + const expectedDigits = result(true, '900') + + should.matchState(actualDigits, expectedDigits) + }) + it('should succeed if given a RegExp with Unicode flag', () => { const actualReEmoji = run(regexp(/\w+\s+👌/gu, 'words, spaces, ok emoji'), 'Yes 👌') const expectedReEmoji = result(true, 'Yes 👌') @@ -24,22 +53,33 @@ describe('regexp', () => { should.matchState(actualReEmoji, expectedReEmoji) }) + it('should succeed if given a RegExp with Unicode flag and without Global one', () => { + const actualReEmoji = run(regexp(/\w+\s+👌/u, 'words, spaces, ok emoji'), 'Yes 👌') + const expectedReEmoji = result(true, 'Yes 👌') + + should.matchState(actualReEmoji, expectedReEmoji) + }) + it('should succeed if 
given a RegExp with Unicode property escapes', () => { const actualReEmoji = run(regexp(/\p{Emoji_Presentation}+/gu, 'emoji'), '👌👌👌') const expectedReEmoji = result(true, '👌👌👌') const actualReNonLatin = run(regexp(/\P{Script_Extensions=Latin}+/gu, 'non-latin'), '大阪') - const expectedReNonLation = result(true, '大阪') + const expectedReNonLatin = result(true, '大阪') should.matchState(actualReEmoji, expectedReEmoji) - should.matchState(actualReNonLatin, expectedReNonLation) + should.matchState(actualReNonLatin, expectedReNonLatin) }) - it('should succeeed if matches the beginning of input', () => { - const actualDigits = run(regexp(/\d{2,3}/g, 'first-digits'), '90000') - const expectedDigits = result(true, '900') + it('should succeed if given a RegExp with Unicode property escapes without Global flag', () => { + const actualReEmoji = run(regexp(/\p{Emoji_Presentation}+/u, 'emoji'), '👌👌👌') + const expectedReEmoji = result(true, '👌👌👌') - should.matchState(actualDigits, expectedDigits) + const actualReNonLatin = run(regexp(/\P{Script_Extensions=Latin}+/u, 'non-latin'), '大阪') + const expectedReNonLatin = result(true, '大阪') + + should.matchState(actualReEmoji, expectedReEmoji) + should.matchState(actualReNonLatin, expectedReNonLatin) }) it('should fail if does not match input', () => { diff --git a/src/parsers/regexp.ts b/src/parsers/regexp.ts index db6e9a0..46e4507 100644 --- a/src/parsers/regexp.ts +++ b/src/parsers/regexp.ts @@ -6,15 +6,20 @@ import type { Parser } from '@types' * * The regular expression must obey two simple rules: * - * - It *does* use `g` flag. Flags like `u` and `i` are allowed and can be added if needed. + * - It *does* use the `g` flag. Flags like `u` and `i` are allowed and can be added if needed. * - It *doesn't* use `^` and `$` to match at the beginning or at the end of the text. * - * @param re - Regular expression + * If the `g` flag is missing, it will be injected automatically. 
It's still better to always provide it + * to avoid a small performance penalty and to clearly document the intention. + * + * @param rs - Regular expression * @param expected - Error message if the regular expression does not match input * * @returns Matched string */ -export function regexp(re: RegExp, expected: string): Parser { +export function regexp(rs: RegExp, expected: string): Parser { + const re = rs.global ? rs : new RegExp(rs.source, rs.flags + 'g') + return { parse(input, pos) { // Reset RegExp index, because we abuse the 'g' flag.