diff --git a/.eslintrc.json b/.eslintrc.json index 2006c43..f3680f7 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -36,7 +36,7 @@ ], "@typescript-eslint/array-type": "error", "@typescript-eslint/consistent-type-assertions": "error", - "@typescript-eslint/consistent-type-definitions": "error", + "@typescript-eslint/consistent-type-definitions": "off", "@typescript-eslint/explicit-function-return-type": "off", "@typescript-eslint/no-explicit-any": "off", "@typescript-eslint/no-parameter-properties": "off", @@ -83,7 +83,7 @@ "sort-imports": "off", "sort-imports-es6-autofix/sort-imports-es6": "warn", "spaced-comment": ["error", "always", { "markers": ["/"] }], - "@typescript-eslint/no-unused-vars": ["warn", { "argsIgnorePattern": "^_" }], + "@typescript-eslint/no-unused-vars": ["warn", { "argsIgnorePattern": "^_", "varsIgnorePattern": "^_" }], "tsdoc/syntax": "warn" } } diff --git a/README.md b/README.md index 1c0eb2c..cc30414 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Create readable Regular Expressions with concise and flexible syntax. - [Quantifiers](#quantifiers) - [Groups](#groups) - [Misc](#misc) + - [Custom Tokens](#custom-tokens) ## Installation @@ -490,3 +491,141 @@ const coordinates = oneOrMore.digit .toRegExp(Flag.Global); console.log(coordinates.exec('[1,2] [3,4]')); // expect 2 matches ``` + +### Custom Tokens + +Apart from extracting reusable expressions into variables, you can also define custom tokens directly, allowing you to +use them as if they are part of the readable-regexp package for maximum convenience. + +There are 3 types of custom tokens: + +- **Constant**: tokens that modify the expression without needing parameters. These tokens are not callable. +- **Dynamic**: tokens that take parameters and return different expressions depending on the parameters given. These tokens must be called to provide them with parameters. +- **Mixed**: tokens with optional parameters. These tokens can be called or accessed directly. 
+ +Rules for custom tokens: + +- The token name must be a valid JavaScript identifier. +- The token name must not conflict with any existing properties of `RegExpToken`. +- All custom tokens should be defined before **any** tokens are used to build regular expressions. + +Defining custom tokens is a 3-step process that requires minimal effort and maintains strong typing. + +#### Step 1 - Extend the `RegExpToken` interface + +**Only required for TypeScript users** + +To maintain strong typing on custom tokens, you should extend the built-in `RegExpToken` interface with the type of your +custom token. + +```ts +// Import the interface and helper types (as needed) +import { RegExpToken, LiteralFunction, GenericFunction, IncompleteToken } from 'readable-regexp'; + +// Extend the interface with a declaration +declare module 'readable-regexp' { + interface RegExpToken { + + // ===== CONSTANT tokens ===== + + severity: RegExpToken; + matchAll: RegExpToken; + + // ===== DYNAMIC tokens ===== + // Dynamic tokens must intersect the IncompleteToken type to signify that parameters are required + + // Use the LiteralFunction type for tokens that take a single string parameter + notExactly: LiteralFunction & IncompleteToken; + // Use the GenericFunction type for all other dynamic tokens + // Use a union for function overloading + exactValue: GenericFunction<[num: number] | [bool: boolean], RegExpToken> & IncompleteToken; + + // ===== MIXED tokens ===== + + alpha: GenericFunction<[upper: boolean], RegExpToken> & RegExpToken; + + } +} +``` + +#### Step 2 - Implement the tokens + +Use the `defineToken` function to implement the tokens. This function takes the name of the token and its +implementation and returns the implemented token. 
+ +**For CONSTANT tokens:** + +- Implement the `constant` function +- `this` in the function is a `RegExpToken` that contains the expression preceding the custom token +- The token can append to, wrap around, or modify `this` in any way + +```ts +const severity = defineToken('severity', { + constant(this: RegExpToken) { + // Append a constant expression + return this.oneOf`error``warning``info``debug`; + }, +}); + +const matchAll = defineToken('matchAll', { + constant(this: RegExpToken) { + // Wrap around the existing expression + return lineStart.match(this).lineEnd; + }, +}); +``` + +**For DYNAMIC tokens:** + +- Implement the `dynamic` function +- `this` in the function is a `RegExpToken` that contains the expression preceding the custom token +- Arguments given to the token are passed on to the `dynamic` function +- Template string arguments are converted to ordinary strings automatically + +```ts +const notExactly = defineToken('notExactly', { + // Tagged template literals are converted to ordinary strings in the "value" argument + dynamic(this: RegExpToken, value: string) { + return this.notAhead(exactly(value)).repeat(value.length).notCharIn``; + }, +}); + +const exactValue = defineToken('exactValue', { + // Implementation of function overloads + dynamic(this: RegExpToken, num: number | boolean) { + return this.exactly(String(num)); + }, +}); +``` + +**For MIXED tokens:** + +- Implement both the `constant` and `dynamic` functions +- The same rules as for CONSTANT and DYNAMIC tokens apply to the respective functions +- If the custom token is called, the `dynamic` function will handle the call. Otherwise, the `constant` function will be used. + +```ts +const alpha = defineToken('alpha', { + constant(this: RegExpToken) { + return this.charIn`a-zA-Z`; + }, + dynamic(this: RegExpToken, upper: boolean) { + return upper ? this.charIn`A-Z` : this.charIn`a-z`; + }, +}); +``` + +#### Step 3 - Use the token + +Custom tokens are integrated as part of readable-regexp. 
As a result, you can use them just like any built-in token. + +```ts +// Start an expression with a custom token returned by defineToken +const expr1 = notExactly`foo`.exactly`bar`.toRegExp(); // /(?!foo)[^]{3}bar/ + +// Use custom tokens as part of an expression chain +const expr2 = capture.severity.matchAll.toRegExp(); // /^(error|warning|info|debug)$/ + +// Use custom tokens from the `r` shorthand +const expr3 = r.alpha(false).toRegExp(); // /[a-z]/ +``` diff --git a/jest.config.ts b/jest.config.ts index 22e8136..010f6f7 100644 --- a/jest.config.ts +++ b/jest.config.ts @@ -192,4 +192,13 @@ export default { // Whether to use watchman for file crawling // watchman: true, + + transform: { + '^.+\\.tsx?$': [ + 'ts-jest', + { + isolatedModules: true, + }, + ], + }, }; diff --git a/src/expression.ts b/src/expression.ts index 3dddbef..c6f28cb 100644 --- a/src/expression.ts +++ b/src/expression.ts @@ -3,7 +3,9 @@ import { CharClassFunction, FlagUnion, FlagsString, + GenericFunction, GroupFunction, + IncompleteToken, LiteralFunction, RegExpLiteral, RegExpModifier, @@ -1876,3 +1878,158 @@ export const oneOf = r.oneOf; * ``` */ export const match = r.match; + +// a list of tokens with RegExpBuilder assigned to a function +const funcTokens = [ + not, + maybe, + maybeLazily, + zeroOrMore, + zeroOrMoreLazily, + oneOrMore, + oneOrMoreLazily, + capture, + group, + ahead, + behind, + notAhead, + notBehind, +]; + +/** + * Checks if a token intersects either the {@link RegExpToken} or the {@link IncompleteToken} interface. + */ +type IncompleteTokenCheck = TokenType extends RegExpToken + ? true + : TokenType extends IncompleteToken + ? true + : false; + +/** + * Transforms template string arguments to string literals while leaving other arguments unchanged. + */ +type TransformStringLiteralArgs = Args extends [infer U, ...infer Rest] + ? [ + U extends TemplateStringsArray ? string : U, // replace template string with ordinary string + ...(Rest extends unknown[] ? 
(unknown[] extends Rest ? [] : Rest) : Rest), // Remove the rest parameter if the argument is a string literal + ] + : Args; + +/** + * Specifies the configurations required for a given token type. + */ +type CustomTokenConfig = (TokenType extends RegExpToken + ? { + constant: (this: RegExpToken) => RegExpToken; + } + : {}) & + (TokenType extends GenericFunction + ? { dynamic: (this: RegExpToken, ...args: TransformStringLiteralArgs) => ReturnType } + : {}); + +const invalidReturnMessage = (val: unknown) => + `Invalid return value from a constant token: ${val}.\n` + + 'If you want to return any other values (which are non-chainable), ' + + 'you should implement a dynamic token without parameters to make the chain termination explicit.'; + +function ensureTokenReturned(value: T): T { + if ((typeof value !== 'object' && typeof value !== 'function') || value === null) + throw new Error(invalidReturnMessage(value)); + if ('toRegExp' in value && 'toString' in value) return value; + throw new Error(invalidReturnMessage(value)); +} + +/** + * Define a custom token that can be used in conjunction with other tokens. + * For a detailed guide on custom tokens, please read https://github.com/hlysine/readable-regexp#custom-tokens + * + * Notes: + * + * - TypeScript users should extend the {@link RegExpToken} interface to add their own custom tokens before calling this function. + * - The token name must be a valid JavaScript identifier. + * - The token name must not conflict with any existing properties of {@link RegExpToken}. + * - All custom tokens should be defined before **any** tokens are used to build regular expressions. + * + * @param tokenName - The name of the custom token. In TypeScript, it needs to be defined in the {@link RegExpToken} interface. + * @param config - The configuration for the custom token. Implement the `constant` method to return a constant token, or the `dynamic` method for a token that accepts arguments. Implement both for a mixed token. 
+ * @returns The custom token + * + * @example + * Create a constant token + * + * Extend the RegExpToken interface to add a new token: + * + * ```ts + * import { RegExpToken } from 'readable-regexp'; + * + * declare module 'readable-regexp' { + * interface RegExpToken { + * severity: RegExpToken; + * } + * } + * ``` + * + * Implement the custom token: + * + * ```ts + * const severity = defineToken('severity', { + * constant(this: RegExpToken) { + * return this.oneOf`error` `warning` `info` `debug`; + * }, + * }); + * ``` + * + * Use the custom token: + * + * ```ts + * // Referencing the token returned by the defineToken function + * console.log(severity.toString()); // (?:error|warning|info|debug) + * + * // Referencing the token in an expression + * console.log(lineStart.severity.lineEnd.toString()); // ^(?:error|warning|info|debug)$ + * ``` + */ +export function defineToken>( + tokenName: Name, + config: Check extends true + ? CustomTokenConfig + : { + error: 'Invalid token type: tokens should intersect the RegExpToken type if they are constant, or the IncompleteToken type if they are dynamic.'; + } +): Check extends true ? RegExpToken[Name] : never { + if (tokenName in RegExpBuilder.prototype) throw new Error(`Token ${tokenName} already exists`); + Object.defineProperty(RegExpBuilder.prototype, tokenName, { + get() { + function configure( + this: RegExpBuilder, + ...configArgs: RegExpToken[Name] extends (...args: infer Args) => any ? Args : never + ): RegExpToken[Name] extends (...args: any) => infer Ret ? Ret : never { + if ('dynamic' in config) { + const value = isLiteralArgument(configArgs) ? [getLiteralString(configArgs)] : configArgs; + return (config.dynamic as (this: RegExpToken, ...args: typeof value) => ReturnType).apply( + this, + value + ); + } else { + throw new Error('Invalid arguments for ' + tokenName + '. 
This is probably a bug.'); + } + } + if (`constant` in config && !('dynamic' in config)) { + return ensureTokenReturned(config.constant.apply(this)); + } else if (!(`constant` in config) && 'dynamic' in config) { + return bindAsIncomplete(configure, this, tokenName); + } else if (`constant` in config && 'dynamic' in config) { + return assign(configure.bind(this), ensureTokenReturned(config.constant.apply(this)), false); + } else { + throw new Error(`The custom token ${tokenName} does not have any valid configurations.`); + } + }, + enumerable: true, + configurable: true, + }); + funcTokens.forEach(token => { + if (!('toRegExp' in token)) return; + Object.defineProperty(token, tokenName, Object.getOwnPropertyDescriptor(RegExpBuilder.prototype, tokenName)!); + }); + return r[tokenName] as ReturnType; +} diff --git a/src/helper.ts b/src/helper.ts index 24fd802..f16c915 100644 --- a/src/helper.ts +++ b/src/helper.ts @@ -74,10 +74,11 @@ export function bindAsIncomplete( * Copy all properties from source to target, including those in the prototype chain of source. * @param target - The target object to which the properties will be added. * @param source - The source object from which the properties will be copied. + * @param bindFunc - Whether to bind target to source. * @returns The target object. 
*/ -export function assign(target: T, source: U): T & U { - target = target.bind(source); +export function assign(target: T, source: U, bindFunc = true): T & U { + if (bindFunc) target = target.bind(source); const props: string[] = []; do { diff --git a/src/index.ts b/src/index.ts index ef419be..ab8ab77 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,2 +1,18 @@ export * from './expression'; -export { Flag } from './types'; +export { + Flag, + /* These types are exported for the convenience of custom extensions */ + type RegExpToken, + type LiteralFunction, + type GenericFunction, + type NumberFunction, + type TokenFunction, + type MultiTokenFunction, + type GroupFunction, + type AlternationFunction, + type NamedCaptureFunction, + type RepeatFunction, + type LimitFunction, + type CharClassFunction, + type IncompleteToken, +} from './types'; diff --git a/src/types.ts b/src/types.ts index 24839b8..13f0b2b 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,10 +1,22 @@ export type RegExpLiteral = [string] | [TemplateStringsArray, ...unknown[]]; +/** + * A function that takes a string input and returns a {@link RegExpToken}. + * The string can be given as a string parameter or a template string. + */ export interface LiteralFunction { (literal: string): RegExpToken; (template: TemplateStringsArray, ...args: unknown[]): RegExpToken; } +/** + * A function with flexible parameters and return type. + * This is used by custom tokens. + */ +export interface GenericFunction { + (...args: Params): ReturnType; +} + export interface NumberFunction { (num: number): RegExpToken; } @@ -44,6 +56,10 @@ export interface CharClassFunction { (...args: (string | RegExpToken)[]): RegExpToken & CharClassFunction; } +/** + * A token that requires additional parameters. + * Terminal operations are defined as `never` to prevent the token from being used without the required parameters. + */ export interface IncompleteToken { /** * @deprecated The token is incomplete. 
Please provide the required parameters. diff --git a/test/customTokens.test.ts b/test/customTokens.test.ts new file mode 100644 index 0000000..33875ab --- /dev/null +++ b/test/customTokens.test.ts @@ -0,0 +1,200 @@ +import { + GenericFunction, + IncompleteToken, + LiteralFunction, + RegExpToken, + defineToken, + exactly, + lineStart, + oneOrMore, + r, +} from '../src'; +import { assertType } from './testUtils'; + +declare module '../src' { + interface RegExpToken { + testConstant: RegExpToken; + testString: string; + testEmpty: null; + testObject: { foo: string }; + + testDynamicLiteral: LiteralFunction & IncompleteToken; + testDynamicIncomplete: LiteralFunction; + testDynamicGeneric: GenericFunction<[num: number], RegExpToken> & IncompleteToken; + testDynamicOverload: GenericFunction<[num: number] | [bool: boolean], RegExpToken> & IncompleteToken; + testDynamicString: GenericFunction<[str: string] | [TemplateStringsArray, ...unknown[]], RegExpToken> & + IncompleteToken; + testDynamicMixed: GenericFunction<[str: string | number] | [TemplateStringsArray, ...unknown[]], RegExpToken> & + IncompleteToken; + + testMixed: LiteralFunction & RegExpToken; + } +} + +function isRegExpToken(token: unknown): token is RegExpToken { + return ( + (typeof token === 'object' || typeof token === 'function') && + token !== null && + 'toRegExp' in token && + 'toString' in token + ); +} + +describe('custom tokens', () => { + it('supports constant tokens', () => { + const testConstant = defineToken('testConstant', { + constant(this: RegExpToken) { + expect(isRegExpToken(this)).toBe(true); + return this.exactly`foo`; + }, + }); + assertType(testConstant); + expect(() => { + const testString = defineToken('testString', { + // @ts-expect-error - the token does not intersect RegExpToken or IncompleteToken + constant(this: RegExpToken) { + expect(isRegExpToken(this)).toBe(true); + return 'foo'; + }, + }); + assertType(testString); + }).toThrow('Invalid return value'); + + expect(() => { + // 
@ts-expect-error - no config provided + const testEmpty = defineToken('testEmpty', {}); + assertType(testEmpty); + }).toThrow('valid configurations'); + + expect(() => { + const testObject = defineToken('testObject', { + // @ts-expect-error - the token does not intersect RegExpToken or IncompleteToken + constant(this: RegExpToken) { + expect(isRegExpToken(this)).toBe(true); + return { foo: 'bar' }; + }, + }); + assertType(testObject); + }).toThrow('Invalid return value'); + + expect(testConstant.toString()).toBe('foo'); + expect(r.testConstant.toString()).toBe('foo'); + expect(lineStart.testConstant.toString()).toBe('^foo'); + expect(oneOrMore.testConstant.toString()).toBe('(?:foo)+'); + // @ts-expect-error - the token is constant + expect(() => testConstant`foo`).toThrow(); + }); + it('supports dynamic tokens', () => { + const testDynamicLiteral = defineToken('testDynamicLiteral', { + dynamic(this: RegExpToken, str: string) { + expect(isRegExpToken(this)).toBe(true); + return this.exactly`${str}`; + }, + }); + assertType(testDynamicLiteral); + const testDynamicIncomplete = defineToken('testDynamicIncomplete', { + // @ts-expect-error - the dynamic token does not intersect IncompleteToken + dynamic(this: RegExpToken, str: string) { + expect(isRegExpToken(this)).toBe(true); + return this.exactly`${str}`; + }, + }); + assertType(testDynamicIncomplete); + const testDynamicGeneric = defineToken('testDynamicGeneric', { + dynamic(this: RegExpToken, num: number) { + expect(isRegExpToken(this)).toBe(true); + return this.repeat(num)`f`; + }, + }); + assertType & IncompleteToken>(testDynamicGeneric); + const testDynamicOverload = defineToken('testDynamicOverload', { + dynamic(this: RegExpToken, val: number | boolean) { + expect(isRegExpToken(this)).toBe(true); + return exactly`${val}`.match(this); + }, + }); + assertType & IncompleteToken>(testDynamicOverload); + const testDynamicString = defineToken('testDynamicString', { + dynamic(this: RegExpToken, val: string) { + 
expect(isRegExpToken(this)).toBe(true); + return this.charIn(val); + }, + }); + assertType & IncompleteToken>( + testDynamicString + ); + const testDynamicMixed = defineToken('testDynamicMixed', { + dynamic(this: RegExpToken, val: string | number) { + expect(isRegExpToken(this)).toBe(true); + return this.charIn(String(val)); + }, + }); + assertType< + GenericFunction<[str: string | number] | [TemplateStringsArray, ...unknown[]], RegExpToken> & IncompleteToken + >(testDynamicMixed); + + // @ts-expect-error - the token is incomplete + expect(() => testDynamicLiteral.toString()).toThrow('Required parameters'); + // @ts-expect-error - the token is incomplete + expect(() => testDynamicLiteral.toRegExp()).toThrow('Required parameters'); + // @ts-expect-error - the token is incomplete + expect(() => testDynamicLiteral.executeModifiers()).toThrow('Required parameters'); + // @ts-expect-error - the token is incomplete + expect(() => testDynamicLiteral.addModifier()).toThrow('Required parameters'); + // @ts-expect-error - the token is incomplete + expect(() => testDynamicLiteral.addNode()).toThrow('Required parameters'); + expect(testDynamicLiteral`foo`.toString()).toBe('foo'); + expect(testDynamicLiteral('foo').toString()).toBe('foo'); + expect(r.testDynamicLiteral`foo`.toString()).toBe('foo'); + expect(lineStart.testDynamicLiteral`foo`.toString()).toBe('^foo'); + + // @ts-expect-error - the token is incomplete + expect(() => testDynamicGeneric.toString()).toThrow(); + expect(testDynamicGeneric(3).toString()).toBe('f{3}'); + expect(r.testDynamicGeneric(3).toString()).toBe('f{3}'); + expect(lineStart.testDynamicGeneric(3).toString()).toBe('^f{3}'); + + // @ts-expect-error - the token is incomplete + expect(() => testDynamicOverload.toString()).toThrow(); + expect(testDynamicOverload(3).toString()).toBe('3'); + expect(testDynamicOverload(true).toString()).toBe('true'); + expect(r.testDynamicOverload(3).toString()).toBe('3'); + 
expect(lineStart.testDynamicOverload(3).toString()).toBe('3^'); + + // @ts-expect-error - the token is incomplete + expect(() => testDynamicString.toString()).toThrow(); + expect(testDynamicString('abc').toString()).toBe('[abc]'); + expect(testDynamicString`abc`.toString()).toBe('[abc]'); + expect(r.testDynamicString('abc').toString()).toBe('[abc]'); + expect(lineStart.testDynamicString('abc').toString()).toBe('^[abc]'); + expect(testDynamicString`abc${'def'}`.toString()).toBe('[abcdef]'); + + // @ts-expect-error - the token is incomplete + expect(() => testDynamicMixed.toString()).toThrow(); + expect(testDynamicMixed('abc').toString()).toBe('[abc]'); + expect(testDynamicMixed`abc`.toString()).toBe('[abc]'); + expect(r.testDynamicMixed('abc').toString()).toBe('[abc]'); + expect(lineStart.testDynamicMixed('abc').toString()).toBe('^[abc]'); + }); + it('supports mixed tokens', () => { + const testMixed = defineToken('testMixed', { + dynamic(this: RegExpToken, val: string) { + expect(isRegExpToken(this)).toBe(true); + return this.exactly(String(val)); + }, + constant(this: RegExpToken) { + expect(isRegExpToken(this)).toBe(true); + return this.char; + }, + }); + assertType(testMixed); + + expect(testMixed.toString()).toBe('.'); + expect(r.testMixed.toString()).toBe('.'); + expect(lineStart.testMixed.toString()).toBe('^.'); + expect(testMixed('abc').toString()).toBe('abc'); + expect(testMixed`abc`.toString()).toBe('abc'); + expect(r.testMixed('abc').toString()).toBe('abc'); + expect(lineStart.testMixed('abc').toString()).toBe('^abc'); + }); +}); diff --git a/test/testUtils.ts b/test/testUtils.ts new file mode 100644 index 0000000..e17f6a7 --- /dev/null +++ b/test/testUtils.ts @@ -0,0 +1,14 @@ +export function assertType(): void; +export function assertType(_: T): void; +export function assertType(_?: T1): void {} + +assertType<''>(''); +assertType<'', ''>(); + +assertType<'a'>('a'); +assertType<'a', 'a'>(); + +// @ts-expect-error - wrong type +assertType<'a'>('b'); +// 
@ts-expect-error - wrong type +assertType<'a', 'b'>();