move all regexps

markedjs · Oct 10, 2024 · commit b0444c6
1 parent: f3111f0

Showing 4 changed files with 96 additions and 92 deletions.
diff --git a/src/Lexer.ts b/src/Lexer.ts
@@ -86,7 +86,7 @@ export class _Lexer {
    */
   lex(src: string) {
     src = src
-      .replace(/\r\n|\r/g, '\n');
+      .replace(other.carriageReturn, '\n');
 
     this.blockTokens(src, this.tokens);
 
@@ -106,7 +106,7 @@ export class _Lexer {
   blockTokens(src: string, tokens?: TokensList, lastParagraphClipped?: boolean): TokensList;
   blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) {
     if (this.options.pedantic) {
-      src = src.replace(/\t/g, '    ').replace(/^ +$/gm, '');
+      src = src.replace(other.tabCharGlobal, '    ').replace(other.spaceLine, '');
     }
 
     let token: Tokens.Generic | undefined;

diff --git a/src/Renderer.ts b/src/Renderer.ts
@@ -3,6 +3,7 @@ import {
   cleanUrl,
   escape,
 } from './helpers.ts';
+import { other } from './rules.ts';
 import type { MarkedOptions } from './MarkedOptions.ts';
 import type { Tokens } from './Tokens.ts';
 import type { _Parser } from './Parser.ts';
@@ -22,9 +23,9 @@ export class _Renderer {
   }
 
   code({ text, lang, escaped }: Tokens.Code): string {
-    const langString = (lang || '').match(/^\S*/)?.[0];
+    const langString = (lang || '').match(other.notSpaceStart)?.[0];
 
-    const code = text.replace(/\n$/, '') + '\n';
+    const code = text.replace(other.endingNewline, '') + '\n';
 
     if (!langString) {
       return '<pre><code>'

diff --git a/src/helpers.ts b/src/helpers.ts
@@ -1,10 +1,8 @@
+import { other } from './rules.ts';
+
 /**
  * Helpers
  */
-const escapeTest = /[&<>"']/;
-const escapeReplace = new RegExp(escapeTest.source, 'g');
-const escapeTestNoEncode = /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/;
-const escapeReplaceNoEncode = new RegExp(escapeTestNoEncode.source, 'g');
 const escapeReplacements: { [index: string]: string } = {
   '&': '&amp;',
   '<': '&lt;',
@@ -16,23 +14,21 @@ const getEscapeReplacement = (ch: string) => escapeReplacements[ch];
 
 export function escape(html: string, encode?: boolean) {
   if (encode) {
-    if (escapeTest.test(html)) {
-      return html.replace(escapeReplace, getEscapeReplacement);
+    if (other.escapeTest.test(html)) {
+      return html.replace(other.escapeReplace, getEscapeReplacement);
     }
   } else {
-    if (escapeTestNoEncode.test(html)) {
-      return html.replace(escapeReplaceNoEncode, getEscapeReplacement);
+    if (other.escapeTestNoEncode.test(html)) {
+      return html.replace(other.escapeReplaceNoEncode, getEscapeReplacement);
     }
   }
 
   return html;
 }
 
-const unescapeTest = /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/ig;
-
 export function unescape(html: string) {
   // explicitly match decimal, hex, and named HTML entities
-  return html.replace(unescapeTest, (_, n) => {
+  return html.replace(other.unescapeTest, (_, n) => {
     n = n.toLowerCase();
     if (n === 'colon') return ':';
     if (n.charAt(0) === '#') {
@@ -44,40 +40,19 @@ export function unescape(html: string) {
   });
 }
 
-const caret = /(^|[^\[])\^/g;
-
-export function edit(regex: string | RegExp, opt?: string) {
-  let source = typeof regex === 'string' ? regex : regex.source;
-  opt = opt || '';
-  const obj = {
-    replace: (name: string | RegExp, val: string | RegExp) => {
-      let valSource = typeof val === 'string' ? val : val.source;
-      valSource = valSource.replace(caret, '$1');
-      source = source.replace(name, valSource);
-      return obj;
-    },
-    getRegex: () => {
-      return new RegExp(source, opt);
-    },
-  };
-  return obj;
-}
-
 export function cleanUrl(href: string) {
   try {
-    href = encodeURI(href).replace(/%25/g, '%');
+    href = encodeURI(href).replace(other.percentDecode, '%');
   } catch {
     return null;
   }
   return href;
 }
 
-export const noopTest = { exec: () => null } as unknown as RegExp;
-
 export function splitCells(tableRow: string, count?: number) {
   // ensure that every cell-delimiting pipe has a space
   // before it to distinguish it from an escaped pipe
-  const row = tableRow.replace(/\|/g, (match, offset, str) => {
+  const row = tableRow.replace(other.findPipe, (match, offset, str) => {
       let escaped = false;
       let curr = offset;
       while (--curr >= 0 && str[curr] === '\\') escaped = !escaped;
@@ -90,7 +65,7 @@ export function splitCells(tableRow: string, count?: number) {
         return ' |';
       }
     }),
-    cells = row.split(/ \|/);
+    cells = row.split(other.splitPipe);
   let i = 0;
 
   // First/last cell in a row cannot be empty if it has no leading/trailing pipe
@@ -111,7 +86,7 @@ export function splitCells(tableRow: string, count?: number) {
 
   for (; i < cells.length; i++) {
     // leading or trailing whitespace is ignored per the gfm spec
-    cells[i] = cells[i].trim().replace(/\\\|/g, '|');
+    cells[i] = cells[i].trim().replace(other.slashPipe, '|');
   }
   return cells;
 }

diff --git a/src/rules.ts b/src/rules.ts
@@ -1,6 +1,83 @@
-import {
-  edit, noopTest,
-} from './helpers.ts';
+export const noopTest = { exec: () => null } as unknown as RegExp;
+
+export function edit(regex: string | RegExp, opt?: string) {
+  let source = typeof regex === 'string' ? regex : regex.source;
+  opt = opt || '';
+  const obj = {
+    replace: (name: string | RegExp, val: string | RegExp) => {
+      let valSource = typeof val === 'string' ? val : val.source;
+      valSource = valSource.replace(other.caret, '$1');
+      source = source.replace(name, valSource);
+      return obj;
+    },
+    getRegex: () => {
+      return new RegExp(source, opt);
+    },
+  };
+  return obj;
+}
+/**
+ * Other Rules
+ */
+
+export const other = {
+  codeRemoveIndent: /^(?: {1,4}| {0,3}\t)/gm,
+  outputLinkReplace: /\\([\[\]])/g,
+  indentCodeCompensation: /^(\s+)(?:```)/,
+  beginningSpace: /^\s+/,
+  endingHash: /#$/,
+  startingSpaceChar: /^ /,
+  endingSpaceChar: / $/,
+  nonSpaceChar: /[^ ]/,
+  newLineCharGlobal: /\n/g,
+  tabCharGlobal: /\t/g,
+  multipleSpaceGlobal: /\s+/g,
+  blankLine: /^[ \t]*$/,
+  doubleBlankLine: /\n[ \t]*\n[ \t]*$/,
+  blockquoteStart: /^ {0,3}>/,
+  blockquoteSetextReplace: /\n {0,3}((?:=+|-+) *)(?=\n|$)/g,
+  blockquoteSetextReplace2: /^ {0,3}>[ \t]?/gm,
+  listReplaceTabs: /^\t+/,
+  listReplaceNesting: /^ {1,4}(?=( {4})*[^ ])/g,
+  listIsTask: /^\[[ xX]\] /,
+  listReplaceTask: /^\[[ xX]\] +/,
+  anyLine: /\n.*\n/,
+  hrefBrackets: /^<(.*)>$/,
+  tableDelimiter: /[:|]/,
+  tableAlignChars: /^\||\| *$/g,
+  tableRowBlankLine: /\n[ \t]*$/,
+  tableAlignRight: /^ *-+: *$/,
+  tableAlignCenter: /^ *:-+: *$/,
+  tableAlignLeft: /^ *:-+ *$/,
+  startATag: /^<a /i,
+  endATag: /^<\/a>/i,
+  startPreScriptTag: /^<(pre|code|kbd|script)(\s|>)/i,
+  endPreScriptTag: /^<\/(pre|code|kbd|script)(\s|>)/i,
+  startAngleBracket: /^</,
+  endAngleBracket: />$/,
+  pedanticHrefTitle: /^([^'"]*[^\s])\s+(['"])(.*)\2/,
+  unicodeAlphaNumeric: /[\p{L}\p{N}]/u,
+  escapeTest: /[&<>"']/,
+  escapeReplace: /[&<>"']/g,
+  escapeTestNoEncode: /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/,
+  escapeReplaceNoEncode: /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/g,
+  unescapeTest: /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/ig,
+  caret: /(^|[^\[])\^/g,
+  percentDecode: /%25/g,
+  findPipe: /\|/g,
+  splitPipe: / \|/,
+  slashPipe: /\\\|/g,
+  carriageReturn: /\r\n|\r/g,
+  spaceLine: /^ +$/gm,
+  notSpaceStart: /^\S*/,
+  endingNewline: /\n$/,
+  listItemRegex: (bull: string) => new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`),
+  nextBulletRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`),
+  hrRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`),
+  fencesBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:\`\`\`|~~~)`),
+  headingBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}#`),
+  htmlBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}<[a-z].*>`, 'i'),
+};
 
 /**
  * Block-Level Grammar
@@ -322,55 +399,6 @@ const inlineBreaks: Record<InlineKeys, RegExp> = {
  * exports
  */
 
-/**
- * Other Rules
- */
-
-export const other = {
-  codeRemoveIndent: /^(?: {1,4}| {0,3}\t)/gm,
-  outputLinkReplace: /\\([\[\]])/g,
-  indentCodeCompensation: /^(\s+)(?:```)/,
-  beginningSpace: /^\s+/,
-  endingHash: /#$/,
-  startingSpaceChar: /^ /,
-  endingSpaceChar: / $/,
-  nonSpaceChar: /[^ ]/,
-  newLineCharGlobal: /\n/g,
-  tabCharGlobal: /\t/g,
-  multipleSpaceGlobal: /\s+/g,
-  blankLine: /^[ \t]*$/,
-  doubleBlankLine: /\n[ \t]*\n[ \t]*$/,
-  blockquoteStart: /^ {0,3}>/,
-  blockquoteSetextReplace: /\n {0,3}((?:=+|-+) *)(?=\n|$)/g,
-  blockquoteSetextReplace2: /^ {0,3}>[ \t]?/gm,
-  listReplaceTabs: /^\t+/,
-  listReplaceNesting: /^ {1,4}(?=( {4})*[^ ])/g,
-  listIsTask: /^\[[ xX]\] /,
-  listReplaceTask: /^\[[ xX]\] +/,
-  anyLine: /\n.*\n/,
-  hrefBrackets: /^<(.*)>$/,
-  tableDelimiter: /[:|]/,
-  tableAlignChars: /^\||\| *$/g,
-  tableRowBlankLine: /\n[ \t]*$/,
-  tableAlignRight: /^ *-+: *$/,
-  tableAlignCenter: /^ *:-+: *$/,
-  tableAlignLeft: /^ *:-+ *$/,
-  startATag: /^<a /i,
-  endATag: /^<\/a>/i,
-  startPreScriptTag: /^<(pre|code|kbd|script)(\s|>)/i,
-  endPreScriptTag: /^<\/(pre|code|kbd|script)(\s|>)/i,
-  startAngleBracket: /^</,
-  endAngleBracket: />$/,
-  pedanticHrefTitle: /^([^'"]*[^\s])\s+(['"])(.*)\2/,
-  unicodeAlphaNumeric: /[\p{L}\p{N}]/u,
-  listItemRegex: (bull: string) => new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`),
-  nextBulletRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`),
-  hrRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`),
-  fencesBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:\`\`\`|~~~)`),
-  headingBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}#`),
-  htmlBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}<[a-z].*>`, 'i'),
-};
-
 export const block = {
   normal: blockNormal,
   gfm: blockGfm,