Skip to content

Commit

Permalink
move all regexps
Browse files Browse the repository at this point in the history
  • Loading branch information
UziTech committed Oct 10, 2024
1 parent f3111f0 commit b0444c6
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 92 deletions.
4 changes: 2 additions & 2 deletions src/Lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ export class _Lexer {
*/
lex(src: string) {
src = src
.replace(/\r\n|\r/g, '\n');
.replace(other.carriageReturn, '\n');

this.blockTokens(src, this.tokens);

Expand All @@ -106,7 +106,7 @@ export class _Lexer {
blockTokens(src: string, tokens?: TokensList, lastParagraphClipped?: boolean): TokensList;
blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) {
if (this.options.pedantic) {
src = src.replace(/\t/g, ' ').replace(/^ +$/gm, '');
src = src.replace(other.tabCharGlobal, ' ').replace(other.spaceLine, '');
}

let token: Tokens.Generic | undefined;
Expand Down
5 changes: 3 additions & 2 deletions src/Renderer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
cleanUrl,
escape,
} from './helpers.ts';
import { other } from './rules.ts';
import type { MarkedOptions } from './MarkedOptions.ts';
import type { Tokens } from './Tokens.ts';
import type { _Parser } from './Parser.ts';
Expand All @@ -22,9 +23,9 @@ export class _Renderer {
}

code({ text, lang, escaped }: Tokens.Code): string {
const langString = (lang || '').match(/^\S*/)?.[0];
const langString = (lang || '').match(other.notSpaceStart)?.[0];

const code = text.replace(/\n$/, '') + '\n';
const code = text.replace(other.endingNewline, '') + '\n';

if (!langString) {
return '<pre><code>'
Expand Down
47 changes: 11 additions & 36 deletions src/helpers.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import { other } from './rules.ts';

/**
* Helpers
*/
const escapeTest = /[&<>"']/;
const escapeReplace = new RegExp(escapeTest.source, 'g');
const escapeTestNoEncode = /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/;
const escapeReplaceNoEncode = new RegExp(escapeTestNoEncode.source, 'g');
const escapeReplacements: { [index: string]: string } = {
'&': '&amp;',
'<': '&lt;',
Expand All @@ -16,23 +14,21 @@ const getEscapeReplacement = (ch: string) => escapeReplacements[ch];

export function escape(html: string, encode?: boolean) {
if (encode) {
if (escapeTest.test(html)) {
return html.replace(escapeReplace, getEscapeReplacement);
if (other.escapeTest.test(html)) {
return html.replace(other.escapeReplace, getEscapeReplacement);
}
} else {
if (escapeTestNoEncode.test(html)) {
return html.replace(escapeReplaceNoEncode, getEscapeReplacement);
if (other.escapeTestNoEncode.test(html)) {
return html.replace(other.escapeReplaceNoEncode, getEscapeReplacement);
}
}

return html;
}

const unescapeTest = /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/ig;

export function unescape(html: string) {
// explicitly match decimal, hex, and named HTML entities
return html.replace(unescapeTest, (_, n) => {
return html.replace(other.unescapeTest, (_, n) => {
n = n.toLowerCase();
if (n === 'colon') return ':';
if (n.charAt(0) === '#') {
Expand All @@ -44,40 +40,19 @@ export function unescape(html: string) {
});
}

/**
 * Matches a `^` that sits at the very start of the input or directly after any
 * character other than `[`. Capture group 1 holds that preceding character
 * (or the empty string), so replacing with `$1` removes only the caret.
 */
const caret = /(^|[^\[])\^/g;

/**
 * Creates a small chainable builder for composing a regular expression out of
 * a template and named sub-patterns.
 *
 * @param regex - Template pattern; a RegExp contributes only its `source`.
 * @param opt - Flags handed to the `RegExp` constructor (defaults to none).
 * @returns An object with `replace(name, val)`, which substitutes `name` in the
 *   template with the source of `val` (after stripping carets matched by
 *   `caret`) and returns the builder, and `getRegex()`, which compiles the
 *   current template with the given flags.
 */
export function edit(regex: string | RegExp, opt?: string) {
  let source = typeof regex === 'string' ? regex : regex.source;
  const flags = opt || '';
  const obj = {
    replace: (name: string | RegExp, val: string | RegExp) => {
      const rawSource = typeof val === 'string' ? val : val.source;
      const valSource = rawSource.replace(caret, '$1');
      // A replacer function inserts the text verbatim. Passing `valSource` as a
      // replacement string would expand `$&`, `$'`, `$$`, `$1`, ... that are
      // perfectly legal inside a regex source and corrupt the built pattern.
      source = source.replace(name, () => valSource);
      return obj;
    },
    getRegex: () => new RegExp(source, flags),
  };
  return obj;
}

export function cleanUrl(href: string) {
try {
href = encodeURI(href).replace(/%25/g, '%');
href = encodeURI(href).replace(other.percentDecode, '%');
} catch {
return null;
}
return href;
}

// Stand-in typed as a RegExp whose `exec` always returns null, i.e. it never
// reports a match. Only `exec` is provided; the cast hides that from the type checker.
export const noopTest = { exec: () => null } as unknown as RegExp;

export function splitCells(tableRow: string, count?: number) {
// ensure that every cell-delimiting pipe has a space
// before it to distinguish it from an escaped pipe
const row = tableRow.replace(/\|/g, (match, offset, str) => {
const row = tableRow.replace(other.findPipe, (match, offset, str) => {
let escaped = false;
let curr = offset;
while (--curr >= 0 && str[curr] === '\\') escaped = !escaped;
Expand All @@ -90,7 +65,7 @@ export function splitCells(tableRow: string, count?: number) {
return ' |';
}
}),
cells = row.split(/ \|/);
cells = row.split(other.splitPipe);
let i = 0;

// First/last cell in a row cannot be empty if it has no leading/trailing pipe
Expand All @@ -111,7 +86,7 @@ export function splitCells(tableRow: string, count?: number) {

for (; i < cells.length; i++) {
// leading or trailing whitespace is ignored per the gfm spec
cells[i] = cells[i].trim().replace(/\\\|/g, '|');
cells[i] = cells[i].trim().replace(other.slashPipe, '|');
}
return cells;
}
Expand Down
132 changes: 80 additions & 52 deletions src/rules.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,83 @@
import {
edit, noopTest,
} from './helpers.ts';
// Stand-in typed as a RegExp whose `exec` always returns null, i.e. it never
// reports a match. Only `exec` is provided; the cast hides that from the type checker.
export const noopTest = { exec: () => null } as unknown as RegExp;

/**
 * Creates a small chainable builder for composing a regular expression out of
 * a template and named sub-patterns.
 *
 * @param regex - Template pattern; a RegExp contributes only its `source`.
 * @param opt - Flags handed to the `RegExp` constructor (defaults to none).
 * @returns An object with `replace(name, val)`, which substitutes `name` in the
 *   template with the source of `val` (after stripping carets matched by
 *   `other.caret`) and returns the builder, and `getRegex()`, which compiles
 *   the current template with the given flags.
 */
export function edit(regex: string | RegExp, opt?: string) {
  let source = typeof regex === 'string' ? regex : regex.source;
  const flags = opt || '';
  const obj = {
    replace: (name: string | RegExp, val: string | RegExp) => {
      const rawSource = typeof val === 'string' ? val : val.source;
      const valSource = rawSource.replace(other.caret, '$1');
      // A replacer function inserts the text verbatim. Passing `valSource` as a
      // replacement string would expand `$&`, `$'`, `$$`, `$1`, ... that are
      // perfectly legal inside a regex source and corrupt the built pattern.
      source = source.replace(name, () => valSource);
      return obj;
    },
    getRegex: () => new RegExp(source, flags),
  };
  return obj;
}
/**
* Other Rules
*/

// Shared table of regular expressions, plus a few RegExp-building functions,
// looked up by name from the other modules (e.g. `other.caret` in `edit`,
// `other.escapeTest` in `escape`, `other.carriageReturn` in the lexer).
// Entries carrying the `g` flag are stateful RegExp objects shared by every user.
export const other = {
// --- whitespace, indentation and line-shape patterns ---
codeRemoveIndent: /^(?: {1,4}| {0,3}\t)/gm,
outputLinkReplace: /\\([\[\]])/g,
indentCodeCompensation: /^(\s+)(?:```)/,
beginningSpace: /^\s+/,
endingHash: /#$/,
startingSpaceChar: /^ /,
endingSpaceChar: / $/,
nonSpaceChar: /[^ ]/,
newLineCharGlobal: /\n/g,
tabCharGlobal: /\t/g,
multipleSpaceGlobal: /\s+/g,
blankLine: /^[ \t]*$/,
doubleBlankLine: /\n[ \t]*\n[ \t]*$/,
// --- blockquote and list patterns ---
blockquoteStart: /^ {0,3}>/,
blockquoteSetextReplace: /\n {0,3}((?:=+|-+) *)(?=\n|$)/g,
blockquoteSetextReplace2: /^ {0,3}>[ \t]?/gm,
listReplaceTabs: /^\t+/,
listReplaceNesting: /^ {1,4}(?=( {4})*[^ ])/g,
listIsTask: /^\[[ xX]\] /,
listReplaceTask: /^\[[ xX]\] +/,
anyLine: /\n.*\n/,
hrefBrackets: /^<(.*)>$/,
// --- table patterns ---
tableDelimiter: /[:|]/,
tableAlignChars: /^\||\| *$/g,
tableRowBlankLine: /\n[ \t]*$/,
tableAlignRight: /^ *-+: *$/,
tableAlignCenter: /^ *:-+: *$/,
tableAlignLeft: /^ *:-+ *$/,
// --- HTML tag patterns (case-insensitive where flagged `i`) ---
startATag: /^<a /i,
endATag: /^<\/a>/i,
startPreScriptTag: /^<(pre|code|kbd|script)(\s|>)/i,
endPreScriptTag: /^<\/(pre|code|kbd|script)(\s|>)/i,
startAngleBracket: /^</,
endAngleBracket: />$/,
pedanticHrefTitle: /^([^'"]*[^\s])\s+(['"])(.*)\2/,
unicodeAlphaNumeric: /[\p{L}\p{N}]/u,
// --- escaping: each *Test entry is the non-global twin of the *Replace entry
// with the same pattern, so `.test()` is not affected by `lastIndex` ---
escapeTest: /[&<>"']/,
escapeReplace: /[&<>"']/g,
// The NoEncode pair leaves an `&` alone when it already starts a decimal,
// hex or named entity terminated by `;`.
escapeTestNoEncode: /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/,
escapeReplaceNoEncode: /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/g,
unescapeTest: /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/ig,
// A `^` at the start of the input or after any character other than `[`;
// group 1 captures that preceding character.
caret: /(^|[^\[])\^/g,
percentDecode: /%25/g,
// --- table-cell splitting helpers ---
findPipe: /\|/g,
splitPipe: / \|/,
slashPipe: /\\\|/g,
// --- input normalisation and code-block rendering helpers ---
carriageReturn: /\r\n|\r/g,
spaceLine: /^ +$/gm,
notSpaceStart: /^\S*/,
endingNewline: /\n$/,
// --- factories: build a fresh RegExp on every call. `bull` is interpolated
// into the pattern unescaped; the `indent`-taking factories cap the allowed
// leading spaces at min(3, indent - 1). ---
listItemRegex: (bull: string) => new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`),
nextBulletRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`),
hrRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`),
fencesBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:\`\`\`|~~~)`),
headingBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}#`),
htmlBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}<[a-z].*>`, 'i'),
};

/**
* Block-Level Grammar
Expand Down Expand Up @@ -322,55 +399,6 @@ const inlineBreaks: Record<InlineKeys, RegExp> = {
* exports
*/

/**
* Other Rules
*/

// Table of named regular expressions plus a few RegExp-building functions.
// Entries carrying the `g` flag are stateful RegExp objects shared by every user.
export const other = {
// --- whitespace, indentation and line-shape patterns ---
codeRemoveIndent: /^(?: {1,4}| {0,3}\t)/gm,
outputLinkReplace: /\\([\[\]])/g,
indentCodeCompensation: /^(\s+)(?:```)/,
beginningSpace: /^\s+/,
endingHash: /#$/,
startingSpaceChar: /^ /,
endingSpaceChar: / $/,
nonSpaceChar: /[^ ]/,
newLineCharGlobal: /\n/g,
tabCharGlobal: /\t/g,
multipleSpaceGlobal: /\s+/g,
blankLine: /^[ \t]*$/,
doubleBlankLine: /\n[ \t]*\n[ \t]*$/,
// --- blockquote and list patterns ---
blockquoteStart: /^ {0,3}>/,
blockquoteSetextReplace: /\n {0,3}((?:=+|-+) *)(?=\n|$)/g,
blockquoteSetextReplace2: /^ {0,3}>[ \t]?/gm,
listReplaceTabs: /^\t+/,
listReplaceNesting: /^ {1,4}(?=( {4})*[^ ])/g,
listIsTask: /^\[[ xX]\] /,
listReplaceTask: /^\[[ xX]\] +/,
anyLine: /\n.*\n/,
hrefBrackets: /^<(.*)>$/,
// --- table patterns ---
tableDelimiter: /[:|]/,
tableAlignChars: /^\||\| *$/g,
tableRowBlankLine: /\n[ \t]*$/,
tableAlignRight: /^ *-+: *$/,
tableAlignCenter: /^ *:-+: *$/,
tableAlignLeft: /^ *:-+ *$/,
// --- HTML tag patterns (case-insensitive where flagged `i`) ---
startATag: /^<a /i,
endATag: /^<\/a>/i,
startPreScriptTag: /^<(pre|code|kbd|script)(\s|>)/i,
endPreScriptTag: /^<\/(pre|code|kbd|script)(\s|>)/i,
startAngleBracket: /^</,
endAngleBracket: />$/,
pedanticHrefTitle: /^([^'"]*[^\s])\s+(['"])(.*)\2/,
unicodeAlphaNumeric: /[\p{L}\p{N}]/u,
// --- factories: build a fresh RegExp on every call. `bull` is interpolated
// into the pattern unescaped; the `indent`-taking factories cap the allowed
// leading spaces at min(3, indent - 1). ---
listItemRegex: (bull: string) => new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`),
nextBulletRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`),
hrRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`),
fencesBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:\`\`\`|~~~)`),
headingBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}#`),
htmlBeginRegex: (indent: number) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}<[a-z].*>`, 'i'),
};

export const block = {
normal: blockNormal,
gfm: blockGfm,
Expand Down

0 comments on commit b0444c6

Please sign in to comment.