Skip to content

Commit

Permalink
Better error messages for pattern tests (#2364)
Browse files Browse the repository at this point in the history
  • Loading branch information
RunDevelopment authored May 4, 2020
1 parent 7f948ec commit 10ca643
Showing 1 changed file with 70 additions and 44 deletions.
114 changes: 70 additions & 44 deletions tests/pattern-tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ function testPatterns(Prism) {
BFS(Prism.languages, path => {
const { key, value } = path[path.length - 1];

let tokenPath = '<languages>';
let tokenPath = 'Prism.languages';
for (const { key } of path) {
if (!key) {
// do nothing
Expand Down Expand Up @@ -172,11 +172,46 @@ function testPatterns(Prism) {
}
}

/**
* Returns whether the given element will always at the start of the whole match.
*
* @param {Element} element
* @returns {boolean}
*/
function isFirstMatch(element) {
const parent = element.parent;
switch (parent.type) {
case 'Alternative':
// all elements before this element have to of zero length
if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(isAlwaysZeroWidth)) {
return false;
}
const grandParent = parent.parent;
if (grandParent.type === 'Pattern') {
return true;
} else {
return isFirstMatch(grandParent);
}

case 'Quantifier':
if (parent.max >= 2) {
return false;
} else {
return isFirstMatch(parent);
}

default:
throw new Error(`Internal error: The given node should not be a '${element.type}'.`);
}
}


it('- should not match the empty string', function () {
forEachPattern(({ pattern, tokenPath }) => {
// test for empty string
assert.notMatch('', pattern, `Token ${tokenPath}: ${pattern} should not match the empty string.`);
assert.notMatch('', pattern, `${tokenPath}: ${pattern} should not match the empty string.\n\n`
+ `Patterns that do match the empty string can potentially cause infinitely many empty tokens. `
+ `Make sure that all patterns always consume at least one character.`);
});
});

Expand All @@ -187,54 +222,26 @@ function testPatterns(Prism) {
forEachCapturingGroup(ast.pattern, () => { hasCapturingGroup = true; });

if (!hasCapturingGroup) {
assert.fail(`Token ${tokenPath}: The pattern is set to 'lookbehind: true' but does not have a capturing group.`);
assert.fail(`${tokenPath}: The pattern is set to 'lookbehind: true' but does not have a capturing group.\n\n`
+ `Prism lookbehind groups use the captured text of the first capturing group to simulate a lookbehind. `
+ `Without a capturing group, a lookbehind is not possible.\n`
+ `To fix this, either add a capturing group for the lookbehind or remove the 'lookbehind' property.`);
}
}
});
});

it('- should not have lookbehind groups that can be preceded by other some characters', function () {
/**
* Returns whether the given element will always match the start of the string.
*
* @param {Element} element
* @returns {boolean}
*/
function isFirstMatch(element) {
const parent = element.parent;
switch (parent.type) {
case 'Alternative':
// all elements before this element have to of zero length
if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(isAlwaysZeroWidth)) {
return false;
}
const grandParent = parent.parent;
if (grandParent.type === 'Pattern') {
return true;
} else {
return isFirstMatch(grandParent);
}

case 'Quantifier':
if (parent.max >= 2) {
return false;
} else {
return isFirstMatch(parent);
}

default:
throw new Error(`Internal error: The given node should not be a '${element.type}'.`);
}
}

forEachPattern(({ ast, tokenPath, lookbehind }) => {
if (!lookbehind) {
return;
}
forEachCapturingGroup(ast.pattern, ({ group, number }) => {
if (number === 1 && !isFirstMatch(group)) {
assert.fail(`Token ${tokenPath}: `
+ `The lookbehind group (if matched) always has to be at index 0 relative to the whole match.`);
assert.fail(`${tokenPath}: The lookbehind group ${group.raw} might be preceded by some characters.\n\n`
+ `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
+ `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
+ `To fix this, either remove the preceding characters or include them in the lookbehind group.`);
}
});
});
Expand All @@ -249,9 +256,9 @@ function testPatterns(Prism) {
if (number === 1 && isAlwaysZeroWidth(group)) {
const groupContent = group.raw.substr(1, group.raw.length - 2);
const replacement = group.alternatives.length === 1 ? groupContent : `(?:${groupContent})`;
reportError(`Token ${tokenPath}: The lookbehind group ${group.raw} does not consume characters. `
+ `Therefor it is not necessary to use a lookbehind group. `
+ `Replacing the lookbehind group with: ${replacement}`);
reportError(`${tokenPath}: The lookbehind group ${group.raw} does not consume characters.\n\n`
+ `Therefor it is not necessary to use a lookbehind group.\n`
+ `To fix this, replace the lookbehind group with ${replacement} and remove the 'lookbehind' property.`);
}
});
});
Expand All @@ -262,7 +269,22 @@ function testPatterns(Prism) {
forEachCapturingGroup(ast.pattern, ({ group, number }) => {
const isLookbehindGroup = lookbehind && number === 1;
if (group.references.length === 0 && !isLookbehindGroup) {
reportError(`Token ${tokenPath}: Unused capturing group ${group.raw}. All capturing groups have to be either referenced or used as a Prism lookbehind group.`);
const fixes = [];
fixes.push(`Make this group a non-capturing group ('(?:...)' instead of '(...)'). (It's usually this option.)`);
fixes.push(`Reference this group with a backreference (use '\\${number}' for this).`);
if (number === 1 && !lookbehind) {
if (isFirstMatch(group)) {
fixes.push(`Add a 'lookbehind: true' declaration.`);
} else {
fixes.push(`Add a 'lookbehind: true' declaration. (This group is not a valid lookbehind group because it can be preceded by some characters.)`);
}
}

reportError(`${tokenPath}: Unused capturing group ${group.raw}.\n\n`
+ `Unused capturing groups generally degrade the performance of regular expressions. `
+ `They might also be a sign that a backreference is incorrect or that a 'lookbehind: true' declaration in missing.\n`
+ `To fix this, do one of the following:\n`
+ fixes.map(f => '- ' + f).join('\n'));
}
});
});
Expand All @@ -272,7 +294,8 @@ function testPatterns(Prism) {
const niceName = /^[a-z][a-z\d]*(?:[-_][a-z\d]+)*$/;
function testName(name, desc = 'token name') {
if (!niceName.test(name)) {
assert.fail(`The ${desc} '${name}' does not match ${niceName}`);
assert.fail(`The ${desc} '${name}' does not match ${niceName}.\n\n`
+ `To fix this, choose a name that matches the above regular expression.`);
}
}

Expand Down Expand Up @@ -305,7 +328,10 @@ function testPatterns(Prism) {
visitRegExpAST(ast.pattern, {
onCharacterEnter(node) {
if (/^\\(?:[1-9]|\d{2,})$/.test(node.raw)) {
reportError(`Token ${tokenPath}: Octal escape ${node.raw}.`);
reportError(`${tokenPath}: Octal escape ${node.raw}.\n\n`
+ `Octal escapes can be confused with backreferences, so please do not use them.\n`
+ `To fix this, use a different escape method. `
+ `Note that this could also be an invalid backreference, so be sure to carefully analyse the pattern.`);
}
}
});
Expand Down

0 comments on commit 10ca643

Please sign in to comment.