From bc90a2132f4ab7f483cb2ef52aab8290c9e5b30b Mon Sep 17 00:00:00 2001 From: Kael Zhang Date: Fri, 22 May 2020 21:37:21 +0800 Subject: [PATCH] #59: fixes escaped range notation --- index.js | 112 ++++++++++++++++++++++++----------------- test/fixtures/cases.js | 50 ++++++++++++++++-- 2 files changed, 114 insertions(+), 48 deletions(-) diff --git a/index.js b/index.js index 8ee4858..fe59046 100644 --- a/index.js +++ b/index.js @@ -5,6 +5,8 @@ function makeArray (subject) { : [subject] } +const EMPTY = '' +const ESCAPE = '\\' const REGEX_TEST_BLANK_LINE = /^\s+$/ const REGEX_REPLACE_LEADING_EXCAPED_EXCLAMATION = /^\\!/ const REGEX_REPLACE_LEADING_EXCAPED_HASH = /^\\#/ @@ -35,7 +37,7 @@ const sanitizeRange = range => range.replace( ? match // Invalid range (out of order) which is ok for gitignore rules but // fatal for JavaScript regular expression, so eliminate it. - : '' + : EMPTY ) // > If the pattern ends with a slash, @@ -59,7 +61,7 @@ const REPLACERS = [ /\\?\s+$/, match => match.indexOf('\\') === 0 ? ' ' - : '' + : EMPTY ], // replace (\ ) with ' ' @@ -86,19 +88,10 @@ const REPLACERS = [ // > - the opening curly brace {, // > These special characters are often called "metacharacters". [ - /[\\^$.|*+(){]/g, + /[\\$.|*+(){^]/g, match => `\\${match}` ], - [ - // > [abc] matches any character inside the brackets - // > (in this case a, b, or c); - /\[([^\]/]*)($|\])/g, - (match, p1, p2) => p2 === ']' - ? `[${sanitizeRange(p1)}]` - : `\\${match}` - ], - [ // > a question mark (?) matches a single character /(?!\\)\?/g, @@ -134,31 +127,6 @@ const REPLACERS = [ () => '^(?:.*\\/)?' ], - // ending - [ - // 'js' will not match 'js.' - // 'ab' will not match 'abc' - /(?:[^*])$/, - - // WTF! - // https://git-scm.com/docs/gitignore - // changes in [2.22.1](https://git-scm.com/docs/gitignore/2.22.1) - // which re-fixes #24, #38 - - // > If there is a separator at the end of the pattern then the pattern - // > will only match directories, otherwise the pattern can match both - // > files and directories. - - // 'js*' will not match 'a.js' - // 'js/' will not match 'a.js' - // 'js' will match 'a.js' and 'a.js/' - match => /\/$/.test(match) - // foo/ will not match 'foo' - ? `${match}$` - // foo matches 'foo' and 'foo/' - : `${match}(?=$|\\/$)` - ], - // starting [ // there will be no leading '/' @@ -227,13 +195,73 @@ const REPLACERS = [ (_, p1) => `${p1}[^\\/]*` ], + [ + // unescape, revert step 3 except for back slash + // For example, if a user escape a '\\*', + // after step 3, the result will be '\\\\\\*' + /\\\\\\(?=[$.|*+(){^])/g, + () => ESCAPE + ], + + [ + // '\\\\' -> '\\' + /\\\\/g, + () => ESCAPE + ], + + [ + // > The range notation, e.g. [a-zA-Z], + // > can be used to match one of the characters in a range. + + // `\` is escaped by step 3 + /(\\)?\[([^\]/]*?)(\\*)($|\])/g, + (match, leadEscape, range, endEscape, close) => leadEscape === ESCAPE + // '\\[bar]' -> '\\\\[bar\\]' + ? `\\[${range}${close}` + : close === ']' + ? endEscape.length % 2 === 0 + // A normal case, and it is a range notation + // '[bar]' + // '[bar\\\\]' + ? `[${sanitizeRange(range)}${endEscape}]` + // Invalid range notaton + // '[bar\\]' -> '[bar\\\\]' + : '[]' + : '[]' + ], + + // ending + [ + // 'js' will not match 'js.' + // 'ab' will not match 'abc' + /(?:[^*])$/, + + // WTF! + // https://git-scm.com/docs/gitignore + // changes in [2.22.1](https://git-scm.com/docs/gitignore/2.22.1) + // which re-fixes #24, #38 + + // > If there is a separator at the end of the pattern then the pattern + // > will only match directories, otherwise the pattern can match both + // > files and directories. + + // 'js*' will not match 'a.js' + // 'js/' will not match 'a.js' + // 'js' will match 'a.js' and 'a.js/' + match => /\/$/.test(match) + // foo/ will not match 'foo' + ? `${match}$` + // foo matches 'foo' and 'foo/' + : `${match}(?=$|\\/$)` + ], + // trailing wildcard [ /(\^|\\\/)?\\\*$/, (_, p1) => { const prefix = p1 // '\^': - // '/*' does not match '' + // '/*' does not match EMPTY // '/*' does not match everything // '\\\/': @@ -247,12 +275,6 @@ const REPLACERS = [ return `${prefix}(?=$|\\/$)` } ], - - [ - // unescape - /\\\\\\/g, - () => '\\' - ] ] // A simple cache, because an ignore rule only has only one certain meaning @@ -345,7 +367,7 @@ const checkPath = (path, originalPath, doThrow) => { ) } - // We don't know if we should ignore '', so throw + // We don't know if we should ignore EMPTY, so throw if (!path) { return doThrow(`path must not be empty`, TypeError) } diff --git a/test/fixtures/cases.js b/test/fixtures/cases.js index 2528873..e6fa64d 100644 --- a/test/fixtures/cases.js +++ b/test/fixtures/cases.js @@ -12,19 +12,63 @@ const cases = [ // [ // 'Example', // [ + // // ignore pattern // 'a' // ], // { + // // 1 indicates 'a' should be ignored // 'a': 1 // } // ], [ - '#59', + '#59 and more cases about range notation', [ - 'src/\\[app\\]' + 'src/\\[foo\\]', + 'src/\\[foo2\\\\]', + 'src/\\[foo3\\\\\\]', + 'src/\\[foo4\\\\\\\\]', + 'src/\\[foo5\\\\\\\\\\]', + 'src/\\[foo6\\\\\\\\\\\\]', + + 'src/\\[bar]', + + 'src/[e\\\\]', + 's/[f\\\\\\\\]', + + 's/[a-z0-9]', + + // The following special cases are not described in gitignore manual + 'src/[q', + 'src/\\[u', + 'src/[x\\]' ], { - 'src/[app]': 1 + // 'src/[foo]': 1, + // 'src/[foo2\\]': 1, + + // // Seems the followings are side-effects, + // // however, we will implement these + // 'src/[foo3\\]': 1, + // 'src/[foo4\\\\]': 1, + // 'src/[foo5\\\\]': 1, + // 'src/[foo6\\\\\\]': 1, + + 'src/[bar]': 1, + + 'src/e': 1, + 'src/\\': 1, + 's/f': 1, + 's/\\': 1, + + 's/a': 1, + + 's/0': 1, + + 'src/[q': 0, + 'src/[u': 1, + 'src/[x': 0, + 'src/[x]': 0, + 'src/x': 0 } ], [