From 62989a0891468c886757f206d2db39ef940e380e Mon Sep 17 00:00:00 2001 From: gideong Date: Mon, 18 Dec 2017 12:12:42 -0800 Subject: [PATCH 1/2] Adds a new option to IParseOptions, alternateCommentMode. When enabled, this activates an alternate comment parsing mode that preserves double-slash and slash-star comments as documentation. --- index.d.ts | 6 + src/parse.js | 2 +- src/tokenize.js | 183 +++++++++++++++++----- tests/data/comments-alternate-parse.proto | 73 +++++++++ tests/docs_comments_alternate_parse.js | 35 +++++ 5 files changed, 260 insertions(+), 39 deletions(-) create mode 100644 tests/data/comments-alternate-parse.proto create mode 100644 tests/docs_comments_alternate_parse.js diff --git a/index.d.ts b/index.d.ts index 91ad65ea8..4eb773d20 100644 --- a/index.d.ts +++ b/index.d.ts @@ -1015,6 +1015,12 @@ export interface IParseOptions { /** Keeps field casing instead of converting to camel case */ keepCase?: boolean; + + /** + * Turns on an alternate comment parsing mode that preserves double-slash + * and slash-star comments as documentation. + */ + alternateCommentMode?: boolean; } /** diff --git a/src/parse.js b/src/parse.js index 0acf919df..e1e572de2 100644 --- a/src/parse.js +++ b/src/parse.js @@ -61,7 +61,7 @@ function parse(source, root, options) { if (!options) options = parse.defaults; - var tn = tokenize(source), + var tn = tokenize(source, options.alternateCommentMode || false), next = tn.next, push = tn.push, peek = tn.peek, diff --git a/src/tokenize.js b/src/tokenize.js index 825a7af4f..86e5598f9 100644 --- a/src/tokenize.js +++ b/src/tokenize.js @@ -6,6 +6,7 @@ var delimRe = /[\s{}=;:[\],'"()<>]/g, stringSingleRe = /(?:'([^'\\]*(?:\\.[^'\\]*)*)')/g; var setCommentRe = /^ *[*/]+ */, + setCommentAltRe = /^\s*\*?\/*/, setCommentSplitRe = /\n/g, whitespaceRe = /\s/, unescapeRe = /\\(.?)/g; @@ -92,9 +93,10 @@ tokenize.unescape = unescape; /** * Tokenizes the given .proto source and returns an object with useful utility functions. 
* @param {string} source Source contents + * @param {boolean} alternateCommentMode Whether we should activate alternate comment parsing mode. * @returns {ITokenizerHandle} Tokenizer handle */ -function tokenize(source) { +function tokenize(source, alternateCommentMode) { /* eslint-disable callback-return */ source = source.toString(); @@ -159,10 +161,17 @@ function tokenize(source) { commentType = source.charAt(start++); commentLine = line; commentLineEmpty = false; - var offset = start - 3, // "///" or "/**" + var lookback; + if (alternateCommentMode) { + lookback = 2; // alternate comment parsing: "//" or "/*" + } else { + lookback = 3; // "///" or "/**" + } + var commentOffset = start - lookback, c; do { - if (--offset < 0 || (c = source.charAt(offset)) === "\n") { + if (--commentOffset < 0 || + (c = source.charAt(commentOffset)) === "\n") { commentLineEmpty = true; break; } @@ -171,12 +180,34 @@ function tokenize(source) { .substring(start, end) .split(setCommentSplitRe); for (var i = 0; i < lines.length; ++i) - lines[i] = lines[i].replace(setCommentRe, "").trim(); + lines[i] = lines[i] + .replace(alternateCommentMode ? setCommentAltRe : setCommentRe, "") + .trim(); commentText = lines .join("\n") .trim(); } + function isDoubleSlashCommentLine(startOffset) { + var endOffset = findEndOfLine(startOffset); + + // see if remaining line matches comment pattern + var lineText = source.substring(startOffset, endOffset); + // look for 1 or 2 slashes since startOffset would already point past + // the first slash that started the comment. + var isComment = /^\s*\/{1,2}/.test(lineText); + return isComment; + } + + function findEndOfLine(cursor) { + // find end of cursor's line + var endOffset = cursor; + while (endOffset < length && charAt(endOffset) !== '\n') { + endOffset++; + } + return endOffset; + } + /** * Obtains the next token. 
* @returns {string|null} Next token or `null` on eof @@ -202,35 +233,93 @@ function tokenize(source) { if (++offset === length) return null; } + if (charAt(offset) === "/") { - if (++offset === length) + if (++offset === length) { throw illegal("comment"); - if (charAt(offset) === "/") { // Line - isDoc = charAt(start = offset + 1) === "/"; - while (charAt(++offset) !== "\n") - if (offset === length) - return null; - ++offset; - if (isDoc) /// Comment - setComment(start, offset - 1); - ++line; - repeat = true; - } else if ((curr = charAt(offset)) === "*") { /* Block */ - isDoc = charAt(start = offset + 1) === "*"; - do { - if (curr === "\n") - ++line; - if (++offset === length) - throw illegal("comment"); - prev = curr; - curr = charAt(offset); - } while (prev !== "*" || curr !== "/"); - ++offset; - if (isDoc) /** Comment */ + } + if (!alternateCommentMode) { + // standard comment parsing + if (charAt(offset) === "/") { // Line + // check for triple-slash comment + isDoc = charAt(start = offset + 1) === "/"; + + while (charAt(++offset) !== "\n") { + if (offset === length) { + return null; + } + } + ++offset; + if (isDoc) { + setComment(start, offset - 1); + } + ++line; + repeat = true; + } else if ((curr = charAt(offset)) === "*") { /* Block */ + // check for /** doc comment + isDoc = charAt(start = offset + 1) === "*"; + do { + if (curr === "\n") { + ++line; + } + if (++offset === length) { + throw illegal("comment"); + } + prev = curr; + curr = charAt(offset); + } while (prev !== "*" || curr !== "/"); + ++offset; + if (isDoc) { /** Comment */ + setComment(start, offset - 2); + } + repeat = true; + } else { + return "/"; + } + } else { + // alternate comment parsing + // check for double-slash comments, coalescing consecutive + // lines into one multi-line comment. 
+ if (charAt(offset) === "/") { + start = offset; + isDoc = false; + if (isDoubleSlashCommentLine(offset)) { + isDoc = true; + do { + offset = findEndOfLine(offset); + if (offset === length) { + break; + } + offset++; + } while (isDoubleSlashCommentLine(offset)); + } else { + offset = Math.min(length, findEndOfLine(offset) + 1); + } + if (isDoc) { + setComment(start, offset); + } + line++; + repeat = true; + } else if ((curr = charAt(offset)) === "*") { /* Block */ + // found /* doc comment + start = offset + 1; + do { + if (curr === "\n") { + ++line; + } + if (++offset === length) { + throw illegal("comment"); + } + prev = curr; + curr = charAt(offset); + } while (prev !== "*" || curr !== "/"); + ++offset; setComment(start, offset - 2); - repeat = true; - } else - return "/"; + repeat = true; + } else { + return "/"; + } + } } } while (repeat); @@ -301,15 +390,33 @@ function tokenize(source) { */ function cmnt(trailingLine) { var ret = null; - if (trailingLine === undefined) { - if (commentLine === line - 1 && (commentType === "*" || commentLineEmpty)) - ret = commentText; + if (alternateCommentMode) { + if (trailingLine === undefined) { + if (commentLine === line - 1) { + ret = commentText; + } + } else { + if (commentLine < trailingLine) { + peek(); + } + if (commentLine === trailingLine && !commentLineEmpty) { + ret = commentText; + } + } } else { - /* istanbul ignore else */ - if (commentLine < trailingLine) - peek(); - if (commentLine === trailingLine && !commentLineEmpty && commentType === "/") - ret = commentText; + if (trailingLine === undefined) { + if (commentLine === line - 1 && (commentType === "*" || commentLineEmpty)) { + ret = commentText; + } + } else { + /* istanbul ignore else */ + if (commentLine < trailingLine) { + peek(); + } + if (commentLine === trailingLine && !commentLineEmpty && commentType === "/") { + ret = commentText; + } + } } return ret; } diff --git a/tests/data/comments-alternate-parse.proto 
b/tests/data/comments-alternate-parse.proto new file mode 100644 index 000000000..4d01f672f --- /dev/null +++ b/tests/data/comments-alternate-parse.proto @@ -0,0 +1,73 @@ +/** + * File with alternate comment syntax. + * This file uses double slash and regular star-slash comment styles for doc + * strings. + */ + +syntax = "proto3"; + +// Message with +// a +// multi-line comment. +message Test1 { + + /** + * Field with a doc-block comment. + */ + string field1 = 1; + + // Field with a single-line comment starting with two slashes. + uint32 field2 = 2; + + /// Field with a single-line comment starting with three slashes. + bool field3 = 3; + + /* Field with a single-line slash-star comment. */ + bool field4 = 4; + + bool field5 = 5; // Field with a trailing single-line two-slash comment. + + bool field6 = 6; /// Field with a trailing single-line three-slash comment. + + bool field7 = 7; /* Field with a trailing single-line slash-star comment. */ + + bool field8 = 8; + + // Field with a + // multi-line comment. + bool field9 = 9; + + /** + * Field with a + * multi-line doc-block comment. + */ + string field10 = 10; +} + +/* Message + with + a multiline plain slash-star + comment. +*/ +message Test2 { +} + +/* + * Message + * with + * a + * comment and stars. + */ +enum Test3 { + + /** Value with a comment. */ + ONE = 1; + + // Value with a single-line comment. + TWO = 2; + + /// Value with a triple-slash comment. + THREE = 3; // ignored + + FOUR = 4; /// Other value with a comment. 
+} diff --git a/tests/docs_comments_alternate_parse.js b/tests/docs_comments_alternate_parse.js new file mode 100644 index 000000000..5793ef091 --- /dev/null +++ b/tests/docs_comments_alternate_parse.js @@ -0,0 +1,35 @@ +var tape = require("tape"); + +var protobuf = require(".."); + +tape.test("proto comments in alternate-parse mode", function(test) { + test.plan(17); + var options = {alternateCommentMode: true}; + var root = new protobuf.Root(); + root.load("tests/data/comments-alternate-parse.proto", options, function(err, root) { + if (err) + throw test.fail(err.message); + + test.equal(root.lookup("Test1").comment, "Message with\na\nmulti-line comment.", "should parse double-slash multiline comment"); + test.equal(root.lookup("Test2").comment, "Message\nwith\na multiline plain slash-star\ncomment.", "should parse slash-star multiline comment"); + test.equal(root.lookup("Test3").comment, "Message\nwith\na\ncomment and stars.", "should parse doc-block multiline comment"); + + test.equal(root.lookup("Test1.field1").comment, "Field with a doc-block comment.", "should parse doc-block field comment"); + test.equal(root.lookup("Test1.field2").comment, "Field with a single-line comment starting with two slashes.", "should parse double-slash field comment"); + test.equal(root.lookup("Test1.field3").comment, "Field with a single-line comment starting with three slashes.", "should parse triple-slash field comment"); + test.equal(root.lookup("Test1.field4").comment, "Field with a single-line slash-star comment.", "should parse single-line slash-star field comment"); + test.equal(root.lookup("Test1.field5").comment, "Field with a trailing single-line two-slash comment.", "should parse trailing double-slash comment"); + test.equal(root.lookup("Test1.field6").comment, "Field with a trailing single-line three-slash comment.", "should parse trailing triple-slash comment"); + test.equal(root.lookup("Test1.field7").comment, "Field with a trailing single-line slash-star comment.", 
"should parse trailing slash-star comment"); + test.equal(root.lookup("Test1.field8").comment, null, "should parse no comment"); + test.equal(root.lookup("Test1.field9").comment, "Field with a\nmulti-line comment.", "should parse multiline double-slash field comment"); + test.equal(root.lookup("Test1.field10").comment, "Field with a\nmulti-line doc-block comment.", "should parse multiline doc-block field comment"); + + test.equal(root.lookup("Test3").comments.ONE, "Value with a comment.", "should parse blocks for enum values"); + test.equal(root.lookup("Test3").comments.TWO, "Value with a single-line comment.", "should parse double-slash comments for enum values"); + test.equal(root.lookup("Test3").comments.THREE, "Value with a triple-slash comment.", "should parse lines for enum values and prefer on top over trailing"); + test.equal(root.lookup("Test3").comments.FOUR, "Other value with a comment.", "should not confuse previous trailing comments with comments for the next field"); + + test.end(); + }); +}); From 00a19adf042ba89554ec350b67c2d28277d65eb3 Mon Sep 17 00:00:00 2001 From: gideong Date: Thu, 18 Jan 2018 12:57:24 -0800 Subject: [PATCH 2/2] Fixing codeClimate errors and refactoring. 
--- src/tokenize.js | 103 ++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 70 deletions(-) diff --git a/src/tokenize.js b/src/tokenize.js index 86e5598f9..b939ef289 100644 --- a/src/tokenize.js +++ b/src/tokenize.js @@ -202,7 +202,7 @@ function tokenize(source, alternateCommentMode) { function findEndOfLine(cursor) { // find end of cursor's line var endOffset = cursor; - while (endOffset < length && charAt(endOffset) !== '\n') { + while (endOffset < length && charAt(endOffset) !== "\n") { endOffset++; } return endOffset; @@ -238,9 +238,8 @@ function tokenize(source, alternateCommentMode) { if (++offset === length) { throw illegal("comment"); } - if (!alternateCommentMode) { - // standard comment parsing - if (charAt(offset) === "/") { // Line + if (charAt(offset) === "/") { // Line + if (!alternateCommentMode) { // check for triple-slash comment isDoc = charAt(start = offset + 1) === "/"; @@ -255,32 +254,8 @@ function tokenize(source, alternateCommentMode) { } ++line; repeat = true; - } else if ((curr = charAt(offset)) === "*") { /* Block */ - // check for /** doc comment - isDoc = charAt(start = offset + 1) === "*"; - do { - if (curr === "\n") { - ++line; - } - if (++offset === length) { - throw illegal("comment"); - } - prev = curr; - curr = charAt(offset); - } while (prev !== "*" || curr !== "/"); - ++offset; - if (isDoc) { /** Comment */ - setComment(start, offset - 2); - } - repeat = true; } else { - return "/"; - } - } else { - // alternate comment parsing - // check for double-slash comments, coalescing consecutive - // lines into one multi-line comment. 
- if (charAt(offset) === "/") { + // check for double-slash comments, consolidating consecutive lines start = offset; isDoc = false; if (isDoubleSlashCommentLine(offset)) { @@ -300,25 +275,28 @@ function tokenize(source, alternateCommentMode) { } line++; repeat = true; - } else if ((curr = charAt(offset)) === "*") { /* Block */ - // found /* doc comment - start = offset + 1; - do { - if (curr === "\n") { - ++line; - } - if (++offset === length) { - throw illegal("comment"); - } - prev = curr; - curr = charAt(offset); - } while (prev !== "*" || curr !== "/"); - ++offset; + } + } else if ((curr = charAt(offset)) === "*") { /* Block */ + // check for /** (regular comment mode) or /* (alternate comment mode) + start = offset + 1; + isDoc = alternateCommentMode || charAt(start) === "*"; + do { + if (curr === "\n") { + ++line; + } + if (++offset === length) { + throw illegal("comment"); + } + prev = curr; + curr = charAt(offset); + } while (prev !== "*" || curr !== "/"); + ++offset; + if (isDoc) { setComment(start, offset - 2); - repeat = true; - } else { - return "/"; } + repeat = true; + } else { + return "/"; } } } while (repeat); @@ -390,32 +368,17 @@ function tokenize(source, alternateCommentMode) { */ function cmnt(trailingLine) { var ret = null; - if (alternateCommentMode) { - if (trailingLine === undefined) { - if (commentLine === line - 1) { - ret = commentText; - } - } else { - if (commentLine < trailingLine) { - peek(); - } - if (commentLine === trailingLine && !commentLineEmpty) { - ret = commentText; - } + if (trailingLine === undefined) { + if (commentLine === line - 1 && (alternateCommentMode || commentType === "*" || commentLineEmpty)) { + ret = commentText; } } else { - if (trailingLine === undefined) { - if (commentLine === line - 1 && (commentType === "*" || commentLineEmpty)) { - ret = commentText; - } - } else { - /* istanbul ignore else */ - if (commentLine < trailingLine) { - peek(); - } - if (commentLine === trailingLine && !commentLineEmpty && 
commentType === "/") { - ret = commentText; - } + /* istanbul ignore else */ + if (commentLine < trailingLine) { + peek(); + } + if (commentLine === trailingLine && !commentLineEmpty && (alternateCommentMode || commentType === "/")) { + ret = commentText; } } return ret;