From 62989a0891468c886757f206d2db39ef940e380e Mon Sep 17 00:00:00 2001
From: gideong <gideong@google.com>
Date: Mon, 18 Dec 2017 12:12:42 -0800
Subject: [PATCH 1/2] Adds a new option to IParseOptions, alternateCommentMode.
 When enabled, this activates an alternate comment parsing mode that preserves
 double-slash and slash-star comments as documentation.

---
 index.d.ts                                |   6 +
 src/parse.js                              |   2 +-
 src/tokenize.js                           | 183 +++++++++++++++++-----
 tests/data/comments-alternate-parse.proto |  73 +++++++++
 tests/docs_comments_alternate_parse.js    |  35 +++++
 5 files changed, 260 insertions(+), 39 deletions(-)
 create mode 100644 tests/data/comments-alternate-parse.proto
 create mode 100644 tests/docs_comments_alternate_parse.js

diff --git a/index.d.ts b/index.d.ts
index 91ad65ea8..4eb773d20 100644
--- a/index.d.ts
+++ b/index.d.ts
@@ -1015,6 +1015,12 @@ export interface IParseOptions {
 
     /** Keeps field casing instead of converting to camel case */
     keepCase?: boolean;
+
+    /**
+     * Turns on an alternate comment parsing mode that preserves double-slash
+     * and slash-star comments as documentation.
+     */
+    alternateCommentMode?: boolean;
 }
 
 /**
diff --git a/src/parse.js b/src/parse.js
index 0acf919df..e1e572de2 100644
--- a/src/parse.js
+++ b/src/parse.js
@@ -61,7 +61,7 @@ function parse(source, root, options) {
     if (!options)
         options = parse.defaults;
 
-    var tn = tokenize(source),
+    var tn = tokenize(source, options.alternateCommentMode || false),
         next = tn.next,
         push = tn.push,
         peek = tn.peek,
diff --git a/src/tokenize.js b/src/tokenize.js
index 825a7af4f..86e5598f9 100644
--- a/src/tokenize.js
+++ b/src/tokenize.js
@@ -6,6 +6,7 @@ var delimRe        = /[\s{}=;:[\],'"()<>]/g,
     stringSingleRe = /(?:'([^'\\]*(?:\\.[^'\\]*)*)')/g;
 
 var setCommentRe = /^ *[*/]+ */,
+    setCommentAltRe = /^\s*\*?\/*/,
     setCommentSplitRe = /\n/g,
     whitespaceRe = /\s/,
     unescapeRe = /\\(.?)/g;
@@ -92,9 +93,10 @@ tokenize.unescape = unescape;
 /**
  * Tokenizes the given .proto source and returns an object with useful utility functions.
  * @param {string} source Source contents
+ * @param {boolean} alternateCommentMode Whether we should activate alternate comment parsing mode.
  * @returns {ITokenizerHandle} Tokenizer handle
  */
-function tokenize(source) {
+function tokenize(source, alternateCommentMode) {
     /* eslint-disable callback-return */
     source = source.toString();
 
@@ -159,10 +161,17 @@ function tokenize(source) {
         commentType = source.charAt(start++);
         commentLine = line;
         commentLineEmpty = false;
-        var offset = start - 3, // "///" or "/**"
+        var lookback;
+        if (alternateCommentMode) {
+            lookback = 2;  // alternate comment parsing: "//" or "/*"
+        } else {
+            lookback = 3;  // "///" or "/**"
+        }
+        var commentOffset = start - lookback,
             c;
         do {
-            if (--offset < 0 || (c = source.charAt(offset)) === "\n") {
+            if (--commentOffset < 0 ||
+                    (c = source.charAt(commentOffset)) === "\n") {
                 commentLineEmpty = true;
                 break;
             }
@@ -171,12 +180,34 @@ function tokenize(source) {
             .substring(start, end)
             .split(setCommentSplitRe);
         for (var i = 0; i < lines.length; ++i)
-            lines[i] = lines[i].replace(setCommentRe, "").trim();
+            lines[i] = lines[i]
+                .replace(alternateCommentMode ? setCommentAltRe : setCommentRe, "")
+                .trim();
         commentText = lines
             .join("\n")
             .trim();
     }
 
+    function isDoubleSlashCommentLine(startOffset) {
+        var endOffset = findEndOfLine(startOffset);
+
+        // see if remaining line matches comment pattern
+        var lineText = source.substring(startOffset, endOffset);
+        // look for 1 or 2 slashes since startOffset would already point past
+        // the first slash that started the comment.
+        var isComment = /^\s*\/{1,2}/.test(lineText);
+        return isComment;
+    }
+
+    function findEndOfLine(cursor) {
+        // find end of cursor's line
+        var endOffset = cursor;
+        while (endOffset < length && charAt(endOffset) !== '\n') {
+            endOffset++;
+        }
+        return endOffset;
+    }
+
     /**
      * Obtains the next token.
      * @returns {string|null} Next token or `null` on eof
@@ -202,35 +233,93 @@ function tokenize(source) {
                 if (++offset === length)
                     return null;
             }
+
             if (charAt(offset) === "/") {
-                if (++offset === length)
+                if (++offset === length) {
                     throw illegal("comment");
-                if (charAt(offset) === "/") { // Line
-                    isDoc = charAt(start = offset + 1) === "/";
-                    while (charAt(++offset) !== "\n")
-                        if (offset === length)
-                            return null;
-                    ++offset;
-                    if (isDoc) /// Comment
-                        setComment(start, offset - 1);
-                    ++line;
-                    repeat = true;
-                } else if ((curr = charAt(offset)) === "*") { /* Block */
-                    isDoc = charAt(start = offset + 1) === "*";
-                    do {
-                        if (curr === "\n")
-                            ++line;
-                        if (++offset === length)
-                            throw illegal("comment");
-                        prev = curr;
-                        curr = charAt(offset);
-                    } while (prev !== "*" || curr !== "/");
-                    ++offset;
-                    if (isDoc) /** Comment */
+                }
+                if (!alternateCommentMode) {
+                    // standard comment parsing
+                    if (charAt(offset) === "/") { // Line
+                        // check for triple-slash comment
+                        isDoc = charAt(start = offset + 1) === "/";
+
+                        while (charAt(++offset) !== "\n") {
+                            if (offset === length) {
+                                return null;
+                            }
+                        }
+                        ++offset;
+                        if (isDoc) {
+                            setComment(start, offset - 1);
+                        }
+                        ++line;
+                        repeat = true;
+                    } else if ((curr = charAt(offset)) === "*") { /* Block */
+                        // check for /** doc comment
+                        isDoc = charAt(start = offset + 1) === "*";
+                        do {
+                            if (curr === "\n") {
+                                ++line;
+                            }
+                            if (++offset === length) {
+                                throw illegal("comment");
+                            }
+                            prev = curr;
+                            curr = charAt(offset);
+                        } while (prev !== "*" || curr !== "/");
+                        ++offset;
+                        if (isDoc) { /** Comment */
+                            setComment(start, offset - 2);
+                        }
+                        repeat = true;
+                    } else {
+                        return "/";
+                    }
+                } else {
+                    // alternate comment parsing
+                    // check for double-slash comments, coalescing consecutive
+                    // lines into one multi-line comment.
+                    if (charAt(offset) === "/") {
+                        start = offset;
+                        isDoc = false;
+                        if (isDoubleSlashCommentLine(offset)) {
+                            isDoc = true;
+                            do {
+                                offset = findEndOfLine(offset);
+                                if (offset === length) {
+                                    break;
+                                }
+                                offset++;
+                            } while (isDoubleSlashCommentLine(offset));
+                        } else {
+                            offset = Math.min(length, findEndOfLine(offset) + 1);
+                        }
+                        if (isDoc) {
+                            setComment(start, offset);
+                        }
+                        line++;
+                        repeat = true;
+                    } else if ((curr = charAt(offset)) === "*") { /* Block */
+                        // found /* doc comment
+                        start = offset + 1;
+                        do {
+                            if (curr === "\n") {
+                                ++line;
+                            }
+                            if (++offset === length) {
+                                throw illegal("comment");
+                            }
+                            prev = curr;
+                            curr = charAt(offset);
+                        } while (prev !== "*" || curr !== "/");
+                        ++offset;
                         setComment(start, offset - 2);
-                    repeat = true;
-                } else
-                    return "/";
+                        repeat = true;
+                    } else {
+                        return "/";
+                    }
+                }
             }
         } while (repeat);
 
@@ -301,15 +390,33 @@ function tokenize(source) {
      */
     function cmnt(trailingLine) {
         var ret = null;
-        if (trailingLine === undefined) {
-            if (commentLine === line - 1 && (commentType === "*" || commentLineEmpty))
-                ret = commentText;
+        if (alternateCommentMode) {
+            if (trailingLine === undefined) {
+                if (commentLine === line - 1) {
+                    ret = commentText;
+                }
+            } else {
+                if (commentLine < trailingLine) {
+                    peek();
+                }
+                if (commentLine === trailingLine && !commentLineEmpty) {
+                    ret = commentText;
+                }
+            }
         } else {
-            /* istanbul ignore else */
-            if (commentLine < trailingLine)
-                peek();
-            if (commentLine === trailingLine && !commentLineEmpty && commentType === "/")
-                ret = commentText;
+            if (trailingLine === undefined) {
+                if (commentLine === line - 1 && (commentType === "*" || commentLineEmpty)) {
+                    ret = commentText;
+                }
+            } else {
+                /* istanbul ignore else */
+                if (commentLine < trailingLine) {
+                    peek();
+                }
+                if (commentLine === trailingLine && !commentLineEmpty && commentType === "/") {
+                    ret = commentText;
+                }
+            }
         }
         return ret;
     }
diff --git a/tests/data/comments-alternate-parse.proto b/tests/data/comments-alternate-parse.proto
new file mode 100644
index 000000000..4d01f672f
--- /dev/null
+++ b/tests/data/comments-alternate-parse.proto
@@ -0,0 +1,73 @@
+/**
+ * File with alternate comment syntax.
+ * This file uses double slash and regular star-slash comment styles for doc
+ * strings.
+ */
+
+syntax = "proto3";
+
+// Message with
+// a
+// multi-line comment.
+message Test1 {
+
+    /**
+     * Field with a doc-block comment.
+     */
+    string field1 = 1;
+
+    // Field with a single-line comment starting with two slashes.
+    uint32 field2 = 2;
+
+    /// Field with a single-line comment starting with three slashes.
+    bool field3 = 3;
+
+    /* Field with a single-line slash-star comment. */
+    bool field4 = 4;
+
+    bool field5 = 5; // Field with a trailing single-line two-slash comment.
+
+    bool field6 = 6; /// Field with a trailing single-line three-slash comment.
+
+    bool field7 = 7; /* Field with a trailing single-line slash-star comment. */
+
+    bool field8 = 8;
+
+    // Field with a
+    // multi-line comment.
+    bool field9 = 9;
+
+    /**
+     * Field with a
+     * multi-line doc-block comment.
+     */
+    string field10 = 10;
+}
+
+/* Message
+   with
+   a multiline plain slash-star
+   comment.
+*/
+message Test2 {
+}
+
+/*
+ * Message
+ * with
+ * a
+ * comment and stars.
+ */
+enum Test3 {
+
+    /** Value with a comment. */
+    ONE = 1;
+
+    // Value with a single-line comment.
+    TWO = 2;
+
+    /// Value with a triple-slash comment.
+    THREE = 3;  // ignored
+
+    FOUR = 4; /// Other value with a comment.
+}
diff --git a/tests/docs_comments_alternate_parse.js b/tests/docs_comments_alternate_parse.js
new file mode 100644
index 000000000..5793ef091
--- /dev/null
+++ b/tests/docs_comments_alternate_parse.js
@@ -0,0 +1,35 @@
+var tape = require("tape");
+
+var protobuf = require("..");
+
+tape.test("proto comments in alternate-parse mode", function(test) {
+    test.plan(17);
+    var options = {alternateCommentMode: true};
+    var root = new protobuf.Root();
+    root.load("tests/data/comments-alternate-parse.proto", options, function(err, root) {
+        if (err)
+            throw test.fail(err.message);
+
+        test.equal(root.lookup("Test1").comment, "Message with\na\nmulti-line comment.", "should parse double-slash multiline comment");
+        test.equal(root.lookup("Test2").comment, "Message\nwith\na multiline plain slash-star\ncomment.", "should parse slash-star multiline comment");
+        test.equal(root.lookup("Test3").comment, "Message\nwith\na\ncomment and stars.", "should parse doc-block multiline comment");
+
+        test.equal(root.lookup("Test1.field1").comment, "Field with a doc-block comment.", "should parse doc-block field comment");
+        test.equal(root.lookup("Test1.field2").comment, "Field with a single-line comment starting with two slashes.", "should parse double-slash field comment");
+        test.equal(root.lookup("Test1.field3").comment, "Field with a single-line comment starting with three slashes.", "should parse triple-slash field comment");
+        test.equal(root.lookup("Test1.field4").comment, "Field with a single-line slash-star comment.", "should parse single-line slash-star field comment");
+        test.equal(root.lookup("Test1.field5").comment, "Field with a trailing single-line two-slash comment.", "should parse trailing double-slash comment");
+        test.equal(root.lookup("Test1.field6").comment, "Field with a trailing single-line three-slash comment.", "should parse trailing triple-slash comment");
+        test.equal(root.lookup("Test1.field7").comment, "Field with a trailing single-line slash-star comment.", "should parse trailing slash-star comment");
+        test.equal(root.lookup("Test1.field8").comment, null, "should parse no comment");
+        test.equal(root.lookup("Test1.field9").comment, "Field with a\nmulti-line comment.", "should parse multiline double-slash field comment");
+        test.equal(root.lookup("Test1.field10").comment, "Field with a\nmulti-line doc-block comment.", "should parse multiline doc-block field comment");
+
+        test.equal(root.lookup("Test3").comments.ONE, "Value with a comment.", "should parse blocks for enum values");
+        test.equal(root.lookup("Test3").comments.TWO, "Value with a single-line comment.", "should parse double-slash comments for enum values");
+        test.equal(root.lookup("Test3").comments.THREE, "Value with a triple-slash comment.", "should parse lines for enum values and prefer on top over trailing");
+        test.equal(root.lookup("Test3").comments.FOUR, "Other value with a comment.", "should not confuse previous trailing comments with comments for the next field");
+
+        test.end();
+    });
+});

From 00a19adf042ba89554ec350b67c2d28277d65eb3 Mon Sep 17 00:00:00 2001
From: gideong <gideong@google.com>
Date: Thu, 18 Jan 2018 12:57:24 -0800
Subject: [PATCH 2/2] Fixing codeClimate errors and refactoring.

---
 src/tokenize.js | 103 ++++++++++++++++--------------------------------
 1 file changed, 33 insertions(+), 70 deletions(-)

diff --git a/src/tokenize.js b/src/tokenize.js
index 86e5598f9..b939ef289 100644
--- a/src/tokenize.js
+++ b/src/tokenize.js
@@ -202,7 +202,7 @@ function tokenize(source, alternateCommentMode) {
     function findEndOfLine(cursor) {
         // find end of cursor's line
         var endOffset = cursor;
-        while (endOffset < length && charAt(endOffset) !== '\n') {
+        while (endOffset < length && charAt(endOffset) !== "\n") {
             endOffset++;
         }
         return endOffset;
@@ -238,9 +238,8 @@ function tokenize(source, alternateCommentMode) {
                 if (++offset === length) {
                     throw illegal("comment");
                 }
-                if (!alternateCommentMode) {
-                    // standard comment parsing
-                    if (charAt(offset) === "/") { // Line
+                if (charAt(offset) === "/") { // Line
+                    if (!alternateCommentMode) {
                         // check for triple-slash comment
                         isDoc = charAt(start = offset + 1) === "/";
 
@@ -255,32 +254,8 @@ function tokenize(source, alternateCommentMode) {
                         }
                         ++line;
                         repeat = true;
-                    } else if ((curr = charAt(offset)) === "*") { /* Block */
-                        // check for /** doc comment
-                        isDoc = charAt(start = offset + 1) === "*";
-                        do {
-                            if (curr === "\n") {
-                                ++line;
-                            }
-                            if (++offset === length) {
-                                throw illegal("comment");
-                            }
-                            prev = curr;
-                            curr = charAt(offset);
-                        } while (prev !== "*" || curr !== "/");
-                        ++offset;
-                        if (isDoc) { /** Comment */
-                            setComment(start, offset - 2);
-                        }
-                        repeat = true;
                     } else {
-                        return "/";
-                    }
-                } else {
-                    // alternate comment parsing
-                    // check for double-slash comments, coalescing consecutive
-                    // lines into one multi-line comment.
-                    if (charAt(offset) === "/") {
+                        // check for double-slash comments, consolidating consecutive lines
                         start = offset;
                         isDoc = false;
                         if (isDoubleSlashCommentLine(offset)) {
@@ -300,25 +275,28 @@ function tokenize(source, alternateCommentMode) {
                         }
                         line++;
                         repeat = true;
-                    } else if ((curr = charAt(offset)) === "*") { /* Block */
-                        // found /* doc comment
-                        start = offset + 1;
-                        do {
-                            if (curr === "\n") {
-                                ++line;
-                            }
-                            if (++offset === length) {
-                                throw illegal("comment");
-                            }
-                            prev = curr;
-                            curr = charAt(offset);
-                        } while (prev !== "*" || curr !== "/");
-                        ++offset;
+                    }
+                } else if ((curr = charAt(offset)) === "*") { /* Block */
+                    // check for /** (regular comment mode) or /* (alternate comment mode)
+                    start = offset + 1;
+                    isDoc = alternateCommentMode || charAt(start) === "*";
+                    do {
+                        if (curr === "\n") {
+                            ++line;
+                        }
+                        if (++offset === length) {
+                            throw illegal("comment");
+                        }
+                        prev = curr;
+                        curr = charAt(offset);
+                    } while (prev !== "*" || curr !== "/");
+                    ++offset;
+                    if (isDoc) {
                         setComment(start, offset - 2);
-                        repeat = true;
-                    } else {
-                        return "/";
                     }
+                    repeat = true;
+                } else {
+                    return "/";
                 }
             }
         } while (repeat);
@@ -390,32 +368,17 @@ function tokenize(source, alternateCommentMode) {
      */
     function cmnt(trailingLine) {
         var ret = null;
-        if (alternateCommentMode) {
-            if (trailingLine === undefined) {
-                if (commentLine === line - 1) {
-                    ret = commentText;
-                }
-            } else {
-                if (commentLine < trailingLine) {
-                    peek();
-                }
-                if (commentLine === trailingLine && !commentLineEmpty) {
-                    ret = commentText;
-                }
+        if (trailingLine === undefined) {
+            if (commentLine === line - 1 && (alternateCommentMode || commentType === "*" || commentLineEmpty)) {
+                ret = commentText;
             }
         } else {
-            if (trailingLine === undefined) {
-                if (commentLine === line - 1 && (commentType === "*" || commentLineEmpty)) {
-                    ret = commentText;
-                }
-            } else {
-                /* istanbul ignore else */
-                if (commentLine < trailingLine) {
-                    peek();
-                }
-                if (commentLine === trailingLine && !commentLineEmpty && commentType === "/") {
-                    ret = commentText;
-                }
+            /* istanbul ignore else */
+            if (commentLine < trailingLine) {
+                peek();
+            }
+            if (commentLine === trailingLine && !commentLineEmpty && (alternateCommentMode || commentType === "/")) {
+                ret = commentText;
             }
         }
         return ret;