Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds a new option to IParseOptions, alternateComment. #968

Merged
merged 2 commits into from
Feb 6, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,12 @@ export interface IParseOptions {

/** Keeps field casing instead of converting to camel case */
keepCase?: boolean;

/**
* Turns on an alternate comment parsing mode that preserves double-slash
* and slash-star comments as documentation.
*/
alternateCommentMode?: boolean;
}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ function parse(source, root, options) {
if (!options)
options = parse.defaults;

var tn = tokenize(source),
var tn = tokenize(source, options.alternateCommentMode || false),
next = tn.next,
push = tn.push,
peek = tn.peek,
Expand Down
183 changes: 145 additions & 38 deletions src/tokenize.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ var delimRe = /[\s{}=;:[\],'"()<>]/g,
stringSingleRe = /(?:'([^'\\]*(?:\\.[^'\\]*)*)')/g;

var setCommentRe = /^ *[*/]+ */,
setCommentAltRe = /^\s*\*?\/*/,
setCommentSplitRe = /\n/g,
whitespaceRe = /\s/,
unescapeRe = /\\(.?)/g;
Expand Down Expand Up @@ -92,9 +93,10 @@ tokenize.unescape = unescape;
/**
* Tokenizes the given .proto source and returns an object with useful utility functions.
* @param {string} source Source contents
* @param {boolean} alternateCommentMode Whether we should activate alternate comment parsing mode.
* @returns {ITokenizerHandle} Tokenizer handle
*/
function tokenize(source) {
function tokenize(source, alternateCommentMode) {
/* eslint-disable callback-return */
source = source.toString();

Expand Down Expand Up @@ -159,10 +161,17 @@ function tokenize(source) {
commentType = source.charAt(start++);
commentLine = line;
commentLineEmpty = false;
var offset = start - 3, // "///" or "/**"
var lookback;
if (alternateCommentMode) {
lookback = 2; // alternate comment parsing: "//" or "/*"
} else {
lookback = 3; // "///" or "/**"
}
var commentOffset = start - lookback,
c;
do {
if (--offset < 0 || (c = source.charAt(offset)) === "\n") {
if (--commentOffset < 0 ||
(c = source.charAt(commentOffset)) === "\n") {
commentLineEmpty = true;
break;
}
Expand All @@ -171,12 +180,34 @@ function tokenize(source) {
.substring(start, end)
.split(setCommentSplitRe);
for (var i = 0; i < lines.length; ++i)
lines[i] = lines[i].replace(setCommentRe, "").trim();
lines[i] = lines[i]
.replace(alternateCommentMode ? setCommentAltRe : setCommentRe, "")
.trim();
commentText = lines
.join("\n")
.trim();
}

/**
 * Tests whether the text from `startOffset` up to the end of that line begins
 * (after optional whitespace) with one or two slashes.
 * @param {number} startOffset Offset into `source` at which to start looking
 * @returns {boolean} `true` if the remainder of the line looks like a slash comment
 * @inner
 */
function isDoubleSlashCommentLine(startOffset) {
    // Only the remainder of the current line is inspected.
    var restOfLine = source.substring(startOffset, findEndOfLine(startOffset));

    // One OR two slashes are accepted: when called for the first line of a
    // comment, startOffset may already point past the slash that opened it.
    return /^\s*\/{1,2}/.test(restOfLine);
}

/**
 * Scans forward from `cursor` to the end of the line containing it.
 * @param {number} cursor Offset to start scanning from
 * @returns {number} Offset of the first "\n" at or after `cursor`, or `length`
 * if no line break follows; `cursor` itself when it is already at or past `length`
 * @inner
 */
function findEndOfLine(cursor) {
    for (var pos = cursor; pos < length; ++pos) {
        if (charAt(pos) === "\n")
            break; // stop ON the line break, do not consume it
    }
    return pos;
}

/**
* Obtains the next token.
* @returns {string|null} Next token or `null` on eof
Expand All @@ -202,35 +233,93 @@ function tokenize(source) {
if (++offset === length)
return null;
}

if (charAt(offset) === "/") {
if (++offset === length)
if (++offset === length) {
throw illegal("comment");
if (charAt(offset) === "/") { // Line
isDoc = charAt(start = offset + 1) === "/";
while (charAt(++offset) !== "\n")
if (offset === length)
return null;
++offset;
if (isDoc) /// Comment
setComment(start, offset - 1);
++line;
repeat = true;
} else if ((curr = charAt(offset)) === "*") { /* Block */
isDoc = charAt(start = offset + 1) === "*";
do {
if (curr === "\n")
++line;
if (++offset === length)
throw illegal("comment");
prev = curr;
curr = charAt(offset);
} while (prev !== "*" || curr !== "/");
++offset;
if (isDoc) /** Comment */
}
if (!alternateCommentMode) {
// standard comment parsing
if (charAt(offset) === "/") { // Line
// check for triple-slash comment
isDoc = charAt(start = offset + 1) === "/";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think of doing something like

isDoc = (charAt(start = offset) === "/" && ++offset) || alternateCommentMode;

here so it always handles /// and advances properly, and otherwise falls back to // in alternateCommentMode? Just asking because it seems that duplicating this section could potentially be avoided.

I'd say it would be fine if /// in non-alternateCommentMode also coalesces consecutive lines.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for looking. I agree that avoiding duplication would be good, however I am hesitant to change existing behavior in case other consumers of this library depend on it.

In the spirit of your comment, I consolidated the /* handling for original and alternate parsing modes. The `//` and `///` handling is a bit too divergent to consolidate further.


while (charAt(++offset) !== "\n") {
if (offset === length) {
return null;
}
}
++offset;
if (isDoc) {
setComment(start, offset - 1);
}
++line;
repeat = true;
} else if ((curr = charAt(offset)) === "*") { /* Block */
// check for /** doc comment
isDoc = charAt(start = offset + 1) === "*";
do {
if (curr === "\n") {
++line;
}
if (++offset === length) {
throw illegal("comment");
}
prev = curr;
curr = charAt(offset);
} while (prev !== "*" || curr !== "/");
++offset;
if (isDoc) { /** Comment */
setComment(start, offset - 2);
}
repeat = true;
} else {
return "/";
}
} else {
// alternate comment parsing
// check for double-slash comments, coalescing consecutive
// lines into one multi-line comment.
if (charAt(offset) === "/") {
start = offset;
isDoc = false;
if (isDoubleSlashCommentLine(offset)) {
isDoc = true;
do {
offset = findEndOfLine(offset);
if (offset === length) {
break;
}
offset++;
} while (isDoubleSlashCommentLine(offset));
} else {
offset = Math.min(length, findEndOfLine(offset) + 1);
}
if (isDoc) {
setComment(start, offset);
}
line++;
repeat = true;
} else if ((curr = charAt(offset)) === "*") { /* Block */
// found /* doc comment
start = offset + 1;
do {
if (curr === "\n") {
++line;
}
if (++offset === length) {
throw illegal("comment");
}
prev = curr;
curr = charAt(offset);
} while (prev !== "*" || curr !== "/");
++offset;
setComment(start, offset - 2);
repeat = true;
} else
return "/";
repeat = true;
} else {
return "/";
}
}
}
} while (repeat);

Expand Down Expand Up @@ -301,15 +390,33 @@ function tokenize(source) {
*/
function cmnt(trailingLine) {
var ret = null;
if (trailingLine === undefined) {
if (commentLine === line - 1 && (commentType === "*" || commentLineEmpty))
ret = commentText;
if (alternateCommentMode) {
if (trailingLine === undefined) {
if (commentLine === line - 1) {
ret = commentText;
}
} else {
if (commentLine < trailingLine) {
peek();
}
if (commentLine === trailingLine && !commentLineEmpty) {
ret = commentText;
}
}
} else {
/* istanbul ignore else */
if (commentLine < trailingLine)
peek();
if (commentLine === trailingLine && !commentLineEmpty && commentType === "/")
ret = commentText;
if (trailingLine === undefined) {
if (commentLine === line - 1 && (commentType === "*" || commentLineEmpty)) {
ret = commentText;
}
} else {
/* istanbul ignore else */
if (commentLine < trailingLine) {
peek();
}
if (commentLine === trailingLine && !commentLineEmpty && commentType === "/") {
ret = commentText;
}
}
}
return ret;
}
Expand Down
73 changes: 73 additions & 0 deletions tests/data/comments-alternate-parse.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/**
* File with alternate comment syntax.
* This file uses double slash and regular star-slash comment styles for doc
* strings.
*/

syntax = "proto3";

// Message with
// a
// multi-line comment.
message Test1 {

/**
* Field with a doc-block comment.
*/
string field1 = 1;

// Field with a single-line comment starting with two slashes.
uint32 field2 = 2;

/// Field with a single-line comment starting with three slashes.
bool field3 = 3;

/* Field with a single-line slash-star comment. */
bool field4 = 4;

bool field5 = 5; // Field with a trailing single-line two-slash comment.

bool field6 = 6; /// Field with a trailing single-line three-slash comment.

bool field7 = 7; /* Field with a trailing single-line slash-star comment. */

bool field8 = 8;

// Field with a
// multi-line comment.
bool field9 = 9;

/**
* Field with a
* multi-line doc-block comment.
*/
string field10 = 10;
}

/* Message
with
a multiline plain slash-star
comment.
*/
message Test2 {
}

/*
* Message
* with
* a
* comment and stars.
*/
enum Test3 {

/** Value with a comment. */
ONE = 1;

// Value with a single-line comment.
TWO = 2;

/// Value with a triple-slash comment.
THREE = 3; // ignored

FOUR = 4; /// Other value with a comment.
}
35 changes: 35 additions & 0 deletions tests/docs_comments_alternate_parse.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
var tape = require("tape");

var protobuf = require("..");

// Loads the alternate-comment fixture with `alternateCommentMode` enabled and
// checks the comment attached to each message, field and enum value.
tape.test("proto comments in alternate-parse mode", function(test) {
    test.plan(17);

    var loadOptions = { alternateCommentMode: true };

    new protobuf.Root().load("tests/data/comments-alternate-parse.proto", loadOptions, function(err, loaded) {
        if (err)
            throw test.fail(err.message);

        // Each entry: [lookup path, expected `.comment`, assertion message]
        var reflectionCases = [
            ["Test1", "Message with\na\nmulti-line comment.", "should parse double-slash multiline comment"],
            ["Test2", "Message\nwith\na multiline plain slash-star\ncomment.", "should parse slash-star multiline comment"],
            ["Test3", "Message\nwith\na\ncomment and stars.", "should parse doc-block multiline comment"],

            ["Test1.field1", "Field with a doc-block comment.", "should parse doc-block field comment"],
            ["Test1.field2", "Field with a single-line comment starting with two slashes.", "should parse double-slash field comment"],
            ["Test1.field3", "Field with a single-line comment starting with three slashes.", "should parse triple-slash field comment"],
            ["Test1.field4", "Field with a single-line slash-star comment.", "should parse single-line slash-star field comment"],
            ["Test1.field5", "Field with a trailing single-line two-slash comment.", "should parse trailing double-slash comment"],
            ["Test1.field6", "Field with a trailing single-line three-slash comment.", "should parse trailing triple-slash comment"],
            ["Test1.field7", "Field with a trailing single-line slash-star comment.", "should parse trailing slash-star comment"],
            ["Test1.field8", null, "should parse no comment"],
            ["Test1.field9", "Field with a\nmulti-line comment.", "should parse multiline double-slash field comment"],
            ["Test1.field10", "Field with a\nmulti-line doc-block comment.", "should parse multiline doc-block field comment"]
        ];
        reflectionCases.forEach(function(entry) {
            test.equal(loaded.lookup(entry[0]).comment, entry[1], entry[2]);
        });

        // Each entry: [enum value name in Test3, expected comment, assertion message]
        var enumValueCases = [
            ["ONE", "Value with a comment.", "should parse blocks for enum values"],
            ["TWO", "Value with a single-line comment.", "should parse double-slash comments for enum values"],
            ["THREE", "Value with a triple-slash comment.", "should parse lines for enum values and prefer on top over trailing"],
            ["FOUR", "Other value with a comment.", "should not confuse previous trailing comments with comments for the next field"]
        ];
        enumValueCases.forEach(function(entry) {
            test.equal(loaded.lookup("Test3").comments[entry[0]], entry[1], entry[2]);
        });

        test.end();
    });
});