diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/BackslashChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/BackslashChannel.java index 8fa45142a1..55ffbdd01c 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/channels/BackslashChannel.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/BackslashChannel.java @@ -25,21 +25,23 @@ public class BackslashChannel extends Channel { - private static boolean isNewLine(char ch) { - return (ch == '\n') || (ch == '\r'); - } + private final StringBuilder sb = new StringBuilder(256); @Override public boolean consume(CodeReader code, Lexer output) { - var ch = (char) code.peek(); - - if ((ch == '\\') && isNewLine(code.charAt(1))) { - // just throw away the backslash - code.pop(); - return true; + if (code.charAt(0) != '\\') { + return false; } - return false; + var lineSplicing = read(code, sb); + sb.delete(0, sb.length()); + return lineSplicing != 0; + } + + public static int read(CodeReader code, StringBuilder sb) { + var end = ChannelUtils.handleLineSplicing(code, 0); + code.skip(end); // remove line splicing + return end; } } diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/ChannelUtils.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/ChannelUtils.java new file mode 100644 index 0000000000..7f06b716eb --- /dev/null +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/ChannelUtils.java @@ -0,0 +1,69 @@ +/* + * C++ Community Plugin (cxx plugin) + * Copyright (C) 2010-2023 SonarOpenCommunity + * http://github.com/SonarOpenCommunity/sonar-cxx + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+package org.sonar.cxx.channels;
+
+import org.sonar.cxx.sslr.channel.CodeReader;
+
+public class ChannelUtils {
+
+  public static final char LF = '\n';
+  public static final char CR = '\r';
+  public static final char EOF = (char) -1;
+
+  private ChannelUtils() {
+    // empty
+  }
+
+  public static boolean isNewLine(char ch) {
+    return (ch == LF) || (ch == CR);
+  }
+
+  public static boolean isWhitespace(char ch) {
+    return (ch == ' ') || (ch == '\t');
+  }
+
+  public static boolean isSuffix(char c) {
+    return Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_');
+  }
+
+  /**
+   * Measure the line splice (backslash, optional blanks/tabs, line terminator) at an offset.
+   *
+   * @return characters to skip, a CR LF pair counted as one terminator; 0 if no splice
+   */
+  public static int handleLineSplicing(CodeReader code, int start) {
+    if (code.charAt(start) != '\\') {
+      return 0;
+    }
+    int next = start + 1;
+    while (isWhitespace(code.charAt(next))) {
+      next++;
+    }
+    var charAt = code.charAt(next);
+    if (!isNewLine(charAt)) {
+      return 0;
+    }
+    if ((charAt == CR) && (code.charAt(next + 1) == LF)) {
+      next++; // CR LF ends one line: a leftover LF would end the spliced comment/directive
+    }
+    return next - start + 1;
+  }
+
+}
diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/CharacterLiteralsChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/CharacterLiteralsChannel.java
index 3d7d9dd6fd..b8d54743f0 100644
--- a/cxx-squid/src/main/java/org/sonar/cxx/channels/CharacterLiteralsChannel.java
+++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/CharacterLiteralsChannel.java
@@ -30,12 +30,10 @@
  */
 public class CharacterLiteralsChannel extends Channel {
 
-  private static final char EOF = (char) -1;
-
   private final StringBuilder sb = new StringBuilder(256);
 
   private int index = 0;
-  private char ch = ' ';
+  private char charAt = ' ';
 
   @Override
   public boolean consume(CodeReader code, Lexer output) {
@@ -43,7 +41,7 @@ public boolean consume(CodeReader code, Lexer output) {
     int column = code.getColumnPosition();
     index = 0;
     readPrefix(code);
-    if (ch != '\'') {
+    if (charAt != '\'') {
       return false;
     }
     if (!read(code)) {
@@ -66,8 +64,8 @@ public boolean consume(CodeReader code, Lexer output) {
 
   private boolean read(CodeReader code) {
     index++;
-    while (code.charAt(index) != ch) {
-      if (code.charAt(index) == EOF) {
+    while (code.charAt(index) != charAt) {
+      if (code.charAt(index) == ChannelUtils.EOF) {
         return false;
       }
       if (code.charAt(index) == '\\') {
@@ -81,13 +79,13 @@ private boolean read(CodeReader code) {
   }
 
   private void readPrefix(CodeReader code) {
-    ch = code.charAt(index);
-    if ((ch == 'u') || (ch == 'U') || ch == 'L') {
+    charAt = code.charAt(index);
+    if ((charAt == 'u') || (charAt == 'U') || charAt == 'L') {
       index++;
-      if (ch == 'u' && code.charAt(index) == '8') {
+      if (charAt == 'u' && code.charAt(index) == '8') {
         index++;
       }
-      ch = code.charAt(index);
+      charAt = code.charAt(index);
     }
   }
 
@@ -95,10 +93,10 @@ private void readUdSuffix(CodeReader code) {
     int len = 0;
     for (int start_index = index;; index++) {
       var charAt = code.charAt(index);
-      if (charAt == EOF) {
+      if (charAt == ChannelUtils.EOF) {
         return;
       }
-      if (isSuffix(charAt)) {
+      if 
(ChannelUtils.isSuffix(charAt)) { len++; } else if (Character.isDigit(charAt)) { if (len > 0) { @@ -113,8 +111,4 @@ private void readUdSuffix(CodeReader code) { } } - private static boolean isSuffix(char c) { - return Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_'); - } - } diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/KeywordChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/KeywordChannel.java index c22349eb8c..37e4389a18 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/channels/KeywordChannel.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/KeywordChannel.java @@ -38,8 +38,7 @@ public class KeywordChannel extends Channel { private final Matcher matcher; private final Token.Builder tokenBuilder = Token.builder(); - public KeywordChannel(String regexp, TokenType[] - ... keywordSets) { + public KeywordChannel(String regexp, TokenType[]... keywordSets) { for (var keywords : keywordSets) { for (var keyword : keywords) { keywordsMap.put(keyword.getValue(), keyword); diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/MultiLineCommentChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/MultiLineCommentChannel.java new file mode 100644 index 0000000000..c1f1e5947e --- /dev/null +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/MultiLineCommentChannel.java @@ -0,0 +1,111 @@ +/* + * C++ Community Plugin (cxx plugin) + * Copyright (C) 2010-2023 SonarOpenCommunity + * http://github.com/SonarOpenCommunity/sonar-cxx + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +package org.sonar.cxx.channels; + +import static com.sonar.cxx.sslr.api.GenericTokenType.COMMENT; +import com.sonar.cxx.sslr.api.Token; +import com.sonar.cxx.sslr.api.Trivia; +import com.sonar.cxx.sslr.impl.Lexer; +import org.sonar.cxx.sslr.channel.Channel; +import org.sonar.cxx.sslr.channel.CodeReader; + +public class MultiLineCommentChannel extends Channel { + + private final StringBuilder sb = new StringBuilder(256); + private final Token.Builder tokenBuilder = Token.builder(); + + @Override + public boolean consume(CodeReader code, Lexer lexer) { + // start of multi line comment? + int next = isComment(code); + if (next == 0) { + return false; + } + + int line = code.getLinePosition(); + int column = code.getColumnPosition(); + + code.skip(next); + sb.append('/'); + sb.append('*'); + + read(code, sb); // search end of multi line comment + + var value = sb.toString(); + var token = tokenBuilder + .setType(COMMENT) + .setValueAndOriginalValue(value) + .setURI(lexer.getURI()) + .setLine(line) + .setColumn(column) + .build(); + + lexer.addTrivia(Trivia.createComment(token)); + sb.delete(0, sb.length()); + return true; + } + + public static int isComment(CodeReader code) { + int next = 0; + + // start of multi line comment? 
+ if (code.charAt(next) != '/') { + return 0; + } + next += 1; + next += ChannelUtils.handleLineSplicing(code, next); + + if (code.charAt(next) != '*') { + return 0; + } + next += 1; + return next; + } + + public static boolean read(CodeReader code, StringBuilder sb) { + boolean first = false; + while (true) { // search end of multi line comment: */ + var end = ChannelUtils.handleLineSplicing(code, 0); + code.skip(end); // remove line splicing + + var charAt = (char) code.pop(); + switch (charAt) { + case '*': + first = true; + break; + case '/': + if (first) { + sb.append('/'); + return true; + } + break; + case ChannelUtils.EOF: + return false; + default: + first = false; + break; + } + + sb.append(charAt); + } + + } + +} diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/PreprocessorChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/PreprocessorChannel.java index 0f8822d4bb..2b5421f564 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/channels/PreprocessorChannel.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/PreprocessorChannel.java @@ -34,9 +34,9 @@ // public class PreprocessorChannel extends Channel { - private static final char EOF = (char) -1; private final StringLiteralsChannel stringLiteralsChannel = new StringLiteralsChannel(); private final StringBuilder sb = new StringBuilder(256); + private final StringBuilder dummy = new StringBuilder(256); private final Matcher matcher; public PreprocessorChannel(TokenType[]... 
keywordSets) { @@ -84,67 +84,46 @@ public boolean consume(CodeReader code, Lexer output) { private void read(CodeReader code) { while (true) { - var ch = code.charAt(0); - if (isNewline(ch) || ch == EOF) { + var charAt = code.charAt(0); + if (ChannelUtils.isNewLine(charAt) || charAt == ChannelUtils.EOF) { code.pop(); break; - } else if (stringLiteralsChannel.read(code, sb)) { + } else if (stringLiteralsChannel.read(code, sb)) { // string literal continue; } - ch = (char) code.pop(); - if (ch == '/' && code.charAt(0) == '/') { - consumeSingleLineComment(code); - } else if (ch == '/' && code.charAt(0) == '*') { - consumeMultiLineComment(code); - } else if (ch == '\\' && isNewline((char) code.peek())) { - // the newline is escaped: we have a the multi line preprocessor directive - // consume both the backslash and the newline, insert a space instead - consumeNewline(code); - sb.append(' '); - } else { - sb.append(ch); - } - } - } - private static void consumeNewline(CodeReader code) { - if ((code.charAt(0) == '\r') && (code.charAt(1) == '\n')) { - // \r\n - code.pop(); - code.pop(); - } else { - // \r or \n - code.pop(); - } - } - - private static void consumeSingleLineComment(CodeReader code) { - code.pop(); // initial '/' - while (true) { - var charAt = code.charAt(0); - if (isNewline(charAt) || charAt == EOF) { - break; + var len = 0; + switch (charAt) { + case '/': // comment? 
+ len = SingleLineCommentChannel.isComment(code); + if (len != 0) { + // single line comment + code.skip(len); + SingleLineCommentChannel.read(code, dummy); + dummy.delete(0, dummy.length()); + } else { + len = MultiLineCommentChannel.isComment(code); + if (len != 0) { + // multi line comment + code.skip(len); + MultiLineCommentChannel.read(code, dummy); + dummy.delete(0, dummy.length()); + } + } + break; + case '\\': + len = BackslashChannel.read(code, dummy); + if (len != 0) { + // consume backslash and the newline + dummy.delete(0, dummy.length()); + } + break; } - code.pop(); - } - } - private static void consumeMultiLineComment(CodeReader code) { - code.pop(); // initial '*' - while (true) { - var ch = (char) code.pop(); - if (ch == EOF) { - return; - } - if (ch == '*' && code.charAt(0) == '/') { - code.pop(); - return; + if (len == 0) { + sb.append((char) code.pop()); } } } - private static boolean isNewline(char ch) { - return (ch == '\n') || (ch == '\r'); - } - } diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/RightAngleBracketsChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/RightAngleBracketsChannel.java index 49cc23070f..7c5f33cd81 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/channels/RightAngleBracketsChannel.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/RightAngleBracketsChannel.java @@ -48,10 +48,9 @@ public class RightAngleBracketsChannel extends Channel { @Override public boolean consume(CodeReader code, Lexer output) { - var ch = (char) code.peek(); var consumed = false; - switch (ch) { + switch (code.charAt(0)) { case '(': if (angleBracketLevel > 0) { parentheseLevel++; diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/SingleLineCommentChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/SingleLineCommentChannel.java new file mode 100644 index 0000000000..a17af4bf68 --- /dev/null +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/SingleLineCommentChannel.java @@ -0,0 +1,98 @@ +/* + * 
C++ Community Plugin (cxx plugin) + * Copyright (C) 2010-2023 SonarOpenCommunity + * http://github.com/SonarOpenCommunity/sonar-cxx + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +package org.sonar.cxx.channels; + +import static com.sonar.cxx.sslr.api.GenericTokenType.COMMENT; +import com.sonar.cxx.sslr.api.Token; +import com.sonar.cxx.sslr.api.Trivia; +import com.sonar.cxx.sslr.impl.Lexer; +import org.sonar.cxx.sslr.channel.Channel; +import org.sonar.cxx.sslr.channel.CodeReader; + +public class SingleLineCommentChannel extends Channel { + + private final StringBuilder sb = new StringBuilder(256); + private final Token.Builder tokenBuilder = Token.builder(); + + @Override + public boolean consume(CodeReader code, Lexer lexer) { + // start of single line comment? 
+ int next = isComment(code); + if (next == 0) { + return false; + } + + int line = code.getLinePosition(); + int column = code.getColumnPosition(); + + code.skip(next); + sb.append('/'); + sb.append('/'); + + // search end of line + read(code, sb); + + var value = sb.toString(); + var token = tokenBuilder + .setType(COMMENT) + .setValueAndOriginalValue(value) + .setURI(lexer.getURI()) + .setLine(line) + .setColumn(column) + .build(); + + lexer.addTrivia(Trivia.createComment(token)); + sb.delete(0, sb.length()); + return true; + } + + public static int isComment(CodeReader code) { + int next = 0; + + // start of single line comment? + if (code.charAt(next) != '/') { + return 0; + } + + next += 1; + next += ChannelUtils.handleLineSplicing(code, next); + + if (code.charAt(next) != '/') { + return 0; + } + next += 1; + return next; + } + + public static boolean read(CodeReader code, StringBuilder sb) { + while (true) { // search end of line + var end = ChannelUtils.handleLineSplicing(code, 0); + code.skip(end); // remove line splicing + + var charAt = code.charAt(0); + if (ChannelUtils.isNewLine(charAt) || charAt == ChannelUtils.EOF) { + break; + } + sb.append((char) code.pop()); + } + return true; + } + +} diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/StringLiteralsChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/StringLiteralsChannel.java index 288ed1d652..b08a190227 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/channels/StringLiteralsChannel.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/StringLiteralsChannel.java @@ -30,11 +30,9 @@ */ public class StringLiteralsChannel extends Channel { - private static final char EOF = (char) -1; - private final StringBuilder csb = new StringBuilder(256); private int index = 0; - private char ch = ' '; + private char charAt = ' '; private boolean isRawString = false; @Override @@ -58,7 +56,7 @@ public boolean consume(CodeReader code, Lexer output) { public boolean read(CodeReader code, 
StringBuilder sb) { index = 0; readStringPrefix(code); - if (ch != '\"') { + if (charAt != '\"') { return false; } if (isRawString) { @@ -72,28 +70,36 @@ public boolean read(CodeReader code, StringBuilder sb) { } readUdSuffix(code); for (var i = 0; i < index; i++) { + if (code.charAt(0) == '\\') { + var len = ChannelUtils.handleLineSplicing(code, 0); + if (len > 1) { + code.skip(len); // remove line splicing + i += (len - 1); + continue; + } + } sb.append((char) code.pop()); } return true; } private void readStringPrefix(CodeReader code) { - ch = code.charAt(index); + charAt = code.charAt(index); isRawString = false; - if ((ch == 'u') || (ch == 'U') || ch == 'L') { + if ((charAt == 'u') || (charAt == 'U') || charAt == 'L') { index++; - if (ch == 'u' && code.charAt(index) == '8') { + if (charAt == 'u' && code.charAt(index) == '8') { index++; } if (code.charAt(index) == ' ') { index++; } - ch = code.charAt(index); + charAt = code.charAt(index); } - if (ch == 'R') { + if (charAt == 'R') { index++; isRawString = true; - ch = code.charAt(index); + charAt = code.charAt(index); } } @@ -102,7 +108,7 @@ private boolean readRawString(CodeReader code, StringBuilder sb) { char charAt; index++; while ((charAt = code.charAt(index)) != '(') { // delimiter in front of ( - if (charAt == EOF) { + if (charAt == ChannelUtils.EOF) { return false; } sb.append(charAt); @@ -114,14 +120,14 @@ private boolean readRawString(CodeReader code, StringBuilder sb) { index -= sb.length(); sb.delete(0, sb.length()); while ((charAt = code.charAt(index)) != ')') { // raw_character* - if (charAt == EOF) { + if (charAt == ChannelUtils.EOF) { return false; } index++; } index++; while ((charAt = code.charAt(index)) != '"') { // delimiter after ) - if (charAt == EOF) { + if (charAt == ChannelUtils.EOF) { return false; } sb.append(charAt); @@ -140,8 +146,8 @@ private boolean readRawString(CodeReader code, StringBuilder sb) { private boolean readString(CodeReader code) { index++; char charAt; - while 
((charAt = code.charAt(index)) != ch) { - if (charAt == EOF) { + while ((charAt = code.charAt(index)) != this.charAt) { + if (charAt == ChannelUtils.EOF) { return false; } if (charAt == '\\') { @@ -158,10 +164,10 @@ private void readUdSuffix(CodeReader code) { int len = 0; for (int start_index = index;; index++) { var charAt = code.charAt(index); - if (charAt == EOF) { + if (charAt == ChannelUtils.EOF) { return; } - if (isSuffix(charAt)) { + if (ChannelUtils.isSuffix(charAt)) { len++; } else if (Character.isDigit(charAt)) { if (len > 0) { @@ -176,8 +182,4 @@ private void readUdSuffix(CodeReader code) { } } - private static boolean isSuffix(char c) { - return Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_'); - } - } diff --git a/cxx-squid/src/main/java/org/sonar/cxx/parser/CxxLexerPool.java b/cxx-squid/src/main/java/org/sonar/cxx/parser/CxxLexerPool.java index 9d553de977..5d1539c28b 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/parser/CxxLexerPool.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/parser/CxxLexerPool.java @@ -25,9 +25,7 @@ import com.sonar.cxx.sslr.impl.channel.BomCharacterChannel; import com.sonar.cxx.sslr.impl.channel.IdentifierAndKeywordChannel; import com.sonar.cxx.sslr.impl.channel.PunctuatorChannel; -import static com.sonar.cxx.sslr.impl.channel.RegexpChannelBuilder.ANY_CHAR; import static com.sonar.cxx.sslr.impl.channel.RegexpChannelBuilder.and; -import static com.sonar.cxx.sslr.impl.channel.RegexpChannelBuilder.commentRegexp; import static com.sonar.cxx.sslr.impl.channel.RegexpChannelBuilder.g; import static com.sonar.cxx.sslr.impl.channel.RegexpChannelBuilder.o2n; import static com.sonar.cxx.sslr.impl.channel.RegexpChannelBuilder.opt; @@ -39,8 +37,10 @@ import java.util.Set; import org.sonar.cxx.channels.BackslashChannel; import org.sonar.cxx.channels.CharacterLiteralsChannel; +import org.sonar.cxx.channels.MultiLineCommentChannel; import org.sonar.cxx.channels.PreprocessorChannel; import 
org.sonar.cxx.channels.RightAngleBracketsChannel; +import org.sonar.cxx.channels.SingleLineCommentChannel; import org.sonar.cxx.channels.StringLiteralsChannel; import org.sonar.cxx.preprocessor.PPSpecialIdentifier; @@ -81,8 +81,8 @@ public static CxxLexerPool create(Charset charset, Preprocessor... preprocessors .withFailIfNoChannelToConsumeOneCharacter(true) .withChannel(new BlackHoleChannel("\\s++")) // C++ Standard, Section 2.8 "Comments" - .withChannel(commentRegexp("//[^\\n\\r]*+")) - .withChannel(commentRegexp("/\\*", ANY_CHAR + "*?", "\\*/")) + .withChannel(new SingleLineCommentChannel()) + .withChannel(new MultiLineCommentChannel()) // backslash at the end of the line: just throw away .withChannel(new BackslashChannel()) // detects preprocessor directives: diff --git a/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerTest.java b/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerTest.java index abb2f585bb..a52fd9cdd0 100644 --- a/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerTest.java +++ b/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerTest.java @@ -23,7 +23,6 @@ import com.sonar.cxx.sslr.api.Grammar; import com.sonar.cxx.sslr.impl.Lexer; import java.io.File; -import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; import static org.assertj.core.api.Assertions.assertThat; @@ -55,10 +54,77 @@ public void init() { lexer = CxxLexerPool.create(cxxpp, new JoinStringsPreprocessor()).getLexer(); } + /** + * C++ Standard, line splicing + */ + @Test + void line_splicing() { + var softly = new SoftAssertions(); + + softly.assertThat(lexer.lex("" + )).as("empty file").allSatisfy(token + -> assertThat(token).isValue("EOF").isLine(1)); + softly.assertThat(lexer.lex("\\\n" + )).as("empty file with line splicing").allSatisfy(token + -> assertThat(token).isValue("EOF").isLine(2)); + softly.assertThat(lexer.lex("\\ \t \n" + )).as("empty file with line splicing and whitespaces").allSatisfy(token + -> 
assertThat(token).isValue("EOF").isLine(2)); + + softly.assertThat(lexer.lex("//a\\\n" + + "b\n")).as("comment c++: line splicing").anySatisfy(token + -> assertThat(token).hasTrivia().isTrivia("//ab").isComment().isTriviaLine(1)); + softly.assertThat(lexer.lex("/\\\n" + + "/ab\n")).as("comment c++: line splicing").anySatisfy(token + -> assertThat(token).hasTrivia().isTrivia("//ab").isComment().isTriviaLine(1)); + softly.assertThat(lexer.lex("/\\ \t \n" + + "/ab\n")).as("comment c++: line splicing").anySatisfy(token + -> assertThat(token).hasTrivia().isTrivia("//ab").isComment().isTriviaLine(1)); + softly.assertThat(lexer.lex("int main() {\n" + + "int i = 1\n" + + "// \\\n" // line splicing + + "+ 42\n" + + ";\n" + + "return i;\n" + + "}\n")).as("comment c++: line splicing").anySatisfy(token + -> assertThat(token).hasTrivia().isTrivia("// + 42").isComment().isTriviaLine(3)); + softly.assertThat(lexer.lex("int main() {\n" + + "int i = 1\n" + + "// \\ \t \n" // line splicing with whitespaces + + "+ 42\n" + + ";\n" + + "return i;\n" + + "}\n")).as("comment c++: line splicing with whitespaces").anySatisfy(token + -> assertThat(token).hasTrivia().isTrivia("// + 42").isComment().isTriviaLine(3)); + + softly.assertThat(lexer.lex("/\\\n" // line splicing + + "**\\\n" // line splicing + + "/")).as("comment c: line splicing").anySatisfy(token + -> assertThat(token).isValue("EOF").hasTrivia().isTrivia("/**/").isComment().isTriviaLine(1)); + softly.assertThat(lexer.lex("/\\ \t \n" // line splicing with whitespaces + + "**\\ \t \n" // line splicing with whitespaces + + "/")).as("comment c: line splicing with whitespaces").anySatisfy(token + -> assertThat(token).isValue("EOF").hasTrivia().isTrivia("/**/").isComment().isTriviaLine(1)); + + softly.assertThat(lexer.lex("/\\\n" + + "*\n" + + "*/ # /*\n" + + "*/ defi\\\n" + + "ne FO\\\n" + + "O 10\\\n" + + "20\n")).as("preprocessor directive with line splicing").anySatisfy(token + -> 
assertThat(token).isValue("EOF").hasTrivia().isTrivia("# define FOO 1020").isTriviaLine(3)); + softly.assertAll(); + + softly.assertThat(lexer.lex("\"str\\\n" + + "i\\\n" + + "ng\"")).as("string with line splicing").anySatisfy(token + -> assertThat(token).isValue("\"string\"").hasType(CxxTokenType.STRING).isLine(1)); + softly.assertAll(); + } + /** * C++ Standard, Section 2.8 "Comments" - * - * @throws URISyntaxException */ @Test void comments_cxx() { @@ -85,7 +151,7 @@ void comments_c() { softly.assertThat(lexer.lex("/* My comment */")).as("comment c: simple").anySatisfy(token -> assertThat(token).isValue("EOF").hasTrivia().isTrivia("/* My comment */").isComment().isTriviaLine(1)); softly.assertThat(lexer.lex("/*\\\n*/")).as("comment c: with newline").anySatisfy(token - -> assertThat(token).isValue("EOF").hasTrivia().isTrivia("/*\\\n*/").isComment().isTriviaLine(1)); + -> assertThat(token).isValue("EOF").hasTrivia().isTrivia("/**/").isComment().isTriviaLine(1)); softly.assertThat(lexer.lex("/*//*/")).as("comment c: nested").anySatisfy(token -> assertThat(token).isValue("EOF").hasTrivia().isTrivia("/*//*/").isComment().isTriviaLine(1)); softly.assertThat(lexer.lex("/* /* */")).as("comment c: nested2").anySatisfy(token diff --git a/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerWithoutPreprocessorTest.java b/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerWithoutPreprocessorTest.java index 0d74061753..6a9b829086 100644 --- a/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerWithoutPreprocessorTest.java +++ b/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerWithoutPreprocessorTest.java @@ -66,7 +66,7 @@ void preprocessor_directives() { @Test void preprocessor_continued_define() { assertThat(lexer.lex("#define M\\\n" - + "0")).anySatisfy(token -> assertThat(token).isValue("#define M 0").hasType( + + "0")).anySatisfy(token -> assertThat(token).isValue("#define M0").hasType( CxxTokenType.PREPROCESSOR)); } diff --git 
a/cxx-squid/src/test/java/org/sonar/cxx/lexer/LexerAssert.java b/cxx-squid/src/test/java/org/sonar/cxx/lexer/LexerAssert.java index 1ee4728f08..2eaec38a9f 100644 --- a/cxx-squid/src/test/java/org/sonar/cxx/lexer/LexerAssert.java +++ b/cxx-squid/src/test/java/org/sonar/cxx/lexer/LexerAssert.java @@ -51,6 +51,15 @@ public LexerAssert isValue(String value) { return this; } + public LexerAssert isLine(int line) { + isNotNull(); + int tokenLine = actual.getLine(); + if (tokenLine != line) { + failWithMessage("Expected the Token line to be <%s> but was <%s>", line, tokenLine); + } + return this; + } + public LexerAssert hasTrivia() { isNotNull(); boolean exists = actual.hasTrivia(); diff --git a/cxx-squid/src/test/java/org/sonar/cxx/preprocessor/IncludeFileLexerTest.java b/cxx-squid/src/test/java/org/sonar/cxx/preprocessor/IncludeFileLexerTest.java index 56f9ffb7e4..3c3625d0a8 100644 --- a/cxx-squid/src/test/java/org/sonar/cxx/preprocessor/IncludeFileLexerTest.java +++ b/cxx-squid/src/test/java/org/sonar/cxx/preprocessor/IncludeFileLexerTest.java @@ -44,8 +44,10 @@ void proper_preprocessor_directives_are_created() { @Test void continued_lines_are_handled_correctly() { - List tokens = LEXER.lex("#define\\\nname"); - assertThat(hasToken("#define name", CxxTokenType.PREPROCESSOR) + List tokens = LEXER.lex("#define \\\n" + + "name \\\n" + + "10"); + assertThat(hasToken("#define name 10", CxxTokenType.PREPROCESSOR) .matches(tokens)).isTrue(); assertThat(tokens).hasSize(2); } diff --git a/cxx-squid/src/test/resources/metrics/nosonar.cc b/cxx-squid/src/test/resources/metrics/nosonar.cc index 83538969b8..22c25f1c11 100644 --- a/cxx-squid/src/test/resources/metrics/nosonar.cc +++ b/cxx-squid/src/test/resources/metrics/nosonar.cc @@ -10,4 +10,4 @@ int c; /* NOSONAR */ int d; // NOSONAR -/* EOF '/ +/* EOF */ diff --git a/cxx-sslr/sslr-core/src/main/java/org/sonar/cxx/sslr/channel/CodeBuffer.java b/cxx-sslr/sslr-core/src/main/java/org/sonar/cxx/sslr/channel/CodeBuffer.java index 
94fb56f1f2..c6cf461906 100644
--- a/cxx-sslr/sslr-core/src/main/java/org/sonar/cxx/sslr/channel/CodeBuffer.java
+++ b/cxx-sslr/sslr-core/src/main/java/org/sonar/cxx/sslr/channel/CodeBuffer.java
@@ -113,6 +113,20 @@ public final int pop() {
     return character;
   }
 
+  /**
+   * Read and consume the next characters
+   *
+   * @param number number of characters to consume; zero or a negative value consumes nothing
+   * @return the next character or -1 if the end of the stream is reached
+   */
+  public final int skip(int number) {
+    while (number > 0) { // '>' guards negative counts (with '!=' they loop until int wrap-around)
+      pop();
+      number--;
+    }
+    return peek();
+  }
+
   private void updateCursorPosition(int character) {
     // see Java Language Specification : http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#3.4
     if (character == LF || character == CR && peek() != LF) {
diff --git a/cxx-sslr/sslr-core/src/test/java/org/sonar/cxx/sslr/channel/CodeBufferTest.java b/cxx-sslr/sslr-core/src/test/java/org/sonar/cxx/sslr/channel/CodeBufferTest.java
index c2c3c2a06c..d0f1e2268d 100644
--- a/cxx-sslr/sslr-core/src/test/java/org/sonar/cxx/sslr/channel/CodeBufferTest.java
+++ b/cxx-sslr/sslr-core/src/test/java/org/sonar/cxx/sslr/channel/CodeBufferTest.java
@@ -41,6 +41,14 @@ void testPop() {
     assertThat(code.pop()).isEqualTo(-1);
   }
 
+  @Test
+  void testSkip() {
+    var code = new CodeBuffer("1234", defaulConfiguration);
+    assertThat((char) code.skip(-1)).isEqualTo('1');
+    assertThat((char) code.skip(2)).isEqualTo('3');
+    assertThat(code.skip(2)).isEqualTo(-1);
+  }
+
   @Test
   void testPeek() {
     var code = new CodeBuffer("pa", defaulConfiguration);