Skip to content

Commit

Permalink
C++23: Trimming whitespaces before line splicing
Browse files Browse the repository at this point in the history
- [P2223R2](https://wg21.link/P2223R2)
- the cxx plugin still does not support line splicing in all cases (e.g. in the middle of a number or an identifier)
- related to SonarOpenCommunity#2536
  • Loading branch information
guwirth committed Sep 21, 2024
1 parent 68854cc commit 9e98b51
Show file tree
Hide file tree
Showing 17 changed files with 470 additions and 100 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,23 @@

public class BackslashChannel extends Channel<Lexer> {

private static boolean isNewLine(char ch) {
return (ch == '\n') || (ch == '\r');
}
private final StringBuilder sb = new StringBuilder(256);

@Override
public boolean consume(CodeReader code, Lexer output) {
var ch = (char) code.peek();

if ((ch == '\\') && isNewLine(code.charAt(1))) {
// just throw away the backslash
code.pop();
return true;
if (code.charAt(0) != '\\') {
return false;
}

return false;
var lineSplicing = read(code, sb);
sb.delete(0, sb.length());
return lineSplicing != 0;
}

public static int read(CodeReader code, StringBuilder sb) {
var end = ChannelUtils.handleLineSplicing(code, 0);
code.skip(end); // remove line splicing
return end;
}

}
88 changes: 88 additions & 0 deletions cxx-squid/src/main/java/org/sonar/cxx/channels/ChannelUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* C++ Community Plugin (cxx plugin)
* Copyright (C) 2010-2023 SonarOpenCommunity
* http://github.com/SonarOpenCommunity/sonar-cxx
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.cxx.channels;

import org.sonar.cxx.sslr.channel.CodeReader;

/**
 * Static helpers shared by the lexer channels: character constants, simple
 * character classification and detection of line splicing.
 */
public class ChannelUtils {

  public static final char LF = '\n';
  public static final char CR = '\r';
  public static final char EOF = (char) -1;

  private ChannelUtils() {
    // utility class, not meant to be instantiated
  }

  /**
   * @param ch character to test
   * @return true if {@code ch} is a line feed or a carriage return
   */
  public static boolean isNewLine(char ch) {
    return (ch == CR) || (ch == LF);
  }

  /**
   * @param ch character to test
   * @return true if {@code ch} is a space or a horizontal tab
   */
  public static boolean isWhitespace(char ch) {
    return (ch == '\t') || (ch == ' ');
  }

  /**
   * @param c character to test
   * @return true if {@code c} is an underscore, an upper case or a lower case character
   */
  public static boolean isSuffix(char c) {
    return (c == '_') || Character.isUpperCase(c) || Character.isLowerCase(c);
  }

  /**
   * Detect a line splice beginning at offset {@code start}.
   *
   * A line splice is a backslash, optionally followed by spaces/tabs, followed
   * by a line ending. Trailing whitespace between the backslash and the line
   * ending is not significant (P2178R0).
   *
   * Recognized line endings:
   * - LF (Linux/Unix, macOS)
   * - CR LF (Windows/DOS)
   * - CR (Classic Mac OS)
   *
   * The reader is only inspected with {@code charAt}; nothing is consumed.
   *
   * @param code reader to inspect
   * @param start offset (relative to the current reader position) of the potential backslash
   * @return number of characters (backslash, whitespace and line ending) to remove in order to
   * splice the lines, or 0 if there is no line splice at {@code start}
   */
  public static int handleLineSplicing(CodeReader code, int start) {
    if (code.charAt(start) != '\\') {
      return 0;
    }

    // skip the whitespace between the backslash and the (possible) line ending
    int pos = start + 1;
    var current = code.charAt(pos);
    while (isWhitespace(current)) {
      pos++;
      current = code.charAt(pos);
    }

    if (current == LF) {
      return pos - start + 1;
    }
    if (current == CR) {
      // CR LF counts as one line ending: include the LF as well
      if (code.charAt(pos + 1) == LF) {
        pos++;
      }
      return pos - start + 1;
    }

    // backslash not followed by a line ending: no line splice
    return 0;
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@
*/
public class CharacterLiteralsChannel extends Channel<Lexer> {

private static final char EOF = (char) -1;

private final StringBuilder sb = new StringBuilder(256);

private int index = 0;
Expand Down Expand Up @@ -67,7 +65,7 @@ public boolean consume(CodeReader code, Lexer output) {
private boolean read(CodeReader code) {
index++;
while (code.charAt(index) != ch) {
if (code.charAt(index) == EOF) {
if (code.charAt(index) == ChannelUtils.EOF) {
return false;
}
if (code.charAt(index) == '\\') {
Expand Down Expand Up @@ -95,10 +93,10 @@ private void readUdSuffix(CodeReader code) {
int len = 0;
for (int start_index = index;; index++) {
var charAt = code.charAt(index);
if (charAt == EOF) {
if (charAt == ChannelUtils.EOF) {
return;
}
if (isSuffix(charAt)) {
if (ChannelUtils.isSuffix(charAt)) {
len++;
} else if (Character.isDigit(charAt)) {
if (len > 0) {
Expand All @@ -113,8 +111,4 @@ private void readUdSuffix(CodeReader code) {
}
}

private static boolean isSuffix(char c) {
return Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_');
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ public class KeywordChannel extends Channel<Lexer> {
private final Matcher matcher;
private final Token.Builder tokenBuilder = Token.builder();

public KeywordChannel(String regexp, TokenType[]
... keywordSets) {
public KeywordChannel(String regexp, TokenType[]... keywordSets) {
for (var keywords : keywordSets) {
for (var keyword : keywords) {
keywordsMap.put(keyword.getValue(), keyword);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* C++ Community Plugin (cxx plugin)
* Copyright (C) 2010-2023 SonarOpenCommunity
* http://github.com/SonarOpenCommunity/sonar-cxx
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.cxx.channels;

import static com.sonar.cxx.sslr.api.GenericTokenType.COMMENT;
import com.sonar.cxx.sslr.api.Token;
import com.sonar.cxx.sslr.api.Trivia;
import com.sonar.cxx.sslr.impl.Lexer;
import org.sonar.cxx.sslr.channel.Channel;
import org.sonar.cxx.sslr.channel.CodeReader;

/**
 * Lexer channel for multi line comments.
 *
 * A comment is consumed from the reader and added to the lexer as comment
 * trivia. Line splices (backslash, optional whitespace, line ending) are
 * removed while reading, so they are not part of the resulting comment value.
 */
public class MultiLineCommentChannel extends Channel<Lexer> {

  private final StringBuilder sb = new StringBuilder(256);
  private final Token.Builder tokenBuilder = Token.builder();

  /**
   * Try to consume a multi line comment at the current reader position.
   *
   * @param code reader positioned at the potential start of a comment
   * @param lexer lexer receiving the comment as trivia
   * @return true if a comment start was found and consumed, false otherwise (nothing consumed)
   */
  @Override
  public boolean consume(CodeReader code, Lexer lexer) {
    // start of multi line comment?
    int next = isComment(code);
    if (next == 0) {
      return false;
    }

    // remember the position of the comment start before consuming anything
    int line = code.getLinePosition();
    int column = code.getColumnPosition();

    code.skip(next);
    sb.append('/');
    sb.append('*');

    // search end of multi line comment; an unterminated comment (EOF) is
    // still reported with everything read so far
    read(code, sb);

    var value = sb.toString();
    var token = tokenBuilder
      .setType(COMMENT)
      .setValueAndOriginalValue(value)
      .setURI(lexer.getURI())
      .setLine(line)
      .setColumn(column)
      .build();

    lexer.addTrivia(Trivia.createComment(token));
    sb.delete(0, sb.length()); // reset the shared buffer for the next comment
    return true;
  }

  /**
   * Check whether a multi line comment starts at the current reader position.
   * Line splices between the slash and the asterisk are taken into account.
   * Nothing is consumed.
   *
   * @param code reader to inspect
   * @return number of characters forming the comment start (including line splices),
   * or 0 if there is no multi line comment at the current position
   */
  public static int isComment(CodeReader code) {
    int next = 0;

    // start of multi line comment?
    if (code.charAt(next) != '/') {
      return 0;
    }
    next += 1;

    // there may be several consecutive line splices between '/' and '*'
    int splice = ChannelUtils.handleLineSplicing(code, next);
    while (splice != 0) {
      next += splice;
      splice = ChannelUtils.handleLineSplicing(code, next);
    }

    if (code.charAt(next) != '*') {
      return 0;
    }
    next += 1;
    return next;
  }

  /**
   * Read the rest of a multi line comment, the comment start must already be consumed.
   * The comment content, without line splices, is appended to {@code sb}.
   *
   * @param code reader positioned behind the comment start
   * @param sb buffer receiving the comment content including the closing sequence
   * @return true if the end of the comment was found, false if EOF was reached before
   */
  public static boolean read(CodeReader code, StringBuilder sb) {
    boolean first = false; // true if the previous character was a '*'
    while (true) { // search end of multi line comment: */
      skipLineSplicing(code);

      var charAt = (char) code.pop();
      switch (charAt) {
        case '*':
          first = true;
          break;
        case '/':
          if (first) {
            sb.append('/');
            return true;
          }
          break;
        case ChannelUtils.EOF:
          return false;
        default:
          first = false;
          break;
      }

      sb.append(charAt);
    }
  }

  /**
   * Remove all consecutive line splices at the current reader position.
   * Removing only one would leave the backslash of a following splice behind,
   * which then would be read as comment content and hide a closing sequence
   * split over several spliced lines.
   */
  private static void skipLineSplicing(CodeReader code) {
    int end = ChannelUtils.handleLineSplicing(code, 0);
    while (end != 0) {
      code.skip(end); // remove line splicing
      end = ChannelUtils.handleLineSplicing(code, 0);
    }
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
//
public class PreprocessorChannel extends Channel<Lexer> {

private static final char EOF = (char) -1;
private final StringLiteralsChannel stringLiteralsChannel = new StringLiteralsChannel();
private final StringBuilder sb = new StringBuilder(256);
private final StringBuilder dummy = new StringBuilder(256);
private final Matcher matcher;

public PreprocessorChannel(TokenType[]... keywordSets) {
Expand Down Expand Up @@ -84,67 +84,46 @@ public boolean consume(CodeReader code, Lexer output) {

private void read(CodeReader code) {
while (true) {
var ch = code.charAt(0);
if (isNewline(ch) || ch == EOF) {
var charAt = code.charAt(0);
if (ChannelUtils.isNewLine(charAt) || charAt == ChannelUtils.EOF) {
code.pop();
break;
} else if (stringLiteralsChannel.read(code, sb)) {
} else if (stringLiteralsChannel.read(code, sb)) { // string literal
continue;
}
ch = (char) code.pop();
if (ch == '/' && code.charAt(0) == '/') {
consumeSingleLineComment(code);
} else if (ch == '/' && code.charAt(0) == '*') {
consumeMultiLineComment(code);
} else if (ch == '\\' && isNewline((char) code.peek())) {
// the newline is escaped: we have a the multi line preprocessor directive
// consume both the backslash and the newline, insert a space instead
consumeNewline(code);
sb.append(' ');
} else {
sb.append(ch);
}
}
}

private static void consumeNewline(CodeReader code) {
if ((code.charAt(0) == '\r') && (code.charAt(1) == '\n')) {
// \r\n
code.pop();
code.pop();
} else {
// \r or \n
code.pop();
}
}

private static void consumeSingleLineComment(CodeReader code) {
code.pop(); // initial '/'
while (true) {
var charAt = code.charAt(0);
if (isNewline(charAt) || charAt == EOF) {
break;
var len = 0;
switch (charAt) {
case '/': // comment?
len = SingleLineCommentChannel.isComment(code);
if (len != 0) {
// single line comment
code.skip(len);
SingleLineCommentChannel.read(code, dummy);
dummy.delete(0, dummy.length());
} else {
len = MultiLineCommentChannel.isComment(code);
if (len != 0) {
// multi line comment
code.skip(len);
MultiLineCommentChannel.read(code, dummy);
dummy.delete(0, dummy.length());
}
}
break;
case '\\':
len = BackslashChannel.read(code, dummy);
if (len != 0) {
// consume backslash and the newline
dummy.delete(0, dummy.length());
}
break;
}
code.pop();
}
}

private static void consumeMultiLineComment(CodeReader code) {
code.pop(); // initial '*'
while (true) {
var ch = (char) code.pop();
if (ch == EOF) {
return;
}
if (ch == '*' && code.charAt(0) == '/') {
code.pop();
return;
if (len == 0) {
sb.append((char) code.pop());
}
}
}

private static boolean isNewline(char ch) {
return (ch == '\n') || (ch == '\r');
}

}
Loading

0 comments on commit 9e98b51

Please sign in to comment.