Skip to content

Commit

Permalink
C++23: Trimming whitespaces before line splicing
Browse files Browse the repository at this point in the history
- [P2223R2](https://wg21.link/P2223R2)
- the cxx plugin still does not support line splicing in all cases (e.g. in the middle of a number or an identifier)
- related to SonarOpenCommunity#2536
  • Loading branch information
guwirth committed Sep 20, 2024
1 parent 68854cc commit f31594b
Show file tree
Hide file tree
Showing 17 changed files with 467 additions and 116 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,23 @@

public class BackslashChannel extends Channel<Lexer> {

private static boolean isNewLine(char ch) {
return (ch == '\n') || (ch == '\r');
}
private final StringBuilder sb = new StringBuilder(256);

@Override
public boolean consume(CodeReader code, Lexer output) {
var ch = (char) code.peek();

if ((ch == '\\') && isNewLine(code.charAt(1))) {
// just throw away the backslash
code.pop();
return true;
if (code.charAt(0) != '\\') {
return false;
}

return false;
var lineSplicing = read(code, sb);
sb.delete(0, sb.length());
return lineSplicing != 0;
}

/**
 * Removes a line splice located at the current reader position.
 *
 * @param code reader positioned on the candidate backslash
 * @param sb not used by this method
 * @return number of characters that were skipped, 0 if no line splice was found
 */
public static int read(CodeReader code, StringBuilder sb) {
  final int spliceLength = ChannelUtils.handleLineSplicing(code, 0);
  // advance past the splice characters (the count is 0 when no splice was found)
  code.skip(spliceLength);
  return spliceLength;
}

}
69 changes: 69 additions & 0 deletions cxx-squid/src/main/java/org/sonar/cxx/channels/ChannelUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* C++ Community Plugin (cxx plugin)
* Copyright (C) 2010-2023 SonarOpenCommunity
* http://github.com/SonarOpenCommunity/sonar-cxx
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.cxx.channels;

import org.sonar.cxx.sslr.channel.CodeReader;

/**
 * Character-level helpers shared by the lexer channels.
 */
public class ChannelUtils {

  /** Line feed. */
  public static final char LF = '\n';
  /** Carriage return. */
  public static final char CR = '\r';
  /** Marker the channels compare reader output against to detect the end of the input. */
  public static final char EOF = (char) -1;

  private ChannelUtils() {
    // utility class, not meant to be instantiated
  }

  /**
   * @param ch character to test
   * @return true if {@code ch} is a line feed or a carriage return
   */
  public static boolean isNewLine(char ch) {
    return (ch == LF) || (ch == CR);
  }

  /**
   * @param ch character to test
   * @return true if {@code ch} is a space or a horizontal tab (new-line characters are not included)
   */
  public static boolean isWhitespace(char ch) {
    return (ch == ' ') || (ch == '\t');
  }

  /**
   * @param c character to test
   * @return true if {@code c} is a lower-case letter, an upper-case letter or an underscore
   */
  public static boolean isSuffix(char c) {
    return Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_');
  }

  /**
   * Measures a line splice starting at offset {@code start}: a backslash, optionally followed by
   * spaces/tabs, followed by a new-line. The reader itself is not advanced.
   *
   * <p>A {@code CR LF} pair is counted as a single new-line, so that no stray {@code LF} is left
   * behind the splice on files with Windows line endings.
   *
   * @param code reader to inspect
   * @param start offset, relative to the current reader position, of the candidate backslash
   * @return length of the splice in characters (backslash, trailing whitespace and new-line), or 0 if
   * there is no line splice at {@code start}
   */
  public static int handleLineSplicing(CodeReader code, int start) {
    if (code.charAt(start) != '\\') {
      return 0;
    }

    // trailing whitespace between the backslash and the new-line is part of the splice;
    // the scan also stops at EOF because EOF is neither a space nor a tab
    int next = start + 1;
    while (isWhitespace(code.charAt(next))) {
      next++;
    }

    char terminator = code.charAt(next);
    if (!isNewLine(terminator)) {
      return 0; // backslash not followed by a new-line: no splice
    }
    if (terminator == CR && code.charAt(next + 1) == LF) {
      next++; // CR LF is one new-line
    }

    return next - start + 1;
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,18 @@
*/
public class CharacterLiteralsChannel extends Channel<Lexer> {

private static final char EOF = (char) -1;

private final StringBuilder sb = new StringBuilder(256);

private int index = 0;
private char ch = ' ';
private char charAt = ' ';

@Override
public boolean consume(CodeReader code, Lexer output) {
int line = code.getLinePosition();
int column = code.getColumnPosition();
index = 0;
readPrefix(code);
if (ch != '\'') {
if (charAt != '\'') {
return false;
}
if (!read(code)) {
Expand All @@ -66,8 +64,8 @@ public boolean consume(CodeReader code, Lexer output) {

private boolean read(CodeReader code) {
index++;
while (code.charAt(index) != ch) {
if (code.charAt(index) == EOF) {
while (code.charAt(index) != charAt) {
if (code.charAt(index) == ChannelUtils.EOF) {
return false;
}
if (code.charAt(index) == '\\') {
Expand All @@ -81,24 +79,24 @@ private boolean read(CodeReader code) {
}

private void readPrefix(CodeReader code) {
ch = code.charAt(index);
if ((ch == 'u') || (ch == 'U') || ch == 'L') {
charAt = code.charAt(index);
if ((charAt == 'u') || (charAt == 'U') || charAt == 'L') {
index++;
if (ch == 'u' && code.charAt(index) == '8') {
if (charAt == 'u' && code.charAt(index) == '8') {
index++;
}
ch = code.charAt(index);
charAt = code.charAt(index);
}
}

private void readUdSuffix(CodeReader code) {
int len = 0;
for (int start_index = index;; index++) {
var charAt = code.charAt(index);
if (charAt == EOF) {
if (charAt == ChannelUtils.EOF) {
return;
}
if (isSuffix(charAt)) {
if (ChannelUtils.isSuffix(charAt)) {
len++;
} else if (Character.isDigit(charAt)) {
if (len > 0) {
Expand All @@ -113,8 +111,4 @@ private void readUdSuffix(CodeReader code) {
}
}

private static boolean isSuffix(char c) {
return Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_');
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ public class KeywordChannel extends Channel<Lexer> {
private final Matcher matcher;
private final Token.Builder tokenBuilder = Token.builder();

public KeywordChannel(String regexp, TokenType[]
... keywordSets) {
public KeywordChannel(String regexp, TokenType[]... keywordSets) {
for (var keywords : keywordSets) {
for (var keyword : keywords) {
keywordsMap.put(keyword.getValue(), keyword);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* C++ Community Plugin (cxx plugin)
* Copyright (C) 2010-2023 SonarOpenCommunity
* http://github.com/SonarOpenCommunity/sonar-cxx
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.cxx.channels;

import static com.sonar.cxx.sslr.api.GenericTokenType.COMMENT;
import com.sonar.cxx.sslr.api.Token;
import com.sonar.cxx.sslr.api.Trivia;
import com.sonar.cxx.sslr.impl.Lexer;
import org.sonar.cxx.sslr.channel.Channel;
import org.sonar.cxx.sslr.channel.CodeReader;

/**
 * Lexer channel that recognizes a multi-line comment and adds it to the lexer as comment trivia.
 * Line splices (see {@link ChannelUtils#handleLineSplicing}) inside the comment delimiters and the
 * comment body are removed while reading.
 */
public class MultiLineCommentChannel extends Channel<Lexer> {

  private final StringBuilder sb = new StringBuilder(256);
  private final Token.Builder tokenBuilder = Token.builder();

  /**
   * Consumes a multi-line comment at the current reader position.
   *
   * @param code reader to consume from
   * @param lexer lexer receiving the comment trivia
   * @return true if a comment start was found and consumed, false otherwise (nothing is consumed)
   */
  @Override
  public boolean consume(CodeReader code, Lexer lexer) {
    // start of multi line comment?
    int next = isComment(code);
    if (next == 0) {
      return false;
    }

    // remember the token position before the reader is advanced
    int line = code.getLinePosition();
    int column = code.getColumnPosition();

    code.skip(next);
    // the token value always starts with the plain delimiter, even if it was spliced in the input
    sb.append('/');
    sb.append('*');

    read(code, sb); // search end of multi line comment

    var value = sb.toString();
    var token = tokenBuilder
      .setType(COMMENT)
      .setValueAndOriginalValue(value)
      .setURI(lexer.getURI())
      .setLine(line)
      .setColumn(column)
      .build();

    lexer.addTrivia(Trivia.createComment(token));
    sb.setLength(0); // the buffer is reused for the next comment
    return true;
  }

  /**
   * Checks whether a multi-line comment starts at the current reader position. The two delimiter
   * characters may be separated by one or more line splices. The reader is not advanced.
   *
   * @param code reader to inspect
   * @return number of characters forming the comment start (including line splices), or 0 if there
   * is no comment start
   */
  public static int isComment(CodeReader code) {
    if (code.charAt(0) != '/') {
      return 0;
    }
    int next = 1 + spliceLength(code, 1);

    if (code.charAt(next) != '*') {
      return 0;
    }
    return next + 1;
  }

  /**
   * Reads the comment body up to and including the closing delimiter and appends it to {@code sb}.
   * Line splices are skipped and not appended.
   *
   * @param code reader positioned right behind the comment start
   * @param sb buffer receiving the comment text
   * @return true if the closing delimiter was found, false if the end of the input was reached first
   */
  public static boolean read(CodeReader code, StringBuilder sb) {
    boolean first = false; // true if the previously read character was '*'
    while (true) { // search end of multi line comment: */
      code.skip(spliceLength(code, 0)); // remove line splicing

      var charAt = (char) code.pop();
      switch (charAt) {
        case '*':
          first = true;
          break;
        case '/':
          if (first) {
            sb.append('/');
            return true;
          }
          break;
        case ChannelUtils.EOF:
          return false;
        default:
          first = false;
          break;
      }

      sb.append(charAt);
    }
  }

  /**
   * Total length of all directly consecutive line splices starting at offset {@code start}; every
   * splice has to be removed, not only the first one.
   */
  private static int spliceLength(CodeReader code, int start) {
    int total = 0;
    int length;
    while ((length = ChannelUtils.handleLineSplicing(code, start + total)) != 0) {
      total += length;
    }
    return total;
  }

}
Loading

0 comments on commit f31594b

Please sign in to comment.