Skip to content

Commit

Permalink
C++23: escape sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
guwirth committed Sep 4, 2024
1 parent ef659c9 commit 19728ba
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 141 deletions.
119 changes: 0 additions & 119 deletions cxx-squid/dox/diff-cpp20-cpp23_grammar.txt

This file was deleted.

80 changes: 68 additions & 12 deletions cxx-squid/src/main/java/org/sonar/cxx/preprocessor/PPNumber.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,18 @@
package org.sonar.cxx.preprocessor;

import java.math.BigInteger;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.sonar.api.internal.apachecommons.lang.StringUtils;

/**
* Helper class to evaluate preprocessor numbers.
*/
final class PPNumber {

private static final HashMap<String, BigInteger> numberCache = new HashMap<>();
private static final Map<String, Integer> namedUniversalCharacter = createNamedUniversalCharacter();

private PPNumber() {

Expand Down Expand Up @@ -127,27 +131,79 @@ static BigInteger decodeCharacter(String charValue) {
}

switch (charValue.charAt(1)) {
case 't':
return BigInteger.valueOf('\t');
case 'b':
return BigInteger.valueOf('\b');
case 'n':
return BigInteger.valueOf('\n');
case 'r':
return BigInteger.valueOf('\r');
case 'f':
return BigInteger.valueOf('\f');
case '\'':
return BigInteger.valueOf('\'');
case '"':
return BigInteger.valueOf('\"');
case '?':
return BigInteger.valueOf(0x3f);
case '\\':
return BigInteger.valueOf('\\');
case 'a':
return BigInteger.valueOf(0x07);
case 'b':
return BigInteger.valueOf('\b');
case 'f':
return BigInteger.valueOf('\f');
case 'n':
return BigInteger.valueOf('\n');
case 'r':
return BigInteger.valueOf('\r');
case 't':
return BigInteger.valueOf('\t');
case 'v':
return BigInteger.valueOf(0x0b);

case 'u':
if (charValue.length() > 2 && charValue.charAt(2) == '{') {
return delimitedEscapeSequences(charValue, 16);
}
return new BigInteger(StringUtils.substring(charValue, 2, 2 + 4), 16); // 4 hexadecimal digits

case 'U':
return new BigInteger(StringUtils.substring(charValue, 2, 2 + 8), 16); // 8 hexadecimal digits

case 'x':
case 'X':
if (charValue.length() > 2 && charValue.charAt(2) == '{') {
return delimitedEscapeSequences(charValue, 16);
}
return new BigInteger(charValue.substring(2), 16);

case 'o':
if (charValue.length() > 2 && charValue.charAt(2) == '{') {
return delimitedEscapeSequences(charValue, 8);
}
return BigInteger.ZERO;

case 'N':
if (charValue.length() > 2 && charValue.charAt(2) == '{') {
return delimitedEscapeSequences(charValue, -1);
}
return BigInteger.ZERO;

default:
return new BigInteger(charValue.substring(1), 10);
return new BigInteger(charValue.substring(1), 8);
}
}

/**
 * Evaluates the brace-delimited part of an escape sequence.
 *
 * The text between index 3 and the first closing brace found at or after index 3 is taken as the
 * payload. The callers in this class only invoke this method after checking that the character at
 * index 2 is an opening brace.
 *
 * @param charValue escape sequence text; the payload is expected to start at index 3
 * @param radix radix used to parse the payload as a number, or -1 to look the payload up as a
 * character name
 * @return the parsed number, the value registered for the character name (1 if the name is not
 * registered), or zero if no closing brace is found
 * @throws NumberFormatException if a numeric payload is not a valid number in the given radix
 */
static BigInteger delimitedEscapeSequences(String charValue, int radix) {
  final int closingBrace = charValue.indexOf('}', 3);
  if (closingBrace == -1) {
    // unterminated sequence: nothing to evaluate
    return BigInteger.ZERO;
  }

  final String payload = charValue.substring(3, closingBrace);
  if (radix == -1) {
    // character named by NAME; unknown names are mapped to 1
    return BigInteger.valueOf(namedUniversalCharacter.getOrDefault(payload, 1));
  }
  return new BigInteger(payload, radix);
}

/**
 * Builds the read-only table that maps the names of named universal characters to their values.
 *
 * Currently only NULL and NUL are supported (both map to 0); every other name is mapped to 1 by
 * the lookup code.
 *
 * @return unmodifiable map from character name to character value
 */
private static Map<String, Integer> createNamedUniversalCharacter() {
  final Map<String, Integer> valueByName = new HashMap<>();
  for (String alias : new String[]{"NULL", "NUL"}) {
    valueByName.put(alias, 0);
  }
  return Collections.unmodifiableMap(valueByName);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,22 +59,34 @@ void decode_charcters() {
assertThat(PPNumber.decodeCharacter("1")).isEqualTo(BigInteger.valueOf('1'));
assertThat(PPNumber.decodeCharacter("A")).isEqualTo(BigInteger.valueOf('A'));

assertThat(PPNumber.decodeCharacter("\\t")).isEqualTo(BigInteger.valueOf('\t'));
assertThat(PPNumber.decodeCharacter("\\b")).isEqualTo(BigInteger.valueOf('\b'));
assertThat(PPNumber.decodeCharacter("\\n")).isEqualTo(BigInteger.valueOf('\n'));
assertThat(PPNumber.decodeCharacter("\\r")).isEqualTo(BigInteger.valueOf('\r'));
assertThat(PPNumber.decodeCharacter("\\f")).isEqualTo(BigInteger.valueOf('\f'));
// simple escape sequences
assertThat(PPNumber.decodeCharacter("\\'")).isEqualTo(BigInteger.valueOf('\''));
assertThat(PPNumber.decodeCharacter("\\\"")).isEqualTo(BigInteger.valueOf('\"'));
assertThat(PPNumber.decodeCharacter("\\?")).isEqualTo(BigInteger.valueOf(0x3f)); // \?
assertThat(PPNumber.decodeCharacter("\\\\")).isEqualTo(BigInteger.valueOf('\\'));
assertThat(PPNumber.decodeCharacter("\\a")).isEqualTo(BigInteger.valueOf(0x07)); // \a
assertThat(PPNumber.decodeCharacter("\\b")).isEqualTo(BigInteger.valueOf('\b'));
assertThat(PPNumber.decodeCharacter("\\f")).isEqualTo(BigInteger.valueOf('\f'));
assertThat(PPNumber.decodeCharacter("\\n")).isEqualTo(BigInteger.valueOf('\n'));
assertThat(PPNumber.decodeCharacter("\\r")).isEqualTo(BigInteger.valueOf('\r'));
assertThat(PPNumber.decodeCharacter("\\t")).isEqualTo(BigInteger.valueOf('\t'));
assertThat(PPNumber.decodeCharacter("\\v")).isEqualTo(BigInteger.valueOf(0x0b)); // \v

// numeric escape sequences
assertThat(PPNumber.decodeCharacter("\\0")).isEqualTo(BigInteger.valueOf(0));
assertThat(PPNumber.decodeCharacter("\\1")).isEqualTo(BigInteger.valueOf(1));

assertThat(PPNumber.decodeCharacter("\\123")).isEqualTo(BigInteger.valueOf(83));
assertThat(PPNumber.decodeCharacter("\\o{123}")).isEqualTo(BigInteger.valueOf(83));
assertThat(PPNumber.decodeCharacter("\\x00")).isEqualTo(BigInteger.valueOf(0));
assertThat(PPNumber.decodeCharacter("\\x01")).isEqualTo(BigInteger.valueOf(1));
assertThat(PPNumber.decodeCharacter("\\X00")).isEqualTo(BigInteger.valueOf(0));
assertThat(PPNumber.decodeCharacter("\\X01")).isEqualTo(BigInteger.valueOf(1));
assertThat(PPNumber.decodeCharacter("\\x0f")).isEqualTo(BigInteger.valueOf(15));
assertThat(PPNumber.decodeCharacter("\\x{FF}")).isEqualTo(BigInteger.valueOf(255));

// universal character names
assertThat(PPNumber.decodeCharacter("\\u12345")).isEqualTo(BigInteger.valueOf(0x1234));
assertThat(PPNumber.decodeCharacter("\\U123456789")).isEqualTo(BigInteger.valueOf(0x12345678));
assertThat(PPNumber.decodeCharacter("\\u{1234}")).isEqualTo(BigInteger.valueOf(0x1234));
assertThat(PPNumber.decodeCharacter("\\N{NULL}")).isEqualTo(BigInteger.valueOf(0));
assertThat(PPNumber.decodeCharacter("\\N{NUL}")).isEqualTo(BigInteger.valueOf(0));
assertThat(PPNumber.decodeCharacter("\\N{NEW LINE}")).isEqualTo(BigInteger.valueOf(1));
}

}
24 changes: 24 additions & 0 deletions cxx-squid/src/test/resources/parser/own/C++23/escape-sequences.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Parser test input: character literals covering the C++23 escape sequence forms.
// The exact spelling of each literal is the test input, so the literals must not be reformatted.

// simple escape sequences
auto s1 = '\'';
auto s2 = '\"';
auto s3 = '\?';
auto s4 = '\\';
auto s5 = '\a';
auto s6 = '\b';
auto s7 = '\f';
auto s8 = '\n';
auto s9 = '\r';
auto s10 = '\t';
auto s11 = '\v';

// numeric escape sequences
auto n1 = '\123'; // octal digits directly after the backslash
auto n2 = '\o{12345}'; // brace-delimited octal form
auto n3 = '\xABCDEF'; // hexadecimal digits after \x
auto n4 = '\x{ABFF}'; // brace-delimited hexadecimal form

// universal character names
auto u1 = '\u1234'; // \u followed by 4 hexadecimal digits
auto u2 = '\u{112233FF}'; // brace-delimited form of \u
auto u3 = '\U12345678'; // \U followed by 8 hexadecimal digits
auto u4 = '\N{NULL}'; // character selected by name

0 comments on commit 19728ba

Please sign in to comment.