Skip to content

Commit

Permalink
Support exporting and re-importing ClassNameTrie content to an external resource (#3692)
Browse files Browse the repository at this point in the history

* The trie data is stored using the same "modified-UTF8" format used by the JDK:
  https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8

* Don't alias '/' to '.' while building the trie; only do that when querying
  • Loading branch information
mcculls authored Aug 2, 2022
1 parent 3cf4f7a commit 91e567f
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 11 deletions.
94 changes: 83 additions & 11 deletions internal-api/src/main/java/datadog/trace/util/ClassNameTrie.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package datadog.trace.util;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
Expand Down Expand Up @@ -103,6 +105,8 @@ public final class ClassNameTrie {
/** Constant to account for the fact that the last branch doesn't have a jump offset/id. */
private static final int NO_END_JUMP = 1;

private static final int FILE_MAGIC = 0xDD097213;

/** The compressed trie. */
private final char[] trieData;

Expand Down Expand Up @@ -196,10 +200,10 @@ public static int apply(char[] data, int[] longJumps, String key) {
public static class Builder {
private static final Pattern MAPPING_LINE = Pattern.compile("^\\s*(?:([0-9]+)\\s+)?([^\\s#]+)");

private char[] trieData = new char[8192];
private int trieLength = 0;
private int[] longJumps = new int[16];
private int longJumpCount = 0;
private char[] trieData;
private int trieLength;
private int[] longJumps;
private int longJumpCount;

public boolean isEmpty() {
return trieLength == 0;
Expand All @@ -211,9 +215,74 @@ public int apply(String key) {
}

/**
 * Creates a {@link ClassNameTrie} from the trie data and long-jump table held by this builder.
 *
 * @return a trie constructed from the used portion of the builder's arrays
 */
public ClassNameTrie buildTrie() {
// NOTE(review): the copying return below and the compaction logic after it look like the two
// sides of a diff shown together; as written everything after this return is unreachable.
// Confirm which variant is intended before relying on this block.
return new ClassNameTrie(
Arrays.copyOfRange(trieData, 0, trieLength),
Arrays.copyOfRange(longJumps, 0, longJumpCount));
// avoid unnecessary allocation when compaction isn't required
// (trieData is only trimmed when it exists and has spare capacity beyond trieLength)
if (null != trieData && trieData.length > trieLength) {
trieData = Arrays.copyOfRange(trieData, 0, trieLength);
}
// same treatment for the long-jump table: trim only when it exists and is over-sized
if (null != longJumps && longJumps.length > longJumpCount) {
longJumps = Arrays.copyOfRange(longJumps, 0, longJumpCount);
}
// the (possibly trimmed) builder arrays are handed to the trie as-is, without a further copy
return new ClassNameTrie(trieData, longJumps);
}

/**
 * Writes trie content to an external resource.
 *
 * <p>Layout: the file magic, the number of trie chars, every trie char encoded as 1, 2 or 3
 * bytes (7-bit non-zero, 11-bit, or 16-bit respectively), the number of long-jumps, and finally
 * each long-jump as an int.
 *
 * @param out sink that receives the encoded trie
 * @throws IOException if writing to {@code out} fails
 */
public void writeTo(DataOutput out) throws IOException {
  out.writeInt(FILE_MAGIC);

  // trie section: char count followed by the variable-width encoded chars
  out.writeInt(trieLength);
  int charIndex = 0;
  while (charIndex < trieLength) {
    final int ch = trieData[charIndex++];
    if (ch > 0x07FF) {
      // needs all 16 bits: 3 bytes carrying 4+6+6 bits
      out.writeByte(0xE0 | ((ch >> 12) & 0x0F));
      out.writeByte(0x80 | ((ch >> 6) & 0x3F));
      out.writeByte(0x80 | (ch & 0x3F));
    } else if (ch == 0x0000 || ch > 0x007F) {
      // zero and anything above 7 bits: 2 bytes carrying 5+6 bits
      out.writeByte(0xC0 | ((ch >> 6) & 0x1F));
      out.writeByte(0x80 | (ch & 0x3F));
    } else {
      // non-zero 7-bit char fits in a single byte
      out.writeByte(ch);
    }
  }

  // long-jump section: entry count followed by each entry
  out.writeInt(longJumpCount);
  for (int jumpIndex = 0; jumpIndex < longJumpCount; jumpIndex++) {
    out.writeInt(longJumps[jumpIndex]);
  }
}

/**
 * Reads trie content from an external resource, replacing whatever this builder held before.
 *
 * <p>The expected layout is the one produced by {@link #writeTo(DataOutput)}: the file magic, the
 * number of trie chars, each trie char encoded as 1, 2 or 3 bytes, the number of long-jumps, and
 * each long-jump as an int.
 *
 * <p>The builder is only updated once the whole resource has been read successfully; if an
 * exception is thrown the builder keeps its previous content.
 *
 * @param in source of the encoded trie
 * @throws IOException if the magic does not match, a stored count is negative, or reading from
 *     {@code in} fails (including reaching the end of the input early)
 */
public void readFrom(DataInput in) throws IOException {
  int magic = in.readInt();
  if (magic != FILE_MAGIC) {
    throw new IOException("Unexpected file magic " + magic);
  }

  // decode into locals so a failure part-way through cannot leave the builder half-updated
  int newTrieLength = in.readInt();
  if (newTrieLength < 0) {
    // corrupt input; report it as an I/O problem rather than a NegativeArraySizeException
    throw new IOException("Unexpected trie length " + newTrieLength);
  }
  char[] newTrieData = new char[newTrieLength];
  for (int i = 0; i < newTrieLength; i++) {
    byte b = in.readByte();
    char c;
    if ((b & 0x80) == 0) {
      // read 7-bit non-zero char as 1 byte
      c = (char) b;
    } else if ((b & 0xE0) == 0xE0) {
      // read 16-bit char as 3 bytes (4+6+6)
      c = (char) (((b & 0x0F) << 12) | ((in.readByte() & 0x3F) << 6) | (in.readByte() & 0x3F));
    } else {
      // read 11-bit char as 2 bytes (5+6)
      c = (char) (((b & 0x1F) << 6) | (in.readByte() & 0x3F));
    }
    newTrieData[i] = c;
  }

  int newLongJumpCount = in.readInt();
  if (newLongJumpCount < 0) {
    throw new IOException("Unexpected long-jump count " + newLongJumpCount);
  }
  int[] newLongJumps = null; // stays null when there are no long-jumps
  if (newLongJumpCount > 0) {
    newLongJumps = new int[newLongJumpCount];
    for (int i = 0; i < newLongJumpCount; i++) {
      newLongJumps[i] = in.readInt();
    }
  }

  // everything was read successfully, so it is now safe to replace the builder's content
  trieData = newTrieData;
  trieLength = newTrieLength;
  longJumps = newLongJumps;
  longJumpCount = newLongJumpCount;
}

/** Reads a class-name mapping file into the current builder */
Expand Down Expand Up @@ -252,6 +321,7 @@ public void put(String className, int number) {
if (trieLength == 0) {
int keyLength = key.length();
trieLength = (keyLength > 1 ? 3 : 2) + keyLength;
trieData = new char[8192]; // create table on first mapping
trieData[0] = (char) 1;
trieData[1] = key.charAt(0);
if (keyLength > 1) {
Expand Down Expand Up @@ -280,8 +350,11 @@ private char setJump(int jump) {
if (jump < LONG_JUMP_MARKER) {
return (char) jump; // jump is small enough to fit into the trie
}
if (longJumpCount == longJumps.length) {
if (longJumpCount == 0) {
longJumps = new int[16]; // create table on first long-jump
} else if (longJumpCount == longJumps.length) {
int[] oldJumps = longJumps;
// expand table by 50% to fit additional long-jumps
longJumps = new int[longJumpCount + (longJumpCount >> 1)];
System.arraycopy(oldJumps, 0, longJumps, 0, longJumpCount);
}
Expand Down Expand Up @@ -319,8 +392,7 @@ private void insertMapping(String key, char valueToInsert) {
char branchCount = trieData[dataIndex++];

// trie is ordered, so we can use binary search to pick the right branch
int branchIndex =
Arrays.binarySearch(trieData, dataIndex, dataIndex + branchCount, c == '/' ? '.' : c);
int branchIndex = Arrays.binarySearch(trieData, dataIndex, dataIndex + branchCount, c);

if (branchIndex < 0) {
jumpOffset =
Expand Down Expand Up @@ -391,7 +463,7 @@ private void insertMapping(String key, char valueToInsert) {
int segmentEnd = dataIndex + segmentLength;
while (keyIndex < keyLength && dataIndex < segmentEnd) {
c = key.charAt(keyIndex);
if ((c == '/' ? '.' : c) != trieData[dataIndex]) {
if (c != trieData[dataIndex]) {
break;
}
keyIndex++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,28 @@ class ClassNameTrieTest extends DDSpecification {
assert trie.apply(it.key) == (0x1000 | it.value)
})
}

def 'trie content can be exported and re-imported'() {
  setup:
  // random dotted names, each mapped to a distinct number from 0 to 128
  def expected = (0..128).collectEntries { number ->
    [UUID.randomUUID().toString().replace('-', '.'), number]
  } as TreeMap<String, Integer>

  when:
  // populate a builder with the initial values
  def original = new ClassNameTrie.Builder()
  expected.each { name, id ->
    original.put(name, id)
  }
  // export its content into an in-memory buffer
  def buffer = new ByteArrayOutputStream()
  original.writeTo(new DataOutputStream(buffer))
  // re-import that content into a fresh builder
  def restored = new ClassNameTrie.Builder()
  restored.readFrom(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())))

  then:
  // every original mapping must be answered identically by the re-imported builder
  expected.each { name, id ->
    assert restored.apply(name) == id
  }
}
}

0 comments on commit 91e567f

Please sign in to comment.