From 91e567fae0044832d074470a846f27765b3db957 Mon Sep 17 00:00:00 2001
From: Stuart McCulloch
Date: Tue, 2 Aug 2022 17:23:21 +0100
Subject: [PATCH] Support exporting and re-importing ClassNameTrie content to an external resource (#3692)

* The trie data is stored using the same "modified-UTF8" format used by the JDK:
  https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8
* Don't alias '/' to '.' while building trie, only do that when querying
* Always grow the long-jump table by at least one slot, because a compacted or
  re-imported table may hold a single entry (50% of 1 rounds down to 0)
* Validate lengths in readFrom and only replace the builder's state after the
  whole resource has been read successfully
---
 .../datadog/trace/util/ClassNameTrie.java     | 107 +++++++++++++++---
 .../trace/util/ClassNameTrieTest.groovy       |  24 +++++
 2 files changed, 119 insertions(+), 12 deletions(-)

diff --git a/internal-api/src/main/java/datadog/trace/util/ClassNameTrie.java b/internal-api/src/main/java/datadog/trace/util/ClassNameTrie.java
index 449a84b0eba..0cf49110134 100644
--- a/internal-api/src/main/java/datadog/trace/util/ClassNameTrie.java
+++ b/internal-api/src/main/java/datadog/trace/util/ClassNameTrie.java
@@ -1,5 +1,7 @@
 package datadog.trace.util;
 
+import java.io.DataInput;
+import java.io.DataOutput;
 import java.io.File;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -103,6 +105,8 @@ public final class ClassNameTrie {
   /** Constant to account for the fact that the last branch doesn't have a jump offset/id. */
   private static final int NO_END_JUMP = 1;
 
+  private static final int FILE_MAGIC = 0xDD097213;
+
   /** The compressed trie. */
   private final char[] trieData;
 
@@ -196,10 +200,10 @@ public static int apply(char[] data, int[] longJumps, String key) {
   public static class Builder {
     private static final Pattern MAPPING_LINE = Pattern.compile("^\\s*(?:([0-9]+)\\s+)?([^\\s#]+)");
 
-    private char[] trieData = new char[8192];
-    private int trieLength = 0;
-    private int[] longJumps = new int[16];
-    private int longJumpCount = 0;
+    private char[] trieData;
+    private int trieLength;
+    private int[] longJumps;
+    private int longJumpCount;
 
     public boolean isEmpty() {
       return trieLength == 0;
@@ -211,9 +215,84 @@ public int apply(String key) {
     }
 
     public ClassNameTrie buildTrie() {
-      return new ClassNameTrie(
-          Arrays.copyOfRange(trieData, 0, trieLength),
-          Arrays.copyOfRange(longJumps, 0, longJumpCount));
+      // avoid unnecessary allocation when compaction isn't required
+      if (null != trieData && trieData.length > trieLength) {
+        trieData = Arrays.copyOfRange(trieData, 0, trieLength);
+      }
+      if (null != longJumps && longJumps.length > longJumpCount) {
+        longJumps = Arrays.copyOfRange(longJumps, 0, longJumpCount);
+      }
+      return new ClassNameTrie(trieData, longJumps);
+    }
+
+    /** Writes trie content to an external resource. */
+    public void writeTo(DataOutput out) throws IOException {
+      out.writeInt(FILE_MAGIC);
+      out.writeInt(trieLength);
+      for (int i = 0; i < trieLength; i++) {
+        char c = trieData[i];
+        if (c >= 0x0001 && c <= 0x007F) {
+          // write 7-bit non-zero char as 1 byte
+          out.writeByte(c);
+        } else if (c > 0x07FF) {
+          // write 16-bit char as 3 bytes (4+6+6)
+          out.writeByte(0xE0 | ((c >> 12) & 0x0F));
+          out.writeByte(0x80 | ((c >> 6) & 0x3F));
+          out.writeByte(0x80 | (c & 0x3F));
+        } else {
+          // write 11-bit char as 2 bytes (5+6)
+          out.writeByte(0xC0 | ((c >> 6) & 0x1F));
+          out.writeByte(0x80 | (c & 0x3F));
+        }
+      }
+      out.writeInt(longJumpCount);
+      for (int i = 0; i < longJumpCount; i++) {
+        out.writeInt(longJumps[i]);
+      }
+    }
+
+    /** Reads trie content from an external resource. */
+    public void readFrom(DataInput in) throws IOException {
+      int magic = in.readInt();
+      if (magic != FILE_MAGIC) {
+        throw new IOException("Unexpected file magic " + magic);
+      }
+      int newTrieLength = in.readInt();
+      if (newTrieLength < 0) {
+        throw new IOException("Unexpected trie length " + newTrieLength);
+      }
+      char[] newTrieData = new char[newTrieLength];
+      for (int i = 0; i < newTrieLength; i++) {
+        byte b = in.readByte();
+        char c;
+        if ((b & 0x80) == 0) {
+          // read 7-bit non-zero char as 1 byte
+          c = (char) b;
+        } else if ((b & 0xE0) == 0xE0) {
+          // read 16-bit char as 3 bytes (4+6+6)
+          c = (char) (((b & 0x0F) << 12) | ((in.readByte() & 0x3F) << 6) | (in.readByte() & 0x3F));
+        } else {
+          // read 11-bit char as 2 bytes (5+6)
+          c = (char) (((b & 0x1F) << 6) | (in.readByte() & 0x3F));
+        }
+        newTrieData[i] = c;
+      }
+      int newLongJumpCount = in.readInt();
+      if (newLongJumpCount < 0) {
+        throw new IOException("Unexpected long-jump count " + newLongJumpCount);
+      }
+      int[] newLongJumps = null;
+      if (newLongJumpCount > 0) {
+        newLongJumps = new int[newLongJumpCount];
+        for (int i = 0; i < newLongJumpCount; i++) {
+          newLongJumps[i] = in.readInt();
+        }
+      }
+      // only replace the builder's state once the whole resource was read successfully
+      trieData = newTrieData;
+      trieLength = newTrieLength;
+      longJumps = newLongJumps;
+      longJumpCount = newLongJumpCount;
     }
 
     /** Reads a class-name mapping file into the current builder */
@@ -252,6 +331,7 @@ public void put(String className, int number) {
       if (trieLength == 0) {
         int keyLength = key.length();
         trieLength = (keyLength > 1 ? 3 : 2) + keyLength;
+        trieData = new char[8192]; // create table on first mapping
         trieData[0] = (char) 1;
         trieData[1] = key.charAt(0);
         if (keyLength > 1) {
@@ -280,8 +360,12 @@ private char setJump(int jump) {
       if (jump < LONG_JUMP_MARKER) {
         return (char) jump; // jump is small enough to fit into the trie
       }
-      if (longJumpCount == longJumps.length) {
+      if (longJumpCount == 0) {
+        longJumps = new int[16]; // create table on first long-jump
+      } else if (longJumpCount == longJumps.length) {
         int[] oldJumps = longJumps;
-        longJumps = new int[longJumpCount + (longJumpCount >> 1)];
+        // expand table by 50% to fit additional long-jumps; always add at least one slot
+        // because a compacted or re-imported table may hold just a single entry
+        longJumps = new int[longJumpCount + Math.max(1, longJumpCount >> 1)];
         System.arraycopy(oldJumps, 0, longJumps, 0, longJumpCount);
       }
@@ -319,8 +403,7 @@ private void insertMapping(String key, char valueToInsert) {
         char branchCount = trieData[dataIndex++];
 
         // trie is ordered, so we can use binary search to pick the right branch
-        int branchIndex =
-            Arrays.binarySearch(trieData, dataIndex, dataIndex + branchCount, c == '/' ? '.' : c);
+        int branchIndex = Arrays.binarySearch(trieData, dataIndex, dataIndex + branchCount, c);
 
         if (branchIndex < 0) {
           jumpOffset =
@@ -391,7 +474,7 @@ private void insertMapping(String key, char valueToInsert) {
           int segmentEnd = dataIndex + segmentLength;
           while (keyIndex < keyLength && dataIndex < segmentEnd) {
             c = key.charAt(keyIndex);
-            if ((c == '/' ? '.' : c) != trieData[dataIndex]) {
+            if (c != trieData[dataIndex]) {
               break;
             }
             keyIndex++;
diff --git a/internal-api/src/test/groovy/datadog/trace/util/ClassNameTrieTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/ClassNameTrieTest.groovy
index 0d72c537e5f..a8ef4d0b852 100644
--- a/internal-api/src/test/groovy/datadog/trace/util/ClassNameTrieTest.groovy
+++ b/internal-api/src/test/groovy/datadog/trace/util/ClassNameTrieTest.groovy
@@ -178,4 +178,28 @@ class ClassNameTrieTest extends DDSpecification {
       assert trie.apply(it.key) == (0x1000 | it.value)
     })
   }
+
+  def 'trie content can be exported and re-imported'() {
+    setup:
+    def mapping = (0..128).collectEntries({
+      [UUID.randomUUID().toString().replace('-', '.'), it]
+    }) as TreeMap
+    when:
+    def exporter = new ClassNameTrie.Builder()
+    // initial values
+    mapping.each { className, number ->
+      exporter.put(className, number)
+    }
+    // export
+    def sink = new ByteArrayOutputStream()
+    exporter.writeTo(new DataOutputStream(sink))
+    // re-import
+    def importer = new ClassNameTrie.Builder()
+    def source = new ByteArrayInputStream(sink.toByteArray())
+    importer.readFrom(new DataInputStream(source))
+    then:
+    mapping.each({
+      assert importer.apply(it.key) == it.value
+    })
+  }
 }