diff --git a/java/com/google/turbine/binder/lookup/SimpleTopLevelIndex.java b/java/com/google/turbine/binder/lookup/SimpleTopLevelIndex.java
index 48ccfb75..3ae701a1 100644
--- a/java/com/google/turbine/binder/lookup/SimpleTopLevelIndex.java
+++ b/java/com/google/turbine/binder/lookup/SimpleTopLevelIndex.java
@@ -78,6 +78,10 @@ public static class Node {
/** A builder for {@link TopLevelIndex}es. */
public static class Builder {
+ // If there are a lot of strings, we'll skip the first few map sizes. If not, 1K of memory
+ // isn't significant.
+ private final StringCache stringCache = new StringCache(1024);
+
public TopLevelIndex build() {
// Freeze the index. The immutability of nodes is enforced by making insert private, doing
// a deep copy here isn't necessary.
@@ -94,7 +98,7 @@ public void insert(ClassSymbol sym) {
int end = binaryName.indexOf('/');
Node curr = root;
while (end != -1) {
- String simpleName = binaryName.substring(start, end);
+ String simpleName = stringCache.getSubstring(binaryName, start, end);
curr = curr.insert(simpleName, null);
// If we've already inserted something with the current name (either a package or another
// symbol), bail out. When inserting elements from the classpath, this results in the
@@ -105,6 +109,7 @@ public void insert(ClassSymbol sym) {
start = end + 1;
end = binaryName.indexOf('/', start);
}
+ // Classname strings are probably unique so not worth caching.
String simpleName = binaryName.substring(start);
curr = curr.insert(simpleName, sym);
if (curr == null || !Objects.equals(curr.sym, sym)) {
diff --git a/java/com/google/turbine/binder/lookup/StringCache.java b/java/com/google/turbine/binder/lookup/StringCache.java
new file mode 100644
index 00000000..95ed6d83
--- /dev/null
+++ b/java/com/google/turbine/binder/lookup/StringCache.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2024 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.turbine.binder.lookup;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import com.google.common.collect.Maps;
+import java.util.HashMap;
+import org.jspecify.annotations.Nullable;
+
+/**
+ * A cache for canonicalizing strings and string-like data.
+ *
+ * <p>This class is intended to reduce GC overhead in code where lots of duplicate strings might be
+ * allocated. As such, the internals are optimized to avoid allocations while searching for
+ * cached string instances.
+ *
+ * <p>Searches can be made with a variety of keys, without materializing the actual string they
+ * represent. Materialization only happens if the search fails.
+ */
+public final class StringCache {
+
+ /**
+ * A map from strings to themselves.
+ *
+ * <p>The key-type is {@link Object} so that {@link SubstringKey} can be used to search the map.
+ * Otherwise we could use a {@link Set}.
+ *
+ * <p>This approach exploits the (documented!) fact that {@link HashMap#get} only ever calls
+ * {@link #equals} on the key parameter, never the stored keys. This allows us to inject our own
+ * definition of equality, without needing to wrap the keys at rest.
+ */
+ private final HashMap