diff --git a/java/com/google/turbine/binder/lookup/SimpleTopLevelIndex.java b/java/com/google/turbine/binder/lookup/SimpleTopLevelIndex.java index 48ccfb75..3ae701a1 100644 --- a/java/com/google/turbine/binder/lookup/SimpleTopLevelIndex.java +++ b/java/com/google/turbine/binder/lookup/SimpleTopLevelIndex.java @@ -78,6 +78,10 @@ public static class Node { /** A builder for {@link TopLevelIndex}es. */ public static class Builder { + // If there are a lot of strings, we'll skip the first few map sizes. If not, 1K of memory + // isn't significant. + private final StringCache stringCache = new StringCache(1024); + public TopLevelIndex build() { // Freeze the index. The immutability of nodes is enforced by making insert private, doing // a deep copy here isn't necessary. @@ -94,7 +98,7 @@ public void insert(ClassSymbol sym) { int end = binaryName.indexOf('/'); Node curr = root; while (end != -1) { - String simpleName = binaryName.substring(start, end); + String simpleName = stringCache.getSubstring(binaryName, start, end); curr = curr.insert(simpleName, null); // If we've already inserted something with the current name (either a package or another // symbol), bail out. When inserting elements from the classpath, this results in the @@ -105,6 +109,7 @@ public void insert(ClassSymbol sym) { start = end + 1; end = binaryName.indexOf('/', start); } + // Classname strings are probably unique so not worth caching. String simpleName = binaryName.substring(start); curr = curr.insert(simpleName, sym); if (curr == null || !Objects.equals(curr.sym, sym)) { diff --git a/java/com/google/turbine/binder/lookup/StringCache.java b/java/com/google/turbine/binder/lookup/StringCache.java new file mode 100644 index 00000000..95ed6d83 --- /dev/null +++ b/java/com/google/turbine/binder/lookup/StringCache.java @@ -0,0 +1,109 @@ +/* + * Copyright 2024 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.turbine.binder.lookup; + +import static com.google.common.base.Preconditions.checkArgument; + +import com.google.common.collect.Maps; +import java.util.HashMap; +import org.jspecify.annotations.Nullable; + +/** + * A cache for canonicalizing strings and string-like data. + * + *

This class is intended to reduce GC overhead in code where lots of duplicate strings might be + * allocated. As such, the internals are optimized not make allocations while searching for cached + * string instances. + * + *

Searches can be made with a variety of keys, without materializing the actual string they + * represent. Materialization only happens if the search fails. + */ +public final class StringCache { + + /** + * A map from strings to themselves. + * + *

The key-type is {@link Object} so that {@link SubstringKey} can be used to search the map. + * Otherwise we could use a {@link Set}. + * + *

This approach exploits the (documented!) fact that {@link HashMap#get} only ever calls + * {@link #equals} on the key parameter, never the stored keys. This allows us to inject our own + * definition of equality, without needing to wrap the keys at rest. + */ + private final HashMap cache; + + private final SubstringKey substringKey = new SubstringKey(); + + public StringCache(int expectedSize) { + this.cache = Maps.newHashMapWithExpectedSize(expectedSize); + } + + public String get(String str) { + String result = cache.putIfAbsent(str, str); + return (result == null) ? str : result; + } + + public String getSubstring(String superstring, int start, int end) { + checkArgument(0 <= start && start <= end && end <= superstring.length()); + + this.substringKey.fill(superstring, start, end); + String result = cache.get(this.substringKey); + if (result == null) { + result = superstring.substring(start, end); + cache.put(result, result); + } + return result; + } + + /** + * A key based on a substring view. + * + *

There is only one instance of SubstringKey per cache. This is possible because it's only + * ever used for searches, never to store values. This reuse prevents a lot of garbage generation. + */ + private static final class SubstringKey { + String superstring; + int start; + int end; + int length; + + public void fill(String superstring, int start, int end) { + this.superstring = superstring; + this.start = start; + this.end = end; + this.length = end - start; + } + + @Override + @SuppressWarnings({"EqualsBrokenForNull", "EqualsUnsafeCast", "dereference"}) + public boolean equals(@Nullable Object that) { + String thatString = (String) that; + return (thatString.length() == this.length) + && thatString.regionMatches(0, this.superstring, this.start, this.length); + } + + @Override + public int hashCode() { + // This implementation must exactly match the documented behavior of String.hashCode(). + int result = 0; + for (int i = this.start; i < this.end; i++) { + result = 31 * result + this.superstring.charAt(i); + } + return result; + } + } +} diff --git a/javatests/com/google/turbine/binder/lookup/StringCacheTest.java b/javatests/com/google/turbine/binder/lookup/StringCacheTest.java new file mode 100644 index 00000000..82d90d9e --- /dev/null +++ b/javatests/com/google/turbine/binder/lookup/StringCacheTest.java @@ -0,0 +1,128 @@ +/* + * Copyright 2024 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.turbine.binder.lookup; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertThrows; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public final class StringCacheTest { + + private final StringCache cache = new StringCache(16); + + @Test + public void get_string_canonicalizes() { + String foo0 = unique("foo"); + String foo1 = unique("foo"); + String bar0 = unique("bar"); + + String cacheFoo0 = cache.get(foo0); + String cacheFoo1 = cache.get(foo1); + String cacheBar0 = cache.get(bar0); + + assertThat(cacheFoo0).isSameInstanceAs(foo0); + assertThat(cacheFoo1).isSameInstanceAs(foo0); + assertThat(cacheBar0).isSameInstanceAs(bar0); + } + + @Test + public void getSubstring_string_checksBounds() { + String length10 = "0123456789"; + + assertThrows(Exception.class, () -> cache.getSubstring(length10, -1, 0)); + assertThrows(Exception.class, () -> cache.getSubstring(length10, -2, -1)); + assertThrows(Exception.class, () -> cache.getSubstring(length10, 0, 11)); + assertThrows(Exception.class, () -> cache.getSubstring(length10, 11, 12)); + assertThrows(Exception.class, () -> cache.getSubstring(length10, 6, 5)); + assertThat(cache.getSubstring(length10, 5, 6)).isNotNull(); + assertThat(cache.getSubstring(length10, 0, 0)).isNotNull(); + assertThat(cache.getSubstring(length10, 10, 10)).isNotNull(); + assertThat(cache.getSubstring(length10, 0, 10)).isNotNull(); + } + + @Test + public void getSubstring_string_canonicalizes() { + String foobarfoobar0 = unique("foobarfoobar"); + + String cacheFoo0 = cache.getSubstring(foobarfoobar0, 0, 3); + String cacheBar0 = cache.getSubstring(foobarfoobar0, 3, 6); + String cacheFoo1 = cache.getSubstring(foobarfoobar0, 6, 9); + String cacheBar1 = cache.getSubstring(foobarfoobar0, 9, 12); + + assertThat(cacheFoo0).isEqualTo("foo"); + assertThat(cacheFoo0).isSameInstanceAs(cacheFoo1); + assertThat(cacheBar0).isEqualTo("bar"); + assertThat(cacheBar0).isSameInstanceAs(cacheBar1); + } + + @Test + public void crossCanonicalization() { + String foo0 = unique("foo"); + String foofoo0 = unique("foofoo"); + + String cacheFoo0 = cache.get(foo0); + String cacheFoo1 = cache.getSubstring(foofoo0, 0, 3); + String cacheFoo2 = cache.getSubstring(foofoo0, 3, 6); + + assertThat(cacheFoo0).isSameInstanceAs(foo0); + assertThat(cacheFoo0).isSameInstanceAs(cacheFoo1); + assertThat(cacheFoo0).isSameInstanceAs(cacheFoo2); + } + + @Test + public void hashCollision() { + String nulnulnul0 = unique("\0\0\0"); + + String cacheEpsilon0 = cache.getSubstring(nulnulnul0, 0, 0); + String cacheEpsilon1 = cache.getSubstring(nulnulnul0, 1, 1); + String cacheEpsilon2 = cache.getSubstring(nulnulnul0, 2, 2); + String cacheEpsilon3 = cache.getSubstring(nulnulnul0, 3, 3); + String cacheNul0 = cache.getSubstring(nulnulnul0, 0, 1); + String cacheNul1 = cache.getSubstring(nulnulnul0, 1, 2); + String cacheNul2 = cache.getSubstring(nulnulnul0, 2, 3); + String cacheNulnul0 = cache.getSubstring(nulnulnul0, 0, 2); + String cacheNulnul1 = cache.getSubstring(nulnulnul0, 1, 3); + String cacheNulnulnul0 = cache.get(nulnulnul0); + + assertThat(cacheEpsilon0).isEqualTo(""); + assertThat(cacheEpsilon0.hashCode()).isEqualTo(0); + assertThat(cacheEpsilon0).isSameInstanceAs(cacheEpsilon1); + assertThat(cacheEpsilon0).isSameInstanceAs(cacheEpsilon2); + assertThat(cacheEpsilon0).isSameInstanceAs(cacheEpsilon3); + + assertThat(cacheNul0).isEqualTo("\0"); + assertThat(cacheNul0.hashCode()).isEqualTo(0); + assertThat(cacheNul0).isSameInstanceAs(cacheNul1); + assertThat(cacheNul0).isSameInstanceAs(cacheNul2); + + assertThat(cacheNulnul0).isEqualTo("\0\0"); + assertThat(cacheNulnul0.hashCode()).isEqualTo(0); + assertThat(cacheNulnul0).isSameInstanceAs(cacheNulnul1); + + assertThat(cacheNulnulnul0.hashCode()).isEqualTo(0); + assertThat(cacheNulnulnul0).isSameInstanceAs(nulnulnul0); + } + + @SuppressWarnings("StringCopy") // String literals are already canonicalized per class + private static String unique(String s) { + return new String(s); + } +}