Skip to content

Commit

Permalink
Optimize SimpleTopLevelIndex by using a StringCache to canonicalize s…
Browse files Browse the repository at this point in the history
…trings

The idea is to prevent allocation lots of duplicate strings, by exploiting the fact that most class files in a JAR have a lot of common path segments.

PiperOrigin-RevId: 691260507
  • Loading branch information
nreid260 authored and Javac Team committed Oct 30, 2024
1 parent b7b5536 commit 3fcb10a
Show file tree
Hide file tree
Showing 3 changed files with 243 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ public static class Node {
/** A builder for {@link TopLevelIndex}es. */
public static class Builder {

// If there are a lot of strings, we'll skip the first few map sizes. If not, 1K of memory
// isn't significant.
private final StringCache stringCache = new StringCache(1024);

public TopLevelIndex build() {
// Freeze the index. The immutability of nodes is enforced by making insert private, doing
// a deep copy here isn't necessary.
Expand All @@ -94,7 +98,7 @@ public void insert(ClassSymbol sym) {
int end = binaryName.indexOf('/');
Node curr = root;
while (end != -1) {
String simpleName = binaryName.substring(start, end);
String simpleName = stringCache.getSubstring(binaryName, start, end);
curr = curr.insert(simpleName, null);
// If we've already inserted something with the current name (either a package or another
// symbol), bail out. When inserting elements from the classpath, this results in the
Expand All @@ -105,6 +109,7 @@ public void insert(ClassSymbol sym) {
start = end + 1;
end = binaryName.indexOf('/', start);
}
// Classname strings are probably unique so not worth caching.
String simpleName = binaryName.substring(start);
curr = curr.insert(simpleName, sym);
if (curr == null || !Objects.equals(curr.sym, sym)) {
Expand Down
109 changes: 109 additions & 0 deletions java/com/google/turbine/binder/lookup/StringCache.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Copyright 2024 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.turbine.binder.lookup;

import static com.google.common.base.Preconditions.checkArgument;

import com.google.common.collect.Maps;
import java.util.HashMap;
import org.jspecify.annotations.Nullable;

/**
* A cache for canonicalizing strings and string-like data.
*
* <p>This class is intended to reduce GC overhead in code where lots of duplicate strings might be
* allocated. As such, the internals are optimized not make allocations while searching for cached
* string instances.
*
* <p>Searches can be made with a variety of keys, without materializing the actual string they
* represent. Materialization only happens if the search fails.
*/
public final class StringCache {

/**
* A map from strings to themselves.
*
* <p>The key-type is {@link Object} so that {@link SubstringKey} can be used to search the map.
* Otherwise we could use a {@link Set}.
*
* <p>This approach exploits the (documented!) fact that {@link HashMap#get} only ever calls
* {@link #equals} on the key parameter, never the stored keys. This allows us to inject our own
* definition of equality, without needing to wrap the keys at rest.
*/
private final HashMap<Object, String> cache;

private final SubstringKey substringKey = new SubstringKey();

public StringCache(int expectedSize) {
this.cache = Maps.newHashMapWithExpectedSize(expectedSize);
}

public String get(String str) {
String result = cache.putIfAbsent(str, str);
return (result == null) ? str : result;
}

public String getSubstring(String superstring, int start, int end) {
checkArgument(0 <= start && start <= end && end <= superstring.length());

this.substringKey.fill(superstring, start, end);
String result = cache.get(this.substringKey);
if (result == null) {
result = superstring.substring(start, end);
cache.put(result, result);
}
return result;
}

/**
* A key based on a substring view.
*
* <p>There is only one instance of SubstringKey per cache. This is possible because it's only
* ever used for searches, never to store values. This reuse prevents a lot of garbage generation.
*/
private static final class SubstringKey {
String superstring;
int start;
int end;
int length;

public void fill(String superstring, int start, int end) {
this.superstring = superstring;
this.start = start;
this.end = end;
this.length = end - start;
}

@Override
@SuppressWarnings({"EqualsBrokenForNull", "EqualsUnsafeCast", "dereference"})
public boolean equals(@Nullable Object that) {
String thatString = (String) that;
return (thatString.length() == this.length)
&& thatString.regionMatches(0, this.superstring, this.start, this.length);
}

@Override
public int hashCode() {
// This implementation must exactly match the documented behavior of String.hashCode().
int result = 0;
for (int i = this.start; i < this.end; i++) {
result = 31 * result + this.superstring.charAt(i);
}
return result;
}
}
}
128 changes: 128 additions & 0 deletions javatests/com/google/turbine/binder/lookup/StringCacheTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
* Copyright 2024 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.turbine.binder.lookup;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
public final class StringCacheTest {

private final StringCache cache = new StringCache(16);

@Test
public void get_string_canonicalizes() {
String foo0 = unique("foo");
String foo1 = unique("foo");
String bar0 = unique("bar");

String cacheFoo0 = cache.get(foo0);
String cacheFoo1 = cache.get(foo1);
String cacheBar0 = cache.get(bar0);

assertThat(cacheFoo0).isSameInstanceAs(foo0);
assertThat(cacheFoo1).isSameInstanceAs(foo0);
assertThat(cacheBar0).isSameInstanceAs(bar0);
}

@Test
public void getSubstring_string_checksBounds() {
String length10 = "0123456789";

assertThrows(Exception.class, () -> cache.getSubstring(length10, -1, 0));
assertThrows(Exception.class, () -> cache.getSubstring(length10, -2, -1));
assertThrows(Exception.class, () -> cache.getSubstring(length10, 0, 11));
assertThrows(Exception.class, () -> cache.getSubstring(length10, 11, 12));
assertThrows(Exception.class, () -> cache.getSubstring(length10, 6, 5));
assertThat(cache.getSubstring(length10, 5, 6)).isNotNull();
assertThat(cache.getSubstring(length10, 0, 0)).isNotNull();
assertThat(cache.getSubstring(length10, 10, 10)).isNotNull();
assertThat(cache.getSubstring(length10, 0, 10)).isNotNull();
}

@Test
public void getSubstring_string_canonicalizes() {
String foobarfoobar0 = unique("foobarfoobar");

String cacheFoo0 = cache.getSubstring(foobarfoobar0, 0, 3);
String cacheBar0 = cache.getSubstring(foobarfoobar0, 3, 6);
String cacheFoo1 = cache.getSubstring(foobarfoobar0, 6, 9);
String cacheBar1 = cache.getSubstring(foobarfoobar0, 9, 12);

assertThat(cacheFoo0).isEqualTo("foo");
assertThat(cacheFoo0).isSameInstanceAs(cacheFoo1);
assertThat(cacheBar0).isEqualTo("bar");
assertThat(cacheBar0).isSameInstanceAs(cacheBar1);
}

@Test
public void crossCanonicalization() {
String foo0 = unique("foo");
String foofoo0 = unique("foofoo");

String cacheFoo0 = cache.get(foo0);
String cacheFoo1 = cache.getSubstring(foofoo0, 0, 3);
String cacheFoo2 = cache.getSubstring(foofoo0, 3, 6);

assertThat(cacheFoo0).isSameInstanceAs(foo0);
assertThat(cacheFoo0).isSameInstanceAs(cacheFoo1);
assertThat(cacheFoo0).isSameInstanceAs(cacheFoo2);
}

@Test
public void hashCollision() {
String nulnulnul0 = unique("\0\0\0");

String cacheEpsilon0 = cache.getSubstring(nulnulnul0, 0, 0);
String cacheEpsilon1 = cache.getSubstring(nulnulnul0, 1, 1);
String cacheEpsilon2 = cache.getSubstring(nulnulnul0, 2, 2);
String cacheEpsilon3 = cache.getSubstring(nulnulnul0, 3, 3);
String cacheNul0 = cache.getSubstring(nulnulnul0, 0, 1);
String cacheNul1 = cache.getSubstring(nulnulnul0, 1, 2);
String cacheNul2 = cache.getSubstring(nulnulnul0, 2, 3);
String cacheNulnul0 = cache.getSubstring(nulnulnul0, 0, 2);
String cacheNulnul1 = cache.getSubstring(nulnulnul0, 1, 3);
String cacheNulnulnul0 = cache.get(nulnulnul0);

assertThat(cacheEpsilon0).isEqualTo("");
assertThat(cacheEpsilon0.hashCode()).isEqualTo(0);
assertThat(cacheEpsilon0).isSameInstanceAs(cacheEpsilon1);
assertThat(cacheEpsilon0).isSameInstanceAs(cacheEpsilon2);
assertThat(cacheEpsilon0).isSameInstanceAs(cacheEpsilon3);

assertThat(cacheNul0).isEqualTo("\0");
assertThat(cacheNul0.hashCode()).isEqualTo(0);
assertThat(cacheNul0).isSameInstanceAs(cacheNul1);
assertThat(cacheNul0).isSameInstanceAs(cacheNul2);

assertThat(cacheNulnul0).isEqualTo("\0\0");
assertThat(cacheNulnul0.hashCode()).isEqualTo(0);
assertThat(cacheNulnul0).isSameInstanceAs(cacheNulnul1);

assertThat(cacheNulnulnul0.hashCode()).isEqualTo(0);
assertThat(cacheNulnulnul0).isSameInstanceAs(nulnulnul0);
}

@SuppressWarnings("StringCopy") // String literals are already canonicalized per class
private static String unique(String s) {
return new String(s);
}
}

0 comments on commit 3fcb10a

Please sign in to comment.