Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize SimpleTopLevelIndex by using a StringCache to canonicalize strings #353

Merged
merged 1 commit into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ public static class Node {
/** A builder for {@link TopLevelIndex}es. */
public static class Builder {

// If there are a lot of strings, we'll skip the first few map sizes. If not, 1K of memory
// isn't significant.
private final StringCache stringCache = new StringCache(1024);

public TopLevelIndex build() {
// Freeze the index. The immutability of nodes is enforced by making insert private, doing
// a deep copy here isn't necessary.
Expand All @@ -94,7 +98,7 @@ public void insert(ClassSymbol sym) {
int end = binaryName.indexOf('/');
Node curr = root;
while (end != -1) {
String simpleName = binaryName.substring(start, end);
String simpleName = stringCache.getSubstring(binaryName, start, end);
curr = curr.insert(simpleName, null);
// If we've already inserted something with the current name (either a package or another
// symbol), bail out. When inserting elements from the classpath, this results in the
Expand All @@ -105,6 +109,7 @@ public void insert(ClassSymbol sym) {
start = end + 1;
end = binaryName.indexOf('/', start);
}
// Classname strings are probably unique so not worth caching.
String simpleName = binaryName.substring(start);
curr = curr.insert(simpleName, sym);
if (curr == null || !Objects.equals(curr.sym, sym)) {
Expand Down
109 changes: 109 additions & 0 deletions java/com/google/turbine/binder/lookup/StringCache.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Copyright 2024 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.turbine.binder.lookup;

import static com.google.common.base.Preconditions.checkArgument;

import com.google.common.collect.Maps;
import java.util.HashMap;
import org.jspecify.annotations.Nullable;

/**
* A cache for canonicalizing strings and string-like data.
*
* <p>This class is intended to reduce GC overhead in code where lots of duplicate strings might be
* allocated. As such, the internals are optimized not make allocations while searching for cached
* string instances.
*
* <p>Searches can be made with a variety of keys, without materializing the actual string they
* represent. Materialization only happens if the search fails.
*/
public final class StringCache {

/**
* A map from strings to themselves.
*
* <p>The key-type is {@link Object} so that {@link SubstringKey} can be used to search the map.
* Otherwise we could use a {@link Set}.
*
* <p>This approach exploits the (documented!) fact that {@link HashMap#get} only ever calls
* {@link #equals} on the key parameter, never the stored keys. This allows us to inject our own
* definition of equality, without needing to wrap the keys at rest.
*/
private final HashMap<Object, String> cache;

private final SubstringKey substringKey = new SubstringKey();

public StringCache(int expectedSize) {
this.cache = Maps.newHashMapWithExpectedSize(expectedSize);
}

public String get(String str) {
String result = cache.putIfAbsent(str, str);
return (result == null) ? str : result;
}

public String getSubstring(String superstring, int start, int end) {
checkArgument(0 <= start && start <= end && end <= superstring.length());

this.substringKey.fill(superstring, start, end);
String result = cache.get(this.substringKey);
if (result == null) {
result = superstring.substring(start, end);
cache.put(result, result);
}
return result;
}

/**
* A key based on a substring view.
*
* <p>There is only one instance of SubstringKey per cache. This is possible because it's only
* ever used for searches, never to store values. This reuse prevents a lot of garbage generation.
*/
private static final class SubstringKey {
String superstring;
int start;
int end;
int length;

public void fill(String superstring, int start, int end) {
this.superstring = superstring;
this.start = start;
this.end = end;
this.length = end - start;
}

@Override
@SuppressWarnings({"EqualsBrokenForNull", "EqualsUnsafeCast", "dereference"})
public boolean equals(@Nullable Object that) {
String thatString = (String) that;
return (thatString.length() == this.length)
&& thatString.regionMatches(0, this.superstring, this.start, this.length);
}

@Override
public int hashCode() {
// This implementation must exactly match the documented behavior of String.hashCode().
int result = 0;
for (int i = this.start; i < this.end; i++) {
result = 31 * result + this.superstring.charAt(i);
}
return result;
}
}
}
128 changes: 128 additions & 0 deletions javatests/com/google/turbine/binder/lookup/StringCacheTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
* Copyright 2024 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.turbine.binder.lookup;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
public final class StringCacheTest {

private final StringCache cache = new StringCache(16);

@Test
public void get_string_canonicalizes() {
String foo0 = unique("foo");
String foo1 = unique("foo");
String bar0 = unique("bar");

String cacheFoo0 = cache.get(foo0);
String cacheFoo1 = cache.get(foo1);
String cacheBar0 = cache.get(bar0);

assertThat(cacheFoo0).isSameInstanceAs(foo0);
assertThat(cacheFoo1).isSameInstanceAs(foo0);
assertThat(cacheBar0).isSameInstanceAs(bar0);
}

@Test
public void getSubstring_string_checksBounds() {
String length10 = "0123456789";

assertThrows(Exception.class, () -> cache.getSubstring(length10, -1, 0));
assertThrows(Exception.class, () -> cache.getSubstring(length10, -2, -1));
assertThrows(Exception.class, () -> cache.getSubstring(length10, 0, 11));
assertThrows(Exception.class, () -> cache.getSubstring(length10, 11, 12));
assertThrows(Exception.class, () -> cache.getSubstring(length10, 6, 5));
assertThat(cache.getSubstring(length10, 5, 6)).isNotNull();
assertThat(cache.getSubstring(length10, 0, 0)).isNotNull();
assertThat(cache.getSubstring(length10, 10, 10)).isNotNull();
assertThat(cache.getSubstring(length10, 0, 10)).isNotNull();
}

@Test
public void getSubstring_string_canonicalizes() {
String foobarfoobar0 = unique("foobarfoobar");

String cacheFoo0 = cache.getSubstring(foobarfoobar0, 0, 3);
String cacheBar0 = cache.getSubstring(foobarfoobar0, 3, 6);
String cacheFoo1 = cache.getSubstring(foobarfoobar0, 6, 9);
String cacheBar1 = cache.getSubstring(foobarfoobar0, 9, 12);

assertThat(cacheFoo0).isEqualTo("foo");
assertThat(cacheFoo0).isSameInstanceAs(cacheFoo1);
assertThat(cacheBar0).isEqualTo("bar");
assertThat(cacheBar0).isSameInstanceAs(cacheBar1);
}

@Test
public void crossCanonicalization() {
String foo0 = unique("foo");
String foofoo0 = unique("foofoo");

String cacheFoo0 = cache.get(foo0);
String cacheFoo1 = cache.getSubstring(foofoo0, 0, 3);
String cacheFoo2 = cache.getSubstring(foofoo0, 3, 6);

assertThat(cacheFoo0).isSameInstanceAs(foo0);
assertThat(cacheFoo0).isSameInstanceAs(cacheFoo1);
assertThat(cacheFoo0).isSameInstanceAs(cacheFoo2);
}

@Test
public void hashCollision() {
String nulnulnul0 = unique("\0\0\0");

String cacheEpsilon0 = cache.getSubstring(nulnulnul0, 0, 0);
String cacheEpsilon1 = cache.getSubstring(nulnulnul0, 1, 1);
String cacheEpsilon2 = cache.getSubstring(nulnulnul0, 2, 2);
String cacheEpsilon3 = cache.getSubstring(nulnulnul0, 3, 3);
String cacheNul0 = cache.getSubstring(nulnulnul0, 0, 1);
String cacheNul1 = cache.getSubstring(nulnulnul0, 1, 2);
String cacheNul2 = cache.getSubstring(nulnulnul0, 2, 3);
String cacheNulnul0 = cache.getSubstring(nulnulnul0, 0, 2);
String cacheNulnul1 = cache.getSubstring(nulnulnul0, 1, 3);
String cacheNulnulnul0 = cache.get(nulnulnul0);

assertThat(cacheEpsilon0).isEqualTo("");
assertThat(cacheEpsilon0.hashCode()).isEqualTo(0);
assertThat(cacheEpsilon0).isSameInstanceAs(cacheEpsilon1);
assertThat(cacheEpsilon0).isSameInstanceAs(cacheEpsilon2);
assertThat(cacheEpsilon0).isSameInstanceAs(cacheEpsilon3);

assertThat(cacheNul0).isEqualTo("\0");
assertThat(cacheNul0.hashCode()).isEqualTo(0);
assertThat(cacheNul0).isSameInstanceAs(cacheNul1);
assertThat(cacheNul0).isSameInstanceAs(cacheNul2);

assertThat(cacheNulnul0).isEqualTo("\0\0");
assertThat(cacheNulnul0.hashCode()).isEqualTo(0);
assertThat(cacheNulnul0).isSameInstanceAs(cacheNulnul1);

assertThat(cacheNulnulnul0.hashCode()).isEqualTo(0);
assertThat(cacheNulnulnul0).isSameInstanceAs(nulnulnul0);
}

@SuppressWarnings("StringCopy") // String literals are already canonicalized per class
private static String unique(String s) {
return new String(s);
}
}
Loading