diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD index 72635b62b1aacd..ecc916563a2a21 100644 --- a/src/main/java/com/google/devtools/build/lib/BUILD +++ b/src/main/java/com/google/devtools/build/lib/BUILD @@ -29,6 +29,7 @@ filegroup( "//src/main/java/com/google/devtools/build/lib/bazel/debug:srcs", "//src/main/java/com/google/devtools/build/lib/clock:srcs", "//src/main/java/com/google/devtools/build/lib/cmdline:srcs", + "//src/main/java/com/google/devtools/build/lib/collect/compacthashmap:srcs", "//src/main/java/com/google/devtools/build/lib/collect/compacthashset:srcs", "//src/main/java/com/google/devtools/build/lib/collect/nestedset:srcs", "//src/main/java/com/google/devtools/build/lib/collect:srcs", diff --git a/src/main/java/com/google/devtools/build/lib/collect/compacthashmap/BUILD b/src/main/java/com/google/devtools/build/lib/collect/compacthashmap/BUILD new file mode 100644 index 00000000000000..a2aca07a4a51e4 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/collect/compacthashmap/BUILD @@ -0,0 +1,19 @@ +package(default_visibility = ["//src:__subpackages__"]) + +filegroup( + name = "srcs", + srcs = glob(["**"]), + visibility = ["//src/main/java/com/google/devtools/build/lib:__pkg__"], +) + +# Library of collection utilities. +java_library( + name = "compacthashmap", + srcs = glob([ + "*.java", + ]), + deps = [ + "//third_party:guava", + "//third_party:jsr305", + ], +) diff --git a/src/main/java/com/google/devtools/build/lib/collect/compacthashmap/CompactHashMap.java b/src/main/java/com/google/devtools/build/lib/collect/compacthashmap/CompactHashMap.java new file mode 100644 index 00000000000000..9cfbf58c6fee4d --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/collect/compacthashmap/CompactHashMap.java @@ -0,0 +1,909 @@ +// Copyright 2019 The Bazel Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/* + * Copyright (C) 2012 The Guava Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.devtools.build.lib.collect.compacthashmap; + +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Ints; +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import java.io.IOException; +import java.io.InvalidObjectException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.util.AbstractCollection; +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.Arrays; +import java.util.Collection; +import java.util.ConcurrentModificationException; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Consumer; +import org.checkerframework.checker.nullness.qual.MonotonicNonNull; +import org.checkerframework.checker.nullness.qual.Nullable; + +/** + * CompactHashMap is an implementation of a Map. All optional operations (put and remove) are + * supported. Null keys and values are supported. + * + *
{@code containsKey(k)}, {@code put(k, v)} and {@code remove(k)} are all (expected and + * amortized) constant time operations. Expected in the hashtable sense (depends on the hash + * function doing a good job of distributing the elements to the buckets to a distribution not far + * from uniform), and amortized since some operations can trigger a hash table resize. + * + *
Unlike {@code java.util.HashMap}, iteration is only proportional to the actual {@code size()}, + * which is optimal, and not the size of the internal hashtable, which could be much larger + * than {@code size()}. Furthermore, this structure places significantly reduced load on the garbage + * collector by only using a constant number of internal objects. + * + *
If there are no removals, then iteration order for the {@link #entrySet}, {@link #keySet}, and + * {@link #values} views is the same as insertion order. Any removal invalidates any ordering + * guarantees. + * + *
This class should not be assumed to be universally superior to {@code java.util.HashMap}.
+ * Generally speaking, this class reduces object allocation and memory consumption at the price of
+ * moderately increased constant factors of CPU. Only use this class when there is a specific reason
+ * to prioritize memory over CPU.
+ *
+ * @author Louis Wasserman
+ */
+public class CompactHashMap Currently, the UNSET value means "null pointer", and any non negative value x is the actual
+ * index.
+ *
+ * Its size must be a power of two.
+ */
+ // Bucket heads: table[bucket] holds the entry index of the first entry in that bucket's
+ // chain, or UNSET when the bucket is empty.
+ // NOTE(review): the javadoc for this field appears truncated in this patch; per the surviving
+ // fragment, the table's size must be a power of two (see hashTableMask()).
+ private transient int @MonotonicNonNull [] table;
+
+ /**
+ * Contains the logical entries, in the range of [0, size()). The high 32 bits of each long is the
+ * smeared hash of the element, whereas the low 32 bits is the "next" pointer (pointing to the
+ * next entry in the bucket chain). The pointers in [size(), entries.length) are all "null"
+ * (UNSET).
+ */
+ @VisibleForTesting transient long @MonotonicNonNull [] entries;
+
+ /**
+ * The keys of the entries in the map, in the range of [0, size()). The keys in [size(),
+ * keys.length) are all {@code null}.
+ */
+ @VisibleForTesting transient Object @MonotonicNonNull [] keys;
+
+ /**
+ * The values of the entries in the map, in the range of [0, size()). The values in [size(),
+ * values.length) are all {@code null}.
+ */
+ @VisibleForTesting transient Object @MonotonicNonNull [] values;
+
+ /**
+ * Keeps track of modifications of this set, to make it possible to throw
+ * ConcurrentModificationException in the iterator. Note that we choose not to make this volatile,
+ * so we do less of a "best effort" to track such errors, for better performance.
+ */
+ // Before allocArrays() runs, this field temporarily stores the expected size instead
+ // (see init()); arrays are allocated lazily on first put().
+ transient int modCount;
+
+ /** The number of elements contained in the set. */
+ // Also the index of the next free slot in entries/keys/values, since entries are kept dense.
+ private transient int size;
+
+ /**
+ * Constructs a new empty instance of {@code CompactHashMap}, using the class's default
+ * expected size (DEFAULT_SIZE, declared earlier in the class).
+ */
+ CompactHashMap() {
+ init(DEFAULT_SIZE);
+ }
+
+ /**
+ * Constructs a new instance of {@code CompactHashMap} with the specified capacity.
+ *
+ * <p>Note that no arrays are allocated here; allocation is deferred until the first write
+ * (see {@link #allocArrays}).
+ *
+ * @param expectedSize the initial capacity of this {@code CompactHashMap}.
+ */
+ CompactHashMap(int expectedSize) {
+ init(expectedSize);
+ }
+
+ /**
+ * Pseudoconstructor for serialization support.
+ *
+ * <p>Arrays are not allocated here. Instead, the expected size is stashed in {@code modCount}
+ * (which is otherwise unused until the arrays exist) so that {@link #allocArrays} can read it
+ * back later without needing a dedicated field.
+ *
+ * @throws IllegalArgumentException if {@code expectedSize} is negative
+ */
+ void init(int expectedSize) {
+ Preconditions.checkArgument(expectedSize >= 0, "Expected size must be non-negative");
+ this.modCount = Math.max(1, expectedSize); // Save expectedSize for use in allocArrays()
+ }
+
+ /**
+ * Returns whether arrays need to be allocated. {@code table == null} is the canonical signal
+ * that the lazily-allocated backing arrays do not exist yet.
+ */
+ boolean needsAllocArrays() {
+ return table == null;
+ }
+
+ /**
+ * Handle lazy allocation of arrays. Must be called exactly once, before the first write.
+ * Reads the expected size that {@link #init} stashed in {@code modCount}.
+ */
+ void allocArrays() {
+ checkState(needsAllocArrays(), "Arrays already allocated");
+
+ // modCount still holds the expected size at this point (see init()).
+ int expectedSize = modCount;
+ // Bucket count is derived from the expected size and load factor; must be a power of two.
+ int buckets = closedTableSize(expectedSize, LOAD_FACTOR);
+ this.table = newTable(buckets);
+
+ this.entries = newEntries(expectedSize);
+ this.keys = new Object[expectedSize];
+ this.values = new Object[expectedSize];
+ }
+
+ /** Allocates a bucket array of the given size with every bucket initially empty (UNSET). */
+ private static int[] newTable(int size) {
+ int[] buckets = new int[size];
+ for (int i = 0; i < buckets.length; i++) {
+ buckets[i] = UNSET;
+ }
+ return buckets;
+ }
+
+ /** Allocates an entry array of the given size with every slot initially "null" (UNSET). */
+ private static long[] newEntries(int size) {
+ long[] slots = new long[size];
+ for (int i = 0; i < slots.length; i++) {
+ slots[i] = UNSET;
+ }
+ return slots;
+ }
+
+ /** Returns the mask used to map a hash to a bucket; valid because table.length is a power of two. */
+ private int hashTableMask() {
+ return table.length - 1;
+ }
+
+ /** Extracts the smeared hash stored in the high 32 bits of an entry. */
+ private static int getHash(long entry) {
+ return (int) (entry >>> 32);
+ }
+
+ /** Returns the index, or UNSET if the pointer is "null" */
+ // The "next" pointer lives in the low 32 bits; the narrowing cast discards the hash.
+ private static int getNext(long entry) {
+ return (int) entry;
+ }
+
+ /** Returns a new entry value by changing the "next" index of an existing entry */
+ // Keeps the hash (HASH_MASK, high bits) and replaces the pointer (NEXT_MASK, low bits).
+ private static long swapNext(long entry, int newNext) {
+ return (HASH_MASK & entry) | (NEXT_MASK & newNext);
+ }
+
+ /**
+ * Mark an access of the specified entry. Used only in {@code CompactLinkedHashMap} for LRU
+ * ordering.
+ *
+ * @param index the entry index that was accessed; callers may pass -1 for an absent key
+ * (see {@link #get}), so overrides must tolerate that.
+ */
+ void accessEntry(int index) {
+ // no-op by default
+ }
+
+ /**
+ * Associates {@code key} with {@code value}, returning the previous value for {@code key},
+ * or {@code null} if there was none. Walks the bucket chain for the key's hash; on a match,
+ * overwrites in place, otherwise appends a new entry at index {@code size} and links it in.
+ */
+ @CanIgnoreReturnValue
+ @Override
+ public @Nullable V put(@Nullable K key, @Nullable V value) {
+ if (needsAllocArrays()) {
+ allocArrays();
+ }
+ // Local copies of the field arrays; note these may be stale after resizeMeMaybe() below,
+ // which is why insertEntry() re-reads the fields.
+ long[] entries = this.entries;
+ Object[] keys = this.keys;
+ Object[] values = this.values;
+
+ int hash = smearedHash(key);
+ int tableIndex = hash & hashTableMask();
+ int newEntryIndex = this.size; // current size, and pointer to the entry to be appended
+ int next = table[tableIndex];
+ if (next == UNSET) { // uninitialized bucket
+ table[tableIndex] = newEntryIndex;
+ } else {
+ // Walk the chain; if the key already exists, replace its value and return the old one.
+ int last;
+ long entry;
+ do {
+ last = next;
+ entry = entries[next];
+ if (getHash(entry) == hash && Objects.equal(key, keys[next])) {
+ @SuppressWarnings("unchecked") // known to be a V
+ @Nullable
+ V oldValue = (V) values[next];
+
+ values[next] = value;
+ accessEntry(next);
+ // Note: a value overwrite does not bump modCount (structure unchanged).
+ return oldValue;
+ }
+ next = getNext(entry);
+ } while (next != UNSET);
+ // Key not found: link the soon-to-be-appended entry onto the end of the chain.
+ entries[last] = swapNext(entry, newEntryIndex);
+ }
+ if (newEntryIndex == Integer.MAX_VALUE) {
+ throw new IllegalStateException("Cannot contain more than Integer.MAX_VALUE elements!");
+ }
+ int newSize = newEntryIndex + 1;
+ resizeMeMaybe(newSize); // may grow entries/keys/values
+ insertEntry(newEntryIndex, key, value, hash);
+ this.size = newSize;
+ // Grow the bucket table (doubling keeps it a power of two) if the load factor is exceeded.
+ int oldCapacity = table.length;
+ if (needsResizing(newEntryIndex, oldCapacity, LOAD_FACTOR)) {
+ resizeTable(2 * oldCapacity);
+ }
+ modCount++;
+ return null;
+ }
+
+ /**
+ * Creates a fresh entry with the specified object at the specified position in the entry arrays.
+ * The hash goes in the high 32 bits; the "next" pointer is initialized to UNSET (end of chain).
+ */
+ void insertEntry(int entryIndex, @Nullable K key, @Nullable V value, int hash) {
+ this.entries[entryIndex] = ((long) hash << 32) | (NEXT_MASK & UNSET);
+ this.keys[entryIndex] = key;
+ this.values[entryIndex] = value;
+ }
+
+ /** Resizes the entries storage if necessary. Grows by ~1.5x, clamped at Integer.MAX_VALUE. */
+ private void resizeMeMaybe(int newSize) {
+ int entriesSize = entries.length;
+ if (newSize > entriesSize) {
+ // 1.5x growth; Math.max(1, ...) guarantees progress even when entriesSize is 0 or 1.
+ int newCapacity = entriesSize + Math.max(1, entriesSize >>> 1);
+ if (newCapacity < 0) {
+ // int overflow: saturate at the maximum representable capacity.
+ newCapacity = Integer.MAX_VALUE;
+ }
+ if (newCapacity != entriesSize) {
+ resizeEntries(newCapacity);
+ }
+ }
+ }
+
+ /**
+ * Resizes the internal entries array to the specified capacity, which may be greater or less than
+ * the current capacity.
+ *
+ * <p>All three parallel arrays are resized together; newly added entry slots are filled with
+ * UNSET so their "next" pointers read as "null".
+ */
+ void resizeEntries(int newCapacity) {
+ this.keys = Arrays.copyOf(keys, newCapacity);
+ this.values = Arrays.copyOf(values, newCapacity);
+ long[] entries = this.entries;
+ int oldCapacity = entries.length;
+ entries = Arrays.copyOf(entries, newCapacity);
+ if (newCapacity > oldCapacity) {
+ Arrays.fill(entries, oldCapacity, newCapacity, UNSET);
+ }
+ this.entries = entries;
+ }
+
+ /**
+ * Rebuilds the bucket table at the new capacity, redistributing every live entry. Each entry is
+ * prepended to its new bucket's chain (the old chain order within a bucket is not preserved).
+ */
+ private void resizeTable(int newCapacity) { // newCapacity always a power of two
+ int[] newTable = newTable(newCapacity);
+ long[] entries = this.entries;
+
+ int mask = newTable.length - 1;
+ for (int i = 0; i < size; i++) {
+ long oldEntry = entries[i];
+ int hash = getHash(oldEntry);
+ int tableIndex = hash & mask;
+ // Prepend entry i: its "next" becomes the old bucket head, and it becomes the new head.
+ int next = newTable[tableIndex];
+ newTable[tableIndex] = i;
+ entries[i] = ((long) hash << 32) | (NEXT_MASK & next);
+ }
+
+ this.table = newTable;
+ }
+
+ /**
+ * Returns the entry index of {@code key}, or -1 if absent. Compares the stored smeared hash
+ * first as a cheap filter before the (possibly expensive) equals() check.
+ */
+ private int indexOf(@Nullable Object key) {
+ if (needsAllocArrays()) {
+ // Nothing has ever been inserted; the map is empty.
+ return -1;
+ }
+ int hash = smearedHash(key);
+ int next = table[hash & hashTableMask()];
+ while (next != UNSET) {
+ long entry = entries[next];
+ if (getHash(entry) == hash && Objects.equal(key, keys[next])) {
+ return next;
+ }
+ next = getNext(entry);
+ }
+ return -1;
+ }
+
+ /** Returns whether this map contains an entry for {@code key} (which may be null). */
+ @Override
+ public boolean containsKey(@Nullable Object key) {
+ // indexOf yields a non-negative entry index when present, -1 otherwise.
+ return indexOf(key) >= 0;
+ }
+
+ /**
+ * Returns the value mapped to {@code key}, or {@code null} if absent (or mapped to null).
+ * Note that {@link #accessEntry} is invoked even when the key is absent (index == -1);
+ * the default implementation is a no-op, and overrides must tolerate -1.
+ */
+ @SuppressWarnings("unchecked") // values only contains Vs
+ @Override
+ public V get(@Nullable Object key) {
+ int index = indexOf(key);
+ accessEntry(index);
+ return (index == -1) ? null : (V) values[index];
+ }
+
+ /**
+ * Removes the entry for {@code key} if present, returning its previous value or {@code null}.
+ */
+ @CanIgnoreReturnValue
+ @Override
+ public @Nullable V remove(@Nullable Object key) {
+ if (needsAllocArrays()) {
+ // Arrays never allocated => map is empty => nothing to remove.
+ return null;
+ }
+ return remove(key, smearedHash(key));
+ }
+
+ /**
+ * Removes the entry matching {@code key} with the given smeared hash: walks the bucket chain,
+ * unlinks the matching entry, then compacts storage via {@link #moveLastEntry}.
+ */
+ private @Nullable V remove(@Nullable Object key, int hash) {
+ int tableIndex = hash & hashTableMask();
+ int next = table[tableIndex];
+ if (next == UNSET) { // empty bucket
+ return null;
+ }
+ int last = UNSET;
+ do {
+ if (getHash(entries[next]) == hash && Objects.equal(key, keys[next])) {
+ @SuppressWarnings("unchecked") // values only contains Vs
+ @Nullable
+ V oldValue = (V) values[next];
+
+ if (last == UNSET) {
+ // we need to update the root link from table[]
+ table[tableIndex] = getNext(entries[next]);
+ } else {
+ // we need to update the link from the chain
+ entries[last] = swapNext(entries[last], getNext(entries[next]));
+ }
+
+ // Keep entries dense: fill the freed slot with the last entry, then shrink.
+ moveLastEntry(next);
+ size--;
+ modCount++;
+ return oldValue;
+ }
+ last = next;
+ next = getNext(entries[next]);
+ } while (next != UNSET);
+ return null;
+ }
+
+ /** Removes the entry at {@code entryIndex}, using the hash already stored in the entry. */
+ @CanIgnoreReturnValue
+ private V removeEntry(int entryIndex) {
+ return remove(keys[entryIndex], getHash(entries[entryIndex]));
+ }
+
+ /**
+ * Moves the last entry in the entry array into {@code dstIndex}, and nulls out its old position.
+ * This keeps the parallel arrays dense in [0, size()) after a removal; the caller decrements
+ * {@code size} afterwards. Callers must have already unlinked the entry at {@code dstIndex}
+ * from its bucket chain.
+ */
+ void moveLastEntry(int dstIndex) {
+ int srcIndex = size() - 1;
+ if (dstIndex < srcIndex) {
+ // move last entry to deleted spot
+ keys[dstIndex] = keys[srcIndex];
+ values[dstIndex] = values[srcIndex];
+ keys[srcIndex] = null;
+ values[srcIndex] = null;
+
+ // move the last entry to the removed spot, just like we moved the element
+ long lastEntry = entries[srcIndex];
+ entries[dstIndex] = lastEntry;
+ entries[srcIndex] = UNSET;
+
+ // also need to update whoever's "next" pointer was pointing to the last entry place
+ // reusing "tableIndex" and "next"; these variables were no longer needed
+ int tableIndex = getHash(lastEntry) & hashTableMask();
+ int lastNext = table[tableIndex];
+ if (lastNext == srcIndex) {
+ // we need to update the root pointer
+ table[tableIndex] = dstIndex;
+ } else {
+ // we need to update a pointer in an entry
+ // Walk the moved entry's bucket chain until we find its predecessor.
+ int previous;
+ long entry;
+ do {
+ previous = lastNext;
+ lastNext = getNext(entry = entries[lastNext]);
+ } while (lastNext != srcIndex);
+ // here, entries[previous] points to the old entry location; update it
+ entries[previous] = swapNext(entry, dstIndex);
+ }
+ } else {
+ // The removed entry was itself the last one: just clear its slot.
+ keys[dstIndex] = null;
+ values[dstIndex] = null;
+ entries[dstIndex] = UNSET;
+ }
+ }
+
+ /** Returns the index of the first entry for iteration, or -1 when the map is empty. */
+ int firstEntryIndex() {
+ // Entries are stored densely starting at index 0.
+ if (isEmpty()) {
+ return -1;
+ }
+ return 0;
+ }
+
+ /** Returns the entry index that follows {@code entryIndex} in iteration, or -1 at the end. */
+ int getSuccessor(int entryIndex) {
+ // Entries are dense in [0, size), so the successor is simply the next slot.
+ int candidate = entryIndex + 1;
+ return (candidate < size) ? candidate : -1;
+ }
+
+ /**
+ * Updates the index an iterator is pointing to after a call to remove: returns the index of the
+ * entry that should be looked at after a removal on indexRemoved, with indexBeforeRemove as the
+ * index that *was* the next entry that would be looked at.
+ *
+ * <p>The decrement accounts for {@link #moveLastEntry} compacting storage: removal fills the
+ * freed slot with the previously-last entry, shifting the iterator's frame of reference by one.
+ */
+ int adjustAfterRemove(int indexBeforeRemove, @SuppressWarnings("unused") int indexRemoved) {
+ return indexBeforeRemove - 1;
+ }
+
+ private abstract class Itr