From 6288eb5860a75a7cf07333e5cdf7d4a60dec8138 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Wed, 6 Dec 2023 09:04:18 -0800 Subject: [PATCH] Use a larger buffer size for `java.util.zip.*Stream` classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `DeflaterInputStream`, `GZIPInputStream`, `GZIPOutputStream`, and `InflaterInputStream` all use an internal byte buffer of 512 bytes by default. Whenever the wrapped stream exceeds this size, a full copy to a new buffer will occur, which will increase at increments of the same size. For example, a stream of length 2K will be copied four times. Increasing the size of the buffer we use can result in significant reductions in CPU usage (read: copies). Examples in the repository -------------------------- There are already two places where we increase the default size of these buffers: - `//src/main/java/com/google/devtools/build/lib/bazel/repository/TarGzFunction.java` - `//src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpStream.java` Prior art --------- There is an open enhancement issue in the OpenJDK tracker on this, which contains a benchmark for `InflaterOutputStream`: > Increase the default, internal buffer size of the Streams in `java.util.zip` > https://bugs.openjdk.org/browse/JDK-8242864 A similar change was merged in for JDK15+ in 2020: > Improve performance of `InflaterOutputStream.write()` > https://bugs.openjdk.org/browse/JDK-8242848 Providing a simple benchmark ---------------------------- I'm inlining a simple `jmh` benchmark and the results underneath it for one `GZIPInputStream` case. 
The benchmark: ``` @Fork(1) @Threads(1) @Warmup(iterations = 2) @State(Scope.Benchmark) @OutputTimeUnit(TimeUnit.NANOSECONDS) public class GZIPInputStreamBenchmark { @Param({"1024", "3072", "9216"}) long inputLength; @Param({"512", "1024", "4096", "8192"}) int bufferSize; private byte[] content; @Setup(Level.Iteration) public void setup() throws IOException { var baos = new ByteArrayOutputStream(); // No need to set the buffer size on this as it's a one-time cost for setup and not counted in the result. var gzip = new GZIPOutputStream(baos); var inputBytes = generateRandomByteArrayOfLength(inputLength); gzip.write(inputBytes); gzip.finish(); this.content = baos.toByteArray(); } @Benchmark @BenchmarkMode(Mode.AverageTime) public void getGzipInputStream(Blackhole bh) throws IOException { try (var is = new ByteArrayInputStream(this.content); var gzip = new GZIPInputStream(is, bufferSize)) { bh.consume(gzip.readAllBytes()); } } byte[] generateRandomByteArrayOfLength(long length) { var random = new Random(); var intStream = random.ints(0, 5000).limit(length).boxed(); return intStream.collect( ByteArrayOutputStream::new, (baos, i) -> baos.write(i.intValue()), (baos1, baos2) -> baos1.write(baos2.toByteArray(), 0, baos2.size()) ).toByteArray(); } } ``` The results: ``` Benchmark (bufferSize) (inputLength) Mode Cnt Score Error Units GZIPInputStreamBenchmark.getGzipInputStream 512 1024 avgt 5 3207.217 ± 24.919 ns/op GZIPInputStreamBenchmark.getGzipInputStream 512 3072 avgt 5 5874.191 ± 5.827 ns/op GZIPInputStreamBenchmark.getGzipInputStream 512 9216 avgt 5 15567.345 ± 93.281 ns/op GZIPInputStreamBenchmark.getGzipInputStream 1024 1024 avgt 5 2580.566 ± 14.566 ns/op GZIPInputStreamBenchmark.getGzipInputStream 1024 3072 avgt 5 4154.582 ± 16.016 ns/op GZIPInputStreamBenchmark.getGzipInputStream 1024 9216 avgt 5 9942.521 ± 61.215 ns/op GZIPInputStreamBenchmark.getGzipInputStream 4096 1024 avgt 5 2150.255 ± 52.770 ns/op GZIPInputStreamBenchmark.getGzipInputStream 4096 3072 avgt 5 
2289.185 ± 71.396 ns/op GZIPInputStreamBenchmark.getGzipInputStream 4096 9216 avgt 5 5656.891 ± 28.499 ns/op GZIPInputStreamBenchmark.getGzipInputStream 8192 1024 avgt 5 2177.427 ± 30.896 ns/op GZIPInputStreamBenchmark.getGzipInputStream 8192 3072 avgt 5 2517.390 ± 21.296 ns/op GZIPInputStreamBenchmark.getGzipInputStream 8192 9216 avgt 5 5227.932 ± 55.525 ns/op ``` Co-authored-by: Kushal Pisavadia Closes #20316. PiperOrigin-RevId: 588444920 Change-Id: I1fb47f0b08dcb8d72f3e2c43534c33d60efb87f2 --- .../devtools/build/singlejar/ZipCombiner.java | 18 +++++++++++++----- .../build/zip/ZipEntryInputStream.java | 4 ++-- .../lib/analysis/actions/FileWriteAction.java | 7 ++++--- .../rules/genquery/GenQueryOutputStream.java | 6 ++++-- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/java_tools/singlejar/java/com/google/devtools/build/singlejar/ZipCombiner.java b/src/java_tools/singlejar/java/com/google/devtools/build/singlejar/ZipCombiner.java index 44d4f0262b69e5..33cf205c7e4000 100644 --- a/src/java_tools/singlejar/java/com/google/devtools/build/singlejar/ZipCombiner.java +++ b/src/java_tools/singlejar/java/com/google/devtools/build/singlejar/ZipCombiner.java @@ -66,6 +66,7 @@ * ZIP format */ public class ZipCombiner implements AutoCloseable { + private static final int INFLATER_BUFFER_BYTES = 8192; public static final Date DOS_EPOCH = new Date(ZipUtil.DOS_EPOCH); /** * Whether to compress or decompress entries. 
@@ -440,7 +441,7 @@ public void addZip(File zipFile) throws IOException { entries.put(filename, null); InputStream in = zip.getRawInputStream(entry); if (entry.getMethod() == Compression.DEFLATED) { - in = new InflaterInputStream(in, getInflater()); + in = new InflaterInputStream(in, getInflater(), INFLATER_BUFFER_BYTES); } action.getStrategy().merge(in, action.getMergeBuffer()); break; @@ -492,7 +493,9 @@ private void writeEntryFromBuffer(ZipFileEntry entry, byte[] uncompressed) throw writeEntry(entry, new ByteArrayInputStream(uncompressed)); } else { ByteArrayOutputStream compressed = new ByteArrayOutputStream(); - copyStream(new DeflaterInputStream(new ByteArrayInputStream(uncompressed), getDeflater()), + copyStream( + new DeflaterInputStream( + new ByteArrayInputStream(uncompressed), getDeflater(), INFLATER_BUFFER_BYTES), compressed); entry.setMethod(Compression.DEFLATED); entry.setCompressedSize(compressed.size()); @@ -529,14 +532,19 @@ private void writeEntry(ZipReader zip, ZipFileEntry entry, EntryAction action) // from the raw file data and deflate to a temporary byte array to determine the deflated // size. Then use this byte array as the input stream for writing the entry. ByteArrayOutputStream tmp = new ByteArrayOutputStream(); - copyStream(new DeflaterInputStream(zip.getRawInputStream(entry), getDeflater()), tmp); + copyStream( + new DeflaterInputStream( + zip.getRawInputStream(entry), getDeflater(), INFLATER_BUFFER_BYTES), + tmp); data = new ByteArrayInputStream(tmp.toByteArray()); outEntry.setMethod(Compression.DEFLATED); outEntry.setCompressedSize(tmp.size()); } else if (mode == OutputMode.FORCE_STORED && entry.getMethod() != Compression.STORED) { // The output mode is stored, but the entry compression is not; create an inflater stream - // from the raw file data. - data = new InflaterInputStream(zip.getRawInputStream(entry), getInflater()); + // from the raw file data. 
+ data = + new InflaterInputStream( + zip.getRawInputStream(entry), getInflater(), INFLATER_BUFFER_BYTES); outEntry.setMethod(Compression.STORED); outEntry.setCompressedSize(entry.getSize()); } else { diff --git a/src/java_tools/singlejar/java/com/google/devtools/build/zip/ZipEntryInputStream.java b/src/java_tools/singlejar/java/com/google/devtools/build/zip/ZipEntryInputStream.java index f162997a37c186..e40b049fabe928 100644 --- a/src/java_tools/singlejar/java/com/google/devtools/build/zip/ZipEntryInputStream.java +++ b/src/java_tools/singlejar/java/com/google/devtools/build/zip/ZipEntryInputStream.java @@ -15,7 +15,6 @@ package com.google.devtools.build.zip; import com.google.devtools.build.zip.ZipFileEntry.Compression; - import java.io.IOException; import java.io.InputStream; import java.util.zip.Inflater; @@ -24,6 +23,7 @@ /** An input stream for reading the file data of a ZIP file entry. */ class ZipEntryInputStream extends InputStream { + private static final int INFLATER_BUFFER_BYTES = 8192; private InputStream stream; private long rem; @@ -61,7 +61,7 @@ class ZipEntryInputStream extends InputStream { rem = zipEntry.getSize(); } if (!raw && zipEntry.getMethod() == Compression.DEFLATED) { - stream = new InflaterInputStream(stream, new Inflater(true)); + stream = new InflaterInputStream(stream, new Inflater(true), INFLATER_BUFFER_BYTES); } } diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java index 83d4e026fbc1f2..08c6f51380765f 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java @@ -231,6 +231,7 @@ protected void computeKey( private static final class CompressedFileWriteAction extends FileWriteAction { private static final String GUID = "5bfba914-2251-11ee-be56-0242ac120002"; + private static final int 
GZIP_BYTES_BUFFER = 8192; private final byte[] compressedBytes; private final int uncompressedSize; @@ -252,7 +253,7 @@ private static final class CompressedFileWriteAction extends FileWriteAction { // Presize on the small end to avoid over-allocating memory. ByteArrayOutputStream byteStream = new ByteArrayOutputStream(dataToCompress.length / 100); - try (GZIPOutputStream zipStream = new GZIPOutputStream(byteStream)) { + try (GZIPOutputStream zipStream = new GZIPOutputStream(byteStream, GZIP_BYTES_BUFFER)) { zipStream.write(dataToCompress); } catch (IOException e) { // This should be impossible since we're writing to a byte array. @@ -268,7 +269,7 @@ private static final class CompressedFileWriteAction extends FileWriteAction { public String getFileContents() { byte[] uncompressedBytes = new byte[uncompressedSize]; try (GZIPInputStream zipStream = - new GZIPInputStream(new ByteArrayInputStream(compressedBytes))) { + new GZIPInputStream(new ByteArrayInputStream(compressedBytes), GZIP_BYTES_BUFFER)) { int read; int totalRead = 0; while (totalRead < uncompressedSize @@ -293,7 +294,7 @@ public String getFileContents() { public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) { return out -> { try (GZIPInputStream gzipIn = - new GZIPInputStream(new ByteArrayInputStream(compressedBytes))) { + new GZIPInputStream(new ByteArrayInputStream(compressedBytes), GZIP_BYTES_BUFFER)) { ByteStreams.copy(gzipIn, out); } }; diff --git a/src/main/java/com/google/devtools/build/lib/rules/genquery/GenQueryOutputStream.java b/src/main/java/com/google/devtools/build/lib/rules/genquery/GenQueryOutputStream.java index 7a131a5381fcee..496dd51ec6e292 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/genquery/GenQueryOutputStream.java +++ b/src/main/java/com/google/devtools/build/lib/rules/genquery/GenQueryOutputStream.java @@ -43,6 +43,8 @@ class GenQueryOutputStream extends OutputStream { */ private static final int COMPRESSION_THRESHOLD = 1 << 20; + 
private static final int GZIP_BYTES_BUFFER = 8192; + /** * Encapsulates the output of a {@link GenQuery}'s query. CPU and memory overhead of individual * methods depends on the underlying content and settings. @@ -83,7 +85,7 @@ interface GenQueryResult { GenQueryOutputStream(boolean compressedOutputRequested) throws IOException { this.compressedOutputRequested = compressedOutputRequested; if (compressedOutputRequested) { - this.out = new GZIPOutputStream(bytesOut); + this.out = new GZIPOutputStream(bytesOut, GZIP_BYTES_BUFFER); this.outputWasCompressed = true; } else { this.out = bytesOut; @@ -138,7 +140,7 @@ private void maybeStartCompression(int additionalBytes) throws IOException { } ByteString.Output compressedBytesOut = ByteString.newOutput(); - GZIPOutputStream gzipOut = new GZIPOutputStream(compressedBytesOut); + GZIPOutputStream gzipOut = new GZIPOutputStream(compressedBytesOut, GZIP_BYTES_BUFFER); bytesOut.writeTo(gzipOut); bytesOut = compressedBytesOut; out = gzipOut;