From e59d453006372a59fd8d2461c641fd75c007a7f5 Mon Sep 17 00:00:00 2001 From: Justin Uang Date: Thu, 18 Jul 2024 13:01:16 -0400 Subject: [PATCH] perf: Set gcsUploadBufferSizeBytes to 8MB for Export Jobs to reduce RAM usage (#4366) --- .../cloud/bigtable/beam/sequencefiles/ExportJob.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/ExportJob.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/ExportJob.java index 0c7ef060b5..740466b05d 100644 --- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/ExportJob.java +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/ExportJob.java @@ -23,6 +23,7 @@ import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; +import org.apache.beam.sdk.extensions.gcp.options.GcsOptions; import org.apache.beam.sdk.io.DefaultFilenamePolicy; import org.apache.beam.sdk.io.FileSystems; import org.apache.beam.sdk.io.Read; @@ -95,7 +96,10 @@ */ @InternalExtensionOnly public class ExportJob { - public interface ExportOptions extends GcpOptions { + + static final int GCS_UPLOAD_BUFFER_SIZE_BYTES_DEFAULT = 8388608; + + public interface ExportOptions extends GcpOptions, GcsOptions { @Description("This Bigtable App Profile id.") ValueProvider getBigtableAppProfileId(); @@ -182,6 +186,9 @@ public static void main(String[] args) { ExportOptions opts = PipelineOptionsFactory.fromArgs(args).withValidation().as(ExportOptions.class); + if (opts.getGcsUploadBufferSizeBytes() == null) { + opts.setGcsUploadBufferSizeBytes(GCS_UPLOAD_BUFFER_SIZE_BYTES_DEFAULT); + } Pipeline pipeline = buildPipeline(opts);