Skip to content

Commit

Permalink
Add temp_bucket parameter for resource google_dataproc_cluster (fixes #7927) (#8131)
Browse files Browse the repository at this point in the history

* Add temp_bucket parameter for resource google_dataproc_cluster (#7927)

* Fix temp_bucket field, which should be a computed field.

* Add documentation on `temp_bucket`.
  • Loading branch information
bartcode authored Jan 21, 2021
1 parent 139ac9e commit 3181075
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 0 deletions.
15 changes: 15 additions & 0 deletions google/resource_dataproc_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ var (

clusterConfigKeys = []string{
"cluster_config.0.staging_bucket",
"cluster_config.0.temp_bucket",
"cluster_config.0.gce_cluster_config",
"cluster_config.0.master_config",
"cluster_config.0.worker_config",
Expand Down Expand Up @@ -158,6 +159,15 @@ func resourceDataprocCluster() *schema.Resource {
Description: ` The name of the cloud storage bucket ultimately used to house the staging data for the cluster. If staging_bucket is specified, it will contain this value, otherwise it will be the auto generated name.`,
},

"temp_bucket": {
Type: schema.TypeString,
Optional: true,
Computed: true,
AtLeastOneOf: clusterConfigKeys,
ForceNew: true,
Description: `The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.`,
},

"gce_cluster_config": {
Type: schema.TypeList,
Optional: true,
Expand Down Expand Up @@ -779,6 +789,10 @@ func expandClusterConfig(d *schema.ResourceData, config *Config) (*dataproc.Clus
conf.ConfigBucket = v.(string)
}

if v, ok := d.GetOk("cluster_config.0.temp_bucket"); ok {
conf.TempBucket = v.(string)
}

c, err := expandGceClusterConfig(d, config)
if err != nil {
return nil, err
Expand Down Expand Up @@ -1208,6 +1222,7 @@ func flattenClusterConfig(d *schema.ResourceData, cfg *dataproc.ClusterConfig) (
"staging_bucket": d.Get("cluster_config.0.staging_bucket").(string),

"bucket": cfg.ConfigBucket,
"temp_bucket": cfg.TempBucket,
"gce_cluster_config": flattenGceClusterConfig(d, cfg.GceClusterConfig),
"security_config": flattenSecurityConfig(d, cfg.SecurityConfig),
"software_config": flattenSoftwareConfig(d, cfg.SoftwareConfig),
Expand Down
85 changes: 85 additions & 0 deletions google/resource_dataproc_cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,37 @@ func TestAccDataprocCluster_withStagingBucket(t *testing.T) {
})
}

func TestAccDataprocCluster_withTempBucket(t *testing.T) {
t.Parallel()

rnd := randString(t, 10)
var cluster dataproc.Cluster
clusterName := fmt.Sprintf("tf-test-dproc-%s", rnd)
bucketName := fmt.Sprintf("%s-temp-bucket", clusterName)

vcrTest(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
CheckDestroy: testAccCheckDataprocClusterDestroy(t),
Steps: []resource.TestStep{
{
Config: testAccDataprocCluster_withTempBucketAndCluster(clusterName, bucketName),
Check: resource.ComposeTestCheckFunc(
testAccCheckDataprocClusterExists(t, "google_dataproc_cluster.with_bucket", &cluster),
resource.TestCheckResourceAttr("google_dataproc_cluster.with_bucket", "cluster_config.0.temp_bucket", bucketName)),
},
{
// Simulate destroy of cluster by removing it from definition,
// but leaving the temp bucket (should not be auto deleted)
Config: testAccDataprocCluster_withTempBucketOnly(bucketName),
Check: resource.ComposeTestCheckFunc(
testAccCheckDataprocTempBucketExists(t, bucketName),
),
},
},
})
}

func TestAccDataprocCluster_withInitAction(t *testing.T) {
t.Parallel()

Expand Down Expand Up @@ -760,6 +791,22 @@ func testAccCheckDataprocStagingBucketExists(t *testing.T, bucketName string) re
}
}

// testAccCheckDataprocTempBucketExists returns a check function that errors
// unless the named Cloud Storage bucket exists in the test project.
func testAccCheckDataprocTempBucketExists(t *testing.T, bucketName string) resource.TestCheckFunc {
	return func(s *terraform.State) error {
		exists, err := validateBucketExists(bucketName, googleProviderConfig(t))
		if err != nil {
			return err
		}
		if exists {
			return nil
		}
		return fmt.Errorf("Temp Bucket %s does not exist", bucketName)
	}
}

func testAccCheckDataprocClusterHasOptionalComponents(cluster *dataproc.Cluster, components ...string) func(s *terraform.State) error {
return func(s *terraform.State) error {

Expand Down Expand Up @@ -1178,6 +1225,15 @@ resource "google_storage_bucket" "bucket" {
`, bucketName)
}

func testAccDataprocCluster_withTempBucketOnly(bucketName string) string {
return fmt.Sprintf(`
resource "google_storage_bucket" "bucket" {
name = "%s"
force_destroy = "true"
}
`, bucketName)
}

func testAccDataprocCluster_withStagingBucketAndCluster(clusterName, bucketName string) string {
return fmt.Sprintf(`
%s
Expand Down Expand Up @@ -1207,6 +1263,35 @@ resource "google_dataproc_cluster" "with_bucket" {
`, testAccDataprocCluster_withStagingBucketOnly(bucketName), clusterName)
}

// testAccDataprocCluster_withTempBucketAndCluster renders a config with the
// temp bucket plus a minimal cluster that references it through
// cluster_config.temp_bucket.
func testAccDataprocCluster_withTempBucketAndCluster(clusterName, bucketName string) string {
	const tpl = `
%s
resource "google_dataproc_cluster" "with_bucket" {
  name   = "%s"
  region = "us-central1"

  cluster_config {
    temp_bucket = google_storage_bucket.bucket.name

    # Keep the costs down with smallest config we can get away with
    software_config {
      override_properties = {
        "dataproc:dataproc.allow.zero.workers" = "true"
      }
    }

    master_config {
      machine_type = "e2-medium"
      disk_config {
        boot_disk_size_gb = 15
      }
    }
  }
}
`
	return fmt.Sprintf(tpl, testAccDataprocCluster_withTempBucketOnly(bucketName), clusterName)
}

func testAccDataprocCluster_withLabels(rnd string) string {
return fmt.Sprintf(`
resource "google_dataproc_cluster" "with_labels" {
Expand Down
4 changes: 4 additions & 0 deletions website/docs/r/dataproc_cluster.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ The `cluster_config` block supports:
with other clusters in the same region/zone also choosing to use the auto generation
option.

* `temp_bucket` - (Optional) The Cloud Storage temp bucket used to store ephemeral cluster
and jobs data, such as Spark and MapReduce history files.
Note: If you don't explicitly specify a `temp_bucket` then GCP will auto create / assign one for you.

* `gce_cluster_config` (Optional) Common config settings for resources of Google Compute Engine cluster
instances, applicable to all instances in the cluster. Structure defined below.

Expand Down

0 comments on commit 3181075

Please sign in to comment.