diff --git a/google/resource_dataproc_cluster.go b/google/resource_dataproc_cluster.go index d450372ddea..a46853a293c 100644 --- a/google/resource_dataproc_cluster.go +++ b/google/resource_dataproc_cluster.go @@ -43,6 +43,7 @@ var ( clusterConfigKeys = []string{ "cluster_config.0.staging_bucket", + "cluster_config.0.temp_bucket", "cluster_config.0.gce_cluster_config", "cluster_config.0.master_config", "cluster_config.0.worker_config", @@ -158,6 +159,15 @@ func resourceDataprocCluster() *schema.Resource { Description: ` The name of the cloud storage bucket ultimately used to house the staging data for the cluster. If staging_bucket is specified, it will contain this value, otherwise it will be the auto generated name.`, }, + "temp_bucket": { + Type: schema.TypeString, + Optional: true, + Computed: true, + AtLeastOneOf: clusterConfigKeys, + ForceNew: true, + Description: `The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.`, + }, + "gce_cluster_config": { Type: schema.TypeList, Optional: true, @@ -779,6 +789,10 @@ func expandClusterConfig(d *schema.ResourceData, config *Config) (*dataproc.Clus conf.ConfigBucket = v.(string) } + if v, ok := d.GetOk("cluster_config.0.temp_bucket"); ok { + conf.TempBucket = v.(string) + } + c, err := expandGceClusterConfig(d, config) if err != nil { return nil, err @@ -1208,6 +1222,7 @@ func flattenClusterConfig(d *schema.ResourceData, cfg *dataproc.ClusterConfig) ( "staging_bucket": d.Get("cluster_config.0.staging_bucket").(string), "bucket": cfg.ConfigBucket, + "temp_bucket": cfg.TempBucket, "gce_cluster_config": flattenGceClusterConfig(d, cfg.GceClusterConfig), "security_config": flattenSecurityConfig(d, cfg.SecurityConfig), "software_config": flattenSoftwareConfig(d, cfg.SoftwareConfig), diff --git a/google/resource_dataproc_cluster_test.go b/google/resource_dataproc_cluster_test.go index e46b51a98ac..2e848fcab9f 100644 --- a/google/resource_dataproc_cluster_test.go +++ b/google/resource_dataproc_cluster_test.go @@ -419,6 +419,37 @@ func TestAccDataprocCluster_withStagingBucket(t *testing.T) { }) } +func TestAccDataprocCluster_withTempBucket(t *testing.T) { + t.Parallel() + + rnd := randString(t, 10) + var cluster dataproc.Cluster + clusterName := fmt.Sprintf("tf-test-dproc-%s", rnd) + bucketName := fmt.Sprintf("%s-temp-bucket", clusterName) + + vcrTest(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: testAccCheckDataprocClusterDestroy(t), + Steps: []resource.TestStep{ + { + Config: testAccDataprocCluster_withTempBucketAndCluster(clusterName, bucketName), + Check: resource.ComposeTestCheckFunc( + testAccCheckDataprocClusterExists(t, "google_dataproc_cluster.with_bucket", &cluster), + resource.TestCheckResourceAttr("google_dataproc_cluster.with_bucket", "cluster_config.0.temp_bucket", bucketName)), + }, + { + // Simulate destroy of cluster by removing it from definition, + // but leaving the temp bucket (should not be auto deleted) + Config: testAccDataprocCluster_withTempBucketOnly(bucketName), + Check: resource.ComposeTestCheckFunc( + testAccCheckDataprocTempBucketExists(t, bucketName), + ), + }, + }, + }) +} + func TestAccDataprocCluster_withInitAction(t *testing.T) { t.Parallel() @@ -760,6 +791,22 @@ func testAccCheckDataprocStagingBucketExists(t *testing.T, bucketName string) re } } +func testAccCheckDataprocTempBucketExists(t *testing.T, bucketName string) resource.TestCheckFunc { + return func(s *terraform.State) error { + + config := googleProviderConfig(t) + + exists, err := validateBucketExists(bucketName, config) + if err != nil { + return err + } + if !exists { + return fmt.Errorf("Temp Bucket %s does not exist", bucketName) + } + return nil + } +} + func testAccCheckDataprocClusterHasOptionalComponents(cluster *dataproc.Cluster, components ...string) func(s *terraform.State) error { return func(s *terraform.State) error { @@ -1178,6 +1225,15 @@ resource "google_storage_bucket" "bucket" { `, bucketName) } +func testAccDataprocCluster_withTempBucketOnly(bucketName string) string { + return fmt.Sprintf(` +resource "google_storage_bucket" "bucket" { + name = "%s" + force_destroy = "true" +} +`, bucketName) +} + func testAccDataprocCluster_withStagingBucketAndCluster(clusterName, bucketName string) string { return fmt.Sprintf(` %s @@ -1207,6 +1263,35 @@ resource "google_dataproc_cluster" "with_bucket" { `, testAccDataprocCluster_withStagingBucketOnly(bucketName), clusterName) } +func testAccDataprocCluster_withTempBucketAndCluster(clusterName, bucketName string) string { + return fmt.Sprintf(` +%s + +resource "google_dataproc_cluster" "with_bucket" { + name = "%s" + region = "us-central1" + + cluster_config { + temp_bucket = google_storage_bucket.bucket.name + + # Keep the costs down with smallest config we can get away with + software_config { + override_properties = { + "dataproc:dataproc.allow.zero.workers" = "true" + } + } + + master_config { + machine_type = "e2-medium" + disk_config { + boot_disk_size_gb = 15 + } + } + } +} +`, testAccDataprocCluster_withTempBucketOnly(bucketName), clusterName) +} + func testAccDataprocCluster_withLabels(rnd string) string { return fmt.Sprintf(` resource "google_dataproc_cluster" "with_labels" { diff --git a/website/docs/r/dataproc_cluster.html.markdown b/website/docs/r/dataproc_cluster.html.markdown index 0e5f52ba03c..47551e41e33 100644 --- a/website/docs/r/dataproc_cluster.html.markdown +++ b/website/docs/r/dataproc_cluster.html.markdown @@ -171,6 +171,10 @@ The `cluster_config` block supports: with other clusters in the same region/zone also choosing to use the auto generation option. +* `temp_bucket` - (Optional) The Cloud Storage temp bucket used to store ephemeral cluster + and jobs data, such as Spark and MapReduce history files. + Note: If you don't explicitly specify a `temp_bucket` then GCP will auto create / assign one for you. + * `gce_cluster_config` (Optional) Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.