Skip to content

Commit

Permalink
Add temp_bucket parameter for resource google_dataproc_cluster (fixes #7927) (#8131)
Browse files Browse the repository at this point in the history

* Add temp_bucket parameter for resource google_dataproc_cluster (#7927)

* Fix temp_bucket field, which should be a computed field.

* Add documentation on `temp_bucket`.
  • Loading branch information
bartcode authored Jan 21, 2021
1 parent 139ac9e commit 3181075
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 0 deletions.
15 changes: 15 additions & 0 deletions google/resource_dataproc_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ var (

clusterConfigKeys = []string{
"cluster_config.0.staging_bucket",
"cluster_config.0.temp_bucket",
"cluster_config.0.gce_cluster_config",
"cluster_config.0.master_config",
"cluster_config.0.worker_config",
Expand Down Expand Up @@ -158,6 +159,15 @@ func resourceDataprocCluster() *schema.Resource {
Description: ` The name of the cloud storage bucket ultimately used to house the staging data for the cluster. If staging_bucket is specified, it will contain this value, otherwise it will be the auto generated name.`,
},

"temp_bucket": {
Type: schema.TypeString,
Optional: true,
Computed: true,
AtLeastOneOf: clusterConfigKeys,
ForceNew: true,
Description: `The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.`,
},

"gce_cluster_config": {
Type: schema.TypeList,
Optional: true,
Expand Down Expand Up @@ -779,6 +789,10 @@ func expandClusterConfig(d *schema.ResourceData, config *Config) (*dataproc.Clus
conf.ConfigBucket = v.(string)
}

if v, ok := d.GetOk("cluster_config.0.temp_bucket"); ok {
conf.TempBucket = v.(string)
}

c, err := expandGceClusterConfig(d, config)
if err != nil {
return nil, err
Expand Down Expand Up @@ -1208,6 +1222,7 @@ func flattenClusterConfig(d *schema.ResourceData, cfg *dataproc.ClusterConfig) (
"staging_bucket": d.Get("cluster_config.0.staging_bucket").(string),

"bucket": cfg.ConfigBucket,
"temp_bucket": cfg.TempBucket,
"gce_cluster_config": flattenGceClusterConfig(d, cfg.GceClusterConfig),
"security_config": flattenSecurityConfig(d, cfg.SecurityConfig),
"software_config": flattenSoftwareConfig(d, cfg.SoftwareConfig),
Expand Down
85 changes: 85 additions & 0 deletions google/resource_dataproc_cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,37 @@ func TestAccDataprocCluster_withStagingBucket(t *testing.T) {
})
}

func TestAccDataprocCluster_withTempBucket(t *testing.T) {
t.Parallel()

rnd := randString(t, 10)
var cluster dataproc.Cluster
clusterName := fmt.Sprintf("tf-test-dproc-%s", rnd)
bucketName := fmt.Sprintf("%s-temp-bucket", clusterName)

vcrTest(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
CheckDestroy: testAccCheckDataprocClusterDestroy(t),
Steps: []resource.TestStep{
{
Config: testAccDataprocCluster_withTempBucketAndCluster(clusterName, bucketName),
Check: resource.ComposeTestCheckFunc(
testAccCheckDataprocClusterExists(t, "google_dataproc_cluster.with_bucket", &cluster),
resource.TestCheckResourceAttr("google_dataproc_cluster.with_bucket", "cluster_config.0.temp_bucket", bucketName)),
},
{
// Simulate destroy of cluster by removing it from definition,
// but leaving the temp bucket (should not be auto deleted)
Config: testAccDataprocCluster_withTempBucketOnly(bucketName),
Check: resource.ComposeTestCheckFunc(
testAccCheckDataprocTempBucketExists(t, bucketName),
),
},
},
})
}

func TestAccDataprocCluster_withInitAction(t *testing.T) {
t.Parallel()

Expand Down Expand Up @@ -760,6 +791,22 @@ func testAccCheckDataprocStagingBucketExists(t *testing.T, bucketName string) re
}
}

// testAccCheckDataprocTempBucketExists returns a check function that errors
// unless the named Cloud Storage bucket exists in the test project.
func testAccCheckDataprocTempBucketExists(t *testing.T, bucketName string) resource.TestCheckFunc {
	return func(s *terraform.State) error {
		exists, err := validateBucketExists(bucketName, googleProviderConfig(t))
		if err != nil {
			return err
		}
		if exists {
			return nil
		}
		return fmt.Errorf("Temp Bucket %s does not exist", bucketName)
	}
}

func testAccCheckDataprocClusterHasOptionalComponents(cluster *dataproc.Cluster, components ...string) func(s *terraform.State) error {
return func(s *terraform.State) error {

Expand Down Expand Up @@ -1178,6 +1225,15 @@ resource "google_storage_bucket" "bucket" {
`, bucketName)
}

func testAccDataprocCluster_withTempBucketOnly(bucketName string) string {
return fmt.Sprintf(`
resource "google_storage_bucket" "bucket" {
name = "%s"
force_destroy = "true"
}
`, bucketName)
}

func testAccDataprocCluster_withStagingBucketAndCluster(clusterName, bucketName string) string {
return fmt.Sprintf(`
%s
Expand Down Expand Up @@ -1207,6 +1263,35 @@ resource "google_dataproc_cluster" "with_bucket" {
`, testAccDataprocCluster_withStagingBucketOnly(bucketName), clusterName)
}

// testAccDataprocCluster_withTempBucketAndCluster renders a config with the
// temp bucket plus a minimal cluster that references it through
// cluster_config.temp_bucket.
func testAccDataprocCluster_withTempBucketAndCluster(clusterName, bucketName string) string {
	const tpl = `
%s
resource "google_dataproc_cluster" "with_bucket" {
  name   = "%s"
  region = "us-central1"

  cluster_config {
    temp_bucket = google_storage_bucket.bucket.name

    # Keep the costs down with smallest config we can get away with
    software_config {
      override_properties = {
        "dataproc:dataproc.allow.zero.workers" = "true"
      }
    }

    master_config {
      machine_type = "e2-medium"
      disk_config {
        boot_disk_size_gb = 15
      }
    }
  }
}
`
	return fmt.Sprintf(tpl, testAccDataprocCluster_withTempBucketOnly(bucketName), clusterName)
}

func testAccDataprocCluster_withLabels(rnd string) string {
return fmt.Sprintf(`
resource "google_dataproc_cluster" "with_labels" {
Expand Down
4 changes: 4 additions & 0 deletions website/docs/r/dataproc_cluster.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ The `cluster_config` block supports:
with other clusters in the same region/zone also choosing to use the auto generation
option.

* `temp_bucket` - (Optional) The Cloud Storage temp bucket used to store ephemeral cluster
and jobs data, such as Spark and MapReduce history files.
Note: If you don't explicitly specify a `temp_bucket` then GCP will auto create / assign one for you.

* `gce_cluster_config` (Optional) Common config settings for resources of Google Compute Engine cluster
instances, applicable to all instances in the cluster. Structure defined below.

Expand Down

0 comments on commit 3181075

Please sign in to comment.