Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add temp_bucket parameter for resource google_dataproc_cluster (fixes #7927) #8131

Merged
merged 3 commits into from
Jan 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions google/resource_dataproc_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ var (

clusterConfigKeys = []string{
"cluster_config.0.staging_bucket",
"cluster_config.0.temp_bucket",
"cluster_config.0.gce_cluster_config",
"cluster_config.0.master_config",
"cluster_config.0.worker_config",
Expand Down Expand Up @@ -158,6 +159,15 @@ func resourceDataprocCluster() *schema.Resource {
Description: ` The name of the cloud storage bucket ultimately used to house the staging data for the cluster. If staging_bucket is specified, it will contain this value, otherwise it will be the auto generated name.`,
},

"temp_bucket": {
Type: schema.TypeString,
Optional: true,
bartcode marked this conversation as resolved.
Show resolved Hide resolved
Computed: true,
AtLeastOneOf: clusterConfigKeys,
ForceNew: true,
Description: `The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.`,
},

"gce_cluster_config": {
Type: schema.TypeList,
Optional: true,
Expand Down Expand Up @@ -779,6 +789,10 @@ func expandClusterConfig(d *schema.ResourceData, config *Config) (*dataproc.Clus
conf.ConfigBucket = v.(string)
}

if v, ok := d.GetOk("cluster_config.0.temp_bucket"); ok {
conf.TempBucket = v.(string)
}

c, err := expandGceClusterConfig(d, config)
if err != nil {
return nil, err
Expand Down Expand Up @@ -1208,6 +1222,7 @@ func flattenClusterConfig(d *schema.ResourceData, cfg *dataproc.ClusterConfig) (
"staging_bucket": d.Get("cluster_config.0.staging_bucket").(string),

"bucket": cfg.ConfigBucket,
"temp_bucket": cfg.TempBucket,
"gce_cluster_config": flattenGceClusterConfig(d, cfg.GceClusterConfig),
"security_config": flattenSecurityConfig(d, cfg.SecurityConfig),
"software_config": flattenSoftwareConfig(d, cfg.SoftwareConfig),
Expand Down
85 changes: 85 additions & 0 deletions google/resource_dataproc_cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,37 @@ func TestAccDataprocCluster_withStagingBucket(t *testing.T) {
})
}

// TestAccDataprocCluster_withTempBucket verifies that a user-supplied
// temp bucket is wired into cluster_config.0.temp_bucket, and that the
// bucket is left intact after the cluster itself is destroyed.
func TestAccDataprocCluster_withTempBucket(t *testing.T) {
	t.Parallel()

	suffix := randString(t, 10)
	clusterName := "tf-test-dproc-" + suffix
	bucketName := clusterName + "-temp-bucket"
	var cluster dataproc.Cluster

	vcrTest(t, resource.TestCase{
		PreCheck:     func() { testAccPreCheck(t) },
		Providers:    testAccProviders,
		CheckDestroy: testAccCheckDataprocClusterDestroy(t),
		Steps: []resource.TestStep{
			{
				// Cluster plus explicit temp bucket: the attribute must
				// reflect the bucket we created, not an auto-generated one.
				Config: testAccDataprocCluster_withTempBucketAndCluster(clusterName, bucketName),
				Check: resource.ComposeTestCheckFunc(
					testAccCheckDataprocClusterExists(t, "google_dataproc_cluster.with_bucket", &cluster),
					resource.TestCheckResourceAttr("google_dataproc_cluster.with_bucket", "cluster_config.0.temp_bucket", bucketName),
				),
			},
			{
				// Drop the cluster from the config while keeping the temp
				// bucket; the bucket must survive the cluster's destruction.
				Config: testAccDataprocCluster_withTempBucketOnly(bucketName),
				Check:  testAccCheckDataprocTempBucketExists(t, bucketName),
			},
		},
	})
}

func TestAccDataprocCluster_withInitAction(t *testing.T) {
t.Parallel()

Expand Down Expand Up @@ -760,6 +791,22 @@ func testAccCheckDataprocStagingBucketExists(t *testing.T, bucketName string) re
}
}

// testAccCheckDataprocTempBucketExists returns a check that fails unless
// the named Cloud Storage bucket still exists (used to prove the temp
// bucket is not deleted together with the cluster).
func testAccCheckDataprocTempBucketExists(t *testing.T, bucketName string) resource.TestCheckFunc {
	return func(s *terraform.State) error {
		cfg := googleProviderConfig(t)

		switch found, err := validateBucketExists(bucketName, cfg); {
		case err != nil:
			return err
		case !found:
			return fmt.Errorf("Temp Bucket %s does not exist", bucketName)
		default:
			return nil
		}
	}
}

func testAccCheckDataprocClusterHasOptionalComponents(cluster *dataproc.Cluster, components ...string) func(s *terraform.State) error {
return func(s *terraform.State) error {

Expand Down Expand Up @@ -1178,6 +1225,15 @@ resource "google_storage_bucket" "bucket" {
`, bucketName)
}

func testAccDataprocCluster_withTempBucketOnly(bucketName string) string {
return fmt.Sprintf(`
resource "google_storage_bucket" "bucket" {
name = "%s"
force_destroy = "true"
}
`, bucketName)
}

func testAccDataprocCluster_withStagingBucketAndCluster(clusterName, bucketName string) string {
return fmt.Sprintf(`
%s
Expand Down Expand Up @@ -1207,6 +1263,35 @@ resource "google_dataproc_cluster" "with_bucket" {
`, testAccDataprocCluster_withStagingBucketOnly(bucketName), clusterName)
}

// testAccDataprocCluster_withTempBucketAndCluster renders the bucket
// config plus a minimal single-node cluster that references the bucket
// via cluster_config.temp_bucket.
func testAccDataprocCluster_withTempBucketAndCluster(clusterName, bucketName string) string {
	clusterCfg := fmt.Sprintf(`
resource "google_dataproc_cluster" "with_bucket" {
  name   = "%s"
  region = "us-central1"

  cluster_config {
    temp_bucket = google_storage_bucket.bucket.name

    # Keep the costs down with smallest config we can get away with
    software_config {
      override_properties = {
        "dataproc:dataproc.allow.zero.workers" = "true"
      }
    }

    master_config {
      machine_type = "e2-medium"
      disk_config {
        boot_disk_size_gb = 15
      }
    }
  }
}
`, clusterName)

	// Prepend the bucket-only config, preserving the blank line the
	// original single Sprintf template placed between the two resources.
	return "\n" + testAccDataprocCluster_withTempBucketOnly(bucketName) + "\n" + clusterCfg
}

func testAccDataprocCluster_withLabels(rnd string) string {
return fmt.Sprintf(`
resource "google_dataproc_cluster" "with_labels" {
Expand Down
4 changes: 4 additions & 0 deletions website/docs/r/dataproc_cluster.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ The `cluster_config` block supports:
with other clusters in the same region/zone also choosing to use the auto generation
option.

* `temp_bucket` - (Optional) The Cloud Storage temp bucket used to store ephemeral cluster
and jobs data, such as Spark and MapReduce history files.
Note: If you don't explicitly specify a `temp_bucket` then GCP will auto create / assign one for you.

* `gce_cluster_config` (Optional) Common config settings for resources of Google Compute Engine cluster
instances, applicable to all instances in the cluster. Structure defined below.

Expand Down