From 058ad4e248b4c67b9d8bc0c54f2b2674d3c89e24 Mon Sep 17 00:00:00 2001 From: nikhil Date: Fri, 19 Feb 2021 20:18:11 +0530 Subject: [PATCH 01/10] LogEncryptionKmsKeyId for EMR Clusters --- aws/resource_aws_emr_cluster.go | 10 ++++++++ aws/resource_aws_emr_cluster_test.go | 31 +++++++++++++++++++++--- scripts/gofmtcheck.sh | 2 +- website/docs/r/emr_cluster.html.markdown | 1 + 4 files changed, 40 insertions(+), 4 deletions(-) diff --git a/aws/resource_aws_emr_cluster.go b/aws/resource_aws_emr_cluster.go index 48cc18a49b8..618b99a99f3 100644 --- a/aws/resource_aws_emr_cluster.go +++ b/aws/resource_aws_emr_cluster.go @@ -65,6 +65,11 @@ func resourceAwsEMRCluster() *schema.Resource { Type: schema.TypeString, Computed: true, }, + "log_encryption_kms_key_id": { + Type: schema.TypeString, + ForceNew: true, + Optional: true, + }, "log_uri": { Type: schema.TypeString, ForceNew: true, @@ -856,6 +861,10 @@ func resourceAwsEMRClusterCreate(d *schema.ResourceData, meta interface{}) error params.AdditionalInfo = aws.String(info) } + if v, ok := d.GetOk("log_encryption_kms_key_id"); ok { + params.LogEncryptionKmsKeyId = aws.String(v.(string)) + } + if v, ok := d.GetOk("log_uri"); ok { params.LogUri = aws.String(v.(string)) } @@ -1093,6 +1102,7 @@ func resourceAwsEMRClusterRead(d *schema.ResourceData, meta interface{}) error { d.Set("security_configuration", cluster.SecurityConfiguration) d.Set("autoscaling_role", cluster.AutoScalingRole) d.Set("release_label", cluster.ReleaseLabel) + d.Set("log_encryption_kms_key_id", cluster.LogEncryptionKmsKeyId) d.Set("log_uri", cluster.LogUri) d.Set("master_public_dns", cluster.MasterPublicDnsName) d.Set("visible_to_all_users", cluster.VisibleToAllUsers) diff --git a/aws/resource_aws_emr_cluster_test.go b/aws/resource_aws_emr_cluster_test.go index a10cca67782..4b01d3be68e 100644 --- a/aws/resource_aws_emr_cluster_test.go +++ b/aws/resource_aws_emr_cluster_test.go @@ -3005,15 +3005,39 @@ resource "aws_emr_cluster" "tf-test-cluster" { func 
testAccAWSEmrClusterConfigS3Logging(r string) string { return testAccAWSEmrComposeConfig(false, testAccAWSEmrClusterConfigCurrentPartition(), + testAccAWSEmrClusterConfigIAMServiceRoleCustomAmiID(r), + testAccAWSEmrClusterConfigIAMInstanceProfileBase(r), fmt.Sprintf(` resource "aws_s3_bucket" "test" { bucket = "%[1]s" force_destroy = true } +resource "aws_kms_key" "foo" { + description = "Terraform acc test %[1]s" + + policy = < Date: Fri, 19 Feb 2021 20:18:31 +0530 Subject: [PATCH 02/10] LogEncryptionKmsKeyId for EMR Clusters --- scripts/gofmtcheck.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gofmtcheck.sh b/scripts/gofmtcheck.sh index 41c1ac8e75e..dd2307acc57 100755 --- a/scripts/gofmtcheck.sh +++ b/scripts/gofmtcheck.sh @@ -7,7 +7,7 @@ if [[ -n ${gofmt_files} ]]; then echo 'gofmt needs running on the following files:' echo "${gofmt_files}" echo "You can use the command: \`make fmt\` to reformat code." - exit 0 + exit 1 fi exit 0 From fb76a8847d0beb735fa13cbc32710ac61ee26882 Mon Sep 17 00:00:00 2001 From: nikhil Date: Fri, 19 Feb 2021 20:19:05 +0530 Subject: [PATCH 03/10] LogEncryptionKmsKeyId for EMR Clusters --- scripts/gofmtcheck.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gofmtcheck.sh b/scripts/gofmtcheck.sh index dd2307acc57..f43ceefebe2 100755 --- a/scripts/gofmtcheck.sh +++ b/scripts/gofmtcheck.sh @@ -10,4 +10,4 @@ if [[ -n ${gofmt_files} ]]; then exit 1 fi -exit 0 +exit 0 \ No newline at end of file From f01295accf6673740559ffc462b78b70e2bcdf40 Mon Sep 17 00:00:00 2001 From: nikhil Date: Fri, 19 Feb 2021 20:19:41 +0530 Subject: [PATCH 04/10] LogEncryptionKmsKeyId for EMR Clusters --- scripts/gofmtcheck.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gofmtcheck.sh b/scripts/gofmtcheck.sh index f43ceefebe2..dd2307acc57 100755 --- a/scripts/gofmtcheck.sh +++ b/scripts/gofmtcheck.sh @@ -10,4 +10,4 @@ if [[ -n ${gofmt_files} ]]; then exit 1 fi -exit 0 \ No newline at 
end of file +exit 0 From 31daddd5a1c806ed81d7e61de5c931bfb1a4062e Mon Sep 17 00:00:00 2001 From: nikhil Date: Fri, 19 Feb 2021 20:46:56 +0530 Subject: [PATCH 05/10] LogEncryptionKmsKeyId for EMR Clusters --- aws/resource_aws_emr_cluster_test.go | 3 ++- website/docs/r/emr_cluster.html.markdown | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/aws/resource_aws_emr_cluster_test.go b/aws/resource_aws_emr_cluster_test.go index 4b01d3be68e..cfdef36fd67 100644 --- a/aws/resource_aws_emr_cluster_test.go +++ b/aws/resource_aws_emr_cluster_test.go @@ -1185,6 +1185,7 @@ func TestAccAWSEMRCluster_s3Logging(t *testing.T) { Check: resource.ComposeTestCheckFunc( testAccCheckAWSEmrClusterExists(resourceName, &cluster), resource.TestCheckResourceAttr(resourceName, "log_uri", bucketName), + resource.TestMatchResourceAttr(resourceName, "log_encryption_kms_key_id", regexp.MustCompile("^arn")), ), }, { @@ -3053,7 +3054,7 @@ resource "aws_emr_cluster" "tf-test-cluster" { } log_encryption_kms_key_id = aws_kms_key.foo.key_id - log_uri = "s3://${aws_s3_bucket.test.bucket}/" + log_uri = "s3://${aws_s3_bucket.test.bucket}/" ec2_attributes { instance_profile = aws_iam_instance_profile.emr_instance_profile.arn diff --git a/website/docs/r/emr_cluster.html.markdown b/website/docs/r/emr_cluster.html.markdown index bbbc3c33082..cf7568e331a 100644 --- a/website/docs/r/emr_cluster.html.markdown +++ b/website/docs/r/emr_cluster.html.markdown @@ -329,7 +329,7 @@ The following arguments are supported: * `security_configuration` - (Optional) The security configuration name to attach to the EMR cluster. 
Only valid for EMR clusters with `release_label` 4.8.0 or greater * `core_instance_group` - (Optional) Configuration block to use an [Instance Group](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-instance-group-configuration.html#emr-plan-instance-groups) for the [core node type](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-master-core-task-nodes.html#emr-plan-core). * `core_instance_fleet` - (Optional) Configuration block to use an [Instance Fleet](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-instance-fleet.html) for the core node type. Cannot be specified if any `core_instance_group` configuration blocks are set. Detailed below. -* `log_encryption_kms_key_id` - (Optional) The AWS KMS master key ID used for encrypting log files. This attribute is only available with EMR version 5.30.0 and later, excluding EMR 6.0.0. +* `log_encryption_kms_key_id` - (Optional) The AWS KMS master key ID or arn used for encrypting log files. This attribute is only available with EMR version 5.30.0 and later, excluding EMR 6.0.0. * `log_uri` - (Optional) S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created * `applications` - (Optional) A list of applications for the cluster. Valid values are: `Flink`, `Hadoop`, `Hive`, `Mahout`, `Pig`, `Spark`, and `JupyterHub` (as of EMR 5.14.0). Case insensitive * `termination_protection` - (Optional) Switch on/off termination protection (default is `false`, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to `false`. 
From 85a29b5fc44d5e5b6837ca31d2fe8482691c2823 Mon Sep 17 00:00:00 2001 From: nikhil Date: Fri, 19 Feb 2021 21:40:54 +0530 Subject: [PATCH 06/10] LogEncryptionKmsKeyId for EMR Clusters --- aws/resource_aws_emr_cluster_test.go | 4 ++-- website/docs/r/emr_cluster.html.markdown | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/aws/resource_aws_emr_cluster_test.go b/aws/resource_aws_emr_cluster_test.go index cfdef36fd67..ba3f51fc220 100644 --- a/aws/resource_aws_emr_cluster_test.go +++ b/aws/resource_aws_emr_cluster_test.go @@ -1185,7 +1185,7 @@ func TestAccAWSEMRCluster_s3Logging(t *testing.T) { Check: resource.ComposeTestCheckFunc( testAccCheckAWSEmrClusterExists(resourceName, &cluster), resource.TestCheckResourceAttr(resourceName, "log_uri", bucketName), - resource.TestMatchResourceAttr(resourceName, "log_encryption_kms_key_id", regexp.MustCompile("^arn")), + testAccMatchResourceAttrRegionalARN(resourceName, "log_encryption_kms_key_id", "kms", regexp.MustCompile(`key/.+`)), ), }, { @@ -3053,7 +3053,7 @@ resource "aws_emr_cluster" "tf-test-cluster" { instance_type = "c4.large" } - log_encryption_kms_key_id = aws_kms_key.foo.key_id + log_encryption_kms_key_id = aws_kms_key.foo.arn log_uri = "s3://${aws_s3_bucket.test.bucket}/" ec2_attributes { diff --git a/website/docs/r/emr_cluster.html.markdown b/website/docs/r/emr_cluster.html.markdown index cf7568e331a..28eb199f038 100644 --- a/website/docs/r/emr_cluster.html.markdown +++ b/website/docs/r/emr_cluster.html.markdown @@ -329,7 +329,7 @@ The following arguments are supported: * `security_configuration` - (Optional) The security configuration name to attach to the EMR cluster. 
Only valid for EMR clusters with `release_label` 4.8.0 or greater * `core_instance_group` - (Optional) Configuration block to use an [Instance Group](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-instance-group-configuration.html#emr-plan-instance-groups) for the [core node type](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-master-core-task-nodes.html#emr-plan-core). * `core_instance_fleet` - (Optional) Configuration block to use an [Instance Fleet](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-instance-fleet.html) for the core node type. Cannot be specified if any `core_instance_group` configuration blocks are set. Detailed below. -* `log_encryption_kms_key_id` - (Optional) The AWS KMS master key ID or arn used for encrypting log files. This attribute is only available with EMR version 5.30.0 and later, excluding EMR 6.0.0. +* `log_encryption_kms_key_id` - (Optional) The AWS KMS customer master key (CMK) key ID or arn used for encrypting log files. This attribute is only available with EMR version 5.30.0 and later, excluding EMR 6.0.0. * `log_uri` - (Optional) S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created * `applications` - (Optional) A list of applications for the cluster. Valid values are: `Flink`, `Hadoop`, `Hive`, `Mahout`, `Pig`, `Spark`, and `JupyterHub` (as of EMR 5.14.0). Case insensitive * `termination_protection` - (Optional) Switch on/off termination protection (default is `false`, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to `false`. 
From 547aa55bfc33ba55531ea8a41768597804d8609e Mon Sep 17 00:00:00 2001 From: nikhil Date: Sat, 20 Feb 2021 09:22:50 +0530 Subject: [PATCH 07/10] LogEncryptionKmsKeyId for EMR Clusters --- aws/resource_aws_emr_cluster_test.go | 77 ++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/aws/resource_aws_emr_cluster_test.go b/aws/resource_aws_emr_cluster_test.go index ba3f51fc220..8bc92b59dbe 100644 --- a/aws/resource_aws_emr_cluster_test.go +++ b/aws/resource_aws_emr_cluster_test.go @@ -1182,6 +1182,38 @@ func TestAccAWSEMRCluster_s3Logging(t *testing.T) { Steps: []resource.TestStep{ { Config: testAccAWSEmrClusterConfigS3Logging(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSEmrClusterExists(resourceName, &cluster), + resource.TestCheckResourceAttr(resourceName, "log_uri", bucketName), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: []string{ + "cluster_state", // Ignore RUNNING versus WAITING changes + "configurations", + "keep_job_flow_alive_when_no_steps", + }, + }, + }, + }) +} + +func TestAccAWSEMRCluster_s3LogEncryption(t *testing.T) { + var cluster emr.Cluster + + resourceName := "aws_emr_cluster.tf-test-cluster" + rName := acctest.RandomWithPrefix("tf-acc-test") + bucketName := fmt.Sprintf("s3n://%s/", rName) + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: testAccCheckAWSEmrDestroy, + Steps: []resource.TestStep{ + { + Config: testAccAWSEmrClusterConfigS3Encryption(rName), Check: resource.ComposeTestCheckFunc( testAccCheckAWSEmrClusterExists(resourceName, &cluster), resource.TestCheckResourceAttr(resourceName, "log_uri", bucketName), @@ -3014,6 +3046,51 @@ resource "aws_s3_bucket" "test" { force_destroy = true } +resource "aws_emr_cluster" "tf-test-cluster" { + name = "%[1]s" + release_label = "emr-4.6.0" + applications = ["Spark"] + + termination_protection = false + 
keep_job_flow_alive_when_no_steps = true + + master_instance_group { + instance_type = "c4.large" + } + + core_instance_group { + instance_count = 1 + instance_type = "c4.large" + } + + log_uri = "s3://${aws_s3_bucket.test.bucket}/" + + ec2_attributes { + instance_profile = aws_iam_instance_profile.emr_instance_profile.arn + emr_managed_master_security_group = aws_security_group.test.id + emr_managed_slave_security_group = aws_security_group.test.id + subnet_id = aws_subnet.test.id + } + + service_role = aws_iam_role.emr_service.arn +} + +data "aws_caller_identity" "current" {} +`, r), + ) +} + +func testAccAWSEmrClusterConfigS3Encryption(r string) string { + return testAccAWSEmrComposeConfig(false, + testAccAWSEmrClusterConfigCurrentPartition(), + testAccAWSEmrClusterConfigIAMServiceRoleCustomAmiID(r), + testAccAWSEmrClusterConfigIAMInstanceProfileBase(r), + fmt.Sprintf(` +resource "aws_s3_bucket" "test" { + bucket = "%[1]s" + force_destroy = true +} + resource "aws_kms_key" "foo" { description = "Terraform acc test %[1]s" From 1b45935e2253089cc4da49b0fa461f244cfab123 Mon Sep 17 00:00:00 2001 From: nikhil Date: Sat, 1 May 2021 10:04:17 +0530 Subject: [PATCH 08/10] LogEncryptionKmsKeyId for EMR Clusters --- aws/resource_aws_emr_cluster_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/aws/resource_aws_emr_cluster_test.go b/aws/resource_aws_emr_cluster_test.go index 8bc92b59dbe..eb8b754c14c 100644 --- a/aws/resource_aws_emr_cluster_test.go +++ b/aws/resource_aws_emr_cluster_test.go @@ -1209,6 +1209,7 @@ func TestAccAWSEMRCluster_s3LogEncryption(t *testing.T) { bucketName := fmt.Sprintf("s3n://%s/", rName) resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, + ErrorCheck: testAccErrorCheck(t, emr.EndpointsID), Providers: testAccProviders, CheckDestroy: testAccCheckAWSEmrDestroy, Steps: []resource.TestStep{ From b8e00554a16aebe4188f2e36eb648da9e36fa72d Mon Sep 17 00:00:00 2001 From: Dirk Avery Date: Tue, 5 Oct 2021 17:10:11 
-0400 Subject: [PATCH 09/10] tests/emr_cluster: Clean-up, standardize tests --- aws/resource_aws_emr_cluster_test.go | 2336 +++++++++++++------------- 1 file changed, 1183 insertions(+), 1153 deletions(-) diff --git a/aws/resource_aws_emr_cluster_test.go b/aws/resource_aws_emr_cluster_test.go index eb8b754c14c..afebd0bec5a 100644 --- a/aws/resource_aws_emr_cluster_test.go +++ b/aws/resource_aws_emr_cluster_test.go @@ -80,7 +80,7 @@ func testSweepEmrClusters(region string) error { func TestAccAWSEMRCluster_basic(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -123,7 +123,7 @@ func TestAccAWSEMRCluster_additionalInfo(t *testing.T) { } }` - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -158,7 +158,7 @@ func TestAccAWSEMRCluster_additionalInfo(t *testing.T) { func TestAccAWSEMRCluster_disappears(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -181,7 +181,7 @@ func TestAccAWSEMRCluster_disappears(t *testing.T) { func TestAccAWSEMRCluster_configurationsJson(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -510,7 +510,7 @@ func TestAccAWSEMRCluster_Ec2Attributes_DefaultManagedSecurityGroups(t *testing. 
var vpc ec2.Vpc rName := acctest.RandomWithPrefix("tf-acc-test") - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" vpcResourceName := "aws_vpc.test" resource.ParallelTest(t, resource.TestCase{ @@ -551,7 +551,7 @@ func TestAccAWSEMRCluster_Ec2Attributes_DefaultManagedSecurityGroups(t *testing. func TestAccAWSEMRCluster_Kerberos_ClusterDedicatedKdc(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") password := fmt.Sprintf("NeverKeepPasswordsInPlainText%s!", rName) resource.ParallelTest(t, resource.TestCase{ @@ -755,7 +755,7 @@ func TestAccAWSEMRCluster_MasterInstanceGroup_Name(t *testing.T) { func TestAccAWSEMRCluster_security_config(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -767,7 +767,7 @@ func TestAccAWSEMRCluster_security_config(t *testing.T) { Config: testAccAWSEmrClusterConfig_SecurityConfiguration(rName), Check: resource.ComposeTestCheckFunc( testAccCheckAWSEmrClusterExists(resourceName, &cluster), - resource.TestCheckResourceAttrPair(resourceName, "security_configuration", "aws_emr_security_configuration.foo", "name"), + resource.TestCheckResourceAttrPair(resourceName, "security_configuration", "aws_emr_security_configuration.test", "name"), ), }, { @@ -787,7 +787,7 @@ func TestAccAWSEMRCluster_security_config(t *testing.T) { func TestAccAWSEMRCluster_Step_Basic(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -825,7 +825,7 @@ func 
TestAccAWSEMRCluster_Step_Basic(t *testing.T) { func TestAccAWSEMRCluster_Step_ConfigMode(t *testing.T) { var cluster1, cluster2, cluster3 emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -891,7 +891,7 @@ func TestAccAWSEMRCluster_Step_ConfigMode(t *testing.T) { func TestAccAWSEMRCluster_Step_Multiple(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1033,7 +1033,7 @@ func TestAccAWSEMRCluster_bootstrap_ordering(t *testing.T) { func TestAccAWSEMRCluster_terminationProtected(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1090,7 +1090,7 @@ func TestAccAWSEMRCluster_terminationProtected(t *testing.T) { func TestAccAWSEMRCluster_keepJob(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1099,7 +1099,7 @@ func TestAccAWSEMRCluster_keepJob(t *testing.T) { CheckDestroy: testAccCheckAWSEmrDestroy, Steps: []resource.TestStep{ { - Config: testAccAWSEmrClusterConfig_keepJob(rName, "false"), + Config: testAccAWSEmrClusterConfig_keepJob(rName, false), Check: resource.ComposeTestCheckFunc( testAccCheckAWSEmrClusterExists(resourceName, &cluster), resource.TestCheckResourceAttr(resourceName, "keep_job_flow_alive_when_no_steps", "false"), 
@@ -1122,7 +1122,7 @@ func TestAccAWSEMRCluster_keepJob(t *testing.T) { func TestAccAWSEMRCluster_visibleToAllUsers(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1171,7 +1171,7 @@ func TestAccAWSEMRCluster_visibleToAllUsers(t *testing.T) { func TestAccAWSEMRCluster_s3Logging(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") bucketName := fmt.Sprintf("s3n://%s/", rName) resource.ParallelTest(t, resource.TestCase{ @@ -1204,7 +1204,7 @@ func TestAccAWSEMRCluster_s3Logging(t *testing.T) { func TestAccAWSEMRCluster_s3LogEncryption(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") bucketName := fmt.Sprintf("s3n://%s/", rName) resource.ParallelTest(t, resource.TestCase{ @@ -1238,7 +1238,7 @@ func TestAccAWSEMRCluster_s3LogEncryption(t *testing.T) { func TestAccAWSEMRCluster_tags(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1283,7 +1283,7 @@ func TestAccAWSEMRCluster_tags(t *testing.T) { func TestAccAWSEMRCluster_root_volume_size(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1322,7 +1322,7 @@ func TestAccAWSEMRCluster_root_volume_size(t *testing.T) { 
func TestAccAWSEMRCluster_step_concurrency_level(t *testing.T) { var cluster emr.Cluster rName := acctest.RandomWithPrefix("tf-acc-test") - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, ErrorCheck: testAccErrorCheck(t, emr.EndpointsID), @@ -1360,7 +1360,7 @@ func TestAccAWSEMRCluster_step_concurrency_level(t *testing.T) { func TestAccAWSEMRCluster_ebs_config(t *testing.T) { var cluster emr.Cluster rName := acctest.RandomWithPrefix("tf-acc-test") - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, ErrorCheck: testAccErrorCheck(t, emr.EndpointsID), @@ -1392,7 +1392,7 @@ func TestAccAWSEMRCluster_ebs_config(t *testing.T) { func TestAccAWSEMRCluster_custom_ami_id(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1424,7 +1424,7 @@ func TestAccAWSEMRCluster_custom_ami_id(t *testing.T) { func TestAccAWSEMRCluster_InstanceFleet_basic(t *testing.T) { var cluster1, cluster2 emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" subnetResourceName := "aws_subnet.test" subnet2ResourceName := "aws_subnet.test2" rName := acctest.RandomWithPrefix("tf-acc-test") @@ -1491,7 +1491,7 @@ func TestAccAWSEMRCluster_InstanceFleet_basic(t *testing.T) { func TestAccAWSEMRCluster_InstanceFleet_master_only(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ 
-1731,435 +1731,250 @@ func testAccEmrDeleteManagedSecurityGroup(conn *ec2.EC2, securityGroup *ec2.Secu return err } -func testAccAWSEmrComposeConfig(mapPublicIPOnLaunch bool, config ...string) string { - return composeConfig(append(config, testAccAWSEmrClusterConfigBaseVpc(mapPublicIPOnLaunch))...) -} - -func testAccAWSEmrClusterConfigCurrentPartition() string { - return ` -data "aws_partition" "current" {} -` -} - -func testAccAWSEmrClusterConfig_bootstrap(r string) string { - return testAccAWSEmrComposeConfig(false, - testAccAWSEmrClusterConfigCurrentPartition(), - testAccAWSEmrClusterConfigIAMServiceRoleBase(r), - testAccAWSEmrClusterConfigIAMInstanceProfileBase(r), - testAccAWSEmrClusterConfigBootstrapActionBucket(r), - fmt.Sprintf(` -resource "aws_emr_cluster" "test" { - name = "%[1]s" - release_label = "emr-5.0.0" - applications = ["Hadoop", "Hive"] - log_uri = "s3n://terraform/testlog/" - - master_instance_group { - instance_type = "c4.large" - } - - core_instance_group { - instance_count = 1 - instance_type = "c4.large" - } - - service_role = aws_iam_role.emr_service.arn - depends_on = [ - aws_route_table_association.test, - aws_iam_role_policy_attachment.emr_service, - aws_iam_role_policy_attachment.emr_instance_profile, - ] - - ec2_attributes { - subnet_id = aws_subnet.test.id - emr_managed_master_security_group = aws_security_group.test.id - emr_managed_slave_security_group = aws_security_group.test.id - instance_profile = aws_iam_instance_profile.emr_instance_profile.arn - } +// Sub-configs (used by other configs) - bootstrap_action { - path = "s3://elasticmapreduce/bootstrap-actions/run-if" - name = "runif" - args = ["instance.isMaster=true", "echo running on master node"] +func testAccAWSEmrClusterConfigBaseVpc(rName string, mapPublicIPOnLaunch bool) string { + return fmt.Sprintf(` +data "aws_availability_zones" "available" { + # Many instance types are not available in this availability zone + exclude_zone_ids = ["usw2-az4"] + state = "available" + 
filter { + name = "opt-in-status" + values = ["opt-in-not-required"] } +} - bootstrap_action { - path = "s3://${aws_s3_bucket_object.testobject.bucket}/${aws_s3_bucket_object.testobject.key}" - name = "test" +resource "aws_vpc" "test" { + cidr_block = "10.0.0.0/16" + enable_dns_hostnames = true - args = ["1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - ] + tags = { + Name = %[1]q } } -`, r), - ) -} -func testAccAWSEmrClusterConfig_bootstrapAdd(r string) string { - return testAccAWSEmrComposeConfig(false, - testAccAWSEmrClusterConfigCurrentPartition(), - testAccAWSEmrClusterConfigIAMServiceRoleBase(r), - testAccAWSEmrClusterConfigIAMInstanceProfileBase(r), - testAccAWSEmrClusterConfigBootstrapActionBucket(r), - fmt.Sprintf(` -resource "aws_emr_cluster" "test" { - name = "%[1]s" - release_label = "emr-5.0.0" - applications = ["Hadoop", "Hive"] - log_uri = "s3n://terraform/testlog/" - - master_instance_group { - instance_type = "c4.large" - } +resource "aws_internet_gateway" "test" { + vpc_id = aws_vpc.test.id - core_instance_group { - instance_count = 1 - instance_type = "c4.large" + tags = { + Name = %[1]q } +} - service_role = aws_iam_role.emr_service.arn - depends_on = [ - aws_route_table_association.test, - aws_iam_role_policy_attachment.emr_service, - aws_iam_role_policy_attachment.emr_instance_profile, - ] +resource "aws_security_group" "test" { + vpc_id = aws_vpc.test.id - ec2_attributes { - subnet_id = aws_subnet.test.id - emr_managed_master_security_group = aws_security_group.test.id - emr_managed_slave_security_group = aws_security_group.test.id - instance_profile = aws_iam_instance_profile.emr_instance_profile.arn + ingress { + from_port = 0 + protocol = "-1" + self = true + to_port = 0 } - bootstrap_action { - path = "s3://elasticmapreduce/bootstrap-actions/run-if" - name = "runif" - args = ["instance.isMaster=true", "echo running on master node"] + egress { + cidr_blocks = ["0.0.0.0/0"] + from_port = 0 + protocol = "-1" + to_port = 
0 } - bootstrap_action { - path = "s3://${aws_s3_bucket_object.testobject.bucket}/${aws_s3_bucket_object.testobject.key}" - name = "test" - - args = ["1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - ] + tags = { + Name = %[1]q } - bootstrap_action { - path = "s3://elasticmapreduce/bootstrap-actions/run-if" - name = "runif-2" - args = ["instance.isMaster=true", "echo also running on master node"] + # EMR will modify ingress rules + lifecycle { + ignore_changes = [ingress] } } -`, r), - ) -} - -func testAccAWSEmrClusterConfig_bootstrapReorder(r string) string { - return testAccAWSEmrComposeConfig(false, - testAccAWSEmrClusterConfigCurrentPartition(), - testAccAWSEmrClusterConfigIAMServiceRoleBase(r), - testAccAWSEmrClusterConfigIAMInstanceProfileBase(r), - testAccAWSEmrClusterConfigBootstrapActionBucket(r), - fmt.Sprintf(` -resource "aws_emr_cluster" "test" { - name = "%[1]s" - release_label = "emr-5.0.0" - applications = ["Hadoop", "Hive"] - log_uri = "s3n://terraform/testlog/" - master_instance_group { - instance_type = "c4.large" - } +resource "aws_subnet" "test" { + availability_zone = data.aws_availability_zones.available.names[0] + cidr_block = cidrsubnet(aws_vpc.test.cidr_block, 8, 0) + map_public_ip_on_launch = %[2]t + vpc_id = aws_vpc.test.id - core_instance_group { - instance_count = 1 - instance_type = "c4.large" + tags = { + Name = %[1]q } +} - service_role = aws_iam_role.emr_service.arn - depends_on = [ - aws_route_table_association.test, - aws_iam_role_policy_attachment.emr_service, - aws_iam_role_policy_attachment.emr_instance_profile, - ] +resource "aws_route_table" "test" { + vpc_id = aws_vpc.test.id - ec2_attributes { - subnet_id = aws_subnet.test.id - emr_managed_master_security_group = aws_security_group.test.id - emr_managed_slave_security_group = aws_security_group.test.id - instance_profile = aws_iam_instance_profile.emr_instance_profile.arn + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.test.id } 
+} - bootstrap_action { - path = "s3://elasticmapreduce/bootstrap-actions/run-if" - name = "runif" - args = ["instance.isMaster=true", "echo running on master node"] - } +resource "aws_route_table_association" "test" { + route_table_id = aws_route_table.test.id + subnet_id = aws_subnet.test.id +} +`, rName, mapPublicIPOnLaunch) +} - bootstrap_action { - path = "s3://elasticmapreduce/bootstrap-actions/run-if" - name = "runif-2" - args = ["instance.isMaster=true", "echo also running on master node"] - } +func testAccAWSEmrClusterConfigIAMInstanceProfileBase(rName string) string { + return fmt.Sprintf(` +resource "aws_iam_instance_profile" "emr_instance_profile" { + name = "%[1]s_profile" + role = aws_iam_role.emr_instance_profile.name +} - bootstrap_action { - path = "s3://${aws_s3_bucket_object.testobject.bucket}/${aws_s3_bucket_object.testobject.key}" - name = "test" +resource "aws_iam_role" "emr_instance_profile" { + name = "%[1]s_profile_role" - args = ["1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - ] - } + assume_role_policy = < Date: Tue, 5 Oct 2021 18:05:38 -0400 Subject: [PATCH 10/10] docs/emr_cluster: Clean up docs --- .changelog/17706.txt | 3 + website/docs/r/emr_cluster.html.markdown | 476 +++++++++++------------ 2 files changed, 228 insertions(+), 251 deletions(-) create mode 100644 .changelog/17706.txt diff --git a/.changelog/17706.txt b/.changelog/17706.txt new file mode 100644 index 00000000000..b2a8ef7639e --- /dev/null +++ b/.changelog/17706.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +resource/aws_emr_cluster: Add `log_encryption_kms_key_id` argument +``` \ No newline at end of file diff --git a/website/docs/r/emr_cluster.html.markdown b/website/docs/r/emr_cluster.html.markdown index 28eb199f038..9b28f8a540f 100644 --- a/website/docs/r/emr_cluster.html.markdown +++ b/website/docs/r/emr_cluster.html.markdown @@ -8,9 +8,7 @@ description: |- # Resource: aws_emr_cluster -Provides an Elastic MapReduce Cluster, a web service 
that makes it easy to -process large amounts of data efficiently. See [Amazon Elastic MapReduce Documentation](https://aws.amazon.com/documentation/elastic-mapreduce/) -for more information. +Provides an Elastic MapReduce Cluster, a web service that makes it easy to process large amounts of data efficiently. See [Amazon Elastic MapReduce Documentation](https://aws.amazon.com/documentation/elastic-mapreduce/) for more information. To configure [Instance Groups](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-instance-group-configuration.html#emr-plan-instance-groups) for [task nodes](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-master-core-task-nodes.html#emr-plan-task), see the [`aws_emr_instance_group` resource](/docs/providers/aws/r/emr_instance_group.html). @@ -138,15 +136,9 @@ EOF } ``` -The `aws_emr_cluster` resource typically requires two IAM roles, one for the EMR Cluster -to use as a service, and another to place on your Cluster Instances to interact -with AWS from those instances. The suggested role policy template for the EMR service is `AmazonElasticMapReduceRole`, -and `AmazonElasticMapReduceforEC2Role` for the EC2 profile. See the [Getting -Started](https://docs.aws.amazon.com/ElasticMapReduce/latest/ManagementGuide/emr-gs-launch-sample-cluster.html) -guide for more information on these IAM roles. There is also a fully-bootable -example Terraform configuration at the bottom of this page. +The `aws_emr_cluster` resource typically requires two IAM roles, one for the EMR Cluster to use as a service, and another to place on your Cluster Instances to interact with AWS from those instances. The suggested role policy template for the EMR service is `AmazonElasticMapReduceRole`, and `AmazonElasticMapReduceforEC2Role` for the EC2 profile. See the [Getting Started](https://docs.aws.amazon.com/ElasticMapReduce/latest/ManagementGuide/emr-gs-launch-sample-cluster.html) guide for more information on these IAM roles. 
There is also a fully-bootable example Terraform configuration at the bottom of this page. -## Instance Fleet +### Instance Fleet ```terraform resource "aws_emr_cluster" "example" { @@ -240,10 +232,7 @@ resource "aws_emr_instance_fleet" "task" { ### Enable Debug Logging -[Debug logging in EMR](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-plan-debugging.html) -is implemented as a step. It is highly recommended to utilize the -[lifecycle configuration block](https://www.terraform.io/docs/configuration/meta-arguments/lifecycle.html) with `ignore_changes` if other -steps are being managed outside of Terraform. +[Debug logging in EMR](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-plan-debugging.html) is implemented as a step. It is highly recommended that you utilize the [lifecycle configuration block](https://www.terraform.io/docs/configuration/meta-arguments/lifecycle.html) with `ignore_changes` if other steps are being managed outside of Terraform. ```terraform resource "aws_emr_cluster" "example" { @@ -315,243 +304,9 @@ resource "aws_emr_cluster" "example" { } ``` -## Argument Reference - -The following arguments are supported: - -* `name` - (Required) The name of the job flow -* `release_label` - (Required) The release label for the Amazon EMR release -* `master_instance_group` - (Optional) Configuration block to use an [Instance Group](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-instance-group-configuration.html#emr-plan-instance-groups) for the [master node type](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-master-core-task-nodes.html#emr-plan-master). -* `master_instance_fleet` - (Optional) Configuration block to use an [Instance Fleet](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-instance-fleet.html) for the master node type. Cannot be specified if any `master_instance_group` configuration blocks are set. Detailed below. 
-* `scale_down_behavior` - (Optional) The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an `instance group` is resized. -* `additional_info` - (Optional) A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore Terraform cannot detect drift from the actual EMR cluster if its value is changed outside Terraform. -* `service_role` - (Required) IAM role that will be assumed by the Amazon EMR service to access AWS resources -* `security_configuration` - (Optional) The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with `release_label` 4.8.0 or greater -* `core_instance_group` - (Optional) Configuration block to use an [Instance Group](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-instance-group-configuration.html#emr-plan-instance-groups) for the [core node type](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-master-core-task-nodes.html#emr-plan-core). -* `core_instance_fleet` - (Optional) Configuration block to use an [Instance Fleet](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-instance-fleet.html) for the core node type. Cannot be specified if any `core_instance_group` configuration blocks are set. Detailed below. -* `log_encryption_kms_key_id` - (Optional) The AWS KMS customer master key (CMK) key ID or arn used for encrypting log files. This attribute is only available with EMR version 5.30.0 and later, excluding EMR 6.0.0. -* `log_uri` - (Optional) S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created -* `applications` - (Optional) A list of applications for the cluster. Valid values are: `Flink`, `Hadoop`, `Hive`, `Mahout`, `Pig`, `Spark`, and `JupyterHub` (as of EMR 5.14.0). 
Case insensitive -* `termination_protection` - (Optional) Switch on/off termination protection (default is `false`, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to `false`. -* `keep_job_flow_alive_when_no_steps` - (Optional) Switch on/off run cluster with no steps or when all steps are complete (default is on) -* `ec2_attributes` - (Optional) Attributes for the EC2 instances running the job flow. Defined below -* `kerberos_attributes` - (Optional) Kerberos configuration for the cluster. Defined below -* `ebs_root_volume_size` - (Optional) Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later. -* `custom_ami_id` - (Optional) A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later. -* `bootstrap_action` - (Optional) Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below. -* `configurations` - (Optional) List of configurations supplied for the EMR cluster you are creating. Supply a configuration object for applications to override their default configuration. See [AWS Documentation](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html) for more information. -* `configurations_json` - (Optional) A JSON string for supplying list of configurations for the EMR cluster. - -~> **NOTE on configurations_json:** If the `Configurations` value is empty then you should skip -the `Configurations` field instead of providing empty list as value `"Configurations": []`. - -```terraform -resource "aws_emr_cluster" "cluster" { - # ... other configuration ... 
- - configurations_json = < **NOTE on EMR-Managed security groups:** These security groups will have any -missing inbound or outbound access rules added and maintained by AWS, to ensure -proper communication between instances in a cluster. The EMR service will -maintain these rules for groups provided in `emr_managed_master_security_group` -and `emr_managed_slave_security_group`; attempts to remove the required rules -may succeed, only for the EMR service to re-add them in a matter of minutes. -This may cause Terraform to fail to destroy an environment that contains an EMR -cluster, because the EMR service does not revoke rules added on deletion, -leaving a cyclic dependency between the security groups that prevents their -deletion. To avoid this, use the `revoke_rules_on_delete` optional attribute for -any Security Group used in `emr_managed_master_security_group` and -`emr_managed_slave_security_group`. See [Amazon EMR-Managed Security -Groups](http://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-man-sec-groups.html) -for more information about the EMR-managed security group rules. - -## kerberos_attributes - -Attributes for Kerberos configuration - -* `ad_domain_join_password` - (Optional) The Active Directory password for `ad_domain_join_user`. Terraform cannot perform drift detection of this configuration. -* `ad_domain_join_user` - (Optional) Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. Terraform cannot perform drift detection of this configuration. -* `cross_realm_trust_principal_password` - (Optional) Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. Terraform cannot perform drift detection of this configuration. 
-* `kdc_admin_password` - (Required) The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. Terraform cannot perform drift detection of this configuration. -* `realm` - (Required) The name of the Kerberos realm to which all nodes in a cluster belong. For example, `EC2.INTERNAL` - -## master_instance_group Configuration Block - -Supported nested arguments for the `master_instance_group` configuration block: - -* `instance_type` - (Required) EC2 instance type for all instances in the instance group. -* `bid_price` - (Optional) Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances. -* `ebs_config` - (Optional) Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below. -* `instance_count` - (Optional) Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource's `core_instance_group` to be configured. Public (Internet accessible) instances must be created in VPC subnets that have [map public IP on launch](/docs/providers/aws/r/subnet.html#map_public_ip_on_launch) enabled. Termination protection is automatically enabled when launched with multiple master nodes and Terraform must have the `termination_protection = false` configuration applied before destroying this resource. -* `name` - (Optional) Friendly name given to the instance group. - -## ebs_config - -Attributes for the EBS volumes attached to each EC2 instance in the `master_instance_group` and `core_instance_group` configuration blocks: +### Bootable Cluster -* `size` - (Required) The volume size, in gibibytes (GiB). 
-* `type` - (Required) The volume type. Valid options are `gp2`, `io1`, `standard` and `st1`. See [EBS Volume Types](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html). -* `iops` - (Optional) The number of I/O operations per second (IOPS) that the volume supports -* `volumes_per_instance` - (Optional) The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1) - -## bootstrap_action - -* `name` - (Required) Name of the bootstrap action -* `path` - (Required) Location of the script to run during a bootstrap action. Can be either a location in Amazon S3 or on a local file system -* `args` - (Optional) List of command line arguments to pass to the bootstrap action script - -## step - -This argument is processed in [attribute-as-blocks mode](https://www.terraform.io/docs/configuration/attr-as-blocks.html). - -Attributes for step configuration - -* `action_on_failure` - (Required) The action to take if the step fails. Valid values: `TERMINATE_JOB_FLOW`, `TERMINATE_CLUSTER`, `CANCEL_AND_WAIT`, and `CONTINUE` -* `hadoop_jar_step` - (Required) The JAR file used for the step. Defined below. -* `name` - (Required) The name of the step. - -### hadoop_jar_step - -This argument is processed in [attribute-as-blocks mode](https://www.terraform.io/docs/configuration/attr-as-blocks.html). - -Attributes for Hadoop job step configuration - -* `args` - (Optional) List of command line arguments passed to the JAR file's main function when executed. -* `jar` - (Required) Path to a JAR file run during the step. -* `main_class` - (Optional) Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file. -* `properties` - (Optional) Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function. 
- -## master_instance_fleet Configuration Block - -Supported nested arguments for the `master_instance_fleet` configuration block: - -* `instance_type_configs` - (Optional) Configuration block for instance fleet -* `launch_specifications` - (Optional) Configuration block for launch specification -* `target_on_demand_capacity` - (Optional) The target capacity of On-Demand units for the instance fleet, which determines how many On-Demand instances to provision. -* `target_spot_capacity` - (Optional) The target capacity of Spot units for the instance fleet, which determines how many Spot instances to provision. -* `name` - (Optional) Friendly name given to the instance fleet. - -## core_instance_fleet Configuration Block - -Supported nested arguments for the `core_instance_fleet` configuration block: - -* `instance_type_configs` - (Optional) Configuration block for instance fleet -* `launch_specifications` - (Optional) Configuration block for launch specification -* `target_on_demand_capacity` - (Optional) The target capacity of On-Demand units for the instance fleet, which determines how many On-Demand instances to provision. -* `target_spot_capacity` - (Optional) The target capacity of Spot units for the instance fleet, which determines how many Spot instances to provision. -* `name` - (Optional) Friendly name given to the instance fleet. - -## instance_type_configs Configuration Block - -* `bid_price` - (Optional) The bid price for each EC2 Spot instance type as defined by `instance_type`. Expressed in USD. If neither `bid_price` nor `bid_price_as_percentage_of_on_demand_price` is provided, `bid_price_as_percentage_of_on_demand_price` defaults to 100%. -* `bid_price_as_percentage_of_on_demand_price` - (Optional) The bid price, as a percentage of On-Demand price, for each EC2 Spot instance as defined by `instance_type`. Expressed as a number (for example, 20 specifies 20%). 
If neither `bid_price` nor `bid_price_as_percentage_of_on_demand_price` is provided, `bid_price_as_percentage_of_on_demand_price` defaults to 100%. -* `configurations` - (Optional) A configuration classification that applies when provisioning cluster instances, which can include configurations for applications and software that run on the cluster. List of `configuration` blocks. -* `ebs_config` - (Optional) Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below. -* `instance_type` - (Required) An EC2 instance type, such as m4.xlarge. -* `weighted_capacity` - (Optional) The number of units that a provisioned instance of this type provides toward fulfilling the target capacities defined in `aws_emr_instance_fleet`. - -## configurations Configuration Block - -A configuration classification that applies when provisioning cluster instances, which can include configurations for applications and software that run on the cluster. See [Configuring Applications](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html). - -* `classification` - (Optional) The classification within a configuration. -* `properties` - (Optional) A map of properties specified within a configuration classification - -## launch_specifications Configuration Block - -* `on_demand_specification` - (Optional) Configuration block for on demand instances launch specifications -* `spot_specification` - (Optional) Configuration block for spot instances launch specifications - -## on_demand_specification Configuration Block - -The launch specification for On-Demand instances in the instance fleet, which determines the allocation strategy. -The instance fleet configuration is available only in Amazon EMR versions 4.8.0 and later, excluding 5.0.x versions. On-Demand instances allocation strategy is available in Amazon EMR version 5.12.1 and later. 
- -* `allocation_strategy` - (Required) Specifies the strategy to use in launching On-Demand instance fleets. Currently, the only option is `lowest-price` (the default), which launches the lowest price first. - -## spot_specification Configuration Block - -The launch specification for Spot instances in the fleet, which determines the defined duration, provisioning timeout behavior, and allocation strategy. - -* `allocation_strategy` - (Required) Specifies the strategy to use in launching Spot instance fleets. Currently, the only option is `capacity-optimized` (the default), which launches instances from Spot instance pools with optimal capacity for the number of instances that are launching. -* `block_duration_minutes` - (Optional) The defined duration for Spot instances (also known as Spot blocks) in minutes. When specified, the Spot instance does not terminate before the defined duration expires, and defined duration pricing for Spot instances applies. Valid values are 60, 120, 180, 240, 300, or 360. The duration period starts as soon as a Spot instance receives its instance ID. At the end of the duration, Amazon EC2 marks the Spot instance for termination and provides a Spot instance termination notice, which gives the instance a two-minute warning before it terminates. -* `timeout_action` - (Required) The action to take when TargetSpotCapacity has not been fulfilled when the TimeoutDurationMinutes has expired; that is, when all Spot instances could not be provisioned within the Spot provisioning timeout. Valid values are `TERMINATE_CLUSTER` and `SWITCH_TO_ON_DEMAND`. SWITCH_TO_ON_DEMAND specifies that if no Spot instances are available, On-Demand Instances should be provisioned to fulfill any remaining Spot capacity. -* `timeout_duration_minutes` - (Required) The spot provisioning timeout period in minutes. If Spot instances are not provisioned within this time period, the TimeOutAction is taken. Minimum value is 5 and maximum value is 1440. 
The timeout applies only during initial provisioning, when the cluster is first created. - -## Attributes Reference - -In addition to all arguments above, the following attributes are exported: - -* `arn`- The ARN of the cluster. -* `id` - The ID of the EMR Cluster -* `name` - The name of the cluster. -* `release_label` - The release label for the Amazon EMR release. -* `master_instance_group.0.id` - Master node type Instance Group ID, if using Instance Group for this node type. -* `master_public_dns` - The public DNS name of the master EC2 instance. -* `core_instance_group.0.id` - Core node type Instance Group ID, if using Instance Group for this node type. -* `log_uri` - The path to the Amazon S3 location where logs for this cluster are stored. -* `applications` - The applications installed on this cluster. -* `ec2_attributes` - Provides information about the EC2 instances in a cluster grouped by category: key name, subnet ID, IAM instance profile, and so on. -* `bootstrap_action` - A list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. -* `configurations` - The list of Configurations supplied to the EMR cluster. -* `service_role` - The IAM role that will be assumed by the Amazon EMR service to access AWS resources on your behalf. -* `visible_to_all_users` - Indicates whether the job flow is visible to all IAM users of the AWS account associated with the job flow. -* `tags_all` - A map of tags assigned to the resource, including those inherited from the provider [`default_tags` configuration block](https://www.terraform.io/docs/providers/aws/index.html#default_tags-configuration-block). - -## Example bootable config - -**NOTE:** This configuration demonstrates a minimal configuration needed to -boot an example EMR Cluster. It is not meant to display best practices. Please -use at your own risk. +**NOTE:** This configuration demonstrates a minimal configuration needed to boot an example EMR Cluster. 
It is not meant to display best practices. As with all examples, use at your own risk. ```terraform resource "aws_emr_cluster" "cluster" { @@ -852,6 +607,225 @@ EOF } ``` +## Argument Reference + +The following arguments are required: + +* `name` - (Required) Name of the job flow. +* `release_label` - (Required) Release label for the Amazon EMR release. +* `service_role` - (Required) IAM role that will be assumed by the Amazon EMR service to access AWS resources. + +The following arguments are optional: + +* `additional_info` - (Optional) JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore Terraform cannot detect drift from the actual EMR cluster if its value is changed outside Terraform. +* `applications` - (Optional) List of applications for the cluster. Valid values are: `Flink`, `Hadoop`, `Hive`, `Mahout`, `Pig`, `Spark`, and `JupyterHub` (as of EMR 5.14.0). Case insensitive. +* `autoscaling_role` - (Optional) IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group. +* `bootstrap_action` - (Optional) Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. See below. +* `configurations` - (Optional) List of configurations supplied for the EMR cluster you are creating. Supply a configuration object for applications to override their default configuration. See [AWS Documentation](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html) for more information. +* `configurations_json` - (Optional) JSON string for supplying list of configurations for the EMR cluster. 
+ +~> **NOTE on `configurations_json`:** If the `Configurations` value is empty then you should skip the `Configurations` field instead of providing an empty list as a value, `"Configurations": []`. + +```terraform +resource "aws_emr_cluster" "cluster" { + # ... other configuration ... + + configurations_json = < **NOTE on EMR-Managed security groups:** These security groups will have any missing inbound or outbound access rules added and maintained by AWS, to ensure proper communication between instances in a cluster. The EMR service will maintain these rules for groups provided in `emr_managed_master_security_group` and `emr_managed_slave_security_group`; attempts to remove the required rules may succeed, only for the EMR service to re-add them in a matter of minutes. This may cause Terraform to fail to destroy an environment that contains an EMR cluster, because the EMR service does not revoke rules added on deletion, leaving a cyclic dependency between the security groups that prevents their deletion. To avoid this, use the `revoke_rules_on_delete` optional attribute for any Security Group used in `emr_managed_master_security_group` and `emr_managed_slave_security_group`. See [Amazon EMR-Managed Security Groups](http://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-man-sec-groups.html) for more information about the EMR-managed security group rules. + +### kerberos_attributes + +* `ad_domain_join_password` - (Optional) Active Directory password for `ad_domain_join_user`. Terraform cannot perform drift detection of this configuration. +* `ad_domain_join_user` - (Optional) Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. Terraform cannot perform drift detection of this configuration. +* `cross_realm_trust_principal_password` - (Optional) Required only when establishing a cross-realm trust with a KDC in a different realm. 
The cross-realm principal password, which must be identical across realms. Terraform cannot perform drift detection of this configuration. +* `kdc_admin_password` - (Required) Password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. Terraform cannot perform drift detection of this configuration. +* `realm` - (Required) Name of the Kerberos realm to which all nodes in a cluster belong. For example, `EC2.INTERNAL`. + +### master_instance_fleet + +* `instance_type_configs` - (Optional) Configuration block for instance fleet. +* `launch_specifications` - (Optional) Configuration block for launch specification. +* `name` - (Optional) Friendly name given to the instance fleet. +* `target_on_demand_capacity` - (Optional) Target capacity of On-Demand units for the instance fleet, which determines how many On-Demand instances to provision. +* `target_spot_capacity` - (Optional) Target capacity of Spot units for the instance fleet, which determines how many Spot instances to provision. + +#### instance_type_configs + +See `instance_type_configs` above, under `core_instance_fleet`. + +#### launch_specifications + +See `launch_specifications` above, under `core_instance_fleet`. + +### master_instance_group + +Supported nested arguments for the `master_instance_group` configuration block: + +* `bid_price` - (Optional) Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances. +* `ebs_config` - (Optional) Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below. +* `instance_count` - (Optional) Target number of instances for the instance group. Must be 1 or 3. Defaults to 1.
Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource's `core_instance_group` to be configured. Public (Internet accessible) instances must be created in VPC subnets that have [map public IP on launch](/docs/providers/aws/r/subnet.html#map_public_ip_on_launch) enabled. Termination protection is automatically enabled when launched with multiple master nodes and Terraform must have the `termination_protection = false` configuration applied before destroying this resource. +* `instance_type` - (Required) EC2 instance type for all instances in the instance group. +* `name` - (Optional) Friendly name given to the instance group. + +#### ebs_config + +See `ebs_config` under `core_instance_group` above. + +### step + +This argument is processed in [attribute-as-blocks mode](https://www.terraform.io/docs/configuration/attr-as-blocks.html). + +* `action_on_failure` - (Required) Action to take if the step fails. Valid values: `TERMINATE_JOB_FLOW`, `TERMINATE_CLUSTER`, `CANCEL_AND_WAIT`, and `CONTINUE` +* `hadoop_jar_step` - (Required) JAR file used for the step. See below. +* `name` - (Required) Name of the step. + +#### hadoop_jar_step + +This argument is processed in [attribute-as-blocks mode](https://www.terraform.io/docs/configuration/attr-as-blocks.html). + +* `args` - (Optional) List of command line arguments passed to the JAR file's main function when executed. +* `jar` - (Required) Path to a JAR file run during the step. +* `main_class` - (Optional) Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file. +* `properties` - (Optional) Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function. + +## Attributes Reference + +In addition to all arguments above, the following attributes are exported: + +* `applications` - Applications installed on this cluster. 
+* `arn` - ARN of the cluster. +* `bootstrap_action` - List of bootstrap actions that will be run before Hadoop is started on the cluster nodes. +* `configurations` - List of Configurations supplied to the EMR cluster. +* `core_instance_group.0.id` - Core node type Instance Group ID, if using Instance Group for this node type. +* `ec2_attributes` - Provides information about the EC2 instances in a cluster grouped by category: key name, subnet ID, IAM instance profile, and so on. +* `id` - ID of the cluster. +* `log_uri` - Path to the Amazon S3 location where logs for this cluster are stored. +* `master_instance_group.0.id` - Master node type Instance Group ID, if using Instance Group for this node type. +* `master_public_dns` - Public DNS name of the master EC2 instance. +* `name` - Name of the cluster. +* `release_label` - Release label for the Amazon EMR release. +* `service_role` - IAM role that will be assumed by the Amazon EMR service to access AWS resources on your behalf. +* `tags_all` - Map of tags assigned to the resource, including those inherited from the provider [`default_tags` configuration block](https://www.terraform.io/docs/providers/aws/index.html#default_tags-configuration-block). +* `visible_to_all_users` - Indicates whether the job flow is visible to all IAM users of the AWS account associated with the job flow. + ## Import EMR clusters can be imported using the `id`, e.g.