diff --git a/.changelog/17706.txt b/.changelog/17706.txt new file mode 100644 index 00000000000..b2a8ef7639e --- /dev/null +++ b/.changelog/17706.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +resource/aws_emr_cluster: Add `log_encryption_kms_key_id` argument +``` \ No newline at end of file diff --git a/aws/resource_aws_emr_cluster.go b/aws/resource_aws_emr_cluster.go index 48cc18a49b8..618b99a99f3 100644 --- a/aws/resource_aws_emr_cluster.go +++ b/aws/resource_aws_emr_cluster.go @@ -65,6 +65,11 @@ func resourceAwsEMRCluster() *schema.Resource { Type: schema.TypeString, Computed: true, }, + "log_encryption_kms_key_id": { + Type: schema.TypeString, + ForceNew: true, + Optional: true, + }, "log_uri": { Type: schema.TypeString, ForceNew: true, @@ -856,6 +861,10 @@ func resourceAwsEMRClusterCreate(d *schema.ResourceData, meta interface{}) error params.AdditionalInfo = aws.String(info) } + if v, ok := d.GetOk("log_encryption_kms_key_id"); ok { + params.LogEncryptionKmsKeyId = aws.String(v.(string)) + } + if v, ok := d.GetOk("log_uri"); ok { params.LogUri = aws.String(v.(string)) } @@ -1093,6 +1102,7 @@ func resourceAwsEMRClusterRead(d *schema.ResourceData, meta interface{}) error { d.Set("security_configuration", cluster.SecurityConfiguration) d.Set("autoscaling_role", cluster.AutoScalingRole) d.Set("release_label", cluster.ReleaseLabel) + d.Set("log_encryption_kms_key_id", cluster.LogEncryptionKmsKeyId) d.Set("log_uri", cluster.LogUri) d.Set("master_public_dns", cluster.MasterPublicDnsName) d.Set("visible_to_all_users", cluster.VisibleToAllUsers) diff --git a/aws/resource_aws_emr_cluster_test.go b/aws/resource_aws_emr_cluster_test.go index a10cca67782..afebd0bec5a 100644 --- a/aws/resource_aws_emr_cluster_test.go +++ b/aws/resource_aws_emr_cluster_test.go @@ -80,7 +80,7 @@ func testSweepEmrClusters(region string) error { func TestAccAWSEMRCluster_basic(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := 
"aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -123,7 +123,7 @@ func TestAccAWSEMRCluster_additionalInfo(t *testing.T) { } }` - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -158,7 +158,7 @@ func TestAccAWSEMRCluster_additionalInfo(t *testing.T) { func TestAccAWSEMRCluster_disappears(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -181,7 +181,7 @@ func TestAccAWSEMRCluster_disappears(t *testing.T) { func TestAccAWSEMRCluster_configurationsJson(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -510,7 +510,7 @@ func TestAccAWSEMRCluster_Ec2Attributes_DefaultManagedSecurityGroups(t *testing. var vpc ec2.Vpc rName := acctest.RandomWithPrefix("tf-acc-test") - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" vpcResourceName := "aws_vpc.test" resource.ParallelTest(t, resource.TestCase{ @@ -551,7 +551,7 @@ func TestAccAWSEMRCluster_Ec2Attributes_DefaultManagedSecurityGroups(t *testing. 
func TestAccAWSEMRCluster_Kerberos_ClusterDedicatedKdc(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") password := fmt.Sprintf("NeverKeepPasswordsInPlainText%s!", rName) resource.ParallelTest(t, resource.TestCase{ @@ -755,7 +755,7 @@ func TestAccAWSEMRCluster_MasterInstanceGroup_Name(t *testing.T) { func TestAccAWSEMRCluster_security_config(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -767,7 +767,7 @@ func TestAccAWSEMRCluster_security_config(t *testing.T) { Config: testAccAWSEmrClusterConfig_SecurityConfiguration(rName), Check: resource.ComposeTestCheckFunc( testAccCheckAWSEmrClusterExists(resourceName, &cluster), - resource.TestCheckResourceAttrPair(resourceName, "security_configuration", "aws_emr_security_configuration.foo", "name"), + resource.TestCheckResourceAttrPair(resourceName, "security_configuration", "aws_emr_security_configuration.test", "name"), ), }, { @@ -787,7 +787,7 @@ func TestAccAWSEMRCluster_security_config(t *testing.T) { func TestAccAWSEMRCluster_Step_Basic(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -825,7 +825,7 @@ func TestAccAWSEMRCluster_Step_Basic(t *testing.T) { func TestAccAWSEMRCluster_Step_ConfigMode(t *testing.T) { var cluster1, cluster2, cluster3 emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() 
{ testAccPreCheck(t) }, @@ -891,7 +891,7 @@ func TestAccAWSEMRCluster_Step_ConfigMode(t *testing.T) { func TestAccAWSEMRCluster_Step_Multiple(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1033,7 +1033,7 @@ func TestAccAWSEMRCluster_bootstrap_ordering(t *testing.T) { func TestAccAWSEMRCluster_terminationProtected(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1090,7 +1090,7 @@ func TestAccAWSEMRCluster_terminationProtected(t *testing.T) { func TestAccAWSEMRCluster_keepJob(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1099,7 +1099,7 @@ func TestAccAWSEMRCluster_keepJob(t *testing.T) { CheckDestroy: testAccCheckAWSEmrDestroy, Steps: []resource.TestStep{ { - Config: testAccAWSEmrClusterConfig_keepJob(rName, "false"), + Config: testAccAWSEmrClusterConfig_keepJob(rName, false), Check: resource.ComposeTestCheckFunc( testAccCheckAWSEmrClusterExists(resourceName, &cluster), resource.TestCheckResourceAttr(resourceName, "keep_job_flow_alive_when_no_steps", "false"), @@ -1122,7 +1122,7 @@ func TestAccAWSEMRCluster_keepJob(t *testing.T) { func TestAccAWSEMRCluster_visibleToAllUsers(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: 
func() { testAccPreCheck(t) }, @@ -1171,7 +1171,7 @@ func TestAccAWSEMRCluster_visibleToAllUsers(t *testing.T) { func TestAccAWSEMRCluster_s3Logging(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") bucketName := fmt.Sprintf("s3n://%s/", rName) resource.ParallelTest(t, resource.TestCase{ @@ -1201,10 +1201,44 @@ func TestAccAWSEMRCluster_s3Logging(t *testing.T) { }) } +func TestAccAWSEMRCluster_s3LogEncryption(t *testing.T) { + var cluster emr.Cluster + + resourceName := "aws_emr_cluster.test" + rName := acctest.RandomWithPrefix("tf-acc-test") + bucketName := fmt.Sprintf("s3n://%s/", rName) + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + ErrorCheck: testAccErrorCheck(t, emr.EndpointsID), + Providers: testAccProviders, + CheckDestroy: testAccCheckAWSEmrDestroy, + Steps: []resource.TestStep{ + { + Config: testAccAWSEmrClusterConfigS3Encryption(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSEmrClusterExists(resourceName, &cluster), + resource.TestCheckResourceAttr(resourceName, "log_uri", bucketName), + testAccMatchResourceAttrRegionalARN(resourceName, "log_encryption_kms_key_id", "kms", regexp.MustCompile(`key/.+`)), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: []string{ + "cluster_state", // Ignore RUNNING versus WAITING changes + "configurations", + "keep_job_flow_alive_when_no_steps", + }, + }, + }, + }) +} + func TestAccAWSEMRCluster_tags(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1249,7 +1283,7 @@ func TestAccAWSEMRCluster_tags(t *testing.T) { func 
TestAccAWSEMRCluster_root_volume_size(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1288,7 +1322,7 @@ func TestAccAWSEMRCluster_root_volume_size(t *testing.T) { func TestAccAWSEMRCluster_step_concurrency_level(t *testing.T) { var cluster emr.Cluster rName := acctest.RandomWithPrefix("tf-acc-test") - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, ErrorCheck: testAccErrorCheck(t, emr.EndpointsID), @@ -1326,7 +1360,7 @@ func TestAccAWSEMRCluster_step_concurrency_level(t *testing.T) { func TestAccAWSEMRCluster_ebs_config(t *testing.T) { var cluster emr.Cluster rName := acctest.RandomWithPrefix("tf-acc-test") - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, ErrorCheck: testAccErrorCheck(t, emr.EndpointsID), @@ -1358,7 +1392,7 @@ func TestAccAWSEMRCluster_ebs_config(t *testing.T) { func TestAccAWSEMRCluster_custom_ami_id(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1390,7 +1424,7 @@ func TestAccAWSEMRCluster_custom_ami_id(t *testing.T) { func TestAccAWSEMRCluster_InstanceFleet_basic(t *testing.T) { var cluster1, cluster2 emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" subnetResourceName := "aws_subnet.test" subnet2ResourceName := "aws_subnet.test2" rName := acctest.RandomWithPrefix("tf-acc-test") @@ -1457,7 +1491,7 @@ 
func TestAccAWSEMRCluster_InstanceFleet_basic(t *testing.T) { func TestAccAWSEMRCluster_InstanceFleet_master_only(t *testing.T) { var cluster emr.Cluster - resourceName := "aws_emr_cluster.tf-test-cluster" + resourceName := "aws_emr_cluster.test" rName := acctest.RandomWithPrefix("tf-acc-test") resource.ParallelTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, @@ -1697,235 +1731,252 @@ func testAccEmrDeleteManagedSecurityGroup(conn *ec2.EC2, securityGroup *ec2.Secu return err } -func testAccAWSEmrComposeConfig(mapPublicIPOnLaunch bool, config ...string) string { - return composeConfig(append(config, testAccAWSEmrClusterConfigBaseVpc(mapPublicIPOnLaunch))...) -} - -func testAccAWSEmrClusterConfigCurrentPartition() string { - return ` -data "aws_partition" "current" {} -` -} - -func testAccAWSEmrClusterConfig_bootstrap(r string) string { - return testAccAWSEmrComposeConfig(false, - testAccAWSEmrClusterConfigCurrentPartition(), - testAccAWSEmrClusterConfigIAMServiceRoleBase(r), - testAccAWSEmrClusterConfigIAMInstanceProfileBase(r), - testAccAWSEmrClusterConfigBootstrapActionBucket(r), - fmt.Sprintf(` -resource "aws_emr_cluster" "test" { - name = "%[1]s" - release_label = "emr-5.0.0" - applications = ["Hadoop", "Hive"] - log_uri = "s3n://terraform/testlog/" - - master_instance_group { - instance_type = "c4.large" - } +// Sub-configs (used by other configs) - core_instance_group { - instance_count = 1 - instance_type = "c4.large" +func testAccAWSEmrClusterConfigBaseVpc(rName string, mapPublicIPOnLaunch bool) string { + return fmt.Sprintf(` +data "aws_availability_zones" "available" { + # Many instance types are not available in this availability zone + exclude_zone_ids = ["usw2-az4"] + state = "available" + filter { + name = "opt-in-status" + values = ["opt-in-not-required"] } +} - service_role = aws_iam_role.emr_service.arn - depends_on = [ - aws_route_table_association.test, - aws_iam_role_policy_attachment.emr_service, - 
aws_iam_role_policy_attachment.emr_instance_profile, - ] - - ec2_attributes { - subnet_id = aws_subnet.test.id - emr_managed_master_security_group = aws_security_group.test.id - emr_managed_slave_security_group = aws_security_group.test.id - instance_profile = aws_iam_instance_profile.emr_instance_profile.arn - } +resource "aws_vpc" "test" { + cidr_block = "10.0.0.0/16" + enable_dns_hostnames = true - bootstrap_action { - path = "s3://elasticmapreduce/bootstrap-actions/run-if" - name = "runif" - args = ["instance.isMaster=true", "echo running on master node"] + tags = { + Name = %[1]q } +} - bootstrap_action { - path = "s3://${aws_s3_bucket_object.testobject.bucket}/${aws_s3_bucket_object.testobject.key}" - name = "test" +resource "aws_internet_gateway" "test" { + vpc_id = aws_vpc.test.id - args = ["1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - ] + tags = { + Name = %[1]q } } -`, r), - ) -} -func testAccAWSEmrClusterConfig_bootstrapAdd(r string) string { - return testAccAWSEmrComposeConfig(false, - testAccAWSEmrClusterConfigCurrentPartition(), - testAccAWSEmrClusterConfigIAMServiceRoleBase(r), - testAccAWSEmrClusterConfigIAMInstanceProfileBase(r), - testAccAWSEmrClusterConfigBootstrapActionBucket(r), - fmt.Sprintf(` -resource "aws_emr_cluster" "test" { - name = "%[1]s" - release_label = "emr-5.0.0" - applications = ["Hadoop", "Hive"] - log_uri = "s3n://terraform/testlog/" +resource "aws_security_group" "test" { + vpc_id = aws_vpc.test.id - master_instance_group { - instance_type = "c4.large" + ingress { + from_port = 0 + protocol = "-1" + self = true + to_port = 0 } - core_instance_group { - instance_count = 1 - instance_type = "c4.large" + egress { + cidr_blocks = ["0.0.0.0/0"] + from_port = 0 + protocol = "-1" + to_port = 0 } - service_role = aws_iam_role.emr_service.arn - depends_on = [ - aws_route_table_association.test, - aws_iam_role_policy_attachment.emr_service, - aws_iam_role_policy_attachment.emr_instance_profile, - ] - - 
ec2_attributes { - subnet_id = aws_subnet.test.id - emr_managed_master_security_group = aws_security_group.test.id - emr_managed_slave_security_group = aws_security_group.test.id - instance_profile = aws_iam_instance_profile.emr_instance_profile.arn + tags = { + Name = %[1]q } - bootstrap_action { - path = "s3://elasticmapreduce/bootstrap-actions/run-if" - name = "runif" - args = ["instance.isMaster=true", "echo running on master node"] + # EMR will modify ingress rules + lifecycle { + ignore_changes = [ingress] } +} - bootstrap_action { - path = "s3://${aws_s3_bucket_object.testobject.bucket}/${aws_s3_bucket_object.testobject.key}" - name = "test" +resource "aws_subnet" "test" { + availability_zone = data.aws_availability_zones.available.names[0] + cidr_block = cidrsubnet(aws_vpc.test.cidr_block, 8, 0) + map_public_ip_on_launch = %[2]t + vpc_id = aws_vpc.test.id - args = ["1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - ] + tags = { + Name = %[1]q } +} - bootstrap_action { - path = "s3://elasticmapreduce/bootstrap-actions/run-if" - name = "runif-2" - args = ["instance.isMaster=true", "echo also running on master node"] +resource "aws_route_table" "test" { + vpc_id = aws_vpc.test.id + + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.test.id } } -`, r), - ) -} -func testAccAWSEmrClusterConfig_bootstrapReorder(r string) string { - return testAccAWSEmrComposeConfig(false, - testAccAWSEmrClusterConfigCurrentPartition(), - testAccAWSEmrClusterConfigIAMServiceRoleBase(r), - testAccAWSEmrClusterConfigIAMInstanceProfileBase(r), - testAccAWSEmrClusterConfigBootstrapActionBucket(r), - fmt.Sprintf(` -resource "aws_emr_cluster" "test" { - name = "%[1]s" - release_label = "emr-5.0.0" - applications = ["Hadoop", "Hive"] - log_uri = "s3n://terraform/testlog/" +resource "aws_route_table_association" "test" { + route_table_id = aws_route_table.test.id + subnet_id = aws_subnet.test.id +} +`, rName, mapPublicIPOnLaunch) +} - 
master_instance_group { - instance_type = "c4.large" - } +func testAccAWSEmrClusterConfigIAMInstanceProfileBase(rName string) string { + return fmt.Sprintf(` +resource "aws_iam_instance_profile" "emr_instance_profile" { + name = "%[1]s_profile" + role = aws_iam_role.emr_instance_profile.name +} - core_instance_group { - instance_count = 1 - instance_type = "c4.large" - } +resource "aws_iam_role" "emr_instance_profile" { + name = "%[1]s_profile_role" - service_role = aws_iam_role.emr_service.arn - depends_on = [ - aws_route_table_association.test, - aws_iam_role_policy_attachment.emr_service, - aws_iam_role_policy_attachment.emr_instance_profile, + assume_role_policy = < **NOTE on configurations_json:** If the `Configurations` value is empty then you should skip -the `Configurations` field instead of providing empty list as value `"Configurations": []`. - -```terraform -resource "aws_emr_cluster" "cluster" { - # ... other configuration ... - - configurations_json = < **NOTE on EMR-Managed security groups:** These security groups will have any -missing inbound or outbound access rules added and maintained by AWS, to ensure -proper communication between instances in a cluster. The EMR service will -maintain these rules for groups provided in `emr_managed_master_security_group` -and `emr_managed_slave_security_group`; attempts to remove the required rules -may succeed, only for the EMR service to re-add them in a matter of minutes. -This may cause Terraform to fail to destroy an environment that contains an EMR -cluster, because the EMR service does not revoke rules added on deletion, -leaving a cyclic dependency between the security groups that prevents their -deletion. To avoid this, use the `revoke_rules_on_delete` optional attribute for -any Security Group used in `emr_managed_master_security_group` and -`emr_managed_slave_security_group`. 
See [Amazon EMR-Managed Security -Groups](http://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-man-sec-groups.html) -for more information about the EMR-managed security group rules. - -## kerberos_attributes - -Attributes for Kerberos configuration - -* `ad_domain_join_password` - (Optional) The Active Directory password for `ad_domain_join_user`. Terraform cannot perform drift detection of this configuration. -* `ad_domain_join_user` - (Optional) Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. Terraform cannot perform drift detection of this configuration. -* `cross_realm_trust_principal_password` - (Optional) Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. Terraform cannot perform drift detection of this configuration. -* `kdc_admin_password` - (Required) The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. Terraform cannot perform drift detection of this configuration. -* `realm` - (Required) The name of the Kerberos realm to which all nodes in a cluster belong. For example, `EC2.INTERNAL` - -## master_instance_group Configuration Block - -Supported nested arguments for the `master_instance_group` configuration block: - -* `instance_type` - (Required) EC2 instance type for all instances in the instance group. -* `bid_price` - (Optional) Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances. -* `ebs_config` - (Optional) Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below. 
-* `instance_count` - (Optional) Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource's `core_instance_group` to be configured. Public (Internet accessible) instances must be created in VPC subnets that have [map public IP on launch](/docs/providers/aws/r/subnet.html#map_public_ip_on_launch) enabled. Termination protection is automatically enabled when launched with multiple master nodes and Terraform must have the `termination_protection = false` configuration applied before destroying this resource. -* `name` - (Optional) Friendly name given to the instance group. - -## ebs_config - -Attributes for the EBS volumes attached to each EC2 instance in the `master_instance_group` and `core_instance_group` configuration blocks: +### Bootable Cluster -* `size` - (Required) The volume size, in gibibytes (GiB). -* `type` - (Required) The volume type. Valid options are `gp2`, `io1`, `standard` and `st1`. See [EBS Volume Types](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html). -* `iops` - (Optional) The number of I/O operations per second (IOPS) that the volume supports -* `volumes_per_instance` - (Optional) The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1) - -## bootstrap_action - -* `name` - (Required) Name of the bootstrap action -* `path` - (Required) Location of the script to run during a bootstrap action. Can be either a location in Amazon S3 or on a local file system -* `args` - (Optional) List of command line arguments to pass to the bootstrap action script - -## step - -This argument is processed in [attribute-as-blocks mode](https://www.terraform.io/docs/configuration/attr-as-blocks.html). - -Attributes for step configuration - -* `action_on_failure` - (Required) The action to take if the step fails. 
Valid values: `TERMINATE_JOB_FLOW`, `TERMINATE_CLUSTER`, `CANCEL_AND_WAIT`, and `CONTINUE` -* `hadoop_jar_step` - (Required) The JAR file used for the step. Defined below. -* `name` - (Required) The name of the step. - -### hadoop_jar_step - -This argument is processed in [attribute-as-blocks mode](https://www.terraform.io/docs/configuration/attr-as-blocks.html). - -Attributes for Hadoop job step configuration - -* `args` - (Optional) List of command line arguments passed to the JAR file's main function when executed. -* `jar` - (Required) Path to a JAR file run during the step. -* `main_class` - (Optional) Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file. -* `properties` - (Optional) Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function. - -## master_instance_fleet Configuration Block - -Supported nested arguments for the `master_instance_fleet` configuration block: - -* `instance_type_configs` - (Optional) Configuration block for instance fleet -* `launch_specifications` - (Optional) Configuration block for launch specification -* `target_on_demand_capacity` - (Optional) The target capacity of On-Demand units for the instance fleet, which determines how many On-Demand instances to provision. -* `target_spot_capacity` - (Optional) The target capacity of Spot units for the instance fleet, which determines how many Spot instances to provision. -* `name` - (Optional) Friendly name given to the instance fleet. 
- -## core_instance_fleet Configuration Block - -Supported nested arguments for the `core_instance_fleet` configuration block: - -* `instance_type_configs` - (Optional) Configuration block for instance fleet -* `launch_specifications` - (Optional) Configuration block for launch specification -* `target_on_demand_capacity` - (Optional) The target capacity of On-Demand units for the instance fleet, which determines how many On-Demand instances to provision. -* `target_spot_capacity` - (Optional) The target capacity of Spot units for the instance fleet, which determines how many Spot instances to provision. -* `name` - (Optional) Friendly name given to the instance fleet. - -## instance_type_configs Configuration Block - -* `bid_price` - (Optional) The bid price for each EC2 Spot instance type as defined by `instance_type`. Expressed in USD. If neither `bid_price` nor `bid_price_as_percentage_of_on_demand_price` is provided, `bid_price_as_percentage_of_on_demand_price` defaults to 100%. -* `bid_price_as_percentage_of_on_demand_price` - (Optional) The bid price, as a percentage of On-Demand price, for each EC2 Spot instance as defined by `instance_type`. Expressed as a number (for example, 20 specifies 20%). If neither `bid_price` nor `bid_price_as_percentage_of_on_demand_price` is provided, `bid_price_as_percentage_of_on_demand_price` defaults to 100%. -* `configurations` - (Optional) A configuration classification that applies when provisioning cluster instances, which can include configurations for applications and software that run on the cluster. List of `configuration` blocks. -* `ebs_config` - (Optional) Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below. -* `instance_type` - (Required) An EC2 instance type, such as m4.xlarge. -* `weighted_capacity` - (Optional) The number of units that a provisioned instance of this type provides toward fulfilling the target capacities defined in `aws_emr_instance_fleet`. 
- -## configurations Configuration Block - -A configuration classification that applies when provisioning cluster instances, which can include configurations for applications and software that run on the cluster. See [Configuring Applications](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html). - -* `classification` - (Optional) The classification within a configuration. -* `properties` - (Optional) A map of properties specified within a configuration classification - -## launch_specifications Configuration Block - -* `on_demand_specification` - (Optional) Configuration block for on demand instances launch specifications -* `spot_specification` - (Optional) Configuration block for spot instances launch specifications - -## on_demand_specification Configuration Block - -The launch specification for On-Demand instances in the instance fleet, which determines the allocation strategy. -The instance fleet configuration is available only in Amazon EMR versions 4.8.0 and later, excluding 5.0.x versions. On-Demand instances allocation strategy is available in Amazon EMR version 5.12.1 and later. - -* `allocation_strategy` - (Required) Specifies the strategy to use in launching On-Demand instance fleets. Currently, the only option is `lowest-price` (the default), which launches the lowest price first. - -## spot_specification Configuration Block - -The launch specification for Spot instances in the fleet, which determines the defined duration, provisioning timeout behavior, and allocation strategy. - -* `allocation_strategy` - (Required) Specifies the strategy to use in launching Spot instance fleets. Currently, the only option is `capacity-optimized` (the default), which launches instances from Spot instance pools with optimal capacity for the number of instances that are launching. -* `block_duration_minutes` - (Optional) The defined duration for Spot instances (also known as Spot blocks) in minutes. 
When specified, the Spot instance does not terminate before the defined duration expires, and defined duration pricing for Spot instances applies. Valid values are 60, 120, 180, 240, 300, or 360. The duration period starts as soon as a Spot instance receives its instance ID. At the end of the duration, Amazon EC2 marks the Spot instance for termination and provides a Spot instance termination notice, which gives the instance a two-minute warning before it terminates. -* `timeout_action` - (Required) The action to take when TargetSpotCapacity has not been fulfilled when the TimeoutDurationMinutes has expired; that is, when all Spot instances could not be provisioned within the Spot provisioning timeout. Valid values are `TERMINATE_CLUSTER` and `SWITCH_TO_ON_DEMAND`. SWITCH_TO_ON_DEMAND specifies that if no Spot instances are available, On-Demand Instances should be provisioned to fulfill any remaining Spot capacity. -* `timeout_duration_minutes` - (Required) The spot provisioning timeout period in minutes. If Spot instances are not provisioned within this time period, the TimeOutAction is taken. Minimum value is 5 and maximum value is 1440. The timeout applies only during initial provisioning, when the cluster is first created. - -## Attributes Reference - -In addition to all arguments above, the following attributes are exported: - -* `arn`- The ARN of the cluster. -* `id` - The ID of the EMR Cluster -* `name` - The name of the cluster. -* `release_label` - The release label for the Amazon EMR release. -* `master_instance_group.0.id` - Master node type Instance Group ID, if using Instance Group for this node type. -* `master_public_dns` - The public DNS name of the master EC2 instance. -* `core_instance_group.0.id` - Core node type Instance Group ID, if using Instance Group for this node type. -* `log_uri` - The path to the Amazon S3 location where logs for this cluster are stored. -* `applications` - The applications installed on this cluster. 
-* `ec2_attributes` - Provides information about the EC2 instances in a cluster grouped by category: key name, subnet ID, IAM instance profile, and so on. -* `bootstrap_action` - A list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. -* `configurations` - The list of Configurations supplied to the EMR cluster. -* `service_role` - The IAM role that will be assumed by the Amazon EMR service to access AWS resources on your behalf. -* `visible_to_all_users` - Indicates whether the job flow is visible to all IAM users of the AWS account associated with the job flow. -* `tags_all` - A map of tags assigned to the resource, including those inherited from the provider [`default_tags` configuration block](https://www.terraform.io/docs/providers/aws/index.html#default_tags-configuration-block). - -## Example bootable config - -**NOTE:** This configuration demonstrates a minimal configuration needed to -boot an example EMR Cluster. It is not meant to display best practices. Please -use at your own risk. +**NOTE:** This configuration demonstrates a minimal configuration needed to boot an example EMR Cluster. It is not meant to display best practices. As with all examples, use at your own risk. ```terraform resource "aws_emr_cluster" "cluster" { @@ -851,6 +607,225 @@ EOF } ``` +## Argument Reference + +The following arguments are required: + +* `name` - (Required) Name of the job flow. +* `release_label` - (Required) Release label for the Amazon EMR release. +* `service_role` - (Required) IAM role that will be assumed by the Amazon EMR service to access AWS resources. + +The following arguments are optional: + +* `additional_info` - (Optional) JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore Terraform cannot detect drift from the actual EMR cluster if its value is changed outside Terraform. 
+* `applications` - (Optional) List of applications for the cluster. Valid values are: `Flink`, `Hadoop`, `Hive`, `Mahout`, `Pig`, `Spark`, and `JupyterHub` (as of EMR 5.14.0). Case insensitive. +* `autoscaling_role` - (Optional) IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group. +* `bootstrap_action` - (Optional) Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. See below. +* `configurations` - (Optional) List of configurations supplied for the EMR cluster you are creating. Supply a configuration object for applications to override their default configuration. See [AWS Documentation](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html) for more information. +* `configurations_json` - (Optional) JSON string for supplying list of configurations for the EMR cluster. + +~> **NOTE on `configurations_json`:** If the `Configurations` value is empty then you should skip the `Configurations` field instead of providing an empty list as a value, `"Configurations": []`. + +```terraform +resource "aws_emr_cluster" "cluster" { + # ... other configuration ... + + configurations_json = < **NOTE on EMR-Managed security groups:** These security groups will have any missing inbound or outbound access rules added and maintained by AWS, to ensure proper communication between instances in a cluster. The EMR service will maintain these rules for groups provided in `emr_managed_master_security_group` and `emr_managed_slave_security_group`; attempts to remove the required rules may succeed, only for the EMR service to re-add them in a matter of minutes. This may cause Terraform to fail to destroy an environment that contains an EMR cluster, because the EMR service does not revoke rules added on deletion, leaving a cyclic dependency between the security groups that prevents their deletion. 
To avoid this, use the `revoke_rules_on_delete` optional attribute for any Security Group used in `emr_managed_master_security_group` and `emr_managed_slave_security_group`. See [Amazon EMR-Managed Security Groups](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-man-sec-groups.html) for more information about the EMR-managed security group rules. + +### kerberos_attributes + +* `ad_domain_join_password` - (Optional) Active Directory password for `ad_domain_join_user`. Terraform cannot perform drift detection of this configuration. +* `ad_domain_join_user` - (Optional) Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. Terraform cannot perform drift detection of this configuration. +* `cross_realm_trust_principal_password` - (Optional) Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. Terraform cannot perform drift detection of this configuration. +* `kdc_admin_password` - (Required) Password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. Terraform cannot perform drift detection of this configuration. +* `realm` - (Required) Name of the Kerberos realm to which all nodes in a cluster belong. For example, `EC2.INTERNAL`. + +### master_instance_fleet + +* `instance_type_configs` - (Optional) Configuration block for instance fleet. +* `launch_specifications` - (Optional) Configuration block for launch specification. +* `name` - (Optional) Friendly name given to the instance fleet. +* `target_on_demand_capacity` - (Optional) Target capacity of On-Demand units for the instance fleet, which determines how many On-Demand instances to provision.
+* `target_spot_capacity` - (Optional) Target capacity of Spot units for the instance fleet, which determines how many Spot instances to provision. + +#### instance_type_configs + +See `instance_type_configs` above, under `core_instance_fleet`. + +#### launch_specifications + +See `launch_specifications` above, under `core_instance_fleet`. + +### master_instance_group + +Supported nested arguments for the `master_instance_group` configuration block: + +* `bid_price` - (Optional) Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances. +* `ebs_config` - (Optional) Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below. +* `instance_count` - (Optional) Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource's `core_instance_group` to be configured. Public (Internet accessible) instances must be created in VPC subnets that have [map public IP on launch](/docs/providers/aws/r/subnet.html#map_public_ip_on_launch) enabled. Termination protection is automatically enabled when launched with multiple master nodes and Terraform must have the `termination_protection = false` configuration applied before destroying this resource. +* `instance_type` - (Required) EC2 instance type for all instances in the instance group. +* `name` - (Optional) Friendly name given to the instance group. + +#### ebs_config + +See `ebs_config` under `core_instance_group` above. + +### step + +This argument is processed in [attribute-as-blocks mode](https://www.terraform.io/docs/configuration/attr-as-blocks.html). + +* `action_on_failure` - (Required) Action to take if the step fails. 
Valid values: `TERMINATE_JOB_FLOW`, `TERMINATE_CLUSTER`, `CANCEL_AND_WAIT`, and `CONTINUE`. +* `hadoop_jar_step` - (Required) JAR file used for the step. See below. +* `name` - (Required) Name of the step. + +#### hadoop_jar_step + +This argument is processed in [attribute-as-blocks mode](https://www.terraform.io/docs/configuration/attr-as-blocks.html). + +* `args` - (Optional) List of command line arguments passed to the JAR file's main function when executed. +* `jar` - (Required) Path to a JAR file run during the step. +* `main_class` - (Optional) Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file. +* `properties` - (Optional) Key-value map of Java properties that are set when the step runs. You can use these properties to pass key-value pairs to your main function. + +## Attributes Reference + +In addition to all arguments above, the following attributes are exported: + +* `applications` - Applications installed on this cluster. +* `arn` - ARN of the cluster. +* `bootstrap_action` - List of bootstrap actions that will be run before Hadoop is started on the cluster nodes. +* `configurations` - List of Configurations supplied to the EMR cluster. +* `core_instance_group.0.id` - Core node type Instance Group ID, if using Instance Group for this node type. +* `ec2_attributes` - Provides information about the EC2 instances in a cluster grouped by category: key name, subnet ID, IAM instance profile, and so on. +* `id` - ID of the cluster. +* `log_uri` - Path to the Amazon S3 location where logs for this cluster are stored. +* `master_instance_group.0.id` - Master node type Instance Group ID, if using Instance Group for this node type. +* `master_public_dns` - Public DNS name of the master EC2 instance. +* `name` - Name of the cluster. +* `release_label` - Release label for the Amazon EMR release.
+* `service_role` - IAM role that will be assumed by the Amazon EMR service to access AWS resources on your behalf. +* `tags_all` - Map of tags assigned to the resource, including those inherited from the provider [`default_tags` configuration block](https://www.terraform.io/docs/providers/aws/index.html#default_tags-configuration-block). +* `visible_to_all_users` - Indicates whether the job flow is visible to all IAM users of the AWS account associated with the job flow. + ## Import EMR clusters can be imported using the `id`, e.g.