From 4889f0983887b5b8696056c5f4bb746cb489d1a0 Mon Sep 17 00:00:00 2001 From: Vara Bonthu Date: Mon, 28 Oct 2024 22:28:05 -0700 Subject: [PATCH] feat: Spark benchmarks update to 3.5.3 (#683) --- ai-ml/emr-spark-rapids/README.md | 2 +- ai-ml/nvidia-triton-server/README.md | 4 +- analytics/terraform/datahub-on-eks/README.md | 4 +- analytics/terraform/emr-eks-ack/README.md | 4 +- analytics/terraform/emr-eks-fargate/README.md | 4 +- .../terraform/emr-eks-karpenter/README.md | 2 +- .../terraform/spark-eks-ipv6/spark-team.tf | 2 +- .../terraform/spark-k8s-operator/README.md | 12 +- .../terraform/spark-k8s-operator/addons.tf | 137 +++++++++++++--- analytics/terraform/spark-k8s-operator/eks.tf | 79 ++++++++++ .../examples/benchmark/README.md | 37 +++++ .../benchmark/tpcds-benchmark-1t-c7gd.yaml | 149 ++++++++++++++++++ .../benchmark/tpcds-benchmark-1t-r6g.yaml | 124 +++++++++++++++ .../benchmark/tpcds-benchmark-1t-r8g.yaml | 125 +++++++++++++++ .../benchmark/tpcds-benchmark-3t.yaml | 106 ------------- .../tpcds-benchmark-data-generation-1t.yaml | 143 +++++++++++++++++ .../tpcds-benchmark-data-generation-3t.yaml | 101 ------------ ...nvme-storage-yunikorn-gang-scheduling.yaml | 38 +---- .../examples/docker/Dockerfile-benchmark | 81 ++++++++++ ...nvme-storage-yunikorn-gang-scheduling.yaml | 38 +---- .../helm-values/spark-operator-values.yaml | 65 -------- .../helm-values/yunikorn-values.yaml | 104 +----------- .../vllm-llama3.1-405b-trn1/docker/Dockerfile | 2 +- .../vllm-llama3.1-405b-trn1/docker/run.sh | 4 +- .../llama3-405b-vllm-lws-deployment.yaml | 36 +++-- schedulers/terraform/argo-workflow/README.md | 8 +- schedulers/terraform/aws-batch-eks/README.md | 6 +- .../terraform/self-managed-airflow/README.md | 10 +- streaming/flink/README.md | 4 +- streaming/kafka/README.md | 8 +- streaming/nifi/README.md | 4 +- streaming/spark-streaming/terraform/README.md | 8 +- 32 files changed, 931 insertions(+), 520 deletions(-) create mode 100644 analytics/terraform/spark-k8s-operator/examples/benchmark/README.md create mode 100644 analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-c7gd.yaml create mode 100644 analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml create mode 100644 analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml delete mode 100644 analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml create mode 100644 analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml delete mode 100644 analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml create mode 100644 analytics/terraform/spark-k8s-operator/examples/docker/Dockerfile-benchmark delete mode 100644 analytics/terraform/spark-k8s-operator/helm-values/spark-operator-values.yaml diff --git a/ai-ml/emr-spark-rapids/README.md b/ai-ml/emr-spark-rapids/README.md index e693a626d..bb91a9799 100644 --- a/ai-ml/emr-spark-rapids/README.md +++ b/ai-ml/emr-spark-rapids/README.md @@ -61,7 +61,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [enable\_nvidia\_gpu\_operator](#input\_enable\_nvidia\_gpu\_operator) | Enable NVIDIA GPU Operator | `bool` | `false` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-spark-rapids"` | no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be 
attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/21"` | no | diff --git a/ai-ml/nvidia-triton-server/README.md b/ai-ml/nvidia-triton-server/README.md index b86c32977..66b726b02 100644 --- a/ai-ml/nvidia-triton-server/README.md +++ b/ai-ml/nvidia-triton-server/README.md @@ -79,9 +79,9 @@ | [huggingface\_token](#input\_huggingface\_token) | Hugging Face Secret Token | `string` | `"DUMMY_TOKEN_REPLACE_ME"` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"nvidia-triton-server"` | no | | [ngc\_api\_key](#input\_ngc\_api\_key) | NGC API Key | `string` | `"DUMMY_NGC_API_KEY_REPLACE_ME"` | no | -| [nim\_models](#input\_nim\_models) | NVIDIA NIM Models |
list(object({
name = string
id = string
enable = bool
num_gpu = string
}))
|
[
{
"enable": false,
"id": "nvcr.io/nim/meta/llama-3.1-8b-instruct",
"name": "llama-3-1-8b-instruct",
"num_gpu": "4"
},
{
"enable": true,
"id": "nvcr.io/nim/meta/llama3-8b-instruct",
"name": "llama3-8b-instruct",
"num_gpu": "1"
}
]
| no | +| [nim\_models](#input\_nim\_models) | NVIDIA NIM Models |
list(object({
name = string
id = string
enable = bool
num_gpu = string
}))
|
[
{
"enable": false,
"id": "nvcr.io/nim/meta/llama-3.1-8b-instruct",
"name": "llama-3-1-8b-instruct",
"num_gpu": "4"
},
{
"enable": true,
"id": "nvcr.io/nim/meta/llama3-8b-instruct",
"name": "llama3-8b-instruct",
"num_gpu": "1"
}
]
| no | | [region](#input\_region) | region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/21"` | no | ## Outputs diff --git a/analytics/terraform/datahub-on-eks/README.md b/analytics/terraform/datahub-on-eks/README.md index f749fbb28..fd1d95814 100644 --- a/analytics/terraform/datahub-on-eks/README.md +++ b/analytics/terraform/datahub-on-eks/README.md @@ -46,8 +46,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"datahub-on-eks"` | no | | [private\_subnet\_ids](#input\_private\_subnet\_ids) | Ids for existing private subnets - needed when create\_vpc set to false | `list(string)` | `[]` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR - must change to match the cidr of the existing VPC if create\_vpc set to false | `string` | `"10.1.0.0/16"` | no | diff --git a/analytics/terraform/emr-eks-ack/README.md b/analytics/terraform/emr-eks-ack/README.md index e5308048a..05b4872ed 100644 --- a/analytics/terraform/emr-eks-ack/README.md +++ b/analytics/terraform/emr-eks-ack/README.md @@ -54,8 +54,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.27"` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-eks-ack"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no | diff --git a/analytics/terraform/emr-eks-fargate/README.md b/analytics/terraform/emr-eks-fargate/README.md index 2d6a8aa25..dae1b4eae 100644 --- a/analytics/terraform/emr-eks-fargate/README.md +++ b/analytics/terraform/emr-eks-fargate/README.md @@ -49,8 +49,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.27"` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-eks-fargate"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no | diff --git a/analytics/terraform/emr-eks-karpenter/README.md b/analytics/terraform/emr-eks-karpenter/README.md index ded52b607..b8e7166fd 100644 --- a/analytics/terraform/emr-eks-karpenter/README.md +++ b/analytics/terraform/emr-eks-karpenter/README.md @@ -89,7 +89,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [enable\_yunikorn](#input\_enable\_yunikorn) | Enable Apache YuniKorn Scheduler | `bool` | `false` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-eks-karpenter"` | no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/21"` | no | diff --git a/analytics/terraform/spark-eks-ipv6/spark-team.tf b/analytics/terraform/spark-eks-ipv6/spark-team.tf index beeddc9ba..b4c0bab3c 100644 --- a/analytics/terraform/spark-eks-ipv6/spark-team.tf +++ b/analytics/terraform/spark-eks-ipv6/spark-team.tf @@ -66,7 +66,7 @@ module "spark_team_a_irsa" { #--------------------------------------------------------------- resource "aws_iam_policy" "spark" { description = "IAM role policy for Spark Job execution" - name_prefix = "${local.name}-spark-irsa" + name_prefix = "${local.name}-spark-irsa" policy = data.aws_iam_policy_document.spark_operator.json } diff --git a/analytics/terraform/spark-k8s-operator/README.md b/analytics/terraform/spark-k8s-operator/README.md index 305425918..171a59136 100644 --- a/analytics/terraform/spark-k8s-operator/README.md +++ b/analytics/terraform/spark-k8s-operator/README.md @@ -30,7 +30,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [ebs\_csi\_driver\_irsa](#module\_ebs\_csi\_driver\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 5.34 | | [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 | | [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 | -| [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | 1.33.0 | +| [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | 1.34 | | [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 | | [spark\_team\_irsa](#module\_spark\_team\_irsa) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | | [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 | @@ -70,18 +70,18 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [aws\_auth\_roles](#input\_aws\_auth\_roles) | additional aws auth roles |
list(
object(
{
rolearn = string
username = string
groups = list(string
)
}
)
)
| `[]` | no | +| [aws\_auth\_roles](#input\_aws\_auth\_roles) | additional aws auth roles |
list(
object(
{
rolearn = string
username = string
groups = list(string
)
}
)
)
| `[]` | no | | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.30"` | no | -| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | +| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | | [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no | | [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no | | [enable\_yunikorn](#input\_enable\_yunikorn) | Enable Apache YuniKorn Scheduler | `bool` | `true` | no | | [kms\_key\_admin\_roles](#input\_kms\_key\_admin\_roles) | list of role ARNs to add to the KMS policy | `list(string)` | `[]` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"spark-operator-doeks"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/16"` | no | ## Outputs diff --git a/analytics/terraform/spark-k8s-operator/addons.tf b/analytics/terraform/spark-k8s-operator/addons.tf index 7782d8595..8e66c2da2 100644 --- a/analytics/terraform/spark-k8s-operator/addons.tf +++ b/analytics/terraform/spark-k8s-operator/addons.tf @@ -41,7 +41,7 @@ resource "kubernetes_storage_class" "ebs_csi_encrypted_gp3_storage_class" { #--------------------------------------------------------------- module "eks_data_addons" { source = "aws-ia/eks-data-addons/aws" - version = "1.33.0" # ensure to update this to the latest/desired version + version = "1.34" # ensure to update this to the latest/desired version oidc_provider_arn = module.eks.oidc_provider_arn @@ -54,6 +54,9 @@ module "eks_data_addons" { name: spark-compute-optimized clusterName: ${module.eks.cluster_name} ec2NodeClass: + amiFamily: AL2023 + amiSelectorTerms: + - alias: al2023@latest # Amazon Linux 2023 karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: tags: @@ -81,9 +84,9 @@ module "eks_data_addons" { - key: "karpenter.k8s.aws/instance-family" operator: In values: ["c5d"] - - key: "karpenter.k8s.aws/instance-cpu" + - key: "karpenter.k8s.aws/instance-size" operator: In - values: ["4", "8", "16", "36"] + values: ["4xlarge", "9xlarge", "12xlarge", "18xlarge", "24xlarge"] - key: "karpenter.k8s.aws/instance-hypervisor" operator: In values: ["nitro"] @@ -93,9 +96,8 @@ module "eks_data_addons" { limits: cpu: 1000 disruption: - consolidationPolicy: WhenEmpty - consolidateAfter: 30s - expireAfter: 720h + consolidationPolicy: WhenEmptyOrUnderutilized + consolidateAfter: 1m weight: 100 EOT ] @@ -106,6 +108,9 @@ module "eks_data_addons" { name: spark-graviton-memory-optimized clusterName: ${module.eks.cluster_name} ec2NodeClass: + amiFamily: AL2023 + amiSelectorTerms: + - alias: al2023@latest # Amazon Linux 2023 karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: tags: @@ -144,19 +149,76 @@ module "eks_data_addons" { limits: cpu: 1000 disruption: - consolidationPolicy: WhenEmpty - consolidateAfter: 30s - expireAfter: 720h + consolidationPolicy: WhenEmptyOrUnderutilized + consolidateAfter: 1m weight: 50 EOT ] } + spark-graviton-benchmark = { + values = [ + <<-EOT + name: spark-graviton-benchmark + clusterName: ${module.eks.cluster_name} + ec2NodeClass: + amiFamily: AL2023 + amiSelectorTerms: + - alias: al2023@latest # Amazon Linux 2023 + karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} + subnetSelectorTerms: + tags: + Name: "${module.eks.cluster_name}-private*" + securityGroupSelectorTerms: + tags: + Name: ${module.eks.cluster_name}-node + instanceStorePolicy: RAID0 + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + volumeSize: 300Gi + volumeType: gp3 + encrypted: true + deleteOnTermination: true + nodePool: + labels: + - NodeGroupType: SparkGravitonBenchmark + requirements: + - key: "karpenter.sh/capacity-type" + operator: In + values: ["on-demand"] + - key: "kubernetes.io/arch" + operator: In + values: ["arm64"] + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: ["r"] + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["r6g", "r6gd", "r7g", "r7gd", "r8g"] + - key: "karpenter.k8s.aws/instance-size" + operator: In + values: ["8xlarge", "12xlarge", "16xlarge"] + - key: 
"karpenter.k8s.aws/instance-generation" + operator: Gt + values: ["2"] + limits: + cpu: 2000 + disruption: + consolidationPolicy: WhenEmptyOrUnderutilized + consolidateAfter: 1m + weight: 100 + EOT + ] + } spark-memory-optimized = { values = [ <<-EOT name: spark-memory-optimized clusterName: ${module.eks.cluster_name} ec2NodeClass: + amiFamily: AL2023 + amiSelectorTerms: + - alias: al2023@latest # Amazon Linux 2023 karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: tags: @@ -195,9 +257,8 @@ module "eks_data_addons" { limits: cpu: 1000 disruption: - consolidationPolicy: WhenEmpty - consolidateAfter: 30s - expireAfter: 720h + consolidationPolicy: WhenEmptyOrUnderutilized + consolidateAfter: 1m weight: 100 EOT ] @@ -208,6 +269,9 @@ module "eks_data_addons" { name: spark-vertical-ebs-scale clusterName: ${module.eks.cluster_name} ec2NodeClass: + amiFamily: AL2023 + amiSelectorTerms: + - alias: al2023@latest # Amazon Linux 2023 karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: tags: @@ -217,9 +281,9 @@ module "eks_data_addons" { Name: ${module.eks.cluster_name}-node userData: | MIME-Version: 1.0 - Content-Type: multipart/mixed; boundary="BOUNDARY" + Content-Type: multipart/mixed; boundary="//" - --BOUNDARY + --// Content-Type: text/x-shellscript; charset="us-ascii" #!/bin/bash @@ -303,7 +367,7 @@ module "eks_data_addons" { /usr/bin/chown -hR +999:+1000 /mnt/k8s-disks fi - --BOUNDARY-- + --//-- nodePool: labels: @@ -325,9 +389,8 @@ module "eks_data_addons" { limits: cpu: 1000 disruption: - consolidationPolicy: WhenEmpty - consolidateAfter: 30s - expireAfter: 720h + consolidationPolicy: WhenEmptyOrUnderutilized + consolidateAfter: 1m weight: 100 EOT ] @@ -336,11 +399,38 @@ module "eks_data_addons" { #--------------------------------------------------------------- # Spark Operator Add-on + # Add this to enable YuniKorn as Default Scheduler + # controller: + # batchScheduler: + # enable: true + # default: "yunikorn" #--------------------------------------------------------------- enable_spark_operator = true spark_operator_helm_config = { - version = "1.4.2" - values = [templatefile("${path.module}/helm-values/spark-operator-values.yaml", {})] + version = "2.0.2" + values = [ + <<-EOT + controller: + batchScheduler: + enable: true + default: "yunikorn" + spark: + # -- List of namespaces where to run spark jobs. + # If empty string is included, all namespaces will be allowed. + # Make sure the namespaces have already existed. + jobNamespaces: + - default + - spark-team-a + - spark-team-b + - spark-team-c + serviceAccount: + # -- Specifies whether to create a service account for the controller. + create: false + rbac: + # -- Specifies whether to create RBAC resources for the controller. 
+ create: false + EOT + ] } #--------------------------------------------------------------- @@ -348,9 +438,8 @@ module "eks_data_addons" { #--------------------------------------------------------------- enable_yunikorn = var.enable_yunikorn yunikorn_helm_config = { - values = [templatefile("${path.module}/helm-values/yunikorn-values.yaml", { - image_version = "1.2.0" - })] + version = "1.6.0" + values = [templatefile("${path.module}/helm-values/yunikorn-values.yaml", {})] } #--------------------------------------------------------------- @@ -455,7 +544,7 @@ module "eks_blueprints_addons" { } } karpenter = { - chart_version = "v0.34.0" + chart_version = "1.0.6" repository_username = data.aws_ecrpublic_authorization_token.token.user_name repository_password = data.aws_ecrpublic_authorization_token.token.password } diff --git a/analytics/terraform/spark-k8s-operator/eks.tf b/analytics/terraform/spark-k8s-operator/eks.tf index 6f4ca3098..65fd31252 100644 --- a/analytics/terraform/spark-k8s-operator/eks.tf +++ b/analytics/terraform/spark-k8s-operator/eks.tf @@ -207,5 +207,84 @@ module "eks" { NodeGroupType = "spark" } } + + # The following Node groups are a placeholder to create Node groups for running Spark TPC-DS benchmarks + spark_graviton_r8g = { + name = "spark-graviton-r8g" + description = "Spark managed node group for Graviton Benchmarks" + # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned + subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : + substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), 0) + ] + + ami_type = "AL2023_ARM_64_STANDARD" + + min_size = 0 # Change min and desired to 6 for running benchmarks + max_size = 8 + desired_size = 0 + # This storage is used as a shuffle for non NVMe SSD instances. e.g., r8g instances + block_device_mappings = { + xvda = { + device_name = "/dev/xvda" + ebs = { + volume_size = 300 + volume_type = "gp3" + iops = 3000 + encrypted = true + delete_on_termination = true + } + } + } + + instance_types = ["r8g.12xlarge"] # Change Instance type to run the benchmark with various instance types + + labels = { + NodeGroupType = "spark-graviton-benchmark-mng-r8g" + } + + tags = { + Name = "spark-graviton-benchmark-mng-r8g" + NodeGroupType = "spark-graviton-benchmark-mng-r8g" + } + } + + spark_graviton_r6g = { + name = "spark-graviton-r6g" + description = "Spark managed node group for Graviton Benchmarks" + # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned + subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : + substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), 0) + ] + + ami_type = "AL2023_ARM_64_STANDARD" + + min_size = 0 + max_size = 8 + desired_size = 0 + # This storage is used as a shuffle for non NVMe SSD instances. 
e.g., r6g instances
Confirm that the `"spark-team-a"` namespace is present. +# 4. Replace `` with your actual S3 bucket name in the configuration. +# 5. Run `kubectl apply -f ` to deploy. + +--- +apiVersion: "sparkoperator.k8s.io/v1beta2" +kind: SparkApplication +metadata: + name: tpcds-benchmark-1tb-c7gd + namespace: spark-team-a +spec: + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default + type: Scala + mode: cluster + image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 + imagePullPolicy: IfNotPresent + sparkVersion: 3.5.3 + mainClass: com.amazonaws.eks.tpcds.BenchmarkSQL + mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar + arguments: + # TPC-DS data location + - "s3a:///TPCDS-TEST-1TB" + # results location + - "s3a:///TPCDS-TEST-1T-RESULT" + # Path to kit in the docker image + - "/opt/tpcds-kit/tools" + # Data Format + - "parquet" + # Scale factor (in GB) + - "1000" # changed from 3000 to 100gb for demo + # Number of iterations + - "1" + # Optimize queries with hive tables + - "false" + # Filter queries, will run all if empty - "q98-v2.4,q99-v2.4,ss_max-v2.4,q95-v2.4" + - "" + # Logging set to WARN + - "true" + sparkConf: + "spark.network.timeout": "2000s" + "spark.executor.heartbeatInterval": "300s" + # AQE + "spark.sql.adaptive.enabled": "true" + "spark.sql.adaptive.localShuffleReader.enabled": "true" + "spark.sql.adaptive.coalescePartitions.enabled": "true" + "spark.sql.adaptive.skewJoin.enabled": "true" + "spark.kubernetes.executor.podNamePrefix": "benchmark-exec-c7gd-" + # S3 Optimizations + # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" # This is using AWS SDK V1 in maintenance mode + "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" + "spark.hadoop.fs.s3a.aws.credentials.provider": "software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" # AWS SDK V2 https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html + "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" + "spark.hadoop.fs.s3a.fast.upload": "true" + "spark.hadoop.fs.s3a.path.style.access": "true" + "spark.hadoop.fs.s3a.fast.upload.buffer": "disk" + "spark.hadoop.fs.s3a.buffer.dir": "/data1/s3a" + "spark.hadoop.fs.s3a.multipart.size": "128M" # Good for large files + "spark.hadoop.fs.s3a.multipart.threshold": "256M" + "spark.hadoop.fs.s3a.threads.max": "50" + "spark.hadoop.fs.s3a.connection.maximum": "200" + + "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" + "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseParallelGC -XX:InitiatingHeapOccupancyPercent=70" + # "spark.hadoop.fs.s3a.readahead.range": "256K" + + # ----------------------------------------------------- + # This block is very critical when you get errors like + # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred + # Caused by: java.net.SocketTimeoutException: timeout + # spark.kubernetes.local.dirs.tmpfs: "true" # More details here https://spark.apache.org/docs/latest/running-on-kubernetes.html#using-ram-for-local-storage + spark.kubernetes.submission.connectionTimeout: "120000" # milliseconds + spark.kubernetes.submission.requestTimeout: "120000" + spark.kubernetes.driver.connectionTimeout: "120000" + 
spark.kubernetes.driver.requestTimeout: "120000" + # spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size + # ----------------------------------------------------- + # S3 Optimizations + "spark.hadoop.fs.s3a.multipart.size": "67108864" # 64 MB part size for S3 uploads + "spark.hadoop.fs.s3a.threads.max": "40" # Limit S3 threads for optimized throughput + "spark.hadoop.fs.s3a.connection.maximum": "100" # Set max connections for S3 + + # Data writing and shuffle tuning + "spark.shuffle.file.buffer": "1m" # Increase shuffle buffer for better disk I/O + "spark.reducer.maxSizeInFlight": "48m" # Increase reducer buffer size in-flight data + + # Optional: Tuning multipart upload threshold + "spark.hadoop.fs.s3a.multipart.purge": "true" # Automatically clear failed multipart uploads + "spark.hadoop.fs.s3a.multipart.threshold": "134217728" # 128 MB threshold to start multi-part upload + driver: + cores: 4 + coreLimit: "4.3" + memory: "5g" + memoryOverhead: "1g" + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + initContainers: + - name: volume-permission + image: public.ecr.aws/docker/library/busybox + command: ['sh', '-c', 'mkdir -p /data1; chown -R 185:185 /data1'] + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: spark-graviton-benchmark-mng-c7gd # Create a node group with this label in eks.tf + executor: + cores: 4 + coreLimit: "4.3" + memory: "6g" + memoryOverhead: "2g" + # 8 executors per node + instances: 47 + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + initContainers: + - name: volume-permission + image: public.ecr.aws/docker/library/busybox + command: ['sh', '-c', 'mkdir -p /data1; chown -R 185:185 /data1'] + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: spark-graviton-benchmark-mng-c7gd # Create a node group with this label in eks.tf + restartPolicy: + type: Never + volumes: + - name: spark-local-dir-1 + hostPath: + path: "/mnt/k8s-disks/0" + type: DirectoryOrCreate diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml new file mode 100644 index 000000000..de48774b0 --- /dev/null +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml @@ -0,0 +1,124 @@ +# > **Note:** Before running these jobs, make sure the following prerequisites are completed: + +# 1. Set up an S3 bucket to store the generated data. +# 2. Update the `eks.tf` file to modify `min_size=6` and `desired_size=6` for the managed node group `"spark_graviton_r6g"`, then run `terraform apply`. +# 3. Confirm that the `"spark-team-a"` namespace exists. +# 4. Replace `` in the configuration with your actual bucket name. +# 5. Run `kubectl apply -f ` to apply the configuration. 
+ +--- +apiVersion: "sparkoperator.k8s.io/v1beta2" +kind: SparkApplication +metadata: + name: tpcds-benchmark-1tb-r6g + namespace: spark-team-a +spec: + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default + type: Scala + mode: cluster + image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 + imagePullPolicy: IfNotPresent + sparkVersion: 3.5.3 + mainClass: com.amazonaws.eks.tpcds.BenchmarkSQL + mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar + arguments: + # TPC-DS data location + - "s3a:///TPCDS-TEST-1TB" + # results location + - "s3a:///TPCDS-TEST-1T-RESULT" + # Path to kit in the docker image + - "/opt/tpcds-kit/tools" + # Data Format + - "parquet" + # Scale factor (in GB) + - "1000" # changed from 3000 to 100gb for demo + # Number of iterations + - "1" + # Optimize queries with hive tables + - "false" + # Filter queries, will run all if empty - "q98-v2.4,q99-v2.4,ss_max-v2.4,q95-v2.4" + - "" + # Logging set to WARN + - "true" + sparkConf: + "spark.network.timeout": "2000s" + "spark.executor.heartbeatInterval": "300s" + # AQE + "spark.sql.adaptive.enabled": "true" + "spark.sql.adaptive.localShuffleReader.enabled": "true" + "spark.sql.adaptive.coalescePartitions.enabled": "true" + "spark.sql.adaptive.skewJoin.enabled": "true" + "spark.kubernetes.executor.podNamePrefix": "benchmark-exec-r6g-" + # S3 Optimizations + # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" # This is using AWS SDK V1 in maintenance mode + "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" + "spark.hadoop.fs.s3a.aws.credentials.provider": "software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" # AWS SDK V2 https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html + "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" + "spark.hadoop.fs.s3a.fast.upload": "true" + "spark.hadoop.fs.s3a.path.style.access": "true" + "spark.hadoop.fs.s3a.fast.upload.buffer": "disk" + "spark.hadoop.fs.s3a.buffer.dir": "/tmp/s3a" + "spark.hadoop.fs.s3a.multipart.size": "128M" # Good for large files + "spark.hadoop.fs.s3a.multipart.threshold": "256M" + "spark.hadoop.fs.s3a.threads.max": "50" + "spark.hadoop.fs.s3a.connection.maximum": "200" + + "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" + "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseParallelGC -XX:InitiatingHeapOccupancyPercent=70" + # "spark.hadoop.fs.s3a.readahead.range": "256K" + + # ----------------------------------------------------- + # This block is very critical when you get errors like + # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred + # Caused by: java.net.SocketTimeoutException: timeout + # spark.kubernetes.local.dirs.tmpfs: "true" # More details here https://spark.apache.org/docs/latest/running-on-kubernetes.html#using-ram-for-local-storage + spark.kubernetes.submission.connectionTimeout: "120000" # milliseconds + spark.kubernetes.submission.requestTimeout: "120000" + spark.kubernetes.driver.connectionTimeout: "120000" + spark.kubernetes.driver.requestTimeout: "120000" + # spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size + # 
----------------------------------------------------- + # S3 Optimizations + "spark.hadoop.fs.s3a.multipart.size": "67108864" # 64 MB part size for S3 uploads + "spark.hadoop.fs.s3a.threads.max": "40" # Limit S3 threads for optimized throughput + "spark.hadoop.fs.s3a.connection.maximum": "100" # Set max connections for S3 + + # Data writing and shuffle tuning + "spark.shuffle.file.buffer": "1m" # Increase shuffle buffer for better disk I/O + "spark.reducer.maxSizeInFlight": "48m" # Increase reducer buffer size in-flight data + + # Optional: Tuning multipart upload threshold + "spark.hadoop.fs.s3a.multipart.purge": "true" # Automatically clear failed multipart uploads + "spark.hadoop.fs.s3a.multipart.threshold": "134217728" # 128 MB threshold to start multi-part upload + driver: + cores: 4 + coreLimit: "4.3" + memory: "5g" + memoryOverhead: "1g" + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: spark-graviton-benchmark-mng-r6g + executor: + cores: 4 + coreLimit: "4.3" + memory: "6g" + memoryOverhead: "2g" + # 8 executors per node + instances: 47 + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: spark-graviton-benchmark-mng-r6g + restartPolicy: + type: Never diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml new file mode 100644 index 000000000..c339c385b --- /dev/null +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml @@ -0,0 +1,125 @@ +# > **Note:** Before running these jobs, make sure the following prerequisites are completed: + +# 1. Set up an S3 bucket to store the generated data. +# 2. Update the `eks.tf` file to modify `min_size=6` and `desired_size=6` for the managed node group `"spark_graviton_r8g"`, then run `terraform apply`. +# 3. Confirm that the `"spark-team-a"` namespace exists. +# 4. Replace `` in the configuration with your actual bucket name. +# 5. Run `kubectl apply -f ` to apply the configuration. 
+ +--- +apiVersion: "sparkoperator.k8s.io/v1beta2" +kind: SparkApplication +metadata: + name: tpcds-benchmark-1tb-r8g + namespace: spark-team-a + +spec: + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default + type: Scala + mode: cluster + image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 + imagePullPolicy: IfNotPresent + sparkVersion: 3.5.3 + mainClass: com.amazonaws.eks.tpcds.BenchmarkSQL + mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar + arguments: + # TPC-DS data location + - "s3a:///TPCDS-TEST-1TB" + # results location + - "s3a:///TPCDS-TEST-1T-RESULT" + # Path to kit in the docker image + - "/opt/tpcds-kit/tools" + # Data Format + - "parquet" + # Scale factor (in GB) + - "1000" # changed from 3000 to 100gb for demo + # Number of iterations + - "1" + # Optimize queries with hive tables + - "false" + # Filter queries, will run all if empty - "q98-v2.4,q99-v2.4,ss_max-v2.4,q95-v2.4" + - "" + # Logging set to WARN + - "true" + sparkConf: + "spark.network.timeout": "2000s" + "spark.executor.heartbeatInterval": "300s" + # AQE + "spark.sql.adaptive.enabled": "true" + "spark.sql.adaptive.localShuffleReader.enabled": "true" + "spark.sql.adaptive.coalescePartitions.enabled": "true" + "spark.sql.adaptive.skewJoin.enabled": "true" + "spark.kubernetes.executor.podNamePrefix": "benchmark-exec-r8g-" + # S3 Optimizations + # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" # This is using AWS SDK V1 in maintenance mode + "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" + "spark.hadoop.fs.s3a.aws.credentials.provider": "software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" # AWS SDK V2 https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html + "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" + "spark.hadoop.fs.s3a.fast.upload": "true" + "spark.hadoop.fs.s3a.path.style.access": "true" + "spark.hadoop.fs.s3a.fast.upload.buffer": "disk" + "spark.hadoop.fs.s3a.buffer.dir": "/tmp/s3a" + "spark.hadoop.fs.s3a.multipart.size": "128M" # Good for large files + "spark.hadoop.fs.s3a.multipart.threshold": "256M" + "spark.hadoop.fs.s3a.threads.max": "50" + "spark.hadoop.fs.s3a.connection.maximum": "200" + + "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" + "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseParallelGC -XX:InitiatingHeapOccupancyPercent=70" + # "spark.hadoop.fs.s3a.readahead.range": "256K" + + # ----------------------------------------------------- + # This block is very critical when you get errors like + # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred + # Caused by: java.net.SocketTimeoutException: timeout + # spark.kubernetes.local.dirs.tmpfs: "true" # More details here https://spark.apache.org/docs/latest/running-on-kubernetes.html#using-ram-for-local-storage + spark.kubernetes.submission.connectionTimeout: "120000" # milliseconds + spark.kubernetes.submission.requestTimeout: "120000" + spark.kubernetes.driver.connectionTimeout: "120000" + spark.kubernetes.driver.requestTimeout: "120000" + # spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size + # 
----------------------------------------------------- + # S3 Optimizations + "spark.hadoop.fs.s3a.multipart.size": "67108864" # 64 MB part size for S3 uploads + "spark.hadoop.fs.s3a.threads.max": "40" # Limit S3 threads for optimized throughput + "spark.hadoop.fs.s3a.connection.maximum": "100" # Set max connections for S3 + + # Data writing and shuffle tuning + "spark.shuffle.file.buffer": "1m" # Increase shuffle buffer for better disk I/O + "spark.reducer.maxSizeInFlight": "48m" # Increase reducer buffer size in-flight data + + # Optional: Tuning multipart upload threshold + "spark.hadoop.fs.s3a.multipart.purge": "true" # Automatically clear failed multipart uploads + "spark.hadoop.fs.s3a.multipart.threshold": "134217728" # 128 MB threshold to start multi-part upload + driver: + cores: 4 + coreLimit: "4.3" + memory: "5g" + memoryOverhead: "1g" + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: spark-graviton-benchmark-mng-r8g + executor: + cores: 4 + coreLimit: "4.3" + memory: "6g" + memoryOverhead: "2g" + # 8 executors per node + instances: 47 + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: spark-graviton-benchmark-mng-r8g + restartPolicy: + type: Never diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml deleted file mode 100644 index 9e1d37685..000000000 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml +++ /dev/null @@ -1,106 +0,0 @@ -# NOTE: This example requires the following prerequisites before executing the jobs -# 1. Ensure spark-team-a name space exists -# 2. replace with your bucket name -# 3. Ensure you run "analytics/spark-k8s-operator/spark-samples/tpcds-benchmark-data-generation-1t.yaml" which generates 3 TB input data - ---- -apiVersion: "sparkoperator.k8s.io/v1beta2" -kind: SparkApplication -metadata: - name: tpcds-benchmark-3tb - namespace: spark-team-a - labels: - app: "tpcds-benchmark" - applicationId: "tpcds-benchmark-3t" - # Assign the job to a Yunikorn Queue via label. 
- queue: root.prod -spec: - type: Scala - mode: cluster - image: public.ecr.aws/data-on-eks/emr-on-eks-benchmark:3.1.2 - imagePullPolicy: IfNotPresent - sparkVersion: 3.1.2 - mainClass: com.amazonaws.eks.tpcds.BenchmarkSQL - mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar - arguments: - # TPC-DS data location - - "s3://blogpost-sparkoneks-us-east-1/blog/BLOG_TPCDS-TEST-3T-partitioned" - # results location - - "s3:///TPCDS-TEST-3T-RESULT" - # Path to kit in the docker image - - "/opt/tpcds-kit/tools" - # Data Format - - "parquet" - # Scale factor (in GB) - - "3000" # changed from 3000 to 100gb for demo - # Number of iterations - - "1" - # Optimize queries with hive tables - - "false" - # Filter queries, will run all if empty - "q98-v2.4,q99-v2.4,ss_max-v2.4,q95-v2.4" - - "" - # Logging set to WARN - - "true" - sparkConf: - "spark.network.timeout": "2000s" - "spark.executor.heartbeatInterval": "300s" - # AQE - "spark.sql.adaptive.enabled": "true" - "spark.sql.adaptive.localShuffleReader.enabled": "true" - "spark.sql.adaptive.coalescePartitions.enabled": "true" - "spark.sql.adaptive.skewJoin.enabled": "true" - # "spark.sql.adaptive.logLevel": "WARN" - # IRSA for S3 connection - "spark.kubernetes.executor.podNamePrefix": "benchmark-exec" - "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" - "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" - "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" - "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseParallelGC -XX:InitiatingHeapOccupancyPercent=70" - # Keep pods in a single AZ - # "spark.kubernetes.node.selector.topology.kubernetes.io/zone": "us-west-1b" - # "spark.kubernetes.node.selector.eks.amazonaws.com/capacityType": "ON_DEMAND" - # ----------------------------------------------------- - # This block is very critical when you get errors like - # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred - # Caused by: java.net.SocketTimeoutException: timeout - # spark.kubernetes.local.dirs.tmpfs: "true" # More details here https://spark.apache.org/docs/latest/running-on-kubernetes.html#using-ram-for-local-storage - spark.kubernetes.submission.connectionTimeout: "120000" # milliseconds - spark.kubernetes.submission.requestTimeout: "120000" - spark.kubernetes.driver.connectionTimeout: "120000" - spark.kubernetes.driver.requestTimeout: "120000" - # spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size - # ----------------------------------------------------- - driver: - cores: 4 - coreLimit: "4.1" - memory: "5g" - memoryOverhead: "1000" - serviceAccount: spark-team-a - # the c5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod - # we do not need to leverage a hostPath mount or volume to leverage that storage. 
- # ephemeral-storage requests and limits can be used to manage the storage utilization - nodeSelector: - provisioner: spark-compute-optimized - tolerations: - - key: "spark-compute-optimized" - operator: "Exists" - effect: "NoSchedule" - executor: - cores: 4 - coreLimit: "4.3" - memory: "6g" - memoryOverhead: "2g" - # 8 executors per node - instances: 47 # changed from 47 to 20 for demo - serviceAccount: spark-team-a - # the c5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod - # we do not need to leverage a hostPath mount or volume to leverage that storage. - # ephemeral-storage requests and limits can be used to manage the storage utilization - nodeSelector: - provisioner: spark-compute-optimized - tolerations: - - key: "spark-compute-optimized" - operator: "Exists" - effect: "NoSchedule" - restartPolicy: - type: Never diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml new file mode 100644 index 000000000..7b855ccab --- /dev/null +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml @@ -0,0 +1,143 @@ +# NOTE: This example requires the following prerequisites before executing the jobs +# 1. Ensure spark-team-a name space exists +# 2. replace with your bucket name + +--- +apiVersion: "sparkoperator.k8s.io/v1beta2" +kind: SparkApplication +metadata: + name: tpcds-data-generation-1tb + namespace: spark-team-a +spec: + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default + type: Scala + mode: cluster + image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 + imagePullPolicy: IfNotPresent + sparkVersion: 3.5.3 + mainClass: com.amazonaws.eks.tpcds.DataGeneration + mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar + arguments: + # TPC-DS data location + - "s3a:///TPCDS-TEST-1TB" + # Path to kit in the docker image + - "/opt/tpcds-kit/tools" + # Data Format + - "parquet" + # Scale factor (in GB) - S3 output size shows 309.4GB for 1000GB Input + - "1000" + # Generate data num partitions + - "200" + # Create the partitioned fact tables + - "true" + # Shuffle to get partitions coalesced into single files. + - "true" + # Logging set to WARN + - "true" + sparkConf: + "spark.executorEnv.JAVA_HOME": "/opt/java/openjdk" + "spark.driverEnv.JAVA_HOME": "/opt/java/openjdk" + "spark.network.timeout": "2000s" + "spark.executor.heartbeatInterval": "300s" + # Bug: memoryOverheadFactor is not calculated correctly with Spark Operator when used with YuniKorn Gang Scheduling. Just use memoryOverhead for driver and executor instead. 
+ # "spark.kubernetes.memoryOverheadFactor": "0.3" + # AQE + "spark.sql.adaptive.enabled": "true" + "spark.sql.adaptive.localShuffleReader.enabled": "true" + "spark.sql.adaptive.coalescePartitions.enabled": "true" + "spark.sql.adaptive.skewJoin.enabled": "true" + "spark.sql.files.maxRecordsPerFile": "30000000" + "spark.serializer": "org.apache.spark.serializer.KryoSerializer" + + # S3 Optimizations + "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" + # AWS SDK V1 is in maintenance mode hence commented and enabled AWS SDK V2 Class with S3 CRT support + # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" + # AWS SDK V2 https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html + "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" + "spark.hadoop.fs.s3a.aws.credentials.provider": "software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" + "spark.hadoop.fs.s3a.fast.upload": "true" + "spark.hadoop.fs.s3a.path.style.access": "true" + "spark.hadoop.fs.s3a.fast.upload.buffer": "disk" + "spark.hadoop.fs.s3a.buffer.dir": "/data1/s3a" + "spark.hadoop.fs.s3a.multipart.size": "128M" # Good for large files + "spark.hadoop.fs.s3a.multipart.threshold": "256M" + "spark.hadoop.fs.s3a.threads.max": "50" + "spark.hadoop.fs.s3a.connection.maximum": "200" + "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" + "spark.kubernetes.executor.podNamePrefix": "oss-data-gen" + "spark.sql.shuffle.partitions": "2000" # Adjust according to your job size + # "spark.hadoop.fs.s3a.committer.staging.conflict-mode": "append" + # Data writing and shuffle tuning + "spark.shuffle.file.buffer": "1m" + "spark.reducer.maxSizeInFlight": "128m" + # Java options for driver and executor + "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseG1GC" + "spark.driver.defaultJavaOptions": "-XX:+UseG1GC" + # ----------------------------------------------------- + # This block is very critical when you get errors like + # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred + # Caused by: java.net.SocketTimeoutException: timeout + # Timeout settings for large data generation + spark.kubernetes.submission.connectionTimeout: "60000000" + spark.kubernetes.submission.requestTimeout: "60000000" + spark.kubernetes.driver.connectionTimeout: "60000000" + spark.kubernetes.driver.requestTimeout: "60000000" + restartPolicy: + type: Never + driver: + cores: 11 + # The maximum memory size of the container to the running executor is determined by the sum of + # spark.executor.memoryoverHead, spark.executor.memory, spark.memory.offHeap.size, spark.executor.pyspark.memory + memory: "15g" + memoryOverhead: "4g" + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + initContainers: + - name: volume-permission + image: public.ecr.aws/docker/library/busybox + command: ['sh', '-c', 'mkdir -p /data1; chown -R 185:185 /data1'] + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + nodeSelector: + NodeGroupType: SparkComputeOptimized + executor: + cores: 11 + # The maximum memory size of the container to the running executor is determined by the sum of + # spark.executor.memoryoverHead, 
spark.executor.memory, spark.memory.offHeap.size, spark.executor.pyspark.memory + memory: "15g" + memoryOverhead: "4g" + instances: 26 + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + initContainers: + - name: volume-permission + image: public.ecr.aws/docker/library/busybox + command: ['sh', '-c', 'mkdir -p /data1; chown -R 185:185 /data1'] + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: SparkComputeOptimized + volumes: + - name: spark-local-dir-1 + hostPath: + path: "/mnt/k8s-disks/0" + type: DirectoryOrCreate diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml deleted file mode 100644 index d6d66fdba..000000000 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml +++ /dev/null @@ -1,101 +0,0 @@ -# NOTE: This example requires the following prerequisites before executing the jobs -# 1. Ensure spark-team-a name space exists -# 2. replace with your bucket name - ---- -apiVersion: "sparkoperator.k8s.io/v1beta2" -kind: SparkApplication -metadata: - name: tpcds-data-generation-3t - namespace: spark-team-a - labels: - app: "tpcds-data-generation" - applicationId: "tpcds-data-generation-3t" - # Assign the job to a Yunikorn Queue via label. - queue: root.prod -spec: - type: Scala - mode: cluster - image: public.ecr.aws/data-on-eks/emr-on-eks-benchmark:3.1.2 - imagePullPolicy: IfNotPresent - sparkVersion: 3.1.2 - mainClass: com.amazonaws.eks.tpcds.DataGeneration - mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar - arguments: - # TPC-DS data location - - "s3a:///TPCDS-TEST-3T" - # Path to kit in the docker image - - "/opt/tpcds-kit/tools" - # Data Format - - "parquet" - # Scale factor (in GB) - - "3000" - # Generate data num partitions - - "200" - # Create the partitioned fact tables - - "true" - # Shuffle to get partitions coalesced into single files. 
- - "true" - # Logging set to WARN - - "true" - sparkConf: - "spark.network.timeout": "2000s" - "spark.executor.heartbeatInterval": "300s" - "spark.kubernetes.memoryOverheadFactor": "0.3" - "spark.sql.files.maxRecordsPerFile": "30000000" - "spark.serializer": "org.apache.spark.serializer.KryoSerializer" - # "spark.local.dir": "/data1" - - # S3 settings - "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" - "spark.hadoop.fs.s3a.fast.upload": "true" - "spark.hadoop.fs.s3a.path.style.access": "true" - "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" - "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" - "spark.kubernetes.executor.podNamePrefix": "oss-data-gen" - "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseG1GC" - "spark.driver.defaultJavaOptions": "-XX:+UseG1GC" - # ----------------------------------------------------- - # This block is very critical when you get errors like - # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred - # Caused by: java.net.SocketTimeoutException: timeout - # spark.kubernetes.local.dirs.tmpfs: "true" - spark.kubernetes.submission.connectionTimeout: "60000000" - spark.kubernetes.submission.requestTimeout: "60000000" - spark.kubernetes.driver.connectionTimeout: "60000000" - spark.kubernetes.driver.requestTimeout: "60000000" - # spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size - # ----------------------------------------------------- - - restartPolicy: - type: Never - driver: - cores: 10 - coreLimit: "10.1" - memory: "10g" - serviceAccount: spark-team-a - # the c5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod - # we do not need to leverage a hostPath mount or volume to leverage that storage. - # ephemeral-storage requests and limits can be used to manage the storage utilization - nodeSelector: - provisioner: spark-compute-optimized - tolerations: - - key: "spark-compute-optimized" - operator: "Exists" - effect: "NoSchedule" - executor: - cores: 11 - coreLimit: "11.1" - memory: "15g" - # 3 executors per node 9 nodes - instances: 26 - serviceAccount: spark-team-a - # the c5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod - # we do not need to leverage a hostPath mount or volume to leverage that storage. - # the data generation can utilize a large amount of storage - nodeSelector: - provisioner: spark-compute-optimized - tolerations: - - key: "spark-compute-optimized" - operator: "Exists" - effect: "NoSchedule" diff --git a/analytics/terraform/spark-k8s-operator/examples/cluster-autoscaler/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml b/analytics/terraform/spark-k8s-operator/examples/cluster-autoscaler/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml index 1ac2012a4..d6e173a8b 100644 --- a/analytics/terraform/spark-k8s-operator/examples/cluster-autoscaler/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/cluster-autoscaler/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml @@ -9,12 +9,10 @@ kind: SparkApplication metadata: name: "taxi-trip" namespace: spark-team-a - labels: - app: "taxi-trip" - applicationId: "taxi-trip-yunikorn" - # Assign the job to a Yunikorn Queue via label. 
- queue: root.test spec: + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default # To create Ingress object for Spark driver. # Ensure Spark Operator Helm Chart deployed with Ingress enabled to use this feature # sparkUIOptions: @@ -88,36 +86,6 @@ spec: serviceAccount: spark-team-a labels: version: 3.2.1 - annotations: - yunikorn.apache.org/schedulingPolicyParameters: "placeholderTimeoutSeconds=30 gangSchedulingStyle=Hard" - yunikorn.apache.org/task-group-name: "spark-driver" - # minMember should match with driver and executor instances - # minResource cpu and memory should match with driver and executor cpu and memory - yunikorn.apache.org/task-groups: |- - [{ - "name": "spark-driver", - "minMember": 1, - "minResource": { - "cpu": "1200m", - "memory": "14Gi" - }, - "nodeSelector": { - "NodeGroupType": "spark-on-demand-ca" - }, - "tolerations": [{"key": "spark-on-demand-ca", "operator": "Exists", "effect": "NoSchedule"}] - }, - { - "name": "spark-executor", - "minMember": 4, - "minResource": { - "cpu": "1200m", - "memory": "14Gi" - }, - "nodeSelector": { - "NodeGroupType": "spark-spot-ca" - }, - "tolerations": [{"key": "spark-spot-ca", "operator": "Exists", "effect": "NoSchedule"}] - }] # the r5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod # we do not need to leverage a hostPath mount or volume to leverage that storage. # ephemeral-storage requests and limits can be used to manage the storage utilization diff --git a/analytics/terraform/spark-k8s-operator/examples/docker/Dockerfile-benchmark b/analytics/terraform/spark-k8s-operator/examples/docker/Dockerfile-benchmark new file mode 100644 index 000000000..50b2cd47a --- /dev/null +++ b/analytics/terraform/spark-k8s-operator/examples/docker/Dockerfile-benchmark @@ -0,0 +1,81 @@ +# Use the official Spark base image with Java 17 and Python 3 +FROM apache/spark:3.5.3-scala2.12-java17-python3-ubuntu as tpc-toolkit + +# Arguments for version control +ARG HADOOP_VERSION=3.4.1 +ARG AWS_SDK_VERSION=2.29.0 +ARG SPARK_UID=185 + +# Set environment variables +ENV SPARK_HOME=/opt/spark + +# Set up as root to install dependencies and tools +USER root + +# Install necessary build tools and specific sbt version 0.13.18 +RUN apt-get update && \ + apt-get install -y \ + gcc \ + make \ + flex \ + bison \ + git \ + openjdk-17-jdk \ + wget \ + curl && \ + # Install sbt 0.13.18 + wget https://github.com/sbt/sbt/releases/download/v0.13.18/sbt-0.13.18.tgz && \ + tar -xzf sbt-0.13.18.tgz -C /usr/local && \ + ln -s /usr/local/sbt/bin/sbt /usr/local/bin/sbt && \ + # Cleanup + rm sbt-0.13.18.tgz && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Clone and compile TPC-DS toolkit +WORKDIR /opt +RUN git clone https://github.com/databricks/tpcds-kit.git && \ + cd tpcds-kit/tools && \ + make OS=LINUX && \ + chmod +x dsdgen dsqgen + +# Clone the SQL perf library and related files +RUN git clone -b delta https://github.com/aws-samples/emr-on-eks-benchmark.git /tmp/emr-on-eks-benchmark + +# Build the Databricks SQL perf library +RUN cd /tmp/emr-on-eks-benchmark/spark-sql-perf && sbt +package + +# Use the compiled Databricks SQL perf library to build benchmark utility +RUN cd /tmp/emr-on-eks-benchmark/ && \ + mkdir -p /tmp/emr-on-eks-benchmark/benchmark/libs && \ + cp /tmp/emr-on-eks-benchmark/spark-sql-perf/target/scala-2.12/*.jar /tmp/emr-on-eks-benchmark/benchmark/libs && \ + cd /tmp/emr-on-eks-benchmark/benchmark && sbt assembly + +# Remove any old Hadoop libraries +RUN rm -f 
${SPARK_HOME}/jars/hadoop-client-* && \ + rm -f ${SPARK_HOME}/jars/hadoop-yarn-server-web-proxy-*.jar + +# Add Hadoop AWS connector and AWS SDK for S3A support, along with hadoop-common dependencies +# TODO: hadoop-common, hadoop-yarn-server-web-proxy might not be required. Remove these and test it. +RUN cd ${SPARK_HOME}/jars && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-client-api/${HADOOP_VERSION}/hadoop-client-api-${HADOOP_VERSION}.jar && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-client-runtime/${HADOOP_VERSION}/hadoop-client-runtime-${HADOOP_VERSION}.jar && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-common/${HADOOP_VERSION}/hadoop-common-${HADOOP_VERSION}.jar && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-yarn-server-web-proxy/${HADOOP_VERSION}/hadoop-yarn-server-web-proxy-${HADOOP_VERSION}.jar && \ + wget https://repo1.maven.org/maven2/software/amazon/awssdk/bundle/${AWS_SDK_VERSION}/bundle-${AWS_SDK_VERSION}.jar + +# Create directory for TPC-DS data and set permissions +RUN mkdir -p /opt/tpcds-data && \ + chown -R ${SPARK_UID}:${SPARK_UID} /opt/tpcds-data + +# Copy the built JARs to Spark's jars directory +RUN mkdir -p ${SPARK_HOME}/examples/jars/ && \ + cp /tmp/emr-on-eks-benchmark/benchmark/target/scala-2.12/*jar ${SPARK_HOME}/examples/jars/ && \ + chown -R ${SPARK_UID}:${SPARK_UID} ${SPARK_HOME}/examples + +# Set working directory +WORKDIR ${SPARK_HOME} + +# Switch to non-root user +USER ${SPARK_UID} diff --git a/analytics/terraform/spark-k8s-operator/examples/karpenter/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml b/analytics/terraform/spark-k8s-operator/examples/karpenter/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml index 20243db25..268a2665d 100644 --- a/analytics/terraform/spark-k8s-operator/examples/karpenter/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/karpenter/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml @@ -9,12 +9,10 @@ kind: SparkApplication metadata: name: "taxi-trip" namespace: spark-team-a - labels: - app: "taxi-trip" - applicationId: "taxi-trip-yunikorn" - # Assign the job to a Yunikorn Queue via label. - queue: root.test spec: + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default # To create Ingress object for Spark driver. 
# Ensure Spark Operator Helm Chart deployed with Ingress enabled to use this feature # sparkUIOptions: @@ -98,36 +96,6 @@ spec: effect: "NoSchedule" labels: version: 3.2.1 - annotations: - yunikorn.apache.org/schedulingPolicyParameters: "placeholderTimeoutSeconds=30 gangSchedulingStyle=Hard" - yunikorn.apache.org/task-group-name: "spark-driver" - # minMember should match with driver and executor instances - # minResource cpu and memory should match with driver and executor cpu and memory - yunikorn.apache.org/task-groups: |- - [{ - "name": "spark-driver", - "minMember": 1, - "minResource": { - "cpu": "1200m", - "memory": "14Gi" - }, - "nodeSelector": { - "NodeGroupType": "SparkComputeOptimized" - }, - "tolerations": [{"key": "spark-compute-optimized", "operator": "Exists", "effect": "NoSchedule"}] - }, - { - "name": "spark-executor", - "minMember": 4, - "minResource": { - "cpu": "1200m", - "memory": "14Gi" - }, - "nodeSelector": { - "NodeGroupType": "SparkComputeOptimized" - }, - "tolerations": [{"key": "spark-compute-optimized", "operator": "Exists", "effect": "NoSchedule"}] - }] executor: cores: 1 coreLimit: "1200m" diff --git a/analytics/terraform/spark-k8s-operator/helm-values/spark-operator-values.yaml b/analytics/terraform/spark-k8s-operator/helm-values/spark-operator-values.yaml deleted file mode 100644 index 3ced8f515..000000000 --- a/analytics/terraform/spark-k8s-operator/helm-values/spark-operator-values.yaml +++ /dev/null @@ -1,65 +0,0 @@ -replicaCount: 1 - -webhook: - # -- Enable webhook server - enable: true - # -- Webhook service port - port: 8080 - -serviceAccounts: - spark: - # -- Create a service account for spark apps - create: true - # -- Optional name for the spark service account - name: "" - # -- Optional annotations for the spark service account - annotations: {} - sparkoperator: - # -- Create a service account for the operator - create: true - # -- Optional name for the operator service account - name: "" - # -- Optional annotations for the operator service account - annotations: {} - -# Enable this to monitor only one namespace with this Spark Operator. -# By default, this operator monitors all namespaces for submitting Spark jobs. -# Currently, it does not support selecting multiple namespaces to be monitored by each Spark Operator. You can select either one or all namespaces. -# sparkJobNamespaces: -# - "spark-team-a" - - -# -- Operator concurrency, higher values might increase memory usage -controllerThreads: 10 - -# resources -- Pod resource requests and limits -# Note, that each job submission will spawn a JVM within the Spark Operator Pod using "/usr/local/openjdk-11/bin/java -Xmx128m". -# Kubernetes may kill these Java processes at will to enforce resource limits. When that happens, you will see the following error: -# 'failed to run spark-submit for SparkApplication [...]: signal: killed' - when this happens, you may want to increase memory limits. -resources: - limits: - cpu: 200m - memory: 1Gi - requests: - cpu: 100m - memory: 512Mi - -batchScheduler: - # -- Enable batch scheduler for spark jobs scheduling. If enabled, users can specify batch scheduler name in spark application - enable: true - - -#------------------------------------ -# THIS WILL CREATE SERVICE AND INGRESS OBJECT FOR EACH SPARK APPLICATION -#------------------------------------ -uiService: - # -- Enable UI service creation for Spark application - enable: true - - # -- Ingress URL format. - # Requires the UI service to be enabled by setting `uiService.enable` to true. 
- # 1/ Enable ingressUrlFormat to create an Ingress object for each Spark Job submitted using Spark Operator - # 2/ This setup also requires ingres-nginx to be deployed with NLB as LB with IP based routing. - # 3. Enter the NLB DNS name or enter Custom Domain name from route53 below which points to the NLB - - # ingressUrlFormat: '/{{$appName}}' diff --git a/analytics/terraform/spark-k8s-operator/helm-values/yunikorn-values.yaml b/analytics/terraform/spark-k8s-operator/helm-values/yunikorn-values.yaml index 079653a85..2f8174d61 100644 --- a/analytics/terraform/spark-k8s-operator/helm-values/yunikorn-values.yaml +++ b/analytics/terraform/spark-k8s-operator/helm-values/yunikorn-values.yaml @@ -1,99 +1,3 @@ - - -imagePullSecrets: -serviceAccount: yunikorn-admin - -image: - repository: apache/yunikorn - tag: scheduler-${image_version} - pullPolicy: Always - -pluginImage: - repository: apache/yunikorn - tag: scheduler-plugin-${image_version} - pullPolicy: Always - -nodeSelector: {} -tolerations: [] -affinity: {} - -configuration: null # deprecated; use queues.yaml in yunikornDefaults -operatorPlugins: null # deprecated; use service.operatorPlugins in yunikornDefaults -placeHolderImage: null # deprecated; use service.placeholderImage in yunikornDefaults - -admissionController: - image: - repository: apache/yunikorn - tag: admission-${image_version} - pullPolicy: Always - replicaCount: 1 - serviceAccount: yunikorn-admission-controller - hostNetwork: true - resources: - requests: - cpu: 200m - memory: 500Mi - limits: - cpu: 500m - memory: 500Mi - nodeSelector: {} - tolerations: [] - affinity: {} - service: - type: ClusterIP - processNamespaces: null # deprecated; use admissionController.processNamespaces in yunikornDefaults - bypassNamespaces: null # deprecated; use admissionController.bypassNamespaces in yunikornDefaults - labelNamespaces: null # deprecated; use admissionController.labelNamespaces in yunikornDefaults - noLabelNamespaces: null # deprecated; use admissionController.noLabelNamespaces in yunikornDefaults - -web: - image: - repository: apache/yunikorn - tag: web-${image_version} - pullPolicy: Always - resources: - requests: - memory: 500Mi - cpu: 500m - limits: - memory: 500Mi - cpu: 500m - -service: - type: ClusterIP - port: 9080 - portWeb: 9889 - -ingress: - enabled: false - ingressClassName: "" - annotations: {} - hosts: - - host: chart-example.local - paths: [] - pathType: Prefix - tls: [] - -resources: - requests: - cpu: 400m - memory: 2Gi - limits: - cpu: 4 - memory: 2Gi - - - -# When this flag is true, the admission controller will be installed along with the scheduler. -# When this flag is false, the admission controller will not be installed. -# Once the admission controller is installed, all traffic will be routing to yunikorn. -embedAdmissionController: true - -# When this flag is true, the scheduler will be deployed as Kubernetes scheduler plugin. -# When this flag is false, the scheduler will be deployed as a standalone scheduler. -enableSchedulerPlugin: false - - # Bootstrap configuration for YuniKorn - will be rendered into yunikorn-defaults ConfigMap. # Any valid options for YuniKorn may be specified here. 
# Use this link for more values -> https://yunikorn.apache.org/docs/user_guide/service_config/#yunikorn-configuration @@ -117,11 +21,11 @@ yunikornDefaults: - name: default resources: guaranteed: - memory: 100G - vcore: 10 + memory: 1000G + vcore: 1000 max: - memory: 100G - vcore: 10 + memory: 1000G + vcore: 1000 - name: prod resources: guaranteed: diff --git a/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/Dockerfile b/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/Dockerfile index d53b7feed..a22d55447 100644 --- a/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/Dockerfile +++ b/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/Dockerfile @@ -7,7 +7,7 @@ # 1. The line "COPY neuron_artifacts/. /neuron_artifacts/" will be replaced or removed once Neuron SDK 2.20 is released. # 2. The line "COPY vllm /vllm" will be replaced or removed once Neuron SDK 2.20 is released, and the Neuron team upstreams the changes to the vLLM project. # ----------------------------------------------------------------------------------- -FROM public.ecr.aws/neuron/pytorch-inference-neuronx:2.1.2-neuronx-py310-sdk2.20.0-ubuntu20.04-0 +FROM public.ecr.aws/neuron/pytorch-inference-neuronx:2.1.2-neuronx-py310-sdk2.20.0-ubuntu20.04 ENV VLLM_TARGET_DEVICE=neuron diff --git a/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/run.sh b/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/run.sh index 8ed298c89..7f7f4ddb8 100644 --- a/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/run.sh +++ b/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/run.sh @@ -26,8 +26,8 @@ fi # Run the Python script python neuron_multi_node_runner.py \ --model=$NEURON_MODEL_PATH \ ---max-num-seqs=$MAX_NUM_SEQ \ ---max-model-len=$MAX_MODEL_LENGTH \ +--max-num-seqs=$MAX_NUM_SEQS \ +--max-model-len=$MAX_MODEL_LEN \ --block-size=$BLOCK_SIZE \ --tensor-parallel-size=$GLOBAL_TP \ --port=$VLLM_LEADER_SERVICE_PORT \ diff --git a/gen-ai/inference/vllm-llama3.1-405b-trn1/llama3-405b-vllm-lws-deployment.yaml b/gen-ai/inference/vllm-llama3.1-405b-trn1/llama3-405b-vllm-lws-deployment.yaml index d59ff5ea1..4d2a2d7cf 100644 --- a/gen-ai/inference/vllm-llama3.1-405b-trn1/llama3-405b-vllm-lws-deployment.yaml +++ b/gen-ai/inference/vllm-llama3.1-405b-trn1/llama3-405b-vllm-lws-deployment.yaml @@ -1,8 +1,8 @@ # Important Notice: # Before deploying this configuration, please ensure the following: -# 1. **Region and Environment Variables**: Verify that the `AWS_DEFAULT_REGION` and other environment variables are correctly set for your deployment. This YAML is currently configured for the `us-west-2` region. -# 2. **Pre-requisite**: Check the README.md file for instructions. -# 3. **Custom Images**: This configuration uses a custom image hosted on ECR (`public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:latest`). Ensure that this image is accessible and meets your deployment needs. +# 1. Region and Environment Variables: Verify that the `AWS_DEFAULT_REGION` and other environment variables are correctly set for your deployment. This YAML is currently configured for the `us-west-2` region. +# 2. Pre-requisite: Check the README.md file for instructions. +# 3. Custom Images: This configuration uses a custom image hosted on ECR (`public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:v8`). Ensure that this image is accessible and meets your deployment needs. 
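Note on the environment-variable renames in the run.sh hunk above: run.sh now builds the vLLM launch command from MAX_NUM_SEQS and MAX_MODEL_LEN, so the LeaderWorkerSet manifest below must export exactly those names. A minimal sanity check once the leader pod is running, assuming placeholder namespace and pod names that are not part of this patch (the container name vllm-leader does come from the manifest):

    kubectl exec -n <namespace> <leader-pod> -c vllm-leader -- env | grep -E 'MAX_NUM_SEQS|MAX_MODEL_LEN|BLOCK_SIZE'

If the grep comes back empty, the deployment is still exporting the old variable names and run.sh will pass empty values to the launcher.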
--- apiVersion: v1 @@ -75,15 +75,23 @@ spec: value: us-west-2 - name: NEURON_MODEL_PATH value: /mnt/k8s-disks/0/checkpoints/llama-3.1-405b-instruct/ + # - name: NEURON_CONTEXT_LENGTH_BUCKETS # not working + # value: "1024,2048,4096,8192,16384,32768,65536,131072" + # - name: NEURON_TOKEN_GEN_BUCKETS # not working + # value: "1024,2048,4096,8192,16384,32768,65536,131072" - name: NEURON_CONTEXT_LENGTH_ESTIMATE value: '[''1024'',''2048'',''4096'',''8192'',''16384'',''32768'',''65536'',''131072'']' - - name: MAX_MODEL_LENGTH + - name: MAX_MODEL_LEN value: "8192" - name: BLOCK_SIZE value: "8192" - - name: MAX_NUM_SEQ + - name: MAX_NUM_SEQS value: "2" - - name: NEURON_QUANT + - name: NEURON_CC_PIPELINE_FACTOR + value: "4" + - name: NEURON_COMPILE_CACHE_URL + value: /mnt/k8s-disks/0/checkpoints/llama-3.1-405b-instruct/compiled_artifact + - name: NEURON_QUANT # not in the vllm code value: "False" - name: NEURON_SEQUENCE_PARALLEL value: "True" @@ -99,7 +107,7 @@ spec: valueFrom: fieldRef: fieldPath: metadata.annotations['leaderworkerset.sigs.k8s.io/size'] - image: public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:latest + image: public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:v8 imagePullPolicy: Always name: vllm-leader ports: @@ -186,14 +194,22 @@ spec: value: us-west-2 - name: NEURON_MODEL_PATH value: /mnt/k8s-disks/0/checkpoints/llama-3.1-405b-instruct/ + # - name: NEURON_CONTEXT_LENGTH_BUCKETS # not working + # value: "1024,2048,4096,8192,16384,32768,65536,131072" + # - name: NEURON_TOKEN_GEN_BUCKETS # not working + # value: "1024,2048,4096,8192,16384,32768,65536,131072" - name: NEURON_CONTEXT_LENGTH_ESTIMATE value: '[''1024'',''2048'',''4096'',''8192'',''16384'',''32768'',''65536'',''131072'']' - - name: MAX_MODEL_LENGTH + - name: MAX_MODEL_LEN value: "8192" - name: BLOCK_SIZE value: "8192" - - name: MAX_NUM_SEQ + - name: MAX_NUM_SEQS value: "2" + - name: NEURON_CC_PIPELINE_FACTOR # not required + value: "4" + - name: NEURON_COMPILE_CACHE_URL + value: /mnt/k8s-disks/0/checkpoints/llama-3.1-405b-instruct/compiled_artifact - name: NEURON_QUANT value: "False" - name: NEURON_SEQUENCE_PARALLEL @@ -210,7 +226,7 @@ spec: valueFrom: fieldRef: fieldPath: metadata.annotations['leaderworkerset.sigs.k8s.io/size'] - image: public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:latest + image: public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:v8 imagePullPolicy: Always name: vllm-worker ports: diff --git a/schedulers/terraform/argo-workflow/README.md b/schedulers/terraform/argo-workflow/README.md index ebb39c7c2..6119fccdd 100644 --- a/schedulers/terraform/argo-workflow/README.md +++ b/schedulers/terraform/argo-workflow/README.md @@ -78,15 +78,15 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.29"` | no | -| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | +| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | | [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no | | [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no | | [enable\_yunikorn](#input\_enable\_yunikorn) | Enable Apache YuniKorn Scheduler | `bool` | `true` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"doeks-spark-argo"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/16"` | no | ## Outputs diff --git a/schedulers/terraform/aws-batch-eks/README.md b/schedulers/terraform/aws-batch-eks/README.md index 3a066e501..49fedd1cf 100644 --- a/schedulers/terraform/aws-batch-eks/README.md +++ b/schedulers/terraform/aws-batch-eks/README.md @@ -58,7 +58,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [aws\_batch\_doeks\_jd\_name](#input\_aws\_batch\_doeks\_jd\_name) | The AWS Batch example job definition name | `string` | `"doeks-hello-world"` | no | | [aws\_batch\_doeks\_jq\_name](#input\_aws\_batch\_doeks\_jq\_name) | The AWS Batch EKS namespace | `string` | `"doeks-JQ1"` | no | | [aws\_batch\_doeks\_namespace](#input\_aws\_batch\_doeks\_namespace) | The AWS Batch EKS namespace | `string` | `"doeks-aws-batch"` | no | -| [aws\_batch\_instance\_types](#input\_aws\_batch\_instance\_types) | The set of instance types to launch for AWS Batch jobs. | `list(string)` |
[
"optimal"
]
| no | +| [aws\_batch\_instance\_types](#input\_aws\_batch\_instance\_types) | The set of instance types to launch for AWS Batch jobs. | `list(string)` |
[
"optimal"
]
| no | | [aws\_batch\_max\_vcpus](#input\_aws\_batch\_max\_vcpus) | The minimum aggregate vCPU for AWS Batch compute environment | `number` | `256` | no | | [aws\_batch\_min\_vcpus](#input\_aws\_batch\_min\_vcpus) | The minimum aggregate vCPU for AWS Batch compute environment | `number` | `0` | no | | [aws\_region](#input\_aws\_region) | AWS Region | `string` | `"us-east-1"` | no | @@ -67,8 +67,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [eks\_private\_cluster\_endpoint](#input\_eks\_private\_cluster\_endpoint) | Whether to have a private cluster endpoint for the EKS cluster. | `bool` | `true` | no | | [eks\_public\_cluster\_endpoint](#input\_eks\_public\_cluster\_endpoint) | Whether to have a public cluster endpoint for the EKS cluster. #WARNING: Avoid a public endpoint in preprod or prod accounts. This feature is designed for sandbox accounts, simplifying cluster deployment and testing. | `bool` | `true` | no | | [num\_azs](#input\_num\_azs) | The number of Availability Zones to deploy subnets to. Must be 2 or more | `number` | `2` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no | diff --git a/schedulers/terraform/self-managed-airflow/README.md b/schedulers/terraform/self-managed-airflow/README.md index 5d7260551..c0f229deb 100644 --- a/schedulers/terraform/self-managed-airflow/README.md +++ b/schedulers/terraform/self-managed-airflow/README.md @@ -99,17 +99,17 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [db\_private\_subnets](#input\_db\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Airflow DB. | `list(string)` |
[
"10.0.20.0/26",
"10.0.21.0/26"
]
| no | +| [db\_private\_subnets](#input\_db\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Airflow DB. | `list(string)` |
[
"10.0.20.0/26",
"10.0.21.0/26"
]
| no | | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.29"` | no | -| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | +| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | | [enable\_airflow](#input\_enable\_airflow) | Enable Apache Airflow | `bool` | `true` | no | | [enable\_airflow\_spark\_example](#input\_enable\_airflow\_spark\_example) | Enable Apache Airflow and Spark Operator example | `bool` | `false` | no | | [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"self-managed-airflow"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.0.1.0/24",
"10.0.2.0/24"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.0.0.0/26",
"10.0.0.64/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.0.1.0/24",
"10.0.2.0/24"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.0.0.0/26",
"10.0.0.64/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.0.0.0/16"` | no | ## Outputs diff --git a/streaming/flink/README.md b/streaming/flink/README.md index fb2673697..876d564fb 100755 --- a/streaming/flink/README.md +++ b/streaming/flink/README.md @@ -73,8 +73,8 @@ | [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no | | [enable\_yunikorn](#input\_enable\_yunikorn) | Enable Apache YuniKorn Scheduler | `bool` | `true` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"flink-operator-doeks"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no | diff --git a/streaming/kafka/README.md b/streaming/kafka/README.md index f1deaafb1..56fc5cf55 100644 --- a/streaming/kafka/README.md +++ b/streaming/kafka/README.md @@ -59,13 +59,13 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.31"` | no | -| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | +| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | | [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"kafka-on-eks"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/16"` | no | ## Outputs diff --git a/streaming/nifi/README.md b/streaming/nifi/README.md index c1894ab32..25ce367da 100644 --- a/streaming/nifi/README.md +++ b/streaming/nifi/README.md @@ -82,8 +82,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"nifi-on-eks"` | no | | [nifi\_sub\_domain](#input\_nifi\_sub\_domain) | Subdomain for NiFi cluster. | `string` | `"mynifi"` | no | | [nifi\_username](#input\_nifi\_username) | NiFi login username | `string` | `"admin"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 16382 IPs per Subnet | `list(string)` |
[
"10.1.0.0/18",
"10.1.64.0/18",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 4094 IPs per Subnet | `list(string)` |
[
"10.1.192.0/20",
"10.1.208.0/20",
"10.1.224.0/20"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 16382 IPs per Subnet | `list(string)` |
[
"10.1.0.0/18",
"10.1.64.0/18",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 4094 IPs per Subnet | `list(string)` |
[
"10.1.192.0/20",
"10.1.208.0/20",
"10.1.224.0/20"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no | diff --git a/streaming/spark-streaming/terraform/README.md b/streaming/spark-streaming/terraform/README.md index 9dd02dfbf..d60b6dd78 100644 --- a/streaming/spark-streaming/terraform/README.md +++ b/streaming/spark-streaming/terraform/README.md @@ -70,15 +70,15 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.29"` | no | -| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | +| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | | [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no | | [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no | | [enable\_yunikorn](#input\_enable\_yunikorn) | Enable Apache YuniKorn Scheduler | `bool` | `false` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"spark-streaming-doeks"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/16"` | no | ## Outputs
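The README tables above document each module's list(string) inputs together with their defaults. As a rough sketch of how these inputs are typically overridden at deploy time — the values shown are just the documented defaults from the spark-streaming table, and each module's variables.tf remains the authoritative definition:

    terraform apply \
      -var 'region=us-west-2' \
      -var 'secondary_cidr_blocks=["100.64.0.0/16"]' \
      -var 'private_subnets=["10.1.1.0/24","10.1.2.0/24"]' \
      -var 'public_subnets=["10.1.0.0/26","10.1.0.64/26"]'

List values passed with -var use HCL syntax, so the single quotes around each assignment matter when running from a shell.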