From 24c043cab18bf801d8f6f500735ac69d4c46ea8e Mon Sep 17 00:00:00 2001 From: Emterry Date: Wed, 13 Nov 2024 16:16:17 +0000 Subject: [PATCH 01/11] maintenance --- .../eks-cluster.tf | 4 +-- .../eks-pod-identities.tf | 2 +- .../environment-configuration.tf | 10 +++--- .../iam-policies.tf | 16 +++++----- .../analytical-platform-compute/iam-roles.tf | 32 +++++++++---------- .../analytical-platform-compute/s3-buckets.tf | 8 ++--- .../vpc-endpoints.tf | 2 +- .../analytical-platform-compute/vpc.tf | 2 +- 8 files changed, 38 insertions(+), 38 deletions(-) diff --git a/terraform/environments/analytical-platform-compute/eks-cluster.tf b/terraform/environments/analytical-platform-compute/eks-cluster.tf index f96b5254937..d5e7be40b72 100644 --- a/terraform/environments/analytical-platform-compute/eks-cluster.tf +++ b/terraform/environments/analytical-platform-compute/eks-cluster.tf @@ -6,7 +6,7 @@ module "eks" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/eks/aws" - version = "20.26.0" + version = "20.29.0" cluster_name = local.eks_cluster_name cluster_version = local.environment_configuration.eks_cluster_version @@ -172,7 +172,7 @@ module "karpenter" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/eks/aws//modules/karpenter" - version = "20.26.0" + version = "20.29.0" cluster_name = module.eks.cluster_name diff --git a/terraform/environments/analytical-platform-compute/eks-pod-identities.tf b/terraform/environments/analytical-platform-compute/eks-pod-identities.tf index d3a85bf50cd..8b219126c30 100644 --- a/terraform/environments/analytical-platform-compute/eks-pod-identities.tf +++ b/terraform/environments/analytical-platform-compute/eks-pod-identities.tf @@ -7,7 +7,7 @@ module "aws_cloudwatch_metrics_pod_identity" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/eks-pod-identity/aws" - version = "1.5.0" + version = "1.7.0" name = "aws-cloudwatch-metrics" diff --git a/terraform/environments/analytical-platform-compute/environment-configuration.tf b/terraform/environments/analytical-platform-compute/environment-configuration.tf index 85e815d30ed..cbb49e64162 100644 --- a/terraform/environments/analytical-platform-compute/environment-configuration.tf +++ b/terraform/environments/analytical-platform-compute/environment-configuration.tf @@ -29,10 +29,10 @@ locals { eks_cluster_version = "1.31" eks_node_version = "1.25.0-388e1050" eks_cluster_addon_versions = { - coredns = "v1.11.3-eksbuild.1" - kube_proxy = "v1.31.0-eksbuild.5" - aws_ebs_csi_driver = "v1.35.0-eksbuild.1" - aws_efs_csi_driver = "v2.0.7-eksbuild.1" + coredns = "v1.11.3-eksbuild.2" + kube_proxy = "v1.31.1-eksbuild.2" + aws_ebs_csi_driver = "v1.36.0-eksbuild.1" + aws_efs_csi_driver = "v2.0.8-eksbuild.1" aws_guardduty_agent = "v1.7.1-eksbuild.2" eks_pod_identity_agent = "v1.3.2-eksbuild.2" vpc_cni = "v1.18.5-eksbuild.1" @@ -86,7 +86,7 @@ locals { aws_efs_csi_driver = "v2.0.7-eksbuild.1" aws_guardduty_agent = "v1.7.1-eksbuild.2" eks_pod_identity_agent = "v1.3.2-eksbuild.2" - vpc_cni = "v1.18.5-eksbuild.1" + vpc_cni = "v1.18.6-eksbuild.1" } /* Observability Platform */ diff --git a/terraform/environments/analytical-platform-compute/iam-policies.tf b/terraform/environments/analytical-platform-compute/iam-policies.tf index 63610c9d3bf..a9acae46b9a 100644 --- a/terraform/environments/analytical-platform-compute/iam-policies.tf +++ b/terraform/environments/analytical-platform-compute/iam-policies.tf @@ -18,7 +18,7 @@ module "eks_cluster_logs_kms_access_iam_policy" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-policy" - version = "5.46.0" + version = "5.48.0" name_prefix = "eks-cluster-logs-kms-access" @@ -45,7 +45,7 @@ module "karpenter_sqs_kms_access_iam_policy" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-policy" - version = "5.46.0" + version = "5.48.0" name_prefix = "karpenter-sqs-kms-access" @@ -71,7 +71,7 @@ module "amazon_prometheus_proxy_iam_policy" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-policy" - version = "5.46.0" + version = "5.48.0" name_prefix = "amazon-prometheus-proxy" @@ -98,7 +98,7 @@ module "managed_prometheus_kms_access_iam_policy" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-policy" - version = "5.46.0" + version = "5.48.0" name_prefix = "managed-prometheus-kms-access" @@ -147,7 +147,7 @@ module "mlflow_iam_policy" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-policy" - version = "5.46.0" + version = "5.48.0" name_prefix = "mlflow" @@ -168,7 +168,7 @@ module "gha_mojas_airflow_iam_policy" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-policy" - version = "5.46.0" + version = "5.48.0" name_prefix = "github-actions-mojas-airflow" @@ -258,7 +258,7 @@ module "analytical_platform_lake_formation_share_policy" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-policy" - version = "5.46.0" + version = "5.48.0" name_prefix = "analytical-platform-lake-formation-sharing-policy" @@ -290,7 +290,7 @@ module "quicksight_vpc_connection_iam_policy" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-policy" - version = "5.46.0" + version = "5.48.0" name_prefix = "quicksight-vpc-connection" diff --git a/terraform/environments/analytical-platform-compute/iam-roles.tf b/terraform/environments/analytical-platform-compute/iam-roles.tf index b8c42113cb6..2de24c1e9fe 100644 --- a/terraform/environments/analytical-platform-compute/iam-roles.tf +++ b/terraform/environments/analytical-platform-compute/iam-roles.tf @@ -3,7 +3,7 @@ module "vpc_cni_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" role_name_prefix = "vpc-cni" attach_vpc_cni_policy = true @@ -24,7 +24,7 @@ module "ebs_csi_driver_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" role_name_prefix = "ebs-csi-driver" attach_ebs_csi_policy = true @@ -44,7 +44,7 @@ module "efs_csi_driver_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" role_name_prefix = "efs-csi-driver" attach_efs_csi_policy = true @@ -64,7 +64,7 @@ module "aws_for_fluent_bit_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" role_name_prefix = "aws-for-fluent-bit" @@ -88,7 +88,7 @@ module "amazon_prometheus_proxy_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" role_name_prefix = "amazon-prometheus-proxy" @@ -111,7 +111,7 @@ module "cluster_autoscaler_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" role_name_prefix = "cluster-autoscaler" @@ -133,7 +133,7 @@ module "external_dns_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" role_name_prefix = "external-dns" attach_external_dns_policy = true @@ -154,7 +154,7 @@ module "cert_manager_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" role_name_prefix = "cert-manager" attach_cert_manager_policy = true @@ -175,7 +175,7 @@ module "external_secrets_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" role_name_prefix = "external-secrets" attach_external_secrets_policy = true @@ -196,7 +196,7 @@ module "mlflow_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" role_name_prefix = "mlflow" @@ -219,7 +219,7 @@ module "gha_mojas_airflow_iam_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-github-oidc-role" - version = "5.46.0" + version = "5.48.0" name = "github-actions-mojas-airflow" @@ -237,7 +237,7 @@ module "lake_formation_share_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role" - version = "5.46.0" + version = "5.48.0" create_role = true role_requires_mfa = false @@ -264,7 +264,7 @@ module "analytical_platform_ui_service_role" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.46.0" + version = "5.48.0" create_role = true @@ -287,7 +287,7 @@ module "analytical_platform_control_panel_service_role" { #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role" - version = "5.46.0" + version = "5.48.0" allow_self_assume_role = true trusted_role_arns = [ @@ -310,7 +310,7 @@ module "analytical_platform_data_eng_dba_service_role" { #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role" - version = "5.46.0" + version = "5.48.0" allow_self_assume_role = false trusted_role_arns = formatlist("arn:aws:iam::%s:root", [local.environment_management.account_ids[local.analytical_platform_environment], local.environment_management.account_ids["analytical-platform-management-production"]]) @@ -330,7 +330,7 @@ module "quicksight_vpc_connection_iam_role" { #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role" - version = "5.46.0" + version = "5.48.0" create_role = true role_name_prefix = "quicksight-vpc-connection" diff --git a/terraform/environments/analytical-platform-compute/s3-buckets.tf b/terraform/environments/analytical-platform-compute/s3-buckets.tf index c65dedf3689..ffbfa4b740a 100644 --- a/terraform/environments/analytical-platform-compute/s3-buckets.tf +++ b/terraform/environments/analytical-platform-compute/s3-buckets.tf @@ -3,7 +3,7 @@ module "mlflow_bucket" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/s3-bucket/aws" - version = "4.2.1" + version = "4.2.2" bucket = "mojap-compute-${local.environment}-mlflow" @@ -66,7 +66,7 @@ module "mojap_derived_tables_replication_bucket" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/s3-bucket/aws" - version = "4.2.1" + version = "4.2.2" providers = { aws = aws.analytical-platform-compute-eu-west-1 @@ -127,7 +127,7 @@ module "mojap_compute_logs_bucket_eu_west_2" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/s3-bucket/aws" - version = "4.2.1" + version = "4.2.2" bucket = "mojap-compute-${local.environment}-logs-eu-west-2" @@ -179,7 +179,7 @@ module "mojap_compute_logs_bucket_eu_west_1" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/s3-bucket/aws" - version = "4.2.1" + version = "4.2.2" providers = { aws = aws.analytical-platform-compute-eu-west-1 diff --git a/terraform/environments/analytical-platform-compute/vpc-endpoints.tf b/terraform/environments/analytical-platform-compute/vpc-endpoints.tf index 75b40822f0b..e096613bece 100644 --- a/terraform/environments/analytical-platform-compute/vpc-endpoints.tf +++ b/terraform/environments/analytical-platform-compute/vpc-endpoints.tf @@ -3,7 +3,7 @@ module "vpc_endpoints" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints" - version = "5.13.0" + version = "5.15.0" vpc_id = module.vpc.vpc_id subnet_ids = module.vpc.private_subnets diff --git a/terraform/environments/analytical-platform-compute/vpc.tf b/terraform/environments/analytical-platform-compute/vpc.tf index e82606e1482..f134388e418 100644 --- a/terraform/environments/analytical-platform-compute/vpc.tf +++ b/terraform/environments/analytical-platform-compute/vpc.tf @@ -6,7 +6,7 @@ module "vpc" { #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/vpc/aws" - version = "5.13.0" + version = "5.15.0" name = local.our_vpc_name azs = slice(data.aws_availability_zones.available.names, 0, 3) From 357d2efe67adce6fc489d7657ae538f7ffae9108 Mon Sep 17 00:00:00 2001 From: Emterry Date: Thu, 14 Nov 2024 09:43:18 +0000 Subject: [PATCH 02/11] update rest of addons --- .../environment-configuration.tf | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/terraform/environments/analytical-platform-compute/environment-configuration.tf b/terraform/environments/analytical-platform-compute/environment-configuration.tf index cbb49e64162..4240dd2b18f 100644 --- a/terraform/environments/analytical-platform-compute/environment-configuration.tf +++ b/terraform/environments/analytical-platform-compute/environment-configuration.tf @@ -35,7 +35,7 @@ locals { aws_efs_csi_driver = "v2.0.8-eksbuild.1" aws_guardduty_agent = "v1.7.1-eksbuild.2" eks_pod_identity_agent = "v1.3.2-eksbuild.2" - vpc_cni = "v1.18.5-eksbuild.1" + vpc_cni = "v1.18.6-eksbuild.1" } /* Data Engineering Airflow */ @@ -80,10 +80,10 @@ locals { eks_cluster_version = "1.31" eks_node_version = "1.25.0-388e1050" eks_cluster_addon_versions = { - coredns = "v1.11.3-eksbuild.1" - kube_proxy = "v1.31.0-eksbuild.5" - aws_ebs_csi_driver = "v1.35.0-eksbuild.1" - aws_efs_csi_driver = "v2.0.7-eksbuild.1" + coredns = "v1.11.3-eksbuild.2" + kube_proxy = "v1.31.1-eksbuild.2" + aws_ebs_csi_driver = "v1.36.0-eksbuild.1" + aws_efs_csi_driver = "v2.0.8-eksbuild.1" aws_guardduty_agent = "v1.7.1-eksbuild.2" eks_pod_identity_agent = "v1.3.2-eksbuild.2" vpc_cni = "v1.18.6-eksbuild.1" @@ -130,13 +130,13 @@ locals { eks_cluster_version = "1.31" eks_node_version = "1.25.0-388e1050" eks_cluster_addon_versions = { - coredns = "v1.11.3-eksbuild.1" - kube_proxy = "v1.31.0-eksbuild.5" - aws_ebs_csi_driver = "v1.35.0-eksbuild.1" - aws_efs_csi_driver = "v2.0.7-eksbuild.1" + coredns = "v1.11.3-eksbuild.2" + kube_proxy = "v1.31.1-eksbuild.2" + aws_ebs_csi_driver = "v1.36.0-eksbuild.1" + aws_efs_csi_driver = "v2.0.8-eksbuild.1" aws_guardduty_agent = "v1.7.1-eksbuild.2" eks_pod_identity_agent = "v1.3.2-eksbuild.2" - vpc_cni = "v1.18.5-eksbuild.1" + vpc_cni = "v1.18.6-eksbuild.1" } /* Data Engineering Airflow */ From e95332cfcc47e71c773942be70135cf476adc16e Mon Sep 17 00:00:00 2001 From: Emterry Date: Thu, 14 Nov 2024 16:16:59 +0000 Subject: [PATCH 03/11] add bottlerocket and helm patching --- .../environment-configuration.tf | 6 +++--- .../helm-charts-system.tf | 14 +++++++------- .../analytical-platform-compute/locals.tf | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/terraform/environments/analytical-platform-compute/environment-configuration.tf b/terraform/environments/analytical-platform-compute/environment-configuration.tf index 4240dd2b18f..f422c9c8752 100644 --- a/terraform/environments/analytical-platform-compute/environment-configuration.tf +++ b/terraform/environments/analytical-platform-compute/environment-configuration.tf @@ -27,7 +27,7 @@ locals { /* EKS */ eks_sso_access_role = "modernisation-platform-sandbox" eks_cluster_version = "1.31" - eks_node_version = "1.25.0-388e1050" + eks_node_version = "1.26.2-360b7a38" eks_cluster_addon_versions = { coredns = "v1.11.3-eksbuild.2" kube_proxy = "v1.31.1-eksbuild.2" @@ -78,7 +78,7 @@ locals { /* EKS */ eks_sso_access_role = "modernisation-platform-developer" eks_cluster_version = "1.31" - eks_node_version = "1.25.0-388e1050" + eks_node_version = "1.26.2-360b7a38" eks_cluster_addon_versions = { coredns = "v1.11.3-eksbuild.2" kube_proxy = "v1.31.1-eksbuild.2" @@ -128,7 +128,7 @@ locals { /* EKS */ eks_sso_access_role = "modernisation-platform-developer" eks_cluster_version = "1.31" - eks_node_version = "1.25.0-388e1050" + eks_node_version = "1.26.2-360b7a38" eks_cluster_addon_versions = { coredns = "v1.11.3-eksbuild.2" kube_proxy = "v1.31.1-eksbuild.2" diff --git a/terraform/environments/analytical-platform-compute/helm-charts-system.tf b/terraform/environments/analytical-platform-compute/helm-charts-system.tf index 7a1daf7d9fc..c7d2e120b0a 100644 --- a/terraform/environments/analytical-platform-compute/helm-charts-system.tf +++ b/terraform/environments/analytical-platform-compute/helm-charts-system.tf @@ -4,7 +4,7 @@ resource "helm_release" "kyverno" { name = "kyverno" repository = "https://kyverno.github.io/kyverno" chart = "kyverno" - version = "3.2.7" + version = "3.3.3" namespace = kubernetes_namespace.kyverno.metadata[0].name values = [ templatefile( @@ -71,7 +71,7 @@ resource "helm_release" "amazon_prometheus_proxy" { name = "amazon-prometheus-proxy" repository = "https://prometheus-community.github.io/helm-charts" chart = "kube-prometheus-stack" - version = "65.2.0" + version = "66.1.1" namespace = kubernetes_namespace.aws_observability.metadata[0].name values = [ templatefile( @@ -96,7 +96,7 @@ resource "helm_release" "cluster_autoscaler" { name = "cluster-autoscaler" repository = "https://kubernetes.github.io/autoscaler" chart = "cluster-autoscaler" - version = "9.43.0" + version = "9.43.2" namespace = kubernetes_namespace.cluster_autoscaler.metadata[0].name values = [ @@ -119,7 +119,7 @@ resource "helm_release" "karpenter_crd" { name = "karpenter-crd" repository = "oci://public.ecr.aws/karpenter" chart = "karpenter-crd" - version = "1.0.6" + version = "1.0.8" namespace = kubernetes_namespace.karpenter.metadata[0].name values = [ @@ -141,7 +141,7 @@ resource "helm_release" "karpenter" { name = "karpenter" repository = "oci://public.ecr.aws/karpenter" chart = "karpenter" - version = "1.0.6" + version = "1.0.8" namespace = kubernetes_namespace.karpenter.metadata[0].name values = [ @@ -283,7 +283,7 @@ resource "helm_release" "external_secrets" { name = "external-secrets" repository = "https://charts.external-secrets.io" chart = "external-secrets" - version = "0.10.4" + version = "0.10.5" namespace = kubernetes_namespace.external_secrets.metadata[0].name values = [ templatefile( @@ -310,7 +310,7 @@ resource "helm_release" "keda" { name = "keda" repository = "https://kedacore.github.io/charts" chart = "keda" - version = "2.15.1" + version = "2.16.0" namespace = kubernetes_namespace.keda.metadata[0].name values = [ templatefile( diff --git a/terraform/environments/analytical-platform-compute/locals.tf b/terraform/environments/analytical-platform-compute/locals.tf index 47a6272f27e..78e3b560296 100644 --- a/terraform/environments/analytical-platform-compute/locals.tf +++ b/terraform/environments/analytical-platform-compute/locals.tf @@ -17,7 +17,7 @@ locals { eks_cloudwatch_log_group_retention_in_days = 400 /* Kube Prometheus Stack */ - prometheus_operator_crd_version = "v0.77.1" + prometheus_operator_crd_version = "v0.78.1" /* Mapping Analytical Platform Environments to Modernisation Platform */ From def7a55eb620dfe1747405f5f1f229b1e1ed742f Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Thu, 14 Nov 2024 17:24:08 +0000 Subject: [PATCH 04/11] Revert BR Signed-off-by: Jacob Woffenden --- .../analytical-platform-compute/environment-configuration.tf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/terraform/environments/analytical-platform-compute/environment-configuration.tf b/terraform/environments/analytical-platform-compute/environment-configuration.tf index f422c9c8752..51a61845f62 100644 --- a/terraform/environments/analytical-platform-compute/environment-configuration.tf +++ b/terraform/environments/analytical-platform-compute/environment-configuration.tf @@ -27,7 +27,8 @@ locals { /* EKS */ eks_sso_access_role = "modernisation-platform-sandbox" eks_cluster_version = "1.31" - eks_node_version = "1.26.2-360b7a38" + # eks_node_version = "1.26.2-360b7a38" + eks_node_version = "1.25.0-388e1050" eks_cluster_addon_versions = { coredns = "v1.11.3-eksbuild.2" kube_proxy = "v1.31.1-eksbuild.2" From 1187fed5b5889337d62dc27380cacf6945372547 Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Thu, 14 Nov 2024 17:28:59 +0000 Subject: [PATCH 05/11] testing expireAfter Signed-off-by: Jacob Woffenden --- .../src/helm/charts/karpenter-configuration/Chart.yaml | 2 +- .../templates/node-pool-general-on-demand.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml index 2ac953cac14..81c00bc19ad 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml @@ -3,4 +3,4 @@ apiVersion: v2 name: karpenter-configuration description: A Helm chart to deploy Karpenter's configuration type: application -version: 1.9.0 \ No newline at end of file +version: 2.0.0 diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml index f9401e55efc..7f0add0aec5 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml @@ -12,6 +12,7 @@ spec: labels: compute.analytical-platform.service.justice.gov.uk/karpenter-node-pool: "general-on-demand" spec: + expireAfter: Never nodeClassRef: apiVersion: karpenter.k8s.aws/v1beta1 kind: EC2NodeClass From a87396d1ffee0982351f84943ac8256f6c899cd3 Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Thu, 14 Nov 2024 18:10:39 +0000 Subject: [PATCH 06/11] Revert the revert Signed-off-by: Jacob Woffenden --- .../analytical-platform-compute/environment-configuration.tf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/terraform/environments/analytical-platform-compute/environment-configuration.tf b/terraform/environments/analytical-platform-compute/environment-configuration.tf index 51a61845f62..f422c9c8752 100644 --- a/terraform/environments/analytical-platform-compute/environment-configuration.tf +++ b/terraform/environments/analytical-platform-compute/environment-configuration.tf @@ -27,8 +27,7 @@ locals { /* EKS */ eks_sso_access_role = "modernisation-platform-sandbox" eks_cluster_version = "1.31" - # eks_node_version = "1.26.2-360b7a38" - eks_node_version = "1.25.0-388e1050" + eks_node_version = "1.26.2-360b7a38" eks_cluster_addon_versions = { coredns = "v1.11.3-eksbuild.2" kube_proxy = "v1.31.1-eksbuild.2" From ea876d83cbba7a19065f3944a335c9e0a91d6551 Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Thu, 14 Nov 2024 18:56:41 +0000 Subject: [PATCH 07/11] Add spec.disruption.budgets Signed-off-by: Jacob Woffenden --- .../src/helm/charts/karpenter-configuration/Chart.yaml | 2 +- .../templates/node-pool-airflow-high-memory.yaml | 2 ++ .../templates/node-pool-general-on-demand.yaml | 3 ++- .../templates/node-pool-general-spot.yaml | 2 ++ .../templates/node-pool-gpu-on-demand.yaml | 2 ++ .../karpenter-configuration/templates/node-pool-gpu-spot.yaml | 2 ++ 6 files changed, 11 insertions(+), 2 deletions(-) diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml index 81c00bc19ad..8ce4103e8c8 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml @@ -3,4 +3,4 @@ apiVersion: v2 name: karpenter-configuration description: A Helm chart to deploy Karpenter's configuration type: application -version: 2.0.0 +version: 2.1.0 diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-airflow-high-memory.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-airflow-high-memory.yaml index bdf49b77d92..1c057cd7f2e 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-airflow-high-memory.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-airflow-high-memory.yaml @@ -5,6 +5,8 @@ metadata: name: airflow-high-memory spec: disruption: + budgets: + - nodes: "0" consolidationPolicy: WhenEmpty consolidateAfter: 5m template: diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml index 7f0add0aec5..7a75bb3132d 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml @@ -5,6 +5,8 @@ metadata: name: general-on-demand spec: disruption: + budgets: + - nodes: "0" consolidationPolicy: WhenEmpty consolidateAfter: 5m template: @@ -12,7 +14,6 @@ spec: labels: compute.analytical-platform.service.justice.gov.uk/karpenter-node-pool: "general-on-demand" spec: - expireAfter: Never nodeClassRef: apiVersion: karpenter.k8s.aws/v1beta1 kind: EC2NodeClass diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml index bceb43c80fb..d5693c34795 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml @@ -5,6 +5,8 @@ metadata: name: general-spot spec: disruption: + budgets: + - nodes: "0" consolidationPolicy: WhenEmpty consolidateAfter: 5m template: diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml index 98cd1594723..4a33dc31414 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml @@ -5,6 +5,8 @@ metadata: name: gpu-on-demand spec: disruption: + budgets: + - nodes: "0" consolidationPolicy: WhenEmpty consolidateAfter: 5m template: diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-spot.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-spot.yaml index fcefdfeb057..7e1a3030375 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-spot.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-spot.yaml @@ -5,6 +5,8 @@ metadata: name: gpu-spot spec: disruption: + budgets: + - nodes: "0" consolidationPolicy: WhenEmpty consolidateAfter: 5m template: From d40f6684bb65c23bcaf357103a57836dac6a352f Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Thu, 14 Nov 2024 19:41:12 +0000 Subject: [PATCH 08/11] Update kube-proxy and EFS CSI remove spec.disruption.budgets so it returns to default of 10% Signed-off-by: Jacob Woffenden --- .../analytical-platform-compute/environment-configuration.tf | 4 ++-- .../src/helm/charts/karpenter-configuration/Chart.yaml | 2 +- .../templates/node-pool-airflow-high-memory.yaml | 2 -- .../templates/node-pool-general-on-demand.yaml | 2 -- .../templates/node-pool-general-spot.yaml | 2 -- .../templates/node-pool-gpu-on-demand.yaml | 2 -- .../karpenter-configuration/templates/node-pool-gpu-spot.yaml | 2 -- 7 files changed, 3 insertions(+), 13 deletions(-) diff --git a/terraform/environments/analytical-platform-compute/environment-configuration.tf b/terraform/environments/analytical-platform-compute/environment-configuration.tf index f422c9c8752..031d704d8d0 100644 --- a/terraform/environments/analytical-platform-compute/environment-configuration.tf +++ b/terraform/environments/analytical-platform-compute/environment-configuration.tf @@ -30,9 +30,9 @@ locals { eks_node_version = "1.26.2-360b7a38" eks_cluster_addon_versions = { coredns = "v1.11.3-eksbuild.2" - kube_proxy = "v1.31.1-eksbuild.2" + kube_proxy = "v1.31.2-eksbuild.2" aws_ebs_csi_driver = "v1.36.0-eksbuild.1" - aws_efs_csi_driver = "v2.0.8-eksbuild.1" + aws_efs_csi_driver = "v2.0.9-eksbuild.1" aws_guardduty_agent = "v1.7.1-eksbuild.2" eks_pod_identity_agent = "v1.3.2-eksbuild.2" vpc_cni = "v1.18.6-eksbuild.1" diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml index 8ce4103e8c8..462add8abe8 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml @@ -3,4 +3,4 @@ apiVersion: v2 name: karpenter-configuration description: A Helm chart to deploy Karpenter's configuration type: application -version: 2.1.0 +version: 2.2.0 diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-airflow-high-memory.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-airflow-high-memory.yaml index 1c057cd7f2e..bdf49b77d92 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-airflow-high-memory.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-airflow-high-memory.yaml @@ -5,8 +5,6 @@ metadata: name: airflow-high-memory spec: disruption: - budgets: - - nodes: "0" consolidationPolicy: WhenEmpty consolidateAfter: 5m template: diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml index 7a75bb3132d..f9401e55efc 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-on-demand.yaml @@ -5,8 +5,6 @@ metadata: name: general-on-demand spec: disruption: - budgets: - - nodes: "0" consolidationPolicy: WhenEmpty consolidateAfter: 5m template: diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml index d5693c34795..bceb43c80fb 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml @@ -5,8 +5,6 @@ metadata: name: general-spot spec: disruption: - budgets: - - nodes: "0" consolidationPolicy: WhenEmpty consolidateAfter: 5m template: diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml index 4a33dc31414..98cd1594723 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml @@ -5,8 +5,6 @@ metadata: name: gpu-on-demand spec: disruption: - budgets: - - nodes: "0" consolidationPolicy: WhenEmpty consolidateAfter: 5m template: diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-spot.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-spot.yaml index 7e1a3030375..fcefdfeb057 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-spot.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-spot.yaml @@ -5,8 +5,6 @@ metadata: name: gpu-spot spec: disruption: - budgets: - - nodes: "0" consolidationPolicy: WhenEmpty consolidateAfter: 5m template: From 38ae704acb7afa9f33319ffb27e1a8fcc4f8c4b7 Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Thu, 14 Nov 2024 20:30:39 +0000 Subject: [PATCH 09/11] Update VPC CNI version --- .../analytical-platform-compute/environment-configuration.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/analytical-platform-compute/environment-configuration.tf b/terraform/environments/analytical-platform-compute/environment-configuration.tf index 031d704d8d0..c9a7deb68c6 100644 --- a/terraform/environments/analytical-platform-compute/environment-configuration.tf +++ b/terraform/environments/analytical-platform-compute/environment-configuration.tf @@ -35,7 +35,7 @@ locals { aws_efs_csi_driver = "v2.0.9-eksbuild.1" aws_guardduty_agent = "v1.7.1-eksbuild.2" eks_pod_identity_agent = "v1.3.2-eksbuild.2" - vpc_cni = "v1.18.6-eksbuild.1" + vpc_cni = "v1.19.0-eksbuild.1" } /* Data Engineering Airflow */ From 3c111fc3c586bf1c4a8d495db3f96929c507cd50 Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Mon, 18 Nov 2024 09:02:58 +0000 Subject: [PATCH 10/11] Update Prometheus Signed-off-by: Jacob Woffenden --- .../analytical-platform-compute/helm-charts-system.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/analytical-platform-compute/helm-charts-system.tf b/terraform/environments/analytical-platform-compute/helm-charts-system.tf index c7d2e120b0a..546726a9de2 100644 --- a/terraform/environments/analytical-platform-compute/helm-charts-system.tf +++ b/terraform/environments/analytical-platform-compute/helm-charts-system.tf @@ -71,7 +71,7 @@ resource "helm_release" "amazon_prometheus_proxy" { name = "amazon-prometheus-proxy" repository = "https://prometheus-community.github.io/helm-charts" chart = "kube-prometheus-stack" - version = "66.1.1" + version = "66.2.1" namespace = kubernetes_namespace.aws_observability.metadata[0].name values = [ templatefile( From 71110b2d782342010eaa300fcc89b60343764192 Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Mon, 18 Nov 2024 09:45:56 +0000 Subject: [PATCH 11/11] Update test and prod Update EC2NodeClass block device config Signed-off-by: Jacob Woffenden --- .../environment-configuration.tf | 12 ++++++------ .../helm/charts/karpenter-configuration/Chart.yaml | 2 +- .../ec2-node-class-bottlerocket-general.yaml | 4 ++-- .../templates/ec2-node-class-bottlerocket-gpu.yaml | 4 ++-- .../templates/node-pool-general-spot.yaml | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/terraform/environments/analytical-platform-compute/environment-configuration.tf b/terraform/environments/analytical-platform-compute/environment-configuration.tf index c9a7deb68c6..df1d8fe7f6d 100644 --- a/terraform/environments/analytical-platform-compute/environment-configuration.tf +++ b/terraform/environments/analytical-platform-compute/environment-configuration.tf @@ -81,12 +81,12 @@ locals { eks_node_version = "1.26.2-360b7a38" eks_cluster_addon_versions = { coredns = "v1.11.3-eksbuild.2" - kube_proxy = "v1.31.1-eksbuild.2" + kube_proxy = "v1.31.2-eksbuild.2" aws_ebs_csi_driver = "v1.36.0-eksbuild.1" - aws_efs_csi_driver = "v2.0.8-eksbuild.1" + aws_efs_csi_driver = "v2.0.9-eksbuild.1" aws_guardduty_agent = "v1.7.1-eksbuild.2" eks_pod_identity_agent = "v1.3.2-eksbuild.2" - vpc_cni = "v1.18.6-eksbuild.1" + vpc_cni = "v1.19.0-eksbuild.1" } /* Observability Platform */ @@ -131,12 +131,12 @@ locals { eks_node_version = "1.26.2-360b7a38" eks_cluster_addon_versions = { coredns = "v1.11.3-eksbuild.2" - kube_proxy = "v1.31.1-eksbuild.2" + kube_proxy = "v1.31.2-eksbuild.2" aws_ebs_csi_driver = "v1.36.0-eksbuild.1" - aws_efs_csi_driver = "v2.0.8-eksbuild.1" + aws_efs_csi_driver = "v2.0.9-eksbuild.1" aws_guardduty_agent = "v1.7.1-eksbuild.2" eks_pod_identity_agent = "v1.3.2-eksbuild.2" - vpc_cni = "v1.18.6-eksbuild.1" + vpc_cni = "v1.19.0-eksbuild.1" } /* Data Engineering Airflow */ diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml index 462add8abe8..649c63ed2ae 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml @@ -3,4 +3,4 @@ apiVersion: v2 name: karpenter-configuration description: A Helm chart to deploy Karpenter's configuration type: application -version: 2.2.0 +version: 2.3.0 diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-general.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-general.yaml index 710a0e0f9f4..bfaafdb48a3 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-general.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-general.yaml @@ -21,11 +21,11 @@ spec: blockDeviceMappings: - deviceName: /dev/xvdb ebs: - volumeSize: 100Gi + volumeSize: 200Gi volumeType: gp3 iops: 3000 encrypted: true kmsKeyID: {{ .Values.ebsKmsKeyId }} deleteOnTermination: true - throughput: 125 + throughput: 250 detailedMonitoring: true diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-gpu.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-gpu.yaml index 60e96cec2b5..be59088d0df 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-gpu.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-gpu.yaml @@ -21,11 +21,11 @@ spec: blockDeviceMappings: - deviceName: /dev/xvdb ebs: - volumeSize: 100Gi + volumeSize: 200Gi volumeType: gp3 iops: 3000 encrypted: true kmsKeyID: {{ .Values.ebsKmsKeyId }} deleteOnTermination: true - throughput: 125 + throughput: 250 detailedMonitoring: true diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml index bceb43c80fb..792f049909a 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml +++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-general-spot.yaml @@ -35,4 +35,4 @@ spec: values: ["c", "m", "r"] - key: karpenter.k8s.aws/instance-generation operator: Gt - values: ["2"] + values: ["4"]