From 0fd96ae32a8e0ff80a429c8c60a21e300d507bd9 Mon Sep 17 00:00:00 2001
From: Gary H <26419401+Gary-H9@users.noreply.github.com>
Date: Mon, 22 Jul 2024 15:39:44 +0000
Subject: [PATCH 1/6] :wrench: Add GPU on demand node pool

---
Notes (review):
    Adds a Karpenter NodePool named gpu-on-demand, restricted to on-demand
    amd64/linux p3.2xlarge instances and tainted NoSchedule with the pool key.
    nodeClassRef points at bottlerocket-general here; PATCH 5/6 re-points it
    at bottlerocket-gpu.
    The file is committed without a trailing newline (see the
    "\ No newline at end of file" marker at the end of the hunk).

 .../templates/node-pool-gpu-on-demand.yaml    | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml

diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml
new file mode 100644
index 00000000000..51beb7c1398
--- /dev/null
+++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml
@@ -0,0 +1,34 @@
+---
+apiVersion: karpenter.sh/v1beta1
+kind: NodePool
+metadata:
+  name: gpu-on-demand
+spec:
+  disruption:
+    consolidationPolicy: WhenUnderutilized
+  template:
+    metadata:
+      labels:
+        compute.analytical-platform.service.justice.gov.uk/karpenter-node-pool: "gpu-on-demand"
+    spec:
+      nodeClassRef:
+        apiVersion: karpenter.k8s.aws/v1beta1
+        kind: EC2NodeClass
+        name: bottlerocket-general
+      taints:
+        - key: compute.analytical-platform.service.justice.gov.uk/karpenter-node-pool
+          value: "gpu-on-demand"
+          effect: NoSchedule
+      requirements:
+        - key: kubernetes.io/arch
+          operator: In
+          values: ["amd64"]
+        - key: kubernetes.io/os
+          operator: In
+          values: ["linux"]
+        - key: karpenter.sh/capacity-type
+          operator: In
+          values: ["on-demand"]
+        - key: node.kubernetes.io/instance-type
+          operator: In
+          values: ["p3.2xlarge"]
\ No newline at end of file

From e7edef61a8e16aa219c668d53c58eae61084097c Mon Sep 17 00:00:00 2001
From: Gary H <26419401+Gary-H9@users.noreply.github.com>
Date: Mon, 22 Jul 2024 15:41:00 +0000
Subject: [PATCH 2/6] :wrench: Define ollamate release

---
Notes (review):
    Adds helm_release.ollamate and the kubernetes_namespace.ollamate it
    deploys into (pod-security enforce level "restricted").
    The release is declared with name = "ui"; this looks copied from the
    neighbouring helm_release.ui resource -- TODO confirm the intended
    release name before re-enabling it.
    PATCH 4/6 comments the release out again; no later patch in this series
    touches kubernetes-namespaces.tf.

 .../helm-charts-applications.tf               | 17 +++++++++++++++++
 .../kubernetes-namespaces.tf                  | 11 +++++++++++
 2 files changed, 28 insertions(+)

diff --git a/terraform/environments/analytical-platform-compute/helm-charts-applications.tf b/terraform/environments/analytical-platform-compute/helm-charts-applications.tf
index 2f33522a772..af7d5c04d47 100644
--- a/terraform/environments/analytical-platform-compute/helm-charts-applications.tf
+++ b/terraform/environments/analytical-platform-compute/helm-charts-applications.tf
@@ -14,3 +14,20 @@ resource "helm_release" "ui" {
     )
   ]
 }
+
+resource "helm_release" "ollamate" {
+  /* https://github.com/ministryofjustice/analytical-platform-ollamate */
+  name       = "ui"
+  repository = "oci://ghcr.io/ministryofjustice/analytical-platform-charts"
+  version    = "0.0.0-rc1"
+  chart      = "analytical-platform-ollamate"
+  namespace  = kubernetes_namespace.ollamate.metadata[0].name
+  values = [
+    templatefile(
+      "${path.module}/src/helm/values/ollamate/values.yml.tftpl",
+      {
+        ollamate_hostname = "ollamate.${local.environment_configuration.route53_zone}"
+      }
+    )
+  ]
+}
diff --git a/terraform/environments/analytical-platform-compute/kubernetes-namespaces.tf b/terraform/environments/analytical-platform-compute/kubernetes-namespaces.tf
index a571e2e9515..3863747a43c 100644
--- a/terraform/environments/analytical-platform-compute/kubernetes-namespaces.tf
+++ b/terraform/environments/analytical-platform-compute/kubernetes-namespaces.tf
@@ -93,3 +93,14 @@ resource "kubernetes_namespace" "ui" {
     }
   }
 }
+
+resource "kubernetes_namespace" "ollamate" {
+  metadata {
+    name = "ollamate"
+    labels = {
+      "pod-security.kubernetes.io/enforce"                          = "restricted"
+      "compute.analytical-platform.service.justice.gov.uk/workload" = "ollamate"
+    }
+  }
+}
+

From e48f223eb0f1f3ece5e1c12e456245855e046e14 Mon Sep 17 00:00:00 2001
From: Gary H <26419401+Gary-H9@users.noreply.github.com>
Date: Mon, 22 Jul 2024 15:43:00 +0000
Subject: [PATCH 3/6] :wrench: Bump version

---
Notes (review):
    Chart version 1.0.0 -> 1.1.0, following the template added in PATCH 1/6.

 .../src/helm/charts/karpenter-configuration/Chart.yaml          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml
index 8570582482b..4252cdaf8ad 100644
--- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml
+++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml
@@ -3,4 +3,4 @@ apiVersion: v2
 name: karpenter-configuration
 description: A Helm chart to deploy Karpenter's configuration
 type: application
-version: 1.0.0
+version: 1.1.0

From ad5ffec8c4f18d871c234eaa5b432c2e703ba0e2 Mon Sep 17 00:00:00 2001
From: Gary H <26419401+Gary-H9@users.noreply.github.com>
Date: Tue, 23 Jul 2024 13:56:00 +0000
Subject: [PATCH 4/6] :wrench: Tinkering

---
Notes (review):
    Disables helm_release.ollamate by commenting it out line for line
    (16 lines removed, the same 16 re-added behind "# "); nothing else in
    the file changes.

 .../helm-charts-applications.tf               | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/terraform/environments/analytical-platform-compute/helm-charts-applications.tf b/terraform/environments/analytical-platform-compute/helm-charts-applications.tf
index af7d5c04d47..74578311abd 100644
--- a/terraform/environments/analytical-platform-compute/helm-charts-applications.tf
+++ b/terraform/environments/analytical-platform-compute/helm-charts-applications.tf
@@ -15,19 +15,19 @@ resource "helm_release" "ui" {
   ]
 }
 
-resource "helm_release" "ollamate" {
-  /* https://github.com/ministryofjustice/analytical-platform-ollamate */
-  name       = "ui"
-  repository = "oci://ghcr.io/ministryofjustice/analytical-platform-charts"
-  version    = "0.0.0-rc1"
-  chart      = "analytical-platform-ollamate"
-  namespace  = kubernetes_namespace.ollamate.metadata[0].name
-  values = [
-    templatefile(
-      "${path.module}/src/helm/values/ollamate/values.yml.tftpl",
-      {
-        ollamate_hostname = "ollamate.${local.environment_configuration.route53_zone}"
-      }
-    )
-  ]
-}
+# resource "helm_release" "ollamate" {
+#   /* https://github.com/ministryofjustice/analytical-platform-ollamate */
+#   name       = "ui"
+#   repository = "oci://ghcr.io/ministryofjustice/analytical-platform-charts"
+#   version    = "0.0.0-rc1"
+#   chart      = "analytical-platform-ollamate"
+#   namespace  = kubernetes_namespace.ollamate.metadata[0].name
+#   values = [
+#     templatefile(
+#       "${path.module}/src/helm/values/ollamate/values.yml.tftpl",
+#       {
+#         ollamate_hostname = "ollamate.${local.environment_configuration.route53_zone}"
+#       }
+#     )
+#   ]
+# }

From fbef58d2a900f935374ed3048cc9258091911276 Mon Sep 17 00:00:00 2001
From: Gary H <26419401+Gary-H9@users.noreply.github.com>
Date: Tue, 23 Jul 2024 15:01:57 +0000
Subject: [PATCH 5/6] :wrench: Add nvidia flavoured bottlerocket

---
Notes (review):
    Adds the bottlerocket-gpu EC2NodeClass; its AMI name selector uses the
    "-nvidia-" Bottlerocket image name built from .Values.clusterVersion and
    .Values.nodeVersion. The gpu-on-demand NodePool from PATCH 1/6 is
    switched from bottlerocket-general to this node class.
    The template reads .Values.nodeRole, .Values.clusterName and
    .Values.ebsKmsKeyId; they are not defined in this series -- presumably
    supplied by the chart's existing values, TODO confirm.

 .../ec2-node-class-bottlerocket-gpu.yaml      | 31 +++++++++++++++++++
 .../templates/node-pool-gpu-on-demand.yaml    |  2 +-
 2 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-gpu.yaml

diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-gpu.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-gpu.yaml
new file mode 100644
index 00000000000..60e96cec2b5
--- /dev/null
+++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/ec2-node-class-bottlerocket-gpu.yaml
@@ -0,0 +1,31 @@
+---
+apiVersion: karpenter.k8s.aws/v1beta1
+kind: EC2NodeClass
+metadata:
+  name: bottlerocket-gpu
+spec:
+  amiFamily: Bottlerocket
+  role: {{ .Values.nodeRole }}
+  subnetSelectorTerms:
+    - tags:
+        karpenter.sh/discovery: {{ .Values.clusterName }}
+  securityGroupSelectorTerms:
+    - tags:
+        karpenter.sh/discovery: {{ .Values.clusterName }}
+  amiSelectorTerms:
+    - name: "bottlerocket-aws-k8s-{{ .Values.clusterVersion }}-nvidia-x86_64-v{{ .Values.nodeVersion }}"
+  metadataOptions:
+    httpEndpoint: enabled
+    httpPutResponseHopLimit: 1
+    httpTokens: required
+  blockDeviceMappings:
+    - deviceName: /dev/xvdb
+      ebs:
+        volumeSize: 100Gi
+        volumeType: gp3
+        iops: 3000
+        encrypted: true
+        kmsKeyID: {{ .Values.ebsKmsKeyId }}
+        deleteOnTermination: true
+        throughput: 125
+  detailedMonitoring: true
diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml
index 51beb7c1398..7b162b62fa3 100644
--- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml
+++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/templates/node-pool-gpu-on-demand.yaml
@@ -14,7 +14,7 @@ spec:
       nodeClassRef:
         apiVersion: karpenter.k8s.aws/v1beta1
         kind: EC2NodeClass
-        name: bottlerocket-general
+        name: bottlerocket-gpu
       taints:
         - key: compute.analytical-platform.service.justice.gov.uk/karpenter-node-pool
           value: "gpu-on-demand"

From b9b0908ecd1ae412ad3e94aae9a4599002bb0281 Mon Sep 17 00:00:00 2001
From: Gary H <26419401+Gary-H9@users.noreply.github.com>
Date: Tue, 23 Jul 2024 15:06:55 +0000
Subject: [PATCH 6/6] :wrench: Bump version number

---
Notes (review):
    Chart version 1.1.0 -> 1.2.0, following the node class added in
    PATCH 5/6.

 .../src/helm/charts/karpenter-configuration/Chart.yaml          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml
index 4252cdaf8ad..674fcfe1994 100644
--- a/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml
+++ b/terraform/environments/analytical-platform-compute/src/helm/charts/karpenter-configuration/Chart.yaml
@@ -3,4 +3,4 @@ apiVersion: v2
 name: karpenter-configuration
 description: A Helm chart to deploy Karpenter's configuration
 type: application
-version: 1.1.0
+version: 1.2.0