diff --git a/manifests/modules/aiml/chatbot/.workshop/cleanup.sh b/manifests/modules/aiml/chatbot/.workshop/cleanup.sh
index 7413232b9..e9b2527c9 100755
--- a/manifests/modules/aiml/chatbot/.workshop/cleanup.sh
+++ b/manifests/modules/aiml/chatbot/.workshop/cleanup.sh
@@ -2,13 +2,16 @@
 set -e
 
-#we want to delete the graviton cluster
-delete-nodegroup graviton
-
 logmessage "Deleting AIML resources..."
 
 kubectl delete namespace aiml --ignore-not-found
 
+# delete the llama2 ingress first so the load balancer is deprovisioned
+kubectl delete ingress -n llama2 llama2 --ignore-not-found
+
+# delete the llama2 namespace used by the chatbot
+kubectl delete namespace llama2 --ignore-not-found
+
 logmessage "Deleting Karpenter NodePool and EC2NodeClass..."
 
 delete-all-if-crd-exists nodepools.karpenter.sh
diff --git a/manifests/modules/aiml/chatbot/.workshop/terraform/main.tf b/manifests/modules/aiml/chatbot/.workshop/terraform/main.tf
index d9b5e97c1..ef86eb99e 100644
--- a/manifests/modules/aiml/chatbot/.workshop/terraform/main.tf
+++ b/manifests/modules/aiml/chatbot/.workshop/terraform/main.tf
@@ -12,7 +12,7 @@ provider "aws" {
   alias  = "virginia"
 }
 
-data "aws_region" "current" {}
+#data "aws_region" "current" {}
 
 data "aws_ecrpublic_authorization_token" "token" {
   provider = aws.virginia
@@ -22,7 +22,9 @@ module "eks_blueprints_addons" {
   source  = "aws-ia/eks-blueprints-addons/aws"
   version = "1.16.3"
 
-  enable_karpenter = true
+  enable_karpenter                    = true
+  enable_aws_load_balancer_controller = true
+  create_kubernetes_resources         = false
 
   karpenter_enable_spot_termination          = true
   karpenter_enable_instance_profile_creation = true
@@ -35,6 +37,7 @@ module "eks_blueprints_addons" {
   cluster_endpoint  = var.addon_context.aws_eks_cluster_endpoint
   cluster_version   = var.eks_cluster_version
   oidc_provider_arn = var.addon_context.eks_oidc_provider_arn
+
 }
 
 data "aws_subnets" "private" {
@@ -56,33 +59,6 @@ resource "aws_s3_bucket" "chatbot" {
   tags = var.tags
 }
 
-#resource "aws_iam_role" "graviton_node" {
-#  name = "${var.addon_context.eks_cluster_id}-graviton-node"
-
-#  assume_role_policy = jsonencode({
-#Version = "2012-10-17"
-#Statement = [
-#{
-#Action = "sts:AssumeRole"
-#Effect = "Allow"
-#Sid    = ""
-#Principal = {
-#Service = "ec2.amazonaws.com"
-#}
-#},
-#]
-#  })
-
-#managed_policy_arns = [
-#  "arn:${var.addon_context.aws_partition_id}:iam::aws:policy/AmazonEKS_CNI_Policy",
-#  "arn:${var.addon_context.aws_partition_id}:iam::aws:policy/AmazonEKSWorkerNodePolicy",
-#  "arn:${var.addon_context.aws_partition_id}:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly",
-#  "arn:${var.addon_context.aws_partition_id}:iam::aws:policy/AmazonSSMManagedInstanceCore"
-#]
-
-#tags = var.tags
-#}
-
 module "iam_assumable_role_chatbot" {
   source  = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc"
   version = "5.39.1"
@@ -98,7 +74,7 @@ module "iam_assumable_role_chatbot" {
 resource "aws_iam_policy" "chatbot" {
   name        = "${var.addon_context.eks_cluster_id}-chatbot"
   path        = "/"
-  description = "IAM policy for the inferenct workload"
+  description = "IAM policy for the chatbot workload"
 
   policy = <<EOF
diff --git a/manifests/modules/aiml/chatbot/.workshop/terraform/vars.tf b/manifests/modules/aiml/chatbot/.workshop/terraform/vars.tf
index 812087dc5..cc899548b 100644
--- a/manifests/modules/aiml/chatbot/.workshop/terraform/vars.tf
+++ b/manifests/modules/aiml/chatbot/.workshop/terraform/vars.tf
@@ -33,3 +33,11 @@ variable "resources_precreated" {
   description = "Have expensive resources been created already"
   type        = bool
 }
+
+# tflint-ignore: terraform_unused_declarations
+variable "load_balancer_controller_chart_version" {
+  description = "The chart version of aws-load-balancer-controller to use"
+  type        = string
+  # renovate-helm: depName=aws-load-balancer-controller
+  default     = "1.8.1"
+}
\ No newline at end of file
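
Note: with create_kubernetes_resources = false, the blueprints module above only provisions the AWS-side resources (IAM role and policies) for the AWS Load Balancer Controller; the chart itself is presumably installed by the workshop's addon tooling, which would also be where the load_balancer_controller_chart_version variable added to vars.tf gets consumed. A quick sanity check once the controller is installed, assuming it runs under its default name in kube-system:

    kubectl get deployment -n kube-system aws-load-balancer-controller
    kubectl rollout status -n kube-system deployment/aws-load-balancer-controller --timeout=120s
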
diff --git a/manifests/modules/aiml/chatbot/base/kustomization.yaml b/manifests/modules/aiml/chatbot/base/kustomization.yaml
deleted file mode 100644
index e155dce9d..000000000
--- a/manifests/modules/aiml/chatbot/base/kustomization.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-resources:
-  - serviceaccount.yaml
-  - namespace.yaml
diff --git a/manifests/modules/aiml/chatbot/base/namespace.yaml b/manifests/modules/aiml/chatbot/base/namespace.yaml
deleted file mode 100644
index 78f3cd071..000000000
--- a/manifests/modules/aiml/chatbot/base/namespace.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: aiml
diff --git a/manifests/modules/aiml/chatbot/base/serviceaccount.yaml b/manifests/modules/aiml/chatbot/base/serviceaccount.yaml
deleted file mode 100644
index 010aa4b5f..000000000
--- a/manifests/modules/aiml/chatbot/base/serviceaccount.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: inference
-  namespace: aiml
-  annotations:
-    eks.amazonaws.com/role-arn: ${AIML_NEURON_ROLE_ARN}
diff --git a/manifests/modules/aiml/chatbot/k8s-neuron-device-plugin-rbac.yaml b/manifests/modules/aiml/chatbot/k8s-neuron-device-plugin-rbac.yaml
new file mode 100644
index 000000000..7bc6879ad
--- /dev/null
+++ b/manifests/modules/aiml/chatbot/k8s-neuron-device-plugin-rbac.yaml
@@ -0,0 +1,59 @@
+# rbac.yaml
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: neuron-device-plugin
+rules:
+  - apiGroups:
+      - ""
+    resources:
+      - nodes
+    verbs:
+      - get
+      - list
+      - watch
+  - apiGroups:
+      - ""
+    resources:
+      - events
+    verbs:
+      - create
+      - patch
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - update
+      - patch
+      - get
+      - list
+      - watch
+  - apiGroups:
+      - ""
+    resources:
+      - nodes/status
+    verbs:
+      - patch
+      - update
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: neuron-device-plugin
+  namespace: kube-system
+---
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: neuron-device-plugin
+  namespace: kube-system
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: neuron-device-plugin
+subjects:
+  - kind: ServiceAccount
+    name: neuron-device-plugin
+    namespace: kube-system
diff --git a/manifests/modules/aiml/chatbot/k8s-neuron-device-plugin.yaml b/manifests/modules/aiml/chatbot/k8s-neuron-device-plugin.yaml
new file mode 100644
index 000000000..3a895a6eb
--- /dev/null
+++ b/manifests/modules/aiml/chatbot/k8s-neuron-device-plugin.yaml
@@ -0,0 +1,95 @@
+# https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: neuron-device-plugin-daemonset
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      name: neuron-device-plugin-ds
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      # Uncomment the annotation below if k8s version is 1.13 or lower
+      # annotations:
+      #   scheduler.alpha.kubernetes.io/critical-pod: ""
+      labels:
+        name: neuron-device-plugin-ds
+    spec:
+      serviceAccount: neuron-device-plugin
+      tolerations:
+        - key: CriticalAddonsOnly
+          operator: Exists
+        - key: aws.amazon.com/neuron
+          operator: Exists
+          effect: NoSchedule
+      # Mark this pod as a critical add-on; when enabled, the critical add-on
+      # scheduler reserves resources for critical add-on pods so that they can
+      # be rescheduled after a failure.
+      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
+      priorityClassName: "system-node-critical"
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              # Uncomment following matchExpressions if using k8s 1.16 or lower
+              #- matchExpressions:
+              #    - key: "beta.kubernetes.io/instance-type"
+              #      operator: In
+              #      values:
+              #        - inf1.xlarge
+              #        - inf1.2xlarge
+              #        - inf1.6xlarge
+              #        - inf1.24xlarge
+              #        - inf2.xlarge
+              #        - inf2.8xlarge
+              #        - inf2.24xlarge
+              #        - inf2.48xlarge
+              #        - trn1.2xlarge
+              #        - trn1.32xlarge
+              #        - trn1n.32xlarge
+              - matchExpressions:
+                  - key: "node.kubernetes.io/instance-type"
+                    operator: In
+                    values:
+                      - inf1.xlarge
+                      - inf1.2xlarge
+                      - inf1.6xlarge
+                      - inf1.24xlarge
+                      - inf2.xlarge
+                      - inf2.8xlarge
+                      - inf2.24xlarge
+                      - inf2.48xlarge
+                      - trn1.2xlarge
+                      - trn1.32xlarge
+                      - trn1n.32xlarge
+      containers:
+        # Find all neuron-device-plugin images at https://gallery.ecr.aws/neuron/neuron-device-plugin
+        - image: public.ecr.aws/neuron/neuron-device-plugin:2.19.16.0
+          imagePullPolicy: Always
+          name: neuron-device-plugin
+          env:
+            - name: KUBECONFIG
+              value: /etc/kubernetes/kubelet.conf
+            - name: NODE_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: spec.nodeName
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop: ["ALL"]
+          volumeMounts:
+            - name: device-plugin
+              mountPath: /var/lib/kubelet/device-plugins
+            - name: infa-map
+              mountPath: /run
+      volumes:
+        - name: device-plugin
+          hostPath:
+            path: /var/lib/kubelet/device-plugins
+        - name: infa-map
+          hostPath:
+            path: /run
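
The two Neuron files above are standalone manifests rather than kustomize resources, so they are presumably applied directly during the lab. A minimal sketch, run from the chatbot module directory, that applies them and confirms the plugin advertises Neuron devices:

    kubectl apply -f k8s-neuron-device-plugin-rbac.yaml
    kubectl apply -f k8s-neuron-device-plugin.yaml
    kubectl rollout status ds/neuron-device-plugin-daemonset -n kube-system
    # inf2/trn1 nodes should now report allocatable aws.amazon.com/neuron devices
    kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.allocatable.aws\.amazon\.com/neuron}{"\n"}{end}'
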
diff --git a/manifests/modules/aiml/chatbot/nodepool/nodepool-inf2.yaml b/manifests/modules/aiml/chatbot/nodepool/nodepool-inf2.yaml
index 049427e80..4c8b3950c 100644
--- a/manifests/modules/aiml/chatbot/nodepool/nodepool-inf2.yaml
+++ b/manifests/modules/aiml/chatbot/nodepool/nodepool-inf2.yaml
@@ -104,9 +104,9 @@ spec:
   # limits:
   #   cpu: "10000"
   disruption:
-    consolidationPolicy: WhenUnderutilized
+    #consolidationPolicy: WhenUnderutilized
     consolidateAfter: 300s
-    #consolidationPolicy: WhenEmpty
+    consolidationPolicy: WhenEmpty
     expireAfter: 720h # 30 * 24h = 720h
 
 ---
@@ -123,13 +123,13 @@ spec:
       encrypted: true
      volumeSize: 500Gi #originally 100Gi
      volumeType: gp3
-  role: karpenter-eks-workshop-20240719154052842800000003
+  role: ${KARPENTER_NODE_ROLE}
   securityGroupSelectorTerms:
    - tags:
-        karpenter.sh/discovery: eks-workshop
+        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
   subnetSelectorTerms:
    - tags:
-        karpenter.sh/discovery: eks-workshop
+        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
   tags:
     app.kubernetes.io/created-by: eks-workshop
diff --git a/manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml b/manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml
index ddfc618b2..a6e6250b1 100644
--- a/manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml
+++ b/manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml
@@ -82,9 +82,9 @@ spec:
   limits:
     cpu: "10000"
   disruption:
-    consolidationPolicy: WhenUnderutilized
+    #consolidationPolicy: WhenUnderutilized
     consolidateAfter: 300s
-    #consolidationPolicy: WhenEmpty
+    consolidationPolicy: WhenEmpty
     expireAfter: 720h # 30 * 24h = 720h
 
 ---
@@ -102,12 +102,12 @@ spec:
      volumeSize: 200Gi #originally 100Gi
      volumeType: gp3
      detailedMonitoring: true
-  role: karpenter-eks-workshop-20240719154052842800000003
+  role: ${KARPENTER_NODE_ROLE}
   securityGroupSelectorTerms:
    - tags:
-        karpenter.sh/discovery: eks-workshop
+        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
   subnetSelectorTerms:
    - tags:
-        karpenter.sh/discovery: eks-workshop
+        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
   tags:
     app.kubernetes.io/created-by: eks-workshop
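
Swapping the hard-coded IAM role and eks-workshop discovery tags for ${KARPENTER_NODE_ROLE} and ${EKS_CLUSTER_NAME} turns the two NodePool manifests above into templates, so they now need variable substitution before being applied. A minimal sketch, assuming both variables are already exported in the workshop shell:

    envsubst < nodepool/nodepool-inf2.yaml | kubectl apply -f -
    envsubst < nodepool/nodepool-x86.yaml | kubectl apply -f -
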
diff --git a/manifests/modules/aiml/chatbot/Dockerfile b/manifests/modules/aiml/chatbot/ray-service-llama2-chatbot/Dockerfile
similarity index 100%
rename from manifests/modules/aiml/chatbot/Dockerfile
rename to manifests/modules/aiml/chatbot/ray-service-llama2-chatbot/Dockerfile
diff --git a/manifests/modules/aiml/chatbot/kustomization.yaml b/manifests/modules/aiml/chatbot/ray-service-llama2-chatbot/kustomization.yaml
similarity index 100%
rename from manifests/modules/aiml/chatbot/kustomization.yaml
rename to manifests/modules/aiml/chatbot/ray-service-llama2-chatbot/kustomization.yaml
diff --git a/manifests/modules/aiml/chatbot/ray-service-llama2.yaml b/manifests/modules/aiml/chatbot/ray-service-llama2-chatbot/ray-service-llama2.yaml
similarity index 95%
rename from manifests/modules/aiml/chatbot/ray-service-llama2.yaml
rename to manifests/modules/aiml/chatbot/ray-service-llama2-chatbot/ray-service-llama2.yaml
index 38e09642b..2203a170b 100644
--- a/manifests/modules/aiml/chatbot/ray-service-llama2.yaml
+++ b/manifests/modules/aiml/chatbot/ray-service-llama2-chatbot/ray-service-llama2.yaml
@@ -138,9 +138,12 @@ metadata:
   name: llama2
   namespace: llama2
   annotations:
-    nginx.ingress.kubernetes.io/rewrite-target: "/$1"
+    #nginx.ingress.kubernetes.io/rewrite-target: "/$1"
+    alb.ingress.kubernetes.io/scheme: internet-facing
+    alb.ingress.kubernetes.io/target-type: ip
+    alb.ingress.kubernetes.io/healthcheck-path: /actuator/health/liveness
 spec:
-  ingressClassName: nginx
+  ingressClassName: alb
   rules:
     - http:
         paths:
diff --git a/manifests/modules/aiml/chatbot/ray_serve_llama2.py b/manifests/modules/aiml/chatbot/ray-service-llama2-chatbot/ray_serve_llama2.py
similarity index 100%
rename from manifests/modules/aiml/chatbot/ray_serve_llama2.py
rename to manifests/modules/aiml/chatbot/ray-service-llama2-chatbot/ray_serve_llama2.py
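
With the ingress switched from nginx to an internet-facing ALB, the chatbot endpoint comes from the ingress status once the load balancer is provisioned:

    kubectl get ingress -n llama2 llama2 -o jsonpath='{.status.loadBalancer.ingress[0].hostname}'

One caveat worth verifying: /actuator/health/liveness is a Spring Boot convention, so the ALB health check only passes if the Ray Serve application actually exposes that route.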