Skip to content

Commit

Permalink
feat: Jupyterhub blueprint upgrade (awslabs#554)
Browse files Browse the repository at this point in the history
Signed-off-by: Vara Bonthu <[email protected]>
  • Loading branch information
vara-bonthu authored and ovaleanu committed Aug 10, 2024
1 parent 6021a28 commit c2091dd
Show file tree
Hide file tree
Showing 25 changed files with 563 additions and 594 deletions.
482 changes: 339 additions & 143 deletions ai-ml/jupyterhub/addons.tf

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion ai-ml/jupyterhub/examples/test-pods/timeslicing-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ spec:
app: time-slicing-verification
spec:
nodeSelector:
provisioner: gpu-ts
NodePool: gpu-ts
tolerations:
- key: nvidia.com/gpu
operator: Exists
Expand Down
2 changes: 1 addition & 1 deletion ai-ml/jupyterhub/examples/test-pods/verify-gpu-access.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ spec:
app: verify-gpu
spec:
nodeSelector:
karpenter.sh/provisioner-name: gpu # Force schedule a node with time slicing support
NodePool: gpu-mig # Force scheduling onto a node from the gpu-mig NodePool
tolerations: # To tolerate the taint on the nodes
- key: "nvidia.com/gpu"
operator: "Exists"
Expand Down
40 changes: 0 additions & 40 deletions ai-ml/jupyterhub/helm/coredns-autoscaler/values.yaml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ singleuser:
kubespawner_override:
node_selector:
NodePool: trainium
hub.jupyter.org/node-purpose: user
tolerations:
- key: aws.amazon.com/neuroncore
operator: Exists
Expand Down Expand Up @@ -186,6 +187,7 @@ singleuser:
image: cschranz/gpu-jupyter:v1.5_cuda-11.6_ubuntu-20.04_python-only
node_selector:
NodePool: gpu-ts # TIME-SLICING: Use this config with time-slicing mode
hub.jupyter.org/node-purpose: user
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
Expand All @@ -210,6 +212,7 @@ singleuser:
node_selector:
provisioner: cluster-autoscaler
node.kubernetes.io/instance-type: p4d.24xlarge
hub.jupyter.org/node-purpose: user
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
Expand All @@ -232,8 +235,8 @@ singleuser:
kubespawner_override:
image: cschranz/gpu-jupyter:v1.5_cuda-11.6_ubuntu-20.04_python-only
node_selector:
node.kubernetes.io/instance-type: p4d.24xlarge
NodePool: gpu
NodePool: gpu-mig
hub.jupyter.org/node-purpose: user
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
Expand Down
13 changes: 8 additions & 5 deletions ai-ml/jupyterhub/helm/jupyterhub/jupyterhub-values-dummy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ singleuser:
kubespawner_override:
node_selector:
NodePool: trainium
hub.jupyter.org/node-purpose: user
tolerations:
- key: aws.amazon.com/neuroncore
operator: Exists
Expand Down Expand Up @@ -127,9 +128,10 @@ singleuser:
description: "GPU Time-Slicing with Single GPU VMs (G5 2x, 4x, 8x, 16x) | nvidia.com/gpu: 1 | Karpenter AutoScaling"
kubespawner_override:
# namespace: data-team-a
image: cschranz/gpu-jupyter:v1.5_cuda-11.6_ubuntu-20.04_python-only
image: cschranz/gpu-jupyter:v1.6_cuda-11.8_ubuntu-22.04_python-only
node_selector:
NodePool: gpu-ts # TIME-SLICING: Use this config with time-slicing mode
hub.jupyter.org/node-purpose: user
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
Expand All @@ -150,10 +152,11 @@ singleuser:
- display_name: Data Science (GPU + MIG on P4d.24xlarge)
description: "GPU MIG with P4d instances | nvidia.com/mig-1g.5gb: 1 | Cluster Autoscaler"
kubespawner_override:
image: cschranz/gpu-jupyter:v1.5_cuda-11.6_ubuntu-20.04_python-only
image: cschranz/gpu-jupyter:v1.6_cuda-11.8_ubuntu-22.04_python-only
node_selector:
provisioner: cluster-autoscaler
node.kubernetes.io/instance-type: p4d.24xlarge
hub.jupyter.org/node-purpose: user
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
Expand All @@ -174,10 +177,10 @@ singleuser:
- display_name: Data Science (GPU - P4d.24xlarge)
description: "GPU with P4d instances | Karpenter Autoscaler"
kubespawner_override:
image: cschranz/gpu-jupyter:v1.5_cuda-11.6_ubuntu-20.04_python-only
image: cschranz/gpu-jupyter:v1.6_cuda-11.8_ubuntu-22.04_python-only
node_selector:
node.kubernetes.io/instance-type: p4d.24xlarge
NodePool: gpu
NodePool: gpu-mig
hub.jupyter.org/node-purpose: user
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
Expand Down
5 changes: 0 additions & 5 deletions ai-ml/jupyterhub/helm/karpenter-resources/Chart.yaml

This file was deleted.

This file was deleted.

46 changes: 0 additions & 46 deletions ai-ml/jupyterhub/helm/karpenter-resources/templates/node-pool.yaml

This file was deleted.

10 changes: 0 additions & 10 deletions ai-ml/jupyterhub/helm/karpenter-resources/values.yaml

This file was deleted.

Loading

0 comments on commit c2091dd

Please sign in to comment.