From b25881ef2ff289aa17b20c6c3c4aa536e6710464 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 30 May 2021 20:26:45 +0200 Subject: [PATCH 01/43] deployer: accept provider=none for already setup k8s cluster access --- deployer/cluster.py | 2 ++ deployer/cluster.schema.yaml | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/deployer/cluster.py b/deployer/cluster.py index 29d6366b64..91de692529 100644 --- a/deployer/cluster.py +++ b/deployer/cluster.py @@ -31,6 +31,8 @@ def auth(self): yield from self.auth_azure() elif self.spec["provider"] == "kubeconfig": yield from self.auth_kubeconfig() + elif self.spec['provider'] == 'none': + yield else: raise ValueError(f'Provider {self.spec["provider"]} not supported') diff --git a/deployer/cluster.schema.yaml b/deployer/cluster.schema.yaml index e574dea7e6..19b2f58de2 100644 --- a/deployer/cluster.schema.yaml +++ b/deployer/cluster.schema.yaml @@ -27,7 +27,8 @@ properties: Cloud provider this cluster is running on. Used to perform authentication against the cluster. Currently supports gcp, aws, azure, and raw kubeconfig files. - enum: + enum: + - none - gcp - kubeconfig - aws From fbc0ddcf3e0de4ee7378c1517b281f2a2ffa768f Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Mon, 31 May 2021 03:14:35 +0200 Subject: [PATCH 02/43] jmte: add 2i2c config for jmte deployment --- shared/deployer/jmte.cluster.yaml | 210 ++++++++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 shared/deployer/jmte.cluster.yaml diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml new file mode 100644 index 0000000000..36efe2bc81 --- /dev/null +++ b/shared/deployer/jmte.cluster.yaml @@ -0,0 +1,210 @@ +name: jmte +provider: none +# kubeconfig: +# file: secrets/jmte.yaml +hubs: + - name: prod + domain: hub.jupytearth.org + template: daskhub + auth0: + connection: github + config: &config + + basehub: + # Cloudformation: The EFS filesystem was created by cloudformation. + # + nfsPVC: + enabled: true + nfs: + # mountOptions from https://docs.aws.amazon.com/efs/latest/ug/mounting-fs-nfs-mount-settings.html + mountOptions: + - rsize=1048576 + - wsize=1048576 + - timeo=600 + - soft # We pick soft over hard, so NFS lockups don't lead to hung processes + - retrans=2 + - noresvport + serverIP: fs-01707b06.efs.us-west-2.amazonaws.com + # baseShareName is required to be just "/" so that we can create + # various sub folders in the filesystem that our PV to access the + # NFS server can reference successfully as it isn't supported to + # access a not yet existing folder. This creation is automated by + # the nfs-share-creator resource part of the basehub Helm chart. + baseShareName: / + + + + jupyterhub: + custom: + homepage: + templateVars: + org: + name: Jupyter meets the Earth + logo_url: https://pangeo-data.github.io/jupyter-earth/_static/jupyter-earth.png + url: https://jupytearth.org + designed_by: + name: 2i2c + url: https://2i2c.org + operated_by: + name: 2i2c + url: https://2i2c.org + funded_by: + name: Jupyter meets the Earth + url: https://jupytearth.org + + singleuser: + # Eksctl: The service account was created by eksctl. + # + serviceAccountName: &user-sa s3-full-access + + # cmd: I've experimented with these settings to get a JupyterLab RTC + # setup functioning. It currently is, but is this what makes + # sense to get it to function? 
+ # + # ref: https://github.com/jupyterlab-contrib/jupyterlab-link-share/issues/10#issuecomment-851899758 + # ref: https://github.com/jupyterlab/jupyterlab/blob/1c8ff104a99e294265e6cf476dcb46279b0c3593/binder/jupyter_notebook_config.py#L39 + # + # Note the default in z2jh is jupyterhub-singleuser. + cmd: + - jupyterhub-singleuser + - --LabApp.collaborative=True + - --ServerApp.allow_remote_access=True + + extraEnv: + # SCRATCH_BUCKET / PANGEO_SCRATCH are environment variables that + # help users write notebooks and such referencing this environment + # variable in a way that will work between users. + # + # $(ENV_VAR) will by evaluated by k8s automatically + # + # Cloudformation: The s3 bucket was created by cloudformation. + # + SCRATCH_BUCKET: s3://jmte-scratch/$(JUPYTERHUB_USER) + PANGEO_SCRATCH: s3://jmte-scratch/$(JUPYTERHUB_USER) + + initContainers: + # Need to explicitly fix ownership here, since EFS doesn't do anonuid + - name: volume-mount-ownership-fix + image: busybox + command: ["sh", "-c", "id && chown 1000:1000 /home/jovyan && ls -lhd /home/jovyan"] + securityContext: + runAsUser: 0 + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: "{username}" + + image: + name: pangeo/pangeo-notebook + tag: "2021.05.15" # https://hub.docker.com/r/pangeo/pangeo-notebook/tags + + profileList: + - display_name: "16th of Medium: 0.25-4 CPU, 1-16 GB" + kubespawner_override: + cpu_guarantee: 0.225 + mem_guarantee: 0.875G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "4" } + - display_name: "4th of Medium: 1-4 CPU, 4-16 GB" + kubespawner_override: + cpu_guarantee: 0.875 + mem_guarantee: 3.5G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "4" } + - display_name: "Medium: 4 CPU, 16 GB" + kubespawner_override: + cpu_guarantee: 3.5 + mem_guarantee: 14G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "4" } + - display_name: "Large: 16 CPU, 64 GB" + kubespawner_override: + mem_guarantee: 56G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "16" } + - display_name: "Massive: 64 CPU, 256 GB" + kubespawner_override: + mem_guarantee: 224G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "64" } + + proxy: + # proxy notes: + # + # - Revert basehubs overrides as we don't install ingress-nginx and + # cert-manager yet, and therefore should use + # service.type=LoadBalancer instead of service.type=ClusterIP. + # Along with this, we also make use of the autohttps system that + # requires us to configure an letsencrypt email. + # + service: + type: LoadBalancer + https: + enabled: true + type: letsencrypt + letsencrypt: + contactEmail: erik@sundellopensource.se + + hub: + config: + Authenticator: + allowed_users: &users + - abbyazari # Abby Azari + - andersy005 # Anderson Banihirwe + - consideratio # Erik Sundell + - elliesch # Ellie Abrahams + - EMscience # Edom Moges + - espg # Shane Grigsby + - facusapienza21 # Facundo Sapienza + - fperez # Fernando Pérez + - kmpaul # Kevin Paul + - lrennels # Lisa Rennels + - mrsiegfried # Matthew Siegfried + - tsnow03 # Tasha Snow + - whyjz # Whyjay Zheng + - yuvipanda # Yuvi Panda + admin_users: *users + allowNamedServers: true + networkPolicy: + # FIXME: Required for dask gateway 0.9.0. It is fixed but a Helm + # chart of newer version is not yet released. + enabled: false + + + + dask-gateway: + # dask-gateway notes: + # + # - Explicitly unset daskhub's nodeSelectors for all pods except the + # worker pods. 
The tolerations applied in the basehub config to all + # non-worker pods in dask-gateway will provide a preferred affinity + # towards suitable nodes without needing to have a label on them. Then + # we use the node label "k8s.dask.org/node-purpose: worker" + # specifically for enforce workers to schedule on such nodes. + # + traefik: + nodeSelector: null + controller: + nodeSelector: null + gateway: + nodeSelector: null + backend: + scheduler: + extraPodConfig: + nodeSelector: + hub.jupyter.org/node-purpose: user + serviceAccountName: *user-sa + worker: + extraPodConfig: + nodeSelector: + k8s.dask.org/node-purpose: worker + serviceAccountName: *user-sa + + extraConfig: + idle: | + # timeout after 30 minutes of inactivity + c.KubeClusterConfig.idle_timeout = 1800 + limits: | + # per Dask cluster limits. + c.ClusterConfig.cluster_max_cores = 256 + c.ClusterConfig.cluster_max_memory = "1028G" From 4e5051e2ca92f806eaecd4113e740b8dc55efb81 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Mon, 31 May 2021 03:17:06 +0200 Subject: [PATCH 03/43] jmte: add aws infra (eksctl/cloudformation) --- eksctl/README.md | 74 ++++++++ eksctl/cloudformation-extras.yaml | 293 ++++++++++++++++++++++++++++++ eksctl/eksctl-cluster-config.yaml | 274 ++++++++++++++++++++++++++++ 3 files changed, 641 insertions(+) create mode 100644 eksctl/README.md create mode 100644 eksctl/cloudformation-extras.yaml create mode 100644 eksctl/eksctl-cluster-config.yaml diff --git a/eksctl/README.md b/eksctl/README.md new file mode 100644 index 0000000000..7a08315ee3 --- /dev/null +++ b/eksctl/README.md @@ -0,0 +1,74 @@ +### Setup of k8s cluster via eksctl + +TODO describe... + +### Setup of extras via cloudformation + +TODO describe... + +### Setup of cluster-autoscaler in the k8s cluster + +`eksctl` doesn't automatically install a cluster-autoscaler and it is not part +of a EKS based k8s cluster by itself, so it needs to be manually installed. The +cluster-autoscaler will need permissions to do its job though, and for that we +use some flags in our eksctl config file and then we install it with a Helm +chart. + +#### eksctl configuration for cluster-autoscaler + +We need our eksctl-cluster-config.yaml to: + +1. Declare `nodeGroups.*.iam.withAddonPolicies.autoScaler=true`. + + I believe doing so is what makes the following tags automatically be applied + on node groups, which is required by the cluster-autoscaler to detect them. + + ``` + k8s.io/cluster-autoscaler/ + k8s.io/cluster-autoscaler/enabled + ``` + +2. Declare additional tags for labels/taints. + + ```yaml + nodeGroups: + - name: worker-xlarge + labels: + k8s.dask.org/node-purpose: worker + taints: + k8s.dask.org_dedicated: worker:NoSchedule + + # IMPORTANT: we also provide these tags alongside the labels/taints + # to help the cluster-autoscaler do its job. + # + tags: + k8s.io/cluster-autoscaler/node-template/label/k8s.dask.org/node-purpose: worker + k8s.io/cluster-autoscaler/node-template/taint/k8s.dask.org_dedicated: worker:NoSchedule + ``` + + +#### Installation of cluster-autoscaler + +We rely on the [cluster-autoscaler Helm chart](https://github.com/kubernetes/autoscaler/tree/master/charts/cluster-autoscaler) to manage the k8s resources for the cluster-autoscaler we need to manually complement the k8s cluster with. 
+ +``` +helm upgrade cluster-autocaler cluster-autoscaler \ + --install \ + --repo https://kubernetes.github.io/autoscaler \ + --version 9.9.2 \ + --namespace kube-system \ + --set autoDiscovery.clusterName=jmte \ + --set awsRegion=us-west-2 +``` + +### Misc + +- Create a auth0 application for github +- Update dns record ([jupytearth.org is managed on GCP by Erik](https://console.cloud.google.com/net-services/dns/zones/jupytearth-org/details?folder=&organizationId=&project=domains-sos)) + +### FIXME: Open questions + +- How is cluster-autoscaler acquiring the permissions it needs? Is it by being + located on the node where we have + `nodeGroups.*.iam.withAddonPolicies.autoScaler=true`? Then we have ended up + granting permission to all pods on all nodes that are too high. diff --git a/eksctl/cloudformation-extras.yaml b/eksctl/cloudformation-extras.yaml new file mode 100644 index 0000000000..c80f541be8 --- /dev/null +++ b/eksctl/cloudformation-extras.yaml @@ -0,0 +1,293 @@ +# Cloudformation is like Terraform but specific to AWS, in other words, it +# allows you to declare some cloud infrastructure in configuration files that +# you can then request be setup on AWS by a CLI (aws cloudformation deploy). A +# quick intro is available here: https://www.youtube.com/watch?v=Omppm_YUG2g +# +# This cloudformation configuration contain what we need to complement the +# eksctl created k8s cluster for the deployer script to run in our CI system. + +# Goals: +# +# 1. For us maintainers to be able to encrypt/decrypt secret content with +# mozilla/sops directly, but also let hubploy use mozilla/sops to decrypt +# them using a AWS service account. This will require AWS KMS to be setup. +# 2. To enable hubploy to build and push docker images to our default AWS +# container registry (.dkr.ecr..amazonaws.com). +# +# Required AWS infrastructure to create: +# +# 1. A dedicated service account (AWS::IAM::User), with an associated +# AccessKey (AWS::IAM::AccessKey). +# 2. A KMS service (AWS::KMS::Key), and permissions to use it to the dedicated +# service account. +# 3. Permissions for the dedicated service account to push to the default +# container registry. +# 4. Permissions for the dedicated service account to work against the k8s +# cluster created by eksctl, which use cloudformation under the hood. +# +# 5. FUTURE: s3 stuff? +# 6. FUTURE: EFS stuff? 
+# + +# Operations: +# +# Create/Update: +# aws cloudformation deploy --stack-name=jmte-extras --template-file=./cloudformation-extras.yaml --capabilities=CAPABILITY_NAMED_IAM +# +# Inspect: +# aws cloudformation describe-stacks --stack-name=jmte-extras +# +# Delete: +# aws cloudformation delete-stack --stack-name=jmte-extras +# + +# References: +# +# AWS Cloudformation console: +# https://console.aws.amazon.com/cloudformation/home +# +# AWS Cloudformation intro: +# https://www.youtube.com/watch?v=Omppm_YUG2g +# +# AWS IAM intro: +# https://www.youtube.com/watch?v=3A5hRIT8zdo +# +# The starting point for me: +# https://medium.com/mercos-engineering/secrets-as-a-code-with-mozilla-sops-and-aws-kms-d069c45ae1b9 +# +# Reference on !Join: +# https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/intrinsic-function-reference-join.html +# + +# The parameters we need to provide to create this cloudformation stack +Parameters: + # ref: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/parameters-section-structure.html + EksClusterName: + Type: String + Default: jmte + EcrRepositoryName: + Type: String + Default: jmte/user-env + IamUserName: + Type: String + Default: ci + IamRoleNameEcr: + Type: String + Default: ci-ecr + IamRoleNameEks: + Type: String + Default: ci-eks + + +# The resources we want to be created as part of this cloudformation stack +Resources: + # ref: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-iam-user.html + IamUser: + Type: AWS::IAM::User + Properties: + UserName: !Ref IamUserName + + # ref: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-iam-accesskey.html + IamAccessKey: + Type: AWS::IAM::AccessKey + Properties: + UserName: !Ref IamUser + + # ref: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-iam-role.html + IamRoleEcr: + Type: AWS::IAM::Role + Properties: + RoleName: !Ref IamRoleNameEcr + Policies: + - PolicyName: EcrAccess + PolicyDocument: + Version: 2012-10-17 + Statement: + # I have failed restricting this further... 
+ - Effect: Allow + Action: + - ecr:* + Resource: "*" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Sid: AllowRoleToBeAssumedByOurUser + Effect: Allow + Principal: + AWS: !Join + - '' + - - 'arn:aws:iam::' + - !Ref AWS::AccountId + - :user/ + - !Ref IamUser + Action: + - sts:AssumeRole + IamRoleEks: + Type: AWS::IAM::Role + Properties: + RoleName: !Ref IamRoleNameEks + Policies: + - PolicyName: EksAccess + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - eks:DescribeCluster + Resource: !Join + - '' + - - 'arn:aws:eks:' + - !Ref AWS::Region + - ':' + - !Ref AWS::AccountId + - ':cluster/' + - !Ref EksClusterName + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Sid: AllowRoleToBeAssumedByOurUser + Effect: Allow + Principal: + AWS: !Join + - '' + - - 'arn:aws:iam::' + - !Ref AWS::AccountId + - :user/ + - !Ref IamUser + Action: + - sts:AssumeRole + + # ref: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-efs-filesystem.html + # + EfsFileSystem: + Type: AWS::EFS::FileSystem + Properties: + BackupPolicy: + Status: ENABLED + Encrypted: true + EfsMountTarget0: + Type: AWS::EFS::MountTarget + Properties: + FileSystemId: !GetAtt EfsFileSystem.FileSystemId + SecurityGroups: + - {"Fn::ImportValue": {"Fn::Sub": "eksctl-${EksClusterName}-cluster::SharedNodeSecurityGroup"}} + SubnetId: { "Fn::Select": [0, { "Fn::Split": [",", {"Fn::ImportValue": {"Fn::Sub": "eksctl-${EksClusterName}-cluster::SubnetsPublic"}}]}] } + EfsMountTarget1: + Type: AWS::EFS::MountTarget + Properties: + FileSystemId: !GetAtt EfsFileSystem.FileSystemId + SecurityGroups: + - {"Fn::ImportValue": {"Fn::Sub" : "eksctl-${EksClusterName}-cluster::SharedNodeSecurityGroup"}} + SubnetId: { "Fn::Select": [1, { "Fn::Split": [",", {"Fn::ImportValue": {"Fn::Sub": "eksctl-${EksClusterName}-cluster::SubnetsPublic"}}]}] } + EfsMountTarget2: + Type: AWS::EFS::MountTarget + Properties: + FileSystemId: !GetAtt EfsFileSystem.FileSystemId + SecurityGroups: + - {"Fn::ImportValue": {"Fn::Sub" : "eksctl-${EksClusterName}-cluster::SharedNodeSecurityGroup"}} + SubnetId: { "Fn::Select": [2, { "Fn::Split": [",", {"Fn::ImportValue": {"Fn::Sub": "eksctl-${EksClusterName}-cluster::SubnetsPublic"}}]}] } + + # ref: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-s3-bucket.html + # + S3Bucket: + Type: AWS::S3::Bucket + Properties: + AccessControl: Private + BucketName: !Sub ${EksClusterName}-scratch + + # ref: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ecr-repository.html + # + EcrRepository: + Type: AWS::ECR::Repository + Properties: + RepositoryName: !Ref EcrRepositoryName + RepositoryPolicyText: + Version: 2008-10-17 + Statement: + - Sid: Allow pull for who are authenticated with our account + Effect: Allow + Principal: + AWS: !Ref AWS::AccountId + Action: + - ecr:GetDownloadUrlForLayer + - ecr:BatchGetImage + + # ref: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-kms-key.html + # + KmsKey: + Type: AWS::KMS::Key + Properties: + Description: Enables mozilla/sops to encrypt/decrypt secrets just in time. 
+ KeyPolicy: + Version: 2012-10-17 + Statement: + - Sid: Enable Root IAM User Permissions + Effect: Allow + Principal: + AWS: !Join + - '' + - - 'arn:aws:iam::' + - !Ref AWS::AccountId + - :root + Action: 'kms:*' + Resource: '*' + - Sid: Enable User Permissions + Effect: Allow + Principal: + AWS: !Join + - '' + - - 'arn:aws:iam::' + - !Ref AWS::AccountId + - :user/ + - !Ref IamUser + Action: + - "kms:DescribeKey" + - "kms:Encrypt" + - "kms:Decrypt" + - "kms:ReEncrypt*" + - "kms:GenerateDataKey" + - "kms:GenerateDataKeyWithoutPlaintext" + Resource: '*' + + +# The relevant information from the created resources. +Outputs: + # A Role to control the k8s cluster + IamRoleEksArn: + Value: !GetAtt IamRoleEks.Arn + Description: The role with permission to work against k8s. + + # A role to control the docker registry + IamRoleEcrArn: + Value: !GetAtt IamRoleEcr.Arn + Description: | + The Role with permission to push to our image registry. + EcrRepository: + Value: !Join + - '' + - - !Ref AWS::AccountId + - .dkr.ecr. + - !Ref AWS::Region + - .amazonaws.com/ + - !Ref EcrRepositoryName + Description: The image repository for the user environment image. + + S3Bucket: + Value: !Ref S3Bucket + Description: An S3 private scratch bucket that all users share read/write permission to. + + EfsFileSystemId: + Value: !GetAtt EfsFileSystem.FileSystemId + + # The KMS system is not in use currently! Instead we use the 2i2c centralized + # Google KMS keychain instead to have one less account to manage. + KmsKeyArn: + Value: !GetAtt KmsKey.Arn + Description: Use this to set creation_rules[0].kms in .sops.yaml + + AwsAccessKeyId: + Value: !Ref IamAccessKey + Description: Use this to set AWS_ACCESS_KEY_ID as a GitHub project secret + AwsSecretAccessKey: + Value: !GetAtt IamAccessKey.SecretAccessKey + Description: Use this to set AWS_SECRET_ACCESS_KEY as a GitHub project secret diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml new file mode 100644 index 0000000000..787c9e9fd7 --- /dev/null +++ b/eksctl/eksctl-cluster-config.yaml @@ -0,0 +1,274 @@ +# This eksctl configuration file represents the cluster and node groups for use +# by the cluster. +# ref: https://eksctl.io/usage/schema/ +# +# Cluster operations: +# ref: https://eksctl.io/usage/cluster-upgrade/ +# +# create: eksctl create cluster --config-file=eksctl-cluster-config.yaml --set-kubeconfig-context +# upgrade: eksctl upgrade cluster --config-file=eksctl-cluster-config.yaml +# delete: eksctl delete cluster --config-file=eksctl-cluster-config.yaml +# +# Node group operations: +# ref: https://eksctl.io/usage/managing-nodegroups/ +# +# eksctl get nodegroups --cluster jmte +# +# eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" --approve +# eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" +# eksctl delete nodegroup --cluster jmte --name core-a +# eksctl create nodegroup --cluster jmte --name core-a +# +# eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" --approve && eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" +# +# Attribution: this was based on @yuvipanda's work in 2i2c! 
<3 +# ref: https://github.com/2i2c-org/pangeo-hubs/blob/8e552bc198d8339efe8c003cb847849255e8f8ed/aws/eksctl-config.yaml +# + + + +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig +metadata: + name: jmte + # region: + # The region was chosen to to us-west-2 (Oregon) to be close to a CMIP-6 + # dataset. + # + region: us-west-2 + version: "1.19" + tags: + 2i2c.org/project: jmte + +# availabilityZones: +# For the EKS control plane, arbitrary chosen but made explicit to ensure we +# can locate the node pool on an AZ where the EKS control plane exist as +# required. +# +availabilityZones: [us-west-2d, us-west-2b, us-west-2a] + + + +# This section will create additional k8s ServiceAccount's that are coupled with +# AWS Role's. By declaring pods to use them, you can grant these pods the +# associated permissions. For this deployment, we create a k8s ServiceAccount +# with Full S3 credentials which we then also declare user pods and dask worker +# pods will make use of. +# +iam: + withOIDC: true # https://eksctl.io/usage/security/#withoidc + # serviceAccounts like nodeGroups etc can be managed directly with eksctl, for + # more information, see: https://eksctl.io/usage/iamserviceaccounts/ + # + # eksctl create iamserviceaccount --config-file=eksctl-cluster-config.yaml + # + serviceAccounts: + - metadata: + name: s3-full-access + namespace: prod + labels: + aws-usage: application + attachPolicyARNs: + - arn:aws:iam::aws:policy/AmazonS3FullAccess + - metadata: + name: s3-full-access + namespace: staging + labels: + aws-usage: application + attachPolicyARNs: + - arn:aws:iam::aws:policy/AmazonS3FullAccess + + + +# Choose the type of node group? +# - nodeGroups cannot be updated but must be recreated on changes: +# https://eksctl.io/usage/managing-nodegroups/#nodegroup-immutability +# - managedNodeGroups cannot scale to zero: +# https://github.com/aws/containers-roadmap/issues/724 +# +# Choosing instance type? +# - Maximum pods: https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt +# - Node specs: https://aws.amazon.com/ec2/instance-types/ +# - Cost: https://ec2pricing.net/ +# +# Management advice: +# - Always use a suffix for node group names that you can replace with something +# to create a new node group and delete the old. You will run into issues if +# you name it "core" and "core-a" instead of "core-a" and "core-b", such as +# when deleting "core" you end up draining both node groups. 
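#
# As a concrete example of that advice (a sketch reusing the commands listed
# at the top of this file): to replace "core-a" you would first rename it to
# "core-b" in this file, create the new node group, and only delete the old
# one once the new one is up, e.g.:
#
#   eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "core-b"
#   eksctl delete nodegroup --cluster jmte --name core-a --approve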
+# +nodeGroups: + - name: core-a + availabilityZones: [us-west-2d] # aws ec2 describe-availability-zones --region + instanceType: m5.large # 28 pods, 2 cpu, 8 GB + minSize: 0 + maxSize: 2 + desiredCapacity: 1 + volumeSize: 80 + labels: + hub.jupyter.org/node-purpose: core + tags: + k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: core + iam: + withAddonPolicies: + autoScaler: true + efs: true + + # 57 pods, 4 cpu, 16 GB (Intel, 10 GBits network) + - name: user-a-4 + availabilityZones: &user-availabilityZones [us-west-2d] + instanceType: &user-instanceType m5.xlarge + minSize: &user-minSize 0 + maxSize: &user-maxSize 4 + desiredCapacity: &user-desiredCapacity 0 + volumeSize: &user-volumeSize 80 + labels: + hub.jupyter.org/node-purpose: user + 2i2c.org/node-cpu: "4" + taints: + hub.jupyter.org_dedicated: user:NoSchedule + tags: + k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "4" + k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule + iam: &user-iam + withAddonPolicies: + autoScaler: true + efs: true + + # 233 pods, 16 cpu, 64 GB (Intel, 10 GBits network) + - name: user-a-16 + availabilityZones: *user-availabilityZones + instanceType: m5.4xlarge + minSize: *user-minSize + maxSize: *user-maxSize + desiredCapacity: *user-desiredCapacity + volumeSize: *user-volumeSize + labels: + hub.jupyter.org/node-purpose: user + 2i2c.org/node-cpu: "16" + taints: + hub.jupyter.org_dedicated: user:NoSchedule + tags: + k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "16" + k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule + iam: *user-iam + + # 736 pods, 64 cpu, 256 GB (Intel, 20 GBits network) + - name: user-a-64 + availabilityZones: *user-availabilityZones + instanceType: m5.16xlarge + minSize: *user-minSize + maxSize: *user-maxSize + desiredCapacity: *user-desiredCapacity + volumeSize: *user-volumeSize + labels: + hub.jupyter.org/node-purpose: user + 2i2c.org/node-cpu: "64" + taints: + hub.jupyter.org_dedicated: user:NoSchedule + tags: + k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "64" + k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule + iam: *user-iam + + + + # Worker node pools using cheaper spot instances that are temporary. + # + # References: + # - About spotAllocationStrategy: https://aws.amazon.com/blogs/compute/introducing-the-capacity-optimized-allocation-strategy-for-amazon-ec2-spot-instances/ + # - About instancesDistribution: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-autoscaling-autoscalinggroup-instancesdistribution.html + # + # Note: instance types with different capacity (CPU/Memory) must have + # different node pools for the cluster autoscaler to work properly. + # + # "Due to the Cluster Autoscaler’s limitations (more on that in the next + # section) on which Instance type to expand, it’s important to choose + # instances of the same size (vCPU and memory) for each InstanceGroup." 
+ # + # ref: https://medium.com/riskified-technology/run-kubernetes-on-aws-ec2-spot-instances-with-zero-downtime-f7327a95dea + # + # Note: use of YAML merge below (<<) would be great, but it is not supported + # and was just part of YAML 1.1 but not 1.0 or 1.2. + # + - name: worker-a-4 + availabilityZones: &worker-availabilityZones [us-west-2d, us-west-2b, us-west-2a] + minSize: &worker-minSize 0 + maxSize: &worker-maxSize 8 + desiredCapacity: &worker-desiredCapacity 0 + volumeSize: &worker-volumeSize 80 + labels: + k8s.dask.org/node-purpose: worker + 2i2c.org/node-cpu: "4" + taints: + k8s.dask.org_dedicated: worker:NoSchedule + tags: + k8s.io/cluster-autoscaler/node-template/label/k8s.dask.org/node-purpose: worker + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "4" + k8s.io/cluster-autoscaler/node-template/taint/k8s.dask.org_dedicated: worker:NoSchedule + iam: &worker-iam + withAddonPolicies: + autoScaler: true + efs: true + # Spot instance specific configuration + instancesDistribution: + instanceTypes: + - m5a.xlarge # 57 pods, 4 cpu, 16 GB (AMD, 10 GBits network, 100% cost) + - m5.xlarge # 57 pods, 4 cpu, 16 GB (Intel, 10 GBits network, ~112% cost) + # - m5n.xlarge # 57 pods, 4 cpu, 16 GB (Intel, 25 GBits network, ~139% cost) + onDemandBaseCapacity: &worker-onDemandBaseCapacity 0 + onDemandPercentageAboveBaseCapacity: &worker-onDemandPercentageAboveBaseCapacity 0 + spotAllocationStrategy: &worker-spotAllocationStrategy capacity-optimized + + - name: worker-a-16 + availabilityZones: *worker-availabilityZones + minSize: *worker-minSize + maxSize: *worker-maxSize + desiredCapacity: *worker-desiredCapacity + volumeSize: *worker-volumeSize + labels: + k8s.dask.org/node-purpose: worker + 2i2c.org/node-cpu: "16" + taints: + k8s.dask.org_dedicated: worker:NoSchedule + tags: + k8s.io/cluster-autoscaler/node-template/label/k8s.dask.org/node-purpose: worker + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "16" + k8s.io/cluster-autoscaler/node-template/taint/k8s.dask.org_dedicated: worker:NoSchedule + iam: *worker-iam + instancesDistribution: + instanceTypes: + - m5a.4xlarge # 233 pods, 16 cpu, 64 GB (AMD, 10 GBits network, 100% cost) + - m5.4xlarge # 233 pods, 16 cpu, 64 GB (Intel, 10 GBits network, ~112% cost) + # - m5n.4xlarge # 233 pods, 16 cpu, 64 GB (Intel, 25 GBits network, ~139% cost) + onDemandBaseCapacity: *worker-onDemandBaseCapacity + onDemandPercentageAboveBaseCapacity: *worker-onDemandPercentageAboveBaseCapacity + spotAllocationStrategy: *worker-spotAllocationStrategy + + - name: worker-a-64 + availabilityZones: *worker-availabilityZones + minSize: *worker-minSize + maxSize: *worker-maxSize + desiredCapacity: *worker-desiredCapacity + volumeSize: *worker-volumeSize + labels: + k8s.dask.org/node-purpose: worker + 2i2c.org/node-cpu: "64" + taints: + k8s.dask.org_dedicated: worker:NoSchedule + tags: + k8s.io/cluster-autoscaler/node-template/label/k8s.dask.org/node-purpose: worker + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "64" + k8s.io/cluster-autoscaler/node-template/taint/k8s.dask.org_dedicated: worker:NoSchedule + iam: *worker-iam + instancesDistribution: + instanceTypes: + - m5a.16xlarge # 736 pods, 64 cpu, 256 GB (AMD, 12 GBits network, 100% cost) + - m5.16xlarge # 736 pods, 64 cpu, 256 GB (Intel, 20 GBits network, ~112% cost) + # - m5n.16xlarge # 736 pods, 64 cpu, 256 GB (Intel, 75 GBits network, ~139% cost) + onDemandBaseCapacity: *worker-onDemandBaseCapacity + onDemandPercentageAboveBaseCapacity: 
*worker-onDemandPercentageAboveBaseCapacity + spotAllocationStrategy: *worker-spotAllocationStrategy From 5dca036e289c6c45fb96bf6d9798a3d48da20037 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Wed, 2 Jun 2021 01:15:52 +0200 Subject: [PATCH 04/43] jmte: add volume hack to chown /shared folder --- shared/deployer/jmte.cluster.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index 36efe2bc81..cf06a31ecd 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -86,13 +86,16 @@ hubs: # Need to explicitly fix ownership here, since EFS doesn't do anonuid - name: volume-mount-ownership-fix image: busybox - command: ["sh", "-c", "id && chown 1000:1000 /home/jovyan && ls -lhd /home/jovyan"] + command: ["sh", "-c", "id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan"] securityContext: runAsUser: 0 volumeMounts: - name: home mountPath: /home/jovyan subPath: "{username}" + - name: home + mountPath: /home/jovyan/shared + subPath: _shared image: name: pangeo/pangeo-notebook From 9f912fba49a0f4f49ee672048b6631c5f79990b6 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Wed, 2 Jun 2021 02:10:14 +0200 Subject: [PATCH 05/43] jmte: add choldgraf --- shared/deployer/jmte.cluster.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index cf06a31ecd..01524e5d1d 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -155,6 +155,7 @@ hubs: - abbyazari # Abby Azari - andersy005 # Anderson Banihirwe - consideratio # Erik Sundell + - choldgraf # Chris Holdgraf - elliesch # Ellie Abrahams - EMscience # Edom Moges - espg # Shane Grigsby From f68460a988c1a318d335937ac833d2d567fbb50f Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Wed, 2 Jun 2021 02:10:32 +0200 Subject: [PATCH 06/43] jmte: opt out of default tolerations --- shared/deployer/jmte.cluster.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index 01524e5d1d..c6d7863e90 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -197,6 +197,7 @@ hubs: extraPodConfig: nodeSelector: hub.jupyter.org/node-purpose: user + k8s.dask.org/node-purpose: null serviceAccountName: *user-sa worker: extraPodConfig: From 10fee3873c170e6221a00635dbca8c77b2d46e7f Mon Sep 17 00:00:00 2001 From: Fernando Perez Date: Wed, 2 Jun 2021 13:36:19 -0700 Subject: [PATCH 07/43] jmte: add @jonathan-taylor as allowed user --- shared/deployer/jmte.cluster.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index c6d7863e90..1b5d49736c 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -167,6 +167,7 @@ hubs: - tsnow03 # Tasha Snow - whyjz # Whyjay Zheng - yuvipanda # Yuvi Panda + - jonathan-taylor # Jonathan Taylor admin_users: *users allowNamedServers: true networkPolicy: From 51e3815e1286285cf62ffd0f6311ed1012cd0893 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sat, 10 Jul 2021 23:01:03 +0200 Subject: [PATCH 08/43] jmte: set default profile list option and add descriptions --- shared/deployer/jmte.cluster.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index 1b5d49736c..cdb8310ac8 100644 --- a/shared/deployer/jmte.cluster.yaml 
+++ b/shared/deployer/jmte.cluster.yaml @@ -103,29 +103,35 @@ hubs: profileList: - display_name: "16th of Medium: 0.25-4 CPU, 1-16 GB" + default: True + description: "A shared machine, the recommended option until you experience a limitation." kubespawner_override: cpu_guarantee: 0.225 mem_guarantee: 0.875G mem_limit: null node_selector: { 2i2c.org/node-cpu: "4" } - display_name: "4th of Medium: 1-4 CPU, 4-16 GB" + description: "A shared machine." kubespawner_override: cpu_guarantee: 0.875 mem_guarantee: 3.5G mem_limit: null node_selector: { 2i2c.org/node-cpu: "4" } - display_name: "Medium: 4 CPU, 16 GB" + description: "A dedicated machine for you." kubespawner_override: cpu_guarantee: 3.5 mem_guarantee: 14G mem_limit: null node_selector: { 2i2c.org/node-cpu: "4" } - display_name: "Large: 16 CPU, 64 GB" + description: "A dedicated machine for you." kubespawner_override: mem_guarantee: 56G mem_limit: null node_selector: { 2i2c.org/node-cpu: "16" } - display_name: "Massive: 64 CPU, 256 GB" + description: "A dedicated machine for you." kubespawner_override: mem_guarantee: 224G mem_limit: null From ea69e42b900070dbfd2473e44f8356a4e63b707d Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 18 Jul 2021 22:03:56 +0200 Subject: [PATCH 09/43] jmte: add jupyterhub-ssh --- helm-charts/daskhub/Chart.yaml | 3 ++ helm-charts/daskhub/values.schema.yaml | 6 +++ shared/deployer/jmte.cluster.yaml | 75 +++++++++++++++++++++++++- 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/helm-charts/daskhub/Chart.yaml b/helm-charts/daskhub/Chart.yaml index fd104fbdbc..e3ab544756 100644 --- a/helm-charts/daskhub/Chart.yaml +++ b/helm-charts/daskhub/Chart.yaml @@ -13,3 +13,6 @@ dependencies: - name: dask-gateway version: "2022.10.0" repository: "https://helm.dask.org/" + - name: jupyterhub-ssh + version: 0.0.1-n114.h3c48a9f + repository: https://yuvipanda.github.io/jupyterhub-ssh/ diff --git a/helm-charts/daskhub/values.schema.yaml b/helm-charts/daskhub/values.schema.yaml index ccf3cd201d..228289adc1 100644 --- a/helm-charts/daskhub/values.schema.yaml +++ b/helm-charts/daskhub/values.schema.yaml @@ -27,6 +27,12 @@ properties: dask-gateway: type: object additionalProperties: true + # jupyterhub-ssh is a dependent helm chart, we rely on its schema validation + # for values passed to it and are not imposing restrictions on them in this + # helm chart. + jupyterhub-ssh: + type: object + additionalProperties: true global: type: object additionalProperties: true diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index cdb8310ac8..8fb271e32e 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -146,14 +146,75 @@ hubs: # Along with this, we also make use of the autohttps system that # requires us to configure an letsencrypt email. # - service: - type: LoadBalancer https: enabled: true type: letsencrypt letsencrypt: contactEmail: erik@sundellopensource.se + service: + # jupyterhub-ssh/sftp integration part 1/3: + # + # We must accept traffic to the k8s Service (proxy-public) receiving traffic + # from the internet. Port 22 is typically used for both SSH and SFTP, but we + # can't use the same port for both so we use 2222 for SFTP in this example. + # + extraPorts: + - name: ssh + port: 22 + targetPort: ssh + - name: sftp + port: 2222 + targetPort: sftp + traefik: + # jupyterhub-ssh/sftp integration part 2/3: + # + # We must accept traffic arriving to the autohttps pod (traefik) from the + # proxy-public service. 
Expose a port and update the NetworkPolicy + # to tolerate incoming (ingress) traffic on the exposed port. + # + extraPorts: + - name: ssh + containerPort: 8022 + - name: sftp + containerPort: 2222 + networkPolicy: + allowedIngressPorts: [http, https, ssh, sftp] + # jupyterhub-ssh/sftp integration part 3/3: + # + # We must let traefik know it should listen for traffic (traefik entrypoint) + # and route it (traefik router) onwards to the jupyterhub-ssh k8s Service + # (traefik service). + # + extraStaticConfig: + entryPoints: + ssh-entrypoint: + address: :8022 + sftp-entrypoint: + address: :2222 + extraDynamicConfig: + tcp: + services: + ssh-service: + loadBalancer: + servers: + - address: jupyterhub-ssh:22 + sftp-service: + loadBalancer: + servers: + - address: jupyterhub-sftp:22 + routers: + ssh-router: + entrypoints: [ssh-entrypoint] + rule: HostSNI(`*`) + service: ssh-service + sftp-router: + entrypoints: [sftp-entrypoint] + rule: HostSNI(`*`) + service: sftp-service + + + hub: config: Authenticator: @@ -220,3 +281,13 @@ hubs: # per Dask cluster limits. c.ClusterConfig.cluster_max_cores = 256 c.ClusterConfig.cluster_max_memory = "1028G" + + + jupyterhub-ssh: + hubUrl: http://proxy-http:8000 + + ssh: + enabled: true + + sftp: + enabled: false From 291d12d4aa2d7ba93612558778c804cfe7149956 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Wed, 11 Aug 2021 23:58:38 +0200 Subject: [PATCH 10/43] jmte: enable jupyterhub-sftp --- helm-charts/daskhub/Chart.yaml | 2 +- shared/deployer/jmte.cluster.yaml | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/helm-charts/daskhub/Chart.yaml b/helm-charts/daskhub/Chart.yaml index e3ab544756..df30291825 100644 --- a/helm-charts/daskhub/Chart.yaml +++ b/helm-charts/daskhub/Chart.yaml @@ -14,5 +14,5 @@ dependencies: version: "2022.10.0" repository: "https://helm.dask.org/" - name: jupyterhub-ssh - version: 0.0.1-n114.h3c48a9f + version: 0.0.1-n142.h402a3d6 repository: https://yuvipanda.github.io/jupyterhub-ssh/ diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index 8fb271e32e..8e52330f6c 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -153,6 +153,9 @@ hubs: contactEmail: erik@sundellopensource.se service: + # Revert an unwanted basehub default + type: LoadBalancer + # jupyterhub-ssh/sftp integration part 1/3: # # We must accept traffic to the k8s Service (proxy-public) receiving traffic @@ -282,7 +285,9 @@ hubs: c.ClusterConfig.cluster_max_cores = 256 c.ClusterConfig.cluster_max_memory = "1028G" - + # jupyterhub-ssh values.yaml reference: + # https://github.com/yuvipanda/jupyterhub-ssh/blob/main/helm-chart/jupyterhub-ssh/values.yaml + # jupyterhub-ssh: hubUrl: http://proxy-http:8000 @@ -290,4 +295,7 @@ hubs: enabled: true sftp: - enabled: false + enabled: true + pvc: + enabled: true + name: home-nfs From b19b6ac91c6ffd6a747462fe3d8dbe82b523a569 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Fri, 3 Sep 2021 04:06:15 +0200 Subject: [PATCH 11/43] jmte: add GPUs --- eksctl/eksctl-cluster-config.yaml | 81 +++++++++++++++++++++++++++++++ shared/deployer/jmte.cluster.yaml | 24 +++++++++ 2 files changed, 105 insertions(+) diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index 787c9e9fd7..04a4ca00a8 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -90,6 +90,9 @@ iam: # - Maximum pods: https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt # - Node specs: 
https://aws.amazon.com/ec2/instance-types/ # - Cost: https://ec2pricing.net/ +# - Instance availability in zone: +# - https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-discovery.html +# - aws ec2 describe-instance-type-offerings --location-type "availability-zone" --filters Name=location,Values=us-west-2d --region us-west-2 | grep g4dn # # Management advice: # - Always use a suffix for node group names that you can replace with something @@ -97,6 +100,14 @@ iam: # you name it "core" and "core-a" instead of "core-a" and "core-b", such as # when deleting "core" you end up draining both node groups. # +# Common gotcha: +# - AWS quotas may stop you from scaling up. The symptoms for this will be that +# you observe that a scale up request has been made by the cluster-autoscaler +# but no new node ever comes online. If that happens, you should visit +# https://.console.aws.amazon.com/ec2autoscaling/home, click +# on the auto scaling group (ASG), then go to the activity tab and verify that +# you have run into a quota issue. Following that, you make a request to AWS using provided link: https://aws.amazon.com/contact-us/ec2-request +# nodeGroups: - name: core-a availabilityZones: [us-west-2d] # aws ec2 describe-availability-zones --region @@ -174,6 +185,76 @@ nodeGroups: k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule iam: *user-iam + # GPU Nodes. + # + # g4dn was chosen based on input from Shane in this comment + # https://github.com/pangeo-data/jupyter-earth/issues/77#issuecomment-910864707. + # + # For reference of the available choices, see + # https://aws.amazon.com/ec2/instance-types/#Accelerated_Computing. + # + # For reference on the GPU device plugin that needs to be installed, but is + # installed automatically by eksctl, see: + # https://eksctl.io/usage/gpu-support/#gpu-support + # + # The machine nodes AMI (what is installed when it starts) for GPU nodes may + # require you to subscribe to the AMI and accept some license. For more info, + # see: + # https://docs.aws.amazon.com/deep-learning-containers/latest/devguide/deep-learning-containers-eks-setup.html#deep-learning-containers-eks-setup-licensing + # + # Note that we opted for us-west-2b here because g4dn machines were not + # available in us-west-2d. 
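#
# Availability per zone can be checked with the same command referenced
# further up in this file, just pointed at another zone, for example:
#
#   aws ec2 describe-instance-type-offerings \
#     --region us-west-2 \
#     --location-type "availability-zone" \
#     --filters Name=location,Values=us-west-2b | grep g4dn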
+ # + # 57 pods, 4 cpu, 16 GB (Intel, 25 GBits network), 1 T4 Tensor Core GPU + - name: user-gpu-a-4 + availabilityZones: &user-gpu-availabilityZones [us-west-2b] + instanceType: g4dn.xlarge + minSize: *user-minSize + maxSize: *user-maxSize + desiredCapacity: *user-desiredCapacity + volumeSize: *user-volumeSize + labels: + hub.jupyter.org/node-purpose: user + 2i2c.org/node-cpu: "4" + 2i2c.org/node-gpu: "1" + taints: + hub.jupyter.org_dedicated: user:NoSchedule + tags: + k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "4" + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-gpu: "1" + k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule + k8s.io/cluster-autoscaler/node-template/taint/nvidia.com/gpu: NoSchedule + iam: &user-iam + withAddonPolicies: + autoScaler: true + efs: true + + # 233 pods, 16 cpu, 64 GB (Intel, 25 GBits network), 1 T4 Tensor Core GPU + - name: user-gpu-a-16 + availabilityZones: *user-gpu-availabilityZones + instanceType: g4dn.4xlarge + minSize: *user-minSize + maxSize: *user-maxSize + desiredCapacity: *user-desiredCapacity + volumeSize: *user-volumeSize + labels: + hub.jupyter.org/node-purpose: user + 2i2c.org/node-cpu: "16" + 2i2c.org/node-gpu: "1" + taints: + hub.jupyter.org_dedicated: user:NoSchedule + tags: + k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "16" + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-gpu: "1" + k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule + k8s.io/cluster-autoscaler/node-template/taint/nvidia.com/gpu: NoSchedule + iam: &user-iam + withAddonPolicies: + autoScaler: true + efs: true + # Worker node pools using cheaper spot instances that are temporary. diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index 8e52330f6c..69ecd09591 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -110,6 +110,7 @@ hubs: mem_guarantee: 0.875G mem_limit: null node_selector: { 2i2c.org/node-cpu: "4" } + extra_resource_limits: {} - display_name: "4th of Medium: 1-4 CPU, 4-16 GB" description: "A shared machine." kubespawner_override: @@ -117,6 +118,7 @@ hubs: mem_guarantee: 3.5G mem_limit: null node_selector: { 2i2c.org/node-cpu: "4" } + extra_resource_limits: {} - display_name: "Medium: 4 CPU, 16 GB" description: "A dedicated machine for you." kubespawner_override: @@ -124,18 +126,38 @@ hubs: mem_guarantee: 14G mem_limit: null node_selector: { 2i2c.org/node-cpu: "4" } + extra_resource_limits: {} - display_name: "Large: 16 CPU, 64 GB" description: "A dedicated machine for you." kubespawner_override: mem_guarantee: 56G mem_limit: null node_selector: { 2i2c.org/node-cpu: "16" } + extra_resource_limits: {} - display_name: "Massive: 64 CPU, 256 GB" description: "A dedicated machine for you." kubespawner_override: mem_guarantee: 224G mem_limit: null node_selector: { 2i2c.org/node-cpu: "64" } + extra_resource_limits: {} + - display_name: "Medium GPU: 4 CPU, 16 GB, 1 T4 Tensor Core GPU" + description: "A dedicated machine for you with one GPU attached." 
+ kubespawner_override: + cpu_guarantee: 3.5 + mem_guarantee: 14G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "4", 2i2c.org/node-gpu: "1" } + extra_resource_limits: + nvidia.com/gpu: "1" + - display_name: "Large GPU: 16 CPU, 64 GB, 1 T4 Tensor Core GPU" + description: "A dedicated machine for you with one GPU attached." + kubespawner_override: + mem_guarantee: 56G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "16", 2i2c.org/node-gpu: "1" } + extra_resource_limits: + nvidia.com/gpu: "1" proxy: # proxy notes: @@ -155,6 +177,8 @@ hubs: service: # Revert an unwanted basehub default type: LoadBalancer + annotations: + service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600" # jupyterhub-ssh/sftp integration part 1/3: # From 28f30068e8eb55374225a934fe1f288baf4e4077 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Fri, 1 Oct 2021 16:36:11 +0200 Subject: [PATCH 12/43] jmte: increase start timeout to handle edge cases --- shared/deployer/jmte.cluster.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index 69ecd09591..df34a6e4be 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -70,6 +70,11 @@ hubs: - --LabApp.collaborative=True - --ServerApp.allow_remote_access=True + # Increased as we have experienced a too slow image pull at least + # once. Our pods can take ~6-7 minutes to start on a new node it + # seems, so this gives us some margin. + startTimeout: 900 + extraEnv: # SCRATCH_BUCKET / PANGEO_SCRATCH are environment variables that # help users write notebooks and such referencing this environment From b388c0489c25a4d176c0975589500d2137762e07 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Fri, 1 Oct 2021 16:36:34 +0200 Subject: [PATCH 13/43] jmte: increase user and worker node's disk volumes --- eksctl/eksctl-cluster-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index 04a4ca00a8..f2b111c09f 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -115,7 +115,7 @@ nodeGroups: minSize: 0 maxSize: 2 desiredCapacity: 1 - volumeSize: 80 + volumeSize: 250 labels: hub.jupyter.org/node-purpose: core tags: @@ -132,7 +132,7 @@ nodeGroups: minSize: &user-minSize 0 maxSize: &user-maxSize 4 desiredCapacity: &user-desiredCapacity 0 - volumeSize: &user-volumeSize 80 + volumeSize: &user-volumeSize 500 labels: hub.jupyter.org/node-purpose: user 2i2c.org/node-cpu: "4" @@ -280,7 +280,7 @@ nodeGroups: minSize: &worker-minSize 0 maxSize: &worker-maxSize 8 desiredCapacity: &worker-desiredCapacity 0 - volumeSize: &worker-volumeSize 80 + volumeSize: &worker-volumeSize 500 labels: k8s.dask.org/node-purpose: worker 2i2c.org/node-cpu: "4" From cd3e206e6d3765fdf0340629846b71482dfe8e9e Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 14 Oct 2021 04:30:22 +0200 Subject: [PATCH 14/43] jmte: configure x1.16xlarge nodes --- eksctl/eksctl-cluster-config.yaml | 43 +++++++++++++++++++++++++++++++ shared/deployer/jmte.cluster.yaml | 7 +++++ 2 files changed, 50 insertions(+) diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index f2b111c09f..9b74aec009 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -185,6 +185,49 @@ nodeGroups: k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule iam: *user-iam + # High memory nodes. 
+ # + # The local SSD storage available on these high memory nodes is not exposed by + # default in some easy way but is rather quite tricky to make use of in k8s. + # To make that happen, one needs to have a daemonset installed to prepare the + # nodes that has local storage to make it exposed. + # + # A discussion on how this is done is made in + # https://github.com/pangeo-data/jupyter-earth/issues/88. + # + # To figure out what availability zones we could use, I used the command below + # and took the union of that output with the zones of the EKS control plane + # configured in the root level of this config. I'm not sure if I could use + # nodes in other availability zones. + # + # aws ec2 describe-instance-type-offerings \ + # --region us-west-2 \ + # --filter Name=instance-type,Values=x1.16xlarge \ + # --location-type=availability-zone + # + # 233 pods, 64 cpu, 976 GB, 1,920 GB local SSD storage, (Intel, 10 GBits + # network) + - name: user-highmem-a-64 + availabilityZones: &user-highmem-availabilityZones [us-west-2b, us-west-2a] + instanceType: x1.16xlarge + minSize: *user-minSize + maxSize: *user-maxSize + desiredCapacity: *user-desiredCapacity + volumeSize: *user-volumeSize + labels: + hub.jupyter.org/node-purpose: user + 2i2c.org/node-highmem-cpu: "64" + taints: + hub.jupyter.org_dedicated: user:NoSchedule + tags: + k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-highmem-cpu: "64" + k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule + iam: &user-iam + withAddonPolicies: + autoScaler: true + efs: true + # GPU Nodes. # # g4dn was chosen based on input from Shane in this comment diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index df34a6e4be..313da55c7a 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -146,6 +146,13 @@ hubs: mem_limit: null node_selector: { 2i2c.org/node-cpu: "64" } extra_resource_limits: {} + - display_name: "Massive high-memory: 64 CPU, 976 GB" + description: "A dedicated machine for you." + kubespawner_override: + mem_guarantee: 900G + mem_limit: null + node_selector: { 2i2c.org/node-highmem-cpu: "64" } + extra_resource_limits: {} - display_name: "Medium GPU: 4 CPU, 16 GB, 1 T4 Tensor Core GPU" description: "A dedicated machine for you with one GPU attached." 
kubespawner_override: From 8d7ac1f13a6525b7e89dced7697db2cf76afcafd Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 28 Oct 2021 19:51:36 +0200 Subject: [PATCH 15/43] jmte: tweak configuration related to GPU --- debug-pod.yaml | 93 +++++++++++++++++++++++++++++++ eksctl/eksctl-cluster-config.yaml | 37 ++++++++++-- shared/deployer/jmte.cluster.yaml | 26 +++++++++ 3 files changed, 151 insertions(+), 5 deletions(-) create mode 100644 debug-pod.yaml diff --git a/debug-pod.yaml b/debug-pod.yaml new file mode 100644 index 0000000000..b456001eaa --- /dev/null +++ b/debug-pod.yaml @@ -0,0 +1,93 @@ +apiVersion: v1 +kind: Pod +metadata: + annotations: + hub.jupyter.org/username: fperez + labels: + app: jupyterhub + chart: jupyterhub-1.1.1 + component: singleuser-server + heritage: jupyterhub + hub.jupyter.org/network-access-hub: "true" + hub.jupyter.org/network-access-proxy-http: "true" + hub.jupyter.org/servername: "" + hub.jupyter.org/username: fperez + release: prod + name: jupyter-fperez-debugging + namespace: prod +spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: hub.jupyter.org/node-purpose + operator: In + values: + - user + weight: 100 + containers: + - args: + - jupyterhub-singleuser + - --ip=0.0.0.0 + - --port=8888 + - --SingleUserNotebookApp.default_url=/lab + image: 286354552638.dkr.ecr.us-west-2.amazonaws.com/jmte/user-env:c6d9558 + name: notebook + ports: + - containerPort: 8888 + name: notebook-port + protocol: TCP + resources: + requests: + cpu: 225m + memory: "939524096" + initContainers: + - command: + - sh + - -c + - id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan & sleep infinity + image: busybox + imagePullPolicy: Always + name: volume-mount-ownership-fix + securityContext: + runAsUser: 0 + volumeMounts: + - mountPath: /home/jovyan + name: home + subPath: fperez + - mountPath: /home/jovyan/shared + name: home + subPath: _shared + nodeSelector: + 2i2c.org/node-cpu: "4" + priority: 0 + priorityClassName: prod-default-priority + restartPolicy: OnFailure + schedulerName: prod-user-scheduler + securityContext: + fsGroup: 100 + serviceAccount: s3-full-access + serviceAccountName: s3-full-access + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoSchedule + key: hub.jupyter.org/dedicated + operator: Equal + value: user + - effect: NoSchedule + key: hub.jupyter.org_dedicated + operator: Equal + value: user + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 300 + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 300 + volumes: + - name: home + persistentVolumeClaim: + claimName: home-nfs \ No newline at end of file diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index 9b74aec009..b97ddee8be 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -15,9 +15,10 @@ # eksctl get nodegroups --cluster jmte # # eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" --approve -# eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" -# eksctl delete nodegroup --cluster jmte --name core-a -# eksctl create nodegroup --cluster jmte --name core-a +# eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" --install-nvidia-plugin=false +# eksctl create nodegroup 
--config-file=eksctl-cluster-config.yaml --include "user-gpu-a-*" --install-nvidia-plugin=false +# eksctl delete nodegroup --cluster jmte --name core-a --approve +# eksctl create nodegroup --cluster jmte --name core-a --install-nvidia-plugin=false # # eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" --approve && eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" # @@ -238,7 +239,31 @@ nodeGroups: # # For reference on the GPU device plugin that needs to be installed, but is # installed automatically by eksctl, see: - # https://eksctl.io/usage/gpu-support/#gpu-support + # https://eksctl.io/usage/gpu-support/#gpu-support. With that said, the + # daemonset must still have a toleration set manually on it. + # + # Do a `kubectl edit ds -n kube-system nvidia-device-plugin-daemonset` and add + # the following entries under tolerations: + # + # - effect: NoSchedule + # key: hub.jupyter.org/dedicated + # operator: Equal + # value: user + # - effect: NoSchedule + # key: hub.jupyter.org_dedicated + # operator: Equal + # value: user + # + # Sadly, something is making this change reset. I don't know why, but I + # suspect it happens whenever I do something with eksctl - perhaps whenever I + # do something with the nodegroup realted to GPU nodes. I think it resets + # whenever a GPU based nodegroup is created unless + # --install-nvidia-plugin=false it passed to the `eksctl create nodegroup` + # command. + # + # It seems I may need to specify additional tags also, with associated value + # for the GPU of choice: + # https://github.com/kubernetes/autoscaler/blob/e80ab518340f88f364fe3ef063f8303755125971/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go#L40-L47 # # The machine nodes AMI (what is installed when it starts) for GPU nodes may # require you to subscribe to the AMI and accept some license. For more info, @@ -260,9 +285,12 @@ nodeGroups: hub.jupyter.org/node-purpose: user 2i2c.org/node-cpu: "4" 2i2c.org/node-gpu: "1" + k8s.amazonaws.com/accelerator: "nvidia-tesla-t4" taints: hub.jupyter.org_dedicated: user:NoSchedule + nvidia.com/gpu: NoSchedule tags: + k8s.io/cluster-autoscaler/node-template/label/k8s.amazonaws.com/accelerator: "nvidia-tesla-t4" k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "4" k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-gpu: "1" @@ -292,7 +320,6 @@ nodeGroups: k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "16" k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-gpu: "1" k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule - k8s.io/cluster-autoscaler/node-template/taint/nvidia.com/gpu: NoSchedule iam: &user-iam withAddonPolicies: autoScaler: true diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index 313da55c7a..b1609f4e81 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -53,6 +53,16 @@ hubs: url: https://jupytearth.org singleuser: + # extraFiles ref: https://zero-to-jupyterhub.readthedocs.io/en/latest/resources/reference.html#singleuser-extrafiles + # + # Example: + # + # extraFiles: + # bash-extras: + # mountPath: /etc/test.txt + # stringData: | + # hello world! + # Eksctl: The service account was created by eksctl. 
# serviceAccountName: &user-sa s3-full-access @@ -258,6 +268,9 @@ hubs: config: Authenticator: allowed_users: &users + # This is just listing a few of the users/admins, a lot of + # users has been added manually, see: + # https://github.com/pangeo-data/jupyter-earth/issues/53 - abbyazari # Abby Azari - andersy005 # Anderson Banihirwe - consideratio # Erik Sundell @@ -301,6 +314,19 @@ hubs: nodeSelector: null backend: scheduler: + # IMPORTANT: We have experienced that the scheduler can fail with + # 1GB memory limit. This was observed "stream closed" + # from the python client working against the + # Dask-Gateway created DaskCluster. + # + # CommClosedError: in : Stream is closed + # + cores: + request: 1 + limit: 64 + memory: + request: 2G + limit: 500G extraPodConfig: nodeSelector: hub.jupyter.org/node-purpose: user From 351adc500043b5f465f3b4659575bd125b106d8a Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Fri, 3 Sep 2021 04:05:19 +0200 Subject: [PATCH 16/43] jmte: disable JupyterLab collaborative mode, awaiting critical bugfixes --- shared/deployer/jmte.cluster.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index b1609f4e81..4156e4bb2d 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -67,18 +67,18 @@ hubs: # serviceAccountName: &user-sa s3-full-access - # cmd: I've experimented with these settings to get a JupyterLab RTC - # setup functioning. It currently is, but is this what makes - # sense to get it to function? - # - # ref: https://github.com/jupyterlab-contrib/jupyterlab-link-share/issues/10#issuecomment-851899758 - # ref: https://github.com/jupyterlab/jupyterlab/blob/1c8ff104a99e294265e6cf476dcb46279b0c3593/binder/jupyter_notebook_config.py#L39 - # - # Note the default in z2jh is jupyterhub-singleuser. + # cmd: Note the default in z2jh is jupyterhub-singleuser. cmd: - jupyterhub-singleuser - - --LabApp.collaborative=True - - --ServerApp.allow_remote_access=True + # FIXME: Collaborative mode is disabled due to critical issues + # reported in + # https://discourse.jupyter.org/t/plans-on-bringing-rtc-to-jupyterhub/9813/13 + # seem to remain according to Tasha Snow. + # + # These issues may be resolved by + # https://github.com/jupyterlab/jupyterlab/pull/11599. + # + # - --LabApp.collaborative=True # Increased as we have experienced a too slow image pull at least # once. Our pods can take ~6-7 minutes to start on a new node it From f7f749b70a16926d8f0a0ce7837408f18da4f4f2 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Wed, 26 Jan 2022 01:44:52 +0100 Subject: [PATCH 17/43] jmte: adjust to basehub values refactoring --- shared/deployer/jmte.cluster.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index 4156e4bb2d..95397b83d8 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -13,9 +13,12 @@ hubs: basehub: # Cloudformation: The EFS filesystem was created by cloudformation. 
# - nfsPVC: + nfs: enabled: true - nfs: + shareCreator: + enabled: true + pv: + serverIP: fs-01707b06.efs.us-west-2.amazonaws.com # mountOptions from https://docs.aws.amazon.com/efs/latest/ug/mounting-fs-nfs-mount-settings.html mountOptions: - rsize=1048576 @@ -24,7 +27,6 @@ hubs: - soft # We pick soft over hard, so NFS lockups don't lead to hung processes - retrans=2 - noresvport - serverIP: fs-01707b06.efs.us-west-2.amazonaws.com # baseShareName is required to be just "/" so that we can create # various sub folders in the filesystem that our PV to access the # NFS server can reference successfully as it isn't supported to From 314d146957e567093597b4b07351bc2bc32a0d05 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Wed, 26 Jan 2022 01:45:24 +0100 Subject: [PATCH 18/43] jmte: github-app-auth-user: add related gitconfig and env vars --- shared/deployer/jmte.cluster.yaml | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index 95397b83d8..0a121ebc33 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -56,14 +56,22 @@ hubs: singleuser: # extraFiles ref: https://zero-to-jupyterhub.readthedocs.io/en/latest/resources/reference.html#singleuser-extrafiles - # - # Example: - # - # extraFiles: - # bash-extras: - # mountPath: /etc/test.txt - # stringData: | - # hello world! + extraFiles: + # github-app-user-auth requires: + # - Installed python package + # - GITHUB_APP_CLIENT_ID environment set + # - This configuration + # + # NOTE: an associated GitHub App has been created by Erik Sundell + # aka. @consideRatio and can be configured by him at: + # https://github.com/settings/apps/hub-jupytearth-org-github-integ + # + github-app-user-auth: + mountPath: /etc/gitconfig + stringData: | + [credential] + helper = store --file=/tmp/github-app-git-credentials + # Eksctl: The service account was created by eksctl. # @@ -98,7 +106,11 @@ hubs: # SCRATCH_BUCKET: s3://jmte-scratch/$(JUPYTERHUB_USER) PANGEO_SCRATCH: s3://jmte-scratch/$(JUPYTERHUB_USER) - + # GITHUB_APP_CLIENT_ID, see notes in singleuser.extraFiles about + # this environment variable. Two entries are created as I think + # the shorter may be deprecated soon. + GITHUB_APP_CLIENT_ID: Iv1.a073b1649637af12 + GITHUB_APP_USER_AUTH_CLIENT_ID: Iv1.a073b1649637af12 initContainers: # Need to explicitly fix ownership here, since EFS doesn't do anonuid - name: volume-mount-ownership-fix From b439dbfe7310bacc084f2c4fca6df10ad32cb694 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Mon, 28 Feb 2022 11:04:51 +0100 Subject: [PATCH 19/43] jmte: enable possibility to show hidden files --- shared/deployer/jmte.cluster.yaml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml index 0a121ebc33..af7b541699 100644 --- a/shared/deployer/jmte.cluster.yaml +++ b/shared/deployer/jmte.cluster.yaml @@ -66,12 +66,18 @@ hubs: # aka. 
@consideRatio and can be configured by him at: # https://github.com/settings/apps/hub-jupytearth-org-github-integ # - github-app-user-auth: + gitconfig: mountPath: /etc/gitconfig stringData: | [credential] helper = store --file=/tmp/github-app-git-credentials - + jupyter_notebook_config.json: + mountPath: /etc/jupyter/jupyter_notebook_config.json + data: + # Allow jupyterlab option to show hidden files in browser + # https://github.com/berkeley-dsep-infra/datahub/issues/3160 + ContentsManager: + allow_hidden: true # Eksctl: The service account was created by eksctl. # From 93d1acd993834ecabdf7fa3ca8dfe8c934ebb970 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Fri, 18 Mar 2022 18:08:19 +0100 Subject: [PATCH 20/43] jmte: adjust to deployer script rework --- config/clusters/jmte/cluster.yaml | 19 ++ config/clusters/jmte/common.values.yaml | 318 +++++++++++++++++++ config/clusters/jmte/prod.values.yaml | 90 ++++++ config/clusters/jmte/staging.values.yaml | 29 ++ debug-pod.yaml | 114 +++---- deployer/cluster.py | 2 +- deployer/cluster.schema.yaml | 2 +- eksctl/cloudformation-extras.yaml | 121 +++++-- eksctl/eksctl-cluster-config.yaml | 29 +- shared/deployer/jmte.cluster.yaml | 383 ----------------------- 10 files changed, 620 insertions(+), 487 deletions(-) create mode 100644 config/clusters/jmte/cluster.yaml create mode 100644 config/clusters/jmte/common.values.yaml create mode 100644 config/clusters/jmte/prod.values.yaml create mode 100644 config/clusters/jmte/staging.values.yaml delete mode 100644 shared/deployer/jmte.cluster.yaml diff --git a/config/clusters/jmte/cluster.yaml b/config/clusters/jmte/cluster.yaml new file mode 100644 index 0000000000..2b916d0f5b --- /dev/null +++ b/config/clusters/jmte/cluster.yaml @@ -0,0 +1,19 @@ +name: jmte +provider: none +hubs: + - name: staging + domain: staging.hub.jupytearth.org + helm_chart: daskhub + auth0: + connection: github + helm_chart_values_files: + - common.values.yaml + - staging.values.yaml + - name: prod + domain: hub.jupytearth.org + helm_chart: daskhub + auth0: + connection: github + helm_chart_values_files: + - common.values.yaml + - prod.values.yaml diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml new file mode 100644 index 0000000000..1aa0faf34f --- /dev/null +++ b/config/clusters/jmte/common.values.yaml @@ -0,0 +1,318 @@ +basehub: + # Cloudformation: The EFS filesystem was created by cloudformation. + # + nfs: + # enabled is adjusted by staging/prod values + # enabled: true + shareCreator: + enabled: true + pv: + serverIP: fs-01707b06.efs.us-west-2.amazonaws.com + # mountOptions from https://docs.aws.amazon.com/efs/latest/ug/mounting-fs-nfs-mount-settings.html + mountOptions: + - rsize=1048576 + - wsize=1048576 + - timeo=600 + - soft # We pick soft over hard, so NFS lockups don't lead to hung processes + - retrans=2 + - noresvport + # baseShareName is required to be just "/" so that we can create + # various sub folders in the filesystem that our PV to access the + # NFS server can reference successfully as it isn't supported to + # access a not yet existing folder. This creation is automated by + # the nfs-share-creator resource part of the basehub Helm chart. 
+ baseShareName: / + + jupyterhub: + custom: + homepage: + templateVars: + org: + name: Jupyter meets the Earth + logo_url: https://pangeo-data.github.io/jupyter-earth/_static/jupyter-earth.png + url: https://jupytearth.org + designed_by: + name: 2i2c + url: https://2i2c.org + operated_by: + name: 2i2c + url: https://2i2c.org + funded_by: + name: Jupyter meets the Earth + url: https://jupytearth.org + + scheduling: + userScheduler: + # Revert basehubs default that relies on GKE's built in scheduler that + # is optimized to pack pods into busy nodes. This is a AWS EKS based + # hub without such default scheduler. + enabled: true + + singleuser: + # extraFiles ref: https://zero-to-jupyterhub.readthedocs.io/en/latest/resources/reference.html#singleuser-extrafiles + extraFiles: + # github-app-user-auth requires: + # - Installed python package + # - GITHUB_APP_CLIENT_ID environment set + # - This configuration + # + # NOTE: an associated GitHub App has been created by Erik Sundell + # aka. @consideRatio and can be configured by him at: + # https://github.com/settings/apps/hub-jupytearth-org-github-integ + # + gitconfig: + mountPath: /etc/gitconfig + stringData: | + [credential] + helper = store --file=/tmp/github-app-git-credentials + jupyter_notebook_config.json: + mountPath: /etc/jupyter/jupyter_notebook_config.json + data: + # Allow jupyterlab option to show hidden files in browser + # https://github.com/berkeley-dsep-infra/datahub/issues/3160 + ContentsManager: + allow_hidden: true + + # Eksctl: The service account was created by eksctl. + # + # serviceAccountName is added to prod values + # serviceAccountName: &user-sa s3-full-access + + # Increased as we have experienced a too slow image pull at least + # once. Our pods can take ~6-7 minutes to start on a new node it + # seems, so this gives us some margin. + startTimeout: 900 + + extraEnv: + # GITHUB_APP_CLIENT_ID, see notes in singleuser.extraFiles about + # this environment variable. Two entries are created as I think + # the shorter may be deprecated soon. + GITHUB_APP_CLIENT_ID: Iv1.a073b1649637af12 + GITHUB_APP_USER_AUTH_CLIENT_ID: Iv1.a073b1649637af12 + + image: + # NOTE: We use the jupyterhub-configurator so this image/tag is not + # relevant. Visit its UI to configure the hub. + # + # staging: https://staging.hub.jupytearth.org/services/configurator/ + # prod: https://hub.jupytearth.org/services/configurator/ + pullPolicy: Always + name: 286354552638.dkr.ecr.us-west-2.amazonaws.com/jmte/user-env + tag: "latest" + + profileList: + - display_name: "16th of Medium: 0.25-4 CPU, 1-16 GB" + default: True + description: "A shared machine, the recommended option until you experience a limitation." + kubespawner_override: + cpu_guarantee: 0.225 + mem_guarantee: 0.875G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "4" } + extra_resource_limits: {} + - display_name: "4th of Medium: 1-4 CPU, 4-16 GB" + description: "A shared machine." + kubespawner_override: + cpu_guarantee: 0.875 + mem_guarantee: 3.5G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "4" } + extra_resource_limits: {} + - display_name: "Medium: 4 CPU, 16 GB" + description: "A dedicated machine for you." + kubespawner_override: + cpu_guarantee: 3.5 + mem_guarantee: 14G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "4" } + extra_resource_limits: {} + - display_name: "Large: 16 CPU, 64 GB" + description: "A dedicated machine for you." 
+ kubespawner_override: + mem_guarantee: 56G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "16" } + extra_resource_limits: {} + - display_name: "Massive: 64 CPU, 256 GB" + description: "A dedicated machine for you." + kubespawner_override: + mem_guarantee: 224G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "64" } + extra_resource_limits: {} + - display_name: "Massive high-memory: 64 CPU, 976 GB" + description: "A dedicated machine for you." + kubespawner_override: + mem_guarantee: 900G + mem_limit: null + node_selector: { 2i2c.org/node-highmem-cpu: "64" } + extra_resource_limits: {} + - display_name: "Medium GPU: 4 CPU, 16 GB, 1 T4 Tensor Core GPU" + description: "A dedicated machine for you with one GPU attached." + kubespawner_override: + cpu_guarantee: 3.5 + mem_guarantee: 14G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "4", 2i2c.org/node-gpu: "1" } + extra_resource_limits: + nvidia.com/gpu: "1" + - display_name: "Large GPU: 16 CPU, 64 GB, 1 T4 Tensor Core GPU" + description: "A dedicated machine for you with one GPU attached." + kubespawner_override: + mem_guarantee: 56G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "16", 2i2c.org/node-gpu: "1" } + extra_resource_limits: + nvidia.com/gpu: "1" + + proxy: + # proxy notes: + # + # - Revert basehubs overrides as we don't install ingress-nginx and + # cert-manager yet, and therefore should use + # service.type=LoadBalancer instead of service.type=ClusterIP. + # Along with this, we also make use of the autohttps system that + # requires us to configure an letsencrypt email. + # + https: + enabled: true + type: letsencrypt + letsencrypt: + contactEmail: erik@sundellopensource.se + + service: + # Revert an unwanted basehub default + type: LoadBalancer + annotations: + service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600" + + # jupyterhub-ssh/sftp integration part 1/3: + # + # We must accept traffic to the k8s Service (proxy-public) receiving traffic + # from the internet. Port 22 is typically used for both SSH and SFTP, but we + # can't use the same port for both so we use 2222 for SFTP in this example. + # + extraPorts: + - name: ssh + port: 22 + targetPort: ssh + - name: sftp + port: 2222 + targetPort: sftp + traefik: + # jupyterhub-ssh/sftp integration part 2/3: + # + # We must accept traffic arriving to the autohttps pod (traefik) from the + # proxy-public service. Expose a port and update the NetworkPolicy + # to tolerate incoming (ingress) traffic on the exposed port. 
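      # For context, end users reach these ports with ordinary SSH/SFTP
      # clients, logging in with their hub username and (typically) a
      # JupyterHub API token as the password. A rough sketch using paramiko,
      # assuming it is installed on the user's machine; the hostname and the
      # ports match the proxy-public service configured above:
      #
      #   import paramiko
      #
      #   client = paramiko.SSHClient()
      #   client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
      #   client.connect(
      #       "hub.jupytearth.org",
      #       port=22,
      #       username="<hub-username>",
      #       password="<jupyterhub-api-token>",
      #   )
      #   stdin, stdout, stderr = client.exec_command("ls ~")
      #   print(stdout.read().decode())
      #   # SFTP uses port 2222 instead, e.g. via paramiko.Transport.
      #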
+ # + extraPorts: + - name: ssh + containerPort: 8022 + - name: sftp + containerPort: 2222 + networkPolicy: + allowedIngressPorts: [http, https, ssh, sftp] + # jupyterhub-ssh/sftp integration part 3/3: + # + # extraStaticConfig is adjusted by staging/prod values + # extraDynamicConfig is adjusted by staging/prod values + + hub: + config: + Authenticator: + allowed_users: &users + # This is just listing a few of the users/admins, a lot of + # users has been added manually, see: + # https://github.com/pangeo-data/jupyter-earth/issues/53 + - abbyazari # Abby Azari + - andersy005 # Anderson Banihirwe + - consideratio # Erik Sundell + - choldgraf # Chris Holdgraf + - elliesch # Ellie Abrahams + - EMscience # Edom Moges + - espg # Shane Grigsby + - facusapienza21 # Facundo Sapienza + - fperez # Fernando Pérez + - kmpaul # Kevin Paul + - lrennels # Lisa Rennels + - mrsiegfried # Matthew Siegfried + - tsnow03 # Tasha Snow + - whyjz # Whyjay Zheng + - yuvipanda # Yuvi Panda + - jonathan-taylor # Jonathan Taylor + admin_users: *users + allowNamedServers: true + networkPolicy: + # FIXME: Required for dask gateway 0.9.0. It is fixed but a Helm + # chart of newer version is not yet released. + enabled: false + +dask-gateway: + # dask-gateway notes: + # + # - Explicitly unset daskhub's nodeSelectors for all pods except the + # worker pods. The tolerations applied in the basehub config to all + # non-worker pods in dask-gateway will provide a preferred affinity + # towards suitable nodes without needing to have a label on them. Then + # we use the node label "k8s.dask.org/node-purpose: worker" + # specifically for enforce workers to schedule on such nodes. + # + traefik: + nodeSelector: null + controller: + nodeSelector: null + gateway: + nodeSelector: null + backend: + scheduler: + # IMPORTANT: We have experienced that the scheduler can fail with + # 1GB memory limit. This was observed "stream closed" + # from the python client working against the + # Dask-Gateway created DaskCluster. + # + # CommClosedError: in : Stream is closed + # + cores: + request: 1 + limit: 64 + memory: + request: 2G + limit: 500G + extraPodConfig: + nodeSelector: + hub.jupyter.org/node-purpose: user + k8s.dask.org/node-purpose: null + # serviceAccountName is adjusted by staging/prod values + # serviceAccountName: *user-sa + worker: + extraPodConfig: + nodeSelector: + k8s.dask.org/node-purpose: worker + # serviceAccountName is adjusted by staging/prod values + # serviceAccountName: *user-sa + + extraConfig: + idle: | + # timeout after 30 minutes of inactivity + c.KubeClusterConfig.idle_timeout = 1800 + limits: | + # per Dask cluster limits. + c.ClusterConfig.cluster_max_cores = 256 + c.ClusterConfig.cluster_max_memory = "1028G" + +# jupyterhub-ssh values.yaml reference: +# https://github.com/yuvipanda/jupyterhub-ssh/blob/main/helm-chart/jupyterhub-ssh/values.yaml +# +jupyterhub-ssh: + hubUrl: http://proxy-http:8000 + + ssh: + enabled: true + + sftp: + # enabled is adjusted by staging/prod values + # enabled: true + pvc: + enabled: true + name: home-nfs diff --git a/config/clusters/jmte/prod.values.yaml b/config/clusters/jmte/prod.values.yaml new file mode 100644 index 0000000000..9ff43eca6a --- /dev/null +++ b/config/clusters/jmte/prod.values.yaml @@ -0,0 +1,90 @@ +basehub: + nfs: + enabled: true + + jupyterhub: + singleuser: + # Eksctl: The service account was created by eksctl. 
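      # The s3-full-access service account referenced just below is what gives
      # user (and dask) pods S3 access via IRSA without mounting explicit
      # credentials, so the SCRATCH_BUCKET / PANGEO_SCRATCH variables defined
      # further down can be used directly. A minimal sketch from a user
      # server, assuming s3fs is available in the image:
      #
      #   import os
      #   import s3fs
      #
      #   scratch = os.environ["SCRATCH_BUCKET"]  # s3://jmte-scratch/<username>
      #   fs = s3fs.S3FileSystem()                # picks up the IRSA credentials
      #   with fs.open(f"{scratch}/hello.txt", "w") as f:
      #       f.write("hello from the hub")
      #   print(fs.ls(scratch))
      #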
+ # + serviceAccountName: &user-sa s3-full-access + + extraEnv: + # SCRATCH_BUCKET / PANGEO_SCRATCH are environment variables that + # help users write notebooks and such referencing this environment + # variable in a way that will work between users. + # + # $(ENV_VAR) will by evaluated by k8s automatically + # + # Cloudformation: The s3 bucket was created by cloudformation. + # + SCRATCH_BUCKET: s3://jmte-scratch/$(JUPYTERHUB_USER) + PANGEO_SCRATCH: s3://jmte-scratch/$(JUPYTERHUB_USER) + + initContainers: + # Need to explicitly fix ownership here, since EFS doesn't do anonuid + - name: volume-mount-ownership-fix + image: busybox + command: + [ + "sh", + "-c", + "id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan", + ] + securityContext: + runAsUser: 0 + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: "{username}" + - name: home + mountPath: /home/jovyan/shared + subPath: _shared + + proxy: + traefik: + # jupyterhub-ssh/sftp integration part 3/3: + # + # We must let traefik know it should listen for traffic (traefik entrypoint) + # and route it (traefik router) onwards to the jupyterhub-ssh k8s Service + # (traefik service). + # + extraStaticConfig: + entryPoints: + ssh-entrypoint: + address: :8022 + sftp-entrypoint: + address: :2222 + extraDynamicConfig: + tcp: + services: + ssh-service: + loadBalancer: + servers: + - address: jupyterhub-ssh:22 + sftp-service: + loadBalancer: + servers: + - address: jupyterhub-sftp:22 + routers: + ssh-router: + entrypoints: [ssh-entrypoint] + rule: HostSNI(`*`) + service: ssh-service + sftp-router: + entrypoints: [sftp-entrypoint] + rule: HostSNI(`*`) + service: sftp-service + +dask-gateway: + gateway: + backend: + scheduler: + extraPodConfig: + serviceAccountName: *user-sa + worker: + extraPodConfig: + serviceAccountName: *user-sa + +jupyterhub-ssh: + sftp: + enabled: true diff --git a/config/clusters/jmte/staging.values.yaml b/config/clusters/jmte/staging.values.yaml new file mode 100644 index 0000000000..570bd8ebe2 --- /dev/null +++ b/config/clusters/jmte/staging.values.yaml @@ -0,0 +1,29 @@ +basehub: + nfs: + enabled: false + + jupyterhub: + custom: + singleuserAdmin: + extraVolumeMounts: [] + + singleuser: + storage: + type: none + extraVolumeMounts: [] + + # cmd: Note the default in z2jh is jupyterhub-singleuser. + cmd: + - jupyterhub-singleuser + # WARNING: Collaborative mode is enabled in the staging hub specifically + # to debug a critical issue leading to a loss of data. 
+ # + # ref: https://github.com/jupyterlab/jupyterlab/issues/12154#issuecomment-1069352840 + # ref: https://discourse.jupyter.org/t/plans-on-bringing-rtc-to-jupyterhub/9813/13 + # ref: https://github.com/jupyterlab/jupyterlab/pull/11599 + # + - --LabApp.collaborative=True + +jupyterhub-ssh: + sftp: + enabled: false diff --git a/debug-pod.yaml b/debug-pod.yaml index b456001eaa..53a6d76a7d 100644 --- a/debug-pod.yaml +++ b/debug-pod.yaml @@ -19,46 +19,46 @@ spec: affinity: nodeAffinity: preferredDuringSchedulingIgnoredDuringExecution: - - preference: - matchExpressions: - - key: hub.jupyter.org/node-purpose - operator: In - values: - - user - weight: 100 + - preference: + matchExpressions: + - key: hub.jupyter.org/node-purpose + operator: In + values: + - user + weight: 100 containers: - - args: - - jupyterhub-singleuser - - --ip=0.0.0.0 - - --port=8888 - - --SingleUserNotebookApp.default_url=/lab - image: 286354552638.dkr.ecr.us-west-2.amazonaws.com/jmte/user-env:c6d9558 - name: notebook - ports: - - containerPort: 8888 - name: notebook-port - protocol: TCP - resources: - requests: - cpu: 225m - memory: "939524096" + - args: + - jupyterhub-singleuser + - --ip=0.0.0.0 + - --port=8888 + - --SingleUserNotebookApp.default_url=/lab + image: 286354552638.dkr.ecr.us-west-2.amazonaws.com/jmte/user-env:c6d9558 + name: notebook + ports: + - containerPort: 8888 + name: notebook-port + protocol: TCP + resources: + requests: + cpu: 225m + memory: "939524096" initContainers: - - command: - - sh - - -c - - id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan & sleep infinity - image: busybox - imagePullPolicy: Always - name: volume-mount-ownership-fix - securityContext: - runAsUser: 0 - volumeMounts: - - mountPath: /home/jovyan - name: home - subPath: fperez - - mountPath: /home/jovyan/shared - name: home - subPath: _shared + - command: + - sh + - -c + - id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan & sleep infinity + image: busybox + imagePullPolicy: Always + name: volume-mount-ownership-fix + securityContext: + runAsUser: 0 + volumeMounts: + - mountPath: /home/jovyan + name: home + subPath: fperez + - mountPath: /home/jovyan/shared + name: home + subPath: _shared nodeSelector: 2i2c.org/node-cpu: "4" priority: 0 @@ -71,23 +71,23 @@ spec: serviceAccountName: s3-full-access terminationGracePeriodSeconds: 30 tolerations: - - effect: NoSchedule - key: hub.jupyter.org/dedicated - operator: Equal - value: user - - effect: NoSchedule - key: hub.jupyter.org_dedicated - operator: Equal - value: user - - effect: NoExecute - key: node.kubernetes.io/not-ready - operator: Exists - tolerationSeconds: 300 - - effect: NoExecute - key: node.kubernetes.io/unreachable - operator: Exists - tolerationSeconds: 300 + - effect: NoSchedule + key: hub.jupyter.org/dedicated + operator: Equal + value: user + - effect: NoSchedule + key: hub.jupyter.org_dedicated + operator: Equal + value: user + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 300 + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 300 volumes: - - name: home - persistentVolumeClaim: - claimName: home-nfs \ No newline at end of file + - name: home + persistentVolumeClaim: + claimName: home-nfs diff --git a/deployer/cluster.py b/deployer/cluster.py index 91de692529..2a451fd2d6 100644 --- a/deployer/cluster.py +++ b/deployer/cluster.py @@ -31,7 +31,7 @@ def auth(self): yield from self.auth_azure() elif 
self.spec["provider"] == "kubeconfig": yield from self.auth_kubeconfig() - elif self.spec['provider'] == 'none': + elif self.spec["provider"] == "none": yield else: raise ValueError(f'Provider {self.spec["provider"]} not supported') diff --git a/deployer/cluster.schema.yaml b/deployer/cluster.schema.yaml index 19b2f58de2..f14a7f3691 100644 --- a/deployer/cluster.schema.yaml +++ b/deployer/cluster.schema.yaml @@ -27,7 +27,7 @@ properties: Cloud provider this cluster is running on. Used to perform authentication against the cluster. Currently supports gcp, aws, azure, and raw kubeconfig files. - enum: + enum: - none - gcp - kubeconfig diff --git a/eksctl/cloudformation-extras.yaml b/eksctl/cloudformation-extras.yaml index c80f541be8..2bf9fa60ce 100644 --- a/eksctl/cloudformation-extras.yaml +++ b/eksctl/cloudformation-extras.yaml @@ -78,7 +78,6 @@ Parameters: Type: String Default: ci-eks - # The resources we want to be created as part of this cloudformation stack Resources: # ref: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-iam-user.html @@ -115,8 +114,8 @@ Resources: Effect: Allow Principal: AWS: !Join - - '' - - - 'arn:aws:iam::' + - "" + - - "arn:aws:iam::" - !Ref AWS::AccountId - :user/ - !Ref IamUser @@ -135,13 +134,13 @@ Resources: Action: - eks:DescribeCluster Resource: !Join - - '' - - - 'arn:aws:eks:' - - !Ref AWS::Region - - ':' - - !Ref AWS::AccountId - - ':cluster/' - - !Ref EksClusterName + - "" + - - "arn:aws:eks:" + - !Ref AWS::Region + - ":" + - !Ref AWS::AccountId + - ":cluster/" + - !Ref EksClusterName AssumeRolePolicyDocument: Version: 2012-10-17 Statement: @@ -149,8 +148,8 @@ Resources: Effect: Allow Principal: AWS: !Join - - '' - - - 'arn:aws:iam::' + - "" + - - "arn:aws:iam::" - !Ref AWS::AccountId - :user/ - !Ref IamUser @@ -170,22 +169,91 @@ Resources: Properties: FileSystemId: !GetAtt EfsFileSystem.FileSystemId SecurityGroups: - - {"Fn::ImportValue": {"Fn::Sub": "eksctl-${EksClusterName}-cluster::SharedNodeSecurityGroup"}} - SubnetId: { "Fn::Select": [0, { "Fn::Split": [",", {"Fn::ImportValue": {"Fn::Sub": "eksctl-${EksClusterName}-cluster::SubnetsPublic"}}]}] } + - { + "Fn::ImportValue": + { + "Fn::Sub": "eksctl-${EksClusterName}-cluster::SharedNodeSecurityGroup", + }, + } + SubnetId: + { + "Fn::Select": + [ + 0, + { + "Fn::Split": + [ + ",", + { + "Fn::ImportValue": + { + "Fn::Sub": "eksctl-${EksClusterName}-cluster::SubnetsPublic", + }, + }, + ], + }, + ], + } EfsMountTarget1: Type: AWS::EFS::MountTarget Properties: FileSystemId: !GetAtt EfsFileSystem.FileSystemId SecurityGroups: - - {"Fn::ImportValue": {"Fn::Sub" : "eksctl-${EksClusterName}-cluster::SharedNodeSecurityGroup"}} - SubnetId: { "Fn::Select": [1, { "Fn::Split": [",", {"Fn::ImportValue": {"Fn::Sub": "eksctl-${EksClusterName}-cluster::SubnetsPublic"}}]}] } + - { + "Fn::ImportValue": + { + "Fn::Sub": "eksctl-${EksClusterName}-cluster::SharedNodeSecurityGroup", + }, + } + SubnetId: + { + "Fn::Select": + [ + 1, + { + "Fn::Split": + [ + ",", + { + "Fn::ImportValue": + { + "Fn::Sub": "eksctl-${EksClusterName}-cluster::SubnetsPublic", + }, + }, + ], + }, + ], + } EfsMountTarget2: Type: AWS::EFS::MountTarget Properties: FileSystemId: !GetAtt EfsFileSystem.FileSystemId SecurityGroups: - - {"Fn::ImportValue": {"Fn::Sub" : "eksctl-${EksClusterName}-cluster::SharedNodeSecurityGroup"}} - SubnetId: { "Fn::Select": [2, { "Fn::Split": [",", {"Fn::ImportValue": {"Fn::Sub": "eksctl-${EksClusterName}-cluster::SubnetsPublic"}}]}] } + - { + "Fn::ImportValue": + { + "Fn::Sub": 
"eksctl-${EksClusterName}-cluster::SharedNodeSecurityGroup", + }, + } + SubnetId: + { + "Fn::Select": + [ + 2, + { + "Fn::Split": + [ + ",", + { + "Fn::ImportValue": + { + "Fn::Sub": "eksctl-${EksClusterName}-cluster::SubnetsPublic", + }, + }, + ], + }, + ], + } # ref: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-s3-bucket.html # @@ -225,18 +293,18 @@ Resources: Effect: Allow Principal: AWS: !Join - - '' - - - 'arn:aws:iam::' + - "" + - - "arn:aws:iam::" - !Ref AWS::AccountId - :root - Action: 'kms:*' - Resource: '*' + Action: "kms:*" + Resource: "*" - Sid: Enable User Permissions Effect: Allow Principal: AWS: !Join - - '' - - - 'arn:aws:iam::' + - "" + - - "arn:aws:iam::" - !Ref AWS::AccountId - :user/ - !Ref IamUser @@ -247,8 +315,7 @@ Resources: - "kms:ReEncrypt*" - "kms:GenerateDataKey" - "kms:GenerateDataKeyWithoutPlaintext" - Resource: '*' - + Resource: "*" # The relevant information from the created resources. Outputs: @@ -264,7 +331,7 @@ Outputs: The Role with permission to push to our image registry. EcrRepository: Value: !Join - - '' + - "" - - !Ref AWS::AccountId - .dkr.ecr. - !Ref AWS::Region diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index b97ddee8be..2806d04454 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -26,8 +26,6 @@ # ref: https://github.com/2i2c-org/pangeo-hubs/blob/8e552bc198d8339efe8c003cb847849255e8f8ed/aws/eksctl-config.yaml # - - apiVersion: eksctl.io/v1alpha5 kind: ClusterConfig metadata: @@ -48,8 +46,6 @@ metadata: # availabilityZones: [us-west-2d, us-west-2b, us-west-2a] - - # This section will create additional k8s ServiceAccount's that are coupled with # AWS Role's. By declaring pods to use them, you can grant these pods the # associated permissions. For this deployment, we create a k8s ServiceAccount @@ -57,7 +53,7 @@ availabilityZones: [us-west-2d, us-west-2b, us-west-2a] # pods will make use of. # iam: - withOIDC: true # https://eksctl.io/usage/security/#withoidc + withOIDC: true # https://eksctl.io/usage/security/#withoidc # serviceAccounts like nodeGroups etc can be managed directly with eksctl, for # more information, see: https://eksctl.io/usage/iamserviceaccounts/ # @@ -79,8 +75,6 @@ iam: attachPolicyARNs: - arn:aws:iam::aws:policy/AmazonS3FullAccess - - # Choose the type of node group? # - nodeGroups cannot be updated but must be recreated on changes: # https://eksctl.io/usage/managing-nodegroups/#nodegroup-immutability @@ -111,8 +105,8 @@ iam: # nodeGroups: - name: core-a - availabilityZones: [us-west-2d] # aws ec2 describe-availability-zones --region - instanceType: m5.large # 28 pods, 2 cpu, 8 GB + availabilityZones: [us-west-2d] # aws ec2 describe-availability-zones --region + instanceType: m5.large # 28 pods, 2 cpu, 8 GB minSize: 0 maxSize: 2 desiredCapacity: 1 @@ -325,8 +319,6 @@ nodeGroups: autoScaler: true efs: true - - # Worker node pools using cheaper spot instances that are temporary. # # References: @@ -346,7 +338,8 @@ nodeGroups: # and was just part of YAML 1.1 but not 1.0 or 1.2. 
# - name: worker-a-4 - availabilityZones: &worker-availabilityZones [us-west-2d, us-west-2b, us-west-2a] + availabilityZones: + &worker-availabilityZones [us-west-2d, us-west-2b, us-west-2a] minSize: &worker-minSize 0 maxSize: &worker-maxSize 8 desiredCapacity: &worker-desiredCapacity 0 @@ -367,8 +360,8 @@ nodeGroups: # Spot instance specific configuration instancesDistribution: instanceTypes: - - m5a.xlarge # 57 pods, 4 cpu, 16 GB (AMD, 10 GBits network, 100% cost) - - m5.xlarge # 57 pods, 4 cpu, 16 GB (Intel, 10 GBits network, ~112% cost) + - m5a.xlarge # 57 pods, 4 cpu, 16 GB (AMD, 10 GBits network, 100% cost) + - m5.xlarge # 57 pods, 4 cpu, 16 GB (Intel, 10 GBits network, ~112% cost) # - m5n.xlarge # 57 pods, 4 cpu, 16 GB (Intel, 25 GBits network, ~139% cost) onDemandBaseCapacity: &worker-onDemandBaseCapacity 0 onDemandPercentageAboveBaseCapacity: &worker-onDemandPercentageAboveBaseCapacity 0 @@ -392,8 +385,8 @@ nodeGroups: iam: *worker-iam instancesDistribution: instanceTypes: - - m5a.4xlarge # 233 pods, 16 cpu, 64 GB (AMD, 10 GBits network, 100% cost) - - m5.4xlarge # 233 pods, 16 cpu, 64 GB (Intel, 10 GBits network, ~112% cost) + - m5a.4xlarge # 233 pods, 16 cpu, 64 GB (AMD, 10 GBits network, 100% cost) + - m5.4xlarge # 233 pods, 16 cpu, 64 GB (Intel, 10 GBits network, ~112% cost) # - m5n.4xlarge # 233 pods, 16 cpu, 64 GB (Intel, 25 GBits network, ~139% cost) onDemandBaseCapacity: *worker-onDemandBaseCapacity onDemandPercentageAboveBaseCapacity: *worker-onDemandPercentageAboveBaseCapacity @@ -417,8 +410,8 @@ nodeGroups: iam: *worker-iam instancesDistribution: instanceTypes: - - m5a.16xlarge # 736 pods, 64 cpu, 256 GB (AMD, 12 GBits network, 100% cost) - - m5.16xlarge # 736 pods, 64 cpu, 256 GB (Intel, 20 GBits network, ~112% cost) + - m5a.16xlarge # 736 pods, 64 cpu, 256 GB (AMD, 12 GBits network, 100% cost) + - m5.16xlarge # 736 pods, 64 cpu, 256 GB (Intel, 20 GBits network, ~112% cost) # - m5n.16xlarge # 736 pods, 64 cpu, 256 GB (Intel, 75 GBits network, ~139% cost) onDemandBaseCapacity: *worker-onDemandBaseCapacity onDemandPercentageAboveBaseCapacity: *worker-onDemandPercentageAboveBaseCapacity diff --git a/shared/deployer/jmte.cluster.yaml b/shared/deployer/jmte.cluster.yaml deleted file mode 100644 index af7b541699..0000000000 --- a/shared/deployer/jmte.cluster.yaml +++ /dev/null @@ -1,383 +0,0 @@ -name: jmte -provider: none -# kubeconfig: -# file: secrets/jmte.yaml -hubs: - - name: prod - domain: hub.jupytearth.org - template: daskhub - auth0: - connection: github - config: &config - - basehub: - # Cloudformation: The EFS filesystem was created by cloudformation. - # - nfs: - enabled: true - shareCreator: - enabled: true - pv: - serverIP: fs-01707b06.efs.us-west-2.amazonaws.com - # mountOptions from https://docs.aws.amazon.com/efs/latest/ug/mounting-fs-nfs-mount-settings.html - mountOptions: - - rsize=1048576 - - wsize=1048576 - - timeo=600 - - soft # We pick soft over hard, so NFS lockups don't lead to hung processes - - retrans=2 - - noresvport - # baseShareName is required to be just "/" so that we can create - # various sub folders in the filesystem that our PV to access the - # NFS server can reference successfully as it isn't supported to - # access a not yet existing folder. This creation is automated by - # the nfs-share-creator resource part of the basehub Helm chart. 
- baseShareName: / - - - - jupyterhub: - custom: - homepage: - templateVars: - org: - name: Jupyter meets the Earth - logo_url: https://pangeo-data.github.io/jupyter-earth/_static/jupyter-earth.png - url: https://jupytearth.org - designed_by: - name: 2i2c - url: https://2i2c.org - operated_by: - name: 2i2c - url: https://2i2c.org - funded_by: - name: Jupyter meets the Earth - url: https://jupytearth.org - - singleuser: - # extraFiles ref: https://zero-to-jupyterhub.readthedocs.io/en/latest/resources/reference.html#singleuser-extrafiles - extraFiles: - # github-app-user-auth requires: - # - Installed python package - # - GITHUB_APP_CLIENT_ID environment set - # - This configuration - # - # NOTE: an associated GitHub App has been created by Erik Sundell - # aka. @consideRatio and can be configured by him at: - # https://github.com/settings/apps/hub-jupytearth-org-github-integ - # - gitconfig: - mountPath: /etc/gitconfig - stringData: | - [credential] - helper = store --file=/tmp/github-app-git-credentials - jupyter_notebook_config.json: - mountPath: /etc/jupyter/jupyter_notebook_config.json - data: - # Allow jupyterlab option to show hidden files in browser - # https://github.com/berkeley-dsep-infra/datahub/issues/3160 - ContentsManager: - allow_hidden: true - - # Eksctl: The service account was created by eksctl. - # - serviceAccountName: &user-sa s3-full-access - - # cmd: Note the default in z2jh is jupyterhub-singleuser. - cmd: - - jupyterhub-singleuser - # FIXME: Collaborative mode is disabled due to critical issues - # reported in - # https://discourse.jupyter.org/t/plans-on-bringing-rtc-to-jupyterhub/9813/13 - # seem to remain according to Tasha Snow. - # - # These issues may be resolved by - # https://github.com/jupyterlab/jupyterlab/pull/11599. - # - # - --LabApp.collaborative=True - - # Increased as we have experienced a too slow image pull at least - # once. Our pods can take ~6-7 minutes to start on a new node it - # seems, so this gives us some margin. - startTimeout: 900 - - extraEnv: - # SCRATCH_BUCKET / PANGEO_SCRATCH are environment variables that - # help users write notebooks and such referencing this environment - # variable in a way that will work between users. - # - # $(ENV_VAR) will by evaluated by k8s automatically - # - # Cloudformation: The s3 bucket was created by cloudformation. - # - SCRATCH_BUCKET: s3://jmte-scratch/$(JUPYTERHUB_USER) - PANGEO_SCRATCH: s3://jmte-scratch/$(JUPYTERHUB_USER) - # GITHUB_APP_CLIENT_ID, see notes in singleuser.extraFiles about - # this environment variable. Two entries are created as I think - # the shorter may be deprecated soon. - GITHUB_APP_CLIENT_ID: Iv1.a073b1649637af12 - GITHUB_APP_USER_AUTH_CLIENT_ID: Iv1.a073b1649637af12 - initContainers: - # Need to explicitly fix ownership here, since EFS doesn't do anonuid - - name: volume-mount-ownership-fix - image: busybox - command: ["sh", "-c", "id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan"] - securityContext: - runAsUser: 0 - volumeMounts: - - name: home - mountPath: /home/jovyan - subPath: "{username}" - - name: home - mountPath: /home/jovyan/shared - subPath: _shared - - image: - name: pangeo/pangeo-notebook - tag: "2021.05.15" # https://hub.docker.com/r/pangeo/pangeo-notebook/tags - - profileList: - - display_name: "16th of Medium: 0.25-4 CPU, 1-16 GB" - default: True - description: "A shared machine, the recommended option until you experience a limitation." 
- kubespawner_override: - cpu_guarantee: 0.225 - mem_guarantee: 0.875G - mem_limit: null - node_selector: { 2i2c.org/node-cpu: "4" } - extra_resource_limits: {} - - display_name: "4th of Medium: 1-4 CPU, 4-16 GB" - description: "A shared machine." - kubespawner_override: - cpu_guarantee: 0.875 - mem_guarantee: 3.5G - mem_limit: null - node_selector: { 2i2c.org/node-cpu: "4" } - extra_resource_limits: {} - - display_name: "Medium: 4 CPU, 16 GB" - description: "A dedicated machine for you." - kubespawner_override: - cpu_guarantee: 3.5 - mem_guarantee: 14G - mem_limit: null - node_selector: { 2i2c.org/node-cpu: "4" } - extra_resource_limits: {} - - display_name: "Large: 16 CPU, 64 GB" - description: "A dedicated machine for you." - kubespawner_override: - mem_guarantee: 56G - mem_limit: null - node_selector: { 2i2c.org/node-cpu: "16" } - extra_resource_limits: {} - - display_name: "Massive: 64 CPU, 256 GB" - description: "A dedicated machine for you." - kubespawner_override: - mem_guarantee: 224G - mem_limit: null - node_selector: { 2i2c.org/node-cpu: "64" } - extra_resource_limits: {} - - display_name: "Massive high-memory: 64 CPU, 976 GB" - description: "A dedicated machine for you." - kubespawner_override: - mem_guarantee: 900G - mem_limit: null - node_selector: { 2i2c.org/node-highmem-cpu: "64" } - extra_resource_limits: {} - - display_name: "Medium GPU: 4 CPU, 16 GB, 1 T4 Tensor Core GPU" - description: "A dedicated machine for you with one GPU attached." - kubespawner_override: - cpu_guarantee: 3.5 - mem_guarantee: 14G - mem_limit: null - node_selector: { 2i2c.org/node-cpu: "4", 2i2c.org/node-gpu: "1" } - extra_resource_limits: - nvidia.com/gpu: "1" - - display_name: "Large GPU: 16 CPU, 64 GB, 1 T4 Tensor Core GPU" - description: "A dedicated machine for you with one GPU attached." - kubespawner_override: - mem_guarantee: 56G - mem_limit: null - node_selector: { 2i2c.org/node-cpu: "16", 2i2c.org/node-gpu: "1" } - extra_resource_limits: - nvidia.com/gpu: "1" - - proxy: - # proxy notes: - # - # - Revert basehubs overrides as we don't install ingress-nginx and - # cert-manager yet, and therefore should use - # service.type=LoadBalancer instead of service.type=ClusterIP. - # Along with this, we also make use of the autohttps system that - # requires us to configure an letsencrypt email. - # - https: - enabled: true - type: letsencrypt - letsencrypt: - contactEmail: erik@sundellopensource.se - - service: - # Revert an unwanted basehub default - type: LoadBalancer - annotations: - service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600" - - # jupyterhub-ssh/sftp integration part 1/3: - # - # We must accept traffic to the k8s Service (proxy-public) receiving traffic - # from the internet. Port 22 is typically used for both SSH and SFTP, but we - # can't use the same port for both so we use 2222 for SFTP in this example. - # - extraPorts: - - name: ssh - port: 22 - targetPort: ssh - - name: sftp - port: 2222 - targetPort: sftp - traefik: - # jupyterhub-ssh/sftp integration part 2/3: - # - # We must accept traffic arriving to the autohttps pod (traefik) from the - # proxy-public service. Expose a port and update the NetworkPolicy - # to tolerate incoming (ingress) traffic on the exposed port. 
- # - extraPorts: - - name: ssh - containerPort: 8022 - - name: sftp - containerPort: 2222 - networkPolicy: - allowedIngressPorts: [http, https, ssh, sftp] - # jupyterhub-ssh/sftp integration part 3/3: - # - # We must let traefik know it should listen for traffic (traefik entrypoint) - # and route it (traefik router) onwards to the jupyterhub-ssh k8s Service - # (traefik service). - # - extraStaticConfig: - entryPoints: - ssh-entrypoint: - address: :8022 - sftp-entrypoint: - address: :2222 - extraDynamicConfig: - tcp: - services: - ssh-service: - loadBalancer: - servers: - - address: jupyterhub-ssh:22 - sftp-service: - loadBalancer: - servers: - - address: jupyterhub-sftp:22 - routers: - ssh-router: - entrypoints: [ssh-entrypoint] - rule: HostSNI(`*`) - service: ssh-service - sftp-router: - entrypoints: [sftp-entrypoint] - rule: HostSNI(`*`) - service: sftp-service - - - - hub: - config: - Authenticator: - allowed_users: &users - # This is just listing a few of the users/admins, a lot of - # users has been added manually, see: - # https://github.com/pangeo-data/jupyter-earth/issues/53 - - abbyazari # Abby Azari - - andersy005 # Anderson Banihirwe - - consideratio # Erik Sundell - - choldgraf # Chris Holdgraf - - elliesch # Ellie Abrahams - - EMscience # Edom Moges - - espg # Shane Grigsby - - facusapienza21 # Facundo Sapienza - - fperez # Fernando Pérez - - kmpaul # Kevin Paul - - lrennels # Lisa Rennels - - mrsiegfried # Matthew Siegfried - - tsnow03 # Tasha Snow - - whyjz # Whyjay Zheng - - yuvipanda # Yuvi Panda - - jonathan-taylor # Jonathan Taylor - admin_users: *users - allowNamedServers: true - networkPolicy: - # FIXME: Required for dask gateway 0.9.0. It is fixed but a Helm - # chart of newer version is not yet released. - enabled: false - - - - dask-gateway: - # dask-gateway notes: - # - # - Explicitly unset daskhub's nodeSelectors for all pods except the - # worker pods. The tolerations applied in the basehub config to all - # non-worker pods in dask-gateway will provide a preferred affinity - # towards suitable nodes without needing to have a label on them. Then - # we use the node label "k8s.dask.org/node-purpose: worker" - # specifically for enforce workers to schedule on such nodes. - # - traefik: - nodeSelector: null - controller: - nodeSelector: null - gateway: - nodeSelector: null - backend: - scheduler: - # IMPORTANT: We have experienced that the scheduler can fail with - # 1GB memory limit. This was observed "stream closed" - # from the python client working against the - # Dask-Gateway created DaskCluster. - # - # CommClosedError: in : Stream is closed - # - cores: - request: 1 - limit: 64 - memory: - request: 2G - limit: 500G - extraPodConfig: - nodeSelector: - hub.jupyter.org/node-purpose: user - k8s.dask.org/node-purpose: null - serviceAccountName: *user-sa - worker: - extraPodConfig: - nodeSelector: - k8s.dask.org/node-purpose: worker - serviceAccountName: *user-sa - - extraConfig: - idle: | - # timeout after 30 minutes of inactivity - c.KubeClusterConfig.idle_timeout = 1800 - limits: | - # per Dask cluster limits. 
- c.ClusterConfig.cluster_max_cores = 256 - c.ClusterConfig.cluster_max_memory = "1028G" - - # jupyterhub-ssh values.yaml reference: - # https://github.com/yuvipanda/jupyterhub-ssh/blob/main/helm-chart/jupyterhub-ssh/values.yaml - # - jupyterhub-ssh: - hubUrl: http://proxy-http:8000 - - ssh: - enabled: true - - sftp: - enabled: true - pvc: - enabled: true - name: home-nfs From ce1652272e001224da87bb2dfb83fde9a6006c24 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 21 Apr 2022 20:18:58 +0200 Subject: [PATCH 21/43] jmte: add recent changes --- config/clusters/jmte/cluster.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/clusters/jmte/cluster.yaml b/config/clusters/jmte/cluster.yaml index 2b916d0f5b..1489ccd8cd 100644 --- a/config/clusters/jmte/cluster.yaml +++ b/config/clusters/jmte/cluster.yaml @@ -10,10 +10,11 @@ hubs: - common.values.yaml - staging.values.yaml - name: prod + display_name: "Jupyter Meets the Earth" domain: hub.jupytearth.org - helm_chart: daskhub auth0: connection: github + helm_chart: daskhub helm_chart_values_files: - common.values.yaml - prod.values.yaml From ec6046bd9f694a3058261d4ad5e666102cae4d9d Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Fri, 29 Apr 2022 14:00:27 +0200 Subject: [PATCH 22/43] jmte: k8s 1.19 -> 1.22 upgrade and notes --- eksctl/eksctl-cluster-config.yaml | 66 +++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index 2806d04454..24e788b9fc 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -16,6 +16,7 @@ # # eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" --approve # eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "user-a-*,worker-a-*" --install-nvidia-plugin=false +# eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "user-gpu-a-*" --approve # eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "user-gpu-a-*" --install-nvidia-plugin=false # eksctl delete nodegroup --cluster jmte --name core-a --approve # eksctl create nodegroup --cluster jmte --name core-a --install-nvidia-plugin=false @@ -35,7 +36,60 @@ metadata: # dataset. # region: us-west-2 - version: "1.19" + # version: + # The k8s control plane version, to upgrade this, see + # https://eksctl.io/usage/cluster-upgrade/. + # + # For reference, this is the steps I took when upgrading from k8s 1.19 to k8s + # 1.22, April 29th 2022. + # + # 1. Updated the version field in this config from 1.19 to 1.20 + # + # - It is not allowed to upgrade the control plane more than one minor at the time + # + # 2. Upgraded the control plane (takes ~10 minutes) + # + # eksctl upgrade cluster --config-file eksctl-cluster-config.yaml --approve + # + # 2. Deleted all non-core nodegroups + # + # eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "user-*,worker-*" --approve + # + # 3. Updated the version field in this config from 1.20 to 1.22 + # + # - It is allowed to have a nodegroup +-2 minors away from the control plan version + # + # 4. Created a new core nodepool (core-b) + # + # eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "core-b" --install-nvidia-plugin=false + # + # 5. Deleted the old core nodepool (core-a) + # + # eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "core-a" --approve + # + # 6. 
Upgraded add-ons (takes ~3*5s) + # + # eksctl utils update-kube-proxy --cluster=jmte --approve + # eksctl utils update-aws-node --cluster=jmte --approve + # eksctl utils update-coredns --cluster=jmte --approve + # + # 7. Update the version field in this config from 1.22 to 1.21 + # + # 8. Upgraded the control plane, as in step 2. + # + # 9. Upgraded add-ons, as in step 6. + # + # A. Update the version field in this config from 1.21 to 1.22 + # + # B. Upgraded the control plane, as in step 2. + # + # C. Upgraded add-ons, as in step 6. + # + # D. Recreated all nodegroups + # + # eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "*" --install-nvidia-plugin=false + # + version: "1.22" tags: 2i2c.org/project: jmte @@ -104,7 +158,7 @@ iam: # you have run into a quota issue. Following that, you make a request to AWS using provided link: https://aws.amazon.com/contact-us/ec2-request # nodeGroups: - - name: core-a + - name: core-b availabilityZones: [us-west-2d] # aws ec2 describe-availability-zones --region instanceType: m5.large # 28 pods, 2 cpu, 8 GB minSize: 0 @@ -248,12 +302,8 @@ nodeGroups: # operator: Equal # value: user # - # Sadly, something is making this change reset. I don't know why, but I - # suspect it happens whenever I do something with eksctl - perhaps whenever I - # do something with the nodegroup realted to GPU nodes. I think it resets - # whenever a GPU based nodegroup is created unless - # --install-nvidia-plugin=false it passed to the `eksctl create nodegroup` - # command. + # WARNING: If you create any nodegroup without --install-nvidia-plugin=false, + # the daemonset will reset and this change will be lost. # # It seems I may need to specify additional tags also, with associated value # for the GPU of choice: From c7619edf3d19b6a77b14863ee367b4543ef63ae7 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Wed, 8 Jun 2022 19:28:50 +0200 Subject: [PATCH 23/43] jmte: adjust dask-gateway config options --- config/clusters/jmte/common.values.yaml | 71 ++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml index 1aa0faf34f..505b18e6c5 100644 --- a/config/clusters/jmte/common.values.yaml +++ b/config/clusters/jmte/common.values.yaml @@ -292,14 +292,81 @@ dask-gateway: # serviceAccountName is adjusted by staging/prod values # serviceAccountName: *user-sa + # Note that we are overriding options provided in 2i2c's helm chart that has + # default values for these config entries. + # extraConfig: + # This configuration represents options that can be presented to users + # that want to create a Dask cluster using dask-gateway. For more + # details, see https://gateway.dask.org/cluster-options.html + # + # The goal is to provide a simple configuration that allow the user some + # flexibility while also fitting well well on AWS nodes that are all + # having 1:4 ratio between CPU and GB of memory. By providing the + # username label, we help administrators to track user pods. 
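      # For reference, a user server typically consumes these options roughly
      # like this (a sketch only; the option names mirror the Select, String,
      # and Mapping fields defined in option_handler below, and the gateway
      # address is pre-configured by the daskhub chart):
      #
      #   from dask_gateway import Gateway
      #
      #   gateway = Gateway()
      #   options = gateway.cluster_options()       # fetches the options below
      #   options.worker_specification = "4CPU, 16GB"
      #   cluster = gateway.new_cluster(options)    # creates the scheduler pod
      #   cluster.scale(4)                          # request 4 worker pods
      #   client = cluster.get_client()             # distributed.Client
      #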
+ option_handler: | + from dask_gateway_server.options import Options, Select, String, Mapping + def cluster_options(user): + def option_handler(options): + if ":" not in options.image: + raise ValueError("When specifying an image you must also provide a tag") + + extra_labels = {} + extra_annotations = { + "prometheus.io/scrape": "true", + "prometheus.io/port": "8787", + } + chosen_worker_cpu = int(options.worker_specification.split("CPU")[0]) + chosen_worker_memory = 4 * chosen_worker_cpu + + # We multiply the requests by a fraction to ensure that the + # worker fit well within a node that need some resources + # reserved for system pods. + return { + # A default image is suggested via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable + "image": options.image, + "scheduler_extra_pod_labels": extra_labels, + "scheduler_extra_pod_annotations": extra_annotations, + "worker_extra_pod_labels": extra_labels, + "worker_extra_pod_annotations": extra_annotations, + "worker_cores": 0.85 * chosen_worker_cpu, + "worker_cores_limit": chosen_worker_cpu, + "worker_memory": "%fG" % (0.85 * chosen_worker_memory), + "worker_memory_limit": "%fG" % chosen_worker_memory, + "environment": options.environment, + } + return Options( + Select( + "worker_specification", + [ + "1CPU, 4GB", + "2CPU, 8GB", + "4CPU, 16GB", + "8CPU, 32GB", + "16CPU, 64GB", + "32CPU, 128GB", + "64CPU, 256GB", + ], + default="1CPU, 4GB", + label="Worker specification", + ), + # The default image is set via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable + String("image", label="Image"), + Mapping("environment", {}, label="Environment variables"), + handler=option_handler, + ) + c.Backend.cluster_options = cluster_options idle: | # timeout after 30 minutes of inactivity c.KubeClusterConfig.idle_timeout = 1800 limits: | # per Dask cluster limits. - c.ClusterConfig.cluster_max_cores = 256 - c.ClusterConfig.cluster_max_memory = "1028G" + # + # Limits removed for JMTE as I think they could hamper Shane Griggsby's + # work with powerful dask clusters. + # + # c.ClusterConfig.cluster_max_cores = 256 + # c.ClusterConfig.cluster_max_memory = "1028G" # jupyterhub-ssh values.yaml reference: # https://github.com/yuvipanda/jupyterhub-ssh/blob/main/helm-chart/jupyterhub-ssh/values.yaml From 93fd326dc05c627548b294eefeed123996e569b8 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 16 Jun 2022 09:42:33 +0200 Subject: [PATCH 24/43] jmte: update to gh-scoped-creds --- config/clusters/jmte/common.values.yaml | 30 +++++++++---------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml index 505b18e6c5..c2abb33a43 100644 --- a/config/clusters/jmte/common.values.yaml +++ b/config/clusters/jmte/common.values.yaml @@ -51,20 +51,6 @@ basehub: singleuser: # extraFiles ref: https://zero-to-jupyterhub.readthedocs.io/en/latest/resources/reference.html#singleuser-extrafiles extraFiles: - # github-app-user-auth requires: - # - Installed python package - # - GITHUB_APP_CLIENT_ID environment set - # - This configuration - # - # NOTE: an associated GitHub App has been created by Erik Sundell - # aka. 
@consideRatio and can be configured by him at: - # https://github.com/settings/apps/hub-jupytearth-org-github-integ - # - gitconfig: - mountPath: /etc/gitconfig - stringData: | - [credential] - helper = store --file=/tmp/github-app-git-credentials jupyter_notebook_config.json: mountPath: /etc/jupyter/jupyter_notebook_config.json data: @@ -84,11 +70,17 @@ basehub: startTimeout: 900 extraEnv: - # GITHUB_APP_CLIENT_ID, see notes in singleuser.extraFiles about - # this environment variable. Two entries are created as I think - # the shorter may be deprecated soon. - GITHUB_APP_CLIENT_ID: Iv1.a073b1649637af12 - GITHUB_APP_USER_AUTH_CLIENT_ID: Iv1.a073b1649637af12 + # github-app-user-auth requires: + # - Installed python package + # - GH_SCOPED_CREDS_APP_URL env var set + # - GITHUB_APP_CLIENT_ID env var set + # + # NOTE: an associated GitHub App has been created by Erik Sundell aka. + # @consideRatio and can be configured by him at: + # https://github.com/settings/apps/hub-jupytearth-org-github-integ + # + GH_SCOPED_CREDS_APP_URL: https://github.com/apps/hub-jupytearth-org-github-integ + GH_SCOPED_CREDS_CLIENT_ID: Iv1.a073b1649637af12 image: # NOTE: We use the jupyterhub-configurator so this image/tag is not From 10122167cb746f0be0f662ecc4fc24396d060bbc Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 16 Jun 2022 10:51:01 +0200 Subject: [PATCH 25/43] jmte: configure 1 small node placeholder pod --- config/clusters/jmte/prod.values.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/config/clusters/jmte/prod.values.yaml b/config/clusters/jmte/prod.values.yaml index 9ff43eca6a..66a2fce789 100644 --- a/config/clusters/jmte/prod.values.yaml +++ b/config/clusters/jmte/prod.values.yaml @@ -3,6 +3,15 @@ basehub: enabled: true jupyterhub: + scheduling: + userPlaceholder: + enabled: true + replicas: 1 + resources: + requests: + cpu: 2.5 + memory: 14G + singleuser: # Eksctl: The service account was created by eksctl. # From 5b74730869afa0075425641b842d2dc7a3987582 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 16 Jun 2022 11:01:15 +0200 Subject: [PATCH 26/43] jmte: Re-enable the continuous image puller --- config/clusters/jmte/prod.values.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/config/clusters/jmte/prod.values.yaml b/config/clusters/jmte/prod.values.yaml index 66a2fce789..30fc22bace 100644 --- a/config/clusters/jmte/prod.values.yaml +++ b/config/clusters/jmte/prod.values.yaml @@ -3,6 +3,13 @@ basehub: enabled: true jupyterhub: + # Reverts changes in basehub configuration to the z2jh defaults and ensures + # 1 pod is used as a placeholder pod, sized as the smallest node in the JMTE + # cluster. + # + prePuller: + continuous: + enabled: true scheduling: userPlaceholder: enabled: true From 2c93d9e571e420427a6741ad3cf16e039e4696c4 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 3 Jul 2022 16:52:40 +0200 Subject: [PATCH 27/43] jmte: configure nodeSelector for userPlaceholder pod --- config/clusters/jmte/prod.values.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/config/clusters/jmte/prod.values.yaml b/config/clusters/jmte/prod.values.yaml index 30fc22bace..baebefde14 100644 --- a/config/clusters/jmte/prod.values.yaml +++ b/config/clusters/jmte/prod.values.yaml @@ -5,7 +5,8 @@ basehub: jupyterhub: # Reverts changes in basehub configuration to the z2jh defaults and ensures # 1 pod is used as a placeholder pod, sized as the smallest node in the JMTE - # cluster. + # cluster. 
We also update singleuser.nodeSelector to ensure we default to
+  # have a placeholder for the smallest nodes only.
   #
   prePuller:
     continuous:
       enabled: true
@@ -20,6 +21,13 @@ basehub:
         memory: 14G
 
     singleuser:
+      # This default value will be relevant for the userPlaceholder
+      # configuration, but irrelevant for the defaults we override in our
+      # profileList configuration.
+      #
+      nodeSelector:
+        2i2c.org/node-cpu: "4"
+
       # Eksctl: The service account was created by eksctl.
       #
       serviceAccountName: &user-sa s3-full-access

From 877419ce7c2a6661d06fa5f5f46101f679329945 Mon Sep 17 00:00:00 2001
From: Erik Sundell
Date: Sun, 3 Jul 2022 17:17:34 +0200
Subject: [PATCH 28/43] jmte: add env for the RDS db setup for jmte

---
 config/clusters/jmte/prod.values.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/config/clusters/jmte/prod.values.yaml b/config/clusters/jmte/prod.values.yaml
index baebefde14..d2dae9ad49 100644
--- a/config/clusters/jmte/prod.values.yaml
+++ b/config/clusters/jmte/prod.values.yaml
@@ -43,6 +43,17 @@ basehub:
           #
           SCRATCH_BUCKET: s3://jmte-scratch/$(JUPYTERHUB_USER)
           PANGEO_SCRATCH: s3://jmte-scratch/$(JUPYTERHUB_USER)
+          # An Amazon RDS postgresql 14 database server has been set up on a
+          # machine with 4 cores and 32 GB memory. See
+          # https://us-west-2.console.aws.amazon.com/rds/home?region=us-west-2#modify-instance:id=jmte-db
+          #
+          # I created a postgresql user and database for use by some users like this:
+          #
+          #   CREATE USER proj WITH ENCRYPTED PASSWORD '***';
+          #   CREATE DATABASE proj;
+          #   GRANT ALL PRIVILEGES ON DATABASE proj TO proj;
+          #
+          JMTE_DB_HOST: jmte-db.cqf1ngjal8bq.us-west-2.rds.amazonaws.com
 
           initContainers:
             # Need to explicitly fix ownership here, since EFS doesn't do anonuid

From febbd095b6cdbd2ffd8df54903cc3a794c370593 Mon Sep 17 00:00:00 2001
From: Erik Sundell
Date: Sat, 16 Jul 2022 15:45:21 +0200
Subject: [PATCH 29/43] jmte: ensure CUDA drivers propagate to containers

---
 config/clusters/jmte/common.values.yaml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml
index c2abb33a43..baa7d097ea 100644
--- a/config/clusters/jmte/common.values.yaml
+++ b/config/clusters/jmte/common.values.yaml
@@ -81,6 +81,15 @@ basehub:
         #
         GH_SCOPED_CREDS_APP_URL: https://github.com/apps/hub-jupytearth-org-github-integ
         GH_SCOPED_CREDS_CLIENT_ID: Iv1.a073b1649637af12
+        # NVIDIA_DRIVER_CAPABILITIES is added based on
+        # https://github.com/2i2c-org/infrastructure/pull/1314 and
+        # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities
+        # that indicate this is needed.
+        #
+        # It was added when `nvidia-smi` didn't report a CUDA driver version,
+        # and no /usr/local/cuda folders were found in the container filesystem.
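+        # A hedged way to double check the effect from inside a user server
+        # (exact output depends on the node's NVIDIA driver version):
+        #
+        #   nvidia-smi --query-gpu=name,driver_version --format=csv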
+ # + NVIDIA_DRIVER_CAPABILITIES: compute,utility image: # NOTE: We use the jupyterhub-configurator so this image/tag is not From 18f0c825dc0166b4ebb116a55a2752a1f3f7bbab Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sat, 16 Jul 2022 15:45:39 +0200 Subject: [PATCH 30/43] jmte: add debugging pod manifest for node fs --- debug-pod-node-fs.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 debug-pod-node-fs.yml diff --git a/debug-pod-node-fs.yml b/debug-pod-node-fs.yml new file mode 100644 index 0000000000..15d69ce045 --- /dev/null +++ b/debug-pod-node-fs.yml @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: Pod +metadata: + name: node-fs-inspection +spec: + containers: + - name: node-fs-inspection + image: ubuntu:22.04 + command: ["sh", "-c", "sleep infinity"] + resources: + requests: + cpu: 225m + memory: "939524096" + volumeMounts: + - name: node-root-fs + mountPath: /node-root-fs + terminationGracePeriodSeconds: 1 + nodeSelector: + 2i2c.org/node-gpu: "1" + tolerations: + - effect: NoSchedule + key: hub.jupyter.org/dedicated + operator: Equal + value: user + - effect: NoSchedule + key: hub.jupyter.org_dedicated + operator: Equal + value: user + volumes: + - name: node-root-fs + hostPath: + path: / From b7323f647769cbafa1fbc6500cd7664a74d6308f Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Mon, 18 Jul 2022 22:08:12 +0200 Subject: [PATCH 31/43] jmte: add k8s memory based emptyDir volume mount to /dev/shm --- config/clusters/jmte/common.values.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml index baa7d097ea..d91be0c245 100644 --- a/config/clusters/jmte/common.values.yaml +++ b/config/clusters/jmte/common.values.yaml @@ -58,6 +58,30 @@ basehub: # https://github.com/berkeley-dsep-infra/datahub/issues/3160 ContentsManager: allow_hidden: true + # /dev/shm is mounted as a filesystem path, where writing to it means to + # write to memory. + # + # How to: https://stackoverflow.com/questions/46085748/define-size-for-dev-shm-on-container-engine/46434614#46434614 + # Request for this by Ellie: https://fperezgroup.slack.com/archives/C020XCEFPEH/p1658168872788389 + # + storage: + extraVolumes: + - name: dev-shm + emptyDir: + medium: Memory + extraVolumeMounts: + - name: dev-shm + mountPath: /dev/shm + # FIXME: we override the list extraVolumeMounts which is also set in + # the the basehub chart, due to that, we need to add this here + # as well. An option is to add hub.extraConfig entries that + # append the kubespawner configuration to include these extra + # volume mounts. + # + - name: home + mountPath: /home/jovyan/shared + subPath: _shared + readOnly: true # Eksctl: The service account was created by eksctl. # From 3e04accffe686bc40eb56ba6e0e78d2d436dd268 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Mon, 1 Aug 2022 23:34:40 +0200 Subject: [PATCH 32/43] jmte: increase spawn timeout as eks slowly pulls images --- config/clusters/jmte/common.values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml index d91be0c245..efbc4b5443 100644 --- a/config/clusters/jmte/common.values.yaml +++ b/config/clusters/jmte/common.values.yaml @@ -91,7 +91,7 @@ basehub: # Increased as we have experienced a too slow image pull at least # once. Our pods can take ~6-7 minutes to start on a new node it # seems, so this gives us some margin. 
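      # A hedged way to check how long pulls actually take on a fresh node,
      # assuming kubectl access to the prod namespace:
      #
      #   kubectl -n prod get events --field-selector reason=Pulled \
      #     --sort-by=.lastTimestamp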
- startTimeout: 900 + startTimeout: 1200 extraEnv: # github-app-user-auth requires: From 072d093274348a2b73f020b3b73a2c0aafe89954 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Mon, 5 Sep 2022 17:30:18 +0200 Subject: [PATCH 33/43] jmte: add another availability zone for highmem instances --- eksctl/eksctl-cluster-config.yaml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index 24e788b9fc..98e43cd831 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -317,12 +317,19 @@ nodeGroups: # Note that we opted for us-west-2b here because g4dn machines were not # available in us-west-2d. # + # aws ec2 describe-instance-type-offerings \ + # --region us-west-2 \ + # --filter Name=instance-type,Values=g4dn.xlarge \ + # --location-type=availability-zone + # # 57 pods, 4 cpu, 16 GB (Intel, 25 GBits network), 1 T4 Tensor Core GPU - name: user-gpu-a-4 - availabilityZones: &user-gpu-availabilityZones [us-west-2b] + availabilityZones: &user-gpu-availabilityZones [us-west-2a, us-west-2b] instanceType: g4dn.xlarge minSize: *user-minSize - maxSize: *user-maxSize + # maxSize increased to accommodate request by Facu that a workshop is to + # support 8 simultaneous users with GPU servers. + maxSize: 10 desiredCapacity: *user-desiredCapacity volumeSize: *user-volumeSize labels: From 2843ee5f12007aef3655c4d36ce27f32fb4bdc7d Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Mon, 5 Sep 2022 18:32:17 +0200 Subject: [PATCH 34/43] jmte: add shared-public folder --- config/clusters/jmte/common.values.yaml | 3 +++ config/clusters/jmte/prod.values.yaml | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml index efbc4b5443..e0584a74ff 100644 --- a/config/clusters/jmte/common.values.yaml +++ b/config/clusters/jmte/common.values.yaml @@ -82,6 +82,9 @@ basehub: mountPath: /home/jovyan/shared subPath: _shared readOnly: true + - name: home + mountPath: /home/jovyan/shared-public + subPath: _shared_public # Eksctl: The service account was created by eksctl. 
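      # A hedged sketch of how such a service account is typically created with
      # eksctl (the policy ARN below is an assumption, not necessarily what was
      # used for s3-full-access):
      #
      #   eksctl create iamserviceaccount --cluster=jmte --namespace=prod \
      #     --name=s3-full-access \
      #     --attach-policy-arn=arn:aws:iam::aws:policy/AmazonS3FullAccess \
      #     --approve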
# diff --git a/config/clusters/jmte/prod.values.yaml b/config/clusters/jmte/prod.values.yaml index d2dae9ad49..d09ffe8c51 100644 --- a/config/clusters/jmte/prod.values.yaml +++ b/config/clusters/jmte/prod.values.yaml @@ -63,7 +63,7 @@ basehub: [ "sh", "-c", - "id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan", + "id && chown 1000:1000 /home/jovyan /home/jovyan/shared /home/jovyan/shared-public && ls -lhd /home/jovyan", ] securityContext: runAsUser: 0 @@ -74,6 +74,9 @@ basehub: - name: home mountPath: /home/jovyan/shared subPath: _shared + - name: home + mountPath: /home/jovyan/shared-public + subPath: _shared_public proxy: traefik: From 923475f507b54be28b90022ddc0b01e29ef805aa Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 22 Sep 2022 22:49:23 +0200 Subject: [PATCH 35/43] Slim down the debug-pod --- debug-pod.yaml | 54 +++++++++++--------------------------------------- 1 file changed, 12 insertions(+), 42 deletions(-) diff --git a/debug-pod.yaml b/debug-pod.yaml index 53a6d76a7d..6fe9c5cc54 100644 --- a/debug-pod.yaml +++ b/debug-pod.yaml @@ -1,19 +1,7 @@ apiVersion: v1 kind: Pod metadata: - annotations: - hub.jupyter.org/username: fperez - labels: - app: jupyterhub - chart: jupyterhub-1.1.1 - component: singleuser-server - heritage: jupyterhub - hub.jupyter.org/network-access-hub: "true" - hub.jupyter.org/network-access-proxy-http: "true" - hub.jupyter.org/servername: "" - hub.jupyter.org/username: fperez - release: prod - name: jupyter-fperez-debugging + name: jupyter-debugging namespace: prod spec: affinity: @@ -27,49 +15,31 @@ spec: - user weight: 100 containers: - - args: - - jupyterhub-singleuser - - --ip=0.0.0.0 - - --port=8888 - - --SingleUserNotebookApp.default_url=/lab - image: 286354552638.dkr.ecr.us-west-2.amazonaws.com/jmte/user-env:c6d9558 - name: notebook - ports: - - containerPort: 8888 - name: notebook-port - protocol: TCP - resources: - requests: - cpu: 225m - memory: "939524096" - initContainers: - - command: - - sh - - -c - - id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan & sleep infinity + - name: busybox image: busybox - imagePullPolicy: Always - name: volume-mount-ownership-fix + command: ["sh", "-c", "sleep infinity"] securityContext: runAsUser: 0 volumeMounts: + - mountPath: /nfs + name: home - mountPath: /home/jovyan name: home subPath: fperez - mountPath: /home/jovyan/shared name: home subPath: _shared + - mountPath: /home/jovyan/shared-public + name: home + subPath: _shared_public + resources: + requests: + cpu: 225m + memory: "939524096" nodeSelector: 2i2c.org/node-cpu: "4" - priority: 0 - priorityClassName: prod-default-priority - restartPolicy: OnFailure schedulerName: prod-user-scheduler - securityContext: - fsGroup: 100 - serviceAccount: s3-full-access serviceAccountName: s3-full-access - terminationGracePeriodSeconds: 30 tolerations: - effect: NoSchedule key: hub.jupyter.org/dedicated From 9d1f700d5aac12a3980ab67dec9b9110278e8fb0 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 22 Sep 2022 22:53:25 +0200 Subject: [PATCH 36/43] jmte: add test of latest image --- config/clusters/jmte/common.values.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml index e0584a74ff..710069a639 100644 --- a/config/clusters/jmte/common.values.yaml +++ b/config/clusters/jmte/common.values.yaml @@ -192,6 +192,16 @@ basehub: node_selector: { 2i2c.org/node-cpu: "16", 2i2c.org/node-gpu: "1" } 
extra_resource_limits: nvidia.com/gpu: "1" + - display_name: "16th of Medium: 0.25-4 CPU, 1-16 GB - Test of latest image" + description: "Helps us test an image before we make it the default" + kubespawner_override: + image: 286354552638.dkr.ecr.us-west-2.amazonaws.com/jmte/user-env:latest + image_pull_policy: Always + cpu_guarantee: 0.225 + mem_guarantee: 0.875G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "4" } + extra_resource_limits: {} proxy: # proxy notes: From 4291da1c06afc01302219f76373b06f4b0a0667a Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Mon, 12 Dec 2022 10:47:30 +0100 Subject: [PATCH 37/43] jmte: add 256GB memory GPU node --- config/clusters/jmte/common.values.yaml | 8 +++++++ eksctl/eksctl-cluster-config.yaml | 28 +++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml index 710069a639..fd2f539848 100644 --- a/config/clusters/jmte/common.values.yaml +++ b/config/clusters/jmte/common.values.yaml @@ -192,6 +192,14 @@ basehub: node_selector: { 2i2c.org/node-cpu: "16", 2i2c.org/node-gpu: "1" } extra_resource_limits: nvidia.com/gpu: "1" + - display_name: "Massive GPU: 64 CPU, 256 GB, 1 T4 Tensor Core GPU" + description: "A dedicated machine for you with one GPU attached." + kubespawner_override: + mem_guarantee: 224G + mem_limit: null + node_selector: { 2i2c.org/node-cpu: "64", 2i2c.org/node-gpu: "1" } + extra_resource_limits: + nvidia.com/gpu: "1" - display_name: "16th of Medium: 0.25-4 CPU, 1-16 GB - Test of latest image" description: "Helps us test an image before we make it the default" kubespawner_override: diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index 98e43cd831..a9d6678b1f 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -322,7 +322,7 @@ nodeGroups: # --filter Name=instance-type,Values=g4dn.xlarge \ # --location-type=availability-zone # - # 57 pods, 4 cpu, 16 GB (Intel, 25 GBits network), 1 T4 Tensor Core GPU + # 28 pods, 4 cpu, 16 GB (Intel, 25 GBits network), 1 T4 Tensor Core GPU - name: user-gpu-a-4 availabilityZones: &user-gpu-availabilityZones [us-west-2a, us-west-2b] instanceType: g4dn.xlarge @@ -352,7 +352,7 @@ nodeGroups: autoScaler: true efs: true - # 233 pods, 16 cpu, 64 GB (Intel, 25 GBits network), 1 T4 Tensor Core GPU + # 28 pods, 16 cpu, 64 GB (Intel, 25 GBits network), 1 T4 Tensor Core GPU - name: user-gpu-a-16 availabilityZones: *user-gpu-availabilityZones instanceType: g4dn.4xlarge @@ -376,6 +376,30 @@ nodeGroups: autoScaler: true efs: true + # 57 pods, 64 cpu, 256 GB (Intel, 50 GBits network), 1 T4 Tensor Core GPU + - name: user-gpu-a-64 + availabilityZones: *user-gpu-availabilityZones + instanceType: g4dn.16xlarge + minSize: *user-minSize + maxSize: *user-maxSize + desiredCapacity: *user-desiredCapacity + volumeSize: *user-volumeSize + labels: + hub.jupyter.org/node-purpose: user + 2i2c.org/node-cpu: "64" + 2i2c.org/node-gpu: "1" + taints: + hub.jupyter.org_dedicated: user:NoSchedule + tags: + k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "64" + k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-gpu: "1" + k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule + iam: &user-iam + withAddonPolicies: + autoScaler: true + efs: true + # Worker node pools using cheaper spot instances that are temporary. 
# # References: From 8927e310bc3312758c1e00ab98a34507994a725f Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 18 Dec 2022 15:30:32 +0100 Subject: [PATCH 38/43] jmte: update k8s from 1.22 to 1.24 --- eksctl/eksctl-cluster-config.yaml | 131 ++++++++++++++++++++++++++---- 1 file changed, 116 insertions(+), 15 deletions(-) diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index a9d6678b1f..f66e5d1547 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -2,6 +2,10 @@ # by the cluster. # ref: https://eksctl.io/usage/schema/ # +# Get cluster credentials: +# +# eksctl utils write-kubeconfig --cluster=jmte +# # Cluster operations: # ref: https://eksctl.io/usage/cluster-upgrade/ # @@ -51,45 +55,138 @@ metadata: # # eksctl upgrade cluster --config-file eksctl-cluster-config.yaml --approve # - # 2. Deleted all non-core nodegroups + # 3. Deleted all non-core nodegroups # # eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "user-*,worker-*" --approve # - # 3. Updated the version field in this config from 1.20 to 1.22 + # 4. Updated the version field in this config from 1.20 to 1.22 # # - It is allowed to have a nodegroup +-2 minors away from the control plan version # - # 4. Created a new core nodepool (core-b) + # 5. Created a new core nodepool (core-b) # # eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "core-b" --install-nvidia-plugin=false # - # 5. Deleted the old core nodepool (core-a) + # 6. Deleted the old core nodepool (core-a) # - # eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "core-a" --approve + # eksctl delete nodegroup --config-file=eksctl-cluster-config.yaml --include "core-b" --approve # - # 6. Upgraded add-ons (takes ~3*5s) + # 7. Upgraded add-ons (takes ~3*5s) # # eksctl utils update-kube-proxy --cluster=jmte --approve # eksctl utils update-aws-node --cluster=jmte --approve # eksctl utils update-coredns --cluster=jmte --approve # - # 7. Update the version field in this config from 1.22 to 1.21 + # 8. Update the version field in this config from 1.22 to 1.21 # - # 8. Upgraded the control plane, as in step 2. + # 9. Upgraded the control plane, as in step 2. # - # 9. Upgraded add-ons, as in step 6. + # A. Upgraded add-ons, as in step 7. # - # A. Update the version field in this config from 1.21 to 1.22 + # B. Update the version field in this config from 1.21 to 1.22 # - # B. Upgraded the control plane, as in step 2. + # C. Upgraded the control plane, as in step 2. # - # C. Upgraded add-ons, as in step 6. + # D. Upgraded add-ons, as in step 7. # - # D. Recreated all nodegroups + # E. Recreated all nodegroups # # eksctl create nodegroup --config-file=eksctl-cluster-config.yaml --include "*" --install-nvidia-plugin=false # - version: "1.22" + # For reference, this is the steps I took when upgrading from k8s 1.22 to k8s + # 1.24, Dec 18th 2022. + # + # 1. Performed step 1-7 from above to, but migrated control plane from 1.22 to + # 1.23 and node groups from 1.22 to 1.24. + # + # 2. When performing step 7: + # + # - the aws-node daemonset's pods failed to start because of a too + # restrictive container securityContext not running as root. + # - the kube-proxy deamonset's pods failed to pull the image, it was not + # found. 
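+  #
+  #      (As an aside, the rollout state of these daemonsets can be inspected
+  #      with commands like
+  #      "kubectl -n kube-system rollout status daemonset aws-node" and
+  #      "kubectl -n kube-system get pods -l k8s-app=kube-proxy".)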
+ # + # I patched the aws-node thing now, but went ahead with the upgrade to k8s + # 1.24 in the control plane, hoping another `eksctl utils update-aws-node` + # and `eksctl utils update-kube-proxy` would resolve the issues. + # + # Later I concluded the following: + # + # - aws-node issue: https://github.com/weaveworks/eksctl/issues/6048. + # Resolved by removing `runAsNonRoot: true` and + # `allowPrivilegeEscalation: false`. + # - kube-proxy issue: it went away when upgrading the plugin in 1.24 + # - the cluster-autoscaler failed to start initially, but made it in the + # end when other pods got running. + # + # 3. I upgraded the control plan to 1.24 (step 2 above) and re-upgraded add-ons + # (step 7 above). + # + # 4. I recreated all node groups as in step E above. + # + # 5. My hub pod entered a pending state because + # + # - 1 node(s) had no available volume zone + # - I think this is the issue: + # https://docs.aws.amazon.com/eks/latest/userguide/ebs-csi.html, I + # upgraded from v1.22 to v1.23+ without manually activating the plugin + # mentioned there. + # - Looking at + # https://docs.aws.amazon.com/eks/latest/userguide/managing-ebs-csi.html + # and running the command below, I conclude it was not active in my + # cluster. + # + # 6. (what I should have done) Getting ebs-csi-driver setup: + # + # What I think should have been done is to: + # + # 1. Ensure a service account was setup via this config: + # https://eksctl.io/usage/schema/#iam-serviceAccounts-wellKnownPolicies-ebsCSIController + # 2. Ensure that the addon was setup via this config: + # https://eksctl.io/usage/schema/#addons-wellKnownPolicies-ebsCSIController + # 3. Ensure that the node pools using ebs storage (core) was configured to use this: + # https://eksctl.io/usage/schema/#nodeGroups-iam-withAddonPolicies-ebs + # + # 6. (what I actually did) Getting ebs-csi-driver setup: + # + # I read the following instructions: https://docs.aws.amazon.com/eks/latest/userguide/managing-ebs-csi.html#adding-ebs-csi-eks-add-on + # + # I did pre-requisites to setup permissions via: https://docs.aws.amazon.com/eks/latest/userguide/csi-iam-role.html + # + # UPDATE: I think this pre-requites step could be done via this config instead: + # https://eksctl.io/usage/schema/#iam-serviceAccounts-wellKnownPolicies-ebsCSIController + # + # eksctl get addon --name aws-ebs-csi-driver --cluster=jmte + # + # eksctl create iamserviceaccount \ + # --name=ebs-csi-controller-sa \ + # --namespace=kube-system \ + # --cluster=jmte \ + # --attach-policy-arn=arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy \ + # --approve \ + # --role-only \ + # --role-name=AmazonEKS_EBS_CSI_DriverRole + # + # I verified I didn't have a EBS driver installed already: + # + # eksctl get addon --name=aws-ebs-csi-driver --cluster=jmte + # + # I added the ebs driver addon: + # + # UPDATE: I think this main step could be done via this config instead: + # https://eksctl.io/usage/schema/#addons-wellKnownPolicies-ebsCSIController + # + # eksctl create addon --name=aws-ebs-csi-driver --cluster=jmte --service-account-role-arn=arn:aws:iam::286354552638:role/AmazonEKS_EBS_CSI_DriverRole --force + # + # The hub pod that mounted a PVC with ebs storage and got "1 node(s) had no + # available volume zone" was suddenly scheduled successfully! + # + # I think maybe we could manage to setup eksctl clusters to directly have + # this plugin via this config. For now, this was done with manual patches + # though. 
+ # + + version: "1.24" tags: 2i2c.org/project: jmte @@ -158,7 +255,7 @@ iam: # you have run into a quota issue. Following that, you make a request to AWS using provided link: https://aws.amazon.com/contact-us/ec2-request # nodeGroups: - - name: core-b + - name: core-a availabilityZones: [us-west-2d] # aws ec2 describe-availability-zones --region instanceType: m5.large # 28 pods, 2 cpu, 8 GB minSize: 0 @@ -172,6 +269,10 @@ nodeGroups: iam: withAddonPolicies: autoScaler: true + # ebs: I'm not sure if this was needed because I added it before adding + # the ebs csi driver which was absolutely needed. Maybe this and + # the driver was needed. + ebs: true efs: true # 57 pods, 4 cpu, 16 GB (Intel, 10 GBits network) From 1217e0a914e46c95b1692c3ceaf23cd64ad25373 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 18 Dec 2022 15:56:51 +0100 Subject: [PATCH 39/43] basehub: add nfs.homeSpaceReporter.enabled --- helm-charts/basehub/templates/home-space-reporter.yaml | 4 +++- helm-charts/basehub/values.schema.yaml | 9 +++++++++ helm-charts/basehub/values.yaml | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/helm-charts/basehub/templates/home-space-reporter.yaml b/helm-charts/basehub/templates/home-space-reporter.yaml index fcac3f453c..f7db2fad70 100644 --- a/helm-charts/basehub/templates/home-space-reporter.yaml +++ b/helm-charts/basehub/templates/home-space-reporter.yaml @@ -1,6 +1,7 @@ # Deploy a prometheus node_exporter with the same home directory # we have for our hub mounted so we can monitor free space usage. -{{- if or .Values.nfs.enabled .Values.azureFile.enabled }} +{{- if .Values.nfs.homeSpaceReporter.enabled -}} +{{- if or .Values.nfs.enabled .Values.azureFile.enabled -}} apiVersion: apps/v1 kind: Deployment metadata: @@ -59,3 +60,4 @@ spec: claimName: home-nfs {{- end }} {{- end }} +{{- end }} diff --git a/helm-charts/basehub/values.schema.yaml b/helm-charts/basehub/values.schema.yaml index 71e708bc09..bf8b01322d 100644 --- a/helm-charts/basehub/values.schema.yaml +++ b/helm-charts/basehub/values.schema.yaml @@ -168,6 +168,7 @@ properties: required: - enabled - shareCreator + - homeSpaceReporter - pv properties: enabled: @@ -186,6 +187,14 @@ properties: items: type: object additionalProperties: true + homeSpaceReporter: + type: object + additionalProperties: false + required: + - enabled + properties: + enabled: + type: boolean pv: type: object additionalProperties: false diff --git a/helm-charts/basehub/values.yaml b/helm-charts/basehub/values.yaml index 71b574e67e..f50fc77e95 100644 --- a/helm-charts/basehub/values.yaml +++ b/helm-charts/basehub/values.yaml @@ -40,6 +40,8 @@ nfs: shareCreator: enabled: true tolerations: [] + homeSpaceReporter: + enabled: true pv: mountOptions: - soft From 5e429dd458dd0e11913875f7ea0b3b700ef8e3fa Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 18 Dec 2022 15:57:04 +0100 Subject: [PATCH 40/43] jmte: disable nfs.homeSpaceReporter.enabled --- config/clusters/jmte/common.values.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml index fd2f539848..29cd3c8ddc 100644 --- a/config/clusters/jmte/common.values.yaml +++ b/config/clusters/jmte/common.values.yaml @@ -6,6 +6,8 @@ basehub: # enabled: true shareCreator: enabled: true + homeSpaceReporter: + enabled: false pv: serverIP: fs-01707b06.efs.us-west-2.amazonaws.com # mountOptions from https://docs.aws.amazon.com/efs/latest/ug/mounting-fs-nfs-mount-settings.html From 
275acbcc436d8378dfca8f6ecbdcea6aa540a8ef Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 18 Dec 2022 17:03:27 +0100 Subject: [PATCH 41/43] jmte: remove old dask-gateway workaround --- config/clusters/jmte/common.values.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/config/clusters/jmte/common.values.yaml b/config/clusters/jmte/common.values.yaml index 29cd3c8ddc..3d5a550910 100644 --- a/config/clusters/jmte/common.values.yaml +++ b/config/clusters/jmte/common.values.yaml @@ -291,10 +291,6 @@ basehub: - jonathan-taylor # Jonathan Taylor admin_users: *users allowNamedServers: true - networkPolicy: - # FIXME: Required for dask gateway 0.9.0. It is fixed but a Helm - # chart of newer version is not yet released. - enabled: false dask-gateway: # dask-gateway notes: From 5b4be24f693f9e7202816258d99f60ab5213c278 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 18 Dec 2022 17:04:24 +0100 Subject: [PATCH 42/43] jmte: add proxy.htts.hosts explicitly as autohttps is used --- config/clusters/jmte/prod.values.yaml | 3 +++ config/clusters/jmte/staging.values.yaml | 5 +++++ eksctl/eksctl-cluster-config.yaml | 11 ++++++++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/config/clusters/jmte/prod.values.yaml b/config/clusters/jmte/prod.values.yaml index d09ffe8c51..fd26f5c9d7 100644 --- a/config/clusters/jmte/prod.values.yaml +++ b/config/clusters/jmte/prod.values.yaml @@ -79,6 +79,9 @@ basehub: subPath: _shared_public proxy: + https: + hosts: + - hub.jupytearth.org traefik: # jupyterhub-ssh/sftp integration part 3/3: # diff --git a/config/clusters/jmte/staging.values.yaml b/config/clusters/jmte/staging.values.yaml index 570bd8ebe2..0317f77053 100644 --- a/config/clusters/jmte/staging.values.yaml +++ b/config/clusters/jmte/staging.values.yaml @@ -24,6 +24,11 @@ basehub: # - --LabApp.collaborative=True + proxy: + https: + hosts: + - staging.hub.jupytearth.org + jupyterhub-ssh: sftp: enabled: false diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index f66e5d1547..82dcd31a11 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -185,7 +185,16 @@ metadata: # this plugin via this config. For now, this was done with manual patches # though. # - + # 7. I realized the ingress -> service coupling didn't work, so + # https://hub.jupytearth.org got stuck. + # + # Resolution attempt failing: eksctl utils update-legacy-subnet-settings --cluster=jmte + # + # Resolution attempt succeeded: I had also upgraded the deployer and ended + # up without getting proxy.https.hosts set following this: + # https://github.com/2i2c-org/infrastructure/pull/1404/commits/ec6f0aee616cb16d8b8e2e99252bb4110716b5d2#diff-eedaf02b81cd907a3feb5e4389e9825226bf7dc82a0fb582f9ad367c00ba6651L37, + # by adding proxy.https.hosts things started working again. 
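+  #
+  #    (A hedged follow-up check, assuming the z2jh autohttps setup is used:
+  #    "kubectl -n prod logs deploy/autohttps -c traefik" should show the
+  #    certificate being acquired for hub.jupytearth.org.)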
+ # version: "1.24" tags: 2i2c.org/project: jmte From 27a45923b542615569d5fd758a61ad635e44c2fc Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Fri, 27 Jan 2023 00:03:34 +0100 Subject: [PATCH 43/43] jmte: fix GPU nodes labels/taints (16/64 CPU variants was failing) --- eksctl/eksctl-cluster-config.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/eksctl/eksctl-cluster-config.yaml b/eksctl/eksctl-cluster-config.yaml index 82dcd31a11..c5e0bbc53f 100644 --- a/eksctl/eksctl-cluster-config.yaml +++ b/eksctl/eksctl-cluster-config.yaml @@ -474,13 +474,17 @@ nodeGroups: hub.jupyter.org/node-purpose: user 2i2c.org/node-cpu: "16" 2i2c.org/node-gpu: "1" + k8s.amazonaws.com/accelerator: "nvidia-tesla-t4" taints: hub.jupyter.org_dedicated: user:NoSchedule + nvidia.com/gpu: NoSchedule tags: + k8s.io/cluster-autoscaler/node-template/label/k8s.amazonaws.com/accelerator: "nvidia-tesla-t4" k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "16" k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-gpu: "1" k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule + k8s.io/cluster-autoscaler/node-template/taint/nvidia.com/gpu: NoSchedule iam: &user-iam withAddonPolicies: autoScaler: true @@ -498,13 +502,17 @@ nodeGroups: hub.jupyter.org/node-purpose: user 2i2c.org/node-cpu: "64" 2i2c.org/node-gpu: "1" + k8s.amazonaws.com/accelerator: "nvidia-tesla-t4" taints: hub.jupyter.org_dedicated: user:NoSchedule + nvidia.com/gpu: NoSchedule tags: + k8s.io/cluster-autoscaler/node-template/label/k8s.amazonaws.com/accelerator: "nvidia-tesla-t4" k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-cpu: "64" k8s.io/cluster-autoscaler/node-template/label/2i2c.org/node-gpu: "1" k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org_dedicated: user:NoSchedule + k8s.io/cluster-autoscaler/node-template/taint/nvidia.com/gpu: NoSchedule iam: &user-iam withAddonPolicies: autoScaler: true