diff --git a/.github/workflows/publish-helm-chart.yml b/.github/workflows/publish-helm-chart.yml index 8ce0698..516e388 100644 --- a/.github/workflows/publish-helm-chart.yml +++ b/.github/workflows/publish-helm-chart.yml @@ -1,37 +1,26 @@ -name: Release Charts - -on: - push: - branches: - - main - +name: Publish charts +# Run the tasks on every push +on: push jobs: - release: - # depending on default permission settings for your org (contents being read-only or read-write for workloads), you will have to add permissions - # see: https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token - permissions: - contents: write + publish_charts: + name: Build and push Helm charts runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v3 + - name: Check out the repository + uses: actions/checkout@v2 with: + # This is important for the semver action to work correctly + # when determining the number of commits since the last tag fetch-depth: 0 + submodules: true - - name: Configure Git - run: | - git config user.name "$GITHUB_ACTOR" - git config user.email "$GITHUB_ACTOR@users.noreply.github.com" - - - name: Install Helm - uses: azure/setup-helm@v3 - env: - GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" + - name: Get SemVer version for current commit + id: semver + uses: stackhpc/github-actions/semver@master - - name: Run chart-releaser - uses: helm/chart-releaser-action@v1.5.0 + - name: Publish Helm charts + uses: stackhpc/github-actions/helm-publish@master with: - charts_dir: . - env: - CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" - + token: ${{ secrets.GITHUB_TOKEN }} + version: ${{ steps.semver.outputs.version }} + app-version: ${{ steps.semver.outputs.short-sha }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0ba5327 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +# Build artifacts from local helm install +slurm-cluster-chart/Chart.lock +slurm-cluster-chart/charts/ diff --git a/README.md b/README.md index f75e3af..c0b7d61 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,7 @@ # Slurm Docker Cluster -This is a multi-container Slurm cluster using Kubernetes. The Helm chart -creates a named volume for persistent storage of MySQL data files as well as -an NFS volume for shared storage. +This is a multi-container Slurm cluster using Kubernetes. The Slurm cluster Helm chart creates a named volume for persistent storage of MySQL data files. By default, it also installs the +RookNFS Helm chart (also in this repo) to provide shared storage across the Slurm cluster nodes. ## Dependencies @@ -27,12 +26,11 @@ The Helm chart will create the following named volumes: * var_lib_mysql ( -> /var/lib/mysql ) -A named ReadWriteMany (RWX) volume mounted to `/home` is also expected, this can be external or can be deployed using the scripts in the `/nfs` directory (See "Deploying the Cluster") +A named ReadWriteMany (RWX) volume mounted to `/home` is also expected, this can be external or can be deployed using the provided `rooknfs` chart directory (See "Deploying the Cluster"). ## Configuring the Cluster -All config files in `slurm-cluster-chart/files` will be mounted into the container to configure their respective services on startup. Note that changes to these files will not all be propagated to existing deployments (see "Reconfiguring the Cluster"). -Additional parameters can be found in the `values.yaml` file, which will be applied on a Helm chart deployment. 
Note that some of these values will also not propagate until the cluster is restarted (see "Reconfiguring the Cluster").
+All config files in `slurm-cluster-chart/files` will be mounted into the container to configure their respective services on startup. Note that changes to these files will not all be propagated to existing deployments (see "Reconfiguring the Cluster"). Additional parameters can be found in the `values.yaml` file for the Helm chart. Note that some of these values will also not propagate until the cluster is restarted (see "Reconfiguring the Cluster").
 
 ## Deploying the Cluster
 
@@ -40,27 +38,26 @@ Additional parameters can be found in the `values.yaml` file, which will be appl
 On initial deployment ONLY, run
 
 ```console
-./generate-secrets.sh
+./generate-secrets.sh [<namespace>]
 ```
 
-This generates a set of secrets. If these need to be regenerated, see "Reconfiguring the Cluster"
+This generates a set of secrets in the target namespace to be used by the Slurm cluster. If these need to be regenerated, see "Reconfiguring the Cluster".
 
 Be sure to take note of the Open Ondemand credentials, you will need them to access the cluster through a browser
 
 ### Connecting RWX Volume
 
-A ReadWriteMany (RWX) volume is required, if a named volume exists, set `nfs.claimName` in the `values.yaml` file to its name. If not, manifests to deploy a Rook NFS volume are provided in the `/nfs` directory. You can deploy this by running
-```console
-./nfs/deploy-nfs.sh
-```
-and leaving `nfs.claimName` as the provided value.
+A ReadWriteMany (RWX) volume is required for shared storage across cluster nodes. By default, the Rook NFS Helm chart is installed as a dependency of the Slurm cluster chart in order to provide an RWX-capable StorageClass for the required shared volume. If the target Kubernetes cluster has an existing storage class which should be used instead, then `storage.storageClassName` in `values.yaml` should be set to the name of this existing class and the RookNFS dependency should be disabled by setting `rooknfs.enabled = false`. In either case, the storage capacity of the provisioned RWX volume can be configured by setting the value of `storage.capacity`.
+
+See the separate RookNFS chart [values.yaml](./rooknfs/values.yaml) for further configuration options when using RookNFS to provide the shared storage volume.
 
 ### Supplying Public Keys
 
 To access the cluster via `ssh`, you will need to make your public keys available. All your public keys from localhost can be added by running
 
 ```console
-./publish-keys.sh
+./publish-keys.sh [<namespace>]
 ```
+where `<namespace>` is the namespace in which the Slurm cluster chart will be deployed (i.e. using `helm install -n <namespace> ...`). This will create a Kubernetes Secret in the appropriate namespace for the Slurm cluster to use. Omitting the namespace argument will install the secrets in the default namespace.
 
 ### Deploying with Helm
 
@@ -68,6 +65,12 @@ After configuring `kubectl` with the appropriate `kubeconfig` file, deploy the c
 ```console
 helm install <deployment-name> slurm-cluster-chart
 ```
+
+NOTE: If using the RookNFS dependency, then the following must be run before installing the Slurm cluster chart
+```console
+helm dependency update slurm-cluster-chart
+```
+
 Subsequent releases can be deployed using:
 
 ```console
@@ -130,6 +133,7 @@ srun singularity exec docker://ghcr.io/stackhpc/mpitests-container:${MPI_CONTAIN
 ```
 Note: The mpirun script assumes you are running as user 'rocky'.
If you are running as root, you will need to include the --allow-run-as-root argument + ## Reconfiguring the Cluster ### Changes to config files @@ -173,3 +177,5 @@ and then restart the other dependent deployments to propagate changes: ```console kubectl rollout restart deployment slurmd slurmctld login slurmdbd ``` + +# Known Issues diff --git a/generate-secrets.sh b/generate-secrets.sh index e98b97e..a49ede2 100755 --- a/generate-secrets.sh +++ b/generate-secrets.sh @@ -1,35 +1,39 @@ #!/bin/bash +NAMESPACE="$1" +if [[ -z $1 ]]; then + NAMESPACE=default +fi -kubectl create secret generic database-auth-secret \ +kubectl -n $NAMESPACE create secret generic database-auth-secret \ --dry-run=client \ --from-literal=password=$(tr -dc 'A-Za-z0-9' /dev/null | base64 -w 0) \ -o yaml | \ -kubectl apply -f - +kubectl -n $NAMESPACE apply -f - mkdir -p ./temphostkeys/etc/ssh ssh-keygen -A -f ./temphostkeys -kubectl create secret generic host-keys-secret \ +kubectl -n $NAMESPACE create secret generic host-keys-secret \ --dry-run=client \ --from-file=./temphostkeys/etc/ssh \ -o yaml | \ -kubectl apply -f - +kubectl -n $NAMESPACE apply -f - rm -rf ./temphostkeys OOD_PASS=$(tr -dc 'A-Za-z0-9' =0-0" + repository: file://../rooknfs + condition: rooknfs.enabled diff --git a/slurm-cluster-chart/templates/check-jobs-finished-hook.yaml b/slurm-cluster-chart/templates/hooks/check-jobs-finished-hook.yaml similarity index 100% rename from slurm-cluster-chart/templates/check-jobs-finished-hook.yaml rename to slurm-cluster-chart/templates/hooks/check-jobs-finished-hook.yaml diff --git a/slurm-cluster-chart/templates/hooks/pre-delete.yaml b/slurm-cluster-chart/templates/hooks/pre-delete.yaml new file mode 100644 index 0000000..868cbbd --- /dev/null +++ b/slurm-cluster-chart/templates/hooks/pre-delete.yaml @@ -0,0 +1,55 @@ +{{- if .Values.rooknfs.enabled }} +# NOTE: The cleanup jobs defined here are required to ensure that things which +# Rook NFS is responsible for cleaning up are deleted before deleting the Rook +# pods which do the actual clean up of NFS resources. For example, the RWM PVC +# must be deleted before the Rook StorageClass and provisioner pod. However, +# the PVC cannot be deleted until the pods which are using it are deleted, so +# the various Slurm node pods must actually be the first resources deleted. 
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: slurm-k8s-cleanup
+---
+# TODO: Create a job-specific ClusterRole for the ServiceAccount
+# instead of using the cluster-admin role here
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: slurm-k8s-cleanup
+subjects:
+- kind: ServiceAccount
+  name: slurm-k8s-cleanup
+  namespace: {{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cluster-admin
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: slurm-k8s-pre-delete-cleanup
+  annotations:
+    "helm.sh/hook": pre-delete
+    "helm.sh/hook-delete-policy": hook-succeeded
+    "helm.sh/hook-weight": "1"
+spec:
+  template:
+    metadata:
+      name: slurm-k8s-pre-delete-cleanup
+    spec:
+      serviceAccountName: slurm-k8s-cleanup
+      containers:
+      - name: tester
+        image: bitnami/kubectl
+        command:
+        - "/bin/bash"
+        - "-c"
+        - |
+          kubectl delete -n {{ .Release.Namespace }} deployment {{ .Values.login.name }} --wait --cascade=foreground
+          kubectl delete -n {{ .Release.Namespace }} statefulset {{ .Values.slurmctld.name }} --wait --cascade=foreground
+          kubectl delete -n {{ .Release.Namespace }} statefulset {{ .Values.slurmd.name }} --wait --cascade=foreground
+          kubectl delete -n {{ .Release.Namespace }} pvc {{ .Values.storage.claimName }} --wait
+      restartPolicy: Never
+---
+{{- end }}
diff --git a/slurm-cluster-chart/templates/login-deployment.yaml b/slurm-cluster-chart/templates/login.yaml
similarity index 91%
rename from slurm-cluster-chart/templates/login-deployment.yaml
rename to slurm-cluster-chart/templates/login.yaml
index 610811f..65e9983 100644
--- a/slurm-cluster-chart/templates/login-deployment.yaml
+++ b/slurm-cluster-chart/templates/login.yaml
@@ -5,9 +5,9 @@ metadata:
   labels:
     app.kubernetes.io/name: slurm
     app.kubernetes.io/component: login
-  name: login
+  name: {{ .Values.login.name }}
 spec:
-  replicas: {{ .Values.replicas.login }}
+  replicas: {{ .Values.login.replicas }}
   selector:
     matchLabels:
       app.kubernetes.io/name: slurm
@@ -37,7 +37,7 @@ spec:
         - containerPort: 80
         - containerPort: 443
         volumeMounts:
-        - mountPath: {{ .Values.nfs.mountPath }}
+        - mountPath: {{ .Values.storage.mountPath }}
           name: slurm-jobdir
         - mountPath: /etc/slurm/
           name: slurm-config-volume
@@ -65,12 +65,12 @@ spec:
       hostname: login
       dnsConfig:
         searches:
-          - slurmd.default.svc.cluster.local
+          - slurmd.{{ .Release.Namespace }}.svc.cluster.local
       restartPolicy: Always
       volumes:
       - name: slurm-jobdir
         persistentVolumeClaim:
-          claimName: {{ .Values.nfs.claimName }}
+          claimName: {{ .Values.storage.claimName }}
      - name: slurm-config-volume
        configMap:
          name: {{ .Values.configmaps.slurmConf }}
diff --git a/slurm-cluster-chart/templates/pvc.yaml b/slurm-cluster-chart/templates/pvc.yaml
new file mode 100644
index 0000000..aab0856
--- /dev/null
+++ b/slurm-cluster-chart/templates/pvc.yaml
@@ -0,0 +1,12 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ .Values.storage.claimName }}
+spec:
+  storageClassName: {{ .Values.storage.storageClassName }}
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: {{ .Values.storage.capacity }}
\ No newline at end of file
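For reference, with the default values introduced later in this diff (`storage.claimName: slurm-shared-storage`, `storage.storageClassName: slurm-rook-nfs`, `storage.capacity: 10Gi`), the new `pvc.yaml` template above should render to roughly the following manifest. This is a hand-written sketch for orientation, not captured `helm template` output:

```yaml
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: slurm-shared-storage
spec:
  storageClassName: slurm-rook-nfs
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
```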
diff --git a/slurm-cluster-chart/templates/slurmctld-statefulset.yaml b/slurm-cluster-chart/templates/slurmctld.yaml
similarity index 87%
rename from slurm-cluster-chart/templates/slurmctld-statefulset.yaml
rename to slurm-cluster-chart/templates/slurmctld.yaml
index dc0bf90..1644463 100644
--- a/slurm-cluster-chart/templates/slurmctld-statefulset.yaml
+++ b/slurm-cluster-chart/templates/slurmctld.yaml
@@ -5,7 +5,7 @@ metadata:
   labels:
     app.kubernetes.io/name: slurm
     app.kubernetes.io/component: slurmctld
-  name: slurmctld
+  name: {{ .Values.slurmctld.name }}
 spec:
   replicas: 1
   selector:
@@ -29,7 +29,7 @@ spec:
         - containerPort: 6817
         resources: {}
         volumeMounts:
-        - mountPath: {{ .Values.nfs.mountPath }}
+        - mountPath: {{ .Values.storage.mountPath }}
          name: slurm-jobdir
        - mountPath: /etc/slurm/
          name: slurm-config-volume
@@ -40,12 +40,12 @@ spec:
          name: slurmctld-state
      dnsConfig:
        searches:
-          - slurmd.default.svc.cluster.local
+          - slurmd.{{ .Release.Namespace }}.svc.cluster.local
      restartPolicy: Always
      volumes:
      - name: slurm-jobdir
        persistentVolumeClaim:
-          claimName: {{ .Values.nfs.claimName }}
+          claimName: {{ .Values.storage.claimName }}
      - name: slurmctld-state
        persistentVolumeClaim:
          claimName: var-spool-slurmctld
diff --git a/slurm-cluster-chart/templates/slurmd-deployment.yaml b/slurm-cluster-chart/templates/slurmd.yaml
similarity index 86%
rename from slurm-cluster-chart/templates/slurmd-deployment.yaml
rename to slurm-cluster-chart/templates/slurmd.yaml
index 4c2396e..62646b7 100644
--- a/slurm-cluster-chart/templates/slurmd-deployment.yaml
+++ b/slurm-cluster-chart/templates/slurmd.yaml
@@ -5,9 +5,9 @@ metadata:
   labels:
     app.kubernetes.io/name: slurm
     app.kubernetes.io/component: slurmd
-  name: slurmd
+  name: {{ .Values.slurmd.name }}
 spec:
-  replicas: {{ .Values.replicas.slurmd }}
+  replicas: {{ .Values.slurmd.replicas }}
   selector:
     matchLabels:
       app.kubernetes.io/name: slurm
@@ -41,7 +41,7 @@ spec:
         volumeMounts:
         - mountPath: /etc/slurm/
           name: slurm-config-volume
-        - mountPath: {{ .Values.nfs.mountPath }}
+        - mountPath: {{ .Values.storage.mountPath }}
           name: slurm-jobdir
         - mountPath: /tmp/munge.key
           name: munge-key-secret
@@ -50,12 +50,12 @@ spec:
           privileged: true
      dnsConfig:
        searches:
-          - slurmd.default.svc.cluster.local
+          - slurmd.{{ .Release.Namespace }}.svc.cluster.local
      restartPolicy: Always
      volumes:
      - name: slurm-jobdir
        persistentVolumeClaim:
-          claimName: {{ .Values.nfs.claimName }}
+          claimName: {{ .Values.storage.claimName }}
      - name: slurm-config-volume
        configMap:
          name: {{ .Values.configmaps.slurmConf }}
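The `values.yaml` changes below re-organise the per-node settings under `login`, `slurmd` and `slurmctld`, and move the shared-volume settings under `storage`. As a hedged illustration of how a deployer might consume the new layout when an existing RWX-capable StorageClass is available (the class name `managed-nfs` is purely illustrative, not part of this chart):

```yaml
# my-values.yaml -- example override file (illustrative only)
slurmd:
  replicas: 4                     # scale out the compute nodes
storage:
  storageClassName: managed-nfs   # pre-existing RWX StorageClass on the target cluster
  capacity: 20Gi
rooknfs:
  enabled: false                  # skip the bundled RookNFS dependency
```

Applied with something like `helm install -n <namespace> <deployment-name> slurm-cluster-chart -f my-values.yaml`, in line with the README instructions above.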
diff --git a/slurm-cluster-chart/values.yaml b/slurm-cluster-chart/values.yaml
index 56a5e38..b20a2b3 100644
--- a/slurm-cluster-chart/values.yaml
+++ b/slurm-cluster-chart/values.yaml
@@ -1,12 +1,57 @@
 slurmImage: ghcr.io/stackhpc/slurm-docker-cluster:7c0e2d9
 
-replicas:
-  slurmd: 2
-  login: 1
+login:
+  # Deployment resource name
+  name: login
+  replicas: 1
 
-nfs:
+slurmd:
+  # StatefulSet resource name
+  name: slurmd # NB this must match NodeName= in slurm-cluster-chart/files/slurm.conf
+  replicas: 2
+
+slurmctld:
+  # StatefulSet resource name
+  name: slurmctld
+  # NOTE: We don't include a replicas field here because
+  # replicas > 1 for slurmctld needs extra Slurm config
+
+storage:
   mountPath: /home
-  claimName: rook-nfs-pv-claim
+  # The name of a Read-Write-Many StorageClass to use for
+  # the persistent volume which is shared across Slurm nodes.
+  # Note: If using the default value then you must set
+  # rooknfs.enabled = true below to ensure that Rook NFS is
+  # installed on the cluster as a dependency of this Slurm
+  # chart. If you are using a separate RWX StorageClass, then
+  # set rooknfs.enabled = false
+  storageClassName: slurm-rook-nfs
+  # Name for the RWX volume to provision
+  claimName: slurm-shared-storage
+  # Capacity of the RWX volume
+  capacity: &capacity 10Gi # NB yaml anchor used so this value is also set for `rooknfs.storageCapacity` if necessary
+
+
+# Values to be passed to the rook-nfs sub-chart
+# See rook-nfs sub-chart for full set of available config values
+rooknfs:
+  enabled: true
+  # Name given to the RWX StorageClass created by Rook
+  # NB this must match storage.storageClassName when using Rook
+  storageClassName: slurm-rook-nfs
+  # Name for the NFSServer resource created by Rook
+  serverName: rook-nfs
+  # Capacity for the backing Read-Write-*Once* volume
+  # that Rook will create to provide the actual storage to
+  # the NFS server. Since we're using the Rook NFS in a
+  # slightly unconventional way here, we just want to anchor
+  # this value to the requested storage capacity for the RWX
+  # volume specified in storage.capacity
+  storageCapacity: *capacity
+  # Storage class to use for the Read-Write-Once backing PVC
+  # backingStorageClass:
+
+
 sqlImage: mariadb:10.10
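One caveat worth noting on the `&capacity` anchor above: YAML anchors are resolved when this single `values.yaml` file is parsed, so they do not reach into user-supplied override files. If a deployment overrides `storage.capacity` while keeping the bundled RookNFS, the backing capacity should be overridden alongside it; a minimal sketch (sizes illustrative):

```yaml
# override file keeping RookNFS but resizing the shared volume
storage:
  capacity: 50Gi
rooknfs:
  storageCapacity: 50Gi   # keep the RWO backing volume in step with storage.capacity
```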