From 17399f0b40fd17a914ed126a2307412b9ba04ea4 Mon Sep 17 00:00:00 2001
From: Harsh Thakkar
Date: Wed, 17 Jan 2024 05:07:35 +0000
Subject: [PATCH] Update spack gromacs example tutorial and reference to use Slurm V6

---
 docs/tutorials/gromacs/spack-gromacs.md   | 67 +++++++++--------------
 docs/tutorials/gromacs/spack-gromacs.yaml | 52 ++++++++++--------
 2 files changed, 54 insertions(+), 65 deletions(-)

diff --git a/docs/tutorials/gromacs/spack-gromacs.md b/docs/tutorials/gromacs/spack-gromacs.md
index c8719aaba7..ce8400e1e5 100644
--- a/docs/tutorials/gromacs/spack-gromacs.md
+++ b/docs/tutorials/gromacs/spack-gromacs.md
@@ -5,7 +5,7 @@ easy for customers to deploy HPC environments on Google Cloud.
 
 In this tutorial you will use the HPC Toolkit to:
 
-* Deploy a [Slurm](https://github.com/SchedMD/slurm-gcp#readme) HPC cluster on
+* Deploy a [Slurm](https://github.com/GoogleCloudPlatform/slurm-gcp#readme) HPC cluster on
   Google Cloud
 * Use [Spack](https://spack.io/) to install the Gromacs application and all of
   its dependencies
@@ -13,10 +13,10 @@ In this tutorial you will use the HPC Toolkit to:
   cluster
 * Tear down the cluster
 
-Estimated time to complete:
-The tutorial takes 2 hr. to complete,
-of which 1.5 hr is for installing software
-(without cache).
+Estimated time to complete:
+The tutorial takes 2 hr. to complete,
+of which 1.5 hr is for installing software
+(without cache).
 
 > **_NOTE:_** With a complete Spack cache, the tutorial takes 30 min.
 
@@ -75,7 +75,7 @@ which should be open in the Cloud Shell Editor (on the left).
 
 This file describes the cluster you will deploy. It defines:
 
-* the existing default network from your project
+* a VPC network
 * a monitoring dashboard with metrics on your cluster
 * a definition of a custom Spack installation
 * a startup script that
@@ -84,7 +84,6 @@ This file describes the cluster you will deploy. It defines:
   * sets up a Spack environment including downloading an example input deck
   * places a submission script on a shared drive
 * a Slurm cluster
-  * a Slurm login node
   * a Slurm controller
   * An auto-scaling Slurm partition
 
@@ -106,27 +105,13 @@ contains the terraform needed to deploy your cluster.
 
 ## Deploy the Cluster
 
-Use the following commands to run terraform and deploy your cluster.
+Use the command below to deploy your cluster.
 
 ```bash
-terraform -chdir=spack-gromacs/primary init
-terraform -chdir=spack-gromacs/primary apply
+./ghpc deploy spack-gromacs
 ```
 
-The `terraform apply` command will generate a _plan_ that describes the Google
-Cloud resources that will be deployed.
-
-You can review the plan and then start the deployment by typing
-**`yes [enter]`**.
-
-The deployment will take about 30 seconds. There should be regular status updates
-in the terminal.
-
-If the `apply` is successful, a message similar to the following will be
-displayed:
-
-
-
+After the deployment is finished, you should see a message similar to the one below.
 
 ```shell
 Apply complete! Resources: xx added, 0 changed, 0 destroyed.
@@ -144,30 +129,30 @@ controller.
This command can be used to view progress and check for completion of the startup script: ```bash -gcloud compute instances get-serial-port-output --port 1 --zone us-central1-c --project slurm-spack-gromacs-controller | grep google_metadata_script_runner +gcloud compute instances get-serial-port-output --port 1 --zone us-central1-c --project spackgroma-controller | grep google_metadata_script_runner ``` When the startup script has finished running you will see the following line as the final output from the above command: -> _`slurm-spack-gromacs-controller google_metadata_script_runner: Finished running startup scripts.`_ +> _`spackgroma-controller google_metadata_script_runner: Finished running startup scripts.`_ Optionally while you wait, you can see your deployed VMs on Google Cloud Console. Open the link below in a new window. Look for -`slurm-spack-gromacs-controller` and `slurm-spack-gromacs-login0`. If you don't +`spackgroma-controller`. If you don't see your VMs make sure you have the correct project selected (top left). ```text https://console.cloud.google.com/compute?project= ``` -## Connecting to the login node +## Connecting to the controller node -Once the startup script has completed, connect to the login node. +Once the startup script has completed, connect to the controller node. -Use the following command to ssh into the login node from cloud shell: +Use the following command to ssh into the controller node from cloud shell: ```bash -gcloud compute ssh slurm-spack-gromacs-login0 --zone us-central1-c --project +gcloud compute ssh spackgroma-controller --zone us-central1-c --project ``` You may be prompted to set up SSH. If so follow the prompts and if asked for a @@ -191,15 +176,15 @@ following instructions: https://console.cloud.google.com/compute?project= ``` -1. Click on the `SSH` button associated with the `slurm-spack-gromacs-login0` +1. Click on the `SSH` button associated with the `spackgroma-controller` instance. This will open a separate pop up window with a terminal into our newly - created Slurm login VM. + created Slurm controller VM. ## Run a Job on the Cluster - **The commands below should be run on the Slurm login node.** + **The commands below should be run on the Slurm controller node.** We will use the submission script (see line 122 of the blueprint) to submit a Gromacs job. @@ -213,7 +198,7 @@ Gromacs job. 2. Submit the job to Slurm to be scheduled: ```bash - sbatch /apps/gromacs/submit_gromacs.sh + sbatch /opt/apps/gromacs/submit_gromacs.sh ``` 3. Once submitted, you can watch the job progress by repeatedly calling the @@ -227,7 +212,7 @@ The `sbatch` command trigger Slurm to auto-scale up several nodes to run the job You can refresh the `Compute Engine` > `VM instances` page and see that additional VMs are being/have been created. These will be named something like -`slurm-spack-gromacs-compute-0-0`. +`spackgroma-comput-0`. When running `squeue`, observe the job status start as `CF` (configuring), change to `R` (running) once the compute VMs have been created, and finally `CG` @@ -247,8 +232,8 @@ about 5 minutes to run. Several files will have been generated in the `test_run/` folder you created. The `md.log` and `slurm-1.out` files have information on the run such as -performance. You can view these files by running the following commandsq on the -login node: +performance. 
You can view these files by running the following commands on the +controller node: ```bash cat slurm-*.out @@ -273,9 +258,9 @@ https://console.cloud.google.com/monitoring/dashboards?project= **_NOTE:_** If you are accessing the login node terminal via a separate pop-up +> **_NOTE:_** If you are accessing the controller node terminal via a separate pop-up > then make sure to call `exit` in the pop-up window. ```bash @@ -285,7 +270,7 @@ exit Run the following command in the cloud shell terminal to destroy the cluster: ```bash -terraform -chdir=spack-gromacs/primary destroy -auto-approve +./ghpc destroy spack-gromacs ``` When complete you should see something like: diff --git a/docs/tutorials/gromacs/spack-gromacs.yaml b/docs/tutorials/gromacs/spack-gromacs.yaml index fe5bf475b1..285443c0b8 100644 --- a/docs/tutorials/gromacs/spack-gromacs.yaml +++ b/docs/tutorials/gromacs/spack-gromacs.yaml @@ -26,7 +26,7 @@ deployment_groups: - group: primary modules: - id: network1 - source: modules/network/pre-existing-vpc + source: modules/network/vpc - id: hpc_dash source: modules/monitoring/dashboard @@ -35,8 +35,8 @@ deployment_groups: - id: spack-setup source: community/modules/scripts/spack-setup settings: - install_dir: /apps/spack - spack_ref: v0.19.0 + install_dir: /opt/apps/spack + spack_ref: v0.20.0 - id: spack-execute source: community/modules/scripts/spack-execute @@ -88,7 +88,7 @@ deployment_groups: # fi # spack buildcache keys --install --trust - spack config --scope defaults add config:build_stage:/apps/spack/spack-stage + spack config --scope defaults add config:build_stage:/opt/apps/spack/spack-stage spack config --scope defaults add -f /tmp/projections-config.yaml spack config --scope site add -f /tmp/slurm-external-config.yaml @@ -107,22 +107,26 @@ deployment_groups: source: modules/scripts/startup-script settings: runners: + # remove lustre client temporary to avoid startup failure due to known + # issue. + - type: shell + destination: remove_lustre_client.sh + content: | + #!/bin/bash + rm /etc/yum.repos.d/lustre-client.repo - $(spack-execute.spack_runner) - type: shell destination: setup_gromacs.sh content: | #!/bin/bash - source /apps/spack/share/spack/setup-env.sh + source /opt/apps/spack/share/spack/setup-env.sh spack env activate gromacs - chmod -R a+rwX /apps/spack/var/spack/environments/gromacs - mkdir -p /apps/gromacs - chmod a+rwx /apps/gromacs - cd /apps/gromacs + cd /opt/apps/gromacs wget --no-verbose https://ftp.gromacs.org/pub/benchmarks/water_GMX50_bare.tar.gz tar xzf water_GMX50_bare.tar.gz - type: data - destination: /apps/gromacs/submit_gromacs.sh + destination: /opt/apps/gromacs/submit_gromacs.sh content: | #!/bin/bash #SBATCH -N 2 @@ -131,36 +135,36 @@ deployment_groups: # Size can be 0000.65 0000.96 0001.5 0003 0006 0012 0024 0048 0096 0192 0384 0768 1536 3072 # Type can be 'pme' or 'rf' - source /apps/spack/share/spack/setup-env.sh + source /opt/apps/spack/share/spack/setup-env.sh spack env activate gromacs # Check that gmx_mpi exists which gmx_mpi cd $SLURM_SUBMIT_DIR - cp /apps/gromacs/water-cut1.0_GMX50_bare/1536/* . + cp /opt/apps/gromacs/water-cut1.0_GMX50_bare/1536/* . 
scontrol show hostnames ${SLURM_JOB_NODELIST} > hostfile gmx_mpi grompp -f pme.mdp -c conf.gro -p topol.top -o input.tpr mpirun -n 60 -hostfile hostfile -ppn 30 gmx_mpi mdrun -notunepme -dlb yes -v -resethway -noconfout -nsteps 4000 -s input.tpr + - id: compute_nodeset + source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset + use: [network1] + settings: + node_count_dynamic_max: 20 + bandwidth_tier: gvnic_enabled + - id: compute_partition - source: community/modules/compute/SchedMD-slurm-on-gcp-partition - use: - - network1 + source: community/modules/compute/schedmd-slurm-gcp-v6-partition + use: [compute_nodeset] settings: partition_name: compute - max_node_count: 20 - id: slurm_controller - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller use: - network1 - compute_partition settings: + disable_controller_public_ips: false + controller_startup_scripts_timeout: 21600 controller_startup_script: $(controller-setup.startup_script) - login_node_count: 1 - - - id: slurm_login - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node - use: - - network1 - - slurm_controller
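
For quick reference, the core of this change is the switch from the `SchedMD-slurm-on-gcp-*` modules to the Slurm-GCP v6 module family, in which a nodeset describes the compute VMs, a partition groups nodesets, and the controller consumes partitions (the separate login-node module is dropped, since the tutorial now connects to the controller). The sketch below condenses the wiring added above; the module IDs and values mirror the blueprint changes in this patch, and anything not shown is assumed to keep its defaults.

```yaml
  # Condensed view of the Slurm-GCP v6 wiring introduced by this patch
  # (IDs and values mirror the blueprint above; other settings use defaults).
  - id: compute_nodeset
    source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
    use: [network1]
    settings:
      node_count_dynamic_max: 20        # auto-scaling compute nodes
      bandwidth_tier: gvnic_enabled

  - id: compute_partition
    source: community/modules/compute/schedmd-slurm-gcp-v6-partition
    use: [compute_nodeset]              # partition is built from the nodeset
    settings:
      partition_name: compute

  - id: slurm_controller
    source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller
    use: [network1, compute_partition]  # controller consumes the partition
    settings:
      disable_controller_public_ips: false
      controller_startup_scripts_timeout: 21600
      controller_startup_script: $(controller-setup.startup_script)
```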