From 713217cc4db05fff5ffb2b439f83cfcf5817645a Mon Sep 17 00:00:00 2001
From: Harsh Thakkar
Date: Wed, 17 Jan 2024 21:25:11 +0000
Subject: [PATCH] Update hpc-slurm-legacy-sharedvpc example and references to
 use Slurm V6

---
 .../examples/hpc-slurm-legacy-sharedvpc.yaml  | 62 ++++++++++---------
 examples/README.md                            |  4 +-
 2 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/community/examples/hpc-slurm-legacy-sharedvpc.yaml b/community/examples/hpc-slurm-legacy-sharedvpc.yaml
index 9db8f115dd..d44d333140 100644
--- a/community/examples/hpc-slurm-legacy-sharedvpc.yaml
+++ b/community/examples/hpc-slurm-legacy-sharedvpc.yaml
@@ -55,46 +55,50 @@ deployment_groups:
       local_mount: /home
       connect_mode: PRIVATE_SERVICE_ACCESS
 
-  # This debug_partition will work out of the box without requesting additional GCP quota.
+  - id: debug_nodeset
+    source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
+    use: [network1]
+    settings:
+      node_count_dynamic_max: 4
+      machine_type: n2-standard-2
+      enable_placement: false  # the default is: true
+
   - id: debug_partition
-    source: community/modules/compute/SchedMD-slurm-on-gcp-partition
-    use:
-    - network1
-    - homefs
+    source: community/modules/compute/schedmd-slurm-gcp-v6-partition
+    use: [debug_nodeset, homefs]
     settings:
       partition_name: debug
-      max_node_count: 4
-      enable_placement: false
-      exclusive: false
-      machine_type: n2-standard-2
+      exclusive: false  # allows nodes to stay up after jobs are done
+      is_default: true
 
-  # This compute_partition is far more performant than debug_partition but may require requesting GCP quotas first.
-  - id: compute_partition
-    source: community/modules/compute/SchedMD-slurm-on-gcp-partition
-    use:
-    - network1
-    - homefs
+  - id: compute_nodeset
+    source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
+    use: [network1]
     settings:
-      partition_name: compute
-      max_node_count: 20
+      node_count_dynamic_max: 20
       bandwidth_tier: gvnic_enabled
 
-  - id: slurm_controller
-    source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
-    use:
-    - network1
-    - homefs
-    - debug_partition  # debug partition will be default as it is listed first
-    - compute_partition
+  - id: compute_partition
+    source: community/modules/compute/schedmd-slurm-gcp-v6-partition
+    use: [compute_nodeset, homefs]
     settings:
-      login_node_count: 1
-      shared_vpc_host_project: $(vars.host_project_id)
+      partition_name: compute
 
   - id: slurm_login
-    source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+    source: community/modules/scheduler/schedmd-slurm-gcp-v6-login
+    use: [network1]
+    settings:
+      name_prefix: login
+      machine_type: n2-standard-4
+      disable_login_public_ips: false
+
+  - id: slurm_controller
+    source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller
     use:
     - network1
+    - debug_partition
+    - compute_partition
+    - slurm_login
     - homefs
-    - slurm_controller
     settings:
-      shared_vpc_host_project: $(vars.host_project_id)
+      disable_controller_public_ips: false
diff --git a/examples/README.md b/examples/README.md
index 4d7d9ad79a..19fc3fe4b5 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -41,7 +41,7 @@ md_toc github examples/README.md | sed -e "s/\s-\s/ * /"
   * [hpc-slurm-chromedesktop.yaml](#hpc-slurm-chromedesktopyaml--) ![community-badge] ![experimental-badge]
   * [flux-cluster](#flux-clusteryaml--) ![community-badge] ![experimental-badge]
   * [tutorial-fluent.yaml](#tutorial-fluentyaml--) ![community-badge] ![experimental-badge]
-  * [hpc-slurm-legacy-sharedvpc.yaml](#hpc-slurm-legacy-sharedvpcyaml--) ![community-badge] ![deprecated-badge]
+  * [hpc-slurm-legacy-sharedvpc.yaml](#hpc-slurm-legacy-sharedvpcyaml--) ![community-badge]
 * [Blueprint Schema](#blueprint-schema)
 * [Writing an HPC Blueprint](#writing-an-hpc-blueprint)
   * [Blueprint Boilerplate](#blueprint-boilerplate)
@@ -980,7 +980,7 @@ See [README](../community/examples/flux-framework/README.md)
 
 [flux-cluster.yaml]: ../community/examples/flux-framework/flux-cluster.yaml
 
-### [hpc-slurm-legacy-sharedvpc.yaml] ![community-badge] ![deprecated-badge]
+### [hpc-slurm-legacy-sharedvpc.yaml] ![community-badge]
 
 This blueprint demonstrates the use of the Slurm and Filestore modules in the
 service project of an existing Shared VPC. Before attempting to deploy the