diff --git a/community/examples/hpc-slurm-legacy-sharedvpc.yaml b/community/examples/hpc-slurm-legacy-sharedvpc.yaml
index 9db8f115dd..d44d333140 100644
--- a/community/examples/hpc-slurm-legacy-sharedvpc.yaml
+++ b/community/examples/hpc-slurm-legacy-sharedvpc.yaml
@@ -55,46 +55,50 @@ deployment_groups:
       local_mount: /home
       connect_mode: PRIVATE_SERVICE_ACCESS
 
-  # This debug_partition will work out of the box without requesting additional GCP quota.
+  - id: debug_nodeset
+    source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
+    use: [network1]
+    settings:
+      node_count_dynamic_max: 4
+      machine_type: n2-standard-2
+      enable_placement: false  # the default is true
+
   - id: debug_partition
-    source: community/modules/compute/SchedMD-slurm-on-gcp-partition
-    use:
-    - network1
-    - homefs
+    source: community/modules/compute/schedmd-slurm-gcp-v6-partition
+    use: [debug_nodeset, homefs]
     settings:
       partition_name: debug
-      max_node_count: 4
-      enable_placement: false
-      exclusive: false
-      machine_type: n2-standard-2
+      exclusive: false  # allows nodes to stay up after jobs are done
+      is_default: true
 
-  # This compute_partition is far more performant than debug_partition but may require requesting GCP quotas first.
-  - id: compute_partition
-    source: community/modules/compute/SchedMD-slurm-on-gcp-partition
-    use:
-    - network1
-    - homefs
+  - id: compute_nodeset
+    source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
+    use: [network1]
     settings:
-      partition_name: compute
-      max_node_count: 20
+      node_count_dynamic_max: 20
       bandwidth_tier: gvnic_enabled
 
-  - id: slurm_controller
-    source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
-    use:
-    - network1
-    - homefs
-    - debug_partition  # debug partition will be default as it is listed first
-    - compute_partition
+  - id: compute_partition
+    source: community/modules/compute/schedmd-slurm-gcp-v6-partition
+    use: [compute_nodeset, homefs]
     settings:
-      login_node_count: 1
-      shared_vpc_host_project: $(vars.host_project_id)
+      partition_name: compute
 
   - id: slurm_login
-    source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+    source: community/modules/scheduler/schedmd-slurm-gcp-v6-login
+    use: [network1]
+    settings:
+      name_prefix: login
+      machine_type: n2-standard-4
+      disable_login_public_ips: false
+
+  - id: slurm_controller
+    source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller
     use:
     - network1
+    - debug_partition
+    - compute_partition
+    - slurm_login
     - homefs
-    - slurm_controller
     settings:
-      shared_vpc_host_project: $(vars.host_project_id)
+      disable_controller_public_ips: false
diff --git a/examples/README.md b/examples/README.md
index be8eacbed6..0038bad968 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -25,6 +25,7 @@ md_toc github examples/README.md | sed -e "s/\s-\s/ * /"
   * [pfs-daos.yaml](#pfs-daosyaml-) ![community-badge]
   * [hpc-slurm-daos.yaml](#hpc-slurm-daosyaml-) ![community-badge]
   * [hpc-amd-slurm.yaml](#hpc-amd-slurmyaml-) ![community-badge]
+  * [hpc-slurm-legacy-sharedvpc.yaml](#hpc-slurm-legacy-sharedvpcyaml-) ![community-badge]
   * [client-google-cloud-storage.yaml](#client-google-cloud-storageyaml--) ![community-badge] ![experimental-badge]
   * [hpc-slurm-gromacs.yaml](#hpc-slurm-gromacsyaml--) ![community-badge] ![experimental-badge]
   * [omnia-cluster.yaml](#omnia-clusteryaml--) ![community-badge] ![experimental-badge]
@@ -41,7 +42,6 @@ md_toc github examples/README.md | sed -e "s/\s-\s/ * /"
   * [hpc-slurm-chromedesktop.yaml](#hpc-slurm-chromedesktopyaml--) ![community-badge] ![experimental-badge]
   * [flux-cluster](#flux-clusteryaml--) ![community-badge] ![experimental-badge]
   * [tutorial-fluent.yaml](#tutorial-fluentyaml--) ![community-badge] ![experimental-badge]
-  * [hpc-slurm-legacy-sharedvpc.yaml](#hpc-slurm-legacy-sharedvpcyaml--) ![community-badge] ![deprecated-badge]
 * [Blueprint Schema](#blueprint-schema)
 * [Writing an HPC Blueprint](#writing-an-hpc-blueprint)
   * [Blueprint Boilerplate](#blueprint-boilerplate)
@@ -977,12 +977,17 @@ See [README](../community/examples/flux-framework/README.md)
 
 [flux-cluster.yaml]: ../community/examples/flux-framework/flux-cluster.yaml
 
-### [hpc-slurm-legacy-sharedvpc.yaml] ![community-badge] ![deprecated-badge]
+### [hpc-slurm-legacy-sharedvpc.yaml] ![community-badge]
 
 This blueprint demonstrates the use of the Slurm and Filestore modules in
-the service project of an existing Shared VPC.  Before attempting to deploy the
+the service project of an existing Shared VPC. Before attempting to deploy the
 blueprint, one must first complete [initial setup for provisioning Filestore in
-a Shared VPC service project][fs-shared-vpc].
+a Shared VPC service project][fs-shared-vpc]. Depending on how the Shared VPC
+was created, one may have to perform a few additional manual steps to configure
+the VPC, such as creating firewall rules that allow SSH access to the
+controller and login nodes. Also, since this blueprint does not use external
+IPs for compute nodes, one must [set up Cloud NAT][cloudnat] and
+[set up IAP][iap].
 
 [hpc-slurm-legacy-sharedvpc.yaml]: ../community/examples/hpc-slurm-legacy-sharedvpc.yaml
 [fs-shared-vpc]: https://cloud.google.com/filestore/docs/shared-vpc
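
For readers skimming the interleaved `+`/`-` lines above, the reworked debug partition resolves to roughly the following blueprint fragment once the change is applied. This is a sketch assembled from the hunk above, not a verbatim copy of the updated file:

```yaml
  # Sketch of the slurm-gcp v6 wiring implied by the diff above:
  # the nodeset now owns the machine shape and dynamic node count...
  - id: debug_nodeset
    source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
    use: [network1]
    settings:
      node_count_dynamic_max: 4
      machine_type: n2-standard-2
      enable_placement: false

  # ...while the partition simply references the nodeset (plus the shared
  # /home Filestore) instead of carrying compute settings itself.
  - id: debug_partition
    source: community/modules/compute/schedmd-slurm-gcp-v6-partition
    use: [debug_nodeset, homefs]
    settings:
      partition_name: debug
      exclusive: false
      is_default: true
```

The compute partition, login, and controller modules follow the same pattern: per-nodeset compute settings, thin partition definitions, and a controller that consumes the partitions and the login module through `use`.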