From 925880bc31914547e10bbe7542e444b9ad1cbcd8 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 15 Sep 2022 15:16:11 -0700 Subject: [PATCH 1/2] Integrate DDN Lustre install script with startup-script --- .../file-system/DDN-EXAScaler/README.md | 43 ++++++++++++++++++- .../file-system/DDN-EXAScaler/outputs.tf | 35 ++++++++++++++- .../blueprints/lustre-with-new-vpc.yaml | 20 ++++++--- 3 files changed, 89 insertions(+), 9 deletions(-) diff --git a/community/modules/file-system/DDN-EXAScaler/README.md b/community/modules/file-system/DDN-EXAScaler/README.md index 1e0ad80581..bad224e570 100644 --- a/community/modules/file-system/DDN-EXAScaler/README.md +++ b/community/modules/file-system/DDN-EXAScaler/README.md @@ -21,7 +21,45 @@ More information about the architecture can be found at [marketplace]: https://console.developers.google.com/marketplace/product/ddnstorage/exascaler-cloud [architecture]: https://cloud.google.com/architecture/lustre-architecture +## Mounting + +To mount the DDN EXAScaler Lustre file system you must first install the DDN +Lustre client and then call the proper `mount` command. + +When mounting to a Slurm resource both of these steps are automatically handled +with the use of the `use` field. See the +[hpc-cluster-high-io](../../../../examples/hpc-cluster-high-io.yaml) for an +example of using this module with Slurm. + +The DDN-EXAScaler module outputs runners that can be used with the +startup-script module to install the client and mount the file system when +mounting to other compute resources such as `vm-instance` or `cloud-batch-job`. 
+See the following example: + +```yaml + - id: lustrefs + source: community/modules/file-system/DDN-EXAScaler + use: [network1] + settings: {local_mount: /scratch} + + - id: mount-at-startup + source: modules/scripts/startup-script + settings: + runners: + - $(lustrefs.install_ddn_lustre_client_runner) + - $(lustrefs.mount_runner) + + - id: workstation + source: modules/compute/vm-instance + use: [network1, lustrefs, mount-at-startup] +``` + +See [additional documentation][ddn-install-docs] from DDN EXAScaler. + +[ddn-install-docs]: https://github.com/DDNStorage/exascaler-cloud-terraform/tree/master/gcp#install-new-exascaler-cloud-clients + ## Support + EXAScaler Cloud includes self-help support with access to publicly available documents and videos. Premium support includes 24x7x365 access to DDN's experts, along with support community access, automated notifications of updates and @@ -101,8 +139,11 @@ No resources. | Name | Description | |------|-------------| +| [client\_config](#output\_client\_config) | Script that will install DDN EXAScaler lustre client. The machine running this script must be on the same network & subnet as the EXAScaler. | | [http\_console](#output\_http\_console) | HTTP address to access the system web console. | -| [mount\_command](#output\_mount\_command) | Command to mount the file system. | +| [install\_ddn\_lustre\_client\_runner](#output\_install\_ddn\_lustre\_client\_runner) | Runner that encapsulates the `client_config` output on this module. | +| [mount\_command](#output\_mount\_command) | Command to mount the file system. `client_config` script must be run first. | +| [mount\_runner](#output\_mount\_runner) | Runner to mount the DDN EXAScaler Lustre file system | | [network\_storage](#output\_network\_storage) | Describes a EXAScaler system to be mounted by other systems. | | [private\_addresses](#output\_private\_addresses) | Private IP addresses for all instances. 
| | [ssh\_console](#output\_ssh\_console) | Instructions to ssh into the instances. | diff --git a/community/modules/file-system/DDN-EXAScaler/outputs.tf b/community/modules/file-system/DDN-EXAScaler/outputs.tf index 7a2da4c7bb..1343799a34 100644 --- a/community/modules/file-system/DDN-EXAScaler/outputs.tf +++ b/community/modules/file-system/DDN-EXAScaler/outputs.tf @@ -24,9 +24,39 @@ output "ssh_console" { value = module.ddn_exascaler.ssh_console } +output "client_config" { + description = "Script that will install DDN EXAScaler lustre client. The machine running this script must be on the same network & subnet as the EXAScaler." + value = module.ddn_exascaler.client_config +} + +output "install_ddn_lustre_client_runner" { + description = "Runner that encapsulates the `client_config` output on this module." + value = { + "type" = "shell" + "content" = module.ddn_exascaler.client_config + "destination" = "install_ddn_lustre_client.sh" + } +} + +locals { + split_mount_cmd = split(" ", module.ddn_exascaler.mount_command) + split_mount_cmd_wo_mountpoint = slice(local.split_mount_cmd, 0, length(local.split_mount_cmd) - 1) + mount_cmd = "${join(" ", local.split_mount_cmd_wo_mountpoint)} ${var.local_mount}" + mount_cmd_w_mkdir = "mkdir -p ${var.local_mount} && ${local.mount_cmd}" +} + output "mount_command" { - description = "Command to mount the file system." - value = module.ddn_exascaler.mount_command + description = "Command to mount the file system. `client_config` script must be run first." + value = local.mount_cmd_w_mkdir +} + +output "mount_runner" { + description = "Runner to mount the DDN EXAScaler Lustre file system" + value = { + "type" = "shell" + "content" = local.mount_cmd_w_mkdir + "destination" = "mount-ddn-lustre.sh" + } } output "http_console" { @@ -34,6 +64,7 @@ output "http_console" { value = module.ddn_exascaler.http_console } + output "network_storage" { description = "Describes a EXAScaler system to be mounted by other systems." 
value = { diff --git a/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml b/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml index f3d0bfb097..2fa0df824e 100644 --- a/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml +++ b/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml @@ -41,15 +41,18 @@ deployment_groups: settings: local_mount: /home - # Explicitly picking the local version of the module - id: scratchfs source: community/modules/file-system/DDN-EXAScaler - kind: terraform + use: [network1] settings: local_mount: /scratch - network_self_link: $(network1.network_self_link) - subnetwork_self_link: $(network1.subnetwork_self_link) - subnetwork_address: $(network1.subnetwork_address) + + - id: mount-exascaler + source: modules/scripts/startup-script + settings: + runners: + - $(scratchfs.install_ddn_lustre_client_runner) + - $(scratchfs.mount_runner) # Create a separate workstation to catch regressions in vm-instance - id: workstation @@ -58,11 +61,16 @@ deployment_groups: use: - network1 - homefs - - scratchfs + - mount-exascaler settings: name_prefix: test-workstation machine_type: c2-standard-4 + - id: wait0 + source: ./community/modules/scripts/wait-for-startup + settings: + instance_name: ((module.workstation.name[0])) + - id: compute_partition source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform From c44b7a46f27e5558b8bb1fde8941d37c85fb90ca Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Fri, 16 Sep 2022 08:30:54 -0700 Subject: [PATCH 2/2] Rename EXAScaler output to clarify it is a script --- community/modules/file-system/DDN-EXAScaler/README.md | 6 +++--- community/modules/file-system/DDN-EXAScaler/outputs.tf | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/community/modules/file-system/DDN-EXAScaler/README.md b/community/modules/file-system/DDN-EXAScaler/README.md index bad224e570..5c306508ac 100644 --- 
a/community/modules/file-system/DDN-EXAScaler/README.md +++ b/community/modules/file-system/DDN-EXAScaler/README.md @@ -139,10 +139,10 @@ No resources. | Name | Description | |------|-------------| -| [client\_config](#output\_client\_config) | Script that will install DDN EXAScaler lustre client. The machine running this script must be on the same network & subnet as the EXAScaler. | +| [client\_config\_script](#output\_client\_config\_script) | Script that will install DDN EXAScaler lustre client. The machine running this script must be on the same network & subnet as the EXAScaler. | | [http\_console](#output\_http\_console) | HTTP address to access the system web console. | -| [install\_ddn\_lustre\_client\_runner](#output\_install\_ddn\_lustre\_client\_runner) | Runner that encapsulates the `client_config` output on this module. | -| [mount\_command](#output\_mount\_command) | Command to mount the file system. `client_config` script must be run first. | +| [install\_ddn\_lustre\_client\_runner](#output\_install\_ddn\_lustre\_client\_runner) | Runner that encapsulates the `client_config_script` output on this module. | +| [mount\_command](#output\_mount\_command) | Command to mount the file system. `client_config_script` must be run first. | | [mount\_runner](#output\_mount\_runner) | Runner to mount the DDN EXAScaler Lustre file system | | [network\_storage](#output\_network\_storage) | Describes a EXAScaler system to be mounted by other systems. | | [private\_addresses](#output\_private\_addresses) | Private IP addresses for all instances. 
| diff --git a/community/modules/file-system/DDN-EXAScaler/outputs.tf b/community/modules/file-system/DDN-EXAScaler/outputs.tf index 1343799a34..4713a921a5 100644 --- a/community/modules/file-system/DDN-EXAScaler/outputs.tf +++ b/community/modules/file-system/DDN-EXAScaler/outputs.tf @@ -24,13 +24,13 @@ output "ssh_console" { value = module.ddn_exascaler.ssh_console } -output "client_config" { +output "client_config_script" { description = "Script that will install DDN EXAScaler lustre client. The machine running this script must be on the same network & subnet as the EXAScaler." value = module.ddn_exascaler.client_config } output "install_ddn_lustre_client_runner" { - description = "Runner that encapsulates the `client_config` output on this module." + description = "Runner that encapsulates the `client_config_script` output on this module." value = { "type" = "shell" "content" = module.ddn_exascaler.client_config @@ -46,7 +46,7 @@ locals { } output "mount_command" { - description = "Command to mount the file system. `client_config` script must be run first." + description = "Command to mount the file system. `client_config_script` must be run first." value = local.mount_cmd_w_mkdir } @@ -64,7 +64,6 @@ output "http_console" { value = module.ddn_exascaler.http_console } - output "network_storage" { description = "Describes a EXAScaler system to be mounted by other systems." value = {