Skip to content

Commit

Permalink
Merge branch 'develop' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
hgreebe authored Nov 19, 2023
2 parents f65fba9 + ba70d62 commit 4418311
Show file tree
Hide file tree
Showing 11 changed files with 37 additions and 12 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ This file is used to list changes made in each version of the AWS ParallelCluste
- Add support for Rocky Linux 8.
- Add support for EC2 Capacity Blocks for ML.
- Add support for `Scheduling/SlurmSettings/Database/DatabaseName` parameter to render `StorageLoc` in the slurmdbd configuration generated by ParallelCluster.
- Add the option to use EFS storage instead of NFS exports from the head node root volume for intra-cluster shared ParallelCluster, Intel, Slurm, and login node data.
- Add the option to use EFS storage instead of NFS exports from the head node root volume for intra-cluster shared ParallelCluster, Intel, Slurm, login node, and /home data.
- Allow for mounting `home` as an EFS or FSx external shared storage via the `SharedStorage` section of the config file.

**CHANGES**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ suites:
cluster:
node_type: 'HeadNode'
scheduler: 'slurm'
internal_shared_storage_type: 'efs'
shared_storage_type: 'efs'
efs_shared_dirs: '/opt/parallelcluster/init_shared'
efs_fs_ids: 'fs-03ad31942a4205839' # Existing FS, needs to be set when running the test
ebs_shared_dirs: ''
Expand All @@ -661,7 +661,7 @@ suites:
cluster:
node_type: 'ComputeFleet'
scheduler: 'slurm'
internal_shared_storage_type: 'efs'
shared_storage_type: 'efs'
efs_shared_dirs: '/opt/parallelcluster/init_shared'
efs_fs_ids: 'fs-03ad31942a4205839' # Existing FS, needs to be set when running the test
ebs_shared_dirs: ''
Expand All @@ -680,7 +680,7 @@ suites:
cluster:
node_type: 'LoginNode'
scheduler: 'slurm'
internal_shared_storage_type: 'efs'
shared_storage_type: 'efs'
efs_shared_dirs: '/opt/parallelcluster/init_shared'
efs_fs_ids: 'fs-03ad31942a4205839' # Existing FS, needs to be set when running the test
ebs_shared_dirs: ''
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
# Export the home dir from the head node when using ebs
include_recipe 'aws-parallelcluster-environment::export_home'

if node['cluster']['internal_shared_storage_type'] == 'ebs'
if node['cluster']['shared_storage_type'] == 'ebs'
# Export internal use dirs from the head node
include_recipe 'aws-parallelcluster-environment::export_internal_use_ebs'
# Mount intel on compute and login nodes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# Return early if home is already a shared filesystem, or if shared_storage_type is 'efs', because in either case there is nothing to export
shared_storage = node['cluster']['efs_shared_dirs'].split(',') + node['cluster']['fsx_shared_dirs'].split(',') +
node['cluster']['ebs_shared_dirs'].split(',') + node['cluster']['raid_shared_dir'].split(',')
return if shared_storage.include?('/home') || shared_storage.include?('home')
return if shared_storage.include?('/home') || shared_storage.include?('home') || node['cluster']['shared_storage_type'] == 'efs'

case node['cluster']['node_type']
when 'HeadNode'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

# Check if home is a shared filesystem
shared_home = 'none'
shared_home = 'internal' if node['cluster']['shared_storage_type'] == 'efs'
shared_storage.each do |type, dirs|
next unless dirs.include?('/home') || dirs.include?('home')
shared_home = type
Expand Down Expand Up @@ -51,6 +52,19 @@
include_recipe "aws-parallelcluster-environment::update_fs_mapping"
include_recipe "aws-parallelcluster-environment::backup_home_shared_data"
case shared_home
when 'internal'
shared_storage['efs'].each_with_index do |dir, index|
next unless dir == node['cluster']['internal_initial_shared_dir']
efs "mount internal shared efs home" do
shared_dir_array ['/home']
efs_fs_id_array [node['cluster']['efs_fs_ids'].split(',')[index]]
efs_encryption_in_transit_array [node['cluster']['efs_encryption_in_transits'].split(',')[index]]
efs_iam_authorization_array [node['cluster']['efs_iam_authorizations'].split(',')[index]]
efs_mount_point_array ['/home']
action :mount
end
break
end
when 'efs'
shared_storage['efs'].each_with_index do |dir, index|
next unless dir == "/home" || dir == 'home'
Expand Down
4 changes: 2 additions & 2 deletions cookbooks/aws-parallelcluster-environment/recipes/init.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
action :configure
end

case node['cluster']['internal_shared_storage_type']
case node['cluster']['shared_storage_type']
when 'efs'
include_recipe "aws-parallelcluster-environment::mount_internal_use_efs"
when 'ebs'
include_recipe "aws-parallelcluster-environment::mount_internal_use_ebs"
else
raise "internal_shared_storage_type must be ebs or efs"
raise "shared_storage_type must be ebs or efs"
end

# Mount the home directory to all nodes if it is shared, otherwise mount the NFS share to compute and login nodes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,17 @@
end
end unless initial_shared_dir_array.empty?

# Add the mount point for home. If users choose to share home via the managed EFS,
# either at cluster creation or on update, this directory will be needed. If they do not need it,
# it stays empty on the managed FS and is invisible to users.
directory "#{node['cluster']['internal_initial_shared_dir']}/home" do
user 'root'
group 'root'
mode '0755'
action :create
recursive true
end

# Unmount the root of the EFS after creating the shared directories
# TODO this doesn't seem to unmount the EFS
efs "unmount internal efs" do
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ suites:
cluster:
node_type: ComputeFleet
head_node_private_ip: '127.0.0.1'
internal_shared_storage_type: ebs
shared_storage_type: ebs
- name: config_head_node_munge
run_list:
- recipe[aws-parallelcluster-tests::setup]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
network get_vpc_cidr_list
writeable true
options ['no_root_squash']
only_if { node['cluster']['internal_shared_storage_type'] == 'ebs' }
only_if { node['cluster']['shared_storage_type'] == 'ebs' }
end unless on_docker?

template "#{node['cluster']['slurm']['install_dir']}/etc/slurm.conf" do
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@
action %i(mount enable)
retries 10
retry_delay 6
only_if { node['cluster']['internal_shared_storage_type'] == 'ebs' }
only_if { node['cluster']['shared_storage_type'] == 'ebs' }
end
2 changes: 1 addition & 1 deletion kitchen.validate-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ _common_cluster_attributes: &_common_cluster_attributes
enable_efa: 'efa'
nvidia:
enabled: <%= ENV['NVIDIA_ENABLED'] %>
internal_shared_storage_type: ebs
shared_storage_type: ebs

_head_node_cluster_attributes: &_head_node_cluster_attributes
<< : *_common_cluster_attributes
Expand Down

0 comments on commit 4418311

Please sign in to comment.