Skip to content

Commit

Permalink
Refactor environment recipes to be clearer in functional description …
Browse files Browse the repository at this point in the history
…and remove unnecessary recipes
  • Loading branch information
dreambeyondorange committed Sep 20, 2023
1 parent 60d9074 commit 1364a59
Show file tree
Hide file tree
Showing 18 changed files with 122 additions and 234 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This file is used to list changes made in each version of the AWS ParallelCluste
- Install [Spack](https://spack.io) by default in cluster user's home directory.

**CHANGES**
- Migrate NFS exports from the head node root volume and associated data for intra-cluster shared storage to external AWS EFS filesystems attached to the cluster stack.

**BUG FIXES**
- Fix inconsistent scaling configuration after cluster update rollback when modifying the list of instance types declared in the Compute Resources.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
end

# generate the updated shared storages mapping file
include_recipe 'aws-parallelcluster-environment::fs_update'
include_recipe 'aws-parallelcluster-environment::update_fs_mapping'

include_recipe 'aws-parallelcluster-environment::directory_service'
include_recipe 'aws-parallelcluster-slurm::update' if node['cluster']['scheduler'] == 'slurm'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -414,10 +414,10 @@ suites:
fsx_shared_dirs: ''
raid_shared_dir: ''
ephemeral_dir: test1
- name: fs_update
- name: update_fs_mapping
run_list:
- recipe[aws-parallelcluster-tests::setup]
- recipe[aws-parallelcluster-environment::fs_update]
- recipe[aws-parallelcluster-environment::update_fs_mapping]
verifier:
controls:
- fs_data_file_created_correctly
Expand All @@ -440,10 +440,10 @@ suites:
fsx_dns_names: dns1,dns2
fsx_mount_names: mount1,mount2
fsx_volume_junction_paths: value1,value2
- name: fs_update_default_values
- name: update_fs_mapping_default_values
run_list:
- recipe[aws-parallelcluster-tests::setup]
- recipe[aws-parallelcluster-environment::fs_update]
- recipe[aws-parallelcluster-environment::update_fs_mapping]
verifier:
controls:
- fs_data_file_with_default_values
Expand Down Expand Up @@ -472,42 +472,43 @@ suites:
scheduler: slurm
head_node_imds_secured: 'true'
head_node_imds_allowed_users: ['root', 'nobody']
- name: mount_shared_compute
run_list:
- recipe[aws-parallelcluster-tests::setup]
- recipe[aws-parallelcluster-environment::mount_shared]
verifier:
controls:
- mount_home
- mount_shared_compute
attributes:
dependencies:
- recipe:aws-parallelcluster-platform::directories
- resource:nfs
- recipe:aws-parallelcluster-environment::mock_export_directories
cluster:
node_type: 'ComputeFleet'
head_node_private_ip: '127.0.0.1'
head_node_home_path: '/fake_headnode_home'
shared_dir_head: '/fake_headnode_shared'
- name: mount_shared_login
run_list:
- recipe[aws-parallelcluster-tests::setup]
- recipe[aws-parallelcluster-environment::mount_shared]
verifier:
controls:
- mount_home
- mount_shared_login
attributes:
dependencies:
- recipe:aws-parallelcluster-platform::directories
- resource:nfs
- recipe:aws-parallelcluster-environment::mock_export_directories
cluster:
node_type: 'LoginNode'
head_node_private_ip: '127.0.0.1'
head_node_home_path: '/fake_headnode_home'
shared_dir_head: '/fake_headnode_shared'
# TODO: replace the commented-out suites below with tests for the mount_internal_use_fs recipe, since it uses shared storage
# - name: mount_shared_compute
# run_list:
# - recipe[aws-parallelcluster-tests::setup]
# - recipe[aws-parallelcluster-environment::mount_shared]
# verifier:
# controls:
# - mount_home
# - mount_shared_compute
# attributes:
# dependencies:
# - recipe:aws-parallelcluster-platform::directories
# - resource:nfs
# - recipe:aws-parallelcluster-environment::mock_export_directories
# cluster:
# node_type: 'ComputeFleet'
# head_node_private_ip: '127.0.0.1'
# head_node_home_path: '/fake_headnode_home'
# shared_dir_head: '/fake_headnode_shared'
# - name: mount_shared_login
# run_list:
# - recipe[aws-parallelcluster-tests::setup]
# - recipe[aws-parallelcluster-environment::mount_shared]
# verifier:
# controls:
# - mount_home
# - mount_shared_login
# attributes:
# dependencies:
# - recipe:aws-parallelcluster-platform::directories
# - resource:nfs
# - recipe:aws-parallelcluster-environment::mock_export_directories
# cluster:
# node_type: 'LoginNode'
# head_node_private_ip: '127.0.0.1'
# head_node_home_path: '/fake_headnode_home'
# shared_dir_head: '/fake_headnode_shared'
- name: raid_compute
run_list:
- recipe[aws-parallelcluster-tests::setup]
Expand Down Expand Up @@ -541,7 +542,7 @@ suites:
- name: shared_storages_compute
run_list:
- recipe[aws-parallelcluster-tests::setup]
- recipe[aws-parallelcluster-environment::shared_storages]
- recipe[aws-parallelcluster-environment::mount_cx_fs]
verifier:
controls:
- shared_storages_compute_and_login
Expand All @@ -555,7 +556,7 @@ suites:
- name: shared_storages_login
run_list:
- recipe[aws-parallelcluster-tests::setup]
- recipe[aws-parallelcluster-environment::shared_storages]
- recipe[aws-parallelcluster-environment::mount_cx_fs]
verifier:
controls:
- shared_storages_compute_and_login
Expand Down
8 changes: 4 additions & 4 deletions cookbooks/aws-parallelcluster-environment/recipes/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
action :configure
end
include_recipe 'aws-parallelcluster-environment::ephemeral_drives'
# fs_update generates the shared storages mapping file so must be executed before shared storages recipes
include_recipe 'aws-parallelcluster-environment::fs_update'
include_recipe 'aws-parallelcluster-environment::shared_storages'
# update_fs_mapping generates the shared storages mapping file, so it must be executed before the shared storage recipes
include_recipe 'aws-parallelcluster-environment::update_fs_mapping'
include_recipe 'aws-parallelcluster-environment::export_home'
include_recipe 'aws-parallelcluster-environment::ebs'
include_recipe 'aws-parallelcluster-environment::raid'
include_recipe "aws-parallelcluster-environment::fs_mount"
include_recipe "aws-parallelcluster-environment::mount_cx_fs"
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# frozen_string_literal: true

#
# Copyright:: 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
# License. A copy of the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
# limitations under the License.

# Export the /home directory from the head node.
#
# Behaviour by node type (read from node['cluster']['node_type']):
#   * HeadNode                -> run the `volume` resource with action :export on /home
#   * ComputeFleet, LoginNode -> nothing to export; only an info message is logged
#   * anything else           -> the recipe aborts with an error
#
# The whole recipe is a no-op when running inside Docker.
return if on_docker?

node_type = node['cluster']['node_type']

if node_type == 'HeadNode'
  volume "export /home" do
    shared_dir "/home"
    action :export
  end
elsif %w(ComputeFleet LoginNode).include?(node_type)
  # These node types never export anything themselves.
  Chef::Log.info("Export only from the HeadNode")
else
  # Unknown or missing node type: fail loudly instead of silently skipping the export.
  raise "node_type must be HeadNode, ComputeFleet, or LoginNode"
end
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# frozen_string_literal: true

# Copyright:: 2013-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Copyright:: 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
# License. A copy of the License is located at
Expand All @@ -23,12 +23,11 @@

# Identify the customer use filesystems and store their data in arrays for the EFS resource
efs_shared_dir_array.each_with_index do |dir, index|
unless node['cluster']['internal_shared_dirs'].include?(dir)
cx_shared_dir_array.push(dir)
cx_efs_fs_id_array.push(efs_fs_id_array[index])
cx_efs_encryption_array.push(efs_encryption_in_transit_array[index])
cx_efs_iam_array.push(efs_iam_authorization_array[index])
end
next if node['cluster']['internal_shared_dirs'].include?(dir)
cx_shared_dir_array.push(dir)
cx_efs_fs_id_array.push(efs_fs_id_array[index])
cx_efs_encryption_array.push(efs_encryption_in_transit_array[index])
cx_efs_iam_array.push(efs_iam_authorization_array[index])
end

# Mount EFS directories with the efs resource
Expand Down

This file was deleted.

4 changes: 1 addition & 3 deletions cookbooks/aws-parallelcluster-environment/recipes/init.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@
cloudwatch "Configure CloudWatch" do
action :configure
end
include_recipe "aws-parallelcluster-environment::fs_update"
include_recipe "aws-parallelcluster-environment::update_fs_mapping"
include_recipe "aws-parallelcluster-environment::backup_internal_use_shared_data"
include_recipe "aws-parallelcluster-environment::mount_internal_use_fs"
include_recipe "aws-parallelcluster-environment::restore_internal_use_shared_data"

# include_recipe "aws-parallelcluster-environment::mount_shared"

include_recipe "aws-parallelcluster-environment::network_interfaces"
include_recipe 'aws-parallelcluster-environment::imds'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,16 @@

return if on_docker?

case node['cluster']['node_type']
when 'HeadNode'
Chef::Log.info("Internal Shared Storage #{node['cluster']['internal_shared_dirs']}")
if node['cluster']['node_type'] == 'HeadNode'
# For each internal shared directory, back up its data to a temporary location under /tmp
node['cluster']['internal_shared_dirs'].each do |dir|
bash "Backup #{dir}" do
user 'root'
group 'root'
code <<-EOH
mkdir -p /tmp#{dir}
rsync -a #{dir}/ /tmp#{dir}
rsync -a -X #{dir}/ /tmp#{dir}
EOH
end

end

when 'ComputeFleet', 'LoginNode'
Chef::Log.info("Backup only from the HeadNode")
else
raise "node_type must be HeadNode, ComputeFleet, or LoginNode"
end
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# frozen_string_literal: true

# Copyright:: 2013-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Copyright:: 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
# License. A copy of the License is located at
Expand All @@ -23,12 +23,11 @@

# Identify the internal use filesystems and store their data in arrays for the EFS resource
efs_shared_dir_array.each_with_index do |dir, index|
if node['cluster']['internal_shared_dirs'].include?(dir)
internal_shared_dir_array.push(dir)
internal_efs_fs_id_array.push(efs_fs_id_array[index])
internal_efs_encryption_array.push(efs_encryption_in_transit_array[index])
internal_efs_iam_array.push(efs_iam_authorization_array[index])
end
next unless node['cluster']['internal_shared_dirs'].include?(dir)
internal_shared_dir_array.push(dir)
internal_efs_fs_id_array.push(efs_fs_id_array[index])
internal_efs_encryption_array.push(efs_encryption_in_transit_array[index])
internal_efs_iam_array.push(efs_iam_authorization_array[index])
end

# Mount EFS directories with the efs resource
Expand All @@ -41,7 +40,7 @@
not_if { internal_shared_dir_array.empty? }
end

# TODO replace home as NFS with shared /home
# TODO: replace home as NFS with shared /home
case node['cluster']['node_type']
when 'ComputeFleet', 'LoginNode'
include_recipe 'aws-parallelcluster-environment::mount_home'
Expand Down

This file was deleted.

Loading

0 comments on commit 1364a59

Please sign in to comment.