diff --git a/cookbooks/aws-parallelcluster-slurm/kitchen.slurm-config.yml b/cookbooks/aws-parallelcluster-slurm/kitchen.slurm-config.yml index 52fbe82e3..f3862ed1c 100644 --- a/cookbooks/aws-parallelcluster-slurm/kitchen.slurm-config.yml +++ b/cookbooks/aws-parallelcluster-slurm/kitchen.slurm-config.yml @@ -189,5 +189,5 @@ suites: scheduler: 'slurm' config: DevSettings: - SlurmSettings: + MungeKeySettings: MungeKeySecretArn: null diff --git a/cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb b/cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb index a3cd308e9..b4bf24216 100644 --- a/cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb +++ b/cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb @@ -78,7 +78,7 @@ def setup_munge_head_node # Generate munge key or get it's value from secrets manager munge_key_manager 'manage_munge_key' do munge_key_secret_arn lazy { - node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn) + node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn) } end @@ -88,7 +88,7 @@ def setup_munge_head_node def update_munge_head_node munge_key_manager 'update_munge_key' do - munge_key_secret_arn lazy { node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn) } + munge_key_secret_arn lazy { node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn) } action :update_munge_key only_if { ::File.exist?(node['cluster']['previous_cluster_config_path']) && is_custom_munge_key_updated? } end @@ -97,38 +97,51 @@ def update_munge_head_node share_munge_head_node end -def share_munge_head_node - # Share munge key +def share_munge_key_to_dir(shared_dir) bash 'share_munge_key' do user 'root' group 'root' - code <<-HEAD_SHARE_MUNGE_KEY + code <<-SHARE_MUNGE_KEY set -e - mkdir -p /home/#{node['cluster']['cluster_user']}/.munge + mkdir -p #{shared_dir}/.munge # Copy key to shared dir - cp /etc/munge/munge.key /home/#{node['cluster']['cluster_user']}/.munge/.munge.key - HEAD_SHARE_MUNGE_KEY + cp /etc/munge/munge.key #{shared_dir}/.munge/.munge.key + chmod 0700 #{shared_dir}/.munge + chmod 0600 #{shared_dir}/.munge/.munge.key + SHARE_MUNGE_KEY end end -def setup_munge_compute_node - # Get munge key +def share_munge_head_node + share_munge_key_to_dir(node['cluster']['shared_dir']) + share_munge_key_to_dir(node['cluster']['shared_dir_login']) +end + +def setup_munge_key(shared_dir) bash 'get_munge_key' do user 'root' group 'root' - code <<-COMPUTE_MUNGE_KEY + code <<-MUNGE_KEY set -e # Copy munge key from shared dir - cp /home/#{node['cluster']['cluster_user']}/.munge/.munge.key /etc/munge/munge.key + cp #{shared_dir}/.munge/.munge.key /etc/munge/munge.key # Set ownership on the key chown #{node['cluster']['munge']['user']}:#{node['cluster']['munge']['group']} /etc/munge/munge.key # Enforce correct permission on the key chmod 0600 /etc/munge/munge.key - COMPUTE_MUNGE_KEY + MUNGE_KEY retries 5 retry_delay 10 end +end + +def setup_munge_compute_node + setup_munge_key(node['cluster']['shared_dir']) + enable_munge_service +end +def setup_munge_login_node + setup_munge_key(node['cluster']['shared_dir_login']) enable_munge_service end diff --git a/cookbooks/aws-parallelcluster-slurm/libraries/update.rb b/cookbooks/aws-parallelcluster-slurm/libraries/update.rb index 81204312e..154de7275 100644 --- a/cookbooks/aws-parallelcluster-slurm/libraries/update.rb +++ b/cookbooks/aws-parallelcluster-slurm/libraries/update.rb @@ -79,7 +79,7 @@ def execute_command(command, user = "root", timeout = 300, raise_on_error = true cmd.stdout.strip end -# Verify if MungeKeySecretArn in SlurmSettings section of cluster configuration has been updated +# Verify if MungeKeySecretArn in MungeKeySettings section of cluster configuration has been updated def is_custom_munge_key_updated? - config_parameter_changed?(%w(DevSettings SlurmSettings MungeKeySecretArn)) + config_parameter_changed?(%w(DevSettings MungeKeySettings MungeKeySecretArn)) end diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/config/config_head_node.rb b/cookbooks/aws-parallelcluster-slurm/recipes/config/config_head_node.rb index 74113ead5..322ab975f 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/config/config_head_node.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/config/config_head_node.rb @@ -288,10 +288,11 @@ group 'root' mode '0700' variables( - munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn) }, + munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn) }, region: node['cluster']['region'], munge_user: node['cluster']['munge']['user'], munge_group: node['cluster']['munge']['group'], - cluster_user: node['cluster']['cluster_user'] + shared_directory_compute: node['cluster']['shared_dir'], + shared_directory_login: node['cluster']['shared_dir_login'] ) end diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/config/config_login.rb b/cookbooks/aws-parallelcluster-slurm/recipes/config/config_login.rb index ce562c2c4..a037de1ee 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/config/config_login.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/config/config_login.rb @@ -16,6 +16,6 @@ # limitations under the License. # TODO: rename, find a better name that include login nodes -setup_munge_compute_node +setup_munge_login_node include_recipe 'aws-parallelcluster-slurm::mount_slurm_dir' diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/test/mock_munge_key.rb b/cookbooks/aws-parallelcluster-slurm/recipes/test/mock_munge_key.rb index 54fb6af9e..93335e2bc 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/test/mock_munge_key.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/test/mock_munge_key.rb @@ -1,10 +1,13 @@ -munge_user_dir = "/home/#{node['cluster']['cluster_user']}/.munge" -directory munge_user_dir do - mode '1777' -end +munge_dirs = %W(#{node['cluster']['shared_dir']}/.munge #{node['cluster']['shared_dir_login']}/.munge) + +munge_dirs.each do |munge_dir| + directory munge_dir do + mode '0700' + end -file "#{munge_user_dir}/.munge.key" do - content 'munge-key' - owner node['cluster']['munge']['user'] - group node['cluster']['munge']['group'] + file "#{munge_dir}/.munge.key" do + content 'munge-key' + owner node['cluster']['munge']['user'] + group node['cluster']['munge']['group'] + end end diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb b/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb index 84bfdf22b..03bd4f5f6 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb @@ -208,11 +208,12 @@ def update_nodes_in_queue(strategy, queues) group 'root' mode '0700' variables( - munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn) }, + munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn) }, region: node['cluster']['region'], munge_user: node['cluster']['munge']['user'], munge_group: node['cluster']['munge']['group'], - cluster_user: node['cluster']['cluster_user'] + shared_directory_compute: node['cluster']['shared_dir'], + shared_directory_login: node['cluster']['shared_dir_login'] ) only_if { ::File.exist?(node['cluster']['previous_cluster_config_path']) && is_custom_munge_key_updated? } end diff --git a/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/head_node/update_munge_key.sh.erb b/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/head_node/update_munge_key.sh.erb index f9edd277b..4c0715e18 100644 --- a/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/head_node/update_munge_key.sh.erb +++ b/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/head_node/update_munge_key.sh.erb @@ -13,7 +13,8 @@ SECRET_ARN="<%= @munge_key_secret_arn %>" REGION="<%= @region %>" MUNGE_USER="<%= @munge_user %>" MUNGE_GROUP="<%= @munge_group %>" -CLUSTER_USER="<%= @cluster_user %>" +SHARED_DIRECTORY_COMPUTE="<%= @shared_directory_compute %>" +SHARED_DIRECTORY_LOGIN="<%= @shared_directory_login %>" # Check compute fleet status compute_fleet_status=$(get-compute-fleet-status.sh) @@ -75,9 +76,16 @@ else fi # Share munge key -echo "Sharing munge key" -mkdir -p /home/${CLUSTER_USER}/.munge -cp /etc/munge/munge.key /home/${CLUSTER_USER}/.munge/.munge.key +SHARED_DIRECTORIES=(${SHARED_DIRECTORY_COMPUTE} ${SHARED_DIRECTORY_LOGIN}) + +for dir in "${SHARED_DIRECTORIES[@]}"; do + echo "Sharing munge key to $dir" + mkdir -p "$dir/.munge" + cp /etc/munge/munge.key "$dir/.munge/.munge.key" + chmod 0700 "$dir/.munge" + chmod 0600 "$dir/.munge/.munge.key" +done + echo "Shared munge key" exit 0 diff --git a/kitchen.validate-config.yml b/kitchen.validate-config.yml index 98972d0e9..265e1795b 100644 --- a/kitchen.validate-config.yml +++ b/kitchen.validate-config.yml @@ -35,7 +35,7 @@ _head_node_cluster_attributes: &_head_node_cluster_attributes slurm_ddb_table: <%= ENV['DDB_TABLE'] %> config: DevSettings: - SlurmSettings: + MungeKeySettings: MungeKeySecretArn: null _compute_node_cluster_attributes: &_compute_node_cluster_attributes