diff --git a/cookbooks/aws-parallelcluster-slurm/kitchen.slurm-config.yml b/cookbooks/aws-parallelcluster-slurm/kitchen.slurm-config.yml
index dabb39530..ec8c0fbbb 100644
--- a/cookbooks/aws-parallelcluster-slurm/kitchen.slurm-config.yml
+++ b/cookbooks/aws-parallelcluster-slurm/kitchen.slurm-config.yml
@@ -176,3 +176,19 @@ suites:
       cluster:
         node_type: ComputeFleet
         head_node_private_ip: '127.0.0.1'
+  - name: config_head_node_munge
+    run_list:
+      - recipe[aws-parallelcluster-tests::setup]
+      - recipe[aws-parallelcluster-slurm::config_head_node]
+    verifier:
+      controls:
+        - /tag:config_munge/
+    attributes:
+      cluster:
+        node_type: HeadNode
+        scheduler: 'slurm'
+        region: 'us-east-2'
+        config:
+          DevSettings:
+            SlurmSettings:
+              MungeKeySecretArn: 'arn:aws:secretsmanager:us-east-2:249582277112:secret:TestMungeKey-jDtnqi'
diff --git a/cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb b/cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb
index 9dcbb9fa6..fa35210b8 100644
--- a/cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb
+++ b/cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb
@@ -65,26 +65,42 @@ def enable_munge_service
   end
 end
 
+# Restart the munge service, retrying up to 5 times with a 10 second delay between attempts.
+def restart_munge_service
+  service "munge" do
+    supports restart: true
+    action :restart
+    retries 5
+    retry_delay 10
+  end
+end
+
 def setup_munge_head_node
-  # Generate munge key
-  bash 'generate_munge_key' do
-    not_if { ::File.exist?('/etc/munge/munge.key') }
-    user node['cluster']['munge']['user']
-    group node['cluster']['munge']['group']
-    cwd '/tmp'
-    code <<-HEAD_CREATE_MUNGE_KEY
-      set -e
-      # Generates munge key in /etc/munge/munge.key
-      /usr/sbin/mungekey --verbose
-      # Enforce correct permission on the key
-      chmod 0600 /etc/munge/munge.key
-    HEAD_CREATE_MUNGE_KEY
+  # Generate munge key or get its value from secrets manager
+  munge_key_manager 'manage_munge_key' do
+    munge_key_secret_arn lazy {
+      node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn)
+    }
   end
 
   enable_munge_service
   share_munge_head_node
 end
 
+# Replace the munge key when MungeKeySecretArn changed between the previous and the current cluster
+# configuration, then restart munge and share the key again.
+# NOTE(review): the restart and the share are declared unconditionally, i.e. also when the key was not replaced.
+def update_munge_head_node
+  munge_key_update_manager 'update_munge_key' do
+    munge_key_secret_arn lazy { node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn) }
+    action :update_munge_key
+    only_if { ::File.exist?(node['cluster']['previous_cluster_config_path']) && is_custom_munge_key_updated? }
+  end
+
+  restart_munge_service
+  share_munge_head_node
+end
+
 def share_munge_head_node
   # Share munge key
   bash 'share_munge_key' do
diff --git a/cookbooks/aws-parallelcluster-slurm/libraries/update.rb b/cookbooks/aws-parallelcluster-slurm/libraries/update.rb
index de9fbac08..e5a7df5a8 100644
--- a/cookbooks/aws-parallelcluster-slurm/libraries/update.rb
+++ b/cookbooks/aws-parallelcluster-slurm/libraries/update.rb
@@ -71,3 +71,13 @@ def execute_command(command, user = "root", timeout = 300, raise_on_error = true
   raise_command_error(command, cmd) if raise_on_error && cmd.error?
   cmd.stdout.strip
 end
+
+# Verify if MungeKeySecretArn in the DevSettings/SlurmSettings section of the cluster configuration has been updated.
+# The value is read with dig so that a configuration without DevSettings or SlurmSettings compares as nil
+def is_custom_munge_key_updated?
+  require 'yaml'
+  munge_key_path = %w(DevSettings SlurmSettings MungeKeySecretArn)
+  config = YAML.safe_load(File.read(node['cluster']['cluster_config_path'])) || {}
+  previous_config = YAML.safe_load(File.read(node['cluster']['previous_cluster_config_path'])) || {}
+  config.dig(*munge_key_path) != previous_config.dig(*munge_key_path)
+end
diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/config/config_head_node.rb b/cookbooks/aws-parallelcluster-slurm/recipes/config/config_head_node.rb
index 8d4ce68f0..799c28b19 100644
--- a/cookbooks/aws-parallelcluster-slurm/recipes/config/config_head_node.rb
+++ b/cookbooks/aws-parallelcluster-slurm/recipes/config/config_head_node.rb
@@ -281,3 +281,17 @@
   retries 5
   retry_delay 2
 end unless redhat_on_docker?
+
+template "#{node['cluster']['scripts_dir']}/slurm/update_munge_key.sh" do
+  source 'slurm/head_node/update_munge_key.sh.erb'
+  owner 'root'
+  group 'root'
+  mode '0700'
+  variables(
+    munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn) },
+    region: node['cluster']['region'],
+    munge_user: node['cluster']['munge']['user'],
+    munge_group: node['cluster']['munge']['group'],
+    cluster_user: node['cluster']['cluster_user']
+  )
+end
diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb b/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb
index 9f5ba1e7c..84bfdf22b 100644
--- a/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb
+++ b/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb
@@ -201,6 +201,24 @@ def update_nodes_in_queue(strategy, queues)
   only_if { ::File.exist?(node['cluster']['previous_cluster_config_path']) && is_slurm_database_updated? }
 end unless on_docker?
 
+# Re-render the munge key rotation script when MungeKeySecretArn changed, so that it uses the new secret ARN
+template "#{node['cluster']['scripts_dir']}/slurm/update_munge_key.sh" do
+  source 'slurm/head_node/update_munge_key.sh.erb'
+  owner 'root'
+  group 'root'
+  mode '0700'
+  variables(
+    munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn) },
+    region: node['cluster']['region'],
+    munge_user: node['cluster']['munge']['user'],
+    munge_group: node['cluster']['munge']['group'],
+    cluster_user: node['cluster']['cluster_user']
+  )
+  only_if { ::File.exist?(node['cluster']['previous_cluster_config_path']) && is_custom_munge_key_updated? }
+end
+
+update_munge_head_node
+
 # The previous execute "generate_pcluster_slurm_configs" block resource may have overridden the slurmdbd password in
 # slurm_parallelcluster_slurmdbd.conf with a default value, so if it has run and Slurm accounting
 # is enabled we must pull the database password from Secrets Manager once again.
diff --git a/cookbooks/aws-parallelcluster-slurm/resources/munge_key_manager.rb b/cookbooks/aws-parallelcluster-slurm/resources/munge_key_manager.rb
new file mode 100644
index 000000000..cd6d10c49
--- /dev/null
+++ b/cookbooks/aws-parallelcluster-slurm/resources/munge_key_manager.rb
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+
+#
+# Cookbook:: aws-parallelcluster-slurm
+# Recipe:: config_head_node
+#
+# Copyright:: 2013-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
+# License. A copy of the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
+# limitations under the License.
+
+resource_name :munge_key_manager
+provides :munge_key_manager
+unified_mode true
+
+# ARN of the Secrets Manager secret holding the base64 encoded munge key; when unset a key is generated locally
+property :munge_key_secret_arn, String
+
+default_action :setup_munge_key
+
+# Install the munge key in /etc/munge/munge.key: fetched from Secrets Manager when munge_key_secret_arn is set,
+# generated with mungekey (only if no key exists yet) otherwise.
+action :setup_munge_key do
+  if new_resource.munge_key_secret_arn
+    # This block will fetch the munge key from Secrets Manager
+    bash 'fetch_and_decode_munge_key' do
+      user 'root'
+      group 'root'
+      cwd '/tmp'
+      code <<-FETCH_AND_DECODE
+        # Get encoded munge key from secrets manager
+        encoded_key=$(aws secretsmanager get-secret-value --secret-id #{new_resource.munge_key_secret_arn} --query 'SecretString' --output text --region #{node['cluster']['region']})
+        # If encoded_key doesn't have a value, error and exit
+        if [ -z "$encoded_key" ]; then
+          echo "Error fetching munge key from Secrets Manager or the key is empty"
+          exit 1
+        fi
+
+        # Decode the munge key into a temporary file next to /etc/munge/munge.key. The key is binary, so it must
+        # not go through a shell variable: command substitution drops NUL bytes and trailing newlines, and
+        # echo appends a newline.
+        tmp_key=$(mktemp /etc/munge/munge.key.XXXXXX) || exit 1
+        if ! echo "$encoded_key" | base64 -d > "$tmp_key"; then
+          echo "Error decoding the munge key with base64"
+          rm -f "$tmp_key"
+          exit 1
+        fi
+
+        # Put the decoded key in place only once decoding succeeded
+        mv -f "$tmp_key" /etc/munge/munge.key
+
+        # Set ownership on the key
+        chown #{node['cluster']['munge']['user']}:#{node['cluster']['munge']['group']} /etc/munge/munge.key
+        # Enforce correct permission on the key
+        chmod 0600 /etc/munge/munge.key
+      FETCH_AND_DECODE
+    end
+  else
+    # This block will generate a munge key if it doesn't exist
+    bash 'generate_munge_key' do
+      not_if { ::File.exist?('/etc/munge/munge.key') }
+      user node['cluster']['munge']['user']
+      group node['cluster']['munge']['group']
+      cwd '/tmp'
+      code <<-GENERATE_KEY
+        set -e
+        /usr/sbin/mungekey --verbose
+        chmod 0600 /etc/munge/munge.key
+      GENERATE_KEY
+    end
+  end
+end
diff --git a/cookbooks/aws-parallelcluster-slurm/resources/munge_key_update_manager.rb b/cookbooks/aws-parallelcluster-slurm/resources/munge_key_update_manager.rb
new file mode 100644
index 000000000..889ce2bbc
--- /dev/null
+++ b/cookbooks/aws-parallelcluster-slurm/resources/munge_key_update_manager.rb
@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+
+#
+# Cookbook:: aws-parallelcluster-slurm
+# Recipe:: config_head_node
+#
+# Copyright:: 2013-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
+# License. A copy of the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
+# limitations under the License.
+
+resource_name :munge_key_update_manager
+provides :munge_key_update_manager
+unified_mode true
+
+# ARN of the Secrets Manager secret holding the base64 encoded munge key; when unset a new key is generated
+property :munge_key_secret_arn, String
+
+default_action :update_munge_key
+
+# Replace /etc/munge/munge.key: the current key is removed, then the new one is fetched from Secrets Manager when
+# munge_key_secret_arn is set, generated with mungekey otherwise.
+# NOTE(review): the current key is removed before the new one is obtained, so a failure in the following step
+# leaves the node without a munge key.
+action :update_munge_key do
+  bash 'remove_current_munge_key' do
+    user 'root'
+    group 'root'
+    cwd '/tmp'
+    code <<-REMOVE_CURRENT_MUNGE_KEY
+      if [ -f "/etc/munge/munge.key" ]; then
+        rm -f /etc/munge/munge.key
+      fi
+    REMOVE_CURRENT_MUNGE_KEY
+  end
+
+  if new_resource.munge_key_secret_arn
+    bash 'fetch_and_decode_munge_key' do
+      user 'root'
+      group 'root'
+      cwd '/tmp'
+      code <<-FETCH_AND_DECODE
+        # Get encoded munge key from secrets manager
+        encoded_key=$(aws secretsmanager get-secret-value --secret-id #{new_resource.munge_key_secret_arn} --query 'SecretString' --output text --region #{node['cluster']['region']})
+        # If encoded_key doesn't have a value, error and exit
+        if [ -z "$encoded_key" ]; then
+          echo "Error fetching munge key from Secrets Manager or the key is empty"
+          exit 1
+        fi
+
+        # Decode the munge key into a temporary file next to /etc/munge/munge.key. The key is binary, so it must
+        # not go through a shell variable: command substitution drops NUL bytes and trailing newlines, and
+        # echo appends a newline.
+        tmp_key=$(mktemp /etc/munge/munge.key.XXXXXX) || exit 1
+        if ! echo "$encoded_key" | base64 -d > "$tmp_key"; then
+          echo "Error decoding the munge key with base64"
+          rm -f "$tmp_key"
+          exit 1
+        fi
+
+        # Put the decoded key in place only once decoding succeeded
+        mv -f "$tmp_key" /etc/munge/munge.key
+
+        # Set ownership on the key
+        chown #{node['cluster']['munge']['user']}:#{node['cluster']['munge']['group']} /etc/munge/munge.key
+        # Enforce correct permission on the key
+        chmod 0600 /etc/munge/munge.key
+      FETCH_AND_DECODE
+    end
+  else
+    bash 'generate_munge_key' do
+      user node['cluster']['munge']['user']
+      group node['cluster']['munge']['group']
+      cwd '/tmp'
+      code <<-GENERATE_KEY
+        set -e
+        /usr/sbin/mungekey --verbose
+        chmod 0600 /etc/munge/munge.key
+      GENERATE_KEY
+    end
+  end
+end
diff --git a/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/head_node/update_munge_key.sh.erb b/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/head_node/update_munge_key.sh.erb
new file mode 100644
index 000000000..198032e45
--- /dev/null
+++ b/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/head_node/update_munge_key.sh.erb
@@ -0,0 +1,64 @@
+#!/bin/bash
+# This script updates the munge key used in the system.
+# It fetches the key from AWS Secrets Manager and exits with an error if no secret ARN is configured.
+# The script does not require any argument.
+#
+# Usage: ./update_munge_key.sh
+#
+
+set -e
+
+MUNGE_KEY_FILE="/etc/munge/munge.key"
+SECRET_ARN="<%= @munge_key_secret_arn %>"
+REGION="<%= @region %>"
+MUNGE_USER="<%= @munge_user %>"
+MUNGE_GROUP="<%= @munge_group %>"
+CLUSTER_USER="<%= @cluster_user %>"
+
+# If SECRET_ARN is provided, fetch the munge key from Secrets Manager
+if [ -n "${SECRET_ARN}" ]; then
+  echo "Fetching munge key from AWS Secrets Manager: ${SECRET_ARN}"
+  # Do not let "set -e" abort on a failed call: the resulting empty value is reported by the check below
+  encoded_key=$(aws secretsmanager get-secret-value --secret-id "${SECRET_ARN}" --query 'SecretString' --output text --region "${REGION}") || encoded_key=""
+  if [ -z "${encoded_key}" ]; then
+    echo "Error fetching munge key from Secrets Manager or the key is empty"
+    exit 1
+  fi
+
+  # Decode the munge key into a temporary file next to the key file. The key is binary, so it must not go
+  # through a shell variable: command substitution drops NUL bytes and trailing newlines, and echo
+  # appends a newline.
+  tmp_key_file=$(mktemp "${MUNGE_KEY_FILE}.XXXXXX")
+  if ! echo "${encoded_key}" | base64 -d > "${tmp_key_file}"; then
+    echo "Error decoding the munge key with base64"
+    rm -f "${tmp_key_file}"
+    exit 1
+  fi
+
+  # Replace the current munge key, if any, with the decoded one.
+  # This happens only once decoding succeeded, so a failed run keeps the previous key.
+  mv -f "${tmp_key_file}" "${MUNGE_KEY_FILE}"
+
+  # Set ownership on the key
+  chown "${MUNGE_USER}:${MUNGE_GROUP}" "${MUNGE_KEY_FILE}"
+  # Enforce correct permission on the key
+  chmod 0600 "${MUNGE_KEY_FILE}"
+
+else
+  echo "MUNGE KEY SECRET ARN isn't provided"
+  exit 1
+fi
+
+# Enable and restart munge service
+systemctl enable munge
+echo "Start to Restart munge service"
+systemctl restart munge || { sleep 10; systemctl restart munge; } || { sleep 10; systemctl restart munge; } || { sleep 10; systemctl restart munge; } || { sleep 10; systemctl restart munge; }
+echo "Restart munge service completed"
+
+# Share munge key
+echo "Start to Share munge key"
+mkdir -p /home/${CLUSTER_USER}/.munge
+cp /etc/munge/munge.key /home/${CLUSTER_USER}/.munge/.munge.key
+echo "Share munge key completed"
+
+exit 0
diff --git a/cookbooks/aws-parallelcluster-slurm/test/controls/munge_spec.rb b/cookbooks/aws-parallelcluster-slurm/test/controls/munge_spec.rb
index c414b630f..9fb25d8bc 100644
--- a/cookbooks/aws-parallelcluster-slurm/test/controls/munge_spec.rb
+++ b/cookbooks/aws-parallelcluster-slurm/test/controls/munge_spec.rb
@@ -78,3 +78,31 @@
     it { should be_running }
   end
 end
+
+control 'tag:config_munge_check_munge_key_exists' do
+  title 'Check if the munge key exists'
+
+  describe file('/etc/munge/munge.key') do
+    it { should exist }
+    its('mode') { should cmp '0600' }
+    its('owner') { should eq node['cluster']['munge']['user'] }
+    its('group') { should eq node['cluster']['munge']['group'] }
+  end
+end unless os_properties.redhat_on_docker?
+
+control 'tag:config_munge_check_munge_key_content' do
+  title 'Check if the munge key content is not empty'
+
+  describe file('/etc/munge/munge.key') do
+    its('content') { should_not be_empty }
+  end
+end unless os_properties.redhat_on_docker?
+
+control 'tag:config_munge_check_munge_key_error_messages' do
+  title 'Check for error messages related to munge key'
+
+  describe file('/var/log/chef-client.log') do
+    its('content') { should_not match(/Error fetching munge key/) }
+    its('content') { should_not match(/Error decoding the munge key/) }
+  end
+end unless os_properties.redhat_on_docker?