Skip to content

Commit

Permalink
[develop] Share the munge key from the Parallel Cluster shared folder (
Browse files Browse the repository at this point in the history
…aws#2467)

* Change the shared munge key to the shared directory

* Share munge key also to LoginNodes

* Correct config login and remove unused function in rotation script

* Address comments.

* Rename SHARED_DIRECTORY to SHARED_DIRECTORY_COMPUTE. Change SlurmSettings to MungeKeySettings to adopt new change in cli
  • Loading branch information
hehe7318 authored and hgreebe committed Nov 13, 2023
1 parent f457c7b commit 462e43c
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -189,5 +189,5 @@ suites:
scheduler: 'slurm'
config:
DevSettings:
SlurmSettings:
MungeKeySettings:
MungeKeySecretArn: null
39 changes: 26 additions & 13 deletions cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def setup_munge_head_node
# Generate munge key or get its value from secrets manager
munge_key_manager 'manage_munge_key' do
munge_key_secret_arn lazy {
node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn)
node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn)
}
end

Expand All @@ -88,7 +88,7 @@ def setup_munge_head_node

def update_munge_head_node
munge_key_manager 'update_munge_key' do
munge_key_secret_arn lazy { node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn) }
munge_key_secret_arn lazy { node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn) }
action :update_munge_key
only_if { ::File.exist?(node['cluster']['previous_cluster_config_path']) && is_custom_munge_key_updated? }
end
Expand All @@ -97,38 +97,51 @@ def update_munge_head_node
share_munge_head_node
end

def share_munge_head_node
# Share munge key
def share_munge_key_to_dir(shared_dir)
bash 'share_munge_key' do
user 'root'
group 'root'
code <<-HEAD_SHARE_MUNGE_KEY
code <<-SHARE_MUNGE_KEY
set -e
mkdir -p /home/#{node['cluster']['cluster_user']}/.munge
mkdir -p #{shared_dir}/.munge
# Copy key to shared dir
cp /etc/munge/munge.key /home/#{node['cluster']['cluster_user']}/.munge/.munge.key
HEAD_SHARE_MUNGE_KEY
cp /etc/munge/munge.key #{shared_dir}/.munge/.munge.key
chmod 0700 #{shared_dir}/.munge
chmod 0600 #{shared_dir}/.munge/.munge.key
SHARE_MUNGE_KEY
end
end

def setup_munge_compute_node
# Get munge key
def share_munge_head_node
share_munge_key_to_dir(node['cluster']['shared_dir'])
share_munge_key_to_dir(node['cluster']['shared_dir_login'])
end

def setup_munge_key(shared_dir)
bash 'get_munge_key' do
user 'root'
group 'root'
code <<-COMPUTE_MUNGE_KEY
code <<-MUNGE_KEY
set -e
# Copy munge key from shared dir
cp /home/#{node['cluster']['cluster_user']}/.munge/.munge.key /etc/munge/munge.key
cp #{shared_dir}/.munge/.munge.key /etc/munge/munge.key
# Set ownership on the key
chown #{node['cluster']['munge']['user']}:#{node['cluster']['munge']['group']} /etc/munge/munge.key
# Enforce correct permission on the key
chmod 0600 /etc/munge/munge.key
COMPUTE_MUNGE_KEY
MUNGE_KEY
retries 5
retry_delay 10
end
end

def setup_munge_compute_node
setup_munge_key(node['cluster']['shared_dir'])
enable_munge_service
end

def setup_munge_login_node
setup_munge_key(node['cluster']['shared_dir_login'])
enable_munge_service
end

Expand Down
4 changes: 2 additions & 2 deletions cookbooks/aws-parallelcluster-slurm/libraries/update.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def execute_command(command, user = "root", timeout = 300, raise_on_error = true
cmd.stdout.strip
end

# Verify if MungeKeySecretArn in SlurmSettings section of cluster configuration has been updated
# Verify if MungeKeySecretArn in MungeKeySettings section of cluster configuration has been updated
def is_custom_munge_key_updated?
config_parameter_changed?(%w(DevSettings SlurmSettings MungeKeySecretArn))
config_parameter_changed?(%w(DevSettings MungeKeySettings MungeKeySecretArn))
end
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,11 @@
group 'root'
mode '0700'
variables(
munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn) },
munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn) },
region: node['cluster']['region'],
munge_user: node['cluster']['munge']['user'],
munge_group: node['cluster']['munge']['group'],
cluster_user: node['cluster']['cluster_user']
shared_directory_compute: node['cluster']['shared_dir'],
shared_directory_login: node['cluster']['shared_dir_login']
)
end
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@
# limitations under the License.

# TODO: rename, find a better name that includes login nodes
setup_munge_compute_node
setup_munge_login_node

include_recipe 'aws-parallelcluster-slurm::mount_slurm_dir'
19 changes: 11 additions & 8 deletions cookbooks/aws-parallelcluster-slurm/recipes/test/mock_munge_key.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
munge_user_dir = "/home/#{node['cluster']['cluster_user']}/.munge"
directory munge_user_dir do
mode '1777'
end
munge_dirs = %W(#{node['cluster']['shared_dir']}/.munge #{node['cluster']['shared_dir_login']}/.munge)

munge_dirs.each do |munge_dir|
directory munge_dir do
mode '0700'
end

file "#{munge_user_dir}/.munge.key" do
content 'munge-key'
owner node['cluster']['munge']['user']
group node['cluster']['munge']['group']
file "#{munge_dir}/.munge.key" do
content 'munge-key'
owner node['cluster']['munge']['user']
group node['cluster']['munge']['group']
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,12 @@ def update_nodes_in_queue(strategy, queues)
group 'root'
mode '0700'
variables(
munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn) },
munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn) },
region: node['cluster']['region'],
munge_user: node['cluster']['munge']['user'],
munge_group: node['cluster']['munge']['group'],
cluster_user: node['cluster']['cluster_user']
shared_directory_compute: node['cluster']['shared_dir'],
shared_directory_login: node['cluster']['shared_dir_login']
)
only_if { ::File.exist?(node['cluster']['previous_cluster_config_path']) && is_custom_munge_key_updated? }
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ SECRET_ARN="<%= @munge_key_secret_arn %>"
REGION="<%= @region %>"
MUNGE_USER="<%= @munge_user %>"
MUNGE_GROUP="<%= @munge_group %>"
CLUSTER_USER="<%= @cluster_user %>"
SHARED_DIRECTORY_COMPUTE="<%= @shared_directory_compute %>"
SHARED_DIRECTORY_LOGIN="<%= @shared_directory_login %>"

# Check compute fleet status
compute_fleet_status=$(get-compute-fleet-status.sh)
Expand Down Expand Up @@ -75,9 +76,16 @@ else
fi

# Share munge key
echo "Sharing munge key"
mkdir -p /home/${CLUSTER_USER}/.munge
cp /etc/munge/munge.key /home/${CLUSTER_USER}/.munge/.munge.key
SHARED_DIRECTORIES=(${SHARED_DIRECTORY_COMPUTE} ${SHARED_DIRECTORY_LOGIN})

for dir in "${SHARED_DIRECTORIES[@]}"; do
echo "Sharing munge key to $dir"
mkdir -p "$dir/.munge"
cp /etc/munge/munge.key "$dir/.munge/.munge.key"
chmod 0700 "$dir/.munge"
chmod 0600 "$dir/.munge/.munge.key"
done

echo "Shared munge key"

exit 0
2 changes: 1 addition & 1 deletion kitchen.validate-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ _head_node_cluster_attributes: &_head_node_cluster_attributes
slurm_ddb_table: <%= ENV['DDB_TABLE'] %>
config:
DevSettings:
SlurmSettings:
MungeKeySettings:
MungeKeySecretArn: null

_compute_node_cluster_attributes: &_compute_node_cluster_attributes
Expand Down

0 comments on commit 462e43c

Please sign in to comment.