Skip to content

Commit

Permalink
Merge branch 'develop' into wip/munge-key-rotation
Browse files Browse the repository at this point in the history
  • Loading branch information
hehe7318 authored Sep 13, 2023
2 parents 332047f + c2709ce commit f52c5d0
Show file tree
Hide file tree
Showing 14 changed files with 508 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def log_exception(
):
def decorator_log_exception(function):
@functools.wraps(function)
def wrapper_log_expection(*args, **kwargs): # pylint: disable=R1710
def wrapper_log_exception(*args, **kwargs): # pylint: disable=R1710
try:
return function(*args, **kwargs)
except catch_exception as e:
Expand All @@ -77,8 +77,9 @@ def wrapper_log_expection(*args, **kwargs): # pylint: disable=R1710
if exception_to_raise:
raise exception_to_raise
raise
return None

return wrapper_log_expection
return wrapper_log_exception

return decorator_log_exception

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def remove_backup():
try:
os.remove(LOG_CONFIGS_BAK_PATH)
except FileNotFoundError:
# No need to remove the file, as the file isn't found anyway
pass


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,7 @@ def validate_device_name(device_name):
return True


def main():
syslog.syslog("Starting ec2_dev_2_volid.py script")
# Get dev
try:
dev = str(sys.argv[1])
validate_device_name(dev)
syslog.syslog(f"Input block device is {dev}")
except IndexError:
syslog.syslog(syslog.LOG_ERR, "Provide block device i.e. xvdf")

# Convert dev to mapping format
def adapt_device_name(dev):
if "nvme" in dev:
# For newer instances which expose EBS volumes as NVMe devices, translate the
# device name so boto can discover it.
Expand All @@ -70,45 +60,21 @@ def main():
else:
dev = dev.replace("xvd", "sd")
dev = "/dev/" + dev
return dev

# Get IMDSv2 token
token = get_imdsv2_token()

# Get instance ID
instance_id = requests.get(
"http://169.254.169.254/latest/meta-data/instance-id",
headers=token,
timeout=METADATA_REQUEST_TIMEOUT,
).text

# Get region
region = requests.get(
"http://169.254.169.254/latest/meta-data/placement/availability-zone",
headers=token,
timeout=METADATA_REQUEST_TIMEOUT,
).text
region = region[:-1]

# Parse configuration file to read proxy settings
def parse_proxy_config():
    """
    Build the client configuration from the proxy settings found in /etc/boto.cfg.

    :return: a Config carrying an "https" proxy entry when both the "proxy" and "proxy_port"
             options exist in the "Boto" section, a default Config otherwise
    """
    boto_settings = configparser.RawConfigParser()
    boto_settings.read("/etc/boto.cfg")

    has_proxy_settings = boto_settings.has_option("Boto", "proxy") and boto_settings.has_option("Boto", "proxy_port")
    if not has_proxy_settings:
        # No (or incomplete) proxy settings: fall back to the default configuration
        return Config()

    proxy = boto_settings.get("Boto", "proxy")
    proxy_port = boto_settings.get("Boto", "proxy_port")
    return Config(proxies={"https": f"{proxy}:{proxy_port}"})

# Configure the AWS CA bundle.
# In US isolated regions the dedicated CA bundle will be used.
# In any other region, the default bundle will be used (None stands for the default settings).
# Note: We want to apply a more general solution that applies to every region,
# but for the time being this is enough to support US isolated regions without
# impacting the other ones.
ca_bundle = f"/etc/pki/{region}/certs/ca-bundle.pem" if region.startswith("us-iso") else None

# Connect to AWS using boto
ec2 = boto3.client("ec2", region_name=region, config=proxy_config, verify=ca_bundle)

def get_device_volume_id(ec2, dev, instance_id):
# Poll for blockdevicemapping
devices = ec2.describe_instance_attribute(InstanceId=instance_id, Attribute="blockDeviceMapping").get(
"BlockDeviceMappings"
Expand All @@ -127,8 +93,48 @@ def main():
dev_map = dict((d.get("DeviceName"), d) for d in devices)
loop_count += 1

# Return volume ID
volume_id = dev_map.get(dev).get("Ebs").get("VolumeId")
return dev_map.get(dev).get("Ebs").get("VolumeId")


def get_metadata_value(token, metadata_path):
    """
    Fetch a single value from the instance metadata service.

    :param token: request headers to send along with the request (the value returned by get_imdsv2_token)
    :param metadata_path: full URL of the metadata entry to read
    :return: the response body as text
    :raises requests.HTTPError: if the metadata service answers with an error status
    """
    response = requests.get(
        metadata_path,
        headers=token,
        timeout=METADATA_REQUEST_TIMEOUT,
    )
    # Fail loudly on 4xx/5xx: otherwise the error page body would be returned
    # and silently used by the caller as if it were the requested metadata value.
    response.raise_for_status()
    return response.text


def main():
    """
    Print the ID of the EBS volume backing the block device given as first command line argument.

    The device name is validated and adapted, instance id and region are read from the instance
    metadata service, and the volume id is looked up through the EC2 API.
    Exits with status 1 when no block device is provided.
    """
    syslog.syslog("Starting ec2_dev_2_volid.py script")
    try:
        dev = str(sys.argv[1])
        validate_device_name(dev)
        syslog.syslog(f"Input block device is {dev}")
    except IndexError:
        syslog.syslog(syslog.LOG_ERR, "Provide block device i.e. xvdf")
        # Nothing below can work without a device name: stop here instead of
        # failing later on the unbound `dev` variable.
        sys.exit(1)

    dev = adapt_device_name(dev)

    token = get_imdsv2_token()

    instance_id = get_metadata_value(token, "http://169.254.169.254/latest/meta-data/instance-id")

    # The metadata entry is the availability zone: dropping its last character gives the region
    region = get_metadata_value(token, "http://169.254.169.254/latest/meta-data/placement/availability-zone")
    region = region[:-1]

    proxy_config = parse_proxy_config()

    # Configure the AWS CA bundle.
    # In US isolated regions the dedicated CA bundle will be used.
    # In any other region, the default bundle will be used (None stands for the default settings).
    # Note: We want to apply a more general solution that applies to every region,
    # but for the time being this is enough to support US isolated regions without
    # impacting the other ones.
    ca_bundle = f"/etc/pki/{region}/certs/ca-bundle.pem" if region.startswith("us-iso") else None

    ec2 = boto3.client("ec2", region_name=region, config=proxy_config, verify=ca_bundle)

    volume_id = get_device_volume_id(ec2, dev, instance_id)
    print(volume_id)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def validate_device_name(device_name):
return True


def convert_dev(dev):
def adapt_device_name(dev):
# Translate the device name as provided by the OS to the one used by EC2
# FIXME This approach could be broken in some OS variants, see # pylint: disable=fixme
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html#identify-nvme-ebs-device
Expand Down Expand Up @@ -91,7 +91,7 @@ def get_imdsv2_token():

def attach_volume(volume_id, instance_id, ec2):
# Generate a list of system paths minus the root path
paths = [convert_dev(device) for device in get_all_devices()]
paths = [adapt_device_name(device) for device in get_all_devices()]

# List of possible block devices
block_devices = [
Expand Down Expand Up @@ -175,7 +175,6 @@ def detach_volume(volume_id, ec2):


def parse_proxy_config():
"""Parse configuration file to read proxy settings."""
config = configparser.RawConfigParser()
config.read("/etc/boto.cfg")
proxy_config = Config()
Expand All @@ -186,38 +185,33 @@ def parse_proxy_config():
return proxy_config


def handle_volume(volume_id, attach, detach):
# Get IMDSv2 token
token = get_imdsv2_token()

# Get instance ID
instance_id = requests.get(
"http://169.254.169.254/latest/meta-data/instance-id",
def get_metadata_value(token, metadata_path):
    """
    Fetch a single value from the instance metadata service.

    :param token: request headers to send along with the request (the value returned by get_imdsv2_token)
    :param metadata_path: full URL of the metadata entry to read
    :return: the response body as text
    :raises requests.HTTPError: if the metadata service answers with an error status
    """
    response = requests.get(
        metadata_path,
        headers=token,
        timeout=METADATA_REQUEST_TIMEOUT,
    )
    # Fail loudly on 4xx/5xx: otherwise the error page body would be returned
    # and silently used by the caller as if it were the requested metadata value.
    response.raise_for_status()
    return response.text

# Get region
region = requests.get(
"http://169.254.169.254/latest/meta-data/placement/availability-zone",
headers=token,
timeout=METADATA_REQUEST_TIMEOUT,
).text

def handle_volume(volume_id, attach, detach):
token = get_imdsv2_token()

instance_id = get_metadata_value(token, "http://169.254.169.254/latest/meta-data/instance-id")

region = get_metadata_value(token, "http://169.254.169.254/latest/meta-data/placement/availability-zone")
region = region[:-1]

# Parse configuration file to read proxy settings
proxy_config = parse_proxy_config()

# Connect to AWS using boto
ec2 = boto3.client("ec2", region_name=region, config=proxy_config)

if attach and is_volume_avaialble(ec2, volume_id):
if attach and is_volume_available(ec2, volume_id):
attach_volume(volume_id, instance_id, ec2)
elif detach and is_volume_attached(ec2, volume_id):
detach_volume(volume_id, ec2)


def is_volume_avaialble(ec2, volume_id):
def is_volume_available(ec2, volume_id):
try:
state = ec2.describe_volumes(VolumeIds=[volume_id]).get("Volumes")[0].get("State")
if state == "available":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@

describe file('/usr/local/bin/cloudwatch_agent_config_util.py') do
it { should exist }
its('sha256sum') { should eq '980b0ba6e5922fe2983d3e866ac970622f59a26a4829b8262466739582176525' }
its('sha256sum') { should eq 'b816b4891a5e8f1e7ac94616db7927f7955ba72a8f53ec1b320402a2ac9c9b7f' }
its('owner') { should eq 'root' }
its('group') { should eq 'root' }
its('mode') { should cmp '0644' }
Expand Down
15 changes: 15 additions & 0 deletions cookbooks/aws-parallelcluster-slurm/kitchen.slurm-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,18 @@ suites:
cluster:
node_type: ComputeFleet
head_node_private_ip: '127.0.0.1'
- name: config_head_node_munge
run_list:
- recipe[aws-parallelcluster-tests::setup]
- recipe[aws-parallelcluster-slurm::config_munge_key]
verifier:
controls:
- /tag:config_munge/
attributes:
cluster:
node_type: HeadNode
scheduler: 'slurm'
config:
DevSettings:
SlurmSettings:
MungeKeySecretArn: null
18 changes: 5 additions & 13 deletions cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,11 @@ def enable_munge_service
end

def setup_munge_head_node
# Generate munge key
bash 'generate_munge_key' do
not_if { ::File.exist?('/etc/munge/munge.key') }
user node['cluster']['munge']['user']
group node['cluster']['munge']['group']
cwd '/tmp'
code <<-HEAD_CREATE_MUNGE_KEY
set -e
# Generates munge key in /etc/munge/munge.key
/usr/sbin/mungekey --verbose
# Enforce correct permission on the key
chmod 0600 /etc/munge/munge.key
HEAD_CREATE_MUNGE_KEY
# Generate the munge key or get its value from Secrets Manager
munge_key_manager 'manage_munge_key' do
munge_key_secret_arn lazy {
node['cluster']['config'].dig(:DevSettings, :SlurmSettings, :MungeKeySecretArn)
}
end

enable_munge_service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
# limitations under the License.

setup_munge_head_node unless redhat_on_docker?
include_recipe 'aws-parallelcluster-slurm::config_munge_key'

# Export /opt/slurm
nfs_export "#{node['cluster']['slurm']['install_dir']}" do
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true

#
# Cookbook:: aws-parallelcluster-slurm
# Recipe:: config_munge_key
#
# Copyright:: 2013-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
# License. A copy of the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
# limitations under the License.

# Run the head node munge setup, except when redhat_on_docker? is true.
setup_munge_head_node unless redhat_on_docker?
72 changes: 72 additions & 0 deletions cookbooks/aws-parallelcluster-slurm/resources/munge_key_manager.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# frozen_string_literal: true

#
# Cookbook:: aws-parallelcluster-slurm
# Resource:: munge_key_manager
#
# Copyright:: 2013-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
# License. A copy of the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
# limitations under the License.

# Custom resource that puts the munge key in place at /etc/munge/munge.key.
resource_name :munge_key_manager
provides :munge_key_manager
unified_mode true

# ARN of the Secrets Manager secret holding the base64-encoded munge key.
# When it is not set, the setup_munge_key action generates a new key locally instead.
property :munge_key_secret_arn, String

# setup_munge_key runs when the resource is declared without an explicit action.
default_action :setup_munge_key

# Puts the munge key in place at /etc/munge/munge.key.
#
# When munge_key_secret_arn is set, the base64-encoded key is fetched from Secrets Manager,
# decoded and installed with the munge user/group as owner and mode 0600.
# Otherwise a new key is generated with mungekey, unless the key file already exists.
action :setup_munge_key do
  if new_resource.munge_key_secret_arn
    # This block will fetch the munge key from Secrets Manager
    bash 'fetch_and_decode_munge_key' do
      user 'root'
      group 'root'
      cwd '/tmp'
      code <<-FETCH_AND_DECODE
        set -e
        # The key must never be readable by other users, not even before chmod runs
        umask 077
        # Get encoded munge key from secrets manager
        encoded_key=$(aws secretsmanager get-secret-value --secret-id "#{new_resource.munge_key_secret_arn}" --query 'SecretString' --output text --region "#{node['cluster']['region']}")
        # If encoded_key doesn't have a value, error and exit
        if [ -z "$encoded_key" ]; then
          echo "Error fetching munge key from Secrets Manager or the key is empty"
          exit 1
        fi
        # Decode straight into a temporary file next to the final location.
        # The key is binary data: storing it in a shell variable would drop NUL bytes and
        # trailing newlines, and echo would append an extra newline, corrupting the key.
        tmp_key=$(mktemp /etc/munge/munge.key.XXXXXX)
        trap 'rm -f "$tmp_key"' EXIT
        if ! printf '%s' "$encoded_key" | base64 -d > "$tmp_key" || [ ! -s "$tmp_key" ]; then
          echo "Error decoding the munge key with base64"
          exit 1
        fi
        # Set ownership on the key
        chown #{node['cluster']['munge']['user']}:#{node['cluster']['munge']['group']} "$tmp_key"
        # Enforce correct permission on the key
        chmod 0600 "$tmp_key"
        # Replace the key only once it is complete, so a failure never leaves a truncated key behind
        mv -f "$tmp_key" /etc/munge/munge.key
      FETCH_AND_DECODE
    end
  else
    # This block will generate a munge key if it doesn't exist
    bash 'generate_munge_key' do
      not_if { ::File.exist?('/etc/munge/munge.key') }
      user node['cluster']['munge']['user']
      group node['cluster']['munge']['group']
      cwd '/tmp'
      code <<-GENERATE_KEY
        set -e
        # Generates munge key in /etc/munge/munge.key
        /usr/sbin/mungekey --verbose
        # Enforce correct permission on the key
        chmod 0600 /etc/munge/munge.key
      GENERATE_KEY
    end
  end
end
Loading

0 comments on commit f52c5d0

Please sign in to comment.