Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the way Pyxis and Enroot are configured. #2826

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
# ArmPL
default['conditions']['arm_pl_supported'] = arm_instance?

# Enroot + Pyxis
# Enroot
default['cluster']['enroot']['version'] = '3.4.1'
default['cluster']['pyxis']['version'] = '0.20.0'
default['cluster']['enroot']['temporary_dir'] = '/run/enroot'
default['cluster']['enroot']['persistent_dir'] = '/var/enroot'

# NVidia
default['cluster']['nvidia']['enabled'] = 'no'
Expand Down
3 changes: 0 additions & 3 deletions cookbooks/aws-parallelcluster-platform/recipes/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,3 @@
include_recipe 'aws-parallelcluster-platform::supervisord_config'
fetch_config 'Fetch and load cluster configs'
include_recipe 'aws-parallelcluster-platform::config_login' if node['cluster']['node_type'] == 'LoginNode'
enroot 'Configure Enroot' do
action :configure
end
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
directory node['cluster']['license_dir']
directory node['cluster']['configs_dir']
directory node['cluster']['shared_dir']
directory node['cluster']['examples_dir']
directory node['cluster']['shared_dir_login_nodes']

# Create ParallelCluster log folder
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# frozen_string_literal: true
#
# Copyright:: 2013-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
Expand All @@ -16,52 +16,19 @@
default_action :setup

action :setup do
return if on_docker?
return if on_docker? || enroot_installed

action_install_package
end

action :configure do
return if on_docker?
return unless enroot_installed
enroot_examples_dir = "#{node['cluster']['examples_dir']}/enroot"

directory enroot_examples_dir

cookbook_file "/tmp/enroot.template.conf" do
source 'enroot/enroot.template.conf'
cookbook 'aws-parallelcluster-platform'
template "#{enroot_examples_dir}/enroot.conf" do
source 'enroot/enroot.conf.erb'
owner 'root'
group 'root'
mode '0755'
action :create_if_missing
end

bash "Configure enroot" do
user 'root'
code <<-ENROOT_CONFIGURE
set -e
ENROOT_CONFIG_RELEASE=pyxis
SHARED_DIR=#{node['cluster']['shared_dir']}
NONROOT_USER=#{node['cluster']['cluster_user']}
mkdir -p ${SHARED_DIR}/enroot
chown ${NONROOT_USER} ${SHARED_DIR}/enroot
ENROOT_CACHE_PATH=${SHARED_DIR}/enroot envsubst < /tmp/enroot.template.conf > /tmp/enroot.conf
mv /tmp/enroot.conf /etc/enroot/enroot.conf
chmod 0644 /etc/enroot/enroot.conf

mkdir -p /tmp/enroot
chmod 1777 /tmp/enroot
mkdir -p /tmp/enroot/data
chmod 1777 /tmp/enroot/data

chmod 1777 ${SHARED_DIR}/enroot

mkdir -p ${SHARED_DIR}/pyxis/
chown ${NONROOT_USER} ${SHARED_DIR}/pyxis/
sed -i '${s/$/ runtime_path=${SHARED_DIR}\\/pyxis/}' /opt/slurm/etc/plugstack.conf.d/pyxis.conf
SHARED_DIR=${SHARED_DIR} envsubst < /opt/slurm/etc/plugstack.conf.d/pyxis.conf > /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf
mv /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf /opt/slurm/etc/plugstack.conf.d/pyxis.conf

ENROOT_CONFIGURE
retries 3
retry_delay 5
mode '0644'
end
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
is_expected.to create_directory(node['cluster']['shared_dir'])
end

it 'creates examples directory' do
is_expected.to create_directory(node['cluster']['examples_dir'])
end

it 'creates log directory' do
is_expected.to create_directory(node['cluster']['log_base_dir']).with(
owner: 'root',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,9 @@ def self.setup(chef_run)
end
end
end

def self.configure(chef_run)
chef_run.converge_dsl('aws-parallelcluster-platform') do
enroot 'configure' do
action :configure
end
end
end
end

describe 'enroot:package_version' do
describe 'aws-parallelcluster-platform::enroot:package_version' do
for_all_oses do |platform, version|
context "on #{platform}#{version}" do
cached(:chef_run) do
Expand All @@ -39,7 +31,34 @@ def self.configure(chef_run)
end
end

describe 'enroot:arch_suffix' do
# Checks the value returned by the enroot resource's `enroot_installed` helper
# for every supported OS, once with the enroot binary reported as present and
# once with it reported as absent.
describe 'aws-parallelcluster-platform::enroot:enroot_installed' do
for_all_oses do |platform, version|
context "on #{platform}#{version}" do
# Path whose existence is stubbed below.
# NOTE(review): presumably the same path `enroot_installed` checks - confirm against the resource.
binary = '/usr/bin/enroot'
# Exercise both outcomes: binary present (true) and binary absent (false).
[true, false].each do |binary_exist|
context "when binary #{binary} does #{'not ' unless binary_exist}exist" do
cached(:chef_run) do
# Only File.exist? calls made with the binary path are stubbed here.
allow(File).to receive(:exist?).with(binary).and_return(binary_exist)
runner = runner(platform: platform, version: version, step_into: ['enroot'])
ConvergeEnroot.setup(runner)
end

# The enroot resource declared by ConvergeEnroot.setup, looked up by name.
cached(:resource) do
chef_run.find_resource('enroot', 'setup')
end

# The helper is expected to mirror the stubbed existence of the binary.
expected_result = binary_exist

it "returns #{expected_result}" do
expect(resource.enroot_installed).to eq(expected_result)
end
end
end
end
end
end

describe 'aws-parallelcluster-platform::enroot:arch_suffix' do
for_all_oses do |platform, version|
context "on #{platform}#{version} - arm" do
cached(:chef_run) do
Expand Down Expand Up @@ -81,15 +100,66 @@ def self.configure(chef_run)
end
end

describe 'enroot:setup' do
describe 'aws-parallelcluster-platform::enroot:setup' do
for_all_oses do |platform, version|
context "on #{platform}#{version}" do
cached(:cluster_examples_dir) { '/path/to/cluster/examples/dir' }
cached(:enroot_persistent_dir) { '/path/to/enroot/persistent/dir' }
cached(:enroot_temporary_dir) { '/path/to/enroot/temporary/dir' }

context "when enroot is already installed" do
let(:chef_run) do
stubs_for_resource('enroot') do |res|
allow(res).to receive(:enroot_installed).and_return(true)
end
runner(platform: platform, version: version, step_into: ['enroot']) do |node|
node.override['cluster']['enroot']['version'] = package_version
node.override['cluster']['examples_dir'] = cluster_examples_dir
end
end

before do
ConvergeEnroot.setup(chef_run)
end

it 'does not install Enroot' do
is_expected.not_to run_bash('Install enroot')
end

it 'does not create the Enroot configuration' do
is_expected.not_to create_template("#{cluster_examples_dir}/enroot/enroot.conf")
end
end

let(:chef_run) do
stubs_for_resource('enroot') do |res|
allow(res).to receive(:enroot_installed).and_return(false)
end
runner(platform: platform, version: version, step_into: ['enroot']) do |node|
node.override['cluster']['enroot']['version'] = package_version
node.override['cluster']['examples_dir'] = cluster_examples_dir
node.override['cluster']['enroot']['persistent_dir'] = enroot_persistent_dir
node.override['cluster']['enroot']['temporary_dir'] = enroot_temporary_dir
end
end

before do
ConvergeEnroot.setup(chef_run)
end

# The assertion is negative (the install script must NOT run), so the example
# description is aligned with what is actually checked.
# NOTE(review): presumably installation is gated on NVIDIA being enabled, which
# this context does not turn on - confirm against action_install_package.
it 'does not install Enroot when nvidia is not enabled' do
is_expected.not_to run_bash('Install enroot')
end

it 'creates the Enroot example configuration' do
is_expected.to create_template("#{cluster_examples_dir}/enroot/enroot.conf").with(
source: 'enroot/enroot.conf.erb',
owner: 'root',
group: 'root',
mode: '0644'
)
end

context 'when nvidia is enabled' do
before do
stubs_for_provider('enroot') do |resource|
Expand Down Expand Up @@ -128,44 +198,3 @@ def self.configure(chef_run)
end
end
end

describe 'enroot:configure' do
for_all_oses do |platform, version|
context "on #{platform}#{version}" do
let(:chef_run) do
runner(platform: platform, version: version, step_into: ['enroot'])
end

context 'when enroot is installed' do
before do
stubs_for_provider('enroot') do |resource|
allow(resource).to receive(:enroot_installed).and_return(true)
end
ConvergeEnroot.configure(chef_run)
end
it 'run configure enroot script' do
is_expected.to run_bash('Configure enroot')
.with(retries: 3)
.with(retry_delay: 5)
.with(user: 'root')
end
end

context 'when enroot is not installed' do
before do
stubs_for_provider('enroot') do |resource|
allow(resource).to receive(:enroot_installed).and_return(false)
end
ConvergeEnroot.configure(chef_run)
end

it 'does not run configure enroot script' do
is_expected.not_to run_bash('Configure enroot')
.with(retries: 3)
.with(retry_delay: 5)
.with(user: 'root')
end
end
end
end
end
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#ENROOT_LIBRARY_PATH /usr/lib/enroot
#ENROOT_SYSCONF_PATH /etc/enroot
ENROOT_RUNTIME_PATH /tmp/enroot/user-$(id -u)
ENROOT_CONFIG_PATH ${ENROOT_CONFIG_PATH}
ENROOT_CACHE_PATH ${ENROOT_CACHE_PATH}
ENROOT_DATA_PATH /tmp/enroot/data/user-$(id -u)
ENROOT_RUNTIME_PATH <%= node['cluster']['enroot']['temporary_dir'] %>/runtime/user-$(id -u)
ENROOT_DATA_PATH <%= node['cluster']['enroot']['temporary_dir'] %>/data/user-$(id -u)
ENROOT_CONFIG_PATH <%= node['cluster']['enroot']['persistent_dir'] %>/config/user-$(id -u)
ENROOT_CACHE_PATH <%= node['cluster']['enroot']['persistent_dir'] %>/cache/group-$(id -g)
#ENROOT_TEMP_PATH ${TMPDIR:-/tmp}

# Gzip program used to uncompress digest layers.
Expand Down Expand Up @@ -68,4 +68,4 @@ ENROOT_RESTRICT_DEV no
#all_proxy
#no_proxy
#http_proxy
#https_proxy
#https_proxy
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,26 @@

expected_enroot_version = node['cluster']['enroot']['version']

describe "gdrcopy version is expected to be #{expected_enroot_version}" do
describe "enroot version is expected to be #{expected_enroot_version}" do
subject { command('enroot version').stdout.strip() }
it { should eq expected_enroot_version }
end

# Directories expected to be present after installation; each one must exist
# and be owned by root:root with mode 0755.
persistent_dirs = %w(/etc/enroot)
persistent_dirs.each do |path|
describe directory(path) do
it { should exist }
its('owner') { should eq 'root' }
its('group') { should eq 'root' }
its('mode') { should cmp '0755' }
end
end
end

control 'tag:config_enroot_enabled_on_graphic_instances' do
only_if { !os_properties.on_docker? && ['yes', true].include?(node['cluster']['nvidia']['enabled']) }

describe file("/opt/parallelcluster/shared/enroot") do
describe file("/var/enroot/cache-group-1000") do
it { should exist }
its('group') { should eq 'root' }
end unless os_properties.redhat_on_docker?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
default['cluster']['license_dir'] = "#{node['cluster']['base_dir']}/licenses"
default['cluster']['configs_dir'] = "#{node['cluster']['base_dir']}/configs"
default['cluster']['shared_dir'] = "#{node['cluster']['base_dir']}/shared"
default['cluster']['examples_dir'] = "#{node['cluster']['base_dir']}/examples"
default['cluster']['shared_dir_login_nodes'] = "#{node['cluster']['base_dir']}/shared_login_nodes"
default['cluster']['log_base_dir'] = '/var/log/parallelcluster'
default['cluster']['etc_dir'] = '/etc/parallelcluster'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,10 @@

# Slurmdbd
default['cluster']['slurmdbd_service_enabled'] = "true"

# Spank
# Directory for plugstack.conf.d configuration files, located under the Slurm
# install dir's etc/ folder.
default['cluster']['slurm']['spank_config_dir'] = "#{node['cluster']['slurm']['install_dir']}/etc/plugstack.conf.d"

# Pyxis
# Default Pyxis version and the runtime path configured for it.
default['cluster']['pyxis']['version'] = '0.20.0'
default['cluster']['pyxis']['runtime_path'] = '/run/pyxis'
17 changes: 17 additions & 0 deletions cookbooks/aws-parallelcluster-slurm/libraries/pyxis.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# frozen_string_literal: true

# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "LICENSE.txt" file accompanying this file.
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
# See the License for the specific language governing permissions and limitations under the License.

# Reports whether the directory /usr/local/share/pyxis is present on this node.
# NOTE(review): presumably this directory is created by the Pyxis installation - confirm.
#
# @return [Boolean] true when the directory exists, false otherwise
def pyxis_installed?
  pyxis_share_dir = '/usr/local/share/pyxis'
  ::Dir.exist?(pyxis_share_dir)
end
Loading
Loading