Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Configure Enroot and Pyxis only on HeadNode #2816

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions cookbooks/aws-parallelcluster-platform/recipes/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,3 @@
# Include the supervisord_config recipe from the platform cookbook.
include_recipe 'aws-parallelcluster-platform::supervisord_config'
# Invoke the fetch_config resource (defined outside this view); no explicit action is given.
fetch_config 'Fetch and load cluster configs'
# The config_login recipe is included only when the node type is 'LoginNode'.
include_recipe 'aws-parallelcluster-platform::config_login' if node['cluster']['node_type'] == 'LoginNode'
# Run the enroot resource's :configure action; no node_type guard is applied at this call site.
enroot 'Configure Enroot' do
action :configure
end
Original file line number Diff line number Diff line change
Expand Up @@ -18,50 +18,38 @@
# :setup action: does nothing when on_docker? is true; otherwise hands off to
# action_install_package (defined outside this view).
action :setup do
return if on_docker?
action_install_package
end

action :configure do
return if on_docker?
return unless enroot_installed

cookbook_file "/tmp/enroot.template.conf" do
source 'enroot/enroot.template.conf'
cookbook 'aws-parallelcluster-platform'
directory node['cluster']['enroot_dir'] do
owner 'root'
group 'root'
mode '0755'
action :create_if_missing
mode '1777'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this be 0755?

action :create
end

bash "Configure enroot" do
user 'root'
code <<-ENROOT_CONFIGURE
set -e
ENROOT_CONFIG_RELEASE=pyxis
SHARED_DIR=#{node['cluster']['shared_dir']}
NONROOT_USER=#{node['cluster']['cluster_user']}
mkdir -p ${SHARED_DIR}/enroot
chown ${NONROOT_USER} ${SHARED_DIR}/enroot
ENROOT_CACHE_PATH=${SHARED_DIR}/enroot envsubst < /tmp/enroot.template.conf > /tmp/enroot.conf
mv /tmp/enroot.conf /etc/enroot/enroot.conf
chmod 0644 /etc/enroot/enroot.conf

mkdir -p /tmp/enroot
chmod 1777 /tmp/enroot
mkdir -p /tmp/enroot/data
chmod 1777 /tmp/enroot/data
directory node['cluster']['enroot_cache_path'] do
owner 'root'
group 'root'
mode '1777'
action :create
end

chmod 1777 ${SHARED_DIR}/enroot
directory "/run/enroot" do
mode '1777'
action :create
end

mkdir -p ${SHARED_DIR}/pyxis/
chown ${NONROOT_USER} ${SHARED_DIR}/pyxis/
sed -i '${s/$/ runtime_path=${SHARED_DIR}\\/pyxis/}' /opt/slurm/etc/plugstack.conf.d/pyxis.conf
SHARED_DIR=${SHARED_DIR} envsubst < /opt/slurm/etc/plugstack.conf.d/pyxis.conf > /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf
mv /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf /opt/slurm/etc/plugstack.conf.d/pyxis.conf
directory "/run/enroot/data" do
mode '1777'
action :create
end

ENROOT_CONFIGURE
retries 3
retry_delay 5
template "/etc/enroot/enroot.conf" do
source 'enroot/enroot.conf.erb'
cookbook 'aws-parallelcluster-platform'
owner 'root'
group 'root'
mode '0644'
action :create
end
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,6 @@ def self.setup(chef_run)
end
end
end

# Converges a one-resource recipe that runs the `enroot` resource named
# 'configure' with the :configure action.
#
# chef_run - object responding to converge_dsl (the specs pass the runner they build).
#
# Returns whatever converge_dsl returns.
def self.configure(chef_run)
  cookbook = 'aws-parallelcluster-platform'
  chef_run.converge_dsl(cookbook) { enroot('configure') { action :configure } }
end
end

describe 'enroot:package_version' do
Expand Down Expand Up @@ -128,44 +120,3 @@ def self.configure(chef_run)
end
end
end

# Spec for the enroot resource's :configure action, repeated for every
# platform/version pair yielded by for_all_oses.
describe 'enroot:configure' do
for_all_oses do |platform, version|
context "on #{platform}#{version}" do
# The runner is built with step_into: ['enroot'] so the examples can assert on
# the bash resource declared inside the enroot resource.
let(:chef_run) do
runner(platform: platform, version: version, step_into: ['enroot'])
end

context 'when enroot is installed' do
before do
# Stub the provider's enroot_installed check to return true, then converge.
stubs_for_provider('enroot') do |resource|
allow(resource).to receive(:enroot_installed).and_return(true)
end
ConvergeEnroot.configure(chef_run)
end
# Expects the 'Configure enroot' bash resource to run as root with 3 retries and a retry_delay of 5.
it 'run configure enroot script' do
is_expected.to run_bash('Configure enroot')
.with(retries: 3)
.with(retry_delay: 5)
.with(user: 'root')
end
end

context 'when enroot is not installed' do
before do
# Stub the provider's enroot_installed check to return false, then converge.
stubs_for_provider('enroot') do |resource|
allow(resource).to receive(:enroot_installed).and_return(false)
end
ConvergeEnroot.configure(chef_run)
end

# NOTE(review): the negated matcher is still constrained by .with(...), so it
# presumably only rules out a bash run with these exact attributes — confirm
# whether a bare `not_to run_bash('Configure enroot')` was intended.
it 'does not run configure enroot script' do
is_expected.not_to run_bash('Configure enroot')
.with(retries: 3)
.with(retry_delay: 5)
.with(user: 'root')
end
end
end
end
end
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#ENROOT_LIBRARY_PATH /usr/lib/enroot
#ENROOT_SYSCONF_PATH /etc/enroot
ENROOT_RUNTIME_PATH /tmp/enroot/user-$(id -u)
ENROOT_CONFIG_PATH ${ENROOT_CONFIG_PATH}
ENROOT_CACHE_PATH ${ENROOT_CACHE_PATH}
ENROOT_DATA_PATH /tmp/enroot/data/user-$(id -u)
ENROOT_RUNTIME_PATH /run/enroot/user-$(id -u)
ENROOT_CONFIG_PATH
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should create an enroot config path, like /home/user-$(id -u)/.config/enroot.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our users could share the /home folder, and we would end up back in the original situation, where paths that are expected to be local become shared. I suggest avoiding /home for these paths. The Pyxis/Enroot docs and examples sometimes mention /home because they assume it is a local folder.

ENROOT_CACHE_PATH <%= node['cluster']['enroot_cache_path'] %>
ENROOT_DATA_PATH /run/enroot/data/user-$(id -u)
#ENROOT_TEMP_PATH ${TMPDIR:-/tmp}

# Gzip program used to uncompress digest layers.
Expand Down Expand Up @@ -68,4 +68,4 @@ ENROOT_RESTRICT_DEV no
#all_proxy
#no_proxy
#http_proxy
#https_proxy
#https_proxy
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# THIS IS AN EXAMPLE OF A pyxis.conf FILE
# To enable it, move this file to /opt/slurm/etc/plugstack.conf.d/
required /usr/local/lib/slurm/spank_pyxis.so runtime_path=<%= @pyxis_persistent_runtime_path %>
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,37 @@

expected_enroot_version = node['cluster']['enroot']['version']

describe "gdrcopy version is expected to be #{expected_enroot_version}" do
describe "enroot version is expected to be #{expected_enroot_version}" do
subject { command('enroot version').stdout.strip() }
it { should eq expected_enroot_version }
end

# Enroot directories under /etc: must exist, be owned by root:root, and have mode 01777.
%w(/etc/enroot /etc/enroot/enroot-cache).each do |etc_path|
  describe directory(etc_path) do
    it { should exist }
    its('mode') { should cmp '01777' }
    its('owner') { should eq 'root' }
    its('group') { should eq 'root' }
  end
end

# Enroot directories under /run: only existence and mode 01777 are checked.
%w(/run/enroot /run/enroot/data).each do |run_path|
  describe directory(run_path) do
    it { should exist }
    its('mode') { should cmp '01777' }
  end
end
end

control 'tag:config_enroot_enabled_on_graphic_instances' do
only_if { !os_properties.on_docker? && ['yes', true].include?(node['cluster']['nvidia']['enabled']) }

describe file("/opt/parallelcluster/shared/enroot") do
describe file("/etc/enroot/enroot-cache") do
it { should exist }
its('group') { should eq 'root' }
end unless os_properties.redhat_on_docker?
Expand Down
5 changes: 5 additions & 0 deletions cookbooks/aws-parallelcluster-shared/attributes/cluster.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,8 @@

# Default NFS mount options
default['cluster']['nfs']['hard_mount_options'] = 'hard,_netdev,noatime'

# Pyxis+Enroot Example Config files
default['cluster']['config_examples_dir'] = "#{node['cluster']['configs_dir']}/examples"
default['cluster']['enroot_dir'] = "/etc/enroot"
default['cluster']['enroot_cache_path'] = "#{node['cluster']['enroot_dir']}/enroot-cache"
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,51 @@
cd /tmp/pyxis-#{pyxis_version}
CPPFLAGS='-I /opt/slurm/include/' make
CPPFLAGS='-I /opt/slurm/include/' make install
mkdir -p /opt/slurm/etc/plugstack.conf.d
echo -e 'include /opt/slurm/etc/plugstack.conf.d/*' | tee /opt/slurm/etc/plugstack.conf
ln -fs /usr/local/share/pyxis/pyxis.conf /opt/slurm/etc/plugstack.conf.d/pyxis.conf
PYXIS_INSTALL
retries 3
retry_delay 5
end

# Ensure <slurm install_dir>/etc exists, owned by root:root with mode 0755.
directory "#{node['cluster']['slurm']['install_dir']}/etc" do
user 'root'
group 'root'
mode '0755'
end

# plugstack.conf.d directory; no owner, group or mode is set explicitly here.
directory "#{node['cluster']['slurm']['install_dir']}/etc/plugstack.conf.d"

# Directories that receive the example config files rendered below.
directory node['cluster']['config_examples_dir']

directory "#{node['cluster']['config_examples_dir']}/spank"

directory "#{node['cluster']['config_examples_dir']}/pyxis"

# /run/pyxis is owned by the cluster user with mode 1777; the same path is handed
# to the pyxis.conf template below as pyxis_persistent_runtime_path.
directory "/run/pyxis" do
owner node['cluster']['cluster_user']
# group node['cluster']['cluster_user']
mode '1777'
action :create
end

# Render the example plugstack.conf into the examples/spank directory.
template "#{node['cluster']['config_examples_dir']}/spank/plugstack.conf" do
source 'pyxis/plugstack.conf.erb'
cookbook 'aws-parallelcluster-slurm'
owner 'root'
group 'root'
mode '0644'
end

# NOTE(review): the link is created AT /usr/local/share/pyxis/pyxis.conf and
# points TO the plugstack.conf.d path — confirm this direction is intended.
link '/usr/local/share/pyxis/pyxis.conf' do
to "#{node['cluster']['slurm']['install_dir']}/etc/plugstack.conf.d/pyxis.conf"
end

# Render the example pyxis.conf into the examples/pyxis directory.
# NOTE(review): this template is looked up in 'aws-parallelcluster-platform' while
# the plugstack.conf template above uses 'aws-parallelcluster-slurm' — confirm
# which cookbook ships pyxis/pyxis.conf.erb.
template "#{node['cluster']['config_examples_dir']}/pyxis/pyxis.conf" do
source 'pyxis/pyxis.conf.erb'
cookbook 'aws-parallelcluster-platform'
owner 'root'
group 'root'
mode '0644'
variables(
pyxis_persistent_runtime_path: "/run/pyxis"
)
end
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include <%= node['cluster']['slurm']['install_dir'] %>/etc/plugstack.conf.d/*
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,37 @@

title 'Checks Pyxis has been installed'

describe file("/opt/slurm/etc/plugstack.conf.d/pyxis.conf") do
# Slurm's etc directory must exist, be owned by root:root, and have mode 0755.
describe directory('/opt/slurm/etc') do
  it { should exist }
  its('mode') { should cmp '0755' }
  its('owner') { should eq 'root' }
  its('group') { should eq 'root' }
end

# The examples directory and its spank/pyxis subdirectories must exist.
examples_root = '/opt/parallelcluster/configs/examples'
['', '/spank', '/pyxis'].each do |suffix|
  describe directory("#{examples_root}#{suffix}") do
    it { should exist }
  end
end

# /run/pyxis must exist and be owned by the cluster user.
describe directory('/run/pyxis') do
  it { should exist }
  its('owner') { should eq "#{node['cluster']['cluster_user']}" }
end

# plugstack.conf.d must exist and be owned by root:root.
describe directory('/opt/slurm/etc/plugstack.conf.d') do
  it { should exist }
  its('owner') { should eq 'root' }
  its('group') { should eq 'root' }
end

# Both example config files must have been rendered.
%w(pyxis/pyxis.conf spank/plugstack.conf).each do |relative_path|
  describe file("#{examples_root}/#{relative_path}") do
    it { should exist }
  end
end
end
Loading