From db107cf590bae80ef82aedcc25ece8512e059de9 Mon Sep 17 00:00:00 2001 From: Giacomo Marciani Date: Mon, 14 Oct 2024 23:19:27 +0200 Subject: [PATCH] Fix the way Pyxis and Enroot are configured. 1. Pyxis is disabled by default. In particular, the Enroot, SPANK and Pyxis config files required to enable it are stored in the `/opt/parallelcluster/examples` folder so that they are ineffective but can be used by the user to enable Pyxis by simply moving them to the expected location. 2. Moved Pyxis and Enroot configuration to build time (there was no reason to configure Pyxis and Enroot at runtime). 3. Skip Enroot installation if Enroot is already installed. 4. Skip Pyxis installation if Pyxis is already installed. 5. The sample configuration provided for Pyxis uses the runtime path `/run/pyxis`. As per the [documentation](https://github.com/NVIDIA/pyxis/wiki/Setup#slurm-plugstack-configuration), a tmpfs should be used. 6. The sample configuration provided for Enroot uses the following paths, as suggested in the [documentation](https://github.com/NVIDIA/pyxis/wiki/Setup#enroot-configuration-example): 1. Using tmpfs storage for `ENROOT_RUNTIME_PATH` and `ENROOT_DATA_PATH`. 2. Using persistent local storage for `ENROOT_CACHE_PATH` and `ENROOT_CONFIG_PATH`. 7. We do not create any directory used in the Pyxis or Enroot sample configuration. The user is supposed to create the desired directories. 8. *Minor*: Moved Pyxis attributes from the platform cookbook to the slurm cookbook because Pyxis is a SLURM plugin, so it would be conceptually wrong to have its attributes defined in the platform cookbook. 9. Added missing unit tests. 
Signed-off-by: Giacomo Marciani --- .../attributes/platform.rb | 5 +- .../recipes/config.rb | 3 - .../recipes/install/directories.rb | 1 + .../enroot/partial/_enroot_common.rb | 51 ++----- .../spec/unit/recipes/directories_spec.rb | 4 + .../spec/unit/resources/enroot_spec.rb | 133 +++++++++++------- .../enroot/enroot.conf.erb} | 10 +- .../test/controls/enroot_spec.rb | 24 +++- .../attributes/cluster.rb | 1 + .../attributes/slurm_attributes.rb | 7 + .../libraries/pyxis.rb | 17 +++ .../recipes/install/install_pyxis.rb | 35 ++++- .../spec/unit/libraries/pyxis_spec.rb | 27 ++++ .../spec/unit/recipes/install_pyxis_spec.rb | 107 ++++++++++++++ .../default/pyxis/plugstack.conf.erb | 3 + .../templates/default/pyxis/pyxis.conf.erb | 3 + .../test/controls/pyxis_spec.rb | 21 ++- 17 files changed, 339 insertions(+), 113 deletions(-) rename cookbooks/aws-parallelcluster-platform/{files/enroot/enroot.template.conf => templates/enroot/enroot.conf.erb} (82%) create mode 100644 cookbooks/aws-parallelcluster-slurm/libraries/pyxis.rb create mode 100644 cookbooks/aws-parallelcluster-slurm/spec/unit/libraries/pyxis_spec.rb create mode 100644 cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/install_pyxis_spec.rb create mode 100644 cookbooks/aws-parallelcluster-slurm/templates/default/pyxis/plugstack.conf.erb create mode 100644 cookbooks/aws-parallelcluster-slurm/templates/default/pyxis/pyxis.conf.erb diff --git a/cookbooks/aws-parallelcluster-platform/attributes/platform.rb b/cookbooks/aws-parallelcluster-platform/attributes/platform.rb index e9190067fe..e2c0f8687b 100644 --- a/cookbooks/aws-parallelcluster-platform/attributes/platform.rb +++ b/cookbooks/aws-parallelcluster-platform/attributes/platform.rb @@ -9,9 +9,10 @@ # ArmPL default['conditions']['arm_pl_supported'] = arm_instance? 
-# Enroot + Pyxis +# Enroot default['cluster']['enroot']['version'] = '3.4.1' -default['cluster']['pyxis']['version'] = '0.20.0' +default['cluster']['enroot']['temporary_dir'] = '/run/enroot' +default['cluster']['enroot']['persistent_dir'] = '/var/enroot' # NVidia default['cluster']['nvidia']['enabled'] = 'no' diff --git a/cookbooks/aws-parallelcluster-platform/recipes/config.rb b/cookbooks/aws-parallelcluster-platform/recipes/config.rb index af2a2c7049..153518a128 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/config.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/config.rb @@ -26,6 +26,3 @@ include_recipe 'aws-parallelcluster-platform::supervisord_config' fetch_config 'Fetch and load cluster configs' include_recipe 'aws-parallelcluster-platform::config_login' if node['cluster']['node_type'] == 'LoginNode' -enroot 'Configure Enroot' do - action :configure -end diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/directories.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/directories.rb index e091648686..53096a8b48 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/directories.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/directories.rb @@ -21,6 +21,7 @@ directory node['cluster']['license_dir'] directory node['cluster']['configs_dir'] directory node['cluster']['shared_dir'] +directory node['cluster']['examples_dir'] directory node['cluster']['shared_dir_login_nodes'] # Create ParallelCluster log folder diff --git a/cookbooks/aws-parallelcluster-platform/resources/enroot/partial/_enroot_common.rb b/cookbooks/aws-parallelcluster-platform/resources/enroot/partial/_enroot_common.rb index 54a71a03de..2f2a4979c2 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/enroot/partial/_enroot_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/enroot/partial/_enroot_common.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true # -# Copyright:: 2013-2023 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. +# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). # You may not use this file except in compliance with the License. @@ -16,52 +16,19 @@ default_action :setup action :setup do - return if on_docker? + return if on_docker? || enroot_installed + action_install_package -end -action :configure do - return if on_docker? - return unless enroot_installed + enroot_examples_dir = "#{node['cluster']['examples_dir']}/enroot" + + directory enroot_examples_dir - cookbook_file "/tmp/enroot.template.conf" do - source 'enroot/enroot.template.conf' - cookbook 'aws-parallelcluster-platform' + template "#{enroot_examples_dir}/enroot.conf" do + source 'enroot/enroot.conf.erb' owner 'root' group 'root' - mode '0755' - action :create_if_missing - end - - bash "Configure enroot" do - user 'root' - code <<-ENROOT_CONFIGURE - set -e - ENROOT_CONFIG_RELEASE=pyxis - SHARED_DIR=#{node['cluster']['shared_dir']} - NONROOT_USER=#{node['cluster']['cluster_user']} - mkdir -p ${SHARED_DIR}/enroot - chown ${NONROOT_USER} ${SHARED_DIR}/enroot - ENROOT_CACHE_PATH=${SHARED_DIR}/enroot envsubst < /tmp/enroot.template.conf > /tmp/enroot.conf - mv /tmp/enroot.conf /etc/enroot/enroot.conf - chmod 0644 /etc/enroot/enroot.conf - - mkdir -p /tmp/enroot - chmod 1777 /tmp/enroot - mkdir -p /tmp/enroot/data - chmod 1777 /tmp/enroot/data - - chmod 1777 ${SHARED_DIR}/enroot - - mkdir -p ${SHARED_DIR}/pyxis/ - chown ${NONROOT_USER} ${SHARED_DIR}/pyxis/ - sed -i '${s/$/ runtime_path=${SHARED_DIR}\\/pyxis/}' /opt/slurm/etc/plugstack.conf.d/pyxis.conf - SHARED_DIR=${SHARED_DIR} envsubst < /opt/slurm/etc/plugstack.conf.d/pyxis.conf > /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf - mv /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf /opt/slurm/etc/plugstack.conf.d/pyxis.conf - - ENROOT_CONFIGURE - retries 3 - retry_delay 5 + mode '0644' end end diff --git 
a/cookbooks/aws-parallelcluster-platform/spec/unit/recipes/directories_spec.rb b/cookbooks/aws-parallelcluster-platform/spec/unit/recipes/directories_spec.rb index 56e65af56b..f91cdf51bc 100644 --- a/cookbooks/aws-parallelcluster-platform/spec/unit/recipes/directories_spec.rb +++ b/cookbooks/aws-parallelcluster-platform/spec/unit/recipes/directories_spec.rb @@ -36,6 +36,10 @@ is_expected.to create_directory(node['cluster']['shared_dir']) end + it 'creates examples directory' do + is_expected.to create_directory(node['cluster']['examples_dir']) + end + it 'creates log directory' do is_expected.to create_directory(node['cluster']['log_base_dir']).with( owner: 'root', diff --git a/cookbooks/aws-parallelcluster-platform/spec/unit/resources/enroot_spec.rb b/cookbooks/aws-parallelcluster-platform/spec/unit/resources/enroot_spec.rb index 026c648399..9ffc17719c 100644 --- a/cookbooks/aws-parallelcluster-platform/spec/unit/resources/enroot_spec.rb +++ b/cookbooks/aws-parallelcluster-platform/spec/unit/resources/enroot_spec.rb @@ -9,17 +9,9 @@ def self.setup(chef_run) end end end - - def self.configure(chef_run) - chef_run.converge_dsl('aws-parallelcluster-platform') do - enroot 'configure' do - action :configure - end - end - end end -describe 'enroot:package_version' do +describe 'aws-parallelcluster-platform::enroot:package_version' do for_all_oses do |platform, version| context "on #{platform}#{version}" do cached(:chef_run) do @@ -39,7 +31,34 @@ def self.configure(chef_run) end end -describe 'enroot:arch_suffix' do +describe 'aws-parallelcluster-platform::enroot:enroot_installed' do + for_all_oses do |platform, version| + context "on #{platform}#{version}" do + binary = '/usr/bin/enroot' + [true, false].each do |binary_exist| + context "when binary #{binary} does #{'not ' unless binary_exist}exist" do + cached(:chef_run) do + allow(File).to receive(:exist?).with(binary).and_return(binary_exist) + runner = runner(platform: platform, version: version, step_into: 
['enroot']) + ConvergeEnroot.setup(runner) + end + + cached(:resource) do + chef_run.find_resource('enroot', 'setup') + end + + expected_result = binary_exist + + it "returns #{expected_result}" do + expect(resource.enroot_installed).to eq(expected_result) + end + end + end + end + end +end + +describe 'aws-parallelcluster-platform::enroot:arch_suffix' do for_all_oses do |platform, version| context "on #{platform}#{version} - arm" do cached(:chef_run) do @@ -81,15 +100,66 @@ def self.configure(chef_run) end end -describe 'enroot:setup' do +describe 'aws-parallelcluster-platform::enroot:setup' do for_all_oses do |platform, version| context "on #{platform}#{version}" do + cached(:cluster_examples_dir) { '/path/to/cluster/examples/dir' } + cached(:enroot_persistent_dir) { '/path/to/enroot/persistent/dir' } + cached(:enroot_temporary_dir) { '/path/to/enroot/temporary/dir' } + + context "when enroot is already installed" do + let(:chef_run) do + stubs_for_resource('enroot') do |res| + allow(res).to receive(:enroot_installed).and_return(true) + end + runner(platform: platform, version: version, step_into: ['enroot']) do |node| + node.override['cluster']['enroot']['version'] = package_version + node.override['cluster']['examples_dir'] = cluster_examples_dir + end + end + + before do + ConvergeEnroot.setup(chef_run) + end + + it 'does not install Enroot' do + is_expected.not_to run_bash('Install enroot') + end + + it 'does not create the Enroot configuration' do + is_expected.not_to create_template("#{cluster_examples_dir}/enroot/enroot.conf") + end + end + let(:chef_run) do + stubs_for_resource('enroot') do |res| + allow(res).to receive(:enroot_installed).and_return(false) + end runner(platform: platform, version: version, step_into: ['enroot']) do |node| node.override['cluster']['enroot']['version'] = package_version + node.override['cluster']['examples_dir'] = cluster_examples_dir + node.override['cluster']['enroot']['persistent_dir'] = enroot_persistent_dir + 
node.override['cluster']['enroot']['temporary_dir'] = enroot_temporary_dir end end + before do + ConvergeEnroot.setup(chef_run) + end + + it 'installs Enroot' do + is_expected.not_to run_bash('Install enroot') + end + + it 'creates the Enroot example configuration' do + is_expected.to create_template("#{cluster_examples_dir}/enroot/enroot.conf").with( + source: 'enroot/enroot.conf.erb', + owner: 'root', + group: 'root', + mode: '0644' + ) + end + context 'when nvidia is enabled' do before do stubs_for_provider('enroot') do |resource| @@ -128,44 +198,3 @@ def self.configure(chef_run) end end end - -describe 'enroot:configure' do - for_all_oses do |platform, version| - context "on #{platform}#{version}" do - let(:chef_run) do - runner(platform: platform, version: version, step_into: ['enroot']) - end - - context 'when enroot is installed' do - before do - stubs_for_provider('enroot') do |resource| - allow(resource).to receive(:enroot_installed).and_return(true) - end - ConvergeEnroot.configure(chef_run) - end - it 'run configure enroot script' do - is_expected.to run_bash('Configure enroot') - .with(retries: 3) - .with(retry_delay: 5) - .with(user: 'root') - end - end - - context 'when enroot is not installed' do - before do - stubs_for_provider('enroot') do |resource| - allow(resource).to receive(:enroot_installed).and_return(false) - end - ConvergeEnroot.configure(chef_run) - end - - it 'does not run configure enroot script' do - is_expected.not_to run_bash('Configure enroot') - .with(retries: 3) - .with(retry_delay: 5) - .with(user: 'root') - end - end - end - end -end diff --git a/cookbooks/aws-parallelcluster-platform/files/enroot/enroot.template.conf b/cookbooks/aws-parallelcluster-platform/templates/enroot/enroot.conf.erb similarity index 82% rename from cookbooks/aws-parallelcluster-platform/files/enroot/enroot.template.conf rename to cookbooks/aws-parallelcluster-platform/templates/enroot/enroot.conf.erb index a069e062e1..e9cecb7267 100644 --- 
a/cookbooks/aws-parallelcluster-platform/files/enroot/enroot.template.conf +++ b/cookbooks/aws-parallelcluster-platform/templates/enroot/enroot.conf.erb @@ -1,9 +1,9 @@ #ENROOT_LIBRARY_PATH /usr/lib/enroot #ENROOT_SYSCONF_PATH /etc/enroot -ENROOT_RUNTIME_PATH /tmp/enroot/user-$(id -u) -ENROOT_CONFIG_PATH ${ENROOT_CONFIG_PATH} -ENROOT_CACHE_PATH ${ENROOT_CACHE_PATH} -ENROOT_DATA_PATH /tmp/enroot/data/user-$(id -u) +ENROOT_RUNTIME_PATH <%= node['cluster']['enroot']['temporary_dir'] %>/runtime/user-$(id -u) +ENROOT_DATA_PATH <%= node['cluster']['enroot']['temporary_dir'] %>/data/user-$(id -u) +ENROOT_CONFIG_PATH <%= node['cluster']['enroot']['persistent_dir'] %>/config/user-$(id -u) +ENROOT_CACHE_PATH <%= node['cluster']['enroot']['persistent_dir'] %>/cache/group-$(id -g) #ENROOT_TEMP_PATH ${TMPDIR:-/tmp} # Gzip program used to uncompress digest layers. @@ -68,4 +68,4 @@ ENROOT_RESTRICT_DEV no #all_proxy #no_proxy #http_proxy -#https_proxy \ No newline at end of file +#https_proxy diff --git a/cookbooks/aws-parallelcluster-platform/test/controls/enroot_spec.rb b/cookbooks/aws-parallelcluster-platform/test/controls/enroot_spec.rb index 0dc1116b16..8a8f698ef4 100644 --- a/cookbooks/aws-parallelcluster-platform/test/controls/enroot_spec.rb +++ b/cookbooks/aws-parallelcluster-platform/test/controls/enroot_spec.rb @@ -14,16 +14,36 @@ expected_enroot_version = node['cluster']['enroot']['version'] - describe "gdrcopy version is expected to be #{expected_enroot_version}" do + describe "enroot version is expected to be #{expected_enroot_version}" do subject { command('enroot version').stdout.strip() } it { should eq expected_enroot_version } end + + persistent_dirs = %w(/etc/enroot /var/enroot) + persistent_dirs.each do |path| + describe directory(path) do + it { should exist } + its('owner') { should eq 'root' } + its('group') { should eq 'root' } + its('mode') { should cmp '01777' } + end + end + + temporary_dirs = [ "/run/enroot" ] + temporary_dirs.each do |path| + describe 
directory(path) do + it { should exist } + its('owner') { should eq 'root' } + its('group') { should eq 'root' } + its('mode') { should cmp '01777' } + end + end end control 'tag:config_enroot_enabled_on_graphic_instances' do only_if { !os_properties.on_docker? && ['yes', true].include?(node['cluster']['nvidia']['enabled']) } - describe file("/opt/parallelcluster/shared/enroot") do + describe file("/var/enroot/cache-group-1000") do it { should exist } its('group') { should eq 'root' } end unless os_properties.redhat_on_docker? diff --git a/cookbooks/aws-parallelcluster-shared/attributes/cluster.rb b/cookbooks/aws-parallelcluster-shared/attributes/cluster.rb index 430d10dfe9..def13a134a 100644 --- a/cookbooks/aws-parallelcluster-shared/attributes/cluster.rb +++ b/cookbooks/aws-parallelcluster-shared/attributes/cluster.rb @@ -4,6 +4,7 @@ default['cluster']['license_dir'] = "#{node['cluster']['base_dir']}/licenses" default['cluster']['configs_dir'] = "#{node['cluster']['base_dir']}/configs" default['cluster']['shared_dir'] = "#{node['cluster']['base_dir']}/shared" +default['cluster']['examples_dir'] = "#{node['cluster']['base_dir']}/examples" default['cluster']['shared_dir_login_nodes'] = "#{node['cluster']['base_dir']}/shared_login_nodes" default['cluster']['log_base_dir'] = '/var/log/parallelcluster' default['cluster']['etc_dir'] = '/etc/parallelcluster' diff --git a/cookbooks/aws-parallelcluster-slurm/attributes/slurm_attributes.rb b/cookbooks/aws-parallelcluster-slurm/attributes/slurm_attributes.rb index a075694cdd..07e2809c3e 100644 --- a/cookbooks/aws-parallelcluster-slurm/attributes/slurm_attributes.rb +++ b/cookbooks/aws-parallelcluster-slurm/attributes/slurm_attributes.rb @@ -18,3 +18,10 @@ # Slurmdbd default['cluster']['slurmdbd_service_enabled'] = "true" + +# Spank +default['cluster']['slurm']['spank_config_dir'] = "#{node['cluster']['slurm']['install_dir']}/etc/plugstack.conf.d" + +# Pyxis +default['cluster']['pyxis']['version'] = '0.20.0' 
+default['cluster']['pyxis']['runtime_path'] = '/run/pyxis' diff --git a/cookbooks/aws-parallelcluster-slurm/libraries/pyxis.rb b/cookbooks/aws-parallelcluster-slurm/libraries/pyxis.rb new file mode 100644 index 0000000000..f6bc477fa7 --- /dev/null +++ b/cookbooks/aws-parallelcluster-slurm/libraries/pyxis.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. +# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +def pyxis_installed? + ::Dir.exist?('/usr/local/share/pyxis') +end diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pyxis.rb b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pyxis.rb index 42785b11a4..bfc7db8a41 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pyxis.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pyxis.rb @@ -4,7 +4,7 @@ # Cookbook:: aws-parallelcluster-slurm # Recipe:: install_pyxis # -# Copyright:: Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the # License. A copy of the License is located at @@ -16,11 +16,15 @@ # limitations under the License. return unless nvidia_enabled? +return if pyxis_installed? 
pyxis_version = node['cluster']['pyxis']['version'] pyxis_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/pyxis/v#{pyxis_version}.tar.gz" pyxis_tarball = "#{node['cluster']['sources_dir']}/pyxis-#{pyxis_version}.tar.gz" +spank_examples_dir = "#{node['cluster']['examples_dir']}/spank" +pyxis_examples_dir = "#{node['cluster']['examples_dir']}/pyxis" + remote_file pyxis_tarball do source pyxis_url mode '0644' @@ -35,12 +39,31 @@ set -e tar xf #{pyxis_tarball} -C /tmp cd /tmp/pyxis-#{pyxis_version} - CPPFLAGS='-I /opt/slurm/include/' make - CPPFLAGS='-I /opt/slurm/include/' make install - mkdir -p /opt/slurm/etc/plugstack.conf.d - echo -e 'include /opt/slurm/etc/plugstack.conf.d/*' | tee /opt/slurm/etc/plugstack.conf - ln -fs /usr/local/share/pyxis/pyxis.conf /opt/slurm/etc/plugstack.conf.d/pyxis.conf + CPPFLAGS='-I #{node['cluster']['slurm']['install_dir']}/include/' make + CPPFLAGS='-I #{node['cluster']['slurm']['install_dir']}/include/' make install PYXIS_INSTALL retries 3 retry_delay 5 end + +# Spank configurations + +directory spank_examples_dir + +template "#{spank_examples_dir}/plugstack.conf" do + source 'pyxis/plugstack.conf.erb' + owner 'root' + group 'root' + mode '0644' +end + +# Pyxis configurations + +directory pyxis_examples_dir + +template "#{pyxis_examples_dir}/pyxis.conf" do + source 'pyxis/pyxis.conf.erb' + owner 'root' + group 'root' + mode '0644' +end diff --git a/cookbooks/aws-parallelcluster-slurm/spec/unit/libraries/pyxis_spec.rb b/cookbooks/aws-parallelcluster-slurm/spec/unit/libraries/pyxis_spec.rb new file mode 100644 index 0000000000..e08ac616a5 --- /dev/null +++ b/cookbooks/aws-parallelcluster-slurm/spec/unit/libraries/pyxis_spec.rb @@ -0,0 +1,27 @@ +require 'spec_helper' + +describe "aws-parallelcluster-slurm:libraries:pyxis" do + let(:node) do + { + "cluster" => { "change_set_path" => "/SHARED_DIR/change-set.json" }, + } + end + + let(:mock_shared_storage_change_info) { instance_double(SharedStorageChangeInfo) } + + 
shared_examples "the correct method" do |dir_exists, expected_result| + it "returns #{expected_result}" do + allow(Dir).to receive(:exist?).with("/usr/local/share/pyxis").and_return(dir_exists) + result = pyxis_installed? + expect(result).to eq(expected_result) + end + end + + context "when installation folder exists" do + include_examples "the correct method", true, true + end + + context "when installation folder does not exist" do + include_examples "the correct method", false, false + end +end diff --git a/cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/install_pyxis_spec.rb b/cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/install_pyxis_spec.rb new file mode 100644 index 0000000000..e9305968fb --- /dev/null +++ b/cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/install_pyxis_spec.rb @@ -0,0 +1,107 @@ +# frozen_string_literal: true + +# Copyright:: 2024 Amazon.com, Inc. and its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the +# License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and +# limitations under the License. 
+ +require 'spec_helper' + +describe 'aws-parallelcluster-slurm::install_pyxis' do + for_all_oses do |platform, version| + context "on #{platform}#{version}" do + cached(:cluster_artifacts_s3_url) { 'https://REGION-aws-parallelcluster.s3.REGION.AWS_DOMAIN' } + cached(:cluster_sources_dir) { '/path/to/cluster/sources/dir' } + cached(:cluster_examples_dir) { '/path/to/cluster/examples/dir' } + cached(:slurm_install_dir) { '/path/to/slurm/install/dir' } + cached(:pyxis_version) { '1.2.3' } + cached(:pyxis_runtime_dir) { '/path/to/pyxis/runtime/dir' } + cached(:chef_run) do + runner = runner(platform: platform, version: version) do |node| + RSpec::Mocks.configuration.allow_message_expectations_on_nil = true + + node.override['cluster']['artifacts_s3_url'] = cluster_artifacts_s3_url + node.override['cluster']['sources_dir'] = cluster_sources_dir + node.override['cluster']['examples_dir'] = cluster_examples_dir + node.override['cluster']['slurm']['install_dir'] = slurm_install_dir + node.override['cluster']['pyxis']['version'] = pyxis_version + node.override['cluster']['pyxis']['runtime_path'] = pyxis_runtime_dir + end + allow_any_instance_of(Object).to receive(:nvidia_enabled?).and_return(true) + allow_any_instance_of(Object).to receive(:pyxis_installed?).and_return(false) + runner.converge(described_recipe) + end + + it 'downloads Pyxis tarball' do + is_expected.to create_if_missing_remote_file("#{cluster_sources_dir}/pyxis-#{pyxis_version}.tar.gz").with( + source: "#{cluster_artifacts_s3_url}/dependencies/pyxis/v#{pyxis_version}.tar.gz", + mode: '0644', + retries: 3, + retry_delay: 5 + ) + end + + it 'install Pyxis' do + is_expected.to run_bash('Install pyxis').with( + user: 'root', + retries: 3, + retry_delay: 5, + code: <<-CODE + set -e + tar xf #{cluster_sources_dir}/pyxis-#{pyxis_version}.tar.gz -C /tmp + cd /tmp/pyxis-#{pyxis_version} + CPPFLAGS='-I #{slurm_install_dir}/include/' make + CPPFLAGS='-I #{slurm_install_dir}/include/' make install + CODE + ) + end + + 
it 'creates the Spank examples directory' do + is_expected.to create_directory("#{cluster_examples_dir}/spank") + end + + it 'creates the Spank example configuration' do + is_expected.to create_template("#{cluster_examples_dir}/spank/plugstack.conf").with( + source: 'pyxis/plugstack.conf.erb', + owner: 'root', + group: 'root', + mode: '0644' + ) + end + + it 'creates the Pyxis examples directory' do + is_expected.to create_directory("#{cluster_examples_dir}/pyxis") + end + + it 'creates the Pyxis example configuration' do + is_expected.to create_template("#{cluster_examples_dir}/pyxis/pyxis.conf").with( + source: 'pyxis/pyxis.conf.erb', + owner: 'root', + group: 'root', + mode: '0644' + ) + end + + context "when Pyxis is already installed" do + cached(:chef_run) do + runner = runner(platform: platform, version: version) do |_node| + RSpec::Mocks.configuration.allow_message_expectations_on_nil = true + end + allow_any_instance_of(Object).to receive(:nvidia_enabled?).and_return(true) + allow_any_instance_of(Object).to receive(:pyxis_installed?).and_return(true) + runner.converge(described_recipe) + end + + it 'does not install Pyxis' do + is_expected.not_to run_bash('Install pyxis') + end + end + end + end +end diff --git a/cookbooks/aws-parallelcluster-slurm/templates/default/pyxis/plugstack.conf.erb b/cookbooks/aws-parallelcluster-slurm/templates/default/pyxis/plugstack.conf.erb new file mode 100644 index 0000000000..abc166cf27 --- /dev/null +++ b/cookbooks/aws-parallelcluster-slurm/templates/default/pyxis/plugstack.conf.erb @@ -0,0 +1,3 @@ +# THIS IS AN EXAMPLE OF Spank config file +# When you want to enable please move this to /opt/slurm/etc/plugstack.conf +include <%= node['cluster']['slurm']['spank_config_dir'] %>/* diff --git a/cookbooks/aws-parallelcluster-slurm/templates/default/pyxis/pyxis.conf.erb b/cookbooks/aws-parallelcluster-slurm/templates/default/pyxis/pyxis.conf.erb new file mode 100644 index 0000000000..a5b5db774b --- /dev/null +++ 
b/cookbooks/aws-parallelcluster-slurm/templates/default/pyxis/pyxis.conf.erb @@ -0,0 +1,3 @@ +# THIS IS AN EXAMPLE OF pyxis.conf file +# When you want to enable please move this to /opt/slurm/etc/plugstack.conf.d/pyxis.conf +required /usr/local/lib/slurm/spank_pyxis.so runtime_path=<%= node['cluster']['pyxis']['runtime_path'] %> diff --git a/cookbooks/aws-parallelcluster-slurm/test/controls/pyxis_spec.rb b/cookbooks/aws-parallelcluster-slurm/test/controls/pyxis_spec.rb index b00da25670..dbac5fd31c 100644 --- a/cookbooks/aws-parallelcluster-slurm/test/controls/pyxis_spec.rb +++ b/cookbooks/aws-parallelcluster-slurm/test/controls/pyxis_spec.rb @@ -14,7 +14,26 @@ title 'Checks Pyxis has been installed' - describe file("/opt/slurm/etc/plugstack.conf.d/pyxis.conf") do + describe directory('/opt/slurm/etc') do + it { should exist } + its('mode') { should cmp '0755' } + its('owner') { should eq 'root' } + its('group') { should eq 'root' } + end + + examples_dir = "/opt/parallelcluster/configs/examples" + dirs = [ examples_dir, "#{examples_dir}/spank", "#{examples_dir}/pyxis" ] + dirs.each do |path| + describe directory(path) do + it { should exist } + end + end + + describe file("#{examples_dir}/pyxis/pyxis.conf") do + it { should exist } + end + + describe file("#{examples_dir}/spank/plugstack.conf") do it { should exist } end end