Skip to content

Commit

Permalink
[ADC Build Image] Get build dependencies through awscli
Browse files Browse the repository at this point in the history
  • Loading branch information
hgreebe committed Jun 28, 2024
1 parent 429f517 commit 46daffb
Show file tree
Hide file tree
Showing 27 changed files with 194 additions and 145 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,25 +28,5 @@
not_if { ::File.exist?("#{virtualenv_path}/bin/activate") }
end

# Download awsbatch-dependencies.tgz into the cluster base directory.
# The source path under artifacts_s3_url is keyed by node['kernel']['machine'].
remote_file "#{node['cluster']['base_dir']}/awsbatch-dependencies.tgz" do
source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/awsbatch-dependencies.tgz"
mode '0644'
# Retry a failed download up to 3 times, waiting 5 seconds between attempts.
retries 3
retry_delay 5
# Leave an already-present file alone instead of downloading it again.
action :create_if_missing
end

# Unpack awsbatch-dependencies.tgz in the cluster base directory and install
# everything found in the extracted awsbatch/ directory into the virtualenv.
# pip is run with --no-index and -f ./ so packages are resolved from the
# extracted directory only; `set -e` aborts the script on the first failure.
bash 'pip install' do
user 'root'
group 'root'
cwd "#{node['cluster']['base_dir']}"
code <<-REQ
set -e
tar xzf awsbatch-dependencies.tgz
cd awsbatch
#{virtualenv_path}/bin/pip install * -f ./ --no-index
REQ
end

node.default['cluster']['awsbatch_virtualenv_path'] = virtualenv_path
node_attributes "dump node attributes"
25 changes: 23 additions & 2 deletions cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,35 @@
curl --retry 3 -L -o aws-parallelcluster.tgz ${custom_package_url}
mkdir aws-parallelcluster-awsbatch-cli
tar -xzf aws-parallelcluster.tgz --directory aws-parallelcluster-awsbatch-cli
aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/awsbatch-dependencies.tgz awsbatch-dependencies.tgz --region #{node['cluster']['region']}
tar xzf awsbatch-dependencies.tgz
cd awsbatch
#{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install * -f ./ --no-index
cd ..
cd aws-parallelcluster-awsbatch-cli/*aws-parallelcluster-*
#{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install awsbatch-cli/
CLI
end
else
# Install aws-parallelcluster-awsbatch-cli package
execute "pip_install_parallelcluster_awsbatch_cli" do
command "#{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install aws-parallelcluster-awsbatch-cli==#{node['cluster']['parallelcluster-awsbatch-cli-version']}"
# Install aws-parallelcluster-awsbatch-cli from the build artifacts location.
#
# Steps, all executed from Chef's file cache directory:
#   1. copy aws-parallelcluster.tgz from artifacts_build_url/awsbatch with `aws s3 cp`
#   2. extract it into a freshly created aws-parallelcluster-awsbatch-cli directory
#   3. copy and unpack awsbatch-dependencies.tgz (path keyed by node['kernel']['machine'])
#      and install its contents offline (--no-index, -f ./) into the awsbatch virtualenv
#   4. pip install the awsbatch-cli/ directory from the extracted sources
#
# The extraction directory is removed before it is recreated: a plain `mkdir`
# under `set -e` would abort on a re-run, and leftovers from an earlier run
# could make the `*aws-parallelcluster-*` glob match more than one directory.
bash "install aws-parallelcluster-awsbatch-cli" do
  cwd Chef::Config[:file_cache_path]
  code <<-CLI
    set -e
    package_url=#{node['cluster']['artifacts_build_url']}/awsbatch/aws-parallelcluster.tgz
    aws s3 cp ${package_url} aws-parallelcluster.tgz --region #{node['cluster']['region']}
    rm -fr aws-parallelcluster-awsbatch-cli
    mkdir aws-parallelcluster-awsbatch-cli
    tar -xzf aws-parallelcluster.tgz --directory aws-parallelcluster-awsbatch-cli
    aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/awsbatch-dependencies.tgz awsbatch-dependencies.tgz --region #{node['cluster']['region']}
    tar xzf awsbatch-dependencies.tgz
    cd awsbatch
    #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install * -f ./ --no-index
    cd ..
    cd aws-parallelcluster-awsbatch-cli/*aws-parallelcluster-*
    #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install awsbatch-cli/
  CLI
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,6 @@
# TODO: once the pyenv Chef resource supports installing packages from a path (e.g. `pip install .`), convert the
# bash block to a recipe that uses the pyenv resource.

# Download node-dependencies.tgz into Chef's file cache directory.
# The source path under artifacts_s3_url is keyed by node['kernel']['machine'].
remote_file "#{Chef::Config[:file_cache_path]}/node-dependencies.tgz" do
source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz"
mode '0644'
# Retry a failed download up to 3 times, waiting 5 seconds between attempts.
retries 3
retry_delay 5
# Leave an already-present file alone instead of downloading it again.
action :create_if_missing
end

bash "install custom aws-parallelcluster-node" do
cwd Chef::Config[:file_cache_path]
code <<-NODE
Expand All @@ -45,6 +37,13 @@
rm -fr aws-parallelcluster-custom-node
mkdir aws-parallelcluster-custom-node
tar -xzf aws-parallelcluster-node.tgz --directory aws-parallelcluster-custom-node
aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz node-dependencies.tgz --region #{node['cluster']['region']}
tar xzf node-dependencies.tgz
cd node
#{node_virtualenv_path}/bin/pip install * -f ./ --no-index
cd ..
cd aws-parallelcluster-custom-node/*aws-parallelcluster-node-*
pip install .
deactivate
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,27 @@
if is_custom_node?
include_recipe 'aws-parallelcluster-computefleet::custom_parallelcluster_node'
else
execute "install official aws-parallelcluster-node" do
command "#{virtualenv_path}/bin/pip install aws-parallelcluster-node==#{node['cluster']['parallelcluster-node-version']}"
# Install aws-parallelcluster-node from the build artifacts location.
#
# The script runs from Chef's file cache directory and, in order:
#   - prepends /usr/local/bin to PATH when it is not already listed
#   - activates the node virtualenv and uninstalls any existing aws-parallelcluster-node
#   - copies aws-parallelcluster-node.tgz from artifacts_build_url/node with `aws s3 cp`
#     and extracts it into a freshly recreated aws-parallelcluster-node directory
#   - copies and unpacks node-dependencies.tgz (path keyed by node['kernel']['machine'])
#     and installs its contents offline (--no-index, -f ./)
#   - runs `pip install .` inside the extracted sources, then deactivates the virtualenv
# `set -e` aborts the script on the first failing command.
bash "install official aws-parallelcluster-node" do
cwd Chef::Config[:file_cache_path]
code <<-NODE
set -e
[[ ":$PATH:" != *":/usr/local/bin:"* ]] && PATH="/usr/local/bin:${PATH}"
echo "PATH is $PATH"
source #{node_virtualenv_path}/bin/activate
pip uninstall --yes aws-parallelcluster-node
node_url=#{node['cluster']['artifacts_build_url']}/node/aws-parallelcluster-node.tgz
aws s3 cp ${node_url} aws-parallelcluster-node.tgz --region #{node['cluster']['region']}
rm -fr aws-parallelcluster-node
mkdir aws-parallelcluster-node
tar -xzf aws-parallelcluster-node.tgz --directory aws-parallelcluster-node
aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz node-dependencies.tgz --region #{node['cluster']['region']}
tar xzf node-dependencies.tgz
cd node
#{node_virtualenv_path}/bin/pip install * -f ./ --no-index
cd ..
cd aws-parallelcluster-node/*aws-parallelcluster-node-*
pip install .
deactivate
NODE
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,13 @@
not_if { ::File.exist?("#{virtualenv_path}/bin/activate") }
end

# Download cfn-dependencies.tgz into the cluster base directory.
# The source path under artifacts_s3_url is keyed by node['kernel']['machine'].
remote_file "#{node['cluster']['base_dir']}/cfn-dependencies.tgz" do
source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/cfn-dependencies.tgz"
mode '0644'
# Retry a failed download up to 3 times, waiting 5 seconds between attempts.
retries 3
retry_delay 5
# Leave an already-present file alone instead of downloading it again.
action :create_if_missing
end

bash 'pip install' do
user 'root'
group 'root'
cwd "#{node['cluster']['base_dir']}"
code <<-REQ
set -e
aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/cfn-dependencies.tgz cfn-dependencies.tgz --region #{node['cluster']['region']}
tar xzf cfn-dependencies.tgz
cd cfn
#{virtualenv_path}/bin/pip install * -f ./ --no-index
Expand All @@ -57,7 +50,7 @@
cfnbootstrap_package = "aws-cfn-bootstrap-py3-#{cfnbootstrap_version}.tar.gz"

region = node['cluster']['region']
bucket = region.start_with?('cn-') ? 's3.cn-north-1.amazonaws.com.cn/cn-north-1-aws-parallelcluster' : "s3.amazonaws.com"
bucket = region.start_with?('cn-') ? 's3.cn-north-1.amazonaws.com.cn/cn-north-1-aws-parallelcluster' : "s3.#{aws_region}.#{aws_domain}"

remote_file "/tmp/#{cfnbootstrap_package}" do
source "https://#{bucket}/cloudformation-examples/#{cfnbootstrap_package}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

public_key_local_path = "#{node['cluster']['sources_dir']}/amazon-cloudwatch-agent.gpg"
remote_file public_key_local_path do
source 'https://s3.amazonaws.com/amazoncloudwatch-agent/assets/amazon-cloudwatch-agent.gpg'
source "https://s3.#{aws_region}.#{aws_domain}/amazoncloudwatch-agent/assets/amazon-cloudwatch-agent.gpg"
retries 3
retry_delay 5
action :create_if_missing
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ def conflicting_packages
end

# Package names this resource treats as prerequisites.
#
# @return [Array<String>] the prerequisite package names
def prerequisites
  %w(libibverbs-utils librdmacm-utils)
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@

action :download_and_install do
# Get EFA Installer
region = aws_region
efa_installer_url = "https://efa-installer.amazonaws.com/aws-efa-installer-#{new_resource.efa_version}.tar.gz"
if region.start_with?("us-iso")
efa_installer_url = "https://aws-efa-installer.s3.#{aws_region}.#{aws_domain}/aws-efa-installer-#{new_resource.efa_version}.tar.gz"
end
remote_file efa_tarball do
source efa_installer_url
mode '0644'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,8 @@
action_class do
def base_url
# https://docs.aws.amazon.com/fsx/latest/LustreGuide/install-lustre-client.html#lustre-client-rhel
"https://fsx-lustre-client-repo.s3.amazonaws.com/el/#{node['platform_version']}/$basearch"
end
"https://fsx-lustre-client-repo.s3.#{aws_region}.#{aws_domain}/el/#{node['platform_version']}/$basearch" end

def public_key
"https://fsx-lustre-client-repo-public-keys.s3.amazonaws.com/fsx-rpm-public-key.asc"
end
"https://fsx-lustre-client-repo-public-keys.s3.#{aws_region}.#{aws_domain}/fsx-rpm-public-key.asc" end
end
18 changes: 15 additions & 3 deletions cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,18 @@
# See the License for the specific language governing permissions and limitations under the License.

return if ::File.exist?("/usr/local/bin/aws") || redhat_on_docker?
return if platform?('amazon')

# Working directory for the download, and the region used to pick the archive.
file_cache_path = Chef::Config[:file_cache_path]
region = aws_region

# Regions whose name starts with "us-iso" fetch the awscli-exe zip from a
# regional S3 bucket; every other region uses the public awscli-bundle.zip.
awscli_url =
  if region.start_with?("us-iso")
    "https://aws-sdk-common-infra-dca-prod-deployment-bucket.s3.#{aws_region}.#{aws_domain}/aws-cli-v2/linux/x86_64/awscli-exe-linux-x86_64.zip"
  else
    "https://s3.amazonaws.com/aws-cli/awscli-bundle.zip"
  end

remote_file 'download awscli bundle from s3' do
path "#{file_cache_path}/awscli-bundle.zip"
source 'https://s3.amazonaws.com/aws-cli/awscli-bundle.zip'
source awscli_url
path
retries 5
retry_delay 5
Expand All @@ -34,6 +40,12 @@
overwrite true
end

bash 'install awscli' do
code "#{cookbook_virtualenv_path}/bin/python #{file_cache_path}/awscli/awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws"
if region.start_with?("us-iso")
bash 'install awscli' do
code "#{file_cache_path}/awscli/aws/install -i /usr/local/aws -b /usr/local/bin/aws"
end
else
bash 'install awscli' do
code "#{cookbook_virtualenv_path}/bin/python#{node['cluster']['python-major-minor-version']} #{file_cache_path}/awscli/awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws"
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,13 @@
not_if { ::File.exist?("#{cookbook_virtualenv_path}/bin/activate") }
end

# Download cookbook-dependencies.tgz into the cluster base directory.
# The source path under artifacts_s3_url is keyed by node['kernel']['machine'].
remote_file "#{node['cluster']['base_dir']}/cookbook-dependencies.tgz" do
source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/cookbook-dependencies.tgz"
mode '0644'
# Retry a failed download up to 3 times, waiting 5 seconds between attempts.
retries 3
retry_delay 5
# Leave an already-present file alone instead of downloading it again.
action :create_if_missing
end

bash 'pip install' do
user 'root'
group 'root'
cwd "#{node['cluster']['base_dir']}"
code <<-REQ
set -e
aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/cookbook-dependencies.tgz cookbook-dependencies.tgz --region #{node['cluster']['region']}
tar xzf cookbook-dependencies.tgz
cd dependencies
#{virtualenv_path}/bin/pip install * -f ./ --no-index
Expand Down
28 changes: 18 additions & 10 deletions cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
cuda_complete_version = "#{cuda_version}.#{cuda_patch}"
cuda_version_suffix = '535.104.05'
cuda_arch = arm_instance? ? 'linux_sbsa' : 'linux'
cuda_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/cuda/cuda_#{cuda_complete_version}_#{cuda_version_suffix}_#{cuda_arch}.run"
cuda_samples_version = '12.2'
cuda_samples_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/cuda/samples/v#{cuda_samples_version}.tar.gz"
tmp_cuda_run = '/tmp/cuda.run'
tmp_cuda_sample_archive = '/tmp/cuda-sample.tar.gz'

Expand All @@ -35,12 +33,17 @@
node_attributes 'Save cuda and cuda samples versions for InSpec tests'

# Get CUDA run file
remote_file tmp_cuda_run do
source cuda_url
mode '0755'
# Copy the CUDA .run installer from artifacts_build_url/cuda to tmp_cuda_run
# with `aws s3 cp`, then make it executable (mode 755).
bash 'Get CUDA run file from s3' do
user 'root'
group 'root'
cwd "#{node['cluster']['sources_dir']}"
code <<-CUDA
set -e
aws s3 cp #{node['cluster']['artifacts_build_url']}/cuda/cuda_#{cuda_complete_version}_#{cuda_version_suffix}_#{cuda_arch}.run #{tmp_cuda_run} --region #{node['cluster']['region']}
chmod 755 #{tmp_cuda_run}
CUDA
# Retry a failed run up to 3 times, waiting 5 seconds between attempts.
retries 3
retry_delay 5
# Skip the download when the versioned CUDA directory already exists.
not_if { ::File.exist?("/usr/local/cuda-#{cuda_version}") }
end

# Install CUDA driver
Expand All @@ -59,12 +62,17 @@
end

# Get CUDA Sample Files
remote_file tmp_cuda_sample_archive do
source cuda_samples_url
mode '0644'
# Copy the CUDA samples archive (v<cuda_samples_version>.tar.gz) from
# artifacts_build_url/cuda/samples to tmp_cuda_sample_archive with `aws s3 cp`,
# then set its mode to 644.
bash 'get CUDA Sample Files from s3' do
user 'root'
group 'root'
cwd "#{node['cluster']['sources_dir']}"
code <<-CUDA
set -e
aws s3 cp #{node['cluster']['artifacts_build_url']}/cuda/samples/v#{cuda_samples_version}.tar.gz #{tmp_cuda_sample_archive} --region #{node['cluster']['region']}
chmod 644 #{tmp_cuda_sample_archive}
CUDA
# Retry a failed run up to 3 times, waiting 5 seconds between attempts.
retries 3
retry_delay 5
# Skip the download when the samples directory already exists.
not_if { ::File.exist?("/usr/local/cuda-#{cuda_version}/samples") }
end

# Unpack CUDA Samples
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
intelmpi_installation_path = "/opt/intel/mpi/#{intelmpi_version}"
intelmpi_installer = "l_mpi_oneapi_p_#{intelmpi_full_version}_offline.sh"
intelmpi_installer_path = "#{node['cluster']['sources_dir']}/#{intelmpi_installer}"
intelmpi_installer_url = "#{node['cluster']['artifacts_s3_url']}/impi/#{intelmpi_installer}"
intelmpi_installer_url = "#{node['cluster']['base_build_url']}/archives/impi/#{intelmpi_installer}"
intelmpi_qt_version = '6.5.3'

# Prerequisite for module install
Expand All @@ -39,12 +39,17 @@
end

# fetch intelmpi installer script
remote_file intelmpi_installer_path do
source intelmpi_installer_url
mode '0744'
retries 3
retry_delay 5
not_if { ::File.exist?(intelmpi_installation_path.to_s) }
# Copy the Intel MPI offline installer from intelmpi_installer_url to
# intelmpi_installer_path with `aws s3 cp`, then set its mode to 744.
#
# The download is skipped when intelmpi_installation_path already exists, so a
# repeated converge does not fetch the installer again for an installed version.
bash 'get intelmpi from s3' do
  user 'root'
  group 'root'
  cwd "#{node['cluster']['sources_dir']}"
  code <<-IMPI
    set -e
    aws s3 cp #{intelmpi_installer_url} #{intelmpi_installer_path} --region #{node['cluster']['region']}
    chmod 744 #{intelmpi_installer_path}
  IMPI
  # Retry a failed run up to 5 times, waiting 10 seconds between attempts.
  retries 5
  retry_delay 10
  not_if { ::File.exist?(intelmpi_installation_path.to_s) }
end

bash "install intel mpi" do
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,15 @@ def optionally_disable_rnd

# Extract DCV packages
unless ::File.exist?(dcv_tarball)
remote_file dcv_tarball do
source dcv_url
checksum dcv_sha256sum
mode '0644'
# Copy the DCV tarball from dcv_url to dcv_tarball with `aws s3 cp`, then set
# its mode to 644.
# NOTE(review): this block performs no checksum verification of the downloaded
# archive — confirm whether integrity is checked elsewhere.
bash 'get dcv from s3' do
user 'root'
group 'root'
cwd "#{node['cluster']['sources_dir']}"
code <<-DCV
set -e
aws s3 cp #{dcv_url} #{dcv_tarball} --region #{node['cluster']['region']}
chmod 644 #{dcv_tarball}
DCV
# Retry a failed run up to 3 times, waiting 5 seconds between attempts.
retries 3
retry_delay 5
end
Expand Down Expand Up @@ -276,7 +281,7 @@ def dcv_url
end

# Local filesystem path of the DCV tarball inside the cluster sources directory.
#
# This has to be a local path rather than an artifacts URL: the value is
# checked with ::File.exist?, used as the destination of `aws s3 cp`, and
# passed to `chmod`.
# NOTE(review): the remote location is presumably supplied by dcv_url — confirm.
#
# @return [String] path of the form <sources_dir>/dcv-<dcv version>.tgz
def dcv_tarball
  "#{node['cluster']['sources_dir']}/dcv-#{node['cluster']['dcv']['version']}.tgz"
end

def dcvauth_virtualenv
Expand Down
Loading

0 comments on commit 46daffb

Please sign in to comment.