diff --git a/cookbooks/aws-parallelcluster-entrypoints/recipes/init.rb b/cookbooks/aws-parallelcluster-entrypoints/recipes/init.rb
index 664228d96..3e4d9ceca 100644
--- a/cookbooks/aws-parallelcluster-entrypoints/recipes/init.rb
+++ b/cookbooks/aws-parallelcluster-entrypoints/recipes/init.rb
@@ -13,7 +13,12 @@
 # limitations under the License.
 
 include_recipe "aws-parallelcluster-platform::enable_chef_error_handler"
 
+# Upload the common DNA file to S3 (the action only uploads on the HeadNode).
+fetch_config 'Upload Common Dna to s3' do
+  action :share_common_dna
+end
+
 include_recipe "aws-parallelcluster-shared::setup_envars"
 
 os_type 'Validate OS type specified by the user is the same as the OS identified by Ohai'
diff --git a/cookbooks/aws-parallelcluster-entrypoints/recipes/update.rb b/cookbooks/aws-parallelcluster-entrypoints/recipes/update.rb
index f69aa2453..d60186429 100644
--- a/cookbooks/aws-parallelcluster-entrypoints/recipes/update.rb
+++ b/cookbooks/aws-parallelcluster-entrypoints/recipes/update.rb
@@ -12,7 +12,12 @@
 # OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
 # limitations under the License.
 include_recipe "aws-parallelcluster-shared::setup_envars"
 
+# Upload the common DNA file to S3 (the action only uploads on the HeadNode).
+fetch_config 'Upload Common Dna to s3' do
+  action :share_common_dna
+end
+
 # Fetch and load cluster configs
 include_recipe 'aws-parallelcluster-platform::update'
 
diff --git a/cookbooks/aws-parallelcluster-environment/recipes/config/config_cfn_hup.rb b/cookbooks/aws-parallelcluster-environment/recipes/config/config_cfn_hup.rb
index 7a82d5777..fbf2367b5 100644
--- a/cookbooks/aws-parallelcluster-environment/recipes/config/config_cfn_hup.rb
+++ b/cookbooks/aws-parallelcluster-environment/recipes/config/config_cfn_hup.rb
@@ -46,16 +46,52 @@
   )
 end
 
-template '/etc/cfn/hooks.d/pcluster-update.conf' do
-  source 'cfn_bootstrap/cfn-hook-update.conf.erb'
-  owner 'root'
-  group 'root'
-  mode '0400'
-  variables(
-    stack_id: node['cluster']['stack_arn'],
-    region: node['cluster']['region'],
-    cloudformation_url: cloudformation_url,
-    cfn_init_role: instance_role_name,
-    launch_template_resource_id: node['cluster']['launch_template_id']
-  )
+# HeadNode and LoginNode keep the standard update hook. ComputeFleet nodes get a
+# dedicated update script plus a hook whose action runs that script.
+case node['cluster']['node_type']
+when 'HeadNode', 'LoginNode'
+  template '/etc/cfn/hooks.d/pcluster-update.conf' do
+    source 'cfn_bootstrap/cfn-hook-update.conf.erb'
+    owner 'root'
+    group 'root'
+    mode '0400'
+    variables(
+      stack_id: node['cluster']['stack_arn'],
+      region: node['cluster']['region'],
+      cloudformation_url: cloudformation_url,
+      cfn_init_role: instance_role_name,
+      launch_template_resource_id: node['cluster']['launch_template_id']
+    )
+  end
+
+when 'ComputeFleet'
+  compute_update_script = "#{node['cluster']['scripts_dir']}/cfn-hup-update-compute-action.sh"
+
+  # Script executed by the compute update hook rendered further down.
+  template compute_update_script do
+    source 'cfn_bootstrap/cfn-hup-update-compute-action.sh.erb'
+    owner 'root'
+    group 'root'
+    mode '0744'
+    variables(
+      cluster_s3_bucket: node['cluster']['cluster_s3_bucket'],
+      region: node['cluster']['region'],
+      # The artifact dir is the cluster config key with the config file suffix stripped.
+      cluster_s3_artifact_dir: node['cluster']['cluster_config_s3_key'].chomp('/configs/cluster-config-with-implied-values.yaml'),
+      cluster_config_version: node['cluster']['cluster_config_version'],
+      launch_template_resource_id: node['cluster']['launch_template_id']
+    )
+  end
+
+  # The hook receives the rendered script path so it never relies on the working directory.
+  template '/etc/cfn/hooks.d/pcluster-update.conf' do
+    source 'cfn_bootstrap/cfn-hook-update-compute.conf.erb'
+    owner 'root'
+    group 'root'
+    mode '0400'
+    variables(
+      launch_template_resource_id: node['cluster']['launch_template_id'],
+      update_script_path: compute_update_script
+    )
+  end
 end
diff --git a/cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hook-update-compute.conf.erb b/cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hook-update-compute.conf.erb
new file mode 100644
index 000000000..1956e0059
--- /dev/null
+++ b/cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hook-update-compute.conf.erb
@@ -0,0 +1,5 @@
+[parallelcluster-update]
+triggers=post.update
+path=Resources.<%= @launch_template_resource_id %>.Metadata.AWS::CloudFormation::Init
+action=PATH=/usr/local/bin:/bin:/usr/bin:/opt/aws/bin; . /etc/parallelcluster/pcluster_cookbook_environment.sh; <%= @update_script_path %>
+runas=root
diff --git a/cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hup-update-compute-action.sh.erb b/cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hup-update-compute-action.sh.erb
new file mode 100644
index 000000000..06e69f2f0
--- /dev/null
+++ b/cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hup-update-compute-action.sh.erb
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Rendered by Chef on ComputeFleet nodes and run by the cfn-hup update hook.
+# Waits until /opt/parallelcluster/shared/cluster-config-version holds a version
+# different from the one rendered into this script, then downloads the DNA files
+# for that version from S3 and runs the update recipe.
+set -ex
+
+PATH=/usr/local/bin:/bin:/usr/bin:/opt/aws/bin
+. /etc/parallelcluster/pcluster_cookbook_environment.sh
+
+S3_BUCKET="<%= @cluster_s3_bucket %>"
+S3_ARTIFACT_DIR="<%= @cluster_s3_artifact_dir %>"
+OLD_CLUSTER_CONFIG_VERSION="<%= @cluster_config_version %>"
+REGION="<%= @region %>"
+CLUSTER_CONFIG_VERSION_FILE=/opt/parallelcluster/shared/cluster-config-version
+
+# Print the given message on stderr and abort the script.
+error_exit() {
+  echo "$1" >&2
+  exit 1
+}
+
+# Download object key $1 from the cluster bucket into local file $2; abort on failure.
+download_s3_object() {
+  aws s3api get-object --bucket "${S3_BUCKET}" --key "$1" --region "${REGION}" "$2" 2>&1 \
+    || error_exit "Failed to download s3://${S3_BUCKET}/$1"
+}
+
+while true; do
+  echo "Old Cluster config version is $OLD_CLUSTER_CONFIG_VERSION"
+  sleep 60
+  # A missing version file means "no new version yet"; it must not abort the script under `set -e`.
+  NEW_CLUSTER_CONFIG_VERSION=$(cat "${CLUSTER_CONFIG_VERSION_FILE}" 2>/dev/null || true)
+  echo "New Cluster config version is $NEW_CLUSTER_CONFIG_VERSION"
+
+  if [[ -n "${NEW_CLUSTER_CONFIG_VERSION}" ]] && [[ "${NEW_CLUSTER_CONFIG_VERSION}" != "${OLD_CLUSTER_CONFIG_VERSION}" ]]; then
+    break
+  fi
+done
+
+CLUSTER_CONFIG_VERSION=$NEW_CLUSTER_CONFIG_VERSION
+# Exported because a plain assignment is not passed to child processes such as the aws CLI.
+export AWS_RETRY_MODE=standard
+
+echo "Running S3 commands"
+download_s3_object "${S3_ARTIFACT_DIR}/assets/common-dna-${CLUSTER_CONFIG_VERSION}.json" /tmp/common-dna.json
+download_s3_object "${S3_ARTIFACT_DIR}/assets/ComputeNode/compute-dna-<%= @launch_template_resource_id %>-${CLUSTER_CONFIG_VERSION}.json" /tmp/compute-dna.json
+download_s3_object "${S3_ARTIFACT_DIR}/assets/extra-${CLUSTER_CONFIG_VERSION}.json" /tmp/extra.json
+echo "Completed S3 commands"
+
+mkdir -p /etc/chef/ohai/hints
+touch /etc/chef/ohai/hints/ec2.json
+# NOTE(review): /tmp/stack-arn.json is not downloaded by this script; presumably it is left over from node bootstrap - confirm.
+jq -s ".[0] * .[1] * .[2] * .[3]" /tmp/common-dna.json /tmp/compute-dna.json /tmp/stack-arn.json /tmp/extra.json > /etc/chef/dna.json \
+  || error_exit "Failed to merge the DNA files into /etc/chef/dna.json (is jq installed?)"
+cd /etc/chef
+cinc-client --local-mode --config /etc/chef/client.rb --log_level info --logfile /var/log/chef-client.log --force-formatter --no-color --chef-zero-port 8889 --json-attributes /etc/chef/dna.json --override-runlist aws-parallelcluster-entrypoints::update && /opt/parallelcluster/scripts/fetch_and_run -postupdate
diff --git a/cookbooks/aws-parallelcluster-platform/resources/fetch_config.rb b/cookbooks/aws-parallelcluster-platform/resources/fetch_config.rb
index 0f2b492ad..548aab5fd 100644
--- a/cookbooks/aws-parallelcluster-platform/resources/fetch_config.rb
+++ b/cookbooks/aws-parallelcluster-platform/resources/fetch_config.rb
@@ -9,6 +9,18 @@
 
 default_action :run
 
+# Uploads /tmp/common-dna.json to the cluster S3 bucket.
+# Only the HeadNode uploads; on any other node type the action just logs and returns.
+action :share_common_dna do
+  return if on_docker?
+  Chef::Log.debug("Upload common_dna.json in s3")
+  return unless node['cluster']['node_type'] == 'HeadNode'
+
+  # Substitute the HEAD_NODE_PRIVATE_IP placeholder with the value of get_primary_ip before uploading.
+  execute_command('Update HeadNode Ip', "sed -i 's/HEAD_NODE_PRIVATE_IP/#{get_primary_ip}/g' /tmp/common-dna.json")
+  upload_common_dna('upload_common_dna_to_s3', node['cluster']['common_dna_s3_key'], '/tmp/common-dna.json')
+end
+
 action :run do
   return if on_docker?
   Chef::Log.debug("Called fetch_config with update (#{new_resource.update})")
@@ -185,4 +197,20 @@ def wait_cluster_config_file(path)
       timeout 5
     end
   end
+
+  # Uploads a local file to the cluster S3 bucket with `aws s3api put-object`.
+  #
+  # @param command_label [String] label handed to execute_command
+  # @param key [String] destination object key inside node['cluster']['cluster_s3_bucket']
+  # @param file_path [String] local file used as the object body
+  # @param _version_id [String, nil] accepted for call compatibility and ignored,
+  #   because put-object has no --version-id option
+  def upload_common_dna(command_label, key, file_path, _version_id = nil)
+    put_s3_object_command = "#{cookbook_virtualenv_path}/bin/aws s3api put-object" \
+                            " --bucket #{node['cluster']['cluster_s3_bucket']}" \
+                            " --key #{key}" \
+                            " --region #{node['cluster']['region']}" \
+                            " --body #{file_path}"
+    execute_command(command_label, put_s3_object_command)
+  end
 end