From 69f9cd233913d8b323723b7340d6af6fa1ceb2ee Mon Sep 17 00:00:00 2001 From: Enrico Usai Date: Mon, 26 Jun 2023 15:01:21 +0200 Subject: [PATCH] Export default ENV when PATH is required for command execution exportfs and authconfig execution was failing with: No such file or directory - exportfs/authconfig * `default_env true` documentation: https://docs.chef.io/resources/execute/#properties * Related issue: https://github.com/sous-chefs/nfs/issues/106 Signed-off-by: Enrico Usai --- .../cloudwatch/cloudwatch_agent_config.json | 19 +++++++ .../cloudwatch/write_cloudwatch_agent_json.py | 52 ++++++++++++++++--- .../cloudwatch/partial/_cloudwatch_common.rb | 3 +- .../_system_authentication_alinux_centos.rb | 1 + .../partial/_system_authentication_debian.rb | 1 + .../system_authentication_redhat8.rb | 1 + .../resources/volume.rb | 1 + 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/files/cloudwatch/cloudwatch_agent_config.json b/cookbooks/aws-parallelcluster-environment/files/cloudwatch/cloudwatch_agent_config.json index cc727fb6c7..c0fd6b5688 100644 --- a/cookbooks/aws-parallelcluster-environment/files/cloudwatch/cloudwatch_agent_config.json +++ b/cookbooks/aws-parallelcluster-environment/files/cloudwatch/cloudwatch_agent_config.json @@ -193,6 +193,25 @@ ], "feature_conditions": [] }, + { + "timestamp_format_key": "json", + "file_path": "/var/log/parallelcluster/clustermgtd.nodemap", + "log_stream_name": "clustermgtd_nodemap", + "log_group_key": "job_info", + "schedulers": [ + "slurm" + ], + "platforms": [ + "amazon", + "centos", + "redhat", + "ubuntu" + ], + "node_roles": [ + "HeadNode" + ], + "feature_conditions": [] + }, { "timestamp_format_key": "json", "file_path": "/var/log/parallelcluster/slurm_resume.events", diff --git a/cookbooks/aws-parallelcluster-environment/files/cloudwatch/write_cloudwatch_agent_json.py b/cookbooks/aws-parallelcluster-environment/files/cloudwatch/write_cloudwatch_agent_json.py index 
6e1067a517..670d0d9553 100644 --- a/cookbooks/aws-parallelcluster-environment/files/cloudwatch/write_cloudwatch_agent_json.py +++ b/cookbooks/aws-parallelcluster-environment/files/cloudwatch/write_cloudwatch_agent_json.py @@ -30,6 +30,17 @@ def parse_args(): help="Role this node plays in the cluster " "(i.e., is it a compute node or the head node?)", ) parser.add_argument("--scheduler", required=True, choices=["slurm", "awsbatch"], help="Scheduler") + parser.add_argument( + "--additional-log-groups", + required=False, + help="format: log_group_key=log_group_name,log_group_key=log_group_name,...", + ) + parser.add_argument( + "--output-path", + required=False, + default=AWS_CLOUDWATCH_CFG_PATH, + help="Overwrite the default output path", + ) return parser.parse_args() @@ -38,15 +49,19 @@ def gethostname(): return socket.gethostname().split(".")[0] -def write_config(config): - """Write config to AWS_CLOUDWATCH_CFG_PATH.""" - with open(AWS_CLOUDWATCH_CFG_PATH, "w+", encoding="utf-8") as output_config_file: +def write_config(config, output_file): + """Write config to output_file.""" + with open(output_file, "w+", encoding="utf-8") as output_config_file: json.dump(config, output_config_file, indent=4) -def add_log_group_name_params(log_group_name, configs): +def add_log_group_name_params(default_log_group_name, log_group_name_map, configs): """Add a "log_group_name": log_group_name to every config.""" for config in configs: + log_group_name = default_log_group_name + group_key = config.get("log_group_key") + if group_key: + log_group_name = log_group_name_map.get(group_key, default_log_group_name) config.update({"log_group_name": log_group_name}) return configs @@ -110,13 +125,24 @@ def select_configs_for_feature(configs): selected_configs.append(config) return selected_configs +def select_configs_for_log_groups(configs, log_group_map): + """Keep configs that have no "log_group_key" or whose key is in log_group_map.""" + selected_configs = [] + for config in configs: + group_key = config.get("log_group_key") + if not group_key or group_key in log_group_map: + selected_configs.append(config) + return 
selected_configs + + -def select_logs(configs, args): +def select_logs(configs, log_group_map, args): """Select the appropriate set of log configs.""" selected_configs = select_configs_for_scheduler(configs, args.scheduler) selected_configs = select_configs_for_node_role(selected_configs, args.node_role) selected_configs = select_configs_for_platform(selected_configs, args.platform) selected_configs = select_configs_for_feature(selected_configs) + selected_configs = select_configs_for_log_groups(selected_configs, log_group_map) return selected_configs @@ -214,20 +240,30 @@ def get_dict_value(value, attributes, default=None): return default return value +def parse_additional_log_groups_map(log_group_string): + """Parse "key=name,key=name,..." into a {log_group_key: log_group_name} dict.""" + log_group_map = {} + for pair in filter(None, (log_group_string or "").split(",")): + key, separator, name = (part.strip() for part in pair.partition("=")) + if not (separator and key and name): + raise ValueError("Invalid log group mapping '{0}', expected key=name".format(pair)) + log_group_map[key] = name + return log_group_map def main(): """Create cloudwatch agent config file.""" args = parse_args() config_data = read_data(args.config) - log_configs = select_logs(config_data["log_configs"], args) + log_group_map = parse_additional_log_groups_map(args.additional_log_groups) + log_configs = select_logs(config_data["log_configs"], log_group_map, args) log_configs = add_timestamps(log_configs, config_data["timestamp_formats"]) - log_configs = add_log_group_name_params(args.log_group, log_configs) + log_configs = add_log_group_name_params(args.log_group, log_group_map, log_configs) log_configs = add_instance_log_stream_prefixes(log_configs) log_configs = filter_output_fields(log_configs) metric_configs = select_metrics(config_data["metric_configs"], args) metric_configs = add_append_dimensions(metric_configs, config_data["metric_configs"]) metric_configs = add_aggregation_dimensions(metric_configs, config_data["metric_configs"]) - write_config(create_config(log_configs, metric_configs)) + write_config(create_config(log_configs, metric_configs), args.output_path) if 
__name__ == "__main__": diff --git a/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb b/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb index 8eb15383e4..9a6fac57b2 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb @@ -152,7 +152,8 @@ def package_path command "#{cookbook_virtualenv_path}/bin/python #{config_script_path} "\ "--platform #{node['platform']} --config $CONFIG_DATA_PATH --log-group $LOG_GROUP_NAME "\ - "--scheduler $SCHEDULER --node-role $NODE_ROLE" + "--scheduler $SCHEDULER --node-role $NODE_ROLE "\ + "--additional-log-groups job_info=$LOG_GROUP_NAME" end unless redhat_on_docker? execute "cloudwatch-agent-start" do diff --git a/cookbooks/aws-parallelcluster-environment/resources/system_authentication/partial/_system_authentication_alinux_centos.rb b/cookbooks/aws-parallelcluster-environment/resources/system_authentication/partial/_system_authentication_alinux_centos.rb index 6af38f9187..f8b2cc38b8 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/system_authentication/partial/_system_authentication_alinux_centos.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/system_authentication/partial/_system_authentication_alinux_centos.rb @@ -17,6 +17,7 @@ user 'root' # Tell NSS, PAM to use SSSD for system authentication and identity information command "authconfig --enablemkhomedir --enablesssdauth --enablesssd --updateall" + default_env true sensitive true end end diff --git a/cookbooks/aws-parallelcluster-environment/resources/system_authentication/partial/_system_authentication_debian.rb b/cookbooks/aws-parallelcluster-environment/resources/system_authentication/partial/_system_authentication_debian.rb index 9703100331..3cf849b6bc 100644 --- 
a/cookbooks/aws-parallelcluster-environment/resources/system_authentication/partial/_system_authentication_debian.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/system_authentication/partial/_system_authentication_debian.rb @@ -17,6 +17,7 @@ user 'root' # Enable PAM mkhomedir module command "pam-auth-update --enable mkhomedir" + default_env true sensitive true end end diff --git a/cookbooks/aws-parallelcluster-environment/resources/system_authentication/system_authentication_redhat8.rb b/cookbooks/aws-parallelcluster-environment/resources/system_authentication/system_authentication_redhat8.rb index 194a784ba0..30230e1d83 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/system_authentication/system_authentication_redhat8.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/system_authentication/system_authentication_redhat8.rb @@ -30,6 +30,7 @@ # authconfig is a compatibility tool, replaced by authselect command "authselect select sssd with-mkhomedir" sensitive true + default_env true end unless redhat_on_docker? end diff --git a/cookbooks/aws-parallelcluster-environment/resources/volume.rb b/cookbooks/aws-parallelcluster-environment/resources/volume.rb index fddccb9a80..c65eb6d0be 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/volume.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/volume.rb @@ -128,5 +128,6 @@ execute "unexport volume" do command "exportfs -ra" + default_env true end end