Skip to content

Commit

Permalink
Add script /etc/slurm/slurm_resume_fail.sh for cloud scheduling
Browse files Browse the repository at this point in the history
  • Loading branch information
pescobar authored and ansible@cerebro committed Aug 15, 2023
1 parent 1f0d959 commit 28fbd56
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 0 deletions.
10 changes: 10 additions & 0 deletions tasks/slurm-master-openstack-cloud-scheduling.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,16 @@
when: not slurm_config_git_repo is defined
notify: Restart slurmctld service

# Install the script referenced by ResumeFailProgram in slurm.conf.
# Skipped when slurm_config_git_repo is defined.
- name: Deploy /etc/slurm/slurm_resume_fail.sh
  ansible.builtin.template:
    src: slurm_resume_fail.sh
    dest: /etc/slurm/slurm_resume_fail.sh
    owner: "{{ slurm_user[ansible_os_family] }}"
    group: "{{ slurm_group[ansible_os_family] }}"
    # Quoted so the mode reaches Ansible as a string and is never
    # reinterpreted as a YAML integer (octal in 1.1, decimal in 1.2).
    mode: "0755"
  when: not slurm_config_git_repo is defined
  notify: Restart slurmctld service

- name: Create folder /etc/openstack/ to deploy /etc/openstack/clouds.yaml
ansible.builtin.file:
path: /etc/openstack/
Expand Down
1 change: 1 addition & 0 deletions templates/slurm.conf.j2.cloud.example
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ DefMemPerCPU=1800
#TreeWidth=10000
PrivateData=cloud # Powered down nodes in the cloud are visible
# Hook programs for resuming and suspending cloud nodes; all live in /etc/slurm/.
ResumeProgram=/etc/slurm/slurm_resume_openstack.py
# slurm_resume_fail.sh runs "scontrol update nodename=<node> state=RESUME"
# for the node it is given and logs the outcome with logger.
ResumeFailProgram=/etc/slurm/slurm_resume_fail.sh
SuspendProgram=/etc/slurm/slurm_suspend_openstack.py
ResumeRate=2 #number of nodes per minute that can be created; 0 means no limit
ResumeTimeout=300 #max time in seconds between ResumeProgram running and when the node is ready for use
Expand Down
7 changes: 7 additions & 0 deletions templates/slurm_resume_fail.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Configured as ResumeFailProgram in slurm.conf.
# Runs scontrol to set the node(s) named by the first argument back to
# state RESUME and records the outcome through logger.
#
# Usage: slurm_resume_fail.sh <nodename>

# Without a node name scontrol has nothing to act on; log it and stop.
if [ -z "${1:-}" ]; then
    logger "slurm_resume_fail.sh called without a node name"
    exit 1
fi

# "$1" is quoted so a hostlist expression such as node[1-3] is handed to
# scontrol verbatim instead of being word-split or glob-expanded by the shell.
# Testing the command directly avoids the fragile "$?" indirection.
if scontrol update nodename="$1" state=RESUME; then
    logger "node $1 resumed successfully or already resumed"
else
    logger "scontrol resume failed for node $1"
fi

0 comments on commit 28fbd56

Please sign in to comment.