forked from OleHolmNielsen/Slurm_tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nodesuspend
executable file
·58 lines (47 loc) · 1.71 KB
/
nodesuspend
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env bash

# Slurm nodes suspend/resume script
#
# Arguments: the nodes to act on. They are handed to "sinfo --nodes=";
# several arguments are joined with commas. With no arguments nothing is done.
#
# Helper scripts required: power_ipmi power_azure power_noaction
# Each helper is invoked as:  power_xxx <action> <folded nodelist>
#
# We must define some node features power_xxx in slurm.conf, for example for IPMI and Azure cloud:
# NodeName=node[001-100] Feature=xeon2650v4,opa,xeon24,power_ipmi
# NodeName=cloud[001-100] Feature=xeon8272cl,power_azure
# We can read the node features using:
# sinfo -hN -O "Nodelist: ,Features:" --nodes=$* | uniq
#
# We require the "nodeset" command from the ClusterShell package. Install it by:
# yum install epel-release
# yum install clustershell

# Define the action
action="-s"

# The environment variables passed from slurmctld do not include USER etc.
USER=$(whoami)
export USER
export PATH=/usr/local/bin:$PATH

# Scratch file holding the sinfo node/feature listing (set by main).
TMPFILE=""

#######################################
# Remove the scratch file, if one was created.
# Globals: TMPFILE (read, reset to empty)
#######################################
cleanup() {
  if [[ -n "$TMPFILE" ]]; then
    rm -f -- "$TMPFILE"
    TMPFILE=""
  fi
}
# Fire on every exit path so an interrupted run does not leak the file.
trap cleanup EXIT

#######################################
# Run the helper named after a node feature on all listed nodes whose
# sinfo line mentions that feature.
# Globals:   TMPFILE (read), action (read)
# Arguments: $1 - feature name; also the name of the helper command
#######################################
power_by_feature() {
  local feature=$1
  local nodelist
  nodelist=$(grep -- "$feature" "$TMPFILE" | uniq | awk '{print $1}' | nodeset --fold)
  if [[ -n "$nodelist" ]]; then
    # Quoted: a folded nodeset such as node[001-003] is also a valid glob
    # pattern and must reach the helper unexpanded, as a single argument.
    "$feature" "$action" "$nodelist"
  fi
}

#######################################
# Look up the features of the given nodes and dispatch to the helpers.
# Globals:   TMPFILE (written)
# Arguments: node names / hostlist expressions
# Returns:   0 normally (also for an empty argument list),
#            1 if the scratch file cannot be created
#######################################
main() {
  # Check for empty argument list
  if (( $# < 1 )); then
    return 0
  fi

  # Join all arguments with commas so sinfo receives one --nodes value.
  local nodes
  nodes=$(IFS=,; printf '%s' "$*")

  TMPFILE=$(mktemp) || {
    printf 'ERROR: cannot create temporary file\n' >&2
    return 1
  }

  # Get the node list including features (in a single call to minimize load on slurmctld)
  sinfo -hN -O "Nodelist: ,Features:" --nodes="$nodes" > "$TMPFILE"

  # Node suspend/resume DUMMY action used only for testing the power_save module
  power_by_feature power_noaction

  # Node suspend/resume by IPMI
  power_by_feature power_ipmi

  # Execute Azure VM nodes suspend/deallocate
  power_by_feature power_azure

  # Cleanup
  cleanup
  return 0
}

main "$@"