forked from OleHolmNielsen/Slurm_tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
power_statistics
executable file
·85 lines (74 loc) · 2.95 KB
/
power_statistics
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env bash
# Get power usage statistics from Slurm nodes using FreeIPMI tools from https://www.gnu.org/software/freeipmi/
# Author: [email protected]
# Homepage: https://github.com/OleHolmNielsen/Slurm_tools/
# MODIFY THIS:
# Add these lines (uncommented) to the users' .bashrc file which should export variables like:
# export IPMI_USER=root
# export IPMI_PASSWORD=verysecretpassword
# Define the node BMC DNS name: BMC DNS-name is the node name plus this suffix:
BMC_SUFFIX="b"
# For example: node c190 BMC has DNS name c190b
# Prerequisites:
# * Install this RPM package: yum install freeipmi
# * We require the "nodeset" command from the ClusterShell package. Install it by:
# yum install epel-release
# yum install clustershell
# Check the Slurm nodelist
if [[ $# != 1 ]]
then
echo "ERROR: No Slurm nodelist has been given"
echo "Usage: $0 nodelist"
exit 1
fi
# List of nodenames and BMC DNS names
nodelist=$1
# Append the BMC's DNS name suffix BMC_SUFFIX to the nodes' DNS names
# using the ClusterShell command "nodeset"
bmclong=`nodeset -O "%s${BMC_SUFFIX}" --expand $nodelist`
bmclist=`nodeset --fold $bmclong`
# Source the users' .bashrc file which should export variables like:
# export IPMI_USER=root
# export IPMI_PASSWORD=verysecretpassword
# Note: The environment variables set by slurmctld do NOT include PATH, USER, HOME etc.
source ~/.bashrc
USER=`whoami`
# Prepend the path where FreeIPMI tools live
export PATH=/usr/sbin:$PATH
if [[ -z "$IPMI_USER" || -z "$IPMI_PASSWORD" ]]
then
echo "ERROR: The user IPMI_USER and/or password IPMI_PASSWORD have not been set in ~/.bashrc"
exit 1
fi
#
# Use The FreeIPMI command "ipmi-dcmi" to read node power statistics
#
# Specify the IPMI 2.0 cipher suite ID to use:
# HPE and SuperMicro BMCs only support "-I 3"
cipher="-I 17"
# fallbackcipher="-I 3"
fallbackcipher=""
driver="-D LAN_2_0"
# First try cipher suite 17 with a fallback (for HPE and SuperMicro BMCs)
tempfile=`mktemp`
ipmi-dcmi $driver $cipher --username=$IPMI_USER --password=$IPMI_PASSWORD --hostname=$bmclist --get-system-power-statistics > $tempfile 2>&1
if [ $? -ne 0 ]
then
# Error: Try fallback cipher
ipmi-dcmi $driver $fallbackcipher --username=$IPMI_USER --password=$IPMI_PASSWORD --hostname=$bmclist --get-system-power-statistics > $tempfile 2>&1
fi
tempfile2=`mktemp`
# Print Current Power lines (output is sorted by BMC DNS name)
grep "Current Power" $tempfile > $tempfile2
if [ -s $tempfile2 ]
then
sort $tempfile2
# Calculate total and average power (if more than 1 node)
cat $tempfile2 | awk '{total+=$5}END{if(NR>1) printf("Total: Current Power (%d nodes) : %.0f Watts (Average %.1f Watts)\n", NR, total, total/NR)}'
else
# The ipmi-dcmi command fails on BMCs from Huawei, Xfusion (and others)
# If a BMC's DNS hostname does not resolve correctly, ipmi-dcmi will also report no response
echo "ERROR: No Current Power output was obtained by the ipmi-dcmi command"
echo "BMC list was: $bmclist"
fi
rm -f $tempfile $tempfile2