# slurm.conf.j2.cloud.example
# **Note:** This file needs to have identical contents on all nodes of
# the cluster. See the `slurm.conf` man page for more information.
#
#
# Unique name for identifying this cluster's entries in the DB
ClusterName={{ slurm_cluster_name }}
ControlMachine={{ slurm_master_host }}
## scheduler settings
#
SchedulerType=sched/backfill
SchedulerPort=7321
SelectType=select/cons_res
SelectTypeParameters=CR_Core_Memory
#SelectTypeParameters=CR_LLN,CR_Core_Memory
{% if slurm_config_deploy_lua_submit_plugin %}
JobSubmitPlugins=lua
{% endif %}
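# Note (illustrative, not part of this template): with JobSubmitPlugins=lua,
# slurmctld expects a "job_submit.lua" script in the same directory as this
# slurm.conf. A minimal sketch of such a script:
#   function slurm_job_submit(job_desc, part_list, submit_uid)
#       -- e.g. route jobs without an explicit partition to a chosen default
#       if job_desc.partition == nil then
#           job_desc.partition = "dynamic-8cores-16g"
#       end
#       return slurm.SUCCESS
#   end
#   function slurm_job_modify(job_desc, job_rec, part_list, modify_uid)
#       return slurm.SUCCESS
#   end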
# use the "multifactor" plugin with weights set up to be multi-user ready
PriorityType=priority/multifactor
#PriorityWeightAge=2000
#PriorityWeightFairshare=10000
#PriorityWeightJobSize=1000
#PriorityWeightPartition=4000
#PriorityWeightQOS=4000
PriorityWeightAge=100
PriorityWeightFairshare=10000
PriorityWeightJobSize=500
PriorityWeightPartition=10000
PriorityWeightQOS=2000
# define reboot program
#RebootProgram="/sbin/shutdown -r now"
# fair share settings
PriorityDecayHalfLife=14-0
PriorityUsageResetPeriod=NONE
PriorityMaxAge=7-0
PriorityFavorSmall=NO
PriorityFlags=FAIR_TREE
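# Once accounting associations are in place, the resulting fair-share factors
# can be inspected with e.g.:  sshare -a -l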
## Topology for IB network
#TopologyPlugin=topology/tree
# Configure support for our GPUs
#GresTypes=gpu
## accounting settings
#
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageEnforce=associations,limits,qos
#AccountingStoreJobComment=YES
AccountingStorageHost={{ slurm_dbd_host }}
AccountingStoragePort=6819
#AccountingStorageTRES=gres/gpu
# the "job completion" info is redundant if the accounting
# infrastructure is enabled, so turn it off as it's an endless source
# of authentication and DB connection problems ...
# TODO setup ElasticSearch configuration
# JobCompType=jobcomp/elasticsearch
# JobCompLoc=http://jobs-accounting.cluster.bc2.ch:9200
# No power consumption acct
AcctGatherEnergyType=acct_gather_energy/none
# No IB usage accounting
AcctGatherInfinibandType=acct_gather_infiniband/none
# No filesystem accounting (only works with Lustre)
AcctGatherFilesystemType=acct_gather_filesystem/none
# No job profiling (for now)
AcctGatherProfileType=acct_gather_profile/none
#AcctGatherProfileType=acct_gather_profile/hdf5
#JobAcctGatherType=jobacct_gather/linux
JobAcctGatherType=jobacct_gather/cgroup
JobAcctGatherFrequency=60
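# Per-job usage gathered this way can later be queried from the accounting DB,
# e.g.:  sacct -j <jobid> --format=JobID,Elapsed,MaxRSS,TotalCPU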
## job execution settings
#
CheckpointType=checkpoint/none
#CheckpointType=checkpoint/blcr
#JobCheckpointDir=/var/lib/slurm/checkpoint
# requeue jobs on node failure, unless users ask otherwise
JobRequeue=1
# max number of jobs in a job array
MaxArraySize=150000
# max number of jobs pending + running
MaxJobCount=1000000  # i.e. 1,000,000
MpiDefault=pmi2
# Note: apparently, the `MpiParams` option is also needed for non-MPI
# jobs in Slurm 2.5.3.
#MpiParams=ports=12000-12999
# track resource usage via Linux /proc tree
#ProctrackType=proctrack/linuxproc
ProctrackType=proctrack/cgroup
# do not propagate `ulimit` restrictions found on login nodes
PropagateResourceLimits=NONE
# do not automatically return DOWN nodes to service; an admin must clear
# the DOWN state manually
ReturnToService=0
#TaskPlugin=task/none
TaskPlugin=task/affinity,task/cgroup # as recommended in https://slurm.schedmd.com/cgroup.conf.html
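# Note: the cgroup-based plugins above also expect a cgroup.conf next to this
# file; a minimal sketch (an assumption, adjust to the actual deployment):
#   CgroupAutomount=yes
#   ConstrainCores=yes
#   ConstrainRAMSpace=yes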
#Prolog=/etc/slurm/scripts/prolog.sh
#TaskProlog=/etc/slurm/scripts/task_prolog.sh
#Epilog=/etc/slurm/scripts/epilog.sh
#TmpFs=/scratch
# limit virtual mem usage to 101% of real mem usage
#VSizeFactor=101
# misc timeout settings (commented lines show the default)
#
BatchStartTimeout=60
CompleteWait=35
#EpilogMsgTime=2000
#HealthCheckInterval=0
#HealthCheckProgram=
InactiveLimit=0
KillWait=30
#MessageTimeout=10
MessageTimeout=60
#ResvOverRun=0
MinJobAge=300
#OverTimeLimit=0
UnkillableStepTimeout=180
#VSizeFactor=0
Waittime=0
## `slurmctld` settings (controller nodes)
#
##ControlMachine=localhost
#ControlAddr=10.1.1.156
SlurmUser={{ slurm_user[ansible_os_family] }}
{% if ansible_os_family == "RedHat" %}
SlurmctldPidFile=/var/run/slurmctld.pid
{% endif %}
{% if ansible_os_family == "Debian" %}
SlurmctldPidFile=/run/slurmctld.pid
{% endif %}
SlurmctldPort=6817
SlurmctldTimeout=300
StateSaveLocation={{ slurm_slurmctld_spool_path | default('/var/spool/slurmctld') }}
SlurmctldDebug=info
SlurmctldLogFile=/var/log/slurm/slurmctld.log
DebugFlags=backfill,cpu_bind,priority,reservation,selecttype,steps
#MailProg=/usr/bin/smail
## `slurmd` settings (compute nodes)
#
SlurmdPort=6818
{% if ansible_os_family == "RedHat" %}
SlurmdPidFile=/var/run/slurmd.pid
{% endif %}
{% if ansible_os_family == "Debian" %}
SlurmdPidFile=/run/slurmd.pid
{% endif %}
SlurmdSpoolDir={{ slurm_slurmd_spool_path | default('/var/spool/slurm') }}
SlurmdTimeout=180
SlurmdDebug=info
SlurmdLogFile=/var/log/slurm/slurmd.log
AuthType=auth/munge
CryptoType=crypto/munge
DisableRootJobs=NO
{% if slurm_openstack_cloud_reg_addrs %}
# We use cloud_reg_addrs so that slurmd reports the node's IP address to
# slurmctld on boot. This is not required when using the Neutron internal DNS.
SlurmctldParameters=enable_configless,idle_on_node_suspend,cloud_reg_addrs
{% else %}
# We don't use the cloud_reg_addrs option because we rely on the Neutron internal DNS
SlurmctldParameters=enable_configless,idle_on_node_suspend
{% endif %}
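# With enable_configless, compute nodes do not need their own copy of this file:
# slurmd can fetch the configuration from the controller, e.g. (assuming the
# slurmd service unit is adapted accordingly):
#   slurmd --conf-server {{ slurm_master_host }}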
## Default options for jobs
#
DefMemPerCPU=1800
## cloud settings
#TreeWidth=10000
PrivateData=cloud # powered-down nodes in the cloud are visible
ResumeProgram=/etc/slurm/slurm_resume_openstack.py
ResumeFailProgram=/etc/slurm/slurm_resume_fail.sh
SuspendProgram=/etc/slurm/slurm_suspend_openstack.py
ResumeRate=2        # number of nodes per minute that can be created; 0 means no limit
ResumeTimeout=300   # max time in seconds between ResumeProgram running and the node being ready for use
SuspendRate=2       # number of nodes per minute that can be suspended/destroyed
SuspendTime=600     # time in seconds before an idle node is suspended
SuspendTimeout=180  # time in seconds between running SuspendProgram and the node being completely down
# By default, Slurm caches a node's network address after successfully
# establishing it. NoAddrCache disables the cache, so Slurm looks up the node's
# network address each time a connection is made. This is useful, for example,
# in a cloud environment where node addresses come and go out of DNS.
CommunicationParameters=NoAddrCache
SuspendExcParts=static # nodes in these partitions won't be suspended
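# Powered-down CLOUD nodes are shown with a "~" suffix by sinfo (e.g. "idle~");
# a quick check of node power states:  sinfo -N -o "%N %t"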
## static nodes
NodeName={{ local_slurm_compute_hostname_prefix }}-template CPUs=2 RealMemory=3500 Sockets=2 CoresPerSocket=1 ThreadsPerCore=1
## dynamic nodes
## This config is manually configured for each tenant
## The "security_groups" feature will be converted to a list in the resume script;
## separate the individual security group names with a pipe (|)
## Don't use the "m1.large" flavor in the biomedit cloud: its disk size is too small
NodeName={{ local_slurm_compute_hostname_prefix }}-dynamic-[01-04] CPUs=8 RealMemory=15880 Sockets=8 CoresPerSocket=1 ThreadsPerCore=1 MemSpecLimit=1024 State=CLOUD Features=image={{ local_slurm_compute_image_name }},flavor={{ local_slurm_flavor_8c_16g }},keypair=project_{{ local_tenant_name }},network={{ local_tenant_name }}_network,security_groups={{ local_tenant_name }}_default|{{ local_tenant_name }}_slurm_default
NodeName={{ local_slurm_compute_hostname_prefix }}-dynamic-[05-08] CPUs=16 RealMemory=32000 Sockets=16 CoresPerSocket=1 ThreadsPerCore=1 MemSpecLimit=1024 State=CLOUD Features=image={{ local_slurm_compute_image_name }},flavor={{ local_slurm_flavor_16c_32g }},keypair=project_{{ local_tenant_name }},network={{ local_tenant_name }}_network,security_groups={{ local_tenant_name }}_default|{{ local_tenant_name }}_slurm_default
NodeName={{ local_slurm_compute_hostname_prefix }}-dynamic-[09-12] CPUs=16 RealMemory=64264 Sockets=16 CoresPerSocket=1 ThreadsPerCore=1 MemSpecLimit=1024 State=CLOUD Features=image={{ local_slurm_compute_image_name }},flavor={{ local_slurm_flavor_16c_64g }},keypair=project_{{ local_tenant_name }},network={{ local_tenant_name }}_network,security_groups={{ local_tenant_name }}_default|{{ local_tenant_name }}_slurm_default
NodeName={{ local_slurm_compute_hostname_prefix }}-dynamic-[13-14] CPUs=16 RealMemory=128772 Sockets=16 CoresPerSocket=1 ThreadsPerCore=1 MemSpecLimit=1024 State=CLOUD Features=image={{ local_slurm_compute_image_name }},flavor={{ local_slurm_flavor_16c_128g }},keypair=project_{{ local_tenant_name }},network={{ local_tenant_name }}_network,security_groups={{ local_tenant_name }}_default|{{ local_tenant_name }}_slurm_default
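## Illustrative example of a rendered Features string for one dynamic node
## (all values below are placeholders, not real tenant settings):
##   Features=image=compute-image,flavor=8c-16g,keypair=project_demo,network=demo_network,security_groups=demo_default|demo_slurm_default
## The resume script is then expected to split the "security_groups" value on "|",
## yielding the list ["demo_default", "demo_slurm_default"].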
## Cluster partitions
PartitionName=static Nodes={{ local_slurm_compute_hostname_prefix }}-template Default=NO LLN=NO State=INACTIVE
PartitionName=dynamic-8cores-16g Nodes={{ local_slurm_compute_hostname_prefix }}-dynamic-[01-04] Default=YES LLN=NO State=UP
PartitionName=dynamic-16cores-32g Nodes={{ local_slurm_compute_hostname_prefix }}-dynamic-[05-08] Default=NO LLN=NO State=UP
PartitionName=dynamic-16cores-64g Nodes={{ local_slurm_compute_hostname_prefix }}-dynamic-[09-12] Default=NO LLN=NO State=UP
PartitionName=dynamic-16cores-128g Nodes={{ local_slurm_compute_hostname_prefix }}-dynamic-[13-14] Default=NO LLN=NO State=UP
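# Example submission against the default dynamic partition ("job.sh" is a
# hypothetical job script):
#   sbatch --partition=dynamic-8cores-16g --cpus-per-task=4 --mem=8G job.sh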
# vim: syntax=sh