Commit
Revise the setting of the number of threads when using MKL
shyhuai committed Mar 13, 2017
1 parent b1d0377 commit 9e60e42
Showing 46 changed files with 1,783 additions and 396 deletions.
8 changes: 8 additions & 0 deletions batch-bencmarks-cpu-gpu20.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# The benchmarks of all toolkits
python benchmark.py -config ./configs/bm2cpu1.config -post True
python benchmark.py -config ./configs/bm2cpu2.config -post True
python benchmark.py -config ./configs/bm2cpu4.config -post True
python benchmark.py -config ./configs/bm2cpu8.config -post True
python benchmark.py -config ./configs/bm2cpu16.config -post True
python benchmark.py -config ./configs/bm2cpu32.config -post True
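The script above sweeps the CPU thread count (1, 2, 4, 8, 16, 32) through per-count config files, and the commit subject says the count is now applied through MKL. As a hedged sketch of how such a count is commonly pinned before launching a run (these are the standard Intel MKL/OpenMP environment variables, not taken from this repo's scripts):

```shell
#!/bin/bash
# Illustrative sketch: pin the math-library thread count before a benchmark run.
# MKL_NUM_THREADS is read by Intel MKL; OMP_NUM_THREADS by OpenMP runtimes.
CPU_COUNT=4
export MKL_NUM_THREADS=$CPU_COUNT
export OMP_NUM_THREADS=$CPU_COUNT
echo "benchmarking with $MKL_NUM_THREADS MKL threads"
```

Exporting both variables covers toolkits that thread through MKL directly as well as those that go through an OpenMP runtime.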
3 changes: 3 additions & 0 deletions batch-bencmarks-gpu-gpu20.sh
@@ -0,0 +1,3 @@
#!/bin/bash
# The benchmarks of all toolkits
python benchmark.py -config ./configs/gpuk80.config -post True
10 changes: 5 additions & 5 deletions configs/bm2cpu1.config
@@ -1,15 +1,15 @@
flag: sgbenchmark6 #Flag of current experiment
tools: torch #Tools to benchmark
tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark
experiments: #<network type>; <network name>; <device id>; <gpu count>; <batch size>; <number of epochs>; <epoch size>; <learning rate>
{
# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01
cnn; resnet; -1; 1; 128; 2; 50000; 0.01
# rnn; lstm; -1; 1; 128; 2; 2048; 0.1
rnn; lstm; -1; 1; 128; 2; 2048; 0.1
}
host_file: None #Path to host file or None
cpu_name: E5-2630v3 #CPU model
device_name: E5-2630v3 #GPU model
cpu_name: E5-2630v4 #CPU model
device_name: E5-2630v4 #GPU model
cpu_count: 1 #CPU count for cpu parallel
cuda: 8.0 #CUDA version
cudnn: 5.1 #CUDNN version
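Each entry between the braces in these config files is a semicolon-separated record whose field order follows the header comment on the `experiments:` line. A hypothetical parser (the field names mirror that header; the function itself is not part of the repo) makes the format concrete:

```python
# Hypothetical parser for one experiments line of these benchmark configs.
# Field order follows the header comment:
# <network type>; <network name>; <device id>; <gpu count>; <batch size>;
# <number of epochs>; <epoch size>; <learning rate>
FIELDS = ["net_type", "net_name", "device_id", "gpu_count",
          "batch_size", "epochs", "epoch_size", "lr"]

def parse_experiment(line):
    values = [v.strip() for v in line.split(";")]
    record = dict(zip(FIELDS, values))
    # Convert numeric fields; device id -1 means "run on CPU" in these configs.
    for key in ("device_id", "gpu_count", "batch_size", "epochs", "epoch_size"):
        record[key] = int(record[key])
    record["lr"] = float(record["lr"])
    return record

exp = parse_experiment("cnn; resnet; -1; 1; 128; 2; 50000; 0.01")
```

Lines beginning with `#` inside the braces are commented-out experiments and would be skipped before parsing.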
14 changes: 5 additions & 9 deletions configs/bm2cpu16.config
@@ -1,19 +1,15 @@
flag: sgbenchmark6 #Flag of current experiment
tools: torch #Tools to benchmark
tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark
experiments: #<network type>; <network name>; <device id>; <gpu count>; <batch size>; <number of epochs>; <epoch size>; <learning rate>
{
# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01
cnn; resnet; -1; 1; 128; 2; 50000; 0.01
# rnn; lstm; -1; 1; 128; 2; 2048; 0.1
rnn; lstm; -1; 1; 128; 2; 2048; 0.1
}
host_file: None #Path to host file or None
cpu_name: E5-2630v3 #CPU model
device_name: E5-2630v3 #GPU model
cpu_name: E5-2630v4 #CPU model
device_name: E5-2630v4 #GPU model
cpu_count: 16 #CPU count for cpu parallel
cuda: 8.0 #CUDA version
cudnn: 5.1 #CUDNN version
10 changes: 5 additions & 5 deletions configs/bm2cpu2.config
@@ -1,15 +1,15 @@
flag: sgbenchmark6 #Flag of current experiment
tools: torch #Tools to benchmark
tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark
experiments: #<network type>; <network name>; <device id>; <gpu count>; <batch size>; <number of epochs>; <epoch size>; <learning rate>
{
# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01
cnn; resnet; -1; 1; 128; 2; 50000; 0.01
# rnn; lstm; -1; 1; 128; 2; 2048; 0.1
rnn; lstm; -1; 1; 128; 2; 2048; 0.1
}
host_file: None #Path to host file or None
cpu_name: E5-2630v3 #CPU model
device_name: E5-2630v3 #GPU model
cpu_name: E5-2630v4 #CPU model
device_name: E5-2630v4 #GPU model
cpu_count: 2 #CPU count for cpu parallel
cuda: 8.0 #CUDA version
cudnn: 5.1 #CUDNN version
10 changes: 5 additions & 5 deletions configs/bm2cpu32.config
@@ -1,15 +1,15 @@
flag: sgbenchmark6 #Flag of current experiment
tools: torch #Tools to benchmark
tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark
experiments: #<network type>; <network name>; <device id>; <gpu count>; <batch size>; <number of epochs>; <epoch size>; <learning rate>
{
# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01
cnn; resnet; -1; 1; 128; 2; 50000; 0.01
# rnn; lstm; -1; 1; 128; 2; 2048; 0.1
rnn; lstm; -1; 1; 128; 2; 2048; 0.1
}
host_file: None #Path to host file or None
cpu_name: E5-2630v3 #CPU model
device_name: E5-2630v3 #GPU model
cpu_name: E5-2630v4 #CPU model
device_name: E5-2630v4 #GPU model
cpu_count: 32 #CPU count for cpu parallel
cuda: 8.0 #CUDA version
cudnn: 5.1 #CUDNN version
10 changes: 5 additions & 5 deletions configs/bm2cpu4.config
@@ -1,15 +1,15 @@
flag: sgbenchmark6 #Flag of current experiment
tools: torch #Tools to benchmark
tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark
experiments: #<network type>; <network name>; <device id>; <gpu count>; <batch size>; <number of epochs>; <epoch size>; <learning rate>
{
# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01
cnn; resnet; -1; 1; 128; 2; 50000; 0.01
# rnn; lstm; -1; 1; 128; 2; 2048; 0.1
rnn; lstm; -1; 1; 128; 2; 2048; 0.1
}
host_file: None #Path to host file or None
cpu_name: E5-2630v3 #CPU model
device_name: E5-2630v3 #GPU model
cpu_name: E5-2630v4 #CPU model
device_name: E5-2630v4 #GPU model
cpu_count: 4 #CPU count for cpu parallel
cuda: 8.0 #CUDA version
cudnn: 5.1 #CUDNN version
10 changes: 5 additions & 5 deletions configs/bm2cpu8.config
@@ -1,15 +1,15 @@
flag: sgbenchmark6 #Flag of current experiment
tools: torch #Tools to benchmark
tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark
experiments: #<network type>; <network name>; <device id>; <gpu count>; <batch size>; <number of epochs>; <epoch size>; <learning rate>
{
# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
fc; fcn5; -1; 1; 1024; 4; 60000; 0.05
cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01
cnn; resnet; -1; 1; 128; 2; 50000; 0.01
# rnn; lstm; -1; 1; 128; 2; 2048; 0.1
rnn; lstm; -1; 1; 128; 2; 2048; 0.1
}
host_file: None #Path to host file or None
cpu_name: E5-2630v3 #CPU model
device_name: E5-2630v3 #GPU model
cpu_name: E5-2630v4 #CPU model
device_name: E5-2630v4 #GPU model
cpu_count: 8 #CPU count for cpu parallel
cuda: 8.0 #CUDA version
cudnn: 5.1 #CUDNN version
32 changes: 32 additions & 0 deletions configs/gpuk80.config
@@ -0,0 +1,32 @@
flag: sgbenchmark6 #Flag of current experiment
tools: torch #Tools to benchmark
experiments: #<network type>; <network name>; <device id>; <gpu count>; <batch size>; <number of epochs>; <epoch size>; <Learning rate>
{
fc; fcn5; 0; 1; 4096; 40; 60000; 0.05
fc; fcn5; 0; 1; 2048; 40; 60000; 0.05
fc; fcn5; 0; 1; 1024; 40; 60000; 0.05
fc; fcn5; 0; 1; 512; 40; 60000; 0.05
fc; fcn5; 0; 1; 342; 40; 60000; 0.05
cnn; alexnet; 0; 1; 2048; 40; 50000; 0.01
cnn; alexnet; 0; 1; 1024; 40; 50000; 0.01
cnn; alexnet; 0; 1; 512; 40; 50000; 0.01
cnn; alexnet; 0; 1; 256; 40; 50000; 0.01
cnn; alexnet; 0; 1; 128; 40; 50000; 0.01
cnn; alexnet; 0; 1; 86; 40; 50000; 0.01
cnn; resnet; 0; 1; 128; 40; 50000; 0.01
cnn; resnet; 0; 1; 64; 40; 50000; 0.01
cnn; resnet; 0; 1; 32; 40; 50000; 0.01
cnn; resnet; 0; 1; 16; 40; 50000; 0.01
cnn; resnet; 0; 1; 11; 40; 50000; 0.01
# rnn; lstm; 0; 1; 1024; 20; -1; 0.1
# rnn; lstm; 0; 1; 512; 20; -1; 0.1
# rnn; lstm; 0; 1; 256; 20; -1; 0.1
# rnn; lstm; 0; 1; 128; 20; -1; 0.1
# rnn; lstm; 0; 1; 64; 20; -1; 0.1
}
host_file: None #Path to host file or None
cpu_name: E5-2630v4 #CPU model
device_name: K80 #GPU model
cuda: 8.0 #CUDA version
cudnn: 5.1 #CUDNN version
cuda_driver: 367.48 #CUDA driver version
2 changes: 1 addition & 1 deletion post_record.py
@@ -53,7 +53,7 @@ def post_record(**args):
object_id = post_record(flag=p.flag, network=p.network, batch_size=p.batch_size, device_name=p.device_name,
gpu_count=p.gpu_count, cpu_count=p.cpu_count, cpu_name=p.cpu_name, epoch_size=p.epoch_size, epoch=p.epoch,
total_time=p.total_time, average_time=p.average_time, tool_name=p.tool_name, avg_mem=p.average_mem,
epoch_info=p.epoch_info, log_file=p.log_file, cuda=p.cuda, cudnn=p.cudnn, cuda_driver=p.cuda_driver)
epoch_info=p.epoch_info, log_file=p.log_file, cuda=p.cuda, cudnn=p.cudnn, cuda_driver=p.cuda_driver, version=p.experiment_version)
#object_id = post_record(flag='test', network='network')
print 'post finished, object_id: ', object_id
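The change above threads a new `version` keyword through the `post_record` call. A hedged sketch of the pattern (not the repo's actual implementation, which posts the record to a results server) shows why a `**args` signature absorbs new fields like this without breaking existing call sites:

```python
# Illustrative sketch of the **kwargs record pattern used by post_record:
# optional fields (like the newly added 'version') ride along transparently.
def post_record(**args):
    record = {"posted": True}
    # Every keyword the caller supplied becomes a field of the record.
    record.update(args)
    return record

rec = post_record(flag="sgbenchmark6", network="resnet", version=2)
```

Older call sites that never pass `version` keep working unchanged, which is the point of the pattern.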

4 changes: 2 additions & 2 deletions synthetic/experiments/cntk/cnn/alexnet/alexnet.cntk
@@ -1,7 +1,7 @@
WorkDir=.
ModelDir=$WorkDir$/Output/$ConfigName$
#DataDir=/home/comp/csshshi/data/cntk
DataDir=/home/ipdps/data/cntk/synthetic
DataDir=/home/comp/csshshi/data/cntk
#DataDir=/home/ipdps/data/cntk/synthetic

ndlMacros=$WorkDir$/Macros.ndl

4 changes: 2 additions & 2 deletions synthetic/experiments/cntk/cnn/resnet/resnet.cntk
@@ -2,8 +2,8 @@ RootDir = "."

ConfigDir = "$RootDir$"
#DataDir = "$RootDir$"
#DataDir=/home/comp/csshshi/data/cntk
DataDir=/home/ipdps/data/cntk/synthetic
DataDir=/home/comp/csshshi/data/cntk
#DataDir=/home/ipdps/data/cntk/synthetic
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"

4 changes: 2 additions & 2 deletions synthetic/experiments/cntk/fc/ffn26752.cntk
@@ -1,8 +1,8 @@
WorkDir=.
ModelDir=$WorkDir$/Output/$ConfigName$
#stderr=$WorkDir$/logs/$ConfigName$/out
#DataDir=/home/comp/csshshi/data/cntk
DataDir=/home/ipdps/data/cntk/synthetic
DataDir=/home/comp/csshshi/data/cntk
#DataDir=/home/ipdps/data/cntk/synthetic
precision=float

deviceId=0
10 changes: 5 additions & 5 deletions synthetic/experiments/tensorflow/cnn/alexnet/alexnetbm.py
@@ -102,11 +102,11 @@ def loss(logits, labels):
batch_size = tf.size(labels)
labels = tf.expand_dims(labels, 1)
indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
concated = tf.concat(1, [indices, labels])
concated = tf.concat(axis=1, values=[indices, labels])
onehot_labels = tf.sparse_to_dense(
concated, tf.pack([batch_size, 1000]), 1.0, 0.0)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
onehot_labels,
concated, tf.stack([batch_size, 1000]), 1.0, 0.0)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
labels=onehot_labels,
name='xentropy')
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
return loss
@@ -184,7 +184,7 @@ def run_benchmark():
last_layer = inference(images)

# Build an initialization operation.
init = tf.initialize_all_variables()
init = tf.global_variables_initializer()

# Start running operations on the Graph.
sess = tf.Session(config=config)
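This diff migrates the loss function to the TensorFlow 1.0 APIs: `tf.concat` takes `axis=`/`values=` keywords, `tf.pack` is renamed `tf.stack`, and `softmax_cross_entropy_with_logits` takes explicit `logits=`/`labels=` keywords. What the `indices`/`concat`/`sparse_to_dense` sequence computes is simply a one-hot expansion of the integer labels; a plain-Python sketch (no TensorFlow, names illustrative):

```python
# Plain-Python sketch of the one-hot expansion that the
# indices/concat/sparse_to_dense sequence in the diff builds:
# each integer class label becomes a row with a single 1.0.
def one_hot(labels, num_classes):
    rows = []
    for label in labels:
        row = [0.0] * num_classes
        row[label] = 1.0
        rows.append(row)
    return rows

onehot = one_hot([2, 0], 4)
```

In the benchmark itself `num_classes` is 1000 (the `[batch_size, 1000]` shape passed to `tf.stack`).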
57 changes: 57 additions & 0 deletions synthetic/experiments/tensorflow/cnn/alexnet/report.txt
@@ -0,0 +1,57 @@
--------------------------------------------------------------------------------
Processing file 'alexnetbm.py'
outputting to 'alexnetbm1.py'
--------------------------------------------------------------------------------

'alexnetbm.py' Line 105
--------------------------------------------------------------------------------

Added keyword 'concat_dim' to reordered function 'tf.concat'
Added keyword 'values' to reordered function 'tf.concat'

Old: concated = tf.concat(1, [indices, labels])

New: concated = tf.concat(axis=1, values=[indices, labels])
~~~~~ ~~~~~~~

'alexnetbm.py' Line 187
--------------------------------------------------------------------------------

Renamed function 'tf.initialize_all_variables' to 'tf.global_variables_initializer'

Old: init = tf.initialize_all_variables()
~~~~~~~~~~~~~~~~~~~~~~~~~~~
New: init = tf.global_variables_initializer()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

'alexnetbm.py' Line 107
--------------------------------------------------------------------------------

Renamed function 'tf.pack' to 'tf.stack'

Old: concated, tf.pack([batch_size, 1000]), 1.0, 0.0)
~~~~~~~
New: concated, tf.stack([batch_size, 1000]), 1.0, 0.0)
~~~~~~~~

'alexnetbm.py' Line 108
--------------------------------------------------------------------------------

Added keyword 'logits' to reordered function 'tf.nn.softmax_cross_entropy_with_logits'

Old: cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,

New: cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
~~~~~~~

'alexnetbm.py' Line 109
--------------------------------------------------------------------------------

Added keyword 'labels' to reordered function 'tf.nn.softmax_cross_entropy_with_logits'

Old: onehot_labels,

New: labels=onehot_labels,
~~~~~~~

