Doc for adding new tools updated, network configuration checked
FreemanX committed Feb 28, 2017
1 parent de4b1dc commit 9433de4
Showing 8 changed files with 98 additions and 61 deletions.
17 changes: 5 additions & 12 deletions network-configs/NetworksConfigsDoc.txt
@@ -6,17 +6,10 @@ Number of Parameters: 13707264 = 13MB
Experiment configs:

Learning Rate: 0.05
Evaluate Step: 2
-----------------------
Batch Size | Epoch
-----------------------
32 | 40
-------------------
64 | 40
-------------------
128 | 40
-------------------
256 | 40
342 | 40
-------------------
512 | 40
-------------------
@@ -59,7 +52,7 @@ Evaluate Step: 2
Batch Size | Epoch
-----------------------
-------------------
32 | 40
86 | 40
-------------------
64 | 40
-------------------
@@ -70,6 +63,8 @@ Evaluate Step: 2
512 | 40
-------------------
1024 | 40
-------------------
2048 | 40
-----------------------
For multiple GPUs, the batch sizes are set as follows:
----------------
@@ -120,7 +115,7 @@ Evaluate Step: 2
-----------------------
Batch Size | Epoch
-----------------------
8 | 40
11 | 40
-------------------
16 | 40
-------------------
@@ -129,8 +124,6 @@ Evaluate Step: 2
64 | 40
-------------------
128 | 40
-------------------
256 | 40
-----------------------
For multiple GPUs, the batch sizes are set as follows:
----------------
1 change: 1 addition & 0 deletions synthetic/scripts/batch-bencmarks.sh
@@ -21,6 +21,7 @@
#minibatch=64 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh

# GPU-1 ResNet
sleep 1200
minibatch=8 iterations=8 epochs=4 device_id=1 network_name=resnet ./cnn-benchmarks.sh
minibatch=16 iterations=8 epochs=4 device_id=1 network_name=resnet ./cnn-benchmarks.sh
minibatch=32 iterations=8 epochs=4 device_id=1 network_name=resnet ./cnn-benchmarks.sh
12 changes: 6 additions & 6 deletions synthetic/scripts/batch-cpus-gpu10.sh
@@ -1,12 +1,12 @@
# CPU Version
minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=1 ./cnn-benchmarks.sh
minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=8 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=1 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=8 ./cnn-benchmarks.sh

minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=1 ./cnn-benchmarks.sh
minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=8 ./cnn-benchmarks.sh

minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=1 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=8 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=1 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=8 ./fc-benchmarks.sh

minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=1 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=8 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=1 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=8 ./fc-benchmarks.sh
14 changes: 7 additions & 7 deletions synthetic/scripts/batch-cpus-gpu11.sh
@@ -1,12 +1,12 @@
# CPU Version
minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=2 ./cnn-benchmarks.sh
minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=16 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=2 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=16 ./cnn-benchmarks.sh

minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=2 ./cnn-benchmarks.sh
minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=16 ./cnn-benchmarks.sh

minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=2 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=16 ./fc-benchmarks.sh

minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=2 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=16 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=2 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=16 ./fc-benchmarks.sh
#
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=2 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=16 ./fc-benchmarks.sh
14 changes: 7 additions & 7 deletions synthetic/scripts/batch-cpus-gpu12.sh
@@ -1,12 +1,12 @@
# CPU Version
minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=4 ./cnn-benchmarks.sh
minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=32 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=4 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=32 ./cnn-benchmarks.sh
#
minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=4 ./cnn-benchmarks.sh
minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=32 ./cnn-benchmarks.sh

minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=4 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=32 ./fc-benchmarks.sh

minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=4 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=32 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=4 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=32 ./fc-benchmarks.sh
#
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=4 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=32 ./fc-benchmarks.sh
28 changes: 14 additions & 14 deletions synthetic/scripts/batch-cpus.sh
@@ -1,29 +1,29 @@
# CPU Version
minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=1 ./cnn-benchmarks.sh
minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=2 ./cnn-benchmarks.sh
minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=4 ./cnn-benchmarks.sh
minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=8 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=1 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=2 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=4 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=8 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=16 ./cnn-benchmarks.sh
#minibatch=16 iterations=3 epochs=2 device_id=-1 network_name=alexnet OMP_NUM_THREADS=32 ./cnn-benchmarks.sh
#
minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=1 ./cnn-benchmarks.sh
minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=2 ./cnn-benchmarks.sh
minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=4 ./cnn-benchmarks.sh
minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=8 ./cnn-benchmarks.sh
#minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=16 ./cnn-benchmarks.sh
#minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=32 ./cnn-benchmarks.sh
minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=16 ./cnn-benchmarks.sh
minibatch=16 iterations=2 epochs=3 device_id=-1 network_name=resnet OMP_NUM_THREADS=32 ./cnn-benchmarks.sh

minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=1 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=2 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=4 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=8 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=1 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=2 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=4 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=8 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=16 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752 OMP_NUM_THREADS=32 ./fc-benchmarks.sh
#
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=1 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=2 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=4 ./fc-benchmarks.sh
minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=8 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=1 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=2 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=4 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=8 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=16 ./fc-benchmarks.sh
#minibatch=64 iterations=8 epochs=4 device_id=-1 network_name=ffn26752l6 OMP_NUM_THREADS=32 ./fc-benchmarks.sh
#
51 changes: 43 additions & 8 deletions tools/Readme.md
@@ -1,12 +1,47 @@
## How to add new tools: ##
- 1. Put your tools' scripts directory here and copy the benchmark scrip under common/xxxbm.py (rename it to <your tool name>bm.py) in it.
- 2. Make sure xxxbm.py will take all those parameters pre-defined, you may ignore some of them as long as it works.
- 3. xxxbm.py should print out the running result which will be taken by benchmark.py and post to the server, and the format of the result is:
# How to add new tools #
## Overview
- Put your tool's scripts directory here and copy the benchmark script common/xxxbm.py (renamed to \<your tool name\>bm.py) and the test script testbm.sh into it (a minimal sketch follows this list).
- Make sure \<your tool name\>bm.py accepts all of the pre-defined parameters; you may ignore some of them as long as your scripts work. \<your tool name\>bm.py serves as an entry point and will be invoked by ../benchmark.py, which only cares about the input parameters and output results of \<your tool name\>bm.py.
- All tests in common/testbm.sh should pass before a new tool is put into use.
- Please create a readme file in your tool's directory covering tool setup, data preparation, dependencies, environment, etc.
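A minimal sketch of these setup steps for a hypothetical tool named `mytool` (the name and file paths are placeholders; copying the files with `cp` in a shell works just as well):
```
import os
import shutil

# Create the new tool's directory next to the others and copy in the two templates.
if not os.path.isdir("mytool"):
    os.makedirs("mytool")
shutil.copy("common/xxxbm.py", "mytool/mytoolbm.py")  # benchmark entry script
shutil.copy("common/testbm.sh", "mytool/")            # test script
```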
## \<Your tool name>bm.py explained
### Input options
| Input Argument | Details | Default value |
|:--------------:|:--------------------------------------------------------------------------------------------------:|:------------------------------------------------:|
| -log | Name of log file | You may set your own default value for debugging |
| -batchSize     | Batch size for each GPU. If you are using n GPUs for a test, n*batchSize will be fed to the model.  | Depends on memory size                           |
| -network | Name of network, values can be [fcn5 / alexnet / resnet / lstm] | fcn5 |
| -devId | Training device: -1 for CPU; 0 for GPU 0; 0,2 for GPU0 and GPU2. | None but training device must be specified |
| -numEpochs | Number of epochs | 10 |
| -epochSize     | Number of training samples for an epoch (not all tools need this parameter)                         | 50000                                            |
| -hostFile | Path to the host file if you are running on multiple machines | None* |
| -cpuCount      | If devId=-1, you need to specify how many CPUs will be used for training                            | 1                                                |
| -gpuCount | If devId != -1, you need to specify how many GPUs will be used | None |
| -lr | Learning rate | None |
| -netType | Network type, values can be [fc / cnn / rnn] | None |
| -debug | Boolean value, true for debugging this script. | False |
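For concreteness, here is a minimal argparse sketch of how \<your tool name\>bm.py might declare these options. The exact definitions in common/xxxbm.py may differ; the defaults and help strings below are illustrative:
```
import argparse

parser = argparse.ArgumentParser(description="Benchmark entry script for <your tool name>")
parser.add_argument("-log", default="debug.log", help="name of the log file")
parser.add_argument("-batchSize", type=int, default=64, help="batch size per GPU")
parser.add_argument("-network", default="fcn5", help="fcn5 / alexnet / resnet / lstm")
parser.add_argument("-devId", required=True, help="-1 for CPU; 0 for GPU 0; 0,2 for GPU0 and GPU2")
parser.add_argument("-numEpochs", type=int, default=10, help="number of epochs")
parser.add_argument("-epochSize", type=int, default=50000, help="training samples per epoch")
parser.add_argument("-hostFile", default=None, help="host file for multi-machine runs")
parser.add_argument("-cpuCount", type=int, default=1, help="number of CPUs when devId=-1")
parser.add_argument("-gpuCount", type=int, default=None, help="number of GPUs when devId != -1")
parser.add_argument("-lr", type=float, default=None, help="learning rate")
parser.add_argument("-netType", default=None, help="fc / cnn / rnn")
parser.add_argument("-debug", action="store_true",
                    help="debug output (the real script may take an explicit true/false value instead)")
args = parser.parse_args()
```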
### Output
- \<your tool name\>bm.py should print out the running result, which will be taken by benchmark.py and posted to the server. The format of the result is:
```
-t $totalTimeInSecond -a $averageMiniBatchTimeInSecond -I $lossValueOfEachEpoch
```
Example of *$lossValueOfEachEpoch* (there are 4 epoch items, split by `,`, and the 3 values in each item, split by `:`, represent the epoch number, test accuracy and cross entropy respectively):
```
0:-:2.32620807,1:-:2.49505453,2:-:2.30122152,3:-:2.30028142
```
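For illustration only (this helper is not part of the framework), the string above can be assembled from per-epoch results like this; a `-` stands in for the accuracy when the tool does not evaluate after each epoch:
```
def format_epoch_info(epoch_results):
    # epoch_results: list of (epoch_number, test_accuracy, cross_entropy);
    # test_accuracy may be None when no evaluation is run after the epoch.
    items = []
    for epoch, acc, ce in epoch_results:
        acc_str = "-" if acc is None else str(acc)
        items.append(str(epoch) + ":" + acc_str + ":" + str(ce))
    return ",".join(items)

# Reproduces the sample string above:
print(format_epoch_info([(0, None, 2.32620807), (1, None, 2.49505453),
                         (2, None, 2.30122152), (3, None, 2.30028142)]))
```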
### Benchmark procedure
#### 1. Build cmd
- To make this framework compatible with deep learning tools written in different languages, \<tool>bm.py is only an interface that standardizes the input and output. You need to use the arguments above to build the variable cmd, which will be executed in a subshell by calling `os.system(cmd)`; during this run a log file must be generated containing the information needed for post processing. Some tools generate a log file automatically; if yours does not, redirect all stdout and stderr to the log file. The name of the log file should end with ".log". Here are some examples of cmd (a sketch of composing cmd follows them):
- Caffe: `caffe train -solver=fcn5-b1024-GPU-solver1.prototxt -gpu=0 >& /root/dlbench/tools/caffe/fc/debug.log`
- MXNet: `cd fc; python train_mnist.py --lr 0.05 --batch-size 4096 --num-epochs 40 --num-examples 60000 --gpus 1 --kv-store device >& mxnet_debug.log`
- Torch: `THC_CACHING_ALLOCATOR=1 CUDA_VISIBLE_DEVICES=1 th Main.lua -LR 0.05 -dataset MNIST -network ffn5 -batchSize 342 -epoch 40 -logfile torch_debug.log -save torch_debug.log`
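A minimal sketch of this step inside \<your tool name\>bm.py. The `train.py` entry point and its flags are placeholders for your tool's real command line; only the overall shape (compose a shell command and redirect its output to a `.log` file) follows the framework:
```
def build_cmd(args):
    # Make sure the log file name ends with ".log".
    log_file = args.log if args.log.endswith(".log") else args.log + ".log"
    if args.devId == "-1":  # CPU run: pin the number of OpenMP threads
        prefix = "OMP_NUM_THREADS=" + str(args.cpuCount) + " "
        batch_size = args.batchSize
    else:                   # GPU run: expose the requested devices, scale batch size by GPU count
        prefix = "CUDA_VISIBLE_DEVICES=" + args.devId + " "
        batch_size = args.batchSize * args.gpuCount
    cmd = (prefix + "python train.py"
           + " --network " + args.network
           + " --batch-size " + str(batch_size)
           + " --num-epochs " + str(args.numEpochs)
           + " --lr " + str(args.lr)
           + " >& " + log_file)  # the framework later runs os.system(cmd)
    return cmd
```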
#### 2. Execute cmd
- Normally you don't need to change anything in this part. The cmd is executed and the total running time is measured as well.
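For reference, a sketch of what the template already does here (the exact implementation in common/xxxbm.py may differ slightly):
```
import os
import time

def run_and_time(cmd):
    # Execute the command built in step 1 and measure the total running time in seconds.
    start = time.time()
    os.system(cmd)
    return time.time() - start
```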
#### 3. Post processing
- There are three pieces of information that need to be printed out:
**Total running time** `t`: xxxbm.py already handles this.
**Average batch time** `avgBatch`: average training time for one batch of data. Note that if there is more than one GPU, the batch size is n\*args.batchSize, since the input argument args.batchSize is for each GPU. If the new tool doesn't measure the batch time directly, you need to convert other metrics into seconds/batch here.
**Batch info** `info`: grep the test accuracy\* and cross entropy for each epoch from the log file and format them as \<epoch number>:\<test accuracy>:\<cross entropy>,\<epoch number+1>:\<test accuracy>:\<cross entropy>,...
Then just print out the result: `print "-t " + str(t) + " -a " + str(avgBatch) + " -I " + info`
and save the original log file under \<project root path>/logs/ (see the sketch below).
\*There's no need to test the model after each epoch for now, but we still leave the space for future use.
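A sketch of this post-processing step. The log-line patterns below are made up for illustration — every tool logs differently, so adapt the regular expressions to your tool's actual log format:
```
import re

def parse_log(log_path, num_samples, batch_size):
    # Assumed (hypothetical) log lines:
    #   "Epoch 3: accuracy=- cross_entropy=2.30028142"
    #   "Epoch time: 12.34"
    epoch_times, items = [], []
    with open(log_path) as f:
        for line in f:
            m = re.search(r"Epoch (\d+): accuracy=(\S+) cross_entropy=(\S+)", line)
            if m:
                items.append(m.group(1) + ":" + m.group(2) + ":" + m.group(3))
            t = re.search(r"Epoch time: (\S+)", line)
            if t:
                epoch_times.append(float(t.group(1)))
    avg_epoch = sum(epoch_times) / len(epoch_times)     # seconds per epoch
    avg_batch = (avg_epoch / num_samples) * batch_size  # seconds per mini-batch
    info = ",".join(items)
    return avg_batch, info
```
The returned `avgBatch` and `info` then go straight into the `print "-t ..."` line shown above.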
22 changes: 15 additions & 7 deletions tools/common/xxxbm.py
@@ -25,6 +25,14 @@

# Build cmd for benchmark
cmd = ''
batchSize = 0
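# Effective batch size fed to the model: args.batchSize is per GPU, so multiply by gpuCount on GPU runs.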
if args.devId == -1: # run on CPU
    batchSize = args.batchSize
    pass
else:
    batchSize = args.batchSize*args.gpuCount
    pass



# Execute cmd
@@ -35,16 +43,16 @@
if args.debug is True: print("Time diff: " + str(t))


#Save log file
#Post processing
logPath = ''
with open(logPath, "a") as logFile:
logFile.write("Total time: " + str(t) + "\n")
logFile.write("cmd: " + cmd + "\n")
os.system("cp " + logPath + " ../../logs")


# Parse log file and extract benchmark info
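# avgEpoch is the average time per epoch in seconds; dividing by the number of samples and multiplying by the effective batch size gives seconds per batch.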
avgBatch = (avgEpoch/int(numSamples))*float(batchSize)
if args.debug is True: print("Avg Batch: " + str(avgBatch))
info = ''
print "-t " + str(t) + " -a " + str(avgBatch) + " -I " + info

#Save log file
with open(logPath, "a") as logFile:
logFile.write("Total time: " + str(t) + "\n")
logFile.write("cmd: " + cmd + "\n")
os.system("cp " + logPath + " ../../logs")
