From 9e60e42a860f5baba6407bd68c197ce334ea68bb Mon Sep 17 00:00:00 2001
From: shyhuai
Date: Tue, 14 Mar 2017 01:10:25 +0800
Subject: [PATCH] revise the setting of number of thread by using MKL

---
 batch-bencmarks-cpu-gpu20.sh | 8 +
 batch-bencmarks-gpu-gpu20.sh | 3 +
 configs/bm2cpu1.config | 10 +-
 configs/bm2cpu16.config | 14 +-
 configs/bm2cpu2.config | 10 +-
 configs/bm2cpu32.config | 10 +-
 configs/bm2cpu4.config | 10 +-
 configs/bm2cpu8.config | 10 +-
 configs/gpuk80.config | 32 +
 post_record.py | 2 +-
 .../experiments/cntk/cnn/alexnet/alexnet.cntk | 4 +-
 .../experiments/cntk/cnn/resnet/resnet.cntk | 4 +-
 synthetic/experiments/cntk/fc/ffn26752.cntk | 4 +-
 .../tensorflow/cnn/alexnet/alexnetbm.py | 10 +-
 .../tensorflow/cnn/alexnet/report.txt | 57 ++
 .../tensorflow/cnn/resnet/report.txt | 106 +++
 .../tensorflow/cnn/resnet/resnet.py | 26 +-
 .../tensorflow/cnn/resnet/resnet_train.py | 20 +-
 .../experiments/tensorflow/fc/ffn26752bm1.py | 86 +++
 .../experiments/tensorflow/fc/report.txt | 6 +
 .../experiments/tensorflow/fc/tf_upgrade.py | 681 ++++++++++++++++++
 .../scripts/batch-bencmarks-gpu-gpu15.sh | 22 +-
 .../scripts/batch-bencmarks-gpu-gpu20.sh | 30 +
 synthetic/scripts/cnn-benchmarks.sh | 5 +-
 synthetic/scripts/fc-benchmarks.sh | 6 +-
 tools/caffe/caffebm.py | 1 +
 tools/cntk/cnn/alexnet/alexnet_cifar10.cntk | 2 +-
 tools/cntk/cnn/resnet/resnet.cntk | 2 +-
 tools/cntk/cntkbm.py | 1 +
 tools/cntk/fc/fcn5.cntk | 2 +-
 tools/cntk/multinodes/fc/Macros.ndl | 35 -
 tools/cntk/multinodes/fc/fc.sh | 6 -
 tools/cntk/multinodes/fc/fcn5.cntk | 78 --
 tools/cntk/multinodes/fc/fcn8.cntk | 89 ---
 tools/cntk/multinodes/fc/ffn.cntk | 87 ---
 tools/cntk/rnn/lstm/lstm.cntk | 2 +-
 tools/mxnet/mxnetbm.py | 1 +
 .../tensorflow/cnn/alexnet/alexnet_cifar10.py | 12 +-
 .../cnn/alexnet/alexnet_cifar10_multi_gpu1.py | 328 +++++++++
 tools/tensorflow/cnn/alexnet/report.txt | 78 ++
 tools/tensorflow/fc/fcn5_mnist.py | 4 +-
 tools/tensorflow/fc/fcn5_mnist_multi_gpu1.py | 228 ++++++
 tools/tensorflow/fc/models.py | 2 +-
 tools/tensorflow/fc/report.txt | 37 +
 tools/tensorflow/tensorflowbm.py | 1 +
 tools/torch/torchbm.py | 7 +-
 46 files changed, 1783 insertions(+), 396 deletions(-)
 create mode 100755 batch-bencmarks-cpu-gpu20.sh
 create mode 100755 batch-bencmarks-gpu-gpu20.sh
 create mode 100644 configs/gpuk80.config
 create mode 100644 synthetic/experiments/tensorflow/cnn/alexnet/report.txt
 create mode 100644 synthetic/experiments/tensorflow/cnn/resnet/report.txt
 create mode 100644 synthetic/experiments/tensorflow/fc/ffn26752bm1.py
 create mode 100644 synthetic/experiments/tensorflow/fc/report.txt
 create mode 100644 synthetic/experiments/tensorflow/fc/tf_upgrade.py
 create mode 100755 synthetic/scripts/batch-bencmarks-gpu-gpu20.sh
 delete mode 100644 tools/cntk/multinodes/fc/Macros.ndl
 delete mode 100644 tools/cntk/multinodes/fc/fc.sh
 delete mode 100644 tools/cntk/multinodes/fc/fcn5.cntk
 delete mode 100644 tools/cntk/multinodes/fc/fcn8.cntk
 delete mode 100644 tools/cntk/multinodes/fc/ffn.cntk
 create mode 100644 tools/tensorflow/cnn/alexnet/alexnet_cifar10_multi_gpu1.py
 create mode 100644 tools/tensorflow/cnn/alexnet/report.txt
 create mode 100644 tools/tensorflow/fc/fcn5_mnist_multi_gpu1.py
 create mode 100644 tools/tensorflow/fc/report.txt

diff --git a/batch-bencmarks-cpu-gpu20.sh b/batch-bencmarks-cpu-gpu20.sh
new file mode 100755
index 0000000..b98fe4a
--- /dev/null
+++ b/batch-bencmarks-cpu-gpu20.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# The benchmarks of all toolkits
+python benchmark.py -config ./configs/bm2cpu1.config -post True
+python benchmark.py
-config ./configs/bm2cpu2.config -post True +python benchmark.py -config ./configs/bm2cpu4.config -post True +python benchmark.py -config ./configs/bm2cpu8.config -post True +python benchmark.py -config ./configs/bm2cpu16.config -post True +python benchmark.py -config ./configs/bm2cpu32.config -post True diff --git a/batch-bencmarks-gpu-gpu20.sh b/batch-bencmarks-gpu-gpu20.sh new file mode 100755 index 0000000..39d42a5 --- /dev/null +++ b/batch-bencmarks-gpu-gpu20.sh @@ -0,0 +1,3 @@ +#!/bin/bash +# The benchmarks of all toolkits +python benchmark.py -config ./configs/gpuk80.config -post True diff --git a/configs/bm2cpu1.config b/configs/bm2cpu1.config index bb8203b..11d87e8 100644 --- a/configs/bm2cpu1.config +++ b/configs/bm2cpu1.config @@ -1,15 +1,15 @@ flag: sgbenchmark6 #Flag of current experiment -tools: torch #Tools to benchmark +tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark experiments: #; ; ; ; ; ; ; { -# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 + fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01 cnn; resnet; -1; 1; 128; 2; 50000; 0.01 -# rnn; lstm; -1; 1; 128; 2; 2048; 0.1 + rnn; lstm; -1; 1; 128; 2; 2048; 0.1 } host_file: None #Path to host file or None -cpu_name: E5-2630v3 #CPU model -device_name: E5-2630v3 #GPU model +cpu_name: E5-2630v4 #CPU model +device_name: E5-2630v4 #GPU model cpu_count: 1 #CPU count for cpu parallel cuda: 8.0 #CUDA version cudnn: 5.1 #CUDNN version diff --git a/configs/bm2cpu16.config b/configs/bm2cpu16.config index 4529bb5..1f4651c 100644 --- a/configs/bm2cpu16.config +++ b/configs/bm2cpu16.config @@ -1,19 +1,15 @@ flag: sgbenchmark6 #Flag of current experiment -tools: torch #Tools to benchmark +tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark experiments: #; ; ; ; ; ; ; { -<<<<<<< HEAD -# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 -======= -# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 ->>>>>>> b259c6d55c4beb261f3e7634d50cbb1acdbd4031 + fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01 cnn; resnet; -1; 1; 128; 2; 50000; 0.01 -# rnn; lstm; -1; 1; 128; 2; 2048; 0.1 + rnn; lstm; -1; 1; 128; 2; 2048; 0.1 } host_file: None #Path to host file or None -cpu_name: E5-2630v3 #CPU model -device_name: E5-2630v3 #GPU model +cpu_name: E5-2630v4 #CPU model +device_name: E5-2630v4 #GPU model cpu_count: 16 #CPU count for cpu parallel cuda: 8.0 #CUDA version cudnn: 5.1 #CUDNN version diff --git a/configs/bm2cpu2.config b/configs/bm2cpu2.config index 7017392..7032f64 100644 --- a/configs/bm2cpu2.config +++ b/configs/bm2cpu2.config @@ -1,15 +1,15 @@ flag: sgbenchmark6 #Flag of current experiment -tools: torch #Tools to benchmark +tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark experiments: #; ; ; ; ; ; ; { -# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 + fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01 cnn; resnet; -1; 1; 128; 2; 50000; 0.01 -# rnn; lstm; -1; 1; 128; 2; 2048; 0.1 + rnn; lstm; -1; 1; 128; 2; 2048; 0.1 } host_file: None #Path to host file or None -cpu_name: E5-2630v3 #CPU model -device_name: E5-2630v3 #GPU model +cpu_name: E5-2630v4 #CPU model +device_name: E5-2630v4 #GPU model cpu_count: 2 #CPU count for cpu parallel cuda: 8.0 #CUDA version cudnn: 5.1 #CUDNN version diff --git a/configs/bm2cpu32.config b/configs/bm2cpu32.config index 5cafeea..9e88bc0 100644 --- a/configs/bm2cpu32.config +++ b/configs/bm2cpu32.config @@ -1,15 +1,15 @@ flag: sgbenchmark6 #Flag of current experiment -tools: torch #Tools to benchmark +tools: 
caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark experiments: #; ; ; ; ; ; ; { -# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 + fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01 cnn; resnet; -1; 1; 128; 2; 50000; 0.01 -# rnn; lstm; -1; 1; 128; 2; 2048; 0.1 + rnn; lstm; -1; 1; 128; 2; 2048; 0.1 } host_file: None #Path to host file or None -cpu_name: E5-2630v3 #CPU model -device_name: E5-2630v3 #GPU model +cpu_name: E5-2630v4 #CPU model +device_name: E5-2630v4 #GPU model cpu_count: 32 #CPU count for cpu parallel cuda: 8.0 #CUDA version cudnn: 5.1 #CUDNN version diff --git a/configs/bm2cpu4.config b/configs/bm2cpu4.config index 4b40114..3bd2b9d 100644 --- a/configs/bm2cpu4.config +++ b/configs/bm2cpu4.config @@ -1,15 +1,15 @@ flag: sgbenchmark6 #Flag of current experiment -tools: torch #Tools to benchmark +tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark experiments: #; ; ; ; ; ; ; { -# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 + fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01 cnn; resnet; -1; 1; 128; 2; 50000; 0.01 -# rnn; lstm; -1; 1; 128; 2; 2048; 0.1 + rnn; lstm; -1; 1; 128; 2; 2048; 0.1 } host_file: None #Path to host file or None -cpu_name: E5-2630v3 #CPU model -device_name: E5-2630v3 #GPU model +cpu_name: E5-2630v4 #CPU model +device_name: E5-2630v4 #GPU model cpu_count: 4 #CPU count for cpu parallel cuda: 8.0 #CUDA version cudnn: 5.1 #CUDNN version diff --git a/configs/bm2cpu8.config b/configs/bm2cpu8.config index ee32a32..e27a521 100644 --- a/configs/bm2cpu8.config +++ b/configs/bm2cpu8.config @@ -1,15 +1,15 @@ flag: sgbenchmark6 #Flag of current experiment -tools: torch #Tools to benchmark +tools: caffe,cntk,mxnet,tensorflow,torch #Tools to benchmark experiments: #; ; ; ; ; ; ; { -# fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 + fc; fcn5; -1; 1; 1024; 4; 60000; 0.05 cnn; alexnet; -1; 1; 1024; 2; 50000; 0.01 cnn; resnet; -1; 1; 128; 2; 50000; 0.01 -# rnn; lstm; -1; 1; 128; 2; 2048; 0.1 + rnn; lstm; -1; 1; 128; 2; 2048; 0.1 } host_file: None #Path to host file or None -cpu_name: E5-2630v3 #CPU model -device_name: E5-2630v3 #GPU model +cpu_name: E5-2630v4 #CPU model +device_name: E5-2630v4 #GPU model cpu_count: 8 #CPU count for cpu parallel cuda: 8.0 #CUDA version cudnn: 5.1 #CUDNN version diff --git a/configs/gpuk80.config b/configs/gpuk80.config new file mode 100644 index 0000000..2e55512 --- /dev/null +++ b/configs/gpuk80.config @@ -0,0 +1,32 @@ +flag: sgbenchmark6 #Flag of current experiment +tools: torch #Tools to benchmark +experiments: #; ; ; ; ; ; ; +{ + fc; fcn5; 0; 1; 4096; 40; 60000; 0.05 + fc; fcn5; 0; 1; 2048; 40; 60000; 0.05 + fc; fcn5; 0; 1; 1024; 40; 60000; 0.05 + fc; fcn5; 0; 1; 512; 40; 60000; 0.05 + fc; fcn5; 0; 1; 342; 40; 60000; 0.05 + cnn; alexnet; 0; 1; 2048; 40; 50000; 0.01 + cnn; alexnet; 0; 1; 1024; 40; 50000; 0.01 + cnn; alexnet; 0; 1; 512; 40; 50000; 0.01 + cnn; alexnet; 0; 1; 256; 40; 50000; 0.01 + cnn; alexnet; 0; 1; 128; 40; 50000; 0.01 + cnn; alexnet; 0; 1; 86; 40; 50000; 0.01 + cnn; resnet; 0; 1; 128; 40; 50000; 0.01 + cnn; resnet; 0; 1; 64; 40; 50000; 0.01 + cnn; resnet; 0; 1; 32; 40; 50000; 0.01 + cnn; resnet; 0; 1; 16; 40; 50000; 0.01 + cnn; resnet; 0; 1; 11; 40; 50000; 0.01 +# rnn; lstm; 0; 1; 1024; 20; -1; 0.1 +# rnn; lstm; 0; 1; 512; 20; -1; 0.1 +# rnn; lstm; 0; 1; 256; 20; -1; 0.1 +# rnn; lstm; 0; 1; 128; 20; -1; 0.1 +# rnn; lstm; 0; 1; 64; 20; -1; 0.1 +} +host_file: None #Path to host file or None +cpu_name: E5-2630v4 #CPU model +device_name: K80 #GPU model +cuda: 8.0 #CUDA version 
+cudnn: 5.1 #CUDNN version +cuda_driver: 367.48 #CUDA driver version diff --git a/post_record.py b/post_record.py index 7ed5efd..a37f4cb 100755 --- a/post_record.py +++ b/post_record.py @@ -53,7 +53,7 @@ def post_record(**args): object_id = post_record(flag=p.flag, network=p.network, batch_size=p.batch_size, device_name=p.device_name, gpu_count=p.gpu_count, cpu_count=p.cpu_count, cpu_name=p.cpu_name, epoch_size=p.epoch_size, epoch=p.epoch, total_time=p.total_time, average_time=p.average_time, tool_name=p.tool_name, avg_mem=p.average_mem, - epoch_info=p.epoch_info, log_file=p.log_file, cuda=p.cuda, cudnn=p.cudnn, cuda_driver=p.cuda_driver) + epoch_info=p.epoch_info, log_file=p.log_file, cuda=p.cuda, cudnn=p.cudnn, cuda_driver=p.cuda_driver, version=p.experiment_version) #object_id = post_record(flag='test', network='network') print 'post finished, object_id: ', object_id diff --git a/synthetic/experiments/cntk/cnn/alexnet/alexnet.cntk b/synthetic/experiments/cntk/cnn/alexnet/alexnet.cntk index 08b2f67..7da5905 100644 --- a/synthetic/experiments/cntk/cnn/alexnet/alexnet.cntk +++ b/synthetic/experiments/cntk/cnn/alexnet/alexnet.cntk @@ -1,7 +1,7 @@ WorkDir=. ModelDir=$WorkDir$/Output/$ConfigName$ -#DataDir=/home/comp/csshshi/data/cntk -DataDir=/home/ipdps/data/cntk/synthetic +DataDir=/home/comp/csshshi/data/cntk +#DataDir=/home/ipdps/data/cntk/synthetic ndlMacros=$WorkDir$/Macros.ndl diff --git a/synthetic/experiments/cntk/cnn/resnet/resnet.cntk b/synthetic/experiments/cntk/cnn/resnet/resnet.cntk index e7aaec2..00504a2 100644 --- a/synthetic/experiments/cntk/cnn/resnet/resnet.cntk +++ b/synthetic/experiments/cntk/cnn/resnet/resnet.cntk @@ -2,8 +2,8 @@ RootDir = "." ConfigDir = "$RootDir$" #DataDir = "$RootDir$" -#DataDir=/home/comp/csshshi/data/cntk -DataDir=/home/ipdps/data/cntk/synthetic +DataDir=/home/comp/csshshi/data/cntk +#DataDir=/home/ipdps/data/cntk/synthetic OutputDir = "$RootDir$/Output" ModelDir = "$OutputDir$/Models" diff --git a/synthetic/experiments/cntk/fc/ffn26752.cntk b/synthetic/experiments/cntk/fc/ffn26752.cntk index 3d1522f..efc165c 100644 --- a/synthetic/experiments/cntk/fc/ffn26752.cntk +++ b/synthetic/experiments/cntk/fc/ffn26752.cntk @@ -1,8 +1,8 @@ WorkDir=. 
ModelDir=$WorkDir$/Output/$ConfigName$ #stderr=$WorkDir$/logs/$ConfigName$/out -#DataDir=/home/comp/csshshi/data/cntk -DataDir=/home/ipdps/data/cntk/synthetic +DataDir=/home/comp/csshshi/data/cntk +#DataDir=/home/ipdps/data/cntk/synthetic precision=float deviceId=0 diff --git a/synthetic/experiments/tensorflow/cnn/alexnet/alexnetbm.py b/synthetic/experiments/tensorflow/cnn/alexnet/alexnetbm.py index 9c5a6a9..d1a7dec 100644 --- a/synthetic/experiments/tensorflow/cnn/alexnet/alexnetbm.py +++ b/synthetic/experiments/tensorflow/cnn/alexnet/alexnetbm.py @@ -102,11 +102,11 @@ def loss(logits, labels): batch_size = tf.size(labels) labels = tf.expand_dims(labels, 1) indices = tf.expand_dims(tf.range(0, batch_size, 1), 1) - concated = tf.concat(1, [indices, labels]) + concated = tf.concat(axis=1, values=[indices, labels]) onehot_labels = tf.sparse_to_dense( - concated, tf.pack([batch_size, 1000]), 1.0, 0.0) - cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, - onehot_labels, + concated, tf.stack([batch_size, 1000]), 1.0, 0.0) + cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, + labels=onehot_labels, name='xentropy') loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') return loss @@ -184,7 +184,7 @@ def run_benchmark(): last_layer = inference(images) # Build an initialization operation. - init = tf.initialize_all_variables() + init = tf.global_variables_initializer() # Start running operations on the Graph. sess = tf.Session(config=config) diff --git a/synthetic/experiments/tensorflow/cnn/alexnet/report.txt b/synthetic/experiments/tensorflow/cnn/alexnet/report.txt new file mode 100644 index 0000000..60eb09c --- /dev/null +++ b/synthetic/experiments/tensorflow/cnn/alexnet/report.txt @@ -0,0 +1,57 @@ +-------------------------------------------------------------------------------- +Processing file 'alexnetbm.py' + outputting to 'alexnetbm1.py' +-------------------------------------------------------------------------------- + +'alexnetbm.py' Line 105 +-------------------------------------------------------------------------------- + +Added keyword 'concat_dim' to reordered function 'tf.concat' +Added keyword 'values' to reordered function 'tf.concat' + + Old: concated = tf.concat(1, [indices, labels]) + + New: concated = tf.concat(axis=1, values=[indices, labels]) + ~~~~~ ~~~~~~~ + +'alexnetbm.py' Line 187 +-------------------------------------------------------------------------------- + +Renamed function 'tf.initialize_all_variables' to 'tf.global_variables_initializer' + + Old: init = tf.initialize_all_variables() + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + New: init = tf.global_variables_initializer() + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +'alexnetbm.py' Line 107 +-------------------------------------------------------------------------------- + +Renamed function 'tf.pack' to 'tf.stack' + + Old: concated, tf.pack([batch_size, 1000]), 1.0, 0.0) + ~~~~~~~ + New: concated, tf.stack([batch_size, 1000]), 1.0, 0.0) + ~~~~~~~~ + +'alexnetbm.py' Line 108 +-------------------------------------------------------------------------------- + +Added keyword 'logits' to reordered function 'tf.nn.softmax_cross_entropy_with_logits' + + Old: cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, + + New: cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, + ~~~~~~~ + +'alexnetbm.py' Line 109 +-------------------------------------------------------------------------------- + +Added keyword 'labels' to reordered function 
'tf.nn.softmax_cross_entropy_with_logits' + + Old: onehot_labels, + + New: labels=onehot_labels, + ~~~~~~~ + + diff --git a/synthetic/experiments/tensorflow/cnn/resnet/report.txt b/synthetic/experiments/tensorflow/cnn/resnet/report.txt new file mode 100644 index 0000000..6b1a110 --- /dev/null +++ b/synthetic/experiments/tensorflow/cnn/resnet/report.txt @@ -0,0 +1,106 @@ +-------------------------------------------------------------------------------- +Processing file 'resnet_train.py' + outputting to 'resnet_train1.py' +-------------------------------------------------------------------------------- + +'resnet_train.py' Line 69 +-------------------------------------------------------------------------------- + +Renamed function 'tf.all_variables' to 'tf.global_variables' + + Old: saver = tf.train.Saver(tf.all_variables()) + ~~~~~~~~~~~~~~~~ + New: saver = tf.train.Saver(tf.global_variables()) + ~~~~~~~~~~~~~~~~~~~ + +'resnet_train.py' Line 71 +-------------------------------------------------------------------------------- + +Renamed function 'tf.merge_all_summaries' to 'tf.summary.merge_all' + + Old: summary_op = tf.merge_all_summaries() + ~~~~~~~~~~~~~~~~~~~~~~ + New: summary_op = tf.summary.merge_all() + ~~~~~~~~~~~~~~~~~~~~ + +'resnet_train.py' Line 41 +-------------------------------------------------------------------------------- + +Renamed function 'tf.scalar_summary' to 'tf.summary.scalar' + + Old: tf.scalar_summary('loss_avg', ema.average(loss_)) + ~~~~~~~~~~~~~~~~~ + New: tf.summary.scalar('loss_avg', ema.average(loss_)) + ~~~~~~~~~~~~~~~~~ + +'resnet_train.py' Line 47 +-------------------------------------------------------------------------------- + +Renamed function 'tf.scalar_summary' to 'tf.summary.scalar' + + Old: tf.scalar_summary('val_top1_error_avg', top1_error_avg) + ~~~~~~~~~~~~~~~~~ + New: tf.summary.scalar('val_top1_error_avg', top1_error_avg) + ~~~~~~~~~~~~~~~~~ + +'resnet_train.py' Line 49 +-------------------------------------------------------------------------------- + +Renamed function 'tf.scalar_summary' to 'tf.summary.scalar' + + Old: tf.scalar_summary('learning_rate', FLAGS.learning_rate) + ~~~~~~~~~~~~~~~~~ + New: tf.summary.scalar('learning_rate', FLAGS.learning_rate) + ~~~~~~~~~~~~~~~~~ + +'resnet_train.py' Line 73 +-------------------------------------------------------------------------------- + +Renamed function 'tf.initialize_all_variables' to 'tf.global_variables_initializer' + + Old: init = tf.initialize_all_variables() + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + New: init = tf.global_variables_initializer() + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +'resnet_train.py' Line 55 +-------------------------------------------------------------------------------- + +Renamed function 'tf.histogram_summary' to 'tf.summary.histogram' + + Old: tf.histogram_summary(var.op.name + '/gradients', grad) + ~~~~~~~~~~~~~~~~~~~~ + New: tf.summary.histogram(var.op.name + '/gradients', grad) + ~~~~~~~~~~~~~~~~~~~~ + +'resnet_train.py' Line 79 +-------------------------------------------------------------------------------- + +Renamed function 'tf.train.SummaryWriter' to 'tf.summary.FileWriter' + + Old: summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) + ~~~~~~~~~~~~~~~~~~~~~~ + New: summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph) + ~~~~~~~~~~~~~~~~~~~~~ + +'resnet_train.py' Line 60 +-------------------------------------------------------------------------------- + +Renamed function 'tf.image_summary' to 'tf.summary.image' + + Old: 
tf.image_summary('images', images) + ~~~~~~~~~~~~~~~~ + New: tf.summary.image('images', images) + ~~~~~~~~~~~~~~~~ + +'resnet_train.py' Line 63 +-------------------------------------------------------------------------------- + +Renamed function 'tf.histogram_summary' to 'tf.summary.histogram' + + Old: tf.histogram_summary(var.op.name, var) + ~~~~~~~~~~~~~~~~~~~~ + New: tf.summary.histogram(var.op.name, var) + ~~~~~~~~~~~~~~~~~~~~ + + diff --git a/synthetic/experiments/tensorflow/cnn/resnet/resnet.py b/synthetic/experiments/tensorflow/cnn/resnet/resnet.py index ba51d7d..63d6fcb 100644 --- a/synthetic/experiments/tensorflow/cnn/resnet/resnet.py +++ b/synthetic/experiments/tensorflow/cnn/resnet/resnet.py @@ -77,7 +77,7 @@ def inference(x, is_training, x = stack(x, c) # post-net - x = tf.reduce_mean(x, reduction_indices=[1, 2], name="avg_pool") + x = tf.reduce_mean(x, axis=[1, 2], name="avg_pool") if num_classes != None: with tf.variable_scope('fc'): @@ -127,7 +127,7 @@ def inference_small_config(x, c): x = stack(x, c) # post-net - x = tf.reduce_mean(x, reduction_indices=[1, 2], name="avg_pool") + x = tf.reduce_mean(x, axis=[1, 2], name="avg_pool") if c['num_classes'] != None: with tf.variable_scope('fc'): @@ -138,20 +138,20 @@ def inference_small_config(x, c): def _imagenet_preprocess(rgb): """Changes RGB [0,1] valued image to BGR [0,255] with mean subtracted.""" - red, green, blue = tf.split(3, 3, rgb * 255.0) - bgr = tf.concat(3, [blue, green, red]) + red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb * 255.0) + bgr = tf.concat(axis=3, values=[blue, green, red]) bgr -= IMAGENET_MEAN_BGR return bgr def loss(logits, labels): - cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels) + cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels) cross_entropy_mean = tf.reduce_mean(cross_entropy) regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) loss_ = tf.add_n([cross_entropy_mean] + regularization_losses) - tf.scalar_summary('loss', loss_) + tf.summary.scalar('loss', loss_) return loss_ @@ -231,7 +231,7 @@ def bn(x, c): if c['use_bias']: bias = _get_variable('bias', params_shape, - initializer=tf.zeros_initializer) + initializer=tf.zeros_initializer()) return x + bias @@ -239,18 +239,18 @@ def bn(x, c): beta = _get_variable('beta', params_shape, - initializer=tf.zeros_initializer) + initializer=tf.zeros_initializer()) gamma = _get_variable('gamma', params_shape, - initializer=tf.ones_initializer) + initializer=tf.ones_initializer()) moving_mean = _get_variable('moving_mean', params_shape, - initializer=tf.zeros_initializer, + initializer=tf.zeros_initializer(), trainable=False) moving_variance = _get_variable('moving_variance', params_shape, - initializer=tf.ones_initializer, + initializer=tf.ones_initializer(), trainable=False) # These ops will only be preformed when training. 
@@ -284,7 +284,7 @@ def fc(x, c): weight_decay=FC_WEIGHT_STDDEV) biases = _get_variable('biases', shape=[num_units_out], - initializer=tf.zeros_initializer) + initializer=tf.zeros_initializer()) x = tf.nn.xw_plus_b(x, weights, biases) return x @@ -301,7 +301,7 @@ def _get_variable(name, regularizer = tf.contrib.layers.l2_regularizer(weight_decay) else: regularizer = None - collections = [tf.GraphKeys.VARIABLES, RESNET_VARIABLES] + collections = [tf.GraphKeys.GLOBAL_VARIABLES, RESNET_VARIABLES] return tf.get_variable(name, shape=shape, initializer=initializer, diff --git a/synthetic/experiments/tensorflow/cnn/resnet/resnet_train.py b/synthetic/experiments/tensorflow/cnn/resnet/resnet_train.py index 5a98f49..4c5f32c 100644 --- a/synthetic/experiments/tensorflow/cnn/resnet/resnet_train.py +++ b/synthetic/experiments/tensorflow/cnn/resnet/resnet_train.py @@ -38,45 +38,45 @@ def train(is_training, logits, images, labels): # loss_avg ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step) tf.add_to_collection(UPDATE_OPS_COLLECTION, ema.apply([loss_])) - tf.scalar_summary('loss_avg', ema.average(loss_)) + tf.summary.scalar('loss_avg', ema.average(loss_)) # validation stats ema = tf.train.ExponentialMovingAverage(0.9, val_step) val_op = tf.group(val_step.assign_add(1), ema.apply([top1_error])) top1_error_avg = ema.average(top1_error) - tf.scalar_summary('val_top1_error_avg', top1_error_avg) + tf.summary.scalar('val_top1_error_avg', top1_error_avg) - tf.scalar_summary('learning_rate', FLAGS.learning_rate) + tf.summary.scalar('learning_rate', FLAGS.learning_rate) opt = tf.train.MomentumOptimizer(FLAGS.learning_rate, MOMENTUM) grads = opt.compute_gradients(loss_) for grad, var in grads: if grad is not None and not FLAGS.minimal_summaries: - tf.histogram_summary(var.op.name + '/gradients', grad) + tf.summary.histogram(var.op.name + '/gradients', grad) apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) if not FLAGS.minimal_summaries: # Display the training images in the visualizer. - tf.image_summary('images', images) + tf.summary.image('images', images) for var in tf.trainable_variables(): - tf.histogram_summary(var.op.name, var) + tf.summary.histogram(var.op.name, var) batchnorm_updates = tf.get_collection(UPDATE_OPS_COLLECTION) batchnorm_updates_op = tf.group(*batchnorm_updates) train_op = tf.group(apply_gradient_op, batchnorm_updates_op) - saver = tf.train.Saver(tf.all_variables()) + saver = tf.train.Saver(tf.global_variables()) - summary_op = tf.merge_all_summaries() + summary_op = tf.summary.merge_all() - init = tf.initialize_all_variables() + init = tf.global_variables_initializer() sess = tf.Session(config=config) sess.run(init) tf.train.start_queue_runners(sess=sess) - summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) + summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph) if FLAGS.resume: latest = tf.train.latest_checkpoint(FLAGS.train_dir) diff --git a/synthetic/experiments/tensorflow/fc/ffn26752bm1.py b/synthetic/experiments/tensorflow/fc/ffn26752bm1.py new file mode 100644 index 0000000..9bfdc22 --- /dev/null +++ b/synthetic/experiments/tensorflow/fc/ffn26752bm1.py @@ -0,0 +1,86 @@ +# A feed-forward DNN with 5 hidden layers using sigmoid activations. 
+import os +import time +import tensorflow as tf +#import ffn +import argparse + +from ffn26752 import * + +device_str = '' + +def set_parameters(epochs, minibatch, iterations, device_id): + """ + iterations means the number of iterations in each epoch + """ + global device_str + if int(device_id) >= 0: + device_str = '/gpu:%d'%int(device_id) + else: + # cpus + device_str = '/cpu:0' + global numMinibatches + numMinibatches = iterations*epochs + #numMinibatches = (138493+minibatch-1)/minibatch * epochs + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument("-e", "--epochs", help="the number of epochs", type=int, default=4) + parser.add_argument("-b", "--minibatch", help="minibatch size", type=int, default=128) + parser.add_argument("-i", "--iterations", help="iterations", type=int, default=2) + parser.add_argument("-d", "--deviceid", help="specified device id", type=int, default=0) + args = parser.parse_args() + + epochs = args.epochs + minibatch = args.minibatch + iterations = args.iterations + device_id = args.deviceid + minibatchSize = args.minibatch + + set_parameters(epochs, minibatch, iterations, device_id) + + program_start_time = time.time() + + # Create the model + if (FLAGS.noInputFeed): + features, labels = getFakeMinibatch(minibatchSize) + else: + features = tf.placeholder("float", [None, featureDim]) + labels = tf.placeholder("float", [None, labelDim]) + config = tf.ConfigProto(allow_soft_placement=True) + if device_str.find('cpu') >= 0: # cpu version + num_threads = os.getenv('OMP_NUM_THREADS', 1) + print 'num_threads: ', num_threads + config = tf.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=int(num_threads)) + + + with tf.device(device_str): + crossEntropy, accuracy = getLossAndAccuracyForSubBatch(features, labels) + trainStep = tf.train.GradientDescentOptimizer(0.01).minimize(crossEntropy) + + # Train + #sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.logDevicePlacement, allow_soft_placement=True)) + sess = tf.Session(config=config) + init = tf.global_variables_initializer() + sess.run(init) + + perMinibatchTime = [] + for i in range(numMinibatches): + if (FLAGS.noInputFeed == False): + minibatchFeatures, minibatchLabels = getFakeMinibatch(minibatchSize) + + startTime = time.time() + if (FLAGS.noInputFeed): + sess.run([trainStep, accuracy]) + else: + sess.run([trainStep, accuracy], feed_dict={features: minibatchFeatures, labels: minibatchLabels}) + + currMinibatchDuration = time.time() - startTime + perMinibatchTime.append(currMinibatchDuration) + + printTrainingStats(1, minibatchSize, perMinibatchTime) + + program_end_time = time.time() + #print('Program finished, Total seconds: %s' % (program_end_time - program_start_time)) diff --git a/synthetic/experiments/tensorflow/fc/report.txt b/synthetic/experiments/tensorflow/fc/report.txt new file mode 100644 index 0000000..3c69d4b --- /dev/null +++ b/synthetic/experiments/tensorflow/fc/report.txt @@ -0,0 +1,6 @@ +-------------------------------------------------------------------------------- +Processing file 'ffn26752.py' + outputting to 'ffn267521.py' +-------------------------------------------------------------------------------- + + diff --git a/synthetic/experiments/tensorflow/fc/tf_upgrade.py b/synthetic/experiments/tensorflow/fc/tf_upgrade.py new file mode 100644 index 0000000..bcff10f --- /dev/null +++ b/synthetic/experiments/tensorflow/fc/tf_upgrade.py @@ -0,0 +1,681 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Upgrader for Python scripts from pre-1.0 TensorFlow to 1.0 TensorFlow.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import argparse +import ast +import collections +import os +import shutil +import sys +import tempfile +import traceback + + +class APIChangeSpec(object): + """List of maps that describe what changed in the API.""" + + def __init__(self): + # Maps from a function name to a dictionary that describes how to + # map from an old argument keyword to the new argument keyword. + self.function_keyword_renames = { + "tf.count_nonzero": { + "reduction_indices": "axis" + }, + "tf.reduce_all": { + "reduction_indices": "axis" + }, + "tf.reduce_any": { + "reduction_indices": "axis" + }, + "tf.reduce_max": { + "reduction_indices": "axis" + }, + "tf.reduce_mean": { + "reduction_indices": "axis" + }, + "tf.reduce_min": { + "reduction_indices": "axis" + }, + "tf.reduce_prod": { + "reduction_indices": "axis" + }, + "tf.reduce_sum": { + "reduction_indices": "axis" + }, + "tf.reduce_logsumexp": { + "reduction_indices": "axis" + }, + "tf.expand_dims": { + "dim": "axis" + }, + "tf.argmax": { + "dimension": "axis" + }, + "tf.argmin": { + "dimension": "axis" + }, + "tf.reduce_join": { + "reduction_indices": "axis" + }, + "tf.sparse_concat": { + "concat_dim": "axis" + }, + "tf.sparse_split": { + "split_dim": "axis" + }, + "tf.sparse_reduce_sum": { + "reduction_axes": "axis" + }, + "tf.reverse_sequence": { + "seq_dim": "seq_axis", + "batch_dim": "batch_axis" + }, + "tf.sparse_reduce_sum_sparse": { + "reduction_axes": "axis" + }, + "tf.squeeze": { + "squeeze_dims": "axis" + }, + "tf.split": { + "split_dim": "axis", + "num_split": "num_or_size_splits" + }, + "tf.concat": { + "concat_dim": "axis" + }, + } + + # Mapping from function to the new name of the function + self.function_renames = { + "tf.inv": "tf.reciprocal", + "tf.contrib.deprecated.scalar_summary": "tf.summary.scalar", + "tf.contrib.deprecated.histogram_summary": "tf.summary.histogram", + "tf.listdiff": "tf.setdiff1d", + "tf.list_diff": "tf.setdiff1d", + "tf.mul": "tf.multiply", + "tf.neg": "tf.negative", + "tf.sub": "tf.subtract", + "tf.train.SummaryWriter": "tf.summary.FileWriter", + "tf.scalar_summary": "tf.summary.scalar", + "tf.histogram_summary": "tf.summary.histogram", + "tf.audio_summary": "tf.summary.audio", + "tf.image_summary": "tf.summary.image", + "tf.merge_summary": "tf.summary.merge", + "tf.merge_all_summaries": "tf.summary.merge_all", + "tf.image.per_image_whitening": "tf.image.per_image_standardization", + "tf.all_variables": "tf.global_variables", + "tf.VARIABLES": "tf.GLOBAL_VARIABLES", + "tf.initialize_all_variables": "tf.global_variables_initializer", + "tf.initialize_variables": "tf.variables_initializer", + "tf.initialize_local_variables": "tf.local_variables_initializer", + "tf.batch_matrix_diag": "tf.matrix_diag", + 
"tf.batch_band_part": "tf.band_part", + "tf.batch_set_diag": "tf.set_diag", + "tf.batch_matrix_transpose": "tf.matrix_transpose", + "tf.batch_matrix_determinant": "tf.matrix_determinant", + "tf.batch_matrix_inverse": "tf.matrix_inverse", + "tf.batch_cholesky": "tf.cholesky", + "tf.batch_cholesky_solve": "tf.cholesky_solve", + "tf.batch_matrix_solve": "tf.matrix_solve", + "tf.batch_matrix_triangular_solve": "tf.matrix_triangular_solve", + "tf.batch_matrix_solve_ls": "tf.matrix_solve_ls", + "tf.batch_self_adjoint_eig": "tf.self_adjoint_eig", + "tf.batch_self_adjoint_eigvals": "tf.self_adjoint_eigvals", + "tf.batch_svd": "tf.svd", + "tf.batch_fft": "tf.fft", + "tf.batch_ifft": "tf.ifft", + "tf.batch_ifft2d": "tf.ifft2d", + "tf.batch_fft3d": "tf.fft3d", + "tf.batch_ifft3d": "tf.ifft3d", + "tf.select": "tf.where", + "tf.complex_abs": "tf.abs", + "tf.batch_matmul": "tf.matmul", + "tf.pack": "tf.stack", + "tf.unpack": "tf.unstack", + } + + self.change_to_function = { + "tf.ones_initializer", + "tf.zeros_initializer", + } + + # Functions that were reordered should be changed to the new keyword args + # for safety, if positional arguments are used. If you have reversed the + # positional arguments yourself, this could do the wrong thing. + self.function_reorders = { + "tf.split": ["axis", "num_or_size_splits", "value", "name"], + "tf.sparse_split": ["axis", "num_or_size_splits", "value", "name"], + "tf.concat": ["concat_dim", "values", "name"], + "tf.svd": ["tensor", "compute_uv", "full_matrices", "name"], + "tf.nn.softmax_cross_entropy_with_logits": [ + "logits", "labels", "dim", "name"], + "tf.nn.sparse_softmax_cross_entropy_with_logits": [ + "logits", "labels", "name"], + "tf.nn.sigmoid_cross_entropy_with_logits": [ + "logits", "labels", "name"] + } + + # Specially handled functions. + self.function_handle = {"tf.reverse": self._reverse_handler} + + @staticmethod + def _reverse_handler(file_edit_recorder, node): + # TODO(aselle): Could check for a literal list of bools and try to convert + # them to indices. + comment = ("ERROR: tf.reverse has had its argument semantics changed\n" + "significantly the converter cannot detect this reliably, so you" + "need to inspect this usage manually.\n") + file_edit_recorder.add(comment, + node.lineno, + node.col_offset, + "tf.reverse", + "tf.reverse", + error="tf.reverse requires manual check.") + + +class FileEditTuple(collections.namedtuple( + "FileEditTuple", ["comment", "line", "start", "old", "new"])): + """Each edit that is recorded by a FileEditRecorder. + + Fields: + comment: A description of the edit and why it was made. + line: The line number in the file where the edit occurs (1-indexed). + start: The line number in the file where the edit occurs (0-indexed). + old: text string to remove (this must match what was in file). + new: text string to add in place of `old`. + """ + + __slots__ = () + + +class FileEditRecorder(object): + """Record changes that need to be done to the file.""" + + def __init__(self, filename): + # all edits are lists of chars + self._filename = filename + + self._line_to_edit = collections.defaultdict(list) + self._errors = [] + + def process(self, text): + """Process a list of strings, each corresponding to the recorded changes. + + Args: + text: A list of lines of text (assumed to contain newlines) + Returns: + A tuple of the modified text and a textual description of what is done. + Raises: + ValueError: if substitution source location does not have expected text. 
+ """ + + change_report = "" + + # Iterate of each line + for line, edits in self._line_to_edit.items(): + offset = 0 + # sort by column so that edits are processed in order in order to make + # indexing adjustments cumulative for changes that change the string + # length + edits.sort(key=lambda x: x.start) + + # Extract each line to a list of characters, because mutable lists + # are editable, unlike immutable strings. + char_array = list(text[line - 1]) + + # Record a description of the change + change_report += "%r Line %d\n" % (self._filename, line) + change_report += "-" * 80 + "\n\n" + for e in edits: + change_report += "%s\n" % e.comment + change_report += "\n Old: %s" % (text[line - 1]) + + # Make underscore buffers for underlining where in the line the edit was + change_list = [" "] * len(text[line - 1]) + change_list_new = [" "] * len(text[line - 1]) + + # Iterate for each edit + for e in edits: + # Create effective start, end by accounting for change in length due + # to previous edits + start_eff = e.start + offset + end_eff = start_eff + len(e.old) + + # Make sure the edit is changing what it should be changing + old_actual = "".join(char_array[start_eff:end_eff]) + if old_actual != e.old: + raise ValueError("Expected text %r but got %r" % + ("".join(e.old), "".join(old_actual))) + # Make the edit + char_array[start_eff:end_eff] = list(e.new) + + # Create the underline highlighting of the before and after + change_list[e.start:e.start + len(e.old)] = "~" * len(e.old) + change_list_new[start_eff:end_eff] = "~" * len(e.new) + + # Keep track of how to generate effective ranges + offset += len(e.new) - len(e.old) + + # Finish the report comment + change_report += " %s\n" % "".join(change_list) + text[line - 1] = "".join(char_array) + change_report += " New: %s" % (text[line - 1]) + change_report += " %s\n\n" % "".join(change_list_new) + return "".join(text), change_report, self._errors + + def add(self, comment, line, start, old, new, error=None): + """Add a new change that is needed. + + Args: + comment: A description of what was changed + line: Line number (1 indexed) + start: Column offset (0 indexed) + old: old text + new: new text + error: this "edit" is something that cannot be fixed automatically + Returns: + None + """ + + self._line_to_edit[line].append( + FileEditTuple(comment, line, start, old, new)) + if error: + self._errors.append("%s:%d: %s" % (self._filename, line, error)) + + +class TensorFlowCallVisitor(ast.NodeVisitor): + """AST Visitor that finds TensorFlow Function calls. + + Updates function calls from old API version to new API version. + """ + + def __init__(self, filename, lines): + self._filename = filename + self._file_edit = FileEditRecorder(filename) + self._lines = lines + self._api_change_spec = APIChangeSpec() + + def process(self, lines): + return self._file_edit.process(lines) + + def generic_visit(self, node): + ast.NodeVisitor.generic_visit(self, node) + + def _rename_functions(self, node, full_name): + function_renames = self._api_change_spec.function_renames + try: + new_name = function_renames[full_name] + self._file_edit.add("Renamed function %r to %r" % (full_name, + new_name), + node.lineno, node.col_offset, full_name, new_name) + except KeyError: + pass + + def _get_attribute_full_path(self, node): + """Traverse an attribute to generate a full name e.g. tf.foo.bar. + + Args: + node: A Node of type Attribute. + + Returns: + a '.'-delimited full-name or None if the tree was not a simple form. + i.e. 
`foo()+b).bar` returns None, while `a.b.c` would return "a.b.c". + """ + curr = node + items = [] + while not isinstance(curr, ast.Name): + if not isinstance(curr, ast.Attribute): + return None + items.append(curr.attr) + curr = curr.value + items.append(curr.id) + return ".".join(reversed(items)) + + def _find_true_position(self, node): + """Return correct line number and column offset for a given node. + + This is necessary mainly because ListComp's location reporting reports + the next token after the list comprehension list opening. + + Args: + node: Node for which we wish to know the lineno and col_offset + """ + import re + find_open = re.compile("^\s*(\\[).*$") + find_string_chars = re.compile("['\"]") + + if isinstance(node, ast.ListComp): + # Strangely, ast.ListComp returns the col_offset of the first token + # after the '[' token which appears to be a bug. Workaround by + # explicitly finding the real start of the list comprehension. + line = node.lineno + col = node.col_offset + # loop over lines + while 1: + # Reverse the text to and regular expression search for whitespace + text = self._lines[line-1] + reversed_preceding_text = text[:col][::-1] + # First find if a [ can be found with only whitespace between it and + # col. + m = find_open.match(reversed_preceding_text) + if m: + new_col_offset = col - m.start(1) - 1 + return line, new_col_offset + else: + if (reversed_preceding_text=="" or + reversed_preceding_text.isspace()): + line = line - 1 + prev_line = self._lines[line - 1] + # TODO(aselle): + # this is poor comment detection, but it is good enough for + # cases where the comment does not contain string literal starting/ + # ending characters. If ast gave us start and end locations of the + # ast nodes rather than just start, we could use string literal + # node ranges to filter out spurious #'s that appear in string + # literals. + comment_start = prev_line.find("#") + if comment_start == -1: + col = len(prev_line) -1 + elif find_string_chars.search(prev_line[comment_start:]) is None: + col = comment_start + else: + return None, None + else: + return None, None + # Most other nodes return proper locations (with notably does not), but + # it is not possible to use that in an argument. + return node.lineno, node.col_offset + + + def visit_Call(self, node): # pylint: disable=invalid-name + """Handle visiting a call node in the AST. + + Args: + node: Current Node + """ + + + # Find a simple attribute name path e.g. "tf.foo.bar" + full_name = self._get_attribute_full_path(node.func) + + # Make sure the func is marked as being part of a call + node.func.is_function_for_call = True + + if full_name and full_name.startswith("tf."): + # Call special handlers + function_handles = self._api_change_spec.function_handle + if full_name in function_handles: + function_handles[full_name](self._file_edit, node) + + # Examine any non-keyword argument and make it into a keyword argument + # if reordering required. 
+ function_reorders = self._api_change_spec.function_reorders + function_keyword_renames = ( + self._api_change_spec.function_keyword_renames) + + if full_name in function_reorders: + reordered = function_reorders[full_name] + for idx, arg in enumerate(node.args): + lineno, col_offset = self._find_true_position(arg) + if lineno is None or col_offset is None: + self._file_edit.add( + "Failed to add keyword %r to reordered function %r" + % (reordered[idx], full_name), arg.lineno, arg.col_offset, + "", "", + error="A necessary keyword argument failed to be inserted.") + else: + keyword_arg = reordered[idx] + if (full_name in function_keyword_renames and + keyword_arg in function_keyword_renames[full_name]): + keyword_arg = function_keyword_renames[full_name][keyword_arg] + self._file_edit.add("Added keyword %r to reordered function %r" + % (reordered[idx], full_name), lineno, + col_offset, "", keyword_arg + "=") + + # Examine each keyword argument and convert it to the final renamed form + renamed_keywords = ({} if full_name not in function_keyword_renames else + function_keyword_renames[full_name]) + for keyword in node.keywords: + argkey = keyword.arg + argval = keyword.value + + if argkey in renamed_keywords: + argval_lineno, argval_col_offset = self._find_true_position(argval) + if (argval_lineno is not None and argval_col_offset is not None): + # TODO(aselle): We should scan backward to find the start of the + # keyword key. Unfortunately ast does not give you the location of + # keyword keys, so we are forced to infer it from the keyword arg + # value. + key_start = argval_col_offset - len(argkey) - 1 + key_end = key_start + len(argkey) + 1 + if self._lines[argval_lineno - 1][key_start:key_end] == argkey + "=": + self._file_edit.add("Renamed keyword argument from %r to %r" % + (argkey, renamed_keywords[argkey]), + argval_lineno, + argval_col_offset - len(argkey) - 1, + argkey + "=", renamed_keywords[argkey] + "=") + continue + self._file_edit.add( + "Failed to rename keyword argument from %r to %r" % + (argkey, renamed_keywords[argkey]), + argval.lineno, + argval.col_offset - len(argkey) - 1, + "", "", + error="Failed to find keyword lexographically. Fix manually.") + + ast.NodeVisitor.generic_visit(self, node) + + def visit_Attribute(self, node): # pylint: disable=invalid-name + """Handle bare Attributes i.e. [tf.foo, tf.bar]. + + Args: + node: Node that is of type ast.Attribute + """ + full_name = self._get_attribute_full_path(node) + if full_name and full_name.startswith("tf."): + self._rename_functions(node, full_name) + if full_name in self._api_change_spec.change_to_function: + if not hasattr(node, "is_function_for_call"): + new_text = full_name + "()" + self._file_edit.add("Changed %r to %r"%(full_name, new_text), + node.lineno, node.col_offset, full_name, new_text) + + ast.NodeVisitor.generic_visit(self, node) + + +class TensorFlowCodeUpgrader(object): + """Class that handles upgrading a set of Python files to TensorFlow 1.0.""" + + def __init__(self): + pass + + def process_file(self, in_filename, out_filename): + """Process the given python file for incompatible changes. + + Args: + in_filename: filename to parse + out_filename: output file to write to + Returns: + A tuple representing number of files processed, log of actions, errors + """ + + # Write to a temporary file, just in case we are doing an implace modify. 
+ with open(in_filename, "r") as in_file, \ + tempfile.NamedTemporaryFile("w", delete=False) as temp_file: + ret = self.process_opened_file( + in_filename, in_file, out_filename, temp_file) + + shutil.move(temp_file.name, out_filename) + return ret + + # Broad exceptions are required here because ast throws whatever it wants. + # pylint: disable=broad-except + def process_opened_file(self, in_filename, in_file, out_filename, out_file): + """Process the given python file for incompatible changes. + + This function is split out to facilitate StringIO testing from + tf_upgrade_test.py. + + Args: + in_filename: filename to parse + in_file: opened file (or StringIO) + out_filename: output file to write to + out_file: opened file (or StringIO) + Returns: + A tuple representing number of files processed, log of actions, errors + """ + process_errors = [] + text = "-" * 80 + "\n" + text += "Processing file %r\n outputting to %r\n" % (in_filename, + out_filename) + text += "-" * 80 + "\n\n" + + parsed_ast = None + lines = in_file.readlines() + try: + parsed_ast = ast.parse("".join(lines)) + except Exception: + text += "Failed to parse %r\n\n" % in_filename + text += traceback.format_exc() + if parsed_ast: + visitor = TensorFlowCallVisitor(in_filename, lines) + visitor.visit(parsed_ast) + out_text, new_text, process_errors = visitor.process(lines) + text += new_text + if out_file: + out_file.write(out_text) + text += "\n" + return 1, text, process_errors + # pylint: enable=broad-except + + def process_tree(self, root_directory, output_root_directory): + """Processes upgrades on an entire tree of python files in place. + + Note that only Python files. If you have custom code in other languages, + you will need to manually upgrade those. + + Args: + root_directory: Directory to walk and process. + output_root_directory: Directory to use as base + Returns: + A tuple of files processed, the report string ofr all files, and errors + """ + + # make sure output directory doesn't exist + if output_root_directory and os.path.exists(output_root_directory): + print("Output directory %r must not already exist." 
% ( + output_root_directory)) + sys.exit(1) + + # make sure output directory does not overlap with root_directory + norm_root = os.path.split(os.path.normpath(root_directory)) + norm_output = os.path.split(os.path.normpath(output_root_directory)) + if norm_root == norm_output: + print("Output directory %r same as input directory %r" % ( + root_directory, output_root_directory)) + sys.exit(1) + + # Collect list of files to process (we do this to correctly handle if the + # user puts the output directory in some sub directory of the input dir) + files_to_process = [] + for dir_name, _, file_list in os.walk(root_directory): + py_files = [f for f in file_list if f.endswith(".py")] + for filename in py_files: + fullpath = os.path.join(dir_name, filename) + fullpath_output = os.path.join( + output_root_directory, os.path.relpath(fullpath, root_directory)) + files_to_process.append((fullpath, fullpath_output)) + + file_count = 0 + tree_errors = [] + report = "" + report += ("=" * 80) + "\n" + report += "Input tree: %r\n" % root_directory + report += ("=" * 80) + "\n" + + for input_path, output_path in files_to_process: + output_directory = os.path.dirname(output_path) + if not os.path.isdir(output_directory): + os.makedirs(output_directory) + file_count += 1 + _, l_report, l_errors = self.process_file(input_path, output_path) + tree_errors += l_errors + report += l_report + return file_count, report, tree_errors + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description="""Convert a TensorFlow Python file to 1.0 + +Simple usage: + tf_convert.py --infile foo.py --outfile bar.py + tf_convert.py --intree ~/code/old --outtree ~/code/new +""") + parser.add_argument( + "--infile", + dest="input_file", + help="If converting a single file, the name of the file " + "to convert") + parser.add_argument( + "--outfile", + dest="output_file", + help="If converting a single file, the output filename.") + parser.add_argument( + "--intree", + dest="input_tree", + help="If converting a whole tree of files, the directory " + "to read from (relative or absolute).") + parser.add_argument( + "--outtree", + dest="output_tree", + help="If converting a whole tree of files, the output " + "directory (relative or absolute).") + parser.add_argument( + "--reportfile", + dest="report_filename", + help=("The name of the file where the report log is " + "stored." 
+ "(default: %(default)s)"), + default="report.txt") + args = parser.parse_args() + + upgrade = TensorFlowCodeUpgrader() + report_text = None + report_filename = args.report_filename + files_processed = 0 + if args.input_file: + files_processed, report_text, errors = upgrade.process_file( + args.input_file, args.output_file) + files_processed = 1 + elif args.input_tree: + files_processed, report_text, errors = upgrade.process_tree( + args.input_tree, args.output_tree) + else: + parser.print_help() + if report_text: + open(report_filename, "w").write(report_text) + print("TensorFlow 1.0 Upgrade Script") + print("-----------------------------") + print("Converted %d files\n" % files_processed) + print("Detected %d errors that require attention" % len(errors)) + print("-" * 80) + print("\n".join(errors)) + print("\nMake sure to read the detailed log %r\n" % report_filename) diff --git a/synthetic/scripts/batch-bencmarks-gpu-gpu15.sh b/synthetic/scripts/batch-bencmarks-gpu-gpu15.sh index 856f57b..dd3a942 100755 --- a/synthetic/scripts/batch-bencmarks-gpu-gpu15.sh +++ b/synthetic/scripts/batch-bencmarks-gpu-gpu15.sh @@ -2,25 +2,25 @@ # The benchmarks of all toolkits # GPU-0 AlexNet -#minibatch=16 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh -#minibatch=32 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh -#minibatch=64 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh -#minibatch=128 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh +minibatch=16 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh +minibatch=32 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh +minibatch=64 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh +minibatch=128 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh # ## GPU-0 RetNet -#minibatch=8 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh -#minibatch=16 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh -#minibatch=32 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh -#minibatch=64 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh +minibatch=8 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh +minibatch=16 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh +minibatch=32 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh +minibatch=64 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh # # GPU-0 Fully Connected: FFN26752 minibatch=32 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh minibatch=64 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh minibatch=128 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh -#minibatch=256 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh -#minibatch=512 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh -#minibatch=1024 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh +minibatch=256 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh +minibatch=512 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh +minibatch=1024 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh ## GPU-0 Fully Connected: FFN26752 6 Hidden Layers #minibatch=256 
iterations=8 epochs=4 device_id=0 network_name=ffn26752l6 ./fc-benchmarks.sh diff --git a/synthetic/scripts/batch-bencmarks-gpu-gpu20.sh b/synthetic/scripts/batch-bencmarks-gpu-gpu20.sh new file mode 100755 index 0000000..dd3a942 --- /dev/null +++ b/synthetic/scripts/batch-bencmarks-gpu-gpu20.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# The benchmarks of all toolkits + +# GPU-0 AlexNet +minibatch=16 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh +minibatch=32 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh +minibatch=64 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh +minibatch=128 iterations=8 epochs=4 device_id=0 network_name=alexnet ./cnn-benchmarks.sh +# +## GPU-0 RetNet +minibatch=8 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh +minibatch=16 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh +minibatch=32 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh +minibatch=64 iterations=8 epochs=4 device_id=0 network_name=resnet ./cnn-benchmarks.sh +# + +# GPU-0 Fully Connected: FFN26752 +minibatch=32 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh +minibatch=64 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh +minibatch=128 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh +minibatch=256 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh +minibatch=512 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh +minibatch=1024 iterations=8 epochs=4 device_id=0 network_name=ffn26752 ./fc-benchmarks.sh + +## GPU-0 Fully Connected: FFN26752 6 Hidden Layers +#minibatch=256 iterations=8 epochs=4 device_id=0 network_name=ffn26752l6 ./fc-benchmarks.sh +#minibatch=512 iterations=8 epochs=4 device_id=0 network_name=ffn26752l6 ./fc-benchmarks.sh +#minibatch=1024 iterations=8 epochs=4 device_id=0 network_name=ffn26752l6 ./fc-benchmarks.sh +#minibatch=2048 iterations=8 epochs=4 device_id=0 network_name=ffn26752l6 ./fc-benchmarks.sh +#minibatch=4096 iterations=8 epochs=4 device_id=0 network_name=ffn26752l6 ./fc-benchmarks.sh diff --git a/synthetic/scripts/cnn-benchmarks.sh b/synthetic/scripts/cnn-benchmarks.sh index 13e1e8c..35d0ac5 100755 --- a/synthetic/scripts/cnn-benchmarks.sh +++ b/synthetic/scripts/cnn-benchmarks.sh @@ -4,8 +4,9 @@ ########### # CNN ########### -REPO_HOME=/home/comp/pengfeixu/dlbench/synthetic +#REPO_HOME=/home/comp/pengfeixu/dlbench/synthetic #REPO_HOME=/home/ipdps/dpBenchmark/synthetic +REPO_HOME=/home/comp/csshshi/repositories/dpBenchmark/synthetic current_path=$REPO_HOME/scripts experiments_path=$REPO_HOME/experiments log_path=$REPO_HOME/logs @@ -36,7 +37,7 @@ hostName=`hostname` #tools=( "caffe" "cntk" "tensorflow" "torch" ) -tools=( "mxnet" ) +tools=( "mxnet" "cntk") benchmark_logfile=${current_path}/${network_type}-${network_name}-gpu${device_id}.bm echo -e 'GPU:'${device_id}'\nNUM_THREADS (for CPU): '${OMP_NUM_THREADS}'\nNetwork: '${network_name}'\nEpochs: '${epochs}'\nMinibatch: '${minibatch}'\nIterations: '${iterations}'\nBenchmark Time: '${running_time}'\n_________________\n'>> ${benchmark_logfile} echo -e 'ToolName\t\t\tAverageTime(s)'>>${benchmark_logfile} diff --git a/synthetic/scripts/fc-benchmarks.sh b/synthetic/scripts/fc-benchmarks.sh index e18381f..348993e 100755 --- a/synthetic/scripts/fc-benchmarks.sh +++ b/synthetic/scripts/fc-benchmarks.sh @@ -4,8 +4,8 @@ ########### # CNN ########### 
-REPO_HOME=/home/ipdps/dpBenchmark/synthetic -#REPO_HOME=/home/comp/csshshi/repositories/dpBenchmark/synthetic +#REPO_HOME=/home/ipdps/dpBenchmark/synthetic +REPO_HOME=/home/comp/csshshi/repositories/dpBenchmark/synthetic current_path=$REPO_HOME/scripts experiments_path=$REPO_HOME/experiments log_path=$REPO_HOME/logs @@ -36,7 +36,7 @@ hostName=`hostname` #tools=( "caffe" "cntk" "dsstne" "tensorflow" "torch" ) #tools=( "caffe" "cntk" "tensorflow" "torch" ) #cpu versions, exclude dsstne -tools=( "cntk" ) +tools=( "cntk" "mxnet") benchmark_logfile=${current_path}/${network_type}-${network_name}-gpu${device_id}.bm echo -e 'GPU:'${device_id}'\nNUM_THREADS (for CPU): '${OMP_NUM_THREADS}'\nNetwork: '${network_name}'\nEpochs: '${epochs}'\nMinibatch: '${minibatch}'\nIterations: '${iterations}'\nBenchmark Time: '${running_time}'\n_________________\n'>> ${benchmark_logfile} echo -e 'ToolName\t\t\tAverageTime(s)'>>${benchmark_logfile} diff --git a/tools/caffe/caffebm.py b/tools/caffe/caffebm.py index 5826c59..c608d70 100644 --- a/tools/caffe/caffebm.py +++ b/tools/caffe/caffebm.py @@ -25,6 +25,7 @@ # Set system variable os.environ['OMP_NUM_THREADS'] = args.cpuCount os.environ['OPENBLAS_NUM_THREADS'] = args.cpuCount +os.environ['MKL_NUM_THREADS'] = args.cpuCount # Build cmd for benchmark root_path = os.path.dirname(os.path.abspath(__file__)) diff --git a/tools/cntk/cnn/alexnet/alexnet_cifar10.cntk b/tools/cntk/cnn/alexnet/alexnet_cifar10.cntk index efd6162..55fee8e 100644 --- a/tools/cntk/cnn/alexnet/alexnet_cifar10.cntk +++ b/tools/cntk/cnn/alexnet/alexnet_cifar10.cntk @@ -1,7 +1,7 @@ WorkDir=. OutputDir = "$WorkDir$/Output" ModelDir = "$OutputDir$/Models" -DataDir = "/home/ipdps/data/cntk/cifar10" +DataDir = "/home/comp/csshshi/data/cntk/cifar10" #DataDir = "/home/comp/pengfeixu/Data/cntk/cifar10" precision=float diff --git a/tools/cntk/cnn/resnet/resnet.cntk b/tools/cntk/cnn/resnet/resnet.cntk index 6f6a451..2a00deb 100644 --- a/tools/cntk/cnn/resnet/resnet.cntk +++ b/tools/cntk/cnn/resnet/resnet.cntk @@ -1,7 +1,7 @@ RootDir = "." ConfigDir = "$RootDir$" -DataDir = "/home/ipdps/data/cntk/cifar10" +DataDir = "/home/comp/csshshi/data/cntk/cifar10" #DataDir = "/home/comp/pengfeixu/Data/cntk/cifar10" #DataDir = "/home/ipdps/Data/cntk/cifar10" OutputDir = "$RootDir$/Output" diff --git a/tools/cntk/cntkbm.py b/tools/cntk/cntkbm.py index c782685..e5f2422 100644 --- a/tools/cntk/cntkbm.py +++ b/tools/cntk/cntkbm.py @@ -26,6 +26,7 @@ # Set system variable os.environ['OMP_NUM_THREADS'] = args.cpuCount os.environ['OPENBLAS_NUM_THREADS'] = args.cpuCount +os.environ['MKL_NUM_THREADS'] = args.cpuCount # Build cmd for benchmark root_path = os.path.dirname(os.path.abspath(__file__)) diff --git a/tools/cntk/fc/fcn5.cntk b/tools/cntk/fc/fcn5.cntk index db6a3c8..9246a37 100644 --- a/tools/cntk/fc/fcn5.cntk +++ b/tools/cntk/fc/fcn5.cntk @@ -1,7 +1,7 @@ WorkDir= "." ConfigDir= "." 
ModelDir=$WorkDir$/Output -DataDir=/home/ipdps/data/cntk/mnist +DataDir=/home/comp/csshshi/data/cntk/mnist #DataDir=/home/comp/pengfeixu/Data/cntk/mnist #ndlMacros = "$ConfigDir$/Macros.ndl" precision=float diff --git a/tools/cntk/multinodes/fc/Macros.ndl b/tools/cntk/multinodes/fc/Macros.ndl deleted file mode 100644 index f6e5ee0..0000000 --- a/tools/cntk/multinodes/fc/Macros.ndl +++ /dev/null @@ -1,35 +0,0 @@ -ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) -{ - convW = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale) - # conv = Convolution(convW, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true) - conv = Convolution(convW, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout="cudnn") - convB = Parameter(outMap, 1, init = fixedValue, value = bValue) - convPlusB = Plus(conv, convB); - act = RectifiedLinear(convPlusB); -} - -#ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) -#[ -# W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale) -# b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn") -# c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn") -# z = Plus(c, b); -# y = RectifiedLinear(z); -#] - -DNNReLULayer(inDim, outDim, x, wScale, bValue) -{ - W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale) - b = Parameter(outDim, init = fixedValue, value = bValue) - t = Times(W, x) - z = Plus(t, b) - y = RectifiedLinear(z) -} - -DNNLastLayer(hiddenDim, labelDim, x, wScale, bValue) -{ - W = Parameter(labelDim, hiddenDim, init = Gaussian, initValueScale = wScale) - b = Parameter(labelDim, init = fixedValue, value = bValue) - t = Times(W, x) - z = Plus(t, b) -} diff --git a/tools/cntk/multinodes/fc/fc.sh b/tools/cntk/multinodes/fc/fc.sh deleted file mode 100644 index 1b61586..0000000 --- a/tools/cntk/multinodes/fc/fc.sh +++ /dev/null @@ -1,6 +0,0 @@ -start=`date +%s.%N` -cntk configFile=ffn26752.cntk configName=ffn >1GPU.log 2>&1 -end=`date +%s.%N` -runtime=$( echo "$end - $start" | bc -l ) -echo "finished with execute time: ${runtime}" >>1GPU.log - diff --git a/tools/cntk/multinodes/fc/fcn5.cntk b/tools/cntk/multinodes/fc/fcn5.cntk deleted file mode 100644 index 043ea80..0000000 --- a/tools/cntk/multinodes/fc/fcn5.cntk +++ /dev/null @@ -1,78 +0,0 @@ -WorkDir=. 
-ModelDir=$WorkDir$/Output/$ConfigName$ -#stderr=$WorkDir$/logs/$ConfigName$/out -DataDir=/home/dl/data/cntk -precision=float - -deviceId=0 -minibatchSize=1024 -epochSize=4096 -maxEpochs=4 - -makeMode=false - -command=Train - -featureDim = 26752 -labelDim = 26752 -hiddenDim = 2048 - -initOnCPUOnly=true -parallelTrain=true -prefetch=true - -Train=[ - action=train - modelPath=$ModelDir$/fc26752 - traceLevel=1 - - SimpleNetworkBuilder=[ - layerSizes=$featureDim$:$hiddenDim$:$hiddenDim$:$hiddenDim$:$labelDim$ - initOnCPUOnly=true - trainingCriterion=CrossEntropyWithSoftmax - evalCriterion=ErrorPrediction - layerTypes=Sigmoid - applyMeanVarNorm=false - initValueScale=1.0 - uniformInit=true - needPrior=false - ] - - SGD=[ - epochSize=$epochSize$ - minibatchSize=$minibatchSize$ - maxEpochs=$maxEpochs$ - learningRatesPerMB=0.01 - numMBsToShowResult=4 - momentumPerSample=0 - dropoutRate=0.0 - - ParallelTrain=[ - parallelizationMethod=DataParallelSGD - distributedMBReading=true - parallelizationStartEpoch=1 - DataParallelSGD=[ - gradientBits=1 - ] - ] - - gradUpdateType=None - normWithAveMultiplier=true - clippingThresholdPerSample=1#INF - ] -] - -reader=[ - readerType=UCIFastReader - file=$DataDir$/data26752_4k.txt - features=[ - dim=$featureDim$ - start=1 - ] - labels=[ - dim=1 - start=0 - labelDim=$labelDim$ - labelMappingFile=$DataDir$/labelmap26752.txt - ] -] diff --git a/tools/cntk/multinodes/fc/fcn8.cntk b/tools/cntk/multinodes/fc/fcn8.cntk deleted file mode 100644 index 069f877..0000000 --- a/tools/cntk/multinodes/fc/fcn8.cntk +++ /dev/null @@ -1,89 +0,0 @@ -WorkDir=. -ModelDir=$WorkDir$/Output/$ConfigName$ -#stderr=$WorkDir$/logs/$ConfigName$/out -DataDir=/home/dl/data/cntk -precision=float - -deviceId=0 -minibatchSize=1024 -epochSize=4096 -maxEpochs=2 - -makeMode=false - -command=Train - -featureDim = 26752 -labelDim = 26752 -hiddenDim = 2048 - -initOnCPUOnly=true -parallelTrain=false -prefetch=true - -Train=[ - action=train - modelPath=$ModelDir$/fc26752l6 - #deviceId=1 - traceLevel=1 - - SimpleNetworkBuilder=[ - #layerSizes=$featureDim$:$hiddenDim$:$hiddenDim$:$hiddenDim$:$hiddenDim$:$labelDim$ - layerSizes=$featureDim$:$hiddenDim$:$hiddenDim$:$hiddenDim$:$hiddenDim$:$hiddenDim$:$hiddenDim$:$labelDim$ - initOnCPUOnly=true - trainingCriterion=CrossEntropyWithSoftmax - evalCriterion=ErrorPrediction - layerTypes=Sigmoid - applyMeanVarNorm=false - initValueScale=1.0 - uniformInit=true - needPrior=false - ] - - SGD=[ - epochSize=$epochSize$ - minibatchSize=$minibatchSize$ - maxEpochs=$maxEpochs$ - learningRatesPerMB=0.01 - numMBsToShowResult=4 - momentumPerSample=0 - dropoutRate=0.0 - - #epochSize=4096 - #minibatchSize=256 - #maxEpochs=2 - #learningRatesPerMB=0.01 - #numMBsToShowResult=4 - #momentumPerSample=0 - #dropoutRate=0.0 - - ParallelTrain=[ - parallelizationMethod=DataParallelSGD - distributedMBReading=true - parallelizationStartEpoch=1 - DataParallelSGD=[ - gradientBits=1 - ] - ] - - gradUpdateType=None - normWithAveMultiplier=true - clippingThresholdPerSample=1#INF - ] -] - -reader=[ - readerType=UCIFastReader - file=$DataDir$/data26752_4k.txt - features=[ - dim=$featureDim$ - start=1 - ] - labels=[ - dim=1 - start=0 - labelDim=$labelDim$ - labelMappingFile=$DataDir$/labelmap26752.txt - #labelMappingFile=$WorkDir$/labelmap.txt - ] -] diff --git a/tools/cntk/multinodes/fc/ffn.cntk b/tools/cntk/multinodes/fc/ffn.cntk deleted file mode 100644 index 4f89735..0000000 --- a/tools/cntk/multinodes/fc/ffn.cntk +++ /dev/null @@ -1,87 +0,0 @@ -WorkDir=. 
-ModelDir=$WorkDir$/Output/$ConfigName$ -#stderr=$WorkDir$/logs/$ConfigName$/out -DataDir=/home/dl/data/cntk -precision=float - -makeMode=false - -command=Train - -deviceId=1 -minibatchSize=1024 -epochSize=262144 -maxEpochs=2 - -featureDim = 512 -labelDim = 1000 -hiddenDim = 2048 - -initOnCPUOnly=true -parallelTrain=false -prefetch=true - -Train=[ - action=train - modelPath=$ModelDir$/fc - traceLevel=1 - - SimpleNetworkBuilder=[ - #layerSizes=$featureDim$:$hiddenDim$:$hiddenDim$:$hiddenDim$:$hiddenDim$:$labelDim$ - layerSizes=$featureDim$:$hiddenDim$:$hiddenDim$:$hiddenDim$:$labelDim$ - trainingCriterion=CrossEntropyWithSoftmax - evalCriterion=ErrorPrediction - layerTypes=Sigmoid - applyMeanVarNorm=false - initValueScale=1.0 - uniformInit=true - needPrior=false - ] - - SGD=[ - #epochSize=262144 - #minibatchSize=1024 - #learningRatesPerMB=0.01 - #numMBsToShowResult=4 - #momentumPerSample=0 - #dropoutRate=0.0 - #maxEpochs=2 - - epochSize=$epochSize$ - minibatchSize=$minibatchSize$ - maxEpochs=$maxEpochs$ - learningRatesPerMB=0.01 - numMBsToShowResult=4 - momentumPerSample=0 - dropoutRate=0.0 - - ParallelTrain=[ - parallelizationMethod=DataParallelSGD - distributedMBReading=true - parallelizationStartEpoch=1 - DataParallelSGD=[ - gradientBits=1 - ] - ] - - gradUpdateType=None - normWithAveMultiplier=true - clippingThresholdPerSample=1#INF - ] -] - -reader=[ - readerType=UCIFastReader - file=$DataDir$/data1000.txt - features=[ - dim=$featureDim$ - start=1 - ] - labels=[ - dim=1 - start=0 - labelDim=$labelDim$ - labelMappingFile=$DataDir$/labelmap.1K.txt - #labelMappingFile=$WorkDir$/labelmap.txt - ] -] diff --git a/tools/cntk/rnn/lstm/lstm.cntk b/tools/cntk/rnn/lstm/lstm.cntk index d70da11..1aaf20c 100644 --- a/tools/cntk/rnn/lstm/lstm.cntk +++ b/tools/cntk/rnn/lstm/lstm.cntk @@ -7,7 +7,7 @@ RootDir = "." ConfigDir = "$RootDir$" #DataDir = "/home/comp/pengfeixu/data/cntk/ptb" #DataDir = "/home/comp/csshshi/data/cntk/ptb" -DataDir = "/home/ipdps/data/cntk/ptb" +DataDir = "/home/comp/csshshi/data/cntk/ptb" OutputDir = "$RootDir$/Output" ModelDir = "$OutputDir$/Models" diff --git a/tools/mxnet/mxnetbm.py b/tools/mxnet/mxnetbm.py index 9bdd708..bb4ad90 100644 --- a/tools/mxnet/mxnetbm.py +++ b/tools/mxnet/mxnetbm.py @@ -28,6 +28,7 @@ # Set system variable os.environ['OMP_NUM_THREADS'] = args.cpuCount os.environ['OPENBLAS_NUM_THREADS'] = args.cpuCount +os.environ['MKL_NUM_THREADS'] = args.cpuCount # Build cmd exePath = "" diff --git a/tools/tensorflow/cnn/alexnet/alexnet_cifar10.py b/tools/tensorflow/cnn/alexnet/alexnet_cifar10.py index 203eaf8..9af79d1 100644 --- a/tools/tensorflow/cnn/alexnet/alexnet_cifar10.py +++ b/tools/tensorflow/cnn/alexnet/alexnet_cifar10.py @@ -134,11 +134,11 @@ def loss(logits, labels): batch_size = tf.size(labels) labels = tf.expand_dims(labels, 1) indices = tf.expand_dims(tf.range(0, batch_size, 1), 1) - concated = tf.concat(1, [indices, labels]) + concated = tf.concat(axis=1, values=[indices, labels]) onehot_labels = tf.sparse_to_dense( - concated, tf.pack([batch_size, 10]), 1.0, 0.0) - cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, - onehot_labels, + concated, tf.stack([batch_size, 10]), 1.0, 0.0) + cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, + labels=onehot_labels, name='xentropy') loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') return loss @@ -216,10 +216,10 @@ def train(): grad = tf.train.MomentumOptimizer(lr, 0.9).minimize(loss_value) # Create a saver. 
- saver = tf.train.Saver(tf.all_variables()) + saver = tf.train.Saver(tf.global_variables()) # Build an initialization operation. - init = tf.initialize_all_variables() + init = tf.global_variables_initializer() # Start running operations on the Graph. sess.run(init) coord = tf.train.Coordinator() diff --git a/tools/tensorflow/cnn/alexnet/alexnet_cifar10_multi_gpu1.py b/tools/tensorflow/cnn/alexnet/alexnet_cifar10_multi_gpu1.py new file mode 100644 index 0000000..57a51bc --- /dev/null +++ b/tools/tensorflow/cnn/alexnet/alexnet_cifar10_multi_gpu1.py @@ -0,0 +1,328 @@ +from datetime import datetime + +import time +import cifar10_input +#import unpickle as cifar10_input + +import tensorflow as tf +import numpy as np +import os + +FLAGS = tf.app.flags.FLAGS + +parameters = [] +device_str = '' + +conv_counter = 1 +pool_counter = 1 +norm_counter = 1 +affine_counter = 1 +pad_counter = 1 + +FLAGS = tf.app.flags.FLAGS +# Basic model parameters. +tf.app.flags.DEFINE_integer('batch_size', 1024, """Number of images to process in a batch.""") +tf.app.flags.DEFINE_integer('epochs', 40, """Max epochs for training.""") +tf.app.flags.DEFINE_integer('log_step', 100, """Log step""") +tf.app.flags.DEFINE_integer('eval_step', 1, """Evaluate step of epoch""") +tf.app.flags.DEFINE_string('device_ids', '0,1', """Device ids. split by comma, e.g. 0,1""") +#tf.app.flags.DEFINE_string('data_dir', '/home/comp/csshshi/data/tensorflow/cifar10/cifar-10-batches-bin', """Data directory""") +tf.app.flags.DEFINE_string('data_dir', os.environ['HOME']+'/data/tensorflow/cifar10/cifar-10-batches-bin', """Data directory""") +#tf.app.flags.DEFINE_string('data_dir', '/home/comp/pengfeixu/Data/tensorflow/cifar10/cifar-10-batches-bin', """Data directory""") +tf.app.flags.DEFINE_string('train_dir', './trained_models/', + """Path to the data directory.""") +tf.app.flags.DEFINE_boolean('use_fp16', False, + """Train the model using fp16.""") +tf.app.flags.DEFINE_boolean('log_device_placement', True, + """Whether to log device placement.""") +tf.app.flags.DEFINE_integer('num_gpus', 2, """How many GPUs to use.""") + +EPOCH_SIZE = 50000 +TEST_SIZE = 10000 + + +def _init_global_variables(): + global conv_counter + global pool_counter + global norm_counter + global affine_counter + global pad_counter + conv_counter = 1 + pool_counter = 1 + norm_counter = 1 + affine_counter = 1 + pad_counter = 1 + + +def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType): + global conv_counter + global parameters + name = 'conv' + str(conv_counter) + conv_counter += 1 + with tf.variable_scope(name) as scope: + #kernel = tf.get_variable(name='weights', initializer=tf.random_normal([kH, kW, nIn, nOut], dtype=tf.float32, stddev=1e-2)) + kernel = tf.get_variable(name='weights', shape=[kH, kW, nIn, nOut], initializer=tf.truncated_normal_initializer(dtype=tf.float32, stddev=1e-2)) + strides = [1, dH, dW, 1] + conv = tf.nn.conv2d(inpOp, kernel, strides, padding=padType) + #biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32), + # trainable=True, name='biases') + biases = tf.get_variable(name='biases', initializer=tf.constant(0.0, shape=[nOut], dtype=tf.float32), dtype=tf.float32) + bias = tf.reshape(tf.nn.bias_add(conv, biases), + conv.get_shape()) + parameters += [kernel, biases] + return bias + + +def _relu(inpOp): + return tf.nn.relu(inpOp) + + +def _padding(inpOp, pad): + global pad_counter + name = 'pad' + str(pad_counter) + pad_counter += 1 + with tf.name_scope(name) as scope: + padded_input = tf.pad(inpOp, [[0, 0], [pad, pad], [pad, pad], [0, 
0]], "CONSTANT", name='pad') + print('padded_input: ', padded_input) + return padded_input + + +def _norm(inpOp, local_size, alpha, beta): + global norm_counter + name = 'norm' + str(norm_counter) + norm = tf.nn.lrn(inpOp, local_size, bias=1.0, alpha=alpha, beta=beta, name=name) + return norm + + +def _affine(inpOp, nIn, nOut): + global affine_counter + global parameters + name = 'affine' + str(affine_counter) + affine_counter += 1 + with tf.variable_scope(name) as scope: + #kernel = tf.get_variable(name='weights', initializer=tf.random_normal([nIn, nOut], + # dtype=tf.float32, + # stddev=1e-2)) + kernel = tf.get_variable(name='weights', shape=[nIn, nOut], initializer=tf.truncated_normal_initializer(dtype=tf.float32, + stddev=1e-2)) + biases = tf.get_variable(name='biases', shape=[nOut], initializer=tf.constant_initializer()) + affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) + parameters += [kernel, biases] + return affine1 + +def _mpool(inpOp, kH, kW, dH, dW): + global pool_counter + global parameters + name = 'pool' + str(pool_counter) + pool_counter += 1 + ksize = [1, kH, kW, 1] + strides = [1, dH, dW, 1] + return tf.nn.max_pool(inpOp, + ksize=ksize, + strides=strides, + padding='VALID', + name=name) + +def _avgpool(inpOp, kH, kW, dH, dW): + global pool_counter + name = 'pool' + str(pool_counter) + pool_counter += 1 + ksize = [1, kH, kW, 1] + strides = [1, dH, dW, 1] + return tf.nn.avg_pool(inpOp, + ksize=ksize, + strides=strides, + padding='VALID', + name=name) + +def loss_function(logits, labels): + batch_size = tf.size(labels) + labels = tf.expand_dims(labels, 1) + indices = tf.expand_dims(tf.range(0, batch_size, 1), 1) + concated = tf.concat(axis=1, values=[indices, labels]) + onehot_labels = tf.sparse_to_dense( + concated, tf.stack([batch_size, 10]), 1.0, 0.0) + cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, + labels=onehot_labels, + name='xentropy') + loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') + return loss + +def inference(images): + pad1 = _padding(images, 2) + conv1 = _conv (pad1, 3, 32, 5, 5, 1, 1, 'VALID') + pool1 = _mpool(conv1, 3, 3, 2, 2) + relu1 = _relu(pool1) + #norm1 = _norm(relu1, 3, 5e-05, 0.75) + + pad2 = _padding(relu1, 2) + conv2 = _conv (pad2, 32, 32, 5, 5, 1, 1, 'VALID') + pool2 = _mpool(conv2, 3, 3, 2, 2) + relu2 = _relu(pool2) + #norm2 = _norm(relu2, 3, 5e-05, 0.75) + + pad3 = _padding(relu2, 2) + conv3 = _conv (pad3, 32, 64, 5, 5, 1, 1, 'VALID') + relu3 = _relu(conv3) + pool3 = _avgpool(relu3, 3, 3, 2, 2) + print('pool3: ', pool3) + + resh1 = tf.reshape(pool3, [-1, 64 * 3 * 3]) + affn1 = _affine(resh1, 64*3*3, 10) + + return affn1 + +def average_gradients(tower_grads): + """Calculate the average gradient for each shared variable across all towers. + + Note that this function provides a synchronization point across all towers. + + Args: + tower_grads: List of lists of (gradient, variable) tuples. The outer list + is over individual gradients. The inner list is over the gradient + calculation for each tower. + Returns: + List of pairs of (gradient, variable) where the gradient has been averaged + across all towers. + """ + average_grads = [] + for grad_and_vars in zip(*tower_grads): + # Note that each grad_and_vars looks like the following: + # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) + grads = [] + for g, _ in grad_and_vars: + # Add 0 dimension to the gradients to represent the tower. + expanded_g = tf.expand_dims(g, 0) + + # Append on a 'tower' dimension which we will average over below. 
+ grads.append(expanded_g) + + # Average over the 'tower' dimension. + grad = tf.concat(axis=0, values=grads) + grad = tf.reduce_mean(grad, 0) + + # Keep in mind that the Variables are redundant because they are shared + # across towers. So .. we will just return the first tower's pointer to + # the Variable. + v = grad_and_vars[0][1] + grad_and_var = (grad, v) + average_grads.append(grad_and_var) + return average_grads + + +def train(): + global parameters + config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=FLAGS.log_device_placement) + with tf.Graph().as_default(), tf.device("/cpu:0"): + global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) + + device_ids = FLAGS.device_ids.split(',') + print('device_ids: ', device_ids) + if len(device_ids) > FLAGS.num_gpus: + print('The device_ids should have the same number of GPUs with num_gpus') + return + + lr = 0.001 + optimizer = tf.train.GradientDescentOptimizer(lr) + #optimizer = tf.train.MomentumOptimizer(lr, 0.9) + + def assign_to_device(device, ps_device="/cpu:0"): + def _assign(op): + node_def = op if isinstance(op, tf.NodeDef) else op.node_def + if node_def.op == "Variable": + return ps_device + else: + return device + return _assign + + tower_grads = [] + average_loss_tensor = [] + for i in xrange(FLAGS.num_gpus): + print('what is i: ', i) + #with tf.device(assign_to_device('/gpu:%s'%device_ids[i])): + with tf.device('/gpu:%s'%device_ids[i]): + with tf.name_scope('%s_%s' % ('TOWER', device_ids[i])) as n_scope: + _init_global_variables() + images, labels = cifar10_input.inputs(False, FLAGS.data_dir, FLAGS.batch_size) + logits = inference(images) + loss = loss_function(logits, labels) + + tf.add_to_collection('losses', loss) + tf.add_n(tf.get_collection('losses'), name='total_loss') + + losses = tf.get_collection('losses', n_scope) + total_loss = tf.add_n(losses, name='total_loss') + average_loss_tensor.append(total_loss) + + tf.get_variable_scope().reuse_variables() + print('total_loss: ', total_loss) + grads = optimizer.compute_gradients(total_loss) + print('grads: ', grads) + + tower_grads.append(grads) + + print('tower_grads: ', tower_grads) + print('len0: ', len(tower_grads[0])) + print('len1: ', len(tower_grads[1])) + + grads = average_gradients(tower_grads) + apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step) + train_op = apply_gradient_op + average_op = tf.reduce_mean(average_loss_tensor, 0) + + # Create a saver. 
+ saver = tf.train.Saver(tf.global_variables()) + + init = tf.global_variables_initializer() + sess = tf.Session(config=config) + sess.run(init) + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(sess=sess, coord=coord) + + real_batch_size = FLAGS.batch_size * FLAGS.num_gpus + num_batches_per_epoch = int((EPOCH_SIZE + real_batch_size - 1)/ real_batch_size) + iterations = FLAGS.epochs * num_batches_per_epoch + average_batch_time = 0.0 + epochs_info = [] + + step = 0 + average_loss = 0.0 + for step in xrange(iterations): + start_time = time.time() + #_, loss_v = sess.run([train_op, total_loss]) + _, loss_v = sess.run([train_op, average_op]) + duration = time.time() - start_time + average_batch_time += float(duration) + + assert not np.isnan(loss_v), 'Model diverged with loss = NaN' + average_loss += loss_v + + if step % FLAGS.log_step == 0: + examples_per_sec = real_batch_size / duration + sec_per_batch = float(duration) + format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)') + print (format_str % (datetime.now(), step, loss_v, examples_per_sec, sec_per_batch)) + + if step > 0 and step % (FLAGS.eval_step * num_batches_per_epoch) == 0: + average_loss /= num_batches_per_epoch * FLAGS.eval_step + print ('epoch: %d, loss: %.2f' % (step /num_batches_per_epoch, average_loss)) + epochs_info.append('%d:_:%s'%(step/(FLAGS.eval_step*num_batches_per_epoch), average_loss)) + average_loss = 0.0 + + checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=step) + + coord.request_stop() + coord.join(threads) + + average_batch_time /= iterations + print 'average_batch_time: ', average_batch_time + print ('epoch_info: %s' % ','.join(epochs_info)) + + +def main(_): + train() + + +if __name__ == '__main__': + tf.app.run() diff --git a/tools/tensorflow/cnn/alexnet/report.txt b/tools/tensorflow/cnn/alexnet/report.txt new file mode 100644 index 0000000..1cfc903 --- /dev/null +++ b/tools/tensorflow/cnn/alexnet/report.txt @@ -0,0 +1,78 @@ +-------------------------------------------------------------------------------- +Processing file 'alexnet_cifar10_multi_gpu.py' + outputting to 'alexnet_cifar10_multi_gpu1.py' +-------------------------------------------------------------------------------- + +'alexnet_cifar10_multi_gpu.py' Line 201 +-------------------------------------------------------------------------------- + +Added keyword 'concat_dim' to reordered function 'tf.concat' +Added keyword 'values' to reordered function 'tf.concat' + + Old: grad = tf.concat(0, grads) + + New: grad = tf.concat(axis=0, values=grads) + ~~~~~ ~~~~~~~ + +'alexnet_cifar10_multi_gpu.py' Line 274 +-------------------------------------------------------------------------------- + +Renamed function 'tf.all_variables' to 'tf.global_variables' + + Old: saver = tf.train.Saver(tf.all_variables()) + ~~~~~~~~~~~~~~~~ + New: saver = tf.train.Saver(tf.global_variables()) + ~~~~~~~~~~~~~~~~~~~ + +'alexnet_cifar10_multi_gpu.py' Line 142 +-------------------------------------------------------------------------------- + +Added keyword 'concat_dim' to reordered function 'tf.concat' +Added keyword 'values' to reordered function 'tf.concat' + + Old: concated = tf.concat(1, [indices, labels]) + + New: concated = tf.concat(axis=1, values=[indices, labels]) + ~~~~~ ~~~~~~~ + +'alexnet_cifar10_multi_gpu.py' Line 144 +-------------------------------------------------------------------------------- + +Renamed function 'tf.pack' to 'tf.stack' + + Old: 
concated, tf.pack([batch_size, 10]), 1.0, 0.0) + ~~~~~~~ + New: concated, tf.stack([batch_size, 10]), 1.0, 0.0) + ~~~~~~~~ + +'alexnet_cifar10_multi_gpu.py' Line 145 +-------------------------------------------------------------------------------- + +Added keyword 'logits' to reordered function 'tf.nn.softmax_cross_entropy_with_logits' + + Old: cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, + + New: cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, + ~~~~~~~ + +'alexnet_cifar10_multi_gpu.py' Line 146 +-------------------------------------------------------------------------------- + +Added keyword 'labels' to reordered function 'tf.nn.softmax_cross_entropy_with_logits' + + Old: onehot_labels, + + New: labels=onehot_labels, + ~~~~~~~ + +'alexnet_cifar10_multi_gpu.py' Line 276 +-------------------------------------------------------------------------------- + +Renamed function 'tf.initialize_all_variables' to 'tf.global_variables_initializer' + + Old: init = tf.initialize_all_variables() + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + New: init = tf.global_variables_initializer() + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + diff --git a/tools/tensorflow/fc/fcn5_mnist.py b/tools/tensorflow/fc/fcn5_mnist.py index 535c80e..6e1fccb 100644 --- a/tools/tensorflow/fc/fcn5_mnist.py +++ b/tools/tensorflow/fc/fcn5_mnist.py @@ -1,5 +1,5 @@ import tensorflow as tf -import models +import models1 as models import time import os import numpy as np @@ -84,7 +84,7 @@ def train(model='fcn5'): #optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss) optimizer = tf.train.MomentumOptimizer(lr, 0.9).minimize(loss) - init = tf.initialize_all_variables() + init = tf.global_variables_initializer() sess.run(init) tf.train.start_queue_runners(sess=sess) batch_size_per_epoch = int((EPOCH_SIZE + FLAGS.batch_size - 1)/ FLAGS.batch_size) diff --git a/tools/tensorflow/fc/fcn5_mnist_multi_gpu1.py b/tools/tensorflow/fc/fcn5_mnist_multi_gpu1.py new file mode 100644 index 0000000..b555ada --- /dev/null +++ b/tools/tensorflow/fc/fcn5_mnist_multi_gpu1.py @@ -0,0 +1,228 @@ +import os +import tensorflow as tf +import models +import time +import numpy as np +from datetime import datetime +from tensorflow.examples.tutorials.mnist import input_data + + +FLAGS = tf.app.flags.FLAGS +# Basic model parameters. + +tf.app.flags.DEFINE_string('train_dir', './multigpu-trained', + """Directory where to write event logs """ + """and checkpoint.""") +tf.app.flags.DEFINE_integer('batch_size', 1024, """Number of images to process in a batch.""") +tf.app.flags.DEFINE_integer('epochs', 40, """Max epochs for training.""") +tf.app.flags.DEFINE_integer('log_step', 10, """Log step""") +tf.app.flags.DEFINE_integer('eval_step', 1, """Evaluate step of epoch""") +tf.app.flags.DEFINE_string('device_ids', '0,1', """Device ids. split by comma, e.g. 
0,1""") +#tf.app.flags.DEFINE_string('data_dir', '/home/comp/csshshi/data/tensorflow/MNIST_data/', +tf.app.flags.DEFINE_string('data_dir', os.environ['HOME']+'/data/tensorflow/MNIST_data/', +#tf.app.flags.DEFINE_string('data_dir', '/home/comp/pengfeixu/Data/tensorflow/MNIST_data/', + """Path to the data directory.""") +tf.app.flags.DEFINE_boolean('use_fp16', False, + """Train the model using fp16.""") +tf.app.flags.DEFINE_boolean('log_device_placement', True, + """Whether to log device placement.""") +tf.app.flags.DEFINE_integer('num_gpus', 2, """How many GPUs to use.""") + +EPOCH_SIZE = 60000 +TEST_SIZE = 10000 + + +def createFakeData(count, featureDim, labelDim): + features = np.random.randn(count, featureDim) + labels = np.random.randint(0, labelDim, size=(count, 1)) + return features, labels + +features, labels = createFakeData(1024, 32*32*3, 10) + + +def getFakeMinibatch(minibatchSize, labelDim): + feat = features[:minibatchSize] + l = labels[:minibatchSize] + lab = np.zeros((minibatchSize, labelDim)) + for i in range(lab.shape[0]): + lab[i][l[i]] = 1 + return feat, lab + +mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) + + +def get_real_batch_data(batch_size, label_dim): + batch_xs, batch_ys = mnist.train.next_batch(batch_size) + return batch_xs, batch_ys + + +def average_gradients(tower_grads): + """Calculate the average gradient for each shared variable across all towers. + + Note that this function provides a synchronization point across all towers. + + Args: + tower_grads: List of lists of (gradient, variable) tuples. The outer list + is over individual gradients. The inner list is over the gradient + calculation for each tower. + Returns: + List of pairs of (gradient, variable) where the gradient has been averaged + across all towers. + """ + average_grads = [] + for grad_and_vars in zip(*tower_grads): + # Note that each grad_and_vars looks like the following: + # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) + grads = [] + for g, _ in grad_and_vars: + # Add 0 dimension to the gradients to represent the tower. + expanded_g = tf.expand_dims(g, 0) + + # Append on a 'tower' dimension which we will average over below. + grads.append(expanded_g) + + # Average over the 'tower' dimension. + grad = tf.concat(axis=0, values=grads) + grad = tf.reduce_mean(grad, 0) + + # Keep in mind that the Variables are redundant because they are shared + # across towers. So .. we will just return the first tower's pointer to + # the Variable. + v = grad_and_vars[0][1] + grad_and_var = (grad, v) + average_grads.append(grad_and_var) + return average_grads + + + + +def train(model='fcn5'): + if FLAGS.num_gpus < 2: + print("The number of GPU should be 2 or more, if you use one GPU, please use fcn5_mnist.py to train") + return + + config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=FLAGS.log_device_placement) + + with tf.Graph().as_default(), tf.device("/cpu:0"): + global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) + + device_ids = FLAGS.device_ids.split(',') + if len(device_ids) > FLAGS.num_gpus: + print('The device_ids should have the same number of GPUs with num_gpus') + return + + lr = 0.05 + optimizer = tf.train.GradientDescentOptimizer(lr) + + # TF1.0 has error with this momentum optimizer, it should be fixed... 
+ #optimizer = tf.train.MomentumOptimizer(lr, 0.9) + + def assign_to_device(device, ps_device="/cpu:0"): + def _assign(op): + node_def = op if isinstance(op, tf.NodeDef) else op.node_def + if node_def.op == "Variable": + return ps_device + else: + return device + return _assign + + tower_grads = [] + feed_vars = [] + average_loss_tensor = [] + for i in xrange(FLAGS.num_gpus): + with tf.device(assign_to_device('/gpu:%s'%device_ids[i])): + with tf.name_scope('%s_%s' % ('TOWER', device_ids[i])) as scope: + feature_dim = models.feature_dim + label_dim = models.label_dim + images = tf.placeholder(tf.float32, [None, feature_dim], name='images') + labels = tf.placeholder(tf.float32, [None, label_dim], name='labels') + feed_vars.append((images, labels)) + + logits = models.model_fcn5(images) + loss = models.loss(logits, labels) + tf.add_to_collection('losses', loss) + + #tf.add_n(tf.get_collection('losses'), name='total_loss') + losses = tf.get_collection('losses', scope) + total_loss = tf.add_n(losses, name='total_loss') + average_loss_tensor.append(total_loss) + + tf.get_variable_scope().reuse_variables() + grads = optimizer.compute_gradients(total_loss) + tower_grads.append(grads) + + print('tower_grads: ', tower_grads, '\nlen: ', len(tower_grads)) + print ('total_loss: ', total_loss) + + grads = average_gradients(tower_grads) + apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step) + + train_op = apply_gradient_op + average_op = tf.reduce_mean(average_loss_tensor, 0) + saver = tf.train.Saver(tf.global_variables()) + + init = tf.global_variables_initializer() + sess = tf.Session(config=config) + sess.run(init) + + # Add initialize for other variables + uninitialized_vars = [] + for var in tf.all_variables(): + try: + sess.run(var) + except tf.errors.FailedPreconditionError: + uninitialized_vars.append(var) + init_new_vars_op = tf.initialize_variables(uninitialized_vars) + sess.run(init_new_vars_op) + + tf.train.start_queue_runners(sess=sess) + + real_batch_size = FLAGS.batch_size * FLAGS.num_gpus + num_batches_per_epoch = int((EPOCH_SIZE + real_batch_size - 1)/ real_batch_size) + iterations = FLAGS.epochs * num_batches_per_epoch + average_batch_time = 0.0 + epochs_info = [] + + step = 0 + average_loss = 0.0 + for step in range(iterations): + start_time = time.time() + imgs, labs = get_real_batch_data(real_batch_size, 10) + feed_dict = {} + for i in range(FLAGS.num_gpus): + feed_dict[feed_vars[i][0]] = imgs[i*FLAGS.batch_size:(i+1)*FLAGS.batch_size] + feed_dict[feed_vars[i][1]] = labs[i*FLAGS.batch_size:(i+1)*FLAGS.batch_size] + # _, loss_value = sess.run([train_op, total_loss], feed_dict=feed_dict) + _, loss_value = sess.run([train_op, average_op], feed_dict=feed_dict) + duration = time.time() - start_time + average_batch_time += float(duration) + average_loss += loss_value + + assert not np.isnan(loss_value), 'Model diverged with loss = NaN' + + if step % FLAGS.log_step == 0: + examples_per_sec = (FLAGS.batch_size * FLAGS.num_gpus) / duration + sec_per_batch = float(duration) + format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)') + print (format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) + + if step > 0 and step % (FLAGS.eval_step * num_batches_per_epoch) == 0: + average_loss /= num_batches_per_epoch * FLAGS.eval_step + print ('epoch: %d, loss: %.2f' % (step/(FLAGS.eval_step*num_batches_per_epoch), average_loss)) + epochs_info.append('%d:-:%s'%(step/(FLAGS.eval_step*num_batches_per_epoch), average_loss)) + 
average_loss = 0.0 + + checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=step) + + average_batch_time /= iterations + print 'average_batch_time: ', average_batch_time + print ('epoch_info: %s' % ','.join(epochs_info)) + + +def main(argv=None): + train(model='fcn5') + + +if __name__ == '__main__': + tf.app.run() diff --git a/tools/tensorflow/fc/models.py b/tools/tensorflow/fc/models.py index ed94bf3..93c4dfb 100644 --- a/tools/tensorflow/fc/models.py +++ b/tools/tensorflow/fc/models.py @@ -51,7 +51,7 @@ def model_fcn8(features): def loss(logits, labels): labels = tf.cast(labels, tf.float32) - cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, labels) + cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels) loss = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') return loss diff --git a/tools/tensorflow/fc/report.txt b/tools/tensorflow/fc/report.txt new file mode 100644 index 0000000..df29a5e --- /dev/null +++ b/tools/tensorflow/fc/report.txt @@ -0,0 +1,37 @@ +-------------------------------------------------------------------------------- +Processing file 'fcn5_mnist_multi_gpu.py' + outputting to 'fcn5_mnist_multi_gpu1.py' +-------------------------------------------------------------------------------- + +'fcn5_mnist_multi_gpu.py' Line 160 +-------------------------------------------------------------------------------- + +Renamed function 'tf.all_variables' to 'tf.global_variables' + + Old: saver = tf.train.Saver(tf.all_variables()) + ~~~~~~~~~~~~~~~~ + New: saver = tf.train.Saver(tf.global_variables()) + ~~~~~~~~~~~~~~~~~~~ + +'fcn5_mnist_multi_gpu.py' Line 162 +-------------------------------------------------------------------------------- + +Renamed function 'tf.initialize_all_variables' to 'tf.global_variables_initializer' + + Old: init = tf.initialize_all_variables() + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + New: init = tf.global_variables_initializer() + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +'fcn5_mnist_multi_gpu.py' Line 85 +-------------------------------------------------------------------------------- + +Added keyword 'concat_dim' to reordered function 'tf.concat' +Added keyword 'values' to reordered function 'tf.concat' + + Old: grad = tf.concat(0, grads) + + New: grad = tf.concat(axis=0, values=grads) + ~~~~~ ~~~~~~~ + + diff --git a/tools/tensorflow/tensorflowbm.py b/tools/tensorflow/tensorflowbm.py index 446d6ab..519099a 100644 --- a/tools/tensorflow/tensorflowbm.py +++ b/tools/tensorflow/tensorflowbm.py @@ -26,6 +26,7 @@ # Set system variable os.environ['OMP_NUM_THREADS'] = args.cpuCount os.environ['OPENBLAS_NUM_THREADS'] = args.cpuCount +os.environ['MKL_NUM_THREADS'] = args.cpuCount # Build cmd for benchmark root_path = os.path.dirname(os.path.abspath(__file__)) tool_path = root_path + "/" + args.netType diff --git a/tools/torch/torchbm.py b/tools/torch/torchbm.py index 3d4e848..ee88948 100644 --- a/tools/torch/torchbm.py +++ b/tools/torch/torchbm.py @@ -25,8 +25,9 @@ if args.debug: print("args: " + str(args)) # Set system variable -#os.environ['OMP_NUM_THREADS'] = args.cpuCount -#os.environ['OPENBLAS_NUM_THREADS'] = args.cpuCount +os.environ['OMP_NUM_THREADS'] = args.cpuCount +os.environ['OPENBLAS_NUM_THREADS'] = args.cpuCount +os.environ['MKL_NUM_THREADS'] = args.cpuCount # Build cmd cmd = "THC_CACHING_ALLOCATOR=1 th Main.lua " @@ -46,7 +47,7 @@ if "-" not in args.devId: cmd = "THC_CACHING_ALLOCATOR=1 CUDA_VISIBLE_DEVICES=" + args.devId + " th 
rnn/recurrent-language-model.lua --cuda " else: - cmd = "OMP_NUM_THREADS=%s OPENBLAS_NUM_THREADS=%s th rnn/recurrent-language-model.lua --lstm --startlr 1 " % (args.cpuCount, args.cpuCount) + cmd = "OMP_NUM_THREADS=%s OPENBLAS_NUM_THREADS=%s MKL_NUM_THREADS=%s th rnn/recurrent-language-model.lua --lstm --startlr 1 " % (args.cpuCount, args.cpuCount, args.cpuCount) else: print("Device not set, please set device by adding -devId <-1 or 0,1,2,3>. See help for more") sys.exit(-2)
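
For reference, the tool wrappers touched above (caffebm.py, cntkbm.py, mxnetbm.py, tensorflowbm.py, torchbm.py) all pin the CPU thread count the same way: the requested count is exported through OMP_NUM_THREADS, OPENBLAS_NUM_THREADS and, with this patch, MKL_NUM_THREADS before the benchmark command is built. A minimal standalone sketch of that pattern follows; the argument handling and the final print are illustrative assumptions, not code taken from the patch.

    # Minimal sketch of the shared thread-pinning pattern (assumed names).
    import argparse
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument('-cpuCount', type=str, default='1',
                        help='CPU thread count, kept as a string as in the wrappers')
    args = parser.parse_args()

    # OpenMP, OpenBLAS and MKL each read their own variable, so all three are set.
    os.environ['OMP_NUM_THREADS'] = args.cpuCount
    os.environ['OPENBLAS_NUM_THREADS'] = args.cpuCount
    os.environ['MKL_NUM_THREADS'] = args.cpuCount

    # The real wrappers then assemble a tool-specific command line and run it;
    # a placeholder message stands in for that step here.
    print('thread environment prepared for %s threads' % args.cpuCount)

The report.txt files added above record the mechanical TensorFlow 0.x to 1.0 renames produced by tf_upgrade.py: tf.concat argument reordering, tf.pack to tf.stack, keyword arguments for tf.nn.softmax_cross_entropy_with_logits, tf.all_variables to tf.global_variables, and tf.initialize_all_variables to tf.global_variables_initializer. The snippet below collects the upgraded forms in one self-contained sketch; the tensor values and shapes are illustrative and not taken from the benchmark code.

    # TF 1.0-style calls corresponding to the renames listed in report.txt.
    import tensorflow as tf

    labels = tf.constant([3, 1, 4], dtype=tf.int32)     # illustrative labels
    logits = tf.random_normal([3, 10])                  # illustrative logits
    w = tf.Variable(tf.zeros([10]), name='w')           # a variable, so the saver has something to save

    batch_size = tf.size(labels)
    labels = tf.expand_dims(labels, 1)
    indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)

    concated = tf.concat(axis=1, values=[indices, labels])          # was tf.concat(1, [...])
    onehot_labels = tf.sparse_to_dense(
        concated, tf.stack([batch_size, 10]), 1.0, 0.0)             # was tf.pack([...])
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=onehot_labels, name='xentropy')       # keyword arguments now required
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')

    saver = tf.train.Saver(tf.global_variables())       # was tf.all_variables()
    init = tf.global_variables_initializer()            # was tf.initialize_all_variables()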