diff --git a/benchmark.py b/benchmark.py index 8393bd4..2a8148b 100644 --- a/benchmark.py +++ b/benchmark.py @@ -30,7 +30,7 @@ content = f.readlines() #print content for line in content: - line = line.split('#')[0].replace('\t','').replace('\n','') + line = line.split('#')[0].replace('\t','').replace('\n','').replace(' ', '') if len(line) < 1 or "None" in line: continue if not config_experiments: diff --git a/configs/v8withinspur/gtx980.config b/configs/v8withinspur/gtx980.config new file mode 100644 index 0000000..47353d0 --- /dev/null +++ b/configs/v8withinspur/gtx980.config @@ -0,0 +1,33 @@ +flag: sgbenchmark6v8inspur #Flag of current experiment +tools: caffe,cntk,mxnet,torch,tensorflow #Tools to benchmark +#tools: mxnet #Tools to benchmark +experiments: #; ; ; ; ; ; ; +{ + fc; fcn5; 2; 1; 4096; 40; 60000; 0.05 + fc; fcn5; 2; 1; 2048; 40; 60000; 0.05 + fc; fcn5; 2; 1; 1024; 40; 60000; 0.05 + fc; fcn5; 2; 1; 512; 40; 60000; 0.05 + fc; fcn5; 2; 1; 342; 40; 60000; 0.05 + cnn; alexnet; 2; 1; 2048; 40; 50000; 0.01 + cnn; alexnet; 2; 1; 1024; 40; 50000; 0.01 + cnn; alexnet; 2; 1; 512; 40; 50000; 0.01 + cnn; alexnet; 2; 1; 256; 40; 50000; 0.01 + cnn; alexnet; 2; 1; 128; 40; 50000; 0.01 + cnn; alexnet; 2; 1; 86; 40; 50000; 0.01 + cnn; resnet; 2; 1; 128; 40; 50000; 0.01 + cnn; resnet; 2; 1; 64; 40; 50000; 0.01 + cnn; resnet; 2; 1; 32; 40; 50000; 0.01 + cnn; resnet; 2; 1; 16; 40; 50000; 0.01 +# cnn; resnet; 2; 1; 11; 40; 50000; 0.01 +# rnn; lstm; 2; 1; 1024; 20; -1; 0.1 +# rnn; lstm; 2; 1; 512; 20; -1; 0.1 +# rnn; lstm; 2; 1; 256; 20; -1; 0.1 +# rnn; lstm; 2; 1; 128; 20; -1; 0.1 +} +host_file: None #Path to host file or None +cpu_name: i7-6800K #CPU model +device_name: GTX980 #GPU model +gpu_name: GTX980 #GPU model +cuda: 8.0 #CUDA version +cudnn: 5.1 #CUDNN version +cuda_driver: 381.09 #CUDA driver version diff --git a/configs/v8withinspur/titanx.config b/configs/v8withinspur/titanx.config new file mode 100644 index 0000000..2069f84 --- /dev/null +++ b/configs/v8withinspur/titanx.config @@ -0,0 +1,31 @@ +flag: test_ #Flag of current experiment +tools: caffe,cntk,mxnet,torch,tensorflow #Tools to benchmark +experiments: #; ; ; ; ; ; ; +{ + fc; fcn5; 1; 1; 4096; 2; 60000; 0.05 +# fc; fcn5; 1; 1; 2048; 40; 60000; 0.05 +# fc; fcn5; 1; 1; 1024; 40; 60000; 0.05 +# fc; fcn5; 1; 1; 512; 40; 60000; 0.05 +# fc; fcn5; 1; 1; 342; 40; 60000; 0.05 +# cnn; alexnet; 1; 1; 2048; 40; 50000; 0.01 + cnn; alexnet; 1; 1; 1024; 2; 50000; 0.01 +# cnn; alexnet; 1; 1; 512; 40; 50000; 0.01 +# cnn; alexnet; 1; 1; 256; 40; 50000; 0.01 +# cnn; alexnet; 1; 1; 128; 40; 50000; 0.01 +# cnn; alexnet; 1; 1; 86; 40; 50000; 0.01 + cnn; resnet; 1; 1; 128; 2; 50000; 0.01 +# cnn; resnet; 1; 1; 64; 40; 50000; 0.01 +# cnn; resnet; 1; 1; 32; 40; 50000; 0.01 +# cnn; resnet; 1; 1; 16; 40; 50000; 0.01 +# cnn; resnet; 1; 1; 11; 40; 50000; 0.01 +# rnn; lstm; 1; 1; 1024; 20; -1; 0.1 +# rnn; lstm; 1; 1; 512; 20; -1; 0.1 +# rnn; lstm; 1; 1; 256; 20; -1; 0.1 +# rnn; lstm; 1; 1; 128; 20; -1; 0.1 +} +host_file: None #Path to host file or None +cpu_name: i7-6800K #CPU model +device_name: GTX980 #GPU model +cuda: 8.0 #CUDA version +cudnn: 5.1 #CUDNN version +cuda_driver: 367.48 #CUDA driver version diff --git a/configs/v8withinspur/titanx_pascal.config b/configs/v8withinspur/titanx_pascal.config new file mode 100644 index 0000000..27cf2d6 --- /dev/null +++ b/configs/v8withinspur/titanx_pascal.config @@ -0,0 +1,33 @@ +flag: sgbenchmark6v8inspur #Flag of current experiment +tools: caffe,cntk,mxnet,torch,tensorflow #Tools to benchmark +#tools: mxnet #Tools to benchmark +experiments: #; ; ; ; ; ; ; +{ + fc; fcn5; 1; 1; 4096; 40; 60000; 0.05 + fc; fcn5; 1; 1; 2048; 40; 60000; 0.05 + fc; fcn5; 1; 1; 1024; 40; 60000; 0.05 + fc; fcn5; 1; 1; 512; 40; 60000; 0.05 + fc; fcn5; 1; 1; 342; 40; 60000; 0.05 + cnn; alexnet; 1; 1; 2048; 40; 50000; 0.01 + cnn; alexnet; 1; 1; 1024; 40; 50000; 0.01 + cnn; alexnet; 1; 1; 512; 40; 50000; 0.01 + cnn; alexnet; 1; 1; 256; 40; 50000; 0.01 + cnn; alexnet; 1; 1; 128; 40; 50000; 0.01 + cnn; alexnet; 1; 1; 86; 40; 50000; 0.01 + cnn; resnet; 1; 1; 128; 40; 50000; 0.01 + cnn; resnet; 1; 1; 64; 40; 50000; 0.01 + cnn; resnet; 1; 1; 32; 40; 50000; 0.01 + cnn; resnet; 1; 1; 16; 40; 50000; 0.01 +# cnn; resnet; 1; 1; 11; 40; 50000; 0.01 +# rnn; lstm; 1; 1; 1024; 20; -1; 0.1 +# rnn; lstm; 1; 1; 512; 20; -1; 0.1 +# rnn; lstm; 1; 1; 256; 20; -1; 0.1 +# rnn; lstm; 1; 1; 128; 20; -1; 0.1 +} +host_file: None #Path to host file or None +cpu_name: i7-6800K #CPU model +device_name: TitanX_Pascal #GPU model +gpu_name: TitanX_Pascal #GPU model +cuda: 8.0 #CUDA version +cudnn: 5.1 #CUDNN version +cuda_driver: 381.09 #CUDA driver version diff --git a/post_record.py b/post_record.py index a37f4cb..5cdebe1 100755 --- a/post_record.py +++ b/post_record.py @@ -6,12 +6,6 @@ import requests import json -from pymongo import MongoClient - -mongo_client = MongoClient("mongodb://%s:%s/" % (settings.MONGO_HOST, settings.MONGO_PORT)) -db = mongo_client[settings.MONGO_DBNAME] -if settings.MONGO_AUTH_USER_NAME: - auth = db.authenticate(settings.MONGO_AUTH_USER_NAME, settings.MONGO_AUTH_PASSWORD) def post_record(**args): """ @@ -26,7 +20,6 @@ def post_record(**args): data = json.dumps(args) ret = requests.post(settings.RESOURCE_URI, {'data': data}) print ret - #db['record'].insert(args) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Post experiments record tool') diff --git a/tools/caffe/caffebm.py b/tools/caffe/caffebm.py index dcd3fb6..d2e292f 100644 --- a/tools/caffe/caffebm.py +++ b/tools/caffe/caffebm.py @@ -47,7 +47,7 @@ cmd += ' >& ' + log_path ## Execute cmd -print cmd # Debug +#print cmd # Debug t = time.time() os.system(cmd) t = time.time() - t diff --git a/tools/cntk/cnn/alexnet/alexnet_cifar10.cntk b/tools/cntk/cnn/alexnet/alexnet_cifar10.cntk index 55fee8e..ae0296e 100644 --- a/tools/cntk/cnn/alexnet/alexnet_cifar10.cntk +++ b/tools/cntk/cnn/alexnet/alexnet_cifar10.cntk @@ -1,7 +1,7 @@ WorkDir=. OutputDir = "$WorkDir$/Output" ModelDir = "$OutputDir$/Models" -DataDir = "/home/comp/csshshi/data/cntk/cifar10" +DataDir = "/home/shshi/data/cntk/cifar10" #DataDir = "/home/comp/pengfeixu/Data/cntk/cifar10" precision=float diff --git a/tools/cntk/cnn/resnet/resnet.cntk b/tools/cntk/cnn/resnet/resnet.cntk index 40971a6..403869e 100644 --- a/tools/cntk/cnn/resnet/resnet.cntk +++ b/tools/cntk/cnn/resnet/resnet.cntk @@ -1,7 +1,7 @@ RootDir = "." ConfigDir = "$RootDir$" -DataDir = "/home/comp/pengfeixu/data/cntk/cifar10" +DataDir = "/home/shshi/data/cntk/cifar10" #DataDir = "/home/comp/pengfeixu/Data/cntk/cifar10" #DataDir = "/home/ipdps/Data/cntk/cifar10" OutputDir = "$RootDir$/Output" diff --git a/tools/cntk/fc/fcn5.cntk b/tools/cntk/fc/fcn5.cntk index 9246a37..4af1220 100644 --- a/tools/cntk/fc/fcn5.cntk +++ b/tools/cntk/fc/fcn5.cntk @@ -1,7 +1,7 @@ WorkDir= "." ConfigDir= "." ModelDir=$WorkDir$/Output -DataDir=/home/comp/csshshi/data/cntk/mnist +DataDir=/home/shshi/data/cntk/mnist #DataDir=/home/comp/pengfeixu/Data/cntk/mnist #ndlMacros = "$ConfigDir$/Macros.ndl" precision=float diff --git a/tools/common/extract_info.py b/tools/common/extract_info.py index 5b02b6e..58e6484 100644 --- a/tools/common/extract_info.py +++ b/tools/common/extract_info.py @@ -32,48 +32,41 @@ def print_arguments(info): def extract_info_caffe(filename): """ - Use for caffe-rc5 + Use for caffe1.0.0 """ f = open(filename) content = f.readlines() useful_lines = [] accuracies = [] - is_fist = True is_cpu = False - interval = 0 average_times = [] + loss_indexes = [] + all_losses = [] for index, line in enumerate(content): if line.find('Use CPU') > 0: is_cpu = True - if line.find('solver.cpp:219') > 0: - #interval += 1 - #if interval == 3 or is_fist: + if line.find('solver.cpp:218') > 0: useful_lines.append(line) - #interval = 0 - #is_fist = False - if (line.find('solver.cpp:398] Test net output #1:') > 0 and len(useful_lines) > 0)or (is_cpu and line.find('Snapshotting to binary proto file ') > 0): - if (not is_fist) or is_cpu: - if not is_cpu: - iteration = content[index-5].split()[5].strip(',') - else: - iteration = content[index-7].split()[5].strip(',') - accuracy = content[index-1].split()[-1] - #loss = content[index].split()[10] - #loss = content[index-6].split()[-1] - if content[index+1].find("Optimization Done") > 0: # last iter - loss = content[index-4].split()[-1] - else: - loss = content[index+1].split()[-1] - # Append (iteration, accuracy) - #print '-----append useful: ', useful_lines - if len(useful_lines) > 1: - average_time, loss = _calculate_average_caffe(useful_lines) - average_times.append(average_time) - accuracies.append((iteration, accuracy, loss)) - useful_lines = [] - elif not is_cpu: - is_fist = False - #interval = 0 + items = line.split(' ') + iteration_idx = items.index('Iteration') + 1 + iteration = items[iteration_idx] + average_time = float(items[iteration_idx+3].split('s/')[0]) + average_times.append(average_time) + loss = float(items[-1].split('\n')[0]) + all_losses.append(loss) + #accuracies.append((iteration, '-', loss)) + if line.find('Testing net ') > 0: + items = line.split(' ') + iteration_idx = items.index('Iteration') + 1 + loss_indexes.append(int(items[iteration_idx].split(',')[0])) + start_index = loss_indexes[0] + for i in loss_indexes[1:]: + end_index = i + iteration = i + loss = np.mean(all_losses[start_index:end_index]) + accuracies.append((iteration, '-', loss)) + start_index = end_index + #print average_times average_time = np.average(average_times) try: diff --git a/tools/tensorflow/cnn/alexnet/t.sh b/tools/tensorflow/cnn/alexnet/t.sh index 2e94226..0a79093 100755 --- a/tools/tensorflow/cnn/alexnet/t.sh +++ b/tools/tensorflow/cnn/alexnet/t.sh @@ -7,6 +7,6 @@ CUDA_VISIBLE_DEVICES=$deviceId python alexnet_cifar10.py --batch_size=$batch_siz end=`date +%s.%N` runtime=$( echo "$end - $start" | bc -l ) echo "finished with execute time: ${runtime}" -python cifar10_eval.py -rm trained_models/* -rm train_eval/* +#python cifar10_eval.py +rm -rf trained_models +rm -rf train_eval diff --git a/tools/tensorflow/fc/t.sh b/tools/tensorflow/fc/t.sh index 0b49748..301b402 100755 --- a/tools/tensorflow/fc/t.sh +++ b/tools/tensorflow/fc/t.sh @@ -1,7 +1,7 @@ #!/bin/bash start=`date +%s.%N` mkdir multigpu-trained -python fcn5_mnist.py --batch_size=$batch_size --epochs=$epochs --device_id=$deviceId +CUDA_VISIBLE_DEVICES=$deviceId python fcn5_mnist.py --batch_size=$batch_size --epochs=$epochs --device_id=$deviceId end=`date +%s.%N` runtime=$( echo "$end - $start" | bc -l ) echo "finished with execute time: ${runtime}" diff --git a/tools/tensorflow/tensorflowbm.py b/tools/tensorflow/tensorflowbm.py index cc9a91b..9acc555 100644 --- a/tools/tensorflow/tensorflowbm.py +++ b/tools/tensorflow/tensorflowbm.py @@ -27,6 +27,9 @@ os.environ['OMP_NUM_THREADS'] = args.cpuCount os.environ['OPENBLAS_NUM_THREADS'] = args.cpuCount os.environ['MKL_NUM_THREADS'] = args.cpuCount +if os.path.exists('%s/tf12/bin' % os.environ['HOME']): + os.system('source %s/tf12/bin/activate' % os.environ['HOME']) + # Build cmd for benchmark root_path = os.path.dirname(os.path.abspath(__file__)) tool_path = root_path + "/" + args.netType @@ -58,6 +61,6 @@ with open(log_path, "a") as logFile: logFile.write("Total time: " + str(t) + "\n") logFile.write("cmd: " + cmd + "\n") -os.system("mv " + log_path + " ../../logs") +os.system("mv " + log_path + " ../../logs/")