Skip to content

Commit

Permalink
revise log collecting for caffe1.0
Browse files: browse the repository at this point in the history
  • Loading branch information
shyhuai committed Jul 5, 2017
1 parent 814b981 commit 6f7e3c0
Show file tree
Hide file tree
Showing 13 changed files with 134 additions and 48 deletions.
2 changes: 1 addition & 1 deletion benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
content = f.readlines()
#print content
for line in content:
line = line.split('#')[0].replace('\t','').replace('\n','')
line = line.split('#')[0].replace('\t','').replace('\n','').replace(' ', '')
if len(line) < 1 or "None" in line:
continue
if not config_experiments:
Expand Down
33 changes: 33 additions & 0 deletions configs/v8withinspur/gtx980.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
flag: sgbenchmark6v8inspur #Flag of current experiment
tools: caffe,cntk,mxnet,torch,tensorflow #Tools to benchmark
#tools: mxnet #Tools to benchmark
experiments: #<network type>; <network name>; <device id>; <gpu count>; <batch size>; <number of epochs>; <epoch size>; <Learning rate>
{
fc; fcn5; 2; 1; 4096; 40; 60000; 0.05
fc; fcn5; 2; 1; 2048; 40; 60000; 0.05
fc; fcn5; 2; 1; 1024; 40; 60000; 0.05
fc; fcn5; 2; 1; 512; 40; 60000; 0.05
fc; fcn5; 2; 1; 342; 40; 60000; 0.05
cnn; alexnet; 2; 1; 2048; 40; 50000; 0.01
cnn; alexnet; 2; 1; 1024; 40; 50000; 0.01
cnn; alexnet; 2; 1; 512; 40; 50000; 0.01
cnn; alexnet; 2; 1; 256; 40; 50000; 0.01
cnn; alexnet; 2; 1; 128; 40; 50000; 0.01
cnn; alexnet; 2; 1; 86; 40; 50000; 0.01
cnn; resnet; 2; 1; 128; 40; 50000; 0.01
cnn; resnet; 2; 1; 64; 40; 50000; 0.01
cnn; resnet; 2; 1; 32; 40; 50000; 0.01
cnn; resnet; 2; 1; 16; 40; 50000; 0.01
# cnn; resnet; 2; 1; 11; 40; 50000; 0.01
# rnn; lstm; 2; 1; 1024; 20; -1; 0.1
# rnn; lstm; 2; 1; 512; 20; -1; 0.1
# rnn; lstm; 2; 1; 256; 20; -1; 0.1
# rnn; lstm; 2; 1; 128; 20; -1; 0.1
}
host_file: None #Path to host file or None
cpu_name: i7-6800K #CPU model
device_name: GTX980 #GPU model
gpu_name: GTX980 #GPU model
cuda: 8.0 #CUDA version
cudnn: 5.1 #CUDNN version
cuda_driver: 381.09 #CUDA driver version
31 changes: 31 additions & 0 deletions configs/v8withinspur/titanx.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
flag: test_ #Flag of current experiment
tools: caffe,cntk,mxnet,torch,tensorflow #Tools to benchmark
experiments: #<network type>; <network name>; <device id>; <gpu count>; <batch size>; <number of epochs>; <epoch size>; <Learning rate>
{
fc; fcn5; 1; 1; 4096; 2; 60000; 0.05
# fc; fcn5; 1; 1; 2048; 40; 60000; 0.05
# fc; fcn5; 1; 1; 1024; 40; 60000; 0.05
# fc; fcn5; 1; 1; 512; 40; 60000; 0.05
# fc; fcn5; 1; 1; 342; 40; 60000; 0.05
# cnn; alexnet; 1; 1; 2048; 40; 50000; 0.01
cnn; alexnet; 1; 1; 1024; 2; 50000; 0.01
# cnn; alexnet; 1; 1; 512; 40; 50000; 0.01
# cnn; alexnet; 1; 1; 256; 40; 50000; 0.01
# cnn; alexnet; 1; 1; 128; 40; 50000; 0.01
# cnn; alexnet; 1; 1; 86; 40; 50000; 0.01
cnn; resnet; 1; 1; 128; 2; 50000; 0.01
# cnn; resnet; 1; 1; 64; 40; 50000; 0.01
# cnn; resnet; 1; 1; 32; 40; 50000; 0.01
# cnn; resnet; 1; 1; 16; 40; 50000; 0.01
# cnn; resnet; 1; 1; 11; 40; 50000; 0.01
# rnn; lstm; 1; 1; 1024; 20; -1; 0.1
# rnn; lstm; 1; 1; 512; 20; -1; 0.1
# rnn; lstm; 1; 1; 256; 20; -1; 0.1
# rnn; lstm; 1; 1; 128; 20; -1; 0.1
}
host_file: None #Path to host file or None
cpu_name: i7-6800K #CPU model
device_name: GTX980 #GPU model
cuda: 8.0 #CUDA version
cudnn: 5.1 #CUDNN version
cuda_driver: 367.48 #CUDA driver version
33 changes: 33 additions & 0 deletions configs/v8withinspur/titanx_pascal.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
flag: sgbenchmark6v8inspur #Flag of current experiment
tools: caffe,cntk,mxnet,torch,tensorflow #Tools to benchmark
#tools: mxnet #Tools to benchmark
experiments: #<network type>; <network name>; <device id>; <gpu count>; <batch size>; <number of epochs>; <epoch size>; <Learning rate>
{
fc; fcn5; 1; 1; 4096; 40; 60000; 0.05
fc; fcn5; 1; 1; 2048; 40; 60000; 0.05
fc; fcn5; 1; 1; 1024; 40; 60000; 0.05
fc; fcn5; 1; 1; 512; 40; 60000; 0.05
fc; fcn5; 1; 1; 342; 40; 60000; 0.05
cnn; alexnet; 1; 1; 2048; 40; 50000; 0.01
cnn; alexnet; 1; 1; 1024; 40; 50000; 0.01
cnn; alexnet; 1; 1; 512; 40; 50000; 0.01
cnn; alexnet; 1; 1; 256; 40; 50000; 0.01
cnn; alexnet; 1; 1; 128; 40; 50000; 0.01
cnn; alexnet; 1; 1; 86; 40; 50000; 0.01
cnn; resnet; 1; 1; 128; 40; 50000; 0.01
cnn; resnet; 1; 1; 64; 40; 50000; 0.01
cnn; resnet; 1; 1; 32; 40; 50000; 0.01
cnn; resnet; 1; 1; 16; 40; 50000; 0.01
# cnn; resnet; 1; 1; 11; 40; 50000; 0.01
# rnn; lstm; 1; 1; 1024; 20; -1; 0.1
# rnn; lstm; 1; 1; 512; 20; -1; 0.1
# rnn; lstm; 1; 1; 256; 20; -1; 0.1
# rnn; lstm; 1; 1; 128; 20; -1; 0.1
}
host_file: None #Path to host file or None
cpu_name: i7-6800K #CPU model
device_name: TitanX_Pascal #GPU model
gpu_name: TitanX_Pascal #GPU model
cuda: 8.0 #CUDA version
cudnn: 5.1 #CUDNN version
cuda_driver: 381.09 #CUDA driver version
7 changes: 0 additions & 7 deletions post_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,6 @@
import requests
import json

from pymongo import MongoClient

mongo_client = MongoClient("mongodb://%s:%s/" % (settings.MONGO_HOST, settings.MONGO_PORT))
db = mongo_client[settings.MONGO_DBNAME]
if settings.MONGO_AUTH_USER_NAME:
auth = db.authenticate(settings.MONGO_AUTH_USER_NAME, settings.MONGO_AUTH_PASSWORD)

def post_record(**args):
"""
Expand All @@ -26,7 +20,6 @@ def post_record(**args):
data = json.dumps(args)
ret = requests.post(settings.RESOURCE_URI, {'data': data})
print ret
#db['record'].insert(args)

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Post experiments record tool')
Expand Down
2 changes: 1 addition & 1 deletion tools/caffe/caffebm.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
cmd += ' >& ' + log_path

## Execute cmd
print cmd # Debug
#print cmd # Debug
t = time.time()
os.system(cmd)
t = time.time() - t
Expand Down
2 changes: 1 addition & 1 deletion tools/cntk/cnn/alexnet/alexnet_cifar10.cntk
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
WorkDir=.
OutputDir = "$WorkDir$/Output"
ModelDir = "$OutputDir$/Models"
DataDir = "/home/comp/csshshi/data/cntk/cifar10"
DataDir = "/home/shshi/data/cntk/cifar10"
#DataDir = "/home/comp/pengfeixu/Data/cntk/cifar10"

precision=float
Expand Down
2 changes: 1 addition & 1 deletion tools/cntk/cnn/resnet/resnet.cntk
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
RootDir = "."

ConfigDir = "$RootDir$"
DataDir = "/home/comp/pengfeixu/data/cntk/cifar10"
DataDir = "/home/shshi/data/cntk/cifar10"
#DataDir = "/home/comp/pengfeixu/Data/cntk/cifar10"
#DataDir = "/home/ipdps/Data/cntk/cifar10"
OutputDir = "$RootDir$/Output"
Expand Down
2 changes: 1 addition & 1 deletion tools/cntk/fc/fcn5.cntk
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
WorkDir= "."
ConfigDir= "."
ModelDir=$WorkDir$/Output
DataDir=/home/comp/csshshi/data/cntk/mnist
DataDir=/home/shshi/data/cntk/mnist
#DataDir=/home/comp/pengfeixu/Data/cntk/mnist
#ndlMacros = "$ConfigDir$/Macros.ndl"
precision=float
Expand Down
55 changes: 24 additions & 31 deletions tools/common/extract_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,48 +32,41 @@ def print_arguments(info):

def extract_info_caffe(filename):
"""
Use for caffe-rc5
Use for caffe1.0.0
"""
f = open(filename)
content = f.readlines()
useful_lines = []
accuracies = []
is_fist = True
is_cpu = False
interval = 0
average_times = []
loss_indexes = []
all_losses = []
for index, line in enumerate(content):
if line.find('Use CPU') > 0:
is_cpu = True
if line.find('solver.cpp:219') > 0:
#interval += 1
#if interval == 3 or is_fist:
if line.find('solver.cpp:218') > 0:
useful_lines.append(line)
#interval = 0
#is_fist = False
if (line.find('solver.cpp:398] Test net output #1:') > 0 and len(useful_lines) > 0)or (is_cpu and line.find('Snapshotting to binary proto file ') > 0):
if (not is_fist) or is_cpu:
if not is_cpu:
iteration = content[index-5].split()[5].strip(',')
else:
iteration = content[index-7].split()[5].strip(',')
accuracy = content[index-1].split()[-1]
#loss = content[index].split()[10]
#loss = content[index-6].split()[-1]
if content[index+1].find("Optimization Done") > 0: # last iter
loss = content[index-4].split()[-1]
else:
loss = content[index+1].split()[-1]
# Append (iteration, accuracy)
#print '-----append useful: ', useful_lines
if len(useful_lines) > 1:
average_time, loss = _calculate_average_caffe(useful_lines)
average_times.append(average_time)
accuracies.append((iteration, accuracy, loss))
useful_lines = []
elif not is_cpu:
is_fist = False
#interval = 0
items = line.split(' ')
iteration_idx = items.index('Iteration') + 1
iteration = items[iteration_idx]
average_time = float(items[iteration_idx+3].split('s/')[0])
average_times.append(average_time)
loss = float(items[-1].split('\n')[0])
all_losses.append(loss)
#accuracies.append((iteration, '-', loss))
if line.find('Testing net ') > 0:
items = line.split(' ')
iteration_idx = items.index('Iteration') + 1
loss_indexes.append(int(items[iteration_idx].split(',')[0]))
start_index = loss_indexes[0]
for i in loss_indexes[1:]:
end_index = i
iteration = i
loss = np.mean(all_losses[start_index:end_index])
accuracies.append((iteration, '-', loss))
start_index = end_index

#print average_times
average_time = np.average(average_times)
try:
Expand Down
6 changes: 3 additions & 3 deletions tools/tensorflow/cnn/alexnet/t.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ CUDA_VISIBLE_DEVICES=$deviceId python alexnet_cifar10.py --batch_size=$batch_siz
end=`date +%s.%N`
runtime=$( echo "$end - $start" | bc -l )
echo "finished with execute time: ${runtime}"
python cifar10_eval.py
rm trained_models/*
rm train_eval/*
#python cifar10_eval.py
rm -rf trained_models
rm -rf train_eval
2 changes: 1 addition & 1 deletion tools/tensorflow/fc/t.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
start=`date +%s.%N`
mkdir multigpu-trained
python fcn5_mnist.py --batch_size=$batch_size --epochs=$epochs --device_id=$deviceId
CUDA_VISIBLE_DEVICES=$deviceId python fcn5_mnist.py --batch_size=$batch_size --epochs=$epochs --device_id=$deviceId
end=`date +%s.%N`
runtime=$( echo "$end - $start" | bc -l )
echo "finished with execute time: ${runtime}"
Expand Down
5 changes: 4 additions & 1 deletion tools/tensorflow/tensorflowbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
os.environ['OMP_NUM_THREADS'] = args.cpuCount
os.environ['OPENBLAS_NUM_THREADS'] = args.cpuCount
os.environ['MKL_NUM_THREADS'] = args.cpuCount
if os.path.exists('%s/tf12/bin' % os.environ['HOME']):
os.system('source %s/tf12/bin/activate' % os.environ['HOME'])

# Build cmd for benchmark
root_path = os.path.dirname(os.path.abspath(__file__))
tool_path = root_path + "/" + args.netType
Expand Down Expand Up @@ -58,6 +61,6 @@
with open(log_path, "a") as logFile:
logFile.write("Total time: " + str(t) + "\n")
logFile.write("cmd: " + cmd + "\n")
os.system("mv " + log_path + " ../../logs")
os.system("mv " + log_path + " ../../logs/")


0 comments on commit 6f7e3c0

Please sign in to comment.