
standardize memory optimization configurations #99

Merged: 2 commits, Aug 11, 2016
2 changes: 2 additions & 0 deletions include/caffe/net.hpp
@@ -271,7 +271,9 @@ class Net {
/// Whether to compute and display debug info for the net.
bool debug_info_;

/// Memory optimization related members.
vector< shared_ptr<SyncedMemory> > shared_storage_;
std::set<string> excluded_blob_names_;

DISABLE_COPY_AND_ASSIGN(Net);
};
48 changes: 38 additions & 10 deletions src/caffe/net.cpp
@@ -67,6 +67,9 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
for (int input_id = 0; input_id < param.input_size(); ++input_id) {
const int layer_id = -1; // inputs have fake layer ID -1
AppendTop(param, layer_id, input_id, &available_blobs, &blob_name_to_idx);

// input blobs are excluded from memory optimization by default
excluded_blob_names_.insert(param.input(input_id));
}
DLOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);
// For each layer, set up its input and output
@@ -282,6 +285,9 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
LOG(INFO) << "This network produces output " << *it;
net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
net_output_blob_indices_.push_back(blob_name_to_idx[*it]);

// add output blob name to default excluded blobs
excluded_blob_names_.insert(*it);
}
for (size_t blob_id = 0; blob_id < blob_names_.size(); ++blob_id) {
blob_names_index_[blob_names_[blob_id]] = blob_id;
@@ -295,10 +301,16 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);

// optimize memory
-const NetParameter_OptimizeMem om = param.optimize_mem();
const bool need_optimize_mem =
-(om == NetParameter_OptimizeMem_TRAIN_ONLY && phase_ == TRAIN)
-|| (om == NetParameter_OptimizeMem_ALL_OPTIM);
+(param.mem_param().optimize_train() && phase_ == TRAIN)
+|| (param.mem_param().optimize_test() && phase_ == TEST);

// add additional specified blobs to the exclusion list
for (int ex_id = 0; ex_id < param.mem_param().exclude_blob_size(); ++ex_id){
excluded_blob_names_.insert(param.mem_param().exclude_blob(ex_id));
}

// launch memory optimization if necessary
if (!debug_info_ && need_optimize_mem) {
MemoryOptimize();
}
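
With this change, memory optimization is configured through the mem_param message instead of the removed optimize_mem enum. A minimal sketch of the corresponding prototxt block that the code above parses (the blob name is hypothetical):

mem_param {
  optimize_train: true
  optimize_test: false
  exclude_blob: "fc7_features"
}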
@@ -1036,6 +1048,10 @@ int FindSlot(vector<SlotMeta>& slot_vec, const string& key){
return -1;
}

inline bool check_exclude(const std::set<string>& exclude_list, const string& blob_name){
return exclude_list.find(blob_name) != exclude_list.end();
}

template <typename Dtype>
void Net<Dtype>::MemoryOptimize() {
// Dry run phase
@@ -1046,13 +1062,15 @@

// Forward pass, try to reuse blobs' data memory
for (int i = 0; i < layers_.size(); ++i) {
-if (layers_[i]->layer_param().no_mem_opt()) continue;
const vector<Blob<Dtype>* >& layer_top = top_vecs_[i];
const vector<Blob<Dtype>* >& layer_bottom = bottom_vecs_[i];
LOG(INFO) << "layer " << i << " " << layer_names_[i];
// Find slot for each top blob's data
for (int i_top = 0; i_top < layer_top.size(); ++i_top) {
const string& top_name = blob_names_[top_id_vecs_[i][i_top]];

if (check_exclude(excluded_blob_names_, top_name)) continue;

int idx = FindSlot(slots, top_name + "_data");
if (idx == -1) {
// Detect share data conditions
@@ -1075,8 +1093,12 @@
LOG(INFO) << "top " << top_name << " acquires data slot " << idx;
}
} else {
-slots[idx].IncRef();
-slot_index[top_name + "_data"] = idx;
+if (idx != -1) {
+// idx == -1 means the top blob is (recursively) sharing data with an excluded bottom blob,
+// which makes this blob itself excluded from the optimization.
+slots[idx].IncRef();
+slot_index[top_name + "_data"] = idx;
+}
}
} else {
// Top data blob is already assigned a slot (maybe inplace layer).
@@ -1088,6 +1110,9 @@
if (phase_ == TRAIN && layer_need_backward_[i]) continue;
for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom) {
const string& bottom_name = blob_names_[bottom_id_vecs_[i][i_bottom]];

if (check_exclude(excluded_blob_names_, bottom_name)) continue;

int idx = FindSlot(slots, bottom_name + "_data");
if (slot_index.find(bottom_name + "_data") != slot_index.end()) {
idx = slot_index[bottom_name + "_data"];
@@ -1113,8 +1138,10 @@
// first deal with bottoms
for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom){
const string& bottom_name = blob_names_[layer_bottom_idx[i_bottom]];

if (check_exclude(excluded_blob_names_, bottom_name)) continue;

int idx = FindSlot(slots, bottom_name + "_diff");
-if (!(layers_[i]->layer_param().no_mem_opt())){
if (idx == -1){
//detect share diff conditions
bool sharing_diff = false;
@@ -1132,8 +1159,8 @@
}else{
LOG(INFO) << "sharing diff using slot "<<idx;
if(idx != -1) {
-// idx == -1 means this is an output blob
-// as good practice, we do not touch the output blobs' diff memory, since that leads to unwanted behavior.
+// idx == -1 means the bottom blob is (recursively) sharing diff with an excluded top blob,
+// which makes this blob itself excluded from the optimization.
slots[idx].IncRef();
slot_index[bottom_name + "_diff"] = idx;
}
@@ -1143,7 +1170,6 @@
// usually this means in-place operation
slots[idx].IncRef();
}
-}
LOG(INFO)<<"bottom blob "<<i_bottom<<" name "
<<bottom_name<<" slot id "<<idx;
}
@@ -1152,6 +1178,8 @@
for (int i_top = 0; i_top < layer_top.size(); ++i_top){
const string& top_name = blob_names_[layer_top_idx[i_top]];

if (check_exclude(excluded_blob_names_, top_name)) continue;

// find the top in the slots
int idx = FindSlot(slots, top_name + "_diff");

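To make the dry-run bookkeeping above easier to follow, here is a simplified, self-contained sketch of the slot-reuse scheme. The helpers AcquireSlot and ReleaseSlot are hypothetical and not part of this diff: each slot is a reference-counted piece of storage, a top blob grabs a free slot (or a new one), every consumer decrements the count, and a slot whose count reaches zero can be handed to a later blob.

#include <string>
#include <vector>

struct SlotMeta {
  std::string key;  // name of the blob currently owning this slot
  int ref_count;    // consumers of the owning blob that have not run yet
};

// Return the index of the slot owned by `key`, or -1 if none (mirrors FindSlot above).
int FindSlotIdx(const std::vector<SlotMeta>& slots, const std::string& key) {
  for (size_t i = 0; i < slots.size(); ++i) {
    if (slots[i].key == key) return static_cast<int>(i);
  }
  return -1;
}

// Hand `key` a slot: reuse a released one (ref_count == 0) if possible, else grow the pool.
int AcquireSlot(std::vector<SlotMeta>* slots, const std::string& key, int num_consumers) {
  for (size_t i = 0; i < slots->size(); ++i) {
    if ((*slots)[i].ref_count == 0) {
      (*slots)[i].key = key;
      (*slots)[i].ref_count = num_consumers;
      return static_cast<int>(i);
    }
  }
  slots->push_back({key, num_consumers});
  return static_cast<int>(slots->size() - 1);
}

// One consumer of `key` has run; once the count hits zero the slot is free for reuse.
void ReleaseSlot(std::vector<SlotMeta>* slots, const std::string& key) {
  const int idx = FindSlotIdx(*slots, key);
  if (idx != -1) --(*slots)[idx].ref_count;
}

In the real pass, excluded names (inputs, outputs, and mem_param.exclude_blob entries) simply never acquire a slot, which is exactly what the check_exclude guards above enforce.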
26 changes: 18 additions & 8 deletions src/caffe/proto/caffe.proto
@@ -89,13 +89,8 @@ message NetParameter {
// connectivity and behavior, is specified as a LayerParameter.
repeated LayerParameter layer = 100; // ID 100 so layers are printed last.

-// Whether to perform memory optimization
-enum OptimizeMem {
-NO_OPTIM = 0;
-TRAIN_ONLY = 1;
-ALL_OPTIM = 2;
-}
-optional OptimizeMem optimize_mem = 11 [default=TRAIN_ONLY];
+// The configuration of memory optimization
+optional MemoryOptimizationParameter mem_param = 200;

// DEPRECATED: use 'layer' instead.
repeated V1LayerParameter layers = 2;
@@ -322,7 +317,6 @@ message LayerParameter {
// Parameters shared by loss layers.
optional LossParameter loss_param = 101;

-optional bool no_mem_opt = 900 [default = false];
// Layer type-specific parameters.
//
// Note: certain layers may have more than one computational engine
@@ -1301,3 +1295,19 @@ message BatchReductionParameter {
repeated int32 level = 1;
optional ReductionParameter reduction_param = 2;
}

message MemoryOptimizationParameter {
// Whether to optimize memory for nets instantiated in the TRAIN phase
optional bool optimize_train = 1 [default = true];

// Whether to optimize memory for nets instantiated in the TEST phase
optional bool optimize_test = 2 [default = false];

// By default, all input and output blobs are excluded from the optimization for safety.
// Additional blobs can be excluded by name here.
// This is helpful when extracting features from intermediate blobs or when debugging.
repeated string exclude_blob = 3;
}
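
For migration, the three values of the removed enum map onto the new fields as sketched below, using the text-format names defined above; note that omitting mem_param altogether reproduces the old TRAIN_ONLY default.

# optimize_mem: NO_OPTIM
mem_param { optimize_train: false optimize_test: false }

# optimize_mem: TRAIN_ONLY (the old and new default)
mem_param { optimize_train: true optimize_test: false }

# optimize_mem: ALL_OPTIM
mem_param { optimize_train: true optimize_test: true }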