From ddc0587ca12e3366b7b05c8f363340c85b885713 Mon Sep 17 00:00:00 2001
From: Noiredd <snowball91b@gmail.com>
Date: Mon, 16 Oct 2017 15:52:25 +0200
Subject: [PATCH] Hotfix for accuracy interfering with training: internal
 buffer added instead of reusing bottom blob

---
 include/caffe/layers/accuracy_layer.hpp |  2 ++
 src/caffe/layers/accuracy_layer.cpp     |  1 +
 src/caffe/layers/accuracy_layer.cu      | 11 +++--------
 3 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/include/caffe/layers/accuracy_layer.hpp b/include/caffe/layers/accuracy_layer.hpp
index dd2247b9e4d..130adf5228e 100644
--- a/include/caffe/layers/accuracy_layer.hpp
+++ b/include/caffe/layers/accuracy_layer.hpp
@@ -92,6 +92,8 @@ class AccuracyLayer : public Layer<Dtype> {
   int ignore_label_;
   /// Keeps counts of the number of samples per class.
   Blob<Dtype> nums_buffer_;
+  /// Scratch blob for Forward_gpu; avoids clobbering the bottom blobs' diffs.
+  Blob<Dtype> gpu_buffer_;
 };
 
 }  // namespace caffe
diff --git a/src/caffe/layers/accuracy_layer.cpp b/src/caffe/layers/accuracy_layer.cpp
index 392829e6db8..27435cf892f 100644
--- a/src/caffe/layers/accuracy_layer.cpp
+++ b/src/caffe/layers/accuracy_layer.cpp
@@ -34,6 +34,7 @@ void AccuracyLayer<Dtype>::Reshape(
       << "label count (number of labels) must be N*H*W, "
       << "with integer values in {0, 1, ..., C-1}.";
   vector<int> top_shape(0);  // Accuracy is a scalar; 0 axes.
+  gpu_buffer_.ReshapeLike(*bottom[0]);
   top[0]->Reshape(top_shape);
   if (top.size() > 1) {
     // Per-class accuracy is a vector; 1 axes.
diff --git a/src/caffe/layers/accuracy_layer.cu b/src/caffe/layers/accuracy_layer.cu
index a8cff936ccb..ef375601a83 100644
--- a/src/caffe/layers/accuracy_layer.cu
+++ b/src/caffe/layers/accuracy_layer.cu
@@ -71,16 +71,11 @@ void AccuracyLayer<Dtype>::Forward_gpu(
   const int dim = bottom[0]->count() / outer_num_;
   const int num_labels = bottom[0]->shape(label_axis_);
   const int nthreads = outer_num_ * inner_num_;
-  // Since this memory is not used for anything,
-  // we use it here to avoid having to allocate new GPU
-  // memory to accumulate intermediate results in the kernel.
-  Dtype* acc_data = bottom[0]->mutable_gpu_diff();
+  Dtype* acc_data = gpu_buffer_.mutable_gpu_data();
   if (top.size() == 1) {
     // simple case - report only global accuracy.
 
-    // Similarly, this memory is never used elsewhere, and thus we can use it
-    // to avoid having to allocate additional GPU memory.
-    Dtype* counts = bottom[1]->mutable_gpu_diff();
+    Dtype* counts = gpu_buffer_.mutable_gpu_diff();
     // NOLINT_NEXT_LINE(whitespace/operators)
     AccuracyForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
         CAFFE_CUDA_NUM_THREADS>>>(nthreads, bottom_data, bottom_label,
@@ -113,7 +108,7 @@ void AccuracyLayer<Dtype>::Forward_gpu(
 
     // get the overall accuracy
     Dtype acc;
-    caffe_gpu_asum(bottom[0]->count(), acc_data, &acc);
+    caffe_gpu_asum(gpu_buffer_.count(), acc_data, &acc);
     Dtype valid_count;
     caffe_gpu_asum(nums_buffer_.count(), counts, &valid_count);
     if (valid_count > 0) {