Use REGISTER_CPU_GRADIENT_OPERATOR for many operators (pytorch#12616)
Summary:
Pull Request resolved: pytorch#12616

Focusing on operators in common use on mobile.

Also use GRADIENT_OPERATOR_SCHEMA.

Reviewed By: Yangqing

Differential Revision: D10245216

fbshipit-source-id: 5cc023da170149b637fe3c729d3756af948aa265
dreiss authored and facebook-github-bot committed Oct 24, 2018
1 parent df47bbe commit 97b6a25
Showing 13 changed files with 39 additions and 27 deletions.
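
Note: REGISTER_CPU_GRADIENT_OPERATOR and GRADIENT_OPERATOR_SCHEMA let gradient operators be compiled out of builds that never run backpropagation, such as most mobile inference builds; once registration expands to nothing, the unreferenced gradient code can be stripped from the binary. The sketch below is a minimal, self-contained illustration of that pattern, not the actual Caffe2 macro definitions (which differ in detail); NO_GRADIENT_OPS, OpSchema, and RegisterOp here are illustrative stand-ins.

// gradient_macros_sketch.cc -- illustrative only; compile with
// -DNO_GRADIENT_OPS to see gradient registration compile away.
#include <iostream>
#include <string>

struct OpSchema {
  // Builder-style setters return *this so calls can be chained, mirroring
  // GRADIENT_OPERATOR_SCHEMA(Foo).NumInputs(...).NumOutputs(...) in the diff.
  OpSchema& NumInputs(int) { return *this; }
  OpSchema& NumOutputs(int) { return *this; }
};

inline bool RegisterOp(const std::string& name) {
  std::cout << "registered " << name << "\n";  // stand-in for a real registry
  return true;
}

#ifdef NO_GRADIENT_OPS
// Inference-only build: registration expands to nothing, so the gradient
// operator class is never referenced and the linker can drop its code.
#define REGISTER_CPU_GRADIENT_OPERATOR(name, cls)
#else
// Training build: the gradient macro behaves exactly like a forward
// registration (cls is unused in this toy version).
#define REGISTER_CPU_GRADIENT_OPERATOR(name, cls) \
  static bool registered_##name = RegisterOp(#name)
#endif

// The schema macro still yields an object that accepts the chained calls;
// a gradient-free build would simply record nothing in the schema registry.
#define GRADIENT_OPERATOR_SCHEMA(name) \
  static OpSchema schema_##name = OpSchema()

struct ReluGradientOp {};
REGISTER_CPU_GRADIENT_OPERATOR(ReluGradient, ReluGradientOp);
GRADIENT_OPERATOR_SCHEMA(ReluGradient).NumInputs(2).NumOutputs(1);

int main() { return 0; }

Built normally, the sketch registers ReluGradient at static-initialization time; built with -DNO_GRADIENT_OPS, the registration line compiles to nothing, which is the kind of binary-size saving a mobile-focused change like this one is after.
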
4 changes: 2 additions & 2 deletions caffe2/operators/channel_shuffle_op.cc
@@ -134,7 +134,7 @@ bool ChannelShuffleGradientOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
}

REGISTER_CPU_OPERATOR(ChannelShuffle, ChannelShuffleOp<float, CPUContext>);
-REGISTER_CPU_OPERATOR(
+REGISTER_CPU_GRADIENT_OPERATOR(
ChannelShuffleGradient,
ChannelShuffleGradientOp<float, CPUContext>);

@@ -143,7 +143,7 @@ OPERATOR_SCHEMA(ChannelShuffle)
.NumInputs(1)
.NumOutputs(1)
.InheritOnnxSchema();
-OPERATOR_SCHEMA(ChannelShuffleGradient)
+GRADIENT_OPERATOR_SCHEMA(ChannelShuffleGradient)
.IdenticalTypeAndShape()
.NumInputs(1)
.NumOutputs(1);
7 changes: 5 additions & 2 deletions caffe2/operators/clip_op.cc
@@ -33,7 +33,7 @@ bool ClipGradientOp<float, CPUContext>::RunOnDevice() {
}

REGISTER_CPU_OPERATOR(Clip, ClipOp<float, CPUContext>);
-REGISTER_CPU_OPERATOR(ClipGradient, ClipGradientOp<float, CPUContext>);
+REGISTER_CPU_GRADIENT_OPERATOR(ClipGradient, ClipGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(Clip)
.NumInputs(1)
@@ -113,7 +113,10 @@ Y: [[45. 20. 59. 60. 48.]
"*(Tensor`<float>`)* Output tensor clipped within range [`min`, `max`].")
.InheritOnnxSchema();

-OPERATOR_SCHEMA(ClipGradient).NumInputs(2).NumOutputs(1).AllowInplace({{1, 0}});
+GRADIENT_OPERATOR_SCHEMA(ClipGradient)
+    .NumInputs(2)
+    .NumOutputs(1)
+    .AllowInplace({{1, 0}});

class GetClipGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
6 changes: 4 additions & 2 deletions caffe2/operators/dropout_op.cc
@@ -58,7 +58,9 @@ bool DropoutGradientOp<float, CPUContext>::RunOnDevice() {
}

REGISTER_CPU_OPERATOR(Dropout, DropoutOp<float, CPUContext>);
-REGISTER_CPU_OPERATOR(DropoutGrad, DropoutGradientOp<float, CPUContext>);
+REGISTER_CPU_GRADIENT_OPERATOR(
+    DropoutGrad,
+    DropoutGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(Dropout)
.NumInputs(1)
@@ -160,7 +162,7 @@ mask: [[False False False True True]
"nonzero, this output is not filled.")
.InheritOnnxSchema();

-OPERATOR_SCHEMA(DropoutGrad)
+GRADIENT_OPERATOR_SCHEMA(DropoutGrad)
.NumInputs(1, 2)
.NumOutputs(1)
.AllowInplace({{0, 0}});
4 changes: 2 additions & 2 deletions caffe2/operators/elu_op.cc
@@ -42,7 +42,7 @@ REGISTER_CPU_OPERATOR(
TensorTypes<float>,
CPUContext,
EluFunctor<CPUContext>>);
-REGISTER_CPU_OPERATOR(
+REGISTER_CPU_GRADIENT_OPERATOR(
EluGradient,
BinaryElementwiseWithArgsOp<
TensorTypes<float>,
@@ -116,7 +116,7 @@ print("Y:\n", workspace.FetchBlob("Y"))
.InheritOnnxSchema();

// Input: Y, dY, output: dX
-OPERATOR_SCHEMA(EluGradient)
+GRADIENT_OPERATOR_SCHEMA(EluGradient)
.NumInputs(2)
.NumOutputs(1)
.AllowInplace({{1, 0}})
4 changes: 2 additions & 2 deletions caffe2/operators/normalize_op.cc
@@ -66,10 +66,10 @@ Given a matrix, apply L2-normalization along the specified dimension.
)DOC")
.IdenticalTypeAndShape();

-REGISTER_CPU_OPERATOR(
+REGISTER_CPU_GRADIENT_OPERATOR(
NormalizeGradient,
NormalizeGradientOp<float, CPUContext>);
-OPERATOR_SCHEMA(NormalizeGradient)
+GRADIENT_OPERATOR_SCHEMA(NormalizeGradient)
.NumInputs(2)
.NumOutputs(1)
.Arg("axis", "axis to normalize");
6 changes: 4 additions & 2 deletions caffe2/operators/pad_op.cc
@@ -418,7 +418,9 @@ std::vector<TensorShape> PadImageOp<float, CPUContext>::PadTensorInference(
}

REGISTER_CPU_OPERATOR(PadImage, PadImageOp<float, CPUContext>);
-REGISTER_CPU_OPERATOR(PadImageGradient, PadImageGradientOp<float, CPUContext>);
+REGISTER_CPU_GRADIENT_OPERATOR(
+    PadImageGradient,
+    PadImageGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(PadImage)
.NumInputs(1)
@@ -444,7 +446,7 @@ values and stride sizes defined by the ConvPoolOpBase operator.
"the tensor. Dimensions will vary based on various pad and stride "
"sizes.");

-OPERATOR_SCHEMA(PadImageGradient).NumInputs(1).NumOutputs(1);
+GRADIENT_OPERATOR_SCHEMA(PadImageGradient).NumInputs(1).NumOutputs(1);

class GetPadImageGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
6 changes: 4 additions & 2 deletions caffe2/operators/prelu_op.cc
@@ -254,7 +254,9 @@ bool PReluGradientOp<float, CPUContext>::RunOnDevice() {
}

REGISTER_CPU_OPERATOR(PRelu, PReluOp<float, CPUContext>);
-REGISTER_CPU_OPERATOR(PReluGradient, PReluGradientOp<float, CPUContext>);
+REGISTER_CPU_GRADIENT_OPERATOR(
+    PReluGradient,
+    PReluGradientOp<float, CPUContext>);

// Input: X, Slope, output: Y
OPERATOR_SCHEMA(PRelu)
@@ -335,7 +337,7 @@ print("Y:\n", workspace.FetchBlob("Y"))
.InheritOnnxSchema();

// Input: Y, dY, output: dX
-OPERATOR_SCHEMA(PReluGradient).NumInputs(4).NumOutputs(2).SetDoc(R"DOC(
+GRADIENT_OPERATOR_SCHEMA(PReluGradient).NumInputs(4).NumOutputs(2).SetDoc(R"DOC(
PReluGradient takes both Y and dY and uses this to update dX and dW according
to the chain rule and derivatives of the rectified linear function.
4 changes: 2 additions & 2 deletions caffe2/operators/quant_decode_op.cc
@@ -6,7 +6,7 @@
namespace caffe2 {

REGISTER_CPU_OPERATOR(QuantDecode, QuantDecodeOp<QuantDecodeRunTy::RUN_ALWAYS>);
-REGISTER_CPU_OPERATOR(QuantDecodeGradient, QuantDecodeGradientOp);
+REGISTER_CPU_GRADIENT_OPERATOR(QuantDecodeGradient, QuantDecodeGradientOp);
#ifdef CAFFE2_USE_MPSCNN
REGISTER_CPU_OPERATOR(
MPSCNNQuantDecode,
@@ -42,7 +42,7 @@ codes (input 1 ~ n).
.Output(1, "decoded_1", "Decoded tensor for codes_1 (float)")
.Output(2, "decoded_n", "Decoded tensor for codes_n (float)");

-OPERATOR_SCHEMA(QuantDecodeGradient)
+GRADIENT_OPERATOR_SCHEMA(QuantDecodeGradient)
.NumInputs([](int in) { return in >= 3 && in % 2 == 1; })
.NumOutputs(1);

4 changes: 2 additions & 2 deletions caffe2/operators/relu_op.cc
@@ -67,7 +67,7 @@ REGISTER_CPU_OPERATOR(
TensorTypes<float>,
CPUContext,
ReluFunctor<CPUContext>>);
-REGISTER_CPU_OPERATOR(
+REGISTER_CPU_GRADIENT_OPERATOR(
ReluGradient,
BinaryElementwiseOp<
TensorTypes<float>,
@@ -140,7 +140,7 @@ print("Y:\n", workspace.FetchBlob("Y"))
.InheritOnnxSchema();

// Input: Y, dY, output: dX
-OPERATOR_SCHEMA(ReluGradient)
+GRADIENT_OPERATOR_SCHEMA(ReluGradient)
.NumInputs(2)
.NumOutputs(1)
.AllowInplace({{1, 0}})
7 changes: 4 additions & 3 deletions caffe2/operators/resize_op.cc
@@ -151,8 +151,9 @@ bool ResizeNearestGradientOp<float, CPUContext>::RunOnDevice() {
}

REGISTER_CPU_OPERATOR(ResizeNearest, ResizeNearestOp<float, CPUContext>);
-REGISTER_CPU_OPERATOR(ResizeNearestGradient,
-                      ResizeNearestGradientOp<float, CPUContext>);
+REGISTER_CPU_GRADIENT_OPERATOR(
+    ResizeNearestGradient,
+    ResizeNearestGradientOp<float, CPUContext>);

// Input: X, output: Y
OPERATOR_SCHEMA(ResizeNearest)
@@ -176,7 +177,7 @@ output_height = floor(output_height * height_scale)
.InheritOnnxSchema("Upsample");

// Input: dY, output: dX
-OPERATOR_SCHEMA(ResizeNearestGradient)
+GRADIENT_OPERATOR_SCHEMA(ResizeNearestGradient)
.NumInputs(2, 3)
.NumOutputs(1)
.Arg("width_scale", "Scale along width dimension")
4 changes: 2 additions & 2 deletions caffe2/operators/slice_op.cc
@@ -4,7 +4,7 @@
namespace caffe2 {

REGISTER_CPU_OPERATOR(Slice, SliceOp<CPUContext>);
-REGISTER_CPU_OPERATOR(SliceGradient, SliceGradientOp<CPUContext>);
+REGISTER_CPU_GRADIENT_OPERATOR(SliceGradient, SliceGradientOp<CPUContext>);

OPERATOR_SCHEMA(Slice)
.NumInputs(1, 3)
@@ -112,7 +112,7 @@ print("Y:", workspace.FetchBlob("Y"))
.Output(0, "Y", "(*Tensor*): sliced output tensor")
.InheritOnnxSchema();

-OPERATOR_SCHEMA(SliceGradient);
+GRADIENT_OPERATOR_SCHEMA(SliceGradient);

namespace {
struct GetSliceGradient : public GradientMakerBase {
6 changes: 4 additions & 2 deletions caffe2/operators/softmax_op.cc
@@ -79,7 +79,9 @@ bool SoftmaxGradientOp<float, CPUContext>::RunOnDevice() {
}

REGISTER_CPU_OPERATOR(Softmax, SoftmaxOp<float, CPUContext>);
-REGISTER_CPU_OPERATOR(SoftmaxGradient, SoftmaxGradientOp<float, CPUContext>);
+REGISTER_CPU_GRADIENT_OPERATOR(
+    SoftmaxGradient,
+    SoftmaxGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(Softmax)
.NumInputs(1)
@@ -163,7 +165,7 @@ softmax: [[0.24422921 0.43525138 0.18582782 0.12303016 0.01166145]]
.InheritOnnxSchema();

// Input: Y, dY. Output: dX
-OPERATOR_SCHEMA(SoftmaxGradient).NumInputs(2).NumOutputs(1);
+GRADIENT_OPERATOR_SCHEMA(SoftmaxGradient).NumInputs(2).NumOutputs(1);

class GetSoftmaxGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
4 changes: 2 additions & 2 deletions caffe2/operators/softsign_op.cc
@@ -40,7 +40,7 @@ REGISTER_CPU_OPERATOR(
TensorTypes<float>,
CPUContext,
SoftsignFunctor<CPUContext>>);
-REGISTER_CPU_OPERATOR(
+REGISTER_CPU_GRADIENT_OPERATOR(
SoftsignGradient,
BinaryElementwiseOp<
TensorTypes<float>,
@@ -108,7 +108,7 @@ print("Y:\n", workspace.FetchBlob("Y"))
.Output(0, "output", "Output data blob with same shape as input")
.InheritOnnxSchema();

-OPERATOR_SCHEMA(SoftsignGradient)
+GRADIENT_OPERATOR_SCHEMA(SoftsignGradient)
.NumInputs(2)
.NumOutputs(1)
.AllowInplace({{1, 0}})