From 2b7f751fe0c4b41483cefc02124f5925be4df6af Mon Sep 17 00:00:00 2001 From: zhangxiaoli73 <380761639@qq.com> Date: Fri, 6 Dec 2019 12:17:40 +0800 Subject: [PATCH] support roialign backward (#2975) * support roialign backward * fix sparselinear unit test --- .../intel/analytics/bigdl/nn/RoiAlign.scala | 190 +++++++++++++++++- .../analytics/bigdl/nn/RoiAlignSpec.scala | 87 ++++++++ .../analytics/bigdl/nn/SparseLinearSpec.scala | 8 +- 3 files changed, 273 insertions(+), 12 deletions(-) diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/RoiAlign.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/RoiAlign.scala index a81ba967dd6..40a18757ec5 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/RoiAlign.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/RoiAlign.scala @@ -18,7 +18,7 @@ package com.intel.analytics.bigdl.nn import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.Table -import com.intel.analytics.bigdl.nn.abstractnn.AbstractModule +import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import scala.reflect._ @@ -49,11 +49,11 @@ class RoiAlign[T: ClassTag] ( val pooledW: Int, val mode: String = "avg", val aligned: Boolean = true -)(implicit ev: TensorNumeric[T]) extends AbstractModule[Table, Tensor[T], T]{ - override def updateOutput(input: Table): Tensor[T] = { +)(implicit ev: TensorNumeric[T]) extends AbstractModule[Activity, Tensor[T], T]{ + override def updateOutput(input: Activity): Tensor[T] = { if (classTag[T] == classTag[Float]) { - val data = input[Tensor[Float]](1) - val rois = input[Tensor[Float]](2) + val data = input.toTable[Tensor[Float]](1) + val rois = input.toTable[Tensor[Float]](2) val num_rois = rois.size(1) val channels = data.size(2) @@ -78,8 +78,8 @@ class RoiAlign[T: ClassTag] ( width, spatialScale) } else if (classTag[T] == classTag[Double]) { - val data = input[Tensor[Double]](1) - val rois = input[Tensor[Double]](2) + val data = input.toTable[Tensor[Double]](1) + val rois = input.toTable[Tensor[Double]](2) val num_rois = rois.size(1) val channels = data.size(2) @@ -110,8 +110,180 @@ class RoiAlign[T: ClassTag] ( output } - override def updateGradInput(input: Table, gradOutput: Tensor[T]): Table = { - throw new UnsupportedOperationException("Not support backward propagation") + + private def bilinearInterpolateGradient(height: Int, width: Int, y: Float, x: Float) + : (Float, Float, Float, Float, Int, Int, Int, Int) = { + var w1: Float = 0.0f + var w2: Float = 0.0f + var w3: Float = 0.0f + var w4: Float = 0.0f + var x_low : Int = 0 + var x_high: Int = 0 + var y_low: Int = 0 + var y_high: Int = 0 + + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + return (w1, w2, w3, w4, x_low, x_high, y_low, y_high) + } + + var realY = if (y <= 0) 0 else y + var realX = if (x <= 0) 0 else x + y_low = realY.toInt + x_low = realX.toInt + + if (y_low >= height - 1) { + y_high = height - 1 + y_low = height - 1 + realY = y_low + } else y_high = y_low + 1 + + if (x_low >= width - 1) { + x_high = width - 1 + x_low = width - 1 + realX = x_low + } else x_high = x_low + 1 + + val ly = realY - y_low + val lx = realX - x_low + val hy = 1.0 - ly + val hx = 1.0 - lx + + w1 = (hy * hx).toFloat + w2 = (hy * lx).toFloat + w3 = (ly * hx).toFloat + w4 = (ly * lx).toFloat + + return (w1, w2, w3, w4, x_low, x_high, y_low, y_high) + 
} + + private def roiAlignBackward( + nums: Int, + gradOutputArr: Array[T], + gradInputArr: Array[T], + gradInputOffset: Int, + rois: Array[T], + channels: Int, + height: Int, + width: Int, + pooled_height: Int, + pooled_width: Int, + sampling_ratio : Int, + n_stride : Int, + c_stride : Int, + h_stride : Int, + w_stride : Int, + spatial_scale: Float) { + val roi_cols = 4 + for (index <- 0 until nums) { + val pw = index % pooled_width + val ph = (index / pooled_width) % pooled_height + val c = (index / pooled_width / pooled_height) % channels + val n = index / pooled_width / pooled_height / channels + val offset_rois = n * roi_cols + + val offset = if (aligned) 0.5f else 0.0f + val roi_start_w = ev.toType[Float](rois(offset_rois)) * spatial_scale - offset + val roi_start_h = ev.toType[Float](rois(offset_rois + 1)) * spatial_scale - offset + val roi_end_w = ev.toType[Float](rois(offset_rois + 2)) * spatial_scale - offset + val roi_end_h = ev.toType[Float](rois(offset_rois + 3)) * spatial_scale - offset + + var roi_width = roi_end_w - roi_start_w + var roi_height = roi_end_h - roi_start_h + + if (aligned) { + require(roi_width >= 0 && roi_height >= 0, + s"ROIs in ROIAlign do not have non-negative size!" + + s"But get ${roi_height} ${roi_width}") + } else { + roi_width = math.max(roi_width, 1.0f) + roi_height = math.max(roi_height, 1.0f) + } + + val bin_size_h = roi_height / pooled_height + val bin_size_w = roi_width / pooled_width + val output_offset = n * n_stride + c * c_stride + val grad_output_value = gradOutputArr(output_offset + ph * h_stride + pw * w_stride) + + // We use roi_bin_grid to sample the grid and mimic integral + val roi_bin_grid_h = + if (sampling_ratio > 0) sampling_ratio else math.ceil(roi_height / pooled_height).toInt + val roi_bin_grid_w = + if (sampling_ratio > 0) sampling_ratio else math.ceil(roi_width / pooled_width).toInt + + // We do average (integral) pooling inside a bin + val count = roi_bin_grid_h * roi_bin_grid_w + + for (iy <- 0 until roi_bin_grid_h) { + val y = roi_start_h + ph * bin_size_h + (iy + 0.5) * bin_size_h / roi_bin_grid_h + for (ix <- 0 until roi_bin_grid_w) { + val x = roi_start_w + pw * bin_size_w + (ix + 0.5) * bin_size_w / roi_bin_grid_w + + val (w1, w2, w3, w4, x_low, x_high, y_low, y_high) = + bilinearInterpolateGradient(height, width, y.toFloat, x.toFloat) + + val g1 = ev.times(grad_output_value, ev.fromType(w1 / count)) + val g2 = ev.times(grad_output_value, ev.fromType(w2 / count)) + val g3 = ev.times(grad_output_value, ev.fromType(w3 / count)) + val g4 = ev.times(grad_output_value, ev.fromType(w4 / count)) + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + gradInputArr(gradInputOffset + y_low * width + x_low) = + ev.plus(gradInputArr(gradInputOffset + y_low * width + x_low), g1) + gradInputArr(gradInputOffset + y_low * width + x_high) = + ev.plus(gradInputArr(gradInputOffset + y_low * width + x_high), g2) + gradInputArr(gradInputOffset + y_high * width + x_low) = + ev.plus(gradInputArr(gradInputOffset + y_high * width + x_low), g3) + gradInputArr(gradInputOffset + y_high * width + x_high) = + ev.plus(gradInputArr(gradInputOffset + y_high * width + x_high), g4) + } + } + } + } + } + + override def updateGradInput(input: Activity, gradOutput: Tensor[T]): Activity = { + require(mode == "avg", s"Only support backward for average mode, but get ${mode}") + val data = input.toTable[Tensor[T]](1) + val rois = input.toTable[Tensor[T]](2) + val num_rois = rois.size(1) + val channels = data.size(2) + val height = data.size(3) + val 
width = data.size(4) + + require(gradOutput.isContiguous(), "gradOutput should be contiguous") + require(gradOutput.dim() == 4, s"gradOutput should be with 4 dims, but get ${gradOutput.dim()}") + + val n_stride = gradOutput.stride(1) + val c_stride = gradOutput.stride(2) + val h_stride = gradOutput.stride(3) + val w_stride = gradOutput.stride(4) + + if (gradInput == null) gradInput = Tensor[T]() + gradInput.toTensor[T].resize(channels, height, width) + val gradInputArr = gradInput.toTensor[T].storage().array() + val gradInputOffset = gradInput.toTensor[T].storageOffset() - 1 + + roiAlignBackward( + gradOutput.nElement(), + gradOutputArr = gradOutput.asInstanceOf[Tensor[T]].storage().array(), + gradInputArr = gradInputArr, + gradInputOffset = 0, + rois = rois.storage().array(), + channels = channels, + height = height, + width = width, + pooled_height = pooledH, + pooled_width = pooledW, + sampling_ratio = samplingRatio, + n_stride = n_stride, + c_stride = c_stride, + h_stride = h_stride, + w_stride = w_stride, + spatial_scale = spatialScale) + + gradInput } private def poolOneRoiFloat( diff --git a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/RoiAlignSpec.scala b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/RoiAlignSpec.scala index 219b9ef496c..158459fcaec 100644 --- a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/RoiAlignSpec.scala +++ b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/RoiAlignSpec.scala @@ -16,6 +16,7 @@ package com.intel.analytics.bigdl.nn +import com.intel.analytics.bigdl.nn.mkldnn.Equivalent import com.intel.analytics.bigdl.tensor.{Storage, Tensor} import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.serializer.ModuleSerializationTest @@ -209,6 +210,92 @@ class RoiAlignSpec extends FlatSpec with Matchers { out should be(expectedWithAlign) out2 should be(expected) } + + "backward" should "work correctly" in { + val input = Tensor[Float](T(T(T( + T(0.0611, 0.2246, 0.2343, 0.1771, 0.5561, 0.1094, 0.4609, 0.7084, + 0.5798, 0.4967), + T(0.5104, 0.3295, 0.7182, 0.3845, 0.0898, 0.1175, 0.6402, 0.1968, + 0.5124, 0.7118), + T(0.9249, 0.9997, 0.8927, 0.8767, 0.8450, 0.1544, 0.1705, 0.9842, + 0.8127, 0.4358), + T(0.4143, 0.4284, 0.7578, 0.9225, 0.9643, 0.1760, 0.9539, 0.3134, + 0.4544, 0.2956), + T(0.1875, 0.2433, 0.3493, 0.4441, 0.4069, 0.2859, 0.8036, 0.3218, + 0.3639, 0.2985), + T(0.6635, 0.2552, 0.4144, 0.8396, 0.7418, 0.2865, 0.7929, 0.5001, + 0.8977, 0.1051), + T(0.5809, 0.9867, 0.1315, 0.2391, 0.3047, 0.5158, 0.4514, 0.4929, + 0.5301, 0.2647), + T(0.1671, 0.5482, 0.2380, 0.5374, 0.4422, 0.6454, 0.5376, 0.2245, + 0.6632, 0.8439), + T(0.0109, 0.2807, 0.9301, 0.5438, 0.8123, 0.7750, 0.7308, 0.9924, + 0.7282, 0.2328), + T(0.9997, 0.5540, 0.4200, 0.5419, 0.8642, 0.4312, 0.1213, 0.8956, + 0.8784, 0.9128))))) + + val rois = Tensor[Float](T(T(0.0f, 0.0f, 9.0f, 9.0f), + T(0.0f, 5.0f, 4.0f, 9.0f), + T(5.0f, 5.0f, 9.0f, 9.0f))) + + val layer = RoiAlign[Float](spatialScale = 1, samplingRatio = 2, pooledH = 5, + pooledW = 5, aligned = true) + val out = layer.forward(T(input, rois)) + + val output = Tensor[Float](T(T(T( + T(0.2593, 0.3618, 0.2819, 0.3935, 0.5265), + T(0.7170, 0.8159, 0.6562, 0.4006, 0.6567), + T(0.3210, 0.4949, 0.5372, 0.5892, 0.4368), + T(0.6147, 0.3702, 0.4642, 0.5216, 0.5698), + T(0.2292, 0.5687, 0.6427, 0.6625, 0.6822))), + + T(T(T(0.5731, 0.3794, 0.3402, 0.4984, 0.7202), + T(0.6138, 0.7188, 0.4918, 0.2772, 0.4116), + T(0.3937, 0.6494, 0.4761, 0.2458, 0.3759), + T(0.1376, 0.3636, 0.4568, 0.4737, 
0.5367), + T(0.1754, 0.2846, 0.5770, 0.7363, 0.5957))), + + T(T(T(0.3776, 0.6335, 0.6252, 0.5709, 0.6844), + T(0.4507, 0.5218, 0.5245, 0.5387, 0.5696), + T(0.5452, 0.5203, 0.4266, 0.4301, 0.5784), + T(0.6602, 0.6221, 0.5252, 0.5232, 0.6680), + T(0.7253, 0.6559, 0.7846, 0.8819, 0.6998))))) + + val gradOutput = Tensor[Float](T(T( + T(T(0.9688, 0.4150, 0.4094, 0.6885, 0.6800), + T(0.6415, 0.4019, 0.4875, 0.9569, 0.5172), + T(0.9534, 0.8540, 0.9555, 0.0836, 0.1684), + T(0.1883, 0.9384, 0.3543, 0.2027, 0.5069), + T(0.7145, 0.6801, 0.9717, 0.2403, 0.3372))), + T(T(T(0.5260, 0.1794, 0.4793, 0.3070, 0.7682), + T(0.6350, 0.7321, 0.9899, 0.1897, 0.6957), + T(0.1313, 0.9514, 0.3386, 0.5337, 0.1051), + T(0.1800, 0.4603, 0.7114, 0.5114, 0.2422), + T(0.1480, 0.2527, 0.2014, 0.3004, 0.7147))), + T(T(T(0.4033, 0.9819, 0.4697, 0.3446, 0.7631), + T(0.3554, 0.2396, 0.6231, 0.6009, 0.3054), + T(0.2082, 0.2404, 0.6693, 0.7529, 0.1088), + T(0.0441, 0.4054, 0.0348, 0.7627, 0.0077), + T(0.9582, 0.6859, 0.3182, 0.5291, 0.3420))))) + + Equivalent.nearequals(output, out, 1e-3) should be(true) + + val grad = layer.backward(T(input, rois), gradOutput).toTensor[Float] + + val expectedGrad = Tensor[Float](T(T( + T(0.3203, 0.2666, 0.1312, 0.1305, 0.1295, 0.1816, 0.2177, 0.2157, 0.2150, 0.0098), + T(0.2828, 0.2374, 0.1246, 0.1265, 0.1292, 0.1868, 0.2267, 0.2018, 0.1945, 0.0088), + T(0.2029, 0.1776, 0.1216, 0.1322, 0.1475, 0.2314, 0.2895, 0.1867, 0.1565, 0.0071), + T(0.2432, 0.2201, 0.1775, 0.1889, 0.2054, 0.1912, 0.1814, 0.1288, 0.1133, 0.0051), + T(0.3845, 0.3403, 0.3323, 0.3769, 0.3154, 0.2258, 0.1666, 0.1222, 0.1580, 0.0195), + T(0.8482, 0.8043, 0.8665, 0.9852, 0.3694, 0.7024, 0.9496, 0.7323, 0.8099, 0.1104), + T(0.8683, 1.2765, 1.0463, 0.7984, 0.2498, 0.4796, 0.7130, 1.1149, 0.6427, 0.0529), + T(0.6204, 1.1059, 1.0230, 0.6332, 0.3176, 0.4221, 0.5735, 0.9508, 0.4563, 0.0167), + T(0.4918, 0.6479, 0.7008, 0.8754, 0.5076, 0.9881, 0.7134, 0.6981, 0.5184, 0.0460), + T(0.0427, 0.0525, 0.0614, 0.1103, 0.0510, 0.1533, 0.1064, 0.0863, 0.0695, 0.0079)))) + + Equivalent.nearequals(grad, expectedGrad, 1e-3) should be(true) + } } class RoiAlignSerialTest extends ModuleSerializationTest { diff --git a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SparseLinearSpec.scala b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SparseLinearSpec.scala index 4c1da650a73..da88ce1e365 100644 --- a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SparseLinearSpec.scala +++ b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SparseLinearSpec.scala @@ -19,7 +19,7 @@ package com.intel.analytics.bigdl.nn import org.scalatest.{FlatSpec, Matchers} import com.intel.analytics.bigdl.numeric.NumericFloat import com.intel.analytics.bigdl.tensor.{SparseTensor, Tensor} -import com.intel.analytics.bigdl.utils.T +import com.intel.analytics.bigdl.utils.{RandomGenerator, T} import com.intel.analytics.bigdl.utils.serializer.ModuleSerializationTest import scala.util.Random @@ -143,9 +143,11 @@ class SparseLinearSpec extends FlatSpec with Matchers { } "Sparse Linear" should "return the same result with Linear 7" in { + RandomGenerator.RNG.setSeed(10) + val rnd = new Random(10) val gradOutput = Tensor(4, 2).rand() - val input = Tensor(4, 1023213).apply1(_ => Random.nextInt(100000) / 99999 * Random.nextFloat()) - val input2 = Tensor(4, 50).apply1(_ => Random.nextInt(2) * Random.nextFloat()) + val input = Tensor(4, 1023213).apply1(_ => rnd.nextInt(100000) / 99999 * rnd.nextFloat()) + val input2 = Tensor(4, 50).apply1(_ => rnd.nextInt(2) * 
rnd.nextFloat()) val sl = SparseLinear(1023263, 2, backwardStart = 1, backwardLength = 1023263) val sj = SparseJoinTable(2) val sparseModel = Sequential().add(ParallelTable().add(Identity()).add(Identity()))
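
Note on the backward math in the RoiAlign.scala hunk: each pooled output cell
(ph, pw) is the average of count = roi_bin_grid_h * roi_bin_grid_w bilinear
samples, so the backward pass hands each sample point gradOutput / count and
then splits that value over the sample's four integer neighbors with the
weights w1..w4 computed by bilinearInterpolateGradient. Below is a minimal,
self-contained Scala sketch of that scatter step. It is illustrative only:
BilinearScatterSketch and bilinearGradient are names invented for this note
(the out-of-range case returns -1 indices here, in the spirit of the patch's
x_low >= 0 guard), not identifiers from the patch.

    object BilinearScatterSketch {

      /** Weights and neighbor indices for one fractional sample point (y, x),
       *  following the same clamping rules as the patch. */
      def bilinearGradient(height: Int, width: Int, yIn: Float, xIn: Float)
          : (Float, Float, Float, Float, Int, Int, Int, Int) = {
        // Samples outside the feature map contribute nothing.
        if (yIn < -1.0f || yIn > height || xIn < -1.0f || xIn > width) {
          return (0f, 0f, 0f, 0f, -1, -1, -1, -1)
        }
        var y = math.max(yIn, 0f)
        var x = math.max(xIn, 0f)
        var yLow = y.toInt
        var xLow = x.toInt
        var yHigh = yLow + 1
        var xHigh = xLow + 1
        if (yLow >= height - 1) { yLow = height - 1; yHigh = height - 1; y = yLow }
        if (xLow >= width - 1) { xLow = width - 1; xHigh = width - 1; x = xLow }
        val ly = y - yLow // fractional distances to the low neighbors
        val lx = x - xLow
        val hy = 1f - ly
        val hx = 1f - lx
        (hy * hx, hy * lx, ly * hx, ly * lx, xLow, xHigh, yLow, yHigh)
      }

      def main(args: Array[String]): Unit = {
        val (height, width) = (10, 10)
        val grad = Array.ofDim[Float](height * width) // flat buffer, like gradInputArr
        val g = 1.0f                                  // one upstream gradient value

        val (w1, w2, w3, w4, xl, xh, yl, yh) =
          bilinearGradient(height, width, 3.25f, 6.75f)
        if (xl >= 0 && xh >= 0 && yl >= 0 && yh >= 0) {
          grad(yl * width + xl) += g * w1 // accumulate rather than assign:
          grad(yl * width + xh) += g * w2 // different sample points of the same
          grad(yh * width + xl) += g * w3 // (or another) bin can land on the
          grad(yh * width + xh) += g * w4 // same input pixel
        }
        // w1 + w2 + w3 + w4 = 1, so the scatter conserves the gradient mass.
        assert(math.abs(grad.sum - g) < 1e-6f)
      }
    }

The accumulation (ev.plus in the patch, += above) rather than assignment is
essential: bins of overlapping ROIs, and neighboring sample points within one
bin, write to the same input pixels. The range check admitting y, x down to -1
matches the aligned convention, where the 0.5 pixel offset can push sample
coordinates slightly negative.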
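To see the new code path end to end, here is a usage sketch modeled on the
shapes in the new RoiAlignSpec test (1x1x10x10 feature map, ROIs given as
x1, y1, x2, y2 rows). Using out.clone() as a stand-in upstream gradient and
the App wrapper are this example's assumptions, not part of the patch.

    import com.intel.analytics.bigdl.nn.RoiAlign
    import com.intel.analytics.bigdl.tensor.Tensor
    import com.intel.analytics.bigdl.utils.T

    object RoiAlignBackwardUsage extends App {
      val data = Tensor[Float](1, 1, 10, 10).rand()
      val rois = Tensor[Float](T(T(0.0f, 0.0f, 9.0f, 9.0f),
                                 T(0.0f, 5.0f, 4.0f, 9.0f)))

      val layer = RoiAlign[Float](spatialScale = 1, samplingRatio = 2,
        pooledH = 5, pooledW = 5, aligned = true)

      // Forward pools each ROI to 5x5: output is (num_rois, channels, 5, 5).
      val out = layer.forward(T(data, rois))

      // Backward is now supported for mode = "avg" only (see the require in
      // updateGradInput); it scatters the gradient back onto the feature map.
      val gradIn = layer.backward(T(data, rois), out.clone()).toTensor[Float]
      println(gradIn.size().mkString("x"))
    }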