Implement reference nGraph implementation for "Interpolate-4" with 5D…

… tensor support in the "linear_onnx" mode (#3948) * Commit. * Written the structure InfoForLinearONNXMode5D that contains info to perform interpolation in 'linear_onnx' mode for 5D tensors. * Started to write the method get_info_for_linear_onnx_mode5D() that returns info for calculations of 'linear_onnx' mode in 5D case. * Written the method InterpolateEvalHelper::get_info_for_linear_onnx_mode5D(). * Code style fix. * Started to write calculation of 5D case of 'linear_onnx' mode. * Written the method void InterpolateEval<T>::linear_onnx5D_func(const T* input_data, T* out). * Added dispatching of 4D/5D cases of the mode 'linear_onnx'. * Fixed code style. * Some fixes. * Code style fixes. * Now linear_onnx_func throws an exception for incorrect input rank. * Code style fix. * Started to write tests for evaluation of 'linear_onnx' mode in the 5D case. * Added first test for linear_onnx 5D. * Small fixes. * Written tests for evaluation of Interpolate-4 in linear_onnx 5D case. * Some code style fixes. * Small fix. * Corrected documentation. * Started to write generic implementation of 'linear_onnx' mode, for any ranks. * Written the draft of a generic (for all ranks) implementation of 'linear_onnx' mode. * Small fixes. * Small fix. * Small fix. * Small fix. * Code style fix. * Small fix. * Code style fix. * Some fixes. * Some fix. * Small fix. * Small fix. * Code style fix. * Added check for axes correctness into a generic implementation of the 'linear_onnx' mode. * Now 5D case of the 'linear_onnx' mode is calculated using generic function. * Code style fix. * Deleted unused variable. * Added debug prints. * Small fix. * Some fixes. * Code style fix. * Now all ranks are processed by a generic implementation in the 'linear_onnx' mode. * Deleted name of missed test. * Deleted 4D case implementation of the 'linear_onnx' mode. * Reverted change in tests. * Added needed 'const' modifiers and added a comment about the variable 'axis_idx_offset'. * Small fixes.
openvinotoolkit · Feb 9, 2021 · 2c4c3a7 · 2c4c3a7
1 parent deca4fc
commit 2c4c3a7
Show file tree

Hide file tree

Showing 4 changed files with 429 additions and 136 deletions.
diff --git a/docs/ops/image/Interpolate_4.md b/docs/ops/image/Interpolate_4.md
@@ -15,7 +15,7 @@
   * **Type**: string
   * **Default value**: none
   * **Required**: *yes*
-  * **Note**: Only 2D and 4D tensors with `axes = {0, 1}` and `axes = {2, 3}` respectively are supported for `"mode" == "linear_onnx"`.
+  **Note**: Only 2D, 3D, 4D, 5D tensors with `axes = {0, 1}`, `axes = {0, 1, 2}`, `axes = {2, 3}`,  `axes = {2, 3, 4}` respectively are supported for `"mode" == "linear_onnx"`.
 
 * *shape_calculation_mode*
 
@@ -366,7 +366,119 @@ class InterpolateCalculation:
 
         return result
 
-    def onnx_linear_interpolation(self, input_data):
+    def onnx_linear_interpolation5D(self, input_data):
+        rank = len(self.input_shape)
+        assert rank in [3, 5], "mode 'linear_onnx' supports only 3D or 5D tensors"
+        assert set(self.axes) == {2, 3, 4} or set(self.axes) == {0, 1, 2}, \
+            "mode 'linear_onnx' supports only case when axes = {2, 3, 4} or axes = {0, 1, 2}"
+
+        result = np.zeros(self.output_shape)
+
+        if rank == 3:
+            reshaped_data = np.reshape(input_data, (1, 1, self.input_shape[0], self.input_shape[1], self.input_shape[2]))
+            result = np.reshape(result,  (1, 1, self.output_shape[0], self.output_shape[1], self.output_shape[2]))
+        else:
+            reshaped_data = input_data
+
+        input_shape = np.array(reshaped_data.shape).astype(np.int64)
+        output_shape = np.array(result.shape).astype(np.int64)
+
+        batch_size = input_shape[0];
+        num_channels = input_shape[1];
+        input_depth = input_shape[2];
+        input_height = input_shape[3];
+        input_width = input_shape[4];
+        output_depth = output_shape[2];
+        output_height = output_shape[3];
+        output_width = output_shape[4];
+
+        depth_scale = self.scales[0];
+        height_scale = self.scales[1];
+        width_scale = self.scales[2];
+
+        z_original = np.zeros(output_depth).astype(np.float)
+        y_original = np.zeros(output_height).astype(np.float)
+        x_original = np.zeros(output_width).astype(np.float)
+
+        in_z1 = np.zeros(output_depth).astype(np.int64)
+        in_z2 = np.zeros(output_depth).astype(np.int64)
+        in_y1 = np.zeros(output_height).astype(np.int64)
+        in_y2 = np.zeros(output_height).astype(np.int64)
+        in_x1 = np.zeros(output_width).astype(np.int64)
+        in_x2 = np.zeros(output_width).astype(np.int64)
+
+        dz1 = np.zeros(output_depth).astype(np.float)
+        dz2 = np.zeros(output_depth).astype(np.float)
+
+        dy1 = np.zeros(output_height).astype(np.float)
+        dy2 = np.zeros(output_height).astype(np.float)
+
+        dx1 = np.zeros(output_width).astype(np.float)
+        dx2 = np.zeros(output_width).astype(np.float)
+
+        for z in range(0, output_depth):
+            in_z = self.get_original_coordinate(z, depth_scale, output_depth, input_depth)
+            z_original[z] = in_z
+            in_z = max(0, min(in_z, input_depth - 1))
+            in_z1[z] = max(0, min(int(in_z), input_depth - 1))
+            in_z2[z] = min(in_z1[z] + 1, input_depth - 1)
+            dz1[z] = abs(in_z - in_z1[z])
+            dz2[z] = abs(in_z - in_z2[z])
+
+            if in_z1[z] == in_z2[z]:
+                dz1[z] = 0.5
+                dz2[z] = 0.5
+
+        for y in range(0, output_height):
+            in_y = self.get_original_coordinate(y, height_scale, output_height, input_height)
+            y_original[y] = in_y
+            in_y = max(0, min(in_y, input_height - 1))
+            in_y1[y] = max(0, min(int(in_y), input_height - 1))
+            in_y2[y] = min(in_y1[y] + 1, input_height - 1)
+            dy1[y] = abs(in_y - in_y1[y])
+            dy2[y] = abs(in_y - in_y2[y])
+
+            if in_y1[y] == in_y2[y]:
+                dy1[y] = 0.5
+                dy2[y] = 0.5
+
+        for x in range(0, output_width):
+            in_x = self.get_original_coordinate(x, width_scale, output_width, input_width);
+            x_original[x] = in_x
+            in_x = max(0.0, min(in_x, input_width - 1));
+
+            in_x1[x] = min(in_x, input_width - 1);
+            in_x2[x] = min(in_x1[x] + 1, input_width - 1);
+
+            dx1[x] = abs(in_x - in_x1[x]);
+            dx2[x] = abs(in_x - in_x2[x]);
+            if in_x1[x] == in_x2[x]:
+                dx1[x] = 0.5
+                dx2[x] = 0.5
+        for n in range(0, batch_size):
+            for c in range(0, num_channels):
+                for z in range(0, output_depth):
+                    for y in range(0, output_height):
+                        for x in range(0, output_width):
+                            x111 = reshaped_data[n, c, in_z1[z], in_y1[y], in_x1[x]]
+                            x211 = reshaped_data[n, c, in_z1[z], in_y1[y], in_x2[x]]
+                            x121 = reshaped_data[n, c, in_z1[z], in_y2[y], in_x1[x]]
+                            x221 = reshaped_data[n, c, in_z1[z], in_y2[y], in_x2[x]]
+                            x112 = reshaped_data[n, c, in_z2[z], in_y1[y], in_x1[x]]
+                            x212 = reshaped_data[n, c, in_z2[z], in_y1[y], in_x2[x]]
+                            x122 = reshaped_data[n, c, in_z2[z], in_y2[y], in_x1[x]]
+                            x222 = reshaped_data[n, c, in_z2[z], in_y2[y], in_x2[x]]
+
+                            temp = dx2[x] * dy2[y] * dz2[z] * x111 + dx1[x] * dy2[y] * dz2[z] * x211
+                            temp += dx2[x] * dy1[y] * dz2[z] * x121 + dx1[x] * dy1[y] * dz2[z] * x221
+                            temp += dx2[x] * dy2[y] * dz1[z] * x112 + dx1[x] * dy2[y] * dz1[z] * x212
+                            temp += dx2[x] * dy1[y] * dz1[z] * x122 + dx1[x] * dy1[y] * dz1[z] * x222
+
+                            result[n, c, z, y, x] = temp
+
+        return np.reshape(result, self.output_shape)
+
+    def onnx_linear_interpolation4D(self, input_data):
         rank = len(self.input_shape)
         assert rank in [2, 4], "mode 'linear_onnx' supports only 2D or 4D tensors"
         assert set(self.axes) == {2, 3} or set(self.axes) == {0, 1}, \
@@ -446,6 +558,15 @@ class InterpolateCalculation:
 
         return np.reshape(result, self.output_shape)
 
+    def onnx_linear_interpolation(self, input_data):
+        rank = len(self.input_shape)
+        assert rank in [2, 3, 4, 5], "mode 'linear_onnx' supports only 2D, 3D, 4D, or 5D tensors"
+
+        if rank in [2, 4]:
+            self.onnx_linear_interpolation4D(input_data)
+        else:
+            self.onnx_linear_interpolation5D(input_data)
+
     def nearest_interpolation(self, input_data):
         result = np.zeros(self.output_shape)
 

diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/interpolate.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/interpolate.hpp
@@ -244,30 +244,20 @@ namespace ngraph
 
                 Coordinate get_input_coords_for_nearest_mode(const Coordinate& output_coord);
 
-                struct InfoForLinearONNXMode
+                struct InfoForGenericLinearONNXMode
                 {
-                    std::vector<float> y_original;
-                    std::vector<float> x_original;
-
-                    std::vector<int64_t> input_width_mul_y1;
-                    std::vector<int64_t> input_width_mul_y2;
-                    std::vector<int64_t> in_x1;
-                    std::vector<int64_t> in_x2;
-
-                    std::vector<float> dy1;
-                    std::vector<float> dy2;
-                    std::vector<float> dx1;
-                    std::vector<float> dx2;
-
+                    int64_t input_data_ptr_increment;
+                    int64_t output_data_ptr_increment;
                     int64_t batch_size;
                     int64_t num_channels;
-                    int64_t input_height;
-                    int64_t input_width;
-                    int64_t output_height;
-                    int64_t output_width;
+                    int64_t spatial_rank;
+                    std::vector<int64_t> input_index_multipliers;
+                    std::vector<int64_t> output_index_multipliers;
+                    std::vector<int64_t> input_spatial_shape;
+                    std::vector<int64_t> output_spatial_shape;
                 };
 
-                InfoForLinearONNXMode get_info_for_linear_onnx_mode();
+                InfoForGenericLinearONNXMode get_info_for_generic_linear_onnx();
 
                 struct InfoForLinearMode
                 {
@@ -392,7 +382,7 @@ namespace ngraph
                 /// \param out pointer to memory block for output data
                 void linear_func(const T* input_data, T* out);
 
-                /// \brief Calculates interpolation as in ONNX 'linear' mode
+                /// \brief Calculates interpolation as in ONNX 'linear' mode (generic case)
                 ///
                 /// \param input_data pointer to input data
                 /// \param out pointer to memory block for output data
@@ -456,56 +446,152 @@ namespace ngraph
             template <typename T>
             void InterpolateEval<T>::linear_onnx_func(const T* input_data, T* out)
             {
-                size_t input_rank = m_input_data_shape.size();
-                size_t num_of_axes = m_axes.size();
+                const size_t input_rank = m_input_data_shape.size();
+
+                assert(input_rank > 1);
 
-                assert((input_rank == 2) || (input_rank == 4));
-                assert((num_of_axes == 2) || (num_of_axes == input_rank));
+                const size_t num_of_axes = m_axes.size();
 
-                bool correct_axes = ((m_axes[0] == 0) && (m_axes[1] == 1)) ||
-                                    ((m_axes[0] == 2) && (m_axes[1] == 3));
+                bool correct_axes = ((input_rank == 2) && (num_of_axes == 2) && (m_axes[0] == 0) &&
+                                     (m_axes[1] == 1)) ||
+                                    ((input_rank == 3) && (num_of_axes == 3) && (m_axes[0] == 0) &&
+                                     (m_axes[1] == 1) && (m_axes[2] == 2));
 
-                if ((num_of_axes == 4) && (input_rank == 4))
+                if (input_rank >= 4)
                 {
-                    correct_axes = (m_axes[0] == 0) && (m_axes[1] == 1) && (m_axes[2] == 2) &&
-                                   (m_axes[3] == 3);
+                    std::vector<int64_t> all_axes;
+                    std::vector<int64_t> axes_without_batch_and_channels;
+                    all_axes.push_back(0);
+                    all_axes.push_back(1);
+                    for (int64_t i = 2; i < static_cast<int64_t>(input_rank); ++i)
+                    {
+                        all_axes.push_back(i);
+                        axes_without_batch_and_channels.push_back(i);
+                    }
+
+                    correct_axes = ((num_of_axes == input_rank) && (m_axes == all_axes)) ||
+                                   ((num_of_axes == input_rank - 2) &&
+                                    (m_axes == axes_without_batch_and_channels));
                 }
 
                 assert(correct_axes);
 
-                const auto info = helper.get_info_for_linear_onnx_mode();
+                const auto info = helper.get_info_for_generic_linear_onnx();
+
+                const int64_t batch_size = info.batch_size;
+                const int64_t num_channels = info.num_channels;
+
+                const auto& input_index_multipliers = info.input_index_multipliers;
+                const auto& output_index_multipliers = info.output_index_multipliers;
+
+                const int64_t input_data_ptr_increment = info.input_data_ptr_increment;
+                const int64_t output_data_ptr_increment = info.output_data_ptr_increment;
+
+                const auto& input_spatial_shape = info.input_spatial_shape;
 
-                int64_t batch_size = info.batch_size;
-                int64_t num_channels = info.num_channels;
-                int64_t output_height = info.output_height;
-                int64_t output_width = info.output_width;
-                int64_t input_height = info.input_height;
-                int64_t input_width = info.input_width;
+                // This mode supports only interpolation with respect to spatial dimensions,
+                // not with respect to batch or channels. That is, we can have only two cases:
+                //     num_of_axes == input_rank
+                // or
+                //     num_of_axes == input_rank - 2.
+                // Hence, if num_of_axes != input_rank, then interpolated axes indices are
+                //    [0, 1, ..., num_of_axes - 1]
+                // Otherwise, if num_of_axes == input_rank, interpolated axes indices are
+                //    [2, 3, ..., num_of_axes - 1]
+                const int64_t axis_idx_offset = (input_rank == num_of_axes) ? 2 : 0;
+
+                const int64_t spatial_rank = info.spatial_rank;
+                const int64_t points_in_neighbor = 1 << spatial_rank;
 
                 const T* xdata = input_data;
                 T* ydata = out;
                 for (int64_t n = 0; n < batch_size; ++n)
                 {
                     for (int64_t c = 0; c < num_channels; ++c)
                     {
-                        for (int64_t y = 0; y < output_height; ++y)
+                        for (int64_t idx = 0; idx < output_data_ptr_increment; ++idx)
                         {
-                            for (int64_t x = 0; x < output_width; ++x)
+                            // 1. Get the current spatial coords vector.
+                            std::vector<int64_t> output_coords(spatial_rank);
+                            int64_t curr = idx;
+                            for (int64_t j = 0; j < spatial_rank - 1; ++j)
                             {
-                                T x11 = xdata[info.input_width_mul_y1[y] + info.in_x1[x]];
-                                T x21 = xdata[info.input_width_mul_y1[y] + info.in_x2[x]];
-                                T x12 = xdata[info.input_width_mul_y2[y] + info.in_x1[x]];
-                                T x22 = xdata[info.input_width_mul_y2[y] + info.in_x2[x]];
-
-                                ydata[output_width * y + x] =
-                                    static_cast<T>(info.dx2[x] * info.dy2[y] * x11 +
-                                                   info.dx1[x] * info.dy2[y] * x21 +
-                                                   info.dx2[x] * info.dy1[y] * x12 +
-                                                   info.dx1[x] * info.dy1[y] * x22);
+                                output_coords[j] = curr / output_index_multipliers[j];
+                                curr %= output_index_multipliers[j];
                             }
+                            output_coords[spatial_rank - 1] = curr;
+
+                            // 2. Some preliminaries.
+                            std::vector<int64_t> in1(spatial_rank);
+                            std::vector<int64_t> in2(spatial_rank);
+                            std::vector<float> d1(spatial_rank);
+                            std::vector<float> d2(spatial_rank);
+
+                            for (int64_t i = 0; i < spatial_rank; ++i)
+                            {
+                                float out_coord = static_cast<float>(output_coords[i]);
+
+                                float in_coord =
+                                    helper.get_in_coord(out_coord, i + axis_idx_offset);
+                                in_coord = std::max(
+                                    0.0f,
+                                    std::min(in_coord,
+                                             static_cast<float>(input_spatial_shape[i] - 1)));
+
+                                const int64_t in_coord1 = std::min(static_cast<int64_t>(in_coord),
+                                                                   input_spatial_shape[i] - 1);
+                                const int64_t in_coord2 =
+                                    std::min(in_coord1 + 1, input_spatial_shape[i] - 1);
+
+                                in1[i] = in_coord1;
+                                in2[i] = in_coord2;
+                                d1[i] = std::fabs(in_coord - in_coord1);
+                                d2[i] = std::fabs(in_coord - in_coord2);
+
+                                if (in_coord1 == in_coord2)
+                                {
+                                    d1[i] = 0.5f;
+                                    d2[i] = 0.5f;
+                                }
+                            }
+
+                            // 3. Get values in all points of a neighborhood.
+                            std::vector<T> values_of_input_points(points_in_neighbor);
+                            for (int64_t i = 0; i < points_in_neighbor; ++i)
+                            {
+                                int64_t offset = 0;
+                                for (int64_t j = 0; j < spatial_rank; ++j)
+                                {
+                                    if (i & (1 << (spatial_rank - 1 - j)))
+                                    {
+                                        offset += in1[j] * input_index_multipliers[j];
+                                    }
+                                    else
+                                    {
+                                        offset += in2[j] * input_index_multipliers[j];
+                                    }
+                                }
+                                values_of_input_points[i] = xdata[offset];
+                            }
+
+                            // 4. Interpolation.
+                            float sum = 0.0f;
+                            for (int64_t i = 0; i < points_in_neighbor; ++i)
+                            {
+                                float coeff = 1.0f;
+                                for (int64_t j = 0; j < spatial_rank; ++j)
+                                {
+                                    coeff *= (i & (1 << (spatial_rank - 1 - j))) ? d1[j] : d2[j];
+                                }
+                                sum += coeff * values_of_input_points[points_in_neighbor - 1 - i];
+                            }
+
+                            // 6. Store result.
+                            ydata[idx] = static_cast<T>(sum);
                         }
-                        xdata += input_height * input_width;
-                        ydata += output_width * output_height;
+
+                        xdata += input_data_ptr_increment;
+                        ydata += output_data_ptr_increment;
                     }
                 }
             }