Enable per-frame transforms

Signed-off-by: Kamil Tokarski <[email protected]>
NVIDIA · May 31, 2022 · b1992cc
1 parent 59e1215
commit b1992cc
Show file tree

Hide file tree

Showing 7 changed files with 52 additions and 26 deletions.
diff --git a/dali/operators/geometry/affine_transforms/combine_transforms.cc b/dali/operators/geometry/affine_transforms/combine_transforms.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@
 #include "dali/pipeline/operator/op_spec.h"
 #include "dali/pipeline/workspace/workspace.h"
 #include "dali/pipeline/operator/operator.h"
+#include "dali/pipeline/operator/sequence_operator.h"
 
 #define TRANSFORM_INPUT_TYPES (float)
 
@@ -47,10 +48,10 @@ Example: combining [T1, T2, T3] is equivalent to T3(T2(T1(...))) for default ord
   .NumOutput(1)
   .AddParent("TransformAttr");
 
-class CombineTransformsCPU : public Operator<CPUBackend> {
+class CombineTransformsCPU : public SequenceOperator<CPUBackend> {
  public:
   explicit CombineTransformsCPU(const OpSpec &spec) :
-      Operator<CPUBackend>(spec),
+      SequenceOperator<CPUBackend>(spec),
       reverse_order_(spec.GetArgument<bool>("reverse_order")) {
   }
 
@@ -151,6 +152,18 @@ class CombineTransformsCPU : public Operator<CPUBackend> {
     ), DALI_FAIL(make_string("Unsupported data type: ", dtype_)));  // NOLINT
   }
 
+  void PostprocessOutputs(workspace_t<CPUBackend> &ws) override {  // sets layout of output 0
+    if (this->IsExpanding()) {  // the whole body is skipped unless IsExpanding() is true
+      auto &out = ws.template Output<CPUBackend>(0);
+      int sample_dim = out.sample_dim();
+      assert(sample_dim > 0);  // layout[0] is written below
+      TensorLayout layout;
+      layout.resize(sample_dim, '*');  // one '*' entry per output dimension
+      layout[0] = 'F';  // NOTE(review): presumably 'F' = frame dim (per-frame) - confirm
+      out.SetLayout(layout);
+    }
+  }
+
  private:
   using SupportedDims = dims<1, 2, 3, 4, 5, 6>;
   DALIDataType dtype_ = DALI_FLOAT;

diff --git a/dali/operators/geometry/affine_transforms/transform_base_op.h b/dali/operators/geometry/affine_transforms/transform_base_op.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -24,9 +24,10 @@
 #include "dali/core/static_switch.h"
 #include "dali/kernels/kernel_manager.h"
 #include "dali/pipeline/data/types.h"
+#include "dali/pipeline/operator/arg_helper.h"
 #include "dali/pipeline/operator/op_spec.h"
 #include "dali/pipeline/operator/operator.h"
-#include "dali/pipeline/operator/arg_helper.h"
+#include "dali/pipeline/operator/sequence_operator.h"
 #include "dali/pipeline/workspace/workspace.h"
 
 #define TRANSFORM_INPUT_TYPES (float)
@@ -45,11 +46,11 @@ using affine_mat_t = mat<mat_dim, mat_dim, T>;
  * As with any CRTP-based system, any non-private method can be shadowed by the TransformImpl class.
  */
 template <typename Backend, typename TransformImpl>
-class TransformBaseOp : public Operator<Backend> {
+class TransformBaseOp : public SequenceOperator<Backend, true> {
+ using Base = SequenceOperator<Backend, true>;
  public:
   explicit TransformBaseOp(const OpSpec &spec) :
-      Operator<Backend>(spec),
-      reverse_order_(spec.GetArgument<bool>("reverse_order")) {
+      Base(spec), reverse_order_(spec.GetArgument<bool>("reverse_order")) {
     matrix_data_.set_pinned(false);
     matrix_data_.set_type(dtype_);
   }
@@ -144,6 +145,18 @@ class TransformBaseOp : public Operator<Backend> {
     ), DALI_FAIL(make_string("Unsupported data type: ", dtype_)));  // NOLINT
   }
 
+  void PostprocessOutputs(workspace_t<Backend> &ws) override {  // sets layout of output 0
+    if (this->IsExpanding()) {  // the whole body is skipped unless IsExpanding() is true
+      auto &out = ws.template Output<Backend>(0);
+      int sample_dim = out.sample_dim();
+      assert(sample_dim > 0);  // layout[0] is written below
+      TensorLayout layout;
+      layout.resize(sample_dim, '*');  // one '*' entry per output dimension
+      layout[0] = 'F';  // NOTE(review): presumably 'F' = frame dim (per-frame) - confirm
+      out.SetLayout(layout);
+    }
+  }
+
   int input_transform_ndim(const workspace_t<Backend> &ws) const {
     assert(has_input_);
     auto &input = ws.template Input<Backend>(0);

diff --git a/dali/operators/geometry/affine_transforms/transform_crop.cc b/dali/operators/geometry/affine_transforms/transform_crop.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -36,7 +36,7 @@ If another transform matrix is passed as an input, the operator applies the tran
     If left empty, a vector of zeros will be assumed.
     If a single value is provided, it will be repeated to match the number of dimensions
 )code",
-    std::vector<float>{0.0}, true)
+    std::vector<float>{0.0}, true, true)
   .AddOptionalArg(
     "from_end",
     R"code(The upper bound of the original coordinate space.
@@ -45,7 +45,7 @@ If another transform matrix is passed as an input, the operator applies the tran
     If left empty, a vector of ones will be assumed.
     If a single value is provided, it will be repeated to match the number of dimensions
 )code",
-    std::vector<float>{1.0}, true)
+    std::vector<float>{1.0}, true, true)
   .AddOptionalArg(
     "to_start",
     R"code(The lower bound of the destination coordinate space.
@@ -54,7 +54,7 @@ If another transform matrix is passed as an input, the operator applies the tran
     If left empty, a vector of zeros will be assumed.
     If a single value is provided, it will be repeated to match the number of dimensions
 )code",
-    std::vector<float>{0.0}, true)
+    std::vector<float>{0.0}, true, true)
   .AddOptionalArg(
     "to_end",
     R"code(The upper bound of the destination coordinate space.
@@ -63,7 +63,7 @@ If another transform matrix is passed as an input, the operator applies the tran
     If left empty, a vector of ones will be assumed.
     If a single value is provided, it will be repeated to match the number of dimensions
 )code",
-    std::vector<float>{1.0}, true)
+    std::vector<float>{1.0}, true, true)
   .AddOptionalArg(
     "absolute",
     R"code(If set to true, start and end coordinates will be swapped if start > end.)code",

diff --git a/dali/operators/geometry/affine_transforms/transform_rotation.cc b/dali/operators/geometry/affine_transforms/transform_rotation.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -32,21 +32,21 @@ The number of dimensions is assumed to be 3 if a rotation axis is provided or 2
   .AddArg(
     "angle",
     R"code(Angle, in degrees.)code",
-    DALI_FLOAT, true)
+    DALI_FLOAT, true, true)
   .AddOptionalArg<std::vector<float>>(
     "axis",
     R"code(Axis of rotation (applies **only** to 3D transforms).
 
 The vector does not need to be normalized, but it must have a non-zero length.
 
 Reversing the vector is equivalent to changing the sign of ``angle``.)code",
-    nullptr, true)
+    nullptr, true, true)
   .AddOptionalArg<std::vector<float>>(
     "center",
     R"code(The center of the rotation.
 
 If provided, the number of elements should match the dimensionality of the transform.)code",
-    nullptr, true)
+    nullptr, true, true)
   .NumInput(0, 1)
   .NumOutput(1)
   .AddParent("TransformAttr");

diff --git a/dali/operators/geometry/affine_transforms/transform_scale.cc b/dali/operators/geometry/affine_transforms/transform_scale.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -30,13 +30,13 @@ If another transform matrix is passed as an input, the operator applies scaling
     R"code(The scale factor, per dimension.
 
 The number of dimensions of the transform is inferred from this argument.)code",
-    DALI_FLOAT_VEC, true)
+    DALI_FLOAT_VEC, true, true)
   .AddOptionalArg<std::vector<float>>(
     "center",
     R"code(The center of the scale operation.
 
 If provided, the number of elements should match the one of ``scale`` argument.)code",
-    nullptr, true)
+    nullptr, true, true)
   .AddOptionalArg<int>(
     "ndim",
     R"code(Number of dimensions.

diff --git a/dali/operators/geometry/affine_transforms/transform_shear.cc b/dali/operators/geometry/affine_transforms/transform_shear.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ direction of the second axis.
     This argument is mutually exclusive with ``angles``.
     If provided, the number of dimensions of the transform is inferred from this argument.
 )code",
-    nullptr, true)
+    nullptr, true, true)
   .AddOptionalArg<std::vector<float>>(
     "angles",
     R"code(The shear angles, in degrees.
@@ -62,13 +62,13 @@ A shear angle is translated to a shear factor as follows::
     This argument is mutually exclusive with ``shear``.
     If provided, the number of dimensions of the transform is inferred from this argument.
 )code",
-    nullptr, true)
+    nullptr, true, true)
   .AddOptionalArg<std::vector<float>>(
     "center",
     R"code(The center of the shear operation.
 
 If provided, the number of elements should match the dimensionality of the transform.)code",
-    nullptr, true)
+    nullptr, true, true)
   .NumInput(0, 1)
   .NumOutput(1)
   .AddParent("TransformAttr");

diff --git a/dali/operators/geometry/affine_transforms/transform_translation.cc b/dali/operators/geometry/affine_transforms/transform_translation.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -30,7 +30,7 @@ If another transform matrix is passed as an input, the operator applies translat
     R"code(The translation vector.
 
 The number of dimensions of the transform is inferred from this argument.)code",
-    DALI_FLOAT_VEC, true)
+    DALI_FLOAT_VEC, true, true)
   .NumInput(0, 1)
   .NumOutput(1)
   .AddParent("TransformAttr");