Skip to content

Commit

Permalink
Per frame affine transforms (#3946)
Browse files Browse the repository at this point in the history
* Allow named arguments as a reference when expanding/broadcasting in SequenceOperator
* Enable per-frame affine transforms (transforms.combine, transforms.crop, transforms.rotation, transforms.scale, transforms.shear, transforms.translation)
* Enable per-frame coord_transform

Signed-off-by: Kamil Tokarski <[email protected]>
  • Loading branch information
stiepan authored Jun 21, 2022
1 parent 0b49400 commit d3ecce5
Show file tree
Hide file tree
Showing 17 changed files with 616 additions and 224 deletions.
20 changes: 17 additions & 3 deletions dali/operators/geometry/affine_transforms/combine_transforms.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -23,6 +23,7 @@
#include "dali/pipeline/operator/op_spec.h"
#include "dali/pipeline/workspace/workspace.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/pipeline/operator/sequence_operator.h"

#define TRANSFORM_INPUT_TYPES (float)

Expand All @@ -45,12 +46,13 @@ Example: combining [T1, T2, T3] is equivalent to T3(T2(T1(...))) for default ord
)code")
.NumInput(2, 99)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

class CombineTransformsCPU : public Operator<CPUBackend> {
class CombineTransformsCPU : public SequenceOperator<CPUBackend> {
public:
explicit CombineTransformsCPU(const OpSpec &spec) :
Operator<CPUBackend>(spec),
SequenceOperator<CPUBackend>(spec),
reverse_order_(spec.GetArgument<bool>("reverse_order")) {
}

Expand Down Expand Up @@ -151,6 +153,18 @@ class CombineTransformsCPU : public Operator<CPUBackend> {
), DALI_FAIL(make_string("Unsupported data type: ", dtype_))); // NOLINT
}

/**
 * @brief Assigns a layout to output 0 after the operator has run in expanding mode.
 *
 * Does nothing unless IsExpanding() reports true. Otherwise the output layout is
 * rebuilt with one entry per sample dimension: 'F' at position 0 and '*' at
 * every remaining position.
 *
 * @param ws Workspace whose first output receives the new layout.
 */
void PostprocessOutputs(workspace_t<CPUBackend> &ws) override {
if (!this->IsExpanding()) {
return;
}
auto &output = ws.template Output<CPUBackend>(0);
const int ndim = output.sample_dim();
// The layout needs at least one entry so that index 0 can be written below.
assert(ndim > 0);
TensorLayout frames_layout;
frames_layout.resize(ndim, '*');
frames_layout[0] = 'F';
output.SetLayout(frames_layout);
}

private:
using SupportedDims = dims<1, 2, 3, 4, 5, 6>;
DALIDataType dtype_ = DALI_FLOAT;
Expand Down
23 changes: 18 additions & 5 deletions dali/operators/geometry/affine_transforms/transform_base_op.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -24,9 +24,10 @@
#include "dali/core/static_switch.h"
#include "dali/kernels/kernel_manager.h"
#include "dali/pipeline/data/types.h"
#include "dali/pipeline/operator/arg_helper.h"
#include "dali/pipeline/operator/op_spec.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/pipeline/operator/arg_helper.h"
#include "dali/pipeline/operator/sequence_operator.h"
#include "dali/pipeline/workspace/workspace.h"

#define TRANSFORM_INPUT_TYPES (float)
Expand All @@ -45,11 +46,11 @@ using affine_mat_t = mat<mat_dim, mat_dim, T>;
* As with any CRTP-based system, any non-private method can be shadowed by the TransformImpl class.
*/
template <typename Backend, typename TransformImpl>
class TransformBaseOp : public Operator<Backend> {
class TransformBaseOp : public SequenceOperator<Backend, true> {
public:
using Base = SequenceOperator<Backend, true>;
explicit TransformBaseOp(const OpSpec &spec) :
Operator<Backend>(spec),
reverse_order_(spec.GetArgument<bool>("reverse_order")) {
Base(spec), reverse_order_(spec.GetArgument<bool>("reverse_order")) {
matrix_data_.set_pinned(false);
matrix_data_.set_type(dtype_);
}
Expand Down Expand Up @@ -144,6 +145,18 @@ class TransformBaseOp : public Operator<Backend> {
), DALI_FAIL(make_string("Unsupported data type: ", dtype_))); // NOLINT
}

/**
 * @brief Assigns a layout to output 0 after the operator has run in expanding mode.
 *
 * Does nothing unless IsExpanding() reports true. Otherwise the output layout is
 * rebuilt with one entry per sample dimension: 'F' at position 0 and '*' at
 * every remaining position.
 *
 * @param ws Workspace whose first output receives the new layout.
 */
void PostprocessOutputs(workspace_t<Backend> &ws) override {
if (!this->IsExpanding()) {
return;
}
auto &output = ws.template Output<Backend>(0);
const int ndim = output.sample_dim();
// The layout needs at least one entry so that index 0 can be written below.
assert(ndim > 0);
TensorLayout frames_layout;
frames_layout.resize(ndim, '*');
frames_layout[0] = 'F';
output.SetLayout(frames_layout);
}

int input_transform_ndim(const workspace_t<Backend> &ws) const {
assert(has_input_);
auto &input = ws.template Input<Backend>(0);
Expand Down
11 changes: 6 additions & 5 deletions dali/operators/geometry/affine_transforms/transform_crop.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -36,7 +36,7 @@ If another transform matrix is passed as an input, the operator applies the tran
If left empty, a vector of zeros will be assumed.
If a single value is provided, it will be repeated to match the number of dimensions
)code",
std::vector<float>{0.0}, true)
std::vector<float>{0.0}, true, true)
.AddOptionalArg(
"from_end",
R"code(The upper bound of the original coordinate space.
Expand All @@ -45,7 +45,7 @@ If another transform matrix is passed as an input, the operator applies the tran
If left empty, a vector of ones will be assumed.
If a single value is provided, it will be repeated to match the number of dimensions
)code",
std::vector<float>{1.0}, true)
std::vector<float>{1.0}, true, true)
.AddOptionalArg(
"to_start",
R"code(The lower bound of the destination coordinate space.
Expand All @@ -54,7 +54,7 @@ If another transform matrix is passed as an input, the operator applies the tran
If left empty, a vector of zeros will be assumed.
If a single value is provided, it will be repeated to match the number of dimensions
)code",
std::vector<float>{0.0}, true)
std::vector<float>{0.0}, true, true)
.AddOptionalArg(
"to_end",
R"code(The upper bound of the destination coordinate space.
Expand All @@ -63,13 +63,14 @@ If another transform matrix is passed as an input, the operator applies the tran
If left empty, a vector of ones will be assumed.
If a single value is provided, it will be repeated to match the number of dimensions
)code",
std::vector<float>{1.0}, true)
std::vector<float>{1.0}, true, true)
.AddOptionalArg(
"absolute",
R"code(If set to true, start and end coordinates will be swapped if start > end.)code",
false)
.NumInput(0, 1)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,23 +32,24 @@ The number of dimensions is assumed to be 3 if a rotation axis is provided or 2
.AddArg(
"angle",
R"code(Angle, in degrees.)code",
DALI_FLOAT, true)
DALI_FLOAT, true, true)
.AddOptionalArg<std::vector<float>>(
"axis",
R"code(Axis of rotation (applies **only** to 3D transforms).
The vector does not need to be normalized, but it must have a non-zero length.
Reversing the vector is equivalent to changing the sign of ``angle``.)code",
nullptr, true)
nullptr, true, true)
.AddOptionalArg<std::vector<float>>(
"center",
R"code(The center of the rotation.
If provided, the number of elements should match the dimensionality of the transform.)code",
nullptr, true)
nullptr, true, true)
.NumInput(0, 1)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

/**
Expand Down
7 changes: 4 additions & 3 deletions dali/operators/geometry/affine_transforms/transform_scale.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,13 +30,13 @@ If another transform matrix is passed as an input, the operator applies scaling
R"code(The scale factor, per dimension.
The number of dimensions of the transform is inferred from this argument.)code",
DALI_FLOAT_VEC, true)
DALI_FLOAT_VEC, true, true)
.AddOptionalArg<std::vector<float>>(
"center",
R"code(The center of the scale operation.
If provided, the number of elements should match the one of ``scale`` argument.)code",
nullptr, true)
nullptr, true, true)
.AddOptionalArg<int>(
"ndim",
R"code(Number of dimensions.
Expand All @@ -47,6 +47,7 @@ when `scale` is a scalar value and there's no input transform.
nullptr, false)
.NumInput(0, 1)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

/**
Expand Down
9 changes: 5 additions & 4 deletions dali/operators/geometry/affine_transforms/transform_shear.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -42,7 +42,7 @@ direction of the second axis.
This argument is mutually exclusive with ``angles``.
If provided, the number of dimensions of the transform is inferred from this argument.
)code",
nullptr, true)
nullptr, true, true)
.AddOptionalArg<std::vector<float>>(
"angles",
R"code(The shear angles, in degrees.
Expand All @@ -62,15 +62,16 @@ A shear angle is translated to a shear factor as follows::
This argument is mutually exclusive with ``shear``.
If provided, the number of dimensions of the transform is inferred from this argument.
)code",
nullptr, true)
nullptr, true, true)
.AddOptionalArg<std::vector<float>>(
"center",
R"code(The center of the shear operation.
If provided, the number of elements should match the dimensionality of the transform.)code",
nullptr, true)
nullptr, true, true)
.NumInput(0, 1)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,9 +30,10 @@ If another transform matrix is passed as an input, the operator applies translat
R"code(The translation vector.
The number of dimensions of the transform is inferred from this argument.)code",
DALI_FLOAT_VEC, true)
DALI_FLOAT_VEC, true, true)
.NumInput(0, 1)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

DALI_SCHEMA(TransformTranslation) // Deprecated in 0.28.0dev
Expand Down
1 change: 1 addition & 0 deletions dali/operators/geometry/coord_transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ This operator can be used for many operations. Here's the (incomplete) list:
If an integral type is used, the output values are rounded to the nearest integer and clamped
to the dynamic range of this type.)",
DALI_FLOAT)
.AllowSequences()
.AddParent("MTTransformAttr");

template <>
Expand Down
12 changes: 7 additions & 5 deletions dali/operators/geometry/coord_transform.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -20,25 +20,27 @@
#include "dali/core/geom/mat.h"
#include "dali/core/static_switch.h"
#include "dali/kernels/kernel_manager.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/operators/geometry/mt_transform_attr.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/pipeline/operator/sequence_operator.h"

namespace dali {

#define COORD_TRANSFORM_INPUT_TYPES (uint8_t, int16_t, uint16_t, int32_t, float)
#define COORD_TRANSFORM_DIMS (1, 2, 3, 4, 5, 6)

template <typename Backend>
class CoordTransform : public Operator<Backend>, private MTTransformAttr {
class CoordTransform : public SequenceOperator<Backend, true>, private MTTransformAttr {
public:
explicit CoordTransform(const OpSpec &spec) : Operator<Backend>(spec), MTTransformAttr(spec) {
using Base = SequenceOperator<Backend, true>;
explicit CoordTransform(const OpSpec &spec) : Base(spec), MTTransformAttr(spec) {
dtype_ = spec_.template GetArgument<DALIDataType>("dtype");
}

bool CanInferOutputs() const override { return true; }

protected:
using Operator<Backend>::spec_;
using Base::spec_;
bool SetupImpl(std::vector<OutputDesc> &output_descs, const workspace_t<Backend> &ws) override {
auto &input = ws.template Input<Backend>(0); // get a reference to the input tensor list
const auto &input_shape = input.shape(); // get a shape - use const-ref to avoid copying
Expand Down
7 changes: 3 additions & 4 deletions dali/operators/geometry/mt_transform_attr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ If a scalar value is provided, ``M`` is assumed to be a square matrix with that
diagonal. The size of the matrix is then assumed to match the number of components in the
input vectors.)",
nullptr, // no default value
true)
true, true)
.AddOptionalArg<vector<float>>("T", R"(The translation vector.
If left unspecified, no translation is applied unless MT argument is used.
The number of components of this vector must match the number of rows in matrix ``M``.
If a scalar value is provided, that value is broadcast to all components of ``T`` and the number
of components is chosen to match the number of rows in ``M``.)", nullptr, true)
of components is chosen to match the number of rows in ``M``.)", nullptr, true, true)
.AddOptionalArg<vector<float>>("MT", R"(A block matrix [M T] which combines the arguments
``M`` and ``T``.
Expand All @@ -51,8 +51,7 @@ M and leaving T unspecified.
The number of columns must be one more than the number of components in the input.
This argument is mutually exclusive with ``M`` and ``T``.)",
nullptr,
true);
nullptr, true, true);

void MTTransformAttr::ProcessMatrixArg(const OpSpec &spec, const ArgumentWorkspace &ws, int N) {
bool is_fused = HasFusedMT();
Expand Down
Loading

0 comments on commit d3ecce5

Please sign in to comment.