Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Per frame affine transforms #3946

Merged
merged 16 commits into from
Jun 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions dali/operators/geometry/affine_transforms/combine_transforms.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -23,6 +23,7 @@
#include "dali/pipeline/operator/op_spec.h"
#include "dali/pipeline/workspace/workspace.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/pipeline/operator/sequence_operator.h"

#define TRANSFORM_INPUT_TYPES (float)

Expand All @@ -45,12 +46,13 @@ Example: combining [T1, T2, T3] is equivalent to T3(T2(T1(...))) for default ord
)code")
.NumInput(2, 99)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

class CombineTransformsCPU : public Operator<CPUBackend> {
class CombineTransformsCPU : public SequenceOperator<CPUBackend> {
public:
explicit CombineTransformsCPU(const OpSpec &spec) :
Operator<CPUBackend>(spec),
SequenceOperator<CPUBackend>(spec),
reverse_order_(spec.GetArgument<bool>("reverse_order")) {
}

Expand Down Expand Up @@ -151,6 +153,18 @@ class CombineTransformsCPU : public Operator<CPUBackend> {
), DALI_FAIL(make_string("Unsupported data type: ", dtype_))); // NOLINT
}

/**
 * @brief Assigns a layout to output 0 when the operator is expanding.
 *
 * When `IsExpanding()` is false this is a no-op. Otherwise the layout of
 * output 0 is set to 'F' followed by '*' for every remaining dimension.
 *
 * NOTE(review): 'F' presumably denotes the frame dimension of a sequence -
 * confirm against the TensorLayout conventions.
 */
void PostprocessOutputs(workspace_t<CPUBackend> &ws) override {
  if (!this->IsExpanding())
    return;

  auto &output = ws.template Output<CPUBackend>(0);
  const int ndim = output.sample_dim();
  // The leading layout entry is overwritten below, so at least one dimension is required.
  assert(ndim > 0);

  TensorLayout frames_layout;
  frames_layout.resize(ndim, '*');  // placeholder for every dimension...
  frames_layout[0] = 'F';           // ...except the outermost one
  output.SetLayout(frames_layout);
}

private:
using SupportedDims = dims<1, 2, 3, 4, 5, 6>;
DALIDataType dtype_ = DALI_FLOAT;
Expand Down
23 changes: 18 additions & 5 deletions dali/operators/geometry/affine_transforms/transform_base_op.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -24,9 +24,10 @@
#include "dali/core/static_switch.h"
#include "dali/kernels/kernel_manager.h"
#include "dali/pipeline/data/types.h"
#include "dali/pipeline/operator/arg_helper.h"
#include "dali/pipeline/operator/op_spec.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/pipeline/operator/arg_helper.h"
#include "dali/pipeline/operator/sequence_operator.h"
#include "dali/pipeline/workspace/workspace.h"

#define TRANSFORM_INPUT_TYPES (float)
Expand All @@ -45,11 +46,11 @@ using affine_mat_t = mat<mat_dim, mat_dim, T>;
* As with any CRTP-based system, any non-private method can be shadowed by the TransformImpl class.
*/
template <typename Backend, typename TransformImpl>
class TransformBaseOp : public Operator<Backend> {
class TransformBaseOp : public SequenceOperator<Backend, true> {
public:
using Base = SequenceOperator<Backend, true>;
explicit TransformBaseOp(const OpSpec &spec) :
Operator<Backend>(spec),
reverse_order_(spec.GetArgument<bool>("reverse_order")) {
Base(spec), reverse_order_(spec.GetArgument<bool>("reverse_order")) {
matrix_data_.set_pinned(false);
matrix_data_.set_type(dtype_);
}
Expand Down Expand Up @@ -144,6 +145,18 @@ class TransformBaseOp : public Operator<Backend> {
), DALI_FAIL(make_string("Unsupported data type: ", dtype_))); // NOLINT
}

/**
 * @brief Assigns a layout to output 0 when the operator is expanding.
 *
 * When `IsExpanding()` is false this is a no-op. Otherwise the layout of
 * output 0 is set to 'F' followed by '*' for every remaining dimension.
 *
 * NOTE(review): 'F' presumably denotes the frame dimension of a sequence -
 * confirm against the TensorLayout conventions.
 */
void PostprocessOutputs(workspace_t<Backend> &ws) override {
  if (!this->IsExpanding())
    return;

  auto &output = ws.template Output<Backend>(0);
  const int ndim = output.sample_dim();
  // The leading layout entry is overwritten below, so at least one dimension is required.
  assert(ndim > 0);

  TensorLayout frames_layout;
  frames_layout.resize(ndim, '*');  // placeholder for every dimension...
  frames_layout[0] = 'F';           // ...except the outermost one
  output.SetLayout(frames_layout);
}

int input_transform_ndim(const workspace_t<Backend> &ws) const {
assert(has_input_);
auto &input = ws.template Input<Backend>(0);
Expand Down
11 changes: 6 additions & 5 deletions dali/operators/geometry/affine_transforms/transform_crop.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -36,7 +36,7 @@ If another transform matrix is passed as an input, the operator applies the tran
If left empty, a vector of zeros will be assumed.
If a single value is provided, it will be repeated to match the number of dimensions
)code",
std::vector<float>{0.0}, true)
std::vector<float>{0.0}, true, true)
.AddOptionalArg(
"from_end",
R"code(The upper bound of the original coordinate space.
Expand All @@ -45,7 +45,7 @@ If another transform matrix is passed as an input, the operator applies the tran
If left empty, a vector of ones will be assumed.
If a single value is provided, it will be repeated to match the number of dimensions
)code",
std::vector<float>{1.0}, true)
std::vector<float>{1.0}, true, true)
.AddOptionalArg(
"to_start",
R"code(The lower bound of the destination coordinate space.
Expand All @@ -54,7 +54,7 @@ If another transform matrix is passed as an input, the operator applies the tran
If left empty, a vector of zeros will be assumed.
If a single value is provided, it will be repeated to match the number of dimensions
)code",
std::vector<float>{0.0}, true)
std::vector<float>{0.0}, true, true)
.AddOptionalArg(
"to_end",
R"code(The upper bound of the destination coordinate space.
Expand All @@ -63,13 +63,14 @@ If another transform matrix is passed as an input, the operator applies the tran
If left empty, a vector of ones will be assumed.
If a single value is provided, it will be repeated to match the number of dimensions
)code",
std::vector<float>{1.0}, true)
std::vector<float>{1.0}, true, true)
.AddOptionalArg(
"absolute",
R"code(If set to true, start and end coordinates will be swapped if start > end.)code",
false)
.NumInput(0, 1)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,23 +32,24 @@ The number of dimensions is assumed to be 3 if a rotation axis is provided or 2
.AddArg(
"angle",
R"code(Angle, in degrees.)code",
DALI_FLOAT, true)
DALI_FLOAT, true, true)
.AddOptionalArg<std::vector<float>>(
"axis",
R"code(Axis of rotation (applies **only** to 3D transforms).

The vector does not need to be normalized, but it must have a non-zero length.

Reversing the vector is equivalent to changing the sign of ``angle``.)code",
nullptr, true)
nullptr, true, true)
.AddOptionalArg<std::vector<float>>(
"center",
R"code(The center of the rotation.

If provided, the number of elements should match the dimensionality of the transform.)code",
nullptr, true)
nullptr, true, true)
.NumInput(0, 1)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

/**
Expand Down
7 changes: 4 additions & 3 deletions dali/operators/geometry/affine_transforms/transform_scale.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,13 +30,13 @@ If another transform matrix is passed as an input, the operator applies scaling
R"code(The scale factor, per dimension.

The number of dimensions of the transform is inferred from this argument.)code",
DALI_FLOAT_VEC, true)
DALI_FLOAT_VEC, true, true)
.AddOptionalArg<std::vector<float>>(
"center",
R"code(The center of the scale operation.

If provided, the number of elements should match the one of ``scale`` argument.)code",
nullptr, true)
nullptr, true, true)
.AddOptionalArg<int>(
"ndim",
R"code(Number of dimensions.
Expand All @@ -47,6 +47,7 @@ when `scale` is a scalar value and there's no input transform.
nullptr, false)
.NumInput(0, 1)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

/**
Expand Down
9 changes: 5 additions & 4 deletions dali/operators/geometry/affine_transforms/transform_shear.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -42,7 +42,7 @@ direction of the second axis.
This argument is mutually exclusive with ``angles``.
If provided, the number of dimensions of the transform is inferred from this argument.
)code",
nullptr, true)
nullptr, true, true)
.AddOptionalArg<std::vector<float>>(
"angles",
R"code(The shear angles, in degrees.
Expand All @@ -62,15 +62,16 @@ A shear angle is translated to a shear factor as follows::
This argument is mutually exclusive with ``shear``.
If provided, the number of dimensions of the transform is inferred from this argument.
)code",
nullptr, true)
nullptr, true, true)
.AddOptionalArg<std::vector<float>>(
"center",
R"code(The center of the shear operation.

If provided, the number of elements should match the dimensionality of the transform.)code",
nullptr, true)
nullptr, true, true)
.NumInput(0, 1)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,9 +30,10 @@ If another transform matrix is passed as an input, the operator applies translat
R"code(The translation vector.

The number of dimensions of the transform is inferred from this argument.)code",
DALI_FLOAT_VEC, true)
DALI_FLOAT_VEC, true, true)
.NumInput(0, 1)
.NumOutput(1)
.AllowSequences()
.AddParent("TransformAttr");

DALI_SCHEMA(TransformTranslation) // Deprecated in 0.28.0dev
Expand Down
1 change: 1 addition & 0 deletions dali/operators/geometry/coord_transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ This operator can be used for many operations. Here's the (incomplete) list:
If an integral type is used, the output values are rounded to the nearest integer and clamped
to the dynamic range of this type.)",
DALI_FLOAT)
.AllowSequences()
.AddParent("MTTransformAttr");

template <>
Expand Down
12 changes: 7 additions & 5 deletions dali/operators/geometry/coord_transform.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -20,25 +20,27 @@
#include "dali/core/geom/mat.h"
#include "dali/core/static_switch.h"
#include "dali/kernels/kernel_manager.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/operators/geometry/mt_transform_attr.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/pipeline/operator/sequence_operator.h"

namespace dali {

#define COORD_TRANSFORM_INPUT_TYPES (uint8_t, int16_t, uint16_t, int32_t, float)
#define COORD_TRANSFORM_DIMS (1, 2, 3, 4, 5, 6)

template <typename Backend>
class CoordTransform : public Operator<Backend>, private MTTransformAttr {
class CoordTransform : public SequenceOperator<Backend, true>, private MTTransformAttr {
public:
explicit CoordTransform(const OpSpec &spec) : Operator<Backend>(spec), MTTransformAttr(spec) {
using Base = SequenceOperator<Backend, true>;
explicit CoordTransform(const OpSpec &spec) : Base(spec), MTTransformAttr(spec) {
dtype_ = spec_.template GetArgument<DALIDataType>("dtype");
}

bool CanInferOutputs() const override { return true; }

protected:
using Operator<Backend>::spec_;
using Base::spec_;
bool SetupImpl(std::vector<OutputDesc> &output_descs, const workspace_t<Backend> &ws) override {
auto &input = ws.template Input<Backend>(0); // get a reference to the input tensor list
const auto &input_shape = input.shape(); // get a shape - use const-ref to avoid copying
Expand Down
7 changes: 3 additions & 4 deletions dali/operators/geometry/mt_transform_attr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ If a scalar value is provided, ``M`` is assumed to be a square matrix with that
diagonal. The size of the matrix is then assumed to match the number of components in the
input vectors.)",
nullptr, // no default value
true)
true, true)
.AddOptionalArg<vector<float>>("T", R"(The translation vector.

If left unspecified, no translation is applied unless MT argument is used.

The number of components of this vector must match the number of rows in matrix ``M``.
If a scalar value is provided, that value is broadcast to all components of ``T`` and the number
of components is chosen to match the number of rows in ``M``.)", nullptr, true)
of components is chosen to match the number of rows in ``M``.)", nullptr, true, true)
.AddOptionalArg<vector<float>>("MT", R"(A block matrix [M T] which combines the arguments
``M`` and ``T``.

Expand All @@ -51,8 +51,7 @@ M and leaving T unspecified.

The number of columns must be one more than the number of components in the input.
This argument is mutually exclusive with ``M`` and ``T``.)",
nullptr,
true);
nullptr, true, true);

void MTTransformAttr::ProcessMatrixArg(const OpSpec &spec, const ArgumentWorkspace &ws, int N) {
bool is_fused = HasFusedMT();
Expand Down
Loading