-
Notifications
You must be signed in to change notification settings - Fork 74
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[GPU/OpenCL] Initial version of Transpose (all axes) with OpenCL ops
Added naive version of OpenCL implementation for Transpose. Incorporated kernel for ops using blas_kernels. Added unit test for Transpose_cl. Signed-off-by: Niket Agarwal <[email protected]>
- Loading branch information
1 parent
72bdf54
commit 5d2614e
Showing
16 changed files
with
1,152 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
/** | ||
* Copyright (C) 2024 Niket Agarwal <[email protected]> | ||
* | ||
* @file transpose_cl.cpp | ||
* @date 31 July 2024 | ||
* @brief Implementation of transpose layer | ||
* @see https://github.com/nnstreamer/nntrainer | ||
* @author Niket Agarwal <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
* | ||
*/ | ||
|
||
#include "transpose_cl.h" | ||
#include <blas_kernel_interface.h> | ||
#include <iostream> | ||
#include <layer_context.h> | ||
#include <nntrainer_error.h> | ||
#include <nntrainer_log.h> | ||
#include <node_exporter.h> | ||
|
||
namespace nntrainer { | ||
|
||
/** Index used to fetch the layer's only input tensor and only output tensor. */
static constexpr size_t SINGLE_INOUT_IDX = 0; | ||
|
||
/**
 * @brief Validate the input dimensions and publish them as output dimensions.
 *
 * @param context initialization context that supplies the input dimensions
 *                and receives the output dimensions
 * @throws std::invalid_argument if any input dimension has a data length of 0
 */
void TransposeLayerCl::finalize(InitLayerContext &context) {
  std::vector<TensorDim> out_dims = context.getInputDimensions();

  for (auto &d : out_dims) {
    if (d.getDataLen() == 0)
      throw std::invalid_argument("Input dimension is not set");

    // NOTE(review): every setter receives the value just read from the same
    // dimension, so the shape is carried over unchanged. Presumably a
    // placeholder for a real axis permutation - confirm.
    d.channel(d.channel());
    d.height(d.height());
    d.width(d.width());
  }

  context.setOutputDimensions(out_dims);
}
|
||
/**
 * @brief Transpose the single input tensor into the single output tensor.
 *
 * @param context run-time context holding the input and output tensors
 * @param training not read by this layer
 */
void TransposeLayerCl::forwarding(RunLayerContext &context, bool training) {
  Tensor &input = context.getInput(SINGLE_INOUT_IDX);
  Tensor &output = context.getOutput(SINGLE_INOUT_IDX);

  // The direction string is fixed; no layer property selects it here.
  transposeCl("1:0:2", input, output);
}
|
||
/**
 * @brief Incremental variant of forwarding; transposes the whole input tensor.
 *
 * @param context run-time context holding the input and output tensors
 * @param from start of the incremental window; when non-zero the window must
 *             span exactly one step
 * @param to end of the incremental window
 * @param training not read by this layer
 * @throws std::invalid_argument if @a from is non-zero and to - from != 1
 */
void TransposeLayerCl::incremental_forwarding(RunLayerContext &context,
                                              unsigned int from,
                                              unsigned int to, bool training) {
  Tensor &input = context.getInput(SINGLE_INOUT_IDX);
  Tensor &output = context.getOutput(SINGLE_INOUT_IDX);

  if (from != 0) {
    NNTR_THROW_IF(to - from != 1, std::invalid_argument)
      << "incremental step size is not 1";

    // NOTE(review): the window is reset here, but neither value is read again
    // before the function returns - confirm whether the range should be
    // applied to the tensors.
    from = 0;
    to = 1;
  }

  transposeCl("1:0:2", input, output);
}
|
||
/**
 * @brief Backward pass placeholder; training is not implemented for this layer.
 *
 * @param context run-time layer context (not read)
 * @throws std::runtime_error always
 */
void TransposeLayerCl::calcDerivative(RunLayerContext &context) {
  // Thrown directly rather than via std::throw_with_nested: no exception is
  // being handled here, so a nested wrapper would hold a null nested pointer
  // and std::rethrow_if_nested() on it would end in std::terminate().
  throw std::runtime_error("Training is not supported yet.");
}
|
||
void TransposeLayerCl::setProperty(const std::vector<std::string> &values) { | ||
auto remain_props = loadProperties(values, transpose_props); | ||
if (!remain_props.empty()) { | ||
std::string msg = "[TransposeLayerCl] Unknown Layer Properties count " + | ||
std::to_string(values.size()); | ||
throw exception::not_supported(msg); | ||
} | ||
} | ||
|
||
#ifdef PLUGGABLE | ||
|
||
/**
 * @brief Plugin factory: allocate a new TransposeLayerCl.
 *
 * @return Layer* heap-allocated layer; release it with
 *         destroy_transpose_layer_cl()
 */
Layer *create_transpose_layer_cl() {
  return new TransposeLayerCl();
}
|
||
/**
 * @brief Plugin deleter: free a layer obtained from create_transpose_layer_cl().
 *
 * @param layer layer to delete
 */
void destroy_transpose_layer_cl(Layer *layer) {
  delete layer;
}
|
||
/**
 * Plugin descriptor exported with C linkage; it pairs this layer's create and
 * destroy functions.
 */
extern "C" { | ||
LayerPluggable ml_train_layer_pluggable{create_transpose_layer_cl, | ||
destroy_transpose_layer_cl}; | ||
} | ||
|
||
#endif | ||
|
||
} // namespace nntrainer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
/** | ||
* Copyright (C) 2024 Niket Agarwal <[email protected]> | ||
* | ||
* @file transpose_cl.h | ||
* @date 31 July 2024 | ||
* @brief Declaration of the OpenCL transpose layer | ||
* @see https://github.com/nnstreamer/nntrainer | ||
* @author Niket Agarwal <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
* | ||
*/ | ||
|
||
#ifndef __TRANSPOSE_LAYER_CL_H__ | ||
#define __TRANSPOSE_LAYER_CL_H__ | ||
|
||
#include <common_properties.h> | ||
#include <layer_devel.h> | ||
#include <opencl_buffer.h> | ||
#include <opencl_kernel.h> | ||
|
||
/**
 * Assigns Tensor(__VA_ARGS__) to `tensor` when tensor.empty() is true;
 * otherwise leaves it untouched.
 *
 * NOTE(review): the expansion already ends in ';', so a call site written as
 * `CREATE_IF_EMPTY_DIMS(t, d);` yields an extra empty statement and cannot be
 * the sole body of an `if` that has an `else`. Consider dropping the trailing
 * ';' everywhere this macro is defined.
 */
#define CREATE_IF_EMPTY_DIMS(tensor, ...) \ | ||
do { \ | ||
if (tensor.empty()) \ | ||
tensor = Tensor(__VA_ARGS__); \ | ||
} while (0); | ||
|
||
namespace nntrainer { | ||
|
||
/** | ||
* @brief A tranpose layer. | ||
* | ||
*/ | ||
class TransposeLayerCl final : public Layer { | ||
public: | ||
/** | ||
* @brief Construct a new transpose layer object | ||
* | ||
*/ | ||
TransposeLayerCl() : Layer(), transpose_props(props::Print()) {} | ||
|
||
/** | ||
* @brief Destroy the transpose layer object | ||
* | ||
*/ | ||
~TransposeLayerCl() {} | ||
|
||
/** | ||
* @copydoc Layer::finalize(InitLayerContext &context) | ||
*/ | ||
void finalize(InitLayerContext &context) override; | ||
|
||
/** | ||
* @copydoc Layer::forwarding(RunLayerContext &context, bool training) | ||
*/ | ||
void forwarding(RunLayerContext &context, bool training) override; | ||
|
||
/** | ||
* @copydoc Layer::incremental_forwarding(RunLayerContext &context, unsigned | ||
* int from, unsigned int to, bool training) | ||
*/ | ||
void incremental_forwarding(RunLayerContext &context, unsigned int from, | ||
unsigned int to, bool training) override; | ||
|
||
/** | ||
* @copydoc Layer::calcDerivative(RunLayerContext &context) | ||
*/ | ||
void calcDerivative(RunLayerContext &context) override; | ||
|
||
/** | ||
* @copydoc bool supportBackwarding() const | ||
*/ | ||
bool supportBackwarding() const override { return true; }; | ||
|
||
/** | ||
* @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method) | ||
*/ | ||
void exportTo(Exporter &exporter, | ||
const ml::train::ExportMethods &method) const override{}; | ||
|
||
/** | ||
* @copydoc Layer::getType() | ||
*/ | ||
const std::string getType() const override { return TransposeLayerCl::type; }; | ||
|
||
/** | ||
* @copydoc Layer::setProperty(const std::vector<std::string> &values) | ||
*/ | ||
void setProperty(const std::vector<std::string> &values) override; | ||
|
||
inline static const std::string type = "transpose"; | ||
|
||
static opencl::Kernel kernel_transpose_axis0; | ||
static opencl::Kernel kernel_transpose_fp16_axis0; | ||
static opencl::Kernel kernel_transpose_axis1; | ||
static opencl::Kernel kernel_transpose_fp16_axis1; | ||
static opencl::Kernel kernel_transpose_axis2; | ||
static opencl::Kernel kernel_transpose_fp16_axis2; | ||
|
||
std::tuple<props::Print> transpose_props; /**< transpose layer properties : | ||
unit - number of output neurons */ | ||
}; | ||
} // namespace nntrainer | ||
|
||
#endif /* __TRANSPOSE_LAYER_CL_H__ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.