[Wait for #2568][Mixed] Mixed Precision Layer update
This PR updates the mixed precision layer.
- integrates #2568 and #2455
- more tests will be added in a follow-up

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <[email protected]>
DonghakPark committed May 10, 2024
1 parent 26b75ad commit fe21d0a
Showing 12 changed files with 37 additions and 315 deletions.
51 changes: 0 additions & 51 deletions nntrainer/layers/layer_context.cpp
@@ -156,16 +156,6 @@ Tensor &RunLayerContext::getWeight(unsigned int idx) const {
return weights[idx]->getVariableRef();
}

/**
* @brief Get the Weight tensor object
*
* @param idx Identifier of the weight
* @return Tensor& Reference to the weight tensor
*/
Tensor *RunLayerContext::getWeightMaster(unsigned int idx) const {
return weights[idx]->getVariableMasterRef();
}

/**
* @brief Get the Weight Gradient tensor object
*
@@ -204,18 +194,6 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
return weights[idx]->getOptimizerVariableRef(jdx);
}

/**
* @brief Get the Weight Optimizer Variable tensor object
*
* @param idx Identifier of the weight
* @param jdx Identifier of the optimizer variables
* @return Tensor& Reference to the weight optimizer variable tensor
*/
Tensor &RunLayerContext::getWeightOptMasterVar(unsigned int idx,
unsigned int jdx) const {
return weights[idx]->getOptimizerMasterVariableRef(jdx);
}

/**
* @brief Get the Number of Weight Optimizer Variable tensor object
*
@@ -226,16 +204,6 @@ unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
return weights[idx]->getNumOptVariable();
}

/**
* @brief Get the Number of Weight Optimizer Variable tensor object
*
* @param idx Identifier of the weight
* @return int Number of the weight optimizer variable
*/
unsigned int RunLayerContext::getNumWeightOptMasterVar(unsigned int idx) const {
return weights[idx]->getNumOptMasterVariable();
}

/**
* @brief Get regularization loss for the weight
*
@@ -375,25 +343,6 @@ Tensor &RunLayerContext::getOutgoingDerivative(unsigned int idx) {
return getInputGrad(idx);
}

bool RunLayerContext::validateDerivatives() {
auto num_in = getNumInputs();
auto num_out = getNumOutputs();

for (unsigned int i = 0; i < num_in; ++i) {
auto deriv = getIncomingDerivative(i);
if (deriv.checkDataValidation(false) == false)
return false;
}

for (unsigned int i = 0; i < num_out; ++i) {
auto deriv = getOutgoingDerivative(i);
if (deriv.checkDataValidation(false) == false)
return false;
}

return true;
}

/**
* @brief Get the Tensor object
*
38 changes: 0 additions & 38 deletions nntrainer/layers/layer_context.h
@@ -453,14 +453,6 @@ class RunLayerContext {
*/
Tensor &getWeight(unsigned int idx) const;

/**
* @brief Get the Weight master tensor object
*
* @param idx Identifier of the weight
* @return Tensor& Reference to the weight tensor
*/
Tensor *getWeightMaster(unsigned int idx) const;

/**
* @brief Get the Weight Gradient tensor object
*
@@ -488,15 +480,6 @@
*/
Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;

/**
* @brief Get the Weight Optimizer Master Variable tensor object
*
* @param idx Identifier of the weight
* @param jdx Identifier of the weight optimizer master variable
* @return Tensor& Reference to the weight optimizer tensor
*/
Tensor &getWeightOptMasterVar(unsigned int idx, unsigned int jdx) const;

/**
* @brief Get the Weight name
*
@@ -607,11 +590,6 @@
*/
Tensor &getOutgoingDerivative(unsigned int idx);

/**
* @brief validate input/output derivatives of the layer
*/
bool validateDerivatives();

/**
* @brief Get the Tensor object
*
@@ -727,29 +705,13 @@
*/
unsigned int getNumWeightOptVar(unsigned int idx) const;

/**
* @brief Get the Number of Weight Optimizer Variable tensor object
*
* @param idx Identifier of the weight
* @return unsigned int Number of the weight optimizer variable
*/
unsigned int getNumWeightOptMasterVar(unsigned int idx) const;

/**
* @brief Get the number of requested tensors objects
*
* @return unsigned int number of requested tensors
*/
unsigned int getNumTensors() const { return tensors.size(); }

/**
* @brief Set the Weight Optimizer Variable tensor object
*
* @param idx Identifier of the weight
* @param jdx Identifier of the weight optimizer variable
*/
void setWeightOptVars(unsigned int idx, std::vector<Tensor *> opts);

/**
* @brief Set the batch for the run context
*
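
For context, the accessors removed above (getWeightMaster(), getWeightOptMasterVar(), getNumWeightOptMasterVar()) exposed an FP32 "master" copy kept alongside each reduced-precision weight so that optimizer updates accumulate at full precision. A minimal, self-contained sketch of that pairing, using an illustrative struct rather than nntrainer's actual Weight class (the working copy is float here only so the sketch compiles without ENABLE_FP16):

#include <cstddef>
#include <vector>

// Illustrative master/working weight pair; not nntrainer's Weight class.
struct MasterWeightSketch {
  std::vector<float> master;  // FP32 master copy, updated by the optimizer
  std::vector<float> working; // reduced-precision copy used in forward/backward
  std::vector<float> grad;    // gradient produced by the backward pass

  // Plain SGD step: update the FP32 master, then refresh the working copy.
  void sgdStep(float lr) {
    for (std::size_t i = 0; i < master.size(); ++i) {
      master[i] -= lr * grad[i];
      working[i] = master[i];
    }
  }
};
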
27 changes: 4 additions & 23 deletions nntrainer/layers/layer_node.cpp
@@ -16,7 +16,6 @@
#include <cmath>
#include <iterator>
#include <stdexcept>
#include <tuple>
#include <utility>

#include <activation_layer.h>
@@ -466,24 +465,16 @@ void LayerNode::read(std::ifstream &file, bool opt_var) {
for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
if (run_context->isGradientLastAccess(i) && getTrainable()) {
/// @note read optimizer variables
auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
if (num_w_opt_m > 0)
run_context->getWeightOptMasterVar(i, j).read(file);
else
run_context->getWeightOptVar(i, j).read(file);
run_context->getWeightOptVar(i, j).read(file);
}
}
}
} else {
for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
/// @note shared weights are only read at the first access
if (run_context->isGradientLastAccess(i)) {
auto w = run_context->getWeightMaster(i);
if (w)
w->read(file);
else
run_context->getWeight(i).read(file);
run_context->getWeight(i).read(file);
}
}
}
@@ -498,13 +489,9 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
if (run_context->isGradientLastAccess(i) && getTrainable()) {
// @note save optimizer variables
if (run_context->weightHasGradient(i)) {
auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
++j) {
if (num_w_opt_m > 0)
run_context->getWeightOptMasterVar(i, j).save(file);
else
run_context->getWeightOptVar(i, j).save(file);
run_context->getWeightOptVar(i, j).save(file);
}
}
}
@@ -513,13 +500,7 @@
// @note shared weights are only saved at the first access
for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
if (run_context->isGradientLastAccess(i)) {
if (run_context->getNumWeights()) {
auto w = run_context->getWeightMaster(i);
if (w)
w->save(file);
else
run_context->getWeight(i).save(file);
}
run_context->getWeight(i).save(file);
}
}
}
5 changes: 0 additions & 5 deletions nntrainer/layers/layer_node.h
@@ -899,11 +899,6 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
*/
bool needsCalcGradient() { return needs_calc_gradient; }

/**
* @brief Set loss scale factor
*/
void setLossScale(float scale) { layer->setLossScale(scale); }

private:
/**
* @brief Get the Input Layers object
3 changes: 0 additions & 3 deletions nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp
@@ -61,9 +61,6 @@ void CrossEntropySigmoidLossLayer::calcDerivative(RunLayerContext &context) {
Tensor &y = context.getInput(SINGLE_INOUT_IDX);

y.apply<float>(ActiFunc::sigmoid<float>, ret_derivative);

applyLossScale(ret_derivative);

ret_derivative.subtract_i(y2);
if (ret_derivative.divide_i(ret_derivative.size()) != ML_ERROR_NONE) {
throw std::runtime_error("[CrossEntropySigmoidLossLayer::calcDerivative] "
36 changes: 7 additions & 29 deletions nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
@@ -30,14 +30,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
Tensor &y = context.getInput(SINGLE_INOUT_IDX);

// fill the output
auto out_type = hidden_.getDataType();
if (out_type == ml::train::TensorDim::DataType::FP32) {
if (y.getDataType() != out_type) {
Tensor y_ = y.clone(out_type);
hidden_ = y_.apply(ActiFunc::softmax<float>, hidden_);
} else {
hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
}
auto dataType = y.getDataType();
if (dataType == ml::train::TensorDim::DataType::FP32) {
hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);

if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -48,14 +43,9 @@
// update the loss value
LossLayer::updateLoss(context, l);
}
} else if (out_type == ml::train::TensorDim::DataType::FP16) {
} else if (dataType == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
if (y.getDataType() != out_type) {
Tensor y_ = y.clone(out_type);
hidden_ = y_.apply(ActiFunc::softmax<_FP16>, hidden_);
} else {
hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
}
hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);

if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
Expand All @@ -78,8 +68,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
Tensor &y = context.getInput(SINGLE_INOUT_IDX);

auto dataType = y.getDataType();

Tensor ret(y.getDim());
Tensor ret = Tensor("ret", y.getFormat(), y.getDataType());
if (dataType == ml::train::TensorDim::DataType::FP32) {
y.apply(ActiFunc::softmax<float>, ret);
} else if (dataType == ml::train::TensorDim::DataType::FP16) {
@@ -94,18 +83,7 @@
/// operation
// TODO: verify y and ret_derivative must not be same as loss layer is not
// working in-place
if (ret.getDataType() != y2.getDataType()) {
ret.subtract(y2.clone(ret.getDataType()), ret_derivative);
} else {
ret.subtract(y2, ret_derivative);
}

/**
* loss scale is applied for mixed precision
* every loss layers need to specify this applying code.
*/
applyLossScale(ret_derivative);

ret.subtract(y2, ret_derivative);
if (ret_derivative.divide_i(ret.batch()) != ML_ERROR_NONE) {
throw std::runtime_error("[CrossEntropySoftmaxLossLayer::calcDerivative] "
"Error when calculating loss");
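
The simplified calcDerivative above still produces the standard softmax-cross-entropy gradient, (softmax(y) - label) averaged over the batch. A compact stand-alone sketch of that arithmetic on plain arrays (illustrative helper, not nntrainer code):

#include <cstddef>
#include <vector>

// d[i] = (softmax_y[i] - label[i]) / batch_size, the value the diff above
// writes into ret_derivative via subtract() and divide_i().
std::vector<float> softmaxCrossEntropyGrad(const std::vector<float> &softmax_y,
                                           const std::vector<float> &label,
                                           std::size_t batch_size) {
  std::vector<float> d(softmax_y.size());
  for (std::size_t i = 0; i < softmax_y.size(); ++i)
    d[i] = (softmax_y[i] - label[i]) / static_cast<float>(batch_size);
  return d;
}
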
5 changes: 1 addition & 4 deletions nntrainer/layers/loss/loss_layer.cpp
@@ -15,17 +15,14 @@
#include <loss_layer.h>

namespace nntrainer {

LossLayer::LossLayer() : Layer(), loss_scale(0.0f) {}

void LossLayer::finalize(InitLayerContext &context) {
std::vector<TensorDim> input_dim = context.getInputDimensions();
std::vector<TensorDim> output_dim = input_dim;
for (auto &d : output_dim)
d.setDataType(
str_converter<enum_class_prop_tag,
nntrainer::TensorDataTypeInfo>::from_string("FP32"));

context.setOutputDimensions(output_dim);
}

21 changes: 0 additions & 21 deletions nntrainer/layers/loss/loss_layer.h
@@ -27,11 +27,6 @@ namespace nntrainer {
*/
class LossLayer : public Layer {
public:
/**
* @brief Constructor of Loss Layer
*/
LossLayer();

/**
* @brief Destructor of Loss Layer
*/
@@ -52,19 +47,11 @@
*/
virtual bool supportBackwarding() const override { return true; }

/**
* @brief Set loss scale factor
*/
virtual void setLossScale(float scale) override { loss_scale = scale; }

private:
/**
* @copydoc Layer::requireLabel()
*/
bool requireLabel() const override { return true; }

float loss_scale; /**< loss scale factor */

protected:
/**
* @brief update loss
@@ -73,14 +60,6 @@
*/
void updateLoss(RunLayerContext &context, const Tensor &l);

/**
* @brief apply loss scale
*/
void applyLossScale(Tensor &derivative) {
if (loss_scale != 0.0f)
derivative.multiply_i(loss_scale);
}

Tensor
l; /**< loss tensor to store intermediate value to calculate loss value */
};
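
For reference, the loss_scale member and applyLossScale() helper removed above implemented the usual mixed-precision loss-scaling trick: the loss derivative is multiplied by a scale factor so small FP16 gradients do not underflow, and the same factor is divided back out before the weights are updated. A minimal sketch of that idea; the unscale step is shown only for completeness and is not part of this diff:

#include <vector>

// Scale the loss derivative so small FP16 gradients stay representable
// (mirrors the removed applyLossScale guard on loss_scale != 0).
void applyLossScaleSketch(std::vector<float> &derivative, float loss_scale) {
  if (loss_scale != 0.0f)
    for (float &d : derivative)
      d *= loss_scale;
}

// Illustrative counterpart: divide the scale back out of the gradients
// before the optimizer consumes them.
void unscaleGradientsSketch(std::vector<float> &grad, float loss_scale) {
  if (loss_scale != 0.0f)
    for (float &g : grad)
      g /= loss_scale;
}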