[ Context ] Add loss scale in Context & use it in MSE loss
This PR adds a loss scale parameter to RunLayerContext and uses it to update
the MSE loss.

. Add Loss Scale Parameter in RunLayerContext Constructor
. Add applyLossScale function to update the return derivative in Loss Layer
. Change MSE Loss Layer to apply the loss scale to the return derivative (see the sketch below)
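
For reference, below is a minimal standalone sketch of the mechanism described above. It uses made-up `Context` and `apply_loss_scale` names rather than the actual nntrainer types; in the PR itself the equivalent step is `LossLayer::applyLossScale` being called at the end of `MSELossLayer::calcDerivative`.

```cpp
// Sketch only: scale the derivative returned by a loss layer by the context's
// loss scale. Scaling the loss derivative keeps small FP16 gradients from
// underflowing in mixed-precision training; a scale of 1.0 means "no scaling".
#include <iostream>
#include <vector>

struct Context {
  float loss_scale = 1.0f; // carried from InitLayerContext into RunLayerContext
};

// Analogue of LossLayer::applyLossScale(context, ret_deriv).
void apply_loss_scale(const Context &ctx, std::vector<float> &ret_deriv) {
  if (ctx.loss_scale != 1.0f) {
    for (float &v : ret_deriv)
      v *= ctx.loss_scale;
  }
}

int main() {
  Context ctx;
  ctx.loss_scale = 128.0f;                      // example value, not a default
  std::vector<float> deriv = {0.001f, -0.002f}; // e.g. (y - label) / N from MSE
  apply_loss_scale(ctx, deriv);                 // derivative leaves the loss layer scaled
  std::cout << deriv[0] << ' ' << deriv[1] << '\n'; // prints 0.128 -0.256
}
```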

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <[email protected]>
jijoongmoon committed May 11, 2024
1 parent 59b7c2e commit bb2bb45
Showing 13 changed files with 84 additions and 26 deletions.
6 changes: 4 additions & 2 deletions nntrainer/graph/network_graph.cpp
@@ -880,7 +880,8 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
lnode->getTrainable(), shared_weight_names),
inputs, outputs,
tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
lnode->getTrainable(), shared_tensor_names));
lnode->getTrainable(), shared_tensor_names),
init_context.getLossScale());

return outputs;
}
@@ -1028,7 +1029,8 @@ NetworkGraph::refinalizeContext(const std::shared_ptr<LayerNode> &lnode,
// TODO: update weights spec for trainable based on layer trainable prop
weights, inputs, outputs,
tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
lnode->getTrainable(), shared_tensor_names));
lnode->getTrainable(), shared_tensor_names),
init_context.getLossScale());

return outputs;
}
3 changes: 2 additions & 1 deletion nntrainer/layers/layer_context.cpp
@@ -126,13 +126,14 @@ const std::vector<VarGradSpecV2> &InitLayerContext::getOutSpecs() const {
}

RunLayerContext::RunLayerContext(const std::string &name, bool trainable,
float l, bool in_place_,
float l, bool in_place_, float loss_scale_,
const std::vector<Weight *> &w,
const std::vector<Var_Grad *> &in,
const std::vector<Var_Grad *> &out,
const std::vector<Var_Grad *> &t) :
loss(l),
in_place(in_place_),
loss_scale(loss_scale_),
weights(w),
inputs(in),
outputs(out),
36 changes: 32 additions & 4 deletions nntrainer/layers/layer_context.h
@@ -63,7 +63,7 @@ class InitLayerContext {
const float max_norm = 0.0,
std::array<std::string, 3> tensor_type_ = {"NCHW", "FP32",
"FP32"},
const float loss_scale = 0.0);
const float loss_scale = 1.0);
/**
* @brief get Tensor Format of Layer
*
@@ -348,6 +348,14 @@ class InitLayerContext {
*/
bool executeInPlace() const { return in_place; }

/**
* @brief get Initial value of Loss_Scale. This is set to RunLayerContext
* and updated
*
* @return loss_scale
*/
float getLossScale() const { return loss_scale; }

private:
std::vector<TensorDim> input_dim; /**< Input dimensions for the layer */
bool in_place; /**< if the layer is expected to run in-place */
@@ -385,7 +393,7 @@ class RunLayerContext {
* @brief Construct a new Run Layer Context object
*
*/
RunLayerContext() : loss(0.0), in_place(false) {}
RunLayerContext() : loss(0.0), in_place(false), loss_scale(1.0) {}

/**
* @brief Construct a new Run Layer Context object
@@ -396,20 +404,33 @@
std::get<props::Name>(props).set(name);
}

/**
* @brief Construct a new Run Layer Context object
*
*/
RunLayerContext(const std::string &name, bool in_place_, float loss_scale_) :
RunLayerContext() {
in_place = in_place_;
std::get<props::Name>(props).set(name);
loss_scale = loss_scale_;
}

/**
* @brief Construct a new Run Layer Context object
*
* @param name name of the layer
* @param trainable if the layer is trainable
* @param l loss of the layer
* @param in_place_ execution in-place of the layer
* @param loss_scale loss_scale of the layer
* @param w weights of the layer
* @param in inputs of the layer
* @param out outputs of the layer
* @param t extra tensors of the layer
*/
RunLayerContext(const std::string &name, bool trainable, float l,
bool in_place_, const std::vector<Weight *> &w,
bool in_place_, float loss_scale_,
const std::vector<Weight *> &w,
const std::vector<Var_Grad *> &in,
const std::vector<Var_Grad *> &out,
const std::vector<Var_Grad *> &t);
@@ -883,10 +904,17 @@
*/
ml::train::LayerComputeEngine getComputeEngine() { return compute_engine; }

/**
* @brief get loss scale
* @return loss scale
*/
float getLossScale() { return loss_scale; }

private:
std::tuple<props::Name, props::Trainable> props; /**< props of the layer */
float loss; /**< loss of the layer */
bool in_place; /**< if the layer is expected to run in-place */
bool in_place; /**< if the layer is expected to run in-place */
float loss_scale; /**< loss_scale of the layer */

std::vector<Weight *> weights; /**< weights of the layer */
std::vector<Var_Grad *> inputs; /**< inputs of the layer */
9 changes: 5 additions & 4 deletions nntrainer/layers/layer_node.cpp
@@ -599,7 +599,7 @@ InitLayerContext LayerNode::finalize(const std::vector<TensorDim> &input_dims,

const auto &scope = getSharedFrom().empty() ? getName() : getSharedFrom();
float max_norm = 0.0;
float loss_scale = 0.0;
float loss_scale = 1.0;
if (!std::get<props::ClipGradByGlobalNorm>(*layer_node_props).empty())
max_norm = std::get<props::ClipGradByGlobalNorm>(*layer_node_props).get();

@@ -864,10 +864,11 @@ float LayerNode::getLoss() const { return *loss; }
void LayerNode::configureRunContext(const std::vector<Weight *> &weights,
const std::vector<Var_Grad *> &inputs,
const std::vector<Var_Grad *> &outputs,
const std::vector<Var_Grad *> &tensors) {
const std::vector<Var_Grad *> &tensors,
float loss_scale) {
run_context = std::make_unique<RunLayerContext>(
getName(), getTrainable(), 0.0f, executeInPlace() != InPlace::NONE, weights,
inputs, outputs, tensors);
getName(), getTrainable(), 0.0f, executeInPlace() != InPlace::NONE,
loss_scale, weights, inputs, outputs, tensors);
}

/**
3 changes: 2 additions & 1 deletion nntrainer/layers/layer_node.h
@@ -820,7 +820,8 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
void configureRunContext(const std::vector<Weight *> &weights,
const std::vector<Var_Grad *> &inputs,
const std::vector<Var_Grad *> &outputs,
const std::vector<Var_Grad *> &tensors);
const std::vector<Var_Grad *> &tensors,
float loss_scale);

/**
* @brief Preset modes for printing summary for the layer
7 changes: 7 additions & 0 deletions nntrainer/layers/loss/loss_layer.cpp
@@ -36,6 +36,13 @@ void LossLayer::updateLoss(RunLayerContext &context, const Tensor &l) {
context.setLoss(loss_sum / (float)l.batch());
}

void LossLayer::applyLossScale(RunLayerContext &context, Tensor &ret_deriv) {

float loss_scale = context.getLossScale();
if (loss_scale != 1.0)
ret_deriv.multiply_i(loss_scale);
}

/**
* @copydoc Layer::setProperty(const std::vector<std::string> &values)
*/
7 changes: 7 additions & 0 deletions nntrainer/layers/loss/loss_layer.h
@@ -60,6 +60,13 @@ class LossLayer : public Layer {
*/
void updateLoss(RunLayerContext &context, const Tensor &l);

/**
* @brief update return derivative with loss scale
* @param context Run context to update
* @param return_dev Tensor data to calculate
*/
void applyLossScale(RunLayerContext &context, Tensor &l);

Tensor
l; /**< loss tensor to store intermediate value to calculate loss value */
};
13 changes: 12 additions & 1 deletion nntrainer/layers/loss/mse_loss_layer.cpp
@@ -50,8 +50,17 @@ void MSELossLayer::forwarding(RunLayerContext &context, bool training) {
}

void MSELossLayer::calcDerivative(RunLayerContext &context) {
Tensor empty_tensor;
Tensor &ret_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
const Tensor &y2 = context.getIncomingDerivative(SINGLE_INOUT_IDX);
const Tensor &y2_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
Tensor &y2 = empty_tensor;

if (ret_derivative.getDataType() == ml::train::TensorDim::DataType::FP32)
y2 = y2_;

if (y2.empty())
y2 = y2_.clone(ret_derivative.getDataType());

Tensor &y = context.getInput(SINGLE_INOUT_IDX);

y.subtract(y2, ret_derivative);
@@ -60,6 +69,8 @@ void MSELossLayer::calcDerivative(RunLayerContext &context) {
throw std::runtime_error(
"[MSELossLayer::calcDerivative] Error when calculating loss");
}

LossLayer::applyLossScale(context, ret_derivative);
}

} // namespace nntrainer
16 changes: 8 additions & 8 deletions nntrainer/layers/time_dist.cpp
@@ -256,8 +256,8 @@ void TimeDistLayer::forwarding(RunLayerContext &context, bool training) {

RunLayerContext dist_context(context.getName(), context.getTrainable(),
context.getLoss(), context.executeInPlace(),
getWeightsForContext(), {&in_var}, {&out_var},
getTensorsForContext());
context.getLossScale(), getWeightsForContext(),
{&in_var}, {&out_var}, getTensorsForContext());

dist_layer->forwarding(dist_context, training);
}
@@ -303,8 +303,8 @@ void TimeDistLayer::calcDerivative(RunLayerContext &context) {

RunLayerContext dist_context(context.getName(), context.getTrainable(),
context.getLoss(), context.executeInPlace(),
getWeightsForContext(), {&in_var}, {&out_var},
getTensorsForContext());
context.getLossScale(), getWeightsForContext(),
{&in_var}, {&out_var}, getTensorsForContext());

dist_layer->calcDerivative(dist_context);
}
@@ -354,8 +354,8 @@ void TimeDistLayer::calcGradient(RunLayerContext &context) {

RunLayerContext dist_context(context.getName(), context.getTrainable(),
context.getLoss(), context.executeInPlace(),
getWeightsForContext(), {&in_var}, {&out_var},
getTensorsForContext());
context.getLossScale(), getWeightsForContext(),
{&in_var}, {&out_var}, getTensorsForContext());

dist_layer->calcGradient(dist_context);
}
@@ -396,8 +396,8 @@ void TimeDistLayer::setBatch(RunLayerContext &context, unsigned int batch) {

RunLayerContext dist_context(context.getName(), context.getTrainable(),
context.getLoss(), context.executeInPlace(),
getWeightsForContext(), {&in_var}, {&out_var},
getTensorsForContext());
context.getLossScale(), getWeightsForContext(),
{&in_var}, {&out_var}, getTensorsForContext());

dist_layer->setBatch(dist_context, batch);

2 changes: 1 addition & 1 deletion nntrainer/models/model_common_properties.h
@@ -217,7 +217,7 @@ class ModelTensorDataType final : public EnumProperty<ModelTensorDataTypeInfo> {
*/
class LossScale : public Property<float> {
public:
LossScale(float value = 0.0f);
LossScale(float value = 1.0f);
static constexpr const char *key = "loss_scale"; /**< unique key to access */
using prop_tag = float_prop_tag; /**< property type */
};
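
For context, `loss_scale` is exposed as a model property keyed by the `loss_scale` string above, with a default of 1.0 (no scaling) after this change. A hedged usage sketch follows; the header path, the helper name, and the value 128 are illustrative assumptions rather than part of this PR:

```cpp
#include <model.h> // ml::train::Model from the ccapi headers; path may differ per install

// Hypothetical helper: turn on loss scaling for mixed-precision training by
// setting the "loss_scale" model property. 128 is an arbitrary example value;
// 1.0 (the new default) leaves gradients unscaled.
void enable_loss_scale(ml::train::Model &model) {
  model.setProperty({"loss_scale=128"});
}
```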
2 changes: 1 addition & 1 deletion nntrainer/tensor/weight.cpp
@@ -99,7 +99,7 @@ Weight::Weight(const Tensor &v, const Tensor &g, const Tensor &v32,
decay(0.0f),
clip_by_global_norm(0.0f),
output_axis(output_axis_),
loss_scale(0.0),
loss_scale(1.0),
var32(std::make_shared<Tensor>(n + ":fp32")) {

if (!g.empty() && isMixedPrecision()) {
2 changes: 1 addition & 1 deletion test/unittest/layers/layers_golden_tests.cpp
@@ -156,7 +156,7 @@ static RunLayerContext prepareRunContext(const TensorPacks &packs) {
};

auto rc =
RunLayerContext("golden", true, 0.0f, false, create_view(weights),
RunLayerContext("golden", true, 0.0f, false, 1.0, create_view(weights),
create_view(ins), create_view(outs), create_view(tensors));

auto num_outputs = rc.getNumOutputs();
4 changes: 2 additions & 2 deletions test/unittest/layers/unittest_layer_node.cpp
@@ -131,7 +131,7 @@ TEST(nntrainer_LayerNode, finalize_05_n) {
nntrainer::createLayerNode(nntrainer::IdentityLayer::type));
EXPECT_NO_THROW(lnode->setProperty({"input_shape=1:1:1", "name=abc"}));
EXPECT_NO_THROW(lnode->finalize());
EXPECT_NO_THROW(lnode->configureRunContext({}, {&input}, {}, {}));
EXPECT_NO_THROW(lnode->configureRunContext({}, {&input}, {}, {}, 1.0));
EXPECT_THROW(lnode->finalize(), std::runtime_error);
}

@@ -298,7 +298,7 @@ TEST(nntrainer_LayerNode, setWeights_02_n) {
EXPECT_NO_THROW(lnode =
nntrainer::createLayerNode(nntrainer::IdentityLayer::type));
EXPECT_NO_THROW(lnode->setProperty({"input_shape=1:1:1", "name=abc"}));
EXPECT_NO_THROW(lnode->configureRunContext({&weight}, {&input}, {}, {}));
EXPECT_NO_THROW(lnode->configureRunContext({&weight}, {&input}, {}, {}, 1.0));

EXPECT_THROW(lnode->setWeights(new_weights), std::runtime_error);
}
