[Wait for #2567] [ Test ] Mixed Precision Test Case #2568

Closed · wants to merge 5 commits
Changes from 4 commits
Applications/KNN/jni/meson.build (1 addition, 1 deletion)

@@ -15,4 +15,4 @@ e = executable('knn_sample',
   install_dir: application_install_dir
 )
 
-test('app_knn', e, args: [nntr_app_resdir / 'KNN'])
+test('app_knn', e, args: [nntr_app_resdir / 'KNN/'])
nntrainer/graph/network_graph.cpp (15 additions, 3 deletions)

@@ -768,9 +768,10 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
    * node is going to be used with in-place optimizations.
    */
   auto out_specs = init_context.getOutSpecs();
+
   /// @note try move inplace control to finalize
   bool shared_var = false, shared_grad = false;
-  if (lnode->executeInPlace() != InPlace::NONE) {
+  if (lnode->executeInPlace() != InPlace::NONE && lnode->supportInPlace()) {
     setInplaceSharedMemoryConfigByLayer(lnode, shared_var, shared_grad);
     for (unsigned int i = 0; i < out_specs.size(); ++i) {
       auto &s = out_specs.at(i);
@@ -1556,8 +1557,19 @@ void NetworkGraph::requestOptimizerVariable(
     const TensorDim &dim = w->getDim();
     std::vector<TensorDim> dims = cb(dim);
     w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables(
-      dims, w->getName(), TensorLifespan::MAX_LIFESPAN,
-      w->isGradientClipByGlobalNorm(), Tensor::Initializer::ZEROS));
+      dims, w->getName(), ":opt", TensorLifespan::MAX_LIFESPAN,
+      w->isGradientClipByGlobalNorm(), w->isMixedPrecision(),
+      Tensor::Initializer::ZEROS));
+
+    if (w->isMixedPrecision()) {
+      for (auto &dim : dims)
+        dim.setDataType(ml::train::TensorDim::DataType::FP32);
+      w->setOptimizerVariables32(
+        tensor_manager->requestWeightOptimizerVariables(
+          dims, w->getName(), ":opt32:", TensorLifespan::MAX_LIFESPAN,
+          w->isGradientClipByGlobalNorm(), w->isMixedPrecision(),
+          Tensor::Initializer::ZEROS));
+    }
     }
   }
 }
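For context, the hunk above wires each mixed-precision weight to a second, FP32 set of optimizer variables. A minimal standalone sketch of the master-weight pattern this enables follows; `QuantizedWeight` and `sgd_step` are illustrative stand-ins, not nntrainer APIs. The point is that the optimizer accumulates in FP32 and the low-precision tensor is only a rounded view of the FP32 state:

// Illustrative sketch only (not nntrainer code): why optimizer variables are
// kept in FP32 when the weight itself is stored in a reduced precision.
#include <cstddef>
#include <vector>

struct QuantizedWeight {
  std::vector<float> storage;  // imagine FP16 storage; float used for brevity
  std::vector<float> master;   // FP32 master copy (the ":var32" tensor)
  std::vector<float> momentum; // FP32 optimizer variable (the ":opt32" tensors)
};

// SGD with momentum, done entirely in FP32; the reduced-precision weight is
// refreshed from the master copy after each step.
void sgd_step(QuantizedWeight &w, const std::vector<float> &grad, float lr,
              float beta) {
  for (std::size_t i = 0; i < w.master.size(); ++i) {
    w.momentum[i] = beta * w.momentum[i] + grad[i];
    w.master[i] -= lr * w.momentum[i]; // tiny updates survive in FP32
    w.storage[i] = w.master[i];        // would round to FP16 in practice
  }
}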
nntrainer/layers/input_layer.cpp (16 additions, 3 deletions)

@@ -33,8 +33,7 @@ namespace nntrainer {
 static constexpr size_t SINGLE_INOUT_IDX = 0;
 
 InputLayer::InputLayer() :
-  Layer(),
-  input_props(props::Normalization(), props::Standardization()) {}
+  Layer(), input_props(props::Normalization(), props::Standardization()) {}
 
 void InputLayer::setProperty(const std::vector<std::string> &values) {
   auto remain_props = loadProperties(values, input_props);
@@ -47,7 +46,7 @@ void InputLayer::forwarding(RunLayerContext &context, bool training) {
   Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
   if (!context.executeInPlace()) {
     Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
-    hidden_.copy(input_);
+    hidden_.copyData(input_);
   }
 
   if (std::get<props::Normalization>(input_props))
@@ -70,7 +69,21 @@ void InputLayer::finalize(InitLayerContext &context) {
 
   std::vector<TensorDim> output_dims = context.getInputDimensions();
 
+  for (auto &d : output_dims) {
+    d.setDataType(context.getActivationDataType());
+  }
+
   context.setOutputDimensions(output_dims);
 
+  is_inplace = true;
+
+  /**
+   * @note Input Layer assuems that the FP32 IN Tensor always. Therefore, if the
+   * activation data type is not fp32, then it does not support in-place
+   * operation.
+   */
+  if (context.getActivationDataType() != ml::train::TensorDim::DataType::FP32)
+    is_inplace = false;
 }
 
 } /* namespace nntrainer */

Review comment (Member) on the @note above: "typo assume ?"
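The in-place restriction introduced here follows from the copy becoming a type conversion: an FP32 input cannot share a buffer with, say, an FP16 output, because the element sizes differ. A hedged, self-contained illustration with plain buffers (not nntrainer tensors):

// Sketch: a converting copy needs a distinct destination buffer, so the
// layer cannot run in place when input and activation types differ.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Truncating FP32 -> bfloat16-style cast, for illustration only.
std::vector<uint16_t> cast_to_bf16_bits(const std::vector<float> &in) {
  std::vector<uint16_t> out(in.size()); // separate allocation is unavoidable
  for (std::size_t i = 0; i < in.size(); ++i) {
    uint32_t bits;
    std::memcpy(&bits, &in[i], sizeof(bits));
    out[i] = static_cast<uint16_t>(bits >> 16); // keep sign, exponent, top mantissa
  }
  return out;
}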
nntrainer/layers/input_layer.h (2 additions, 1 deletion)

@@ -82,7 +82,7 @@ class InputLayer : public Layer {
   /**
    * @copydoc Layer::supportInPlace()
    */
-  bool supportInPlace() const override { return true; }
+  bool supportInPlace() const override { return is_inplace; }
 
   /**
    * @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
@@ -105,6 +105,7 @@
 
 private:
   std::tuple<props::Normalization, props::Standardization> input_props;
+  bool is_inplace;
 };
 } // namespace nntrainer
nntrainer/layers/layer_context.cpp (13 additions)

@@ -169,6 +169,19 @@ Tensor &RunLayerContext::getWeightGrad(unsigned int idx) const {
   return weights[idx]->getGradientRef();
 }
 
+/**
+ * @brief Get the FP32 copy of the weight tensor object
+ *
+ * @param idx Identifier of the weight
+ * @return Tensor& Reference to the FP32 weight tensor
+ */
+Tensor &RunLayerContext::getWeightFP32(unsigned int idx) const {
+  if (!weights[idx]->hasGradient())
+    throw std::invalid_argument(
+      "Requesting gradient for a non-trainable weight.");
+  return weights[idx]->getVariableFP32Ref();
+}
+
 /**
  * @brief Get the Weight Optimizer Variable tensor object
  *
nntrainer/layers/layer_context.h (9 additions)

@@ -463,6 +463,15 @@ class RunLayerContext {
   Tensor &getWeightGrad(unsigned int idx) const;
 
+  /**
+   * @brief Get the FP32 copy of the weight tensor object
+   *
+   * @param idx Identifier of the weight
+   * @return Tensor& Reference to the FP32 weight tensor
+   */
+  Tensor &getWeightFP32(unsigned int idx) const;
+
   /**
    * @brief Get the Weight Optimizer Variable tensor object
    *
   * @param idx Identifier of the weight
nntrainer/layers/layer_node.h (5 additions, 4 deletions)

@@ -487,6 +487,7 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
   const std::vector<TensorDim> getOutputDimensions() const;
   /**
    * @brief Get the Weight object
+   * currently, only unittest uses this func.
    *
    * @param idx Identifier of the weight
    * @return Weight& Reference to the weight
@@ -495,11 +496,11 @@
   NNTR_THROW_IF(!run_context, std::runtime_error)
     << __func__ << " layer needs to be finalized first!";
   if (run_context->weightHasGradient(idx)) {
-    return Weight(run_context->getWeight(idx),
-                  run_context->getWeightGrad(idx),
-                  run_context->getWeightName(idx));
+    return Weight(
+      run_context->getWeight(idx), run_context->getWeightGrad(idx),
+      run_context->getWeightFP32(idx), run_context->getWeightName(idx));
   } else {
-    return Weight(run_context->getWeight(idx), Tensor(),
+    return Weight(run_context->getWeight(idx), Tensor(), Tensor(),
                   run_context->getWeightName(idx));
   }
 }
nntrainer/layers/loss/mse_loss_layer.cpp (10 additions, 1 deletion)

@@ -20,7 +20,16 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;
 
 void MSELossLayer::forwarding(RunLayerContext &context, bool training) {
   Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
-  Tensor &y = context.getInput(SINGLE_INOUT_IDX);
+
+  Tensor empty_tensor;
+  Tensor &y = context.getInput(SINGLE_INOUT_IDX).getDataType() ==
+                  ml::train::TensorDim::DataType::FP32
+                ? context.getInput(SINGLE_INOUT_IDX)
+                : empty_tensor;
+
+  if (y.empty())
+    y = context.getInput(SINGLE_INOUT_IDX)
+          .clone(ml::train::TensorDim::DataType::FP32);
 
   // hidden_ <- y2 - y;
   if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
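The branch above keeps the loss math in FP32: an FP32 prediction is used as-is, anything else is cloned to FP32 first. A minimal sketch of the same compute-in-FP32 idea, with plain vectors standing in for tensors (the promotion to `float` here corresponds to the `clone(FP32)` call above):

// Sketch only: mean-squared error accumulated in FP32 even if the inputs
// were produced in a lower precision.
#include <cstddef>
#include <vector>

float mse_fp32(const std::vector<float> &y, const std::vector<float> &label) {
  float acc = 0.0f;
  for (std::size_t i = 0; i < y.size(); ++i) {
    float d = y[i] - label[i]; // both operands already promoted to FP32
    acc += d * d;
  }
  return acc / static_cast<float>(y.size());
}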
nntrainer/tensor/manager.cpp (28 additions, 7 deletions)

@@ -414,7 +414,7 @@ std::vector<Weight *> Manager::requestWeights(
       // var_exec_order.push_back(TensorPool::PERSIST_END_ORDER);
     }
 
-    Tensor *var = nullptr, *grad = nullptr;
+    Tensor *var = nullptr, *grad = nullptr, *var32 = nullptr;
     bool is_dependent = !shared_names.empty();
     if (is_dependent) {
       /// shared_name is used and the original name is discarded
@@ -431,6 +431,17 @@
         grad = tensor_pool.requestOrExtend(shared_name + Var_Grad::grad_suffix,
                                            dim_g, grad_exec_order, grad_ls,
                                            Tensor::Initializer::ZEROS);
+
+        if (var->getDataType() != ml::train::TensorDim::DataType::FP32) {
+          TensorDim var32_dim(dim_v);
+          var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);
+          std::vector<unsigned int> var32_exec_order;
+          var32_exec_order.push_back(TensorPool::PERSIST_END_ORDER);
+
+          var32 = weight_pool.requestOrExtend(shared_name + ":var32", var32_dim,
+                                              var32_exec_order, var_ls,
+                                              Tensor::Initializer::ZEROS);
+        }
       }
     } else {
       /** case requesting fresh weights */
@@ -448,11 +459,21 @@
        grad = tensor_pool.request(name + Var_Grad::grad_suffix, dim_g,
                                   grad_exec_order, grad_ls,
                                   Tensor::Initializer::ZEROS, is_wgrad);
+       if (var->getDataType() != ml::train::TensorDim::DataType::FP32) {
+         TensorDim var32_dim(dim_v);
+         var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);
+         std::vector<unsigned int> var32_exec_order;
+         var32_exec_order.push_back(TensorPool::PERSIST_END_ORDER);
+         var32 =
+           weight_pool.request(name + ":var32", var32_dim, var32_exec_order,
+                               var_ls, Tensor::Initializer::ZEROS);
+       }
      }
    }
 
-    weights_v2.emplace_back(std::make_unique<Weight>(
-      var, grad, w_reg, w_reg_const, decay, is_dependent, clip_by_global_norm));
+    weights_v2.emplace_back(
+      std::make_unique<Weight>(var, grad, var32, w_reg, w_reg_const, decay,
+                               is_dependent, clip_by_global_norm));
   }
 
   std::transform(weights_v2.begin() + current_size, weights_v2.end(),
@@ -668,15 +689,15 @@ bool Manager::isSecondLastAccess(const std::string &name,
  */
 std::vector<Tensor *> Manager::requestWeightOptimizerVariables(
   const std::vector<TensorDim> &dims, const std::string &name,
-  const TensorLifespan &lifespan, bool is_grad_clip,
-  Tensor::Initializer initializer) {
+  const std::string &suffix, const TensorLifespan &lifespan, bool is_grad_clip,
+  bool is_mixed_precision, Tensor::Initializer initializer) {
 
   std::vector<Tensor *> ret;
   ret.reserve(dims.size());
 
   std::vector<unsigned int> exec;
   exec.reserve(1);
-  if (is_grad_clip) {
+  if (is_grad_clip || is_mixed_precision) {
     exec.emplace_back(TensorPool::PERSIST_END_ORDER);
   } else {
     exec.emplace_back(getMinMaxTensorExecutionOrder(name, true).second);
@@ -685,7 +706,7 @@
   /// @note this is assuming weight optimizer variables is treated as weight, if
   /// not, there is room to optimize below behavior
   for (unsigned int idx = 0; idx < dims.size(); idx++)
-    ret.push_back(weight_pool.request(name + ":opt" + std::to_string(idx),
+    ret.push_back(weight_pool.request(name + suffix + std::to_string(idx),
                                       dims[idx], exec, lifespan, initializer));
 
   return ret;
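With the new `suffix` parameter, optimizer variables are registered in the weight pool under the name `name + suffix + index`, so a weight can own both a default set and an FP32 set side by side. A small sketch of that naming scheme; the helper and the example weight name are illustrative, not part of the Manager API:

// Sketch of the pool-naming convention implied above.
#include <cstddef>
#include <string>
#include <vector>

std::vector<std::string> optimizerVariableNames(const std::string &weight_name,
                                                const std::string &suffix,
                                                std::size_t count) {
  std::vector<std::string> names;
  names.reserve(count);
  for (std::size_t idx = 0; idx < count; ++idx)
    names.push_back(weight_name + suffix + std::to_string(idx));
  return names;
}

// optimizerVariableNames("fc0:weight", ":opt", 2)    -> "fc0:weight:opt0", "fc0:weight:opt1"
// optimizerVariableNames("fc0:weight", ":opt32:", 2) -> "fc0:weight:opt32:0", "fc0:weight:opt32:1"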
nntrainer/tensor/manager.h (2 additions, 1 deletion)

@@ -224,7 +224,8 @@ class Manager {
   */
  std::vector<Tensor *> requestWeightOptimizerVariables(
    const std::vector<TensorDim> &dims, const std::string &name,
-   const TensorLifespan &lifespan, bool is_grad_clip,
+   const std::string &suffix, const TensorLifespan &lifespan,
+   bool is_grad_clip, bool is_mixed_precision,
    Tensor::Initializer initializer = Tensor::Initializer::NONE);
 
  /**
nntrainer/tensor/tensor.cpp (12 additions)

@@ -3065,6 +3065,18 @@ Tensor Tensor::clone() const {
   return t;
 }
 
+Tensor Tensor::clone(ml::train::TensorDim::DataType type) const {
+  if (getDataType() == type)
+    return clone();
+
+  TensorDim dim = getDim();
+  dim.setDataType(type);
+  Tensor t(dim, true);
+  t.copyData(*this);
+  t.name = name;
+  return t;
+}
+
 void Tensor::reshape(const TensorDim &d) {
 
   NNTR_THROW_IF(!contiguous, std::invalid_argument)
nntrainer/tensor/tensor.h (7 additions)

@@ -1680,6 +1680,13 @@ class Tensor {
   */
  Tensor clone() const;
 
+ /**
+  * @brief Convient wrapper for inplace copy of @a this.
+  * @param[in] type output tensor data type
+  * @retval Copied version of this
+  */
+ Tensor clone(ml::train::TensorDim::DataType type) const;
+
  /**
   * @brief Save the Tensor into file
   * @param[in] file output file stream

Review comment (Contributor) on the @brief above, with a suggested change from "Convient wrapper for inplace copy of @a this." to "Convenient wrapper for inplace copy of @a this.": "Is it typo? Do you mean convenient ?"
nntrainer/tensor/weight.cpp (73 additions)

@@ -34,6 +34,28 @@ Weight::Weight(const TensorDim &dim, const Tensor::Initializer init,
     throw std::invalid_argument("Weight initializer cannot be none");
   if (regularizer == WeightRegularizer::UNKNOWN)
     throw std::invalid_argument("Weight regularizer unknown");
+
+  std::string var32_suffix = ":fp32";
+  std::string var32_name = name + var32_suffix;
+
+  /**
+   * @note We assume if the Weight Data Type is not FP32, then FP32 Weight is
+   * necessary to maintain the accuracy.
+   * We could think it can be other data type and if there is the case to
+   * support other data type, then the code below needs to be updated.
+   *
+   * Also, the loss_scale is not used in Weight but leave as it is for later
+   * usage.
+   */
+
+  if (train && dim.getDataType() != ml::train::TensorDim::DataType::FP32) {
+    TensorDim var32_dim(dim);
+    var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);
+
+    var32 = std::make_shared<Tensor>(var32_dim, alloc_now_, init, var32_name);
+  } else {
+    var32 = std::make_shared<Tensor>(var32_name);
+  }
 }
 
 Weight::Weight(const TensorDim &dim_v, const TensorDim &dim_g,
@@ -52,6 +74,57 @@
     throw std::invalid_argument("Weight initializer cannot be none");
   if (regularizer == WeightRegularizer::UNKNOWN)
     throw std::invalid_argument("Weight regularizer unknown");
+
+  std::string var32_suffix = ":fp32";
+  std::string var32_name = name + var32_suffix;
+
+  if (train && dim_v.getDataType() != ml::train::TensorDim::DataType::FP32) {
+    TensorDim var32_dim(dim_v);
+    var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);
+
+    var32 = std::make_shared<Tensor>(var32_dim, alloc_now_, init, var32_name);
+  } else {
+    var32 = std::make_shared<Tensor>(var32_name);
+  }
 }
+
+Weight::Weight(const Tensor &v, const Tensor &g, const Tensor &v32,
+               const std::string &n, bool is_dependent,
+               unsigned int output_axis_) :
+  Var_Grad(v, g, n, is_dependent),
+  regularizer(WeightRegularizer::NONE),
+  regularizer_constant(1.0f),
+  decay(0.0f),
+  clip_by_global_norm(0.0f),
+  output_axis(output_axis_),
+  loss_scale(0.0),
+  var32(std::make_shared<Tensor>(n + ":fp32")) {
+
+  if (!g.empty() && isMixedPrecision()) {
+    TensorDim var32_dim(v.getDim());
+    var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);
+    if (!v32.empty())
+      var32 = std::make_shared<Tensor>(
+        v32.getSharedDataTensor(var32_dim, 0, false, n + ":fp32"));
+  }
+}
+
+Weight::Weight(Tensor *v, Tensor *g, Tensor *v32, const WeightRegularizer reg,
+               const float reg_const, const float decay, bool is_dependent,
+               const float max_norm, unsigned int output_axis_,
+               float loss_scale_) :
+  Var_Grad(v, g, is_dependent),
+  regularizer(reg),
+  regularizer_constant(reg_const),
+  decay(decay),
+  clip_by_global_norm(max_norm),
+  output_axis(output_axis_),
+  loss_scale(loss_scale_),
+  var32(std::shared_ptr<Tensor>(v32, [](void *) {})) {
+  if (!v32)
+    var32 = std::make_shared<Tensor>();
+}
 
 } // namespace nntrainer
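Condensing the constructors above: a trainable weight whose storage type is not FP32 gets an FP32 shadow tensor named with the `:fp32` suffix; otherwise the shadow stays an empty placeholder. A toy restatement of that rule, with stand-in types rather than nntrainer's TensorDim/Tensor:

// Toy restatement of the var32 allocation rule; Dim/DType/ToyTensor are
// stand-ins for illustration only.
#include <memory>
#include <string>

enum class DType { FP16, FP32 };

struct Dim {
  DType type;
};

struct ToyTensor {
  Dim dim;
  std::string name;
  bool allocated;
};

std::shared_ptr<ToyTensor> makeVar32(const Dim &dim, const std::string &name,
                                     bool train) {
  if (train && dim.type != DType::FP32) {
    Dim d32 = dim;
    d32.type = DType::FP32; // same shape, FP32 storage
    return std::make_shared<ToyTensor>(ToyTensor{d32, name + ":fp32", true});
  }
  // FP32 or frozen weights need no shadow: keep a named, empty placeholder,
  // mirroring the else branches above.
  return std::make_shared<ToyTensor>(ToyTensor{dim, name + ":fp32", false});
}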