[Wait for #2568] [ Tensor ] add is_NaN check in Tensor @open sesame 05/10 14:17 #2574

Closed · wants to merge 6 commits
2 changes: 1 addition & 1 deletion Applications/KNN/jni/meson.build
@@ -15,4 +15,4 @@ e = executable('knn_sample',
install_dir: application_install_dir
)

test('app_knn', e, args: [nntr_app_resdir / 'KNN'])
test('app_knn', e, args: [nntr_app_resdir / 'KNN/'])
17 changes: 11 additions & 6 deletions meson.build
@@ -64,9 +64,19 @@ warning_c_flags = [
'-Wno-error=varargs'
]

arch = host_machine.cpu_family()

if get_option('enable-avx')
extra_defines += '-DUSE_AVX=1'
if get_option('platform') == 'tizen'
add_project_arguments(['-mavx2'], language: ['c','cpp'])
Reviewer comment (Contributor): would the Tizen platform always support AVX2 instructions?

else
add_project_arguments(['-march=native'], language: ['c','cpp'])
endif
message('-march=native added for AVX hardware acceleration.')
endif

if get_option('enable-fp16')
arch = host_machine.cpu_family()
if get_option('platform') == 'android'
add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
extra_defines += '-DENABLE_FP16=1'
@@ -105,11 +115,6 @@ if get_option('enable-fp16')
if cc.version().version_compare('>=12.1.0')
message ('Float16 for x86_64 enabled. Modern gcc-x64 generally supports float16 with _Float16.')
extra_defines += '-DENABLE_FP16=1'
if get_option('enable-avx')
extra_defines += '-DUSE_AVX=1'
add_project_arguments(['-march=native'], language: ['c','cpp'])
message('-march=native added for AVX hardware acceleration.')
endif
else
warning ('Float16 for x86_64 enabled. However, software emulation is applied for fp16, making it slower and inconsistent. Use GCC 12+ for FP16 support. This build will probably fail unless you bring a compiler that supports fp16 for x64.')
endif
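If AVX2 cannot be assumed on every Tizen target (the question raised in the review comment above), one complementary option is a runtime CPU-feature check around the AVX kernels. The sketch below is illustrative only and not part of this PR; the helper name is hypothetical, while the builtins are standard GCC/Clang x86 intrinsics.

#include <cstdio>

// Hypothetical helper (not part of this PR): detect AVX2 at runtime so a
// scalar fallback can be chosen on CPUs or Tizen devices without AVX2.
static bool cpu_has_avx2() {
#if (defined(__GNUC__) || defined(__clang__)) && (defined(__x86_64__) || defined(__i386__))
  __builtin_cpu_init();
  return __builtin_cpu_supports("avx2") != 0;
#else
  return false; // non-x86 targets: no AVX2
#endif
}

int main() {
  std::printf("AVX2 available: %s\n", cpu_has_avx2() ? "yes" : "no");
  return 0;
}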
8 changes: 5 additions & 3 deletions nntrainer/graph/network_graph.cpp
@@ -768,9 +768,10 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
* node is going to be used with in-place optimizations.
*/
auto out_specs = init_context.getOutSpecs();

/// @note try move inplace control to finalize
bool shared_var = false, shared_grad = false;
if (lnode->executeInPlace() != InPlace::NONE) {
if (lnode->executeInPlace() != InPlace::NONE && lnode->supportInPlace()) {
setInplaceSharedMemoryConfigByLayer(lnode, shared_var, shared_grad);
for (unsigned int i = 0; i < out_specs.size(); ++i) {
auto &s = out_specs.at(i);
@@ -1556,8 +1557,9 @@ void NetworkGraph::requestOptimizerVariable(
const TensorDim &dim = w->getDim();
std::vector<TensorDim> dims = cb(dim);
w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables(
dims, w->getName(), TensorLifespan::MAX_LIFESPAN,
w->isGradientClipByGlobalNorm(), Tensor::Initializer::ZEROS));
dims, w->getName(), ":opt", TensorLifespan::MAX_LIFESPAN,
w->isGradientClipByGlobalNorm(), w->isMixedPrecision(),
Tensor::Initializer::ZEROS));
}
}
}
19 changes: 16 additions & 3 deletions nntrainer/layers/input_layer.cpp
@@ -33,8 +33,7 @@ namespace nntrainer {
static constexpr size_t SINGLE_INOUT_IDX = 0;

InputLayer::InputLayer() :
Layer(),
input_props(props::Normalization(), props::Standardization()) {}
Layer(), input_props(props::Normalization(), props::Standardization()) {}

void InputLayer::setProperty(const std::vector<std::string> &values) {
auto remain_props = loadProperties(values, input_props);
@@ -47,7 +46,7 @@ void InputLayer::forwarding(RunLayerContext &context, bool training) {
Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
if (!context.executeInPlace()) {
Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
hidden_.copy(input_);
hidden_.copyData(input_);
}

if (std::get<props::Normalization>(input_props))
@@ -70,7 +69,21 @@ void InputLayer::finalize(InitLayerContext &context) {

std::vector<TensorDim> output_dims = context.getInputDimensions();

for (auto &d : output_dims) {
d.setDataType(context.getActivationDataType());
}

context.setOutputDimensions(output_dims);

is_inplace = true;

/**
* @note The input layer assumes that its incoming tensor is always FP32.
* Therefore, if the activation data type is not FP32, the layer cannot
* perform its operation in place.
*/
if (context.getActivationDataType() != ml::train::TensorDim::DataType::FP32)
is_inplace = false;
}

} /* namespace nntrainer */
3 changes: 2 additions & 1 deletion nntrainer/layers/input_layer.h
@@ -82,7 +82,7 @@ class InputLayer : public Layer {
/**
* @copydoc Layer::supportInPlace()
*/
bool supportInPlace() const override { return true; }
bool supportInPlace() const override { return is_inplace; }

/**
* @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
@@ -105,6 +105,7 @@

private:
std::tuple<props::Normalization, props::Standardization> input_props;
bool is_inplace = true;
};
} // namespace nntrainer

13 changes: 13 additions & 0 deletions nntrainer/layers/layer_context.cpp
@@ -169,6 +169,19 @@ Tensor &RunLayerContext::getWeightGrad(unsigned int idx) const {
return weights[idx]->getGradientRef();
}

/**
* @brief Get the FP32 variable of the weight tensor object
*
* @param idx Identifier of the weight
* @return Tensor& Reference to the FP32 weight tensor
*/
Tensor &RunLayerContext::getWeightFP32(unsigned int idx) const {
if (!weights[idx]->hasGradient())
throw std::invalid_argument(
"Requesting gradient for a non-trainable weight.");
return weights[idx]->getVariableFP32Ref();
}

/**
* @brief Get the Weight Optimizer Variable tensor object
*
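For context, getWeightFP32() exposes the full-precision variable of a weight. A minimal, self-contained sketch of the data flow such an accessor typically serves in mixed-precision training is below; the types and names are stand-ins rather than the nntrainer API, and the low-precision copy is modelled with float to keep the example portable.

#include <cstddef>
#include <vector>

// Stand-in type for illustration; "low precision" is modelled as float.
struct MixedPrecisionWeight {
  std::vector<float> master_fp32; // full-precision copy the optimizer updates
  std::vector<float> active_low;  // low-precision copy used by forward/backward
};

// Apply an FP32 gradient to the FP32 master copy, then refresh the
// low-precision weight that the layers actually compute with.
void apply_gradient(MixedPrecisionWeight &w,
                    const std::vector<float> &grad_fp32, float lr) {
  for (std::size_t i = 0; i < w.master_fp32.size(); ++i) {
    w.master_fp32[i] -= lr * grad_fp32[i]; // accumulate in full precision
    w.active_low[i] = w.master_fp32[i];    // would round to fp16 in practice
  }
}

int main() {
  MixedPrecisionWeight w{{1.0f, 2.0f}, {1.0f, 2.0f}};
  apply_gradient(w, {0.1f, 0.1f}, 0.5f);
  return 0; // w.master_fp32 == {0.95f, 1.95f}; w.active_low mirrors it
}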
9 changes: 9 additions & 0 deletions nntrainer/layers/layer_context.h
@@ -463,6 +463,15 @@ class RunLayerContext {
Tensor &getWeightGrad(unsigned int idx) const;

/**
* @brief Get the FP32 variable of the weight tensor object
*
* @param idx Identifier of the weight
* @return Tensor& Reference to the FP32 weight tensor
*/
Tensor &getWeightFP32(unsigned int idx) const;

/**
* @brief Get the Weight Optimizer Variable tensor object
*
* @param idx Identifier of the weight
9 changes: 5 additions & 4 deletions nntrainer/layers/layer_node.h
@@ -487,6 +487,7 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
const std::vector<TensorDim> getOutputDimensions() const;
/**
* @brief Get the Weight object
* Currently, only unit tests use this function.
*
* @param idx Identifier of the weight
* @return Weight& Reference to the weight
@@ -495,11 +496,11 @@
NNTR_THROW_IF(!run_context, std::runtime_error)
<< __func__ << " layer needs to be finalized first!";
if (run_context->weightHasGradient(idx)) {
return Weight(run_context->getWeight(idx),
run_context->getWeightGrad(idx),
run_context->getWeightName(idx));
return Weight(
run_context->getWeight(idx), run_context->getWeightGrad(idx),
run_context->getWeightFP32(idx), run_context->getWeightName(idx));
} else {
return Weight(run_context->getWeight(idx), Tensor(),
return Weight(run_context->getWeight(idx), Tensor(), Tensor(),
run_context->getWeightName(idx));
}
}
11 changes: 10 additions & 1 deletion nntrainer/layers/loss/mse_loss_layer.cpp
@@ -20,7 +20,16 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;

void MSELossLayer::forwarding(RunLayerContext &context, bool training) {
Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
Tensor &y = context.getInput(SINGLE_INOUT_IDX);

Tensor empty_tensor;
Tensor &y = context.getInput(SINGLE_INOUT_IDX).getDataType() ==
ml::train::TensorDim::DataType::FP32
? context.getInput(SINGLE_INOUT_IDX)
: empty_tensor;

if (y.empty())
y = context.getInput(SINGLE_INOUT_IDX)
.clone(ml::train::TensorDim::DataType::FP32);

// hidden_ <- y2 - y;
if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
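The change above binds a reference to the existing input when it is already FP32 and only clones into a local FP32 tensor otherwise, so the common case pays no extra copy. A small self-contained sketch of the same borrow-or-clone idiom, using stand-in types whose names are hypothetical rather than the nntrainer Tensor API:

#include <iostream>
#include <string>
#include <vector>

// Stand-in "tensor" used only to illustrate the borrow-or-clone idiom; the
// clone here copies values without a real dtype conversion.
struct Buf {
  std::string dtype; // e.g. "fp32" or "fp16"
  std::vector<float> data;
  bool empty() const { return data.empty(); }
  Buf cloneAs(const std::string &t) const { return Buf{t, data}; }
};

int main() {
  Buf input{"fp16", {1.0f, 2.0f}};

  Buf scratch;                                       // local fallback storage
  Buf &y = input.dtype == "fp32" ? input : scratch;  // borrow when possible
  if (y.empty())
    y = input.cloneAs("fp32");                       // otherwise convert once

  std::cout << y.dtype << " " << y.data.size() << '\n'; // prints: fp32 2
  return 0;
}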
27 changes: 22 additions & 5 deletions nntrainer/optimizers/adam.cpp
@@ -36,7 +36,15 @@ Adam::~Adam() {}
enum AdamParams { wm, wv };

std::vector<TensorDim> Adam::getOptimizerVariableDim(const TensorDim &dim) {
return {dim, dim};
/**
* @note The optimizer parameters are kept in full precision to maintain
* accuracy even in mixed-precision training.
*/
TensorDim wm_dim(dim);
TensorDim wv_dim(dim);
wm_dim.setDataType(ml::train::TensorDim::DataType::FP32);
wv_dim.setDataType(ml::train::TensorDim::DataType::FP32);
return {wm_dim, wv_dim};
}

void Adam::exportTo(Exporter &exporter,
@@ -64,7 +72,15 @@ double Adam::getUpdatedLearningRate(unsigned int iteration, double ll) const {
}

void Adam::applyGradient(RunOptimizerContext &context) {
Tensor &x_grad = context.getGradient();
Tensor empty_tensor;

Tensor &x_grad =
context.getGradient().getDataType() == ml::train::TensorDim::DataType::FP32
? context.getGradient()
: empty_tensor;

if (x_grad.empty())
x_grad = context.getGradient().clone(ml::train::TensorDim::DataType::FP32);

auto &beta1 = std::get<PropsB1>(adam_props).get();
auto &beta2 = std::get<PropsB2>(adam_props).get();
@@ -91,7 +107,7 @@
denom.add_i(epsilon);
wm.divide(denom, x_grad);

context.applyGradient(context.getLearningRate() / biasCorrection1);
context.applyGradient(context.getLearningRate() / biasCorrection1, x_grad);

} else {
std::function<double(double)> sqrtEps = [epsilon](double f) {
@@ -100,8 +116,9 @@

x_grad = wv.apply<float>(sqrtEps, x_grad);
x_grad.multiply_i(wm);
context.applyGradient(getUpdatedLearningRate(context.getIteration(),
context.getLearningRate()));
context.applyGradient(
getUpdatedLearningRate(context.getIteration(), context.getLearningRate()),
x_grad);
}
}

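Both the Adam moments and the gradient handed to applyGradient() are promoted to FP32 above because very small per-step updates can be rounded away entirely in low precision. A self-contained illustration of that effect, using float versus double as a stand-in for fp16 versus fp32 (fp16 is not portable standard C++, and the numbers here are made up):

#include <cstdio>

int main() {
  const double step = 1e-8; // a tiny per-iteration update, e.g. lr * m_hat / (sqrt(v_hat) + eps)
  float w_low = 1.0f;       // low-precision accumulator (stands in for an fp16 state)
  double w_high = 1.0;      // full-precision accumulator (stands in for an fp32 state)

  for (int i = 0; i < 1000000; ++i) {
    w_low += static_cast<float>(step); // each addition rounds back to 1.0f, so the update is lost
    w_high += step;                    // accumulates normally
  }

  std::printf("low precision : %.8f\n", w_low);  // 1.00000000
  std::printf("full precision: %.8f\n", w_high); // ~1.01000000
  return 0;
}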
7 changes: 7 additions & 0 deletions nntrainer/optimizers/optimizer_context.cpp
@@ -42,4 +42,11 @@ Tensor &RunOptimizerContext::getOptimizerVariable(unsigned int idx) const {
void RunOptimizerContext::applyGradient(double lr) const {
weight->applyGradient(lr);
}

/**
* @brief Apply the gradient with the given learning rate and gradient
*/
void RunOptimizerContext::applyGradient(double lr, Tensor &updated_grad) const {
weight->applyGradient(lr, updated_grad);
}
} // namespace nntrainer
14 changes: 11 additions & 3 deletions nntrainer/optimizers/optimizer_context.h
@@ -35,9 +35,7 @@ class RunOptimizerContext {
*
*/
RunOptimizerContext(Weight *w = nullptr, size_t iter = 0, double lr = 0.0) :
weight(w),
iteration(iter),
learning_rate(lr) {}
weight(w), iteration(iter), learning_rate(lr) {}

/**
* @brief Get the Weight tensor object
@@ -75,6 +73,16 @@
*/
void applyGradient(double lr) const;

/**
* @brief Apply the gradient with the given learning rate and updated
* gradient
*
* @param lr learning rate
* @param updated_grad the already-updated gradient tensor to apply (usually
* FP32 in mixed-precision training)
*/
void applyGradient(double lr, Tensor &updated_grad) const;

/**
* @brief Get the current iteration value
*
Expand Down