support amp/rts (PaddlePaddle#254)
* enable custom_op

* add rts/amp

* resolved comments
XBWGC authored Nov 2, 2021
1 parent 9918a8f commit 2f8a872
Showing 8 changed files with 131 additions and 9 deletions.
22 changes: 22 additions & 0 deletions paddle/fluid/framework/ipu/ipu_compiler.cc
@@ -89,6 +89,7 @@ void Compiler::RegisterOpFunc() {
auto aiOnnxOpset = builder_->aiOnnxOpset11(); \
auto output_ids = OnnxImpl(inputs Args, debug_context); \
SetIpuIndexStage(output_ids, op_desc); \
SetAMPAttributes(output_ids, op_desc); \
InsertTensors(output_names, output_ids); \
}}, // NOLINT
#include "paddle/fluid/framework/ipu/supported_ops.h"
@@ -350,6 +351,27 @@ void Compiler::SetIpuIndexStage(const std::string& tensor_id,
VLOG(10) << "leave Compiler::SetIpuIndexStage";
}

// The available-memory-proportion attribute only applies to matmul ops;
// every other op type passes through unchanged.
void Compiler::SetAMPAttributes(const std::vector<std::string>& tensor_ids,
const OpDesc* op_desc) {
if (op_desc->Type() == "popart_matmul") {
for (const auto& tensor_id : tensor_ids) {
SetAMPAttributes(tensor_id, op_desc);
}
}
}

void Compiler::SetAMPAttributes(const std::string& tensor_id,
const OpDesc* op_desc) {
VLOG(10) << "enter Compiler::SetAMPAttributes";
if (op_desc->Type() == "popart_matmul") {
auto amp = ipu_strategy_->available_memory_proportion;
// Only forward values in (0.0f, 1.0f]; 0.0f keeps PopART's default.
if (amp > 0.0f && amp <= 1.0f) {
builder_->setAvailableMemoryProportion(tensor_id, amp);
}
}
VLOG(10) << "leave Compiler::SetAMPAttributes";
}

void Compiler::SetCustomOps(
const std::vector<IpuCustomOpIdentifier>& custom_ops) {
for (auto x : custom_ops) {
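For reference, the SetAMPAttributes hook added above forwards available_memory_proportion to PopART's Builder::setAvailableMemoryProportion for each matmul output. A minimal sketch of the equivalent call through the PopART Python API (the shapes and the 0.3 value are illustrative, not taken from this commit):

import numpy as np
import popart

builder = popart.Builder()
x = builder.addInputTensor(popart.TensorInfo("FLOAT", [64, 64]))
w = builder.addInitializedInputTensor(np.ones([64, 64], dtype=np.float32))
y = builder.aiOnnx.matmul([x, w])

# Equivalent of builder_->setAvailableMemoryProportion(tensor_id, amp):
# let the matmul use roughly 30% of each tile's memory for temporaries.
builder.setAvailableMemoryProportion(y, 0.3)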
3 changes: 3 additions & 0 deletions paddle/fluid/framework/ipu/ipu_compiler.h
@@ -43,6 +43,9 @@ class Compiler {
void SetIpuIndexStage(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc);
void SetAMPAttributes(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc);
void SetIpuStrategy(const IpuStrategy &strategy) {
ipu_strategy_ = &strategy;
}
9 changes: 5 additions & 4 deletions paddle/fluid/framework/ipu/ipu_executor.cc
@@ -48,16 +48,17 @@ void Executor::Prepare(const std::string &proto,
paddle::platform::errors::InvalidArgument(
"loss_id = %s doesn't exist in popart graph.", opt_info.GetLoss()));

// Extend the default pattern set with the tied-gather optimizations.
patterns_.enableTiedGather(true);
patterns_.enableTiedGatherAccumulate(true);

session_ = popart::TrainingSession::createFromOnnxModel(
proto, dataFlow, it->second, *popart_optimizer, device,
popart::InputShapeInfo(), ipu_strategy_->popart_options,
popart::Patterns(popart::PatternsLevel::Default));
popart::InputShapeInfo(), ipu_strategy_->popart_options, patterns_);
} else {
VLOG(10) << "Creating InferenceSession from Onnx Model...";
session_ = popart::InferenceSession::createFromOnnxModel(
proto, dataFlow, device, popart::InputShapeInfo(),
ipu_strategy_->popart_options,
popart::Patterns(popart::PatternsLevel::Default));
ipu_strategy_->popart_options, patterns_);
}
VLOG(10) << "Creating session from Onnx Model...done";

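The new patterns_ member replaces the previous inline popart::Patterns(popart::PatternsLevel::Default), so the tied-gather optimizations are enabled on top of the default pattern set. A rough sketch of the same configuration through the PopART Python API; the string names passed to enablePattern are assumptions mirrored from the C++ calls above:

import popart

patterns = popart.Patterns(popart.PatternsLevel.Default)
patterns.enablePattern("TiedGather", True)
patterns.enablePattern("TiedGatherAccumulate", True)
# patterns would then be handed to the popart.TrainingSession constructor.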
3 changes: 3 additions & 0 deletions paddle/fluid/framework/ipu/ipu_executor.h
@@ -17,6 +17,7 @@ limitations under the License. */
#include <popart/dataflow.hpp>
#include <popart/half.hpp>
#include <popart/names.hpp>
#include <popart/patterns/patterns.hpp>
#include <popart/session.hpp>
#include <popart/tensorinfo.hpp>

@@ -86,6 +87,8 @@ class Executor {
std::vector<std::pair<popart::TensorId, popart::TensorId>>
weights_and_opt_state_;
int step_ = 0;
// Patterns
popart::Patterns patterns_;
};

} // namespace ipu
43 changes: 42 additions & 1 deletion paddle/fluid/framework/ipu/ipu_strategy.h
100755 → 100644
@@ -14,7 +14,9 @@ limitations under the License. */

#pragma once

#include <popart/op.hpp>
#include <popart/sessionoptions.hpp>
#include <popart/tensorlocation.hpp>

namespace paddle {
namespace framework {
@@ -24,16 +26,55 @@ using VirtualGraphMode = popart::VirtualGraphMode;
using RecomputationType = popart::RecomputationType;

struct IpuStrategy {
IpuStrategy() {
// Always store the optimizer state off-chip and enable replicated tensor
// sharding (RTS) to save on-chip memory.
auto storage = popart::TensorLocation(popart::TensorStorage::OffChip,
popart::ReplicatedTensorSharding::On);
popart_options.optimizerStateTensorLocationSettings =
popart::TensorLocationSettings(storage);

// Divide by accumulationFactor and replicatedGraphCount after the
// all-reduce, i.e. take the mean rather than the sum.
popart_options.accumulationAndReplicationReductionType =
popart::ReductionType::Mean;
popart_options.meanAccumulationAndReplicationReductionStrategy =
popart::MeanReductionStrategy::Post;
}
~IpuStrategy() {}

// Total number of IPUs needed: replica count * IPUs per replica
int num_ipus = 1;

// batches per step
int batches_per_step = 1;

// micro batch size
int batch_size = 1;

// training flag, true for training
bool is_training = true;

// save the ONNX model lowered from the Paddle program description
bool save_init_onnx = false;

// save the trained model
bool save_last_onnx = false;

// save the Paddle model every n steps
int save_per_n_step = 1;
popart::SessionOptions popart_options;

// average sharding, used for debugging
bool need_avg_shard = false;

// flag for fp16, true for pure fp16
bool enable_fp16 = false;

// available memory proportion; 0.0f disables it
float available_memory_proportion = 0.0f;

// PopART session options
popart::SessionOptions popart_options;
};

} // namespace ipu
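For comparison, a sketch of the same defaults written against the PopART Python API (an illustration of the new constructor above, not code from this commit):

import popart

opts = popart.SessionOptions()

# Keep optimizer state off-chip, sharded across replicas (RTS).
location = popart.TensorLocation(popart.TensorStorage.OffChip,
                                 popart.ReplicatedTensorSharding.On)
opts.optimizerStateTensorLocationSettings = popart.TensorLocationSettings(location)

# Take the mean (not the sum) over accumulation steps and replicas,
# applied after the all-reduce.
opts.accumulationAndReplicationReductionType = popart.ReductionType.Mean
opts.meanAccumulationAndReplicationReductionStrategy = (
    popart.MeanReductionStrategy.Post)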
24 changes: 23 additions & 1 deletion paddle/fluid/pybind/pybind.cc
@@ -3364,7 +3364,29 @@ All parameter, weight, gradient are variables in Paddle.
"2: NormOnly (Only Norm Ops)"
"3: Pipeline (Recompute all forward pipeline stages)"
"4: RecomputeAll (Recompute all ops)
.)DOC");
.)DOC")
.def_property("enable_half_partial",
[](const ipu::IpuStrategy &self) {
return self.popart_options.partialsTypeMatMuls == "half";
},
[](ipu::IpuStrategy &self, bool enable_half_partial) {
self.popart_options.partialsTypeMatMuls =
enable_half_partial ? "half" : "float";
},
R"DOC(
Bool type. When True, matmul partials are computed in fp16; only takes effect with fp16. Default False (float partials).
)DOC")
.def_property(
"available_mem_proportion",
[](const ipu::IpuStrategy &self) {
return self.available_memory_proportion;
},
[](ipu::IpuStrategy &self, float available_memory_proportion) {
self.available_memory_proportion = available_memory_proportion;
},
R"DOC(
Float type. Set the available memory proportion for matmul/conv; a larger
value lets the op use more memory. Valid range (0.0f, 1.0f]; 0.0f means no
effect. Default 0.0f.
)DOC");

py::class_<framework::ipu::IpuCustomOpIdentifier>(m, "IpuCustomOpIdentifier")
.def(py::init<const std::string &, const std::string &,
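A hypothetical usage sketch for the two new properties; the get_ipu_strategy accessor is an assumption based on how the unit test below obtains its strategy object:

import paddle.fluid.compiler as compiler

ipu_strategy = compiler.get_ipu_strategy()
ipu_strategy.enable_half_partial = True      # fp16 matmul partials
ipu_strategy.available_mem_proportion = 0.5  # matmul/conv temporary-memory cap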
30 changes: 30 additions & 0 deletions python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy.py
@@ -41,9 +41,31 @@ def test_training(self):
ipu_strategy.num_ipus = 2
assert ipu_strategy.num_ipus == 2, "Set num_ipus Failed"

ipu_strategy.batches_per_step = 5
assert ipu_strategy.batches_per_step == 5, \
"Set batches_per_step Failed"

ipu_strategy.batch_size = 4
assert ipu_strategy.batch_size == 4, "Set batch_size Failed"

ipu_strategy.is_training = False
assert ipu_strategy.is_training == False, "Set is_training Failed"

ipu_strategy.save_init_onnx = True
assert ipu_strategy.save_init_onnx == True, "Set save_init_onnx Failed"

ipu_strategy.save_last_onnx = True
assert ipu_strategy.save_last_onnx == True, "Set save_last_onnx Failed"

ipu_strategy.save_per_n_step = 10
assert ipu_strategy.save_per_n_step == 10, "Set save_per_n_step Failed"

ipu_strategy.need_avg_shard = True
assert ipu_strategy.need_avg_shard == True, "Set need_avg_shard Failed"

ipu_strategy.enable_fp16 = True
assert ipu_strategy.enable_fp16 == True, "Set enable_fp16 Failed"

ipu_strategy.enable_pipelining = True
assert ipu_strategy.enable_pipelining == True, \
"Set enable_pipelining Failed"
@@ -52,6 +74,14 @@ def test_training(self):
assert ipu_strategy.enable_manual_shard == True, \
"Set enable_manual_shard Failed"

ipu_strategy.enable_half_partial = True
assert ipu_strategy.enable_half_partial == True, \
"Set enable_half_partial Failed"

ipu_strategy.available_mem_proportion = 0.5
assert ipu_strategy.available_mem_proportion == 0.5, \
"Set available_mem_proportion Failed"


if __name__ == "__main__":
unittest.main()
@@ -75,7 +75,7 @@ def _test_base(self, run_ipu=True):
conv2 = paddle.static.nn.conv2d(
conv1, num_filters=3, filter_size=3, bias_attr=False)
# should consider influence of bs
loss = paddle.mean(conv2) * bs
loss = paddle.mean(conv2)

if self.is_training:
opt = None
@@ -227,7 +227,7 @@ def set_feed(self):

def test(self):
cpu_outputs = self._test_base(False)
ipu_outputs = self._test_base(True)[::2] * self.attrs['cpu_bs']
ipu_outputs = self._test_base(True)[::2]

self.assertTrue(np.allclose(cpu_outputs, ipu_outputs, atol=self.atol))

@@ -257,7 +257,7 @@ def set_feed(self):

def test(self):
cpu_outputs = self._test_base(False)
ipu_outputs = self._test_base(True)[::3] * self.attrs['cpu_bs']
ipu_outputs = self._test_base(True)[::3]

self.assertTrue(np.allclose(cpu_outputs, ipu_outputs, atol=self.atol))

