Skip to content

Commit

Permalink
mlperf merge part1: add bool option for transfer_cast_op_pass (Paddle…
Browse files Browse the repository at this point in the history
…Paddle#486)

* fix compile error

* add bool option for transfer_cast_op_pass
  • Loading branch information
yaozhixin authored Mar 2, 2022
1 parent 92116f7 commit 5a51ba1
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 13 deletions.
3 changes: 2 additions & 1 deletion paddle/fluid/framework/ir/ipu/transfer_cast_op_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ void TransferCastOpPass::ApplyImpl(ir::Graph* graph) const {

auto ipu_backend = platform::ipu::IpuBackend::GetInstance();
auto enable_fp16 = ipu_backend->GetIpuStrategy()->enable_fp16;
if (enable_fp16) {
auto transfer_cast_op = ipu_backend->GetIpuStrategy()->transfer_cast_op;
if (enable_fp16 && transfer_cast_op) {
for (auto* node : graph->Nodes()) {
if (node->IsOp() && node->Op()->Type() == "popart_cast") {
if (BOOST_GET_CONST(std::string, node->Op()->GetAttr("to")) ==
Expand Down
24 changes: 12 additions & 12 deletions paddle/fluid/framework/tensor_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -447,18 +447,6 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
#ifdef PADDLE_WITH_IPU
else if (platform::is_ipu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
} else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_ipu_place(dst_place)) {
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
} else { // NOLINT
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
else if (platform::is_custom_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) { /* custom_device -> cpu*/
Expand Down Expand Up @@ -508,6 +496,18 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
"Copy from %s to %s is not supported.", src_place, dst_place));
}
#endif
#ifdef PADDLE_WITH_IPU
else if (platform::is_ipu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
} else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_ipu_place(dst_place)) {
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
} else { // NOLINT
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
else if (platform::is_npu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) { /* npu -> cpu*/
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/platform/device/ipu/ipu_strategy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ IpuStrategy::IpuStrategy() {
ADD_BOOL_OPTION(save_onnx_checkpoint);
ADD_BOOL_OPTION(need_avg_shard);
ADD_BOOL_OPTION(enable_fp16);
ADD_BOOL_OPTION(transfer_cast_op);
ADD_UINT64_OPTION(num_ipus);
ADD_UINT64_OPTION(batches_per_step);
ADD_UINT64_OPTION(micro_batch_size);
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/platform/device/ipu/ipu_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ class IpuStrategy {
// defaultMaxWeightNorm for adam optimizer
float max_weight_norm = 65504.0f;

// enable transfer cast Op target from fp32 to fp16 in fp16 mode
bool transfer_cast_op = true;

// popart session option
popart::SessionOptions popart_options;

Expand Down
99 changes: 99 additions & 0 deletions python/paddle/fluid/tests/unittests/ipu/test_cast_op_ipu.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,105 @@ def test_base(self):
self.assertTrue(res0.shape == res1.shape)


class TestEnableFp16(TestBase):
    """Cast op test that compiles the program with fp16 enabled on the IPU.

    Feeds an int32 tensor and casts it to float32; the IPU run compiles the
    program through IpuCompiledProgram with enable_fp16=True, the CPU run
    executes the plain static program for comparison.
    """

    def set_atol(self):
        # int32 -> float32 is exact for these values, so demand a tight match.
        self.atol = 1e-10

    def set_data_feed(self):
        values = np.array([1, 200, 3000, 40000]).astype('int32')
        self.feed = {"x": values}

    def set_op_attrs(self):
        # Attributes forwarded to paddle.cast as keyword arguments.
        self.attrs = {'dtype': 'float32'}

    def _test_base(self, run_ipu=True):
        # Build an isolated scope/program pair with a fixed seed so the IPU
        # and CPU runs are directly comparable.
        scope = paddle.static.Scope()
        main_prog = paddle.static.Program()
        startup_prog = paddle.static.Program()
        main_prog.random_seed = self.SEED
        startup_prog.random_seed = self.SEED

        with paddle.static.scope_guard(scope):
            with paddle.static.program_guard(main_prog, startup_prog):
                x = paddle.static.data(
                    name=self.feed_list[0],
                    shape=self.feed_shape[0],
                    dtype=self.feed_dtype[0])
                out = paddle.cast(x, **self.attrs)
                fetch_list = [out.name]

            place = paddle.IPUPlace() if run_ipu else paddle.CPUPlace()
            exe = paddle.static.Executor(place)
            exe.run(startup_prog)

            if run_ipu:
                # Compile for IPU with fp16 precision turned on.
                feed_list = self.feed_list
                ipu_strategy = paddle.static.IpuStrategy()
                ipu_strategy.set_graph_config(is_training=self.is_training)
                ipu_strategy.set_precision_config(enable_fp16=True)
                compiler = paddle.static.IpuCompiledProgram(
                    main_prog, ipu_strategy=ipu_strategy)
                program = compiler.compile(feed_list, fetch_list)
            else:
                program = main_prog

            result = exe.run(program, feed=self.feed, fetch_list=fetch_list)
            return result[0]


class TestDisableTransferCast(TestEnableFp16):
    """Same as TestEnableFp16, but with the transfer_cast_op pass disabled.

    Verifies that setting the bool option ``transfer_cast_op`` to False keeps
    cast ops targeting fp32 even when the session runs in fp16 mode.

    NOTE: ``set_atol``, ``set_data_feed`` and ``set_op_attrs`` are inherited
    unchanged from TestEnableFp16; only ``_test_base`` differs (it adds the
    ``set_options`` call), so the redundant overrides were removed.
    """

    def _test_base(self, run_ipu=True):
        # Isolated scope/programs with a fixed seed for reproducibility.
        scope = paddle.static.Scope()
        main_prog = paddle.static.Program()
        startup_prog = paddle.static.Program()
        main_prog.random_seed = self.SEED
        startup_prog.random_seed = self.SEED

        with paddle.static.scope_guard(scope):
            with paddle.static.program_guard(main_prog, startup_prog):
                x = paddle.static.data(
                    name=self.feed_list[0],
                    shape=self.feed_shape[0],
                    dtype=self.feed_dtype[0])
                out = paddle.cast(x, **self.attrs)
                fetch_list = [out.name]

            if run_ipu:
                place = paddle.IPUPlace()
            else:
                place = paddle.CPUPlace()
            exe = paddle.static.Executor(place)
            exe.run(startup_prog)

            if run_ipu:
                feed_list = self.feed_list
                ipu_strategy = paddle.static.IpuStrategy()
                ipu_strategy.set_graph_config(is_training=self.is_training)
                ipu_strategy.set_precision_config(enable_fp16=True)
                # The one behavioral difference from TestEnableFp16: opt out
                # of rewriting cast targets to fp16.
                ipu_strategy.set_options({"transfer_cast_op": False})
                program = paddle.static.IpuCompiledProgram(
                    main_prog,
                    ipu_strategy=ipu_strategy).compile(feed_list, fetch_list)
            else:
                program = main_prog

            result = exe.run(program, feed=self.feed, fetch_list=fetch_list)
            return result[0]


class TestCase2(TestBase):
def set_atol(self):
self.atol = 1e-10
Expand Down

0 comments on commit 5a51ba1

Please sign in to comment.