Fix _copy_to on MKLDNN backend (#15637)
* Fix _copy_to

* Add comment
ZhennanQin authored and pengzhao-intel committed Aug 1, 2019
1 parent 811d516 commit 42a47b1
Showing 1 changed file with 31 additions and 3 deletions.
34 changes: 31 additions & 3 deletions src/imperative/imperative_utils.h
@@ -419,7 +419,14 @@ inline void PushFCompute(const FCompute& fn,
     // mapping from index in input_blobs to index in pre_temp_dst
     std::unordered_map<uint32_t, uint32_t> in_temp_idx_map;
 #if MXNET_USE_MKLDNN == 1
-    InvalidateOutputs(outputs, req);
+    if (exec_type != ExecType::kCrossDeviceCopy) {
+      // kCrossDeviceCopy is used for the `_copy_to` operator, which doesn't compute immediately
+      // in its FComputeEx but instead asynchronously pushes the copy operation to the engine.
+      // So if A holds MKLDNN memory and we copy A to B and then copy B back to A, we must not
+      // invalidate outputs for the B-to-A copy here, because the A-to-B copy may not have run
+      // yet and invalidating A's memory would corrupt it.
+      InvalidateOutputs(outputs, req);
+    }
 #endif
     std::vector<OpReqType> tmp_req = req;
     // setup blobs
@@ -461,7 +468,14 @@ inline void PushFComputeEx(const FComputeEx& fn,
   const auto& run = [=](RunContext rctx) {
     OpContext opctx{need_grad, is_train, rctx, engine::CallbackOnComplete(), requested};
 #if MXNET_USE_MKLDNN == 1
-    InvalidateOutputs(outputs, req);
+    if (exec_type != ExecType::kCrossDeviceCopy) {
+      // kCrossDeviceCopy is used for the `_copy_to` operator, which doesn't compute immediately
+      // in its FComputeEx but instead asynchronously pushes the copy operation to the engine.
+      // So if A holds MKLDNN memory and we copy A to B and then copy B back to A, we must not
+      // invalidate outputs for the B-to-A copy here, because the A-to-B copy may not have run
+      // yet and invalidating A's memory would corrupt it.
+      InvalidateOutputs(outputs, req);
+    }
 #endif
     fn(attrs, opctx, inputs, req, outputs);
     if (ctx.dev_mask() == gpu::kDevMask && exec_type == ExecType::kSync && !rctx.is_bulk) {
@@ -508,7 +522,14 @@ inline void PushOperator(const OpStatePtr& state,
                        engine::CallbackOnComplete on_complete) {
       OpContext opctx{need_grad, is_train, rctx, on_complete, requested};
 #if MXNET_USE_MKLDNN == 1
-      InvalidateOutputs(outputs, req);
+      if (exec_type != ExecType::kCrossDeviceCopy) {
+        // kCrossDeviceCopy is used for the `_copy_to` operator, which doesn't compute immediately
+        // in its FComputeEx but instead asynchronously pushes the copy operation to the engine.
+        // So if A holds MKLDNN memory and we copy A to B and then copy B back to A, we must not
+        // invalidate outputs for the B-to-A copy here, because the A-to-B copy may not have run
+        // yet and invalidating A's memory would corrupt it.
+        InvalidateOutputs(outputs, req);
+      }
 #endif
       fcompute_ex(state, opctx, inputs, req, outputs);
       if (ctx.dev_mask() == gpu::kDevMask && exec_type == ExecType::kSync
@@ -547,7 +568,14 @@ inline void PushOperator(const OpStatePtr& state,
       // mapping from index in input_blobs to index in pre_temp_dst
       std::unordered_map<uint32_t, uint32_t> in_temp_idx_map;
 #if MXNET_USE_MKLDNN == 1
+      if (exec_type != ExecType::kCrossDeviceCopy) {
+        // kCrossDeviceCopy is used for the `_copy_to` operator, which doesn't compute immediately
+        // in its FComputeEx but instead asynchronously pushes the copy operation to the engine.
+        // So if A holds MKLDNN memory and we copy A to B and then copy B back to A, we must not
+        // invalidate outputs for the B-to-A copy here, because the A-to-B copy may not have run
+        // yet and invalidating A's memory would corrupt it.
       InvalidateOutputs(outputs, req);
+      }
 #endif
       std::vector<OpReqType> tmp_req = req;
       // populate input blobs and output blobs
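
For context, the copy pattern the new comment describes can be reproduced from the Python frontend roughly as follows. This is a minimal sketch, not part of the commit: it assumes an MKLDNN-enabled build, and uses a convolution output only because such outputs typically hold MKLDNN-formatted memory (an illustrative assumption).

import mxnet as mx

# `a` is assumed to hold MKLDNN-formatted memory, e.g. as the output of a
# convolution on an MKLDNN-enabled build.
data = mx.nd.ones((1, 3, 8, 8))
weight = mx.nd.ones((8, 3, 3, 3))
a = mx.nd.Convolution(data=data, weight=weight, no_bias=True,
                      kernel=(3, 3), num_filter=8)
b = mx.nd.zeros_like(a)

a.copyto(b)   # `_copy_to` is pushed to the engine asynchronously
b.copyto(a)   # before this fix, a's outputs could be invalidated here while
              # the A -> B copy was still pending, corrupting a's memory
mx.nd.waitall()

With the guard added in this commit, kCrossDeviceCopy skips the eager InvalidateOutputs call, so the pending copy completes before a's memory is touched.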
