Commit

rm some logs

tianyan01 committed Nov 17, 2023
1 parent 186abc2 commit 19dc469
Showing 3 changed files with 1 addition and 12 deletions.
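
The lines removed here are all diagnostic traces. The ones deleted outright are unconditional VLOG(0) calls; with the glog-style logging used in Paddle, verbosity 0 passes the default threshold, so these messages print on every kernel invocation (once per layer inside the transformer loop). Diagnostics already wrapped in #ifdef _DEBUG_FUSED_MULTI_TRANSFORMER are left in place. Below is a minimal sketch, not code from this commit, of how the same trace could be kept but compiled away in normal builds using that existing macro; MOE_TRACE is a hypothetical helper, and VLOG is assumed to be in scope as in the surrounding sources:

    // Hypothetical helper: compiled away unless the existing debug macro is defined.
    // In the non-debug build the message argument is never expanded, so it costs nothing.
    #ifdef _DEBUG_FUSED_MULTI_TRANSFORMER
    #define MOE_TRACE(msg) VLOG(0) << msg
    #else
    #define MOE_TRACE(msg) do { } while (0)
    #endif

    // e.g. inside the layers loop:
    //   MOE_TRACE("layer " << i + 1 << " end");
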
@@ -429,7 +429,6 @@ class FusedMultiTransformerMoeOpKernel : public framework::OpKernel<T> {
} else {
sliced_inp = buf0;
}
VLOG(0) << "sliced_inp shape[0]: " << sliced_inp.dims()[0];
#ifdef _DEBUG_FUSED_MULTI_TRANSFORMER
VLOG(0) << "moe, gate & topk";
#endif
@@ -825,12 +824,10 @@ class FusedMultiTransformerMoeOpKernel : public framework::OpKernel<T> {

// phi::AddKernel<T, phi::GPUContext>(dev_ctx, all_gather_out, bias_dropout_residual_out, &moe_out);
x_data = moe_out.data<T>();
VLOG(0) << "layer " << i + 1 << " end";

} // layers loop end
moe_out.Resize({{bsz, seq_len, dim_embed}});
*out = moe_out;
VLOG(0) << "kernel end";
}
};

6 changes: 0 additions & 6 deletions paddle/phi/kernels/fused_moe_kernel.h
@@ -72,7 +72,6 @@ static void AllToAll(Tensor& tensor, // NOLINT
out_tensor.push_back(out);
auto task = pg_nccl->AllToAll(in_tensor, out_tensor);
task->Wait();
VLOG(0) << "wait, all to all success !";
} else {
auto dtype = platform::ToNCCLDataType(
framework::TransToProtoVarType(tensor.dtype()));
@@ -245,7 +244,6 @@ void GlobalScatterFunctor(const phi::GPUContext& ctx,
}
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
// VLOG(0) << "GlobalScatterFunctor cudaDeviceSynchronize success !";
#else
PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize());
#endif
@@ -334,12 +332,10 @@ void GlobalScatterProcessGroupFunctor(const phi::GPUContext& ctx,
}
}
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclGroupEnd());
// VLOG(0) << "GlobalScatterProcessGroupFunctor ncclGroupEnd " << i;
}

#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
// VLOG(0) << "GlobalScatterProcessGroupFunctor cudaDeviceSynchronize success!";
#else
PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize());
#endif
@@ -449,7 +445,6 @@ void GlobalGatherFunctor(const phi::GPUContext& ctx,

#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
// VLOG(0) << "GlobalGatherFunctor cudaDeviceSynchronize success !";
#else
PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize());
#endif
@@ -542,7 +537,6 @@ void GlobalGatherProcessGroupFunctor(const phi::GPUContext& ctx,

#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
// VLOG(0) << "GlobalGatherProcessGroupFunctor cudaDeviceSynchronize success !";
#else
PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize());
#endif
4 changes: 1 addition & 3 deletions paddle/phi/kernels/gpu/fused_moe_kernel.cu
@@ -54,7 +54,6 @@ void FusedMoeKernel(const DeviceContext& dev_ctx,
// auto out_dim = out->dims();
int bsz_seq = bsz * seq_len;
int tot_expert = world_size * num_expert;
// VLOG(0) << "moe, get dim: bsz_seq:" << bsz_seq << ", x.dim:" << x_dim << ", out.dim:" << out_dim;

// pre_layer_norm
const U* ln_scale_ptr = ln_scale.data<U>();
@@ -70,7 +69,6 @@
Tensor ln_out;
ln_out.Resize({{bsz, seq_len, d_model}});
auto *ln_out_data = dev_ctx.template Alloc<T>(&ln_out);
// VLOG(0) << "moe, alloc pre layer norm";
// after slice, bsz_seq should be changed
int sliced_bsz_seq = bsz_seq;
int start = 0;
@@ -418,4 +416,4 @@ PD_REGISTER_KERNEL(fused_moe_kernel,
phi::FusedMoeKernel,
float,
double,
- paddle::platform::float16) {}
+ paddle::platform::float16) {}
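
A side note rather than part of this commit: instead of deleting such traces, they could be demoted to a higher verbosity level. With glog-style VLOG, VLOG(n) only emits when the verbosity threshold is at least n, and in Paddle builds that threshold is typically raised with the GLOG_v environment variable, so a hypothetical variant like the one below stays silent by default but can be re-enabled for debugging:

    // Hypothetical variant: printed only when verbosity is raised, e.g. GLOG_v=3.
    VLOG(3) << "layer " << i + 1 << " end";
    VLOG(3) << "GlobalGatherFunctor cudaDeviceSynchronize done";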
