Skip to content

Commit

Permalink
PR #21134: [XLA:GPU] Add profiler annotation for sequential thunk.
Browse files Browse the repository at this point in the history
Imported from GitHub PR #21134

This PR wraps each sequential thunk in a profiler annotation, which makes loop iterations and conditional branches easier to read in the profiler.

The nsys profile looks like this:

![image](https://github.com/user-attachments/assets/8a3dd0be-4e1a-4516-ae64-b376336799bd)

Copybara import of the project:

--
eea74b8 by Shawn Wang <[email protected]>:

add nvtx marker for sequential thunk

Merging this change closes #21134

FUTURE_COPYBARA_INTEGRATE_REVIEW=#21134 from shawnwang18:shawnw/add_profiler_for_while_body_cond eea74b8
PiperOrigin-RevId: 713359421
  • Loading branch information
shawnwang18 authored and Google-ML-Automation committed Jan 11, 2025
1 parent e30ab06 commit 5ce4e5d
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 5 deletions.
19 changes: 16 additions & 3 deletions xla/service/gpu/ir_emitter_unnested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,12 @@ absl::Status IrEmitterUnnested::EmitConditional(const HloInstruction* instr) {
for (auto comp : instr->branch_computations()) {
auto ir_emitter = IrEmitterUnnested::Create(ir_emitter_context_);
TF_RETURN_IF_ERROR(ir_emitter->EmitHloComputation(comp));
branch_thunks.push_back(ir_emitter->ConsumeThunkSequence());
Thunk::ThunkInfo branch_thunk_info =
Thunk::ThunkInfo::WithProfileAnnotation(instr);
branch_thunk_info.profile_annotation +=
absl::StrCat("_branch_", comp->name());
branch_thunks.push_back(
ir_emitter->ConsumeThunkSequence(branch_thunk_info));
}

ConditionalThunkConfig config =
Expand Down Expand Up @@ -2263,9 +2268,17 @@ absl::StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildWhileThunk(
TF_ASSIGN_OR_RETURN(
auto pred, GetAllocationSliceForHlo(condition->root_instruction(), {}));

Thunk::ThunkInfo cond_thunk_info =
Thunk::ThunkInfo::WithProfileAnnotation(instr);
cond_thunk_info.profile_annotation += "_condition";
Thunk::ThunkInfo body_thunk_info =
Thunk::ThunkInfo::WithProfileAnnotation(instr);
body_thunk_info.profile_annotation += "_body";

return std::unique_ptr<Thunk>(new WhileThunk(
thunk_info, pred, ir_emitter_condition->ConsumeThunkSequence(),
ir_emitter_body->ConsumeThunkSequence(), trip_count));
thunk_info, pred,
ir_emitter_condition->ConsumeThunkSequence(cond_thunk_info),
ir_emitter_body->ConsumeThunkSequence(body_thunk_info), trip_count));
}

absl::Status IrEmitterUnnested::EmitTargetElementLoop(
Expand Down
5 changes: 3 additions & 2 deletions xla/service/gpu/ir_emitter_unnested.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,9 @@ class IrEmitterUnnested : public IrEmitter {
IrEmitterContext* ir_emitter_context);

// Transfers the ownership of thunk_sequence_ out.
std::unique_ptr<SequentialThunk> ConsumeThunkSequence() {
return std::make_unique<SequentialThunk>(Thunk::ThunkInfo{},
std::unique_ptr<SequentialThunk> ConsumeThunkSequence(
Thunk::ThunkInfo thunk_info = Thunk::ThunkInfo{}) {
return std::make_unique<SequentialThunk>(thunk_info,
std::move(thunk_sequence_));
}

Expand Down
2 changes: 2 additions & 0 deletions xla/service/gpu/runtime/sequential_thunk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ absl::Status SequentialThunk::Initialize(const InitializeParams& params) {
}

absl::Status SequentialThunk::ExecuteOnStream(const ExecuteParams& params) {
std::optional<tsl::profiler::ScopedAnnotation> seq_annotation =
GetKernelAnnotation(profile_annotation());
for (const std::unique_ptr<Thunk>& thunk : thunks_) {
std::optional<tsl::profiler::ScopedAnnotation> annotation =
GetKernelAnnotation(thunk->profile_annotation());
Expand Down

0 comments on commit 5ce4e5d

Please sign in to comment.