-
Notifications
You must be signed in to change notification settings - Fork 89
/
palQueueTimingsTraceSource.h
221 lines (184 loc) · 10 KB
/
palQueueTimingsTraceSource.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
/*
***********************************************************************************************************************
*
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
#pragma once
#include "palGpuUtil.h"
#include "palTraceSession.h"
#include "palGpaSession.h"
#include <atomic>
// Forward declarations of the SQTT queue-timing record types. This header only refers to them through pointers
// (in the private WriteQueueInfoChunks / WriteQueueEventChunks helpers), so their full definitions are not needed here.
struct SqttQueueEventRecord;
struct SqttQueueInfoRecord;
// Forward declaration of Pal::Platform.
// NOTE(review): no declaration visible in this header refers to Pal::Platform (the trace source class takes a
// Pal::IPlatform*); presumably the implementation file relies on it - confirm before removing.
namespace Pal
{
class Platform;
}
namespace GpuUtil
{
namespace TraceChunk
{
/// "QueueInfo" RDF chunk identifier & version.
/// The identifier is held in a fixed-size character array of TextIdentifierSize elements.
constexpr char QueueInfoChunkId[TextIdentifierSize] = "QueueInfo";
constexpr Pal::uint32 QueueInfoChunkVersion = 1;
/// Logical queue types, as stored in QueueInfo::queueType.
///
/// The underlying type is Pal::uint8 and every enumerator has an explicit value. Note that these values are not
/// interchangeable with HwEngineType: for example Dma is 3 here but 4 in HwEngineType.
enum class QueueType : Pal::uint8
{
    Unknown        = 0,
    Universal      = 1,
    Compute        = 2,
    Dma            = 3,
    Encode         = 4,
    Decode         = 5,
    Security       = 6,
    VideoProcessor = 7
};
/// Hardware engine types, as stored in QueueInfo::engineType.
///
/// The underlying type is Pal::uint8 and every enumerator has an explicit value. The numbering is independent of
/// QueueType: Decode (5) precedes Encode (6) here, whereas QueueType orders them Encode (4), Decode (5).
enum class HwEngineType : Pal::uint8
{
    Unknown               = 0,
    Universal             = 1,
    Compute               = 2,
    ExclusiveCompute      = 3,
    Dma                   = 4,
    Decode                = 5,
    Encode                = 6,
    HighPriorityUniversal = 7,
    HighPriorityGraphics  = 8,
    Security              = 9,
    Vpe                   = 10
};
/// Properties of a single queue.
///
/// NOTE(review): presumably this is the payload layout of the "QueueInfo" chunk, in which case member order and
/// types form a binary contract and must not be rearranged - confirm against the chunk writer.
struct QueueInfo
{
    Pal::uint32  pciId;        ///< The ID of the GPU queried
    Pal::uint64  queueId;      ///< API-specific queue ID
    Pal::uint64  queueContext; ///< OS-level queue context value from the Windows KMD, used to correlate with
                               ///  ETW data. Only applicable to D3D on Windows; 0 otherwise.
    QueueType    queueType;    ///< The logical queue type
    HwEngineType engineType;   ///< The hardware engine that the queue is mapped to
};
// ------------------------------------------------------------------------------------------- //
/// "QueueEvent" RDF chunk identifier & version.
/// The identifier is held in a fixed-size character array of TextIdentifierSize elements.
constexpr char QueueEventChunkId[TextIdentifierSize] = "QueueEvent";
constexpr Pal::uint32 QueueEventChunkVersion = 1;
/// Kinds of queue-level timing event, as stored in QueueEvent::eventType.
///
/// The underlying type is Pal::uint32. Several QueueEvent fields are only meaningful for particular event types;
/// see the per-field documentation on QueueEvent.
enum class QueueEventType : Pal::uint32
{
    CmdBufSubmit    = 0,
    SignalSemaphore = 1,
    WaitSemaphore   = 2,
    Present         = 3
};
/// A single queue-level timing event.
///
/// Fields tagged with one or more event types in brackets are only meaningful for those event types and are 0 for
/// all others. All timestamps are expressed in clock cycle units.
///
/// NOTE(review): presumably this is the payload layout of the "QueueEvent" chunk, in which case member order and
/// types form a binary contract and must not be rearranged - confirm against the chunk writer.
struct QueueEvent
{
    Pal::uint32    pciId;          ///< The ID of the GPU queried

    Pal::uint64    queueId;        ///< The API-specific queue ID which triggered the event

    QueueEventType eventType;      ///< The type of the queue-timing event

    Pal::uint32    sqttCmdBufId;   ///< [`CmdBufSubmit` only; 0 otherwise]
                                   ///  SQTT command buffer ID matching the CmdBufStart user data marker

    Pal::uint64    frameIndex;     ///< [`CmdBufSubmit` & `Present` only; 0 otherwise]
                                   ///  Global frame index, incremented for each "Present" call

    Pal::uint32    submitSubIndex; ///< [`CmdBufSubmit` only; 0 otherwise]
                                   ///  Sub-index of the event within its submission.
                                   ///  With a single command buffer per submission this is 0; with multiple
                                   ///  command buffers per submission it is incremented by one for each
                                   ///  command buffer within the submission.

    Pal::uint64    apiEventId;     ///< [`CmdBufSubmit`]    API-specific command buffer ID signaled
                                   ///  [`SignalSemaphore`] API-specific semaphore ID signaled
                                   ///  [`WaitSemaphore`]   API-specific semaphore ID waited on
                                   ///  [`Present`]         N/A (set to 0)

    Pal::uint64    cpuTimestamp;   ///< CPU start timestamp of when this event is triggered, in clock cycle units

    Pal::uint64    gpuTimestamp1;  ///< [`CmdBufSubmit`]    GPU timestamp when HW execution of the command buffer began
                                   ///  [`SignalSemaphore`] GPU timestamp when the HW signaled the queue semaphore
                                   ///  [`WaitSemaphore`]   GPU timestamp when the HW finished waiting on the semaphore
                                   ///  [`Present`]         GPU timestamp when the HW processed the Present call
                                   ///
                                   ///  All timestamps are expressed in clock cycle units.

    Pal::uint64    gpuTimestamp2;  ///< [`CmdBufSubmit` only; 0 otherwise]
                                   ///  GPU timestamp when HW execution of the command buffer finished
};
} // namespace TraceChunk
// QueueTimings Trace Source name & version. These are the values returned by QueueTimingsTraceSource::GetName() and
// QueueTimingsTraceSource::GetVersion(). The source version (2) is tracked separately from the "QueueInfo" and
// "QueueEvent" chunk versions (both 1).
constexpr char QueueTimingsTraceSourceName[] = "queuetimings";
constexpr Pal::uint32 QueueTimingsTraceSourceVersion = 2;
// =====================================================================================================================
// This trace source captures queue timings data through a GPA session & produces "QueueInfo" and "QueueEvent" RDF
// chunks.
//
// Only the trivial overrides are defined inline here; every other member function is defined in the implementation
// file, so the notes on those functions describe their signatures only.
class QueueTimingsTraceSource : public ITraceSource
{
public:
    // pPlatform: NOTE(review): presumably retained in m_pPlatform (the IPlatform owning the parent TraceSession) -
    // confirm in the implementation file.
    explicit QueueTimingsTraceSource(Pal::IPlatform* pPlatform);
    virtual ~QueueTimingsTraceSource();

    // ==== TraceSource Native Functions ========================================================================== //

    // One-time initialization against a device.
    // NOTE(review): presumably creates/initializes m_pGpaSession for pDevice - confirm in the implementation file.
    Pal::Result Init(Pal::IDevice* pDevice);

    // Registers/unregisters a queue for timing. queueId and queueContext carry the same meaning as the
    // TraceChunk::QueueInfo fields of the same names.
    Pal::Result RegisterTimedQueue(Pal::IQueue* pQueue,
                                   Pal::uint64  queueId,
                                   Pal::uint64  queueContext);

    Pal::Result UnregisterTimedQueue(Pal::IQueue* pQueue);

    // Timed variants of queue operations: submit, semaphore signal/wait and present.
    // The semaphore variants take an optional 'value' argument which defaults to 0.
    Pal::Result TimedSubmit(Pal::IQueue*                pQueue,
                            const Pal::MultiSubmitInfo& submitInfo,
                            const TimedSubmitInfo&      timedSubmitInfo);

    Pal::Result TimedSignalQueueSemaphore(Pal::IQueue*                   pQueue,
                                          Pal::IQueueSemaphore*          pQueueSemaphore,
                                          const TimedQueueSemaphoreInfo& timedSignalInfo,
                                          Pal::uint64                    value = 0);

    Pal::Result TimedWaitQueueSemaphore(Pal::IQueue*                   pQueue,
                                        Pal::IQueueSemaphore*          pQueueSemaphore,
                                        const TimedQueueSemaphoreInfo& timedWaitInfo,
                                        Pal::uint64                    value = 0);

    Pal::Result TimedQueuePresent(Pal::IQueue*                 pQueue,
                                  const TimedQueuePresentInfo& timedPresentInfo);

    // "External" semaphore events identify the queue by its queueContext value rather than by an IQueue pointer, and
    // the caller supplies the CPU submission/completion timestamps.
    Pal::Result ExternalTimedWaitQueueSemaphore(Pal::uint64                    queueContext,
                                                Pal::uint64                    cpuSubmissionTimestamp,
                                                Pal::uint64                    cpuCompletionTimestamp,
                                                const TimedQueueSemaphoreInfo& timedWaitInfo);

    Pal::Result ExternalTimedSignalQueueSemaphore(Pal::uint64                    queueContext,
                                                  Pal::uint64                    cpuSubmissionTimestamp,
                                                  Pal::uint64                    cpuCompletionTimestamp,
                                                  const TimedQueueSemaphoreInfo& timedSignalInfo);

    // NOTE(review): presumably reports the state of m_timingInProgress - confirm in the implementation file.
    bool IsTimingInProgress() const;

    // ==== Base Class Overrides =================================================================================== //

    // This trace source ignores configuration updates (intentionally empty).
    virtual void OnConfigUpdated(DevDriver::StructuredValue* pJsonConfig) override { }

    // Always reports an empty (0) GPU work mask.
    virtual Pal::uint64 QueryGpuWorkMask() const override { return 0; }

    virtual void OnTraceAccepted() override;

    // Nothing is done at trace begin (intentionally empty).
    virtual void OnTraceBegin(Pal::uint32 gpuIndex, Pal::ICmdBuffer* pCmdBuf) override { }

    virtual void OnTraceEnd(Pal::uint32 gpuIndex, Pal::ICmdBuffer* pCmdBuf) override;
    virtual void OnTraceFinished() override;

    virtual const char* GetName() const override { return QueueTimingsTraceSourceName; }
    virtual Pal::uint32 GetVersion() const override { return QueueTimingsTraceSourceVersion; }

private:
    // Chunk-writing helpers. Both take the queue info records; the event writer additionally takes the event records.
    void WriteQueueInfoChunks(
        const SqttQueueInfoRecord* pQueueInfoRecords,
        size_t                     numQueueInfoRecords);

    void WriteQueueEventChunks(
        const SqttQueueInfoRecord*  pQueueInfoRecords,
        size_t                      numQueueInfoRecords,
        const SqttQueueEventRecord* pQueueEventRecords,
        size_t                      numQueueEventRecords);

    // Reports an internal error described by pErrorMsg together with the failing result code.
    void ReportInternalError(const char* pErrorMsg, Pal::Result result);

    Pal::IPlatform* const m_pPlatform;        // IPlatform owning the parent TraceSession
    GpaSession*           m_pGpaSession;      // Handle to GpaSession object for tracking queue timings
    bool                  m_traceIsHealthy;   // Internal flag for tracking resource and state health
    std::atomic<bool>     m_timingInProgress; // Flag for tracking if queue timings operations are ongoing
};
} // namespace GpuUtil