Skip to content

Commit

Permalink
Merge pull request #16802 from hrydgard/parallel-pipeline-creation
Browse files Browse the repository at this point in the history
Vulkan: Parallel pipeline creation
  • Loading branch information
hrydgard authored Feb 1, 2023
2 parents 0cfce04 + a67604d commit 2ed88a8
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 32 deletions.
2 changes: 1 addition & 1 deletion Common/GPU/Vulkan/VulkanQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1336,7 +1336,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
// Maybe a middle pass. But let's try to just block and compile here for now, this doesn't
// happen all that much.
graphicsPipeline->pipeline[(size_t)rpType] = Promise<VkPipeline>::CreateEmpty();
graphicsPipeline->Create(vulkan_, renderPass->Get(vulkan_, rpType, fbSampleCount), rpType, fbSampleCount);
graphicsPipeline->Create(vulkan_, renderPass->Get(vulkan_, rpType, fbSampleCount), rpType, fbSampleCount, time_now_d(), -1);
}

VkPipeline pipeline = graphicsPipeline->pipeline[(size_t)rpType]->BlockUntilReady();
Expand Down
115 changes: 87 additions & 28 deletions Common/GPU/Vulkan/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <algorithm>
#include <cstdint>

#include <map>
#include <sstream>
#include <utility>

#include "Common/Log.h"
Expand All @@ -27,7 +28,7 @@
using namespace PPSSPP_VK;

// renderPass is an example of the "compatibility class" or RenderPassType type.
bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount) {
bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount, double scheduleTime, int countToCompile) {
bool multisample = RenderPassTypeHasMultisample(rpType);
if (multisample) {
if (sampleCount_ != VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {
Expand Down Expand Up @@ -118,12 +119,17 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
double start = time_now_d();
VkPipeline vkpipeline;
VkResult result = vkCreateGraphicsPipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &pipe, nullptr, &vkpipeline);
double taken_ms = (time_now_d() - start) * 1000.0;

double now = time_now_d();
double taken_ms_since_scheduling = (now - scheduleTime) * 1000.0;
double taken_ms = (now - start) * 1000.0;

if (taken_ms < 0.1) {
DEBUG_LOG(G3D, "Pipeline creation time: %0.2f ms (fast) rpType: %08x sampleBits: %d (%s)", taken_ms, (u32)rpType, (u32)sampleCount, tag_.c_str());
DEBUG_LOG(G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling (fast) rpType: %04x sampleBits: %d (%s)",
countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());
} else {
INFO_LOG(G3D, "Pipeline creation time: %0.2f ms rpType: %08x sampleBits: %d (%s)", taken_ms, (u32)rpType, (u32)sampleCount, tag_.c_str());
INFO_LOG(G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling rpType: %04x sampleBits: %d (%s)",
countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());
}

bool success = true;
Expand Down Expand Up @@ -218,26 +224,27 @@ void VKRGraphicsPipeline::LogCreationFailure() const {
ERROR_LOG(G3D, "======== END OF PIPELINE ==========");
}

bool VKRComputePipeline::Create(VulkanContext *vulkan) {
bool VKRComputePipeline::CreateAsync(VulkanContext *vulkan) {
if (!desc) {
// Already failed to create this one.
return false;
}
VkPipeline vkpipeline;
VkResult result = vkCreateComputePipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &desc->pipe, nullptr, &vkpipeline);

bool success = true;
if (result != VK_SUCCESS) {
pipeline->Post(VK_NULL_HANDLE);
ERROR_LOG(G3D, "Failed creating compute pipeline! result='%s'", VulkanResultToString(result));
success = false;
} else {
pipeline->Post(vkpipeline);
}
pipeline->SpawnEmpty(&g_threadManager, [=] {
VkPipeline vkpipeline;
VkResult result = vkCreateComputePipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &desc->pipe, nullptr, &vkpipeline);

delete desc;
bool success = true;
if (result == VK_SUCCESS) {
return vkpipeline;
} else {
ERROR_LOG(G3D, "Failed creating compute pipeline! result='%s'", VulkanResultToString(result));
success = false;
return (VkPipeline)VK_NULL_HANDLE;
}
delete desc;
}, TaskType::CPU_COMPUTE);
desc = nullptr;
return success;
return true;
}

VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan)
Expand Down Expand Up @@ -370,7 +377,6 @@ VulkanRenderManager::~VulkanRenderManager() {

vulkan_->WaitUntilQueueIdle();

DrainCompileQueue();
VkDevice device = vulkan_->GetDevice();
frameDataShared_.Destroy(vulkan_);
for (int i = 0; i < inflightFramesAtStart_; i++) {
Expand All @@ -379,12 +385,43 @@ VulkanRenderManager::~VulkanRenderManager() {
queueRunner_.DestroyDeviceObjects();
}

// Describes one graphics pipeline creation job to be run from a worker task.
// The members after `pipeline` are forwarded, in declaration order, to
// VKRGraphicsPipeline::Create. Instances are aggregate-initialized positionally,
// so the member order must not change.
struct SinglePipelineTask {
// The pipeline object that Create() gets called on.
VKRGraphicsPipeline *pipeline;
// Render pass handed to Create() as its compatible render pass.
VkRenderPass compatibleRenderPass;
// Render pass type of the variant to create.
RenderPassType rpType;
// Sample count handed to Create().
VkSampleCountFlagBits sampleCount;
// Timestamp (from time_now_d()) taken when the batch was scheduled. Logging only.
double scheduleTime;
// Size of the batch this job was scheduled with. Logging only.
int countToCompile;
};

class CreateMultiPipelinesTask : public Task {
public:
CreateMultiPipelinesTask(VulkanContext *vulkan, std::vector<SinglePipelineTask> tasks) : vulkan_(vulkan), tasks_(tasks) {}
~CreateMultiPipelinesTask() {}

TaskType Type() const override {
return TaskType::CPU_COMPUTE;
}

void Run() override {
for (auto &task : tasks_) {
task.pipeline->Create(vulkan_, task.compatibleRenderPass, task.rpType, task.sampleCount, task.scheduleTime, task.countToCompile);
}
}

VulkanContext *vulkan_;
std::vector<SinglePipelineTask> tasks_;
};

void VulkanRenderManager::CompileThreadFunc() {
SetCurrentThreadName("ShaderCompile");
while (true) {
std::vector<CompileQueueEntry> toCompile;
{
std::unique_lock<std::mutex> lock(compileMutex_);
// TODO: Should this be while?
// It may be beneficial also to unlock and wait a little bit to see if we get some more shaders
// so we can do a better job of thread-sorting them.
if (compileQueue_.empty() && run_) {
compileCond_.wait(lock);
}
Expand All @@ -395,24 +432,46 @@ void VulkanRenderManager::CompileThreadFunc() {
break;
}

double time = time_now_d();
// TODO: Here we can sort the pending pipelines by vertex and fragment shaders,
// and split up further.
// Those with the same pairs of shaders should be on the same thread.
int countToCompile = (int)toCompile.size();

// Pending graphics pipelines are grouped in this map, keyed by their (vertex shader, fragment shader) pair.
std::map<std::pair<Promise<VkShaderModule> *, Promise<VkShaderModule> *>, std::vector<SinglePipelineTask>> map;

double scheduleTime = time_now_d();

// Here we sort pending graphics pipelines by vertex and fragment shaders, and split up further.
// Those with the same pairs of shaders should be on the same thread, at least on NVIDIA.
// I don't think PowerVR cares though, it doesn't seem to reuse information between the compiles,
// so we might want a different splitting algorithm there.
for (auto &entry : toCompile) {
switch (entry.type) {
case CompileQueueEntry::Type::GRAPHICS:
entry.graphics->Create(vulkan_, entry.compatibleRenderPass, entry.renderPassType, entry.sampleCount);
map[std::pair< Promise<VkShaderModule> *, Promise<VkShaderModule> *>(entry.graphics->desc->vertexShader, entry.graphics->desc->fragmentShader)].push_back(
SinglePipelineTask{
entry.graphics,
entry.compatibleRenderPass,
entry.renderPassType,
entry.sampleCount,
scheduleTime, // these two are for logging purposes.
countToCompile,
}
);
break;
case CompileQueueEntry::Type::COMPUTE:
entry.compute->Create(vulkan_);
// Queue up pending compute pipelines on separate tasks.
entry.compute->CreateAsync(vulkan_);
break;
}
}

double delta = time_now_d() - time;
if (delta > 0.005f) {
INFO_LOG(G3D, "CompileThreadFunc: Creating %d pipelines took %0.3f ms", (int)toCompile.size(), delta * 1000.0f);
for (auto iter : map) {
auto &shaders = iter.first;
auto &entries = iter.second;

// NOTICE_LOG(G3D, "For this shader pair, we have %d pipelines to create", (int)entries.size());

Task *task = new CreateMultiPipelinesTask(vulkan_, entries);
g_threadManager.EnqueueTask(task);
}

queueRunner_.NotifyCompileDone();
Expand Down
5 changes: 3 additions & 2 deletions Common/GPU/Vulkan/VulkanRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ struct VKRGraphicsPipeline {
// Stores the pipeline flags and the tag; the tag is what shows up in the pipeline creation log lines.
VKRGraphicsPipeline(PipelineFlags flags, const char *tag) : flags_(flags), tag_(tag) {}
~VKRGraphicsPipeline();

bool Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount);
bool Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount, double scheduleTime, int countToCompile);

void DestroyVariants(VulkanContext *vulkan, bool msaaOnly);

Expand All @@ -137,6 +137,7 @@ struct VKRGraphicsPipeline {
// Returns the sample count currently stored in sampleCount_.
VkSampleCountFlagBits SampleCount() const { return sampleCount_; }

// Returns the tag as a C string. The pointer comes straight from tag_.c_str(),
// so it should not be kept past the lifetime of this object
// (presumably tag_ is a std::string — confirm against the member declaration).
const char *Tag() const { return tag_.c_str(); }

private:
void DestroyVariantsInstant(VkDevice device);

Expand All @@ -153,7 +154,7 @@ struct VKRComputePipeline {
VKRComputePipelineDesc *desc = nullptr;
Promise<VkPipeline> *pipeline = nullptr;

bool Create(VulkanContext *vulkan);
bool CreateAsync(VulkanContext *vulkan);
// True while no promise object is attached to this pipeline and `desc` is still set.
// `pipeline` is a pointer to a Promise, not a Vulkan handle, so it is compared
// against nullptr rather than VK_NULL_HANDLE.
bool Pending() const {
	return pipeline == nullptr && desc != nullptr;
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/ShaderManagerVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ static Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan,
if (singleThreaded) {
return Promise<VkShaderModule>::AlreadyDone(compile());
} else {
return Promise<VkShaderModule>::Spawn(&g_threadManager, compile, TaskType::CPU_COMPUTE);
return Promise<VkShaderModule>::Spawn(&g_threadManager, compile, TaskType::DEDICATED_THREAD);
}
}

Expand Down

0 comments on commit 2ed88a8

Please sign in to comment.