Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vulkan: Depalettize in shaders #10911

Merged
merged 8 commits into from
Apr 13, 2018
1 change: 1 addition & 0 deletions Core/Compatibility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
CheckSetting(iniFile, gameID, "RequireDefaultCPUClock", &flags_.RequireDefaultCPUClock);
CheckSetting(iniFile, gameID, "DisableReadbacks", &flags_.DisableReadbacks);
CheckSetting(iniFile, gameID, "DisableAccurateDepth", &flags_.DisableAccurateDepth);
CheckSetting(iniFile, gameID, "MGS2AcidHack", &flags_.MGS2AcidHack);
}

void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {
Expand Down
1 change: 1 addition & 0 deletions Core/Compatibility.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ struct CompatFlags {
bool RequireDefaultCPUClock;
bool DisableReadbacks;
bool DisableAccurateDepth;
bool MGS2AcidHack;
};

class IniFile;
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/DrawEngineVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ VkResult DrawEngineVulkan::RecreateDescriptorPool(FrameData &frame, int newSize)
VkDescriptorPoolSize dpTypes[3];
dpTypes[0].descriptorCount = frame.descPoolSize * 3;
dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
dpTypes[1].descriptorCount = frame.descPoolSize * 2; // Don't use these for tess anymore, need max two per set.
dpTypes[1].descriptorCount = frame.descPoolSize * 3; // Don't use these for tess anymore, need max three per set.
dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
dpTypes[2].descriptorCount = frame.descPoolSize;
dpTypes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
Expand Down
15 changes: 15 additions & 0 deletions GPU/Vulkan/GPU_Vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
#include "GPU/Vulkan/FramebufferVulkan.h"
#include "GPU/Vulkan/DrawEngineVulkan.h"
#include "GPU/Vulkan/TextureCacheVulkan.h"
#include "thin3d/VulkanRenderManager.h"
#include "thin3d/VulkanQueueRunner.h"

#include "Core/MIPS/MIPS.h"
#include "Core/HLE/sceKernelThread.h"
Expand Down Expand Up @@ -456,6 +458,15 @@ void GPU_Vulkan::InitDeviceObjects() {
assert(!frameData_[i].push_);
frameData_[i].push_ = new VulkanPushBuffer(vulkan_, 64 * 1024);
}

VulkanRenderManager *rm = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
uint32_t hacks = 0;
if (PSP_CoreParameter().compat.flags().MGS2AcidHack) {
hacks |= QUEUE_HACK_MGS2_ACID;
}
if (hacks) {
rm->GetQueueRunner()->EnableHacks(hacks);
}
}

void GPU_Vulkan::DestroyDeviceObjects() {
Expand All @@ -467,6 +478,10 @@ void GPU_Vulkan::DestroyDeviceObjects() {
frameData_[i].push_ = nullptr;
}
}

// Need to turn off hacks when shutting down the GPU. Don't want them running in the menu.
VulkanRenderManager *rm = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
rm->GetQueueRunner()->EnableHacks(0);
}

void GPU_Vulkan::DeviceLost() {
Expand Down
10 changes: 9 additions & 1 deletion assets/compat.ini
Original file line number Diff line number Diff line change
Expand Up @@ -325,4 +325,12 @@ NPHG00092 = true
NPEG00044 = true
NPJG00120 = true
UCJS10114 = true
UCES01401 = true
UCES01401 = true

[MGS2AcidHack]
ULES00008 = true
ULJM08001 = true
ULJM05001 = true
ULAS42007 = true
ULUS10006 = true
ULUS10077 = true
2 changes: 1 addition & 1 deletion ext/native/thin3d/GLRenderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ void GLRenderManager::Run(int frame) {
auto &initStepsOnThread = frameData_[frame].initSteps;
// queueRunner_.LogSteps(stepsOnThread);
queueRunner_.RunInitSteps(initStepsOnThread);
initStepsOnThread.clear();

// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
for (auto iter : frameData.activePushBuffers) {
Expand All @@ -481,7 +482,6 @@ void GLRenderManager::Run(int frame) {

queueRunner_.RunSteps(stepsOnThread);
stepsOnThread.clear();
initStepsOnThread.clear();

for (auto iter : frameData.activePushBuffers) {
iter->MapDevice(bufferStrategy_);
Expand Down
68 changes: 67 additions & 1 deletion ext/native/thin3d/VulkanQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
return pass;
}

void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, const std::vector<VKRStep *> &steps) {
void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &steps) {
// Optimizes renderpasses, then sequences them.
// Planned optimizations:
// * Create copies of render target that are rendered to multiple times and textured from in sequence, and push those render passes
Expand Down Expand Up @@ -397,6 +397,12 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, const std::vector<VKRStep
}
}

// Queue hacks.
if (hacksEnabled_ & QUEUE_HACK_MGS2_ACID) {
// Massive speedup.
ApplyMGSHack(steps);
}

for (size_t i = 0; i < steps.size(); i++) {
const VKRStep &step = *steps[i];
switch (step.stepType) {
Expand All @@ -422,6 +428,66 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, const std::vector<VKRStep
}
}

// Reorders one run of interleaved copy/render steps so the copies execute first and the
// single-draw render passes are folded into one pass.
//
// We want to turn a sequence of copy,render(1),copy,render(1),copy,render(1) into
// copy,copy,copy,render(n).
//
// `steps` is modified in place. Its length is preserved: render steps that were merged into
// the first render step stay in the vector (so they will still be deleted properly) but are
// retagged RENDER_SKIP. Only the first matching run is processed per call.
//
// Review note: since the renders bind with UV, it may be possible to detect that the copies
// are non-overlapping and apply this in the general case. Not done yet.
void VulkanQueueRunner::ApplyMGSHack(std::vector<VKRStep *> &steps) {
	for (int i = 0; i < (int)steps.size() - 3; i++) {
		int last = -1;
		if (!(steps[i]->stepType == VKRStepType::COPY &&
			steps[i + 1]->stepType == VKRStepType::RENDER &&
			steps[i + 2]->stepType == VKRStepType::COPY &&
			steps[i + 1]->render.numDraws == 1 &&
			steps[i]->copy.dst == steps[i + 2]->copy.dst))
			continue;
		// Looks promising! Let's start by finding the last one. The run ends just before the
		// first render with more than one draw, or the first copy to a different destination.
		// If neither is found before the end of the list, `last` stays -1 and nothing is done.
		// NOTE(review): step types other than COPY/RENDER do not end the run and are not
		// collected below - confirm such steps cannot occur inside the matched range.
		for (int j = i; j < (int)steps.size(); j++) {
			switch (steps[j]->stepType) {
			case VKRStepType::RENDER:
				if (steps[j]->render.numDraws > 1)
					last = j - 1;
				break;
			case VKRStepType::COPY:
				if (steps[j]->copy.dst != steps[i]->copy.dst)
					last = j - 1;
				break;
			}
			if (last != -1)
				break;
		}

		if (last != -1) {
			// We've got a sequence from i to last that needs reordering.
			// First, let's sort it, keeping the same length.
			std::vector<VKRStep *> copies;
			std::vector<VKRStep *> renders;
			for (int n = i; n <= last; n++) {
				if (steps[n]->stepType == VKRStepType::COPY)
					copies.push_back(steps[n]);
				else if (steps[n]->stepType == VKRStepType::RENDER)
					renders.push_back(steps[n]);
			}
			// Write the copies back. TODO: Combine them too.
			for (int j = 0; j < (int)copies.size(); j++) {
				steps[i + j] = copies[j];
			}
			// Write the renders back (so they will be deleted properly).
			const int firstRender = i + (int)copies.size();
			for (int j = 0; j < (int)renders.size(); j++) {
				steps[firstRender + j] = renders[j];
			}
			// The slot right after the copies must now hold the render step we merge into.
			// (The loop index used above is out of scope here, so index the slot directly.)
			assert(steps[firstRender]->stepType == VKRStepType::RENDER);
			// Combine the renders: append every later pass's commands to the first pass,
			// then mark the emptied pass so it gets skipped.
			for (int j = 1; j < (int)renders.size(); j++) {
				for (size_t k = 0; k < renders[j]->commands.size(); k++) {
					steps[firstRender]->commands.push_back(renders[j]->commands[k]);
				}
				steps[firstRender + j]->stepType = VKRStepType::RENDER_SKIP;
			}
			// We're done.
			break;
		}
	}
}

void VulkanQueueRunner::LogSteps(const std::vector<VKRStep *> &steps) {
ILOG("=======================================");
for (size_t i = 0; i < steps.size(); i++) {
Expand Down
17 changes: 16 additions & 1 deletion ext/native/thin3d/VulkanQueueRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
class VKRFramebuffer;
struct VKRImage;

// Bit flags selecting game-specific queue hacks. They are OR'ed together into the
// mask handed to VulkanQueueRunner::EnableHacks() and tested with & in RunSteps().
enum {
	// Enables the copy/render reordering done by ApplyMGSHack().
	QUEUE_HACK_MGS2_ACID = 1 << 0,
};

enum class VKRRenderCommand : uint8_t {
BIND_PIPELINE,
STENCIL,
Expand Down Expand Up @@ -152,7 +156,9 @@ class VulkanQueueRunner {
backbuffer_ = fb;
backbufferImage_ = img;
}
void RunSteps(VkCommandBuffer cmd, const std::vector<VKRStep *> &steps);

// RunSteps can modify steps but will leave it in a valid state.
void RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &steps);
void LogSteps(const std::vector<VKRStep *> &steps);

void CreateDeviceObjects();
Expand Down Expand Up @@ -205,6 +211,10 @@ class VulkanQueueRunner {
return found;
}

// Sets the active queue hacks to `hacks`, a bitmask of QUEUE_HACK_* values.
// The previous mask is overwritten, not merged, so passing 0 disables every hack.
void EnableHacks(uint32_t hacks) {
hacksEnabled_ = hacks;
}

private:
void InitBackbufferRenderPass();

Expand All @@ -223,6 +233,8 @@ class VulkanQueueRunner {

void ResizeReadbackBuffer(VkDeviceSize requiredSize);

void ApplyMGSHack(std::vector<VKRStep *> &steps);

static void SetupTransitionToTransferSrc(VKRImage &img, VkImageMemoryBarrier &barrier, VkPipelineStageFlags &stage, VkImageAspectFlags aspect);
static void SetupTransitionToTransferDst(VKRImage &img, VkImageMemoryBarrier &barrier, VkPipelineStageFlags &stage, VkImageAspectFlags aspect);

Expand All @@ -244,4 +256,7 @@ class VulkanQueueRunner {
VkDeviceMemory readbackMemory_ = VK_NULL_HANDLE;
VkBuffer readbackBuffer_ = VK_NULL_HANDLE;
VkDeviceSize readbackBufferSize_ = 0;

// Bitmask of QUEUE_HACK_* flags, set via EnableHacks(). The GPU backend derives it from compat.ini.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Outdated TODO?

-[Unknown]

Copy link
Owner Author

@hrydgard hrydgard Apr 14, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed.

uint32_t hacksEnabled_ = 0;
};