Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vulkan: Depalettize in shaders #10911

Merged
merged 8 commits into from
Apr 13, 2018
1 change: 1 addition & 0 deletions Core/Compatibility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
CheckSetting(iniFile, gameID, "RequireDefaultCPUClock", &flags_.RequireDefaultCPUClock);
CheckSetting(iniFile, gameID, "DisableReadbacks", &flags_.DisableReadbacks);
CheckSetting(iniFile, gameID, "DisableAccurateDepth", &flags_.DisableAccurateDepth);
CheckSetting(iniFile, gameID, "MGS2AcidHack", &flags_.MGS2AcidHack);
}

void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {
Expand Down
1 change: 1 addition & 0 deletions Core/Compatibility.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ struct CompatFlags {
bool RequireDefaultCPUClock;
bool DisableReadbacks;
bool DisableAccurateDepth;
bool MGS2AcidHack;
};

class IniFile;
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
int mask = gstate.getClutIndexMask();
int shift = gstate.getClutIndexShift();
int offset = gstate.getClutIndexStartPos();
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
// Unfortunately sampling turned our texture into floating point. To avoid this, might be able
// to declare them as isampler2D objects, but these require integer textures, which needs more work.
// Anyhow, we simply work around this by converting back to integer. Hopefully there will be no loss of precision.
Expand Down
7 changes: 4 additions & 3 deletions GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,16 @@ enum : uint64_t {
DIRTY_BONEMATRIX6 = 1ULL << 30,
DIRTY_BONEMATRIX7 = 1ULL << 31,

// These are for hardware tessellation
DIRTY_BEZIERSPLINE = 1ULL << 32,
DIRTY_TEXCLAMP = 1ULL << 33,

// space for 7 more uniforms.
DIRTY_DEPAL = 1ULL << 34,

// space for 5 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.

DIRTY_BONE_UNIFORMS = 0xFF000000ULL,

DIRTY_ALL_UNIFORMS = 0x3FFFFFFFFULL,
DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFULL,
DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,

// Other dirty elements that aren't uniforms!
Expand Down
4 changes: 4 additions & 0 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ std::string FragmentShaderDesc(const ShaderID &id) {
if (id.Bit(FS_BIT_COLOR_DOUBLE)) desc << "2x ";
if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat ";
if (id.Bit(FS_BIT_BGRA_TEXTURE)) desc << "BGRA ";
if (id.Bit(FS_BIT_SHADER_DEPAL)) desc << "Depal ";
if (id.Bit(FS_BIT_SHADER_TEX_CLAMP)) {
desc << "TClamp";
if (id.Bit(FS_BIT_CLAMP_S)) desc << "S";
Expand Down Expand Up @@ -236,6 +237,7 @@ void ComputeFragmentShaderID(ShaderID *id_out) {
bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix));
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
bool useShaderDepal = gstate_c.useShaderDepal;

ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.allowShaderBlend, gstate.FrameBufFormat());
ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(replaceBlend);
Expand Down Expand Up @@ -299,6 +301,8 @@ void ComputeFragmentShaderID(ShaderID *id_out) {
id.SetBits(FS_BIT_BLENDFUNC_B, 4, gstate.getBlendFuncB());
}
id.SetBit(FS_BIT_FLATSHADE, doFlatShading);

id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal);
}

*id_out = id;
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ enum {
FS_BIT_DO_TEXTURE = 1,
FS_BIT_TEXFUNC = 2, // 3 bits
FS_BIT_TEXALPHA = 5,
// 6 is free.
FS_BIT_SHADER_DEPAL = 6,
FS_BIT_SHADER_TEX_CLAMP = 7,
FS_BIT_CLAMP_S = 8,
FS_BIT_CLAMP_T = 9,
Expand Down
11 changes: 11 additions & 0 deletions GPU/Common/ShaderUniforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,17 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
ub->spline_counts = BytesToUint32(gstate_c.spline_count_u, gstate_c.spline_count_v, gstate_c.spline_type_u, gstate_c.spline_type_v);
}

if (dirtyUniforms & DIRTY_DEPAL) {
int indexMask = gstate.getClutIndexMask();
int indexShift = gstate.getClutIndexShift();
int indexOffset = gstate.getClutIndexStartPos() >> 4;
int format = gstate_c.depalFramebufferFormat;
uint32_t val = BytesToUint32(indexMask, indexShift, indexOffset, format);
// Poke in a bilinear filter flag in the top bit.
val |= gstate.isMagnifyFilteringEnabled() << 31;
ub->depal_mask_shift_off_fmt = val;
}
}

void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
Expand Down
9 changes: 5 additions & 4 deletions GPU/Common/ShaderUniforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ enum : uint64_t {
DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA |
DIRTY_BEZIERSPLINE,
DIRTY_BEZIERSPLINE | DIRTY_DEPAL,
DIRTY_LIGHT_UNIFORMS =
DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
Expand All @@ -30,7 +30,8 @@ struct UB_VS_FS_Base {
float depthRange[4];
float fogCoef[2]; float stencil; float pad0;
float matAmbient[4];
uint32_t spline_counts; int pad1; int pad2; int pad3;
uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one.
int pad2; int pad3;
// Fragment data
float fogColor[4];
float texEnvColor[4];
Expand All @@ -54,7 +55,7 @@ R"( mat4 proj_mtx;
float stencilReplace;
vec4 matambientalpha;
uint spline_counts;
int pad1;
uint depal_mask_shift_off_fmt;
int pad2;
int pad3;
vec3 fogcolor;
Expand All @@ -80,7 +81,7 @@ R"( float4x4 u_proj;
float u_stencilReplaceValue;
float4 u_matambientalpha;
uint u_spline_counts;
int pad1;
uint u_depal_mask_shift_off_fmt;
int pad2;
int pad3;
float3 u_fogcolor;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
// These must flush on change, so that LoadClut doesn't have to always flush.
{ GE_CMD_CLUTADDR, FLAG_FLUSHBEFOREONCHANGE },
{ GE_CMD_CLUTADDRUPPER, FLAG_FLUSHBEFOREONCHANGE },
{ GE_CMD_CLUTFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
{ GE_CMD_CLUTFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_DEPAL },

// Morph weights. TODO: Remove precomputation?
{ GE_CMD_MORPHWEIGHT0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
Expand Down
3 changes: 3 additions & 0 deletions GPU/GPUState.h
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,9 @@ struct GPUStateCache {
int spline_type_u;
int spline_type_v;

bool useShaderDepal;
GEBufferFormat depalFramebufferFormat;

u32 getRelativeAddress(u32 data) const;
void Reset();
void DoState(PointerWrap &p);
Expand Down
63 changes: 42 additions & 21 deletions GPU/Vulkan/DrawEngineVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,11 @@ enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MA
// Binding slots of the descriptor set layout used for PSP drawing.
// Every slot is declared exactly once: slots 0-2 are combined image samplers
// (main texture, secondary texture, depal palette), slots 3-5 are the dynamic
// uniform buffers (base, light, bone) and slot 6 is the tessellation storage buffer.
// Keep these values in sync with the bindings[] array filled in InitDeviceObjects().
enum {
	DRAW_BINDING_TEXTURE = 0,
	DRAW_BINDING_2ND_TEXTURE = 1,
	DRAW_BINDING_DEPAL_TEXTURE = 2,
	DRAW_BINDING_DYNUBO_BASE = 3,
	DRAW_BINDING_DYNUBO_LIGHT = 4,
	DRAW_BINDING_DYNUBO_BONE = 5,
	DRAW_BINDING_TESS_STORAGE_BUF = 6,
};

enum {
Expand Down Expand Up @@ -95,7 +96,7 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra

void DrawEngineVulkan::InitDeviceObjects() {
// All resources we need for PSP drawing. Usually only bindings 0 and 3-5 are populated.
VkDescriptorSetLayoutBinding bindings[6]{};
VkDescriptorSetLayoutBinding bindings[7]{};
bindings[0].descriptorCount = 1;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
Expand All @@ -105,22 +106,26 @@ void DrawEngineVulkan::InitDeviceObjects() {
bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[1].binding = DRAW_BINDING_2ND_TEXTURE;
bindings[2].descriptorCount = 1;
bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[2].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[2].binding = DRAW_BINDING_DYNUBO_BASE;
bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; // sampler is ignored though.
bindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[2].binding = DRAW_BINDING_DEPAL_TEXTURE;
bindings[3].descriptorCount = 1;
bindings[3].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[3].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[3].binding = DRAW_BINDING_DYNUBO_LIGHT;
bindings[3].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[3].binding = DRAW_BINDING_DYNUBO_BASE;
bindings[4].descriptorCount = 1;
bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[4].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[4].binding = DRAW_BINDING_DYNUBO_BONE;
// Used only for hardware tessellation.
bindings[4].binding = DRAW_BINDING_DYNUBO_LIGHT;
bindings[5].descriptorCount = 1;
bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[5].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[5].binding = DRAW_BINDING_TESS_STORAGE_BUF;
bindings[5].binding = DRAW_BINDING_DYNUBO_BONE;
// Used only for hardware tessellation.
bindings[6].descriptorCount = 1;
bindings[6].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[6].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[6].binding = DRAW_BINDING_TESS_STORAGE_BUF;

VkDevice device = vulkan_->GetDevice();

Expand Down Expand Up @@ -367,7 +372,7 @@ VkResult DrawEngineVulkan::RecreateDescriptorPool(FrameData &frame, int newSize)
VkDescriptorPoolSize dpTypes[3];
dpTypes[0].descriptorCount = frame.descPoolSize * 3;
dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
dpTypes[1].descriptorCount = frame.descPoolSize * 2; // Don't use these for tess anymore, need max two per set.
dpTypes[1].descriptorCount = frame.descPoolSize * 3; // Don't use these for tess anymore, need max three per set.
dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
dpTypes[2].descriptorCount = frame.descPoolSize;
dpTypes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
Expand All @@ -388,6 +393,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
key.imageView_ = imageView;
key.sampler_ = sampler;
key.secondaryImageView_ = boundSecondary_;
key.depalImageView_ = boundDepal_;
key.base_ = base;
key.light_ = light;
key.bone_ = bone;
Expand Down Expand Up @@ -433,12 +439,11 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
// Even in release mode, this is bad.
_assert_msg_(G3D, result == VK_SUCCESS, "Ran out of descriptor space in pool. sz=%d res=%d", (int)frame.descSets.size(), (int)result);

// We just don't write to the slots we don't care about.
// We need 8 now that we support secondary texture bindings.
VkWriteDescriptorSet writes[8]{};
// We just don't write to the slots we don't care about, which is fine.
VkWriteDescriptorSet writes[7]{};
// Main texture
int n = 0;
VkDescriptorImageInfo tex[2]{};
VkDescriptorImageInfo tex[3]{};
if (imageView) {
#ifdef VULKAN_USE_GENERAL_LAYOUT_FOR_COLOR
tex[0].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
Expand All @@ -459,7 +464,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView

if (boundSecondary_) {
#ifdef VULKAN_USE_GENERAL_LAYOUT_FOR_COLOR
tex[0].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
tex[1].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
#else
tex[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
#endif
Expand All @@ -475,7 +480,23 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
n++;
}

// Skipping 2nd texture for now.
if (boundDepal_) {
#ifdef VULKAN_USE_GENERAL_LAYOUT_FOR_COLOR
tex[2].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
#else
tex[2].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
#endif
tex[2].imageView = boundDepal_;
tex[2].sampler = samplerSecondary_; // doesn't matter, we use load
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
writes[n].dstBinding = DRAW_BINDING_DEPAL_TEXTURE;
writes[n].pImageInfo = &tex[2];
writes[n].descriptorCount = 1;
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[n].dstSet = desc;
n++;
}

// Tessellation data buffer. Make sure this is declared outside the if to avoid optimizer
// shenanigans.
Expand Down
15 changes: 11 additions & 4 deletions GPU/Vulkan/DrawEngineVulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@
// The Descriptor Set used for the majority of PSP rendering looks like this:
//
// * binding 0: Texture/Sampler (the PSP texture)
// * binding 1: Secondary texture sampler for shader blending or depal palettes
// * binding 2: Base Uniform Buffer (includes fragment state)
// * binding 3: Light uniform buffer
// * binding 4: Bone uniform buffer
// * binding 1: Secondary texture sampler for shader blending
// * binding 2: Depal palette
// * binding 3: Base Uniform Buffer (includes fragment state)
// * binding 4: Light uniform buffer
// * binding 5: Bone uniform buffer
// * binding 6: Tess data storage buffer
//
// All shaders conform to this layout, so they are all compatible with the same descriptor set.
// The format of the various uniform buffers may vary though - vertex shaders that don't skin
Expand Down Expand Up @@ -177,6 +179,9 @@ class DrawEngineVulkan : public DrawEngineCommon {
}

void SetLineWidth(float lineWidth);
// Records the image view to bind as the depal palette texture for later draws.
// Only the handle is stored here; a null handle means no depal texture is bound.
void SetDepalTexture(VkImageView depal) { boundDepal_ = depal; }

private:
struct FrameData;
Expand Down Expand Up @@ -207,6 +212,7 @@ class DrawEngineVulkan : public DrawEngineCommon {

// Secondary texture for shader blending
VkImageView boundSecondary_ = VK_NULL_HANDLE;
VkImageView boundDepal_ = VK_NULL_HANDLE;
VkSampler samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch.

PrehashMap<VertexArrayInfoVulkan *, nullptr> vai_;
Expand All @@ -217,6 +223,7 @@ class DrawEngineVulkan : public DrawEngineCommon {
struct DescriptorSetKey {
VkImageView imageView_;
VkImageView secondaryImageView_;
VkImageView depalImageView_;
VkSampler sampler_;
VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical
// for all draws in a frame, except when the buffer has to grow.
Expand Down
Loading