Skip to content

Commit

Permalink
shader refactoring; shader disassembly
Browse files Browse the repository at this point in the history
  • Loading branch information
nem0 committed Sep 20, 2024
1 parent c0c11cb commit 0732450
Show file tree
Hide file tree
Showing 21 changed files with 250 additions and 119 deletions.
66 changes: 39 additions & 27 deletions data/pipelines/common.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -106,17 +106,17 @@ struct Surface {

cbuffer GlobalState : register(b0) {
SMSlice Global_sm_slices[4];
float4x4 Global_projection;
float4x4 Global_prev_projection;
float4x4 Global_projection_no_jitter;
float4x4 Global_prev_projection_no_jitter;
float4x4 Global_inv_projection;
float4x4 Global_view;
float4x4 Global_inv_view;
float4x4 Global_view_projection;
float4x4 Global_view_projection_no_jitter;
float4x4 Global_prev_view_projection_no_jitter;
float4x4 Global_inv_view_projection;
float4x4 Global_vs_to_ndc; // a.k.a. projection matrix
float4x4 Global_prev_vs_to_ndc; // previous frame vs_to_ndc
float4x4 Global_vs_to_ndc_no_jitter;
float4x4 Global_prev_vs_to_ndc_no_jitter;
float4x4 Global_ndc_to_vs;
float4x4 Global_ws_to_vs; // a.k.a. view matrix
float4x4 Global_vs_to_ws;
float4x4 Global_ws_to_ndc; // a.k.a. view-projection matrix
float4x4 Global_ws_to_ndc_no_jitter;
float4x4 Global_prev_ws_to_ndc_no_jitter;
float4x4 Global_ndc_to_ws;
float4x4 Global_reprojection;
float4 Global_camera_world_pos;
float4 Global_view_dir;
Expand Down Expand Up @@ -144,12 +144,12 @@ cbuffer GlobalState : register(b0) {
};

cbuffer PassState : register(b1) {
float4x4 Pass_projection;
float4x4 Pass_inv_projection;
float4x4 Pass_view;
float4x4 Pass_inv_view;
float4x4 Pass_view_projection;
float4x4 Pass_inv_view_projection;
float4x4 Pass_vs_to_ndc;
float4x4 Pass_ndc_to_vs;
float4x4 Pass_ws_to_vs;
float4x4 Pass_vs_to_ws;
float4x4 Pass_ws_to_ndc;
float4x4 Pass_ndc_to_ws;
float4 Pass_view_dir;
float4 Pass_camera_up;
float4 Pass_camera_planes[6];
Expand All @@ -160,6 +160,18 @@ cbuffer ShadowAtlas : register(b3) {
float4x4 u_shadow_atlas_matrices[128];
};

// Transforms the point `pos` by `m`, treating it as the row vector (pos, 1).
// Optimized form of mul(float4(pos, 1), m): the implicit w == 1 lets the last
// row of `m` be added directly instead of multiplied.
float4 transformPosition(float3 pos, float4x4 m) {
	// accumulate innermost-first, i.e. x*m[0] + (y*m[1] + (z*m[2] + m[3]))
	float4 acc = pos.z * m[2] + m[3];
	acc = pos.y * m[1] + acc;
	return pos.x * m[0] + acc;
}

// Transforms `pos` by `m0` and then by `m1`.
// The w produced by the first transform is dropped and treated as 1 for the second one,
// i.e. this assumes transformPosition(pos, m0).w == 1,
// so don't use it with something like ws_to_ndc in m0.
float4 transformPosition(float3 pos, float4x4 m0, float4x4 m1) {
	float3 intermediate = transformPosition(pos, m0).xyz;
	return transformPosition(intermediate, m1);
}

float3 rotateByQuat(float4 rot, float3 pos) {
float3 uv = cross(rot.xyz, pos);
float3 uuv = cross(rot.xyz, uv);
Expand Down Expand Up @@ -217,23 +229,23 @@ uint2 textureSize(TextureCube<float4> Tex, uint Level) {
// returns view vector, i.e. normalized vector in world-space pointing from camera to pixel
// screen_uv is passed through toScreenUV() and then scaled by 2 and offset by -1 to form the NDC xy
float3 getViewDirection(float2 screen_uv) {
// point at the pixel with NDC z and w both set to 1
float4 pos_ndc = float4(toScreenUV(screen_uv) * 2 - 1, 1, 1.0);
// NOTE(review): the next two lines look like the before/after pair of this commit's diff
// (Global_inv_view_projection renamed to Global_ndc_to_ws); presumably only the second one
// exists in the actual file - confirm
float4 pos_ws = mul(pos_ndc, Global_inv_view_projection);
float4 pos_ws = mul(pos_ndc, Global_ndc_to_ws);
// no divide by w here, only xyz is normalized and returned
return normalize(pos_ws.xyz);
}

// get world-space position of pixel at `screen_uv`
// depth_buffer is sampled at lod 0 with LinearSamplerClamp, its red channel is used as NDC z
float3 getPositionWS(uint depth_buffer, float2 screen_uv) {
float z = sampleBindlessLod(LinearSamplerClamp, depth_buffer, screen_uv, 0).r;
float4 pos_ndc = float4(toScreenUV(screen_uv) * 2 - 1, z, 1.0);
// NOTE(review): the next two lines look like the before/after pair of this commit's diff
// (Global_inv_view_projection renamed to Global_ndc_to_ws); presumably only the second one
// exists in the actual file - confirm
float4 pos_ws = mul(pos_ndc, Global_inv_view_projection);
float4 pos_ws = mul(pos_ndc, Global_ndc_to_ws);
// perspective divide
return pos_ws.xyz / pos_ws.w;
}

// get world-space position of pixel at `screen_uv`, also returns its NDC depth
// ndc_depth receives the raw value sampled from the red channel of depth_buffer
float3 getPositionWS(uint depth_buffer, float2 screen_uv, out float ndc_depth) {
float z = sampleBindlessLod(LinearSamplerClamp, depth_buffer, screen_uv, 0).r;
float4 pos_ndc = float4(toScreenUV(screen_uv) * 2 - 1, z, 1.0);
// NOTE(review): the next two lines look like the before/after pair of this commit's diff
// (Global_inv_view_projection renamed to Global_ndc_to_ws); presumably only the second one
// exists in the actual file - confirm
float4 pos_ws = mul(pos_ndc, Global_inv_view_projection);
float4 pos_ws = mul(pos_ndc, Global_ndc_to_ws);
ndc_depth = z;
// perspective divide
return pos_ws.xyz / pos_ws.w;
}
Expand Down Expand Up @@ -277,9 +289,9 @@ float3 ACESFilm(float3 x) {
return saturate((x*(a*x+b))/(x*(c*x+d)+e));
}

// returns the difference between the previous-frame and the current-frame projected xy
// (both after perspective divide, both using the *_no_jitter matrices) of a position
// the previous-frame position is offset by Global_to_prev_frame_camera_translation before projecting
// NOTE(review): the first three lines below (wpos / mul / Global_*view_projection_no_jitter) look like
// the pre-commit version of the signature and body, followed by the post-commit version
// (pos_ws / transformPosition / Global_*ws_to_ndc_no_jitter); presumably only the latter exists
// in the actual file - confirm
float2 computeStaticObjectMotionVector(float3 wpos) {
float4 p = mul(float4(wpos, 1), Global_view_projection_no_jitter);
float4 pos_projected = mul(float4(wpos + Global_to_prev_frame_camera_translation.xyz, 1), Global_prev_view_projection_no_jitter);
float2 computeStaticObjectMotionVector(float3 pos_ws) {
float4 p = transformPosition(pos_ws, Global_ws_to_ndc_no_jitter);
float4 pos_projected = transformPosition(pos_ws + Global_to_prev_frame_camera_translation.xyz, Global_prev_ws_to_ndc_no_jitter);
// previous minus current
return pos_projected.xy / pos_projected.w - p.xy / p.w;
}

Expand All @@ -289,14 +301,14 @@ float4 fullscreenQuad(int vertexID, out float2 screen_uv) {
return float4(toScreenUV(screen_uv) * 2 - 1, 0, 1);
}

// converts ndc depth to linear depth, using Global_projection
// converts ndc depth to linear depth, using Global_vs_to_ndc
// we assume reversed z with infinite far plane
// there is no guard against ndc_depth == 0, the division below is performed as is
float toLinearDepth(float ndc_depth) {
// general form, with inv_proj standing for the inverse of the projection matrix:
// float4 pos_proj = float4(0, 0, ndc_depth, 1.0);
// float4 view_pos = mul(pos_proj, inv_proj);
// return view_pos.z / view_pos.w;
// for reversed z with infinite far plane, this is equivalent to:
// NOTE(review): the next two lines look like the before/after pair of this commit's diff
// (Global_projection renamed to Global_vs_to_ndc); presumably only the second one
// exists in the actual file - confirm
return Global_projection[3].z / ndc_depth;
return Global_vs_to_ndc[3].z / ndc_depth;
}

StructuredBuffer<Light> b_lights : register(t0);
Expand Down Expand Up @@ -499,7 +511,7 @@ float3 pointLightsLighting(Cluster cluster, Surface surface, uint shadow_atlas,
float3 direct_light = computeDirectLight(surface, L, light.color_attn.rgb);
int atlas_idx = light.atlas_idx;
if (atlas_idx >= 0) {
float4 proj_pos = mul(float4(lpos, 1), u_shadow_atlas_matrices[atlas_idx]);
float4 proj_pos = transformPosition(lpos, u_shadow_atlas_matrices[atlas_idx]);
proj_pos /= proj_pos.w;

float2 shadow_uv = proj_pos.xy;
Expand Down Expand Up @@ -670,7 +682,7 @@ float3 computeLighting(Cluster cluster, Surface surface, float3 light_direction,
}

// transforms the point (uv, ndc_depth) by Global_reprojection and returns the resulting uv
// presumably maps a current-frame uv to its previous-frame location - verify against
// how Global_reprojection is filled on the CPU side
float2 cameraReproject(float2 uv, float ndc_depth) {
// NOTE(review): the next two lines look like the before/after pair of this commit's diff
// (mul replaced by transformPosition); presumably only the second one exists in the actual file - confirm
float4 v = mul(float4(toScreenUV(uv) * 2 - 1, ndc_depth, 1), Global_reprojection);
float4 v = transformPosition(float3(toScreenUV(uv) * 2 - 1, ndc_depth), Global_reprojection);
// perspective divide, then undo the "* 2 - 1" remapping applied above
float2 res = (v.xy / v.w) * 0.5 + 0.5;
return toScreenUV(res);
}
Expand Down
21 changes: 20 additions & 1 deletion data/pipelines/curve_decal.hlsl
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
/*
BEGIN_SHADER_DECLARATIONS
{
"Shaders": [
{
"ShaderName": "mainPS",
"ShaderCompiler": "fxc",
"ShaderType": "ps",
"ShaderModel": "5_0",
"EntryPoint": "mainPS",
"Defines": [],
"Optimization": "3",
"AdditionalArgs": []
}
]
}
END_SHADER_DECLARATIONS
*/

//@surface
//@include "pipelines/common.hlsli"
//@texture_slot "Texture", "textures/common/white.tga"
Expand Down Expand Up @@ -30,7 +49,7 @@ VSOutput mainVS(VSInput input) {
pos += input.i_pos;
output.uv_scale = input.i_uv_scale;
output.bezier = input.i_bezier;
output.position = mul(float4(pos, 1), mul(Global_view, Global_projection));
output.position = transformPosition(pos, Global_ws_to_ndc);
return output;
}

Expand Down
4 changes: 2 additions & 2 deletions data/pipelines/debug_shape.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//@include "pipelines/common.hlsli"

cbuffer Model : register(b4) {
float4x4 u_model;
float4x4 u_ls_to_ws;
};

struct VSOutput {
Expand All @@ -18,7 +18,7 @@ struct VSInput {
// vertex shader for debug shapes: passes the vertex color through and projects the position
VSOutput mainVS(VSInput input) {
VSOutput output;
// rgb is raised to the power of 2.2 (presumably gamma -> linear conversion), alpha is kept as is
output.color = float4(pow(abs(input.color.rgb), 2.2f.xxx), input.color.a);
// NOTE(review): the next two lines look like the before/after pair of this commit's diff
// (u_model / Pass_view_projection renamed to u_ls_to_ws / Pass_ws_to_ndc); presumably only
// the second one exists in the actual file - confirm
output.position = mul(float4(input.position, 1), mul(u_model, Pass_view_projection));
output.position = transformPosition(input.position, u_ls_to_ws, Pass_ws_to_ndc);
return output;
}

Expand Down
2 changes: 1 addition & 1 deletion data/pipelines/decal.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ VSOutput mainVS(VSInput input) {
float3 pos_ws = rotateByQuat(input.i_rot, input.position * input.i_half_extents);
pos_ws += input.i_pos_ws;
output.uv_scale = input.i_uv_scale;
output.position = mul(float4(pos_ws, 1), mul(Global_view, Global_projection));
output.position = transformPosition(pos_ws, Global_ws_to_ndc);
return output;
}

Expand Down
2 changes: 1 addition & 1 deletion data/pipelines/draw2d.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ VSOutput mainVS(VSInput input) {
VSOutput output;
output.color = input.color;
output.uv = input.uv;
output.position = mul(float4(input.position, 0, 1), u_matrix);
output.position = transformPosition(float3(input.position, 0), u_matrix);
return output;
}

Expand Down
45 changes: 24 additions & 21 deletions data/pipelines/editor_icon.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -2,55 +2,58 @@
//@include "pipelines/common.hlsli"
//@texture_slot "Albedo", "textures/common/white.tga"

struct VSOutput {
float2 uv : TEXCOORD0;
float3 pos_ws : TEXCOORD1;
float3 normal : TEXCOORD2;
float4 position : SV_POSITION;
};

#define ATTR(X) TEXCOORD##X
cbuffer Model : register(b4) {
float4x4 u_model;
float4x4 u_ls_to_ws;
};

cbuffer Drawcall2 : register(b5) {
uint u_gbuffer_depth;
};

struct Input {
#define ATTR(X) TEXCOORD##X
struct VSInput {
float3 position : TEXCOORD0;
float3 normal : ATTR(NORMAL_ATTR);
#ifdef UV0_ATTR
float2 uv : ATTR(UV0_ATTR);
#endif
float3 normal : ATTR(NORMAL_ATTR);
};

VSOutput mainVS(Input input) {
struct VSOutput {
float2 uv : TEXCOORD0;
float distance_squared : TEXCOORD1;
float3 normal : TEXCOORD2;
float4 position : SV_POSITION;
};

// vertex shader for editor icons
// NOTE(review): lines using `p`, `output.pos_ws`, `u_model`, `Global_view` and
// `Global_projection_no_jitter` look like the pre-commit side of this commit's diff, the ones using
// `pos_vs`, `output.distance_squared`, `u_ls_to_ws`, `Global_ws_to_vs` and `Global_vs_to_ndc_no_jitter`
// like the post-commit side; presumably only the latter exist in the actual file - confirm
VSOutput mainVS(VSInput input) {
VSOutput output;
// uv is zeroed when the mesh has no UV0 attribute
#ifdef UV0_ATTR
output.uv = input.uv;
#else
output.uv = 0;
#endif
float4 p = mul(float4(input.position, 1), mul(u_model, Global_view));
output.pos_ws = p.xyz;
float4 pos_vs = transformPosition(input.position, u_ls_to_ws, Global_ws_to_vs);
// squared length of the view-space position, compared against the depth buffer in mainPS
output.distance_squared = dot(pos_vs.xyz, pos_vs.xyz);
// normal is passed through untransformed
output.normal = input.normal;
output.position = mul(p, Global_projection_no_jitter);
output.position = mul(pos_vs, Global_vs_to_ndc_no_jitter);
return output;
}

float4 mainPS(VSOutput input) : SV_TARGET {
// pixel shader for editor icons: albedo lit by Global_light_dir, darkened to 25 % where
// the position reconstructed from u_gbuffer_depth is closer than the icon
// NOTE(review): lines using `o_color` and `input.pos_ws` look like the pre-commit side of this
// commit's diff, the ones using `output` and `distance_squared` like the post-commit side;
// presumably only the latter exist in the actual file - confirm
float3 mainPS(VSOutput input) : SV_TARGET {
float2 screen_uv = input.position.xy / Global_framebuffer_size;
float3 pos_ws = getPositionWS(u_gbuffer_depth, screen_uv);
float4 albedo = sampleBindless(LinearSampler, t_albedo, input.uv);
#ifdef ALPHA_CUTOUT
if (albedo.a < 0.5) discard;
#endif
float d = dot(Global_light_dir.xyz, input.normal);
float4 o_color;
o_color.rgb = albedo.rgb * saturate(max(0, -d) + 0.25 * max(0, d) + 0.25);
o_color.a = 1;
if(length(pos_ws) < length(input.pos_ws)) o_color.rgb *= 0.25;
return o_color;
// NOTE(review): this initial value is never read, `output` is overwritten on the next line
float3 output = 1;
// full weight for -d, quarter weight for +d, plus a constant 0.25 term
output = albedo.rgb * saturate(max(0, -d) + 0.25 * max(0, d) + 0.25);

// squared lengths are compared; input.distance_squared is computed from the view-space position
// in mainVS, so this presumably relies on getPositionWS() returning a camera-relative position - verify
float distance_squared = dot(pos_ws, pos_ws);
bool is_behind = distance_squared < input.distance_squared;
if (is_behind) output *= 0.25;

return output;
}
2 changes: 1 addition & 1 deletion data/pipelines/grid.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Output mainVS(uint vertex_id : SV_VertexID) {
local_pos.y = 0;
output.lpos = float4(local_pos, 1);
float4 p = float4(local_pos - Global_camera_world_pos.xyz, 1);
output.position = mul(p, mul(Global_view, Global_projection_no_jitter));
output.position = mul(p, mul(Global_ws_to_vs, Global_vs_to_ndc_no_jitter));
return output;
}

Expand Down
6 changes: 3 additions & 3 deletions data/pipelines/impostor.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,14 @@ float2 dirToGrid(float3 vec) {
#else
float3 p = u_center.xyz + mul(input.position - u_center.xyz, tangent_space);
#endif
p = mul(float4(p, 1), model_mtx).xyz;
p = transformPosition(p, model_mtx).xyz;

output.lod = 1;
output.tangent = tangent_space[0];
output.normal = tangent_space[2];
output.pos_ws = float4(p, 1);

output.position = mul(output.pos_ws, Pass_view_projection);
output.position = mul(output.pos_ws, Pass_ws_to_ndc);
return output;
}
#else
Expand Down Expand Up @@ -147,7 +147,7 @@ float2 dirToGrid(float3 vec) {
output.normal = tangent_space[2];
output.pos_ws = float4(instance_pos + p, 1);

output.position = mul(output.pos_ws, Pass_view_projection);
output.position = mul(output.pos_ws, Pass_ws_to_ndc);
return output;
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion data/pipelines/particles.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ VSOutput mainVS(VSInput input) {

output.color = input.i_color;
output.emission = input.i_emission;
output.position = mul((mul(float4(input.i_position.xyz, 1), mul(u_model, Pass_view)) + float4(pos.xy, 0, 0)), Pass_projection);
float4 pos_vs = transformPosition(input.i_position.xyz, u_model, Pass_ws_to_vs) + float4(pos.xy, 0, 0);
output.position = transformPosition(pos_vs, Pass_vs_to_ndc);
return output;
}

Expand Down
10 changes: 5 additions & 5 deletions data/pipelines/procedural_geom.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
//@texture_slot "Ambient occlusion", "", "HAS_AMBIENT_OCCLUSION_TEX"

cbuffer Drawcall : register(b4) {
float4x4 u_local_to_world;
float4x4 u_ls_to_ws;
};

struct VSOutput {
Expand All @@ -35,10 +35,10 @@ struct VSInput {

// vertex shader for procedural geometry: outputs world-space position, normal and tangent,
// the projected position and the unchanged uv
// NOTE(review): the four lines using `u_local_to_world` / `Pass_view_projection` look like the
// pre-commit side of this commit's diff, the four using `u_ls_to_ws` / `Pass_ws_to_ndc` like the
// post-commit side; presumably only the latter exist in the actual file - confirm
VSOutput mainVS(VSInput input) {
VSOutput output;
output.pos_ws = mul(float4(input.position, 1), u_local_to_world).xyz;
output.position = mul(float4(output.pos_ws, 1), Pass_view_projection);
output.normal = mul(float4(input.normal, 0), u_local_to_world).xyz;
output.tangent = mul(float4(input.tangent, 0), u_local_to_world).xyz;
output.pos_ws = transformPosition(input.position, u_ls_to_ws).xyz;
output.position = transformPosition(output.pos_ws, Pass_ws_to_ndc);
// w == 0, so the translation row of the matrix does not affect directions
// NOTE(review): normal is transformed by the same matrix as positions and is not renormalized here;
// with non-uniform scale this is not the usual inverse-transpose transform - confirm it is intended
output.normal = mul(float4(input.normal, 0), u_ls_to_ws).xyz;
output.tangent = mul(float4(input.tangent, 0), u_ls_to_ws).xyz;
output.uv = input.uv;
return output;
}
Expand Down
4 changes: 2 additions & 2 deletions data/pipelines/ssao.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ cbuffer UB : register(b4) {
// get normal in view space
// samples u_normal_buffer at `tex_coord` (lod 0), remaps the value with "* 2 - 1"
// and transforms it as a direction (w == 0); the result is not renormalized
float3 getNormalVS(float2 tex_coord) {
float3 wnormal = sampleBindlessLod(LinearSamplerClamp, u_normal_buffer, tex_coord, 0).xyz * 2 - 1;
// NOTE(review): the next two lines look like the before/after pair of this commit's diff
// (Global_view renamed to Global_ws_to_vs); presumably only the second one exists in the actual file - confirm
float4 vnormal = mul(float4(wnormal, 0), Global_view);
float4 vnormal = mul(float4(wnormal, 0), Global_ws_to_vs);
return vnormal.xyz;
}

// get view-space position of pixel at `screen_uv`
// depth_buffer is sampled at lod 0 with LinearSamplerClamp, its red channel is used as NDC z
float3 getPositionVS(uint depth_buffer, float2 screen_uv) {
float depth_ndc = sampleBindlessLod(LinearSamplerClamp, depth_buffer, screen_uv, 0).r;
float4 pos_ndc = float4(toScreenUV(screen_uv) * 2 - 1, depth_ndc, 1.0);
// NOTE(review): the next two lines look like the before/after pair of this commit's diff
// (Global_inv_projection renamed to Global_ndc_to_vs); presumably only the second one
// exists in the actual file - confirm
float4 pos_vs = mul(pos_ndc, Global_inv_projection);
float4 pos_vs = mul(pos_ndc, Global_ndc_to_vs);
// perspective divide
return pos_vs.xyz / pos_vs.w;
}

Expand Down
8 changes: 4 additions & 4 deletions data/pipelines/sss.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ cbuffer Data : register(b4) {
void raycast(float3 csOrig, float3 csDir, float stride, float jitter, uint2 ip0) {
float3 csEndPoint = csOrig + abs(csOrig.z * 0.1) * csDir;

float4 H0 = mul(float4(csOrig, 1), Global_projection);
float4 H1 = mul(float4(csEndPoint, 1), Global_projection);
float4 H0 = transformPosition(csOrig, Global_vs_to_ndc);
float4 H1 = transformPosition(csEndPoint, Global_vs_to_ndc);

float k0 = 1 / H0.w, k1 = 1 / H1.w;

Expand Down Expand Up @@ -73,8 +73,8 @@ void main(uint3 thread_id : SV_DispatchThreadID) {
float2 inv_size = 1 / u_size;
float2 uv = float2(thread_id.xy) * inv_size;
float3 p = getPositionWS(u_depth, uv);
float4 o = mul(float4(p, 1), Global_view);
float3 d = mul(Global_light_dir.xyz, (float3x3)Global_view);
float4 o = mul(float4(p, 1), Global_ws_to_vs);
float3 d = mul(Global_light_dir.xyz, (float3x3)Global_ws_to_vs);
float rr = hash(float2(thread_id.xy) + 0.1 * Global_time);
raycast(o.xyz, d.xyz, u_stride, rr, thread_id.xy);
}
Loading

0 comments on commit 0732450

Please sign in to comment.