diff --git a/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 000000000..8923f96a7 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,24 @@ +RWByteAddressBuffer _9 : register(u6, space0); +globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0); +RasterizerOrderedByteAddressBuffer _52 : register(u4, space0); +RWTexture2D img4 : register(u5, space0); +RasterizerOrderedTexture2D img : register(u0, space0); +RasterizerOrderedTexture2D img3 : register(u2, space0); +RasterizerOrderedTexture2D img2 : register(u1, space0); + +void frag_main() +{ + _9.Store(0, uint(0)); + img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f); + img[int2(0, 0)] = img3[int2(0, 0)]; + uint _39; + InterlockedAdd(img2[int2(0, 0)], 1u, _39); + _42.Store(0, uint(int(_42.Load(0)) + 42)); + uint _55; + _42.InterlockedAnd(4, _52.Load(0), _55); +} + +void main() +{ + frag_main(); +} diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 000000000..f77b8ed99 --- /dev/null +++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,43 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +struct spvDescriptorSetBuffer0 +{ + device Buffer3* m_9 [[id(0)]]; + texture2d img4 [[id(1)]]; + texture2d img [[id(2), raster_order_group(0)]]; + texture2d img3 [[id(3), raster_order_group(0)]]; + volatile device Buffer* m_34 [[id(4), raster_order_group(0)]]; + device Buffer2* m_44 [[id(5), raster_order_group(0)]]; +}; + +fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + (*spvDescriptorSet0.m_9).baz = 0; + spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0))); + (*spvDescriptorSet0.m_34).foo += 42; + uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed); +} + diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 000000000..803416c66 --- /dev/null +++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,33 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 [[texture(2), raster_order_group(0)]]) +{ + _9.baz = 0; + img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + img.write(img3.read(uint2(int2(0))), uint2(int2(0))); + _34.foo += 42; + uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed); +} + diff --git a/reference/opt/shaders/frag/pixel-interlock-ordered.frag b/reference/opt/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 000000000..46cca96c6 --- /dev/null +++ b/reference/opt/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/reference/opt/shaders/frag/pixel-interlock-unordered.frag b/reference/opt/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 000000000..d60cd1451 --- /dev/null +++ b/reference/opt/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_unordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/reference/opt/shaders/frag/sample-interlock-ordered.frag b/reference/opt/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 000000000..67ca5560e --- /dev/null +++ b/reference/opt/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(sample_interlock_ordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0])); + endInvocationInterlockARB(); +} + diff --git a/reference/opt/shaders/frag/sample-interlock-unordered.frag b/reference/opt/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 000000000..ea74397d6 --- /dev/null +++ b/reference/opt/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(sample_interlock_unordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag new file mode 100644 index 000000000..3268995c8 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag @@ -0,0 +1,32 @@ +RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RWByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _31 = int(gl_FragCoord.x); + _7.Store(_31 * 4 + 0, _7.Load(_31 * 4 + 0) + 1u); +} + +void callee() +{ + int _39 = int(gl_FragCoord.x); + _9.Store(_39 * 4 + 0, _9.Load(_39 * 4 + 0) + 1u); + callee2(); +} + +void frag_main() +{ + callee(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag new file mode 100644 index 000000000..692771215 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag @@ -0,0 +1,42 @@ +RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RWByteAddressBuffer _13 : register(u2, space0); +RasterizerOrderedByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _44 = int(gl_FragCoord.x); + _7.Store(_44 * 4 + 0, _7.Load(_44 * 4 + 0) + 1u); +} + +void callee() +{ + int _52 = int(gl_FragCoord.x); + _9.Store(_52 * 4 + 0, _9.Load(_52 * 4 + 0) + 1u); + callee2(); + if (true) + { + } +} + +void _35() +{ + _13.Store(int(gl_FragCoord.x) * 4 + 0, 4u); +} + +void frag_main() +{ + callee(); + _35(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag new file mode 100644 index 000000000..bd963a74d --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag @@ -0,0 +1,42 @@ +RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RasterizerOrderedByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _37 = int(gl_FragCoord.x); + _7.Store(_37 * 4 + 0, _7.Load(_37 * 4 + 0) + 1u); +} + +void callee() +{ + int _45 = int(gl_FragCoord.x); + _9.Store(_45 * 4 + 0, _9.Load(_45 * 4 + 0) + 1u); + callee2(); +} + +void _29() +{ +} + +void _31() +{ +} + +void frag_main() +{ + callee(); + _29(); + _31(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag new file mode 100644 index 000000000..55b71de21 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag @@ -0,0 +1,32 @@ +RasterizerOrderedByteAddressBuffer _14 : register(u1, space0); +RasterizerOrderedByteAddressBuffer _35 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _25 = int(gl_FragCoord.x); + _14.Store(_25 * 4 + 0, _14.Load(_25 * 4 + 0) + 1u); +} + +void callee() +{ + int _38 = int(gl_FragCoord.x); + _35.Store(_38 * 4 + 0, _35.Load(_38 * 4 + 0) + 1u); + callee2(); +} + +void frag_main() +{ + callee(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + frag_main(); +} diff --git a/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 000000000..8923f96a7 --- /dev/null +++ b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,24 @@ +RWByteAddressBuffer _9 : register(u6, space0); +globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0); +RasterizerOrderedByteAddressBuffer _52 : register(u4, space0); +RWTexture2D img4 : register(u5, space0); +RasterizerOrderedTexture2D img : register(u0, space0); +RasterizerOrderedTexture2D img3 : register(u2, space0); +RasterizerOrderedTexture2D img2 : register(u1, space0); + +void frag_main() +{ + _9.Store(0, uint(0)); + img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f); + img[int2(0, 0)] = img3[int2(0, 0)]; + uint _39; + InterlockedAdd(img2[int2(0, 0)], 1u, _39); + _42.Store(0, uint(int(_42.Load(0)) + 42)); + uint _55; + _42.InterlockedAnd(4, _52.Load(0), _55); +} + +void main() +{ + frag_main(); +} diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag new file mode 100644 index 000000000..1b6af2a36 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag @@ -0,0 +1,35 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _31 = int(gl_FragCoord.x); + v_7.values1[_31]++; +} + +inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) +{ + int _39 = int(gl_FragCoord.x); + v_9.values0[_39]++; + callee2(gl_FragCoord, v_7); +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag new file mode 100644 index 000000000..dded6a145 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct _12 +{ + uint _m0[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _44 = int(gl_FragCoord.x); + v_7.values1[_44]++; +} + +inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) +{ + int _52 = int(gl_FragCoord.x); + v_9.values0[_52]++; + callee2(gl_FragCoord, v_7); + if (true) + { + } +} + +inline void _35(thread float4& gl_FragCoord, device _12& v_13) +{ + v_13._m0[int(gl_FragCoord.x)] = 4u; +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device _12& v_13 [[buffer(1)]], device SSBO0& v_9 [[buffer(2), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); + _35(gl_FragCoord, v_13); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag new file mode 100644 index 000000000..5fe65f3b0 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag @@ -0,0 +1,45 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _37 = int(gl_FragCoord.x); + v_7.values1[_37]++; +} + +inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) +{ + int _45 = int(gl_FragCoord.x); + v_9.values0[_45]++; + callee2(gl_FragCoord, v_7); +} + +inline void _29() +{ +} + +inline void _31() +{ +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); + _29(); + _31(); +} + diff --git a/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag new file mode 100644 index 000000000..716ba251f --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag @@ -0,0 +1,35 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +inline void callee2(device SSBO1& v_14, thread float4& gl_FragCoord) +{ + int _25 = int(gl_FragCoord.x); + v_14.values1[_25]++; +} + +inline void callee(device SSBO1& v_14, thread float4& gl_FragCoord, device SSBO0& v_35) +{ + int _38 = int(gl_FragCoord.x); + v_35.values0[_38]++; + callee2(v_14, gl_FragCoord); +} + +fragment void main0(device SSBO1& v_14 [[buffer(0), raster_order_group(0)]], device SSBO0& v_35 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(v_14, gl_FragCoord, v_35); +} + diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 000000000..f77b8ed99 --- /dev/null +++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,43 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +struct spvDescriptorSetBuffer0 +{ + device Buffer3* m_9 [[id(0)]]; + texture2d img4 [[id(1)]]; + texture2d img [[id(2), raster_order_group(0)]]; + texture2d img3 [[id(3), raster_order_group(0)]]; + volatile device Buffer* m_34 [[id(4), raster_order_group(0)]]; + device Buffer2* m_44 [[id(5), raster_order_group(0)]]; +}; + +fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + (*spvDescriptorSet0.m_9).baz = 0; + spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0))); + (*spvDescriptorSet0.m_34).foo += 42; + uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed); +} + diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 000000000..803416c66 --- /dev/null +++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,33 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 [[texture(2), raster_order_group(0)]]) +{ + _9.baz = 0; + img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + img.write(img3.read(uint2(int2(0))), uint2(int2(0))); + _34.foo += 42; + uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed); +} + diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag new file mode 100644 index 000000000..948803c67 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag @@ -0,0 +1,39 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _31 = int(gl_FragCoord.x); + _7.values1[_31]++; +} + +void callee() +{ + int _39 = int(gl_FragCoord.x); + _9.values0[_39]++; + callee2(); +} + +void spvMainInterlockedBody() +{ + callee(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. + beginInvocationInterlockARB(); + spvMainInterlockedBody(); + endInvocationInterlockARB(); +} diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag new file mode 100644 index 000000000..72dca0d7f --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag @@ -0,0 +1,53 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 2, std430) buffer _12_13 +{ + uint _m0[]; +} _13; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _44 = int(gl_FragCoord.x); + _7.values1[_44]++; +} + +void callee() +{ + int _52 = int(gl_FragCoord.x); + _9.values0[_52]++; + callee2(); + if (true) + { + } +} + +void _35() +{ + _13._m0[int(gl_FragCoord.x)] = 4u; +} + +void spvMainInterlockedBody() +{ + callee(); + _35(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. + beginInvocationInterlockARB(); + spvMainInterlockedBody(); + endInvocationInterlockARB(); +} diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag new file mode 100644 index 000000000..b09eb6667 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag @@ -0,0 +1,49 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _37 = int(gl_FragCoord.x); + _7.values1[_37]++; +} + +void callee() +{ + int _45 = int(gl_FragCoord.x); + _9.values0[_45]++; + callee2(); +} + +void _29() +{ +} + +void _31() +{ +} + +void spvMainInterlockedBody() +{ + callee(); + _29(); + _31(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. + beginInvocationInterlockARB(); + spvMainInterlockedBody(); + endInvocationInterlockARB(); +} diff --git a/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag new file mode 100644 index 000000000..151ed01d4 --- /dev/null +++ b/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag @@ -0,0 +1,34 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _14; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _35; + +void callee2() +{ + int _25 = int(gl_FragCoord.x); + _14.values1[_25]++; +} + +void callee() +{ + int _38 = int(gl_FragCoord.x); + _35.values0[_38]++; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} + diff --git a/reference/shaders/frag/pixel-interlock-ordered.frag b/reference/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 000000000..46cca96c6 --- /dev/null +++ b/reference/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/reference/shaders/frag/pixel-interlock-unordered.frag b/reference/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 000000000..d60cd1451 --- /dev/null +++ b/reference/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_unordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/reference/shaders/frag/sample-interlock-ordered.frag b/reference/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 000000000..67ca5560e --- /dev/null +++ b/reference/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(sample_interlock_ordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0])); + endInvocationInterlockARB(); +} + diff --git a/reference/shaders/frag/sample-interlock-unordered.frag b/reference/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 000000000..ea74397d6 --- /dev/null +++ b/reference/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(sample_interlock_unordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag new file mode 100644 index 000000000..ebd8d6bab --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag new file mode 100644 index 000000000..69b8f9112 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag new file mode 100644 index 000000000..7c0fe9a2b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag new file mode 100644 index 000000000..59079fe58 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 000000000..ceac8cc50 --- /dev/null +++ b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. + baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag new file mode 100644 index 000000000..ebd8d6bab --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag new file mode 100644 index 000000000..69b8f9112 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag new file mode 100644 index 000000000..7c0fe9a2b --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag new file mode 100644 index 000000000..59079fe58 --- /dev/null +++ b/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 000000000..04886a672 --- /dev/null +++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +//layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. + baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + //imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 000000000..04886a672 --- /dev/null +++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +//layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. + baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + //imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag new file mode 100644 index 000000000..ebd8d6bab --- /dev/null +++ b/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag new file mode 100644 index 000000000..69b8f9112 --- /dev/null +++ b/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag new file mode 100644 index 000000000..7c0fe9a2b --- /dev/null +++ b/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag new file mode 100644 index 000000000..59079fe58 --- /dev/null +++ b/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/pixel-interlock-ordered.frag b/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 000000000..4439f0672 --- /dev/null +++ b/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/pixel-interlock-unordered.frag b/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 000000000..f8fd468c1 --- /dev/null +++ b/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_unordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/sample-interlock-ordered.frag b/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 000000000..fa80dc9f8 --- /dev/null +++ b/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(sample_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, gl_SampleMaskIn[0]); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/sample-interlock-unordered.frag b/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 000000000..6fe5437f3 --- /dev/null +++ b/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(sample_interlock_unordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/spirv_cross.cpp b/spirv_cross.cpp index a83e14e21..c4913ecd6 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -1507,6 +1507,7 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const { handler.set_current_block(block); + handler.rearm_current_block(block); // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks, // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing @@ -1530,6 +1531,8 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand return false; if (!handler.end_function_scope(ops, i.length)) return false; + + handler.rearm_current_block(block); } } } @@ -3798,7 +3801,12 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin const CFG &Compiler::get_cfg_for_current_function() const { assert(current_function); - auto cfg_itr = function_cfgs.find(current_function->self); + return get_cfg_for_function(current_function->self); +} + +const CFG &Compiler::get_cfg_for_function(uint32_t id) const +{ + auto cfg_itr = function_cfgs.find(id); assert(cfg_itr != end(function_cfgs)); assert(cfg_itr->second); return *cfg_itr->second; @@ -4249,6 +4257,317 @@ void Compiler::analyze_non_block_pointer_types() sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types)); } +bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t) +{ + if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT) + { + if (interlock_function_id != 0 && interlock_function_id != call_stack.back()) + { + // Most complex case, we have no sensible way of dealing with this + // other than taking the 100% conservative approach, exit early. + split_function_case = true; + return false; + } + else + { + interlock_function_id = call_stack.back(); + // If this call is performed inside control flow we have a problem. + auto &cfg = compiler.get_cfg_for_function(interlock_function_id); + + uint32_t from_block_id = compiler.get(interlock_function_id).entry_block; + bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id); + if (!outside_control_flow) + control_flow_interlock = true; + } + } + return true; +} + +void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block) +{ + current_block_id = block.self; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t) +{ + call_stack.pop_back(); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + if (args[2] == interlock_function_id) + call_stack_is_interlocked = true; + + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t) +{ + if (call_stack.back() == interlock_function_id) + call_stack_is_interlocked = false; + + call_stack.pop_back(); + return true; +} + +void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id) +{ + if ((use_critical_section && in_crit_sec) || + (control_flow_interlock && call_stack_is_interlocked) || + split_function_case) + { + compiler.interlocked_resources.insert(id); + } +} + +bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // Only care about critical section analysis if we have simple case. + if (use_critical_section) + { + if (opcode == OpBeginInvocationInterlockEXT) + { + in_crit_sec = true; + return true; + } + + if (opcode == OpEndInvocationInterlockEXT) + { + // End critical section--nothing more to do. + return false; + } + } + + // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need. + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniformConstant: + { + uint32_t result_type = args[0]; + uint32_t id = args[1]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + break; + } + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. + if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get(result_type); + if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant || + type.storage == StorageClassStorageBuffer) + { + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + compiler.ir.ids[id].set_allow_type_rewrite(); + } + break; + } + + case OpImageTexelPointer: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + uint32_t id = args[1]; + uint32_t ptr = args[2]; + auto &e = compiler.set(id, "", result_type, true); + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + e.loaded_from = var->self; + break; + } + + case OpStore: + case OpImageWrite: + case OpAtomicStore: + { + if (length < 1) + return false; + + uint32_t ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + case OpCopyMemory: + { + if (length < 2) + return false; + + uint32_t dst = args[0]; + uint32_t src = args[1]; + auto *dst_var = compiler.maybe_get_backing_variable(dst); + auto *src_var = compiler.maybe_get_backing_variable(src); + + if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer)) + access_potential_resource(dst_var->self); + + if (src_var) + { + if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer) + break; + + if (src_var->storage == StorageClassUniform && + !compiler.has_decoration(compiler.get(src_var->basetype).self, DecorationBufferBlock)) + { + break; + } + + access_potential_resource(src_var->self); + } + + break; + } + + case OpImageRead: + case OpAtomicLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. + if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassUniformConstant: + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + default: + break; + } + + return true; +} + +void Compiler::analyze_interlocked_resource_usage() +{ + if (get_execution_model() == ExecutionModelFragment && + (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT))) + { + InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point); + traverse_all_reachable_opcodes(get(ir.default_entry_point), prepass_handler); + + InterlockedResourceAccessHandler handler(*this, ir.default_entry_point); + handler.interlock_function_id = prepass_handler.interlock_function_id; + handler.split_function_case = prepass_handler.split_function_case; + handler.control_flow_interlock = prepass_handler.control_flow_interlock; + handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock; + + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + // For GLSL. If we hit any of these cases, we have to fall back to conservative approach. + interlocked_is_complex = !handler.use_critical_section || + handler.interlock_function_id != ir.default_entry_point; + } +} + bool Compiler::type_is_array_of_pointers(const SPIRType &type) const { if (!type.pointer) diff --git a/spirv_cross.hpp b/spirv_cross.hpp index 90cc99566..28ccba672 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -710,6 +710,13 @@ class Compiler { } + // Called after returning from a function or when entering a block, + // can be called multiple times per block, + // while set_current_block is only called on block entry. + virtual void rearm_current_block(const SPIRBlock &) + { + } + virtual bool begin_function_scope(const uint32_t *, uint32_t) { return true; @@ -884,10 +891,11 @@ class Compiler void build_function_control_flow_graphs_and_analyze(); std::unordered_map> function_cfgs; const CFG &get_cfg_for_current_function() const; + const CFG &get_cfg_for_function(uint32_t id) const; struct CFGBuilder : OpcodeHandler { - CFGBuilder(Compiler &compiler_); + explicit CFGBuilder(Compiler &compiler_); bool follow_function_call(const SPIRFunction &func) override; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; @@ -932,7 +940,7 @@ class Compiler struct PhysicalStorageBufferPointerHandler : OpcodeHandler { - PhysicalStorageBufferPointerHandler(Compiler &compiler_); + explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_); bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; Compiler &compiler; std::unordered_set types; @@ -945,6 +953,61 @@ class Compiler bool single_function); bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var); + // Finds all resources that are written to from inside the critical section, if present. + // The critical section is delimited by OpBeginInvocationInterlockEXT and + // OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written + // while inside the critical section must be placed in a raster order group. + struct InterlockedResourceAccessHandler : OpcodeHandler + { + InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + bool in_crit_sec = false; + + uint32_t interlock_function_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + bool use_critical_section = false; + bool call_stack_is_interlocked = false; + SmallVector call_stack; + + void access_potential_resource(uint32_t id); + }; + + struct InterlockedResourceAccessPrepassHandler : OpcodeHandler + { + InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + void rearm_current_block(const SPIRBlock &block) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + uint32_t interlock_function_id = 0; + uint32_t current_block_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + SmallVector call_stack; + }; + + void analyze_interlocked_resource_usage(); + // The set of all resources written while inside the critical section, if present. + std::unordered_set interlocked_resources; + bool interlocked_is_complex = false; + void make_constant_null(uint32_t id, uint32_t type); std::unordered_map declared_block_names; diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 3619f09cb..a8f7a8670 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -511,6 +511,7 @@ string CompilerGLSL::compile() fixup_image_load_store_access(); update_active_builtins(); analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); // Shaders might cast unrelated data to pointers of non-block types. // Find all such instances and make sure we can cast the pointers to a synthesized block type. @@ -535,6 +536,25 @@ string CompilerGLSL::compile() pass_count++; } while (is_forcing_recompilation()); + // Implement the interlocked wrapper function at the end. + // The body was implemented in lieu of main(). + if (interlocked_is_complex) + { + statement("void main()"); + begin_scope(); + statement("// Interlocks were used in a way not compatible with GLSL, this is very slow."); + if (options.es) + statement("beginInvocationInterlockNV();"); + else + statement("beginInvocationInterlockARB();"); + statement("spvMainInterlockedBody();"); + if (options.es) + statement("endInvocationInterlockNV();"); + else + statement("endInvocationInterlockARB();"); + end_scope(); + } + // Entry point in GLSL is always main(). get_entry_point().name = "main"; @@ -605,6 +625,26 @@ void CompilerGLSL::emit_header() if (execution.flags.get(ExecutionModePostDepthCoverage)) require_extension_internal("GL_ARB_post_depth_coverage"); + // Needed for: layout({pixel,sample}_interlock_[un]ordered) in; + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) || + execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + { + if (options.es) + { + if (options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock."); + require_extension_internal("GL_NV_fragment_shader_interlock"); + } + else + { + if (options.version < 420) + require_extension_internal("GL_ARB_shader_image_load_store"); + require_extension_internal("GL_ARB_fragment_shader_interlock"); + } + } + for (auto &ext : forced_extensions) { if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") @@ -784,6 +824,15 @@ void CompilerGLSL::emit_header() if (execution.flags.get(ExecutionModePostDepthCoverage)) inputs.push_back("post_depth_coverage"); + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT)) + inputs.push_back("pixel_interlock_ordered"); + else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT)) + inputs.push_back("pixel_interlock_unordered"); + else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT)) + inputs.push_back("sample_interlock_ordered"); + else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + inputs.push_back("sample_interlock_unordered"); + if (!options.es && execution.flags.get(ExecutionModeDepthGreater)) statement("layout(depth_greater) out float gl_FragDepth;"); else if (!options.es && execution.flags.get(ExecutionModeDepthLess)) @@ -10109,6 +10158,34 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) emit_op(ops[0], ops[1], "helperInvocationEXT()", false); break; + case OpBeginInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) + { + if (options.es) + statement("beginInvocationInterlockNV();"); + else + statement("beginInvocationInterlockARB();"); + + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; + + case OpEndInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) + { + if (options.es) + statement("endInvocationInterlockNV();"); + else + statement("endInvocationInterlockARB();"); + + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; + default: statement("// unimplemented op ", instruction.op); break; @@ -11022,7 +11099,13 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret if (func.self == ir.default_entry_point) { - decl += "main"; + // If we need complex fallback in GLSL, we just wrap main() in a function + // and interlock the entire shader ... + if (interlocked_is_complex) + decl += "spvMainInterlockedBody"; + else + decl += "main"; + processing_entry_point = true; } else diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 9658d1192..0df890656 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -203,7 +203,7 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype) } } -string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) +string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id) { auto &imagetype = get(type.image.type); const char *dim = nullptr; @@ -235,7 +235,12 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) if (type.image.sampled == 1) return join("Buffer<", type_to_glsl(imagetype), components, ">"); else if (type.image.sampled == 2) + { + if (interlocked_resources.count(id)) + return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype), + ">"); return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">"); + } else SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime."); case DimSubpassData: @@ -248,6 +253,8 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) const char *arrayed = type.image.arrayed ? "Array" : ""; const char *ms = type.image.ms ? "MS" : ""; const char *rw = typed_load ? "RW" : ""; + if (typed_load && interlocked_resources.count(id)) + rw = "RasterizerOrdered"; return join(rw, "Texture", dim, ms, arrayed, "<", typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : join(type_to_glsl(imagetype), components), @@ -1848,9 +1855,13 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) Bitset flags = ir.get_buffer_block_flags(var); bool is_readonly = flags.get(DecorationNonWritable); bool is_coherent = flags.get(DecorationCoherent); + bool is_interlocked = interlocked_resources.count(var.self) > 0; + const char *type_name = "ByteAddressBuffer "; + if (!is_readonly) + type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer "; add_resource_name(var.self); - statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ", - to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type), + to_resource_binding(var), ";"); } else { @@ -4673,6 +4684,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpIsHelperInvocationEXT: SPIRV_CROSS_THROW("helperInvocationEXT() is not supported in HLSL."); + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (hlsl_options.shader_model < 51) + SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1."); + break; // Nothing to do in the body + default: CompilerGLSL::emit_instruction(instruction); break; @@ -4850,6 +4867,7 @@ string CompilerHLSL::compile() validate_shader_model(); update_active_builtins(); analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); // Subpass input needs SV_Position. if (need_subpass_input) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index f4ba6a409..881995220 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -852,6 +852,7 @@ string CompilerMSL::compile() update_active_builtins(); analyze_image_and_sampler_usage(); analyze_sampled_image_usage(); + analyze_interlocked_resource_usage(); preprocess_op_codes(); build_implicit_builtins(); @@ -5541,6 +5542,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) emit_op(ops[0], ops[1], "simd_is_helper_thread()", false); break; + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("Raster order groups require MSL 2.0."); + break; // Nothing to do in the body + default: CompilerGLSL::emit_instruction(instruction); break; @@ -7436,8 +7443,15 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in bool is_builtin = is_member_builtin(type, index, &builtin); if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary)) - return join(" [[id(", - get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")]]"); + { + string quals = join( + " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")"); + if (interlocked_resources.count( + get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID))) + quals += ", raster_order_group(0)"; + quals += "]]"; + return quals; + } // Vertex function inputs if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput) @@ -8239,7 +8253,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) ep_args += ", "; ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) + r.name + "_" + convert_to_string(i); - ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]"; + ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } } else @@ -8248,7 +8265,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) ep_args += ", "; ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name; - ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } break; } @@ -8264,7 +8284,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) ep_args += image_type_glsl(type, var_id) + " " + r.name; if (r.plane > 0) ep_args += join(plane_name_suffix, r.plane); - ep_args += " [[texture(" + convert_to_string(r.index) + ")]]"; + ep_args += " [[texture(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; break; default: if (!ep_args.empty()) @@ -8274,7 +8297,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) type_to_glsl(type, var_id) + "& " + r.name; else ep_args += type_to_glsl(type, var_id) + " " + r.name; - ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; break; } }