Merge pull request #1284 from KhronosGroup/fix-1282

MSL: Reintroduce workarounds for arrays not being value types
KhronosGroup · Feb 24, 2020 · commit c5f7b55
2 parents: f19fdb9 + 16796e9
Showing 20 changed files with 881 additions and 32 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -323,7 +323,7 @@ if (SPIRV_CROSS_STATIC)
 endif()
 
 set(spirv-cross-abi-major 0)
-set(spirv-cross-abi-minor 24)
+set(spirv-cross-abi-minor 25)
 set(spirv-cross-abi-patch 0)
 
 if (SPIRV_CROSS_SHARED)

diff --git a/main.cpp b/main.cpp
@@ -522,6 +522,7 @@ struct CLIArguments
 	bool msl_dispatch_base = false;
 	bool msl_decoration_binding = false;
 	bool msl_force_active_argument_buffer_resources = false;
+	bool msl_force_native_arrays = false;
 	bool glsl_emit_push_constant_as_ubo = false;
 	bool glsl_emit_ubo_as_plain_uniforms = false;
 	bool vulkan_glsl_disable_ext_samplerless_texture_functions = false;
@@ -616,6 +617,7 @@ static void print_help()
 	                "\t[--msl-inline-uniform-block <set index> <binding>]\n"
 	                "\t[--msl-decoration-binding]\n"
 	                "\t[--msl-force-active-argument-buffer-resources]\n"
+	                "\t[--msl-force-native-arrays]\n"
 	                "\t[--hlsl]\n"
 	                "\t[--reflect]\n"
 	                "\t[--shader-model]\n"
@@ -806,6 +808,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
 		msl_opts.dispatch_base = args.msl_dispatch_base;
 		msl_opts.enable_decoration_binding = args.msl_decoration_binding;
 		msl_opts.force_active_argument_buffer_resources = args.msl_force_active_argument_buffer_resources;
+		msl_opts.force_native_arrays = args.msl_force_native_arrays;
 		msl_comp->set_msl_options(msl_opts);
 		for (auto &v : args.msl_discrete_descriptor_sets)
 			msl_comp->add_discrete_descriptor_set(v);
@@ -1164,6 +1167,9 @@ static int main_inner(int argc, char *argv[])
 		uint32_t binding = parser.next_uint();
 		args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding));
 	});
+	cbs.add("--msl-force-native-arrays", [&args](CLIParser &) {
+		args.msl_force_native_arrays = true;
+	});
 	cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
 	cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
 		auto old_name = parser.next_string();

diff --git a/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp
@@ -0,0 +1,94 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct Data
+{
+    float a;
+    float b;
+};
+
+constant float X_tmp [[function_constant(0)]];
+constant float X = is_function_constant_defined(X_tmp) ? X_tmp : 4.0;
+
+struct Data_1
+{
+    float a;
+    float b;
+};
+
+struct SSBO
+{
+    Data_1 outdata[1];
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u);
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
+{
+    Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } };
+
+    Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } };
+    Data data2[2];
+    spvArrayCopyFromStackToStack1(data2, _31);
+    _53.outdata[gl_WorkGroupID.x].a = _25[gl_LocalInvocationID.x].a + data2[gl_LocalInvocationID.x].a;
+    _53.outdata[gl_WorkGroupID.x].b = _25[gl_LocalInvocationID.x].b + data2[gl_LocalInvocationID.x].b;
+}
+
diff --git a/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp b/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp
@@ -0,0 +1,20 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct BUF
+{
+    int a;
+    float b;
+    float c;
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
+
+kernel void main0(device BUF& o [[buffer(0)]])
+{
+    o.a = 4;
+    o.b = o.c;
+}
+
diff --git a/reference/opt/shaders-msl/vert/return-array.force-native-array.vert b/reference/opt/shaders-msl/vert/return-array.force-native-array.vert
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+};
+
+struct main0_in
+{
+    float4 vInput1 [[attribute(1)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.gl_Position = float4(10.0) + in.vInput1;
+    return out;
+}
+
diff --git a/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert b/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert
@@ -0,0 +1,103 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant float4 _68[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) };
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+};
+
+struct main0_in
+{
+    int Index1 [[attribute(0)]];
+    int Index2 [[attribute(1)]];
+};
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+static inline __attribute__((always_inline))
+float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
+{
+    float4 indexable[4];
+    spvArrayCopyFromStackToStack1(indexable, positions);
+    float4 indexable_1[4];
+    spvArrayCopyFromStackToStack1(indexable_1, positions2);
+    return indexable[Index1] + indexable_1[Index2];
+}
+
+static inline __attribute__((always_inline))
+float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
+{
+    return consume_constant_arrays2(positions, positions2, Index1, Index2);
+}
+
+vertex main0_out main0(main0_in in [[stage_in]])
+{
+    float4 _68_array_copy[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) };
+    main0_out out = {};
+    float4 LUT2[4];
+    LUT2[0] = float4(10.0);
+    LUT2[1] = float4(11.0);
+    LUT2[2] = float4(12.0);
+    LUT2[3] = float4(13.0);
+    out.gl_Position = consume_constant_arrays(_68_array_copy, LUT2, in.Index1, in.Index2);
+    return out;
+}
+
diff --git a/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp
@@ -0,0 +1,104 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct Data
+{
+    float a;
+    float b;
+};
+
+constant float X_tmp [[function_constant(0)]];
+constant float X = is_function_constant_defined(X_tmp) ? X_tmp : 4.0;
+
+struct Data_1
+{
+    float a;
+    float b;
+};
+
+struct SSBO
+{
+    Data_1 outdata[1];
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u);
+
+constant Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } };
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+static inline __attribute__((always_inline))
+Data combine(thread const Data& a, thread const Data& b)
+{
+    return Data{ a.a + b.a, a.b + b.b };
+}
+
+kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
+{
+    Data data[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } };
+    Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } };
+    Data data2[2];
+    spvArrayCopyFromStackToStack1(data2, _31);
+    Data param = data[gl_LocalInvocationID.x];
+    Data param_1 = data2[gl_LocalInvocationID.x];
+    Data _73 = combine(param, param_1);
+    _53.outdata[gl_WorkGroupID.x].a = _73.a;
+    _53.outdata[gl_WorkGroupID.x].b = _73.b;
+}
+