diff --git a/Src/ILGPU.Tests/SharedMemory.cs b/Src/ILGPU.Tests/SharedMemory.cs
index 79548664b..e632f1992 100644
--- a/Src/ILGPU.Tests/SharedMemory.cs
+++ b/Src/ILGPU.Tests/SharedMemory.cs
@@ -1,6 +1,6 @@
 // ---------------------------------------------------------------------------------------
 // ILGPU
-// Copyright (c) 2021 ILGPU Project
+// Copyright (c) 2021-2022 ILGPU Project
 // www.ilgpu.net
 //
 // File: SharedMemory.cs
@@ -245,13 +245,87 @@ public void MultiDimensionalSharedMemory2D()
             var expected = Enumerable.Repeat(42, groupSize).ToArray();
             Verify(buffer.View, expected);
         }
+
+        internal static void MultiDimensionalSharedMemoryKernel2DDenseX(
+            ArrayView1D<int, Stride1D.Dense> output)
+        {
+            var sharedMemory = ILGPU.SharedMemory.Allocate2DDenseX<int>(
+                new Index2D(20, 100));
+            // Threads with an index >= 100 read cell [0, 0] back, so zero it up front.
+            if (Group.IsFirstThread)
+            {
+                sharedMemory[0, 0] = 0;
+                sharedMemory[1, 0] = 0;
+            }
+            if (Grid.GlobalIndex.X < 100)
+            {
+                sharedMemory[0, Grid.GlobalIndex.X] = Grid.GlobalIndex.X;
+                sharedMemory[1, Grid.GlobalIndex.X] = 7 * Grid.GlobalIndex.X;
+            }
+            // Synchronize the group so that all writes above are done before reading.
+            Group.Barrier();
+            if (Grid.GlobalIndex.X < 100)
+                output[Grid.GlobalIndex.X] = sharedMemory[1, Grid.GlobalIndex.X];
+            else
+                output[Grid.GlobalIndex.X] = sharedMemory[0, 0];
+        }
+
+        [Fact]
+        [KernelMethod(nameof(MultiDimensionalSharedMemoryKernel2DDenseX))]
+        public void MultiDimensionalSharedMemory2DDenseX()
+        {
+            int groupSize = Accelerator.MaxNumThreadsPerGroup;
+            using var buffer = Accelerator.Allocate1D<int>(groupSize);
+            Execute(new KernelConfig(1, groupSize), buffer.View);
+            var expected =
+                Enumerable.Range(0, groupSize).Select(
+                    x => x < 100 ? 7 * x : 0).ToArray();
+            Verify(buffer.View, expected);
+        }
+
+        internal static void MultiDimensionalSharedMemoryKernel2DDenseY(
+            ArrayView1D<int, Stride1D.Dense> output)
+        {
+            var sharedMemory = ILGPU.SharedMemory.Allocate2DDenseY<int>(
+                new Index2D(100, 20));
+            // Threads with an index >= 100 read cell [0, 0] back, so zero it up front.
+            if (Group.IsFirstThread)
+            {
+                sharedMemory[0, 0] = 0;
+                sharedMemory[0, 1] = 0;
+            }
+            if (Grid.GlobalIndex.X < 100)
+            {
+                sharedMemory[Grid.GlobalIndex.X, 0] = Grid.GlobalIndex.X;
+                sharedMemory[Grid.GlobalIndex.X, 1] = 7 * Grid.GlobalIndex.X;
+            }
+            // Synchronize the group so that all writes above are done before reading.
+            Group.Barrier();
+            if (Grid.GlobalIndex.X < 100)
+                output[Grid.GlobalIndex.X] = sharedMemory[Grid.GlobalIndex.X, 1];
+            else
+                output[Grid.GlobalIndex.X] = sharedMemory[0, 0];
+        }
+
+        [Fact]
+        [KernelMethod(nameof(MultiDimensionalSharedMemoryKernel2DDenseY))]
+        public void MultiDimensionalSharedMemory2DDenseY()
+        {
+            int groupSize = Accelerator.MaxNumThreadsPerGroup;
+            using var buffer = Accelerator.Allocate1D<int>(groupSize);
+            Execute(new KernelConfig(1, groupSize), buffer.View);
+            var expected =
+                Enumerable.Range(0, groupSize).Select(
+                    x => x < 100 ? 7 * x : 0).ToArray();
+            Verify(buffer.View, expected);
+        }
 
         internal static void MultiDimensionalSharedMemoryKernel3D(
             ArrayView1D<int, Stride1D.Dense> output)
         {
             var sharedMemory = ILGPU.SharedMemory.Allocate3D<int, Stride3D.DenseZY>(
                 new Index3D(5, 7, 3),
-                new Stride3D.DenseZY(5 * 7, 7));
+                new Stride3D.DenseZY(3 * 7, 3));
             if (Group.IsFirstThread)
                 sharedMemory[2, 6, 1] = 42;
             Group.Barrier();
@@ -269,5 +343,53 @@ public void MultiDimensionalSharedMemory3D()
             var expected = Enumerable.Repeat(42, groupSize).ToArray();
             Verify(buffer.View, expected);
         }
+
+        internal static void MultiDimensionalSharedMemoryKernel3DDenseXY(
+            ArrayView1D<int, Stride1D.Dense> output)
+        {
+            var sharedMemory = ILGPU.SharedMemory.Allocate3DDenseXY<int>(
+                new Index3D(11, 17, 13));
+            // One thread writes the marker that every thread reads back.
+            if (Group.IsFirstThread)
+                sharedMemory[4, 5, 2] = 42;
+            Group.Barrier();
+            output[Grid.GlobalIndex.X] = sharedMemory[4, 5, 2];
+        }
+
+        [Fact]
+        [KernelMethod(nameof(MultiDimensionalSharedMemoryKernel3DDenseXY))]
+        public void MultiDimensionalSharedMemory3DDenseXY()
+        {
+            int groupSize = Accelerator.MaxNumThreadsPerGroup;
+            using var buffer = Accelerator.Allocate1D<int>(groupSize);
+            Execute(new KernelConfig(1, groupSize), buffer.View);
+
+            var expected = Enumerable.Repeat(42, groupSize).ToArray();
+            Verify(buffer.View, expected);
+        }
+
+        internal static void MultiDimensionalSharedMemoryKernel3DDenseZY(
+            ArrayView1D<int, Stride1D.Dense> output)
+        {
+            var sharedMemory = ILGPU.SharedMemory.Allocate3DDenseZY<int>(
+                new Index3D(11, 17, 13));
+            // One thread writes the marker that every thread reads back.
+            if (Group.IsFirstThread)
+                sharedMemory[4, 5, 2] = 42;
+            Group.Barrier();
+            output[Grid.GlobalIndex.X] = sharedMemory[4, 5, 2];
+        }
+
+        [Fact]
+        [KernelMethod(nameof(MultiDimensionalSharedMemoryKernel3DDenseZY))]
+        public void MultiDimensionalSharedMemory3DDenseZY()
+        {
+            int groupSize = Accelerator.MaxNumThreadsPerGroup;
+            using var buffer = Accelerator.Allocate1D<int>(groupSize);
+            Execute(new KernelConfig(1, groupSize), buffer.View);
+
+            var expected = Enumerable.Repeat(42, groupSize).ToArray();
+            Verify(buffer.View, expected);
+        }
     }
 }
diff --git a/Src/ILGPU/Memory.tt b/Src/ILGPU/Memory.tt
index 3fbb618bd..01f994322 100644
--- a/Src/ILGPU/Memory.tt
+++ b/Src/ILGPU/Memory.tt
@@ -86,4 +86,65 @@ namespace ILGPU
 
 <# } #>
 <# } #>
+
+
+<#
+var axes = new string[]
+{
+    "X",
+    "Y",
+};
+#>
+
+<# foreach (var axis in axes) { #>
+    partial class SharedMemory
+    {
+        /// <summary>
+        /// Allocates a 2D chunk of shared memory with <#= axis #>
+        /// as the leading dimension.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <param name="extent">The number of elements to allocate.</param>
+        /// <returns>An allocated 2D buffer on shared memory.</returns>
+        public static ArrayView2D<T, Stride2D.Dense<#= axis #>>
+            Allocate2DDense<#= axis #><T>(
+            in Index2D extent)
+            where T : unmanaged =>
+            Allocate2D<T, Stride2D.Dense<#= axis #>>(
+                extent,
+                new Stride2D.Dense<#= axis #>(extent.<#= axis #>));
+    }
+<# } #>
+
+<#
+var twoaxes = new (string Axes, string FirstStride, string SecondStride)[]
+{
+    ("XY", "extent.X", "extent.X * extent.Y"),
+    ("ZY", "extent.Z * extent.Y", "extent.Z")
+};
+#>
+
+<# foreach (var (axis, firstStride, secondStride) in twoaxes) { #>
+    partial class SharedMemory
+    {
+        /// <summary>
+        /// Allocates a 3D chunk of shared memory with <#= axis #>
+        /// as the leading dimensions.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <param name="extent">The number of elements to allocate.</param>
+        /// <returns>An allocated 3D buffer on shared memory.</returns>
+        /// <remarks>
+        /// Since <#= axis #> are the leading dimensions, the product of their
+        /// extents must be less than or equal to <see cref="int.MaxValue"/>.
+        /// </remarks>
+        public static ArrayView3D<T, Stride3D.Dense<#= axis #>>
+            Allocate3DDense<#= axis #><T>(
+            in Index3D extent)
+            where T : unmanaged =>
+            Allocate3D<T, Stride3D.Dense<#= axis #>>(
+                extent,
+                new Stride3D.Dense<#= axis #>(<#= firstStride #>, <#= secondStride #>));
+    }
+<# } #>
 }
\ No newline at end of file