Skip to content

Commit

Permalink
Bugfix/EdgeCarryShift mumax/3#339
Browse files Browse the repository at this point in the history
  • Loading branch information
MathieuMoalic committed Oct 18, 2024
1 parent 5a04a0e commit 2ae38f6
Show file tree
Hide file tree
Showing 9 changed files with 562 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/cuda/cuda2go.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ func wrapgen(filename, funcname string, argt, argn []string) {
// find corresponding .PTX files
if ls == nil {
dir, errd := os.Open(".")
defer dir.Close()
log.Log.PanicIfError(errd)
defer dir.Close()
var errls error
ls, errls = dir.Readdirnames(-1)
log.Log.PanicIfError(errls)
Expand Down
30 changes: 30 additions & 0 deletions src/cuda/shift.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,21 @@ func ShiftX(dst, src *data.Slice, shiftX int, clampL, clampR float32) {
k_shiftx_async(dst.DevPtr(0), src.DevPtr(0), N[X], N[Y], N[Z], shiftX, clampL, clampR, cfg)
}

// Shifts a component `src` of a vector field by `shiftX` cells along the X-axis.
// Unlike the normal `shift()`, the new edge value is the current edge value.
//
// To avoid the situation where the magnetization could be set to (0,0,0) within the geometry, it is
// also required to pass the two other vector components `othercomp` and `anothercomp` to this function.
// In cells where the vector (`src`, `othercomp`, `anothercomp`) is the zero-vector,
// `clampL` or `clampR` is used for the component `src` instead.
func ShiftEdgeCarryX(dst, src, othercomp, anothercomp *data.Slice, shiftX int, clampL, clampR float32) {
log.AssertMsg(dst.NComp() == 1 && src.NComp() == 1 && othercomp.NComp() == 1 && anothercomp.NComp() == 1, "Component mismatch: dst, src, othercomp and anothercomp must all have 1 component in ShiftEdgeCarryX")
log.AssertMsg(dst.Len() == src.Len(), "Length mismatch: dst and src must have the same length in ShiftEdgeCarryX")
N := dst.Size()
cfg := make3DConf(N)
k_shiftedgecarryX_async(dst.DevPtr(0), src.DevPtr(0), othercomp.DevPtr(0), anothercomp.DevPtr(0), N[X], N[Y], N[Z], shiftX, clampL, clampR, cfg)
}

func ShiftY(dst, src *data.Slice, shiftY int, clampL, clampR float32) {
log.AssertMsg(dst.NComp() == 1 && src.NComp() == 1, "Component mismatch: dst and src must both have 1 component in ShiftY")
log.AssertMsg(dst.Len() == src.Len(), "Length mismatch: dst and src must have the same length in ShiftY")
Expand All @@ -23,6 +38,21 @@ func ShiftY(dst, src *data.Slice, shiftY int, clampL, clampR float32) {
k_shifty_async(dst.DevPtr(0), src.DevPtr(0), N[X], N[Y], N[Z], shiftY, clampL, clampR, cfg)
}

// Shifts a component `src` of a vector field by `shiftY` cells along the Y-axis.
// Unlike the normal `shift()`, the new edge value is the current edge value.
//
// To avoid the situation where the magnetization could be set to (0,0,0) within the geometry, it is
// also required to pass the two other vector components `othercomp` and `anothercomp` to this function.
// In cells where the vector (`src`, `othercomp`, `anothercomp`) is the zero-vector,
// `clampD` or `clampU` is used for the component `src` instead.
func ShiftEdgeCarry(dst, src, othercomp, anothercomp *data.Slice, shiftY int, clampL, clampR float32) {
log.AssertMsg(dst.NComp() == 1 && src.NComp() == 1 && othercomp.NComp() == 1 && anothercomp.NComp() == 1, "Component mismatch: dst, src, othercomp and anothercomp must all have 1 component in ShiftEdgeCarry")
log.AssertMsg(dst.Len() == src.Len(), "Length mismatch: dst and src must have the same length in ShiftEdgeCarry")
N := dst.Size()
cfg := make3DConf(N)
k_shiftedgecarryY_async(dst.DevPtr(0), src.DevPtr(0), othercomp.DevPtr(0), anothercomp.DevPtr(0), N[X], N[Y], N[Z], shiftY, clampL, clampR, cfg)
}

func ShiftZ(dst, src *data.Slice, shiftZ int, clampL, clampR float32) {
log.AssertMsg(dst.NComp() == 1 && src.NComp() == 1, "Component mismatch: dst and src must both have 1 component in ShiftZ")
log.AssertMsg(dst.Len() == src.Len(), "Length mismatch: dst and src must have the same length in ShiftZ")
Expand Down
37 changes: 37 additions & 0 deletions src/cuda/shiftedgecarryx.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#include "stencil.h"

// Shifts a component `src` of a vector field by `shx` cells along the X-axis.
// Unlike the normal `shiftx()`, the new edge value is the current edge value.
//
// To avoid the situation where the magnetization could be set to (0,0,0) within the geometry, it is
// also required to pass the two other vector components `othercomp` and `anothercomp` to this function.
// In cells where the vector (`src`, `othercomp`, `anothercomp`) is the zero-vector,
// `clampL` or `clampR` is used for the component `src` instead.
extern "C" __global__ void
shiftedgecarryX(float* __restrict__ dst, float* __restrict__ src,
float* __restrict__ othercomp, float* __restrict__ anothercomp,
int Nx, int Ny, int Nz, int shx, float clampL, float clampR) {

int ix = blockIdx.x * blockDim.x + threadIdx.x;
int iy = blockIdx.y * blockDim.y + threadIdx.y;
int iz = blockIdx.z * blockDim.z + threadIdx.z;

if(ix < Nx && iy < Ny && iz < Nz) {
int ix2 = ix-shx; // old X-index
float newval;
if (ix2 < 0) { // left edge (shifting right)
newval = src[idx(0, iy, iz)];
if (newval == 0 && othercomp[idx(0, iy, iz)] == 0 && anothercomp[idx(0, iy, iz)] == 0) { // If zero-vector
newval = clampL;
}
} else if (ix2 >= Nx) { // right edge (shifting left)
newval = src[idx(Nx-1, iy, iz)];
if (newval == 0 && othercomp[idx(Nx-1, iy, iz)] == 0 && anothercomp[idx(Nx-1, iy, iz)] == 0) { // If zero-vector
newval = clampR;
}
} else { // bulk, doesn't matter which way the shift is
newval = src[idx(ix2, iy, iz)];
}
dst[idx(ix, iy, iz)] = newval;
}
}
219 changes: 219 additions & 0 deletions src/cuda/shiftedgecarryx_wrapper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
package cuda

/*
THIS FILE IS AUTO-GENERATED BY CUDA2GO.
EDITING IS FUTILE.
*/

import(
"unsafe"
"github.com/MathieuMoalic/amumax/src/cuda/cu"
"github.com/MathieuMoalic/amumax/src/timer"
"sync"
)

// CUDA handle for shiftedgecarryX kernel
var shiftedgecarryX_code cu.Function

// Stores the arguments for shiftedgecarryX kernel invocation
type shiftedgecarryX_args_t struct{
arg_dst unsafe.Pointer
arg_src unsafe.Pointer
arg_othercomp unsafe.Pointer
arg_anothercomp unsafe.Pointer
arg_Nx int
arg_Ny int
arg_Nz int
arg_shx int
arg_clampL float32
arg_clampR float32
argptr [10]unsafe.Pointer
sync.Mutex
}

// Stores the arguments for shiftedgecarryX kernel invocation
var shiftedgecarryX_args shiftedgecarryX_args_t

func init(){
// CUDA driver kernel call wants pointers to arguments, set them up once.
shiftedgecarryX_args.argptr[0] = unsafe.Pointer(&shiftedgecarryX_args.arg_dst)
shiftedgecarryX_args.argptr[1] = unsafe.Pointer(&shiftedgecarryX_args.arg_src)
shiftedgecarryX_args.argptr[2] = unsafe.Pointer(&shiftedgecarryX_args.arg_othercomp)
shiftedgecarryX_args.argptr[3] = unsafe.Pointer(&shiftedgecarryX_args.arg_anothercomp)
shiftedgecarryX_args.argptr[4] = unsafe.Pointer(&shiftedgecarryX_args.arg_Nx)
shiftedgecarryX_args.argptr[5] = unsafe.Pointer(&shiftedgecarryX_args.arg_Ny)
shiftedgecarryX_args.argptr[6] = unsafe.Pointer(&shiftedgecarryX_args.arg_Nz)
shiftedgecarryX_args.argptr[7] = unsafe.Pointer(&shiftedgecarryX_args.arg_shx)
shiftedgecarryX_args.argptr[8] = unsafe.Pointer(&shiftedgecarryX_args.arg_clampL)
shiftedgecarryX_args.argptr[9] = unsafe.Pointer(&shiftedgecarryX_args.arg_clampR)
}

// Wrapper for shiftedgecarryX CUDA kernel, asynchronous.
func k_shiftedgecarryX_async ( dst unsafe.Pointer, src unsafe.Pointer, othercomp unsafe.Pointer, anothercomp unsafe.Pointer, Nx int, Ny int, Nz int, shx int, clampL float32, clampR float32, cfg *config) {
if Synchronous{ // debug
Sync()
timer.Start("shiftedgecarryX")
}

shiftedgecarryX_args.Lock()
defer shiftedgecarryX_args.Unlock()

if shiftedgecarryX_code == 0{
shiftedgecarryX_code = fatbinLoad(shiftedgecarryX_map, "shiftedgecarryX")
}

shiftedgecarryX_args.arg_dst = dst
shiftedgecarryX_args.arg_src = src
shiftedgecarryX_args.arg_othercomp = othercomp
shiftedgecarryX_args.arg_anothercomp = anothercomp
shiftedgecarryX_args.arg_Nx = Nx
shiftedgecarryX_args.arg_Ny = Ny
shiftedgecarryX_args.arg_Nz = Nz
shiftedgecarryX_args.arg_shx = shx
shiftedgecarryX_args.arg_clampL = clampL
shiftedgecarryX_args.arg_clampR = clampR


args := shiftedgecarryX_args.argptr[:]
cu.LaunchKernel(shiftedgecarryX_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, stream0, args)

if Synchronous{ // debug
Sync()
timer.Stop("shiftedgecarryX")
}
}

// maps compute capability on PTX code for shiftedgecarryX kernel.
var shiftedgecarryX_map = map[int]string{ 0: "" ,
52: shiftedgecarryX_ptx_52 }

// shiftedgecarryX PTX code for various compute capabilities.
const(
shiftedgecarryX_ptx_52 = `
.version 7.0
.target sm_52
.address_size 64
// .globl shiftedgecarryX
.visible .entry shiftedgecarryX(
.param .u64 shiftedgecarryX_param_0,
.param .u64 shiftedgecarryX_param_1,
.param .u64 shiftedgecarryX_param_2,
.param .u64 shiftedgecarryX_param_3,
.param .u32 shiftedgecarryX_param_4,
.param .u32 shiftedgecarryX_param_5,
.param .u32 shiftedgecarryX_param_6,
.param .u32 shiftedgecarryX_param_7,
.param .f32 shiftedgecarryX_param_8,
.param .f32 shiftedgecarryX_param_9
)
{
.reg .pred %p<14>;
.reg .f32 %f<14>;
.reg .b32 %r<24>;
.reg .b64 %rd<25>;
ld.param.u64 %rd4, [shiftedgecarryX_param_0];
ld.param.u64 %rd5, [shiftedgecarryX_param_1];
ld.param.u64 %rd6, [shiftedgecarryX_param_2];
ld.param.u64 %rd7, [shiftedgecarryX_param_3];
ld.param.u32 %r7, [shiftedgecarryX_param_4];
ld.param.u32 %r8, [shiftedgecarryX_param_5];
ld.param.u32 %r10, [shiftedgecarryX_param_6];
ld.param.u32 %r9, [shiftedgecarryX_param_7];
ld.param.f32 %f7, [shiftedgecarryX_param_8];
ld.param.f32 %f8, [shiftedgecarryX_param_9];
cvta.to.global.u64 %rd1, %rd7;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
mov.u32 %r11, %ntid.x;
mov.u32 %r12, %ctaid.x;
mov.u32 %r13, %tid.x;
mad.lo.s32 %r1, %r11, %r12, %r13;
mov.u32 %r14, %ntid.y;
mov.u32 %r15, %ctaid.y;
mov.u32 %r16, %tid.y;
mad.lo.s32 %r2, %r14, %r15, %r16;
mov.u32 %r17, %ntid.z;
mov.u32 %r18, %ctaid.z;
mov.u32 %r19, %tid.z;
mad.lo.s32 %r3, %r17, %r18, %r19;
setp.lt.s32 %p1, %r1, %r7;
setp.lt.s32 %p2, %r2, %r8;
and.pred %p3, %p1, %p2;
setp.lt.s32 %p4, %r3, %r10;
and.pred %p5, %p3, %p4;
@!%p5 bra BB0_11;
bra.uni BB0_1;
BB0_1:
sub.s32 %r4, %r1, %r9;
setp.lt.s32 %p6, %r4, 0;
mad.lo.s32 %r20, %r3, %r8, %r2;
mul.lo.s32 %r5, %r20, %r7;
@%p6 bra BB0_7;
setp.lt.s32 %p7, %r4, %r7;
@%p7 bra BB0_6;
bra.uni BB0_3;
BB0_6:
add.s32 %r22, %r5, %r4;
mul.wide.s32 %rd14, %r22, 4;
add.s64 %rd15, %rd3, %rd14;
ld.global.nc.f32 %f13, [%rd15];
bra.uni BB0_10;
BB0_7:
mul.wide.s32 %rd16, %r5, 4;
add.s64 %rd17, %rd3, %rd16;
ld.global.nc.f32 %f13, [%rd17];
setp.neu.f32 %p11, %f13, 0f00000000;
@%p11 bra BB0_10;
add.s64 %rd19, %rd2, %rd16;
ld.global.nc.f32 %f11, [%rd19];
setp.neu.f32 %p12, %f11, 0f00000000;
@%p12 bra BB0_10;
add.s64 %rd21, %rd1, %rd16;
ld.global.nc.f32 %f12, [%rd21];
setp.eq.f32 %p13, %f12, 0f00000000;
selp.f32 %f13, %f7, %f13, %p13;
bra.uni BB0_10;
BB0_3:
add.s32 %r21, %r7, %r5;
add.s32 %r6, %r21, -1;
mul.wide.s32 %rd8, %r6, 4;
add.s64 %rd9, %rd3, %rd8;
ld.global.nc.f32 %f13, [%rd9];
setp.neu.f32 %p8, %f13, 0f00000000;
@%p8 bra BB0_10;
add.s64 %rd11, %rd2, %rd8;
ld.global.nc.f32 %f9, [%rd11];
setp.neu.f32 %p9, %f9, 0f00000000;
@%p9 bra BB0_10;
add.s64 %rd13, %rd1, %rd8;
ld.global.nc.f32 %f10, [%rd13];
setp.eq.f32 %p10, %f10, 0f00000000;
selp.f32 %f13, %f8, %f13, %p10;
BB0_10:
cvta.to.global.u64 %rd22, %rd4;
add.s32 %r23, %r5, %r1;
mul.wide.s32 %rd23, %r23, 4;
add.s64 %rd24, %rd22, %rd23;
st.global.f32 [%rd24], %f13;
BB0_11:
ret;
}
`
)
37 changes: 37 additions & 0 deletions src/cuda/shiftedgecarryy.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#include "stencil.h"

// Shifts a component `src` of a vector field by `shy` cells along the Y-axis.
// Unlike the normal `shifty()`, the new edge value is the current edge value.
//
// To avoid the situation where the magnetization could be set to (0,0,0) within the geometry, it is
// also required to pass the two other vector components `othercomp` and `anothercomp` to this function.
// In cells where the vector (`src`, `othercomp`, `anothercomp`) is the zero-vector,
// `clampD` or `clampU` is used for the component `src` instead.
extern "C" __global__ void
shiftedgecarryY(float* __restrict__ dst, float* __restrict__ src,
float* __restrict__ othercomp, float* __restrict__ anothercomp,
int Nx, int Ny, int Nz, int shy, float clampD, float clampU) {

int ix = blockIdx.x * blockDim.x + threadIdx.x;
int iy = blockIdx.y * blockDim.y + threadIdx.y;
int iz = blockIdx.z * blockDim.z + threadIdx.z;

if(ix < Nx && iy < Ny && iz < Nz) {
int iy2 = iy-shy; // old Y-index
float newval;
if (iy2 < 0) { // bottom edge (shifting up)
newval = src[idx(ix, 0, iz)];
if (newval == 0 && othercomp[idx(ix, 0, iz)] == 0 && anothercomp[idx(ix, 0, iz)] == 0) { // If zero-vector
newval = clampD;
}
} else if (iy2 >= Ny) { // top edge (shifting down)
newval = src[idx(ix, Ny-1, iz)];
if (newval == 0 && othercomp[idx(ix, Ny-1, iz)] == 0 && anothercomp[idx(ix, Ny-1, iz)] == 0) { // If zero-vector
newval = clampU;
}
} else { // bulk, doesn't matter which way the shift is
newval = src[idx(ix, iy2, iz)];
}
dst[idx(ix, iy, iz)] = newval;
}
}
Loading

0 comments on commit 2ae38f6

Please sign in to comment.