mumax · godsic · Aug 5, 2020 · Aug 3, 2020
diff --git a/cuda/slonczewski.go b/cuda/slonczewski.go
@@ -1,17 +1,15 @@
 package cuda
 
 import (
-	"unsafe"
-
 	"github.com/mumax/3/data"
 )
 
 // Add Slonczewski ST torque to torque (Tesla).
-// see slonczewski.cu
+// see slonczewski2.cu
-func AddSlonczewskiTorque2(torque, m *data.Slice, Msat, J, fixedP, alpha, pol, λ, ε_prime MSlice, flp float64, mesh *data.Mesh) {
+func AddSlonczewskiTorque2(torque, m *data.Slice, Msat, J, fixedP, alpha, pol, λ, ε_prime MSlice, thickness MSlice, flp float64, mesh *data.Mesh) {
 	N := torque.Len()
 	cfg := make1DConf(N)
-	flt := float32(flp * mesh.WorldSize()[Z])
+	meshThickness := mesh.WorldSize()[Z]
 
 	k_addslonczewskitorque2_async(
 		torque.DevPtr(X), torque.DevPtr(Y), torque.DevPtr(Z),
@@ -25,6 +23,8 @@ func AddSlonczewskiTorque2(torque, m *data.Slice, Msat, J, fixedP, alpha, pol,
 		pol.DevPtr(0), pol.Mul(0),
 		λ.DevPtr(0), λ.Mul(0),
 		ε_prime.DevPtr(0), ε_prime.Mul(0),
-		unsafe.Pointer(uintptr(0)), flt,
+		thickness.DevPtr(0), thickness.Mul(0),
+		float32(meshThickness),
+		float32(flp),
 		N, cfg)
 }
diff --git a/cuda/slonczewski2.cu b/cuda/slonczewski2.cu
@@ -9,16 +9,18 @@
 extern "C" __global__ void
 addslonczewskitorque2(float* __restrict__ tx, float* __restrict__ ty, float* __restrict__ tz,
                       float* __restrict__ mx, float* __restrict__ my, float* __restrict__ mz,
-                      float* __restrict__ Ms_,      float  Ms_mul,
-                      float* __restrict__ jz_,      float  jz_mul,
-                      float* __restrict__ px_,      float  px_mul,
-                      float* __restrict__ py_,      float  py_mul,
-                      float* __restrict__ pz_,      float  pz_mul,
-                      float* __restrict__ alpha_,   float  alpha_mul,
-                      float* __restrict__ pol_,     float  pol_mul,
-                      float* __restrict__ lambda_,  float  lambda_mul,
-                      float* __restrict__ epsPrime_,float  epsPrime_mul,
-                      float* __restrict__ flt_,     float  flt_mul,
+                      float* __restrict__ Ms_,        float  Ms_mul,
+                      float* __restrict__ jz_,        float  jz_mul,
+                      float* __restrict__ px_,        float  px_mul,
+                      float* __restrict__ py_,        float  py_mul,
+                      float* __restrict__ pz_,        float  pz_mul,
+                      float* __restrict__ alpha_,     float  alpha_mul,
+                      float* __restrict__ pol_,       float  pol_mul,
+                      float* __restrict__ lambda_,    float  lambda_mul,
+                      float* __restrict__ epsPrime_,  float  epsPrime_mul,
+                      float* __restrict__ thickness_, float  thickness_mul,
+                      float meshThickness,
+                      float freeLayerPosition,
                       int N) {
 
     int i =  ( blockIdx.y*gridDim.x + blockIdx.x ) * blockDim.x + threadIdx.x;
@@ -29,16 +31,21 @@ addslonczewskitorque2(float* __restrict__ tx, float* __restrict__ ty, float* __r
         float3 p = normalized(vmul(px_, py_, pz_, px_mul, py_mul, pz_mul, i));
         float  Ms           = amul(Ms_, Ms_mul, i);
         float  alpha        = amul(alpha_, alpha_mul, i);
-        float  flt          = amul(flt_, flt_mul, i);
         float  pol          = amul(pol_, pol_mul, i);
         float  lambda       = amul(lambda_, lambda_mul, i);
         float  epsilonPrime = amul(epsPrime_, epsPrime_mul, i);
 
+        float thickness = amul(thickness_, thickness_mul, i);
+        if (thickness == 0.0) { // if thickness is not set, use the thickness of the mesh instead
+            thickness = meshThickness;
+        }
+        thickness *= freeLayerPosition; // switch sign if fixedlayer is at the bottom
+
         if (J == 0.0f || Ms == 0.0f) {
             return;
         }
 
-        float beta    = (HBAR / QE) * (J / (flt*Ms) );
+        float beta    = (HBAR / QE) * (J / (thickness*Ms) );
         float lambda2 = lambda * lambda;
         float epsilon = pol * lambda2 / ((lambda2 + 1.0f) + (lambda2 - 1.0f) * dot(p, m));