object data in local memory

glenco · Sep 16, 2015 · e5802ae · e5802ae
1 parent 6137e50
commit e5802ae
Show file tree

Hide file tree

Showing 17 changed files with 67 additions and 47 deletions.
diff --git a/kernel/lensed.cl b/kernel/lensed.cl
@@ -6,12 +6,17 @@
 #endif
 
 // compute image
-kernel void render(constant float* data, float4 pcs,
-                   constant float2* qq, constant float2* ww,
+kernel void render(ulong dsiz, constant uint* gdata, local uint* ldata,
+                   float4 pcs, constant float2* qq, constant float2* ww,
                    global float* value, global float* error)
 {
     // get pixel index
-    int k = get_global_id(0);
+    size_t k = get_global_id(0);
+
+    // load data from global to local memory
+    for(size_t i = get_local_id(0); i < dsiz; i += get_local_size(0))
+        ldata[i] = gdata[i];
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     // compute pixel flux if pixel is in image
     if(k < IMAGE_SIZE)
@@ -24,7 +29,7 @@ kernel void render(constant float* data, float4 pcs,
 
         // apply quadrature rule to computed surface brightness
         for(size_t n = 0; n < QUAD_POINTS; ++n)
-            f += ww[n]*compute(data, x + qq[n]);
+            f += ww[n]*compute(ldata, x + qq[n]);
 
         // done
         value[k] = f.s0;

diff --git a/objects/devauc.cl b/objects/devauc.cl
@@ -23,13 +23,13 @@ data
     float norm; // normalisation
 };
 
-static float brightness(constant data* this, float2 x)
+static float brightness(local data* this, float2 x)
 {
     // de Vaucouleurs profile for centered and rotated coordinate system
     return this->norm*exp(-DEVAUC_B*sqrt(sqrt(length(mv22(this->t, x - this->x))/this->rs)));
 }
 
-static void set(global data* this, float x, float y, float r, float mag, float q, float pa)
+static void set(local data* this, float x, float y, float r, float mag, float q, float pa)
 {
     float c = cos(pa*DEG2RAD);
     float s = sin(pa*DEG2RAD);

diff --git a/objects/epl.cl b/objects/epl.cl
@@ -23,7 +23,7 @@ data
     float n;  // normalisation
 };
 
-static float2 deflection(constant data* this, float2 x)
+static float2 deflection(local data* this, float2 x)
 {
     float r, phi;
     float2 a, A;
@@ -68,7 +68,7 @@ static float2 deflection(constant data* this, float2 x)
     return mv22(this->w, a);
 }
 
-static void set(global data* this,
+static void set(local data* this,
                 float x1, float x2, float r, float t, float q, float pa)
 {
     float c;

diff --git a/objects/epl_plus_shear.cl b/objects/epl_plus_shear.cl
@@ -26,7 +26,7 @@ data
     float n;  // normalisation
 };
 
-static float2 deflection(constant data* this, float2 x)
+static float2 deflection(local data* this, float2 x)
 {
     float r, phi;
     float2 a, A;
@@ -76,7 +76,7 @@ static float2 deflection(constant data* this, float2 x)
     return y + mv22(this->g, dx);
 }
 
-static void set(global data* this,
+static void set(local data* this,
                 float x1, float x2, float r, float t, float q, float pa, float g1, float g2)
 {
     float c;

diff --git a/objects/exponential.cl b/objects/exponential.cl
@@ -18,13 +18,13 @@ data
     float norm; // normalisation
 };
 
-static float brightness(constant data* this, float2 x)
+static float brightness(local data* this, float2 x)
 {
     // exponential profile for centered and rotated coordinate system
     return this->norm*exp(-length(mv22(this->t, x - this->x))/this->rs);
 }
 
-static void set(global data* this, float x, float y, float rs, float mag, float q, float pa)
+static void set(local data* this, float x, float y, float rs, float mag, float q, float pa)
 {
     float c = cos(pa*DEG2RAD);
     float s = sin(pa*DEG2RAD);

diff --git a/objects/gauss.cl b/objects/gauss.cl
@@ -18,14 +18,14 @@ data
     float norm; // normalisation
 };
 
-static float brightness(constant data* this, float2 x)
+static float brightness(local data* this, float2 x)
 {
     // Gaussian profile for centered and rotated coordinate system
     float2 y = mv22(this->t, x - this->x);
     return this->norm*exp(-0.5f*dot(y, y)/this->s2);
 }
 
-static void set(global data* this, float x, float y, float sigma, float mag, float q, float pa)
+static void set(local data* this, float x, float y, float sigma, float mag, float q, float pa)
 {
     float c = cos(pa*DEG2RAD);
     float s = sin(pa*DEG2RAD);

diff --git a/objects/nsie.cl b/objects/nsie.cl
@@ -26,7 +26,7 @@ data
     float d;
 };
 
-static float2 deflection(constant data* this, float2 x)
+static float2 deflection(local data* this, float2 x)
 {
     float2 y;
     float r;
@@ -45,7 +45,7 @@ static float2 deflection(constant data* this, float2 x)
     return mv22(this->w, y);
 }
 
-static void set(global data* this, float x, float y, float r, float rc, float q, float pa)
+static void set(local data* this, float x, float y, float r, float rc, float q, float pa)
 {
     float c = cos(pa*DEG2RAD);
     float s = sin(pa*DEG2RAD);

diff --git a/objects/nsis.cl b/objects/nsis.cl
@@ -18,7 +18,7 @@ data
     float rc; // core radius
 };
 
-static float2 deflection(constant data* this, float2 x)
+static float2 deflection(local data* this, float2 x)
 {
     // move to central coordinates
     x -= this->x;
@@ -27,7 +27,7 @@ static float2 deflection(constant data* this, float2 x)
     return this->r/(this->rc + length(x))*x;
 }
 
-static void set(global data* this, float x, float y, float r, float rc)
+static void set(local data* this, float x, float y, float r, float rc)
 {
     // lens position
     this->x = (float2)(x, y);

diff --git a/objects/point_mass.cl b/objects/point_mass.cl
@@ -15,14 +15,14 @@ data
     float r2; // Einstein radius squared
 };
 
-static float2 deflection(constant data* this, float2 x)
+static float2 deflection(local data* this, float2 x)
 {
     // point mass deflection
     x -= this->x;
     return this->r2/dot(x, x)*x;
 }
 
-static void set(global data* this, float x, float y, float r)
+static void set(local data* this, float x, float y, float r)
 {
     // lens position
     this->x = (float2)(x, y);

diff --git a/objects/sersic.cl b/objects/sersic.cl
@@ -20,13 +20,13 @@ data
     float m;
 };
 
-static float brightness(constant data* this, float2 x)
+static float brightness(local data* this, float2 x)
 {
     float2 y = mv22(this->t, x - this->x);
     return exp(this->log0 - exp(this->log1 + this->m*log(dot(y, y))));
 }
 
-static void set(global data* this, float x, float y, float r, float mag, float n, float q, float a)
+static void set(local data* this, float x, float y, float r, float mag, float n, float q, float a)
 {
     float b = 1.9992f*n - 0.3271f; // approximation valid for 0.5 < n < 8
 

diff --git a/objects/sie.cl b/objects/sie.cl
@@ -24,7 +24,7 @@ data
     float d;
 };
 
-static float2 deflection(constant data* this, float2 x)
+static float2 deflection(local data* this, float2 x)
 {
     float2 y;
     float r;
@@ -43,7 +43,7 @@ static float2 deflection(constant data* this, float2 x)
     return mv22(this->w, y);
 }
 
-static void set(global data* this, float x, float y, float r, float q, float pa)
+static void set(local data* this, float x, float y, float r, float q, float pa)
 {
     float c = cos(pa*DEG2RAD);
     float s = sin(pa*DEG2RAD);

diff --git a/objects/sie_plus_shear.cl b/objects/sie_plus_shear.cl
@@ -27,7 +27,7 @@ data
     float d;
 };
 
-static float2 deflection(constant data* this, float2 x)
+static float2 deflection(local data* this, float2 x)
 {
     float2 y;
     float r;
@@ -46,7 +46,7 @@ static float2 deflection(constant data* this, float2 x)
     return mv22(this->w, y) + mv22(this->g, x);
 }
 
-static void set(global data* this, float x, float y, float r, float q, float pa, float g1, float g2)
+static void set(local data* this, float x, float y, float r, float q, float pa, float g1, float g2)
 {
     float c = cos(pa*DEG2RAD);
     float s = sin(pa*DEG2RAD);

diff --git a/objects/sis.cl b/objects/sis.cl
@@ -16,13 +16,13 @@ data
     float r;  // Einstein radius
 };
 
-static float2 deflection(constant data* this, float2 x)
+static float2 deflection(local data* this, float2 x)
 {
     // SIS deflection
     return this->r*normalize(x - this->x);
 }
 
-static void set(global data* this, float x, float y, float r)
+static void set(local data* this, float x, float y, float r)
 {
     // lens position
     this->x = (float2)(x, y);

diff --git a/objects/sis_plus_shear.cl b/objects/sis_plus_shear.cl
@@ -18,7 +18,7 @@ data
     float r;  // Einstein radius
 };
 
-static float2 deflection(constant data* this, float2 x)
+static float2 deflection(local data* this, float2 x)
 {
     // move to central coordinates
     x -= this->x;
@@ -27,7 +27,7 @@ static float2 deflection(constant data* this, float2 x)
     return this->r*normalize(x) + mv22(this->g, x);
 }
 
-static void set(global data* this, float x, float y, float r, float g1, float g2)
+static void set(local data* this, float x, float y, float r, float g1, float g2)
 {
     // lens position
     this->x = (float2)(x, y);

diff --git a/objects/sky.cl b/objects/sky.cl
@@ -13,12 +13,12 @@ data
     float2 grad;
 };
 
-static float foreground(constant data* this, float2 x)
+static float foreground(local data* this, float2 x)
 {
     return this->bg + dot(this->grad, x - (float2)(1, 1));
 }
 
-static void set(global data* this, float bg, float dx, float dy)
+static void set(local data* this, float bg, float dx, float dy)
 {
     this->bg = bg;
     this->grad = (float2)(dx, dy);

diff --git a/src/kernel.c b/src/kernel.c
@@ -62,7 +62,7 @@ static const char PARSKERN[] =
 
 // kernel to compute images
 static const char COMPHEAD[] =
-    "static float compute(constant float* data, float2 x)\n"
+    "static float compute(local uint* data, float2 x)\n"
     "{\n"
     "    // ray position\n"
     "    float2 y = x;\n"
@@ -80,7 +80,7 @@ static const char COMPLHED[] =
     "        // calculate deflection\n"
 ;
 static const char COMPLENS[] =
-    "        a += deflection_%s((constant void*)(data + %zu), y);\n"
+    "        a += deflection_%s((local void*)(data + %zu), y);\n"
 ;
 static const char COMPDEFL[] =
     "        \n"
@@ -93,14 +93,14 @@ static const char COMPSHED[] =
     "    // calculate surface brightness\n"
 ;
 static const char COMPSRCE[] =
-    "    f += brightness_%s((constant void*)(data + %zu), y);\n"
+    "    f += brightness_%s((local void*)(data + %zu), y);\n"
 ;
 static const char COMPFHED[] =
     "    \n"
     "    // add foreground\n"
 ;
 static const char COMPFGND[] =
-    "    f += foreground_%s((constant void*)(data + %zu), x);\n"
+    "    f += foreground_%s((local void*)(data + %zu), x);\n"
 ;
 static const char COMPFOOT[] =
     "    \n"
@@ -111,13 +111,23 @@ static const char COMPFOOT[] =
 
 // kernel to set parameters
 static const char SETPHEAD[] =
-    "kernel void set_params(global float* data, constant float* params)\n"
+    "kernel void set_params(ulong dsiz, global int* gdata, local int* ldata,\n"
+    "                       constant float* params)\n"
     "{\n"
+    "    // load parameters to local memory\n"
+    "    for(size_t i = get_local_id(0); i < dsiz; i += get_local_size(0))\n"
+    "        ldata[i] = gdata[i];\n"
+    "    \n"
 ;
-static const char SETPLEFT[] = "    set_%s((global void*)(data + %zu)";
+static const char SETPLEFT[] = "    set_%s((local void*)(ldata + %zu)";
 static const char SETPARGS[] = ", params[%zu]";
 static const char SETPRGHT[] = ");\n";
 static const char SETPFOOT[] =
+    "    \n"
+    "    // store parameters to global memory\n"
+    "    for(size_t i = get_local_id(0); i < dsiz; i += get_local_size(0))\n"
+    "        gdata[i] = ldata[i];\n"
+    "    \n"
     "}\n"
 ;
 

diff --git a/src/lensed.c b/src/lensed.c
@@ -63,6 +63,7 @@ int main(int argc, char* argv[])
     cl_ulong local_mem_size;
 
     // buffer for objects
+    cl_ulong object_size;
     cl_mem object_mem;
 
     // buffers for quadrature rule
@@ -676,7 +677,7 @@ int main(int argc, char* argv[])
     // create buffer that contains object data
     {
         // collect total size of object data, in units of sizeof(cl_float)
-        size_t object_size = 0;
+        object_size = 0;
         for(size_t i = 0; i < inp->nobjs; ++i)
             object_size += inp->objs[i].size;
 
@@ -706,8 +707,10 @@ int main(int argc, char* argv[])
 
         // set kernel arguments
         err = 0;
-        err |= clSetKernelArg(lensed->set_params, 0, sizeof(cl_mem), &object_mem);
-        err |= clSetKernelArg(lensed->set_params, 1, sizeof(cl_mem), &lensed->params);
+        err |= clSetKernelArg(lensed->set_params, 0, sizeof(cl_ulong), &object_size);
+        err |= clSetKernelArg(lensed->set_params, 1, sizeof(cl_mem), &object_mem);
+        err |= clSetKernelArg(lensed->set_params, 2, object_size*sizeof(cl_uint), NULL);
+        err |= clSetKernelArg(lensed->set_params, 3, sizeof(cl_mem), &lensed->params);
         if(err != CL_SUCCESS)
             error("failed to set kernel arguments for parameters");
     }
@@ -742,12 +745,14 @@ int main(int argc, char* argv[])
 
         // set kernel arguments
         err = 0;
-        err |= clSetKernelArg(lensed->render, 0, sizeof(cl_mem), &object_mem);
-        err |= clSetKernelArg(lensed->render, 1, sizeof(cl_float4), &pcs4);
-        err |= clSetKernelArg(lensed->render, 2, sizeof(cl_mem), &qq_mem);
-        err |= clSetKernelArg(lensed->render, 3, sizeof(cl_mem), &ww_mem);
-        err |= clSetKernelArg(lensed->render, 4, sizeof(cl_mem), &lensed->value_mem);
-        err |= clSetKernelArg(lensed->render, 5, sizeof(cl_mem), &lensed->error_mem);
+        err |= clSetKernelArg(lensed->render, 0, sizeof(cl_ulong), &object_size);
+        err |= clSetKernelArg(lensed->render, 1, sizeof(cl_mem), &object_mem);
+        err |= clSetKernelArg(lensed->render, 2, object_size*sizeof(cl_uint), NULL);
+        err |= clSetKernelArg(lensed->render, 3, sizeof(cl_float4), &pcs4);
+        err |= clSetKernelArg(lensed->render, 4, sizeof(cl_mem), &qq_mem);
+        err |= clSetKernelArg(lensed->render, 5, sizeof(cl_mem), &ww_mem);
+        err |= clSetKernelArg(lensed->render, 6, sizeof(cl_mem), &lensed->value_mem);
+        err |= clSetKernelArg(lensed->render, 7, sizeof(cl_mem), &lensed->error_mem);
         if(err != CL_SUCCESS)
             error("failed to set render kernel arguments");