Skip to content

Commit

Permalink
object data in local memory
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicolas Tessore committed Sep 16, 2015
1 parent 6137e50 commit e5802ae
Show file tree
Hide file tree
Showing 17 changed files with 67 additions and 47 deletions.
13 changes: 9 additions & 4 deletions kernel/lensed.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@
#endif

// compute image
kernel void render(constant float* data, float4 pcs,
constant float2* qq, constant float2* ww,
kernel void render(ulong dsiz, constant uint* gdata, local uint* ldata,
float4 pcs, constant float2* qq, constant float2* ww,
global float* value, global float* error)
{
// get pixel index
int k = get_global_id(0);
size_t k = get_global_id(0);

// load data from global to local memory
for(size_t i = get_local_id(0); i < dsiz; i += get_local_size(0))
ldata[i] = gdata[i];
barrier(CLK_LOCAL_MEM_FENCE);

// compute pixel flux if pixel is in image
if(k < IMAGE_SIZE)
Expand All @@ -24,7 +29,7 @@ kernel void render(constant float* data, float4 pcs,

// apply quadrature rule to computed surface brightness
for(size_t n = 0; n < QUAD_POINTS; ++n)
f += ww[n]*compute(data, x + qq[n]);
f += ww[n]*compute(ldata, x + qq[n]);

// done
value[k] = f.s0;
Expand Down
4 changes: 2 additions & 2 deletions objects/devauc.cl
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ data
float norm; // normalisation
};

static float brightness(constant data* this, float2 x)
static float brightness(local data* this, float2 x)
{
// de Vaucouleurs profile for centered and rotated coordinate system
return this->norm*exp(-DEVAUC_B*sqrt(sqrt(length(mv22(this->t, x - this->x))/this->rs)));
}

static void set(global data* this, float x, float y, float r, float mag, float q, float pa)
static void set(local data* this, float x, float y, float r, float mag, float q, float pa)
{
float c = cos(pa*DEG2RAD);
float s = sin(pa*DEG2RAD);
Expand Down
4 changes: 2 additions & 2 deletions objects/epl.cl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ data
float n; // normalisation
};

static float2 deflection(constant data* this, float2 x)
static float2 deflection(local data* this, float2 x)
{
float r, phi;
float2 a, A;
Expand Down Expand Up @@ -68,7 +68,7 @@ static float2 deflection(constant data* this, float2 x)
return mv22(this->w, a);
}

static void set(global data* this,
static void set(local data* this,
float x1, float x2, float r, float t, float q, float pa)
{
float c;
Expand Down
4 changes: 2 additions & 2 deletions objects/epl_plus_shear.cl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ data
float n; // normalisation
};

static float2 deflection(constant data* this, float2 x)
static float2 deflection(local data* this, float2 x)
{
float r, phi;
float2 a, A;
Expand Down Expand Up @@ -76,7 +76,7 @@ static float2 deflection(constant data* this, float2 x)
return y + mv22(this->g, dx);
}

static void set(global data* this,
static void set(local data* this,
float x1, float x2, float r, float t, float q, float pa, float g1, float g2)
{
float c;
Expand Down
4 changes: 2 additions & 2 deletions objects/exponential.cl
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ data
float norm; // normalisation
};

static float brightness(constant data* this, float2 x)
static float brightness(local data* this, float2 x)
{
// exponential profile for centered and rotated coordinate system
return this->norm*exp(-length(mv22(this->t, x - this->x))/this->rs);
}

static void set(global data* this, float x, float y, float rs, float mag, float q, float pa)
static void set(local data* this, float x, float y, float rs, float mag, float q, float pa)
{
float c = cos(pa*DEG2RAD);
float s = sin(pa*DEG2RAD);
Expand Down
4 changes: 2 additions & 2 deletions objects/gauss.cl
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ data
float norm; // normalisation
};

static float brightness(constant data* this, float2 x)
static float brightness(local data* this, float2 x)
{
// Gaussian profile for centered and rotated coordinate system
float2 y = mv22(this->t, x - this->x);
return this->norm*exp(-0.5f*dot(y, y)/this->s2);
}

static void set(global data* this, float x, float y, float sigma, float mag, float q, float pa)
static void set(local data* this, float x, float y, float sigma, float mag, float q, float pa)
{
float c = cos(pa*DEG2RAD);
float s = sin(pa*DEG2RAD);
Expand Down
4 changes: 2 additions & 2 deletions objects/nsie.cl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ data
float d;
};

static float2 deflection(constant data* this, float2 x)
static float2 deflection(local data* this, float2 x)
{
float2 y;
float r;
Expand All @@ -45,7 +45,7 @@ static float2 deflection(constant data* this, float2 x)
return mv22(this->w, y);
}

static void set(global data* this, float x, float y, float r, float rc, float q, float pa)
static void set(local data* this, float x, float y, float r, float rc, float q, float pa)
{
float c = cos(pa*DEG2RAD);
float s = sin(pa*DEG2RAD);
Expand Down
4 changes: 2 additions & 2 deletions objects/nsis.cl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ data
float rc; // core radius
};

static float2 deflection(constant data* this, float2 x)
static float2 deflection(local data* this, float2 x)
{
// move to central coordinates
x -= this->x;
Expand All @@ -27,7 +27,7 @@ static float2 deflection(constant data* this, float2 x)
return this->r/(this->rc + length(x))*x;
}

static void set(global data* this, float x, float y, float r, float rc)
static void set(local data* this, float x, float y, float r, float rc)
{
// lens position
this->x = (float2)(x, y);
Expand Down
4 changes: 2 additions & 2 deletions objects/point_mass.cl
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ data
float r2; // Einstein radius squared
};

static float2 deflection(constant data* this, float2 x)
static float2 deflection(local data* this, float2 x)
{
// point mass deflection
x -= this->x;
return this->r2/dot(x, x)*x;
}

static void set(global data* this, float x, float y, float r)
static void set(local data* this, float x, float y, float r)
{
// lens position
this->x = (float2)(x, y);
Expand Down
4 changes: 2 additions & 2 deletions objects/sersic.cl
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ data
float m;
};

static float brightness(constant data* this, float2 x)
static float brightness(local data* this, float2 x)
{
float2 y = mv22(this->t, x - this->x);
return exp(this->log0 - exp(this->log1 + this->m*log(dot(y, y))));
}

static void set(global data* this, float x, float y, float r, float mag, float n, float q, float a)
static void set(local data* this, float x, float y, float r, float mag, float n, float q, float a)
{
float b = 1.9992f*n - 0.3271f; // approximation valid for 0.5 < n < 8

Expand Down
4 changes: 2 additions & 2 deletions objects/sie.cl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ data
float d;
};

static float2 deflection(constant data* this, float2 x)
static float2 deflection(local data* this, float2 x)
{
float2 y;
float r;
Expand All @@ -43,7 +43,7 @@ static float2 deflection(constant data* this, float2 x)
return mv22(this->w, y);
}

static void set(global data* this, float x, float y, float r, float q, float pa)
static void set(local data* this, float x, float y, float r, float q, float pa)
{
float c = cos(pa*DEG2RAD);
float s = sin(pa*DEG2RAD);
Expand Down
4 changes: 2 additions & 2 deletions objects/sie_plus_shear.cl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ data
float d;
};

static float2 deflection(constant data* this, float2 x)
static float2 deflection(local data* this, float2 x)
{
float2 y;
float r;
Expand All @@ -46,7 +46,7 @@ static float2 deflection(constant data* this, float2 x)
return mv22(this->w, y) + mv22(this->g, x);
}

static void set(global data* this, float x, float y, float r, float q, float pa, float g1, float g2)
static void set(local data* this, float x, float y, float r, float q, float pa, float g1, float g2)
{
float c = cos(pa*DEG2RAD);
float s = sin(pa*DEG2RAD);
Expand Down
4 changes: 2 additions & 2 deletions objects/sis.cl
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ data
float r; // Einstein radius
};

static float2 deflection(constant data* this, float2 x)
static float2 deflection(local data* this, float2 x)
{
// SIS deflection
return this->r*normalize(x - this->x);
}

static void set(global data* this, float x, float y, float r)
static void set(local data* this, float x, float y, float r)
{
// lens position
this->x = (float2)(x, y);
Expand Down
4 changes: 2 additions & 2 deletions objects/sis_plus_shear.cl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ data
float r; // Einstein radius
};

static float2 deflection(constant data* this, float2 x)
static float2 deflection(local data* this, float2 x)
{
// move to central coordinates
x -= this->x;
Expand All @@ -27,7 +27,7 @@ static float2 deflection(constant data* this, float2 x)
return this->r*normalize(x) + mv22(this->g, x);
}

static void set(global data* this, float x, float y, float r, float g1, float g2)
static void set(local data* this, float x, float y, float r, float g1, float g2)
{
// lens position
this->x = (float2)(x, y);
Expand Down
4 changes: 2 additions & 2 deletions objects/sky.cl
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ data
float2 grad;
};

static float foreground(constant data* this, float2 x)
static float foreground(local data* this, float2 x)
{
return this->bg + dot(this->grad, x - (float2)(1, 1));
}

static void set(global data* this, float bg, float dx, float dy)
static void set(local data* this, float bg, float dx, float dy)
{
this->bg = bg;
this->grad = (float2)(dx, dy);
Expand Down
22 changes: 16 additions & 6 deletions src/kernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ static const char PARSKERN[] =

// kernel to compute images
static const char COMPHEAD[] =
"static float compute(constant float* data, float2 x)\n"
"static float compute(local uint* data, float2 x)\n"
"{\n"
" // ray position\n"
" float2 y = x;\n"
Expand All @@ -80,7 +80,7 @@ static const char COMPLHED[] =
" // calculate deflection\n"
;
static const char COMPLENS[] =
" a += deflection_%s((constant void*)(data + %zu), y);\n"
" a += deflection_%s((local void*)(data + %zu), y);\n"
;
static const char COMPDEFL[] =
" \n"
Expand All @@ -93,14 +93,14 @@ static const char COMPSHED[] =
" // calculate surface brightness\n"
;
static const char COMPSRCE[] =
" f += brightness_%s((constant void*)(data + %zu), y);\n"
" f += brightness_%s((local void*)(data + %zu), y);\n"
;
static const char COMPFHED[] =
" \n"
" // add foreground\n"
;
static const char COMPFGND[] =
" f += foreground_%s((constant void*)(data + %zu), x);\n"
" f += foreground_%s((local void*)(data + %zu), x);\n"
;
static const char COMPFOOT[] =
" \n"
Expand All @@ -111,13 +111,23 @@ static const char COMPFOOT[] =

// kernel to set parameters
static const char SETPHEAD[] =
"kernel void set_params(global float* data, constant float* params)\n"
"kernel void set_params(ulong dsiz, global int* gdata, local int* ldata,\n"
" constant float* params)\n"
"{\n"
" // load parameters to local memory\n"
" for(size_t i = get_local_id(0); i < dsiz; i += get_local_size(0))\n"
" ldata[i] = gdata[i];\n"
" \n"
;
static const char SETPLEFT[] = " set_%s((global void*)(data + %zu)";
static const char SETPLEFT[] = " set_%s((local void*)(ldata + %zu)";
static const char SETPARGS[] = ", params[%zu]";
static const char SETPRGHT[] = ");\n";
static const char SETPFOOT[] =
" \n"
" // store parameters to global memory\n"
" for(size_t i = get_local_id(0); i < dsiz; i += get_local_size(0))\n"
" gdata[i] = ldata[i];\n"
" \n"
"}\n"
;

Expand Down
23 changes: 14 additions & 9 deletions src/lensed.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ int main(int argc, char* argv[])
cl_ulong local_mem_size;

// buffer for objects
cl_ulong object_size;
cl_mem object_mem;

// buffers for quadrature rule
Expand Down Expand Up @@ -676,7 +677,7 @@ int main(int argc, char* argv[])
// create buffer that contains object data
{
// collect total size of object data, in units of sizeof(cl_float)
size_t object_size = 0;
object_size = 0;
for(size_t i = 0; i < inp->nobjs; ++i)
object_size += inp->objs[i].size;

Expand Down Expand Up @@ -706,8 +707,10 @@ int main(int argc, char* argv[])

// set kernel arguments
err = 0;
err |= clSetKernelArg(lensed->set_params, 0, sizeof(cl_mem), &object_mem);
err |= clSetKernelArg(lensed->set_params, 1, sizeof(cl_mem), &lensed->params);
err |= clSetKernelArg(lensed->set_params, 0, sizeof(cl_ulong), &object_size);
err |= clSetKernelArg(lensed->set_params, 1, sizeof(cl_mem), &object_mem);
err |= clSetKernelArg(lensed->set_params, 2, object_size*sizeof(cl_uint), NULL);
err |= clSetKernelArg(lensed->set_params, 3, sizeof(cl_mem), &lensed->params);
if(err != CL_SUCCESS)
error("failed to set kernel arguments for parameters");
}
Expand Down Expand Up @@ -742,12 +745,14 @@ int main(int argc, char* argv[])

// set kernel arguments
err = 0;
err |= clSetKernelArg(lensed->render, 0, sizeof(cl_mem), &object_mem);
err |= clSetKernelArg(lensed->render, 1, sizeof(cl_float4), &pcs4);
err |= clSetKernelArg(lensed->render, 2, sizeof(cl_mem), &qq_mem);
err |= clSetKernelArg(lensed->render, 3, sizeof(cl_mem), &ww_mem);
err |= clSetKernelArg(lensed->render, 4, sizeof(cl_mem), &lensed->value_mem);
err |= clSetKernelArg(lensed->render, 5, sizeof(cl_mem), &lensed->error_mem);
err |= clSetKernelArg(lensed->render, 0, sizeof(cl_ulong), &object_size);
err |= clSetKernelArg(lensed->render, 1, sizeof(cl_mem), &object_mem);
err |= clSetKernelArg(lensed->render, 2, object_size*sizeof(cl_uint), NULL);
err |= clSetKernelArg(lensed->render, 3, sizeof(cl_float4), &pcs4);
err |= clSetKernelArg(lensed->render, 4, sizeof(cl_mem), &qq_mem);
err |= clSetKernelArg(lensed->render, 5, sizeof(cl_mem), &ww_mem);
err |= clSetKernelArg(lensed->render, 6, sizeof(cl_mem), &lensed->value_mem);
err |= clSetKernelArg(lensed->render, 7, sizeof(cl_mem), &lensed->error_mem);
if(err != CL_SUCCESS)
error("failed to set render kernel arguments");

Expand Down

0 comments on commit e5802ae

Please sign in to comment.