Skip to content

Commit

Permalink
Optimized computation of Sobol dim2, +1% overall perf
Browse files Browse the repository at this point in the history
  • Loading branch information
johguenther committed Jan 8, 2025
1 parent 0ee3e72 commit d57b226
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 51 deletions.
45 changes: 27 additions & 18 deletions modules/cpu/math/sobol.ih
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,30 @@ OSPRAY_BEGIN_ISPC_NAMESPACE
#ifdef OSPRAY_TARGET_SYCL
#include "sobol.inl"
#else
extern const uniform unsigned int Sobol_revMatrices[4 * 32];
extern const uniform unsigned int Sobol_revMatrices[3 * 32];
extern const uniform unsigned int Sobol_matrices[5 * 52];
#endif

// Compute component 1 of the Sobol'-sequence (count starting from 0) without
// any table lookup.
// Method from: Ahmed, "An Implementation Algorithm of 2D Sobol Sequence Fast,
// Elegant, and Compact", EGSR 2024, https://doi.org/10.2312/sr.20241147
//
// v: the index as passed by the Sobol_revSampleN callers (they hand in their
//    `revIndex` argument unchanged).
// Returns the folded word after a final reverseBits().
inline uint32 Sobol_revSample1(uint32 v)
{
  // Butterfly-style cascade with strides 16, 8, 4, 2, 1: at each stride the
  // bits sitting at positions whose stride bit is clear are XORed into the
  // partner position that has this stride bit set. The statement order is
  // kept exactly as in the reference formulation.
  uint32 folded = v ^ (v << 16);
  folded ^= (folded & 0x00FF00FF) << 8;
  folded ^= (folded & 0x0F0F0F0F) << 4;
  folded ^= (folded & 0x33333333) << 2;
  folded ^= (folded & 0x55555555) << 1;

  // The cascade works on the word as given; the result is bit-reversed last.
  const uint32 sample = reverseBits(folded);
  return sample;
}

// compute components 1 and 2 of the Sobol'-sequence (count starting from 0)
inline vec2ui Sobol_revSample2(uint32 revIndex)
{
vec2ui result = make_vec2ui(0);
for (uniform uint32 i = 0; revIndex; revIndex <<= 1, i += 4) {
if (revIndex & 0x80000000u) {
result.x ^= Sobol_revMatrices[i];
result.y ^= Sobol_revMatrices[i + 1];
}
vec2ui result = make_vec2ui(Sobol_revSample1(revIndex), 0);
for (uniform uint32 i = 0; revIndex; revIndex <<= 1, i += 3) {
if (revIndex & 0x80000000u)
result.y ^= Sobol_revMatrices[i];
}

return result;
Expand All @@ -51,12 +62,11 @@ inline vec2ui Sobol_revSample2(uint32 revIndex)
// compute components 1 to 3 of the Sobol'-sequence (count starting from 0)
inline vec3ui Sobol_revSample3(uint32 revIndex)
{
vec3ui result = make_vec3ui(0);
for (uniform uint32 i = 0; revIndex; revIndex <<= 1, i += 4) {
vec3ui result = make_vec3ui(Sobol_revSample1(revIndex), 0, 0);
for (uniform uint32 i = 0; revIndex; revIndex <<= 1, i += 3) {
if (revIndex & 0x80000000u) {
result.x ^= Sobol_revMatrices[i];
result.y ^= Sobol_revMatrices[i + 1];
result.z ^= Sobol_revMatrices[i + 2];
result.y ^= Sobol_revMatrices[i];
result.z ^= Sobol_revMatrices[i + 1];
}
}

Expand All @@ -66,13 +76,12 @@ inline vec3ui Sobol_revSample3(uint32 revIndex)
// compute components 1 to 4 of the Sobol'-sequence (count starting from 0)
inline vec4ui Sobol_revSample4(uint32 revIndex)
{
vec4ui result = make_vec4ui(0);
for (uniform uint32 i = 0; revIndex; revIndex <<= 1, i += 4) {
vec4ui result = make_vec4ui(Sobol_revSample1(revIndex), 0, 0, 0);
for (uniform uint32 i = 0; revIndex; revIndex <<= 1, i += 3) {
if (revIndex & 0x80000000u) {
result.x ^= Sobol_revMatrices[i];
result.y ^= Sobol_revMatrices[i + 1];
result.z ^= Sobol_revMatrices[i + 2];
result.w ^= Sobol_revMatrices[i + 3];
result.y ^= Sobol_revMatrices[i];
result.z ^= Sobol_revMatrices[i + 1];
result.w ^= Sobol_revMatrices[i + 2];
}
}

Expand Down
34 changes: 1 addition & 33 deletions modules/cpu/math/sobol.inl
Original file line number Diff line number Diff line change
Expand Up @@ -30,163 +30,131 @@
// http://web.maths.unsw.edu.au/~fkuo/sobol/new-joe-kuo-6.21201

// bit-reversed and interleaved
OSPRAY_GLOBAL const unsigned int Sobol_revMatrices[4 * 32] = {
0x1u,
OSPRAY_GLOBAL const unsigned int Sobol_revMatrices[3 * 32] = {
0x1u,
0x1u,
0x1u,

0x3u,
0x3u,
0x3u,
0x2u,

0x5u,
0x6u,
0x4u,
0x4u,

0xfu,
0x9u,
0xau,
0xdu,

0x11u,
0x17u,
0x1fu,
0x1fu,

0x33u,
0x3au,
0x2eu,
0x3bu,

0x55u,
0x71u,
0x45u,
0x5eu,

0xffu,
0xa3u,
0xc9u,
0xb9u,

0x101u,
0x116u,
0x11bu,
0x15au,

0x303u,
0x339u,
0x2a4u,
0x3f4u,

0x505u,
0x677u,
0x79au,
0x685u,

0xf0fu,
0x9aau,
0xb67u,
0xd0fu,

0x1111u,
0x1601u,
0x101eu,
0x115bu,

0x3333u,
0x3903u,
0x302du,
0x23f6u,

0x5555u,
0x7706u,
0x4041u,
0x4681u,

0xffffu,
0xaa09u,
0xa0c3u,
0xdd02u,

0x10001u,
0x10117u,
0x1f104u,
0x1e144u,

0x30003u,
0x3033au,
0x2e28au,
0x393cdu,

0x50005u,
0x60671u,
0x457dfu,
0x5a6dfu,

0xf000fu,
0x909a3u,
0xc9baeu,
0xb4dbbu,

0x110011u,
0x171616u,
0x11a105u,
0x14401eu,

0x330033u,
0x3a3939u,
0x2a7289u,
0x3cd039u,

0x550055u,
0x717777u,
0x79e7dbu,
0x6df05au,

0xff00ffu,
0xa3aaaau,
0xb6dba4u,
0xdbb0b4u,

0x1010101u,
0x1170001u,
0x100011au,
0x101e145u,

0x3030303u,
0x33a0003u,
0x30002a7u,
0x20393cfu,

0x5050505u,
0x6710006u,
0x400079eu,
0x405a6dbu,

0xf0f0f0fu,
0x9a30009u,
0xa000b6du,
0xd0b4db6u,

0x11111111u,
0x16160017u,
0x1f001001u,
0x1f144001u,

0x33333333u,
0x3939003au,
0x2e003003u,
0x3b3cd002u,

0x55555555u,
0x77770071u,
0x45004004u,
0x5e6df004u,

0xffffffffu,
0xaaaa00a3u,
0xc900a00au,
0xb9dbb00du,
Expand Down

0 comments on commit d57b226

Please sign in to comment.