From 668b1c687c1aa2b8ff3c514167c420bfa72acabe Mon Sep 17 00:00:00 2001 From: Fabio Pellacini Date: Tue, 8 Mar 2022 12:57:22 +0100 Subject: [PATCH] Async cuda execution and optix denoiser (#1347) --- CMakePresets.json | 8 +- apps/ycutrace/ycutrace.cpp | 8 +- libs/yocto/yocto_cutrace.cpp | 555 ++++++++++++++++++++++++----------- libs/yocto/yocto_cutrace.cu | 125 ++------ libs/yocto/yocto_cutrace.h | 141 +++++---- libs/yocto/yocto_gui.cpp | 32 +- libs/yocto/yocto_trace.cpp | 75 ++++- libs/yocto/yocto_trace.h | 47 ++- 8 files changed, 604 insertions(+), 387 deletions(-) diff --git a/CMakePresets.json b/CMakePresets.json index edb2d6a2c..4e6a695ca 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -86,8 +86,8 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo", "YOCTO_APPS": "ON", - "YOCTO_EMBREE": "OFF", - "YOCTO_DENOISE": "OFF", + "YOCTO_EMBREE": "ON", + "YOCTO_DENOISE": "ON", "YOCTO_OPENGL": "ON", "YOCTO_CUDA": "ON" }, @@ -104,8 +104,8 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug", "YOCTO_APPS": "ON", - "YOCTO_EMBREE": "OFF", - "YOCTO_DENOISE": "OFF", + "YOCTO_EMBREE": "ON", + "YOCTO_DENOISE": "ON", "YOCTO_OPENGL": "ON", "YOCTO_CUDA": "ON" }, diff --git a/apps/ycutrace/ycutrace.cpp b/apps/ycutrace/ycutrace.cpp index c138de883..fb4dd9fe1 100644 --- a/apps/ycutrace/ycutrace.cpp +++ b/apps/ycutrace/ycutrace.cpp @@ -124,19 +124,19 @@ void run_render(const render_params& params_) { // upload scene to the gpu timer = simple_timer{}; - auto cuscene = make_cutrace_scene(scene, params__); + auto cuscene = make_cutrace_scene(context, scene, params__); print_info("upload scene: {}", elapsed_formatted(timer)); // build bvh timer = simple_timer{}; - auto bvh = make_cutrace_bvh(context, cuscene, scene, params__); + auto bvh = make_cutrace_bvh(context, cuscene, params__); print_info("build bvh: {}", elapsed_formatted(timer)); // init lights - auto lights = make_cutrace_lights(scene, params__); + auto lights = make_cutrace_lights(context, scene, params__); // state - auto state = make_cutrace_state(scene, params__); + auto state = make_cutrace_state(context, scene, params__); // render timer = simple_timer{}; diff --git a/libs/yocto/yocto_cutrace.cpp b/libs/yocto/yocto_cutrace.cpp index 8cfae089f..ff6351da5 100644 --- a/libs/yocto/yocto_cutrace.cpp +++ b/libs/yocto/yocto_cutrace.cpp @@ -46,6 +46,10 @@ #include #include +#ifdef YOCTO_DENOISE +#include +#endif + // ----------------------------------------------------------------------------- // CUDA HELPERS // ----------------------------------------------------------------------------- @@ -59,8 +63,8 @@ static void check_result(CUresult result) { } } -static void check_cusync() { - check_result(cuStreamSynchronize(nullptr)); // TODO: cuda_stream +static void sync_gpu(CUstream stream) { + check_result(cuStreamSynchronize(stream)); } static void check_result(OptixResult result) { @@ -71,91 +75,94 @@ static void check_result(OptixResult result) { // make a buffer template -static cubuffer make_buffer(size_t size, const T* data) { - auto buffer = cubuffer{}; +static cuspan make_buffer(CUstream stream, size_t size, const T* data) { + auto buffer = cuspan{}; buffer._size = size; check_result(cuMemAlloc(&buffer._data, buffer.size_in_bytes())); if (data) { - check_result( - cuMemcpyHtoD(buffer.device_ptr(), data, buffer.size_in_bytes())); + check_result(cuMemcpyHtoDAsync( + buffer.device_ptr(), data, buffer.size_in_bytes(), stream)); } return buffer; } template -static cubuffer make_buffer(const vector& data) { +static cuspan make_buffer(CUstream stream, const vector& data) { if (data.empty()) return {}; - return make_buffer(data.size(), data.data()); + return make_buffer(stream, data.size(), data.data()); } template -static cubuffer make_buffer(const T& data) { - return make_buffer(1, &data); +static cuspan make_buffer(CUstream stream, const T& data) { + return make_buffer(stream, 1, &data); } // resize a buffer template -static void resize_buffer(cubuffer& buffer, size_t size, const T* data) { +static void resize_buffer( + CUstream stream, cuspan& buffer, size_t size, const T* data) { if (buffer._size != size) { - check_result(cuMemFree(buffer._data)); + if (buffer._size != 0) check_result(cuMemFree(buffer._data)); buffer._size = size; check_result(cuMemAlloc(&buffer._data, buffer.size_in_bytes())); } if (data) { - check_result( - cuMemcpyHtoD(buffer.device_ptr(), data, buffer.size_in_bytes())); + check_result(cuMemcpyHtoDAsync( + buffer.device_ptr(), data, buffer.size_in_bytes(), stream)); } } // update a buffer template -static void update_buffer(cubuffer& buffer, size_t size, const T* data) { +static void update_buffer( + CUstream stream, cuspan& buffer, size_t size, const T* data) { if (buffer.size() != size) throw std::runtime_error{"Cuda buffer error"}; - check_result(cuMemcpyHtoD(buffer.device_ptr(), data, buffer.size_in_bytes())); + check_result(cuMemcpyHtoDAsync( + buffer.device_ptr(), data, buffer.size_in_bytes(), stream)); } template -static void update_buffer(cubuffer& buffer, const vector& data) { - return update_buffer(buffer, data.size(), data.data()); +static void update_buffer( + CUstream stream, cuspan& buffer, const vector& data) { + return update_buffer(stream, buffer, data.size(), data.data()); } template -static void update_buffer(cubuffer& buffer, const T& data) { - return update_buffer(buffer, 1, &data); +static void update_buffer(CUstream stream, cuspan& buffer, const T& data) { + return update_buffer(stream, buffer, 1, &data); } // update a buffer template -static void update_buffer_value( - cubuffer& buffer, size_t offset, size_t size, const T1* data) { - check_result( - cuMemcpyHtoD(buffer.device_ptr() + offset, data, size * sizeof(T1))); +static void update_buffer_value(CUstream stream, cuspan& buffer, + size_t offset, size_t size, const T1* data) { + check_result(cuMemcpyHtoDAsync( + buffer.device_ptr() + offset, data, size * sizeof(T1), stream)); } template static void update_buffer_value( - cubuffer& buffer, size_t offset, const T1& data) { - return update_buffer_value(buffer, offset, 1, &data); + CUstream stream, cuspan& buffer, size_t offset, const T1& data) { + return update_buffer_value(stream, buffer, offset, 1, &data); } -// download buffer +// download buffer --- these are synched to avoid errors template -static void download_buffer( - const cubuffer& buffer, size_t size, void* data) { +static void download_buffer(const cuspan& buffer, size_t size, void* data) { if (buffer.size() != size) throw std::runtime_error{"Cuda download error"}; check_result(cuMemcpyDtoH(data, buffer.device_ptr(), buffer.size_in_bytes())); } template -static void download_buffer(const cubuffer& buffer, vector& data) { +static void download_buffer(const cuspan& buffer, vector& data) { return download_buffer(buffer, data.size(), data.data()); } template -static void download_buffer(const cubuffer& buffer, T& data) { +static void download_buffer(const cuspan& buffer, T& data) { return download_buffer(buffer, 1, &data); } template -static vector download_buffer_vector(const cubuffer& buffer) { +static vector download_buffer_vector(const cuspan& buffer) { auto data = vector(buffer.size()); download_buffer(buffer, data.size(), data.data()); return data; } template -static T download_buffer_value(const cubuffer& buffer) { +static T download_buffer_value(const cuspan& buffer) { if (buffer.size() != 1) throw std::runtime_error{"Cuda download error"}; auto data = T{}; download_buffer(buffer, 1, &data); @@ -164,7 +171,7 @@ static T download_buffer_value(const cubuffer& buffer) { // free buffer template -static void clear_buffer(cubuffer& buffer) { +static void clear_buffer(cuspan& buffer) { if (buffer.device_ptr() == 0) return; check_result(cuMemFree(buffer.device_ptr())); buffer._data = 0; @@ -180,9 +187,7 @@ namespace yocto { extern "C" char yocto_cutrace_ptx[]; -cusceneext_data::cusceneext_data(cusceneext_data&& other) { - cutextures.swap(other.cutextures); - cushapes.swap(other.cushapes); +cuscene_data::cuscene_data(cuscene_data&& other) { cameras.swap(other.cameras); textures.swap(other.textures); materials.swap(other.materials); @@ -190,9 +195,7 @@ cusceneext_data::cusceneext_data(cusceneext_data&& other) { instances.swap(other.instances); environments.swap(other.environments); } -cusceneext_data& cusceneext_data::operator=(cusceneext_data&& other) { - cutextures.swap(other.cutextures); - cushapes.swap(other.cushapes); +cuscene_data& cuscene_data::operator=(cuscene_data&& other) { cameras.swap(other.cameras); textures.swap(other.textures); materials.swap(other.materials); @@ -201,17 +204,23 @@ cusceneext_data& cusceneext_data::operator=(cusceneext_data&& other) { environments.swap(other.environments); return *this; } -cusceneext_data::~cusceneext_data() { - for (auto& cutexture : cutextures) { - cuArrayDestroy(cutexture.array); - // TODO: texture +cuscene_data::~cuscene_data() { + if (!textures.empty()) { + auto textures_ = download_buffer_vector(textures); + for (auto& texture : textures_) { + cuArrayDestroy(texture.array); + // TODO: texture + } } - for (auto& cushape : cushapes) { - clear_buffer(cushape.positions); - clear_buffer(cushape.normals); - clear_buffer(cushape.texcoords); - clear_buffer(cushape.colors); - clear_buffer(cushape.triangles); + if (!shapes.empty()) { + auto shapes_ = download_buffer_vector(shapes); + for (auto& shape : shapes_) { + clear_buffer(shape.positions); + clear_buffer(shape.normals); + clear_buffer(shape.texcoords); + clear_buffer(shape.colors); + clear_buffer(shape.triangles); + } } clear_buffer(cameras); clear_buffer(textures); @@ -243,6 +252,7 @@ cubvh_data::~cubvh_data() { } cutrace_context::cutrace_context(cutrace_context&& other) { + std::swap(denoiser, other.denoiser); globals_buffer.swap(other.globals_buffer); raygen_records.swap(other.raygen_records); miss_records.swap(other.miss_records); @@ -258,6 +268,7 @@ cutrace_context::cutrace_context(cutrace_context&& other) { std::swap(cuda_context, other.cuda_context); } cutrace_context& cutrace_context::operator=(cutrace_context&& other) { + std::swap(denoiser, other.denoiser); globals_buffer.swap(other.globals_buffer); raygen_records.swap(other.raygen_records); miss_records.swap(other.miss_records); @@ -274,7 +285,65 @@ cutrace_context& cutrace_context::operator=(cutrace_context&& other) { return *this; } +cutrace_state::cutrace_state(cutrace_state&& other) { + std::swap(width, other.width); + std::swap(height, other.height); + std::swap(samples, other.samples); + image.swap(other.image); + albedo.swap(other.albedo); + normal.swap(other.normal); + hits.swap(other.hits); + rngs.swap(other.rngs); + denoised.swap(other.denoised); + denoiser_state.swap(other.denoiser_state); + denoiser_scratch.swap(other.denoiser_scratch); +} +cutrace_state& cutrace_state::operator=(cutrace_state&& other) { + std::swap(width, other.width); + std::swap(height, other.height); + std::swap(samples, other.samples); + image.swap(other.image); + albedo.swap(other.albedo); + normal.swap(other.normal); + hits.swap(other.hits); + rngs.swap(other.rngs); + denoised.swap(other.denoised); + denoiser_state.swap(other.denoiser_state); + denoiser_scratch.swap(other.denoiser_scratch); + return *this; +} +cutrace_state::~cutrace_state() { + clear_buffer(image); + clear_buffer(albedo); + clear_buffer(normal); + clear_buffer(hits); + clear_buffer(rngs); + clear_buffer(denoised); + clear_buffer(denoiser_state); + clear_buffer(denoiser_scratch); +} + +cutrace_lights::cutrace_lights(cutrace_lights&& other) { + lights.swap(other.lights); +} +cutrace_lights& cutrace_lights::operator=(cutrace_lights&& other) { + lights.swap(other.lights); + return *this; +} +cutrace_lights::~cutrace_lights() { + if (!lights.empty()) { + auto lights_ = download_buffer_vector(lights); + for (auto& light : lights_) { + clear_buffer(light.elements_cdf); + } + } + clear_buffer(lights); +} + cutrace_context::~cutrace_context() { + // denoiser + optixDenoiserDestroy(denoiser); + // global buffer clear_buffer(globals_buffer); @@ -298,6 +367,11 @@ cutrace_context::~cutrace_context() { cuCtxDestroy(cuda_context); } +static void optix_log_callback( + unsigned int level, const char* tag, const char* message, void* cbdata) { + printf("[%s] %s\n", tag, message); +} + // init cuda and optix context cutrace_context make_cutrace_context(const cutrace_params& params) { // context @@ -314,10 +388,16 @@ cutrace_context make_cutrace_context(const cutrace_params& params) { // init cuda device check_result(cuStreamCreate(&context.cuda_stream, CU_STREAM_DEFAULT)); - // init optix device + // init optix device --- disable logging + auto enable_logging = false; + auto ooptions = OptixDeviceContextOptions{}; + ooptions.logCallbackFunction = optix_log_callback; + ooptions.logCallbackData = nullptr; + ooptions.logCallbackLevel = 4; + ooptions.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL; check_result(cuCtxGetCurrent(&context.cuda_context)); - check_result(optixDeviceContextCreate( - context.cuda_context, 0, &context.optix_context)); + check_result(optixDeviceContextCreate(context.cuda_context, + enable_logging ? &ooptions : nullptr, &context.optix_context)); // options auto module_options = OptixModuleCompileOptions{}; @@ -389,13 +469,13 @@ cutrace_context make_cutrace_context(const cutrace_params& params) { auto raygen_record = cutrace_stbrecord{}; check_result( optixSbtRecordPackHeader(context.raygen_program, &raygen_record)); - context.raygen_records = make_buffer(raygen_record); + context.raygen_records = make_buffer(context.cuda_stream, raygen_record); context.binding_table.raygenRecord = context.raygen_records.device_ptr(); // stb miss auto miss_record = cutrace_stbrecord{}; check_result(optixSbtRecordPackHeader(context.miss_program, &miss_record)); - context.miss_records = make_buffer(miss_record); + context.miss_records = make_buffer(context.cuda_stream, miss_record); context.binding_table.missRecordBase = context.miss_records.device_ptr(); context.binding_table.missRecordStrideInBytes = sizeof(cutrace_stbrecord); context.binding_table.missRecordCount = 1; @@ -404,14 +484,28 @@ cutrace_context make_cutrace_context(const cutrace_params& params) { auto hitgroup_record = cutrace_stbrecord{}; check_result( optixSbtRecordPackHeader(context.hitgroup_program, &hitgroup_record)); - context.hitgroup_records = make_buffer(hitgroup_record); + context.hitgroup_records = make_buffer(context.cuda_stream, hitgroup_record); context.binding_table.hitgroupRecordBase = context.hitgroup_records.device_ptr(); context.binding_table.hitgroupRecordStrideInBytes = sizeof(cutrace_stbrecord); context.binding_table.hitgroupRecordCount = 1; // globals - context.globals_buffer = make_buffer(cutrace_globals{}); + context.globals_buffer = make_buffer(context.cuda_stream, cutrace_globals{}); + + // denoiser + auto doptions = OptixDenoiserOptions{}; + doptions.guideAlbedo = (uint) true; + doptions.guideNormal = (uint) true; + check_result(optixDenoiserCreate(context.optix_context, + OPTIX_DENOISER_MODEL_KIND_HDR, &doptions, &context.denoiser)); + + auto denoiser_sizes = OptixDenoiserSizes{}; + check_result(optixDenoiserComputeMemoryResources( + context.denoiser, 1280, 1280, &denoiser_sizes)); + + // sync gpu + sync_gpu(context.cuda_stream); return context; } @@ -422,18 +516,18 @@ void trace_start(cutrace_context& context, cutrace_state& state, const cutrace_lights& lights, const scene_data& scene, const cutrace_params& params) { auto globals = cutrace_globals{}; - update_buffer_value( - context.globals_buffer, offsetof(cutrace_globals, state), state); - update_buffer_value( - context.globals_buffer, offsetof(cutrace_globals, scene), cuscene); - update_buffer_value(context.globals_buffer, offsetof(cutrace_globals, bvh), - bvh.instances_bvh.handle); - update_buffer_value( - context.globals_buffer, offsetof(cutrace_globals, lights), lights); - update_buffer_value( - context.globals_buffer, offsetof(cutrace_globals, params), params); - // sync so we can get the frame - check_cusync(); + update_buffer_value(context.cuda_stream, context.globals_buffer, + offsetof(cutrace_globals, state), state); + update_buffer_value(context.cuda_stream, context.globals_buffer, + offsetof(cutrace_globals, scene), cuscene); + update_buffer_value(context.cuda_stream, context.globals_buffer, + offsetof(cutrace_globals, bvh), bvh.instances_bvh.handle); + update_buffer_value(context.cuda_stream, context.globals_buffer, + offsetof(cutrace_globals, lights), lights); + update_buffer_value(context.cuda_stream, context.globals_buffer, + offsetof(cutrace_globals, params), params); + // sync to avoid errors + sync_gpu(context.cuda_stream); } // render a batch of samples @@ -443,7 +537,7 @@ void trace_samples(cutrace_context& context, cutrace_state& state, const cutrace_params& params) { if (state.samples >= params.samples) return; auto nsamples = params.batch; - update_buffer_value(context.globals_buffer, + update_buffer_value(context.cuda_stream, context.globals_buffer, offsetof(cutrace_globals, state) + offsetof(cutrace_state, samples), state.samples); check_result(optixLaunch(context.optix_pipeline, context.cuda_stream, @@ -451,13 +545,16 @@ void trace_samples(cutrace_context& context, cutrace_state& state, context.globals_buffer.size_in_bytes(), &context.binding_table, state.width, state.height, 1)); state.samples += nsamples; - // sync so we can get the frame - check_cusync(); + if (params.denoise) { + denoise_image(context, state); + } + // sync so we can get the image + sync_gpu(context.cuda_stream); } -cusceneext_data make_cutrace_scene( +cuscene_data make_cutrace_scene(cutrace_context& context, const scene_data& scene, const cutrace_params& params) { - auto cuscene = cusceneext_data{}; + auto cuscene = cuscene_data{}; auto cucameras = vector{}; for (auto& camera : scene.cameras) { @@ -470,23 +567,27 @@ cusceneext_data make_cutrace_scene( cucamera.focus = camera.focus; cucamera.orthographic = camera.orthographic; } - cuscene.cameras = make_buffer(cucameras); + cuscene.cameras = make_buffer(context.cuda_stream, cucameras); // shapes + auto cushapes = vector{}; for (auto& shape : scene.shapes) { - auto& cushape = cuscene.cushapes.emplace_back(); - cushape.positions = make_buffer(shape.positions); - cushape.triangles = make_buffer(shape.triangles); - if (!shape.normals.empty()) cushape.normals = make_buffer(shape.normals); + auto& cushape = cushapes.emplace_back(); + cushape.positions = make_buffer(context.cuda_stream, shape.positions); + cushape.triangles = make_buffer(context.cuda_stream, shape.triangles); + if (!shape.normals.empty()) + cushape.normals = make_buffer(context.cuda_stream, shape.normals); if (!shape.texcoords.empty()) - cushape.texcoords = make_buffer(shape.texcoords); - if (!shape.colors.empty()) cushape.colors = make_buffer(shape.colors); + cushape.texcoords = make_buffer(context.cuda_stream, shape.texcoords); + if (!shape.colors.empty()) + cushape.colors = make_buffer(context.cuda_stream, shape.colors); } - cuscene.shapes = make_buffer(cuscene.cushapes); + cuscene.shapes = make_buffer(context.cuda_stream, cushapes); // textures + auto cutextures = vector{}; for (auto& texture : scene.textures) { - auto& cutexture = cuscene.cutextures.emplace_back(); + auto& cutexture = cutextures.emplace_back(); cutexture.width = texture.width; cutexture.height = texture.height; cutexture.linear = texture.linear; @@ -542,7 +643,7 @@ cusceneext_data make_cutrace_scene( check_result(cuTexObjectCreate(&cutexture.texture, &resource_descriptor, &texture_descriptor, nullptr)); } - cuscene.textures = make_buffer(cuscene.cutextures); + cuscene.textures = make_buffer(context.cuda_stream, cutextures); auto materials = vector{}; for (auto& material : scene.materials) { @@ -563,7 +664,7 @@ cusceneext_data make_cutrace_scene( cumaterial.scattering_tex = material.scattering_tex; cumaterial.normal_tex = material.normal_tex; } - cuscene.materials = make_buffer(materials); + cuscene.materials = make_buffer(context.cuda_stream, materials); auto instances = vector{}; for (auto& instance : scene.instances) { @@ -572,7 +673,7 @@ cusceneext_data make_cutrace_scene( cuinstance.shape = instance.shape; cuinstance.material = instance.material; } - cuscene.instances = make_buffer(instances); + cuscene.instances = make_buffer(context.cuda_stream, instances); auto environments = vector{}; for (auto& environment : scene.environments) { @@ -581,16 +682,16 @@ cusceneext_data make_cutrace_scene( cuenvironment.emission = environment.emission; cuenvironment.emission_tex = environment.emission_tex; } - cuscene.environments = make_buffer(environments); + cuscene.environments = make_buffer(context.cuda_stream, environments); // sync gpu - check_cusync(); + sync_gpu(context.cuda_stream); return cuscene; } -void update_cutrace_cameras(cusceneext_data& cuscene, const scene_data& scene, - const cutrace_params& params) { +void update_cutrace_cameras(cutrace_context& context, cuscene_data& cuscene, + const scene_data& scene, const cutrace_params& params) { auto cucameras = vector{}; for (auto& camera : scene.cameras) { auto& cucamera = cucameras.emplace_back(); @@ -602,18 +703,24 @@ void update_cutrace_cameras(cusceneext_data& cuscene, const scene_data& scene, cucamera.focus = camera.focus; cucamera.orthographic = camera.orthographic; } - update_buffer(cuscene.cameras, cucameras); + update_buffer(context.cuda_stream, cuscene.cameras, cucameras); + sync_gpu(context.cuda_stream); } -cubvh_data make_cutrace_bvh(cutrace_context& context, cusceneext_data& cuscene, - const scene_data& scene, const cutrace_params& params) { +cubvh_data make_cutrace_bvh(cutrace_context& context, const cuscene_data& scene, + const cutrace_params& params) { auto bvh = cubvh_data{}; + // download shapes and instances + // this is not efficient, but keeps the API very clean + // in the future, we might want to merge scene and bvh creation + auto shapes_data = download_buffer_vector(scene.shapes); + auto instances_data = download_buffer_vector(scene.instances); + // shapes bvh.shapes_bvhs.resize(scene.shapes.size()); for (auto shape_id = (size_t)0; shape_id < scene.shapes.size(); shape_id++) { - auto& shape = scene.shapes[shape_id]; - auto& cushape = cuscene.cushapes[shape_id]; + auto& shape = shapes_data[shape_id]; // input auto built_input = OptixBuildInput{}; @@ -621,16 +728,16 @@ cubvh_data make_cutrace_bvh(cutrace_context& context, cusceneext_data& cuscene, built_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3; built_input.triangleArray.vertexStrideInBytes = sizeof(vec3f); built_input.triangleArray.numVertices = (int)shape.positions.size(); - auto vertex_buffer = cushape.positions.device_ptr(); + auto vertex_buffer = shape.positions.device_ptr(); built_input.triangleArray.vertexBuffers = &vertex_buffer; built_input.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3; built_input.triangleArray.indexStrideInBytes = sizeof(vec3i); built_input.triangleArray.numIndexTriplets = (int)shape.triangles.size(); - auto index_buffer = cushape.triangles.device_ptr(); - built_input.triangleArray.indexBuffer = index_buffer; - auto input_flags = (unsigned int)0; - built_input.triangleArray.flags = &input_flags; - built_input.triangleArray.numSbtRecords = 1; + auto index_buffer = shape.triangles.device_ptr(); + built_input.triangleArray.indexBuffer = index_buffer; + auto input_flags = (unsigned int)0; + built_input.triangleArray.flags = &input_flags; + built_input.triangleArray.numSbtRecords = 1; built_input.triangleArray.sbtIndexOffsetBuffer = 0; built_input.triangleArray.sbtIndexOffsetSizeInBytes = 0; built_input.triangleArray.sbtIndexOffsetStrideInBytes = 0; @@ -646,31 +753,37 @@ cubvh_data make_cutrace_bvh(cutrace_context& context, cusceneext_data& cuscene, check_result(optixAccelComputeMemoryUsage(context.optix_context, &accelerator_options, &built_input, (int)1, &accelerator_sizes)); - auto compacted_size_buffer = make_buffer(1, (uint64_t*)nullptr); + auto compacted_size_buffer = make_buffer( + context.cuda_stream, 1, (uint64_t*)nullptr); auto readback_descriptor = OptixAccelEmitDesc{}; readback_descriptor.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE; readback_descriptor.result = compacted_size_buffer.device_ptr(); // build auto temporary_buffer = make_buffer( - accelerator_sizes.tempSizeInBytes, (byte*)nullptr); - auto bvh_buffer = make_buffer( - accelerator_sizes.outputSizeInBytes, (byte*)nullptr); - auto& sbvh = bvh.shapes_bvhs[shape_id]; + context.cuda_stream, accelerator_sizes.tempSizeInBytes, (byte*)nullptr); + auto bvh_buffer = make_buffer(context.cuda_stream, + accelerator_sizes.outputSizeInBytes, (byte*)nullptr); + auto& sbvh = bvh.shapes_bvhs[shape_id]; check_result(optixAccelBuild(context.optix_context, /* cuda_stream */ 0, &accelerator_options, &built_input, (int)1, temporary_buffer.device_ptr(), temporary_buffer.size_in_bytes(), bvh_buffer.device_ptr(), bvh_buffer.size_in_bytes(), &sbvh.handle, &readback_descriptor, 1)); - check_cusync(); + + // sync + sync_gpu(context.cuda_stream); // compact auto compacted_size = download_buffer_value(compacted_size_buffer); - sbvh.buffer = make_buffer(compacted_size, (byte*)nullptr); + sbvh.buffer = make_buffer( + context.cuda_stream, compacted_size, (byte*)nullptr); check_result(optixAccelCompact(context.optix_context, /*cuda_stream:*/ 0, sbvh.handle, sbvh.buffer.device_ptr(), sbvh.buffer.size_in_bytes(), &sbvh.handle)); - check_cusync(); + + // sync + sync_gpu(context.cuda_stream); // cleanup clear_buffer(bvh_buffer); @@ -681,20 +794,20 @@ cubvh_data make_cutrace_bvh(cutrace_context& context, cusceneext_data& cuscene, // instances { // upload data - auto instances = vector(scene.instances.size()); + auto opinstances = vector(scene.instances.size()); for (auto instance_id = 0; instance_id < scene.instances.size(); instance_id++) { - auto& instance = scene.instances[instance_id]; - auto& cuinstance = instances[instance_id]; + auto& instance = instances_data[instance_id]; + auto& opinstance = opinstances[instance_id]; auto transform = transpose(frame_to_mat(instance.frame)); - memcpy(cuinstance.transform, &transform, sizeof(float) * 12); - cuinstance.sbtOffset = 0; - cuinstance.instanceId = instance_id; - cuinstance.traversableHandle = bvh.shapes_bvhs[instance.shape].handle; - cuinstance.flags = OPTIX_INSTANCE_FLAG_NONE; - cuinstance.visibilityMask = 0xff; + memcpy(opinstance.transform, &transform, sizeof(float) * 12); + opinstance.sbtOffset = 0; + opinstance.instanceId = instance_id; + opinstance.traversableHandle = bvh.shapes_bvhs[instance.shape].handle; + opinstance.flags = OPTIX_INSTANCE_FLAG_NONE; + opinstance.visibilityMask = 0xff; } - bvh.instances = make_buffer(instances); + bvh.instances = make_buffer(context.cuda_stream, opinstances); // config auto build_input = OptixBuildInput{}; @@ -712,15 +825,16 @@ cubvh_data make_cutrace_bvh(cutrace_context& context, cusceneext_data& cuscene, check_result(optixAccelComputeMemoryUsage(context.optix_context, &accelerator_options, &build_input, (int)1, &accelerator_sizes)); - auto compacted_size_buffer = make_buffer(1, (uint64_t*)nullptr); + auto compacted_size_buffer = make_buffer( + context.cuda_stream, 1, (uint64_t*)nullptr); auto readback_descriptor = OptixAccelEmitDesc{}; readback_descriptor.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE; readback_descriptor.result = compacted_size_buffer.device_ptr(); // build auto temporary_buffer = make_buffer( - accelerator_sizes.tempSizeInBytes, (byte*)nullptr); - auto bvh_buffer = make_buffer( + context.cuda_stream, accelerator_sizes.tempSizeInBytes, (byte*)nullptr); + auto bvh_buffer = make_buffer(context.cuda_stream, accelerator_sizes.outputSizeInBytes, (byte*)nullptr); auto& ibvh = bvh.instances_bvh; @@ -729,16 +843,20 @@ cubvh_data make_cutrace_bvh(cutrace_context& context, cusceneext_data& cuscene, temporary_buffer.device_ptr(), temporary_buffer.size_in_bytes(), bvh_buffer.device_ptr(), bvh_buffer.size_in_bytes(), &ibvh.handle, &readback_descriptor, 1)); - check_cusync(); + + // sync gpu + sync_gpu(context.cuda_stream); // compact auto compacted_size = download_buffer_value(compacted_size_buffer); - - ibvh.buffer = make_buffer(compacted_size, (byte*)nullptr); + ibvh.buffer = make_buffer( + context.cuda_stream, compacted_size, (byte*)nullptr); check_result(optixAccelCompact(context.optix_context, /*cuda_stream:*/ 0, ibvh.handle, ibvh.buffer.device_ptr(), ibvh.buffer.size_in_bytes(), &ibvh.handle)); - check_cusync(); + + // sync gpu + sync_gpu(context.cuda_stream); // cleanup clear_buffer(bvh_buffer); @@ -747,14 +865,14 @@ cubvh_data make_cutrace_bvh(cutrace_context& context, cusceneext_data& cuscene, } // sync gpu - check_cusync(); + sync_gpu(context.cuda_stream); // done return bvh; } // Initialize state. -cutrace_state make_cutrace_state( +cutrace_state make_cutrace_state(cutrace_context& context, const scene_data& scene, const cutrace_params& params) { auto& camera = scene.cameras[params.camera]; auto state = cutrace_state{}; @@ -766,17 +884,33 @@ cutrace_state make_cutrace_state( state.width = (int)round(params.resolution * camera.aspect); } state.samples = 0; - state.image = make_buffer(state.width * state.height, (vec4f*)nullptr); - state.albedo = make_buffer(state.width * state.height, (vec3f*)nullptr); - state.normal = make_buffer(state.width * state.height, (vec3f*)nullptr); - state.hits = make_buffer(state.width * state.height, (int*)nullptr); - state.rngs = make_buffer(state.width * state.height, (rng_state*)nullptr); - state.display = make_buffer(state.width * state.height, (vec4f*)nullptr); + state.image = make_buffer( + context.cuda_stream, state.width * state.height, (vec4f*)nullptr); + state.albedo = make_buffer( + context.cuda_stream, state.width * state.height, (vec3f*)nullptr); + state.normal = make_buffer( + context.cuda_stream, state.width * state.height, (vec3f*)nullptr); + state.hits = make_buffer( + context.cuda_stream, state.width * state.height, (int*)nullptr); + state.rngs = make_buffer( + context.cuda_stream, state.width * state.height, (rng_state*)nullptr); + if (params.denoise) { + auto denoiser_sizes = OptixDenoiserSizes{}; + check_result(optixDenoiserComputeMemoryResources( + context.denoiser, state.width, state.height, &denoiser_sizes)); + state.denoised = make_buffer( + context.cuda_stream, state.width * state.height, (vec4f*)nullptr); + state.denoiser_state = make_buffer( + context.cuda_stream, denoiser_sizes.stateSizeInBytes, (byte*)nullptr); + state.denoiser_scratch = make_buffer(context.cuda_stream, + denoiser_sizes.withoutOverlapScratchSizeInBytes, (byte*)nullptr); + } + sync_gpu(context.cuda_stream); return state; }; -void reset_cutrace_state(cutrace_state& state, const scene_data& scene, - const cutrace_params& params) { +void reset_cutrace_state(cutrace_context& context, cutrace_state& state, + const scene_data& scene, const cutrace_params& params) { auto& camera = scene.cameras[params.camera]; if (camera.aspect >= 1) { state.width = params.resolution; @@ -786,16 +920,36 @@ void reset_cutrace_state(cutrace_state& state, const scene_data& scene, state.width = (int)round(params.resolution * camera.aspect); } state.samples = 0; - resize_buffer(state.image, state.width * state.height, (vec4f*)nullptr); - resize_buffer(state.albedo, state.width * state.height, (vec3f*)nullptr); - resize_buffer(state.normal, state.width * state.height, (vec3f*)nullptr); - resize_buffer(state.hits, state.width * state.height, (int*)nullptr); - resize_buffer(state.rngs, state.width * state.height, (rng_state*)nullptr); - resize_buffer(state.display, state.width * state.height, (vec4f*)nullptr); + resize_buffer(context.cuda_stream, state.image, state.width * state.height, + (vec4f*)nullptr); + resize_buffer(context.cuda_stream, state.albedo, state.width * state.height, + (vec3f*)nullptr); + resize_buffer(context.cuda_stream, state.normal, state.width * state.height, + (vec3f*)nullptr); + resize_buffer(context.cuda_stream, state.hits, state.width * state.height, + (int*)nullptr); + resize_buffer(context.cuda_stream, state.rngs, state.width * state.height, + (rng_state*)nullptr); + if (params.denoise) { + auto denoiser_sizes = OptixDenoiserSizes{}; + check_result(optixDenoiserComputeMemoryResources( + context.denoiser, state.width, state.height, &denoiser_sizes)); + resize_buffer(context.cuda_stream, state.denoised, + state.width * state.height, (vec4f*)nullptr); + resize_buffer(context.cuda_stream, state.denoiser_state, + denoiser_sizes.stateSizeInBytes, (byte*)nullptr); + resize_buffer(context.cuda_stream, state.denoiser_scratch, + denoiser_sizes.withoutOverlapScratchSizeInBytes, (byte*)nullptr); + } else { + clear_buffer(state.denoised); + clear_buffer(state.denoiser_state); + clear_buffer(state.denoiser_scratch); + } + sync_gpu(context.cuda_stream); } // Init trace lights -cutrace_lights make_cutrace_lights( +cutrace_lights make_cutrace_lights(cutrace_context& context, const scene_data& scene, const cutrace_params& params) { auto lights = make_trace_lights(scene, (const trace_params&)params); auto culights_ = vector{}; @@ -803,10 +957,11 @@ cutrace_lights make_cutrace_lights( auto& culight = culights_.emplace_back(); culight.instance = light.instance; culight.environment = light.environment; - culight.elements_cdf = make_buffer(light.elements_cdf); + culight.elements_cdf = make_buffer(context.cuda_stream, light.elements_cdf); } auto culights = cutrace_lights{}; - culights.lights = make_buffer(culights_); + culights.lights = make_buffer(context.cuda_stream, culights_); + sync_gpu(context.cuda_stream); return culights; } @@ -815,10 +970,10 @@ image_data cutrace_image( const scene_data& scene, const cutrace_params& params) { // initialization auto context = make_cutrace_context(params); - auto cuscene = make_cutrace_scene(scene, params); - auto bvh = make_cutrace_bvh(context, cuscene, scene, params); - auto state = make_cutrace_state(scene, params); - auto lights = make_cutrace_lights(scene, params); + auto cuscene = make_cutrace_scene(context, scene, params); + auto bvh = make_cutrace_bvh(context, cuscene, params); + auto state = make_cutrace_state(context, scene, params); + auto lights = make_cutrace_lights(context, scene, params); // rendering trace_start(context, state, cuscene, bvh, lights, scene, params); @@ -827,7 +982,21 @@ image_data cutrace_image( } // copy back image and return - return get_rendered_image(state); + return get_image(state); +} + +// Get resulting render +image_data get_image(const cutrace_state& state) { + auto image = make_image(state.width, state.height, true); + get_image(image, state); + return image; +} +void get_image(image_data& image, const cutrace_state& state) { + if (state.denoised.empty()) { + download_buffer(state.image, image.pixels); + } else { + download_buffer(state.denoised, image.pixels); + } } // Get resulting render @@ -856,12 +1025,8 @@ void get_denoised_image(image_data& image, const cutrace_state& state) { get_rendered_image(image, state); // get albedo and normal - auto albedo = vector(image.pixels.size()), - normal = vector(image.pixels.size()); - for (auto idx = 0; idx < state.width * state.height; idx++) { - albedo[idx] = state.albedo[idx]; - normal[idx] = state.normal[idx]; - } + auto albedo = download_buffer_vector(state.albedo); + auto normal = download_buffer_vector(state.normal); // Create a denoising filter oidn::FilterRef filter = device.newFilter("RT"); // ray tracing filter @@ -910,6 +1075,41 @@ void get_normal_image(image_data& image, const cutrace_state& state) { } } +// denoise image +void denoise_image(cutrace_context& context, cutrace_state& state) { + // denoiser setup + check_result(optixDenoiserSetup(context.denoiser, context.cuda_stream, + state.width, state.height, state.denoiser_state.device_ptr(), + state.denoiser_state.size_in_bytes(), state.denoiser_scratch.device_ptr(), + state.denoiser_scratch.size_in_bytes())); + + // params + auto dparams = OptixDenoiserParams{}; + + // layers + auto guides = OptixDenoiserGuideLayer{}; + guides.albedo = OptixImage2D{state.albedo.device_ptr(), (uint)state.width, + (uint)state.height, (uint)state.width * sizeof(vec3f), sizeof(vec3f), + OPTIX_PIXEL_FORMAT_FLOAT3}; + guides.normal = OptixImage2D{state.normal.device_ptr(), (uint)state.width, + (uint)state.height, (uint)state.width * sizeof(vec3f), sizeof(vec3f), + OPTIX_PIXEL_FORMAT_FLOAT3}; + auto layers = OptixDenoiserLayer{}; + layers.input = OptixImage2D{state.image.device_ptr(), (uint)state.width, + (uint)state.height, (uint)state.width * sizeof(vec4f), sizeof(vec4f), + OPTIX_PIXEL_FORMAT_FLOAT4}; + layers.output = OptixImage2D{state.denoised.device_ptr(), (uint)state.width, + (uint)state.height, (uint)state.width * sizeof(vec4f), sizeof(vec4f), + OPTIX_PIXEL_FORMAT_FLOAT4}; + + // denoiser execution + check_result(optixDenoiserInvoke(context.denoiser, context.cuda_stream, + &dparams, state.denoiser_state.device_ptr(), + state.denoiser_state.size_in_bytes(), &guides, &layers, 1, 0, 0, + state.denoiser_scratch.device_ptr(), + state.denoiser_scratch.size_in_bytes())); +} + bool is_display(const cutrace_context& context) { auto device = 0, is_display = 0; // check_result(cuDevice(¤t_device)); @@ -927,14 +1127,19 @@ bool is_display(const cutrace_context& context) { // ----------------------------------------------------------------------------- namespace yocto { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4722) +#endif + static void exit_nocuda() { throw std::runtime_error{"Cuda not linked"}; } -cusceneext_data::cusceneext_data(cusceneext_data&& other) { exit_nocuda(); } -cusceneext_data& cusceneext_data::operator=(cusceneext_data&& other) { +cuscene_data::cuscene_data(cuscene_data&& other) { exit_nocuda(); } +cuscene_data& cuscene_data::operator=(cuscene_data&& other) { exit_nocuda(); return *this; } -cusceneext_data::~cusceneext_data() { exit_nocuda(); }; +cuscene_data::~cuscene_data() { exit_nocuda(); }; cubvh_data::cubvh_data(cubvh_data&& other) { exit_nocuda(); } cubvh_data& cubvh_data::operator=(cubvh_data&& other) { @@ -950,6 +1155,13 @@ cutrace_context& cutrace_context::operator=(cutrace_context&& other) { } cutrace_context::~cutrace_context() { exit_nocuda(); } +cutrace_state::~cutrace_state() { exit_nocuda(); } +cutrace_lights::~cutrace_lights() { exit_nocuda(); } + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + image_data cutrace_image( const scene_data& scene, const cutrace_params& params) { exit_nocuda(); @@ -963,38 +1175,38 @@ cutrace_context make_cutrace_context(const cutrace_params& params) { } // Upload the scene to the GPU. -cusceneext_data make_cutrace_scene( +cuscene_data make_cutrace_scene(cutrace_context& context, const scene_data& scene, const cutrace_params& params) { exit_nocuda(); return {}; } // Update cameras -void update_cutrace_cameras(cusceneext_data& cuscene, const scene_data& scene, - const cutrace_params& params) { +void update_cutrace_cameras(cutrace_context& context, cuscene_data& cuscene, + const scene_data& scene, const cutrace_params& params) { exit_nocuda(); } // Build the bvh acceleration structure. -cubvh_data make_cutrace_bvh(cutrace_context& context, cusceneext_data& cuscene, - const scene_data& scene, const cutrace_params& params) { +cubvh_data make_cutrace_bvh(cutrace_context& context, + const cuscene_data& cuscene, const cutrace_params& params) { exit_nocuda(); return {}; } // Initialize state. -cutrace_state make_cutrace_state( +cutrace_state make_cutrace_state(cutrace_context& context, const scene_data& scene, const cutrace_params& params) { exit_nocuda(); return {}; } -void reset_cutrace_state(cutrace_state& state, const scene_data& scene, - const cutrace_params& params) { +void reset_cutrace_state(cutrace_context& context, cutrace_state& state, + const scene_data& scene, const cutrace_params& params) { exit_nocuda(); } // Initialize lights. -cutrace_lights make_cutrace_lights( +cutrace_lights make_cutrace_lights(cutrace_context& context, const scene_data& scene, const cutrace_params& params) { exit_nocuda(); return {}; @@ -1016,6 +1228,13 @@ void trace_samples(cutrace_context& context, cutrace_state& state, exit_nocuda(); } +// Get render +image_data get_image(const cutrace_state& state) { + exit_nocuda(); + return {}; +} +void get_image(image_data& image, const cutrace_state& state) { exit_nocuda(); } + // Get resulting render image_data get_rendered_image(const cutrace_state& state) { exit_nocuda(); diff --git a/libs/yocto/yocto_cutrace.cu b/libs/yocto/yocto_cutrace.cu index ac129b03f..8456bc1ae 100644 --- a/libs/yocto/yocto_cutrace.cu +++ b/libs/yocto/yocto_cutrace.cu @@ -1038,15 +1038,8 @@ namespace yocto { struct rng_state { uint64_t state = 0x853c49e6748fea9bULL; uint64_t inc = 0xda3e39cb94b95bdbULL; - - rng_state() = default; - rng_state(uint64_t state, uint64_t inc); }; -// PCG random numbers from http://www.pcg-random.org/ -inline rng_state::rng_state(uint64_t state, uint64_t inc) - : state{state}, inc{inc} {} - // Next random number, used internally only. inline uint32_t _advance_rng(rng_state& rng) { uint64_t oldstate = rng.state; @@ -2051,7 +2044,7 @@ inline float sample_phasefunction_pdf( namespace yocto { template -struct cubuffer { +struct cuspan { inline bool empty() const { return _size == 0; } inline size_t size() const { return _size; } inline T& operator[](int idx) { return _data[idx]; } @@ -2126,15 +2119,17 @@ namespace yocto { constexpr int invalidid = -1; struct cutrace_state { - int width = 0; - int height = 0; - int samples = 0; - cubuffer image = {}; - cubuffer albedo = {}; - cubuffer normal = {}; - cubuffer hits = {}; - cubuffer rngs = {}; - cubuffer display = {}; + int width = 0; + int height = 0; + int samples = 0; + cuspan image = {}; + cuspan albedo = {}; + cuspan normal = {}; + cuspan hits = {}; + cuspan rngs = {}; + cuspan denoised = {}; + cuspan denoiser_state = {}; + cuspan denoiser_scratch = {}; }; struct cucamera_data { @@ -2188,11 +2183,11 @@ struct cuinstance_data { }; struct cushape_data { - cubuffer positions = {}; - cubuffer normals = {}; - cubuffer texcoords = {}; - cubuffer colors = {}; - cubuffer triangles = {}; + cuspan positions = {}; + cuspan normals = {}; + cuspan texcoords = {}; + cuspan colors = {}; + cuspan triangles = {}; }; struct cuenvironment_data { @@ -2202,12 +2197,12 @@ struct cuenvironment_data { }; struct cuscene_data { - cubuffer cameras = {}; - cubuffer textures = {}; - cubuffer materials = {}; - cubuffer shapes = {}; - cubuffer instances = {}; - cubuffer environments = {}; + cuspan cameras = {}; + cuspan textures = {}; + cuspan materials = {}; + cuspan shapes = {}; + cuspan instances = {}; + cuspan environments = {}; }; // Type of tracing algorithm @@ -2260,14 +2255,14 @@ using cutrace_bvh = OptixTraversableHandle; // light struct cutrace_light { - int instance = invalidid; - int environment = invalidid; - cubuffer elements_cdf = {}; + int instance = invalidid; + int environment = invalidid; + cuspan elements_cdf = {}; }; // lights struct cutrace_lights { - cubuffer lights = {}; + cuspan lights = {}; }; struct cutrace_globals { @@ -2346,52 +2341,6 @@ struct material_point { float trdepth = 0.01f; }; -// Evaluate material -static material_point eval_material(const scene_data& scene, - const material_data& material, const vec2f& texcoord, - const vec4f& color_shp) { - // evaluate textures - auto emission_tex = eval_texture( - scene, material.emission_tex, texcoord, true); - auto color_tex = eval_texture(scene, material.color_tex, texcoord, true); - auto roughness_tex = eval_texture( - scene, material.roughness_tex, texcoord, false); - auto scattering_tex = eval_texture( - scene, material.scattering_tex, texcoord, true); - - // material point - auto point = material_point{}; - point.type = material.type; - point.emission = material.emission * xyz(emission_tex); - point.color = material.color * xyz(color_tex) * xyz(color_shp); - point.opacity = material.opacity * color_tex.w * color_shp.w; - point.metallic = material.metallic * roughness_tex.z; - point.roughness = material.roughness * roughness_tex.y; - point.roughness = point.roughness * point.roughness; - point.ior = material.ior; - point.scattering = material.scattering * xyz(scattering_tex); - point.scanisotropy = material.scanisotropy; - point.trdepth = material.trdepth; - - // volume density - if (material.type == material_type::refractive || - material.type == material_type::volumetric || - material.type == material_type::subsurface) { - point.density = -log(clamp(point.color, 0.0001f, 1.0f)) / point.trdepth; - } else { - point.density = {0, 0, 0}; - } - - // fix roughness - if (point.type == material_type::matte || - point.type == material_type::gltfpbr || - point.type == material_type::glossy) { - point.roughness = clamp(point.roughness, min_roughness, 1.0f); - } - - return point; -} - // Eval position static vec3f eval_position(const scene_data& scene, const instance_data& instance, int element, const vec2f& uv) { @@ -4013,26 +3962,6 @@ static trace_result trace_falsecolor(const scene_data& scene, return {srgb_to_rgb(result), true, material.color, normal}; } -// Trace a single ray from the camera using the given algorithm. -using sampler_func = trace_result (*)(const scene_data& scene, - const trace_bvh& bvh, const trace_lights& lights, const ray3f& ray, - rng_state& rng, const trace_params& params); -static sampler_func get_trace_sampler_func(const trace_params& params) { - switch (params.sampler) { - case trace_sampler_type::path: return trace_path; - case trace_sampler_type::pathdirect: return trace_pathdirect; - case trace_sampler_type::pathmis: return trace_pathmis; - case trace_sampler_type::naive: return trace_naive; - case trace_sampler_type::eyelight: return trace_eyelight; - case trace_sampler_type::eyelightao: return trace_eyelightao; - case trace_sampler_type::furnace: return trace_furnace; - case trace_sampler_type::falsecolor: return trace_falsecolor; - default: { - return nullptr; - } - } -} - static trace_result trace_sampler(const scene_data& scene, const trace_bvh& bvh, const trace_lights& lights, const ray3f& ray, rng_state& rng, const trace_params& params) { diff --git a/libs/yocto/yocto_cutrace.h b/libs/yocto/yocto_cutrace.h index 8aba21ad1..20f1c0029 100644 --- a/libs/yocto/yocto_cutrace.h +++ b/libs/yocto/yocto_cutrace.h @@ -85,7 +85,7 @@ namespace yocto { // forward declarations struct cuscene_data; -struct cusceneext_data; +struct cuscene_data; struct cubvh_data; struct cutrace_state; struct cutrace_lights; @@ -95,23 +95,23 @@ struct cutrace_context; cutrace_context make_cutrace_context(const cutrace_params& params); // Upload the scene to the GPU. -cusceneext_data make_cutrace_scene( +cuscene_data make_cutrace_scene(cutrace_context& context, + const scene_data& scene, const cutrace_params& params); +void update_cutrace_cameras(cutrace_context& context, cuscene_data& cuscene, const scene_data& scene, const cutrace_params& params); -void update_cutrace_cameras(cusceneext_data& cuscene, const scene_data& scene, - const cutrace_params& params); // Build the bvh acceleration structure. -cubvh_data make_cutrace_bvh(cutrace_context& context, cusceneext_data& cuscene, - const scene_data& scene, const cutrace_params& params); +cubvh_data make_cutrace_bvh(cutrace_context& context, + const cuscene_data& cuscene, const cutrace_params& params); // Initialize state. -cutrace_state make_cutrace_state( +cutrace_state make_cutrace_state(cutrace_context& context, + const scene_data& scene, const cutrace_params& params); +void reset_cutrace_state(cutrace_context& context, cutrace_state& state, const scene_data& scene, const cutrace_params& params); -void reset_cutrace_state(cutrace_state& state, const scene_data& scene, - const cutrace_params& params); // Initialize lights. -cutrace_lights make_cutrace_lights( +cutrace_lights make_cutrace_lights(cutrace_context& context, const scene_data& scene, const cutrace_params& params); // Start rendering an image. @@ -126,20 +126,23 @@ void trace_samples(cutrace_context& context, cutrace_state& state, const cutrace_lights& lights, const scene_data& scene, const cutrace_params& params); -// Get resulting render +// Get resulting render, denoised if requested +image_data get_image(const cutrace_state& state); +void get_image(image_data& image, const cutrace_state& state); + +// Get internal images from state image_data get_rendered_image(const cutrace_state& state); void get_rendered_image(image_data& image, const cutrace_state& state); - -// Get denoised result image_data get_denoised_image(const cutrace_state& state); void get_denoised_image(image_data& image, const cutrace_state& state); - -// Get denoising buffers image_data get_albedo_image(const cutrace_state& state); void get_albedo_image(image_data& image, const cutrace_state& state); image_data get_normal_image(const cutrace_state& state); void get_normal_image(image_data& image, const cutrace_state& state); +// denoise image +void denoise_image(cutrace_context& context, cutrace_state& state); + // check if display bool is_display(const cutrace_context& context); @@ -194,6 +197,7 @@ using OptixModule = void*; using OptixShaderBindingTable = void*; using CUarray = void*; using CUtexObject = void*; +using OptixDenoiser = void*; #endif @@ -204,11 +208,12 @@ namespace yocto { // cuda buffer template -struct cubuffer { +struct cuspan { + bool empty() const { return _size == 0; } size_t size() const { return _size; } CUdeviceptr device_ptr() const { return _data; } size_t size_in_bytes() const { return _size * sizeof(T); } - void swap(cubuffer& other) { + void swap(cuspan& other) { std::swap(_data, other._data); std::swap(_size, other._size); } @@ -263,17 +268,17 @@ struct cumaterial_data { }; struct cuinstance_data { - frame3f frame; - int shape; - int material; + frame3f frame = {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}, {0, 0, 0}}; + int shape = invalidid; + int material = invalidid; }; struct cushape_data { - cubuffer positions = {}; - cubuffer normals = {}; - cubuffer texcoords = {}; - cubuffer colors = {}; - cubuffer triangles = {}; + cuspan positions = {}; + cuspan normals = {}; + cuspan texcoords = {}; + cuspan colors = {}; + cuspan triangles = {}; }; struct cuenvironment_data { @@ -283,33 +288,28 @@ struct cuenvironment_data { }; struct cuscene_data { - cubuffer cameras = {}; - cubuffer textures = {}; - cubuffer materials = {}; - cubuffer shapes = {}; - cubuffer instances = {}; - cubuffer environments = {}; -}; - -struct cusceneext_data : cuscene_data { - vector cutextures = {}; - vector cushapes = {}; - - cusceneext_data() {} - cusceneext_data(cusceneext_data&&); - cusceneext_data& operator=(cusceneext_data&&); - ~cusceneext_data(); + cuspan cameras = {}; + cuspan textures = {}; + cuspan materials = {}; + cuspan shapes = {}; + cuspan instances = {}; + cuspan environments = {}; + + cuscene_data() {} + cuscene_data(cuscene_data&&); + cuscene_data& operator=(cuscene_data&&); + ~cuscene_data(); }; struct cubvh_tree { - cubuffer buffer = {}; + cuspan buffer = {}; OptixTraversableHandle handle = 0; }; struct cubvh_data { - cubuffer instances = {}; - cubvh_tree instances_bvh = {}; - vector shapes_bvhs = {}; + cuspan instances = {}; + cubvh_tree instances_bvh = {}; + vector shapes_bvhs = {}; cubvh_data() {} cubvh_data(cubvh_data&&); @@ -319,27 +319,39 @@ struct cubvh_data { // state struct cutrace_state { - int width = 0; - int height = 0; - int samples = 0; - cubuffer image = {}; - cubuffer albedo = {}; - cubuffer normal = {}; - cubuffer hits = {}; - cubuffer rngs = {}; - cubuffer display = {}; + int width = 0; + int height = 0; + int samples = 0; + cuspan image = {}; + cuspan albedo = {}; + cuspan normal = {}; + cuspan hits = {}; + cuspan rngs = {}; + cuspan denoised = {}; + cuspan denoiser_state = {}; + cuspan denoiser_scratch = {}; + + cutrace_state() {} + cutrace_state(cutrace_state&&); + cutrace_state& operator=(cutrace_state&&); + ~cutrace_state(); }; // light struct cutrace_light { - int instance = invalidid; - int environment = invalidid; - cubuffer elements_cdf = {}; + int instance = invalidid; + int environment = invalidid; + cuspan elements_cdf = {}; }; // lights struct cutrace_lights { - cubuffer lights = {}; + cuspan lights = {}; + + cutrace_lights() {} + cutrace_lights(cutrace_lights&&); + cutrace_lights& operator=(cutrace_lights&&); + ~cutrace_lights(); }; // device params @@ -382,13 +394,16 @@ struct cutrace_context { OptixProgramGroup hitgroup_program = nullptr; // stb - cubuffer raygen_records = {}; - cubuffer miss_records = {}; - cubuffer hitgroup_records = {}; - OptixShaderBindingTable binding_table = {}; + cuspan raygen_records = {}; + cuspan miss_records = {}; + cuspan hitgroup_records = {}; + OptixShaderBindingTable binding_table = {}; // global buffer - cubuffer globals_buffer = {}; + cuspan globals_buffer = {}; + + // denoiser + OptixDenoiser denoiser = nullptr; cutrace_context() {} cutrace_context(cutrace_context&&); diff --git a/libs/yocto/yocto_gui.cpp b/libs/yocto/yocto_gui.cpp index 6a18cf4eb..5616031d0 100644 --- a/libs/yocto/yocto_gui.cpp +++ b/libs/yocto/yocto_gui.cpp @@ -712,14 +712,14 @@ void show_trace_gui(const string& title, const string& name, scene_data& scene, } }); state.samples += params.batch; + if (params.denoise && !state.denoised.empty()) { + denoise_image(state.denoised, state.width, state.height, state.image, + state.albedo, state.normal); + } if (!render_stop) { auto lock = std::lock_guard{render_mutex}; render_current = state.samples; - if (!params.denoise || render_stop) { - get_rendered_image(render, state); - } else { - get_denoised_image(render, state); - } + get_image(render, state); image = render; render_update = true; } @@ -830,13 +830,13 @@ void show_cutrace_gui(const string& title, const string& name, auto context = make_cutrace_context(params); // upload scene to the gpu - auto cuscene = make_cutrace_scene(scene, params); + auto cuscene = make_cutrace_scene(context, scene, params); // build bvh - auto bvh = make_cutrace_bvh(context, cuscene, scene, params); + auto bvh = make_cutrace_bvh(context, cuscene, params); // init lights - auto lights = make_cutrace_lights(scene, params); + auto lights = make_cutrace_lights(context, scene, params); // fix renderer type if no lights // if (lights.lights.empty() && is_sampler_lit(params)) { @@ -844,13 +844,13 @@ void show_cutrace_gui(const string& title, const string& name, // } // state - auto state = make_cutrace_state(scene, params); + auto state = make_cutrace_state(context, scene, params); // preview state auto pparams = params; pparams.resolution /= params.pratio; pparams.samples = 1; - auto pstate = make_cutrace_state(scene, pparams); + auto pstate = make_cutrace_state(context, scene, pparams); // init state auto image = make_image(state.width, state.height, true); @@ -877,7 +877,7 @@ void show_cutrace_gui(const string& title, const string& name, auto pparams = params; pparams.resolution /= params.pratio; pparams.samples = 1; - reset_cutrace_state(pstate, scene, pparams); + reset_cutrace_state(context, pstate, scene, pparams); trace_start(context, pstate, cuscene, bvh, lights, scene, pparams); trace_samples(context, pstate, cuscene, bvh, lights, scene, pparams); auto preview = get_rendered_image(pstate); @@ -887,7 +887,7 @@ void show_cutrace_gui(const string& title, const string& name, pj = clamp(j / params.pratio, 0, preview.height - 1); image.pixels[idx] = preview.pixels[pj * preview.width + pi]; } - reset_cutrace_state(state, scene, params); + reset_cutrace_state(context, state, scene, params); return true; }; @@ -898,11 +898,7 @@ void show_cutrace_gui(const string& title, const string& name, trace_start(context, state, cuscene, bvh, lights, scene, params); } trace_samples(context, state, cuscene, bvh, lights, scene, params); - if (!params.denoise) { - get_rendered_image(image, state); - } else { - get_denoised_image(image, state); - } + get_image(image, state); return true; }; @@ -966,7 +962,7 @@ void show_cutrace_gui(const string& title, const string& name, auto camera = scene.cameras[params.camera]; if (uiupdate_camera_params(input, camera)) { scene.cameras[params.camera] = camera; - update_cutrace_cameras(cuscene, scene, params); + update_cutrace_cameras(context, cuscene, scene, params); if (render_preview()) set_image(glimage, image); } }; diff --git a/libs/yocto/yocto_trace.cpp b/libs/yocto/yocto_trace.cpp index dad3920ff..fd133cfe0 100644 --- a/libs/yocto/yocto_trace.cpp +++ b/libs/yocto/yocto_trace.cpp @@ -1437,6 +1437,9 @@ trace_state make_trace_state( for (auto& rng : state.rngs) { rng = make_rng(params.seed, rand1i(rng_, 1 << 31) / 2 + 1); } + if (params.denoise) { + state.denoised.assign(state.width * state.height, {0, 0, 0, 0}); + } return state; } @@ -1509,7 +1512,7 @@ image_data trace_image(const scene_data& scene, const trace_params& params) { for (auto sample = 0; sample < params.samples; sample++) { trace_samples(state, scene, bvh, lights, params); } - return get_rendered_image(state); + return get_image(state); } // Progressively compute an image by calling trace_samples multiple times. @@ -1533,6 +1536,10 @@ void trace_samples(trace_state& state, const scene_data& scene, }); } state.samples += params.batch; + if (params.denoise && !state.denoised.empty()) { + denoise_image(state.denoised, state.width, state.height, state.image, + state.albedo, state.normal); + } } // Check image type @@ -1544,6 +1551,28 @@ static void check_image( throw std::invalid_argument{ linear ? "expected linear image" : "expected srgb image"}; } +template +static void check_image(const vector& image, int width, int height) { + if (image.size() != (size_t)width * (size_t)height) + throw std::invalid_argument{"image should have the same size"}; +} + +// Get resulting render, denoised if requested +image_data get_image(const trace_state& state) { + auto image = make_image(state.width, state.height, true); + get_image(image, state); + return image; +} +void get_image(image_data& image, const trace_state& state) { + image.width = state.width; + image.height = state.height; + image.linear = true; + if (state.denoised.empty()) { + image.pixels = state.image; + } else { + image.pixels = state.denoised; + } +} // Get resulting render image_data get_rendered_image(const trace_state& state) { @@ -1629,13 +1658,13 @@ void get_normal_image(image_data& normal, const trace_state& state) { } // Denoise image -image_data denoise_rendered_image(const image_data& render, - const image_data& albedo, const image_data& normal) { +image_data denoise_image(const image_data& render, const image_data& albedo, + const image_data& normal) { auto denoised = make_image(render.width, render.height, render.linear); - denoise_rendered_image(denoised, render, albedo, normal); + denoise_image(denoised, render, albedo, normal); return denoised; } -void denoise_rendered_image(image_data& denoised, const image_data& render, +void denoise_image(image_data& denoised, const image_data& render, const image_data& albedo, const image_data& normal) { check_image(denoised, render.width, render.height, render.linear); check_image(albedo, render.width, render.height, albedo.linear); @@ -1673,4 +1702,40 @@ void denoise_rendered_image(image_data& denoised, const image_data& render, #endif } +void denoise_image(vector& denoised, int width, int height, + const vector& render, const vector& albedo, + const vector& normal) { + check_image(denoised, width, height); + check_image(render, width, height); + check_image(albedo, width, height); + check_image(normal, width, height); +#if YOCTO_DENOISE + // Create an Intel Open Image Denoise device + oidn::DeviceRef device = oidn::newDevice(); + device.commit(); + + // set image + denoised = render; + + // Create a denoising filter + oidn::FilterRef filter = device.newFilter("RT"); // ray tracing filter + filter.setImage("color", (void*)render.data(), oidn::Format::Float3, width, + height, 0, sizeof(vec4f), sizeof(vec4f) * width); + filter.setImage("albedo", (void*)albedo.data(), oidn::Format::Float3, width, + height, 0, sizeof(vec3f), sizeof(vec3f) * width); + filter.setImage("normal", (void*)normal.data(), oidn::Format::Float3, width, + height, 0, sizeof(vec3f), sizeof(vec3f) * width); + filter.setImage("output", denoised.data(), oidn::Format::Float3, width, + height, 0, sizeof(vec4f), sizeof(vec4f) * width); + filter.set("inputScale", 1.0f); // set scale as fixed + filter.set("hdr", true); // image is HDR + filter.commit(); + + // Filter the image + filter.execute(); +#else + denoised = render; +#endif +} + } // namespace yocto diff --git a/libs/yocto/yocto_trace.h b/libs/yocto/yocto_trace.h index 04576a6f5..efcf7095d 100644 --- a/libs/yocto/yocto_trace.h +++ b/libs/yocto/yocto_trace.h @@ -144,14 +144,15 @@ bool is_sampler_lit(const trace_params& params); // Trace state struct trace_state { - int width = 0; - int height = 0; - int samples = 0; - vector image = {}; - vector albedo = {}; - vector normal = {}; - vector hits = {}; - vector rngs = {}; + int width = 0; + int height = 0; + int samples = 0; + vector image = {}; + vector albedo = {}; + vector normal = {}; + vector hits = {}; + vector rngs = {}; + vector denoised = {}; }; // Initialize state. @@ -173,25 +174,28 @@ void trace_sample(trace_state& state, const scene_data& scene, const trace_bvh& bvh, const trace_lights& lights, int i, int j, int sample, const trace_params& params); -// Get resulting render +// Get resulting render, denoised if requested +image_data get_image(const trace_state& state); +void get_image(image_data& image, const trace_state& state); + +// Get internal images from state image_data get_rendered_image(const trace_state& state); void get_rendered_image(image_data& image, const trace_state& state); - -// Get denoised result image_data get_denoised_image(const trace_state& state); void get_denoised_image(image_data& image, const trace_state& state); - -// Get denoising buffers image_data get_albedo_image(const trace_state& state); void get_albedo_image(image_data& image, const trace_state& state); image_data get_normal_image(const trace_state& state); void get_normal_image(image_data& image, const trace_state& state); // Denoise image -image_data denoise_rendered_image(const image_data& render, - const image_data& albedo, const image_data& normal); -void denoise_rendered_image(image_data& image, const image_data& render, +image_data denoise_image(const image_data& render, const image_data& albedo, + const image_data& normal); +void denoise_image(image_data& image, const image_data& render, const image_data& albedo, const image_data& normal); +void denoise_image(vector& denoised, int width, int height, + const vector& render, const vector& albedo, + const vector& normal); } // namespace yocto @@ -302,17 +306,6 @@ namespace yocto { return get_normal_image(image, state); } -// Denoise image -[[deprecated]] inline image_data denoise_render(const image_data& render, - const image_data& albedo, const image_data& normal) { - return denoise_rendered_image(render, albedo, normal); -} -[[deprecated]] inline void denoise_render(image_data& image, - const image_data& render, const image_data& albedo, - const image_data& normal) { - return denoise_rendered_image(image, render, albedo, normal); -} - } // namespace yocto #endif