diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml index fe23f7911924..d1cb116ce98d 100644 --- a/doc/classes/RenderingDevice.xml +++ b/doc/classes/RenderingDevice.xml @@ -2021,7 +2021,10 @@ Input attachment uniform. - + + Acceleration structure uniform. + + Represents the size of the [enum UniformType] enum. diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index a86f72e0b986..5279f6c9420c 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -369,6 +369,15 @@ uint32_t RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_stages_fla if (supported_stages & VK_SHADER_STAGE_COMPUTE_BIT) { flags += SHADER_STAGE_COMPUTE_BIT; } + if (supported_stages & VK_SHADER_STAGE_RAYGEN_BIT_KHR) { + flags += SHADER_STAGE_RAYGEN_BIT; + } + if (supported_stages & VK_SHADER_STAGE_MISS_BIT_KHR) { + flags += SHADER_STAGE_MISS_BIT; + } + if (supported_stages & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) { + flags += SHADER_STAGE_CLOSEST_HIT_BIT; + } return flags; } @@ -513,6 +522,12 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_NV_RAY_TRACING_VALIDATION_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, false); if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { _register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); @@ -713,6 +728,10 @@ Error RenderingDeviceDriverVulkan::_check_device_features() { return OK; } +static uint32_t _align_up(uint32_t size, uint32_t alignment) { + return (size + (alignment - 1)) & ~(alignment - 1); +} + Error RenderingDeviceDriverVulkan::_check_device_capabilities() { // Fill device family and version. device_capabilities.device_family = DEVICE_VULKAN; @@ -734,6 +753,12 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; VkPhysicalDeviceMultiviewFeatures multiview_features = {}; VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; + VkPhysicalDeviceVulkanMemoryModelFeatures memory_model_features = {}; + VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_address_features = {}; + VkPhysicalDeviceAccelerationStructureFeaturesKHR acceleration_structure_features = {}; + VkPhysicalDeviceRayTracingPipelineFeaturesKHR raytracing_pipeline_features = {}; + VkPhysicalDeviceSynchronization2FeaturesKHR sync_2_features = {}; + VkPhysicalDeviceRayTracingValidationFeaturesNV raytracing_validation_features = {}; const bool use_1_2_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_2; if (use_1_2_features) { @@ -770,6 +795,40 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_features = &pipeline_cache_control_features; } + if (enabled_device_extension_names.has(VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) { + memory_model_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES; + memory_model_features.pNext = next_features; + next_features = &memory_model_features; + + buffer_address_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES; + buffer_address_features.pNext = next_features; + next_features = &buffer_address_features; + } + + if (enabled_device_extension_names.has(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) { + acceleration_structure_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR; + acceleration_structure_features.pNext = next_features; + next_features = &acceleration_structure_features; + } + + if (enabled_device_extension_names.has(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) { + raytracing_pipeline_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR; + raytracing_pipeline_features.pNext = next_features; + next_features = &raytracing_pipeline_features; + } + + if (enabled_device_extension_names.has(VK_NV_RAY_TRACING_VALIDATION_EXTENSION_NAME)) { + raytracing_validation_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_VALIDATION_FEATURES_NV; + raytracing_validation_features.pNext = next_features; + next_features = &raytracing_validation_features; + } + + if (enabled_device_extension_names.has(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME)) { + sync_2_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES; + sync_2_features.pNext = next_features; + next_features = &sync_2_features; + } + VkPhysicalDeviceFeatures2 device_features_2 = {}; device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; device_features_2.pNext = next_features; @@ -821,6 +880,19 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { device_memory_report_support = true; } #endif + + if (enabled_device_extension_names.has(VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) { + raytracing_capabilities.buffer_device_address_support = buffer_address_features.bufferDeviceAddress; + } + + if (enabled_device_extension_names.has(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) { + raytracing_capabilities.acceleration_structure_support = acceleration_structure_features.accelerationStructure; + } + + if (enabled_device_extension_names.has(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) { + raytracing_capabilities.raytracing_pipeline_support = raytracing_pipeline_features.rayTracingPipeline; + raytracing_capabilities.validation = raytracing_validation_features.rayTracingValidation; + } } if (functions.GetPhysicalDeviceProperties2 != nullptr) { @@ -829,6 +901,7 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDeviceMultiviewProperties multiview_properties = {}; VkPhysicalDeviceSubgroupProperties subgroup_properties = {}; VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control_properties = {}; + VkPhysicalDeviceRayTracingPipelinePropertiesKHR raytracing_properties = {}; VkPhysicalDeviceProperties2 physical_device_properties_2 = {}; const bool use_1_1_properties = physical_device_properties.apiVersion >= VK_API_VERSION_1_1; @@ -857,6 +930,12 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_properties = &vrs_properties; } + if (raytracing_capabilities.raytracing_pipeline_support) { + raytracing_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR; + raytracing_properties.pNext = next_properties; + next_properties = &raytracing_properties; + } + physical_device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; physical_device_properties_2.pNext = next_properties; functions.GetPhysicalDeviceProperties2(physical_device, &physical_device_properties_2); @@ -924,6 +1003,21 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { if (subgroup_capabilities.quad_operations_in_all_stages) { print_verbose(" quad operations in all stages"); } + + if (raytracing_capabilities.raytracing_pipeline_support) { + raytracing_capabilities.shader_group_handle_size = raytracing_properties.shaderGroupHandleSize; + raytracing_capabilities.shader_group_handle_alignment = raytracing_properties.shaderGroupHandleAlignment; + raytracing_capabilities.shader_group_handle_size_aligned = _align_up(raytracing_capabilities.shader_group_handle_size, raytracing_capabilities.shader_group_handle_alignment); + raytracing_capabilities.shader_group_base_alignment = raytracing_properties.shaderGroupBaseAlignment; + + print_verbose("- Vulkan Raytracing supported"); + print_verbose(" shader group handle size: " + itos(raytracing_capabilities.shader_group_handle_size)); + print_verbose(" shader group handle alignment: " + itos(raytracing_capabilities.shader_group_handle_alignment)); + print_verbose(" shader group handle size aligned: " + itos(raytracing_capabilities.shader_group_handle_size_aligned)); + print_verbose(" shader group base alignment: " + itos(raytracing_capabilities.shader_group_base_alignment)); + } else { + print_verbose("- Vulkan Raytracing not supported"); + } } return OK; @@ -1009,6 +1103,38 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVectorvk_buffer; + return vkGetBufferDeviceAddress(vk_device, &addr_info); +} RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitField p_usage, MemoryAllocationType p_allocation_type) { VkBufferCreateInfo create_info = {}; @@ -2195,6 +2348,8 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPE static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR)); // RDD::BarrierAccessBits == VkAccessFlagBits. static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT)); @@ -2213,6 +2368,8 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_WRITE_BIT, VK_ACCESS_H static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_READ_BIT, VK_ACCESS_MEMORY_READ_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_MEMORY_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT, VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)); void RenderingDeviceDriverVulkan::command_pipeline_barrier( CommandBufferID p_cmd_buffer, @@ -3295,6 +3452,9 @@ static VkShaderStageFlagBits RD_STAGE_TO_VK_SHADER_STAGE_BITS[RDD::SHADER_STAGE_ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, VK_SHADER_STAGE_COMPUTE_BIT, + VK_SHADER_STAGE_RAYGEN_BIT_KHR, + VK_SHADER_STAGE_MISS_BIT_KHR, + VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, }; String RenderingDeviceDriverVulkan::shader_get_binary_cache_key() { @@ -3318,7 +3478,7 @@ Vector RenderingDeviceDriverVulkan::shader_compile_binary_from_spirv(Ve binary_data.vertex_input_mask = shader_refl.vertex_input_mask; binary_data.fragment_output_mask = shader_refl.fragment_output_mask; binary_data.specialization_constants_count = shader_refl.specialization_constants.size(); - binary_data.is_compute = shader_refl.is_compute; + binary_data.pipeline_type = shader_refl.pipeline_type; binary_data.compute_local_size[0] = shader_refl.compute_local_size[0]; binary_data.compute_local_size[1] = shader_refl.compute_local_size[1]; binary_data.compute_local_size[2] = shader_refl.compute_local_size[2]; @@ -3501,7 +3661,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - r_shader_desc.is_compute = binary_data.is_compute; + r_shader_desc.pipeline_type = binary_data.pipeline_type; r_shader_desc.compute_local_size[0] = binary_data.compute_local_size[0]; r_shader_desc.compute_local_size[1] = binary_data.compute_local_size[1]; r_shader_desc.compute_local_size[2] = binary_data.compute_local_size[2]; @@ -3576,6 +3736,9 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec case UNIFORM_TYPE_INPUT_ATTACHMENT: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; } break; + case UNIFORM_TYPE_ACCELERATION_STRUCTURE: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + } break; default: { DEV_ASSERT(false); } @@ -3672,6 +3835,31 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec create_info.pName = "main"; shader_info.vk_stages_create_info.push_back(create_info); + + ShaderStage stage = r_shader_desc.stages[i]; + + if (stage == ShaderStage::SHADER_STAGE_RAYGEN || stage == ShaderStage::SHADER_STAGE_MISS) { + VkRayTracingShaderGroupCreateInfoKHR group_info = {}; + group_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR; + group_info.anyHitShader = VK_SHADER_UNUSED_KHR; + group_info.closestHitShader = VK_SHADER_UNUSED_KHR; + group_info.intersectionShader = VK_SHADER_UNUSED_KHR; + group_info.generalShader = i; + + shader_info.vk_groups_create_info.push_back(group_info); + } + if (stage == ShaderStage::SHADER_STAGE_CLOSEST_HIT) { + VkRayTracingShaderGroupCreateInfoKHR group_info = {}; + group_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR; + group_info.anyHitShader = VK_SHADER_UNUSED_KHR; + group_info.closestHitShader = i; + group_info.intersectionShader = VK_SHADER_UNUSED_KHR; + group_info.generalShader = VK_SHADER_UNUSED_KHR; + + shader_info.vk_groups_create_info.push_back(group_info); + } } // Descriptor sets. @@ -3731,6 +3919,58 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec ERR_FAIL_V_MSG(ShaderID(), error_text); } + if (r_shader_desc.pipeline_type == PipelineType::RAYTRACING) { + // Regions + + for (uint32_t i = 0; i < r_shader_desc.stages.size(); i++) { + ShaderStage stage = r_shader_desc.stages[i]; + switch (stage) { + case ShaderStage::SHADER_STAGE_RAYGEN: + shader_info.regions.raygen_count += 1; + break; + case ShaderStage::SHADER_STAGE_MISS: + shader_info.regions.miss_count += 1; + break; + case ShaderStage::SHADER_STAGE_CLOSEST_HIT: + shader_info.regions.closest_hit_count += 1; + break; + default: + // nothing + break; + } + } + + shader_info.regions.group_count = shader_info.regions.raygen_count + shader_info.regions.miss_count + shader_info.regions.closest_hit_count; + + uint32_t handle_size_aligned = raytracing_capabilities.shader_group_handle_size_aligned; + uint32_t base_alignment = raytracing_capabilities.shader_group_base_alignment; + + shader_info.regions.raygen.stride = _align_up(handle_size_aligned * shader_info.regions.raygen_count, base_alignment); + shader_info.regions.raygen.size = shader_info.regions.raygen.stride; // odd but ok + + shader_info.regions.miss.stride = handle_size_aligned; + shader_info.regions.miss.size = _align_up(handle_size_aligned * shader_info.regions.miss_count, base_alignment); + + shader_info.regions.closest_hit.stride = handle_size_aligned; + shader_info.regions.closest_hit.size = _align_up(handle_size_aligned * shader_info.regions.closest_hit_count, base_alignment); + + shader_info.regions.call.stride = 0; + shader_info.regions.call.size = 0; + + uint32_t handles_size = shader_info.regions.group_count * raytracing_capabilities.shader_group_handle_size; + shader_info.regions.handles_data.resize(handles_size); + + // Shader binding table + uint32_t sbt_size = shader_info.regions.raygen.size + shader_info.regions.closest_hit.size + shader_info.regions.miss.size + shader_info.regions.call.size; + shader_info.sbt_buffer = buffer_create(sbt_size, BUFFER_USAGE_TRANSFER_FROM_BIT | BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | BUFFER_USAGE_SHADER_BINDING_TABLE_BIT, MEMORY_ALLOCATION_TYPE_CPU); + + // Update regions addresses + shader_info.regions.raygen.deviceAddress = _buffer_get_device_address(shader_info.sbt_buffer); + shader_info.regions.miss.deviceAddress = shader_info.regions.raygen.deviceAddress + shader_info.regions.raygen.size; + shader_info.regions.closest_hit.deviceAddress = shader_info.regions.miss.deviceAddress + shader_info.regions.miss.size; + shader_info.regions.call.deviceAddress = 0; + } + // Bookkeep. ShaderInfo *shader_info_ptr = VersatileResource::allocate(resources_allocator); @@ -3749,6 +3989,10 @@ void RenderingDeviceDriverVulkan::shader_free(ShaderID p_shader) { shader_destroy_modules(p_shader); + if (shader_info->sbt_buffer) { + buffer_free(shader_info->sbt_buffer); + } + VersatileResource::free(resources_allocator, shader_info); } @@ -3851,6 +4095,13 @@ VkDescriptorPool RenderingDeviceDriverVulkan::_descriptor_set_pool_find_or_creat curr_vk_size++; vk_sizes_count++; } + if (p_key.uniform_type[UNIFORM_TYPE_ACCELERATION_STRUCTURE]) { + *curr_vk_size = {}; + curr_vk_size->type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_ACCELERATION_STRUCTURE] * max_descriptor_sets_per_pool; + curr_vk_size++; + vk_sizes_count++; + } DEV_ASSERT(vk_sizes_count <= UNIFORM_TYPE_MAX); } @@ -4036,6 +4287,17 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorViewsType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; + acceleration_structure_write->accelerationStructureCount = 1; + acceleration_structure_write->pAccelerationStructures = &accel_info->vk_acceleration_structure; + + vk_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + vk_writes[i].pNext = acceleration_structure_write; + } break; default: { DEV_ASSERT(false); } @@ -4999,6 +5261,216 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( return PipelineID(vk_pipeline); } +/********************/ +/**** RAYTRACING ****/ +/********************/ + +RDD::AccelerationStructureID RenderingDeviceDriverVulkan::blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, BufferID p_transform_buffer, uint64_t p_transform_offset) { + // Vertex positions is first buffer + const VertexFormatInfo *vf_info = (const VertexFormatInfo *)p_vertex_format.id; + VkDeviceSize buffer_offset = vf_info->vk_attributes[0].offset; + + VkDeviceAddress vertex_address = _buffer_get_device_address(p_vertex_buffer) + buffer_offset; + VkDeviceAddress index_address = _buffer_get_device_address(p_index_buffer) + p_index_offset_bytes; + VkDeviceAddress transform_address = _buffer_get_device_address(p_transform_buffer) + p_transform_offset; + + VkDeviceSize vertex_stride = vf_info->vk_bindings[0].stride; + VkFormat vertex_format = vf_info->vk_attributes[0].format; + uint32_t max_vertex = p_vertex_count ? p_vertex_count - 1 : 0; + + AccelerationStructureInfo *accel_info = VersatileResource::allocate(resources_allocator); + + accel_info->geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + accel_info->geometry.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR; + accel_info->geometry.flags = VK_GEOMETRY_OPAQUE_BIT_KHR; + + accel_info->geometry.geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR; + accel_info->geometry.geometry.triangles.vertexFormat = vertex_format; + accel_info->geometry.geometry.triangles.vertexData.deviceAddress = vertex_address; + accel_info->geometry.geometry.triangles.vertexStride = vertex_stride; + accel_info->geometry.geometry.triangles.indexType = p_index_format == INDEX_BUFFER_FORMAT_UINT16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; + accel_info->geometry.geometry.triangles.indexData.deviceAddress = index_address; + // Transform matrix, 3 rows, 4 columns, row-major + accel_info->geometry.geometry.triangles.transformData.deviceAddress = transform_address; + // Number of vertices in vertexData minus one, aka max vertex index + accel_info->geometry.geometry.triangles.maxVertex = max_vertex; + + // Info for building BLAS + uint32_t primitive_count = (p_vertex_count - p_vertex_offset) / 3; + if (p_index_buffer) { + primitive_count = p_index_count / 3; + } + accel_info->range_info.firstVertex = p_vertex_offset; + accel_info->range_info.primitiveCount = primitive_count; + accel_info->range_info.primitiveOffset = 0; + accel_info->range_info.transformOffset = 0; + uint32_t max_primitive_count = accel_info->range_info.primitiveCount; + + accel_info->build_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + accel_info->build_info.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + accel_info->build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + accel_info->build_info.pGeometries = &accel_info->geometry; + accel_info->build_info.geometryCount = 1; + accel_info->build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + + VkAccelerationStructureBuildSizesInfoKHR size_info = {}; + size_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + + vkGetAccelerationStructureBuildSizesKHR(vk_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accel_info->build_info, &max_primitive_count, &size_info); + _acceleration_structure_create(VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, size_info, accel_info); + + return AccelerationStructureID(accel_info); +} + +RDD::AccelerationStructureID RenderingDeviceDriverVulkan::tlas_create(const LocalVector &p_blases) { + AccelerationStructureInfo *accel_info = VersatileResource::allocate(resources_allocator); + + for (uint32_t i = 0; i < p_blases.size(); ++i) { + const AccelerationStructureID &blas = p_blases[i]; + AccelerationStructureInfo *blas_info = (AccelerationStructureInfo *)blas.id; + + VkTransformMatrixKHR transform = { { + { 1.0, 0.0, 0.0, 0.0 }, + { 0.0, 1.0, 0.0, 0.0 }, + { 0.0, 0.0, 1.0, 0.0 }, + } }; + + VkAccelerationStructureInstanceKHR instance = {}; + instance.transform = transform; + instance.instanceCustomIndex = i; + instance.mask = 0xFF; + instance.accelerationStructureReference = _buffer_get_device_address(blas_info->buffer); + instance.instanceShaderBindingTableRecordOffset = 0; + instance.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; + + accel_info->instances.push_back(instance); + } + + uint32_t instance_count = accel_info->instances.size(); + VkDeviceAddress instances_buffer_address = 0; + + if (instance_count > 0) { + uint32_t instances_size = instance_count * sizeof(accel_info->instances[0]); + accel_info->instances_buffer = buffer_create(instances_size, BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT, MEMORY_ALLOCATION_TYPE_CPU); + uint8_t *data_ptr = buffer_map(accel_info->instances_buffer); + ERR_FAIL_NULL_V(data_ptr, AccelerationStructureID()); + memcpy(data_ptr, accel_info->instances.ptr(), instances_size); + buffer_unmap(accel_info->instances_buffer); + instances_buffer_address = _buffer_get_device_address(accel_info->instances_buffer); + } + + accel_info->geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + accel_info->geometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR; + accel_info->geometry.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR; + accel_info->geometry.geometry.instances.data.deviceAddress = instances_buffer_address; + + accel_info->build_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + accel_info->build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + accel_info->build_info.geometryCount = 1; + accel_info->build_info.pGeometries = &accel_info->geometry; + accel_info->build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + accel_info->build_info.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR; + + VkAccelerationStructureBuildSizesInfoKHR size_info = {}; + size_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + vkGetAccelerationStructureBuildSizesKHR(vk_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accel_info->build_info, &instance_count, &size_info); + accel_info->range_info.primitiveCount = instance_count; + + _acceleration_structure_create(VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, size_info, accel_info); + return AccelerationStructureID(accel_info); +} + +void RenderingDeviceDriverVulkan::_acceleration_structure_create(VkAccelerationStructureTypeKHR p_type, VkAccelerationStructureBuildSizesInfoKHR p_size_info, AccelerationStructureInfo *r_accel_info) { + RDD::BufferID buffer = buffer_create(p_size_info.accelerationStructureSize, RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT | RDD::BUFFER_USAGE_STORAGE_BIT | RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU); + r_accel_info->buffer = buffer; + + RDD::BufferID scratch_buffer = buffer_create(p_size_info.buildScratchSize, RDD::BUFFER_USAGE_STORAGE_BIT | RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU); + r_accel_info->scratch_buffer = scratch_buffer; + r_accel_info->build_info.scratchData.deviceAddress = _buffer_get_device_address(scratch_buffer); + + VkAccelerationStructureCreateInfoKHR blas_create_info = {}; + blas_create_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR; + blas_create_info.type = p_type; + blas_create_info.size = p_size_info.accelerationStructureSize; + blas_create_info.buffer = ((const BufferInfo *)buffer.id)->vk_buffer; + VkResult err = vkCreateAccelerationStructureKHR(vk_device, &blas_create_info, nullptr, &r_accel_info->vk_acceleration_structure); + ERR_FAIL_COND_MSG(err, "vkCreateAccelerationStructureKHR failed with error " + itos(err) + "."); + r_accel_info->build_info.dstAccelerationStructure = r_accel_info->vk_acceleration_structure; +} + +void RenderingDeviceDriverVulkan::acceleration_structure_free(AccelerationStructureID p_acceleration_structure) { + AccelerationStructureInfo *accel_info = (AccelerationStructureInfo *)p_acceleration_structure.id; + if (accel_info->instances_buffer) { + buffer_free(accel_info->instances_buffer); + } + if (accel_info->scratch_buffer) { + buffer_free(accel_info->scratch_buffer); + } + if (accel_info->buffer) { + buffer_free(accel_info->buffer); + } + if (accel_info->vk_acceleration_structure) { + vkDestroyAccelerationStructureKHR(vk_device, accel_info->vk_acceleration_structure, nullptr); + } + VersatileResource::free(resources_allocator, accel_info); +} + +// ----- COMMANDS ----- + +void RenderingDeviceDriverVulkan::command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure) { + const AccelerationStructureInfo *accel_info = (const AccelerationStructureInfo *)p_acceleration_structure.id; + const VkAccelerationStructureBuildRangeInfoKHR *range_info_ptr = &accel_info->range_info; + vkCmdBuildAccelerationStructuresKHR((VkCommandBuffer)p_cmd_buffer.id, 1, &accel_info->build_info, &range_info_ptr); +} + +void RenderingDeviceDriverVulkan::command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) { + const RaytracingPipelineInfo *pipeline_info = (const RaytracingPipelineInfo *)p_pipeline.id; + vkCmdBindPipeline((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, (VkPipeline)pipeline_info->vk_pipeline); +} + +void RenderingDeviceDriverVulkan::command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; + const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id; + vkCmdBindDescriptorSets((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr); +} + +void RenderingDeviceDriverVulkan::command_raytracing_trace_rays(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline, ShaderID p_shader, uint32_t p_width, uint32_t p_height) { + ShaderInfo *shader_info = (ShaderInfo *)p_shader.id; + const RaytracingPipelineInfo *pipeline_info = (const RaytracingPipelineInfo *)p_pipeline.id; + + uint32_t handle_size = raytracing_capabilities.shader_group_handle_size; + uint32_t handles_size = shader_info->regions.handles_data.size(); + uint8_t *handles_ptr = shader_info->regions.handles_data.ptr(); + + VkResult err = vkGetRayTracingShaderGroupHandlesKHR(vk_device, pipeline_info->vk_pipeline, 0, shader_info->regions.group_count, handles_size, handles_ptr); + ERR_FAIL_COND_MSG(err, "vkGetRayTracingShaderGroupHandlesKHR failed with error " + itos(err) + "."); + + uint8_t *sbt_ptr = buffer_map(shader_info->sbt_buffer); + uint8_t *sbt_data = sbt_ptr; + uint32_t handle_index = 0; + + memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size); + ++handle_index; + + sbt_data = sbt_ptr + shader_info->regions.raygen.size; + for (uint32_t i = 0; i < shader_info->regions.miss_count; ++i) { + memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size); + sbt_data += shader_info->regions.miss.stride; + ++handle_index; + } + + sbt_data = sbt_ptr + shader_info->regions.raygen.size + shader_info->regions.miss.size; + for (uint32_t i = 0; i < shader_info->regions.closest_hit_count; ++i) { + memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size); + sbt_data += shader_info->regions.closest_hit.stride; + ++handle_index; + } + + buffer_unmap(shader_info->sbt_buffer); + + vkCmdTraceRaysKHR((VkCommandBuffer)p_cmd_buffer.id, &shader_info->regions.raygen, &shader_info->regions.miss, &shader_info->regions.closest_hit, &shader_info->regions.call, p_width, p_height, 1); +} + /*****************/ /**** COMPUTE ****/ /*****************/ @@ -5060,6 +5532,72 @@ RDD::PipelineID RenderingDeviceDriverVulkan::compute_pipeline_create(ShaderID p_ return PipelineID(vk_pipeline); } +RDD::RaytracingPipelineID RenderingDeviceDriverVulkan::raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) { + const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; + + VkRayTracingPipelineCreateInfoKHR pipeline_create_info = {}; + pipeline_create_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR; + + // Stages + pipeline_create_info.stageCount = shader_info->vk_stages_create_info.size(); + + VkPipelineShaderStageCreateInfo *vk_pipeline_stages = ALLOCA_ARRAY(VkPipelineShaderStageCreateInfo, pipeline_create_info.stageCount); + + for (uint32_t i = 0; i < pipeline_create_info.stageCount; i++) { + vk_pipeline_stages[i] = shader_info->vk_stages_create_info[i]; + + if (p_specialization_constants.size()) { + VkSpecializationMapEntry *specialization_map_entries = ALLOCA_ARRAY(VkSpecializationMapEntry, p_specialization_constants.size()); + for (uint32_t j = 0; j < p_specialization_constants.size(); j++) { + specialization_map_entries[j] = {}; + specialization_map_entries[j].constantID = p_specialization_constants[j].constant_id; + specialization_map_entries[j].offset = (const char *)&p_specialization_constants[j].int_value - (const char *)p_specialization_constants.ptr(); + specialization_map_entries[j].size = sizeof(uint32_t); + } + + VkSpecializationInfo *specialization_info = ALLOCA_SINGLE(VkSpecializationInfo); + *specialization_info = {}; + specialization_info->dataSize = p_specialization_constants.size() * sizeof(PipelineSpecializationConstant); + specialization_info->pData = p_specialization_constants.ptr(); + specialization_info->mapEntryCount = p_specialization_constants.size(); + specialization_info->pMapEntries = specialization_map_entries; + + vk_pipeline_stages[i].pSpecializationInfo = specialization_info; + } + } + + // Groups + pipeline_create_info.groupCount = pipeline_create_info.stageCount; + VkRayTracingShaderGroupCreateInfoKHR *vk_pipeline_groups = ALLOCA_ARRAY(VkRayTracingShaderGroupCreateInfoKHR, pipeline_create_info.groupCount); + for (uint32_t i = 0; i < pipeline_create_info.stageCount; i++) { + vk_pipeline_groups[i] = shader_info->vk_groups_create_info[i]; + } + + // Pipeline + pipeline_create_info.layout = shader_info->vk_pipeline_layout; + pipeline_create_info.pStages = vk_pipeline_stages; + pipeline_create_info.pGroups = vk_pipeline_groups; + pipeline_create_info.maxPipelineRayRecursionDepth = 1; + + VkPipeline vk_pipeline = VK_NULL_HANDLE; + VkResult err = vkCreateRayTracingPipelinesKHR(vk_device, VK_NULL_HANDLE, pipelines_cache.vk_cache, 1, &pipeline_create_info, nullptr, &vk_pipeline); + ERR_FAIL_COND_V_MSG(err, RaytracingPipelineID(), "vkCreateRayTracingPipelinesKHR failed with error " + itos(err) + "."); + + // Done + RaytracingPipelineInfo *pipeline_info = VersatileResource::allocate(resources_allocator); + pipeline_info->vk_pipeline = vk_pipeline; + + return RaytracingPipelineID(pipeline_info); +} + +void RenderingDeviceDriverVulkan::raytracing_pipeline_free(RaytracingPipelineID p_pipeline) { + RaytracingPipelineInfo *pipeline_info = (RaytracingPipelineInfo *)p_pipeline.id; + if (pipeline_info->vk_pipeline) { + vkDestroyPipeline(vk_device, pipeline_info->vk_pipeline, nullptr); + } + VersatileResource::free(resources_allocator, pipeline_info); +} + /*****************/ /**** QUERIES ****/ /*****************/ @@ -5500,6 +6038,10 @@ void RenderingDeviceDriverVulkan::set_object_name(ObjectType p_type, ID p_driver case OBJECT_TYPE_PIPELINE: { _set_object_name(VK_OBJECT_TYPE_PIPELINE, (uint64_t)p_driver_id.id, p_name); } break; + case OBJECT_TYPE_ACCELERATION_STRUCTURE: { + const AccelerationStructureInfo *asi = (const AccelerationStructureInfo *)p_driver_id.id; + _set_object_name(VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, (uint64_t)asi->vk_acceleration_structure, p_name); + } break; default: { DEV_ASSERT(false); } @@ -5674,6 +6216,8 @@ bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) { return vrs_capabilities.attachment_vrs_supported && physical_device_features.shaderStorageImageExtendedFormats; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; + case SUPPORTS_RAYTRACING: + return raytracing_capabilities.buffer_device_address_support && raytracing_capabilities.acceleration_structure_support && raytracing_capabilities.raytracing_pipeline_support; default: return false; } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 06cd2a31be6e..309851669eb3 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -100,6 +100,17 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { bool storage_input_output_16 = false; }; + struct RaytracingCapabilities { + bool buffer_device_address_support = false; + bool acceleration_structure_support = false; + bool raytracing_pipeline_support = false; + uint32_t shader_group_handle_size = 0; + uint32_t shader_group_handle_alignment = 0; + uint32_t shader_group_handle_size_aligned = 0; + uint32_t shader_group_base_alignment = 0; + bool validation = false; + }; + struct DeviceFunctions { PFN_vkCreateSwapchainKHR CreateSwapchainKHR = nullptr; PFN_vkDestroySwapchainKHR DestroySwapchainKHR = nullptr; @@ -116,6 +127,10 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { // Debug device fault. PFN_vkGetDeviceFaultInfoEXT GetDeviceFaultInfoEXT = nullptr; + + // Raytracing extensions. + PFN_vkCreateAccelerationStructureKHR CreateAccelerationStructureKHR = nullptr; + PFN_vkCreateRayTracingPipelinesKHR CreateRaytracingPipelinesKHR = nullptr; }; // Debug marker extensions. VkDebugReportObjectTypeEXT _convert_to_debug_report_objectType(VkObjectType p_object_type); @@ -138,6 +153,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { VRSCapabilities vrs_capabilities; ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; + RaytracingCapabilities raytracing_capabilities; bool pipeline_cache_control_support = false; bool device_fault_support = false; #if defined(VK_TRACK_DEVICE_MEMORY) @@ -198,6 +214,10 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { VkBufferView vk_view = VK_NULL_HANDLE; // For texel buffers. }; +private: + VkDeviceAddress _buffer_get_device_address(BufferID p_buffer); + +public: virtual BufferID buffer_create(uint64_t p_size, BitField p_usage, MemoryAllocationType p_allocation_type) override final; virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) override final; virtual void buffer_free(BufferID p_buffer) override final; @@ -424,7 +444,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { uint64_t vertex_input_mask = 0; uint32_t fragment_output_mask = 0; uint32_t specialization_constants_count = 0; - uint32_t is_compute = 0; + PipelineType pipeline_type = PipelineType::RASTERIZATION; uint32_t compute_local_size[3] = {}; uint32_t set_count = 0; uint32_t push_constant_size = 0; @@ -434,11 +454,28 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { }; }; + struct RaytracingShaderRegions { + VkStridedDeviceAddressRegionKHR raygen; + uint32_t raygen_count = 0; + VkStridedDeviceAddressRegionKHR miss; + uint32_t miss_count = 0; + VkStridedDeviceAddressRegionKHR closest_hit; + uint32_t closest_hit_count = 0; + VkStridedDeviceAddressRegionKHR call; + uint32_t group_count = 0; + + // Size of one shader group handle + LocalVector handles_data; + }; + struct ShaderInfo { VkShaderStageFlags vk_push_constant_stages = 0; TightLocalVector vk_stages_create_info; + TightLocalVector vk_groups_create_info; TightLocalVector vk_descriptor_set_layouts; VkPipelineLayout vk_pipeline_layout = VK_NULL_HANDLE; + RaytracingShaderRegions regions; + BufferID sbt_buffer; }; public: @@ -626,6 +663,50 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; + /********************/ + /**** RAYTRACING ****/ + /********************/ + struct AccelerationStructureInfo { + VkAccelerationStructureKHR vk_acceleration_structure = VK_NULL_HANDLE; + // Buffer used for the structure + RDD::BufferID buffer; + // Buffer used for building the structure + RDD::BufferID scratch_buffer; + // Buffer used for instances in a TLAS + RDD::BufferID instances_buffer; + + // Required for building + VkAccelerationStructureGeometryKHR geometry; + LocalVector instances; + VkAccelerationStructureBuildGeometryInfoKHR build_info; + VkAccelerationStructureBuildRangeInfoKHR range_info; + }; + + virtual AccelerationStructureID blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, BufferID p_transform_buffer, uint64_t p_transform_offset) override final; + virtual AccelerationStructureID tlas_create(const LocalVector &p_blases) override final; + virtual void acceleration_structure_free(AccelerationStructureID p_acceleration_structure) override final; + +private: + void _acceleration_structure_create(VkAccelerationStructureTypeKHR p_type, VkAccelerationStructureBuildSizesInfoKHR p_size_info, AccelerationStructureInfo *r_accel_info); + +public: + // ----- PIPELINE ----- + + struct RaytracingPipelineInfo { + VkPipeline vk_pipeline; + }; + + virtual RaytracingPipelineID raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; + virtual void raytracing_pipeline_free(RaytracingPipelineID p_pipeline) override final; + + // ----- COMMANDS ----- + + virtual void command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure) override final; + virtual void command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) override final; + virtual void command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final; + virtual void command_raytracing_trace_rays(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline, ShaderID p_shader, uint32_t p_width, uint32_t p_height) override final; + +public: /*****************/ /**** QUERIES ****/ /*****************/ diff --git a/editor/plugins/shader_file_editor_plugin.cpp b/editor/plugins/shader_file_editor_plugin.cpp index d2fd9b1cc05a..af75de57560f 100644 --- a/editor/plugins/shader_file_editor_plugin.cpp +++ b/editor/plugins/shader_file_editor_plugin.cpp @@ -268,7 +268,10 @@ ShaderFileEditor::ShaderFileEditor() { "Fragment", "TessControl", "TessEval", - "Compute" + "Compute", + "Raygen", + "Miss", + "ClosestHit", }; stage_hb = memnew(HBoxContainer); diff --git a/gles3_builders.py b/gles3_builders.py index a81d42b42e23..3204fffdb606 100644 --- a/gles3_builders.py +++ b/gles3_builders.py @@ -10,6 +10,9 @@ class GLES3HeaderStruct: def __init__(self): self.vertex_lines = [] self.fragment_lines = [] + self.raygen_lines = [] + self.miss_lines = [] + self.closest_hit_lines = [] self.uniforms = [] self.fbos = [] self.texunits = [] @@ -25,6 +28,9 @@ def __init__(self): self.line_offset = 0 self.vertex_offset = 0 self.fragment_offset = 0 + self.raygen_offset = 0 + self.miss_offset = 0 + self.closest_hit_offset = 0 self.variant_defines = [] self.variant_names = [] self.specialization_names = [] @@ -88,6 +94,27 @@ def include_file_in_gles3_header(filename: str, header_data: GLES3HeaderStruct, header_data.fragment_offset = header_data.line_offset continue + if line.find("#[raygen]") != -1: + header_data.reading = "raygen" + line = fs.readline() + header_data.line_offset += 1 + header_data.raygen_offset = header_data.line_offset + continue + + if line.find("#[miss]") != -1: + header_data.reading = "miss" + line = fs.readline() + header_data.line_offset += 1 + header_data.miss_offset = header_data.line_offset + continue + + if line.find("#[closest_hit]") != -1: + header_data.reading = "closest_hit" + line = fs.readline() + header_data.line_offset += 1 + header_data.closest_hit_offset = header_data.line_offset + continue + while line.find("#include ") != -1: includeline = line.replace("#include ", "").strip()[1:-1] @@ -182,6 +209,12 @@ def include_file_in_gles3_header(filename: str, header_data: GLES3HeaderStruct, header_data.vertex_lines += [line] if header_data.reading == "fragment": header_data.fragment_lines += [line] + if header_data.reading == "raygen": + header_data.raygen_lines += [line] + if header_data.reading == "miss": + header_data.miss_lines += [line] + if header_data.reading == "closest_hit": + header_data.closest_hit_lines += [line] line = fs.readline() header_data.line_offset += 1 diff --git a/glsl_builders.py b/glsl_builders.py index 82c15fc93bee..7b797f8a0c89 100644 --- a/glsl_builders.py +++ b/glsl_builders.py @@ -11,16 +11,25 @@ def __init__(self): self.vertex_lines = [] self.fragment_lines = [] self.compute_lines = [] + self.raygen_lines = [] + self.miss_lines = [] + self.closest_hit_lines = [] self.vertex_included_files = [] self.fragment_included_files = [] self.compute_included_files = [] + self.raygen_included_files = [] + self.miss_included_files = [] + self.closest_hit_included_files = [] self.reading = "" self.line_offset = 0 self.vertex_offset = 0 self.fragment_offset = 0 self.compute_offset = 0 + self.raygen_offset = 0 + self.miss_offset = 0 + self.closest_hit_offset = 0 def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: int) -> RDHeaderStruct: @@ -53,6 +62,27 @@ def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: header_data.compute_offset = header_data.line_offset continue + if line.find("#[raygen]") != -1: + header_data.reading = "raygen" + line = fs.readline() + header_data.line_offset += 1 + header_data.raygen_offset = header_data.line_offset + continue + + if line.find("#[miss]") != -1: + header_data.reading = "miss" + line = fs.readline() + header_data.line_offset += 1 + header_data.miss_offset = header_data.line_offset + continue + + if line.find("#[closest_hit]") != -1: + header_data.reading = "closest_hit" + line = fs.readline() + header_data.line_offset += 1 + header_data.closest_hit_offset = header_data.line_offset + continue + while line.find("#include ") != -1: includeline = line.replace("#include ", "").strip()[1:-1] @@ -74,6 +104,18 @@ def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: header_data.compute_included_files += [included_file] if include_file_in_rd_header(included_file, header_data, depth + 1) is None: print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') + elif included_file not in header_data.raygen_included_files and header_data.reading == "raygen": + header_data.raygen_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') + elif included_file not in header_data.miss_included_files and header_data.reading == "miss": + header_data.miss_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') + elif included_file not in header_data.closest_hit_included_files and header_data.reading == "closest_hit": + header_data.closest_hit_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') line = fs.readline() @@ -85,6 +127,12 @@ def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: header_data.fragment_lines += [line] if header_data.reading == "compute": header_data.compute_lines += [line] + if header_data.reading == "raygen": + header_data.raygen_lines += [line] + if header_data.reading == "miss": + header_data.miss_lines += [line] + if header_data.reading == "closest_hit": + header_data.closest_hit_lines += [line] line = fs.readline() header_data.line_offset += 1 @@ -109,7 +157,14 @@ def build_rd_header( out_file_ifdef = out_file_base.replace(".", "_").upper() out_file_class = out_file_base.replace(".glsl.gen.h", "").title().replace("_", "").replace(".", "") + "ShaderRD" - if header_data.compute_lines: + if header_data.raygen_lines: + body_parts = [ + "static const char _raygen_code[] = {\n%s\n\t\t};" % to_raw_cstring(header_data.raygen_lines), + "static const char _miss_code[] = {\n%s\n\t\t};" % to_raw_cstring(header_data.miss_lines), + "static const char _closest_hit_code[] = {\n%s\n\t\t};" % to_raw_cstring(header_data.closest_hit_lines), + f'setup_raytracing(_raygen_code, _miss_code, _closest_hit_code, "{out_file_class}");', + ] + elif header_data.compute_lines: body_parts = [ "static const char _compute_code[] = {\n%s\n\t\t};" % to_raw_cstring(header_data.compute_lines), f'setup(nullptr, nullptr, _compute_code, "{out_file_class}");', diff --git a/modules/glslang/register_types.cpp b/modules/glslang/register_types.cpp index 81505f716a2d..25fc42fe7ac2 100644 --- a/modules/glslang/register_types.cpp +++ b/modules/glslang/register_types.cpp @@ -48,7 +48,10 @@ static Vector _compile_shader_glsl(RenderingDevice::ShaderStage p_stage EShLangFragment, EShLangTessControl, EShLangTessEvaluation, - EShLangCompute + EShLangCompute, + EShLangRayGen, + EShLangMiss, + EShLangClosestHit, }; int ClientInputSemanticsVersion = 100; // maps to, say, #define VULKAN 100 diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index 6234cddee393..ada76b0e8ad7 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -66,6 +66,15 @@ void ShaderRD::_add_stage(const char *p_code, StageType p_stage_type) { case STAGE_TYPE_COMPUTE: chunk.type = StageTemplate::Chunk::TYPE_COMPUTE_GLOBALS; break; + case STAGE_TYPE_RAYGEN: + chunk.type = StageTemplate::Chunk::TYPE_RAYGEN_GLOBALS; + break; + case STAGE_TYPE_MISS: + chunk.type = StageTemplate::Chunk::TYPE_MISS_GLOBALS; + break; + case STAGE_TYPE_CLOSEST_HIT: + chunk.type = StageTemplate::Chunk::TYPE_CLOSEST_HIT_GLOBALS; + break; default: { } } @@ -108,9 +117,9 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con if (p_compute_code) { _add_stage(p_compute_code, STAGE_TYPE_COMPUTE); - is_compute = true; + pipeline_type = RD::PipelineType::COMPUTE; } else { - is_compute = false; + pipeline_type = RD::PipelineType::RASTERIZATION; if (p_vertex_code) { _add_stage(p_vertex_code, STAGE_TYPE_VERTEX); } @@ -138,6 +147,39 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con base_sha256 = tohash.as_string().sha256_text(); } +void ShaderRD::setup_raytracing(const char *p_raygen_code, const char *p_miss_code, const char *p_closest_hit_code, const char *p_name) { + name = p_name; + + pipeline_type = RD::PipelineType::RAYTRACING; + if (p_raygen_code) { + _add_stage(p_raygen_code, STAGE_TYPE_RAYGEN); + } + if (p_miss_code) { + _add_stage(p_miss_code, STAGE_TYPE_MISS); + } + if (p_closest_hit_code) { + _add_stage(p_closest_hit_code, STAGE_TYPE_CLOSEST_HIT); + } + + StringBuilder tohash; + tohash.append("[GodotVersionNumber]"); + tohash.append(VERSION_NUMBER); + tohash.append("[GodotVersionHash]"); + tohash.append(VERSION_HASH); + tohash.append("[SpirvCacheKey]"); + tohash.append(RenderingDevice::get_singleton()->shader_get_spirv_cache_key()); + tohash.append("[BinaryCacheKey]"); + tohash.append(RenderingDevice::get_singleton()->shader_get_binary_cache_key()); + tohash.append("[Raygen]"); + tohash.append(p_raygen_code ? p_raygen_code : ""); + tohash.append("[Miss]"); + tohash.append(p_miss_code ? p_miss_code : ""); + tohash.append("[ClosestHit]"); + tohash.append(p_closest_hit_code ? p_closest_hit_code : ""); + + base_sha256 = tohash.as_string().sha256_text(); +} + RID ShaderRD::version_create() { //initialize() was never called ERR_FAIL_COND_V(group_to_variant_map.is_empty(), RID()); @@ -220,6 +262,15 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c case StageTemplate::Chunk::TYPE_COMPUTE_GLOBALS: { builder.append(p_version->compute_globals.get_data()); // compute globals } break; + case StageTemplate::Chunk::TYPE_RAYGEN_GLOBALS: { + builder.append(p_version->raygen_globals.get_data()); // raygen globals + } break; + case StageTemplate::Chunk::TYPE_MISS_GLOBALS: { + builder.append(p_version->miss_globals.get_data()); // miss globals + } break; + case StageTemplate::Chunk::TYPE_CLOSEST_HIT_GLOBALS: { + builder.append(p_version->closest_hit_globals.get_data()); // closest_hit globals + } break; case StageTemplate::Chunk::TYPE_CODE: { if (p_version->code_sections.has(chunk.code)) { builder.append(p_version->code_sections[chunk.code].get_data()); @@ -246,7 +297,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX; bool build_ok = true; - if (!is_compute) { + if (pipeline_type == RD::PipelineType::RASTERIZATION) { //vertex stage StringBuilder builder; @@ -263,7 +314,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { } } - if (!is_compute && build_ok) { + if (pipeline_type == RD::PipelineType::RASTERIZATION && build_ok) { //fragment stage current_stage = RD::SHADER_STAGE_FRAGMENT; @@ -281,7 +332,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { } } - if (is_compute) { + if (pipeline_type == RD::PipelineType::COMPUTE) { //compute stage current_stage = RD::SHADER_STAGE_COMPUTE; @@ -300,9 +351,87 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { } } + if (pipeline_type == RD::PipelineType::RAYTRACING) { + { + current_stage = RD::SHADER_STAGE_RAYGEN; + + StringBuilder builder; + _build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_RAYGEN]); + + current_source = builder.as_string(); + + RD::ShaderStageSPIRVData stage; + stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_RAYGEN, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spirv.size() == 0) { + build_ok = false; + } else { + stage.shader_stage = RD::SHADER_STAGE_RAYGEN; + stages.push_back(stage); + } + } + if (build_ok) { + current_stage = RD::SHADER_STAGE_MISS; + + StringBuilder builder; + _build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_MISS]); + + current_source = builder.as_string(); + + RD::ShaderStageSPIRVData stage; + stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_MISS, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spirv.size() == 0) { + build_ok = false; + } else { + stage.shader_stage = RD::SHADER_STAGE_MISS; + stages.push_back(stage); + } + } + if (build_ok) { + current_stage = RD::SHADER_STAGE_CLOSEST_HIT; + + StringBuilder builder; + _build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_CLOSEST_HIT]); + + current_source = builder.as_string(); + + RD::ShaderStageSPIRVData stage; + stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_CLOSEST_HIT, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spirv.size() == 0) { + build_ok = false; + } else { + stage.shader_stage = RD::SHADER_STAGE_CLOSEST_HIT; + stages.push_back(stage); + } + } + } + if (!build_ok) { MutexLock lock(variant_set_mutex); //properly print the errors - ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_COMPUTE ? "Compute " : (current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment")) + " shader, variant #" + itos(variant) + " (" + variant_defines[variant].text.get_data() + ")."); + String stage_string; + switch (current_stage) { + case RD::SHADER_STAGE_VERTEX: + stage_string = "Vertex"; + break; + case RD::SHADER_STAGE_FRAGMENT: + stage_string = "Fragment"; + break; + case RD::SHADER_STAGE_COMPUTE: + stage_string = "Compute"; + break; + case RD::SHADER_STAGE_RAYGEN: + stage_string = "Raygen"; + break; + case RD::SHADER_STAGE_MISS: + stage_string = "Miss"; + break; + case RD::SHADER_STAGE_CLOSEST_HIT: + stage_string = "ClosestHit"; + break; + default: + stage_string = "Unknown"; + break; + } + ERR_PRINT("Error compiling " + stage_string + " shader, variant #" + itos(variant) + " (" + variant_defines[variant].text.get_data() + ")."); ERR_PRINT(error); #ifdef DEBUG_ENABLED @@ -331,7 +460,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.resize(variant_defines.size()); for (int i = 0; i < source_code.versions.size(); i++) { - if (!is_compute) { + if (pipeline_type == RD::PipelineType::RASTERIZATION) { //vertex stage StringBuilder builder; @@ -344,7 +473,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.write[i].stages.push_back(stage); } - if (!is_compute) { + if (pipeline_type == RD::PipelineType::RASTERIZATION) { //fragment stage StringBuilder builder; @@ -357,7 +486,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.write[i].stages.push_back(stage); } - if (is_compute) { + if (pipeline_type == RD::PipelineType::COMPUTE) { //compute stage StringBuilder builder; @@ -369,6 +498,43 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.write[i].stages.push_back(stage); } + + if (pipeline_type == RD::PipelineType::RAYTRACING) { + //raygen stage + + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_RAYGEN]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "raygen"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + if (pipeline_type == RD::PipelineType::RAYTRACING) { + // miss stage + + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_MISS]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "miss"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + if (pipeline_type == RD::PipelineType::RAYTRACING) { + // closest_hit stage + + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_CLOSEST_HIT]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "closest_hit"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } } return source_code; @@ -385,6 +551,12 @@ String ShaderRD::_version_get_sha1(Version *p_version) const { hash_build.append(p_version->fragment_globals.get_data()); hash_build.append("[compute_globals]"); hash_build.append(p_version->compute_globals.get_data()); + hash_build.append("[raygen_globals]"); + hash_build.append(p_version->raygen_globals.get_data()); + hash_build.append("[miss_globals]"); + hash_build.append(p_version->miss_globals.get_data()); + hash_build.append("[closest_hit_globals]"); + hash_build.append(p_version->closest_hit_globals.get_data()); Vector code_sections; for (const KeyValue &E : p_version->code_sections) { @@ -585,7 +757,7 @@ void ShaderRD::_compile_ensure_finished(Version *p_version) { } void ShaderRD::version_set_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector &p_custom_defines) { - ERR_FAIL_COND(is_compute); + ERR_FAIL_COND(pipeline_type != RD::PipelineType::RASTERIZATION); Version *version = version_owner.get_or_null(p_version); ERR_FAIL_NULL(version); @@ -620,7 +792,7 @@ void ShaderRD::version_set_code(RID p_version, const HashMap &p_ } void ShaderRD::version_set_compute_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_compute_globals, const Vector &p_custom_defines) { - ERR_FAIL_COND(!is_compute); + ERR_FAIL_COND(pipeline_type != RD::PipelineType::COMPUTE); Version *version = version_owner.get_or_null(p_version); ERR_FAIL_NULL(version); @@ -654,6 +826,41 @@ void ShaderRD::version_set_compute_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_raygen_globals, const String &p_miss_globals, const String &p_closest_hit_globals, const Vector &p_custom_defines) { + ERR_FAIL_COND(pipeline_type != RD::PipelineType::RAYTRACING); + + Version *version = version_owner.get_or_null(p_version); + ERR_FAIL_NULL(version); + + version->raygen_globals = p_raygen_globals.utf8(); + version->miss_globals = p_miss_globals.utf8(); + version->closest_hit_globals = p_closest_hit_globals.utf8(); + version->uniforms = p_uniforms.utf8(); + + version->code_sections.clear(); + for (const KeyValue &E : p_code) { + version->code_sections[StringName(E.key.to_upper())] = E.value.utf8(); + } + + version->custom_defines.clear(); + for (int i = 0; i < p_custom_defines.size(); i++) { + version->custom_defines.push_back(p_custom_defines[i].utf8()); + } + + version->dirty = true; + if (version->initialize_needed) { + _initialize_version(version); + for (int i = 0; i < group_enabled.size(); i++) { + if (!group_enabled[i]) { + _allocate_placeholders(version, i); + continue; + } + _compile_version_start(version, i); + } + version->initialize_needed = false; + } +} + bool ShaderRD::version_is_valid(RID p_version) { Version *version = version_owner.get_or_null(p_version); ERR_FAIL_NULL_V(version, false); diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h index 90e41947b937..e1d5d053103d 100644 --- a/servers/rendering/renderer_rd/shader_rd.h +++ b/servers/rendering/renderer_rd/shader_rd.h @@ -68,6 +68,9 @@ class ShaderRD { CharString vertex_globals; CharString compute_globals; CharString fragment_globals; + CharString raygen_globals; + CharString miss_globals; + CharString closest_hit_globals; HashMap code_sections; Vector custom_defines; Vector group_compilation_tasks; @@ -106,6 +109,9 @@ class ShaderRD { TYPE_VERTEX_GLOBALS, TYPE_FRAGMENT_GLOBALS, TYPE_COMPUTE_GLOBALS, + TYPE_RAYGEN_GLOBALS, + TYPE_MISS_GLOBALS, + TYPE_CLOSEST_HIT_GLOBALS, TYPE_CODE, TYPE_TEXT }; @@ -117,7 +123,7 @@ class ShaderRD { LocalVector chunks; }; - bool is_compute = false; + RD::PipelineType pipeline_type = RD::PipelineType::RASTERIZATION; String name; @@ -137,6 +143,9 @@ class ShaderRD { STAGE_TYPE_VERTEX, STAGE_TYPE_FRAGMENT, STAGE_TYPE_COMPUTE, + STAGE_TYPE_RAYGEN, + STAGE_TYPE_MISS, + STAGE_TYPE_CLOSEST_HIT, STAGE_TYPE_MAX, }; @@ -155,12 +164,14 @@ class ShaderRD { protected: ShaderRD(); void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); + void setup_raytracing(const char *p_raygen_code, const char *p_miss_code, const char *p_closest_hit_code, const char *p_name); public: RID version_create(); void version_set_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector &p_custom_defines); void version_set_compute_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_compute_globals, const Vector &p_custom_defines); + void version_set_raytracing_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_raygen_globals, const String &p_miss_globals, const String &p_closest_hit_globals, const Vector &p_custom_defines); _FORCE_INLINE_ RID version_get_shader(RID p_version, int p_variant) { ERR_FAIL_INDEX_V(p_variant, variant_defines.size(), RID()); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index ab5de3cb7fd1..b17784c188cb 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -654,7 +654,13 @@ RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); + if (p_usage.has_flag(STORAGE_BUFFER_USAGE_SHADER_DEVICE_ADDRESS)) { + buffer.usage.set_flag(RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); + } + if (p_usage.has_flag(STORAGE_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY)) { + buffer.usage.set_flag(RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT); + } + buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_CPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); // Storage buffers are assumed to be mutable. @@ -2613,11 +2619,11 @@ RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); + buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_CPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); // Vertex buffers are assumed to be immutable unless they don't have initial data or they've been marked for storage explicitly. @@ -2785,8 +2791,8 @@ RID RenderingDevice::index_buffer_create(uint32_t p_index_count, IndexBufferForm index_buffer.max_index = 0xFFFFFFFF; #endif index_buffer.size = size_bytes; - index_buffer.usage = (RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT | RDD::BUFFER_USAGE_INDEX_BIT); - index_buffer.driver_id = driver->buffer_create(index_buffer.size, index_buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); + index_buffer.usage = (RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT | RDD::BUFFER_USAGE_INDEX_BIT | RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT); + index_buffer.driver_id = driver->buffer_create(index_buffer.size, index_buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_CPU); ERR_FAIL_COND_V(!index_buffer.driver_id, RID()); // Index buffers are assumed to be immutable unless they don't have initial data. @@ -2935,6 +2941,11 @@ RID RenderingDevice::shader_create_from_bytecode(const Vector &p_shader case SHADER_STAGE_COMPUTE: shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); break; + case SHADER_STAGE_RAYGEN: + case SHADER_STAGE_MISS: + case SHADER_STAGE_CLOSEST_HIT: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT); + break; default: DEV_ASSERT(false && "Unknown shader stage."); break; @@ -2972,7 +2983,7 @@ RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); + buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_CPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); // Uniform buffers are assumed to be immutable unless they don't have initial data. @@ -3339,7 +3350,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p _check_transfer_worker_buffer(buffer); } break; case UNIFORM_TYPE_INPUT_ATTACHMENT: { - ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") supplied for compute shader (this is not allowed)."); + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PipelineType::RASTERIZATION, RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") supplied for non-render shader (this is not allowed)."); if (uniform.get_id_count() != (uint32_t)set_uniform.length) { if (set_uniform.length > 1) { @@ -3365,6 +3376,24 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p _check_transfer_worker_texture(texture); } } break; + case UNIFORM_TYPE_ACCELERATION_STRUCTURE: { + ERR_FAIL_COND_V_MSG(uniform.get_id_count() != 1, RID(), + "Acceleration structure supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.get_id_count()) + " provided)."); + + RID accel_id = uniform.get_id(0); + AccelerationStructure *accel = acceleration_structure_owner.get_or_null(accel_id); + ERR_FAIL_NULL_V_MSG(accel, RID(), "Acceleration Structure supplied (binding: " + itos(uniform.binding) + ") is invalid."); + + if (accel->draw_tracker != nullptr) { + draw_trackers.push_back(accel->draw_tracker); + // Acceleration structure is never going to be writable from raytracing shaders + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ); + } else { + untracked_usage[accel_id] = RDG::RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ; + } + + driver_uniform.ids.push_back(accel->driver_id); + } break; default: { } } @@ -3424,7 +3453,8 @@ RID RenderingDevice::render_pipeline_create(RID p_shader, FramebufferFormatID p_ // Needs a shader. Shader *shader = shader_owner.get_or_null(p_shader); ERR_FAIL_NULL_V(shader, RID()); - ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), "Compute shaders can't be used in render pipelines"); + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PipelineType::RASTERIZATION, RID(), + "Only render shaders can be used in render pipelines"); FramebufferFormat fb_format; { @@ -3614,7 +3644,7 @@ RID RenderingDevice::compute_pipeline_create(RID p_shader, const Vectoris_compute, RID(), + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PipelineType::COMPUTE, RID(), "Non-compute shaders can't be used in compute pipelines"); } @@ -3667,6 +3697,57 @@ bool RenderingDevice::compute_pipeline_is_valid(RID p_pipeline) { return compute_pipeline_owner.owns(p_pipeline); } +RID RenderingDevice::raytracing_pipeline_create(RID p_shader, const Vector &p_specialization_constants) { + _THREAD_SAFE_METHOD_ + + // Needs a shader. + Shader *shader = shader_owner.get_or_null(p_shader); + ERR_FAIL_NULL_V(shader, RID()); + + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PipelineType::RAYTRACING, RID(), + "Only raytracing shaders can be used in raytracing pipelines"); + + for (int i = 0; i < shader->specialization_constants.size(); i++) { + const ShaderSpecializationConstant &sc = shader->specialization_constants[i]; + for (int j = 0; j < p_specialization_constants.size(); j++) { + const PipelineSpecializationConstant &psc = p_specialization_constants[j]; + if (psc.constant_id == sc.constant_id) { + ERR_FAIL_COND_V_MSG(psc.type != sc.type, RID(), "Specialization constant provided for id (" + itos(sc.constant_id) + ") is of the wrong type."); + break; + } + } + } + + RaytracingPipeline pipeline; + pipeline.driver_id = driver->raytracing_pipeline_create(shader->driver_id, p_specialization_constants); + ERR_FAIL_COND_V(!pipeline.driver_id, RID()); + + if (pipeline_cache_enabled) { + _update_pipeline_cache(); + } + + pipeline.shader = p_shader; + pipeline.shader_driver_id = shader->driver_id; + pipeline.shader_layout_hash = shader->layout_hash; + pipeline.set_formats = shader->set_formats; + pipeline.push_constant_size = shader->push_constant_size; + + // Create ID to associate with this pipeline. + RID id = raytracing_pipeline_owner.make_rid(pipeline); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + // Now add all the dependencies. + _add_dependency(id, p_shader); + return id; +} + +bool RenderingDevice::raytracing_pipeline_is_valid(RID p_pipeline) { + _THREAD_SAFE_METHOD_ + + return raytracing_pipeline_owner.owns(p_pipeline); +} + /****************/ /**** SCREEN ****/ /****************/ @@ -4280,6 +4361,78 @@ void RenderingDevice::draw_list_bind_index_array(DrawListID p_list, RID p_index_ } } +RID RenderingDevice::blas_create(RID p_vertex_array, RID p_index_array, RID p_transform_buffer, uint64_t p_transform_offset) { + ERR_RENDER_THREAD_GUARD_V(RID()); + + const VertexArray *vertex_array = vertex_array_owner.get_or_null(p_vertex_array); + ERR_FAIL_NULL_V(vertex_array, RID()); + RDD::VertexFormatID vertex_format; + if (vertex_array->description != INVALID_ID) { + ERR_FAIL_COND_V(!vertex_formats.has(vertex_array->description), RID()); + vertex_format = vertex_formats[vertex_array->description].driver_id; + } + + // Indices are optional. + const IndexArray *index_array = index_array_owner.get_or_null(p_index_array); + RDD::BufferID index_buffer = RDD::BufferID(); + IndexBufferFormat index_format = IndexBufferFormat::INDEX_BUFFER_FORMAT_UINT32; + uint32_t index_offset_bytes = 0; + uint32_t index_count = 0; + if (index_array) { + index_buffer = index_array->driver_id; + index_format = index_array->format; + index_offset_bytes = index_array->offset * (index_array->format == INDEX_BUFFER_FORMAT_UINT16 ? sizeof(uint16_t) : sizeof(uint32_t)); + index_count = index_array->indices; + } + + const Buffer *transform_buffer = storage_buffer_owner.get_or_null(p_transform_buffer); + ERR_FAIL_NULL_V(transform_buffer, RID()); + ERR_FAIL_COND_V_MSG(!transform_buffer->usage.has_flag(RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT), RID(), "Transform buffer provided was not created for shader device address usage."); + ERR_FAIL_COND_V_MSG(!transform_buffer->usage.has_flag(RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT), RID(), "Transform buffer provided was not created for acceleration structure build input."); + + AccelerationStructure acceleration_structure; + acceleration_structure.type = RDD::ACCELERATION_STRUCTURE_TYPE_BLAS; + acceleration_structure.driver_id = driver->blas_create(vertex_array->buffers[0], vertex_array->offsets[0], vertex_format, vertex_array->vertex_count, index_buffer, index_format, index_offset_bytes, index_count, transform_buffer->driver_id, p_transform_offset); + ERR_FAIL_COND_V(!acceleration_structure.driver_id, RID()); + + acceleration_structure.draw_tracker = RDG::resource_tracker_create(); + acceleration_structure.draw_tracker->acceleration_structure_driver_id = acceleration_structure.driver_id; + + for (int i = 0; i < vertex_array->draw_trackers.size(); i++) { + draw_graph.add_raytracing_list_usage(vertex_array->draw_trackers[i], RDG::RESOURCE_USAGE_VERTEX_BUFFER_READ); + } + if (index_array && index_array->draw_tracker != nullptr) { + draw_graph.add_raytracing_list_usage(index_array->draw_tracker, RDG::RESOURCE_USAGE_INDEX_BUFFER_READ); + } + draw_graph.add_raytracing_list_usage(transform_buffer->draw_tracker, RDG::RESOURCE_USAGE_STORAGE_BUFFER_READ); + + RID id = acceleration_structure_owner.make_rid(acceleration_structure); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + return id; +} + +RID RenderingDevice::tlas_create(const Vector &p_blases) { + LocalVector blases; + for (Vector::ConstIterator itr = p_blases.begin(); itr != p_blases.end(); ++itr) { + const AccelerationStructure *blas = acceleration_structure_owner.get_or_null(*itr); + ERR_FAIL_NULL_V(blas, RID()); + blases.push_back(blas->driver_id); + } + + AccelerationStructure acceleration_structure; + acceleration_structure.type = RDD::ACCELERATION_STRUCTURE_TYPE_TLAS; + acceleration_structure.driver_id = driver->tlas_create(blases); + + ERR_FAIL_COND_V(!acceleration_structure.driver_id, RID()); + RID id = acceleration_structure_owner.make_rid(acceleration_structure); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + return id; +} + void RenderingDevice::draw_list_set_line_width(DrawListID p_list, float p_width) { ERR_RENDER_THREAD_GUARD(); @@ -4662,6 +4815,297 @@ void RenderingDevice::draw_list_end() { draw_list_bound_textures.clear(); } +/***************************/ +/**** RAYTRACING LISTS ****/ +/**************************/ + +RenderingDevice::RaytracingListID RenderingDevice::raytracing_list_begin() { + _THREAD_SAFE_METHOD_ + + ERR_FAIL_COND_V_MSG(raytracing_list != nullptr, INVALID_ID, "Only one draw/compute/raytracing list can be active at the same time."); + + // Lock while raytracing_list is active. + _THREAD_SAFE_LOCK_ + + raytracing_list = memnew(RaytracingList); + + draw_graph.add_raytracing_list_begin(); + + return ID_TYPE_RAYTRACING_LIST; +} + +void RenderingDevice::raytracing_list_build_acceleration_structure(RaytracingListID p_list, RID p_acceleration_structure) { + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_NULL(raytracing_list); + + const AccelerationStructure *accel = acceleration_structure_owner.get_or_null(p_acceleration_structure); + ERR_FAIL_NULL(accel); + + draw_graph.add_raytracing_list_build_acceleration_structure(accel->driver_id, accel->type); +} + +void RenderingDevice::raytracing_list_bind_raytracing_pipeline(RaytracingListID p_list, RID p_raytracing_pipeline) { + // Must be called within a raytracing list, the class mutex is locked during that time + + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_NULL(raytracing_list); + + RaytracingList *rl = raytracing_list; + + const RaytracingPipeline *pipeline = raytracing_pipeline_owner.get_or_null(p_raytracing_pipeline); + ERR_FAIL_NULL(pipeline); + + if (p_raytracing_pipeline == rl->state.pipeline) { + return; // Redundant state, return. + } + + rl->state.pipeline = p_raytracing_pipeline; + rl->state.pipeline_driver_id = pipeline->driver_id; + + draw_graph.add_raytracing_list_bind_pipeline(pipeline->driver_id); + + if (rl->state.pipeline_shader != pipeline->shader) { + // Shader changed, so descriptor sets may become incompatible. + + uint32_t pcount = pipeline->set_formats.size(); // Formats count in this pipeline. + rl->state.set_count = MAX(rl->state.set_count, pcount); + const uint32_t *pformats = pipeline->set_formats.ptr(); // Pipeline set formats. + + uint32_t first_invalid_set = UINT32_MAX; // All valid by default. + switch (driver->api_trait_get(RDD::API_TRAIT_SHADER_CHANGE_INVALIDATION)) { + case RDD::SHADER_CHANGE_INVALIDATION_ALL_BOUND_UNIFORM_SETS: { + first_invalid_set = 0; + } break; + case RDD::SHADER_CHANGE_INVALIDATION_INCOMPATIBLE_SETS_PLUS_CASCADE: { + for (uint32_t i = 0; i < pcount; i++) { + if (rl->state.sets[i].pipeline_expected_format != pformats[i]) { + first_invalid_set = i; + break; + } + } + } break; + case RDD::SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH: { + if (rl->state.pipeline_shader_layout_hash != pipeline->shader_layout_hash) { + first_invalid_set = 0; + } + } break; + } + + for (uint32_t i = 0; i < pcount; i++) { + rl->state.sets[i].bound = rl->state.sets[i].bound && i < first_invalid_set; + rl->state.sets[i].pipeline_expected_format = pformats[i]; + } + + for (uint32_t i = pcount; i < rl->state.set_count; i++) { + // Unbind the ones above (not used) if exist. + rl->state.sets[i].bound = false; + } + + rl->state.set_count = pcount; // Update set count. + + if (pipeline->push_constant_size) { +#ifdef DEBUG_ENABLED + rl->validation.pipeline_push_constant_supplied = false; +#endif + } + + rl->state.pipeline_shader = pipeline->shader; + rl->state.pipeline_shader_driver_id = pipeline->shader_driver_id; + rl->state.pipeline_shader_layout_hash = pipeline->shader_layout_hash; + } + +#ifdef DEBUG_ENABLED + // Update raytracing pass pipeline info. + rl->validation.pipeline_active = true; + rl->validation.pipeline_push_constant_size = pipeline->push_constant_size; +#endif +} + +void RenderingDevice::raytracing_list_bind_uniform_set(RaytracingListID p_list, RID p_uniform_set, uint32_t p_index) { + // Must be called within a raytracing list, the class mutex is locked during that time + + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_NULL(raytracing_list); + + RaytracingList *rl = raytracing_list; + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(p_index >= driver->limit_get(LIMIT_MAX_BOUND_UNIFORM_SETS) || p_index >= MAX_UNIFORM_SETS, + "Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(driver->limit_get(LIMIT_MAX_BOUND_UNIFORM_SETS)) + ")."); +#endif + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(!rl->validation.active, "Submitted Raytracing Lists can no longer be modified."); +#endif + + UniformSet *uniform_set = uniform_set_owner.get_or_null(p_uniform_set); + ERR_FAIL_NULL(uniform_set); + + if (p_index > rl->state.set_count) { + rl->state.set_count = p_index; + } + + rl->state.sets[p_index].uniform_set_driver_id = uniform_set->driver_id; // Update set pointer. + rl->state.sets[p_index].bound = false; // Needs rebind. + rl->state.sets[p_index].uniform_set_format = uniform_set->format; + rl->state.sets[p_index].uniform_set = p_uniform_set; + +#if 0 + { // Validate that textures bound are not attached as framebuffer bindings. + uint32_t attachable_count = uniform_set->attachable_textures.size(); + const RID *attachable_ptr = uniform_set->attachable_textures.ptr(); + uint32_t bound_count = draw_list_bound_textures.size(); + const RID *bound_ptr = draw_list_bound_textures.ptr(); + for (uint32_t i = 0; i < attachable_count; i++) { + for (uint32_t j = 0; j < bound_count; j++) { + ERR_FAIL_COND_MSG(attachable_ptr[i] == bound_ptr[j], + "Attempted to use the same texture in framebuffer attachment and a uniform set, this is not allowed."); + } + } + } +#endif +} + +void RenderingDevice::raytracing_list_set_push_constant(RaytracingListID p_list, const void *p_data, uint32_t p_data_size) { + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_NULL(raytracing_list); + ERR_FAIL_COND_MSG(p_data_size > MAX_PUSH_CONSTANT_SIZE, "Push constants can't be bigger than 128 bytes to maintain compatibility."); + + RaytracingList *rl = raytracing_list; + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(!rl->validation.active, "Submitted Raytracing Lists can no longer be modified."); +#endif + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(p_data_size != rl->validation.pipeline_push_constant_size, + "This raytracing pipeline requires (" + itos(rl->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")"); +#endif + + draw_graph.add_raytracing_list_set_push_constant(rl->state.pipeline_shader_driver_id, p_data, p_data_size); + + // Store it in the state in case we need to restart the raytracing list. + memcpy(rl->state.push_constant_data, p_data, p_data_size); + rl->state.push_constant_size = p_data_size; + +#ifdef DEBUG_ENABLED + rl->validation.pipeline_push_constant_supplied = true; +#endif +} + +void RenderingDevice::raytracing_list_trace_rays(RaytracingListID p_list, uint32_t p_width, uint32_t p_height) { + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_NULL(raytracing_list); + + RaytracingList *rl = raytracing_list; + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(!rl->validation.active, "Submitted Raytracing Lists can no longer be modified."); +#endif + +#ifdef DEBUG_ENABLED + + ERR_FAIL_COND_MSG(!rl->validation.pipeline_active, "No raytracing pipeline was set before attempting to draw."); + + if (rl->validation.pipeline_push_constant_size > 0) { + // Using push constants, check that they were supplied. + ERR_FAIL_COND_MSG(!rl->validation.pipeline_push_constant_supplied, + "The shader in this pipeline requires a push constant to be set before drawing, but it's not present."); + } + +#endif + +#ifdef DEBUG_ENABLED + for (uint32_t i = 0; i < rl->state.set_count; i++) { + if (rl->state.sets[i].pipeline_expected_format == 0) { + // Nothing expected by this pipeline. + continue; + } + + if (rl->state.sets[i].pipeline_expected_format != rl->state.sets[i].uniform_set_format) { + if (rl->state.sets[i].uniform_set_format == 0) { + ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline."); + } else if (uniform_set_owner.owns(rl->state.sets[i].uniform_set)) { + UniformSet *us = uniform_set_owner.get_or_null(rl->state.sets[i].uniform_set); + ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(rl->state.pipeline_shader)); + } else { + ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(rl->state.pipeline_shader)); + } + } + } +#endif + + // Prepare descriptor sets if the API doesn't use pipeline barriers. + if (!driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { + for (uint32_t i = 0; i < rl->state.set_count; i++) { + if (rl->state.sets[i].pipeline_expected_format == 0) { + // Nothing expected by this pipeline. + continue; + } + + draw_graph.add_raytracing_list_uniform_set_prepare_for_use(rl->state.pipeline_shader_driver_id, rl->state.sets[i].uniform_set_driver_id, i); + } + } + + // Bind descriptor sets. + for (uint32_t i = 0; i < rl->state.set_count; i++) { + if (rl->state.sets[i].pipeline_expected_format == 0) { + continue; // Nothing expected by this pipeline. + } + if (!rl->state.sets[i].bound) { + // All good, see if this requires re-binding. + draw_graph.add_raytracing_list_bind_uniform_set(rl->state.pipeline_shader_driver_id, rl->state.sets[i].uniform_set_driver_id, i); + + UniformSet *uniform_set = uniform_set_owner.get_or_null(rl->state.sets[i].uniform_set); + _uniform_set_update_shared(uniform_set); + + draw_graph.add_raytracing_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); + + rl->state.sets[i].bound = true; + } + } + + Shader *shader = shader_owner.get_or_null(rl->state.pipeline_shader); + ERR_FAIL_NULL(shader); + + draw_graph.add_raytracing_list_trace_rays(rl->state.pipeline_driver_id, shader->driver_id, p_width, p_height); + rl->state.trace_count++; +} + +void RenderingDevice::raytracing_list_add_barrier(RaytracingListID p_list) { + // Must be called within a raytracing list, the class mutex is locked during that time + + raytracing_list_barrier_state = raytracing_list->state; + raytracing_list_end(); + raytracing_list_begin(); + + if (raytracing_list_barrier_state.pipeline.is_valid()) { + raytracing_list_bind_raytracing_pipeline(p_list, raytracing_list_barrier_state.pipeline); + } + + for (uint32_t i = 0; i < raytracing_list_barrier_state.set_count; i++) { + if (raytracing_list_barrier_state.sets[i].uniform_set.is_valid()) { + raytracing_list_bind_uniform_set(p_list, raytracing_list_barrier_state.sets[i].uniform_set, i); + } + } + + if (raytracing_list_barrier_state.push_constant_size > 0) { + raytracing_list_set_push_constant(p_list, raytracing_list_barrier_state.push_constant_data, raytracing_list_barrier_state.push_constant_size); + } +} + +void RenderingDevice::raytracing_list_end() { + ERR_FAIL_NULL(raytracing_list); + + draw_graph.add_raytracing_list_end(); + + memdelete(raytracing_list); + raytracing_list = nullptr; + + // Raytracing_list is no longer active. + _THREAD_SAFE_UNLOCK_ +} + /***********************/ /**** COMPUTE LISTS ****/ /***********************/ @@ -5661,6 +6105,14 @@ void RenderingDevice::_free_internal(RID p_id) { ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id); frames[frame].compute_pipelines_to_dispose_of.push_back(*pipeline); compute_pipeline_owner.free(p_id); + } else if (acceleration_structure_owner.owns(p_id)) { + AccelerationStructure *acceleration_structure = acceleration_structure_owner.get_or_null(p_id); + frames[frame].acceleration_structures_to_dispose_of.push_back(*acceleration_structure); + acceleration_structure_owner.free(p_id); + } else if (raytracing_pipeline_owner.owns(p_id)) { + RaytracingPipeline *pipeline = raytracing_pipeline_owner.get_or_null(p_id); + frames[frame].raytracing_pipelines_to_dispose_of.push_back(*pipeline); + raytracing_pipeline_owner.free(p_id); } else { #ifdef DEV_ENABLED ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id()) + " " + resource_name); @@ -5711,6 +6163,12 @@ void RenderingDevice::set_resource_name(RID p_id, const String &p_name) { } else if (compute_pipeline_owner.owns(p_id)) { ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id); driver->set_object_name(RDD::OBJECT_TYPE_PIPELINE, pipeline->driver_id, p_name); + } else if (acceleration_structure_owner.owns(p_id)) { + AccelerationStructure *acceleration_structure = acceleration_structure_owner.get_or_null(p_id); + driver->set_object_name(RDD::OBJECT_TYPE_ACCELERATION_STRUCTURE, acceleration_structure->driver_id, p_name); + } else if (raytracing_pipeline_owner.owns(p_id)) { + RaytracingPipeline *pipeline = raytracing_pipeline_owner.get_or_null(p_id); + driver->set_object_name(RDD::OBJECT_TYPE_PIPELINE, pipeline->driver_id, p_name); } else { ERR_PRINT("Attempted to name invalid ID: " + itos(p_id.get_id())); return; @@ -5819,6 +6277,22 @@ void RenderingDevice::_free_pending_resources(int p_frame) { frames[p_frame].compute_pipelines_to_dispose_of.pop_front(); } + while (frames[p_frame].raytracing_pipelines_to_dispose_of.front()) { + RaytracingPipeline *pipeline = &frames[p_frame].raytracing_pipelines_to_dispose_of.front()->get(); + + driver->raytracing_pipeline_free(pipeline->driver_id); + + frames[p_frame].raytracing_pipelines_to_dispose_of.pop_front(); + } + + // Acceleration structures. + while (frames[p_frame].acceleration_structures_to_dispose_of.front()) { + AccelerationStructure &acceleration_structure = frames[p_frame].acceleration_structures_to_dispose_of.front()->get(); + driver->acceleration_structure_free(acceleration_structure.driver_id); + + frames[p_frame].acceleration_structures_to_dispose_of.pop_front(); + } + // Uniform sets. while (frames[p_frame].uniform_sets_to_dispose_of.front()) { UniformSet *uniform_set = &frames[p_frame].uniform_sets_to_dispose_of.front()->get(); @@ -6106,8 +6580,10 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ String rendering_method; if (OS::get_singleton()->get_current_rendering_method() == "mobile") { rendering_method = "Forward Mobile"; - } else { + } else if (OS::get_singleton()->get_current_rendering_method() == "forward_plus") { rendering_method = "Forward+"; + } else { + rendering_method = "Raytracing"; } // Output our device version. @@ -6368,6 +6844,7 @@ void RenderingDevice::capture_timestamp(const String &p_name) { ERR_FAIL_COND_MSG(draw_list != nullptr && draw_list->state.draw_count > 0, "Capturing timestamps during draw list creation is not allowed. Offending timestamp was: " + p_name); ERR_FAIL_COND_MSG(compute_list != nullptr && compute_list->state.dispatch_count > 0, "Capturing timestamps during compute list creation is not allowed. Offending timestamp was: " + p_name); + ERR_FAIL_COND_MSG(raytracing_list != nullptr && raytracing_list->state.trace_count > 0, "Capturing timestamps during raytracing list creation is not allowed. Offending timestamp was: " + p_name); ERR_FAIL_COND_MSG(frames[frame].timestamp_count >= max_timestamp_query_elements, vformat("Tried capturing more timestamps than the configured maximum (%d). You can increase this limit in the project settings under 'Debug/Settings' called 'Max Timestamp Query Elements'.", max_timestamp_query_elements)); draw_graph.add_capture_timestamp(frames[frame].timestamp_pool, frames[frame].timestamp_count); @@ -6760,6 +7237,12 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("compute_pipeline_create", "shader", "specialization_constants"), &RenderingDevice::_compute_pipeline_create, DEFVAL(TypedArray())); ClassDB::bind_method(D_METHOD("compute_pipeline_is_valid", "compute_pipeline"), &RenderingDevice::compute_pipeline_is_valid); + ClassDB::bind_method(D_METHOD("raytracing_pipeline_create", "shader"), &RenderingDevice::_raytracing_pipeline_create, DEFVAL(TypedArray())); + ClassDB::bind_method(D_METHOD("raytracing_pipeline_is_valid", "raytracing_pipeline"), &RenderingDevice::raytracing_pipeline_is_valid); + + ClassDB::bind_method(D_METHOD("blas_create", "vertex_array", "index_array", "transform_buffer", "transform_offset"), &RenderingDevice::blas_create, DEFVAL(0)); + ClassDB::bind_method(D_METHOD("tlas_create", "blases"), &RenderingDevice::_tlas_create); + ClassDB::bind_method(D_METHOD("screen_get_width", "screen"), &RenderingDevice::screen_get_width, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); ClassDB::bind_method(D_METHOD("screen_get_height", "screen"), &RenderingDevice::screen_get_height, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); ClassDB::bind_method(D_METHOD("screen_get_framebuffer_format", "screen"), &RenderingDevice::screen_get_framebuffer_format, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); @@ -6800,6 +7283,15 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("compute_list_add_barrier", "compute_list"), &RenderingDevice::compute_list_add_barrier); ClassDB::bind_method(D_METHOD("compute_list_end"), &RenderingDevice::compute_list_end); + ClassDB::bind_method(D_METHOD("raytracing_list_begin"), &RenderingDevice::raytracing_list_begin); + ClassDB::bind_method(D_METHOD("raytracing_list_build_acceleration_structure", "raytracing_list", "acceleration_structure"), &RenderingDevice::raytracing_list_build_acceleration_structure); + ClassDB::bind_method(D_METHOD("raytracing_list_bind_raytracing_pipeline", "raytracing_list", "raytracing_pipeline"), &RenderingDevice::raytracing_list_bind_raytracing_pipeline); + ClassDB::bind_method(D_METHOD("raytracing_list_set_push_constant", "raytracing_list", "buffer", "size_bytes"), &RenderingDevice::_raytracing_list_set_push_constant); + ClassDB::bind_method(D_METHOD("raytracing_list_bind_uniform_set", "raytracing_list", "uniform_set", "set_index"), &RenderingDevice::raytracing_list_bind_uniform_set); + ClassDB::bind_method(D_METHOD("raytracing_list_trace_rays", "raytracing_list", "width", "height"), &RenderingDevice::raytracing_list_trace_rays); + ClassDB::bind_method(D_METHOD("raytracing_list_add_barrier", "raytracing_list"), &RenderingDevice::raytracing_list_add_barrier); + ClassDB::bind_method(D_METHOD("raytracing_list_end"), &RenderingDevice::raytracing_list_end); + ClassDB::bind_method(D_METHOD("free_rid", "rid"), &RenderingDevice::free); ClassDB::bind_method(D_METHOD("capture_timestamp", "name"), &RenderingDevice::capture_timestamp); @@ -7183,6 +7675,8 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(INDEX_BUFFER_FORMAT_UINT32); BIND_BITFIELD_FLAG(STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT); + BIND_BITFIELD_FLAG(STORAGE_BUFFER_USAGE_SHADER_DEVICE_ADDRESS); + BIND_BITFIELD_FLAG(STORAGE_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY); BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER); //for sampling only (sampler GLSL type) BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER_WITH_TEXTURE); // for sampling only); but includes a texture); (samplerXX GLSL type)); first a sampler then a texture @@ -7194,6 +7688,7 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(UNIFORM_TYPE_UNIFORM_BUFFER); //regular uniform buffer (or UBO). BIND_ENUM_CONSTANT(UNIFORM_TYPE_STORAGE_BUFFER); //storage buffer ("buffer" qualifier) like UBO); but supports storage); for compute mostly BIND_ENUM_CONSTANT(UNIFORM_TYPE_INPUT_ATTACHMENT); //used for sub-pass read/write); for mobile mostly + BIND_ENUM_CONSTANT(UNIFORM_TYPE_ACCELERATION_STRUCTURE); //acceleration structure (TLAS)); for raytracing BIND_ENUM_CONSTANT(UNIFORM_TYPE_MAX); BIND_ENUM_CONSTANT(RENDER_PRIMITIVE_POINTS); @@ -7315,12 +7810,18 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_CONTROL); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_EVALUATION); BIND_ENUM_CONSTANT(SHADER_STAGE_COMPUTE); + BIND_ENUM_CONSTANT(SHADER_STAGE_RAYGEN); + BIND_ENUM_CONSTANT(SHADER_STAGE_MISS); + BIND_ENUM_CONSTANT(SHADER_STAGE_CLOSEST_HIT); BIND_ENUM_CONSTANT(SHADER_STAGE_MAX); BIND_ENUM_CONSTANT(SHADER_STAGE_VERTEX_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_FRAGMENT_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_CONTROL_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_EVALUATION_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_COMPUTE_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_RAYGEN_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_MISS_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_CLOSEST_HIT_BIT); BIND_ENUM_CONSTANT(SHADER_LANGUAGE_GLSL); BIND_ENUM_CONSTANT(SHADER_LANGUAGE_HLSL); @@ -7597,6 +8098,11 @@ Error RenderingDevice::_buffer_update_bind(RID p_buffer, uint32_t p_offset, uint return buffer_update(p_buffer, p_offset, p_size, p_data.ptr()); } +RID RenderingDevice::_tlas_create(const TypedArray &p_blases) { + Vector blases = Variant(p_blases); + return tlas_create(blases); +} + static Vector _get_spec_constants(const TypedArray &p_constants) { Vector ret; ret.resize(p_constants.size()); @@ -7665,6 +8171,10 @@ RID RenderingDevice::_compute_pipeline_create(RID p_shader, const TypedArray &p_specialization_constants = TypedArray()) { + return raytracing_pipeline_create(p_shader, _get_spec_constants(p_specialization_constants)); +} + #ifndef DISABLE_DEPRECATED Vector RenderingDevice::_draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const TypedArray &p_storage_textures) { ERR_FAIL_V_MSG(Vector(), "Deprecated. Split draw lists are used automatically by RenderingDevice."); @@ -7684,3 +8194,8 @@ void RenderingDevice::_compute_list_set_push_constant(ComputeListID p_list, cons ERR_FAIL_COND(p_data_size > (uint32_t)p_data.size()); compute_list_set_push_constant(p_list, p_data.ptr(), p_data_size); } + +void RenderingDevice::_raytracing_list_set_push_constant(RaytracingListID p_list, const Vector &p_data, uint32_t p_data_size) { + ERR_FAIL_COND(p_data_size > (uint32_t)p_data.size()); + raytracing_list_set_push_constant(p_list, p_data.ptr(), p_data_size); +} diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index ccfe51043b4d..82d8f3cf73f8 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -75,6 +75,7 @@ class RenderingDevice : public RenderingDeviceCommons { typedef int64_t DrawListID; typedef int64_t ComputeListID; + typedef int64_t RaytracingListID; typedef String (*ShaderSPIRVGetCacheKeyFunction)(const RenderingDevice *p_render_device); typedef Vector (*ShaderCompileToSPIRVFunction)(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language, String *r_error, const RenderingDevice *p_render_device); @@ -118,6 +119,7 @@ class RenderingDevice : public RenderingDeviceCommons { ID_TYPE_VERTEX_FORMAT, ID_TYPE_DRAW_LIST, ID_TYPE_COMPUTE_LIST = 4, + ID_TYPE_RAYTRACING_LIST = 5, ID_TYPE_MAX, ID_BASE_SHIFT = 58, // 5 bits for ID types. ID_MASK = (ID_BASE_SHIFT - 1), @@ -815,7 +817,7 @@ class RenderingDevice : public RenderingDeviceCommons { #ifndef DISABLE_DEPRECATED public: - enum BarrierMask { + enum BarrierMask{ BARRIER_MASK_VERTEX = 1, BARRIER_MASK_FRAGMENT = 8, BARRIER_MASK_COMPUTE = 2, @@ -887,6 +889,8 @@ class RenderingDevice : public RenderingDeviceCommons { enum StorageBufferUsage { STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT = 1, + STORAGE_BUFFER_USAGE_SHADER_DEVICE_ADDRESS = (1 << 1), + STORAGE_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY = (1 << 2), }; RID uniform_buffer_create(uint32_t p_size_bytes, const Vector &p_data = Vector()); @@ -1069,6 +1073,9 @@ class RenderingDevice : public RenderingDeviceCommons { RID compute_pipeline_create(RID p_shader, const Vector &p_specialization_constants = Vector()); bool compute_pipeline_is_valid(RID p_pipeline); + RID raytracing_pipeline_create(RID p_shader); + bool raytracing_pipeline_is_valid(RID p_pipeline); + private: /****************/ /**** SCREEN ****/ @@ -1197,6 +1204,92 @@ class RenderingDevice : public RenderingDeviceCommons { void draw_list_end(); +private: + /***********************/ + /***** RAYTRACING ******/ + /***********************/ + struct AccelerationStructure { + RDD::AccelerationStructureID driver_id; + RDD::AccelerationStructureType type = RDD::ACCELERATION_STRUCTURE_TYPE_BLAS; + RDG::ResourceTracker *draw_tracker = nullptr; + }; + + RID_Owner acceleration_structure_owner; + +public: + RID blas_create(RID p_vertex_array, RID p_index_array, RID p_transform_buffer, uint64_t p_transform_offset); + RID tlas_create(const Vector &blases); + + struct RaytracingPipeline { + RID shader; + RDD::ShaderID shader_driver_id; + uint32_t shader_layout_hash = 0; + Vector set_formats; + RDD::RaytracingPipelineID driver_id; + uint32_t push_constant_size = 0; + }; + + RID raytracing_pipeline_create(RID p_shader, const Vector &p_specialization_constants = Vector()); + +private: + RID_Owner raytracing_pipeline_owner; + + /**************************/ + /**** RAYTRACING LISTS ****/ + /**************************/ + + struct RaytracingList { + struct SetState { + uint32_t pipeline_expected_format = 0; + uint32_t uniform_set_format = 0; + RDD::UniformSetID uniform_set_driver_id; + RID uniform_set; + bool bound = false; + }; + + struct State { + SetState sets[MAX_UNIFORM_SETS]; + uint32_t set_count = 0; + RID pipeline; + RDD::RaytracingPipelineID pipeline_driver_id; + RID pipeline_shader; + RDD::ShaderID pipeline_shader_driver_id; + uint32_t pipeline_shader_layout_hash = 0; + uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE] = {}; + uint32_t push_constant_size = 0; + uint32_t trace_count = 0; + } state; + +#ifdef DEBUG_ENABLED + struct Validation { + bool active = true; // Means command buffer was not closed, so you can keep adding things. + Vector set_formats; + Vector set_bound; + Vector set_rids; + // Last pipeline set values. + bool pipeline_active = false; + RID pipeline_shader; + uint32_t invalid_set_from = 0; + uint32_t pipeline_push_constant_size = 0; + bool pipeline_push_constant_supplied = false; + } validation; +#endif + }; + + RaytracingList *raytracing_list = nullptr; + RaytracingList::State raytracing_list_barrier_state; + +public: + RaytracingListID raytracing_list_begin(); + void raytracing_list_build_acceleration_structure(RaytracingListID p_list, RID p_acceleration_structure); + void raytracing_list_bind_raytracing_pipeline(RaytracingListID p_list, RID p_raytracing_pipeline); + void raytracing_list_bind_uniform_set(RaytracingListID p_list, RID p_uniform_set, uint32_t p_index); + void raytracing_list_set_push_constant(RaytracingListID p_list, const void *p_data, uint32_t p_data_size); + void raytracing_list_trace_rays(RaytracingListID p_list, uint32_t p_width, uint32_t p_height); + void raytracing_list_add_barrier(RaytracingListID p_list); + + void raytracing_list_end(); + private: /***********************/ /**** COMPUTE LISTS ****/ @@ -1358,6 +1451,8 @@ class RenderingDevice : public RenderingDeviceCommons { List uniform_sets_to_dispose_of; List render_pipelines_to_dispose_of; List compute_pipelines_to_dispose_of; + List acceleration_structures_to_dispose_of; + List raytracing_pipelines_to_dispose_of; // The command pool used by the command buffer. RDD::CommandPoolID command_pool; @@ -1535,11 +1630,15 @@ class RenderingDevice : public RenderingDeviceCommons { Error _buffer_update_bind(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data); + RID _tlas_create(const TypedArray &p_blases); + RID _render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const Ref &p_rasterization_state, const Ref &p_multisample_state, const Ref &p_depth_stencil_state, const Ref &p_blend_state, BitField p_dynamic_state_flags, uint32_t p_for_render_pass, const TypedArray &p_specialization_constants); RID _compute_pipeline_create(RID p_shader, const TypedArray &p_specialization_constants); + RID _raytracing_pipeline_create(RID p_shader, const TypedArray &p_specialization_constants); void _draw_list_set_push_constant(DrawListID p_list, const Vector &p_data, uint32_t p_data_size); void _compute_list_set_push_constant(ComputeListID p_list, const Vector &p_data, uint32_t p_data_size); + void _raytracing_list_set_push_constant(RaytracingListID p_list, const Vector &p_data, uint32_t p_data_size); }; VARIANT_ENUM_CAST(RenderingDevice::DeviceType) diff --git a/servers/rendering/rendering_device_binds.cpp b/servers/rendering/rendering_device_binds.cpp index e41a56b0a32e..f12c334d536d 100644 --- a/servers/rendering/rendering_device_binds.cpp +++ b/servers/rendering/rendering_device_binds.cpp @@ -39,7 +39,7 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String Vector lines = p_text.split("\n"); bool reading_versions = false; - bool stage_found[RD::SHADER_STAGE_MAX] = { false, false, false, false, false }; + bool stage_found[RD::SHADER_STAGE_MAX] = { false, false, false, false, false, false, false, false }; RD::ShaderStage stage = RD::SHADER_STAGE_MAX; static const char *stage_str[RD::SHADER_STAGE_MAX] = { "vertex", @@ -47,6 +47,9 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String "tesselation_control", "tesselation_evaluation", "compute", + "raygen", + "miss", + "closest_hit", }; String stage_code[RD::SHADER_STAGE_MAX]; int stages_found = 0; diff --git a/servers/rendering/rendering_device_binds.h b/servers/rendering/rendering_device_binds.h index 4d9b56508000..9243e97a5e6e 100644 --- a/servers/rendering/rendering_device_binds.h +++ b/servers/rendering/rendering_device_binds.h @@ -262,6 +262,9 @@ class RDShaderSource : public RefCounted { ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_tesselation_control"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_tesselation_evaluation"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_compute"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_raygen"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_RAYGEN); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_miss"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_MISS); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_closest_hit"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_CLOSEST_HIT); ADD_GROUP("Syntax", "source_"); ADD_PROPERTY(PropertyInfo(Variant::INT, "language", PROPERTY_HINT_RANGE, "GLSL,HLSL"), "set_language", "get_language"); } @@ -321,12 +324,18 @@ class RDShaderSPIRV : public Resource { ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_tesselation_control"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_tesselation_evaluation"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_compute"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_raygen"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_RAYGEN); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_miss"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_MISS); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_closest_hit"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_CLOSEST_HIT); ADD_GROUP("Compile Error", "compile_error_"); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_vertex"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_VERTEX); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_fragment"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_FRAGMENT); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_tesselation_control"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_tesselation_evaluation"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_compute"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_raygen"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_RAYGEN); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_miss"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_MISS); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_closest_hit"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_CLOSEST_HIT); } }; diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index d516d968af62..ad61946a0029 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -504,12 +504,18 @@ class RenderingDeviceCommons : public Object { SHADER_STAGE_TESSELATION_CONTROL, SHADER_STAGE_TESSELATION_EVALUATION, SHADER_STAGE_COMPUTE, + SHADER_STAGE_RAYGEN, + SHADER_STAGE_MISS, + SHADER_STAGE_CLOSEST_HIT, SHADER_STAGE_MAX, SHADER_STAGE_VERTEX_BIT = (1 << SHADER_STAGE_VERTEX), SHADER_STAGE_FRAGMENT_BIT = (1 << SHADER_STAGE_FRAGMENT), SHADER_STAGE_TESSELATION_CONTROL_BIT = (1 << SHADER_STAGE_TESSELATION_CONTROL), SHADER_STAGE_TESSELATION_EVALUATION_BIT = (1 << SHADER_STAGE_TESSELATION_EVALUATION), SHADER_STAGE_COMPUTE_BIT = (1 << SHADER_STAGE_COMPUTE), + SHADER_STAGE_RAYGEN_BIT = (1 << SHADER_STAGE_RAYGEN), + SHADER_STAGE_MISS_BIT = (1 << SHADER_STAGE_MISS), + SHADER_STAGE_CLOSEST_HIT_BIT = (1 << SHADER_STAGE_CLOSEST_HIT), }; struct ShaderStageSPIRVData { @@ -534,6 +540,7 @@ class RenderingDeviceCommons : public Object { UNIFORM_TYPE_UNIFORM_BUFFER, // Regular uniform buffer (or UBO). UNIFORM_TYPE_STORAGE_BUFFER, // Storage buffer ("buffer" qualifier) like UBO, but supports storage, for compute mostly. UNIFORM_TYPE_INPUT_ATTACHMENT, // Used for sub-pass read/write, for mobile mostly. + UNIFORM_TYPE_ACCELERATION_STRUCTURE, // Bounding Volume Hierarchy (Top + Bottom Level acceleration structures), for raytracing only. UNIFORM_TYPE_MAX }; @@ -563,6 +570,12 @@ class RenderingDeviceCommons : public Object { // ----- PIPELINE ----- + enum PipelineType { + RASTERIZATION, + COMPUTE, + RAYTRACING, + }; + enum RenderPrimitive { RENDER_PRIMITIVE_POINTS, RENDER_PRIMITIVE_LINES, @@ -858,6 +871,7 @@ class RenderingDeviceCommons : public Object { SUPPORTS_ATTACHMENT_VRS, // If not supported, a fragment shader with only side effets (i.e., writes to buffers, but doesn't output to attachments), may be optimized down to no-op by the GPU driver. SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS, + SUPPORTS_RAYTRACING, }; enum SubgroupOperations { @@ -959,7 +973,7 @@ class RenderingDeviceCommons : public Object { struct ShaderDescription { uint64_t vertex_input_mask = 0; uint32_t fragment_output_mask = 0; - bool is_compute = false; + PipelineType pipeline_type = PipelineType::RASTERIZATION; uint32_t compute_local_size[3] = {}; uint32_t push_constant_size = 0; diff --git a/servers/rendering/rendering_device_driver.cpp b/servers/rendering/rendering_device_driver.cpp index c1a3f34af895..0ccecce084f4 100644 --- a/servers/rendering/rendering_device_driver.cpp +++ b/servers/rendering/rendering_device_driver.cpp @@ -44,10 +44,14 @@ Error RenderingDeviceDriver::_reflect_spirv(VectorView p_s ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage); if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) { - r_reflection.is_compute = true; + r_reflection.pipeline_type = PipelineType::COMPUTE; ERR_FAIL_COND_V_MSG(p_spirv.size() != 1, FAILED, "Compute shaders can only receive one stage, dedicated to compute."); } + if (p_spirv[i].shader_stage == SHADER_STAGE_RAYGEN || p_spirv[i].shader_stage == SHADER_STAGE_MISS || p_spirv[i].shader_stage == SHADER_STAGE_CLOSEST_HIT) { + r_reflection.pipeline_type = PipelineType::RAYTRACING; + } + ERR_FAIL_COND_V_MSG(r_reflection.stages.has_flag(stage_flag), FAILED, "Stage " + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once."); @@ -58,7 +62,7 @@ Error RenderingDeviceDriver::_reflect_spirv(VectorView p_s ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed parsing shader."); - if (r_reflection.is_compute) { + if (r_reflection.pipeline_type == PipelineType::COMPUTE) { r_reflection.compute_local_size[0] = module.entry_points->local_size.x; r_reflection.compute_local_size[1] = module.entry_points->local_size.y; r_reflection.compute_local_size[2] = module.entry_points->local_size.z; @@ -136,8 +140,7 @@ Error RenderingDeviceDriver::_reflect_spirv(VectorView p_s need_array_dimensions = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { - ERR_PRINT("Acceleration structure not supported."); - continue; + uniform.type = UNIFORM_TYPE_ACCELERATION_STRUCTURE; } break; } diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index d2d14676db3f..24e6a2242cab 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -160,6 +160,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { DEFINE_ID(QueryPool); DEFINE_ID(Fence); DEFINE_ID(Semaphore); + DEFINE_ID(AccelerationStructure); + DEFINE_ID(RaytracingPipeline); public: /*****************/ @@ -190,6 +192,10 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { BUFFER_USAGE_INDEX_BIT = (1 << 6), BUFFER_USAGE_VERTEX_BIT = (1 << 7), BUFFER_USAGE_INDIRECT_BIT = (1 << 8), + BUFFER_USAGE_SHADER_BINDING_TABLE_BIT = (1 << 10), + BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT = 0x00020000, + BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT = 0x00080000, + BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT = 0x00100000, }; enum { @@ -325,6 +331,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { PIPELINE_STAGE_ALL_GRAPHICS_BIT = (1 << 15), PIPELINE_STAGE_ALL_COMMANDS_BIT = (1 << 16), PIPELINE_STAGE_CLEAR_STORAGE_BIT = (1 << 17), + PIPELINE_STAGE_RAY_TRACING_SHADER_BIT = 0x00200000, + PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT = 0x02000000, }; enum BarrierAccessBits { @@ -349,6 +357,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { BARRIER_ACCESS_RESOLVE_READ_BIT = (1 << 24), BARRIER_ACCESS_RESOLVE_WRITE_BIT = (1 << 25), BARRIER_ACCESS_STORAGE_CLEAR_BIT = (1 << 27), + BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT = 0x00200000, + BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT = 0x00400000, }; struct MemoryBarrier { @@ -693,6 +703,33 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) = 0; + /********************/ + /**** RAYTRACING ****/ + /********************/ + + // ----- ACCELERATION STRUCTURE ----- + + enum AccelerationStructureType { + ACCELERATION_STRUCTURE_TYPE_BLAS, + ACCELERATION_STRUCTURE_TYPE_TLAS, + }; + + virtual AccelerationStructureID blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset, uint32_t p_index_count, BufferID p_instance_buffer, uint64_t p_instance_offset) = 0; + virtual AccelerationStructureID tlas_create(const LocalVector &p_blases) = 0; + virtual void acceleration_structure_free(AccelerationStructureID p_acceleration_structure) = 0; + + // ----- PIPELINE ----- + + virtual RaytracingPipelineID raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) = 0; + virtual void raytracing_pipeline_free(RaytracingPipelineID p_pipeline) = 0; + + // ----- COMMANDS ----- + + virtual void command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure) = 0; + virtual void command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) = 0; + virtual void command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) = 0; + virtual void command_raytracing_trace_rays(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline, ShaderID p_shader, uint32_t p_width, uint32_t p_height) = 0; + /*****************/ /**** QUERIES ****/ /*****************/ @@ -739,6 +776,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { OBJECT_TYPE_SHADER, OBJECT_TYPE_UNIFORM_SET, OBJECT_TYPE_PIPELINE, + OBJECT_TYPE_ACCELERATION_STRUCTURE, }; struct MultiviewCapabilities { diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index 86b5f80e561a..c05975895fce 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -55,6 +55,7 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { case RESOURCE_USAGE_INDEX_BUFFER_READ: case RESOURCE_USAGE_TEXTURE_SAMPLE: case RESOURCE_USAGE_STORAGE_IMAGE_READ: + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ: return false; case RESOURCE_USAGE_COPY_TO: case RESOURCE_USAGE_RESOLVE_TO: @@ -120,6 +121,7 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage case RESOURCE_USAGE_STORAGE_IMAGE_READ: case RESOURCE_USAGE_TEXTURE_BUFFER_READ: case RESOURCE_USAGE_TEXTURE_SAMPLE: + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ: return RDD::BARRIER_ACCESS_SHADER_READ_BIT; case RESOURCE_USAGE_TEXTURE_BUFFER_READ_WRITE: case RESOURCE_USAGE_STORAGE_BUFFER_READ_WRITE: @@ -235,6 +237,12 @@ RenderingDeviceGraph::ComputeListInstruction *RenderingDeviceGraph::_allocate_co return reinterpret_cast(&compute_instruction_list.data[compute_list_data_offset]); } +RenderingDeviceGraph::RaytracingListInstruction *RenderingDeviceGraph::_allocate_raytracing_list_instruction(uint32_t p_instruction_size) { + uint32_t raytracing_list_data_offset = raytracing_instruction_list.data.size(); + raytracing_instruction_list.data.resize(raytracing_list_data_offset + p_instruction_size); + return reinterpret_cast(&raytracing_instruction_list.data[raytracing_list_data_offset]); +} + void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command) { // Assign the next stages derived from the stages the command requires first. r_command->next_stages = r_command->self_stages; @@ -452,6 +460,10 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr // Memory barriers are pushed regardless of buffer barriers being used or not. r_command->memory_barrier.src_access = r_command->memory_barrier.src_access | resource_tracker->usage_access; r_command->memory_barrier.dst_access = r_command->memory_barrier.dst_access | new_usage_access; + } else if (resource_tracker->acceleration_structure_driver_id) { + // Make sure the acceleration structure has been built before accessing it from raytracing shaders. + r_command->memory_barrier.src_access = r_command->memory_barrier.src_access | resource_tracker->usage_access; + r_command->memory_barrier.dst_access = r_command->memory_barrier.dst_access | new_usage_access; } else { DEV_ASSERT(false && "Resource tracker does not contain a valid buffer or texture ID."); } @@ -638,6 +650,67 @@ void RenderingDeviceGraph::_add_buffer_barrier_to_command(RDD::BufferID p_buffer } #endif +void RenderingDeviceGraph::_run_raytracing_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(RaytracingListInstruction)) <= p_instruction_data_size); + + const RaytracingListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + switch (instruction->type) { + case RaytracingListInstruction::TYPE_BUILD_ACCELERATION_STRUCTURE: { + const RaytracingListBuildAccelerationStructureInstruction *build_acceleration_structure_instruction = reinterpret_cast(instruction); + // Make sure BLASs are ready before building the TLAS + if (build_acceleration_structure_instruction->acceleration_structure_type == RDD::ACCELERATION_STRUCTURE_TYPE_TLAS) { + RDD::MemoryBarrier mb{}; + mb.src_access = RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT; + mb.dst_access = RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT; + driver->command_pipeline_barrier(p_command_buffer, RDD::PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, RDD::PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, + mb, {}, {}); + } + driver->command_build_acceleration_structure(p_command_buffer, build_acceleration_structure_instruction->acceleration_structure); + // Make sure TLAS is built before using it in raytracing shader + if (build_acceleration_structure_instruction->acceleration_structure_type == RDD::ACCELERATION_STRUCTURE_TYPE_TLAS) { + RDD::MemoryBarrier mb = {}; + mb.src_access = RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT; + mb.dst_access = RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT; + driver->command_pipeline_barrier(p_command_buffer, RDD::PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT, mb, {}, {}); + } + instruction_data_cursor += sizeof(RaytracingListBuildAccelerationStructureInstruction); + } break; + case RaytracingListInstruction::TYPE_BIND_PIPELINE: { + const RaytracingListBindPipelineInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + driver->command_bind_raytracing_pipeline(p_command_buffer, bind_pipeline_instruction->pipeline); + instruction_data_cursor += sizeof(RaytracingListBindPipelineInstruction); + } break; + case RaytracingListInstruction::TYPE_BIND_UNIFORM_SET: { + const RaytracingListBindUniformSetInstruction *bind_uniform_set_instruction = reinterpret_cast(instruction); + driver->command_bind_raytracing_uniform_set(p_command_buffer, bind_uniform_set_instruction->uniform_set, bind_uniform_set_instruction->shader, bind_uniform_set_instruction->set_index); + instruction_data_cursor += sizeof(RaytracingListBindUniformSetInstruction); + } break; + case RaytracingListInstruction::TYPE_TRACE_RAYS: { + const RaytracingListTraceRaysInstruction *trace_rays_instruction = reinterpret_cast(instruction); + driver->command_raytracing_trace_rays(p_command_buffer, trace_rays_instruction->pipeline, trace_rays_instruction->shader, trace_rays_instruction->width, trace_rays_instruction->height); + instruction_data_cursor += sizeof(RaytracingListTraceRaysInstruction); + } break; + case RaytracingListInstruction::TYPE_SET_PUSH_CONSTANT: { + const RaytracingListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + const VectorView push_constant_data_view(reinterpret_cast(set_push_constant_instruction->data()), set_push_constant_instruction->size / sizeof(uint32_t)); + driver->command_bind_push_constants(p_command_buffer, set_push_constant_instruction->shader, 0, push_constant_data_view); + instruction_data_cursor += sizeof(RaytracingListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case RaytracingListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + const RaytracingListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); + driver->command_uniform_set_prepare_for_use(p_command_buffer, uniform_set_prepare_for_use_instruction->uniform_set, uniform_set_prepare_for_use_instruction->shader, uniform_set_prepare_for_use_instruction->set_index); + instruction_data_cursor += sizeof(RaytracingListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown raytracing list instruction type."); + return; + } + } +} + void RenderingDeviceGraph::_run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { uint32_t instruction_data_cursor = 0; while (instruction_data_cursor < p_instruction_data_size) { @@ -834,6 +907,10 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC driver->command_copy_buffer(r_command_buffer, command_buffer_copies[j].source, buffer_update_command->destination, command_buffer_copies[j].region); } } break; + case RecordedCommand::TYPE_RAYTRACING_LIST: { + const RecordedRaytracingListCommand *raytracing_list_command = reinterpret_cast(command); + _run_raytracing_list_command(r_command_buffer, raytracing_list_command->instruction_data(), raytracing_list_command->instruction_data_size); + } break; case RecordedCommand::TYPE_COMPUTE_LIST: { if (device.workarounds.avoid_compute_after_draw && workarounds_state.draw_list_found) { // Avoid compute after draw workaround. Refer to the comment that enables this in the Vulkan driver for more information. @@ -1279,6 +1356,51 @@ void RenderingDeviceGraph::_print_draw_list(const uint8_t *p_instruction_data, u } } +void RenderingDeviceGraph::_print_raytracing_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(RaytracingListInstruction)) <= p_instruction_data_size); + + const RaytracingListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + switch (instruction->type) { + case RaytracingListInstruction::TYPE_BUILD_ACCELERATION_STRUCTURE: { + const RaytracingListBuildAccelerationStructureInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + print_line("\tBUILD ACCELERATION STRUCTURE ID", itos(bind_pipeline_instruction->acceleration_structure.id)); + instruction_data_cursor += sizeof(RaytracingListBuildAccelerationStructureInstruction); + } break; + case RaytracingListInstruction::TYPE_BIND_PIPELINE: { + const RaytracingListBindPipelineInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + print_line("\tBIND PIPELINE ID", itos(bind_pipeline_instruction->pipeline.id)); + instruction_data_cursor += sizeof(RaytracingListBindPipelineInstruction); + } break; + case RaytracingListInstruction::TYPE_BIND_UNIFORM_SET: { + const RaytracingListBindUniformSetInstruction *bind_uniform_set_instruction = reinterpret_cast(instruction); + print_line("\tBIND UNIFORM SET ID", itos(bind_uniform_set_instruction->uniform_set.id), "SHADER ID", itos(bind_uniform_set_instruction->shader.id)); + instruction_data_cursor += sizeof(RaytracingListBindUniformSetInstruction); + } break; + case RaytracingListInstruction::TYPE_SET_PUSH_CONSTANT: { + const RaytracingListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + print_line("\tSET PUSH CONSTANT SIZE", set_push_constant_instruction->size); + instruction_data_cursor += sizeof(RaytracingListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case RaytracingListInstruction::TYPE_TRACE_RAYS: { + const RaytracingListTraceRaysInstruction *trace_rays_instruction = reinterpret_cast(instruction); + print_line("\tTRACE RAYS PIPELINE ID", trace_rays_instruction->pipeline.id, "SHADER ID", trace_rays_instruction->shader.id, "WIDTH", itos(trace_rays_instruction->width), "HEIGHT", itos(trace_rays_instruction->height)); + instruction_data_cursor += sizeof(RaytracingListTraceRaysInstruction); + } break; + case RaytracingListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + const RaytracingListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); + print_line("\tUNIFORM SET PREPARE FOR USE ID", itos(uniform_set_prepare_for_use_instruction->uniform_set.id), "SHADER ID", itos(uniform_set_prepare_for_use_instruction->shader.id), "INDEX", itos(uniform_set_prepare_for_use_instruction->set_index)); + instruction_data_cursor += sizeof(RaytracingListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown raytracing list instruction type."); + return; + } + } +} + void RenderingDeviceGraph::_print_compute_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { uint32_t instruction_data_cursor = 0; while (instruction_data_cursor < p_instruction_data_size) { @@ -1458,6 +1580,97 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke _add_command_to_graph(&p_dst_tracker, &buffer_usage, 1, command_index, command); } +void RenderingDeviceGraph::add_raytracing_list_begin() { + raytracing_instruction_list.clear(); + raytracing_instruction_list.index++; +} + +void RenderingDeviceGraph::add_raytracing_list_build_acceleration_structure(RDD::AccelerationStructureID p_acceleration_structure, RDD::AccelerationStructureType p_acceleration_structure_type) { + RaytracingListBuildAccelerationStructureInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListBuildAccelerationStructureInstruction))); + instruction->type = RaytracingListInstruction::TYPE_BUILD_ACCELERATION_STRUCTURE; + instruction->acceleration_structure = p_acceleration_structure; + instruction->acceleration_structure_type = p_acceleration_structure_type; +} + +void RenderingDeviceGraph::add_raytracing_list_bind_pipeline(RDD::RaytracingPipelineID p_pipeline) { + RaytracingListBindPipelineInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListBindPipelineInstruction))); + instruction->type = RaytracingListInstruction::TYPE_BIND_PIPELINE; + instruction->pipeline = p_pipeline; + raytracing_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT); +} + +void RenderingDeviceGraph::add_raytracing_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { + RaytracingListBindUniformSetInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListBindUniformSetInstruction))); + instruction->type = RaytracingListInstruction::TYPE_BIND_UNIFORM_SET; + instruction->shader = p_shader; + instruction->uniform_set = p_uniform_set; + instruction->set_index = set_index; +} + +void RenderingDeviceGraph::add_raytracing_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size) { + uint32_t instruction_size = sizeof(RaytracingListSetPushConstantInstruction) + p_data_size; + RaytracingListSetPushConstantInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(instruction_size)); + instruction->type = RaytracingListInstruction::TYPE_SET_PUSH_CONSTANT; + instruction->size = p_data_size; + instruction->shader = p_shader; + memcpy(instruction->data(), p_data, p_data_size); +} + +void RenderingDeviceGraph::add_raytracing_list_trace_rays(RDD::RaytracingPipelineID p_pipeline, RDD::ShaderID p_shader, uint32_t p_width, uint32_t p_height) { + RaytracingListTraceRaysInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListTraceRaysInstruction))); + instruction->type = RaytracingListInstruction::TYPE_TRACE_RAYS; + instruction->pipeline = p_pipeline; + instruction->shader = p_shader; + instruction->width = p_width; + instruction->height = p_height; +} + +void RenderingDeviceGraph::add_raytracing_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { + RaytracingListUniformSetPrepareForUseInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListUniformSetPrepareForUseInstruction))); + instruction->type = RaytracingListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE; + instruction->shader = p_shader; + instruction->uniform_set = p_uniform_set; + instruction->set_index = set_index; +} + +void RenderingDeviceGraph::add_raytracing_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage) { + DEV_ASSERT(p_tracker != nullptr); + + p_tracker->reset_if_outdated(tracking_frame); + + if (p_tracker->raytracing_list_index != raytracing_instruction_list.index) { + raytracing_instruction_list.command_trackers.push_back(p_tracker); + raytracing_instruction_list.command_tracker_usages.push_back(p_usage); + p_tracker->raytracing_list_index = raytracing_instruction_list.index; + p_tracker->raytracing_list_usage = p_usage; + } +#ifdef DEV_ENABLED + else if (p_tracker->raytracing_list_usage != p_usage) { + ERR_FAIL_MSG(vformat("Tracker can't have more than one type of usage in the same raytracing list. Raytracing list usage is %d and the requested usage is %d.", p_tracker->raytracing_list_usage, p_usage)); + } +#endif +} + +void RenderingDeviceGraph::add_raytracing_list_usages(VectorView p_trackers, VectorView p_usages) { + DEV_ASSERT(p_trackers.size() == p_usages.size()); + + for (uint32_t i = 0; i < p_trackers.size(); i++) { + add_raytracing_list_usage(p_trackers[i], p_usages[i]); + } +} + +void RenderingDeviceGraph::add_raytracing_list_end() { + int32_t command_index; + uint32_t instruction_data_size = raytracing_instruction_list.data.size(); + uint32_t command_size = sizeof(RecordedRaytracingListCommand) + instruction_data_size; + RecordedRaytracingListCommand *command = static_cast(_allocate_command(command_size, command_index)); + command->type = RecordedCommand::TYPE_RAYTRACING_LIST; + command->self_stages = raytracing_instruction_list.stages; + command->instruction_data_size = instruction_data_size; + memcpy(command->instruction_data(), raytracing_instruction_list.data.ptr(), instruction_data_size); + _add_command_to_graph(raytracing_instruction_list.command_trackers.ptr(), raytracing_instruction_list.command_tracker_usages.ptr(), raytracing_instruction_list.command_trackers.size(), command_index, command); +} + void RenderingDeviceGraph::add_compute_list_begin(RDD::BreadcrumbMarker p_phase, uint32_t p_breadcrumb_data) { compute_instruction_list.clear(); #if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index 452e1700b666..1bdc6d984b42 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -45,6 +45,20 @@ class RenderingDeviceGraph { public: + struct RaytracingListInstruction { + enum Type { + TYPE_NONE, + TYPE_BUILD_ACCELERATION_STRUCTURE, + TYPE_BIND_PIPELINE, + TYPE_BIND_UNIFORM_SET, + TYPE_SET_PUSH_CONSTANT, + TYPE_TRACE_RAYS, + TYPE_UNIFORM_SET_PREPARE_FOR_USE + }; + + Type type = TYPE_NONE; + }; + struct ComputeListInstruction { enum Type { TYPE_NONE, @@ -92,6 +106,7 @@ class RenderingDeviceGraph { TYPE_BUFFER_GET_DATA, TYPE_BUFFER_UPDATE, TYPE_COMPUTE_LIST, + TYPE_RAYTRACING_LIST, TYPE_DRAW_LIST, TYPE_TEXTURE_CLEAR, TYPE_TEXTURE_COPY, @@ -147,7 +162,8 @@ class RenderingDeviceGraph { RESOURCE_USAGE_STORAGE_IMAGE_READ, RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE, RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE, - RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE + RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE, + RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ, }; struct ResourceTracker { @@ -161,7 +177,9 @@ class RenderingDeviceGraph { int32_t draw_list_index = -1; ResourceUsage draw_list_usage = RESOURCE_USAGE_NONE; int32_t compute_list_index = -1; + int32_t raytracing_list_index = -1; ResourceUsage compute_list_usage = RESOURCE_USAGE_NONE; + ResourceUsage raytracing_list_usage = RESOURCE_USAGE_NONE; ResourceUsage usage = RESOURCE_USAGE_NONE; BitField usage_access; RDD::BufferID buffer_driver_id; @@ -175,6 +193,7 @@ class RenderingDeviceGraph { Rect2i texture_slice_or_dirty_rect; bool in_parent_dirty_list = false; bool write_command_list_enabled = false; + RDD::AccelerationStructureID acceleration_structure_driver_id; _FORCE_INLINE_ void reset_if_outdated(int64_t new_command_frame) { if (new_command_frame != command_frame) { @@ -186,6 +205,7 @@ class RenderingDeviceGraph { write_command_or_list_index = -1; draw_list_index = -1; compute_list_index = -1; + raytracing_list_index = -1; texture_slice_command_index = -1; write_command_list_enabled = false; } @@ -228,6 +248,10 @@ class RenderingDeviceGraph { #endif }; + struct RaytracingInstructionList : InstructionList { + // No extra contents. + }; + struct DrawInstructionList : InstructionList { RDD::RenderPassID render_pass; RDD::FramebufferID framebuffer; @@ -304,6 +328,18 @@ class RenderingDeviceGraph { } }; + struct RecordedRaytracingListCommand : RecordedCommand { + uint32_t instruction_data_size = 0; + + _FORCE_INLINE_ uint8_t *instruction_data() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const uint8_t *instruction_data() const { + return reinterpret_cast(&this[1]); + } + }; + struct RecordedComputeListCommand : RecordedCommand { uint32_t instruction_data_size = 0; uint32_t breadcrumb = 0; @@ -535,6 +571,47 @@ class RenderingDeviceGraph { uint32_t set_index = 0; }; + struct RaytracingListBuildAccelerationStructureInstruction : RaytracingListInstruction { + RDD::AccelerationStructureID acceleration_structure; + RDD::AccelerationStructureType acceleration_structure_type; + }; + + struct RaytracingListBindPipelineInstruction : RaytracingListInstruction { + RDD::RaytracingPipelineID pipeline; + }; + + struct RaytracingListBindUniformSetInstruction : RaytracingListInstruction { + RDD::UniformSetID uniform_set; + RDD::ShaderID shader; + uint32_t set_index = 0; + }; + + struct RaytracingListSetPushConstantInstruction : RaytracingListInstruction { + uint32_t size = 0; + RDD::ShaderID shader; + + _FORCE_INLINE_ uint8_t *data() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const uint8_t *data() const { + return reinterpret_cast(&this[1]); + } + }; + + struct RaytracingListTraceRaysInstruction : RaytracingListInstruction { + RDD::RaytracingPipelineID pipeline; + RDD::ShaderID shader; + uint32_t width; + uint32_t height; + }; + + struct RaytracingListUniformSetPrepareForUseInstruction : RaytracingListInstruction { + RDD::UniformSetID uniform_set; + RDD::ShaderID shader; + uint32_t set_index = 0; + }; + struct ComputeListBindPipelineInstruction : ComputeListInstruction { RDD::PipelineID pipeline; }; @@ -626,6 +703,7 @@ class RenderingDeviceGraph { int32_t command_label_index = -1; DrawInstructionList draw_instruction_list; ComputeInstructionList compute_instruction_list; + RaytracingInstructionList raytracing_instruction_list; uint32_t command_count = 0; uint32_t command_label_count = 0; LocalVector command_list_nodes; @@ -657,12 +735,14 @@ class RenderingDeviceGraph { RecordedCommand *_allocate_command(uint32_t p_command_size, int32_t &r_command_index); DrawListInstruction *_allocate_draw_list_instruction(uint32_t p_instruction_size); ComputeListInstruction *_allocate_compute_list_instruction(uint32_t p_instruction_size); + RaytracingListInstruction *_allocate_raytracing_list_instruction(uint32_t p_instruction_size); void _add_command_to_graph(ResourceTracker **p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command); void _add_texture_barrier_to_command(RDD::TextureID p_texture_id, BitField p_src_access, BitField p_dst_access, ResourceUsage p_prev_usage, ResourceUsage p_next_usage, RDD::TextureSubresourceRange p_subresources, LocalVector &r_barrier_vector, int32_t &r_barrier_index, int32_t &r_barrier_count); #if USE_BUFFER_BARRIERS void _add_buffer_barrier_to_command(RDD::BufferID p_buffer_id, BitField p_src_access, BitField p_dst_access, int32_t &r_barrier_index, int32_t &r_barrier_count); #endif void _run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); + void _run_raytracing_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _run_secondary_command_buffer_task(const SecondaryCommandBuffer *p_secondary); void _wait_for_secondary_command_buffer_tasks(); @@ -673,6 +753,7 @@ class RenderingDeviceGraph { void _print_render_commands(const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count); void _print_draw_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _print_compute_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); + void _print_raytracing_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); public: RenderingDeviceGraph(); @@ -684,6 +765,16 @@ class RenderingDeviceGraph { void add_buffer_copy(RDD::BufferID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, RDD::BufferCopyRegion p_region); void add_buffer_get_data(RDD::BufferID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, RDD::BufferCopyRegion p_region); void add_buffer_update(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies); + void add_raytracing_list_begin(); + void add_raytracing_list_build_acceleration_structure(RDD::AccelerationStructureID p_acceleration_structure, RDD::AccelerationStructureType p_acceleration_structure_type); + void add_raytracing_list_bind_pipeline(RDD::RaytracingPipelineID p_pipeline); + void add_raytracing_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); + void add_raytracing_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size); + void add_raytracing_list_trace_rays(RDD::RaytracingPipelineID p_pipeline, RDD::ShaderID p_shader, uint32_t p_width, uint32_t p_height); + void add_raytracing_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); + void add_raytracing_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage); + void add_raytracing_list_usages(VectorView p_trackers, VectorView p_usages); + void add_raytracing_list_end(); void add_compute_list_begin(RDD::BreadcrumbMarker p_phase = RDD::BreadcrumbMarker::NONE, uint32_t p_breadcrumb_data = 0); void add_compute_list_bind_pipeline(RDD::PipelineID p_pipeline); void add_compute_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); diff --git a/servers/rendering/shader_compiler.h b/servers/rendering/shader_compiler.h index 66106d7eb734..16a11f8313b7 100644 --- a/servers/rendering/shader_compiler.h +++ b/servers/rendering/shader_compiler.h @@ -41,6 +41,9 @@ class ShaderCompiler { STAGE_VERTEX, STAGE_FRAGMENT, STAGE_COMPUTE, + STAGE_RAYGEN, + STAGE_MISS, + STAGE_CLOSEST_HIT, STAGE_MAX };