diff --git a/doc/classes/RDShaderSPIRV.xml b/doc/classes/RDShaderSPIRV.xml index dd8a292c610b..2bcbcfb4ea29 100644 --- a/doc/classes/RDShaderSPIRV.xml +++ b/doc/classes/RDShaderSPIRV.xml @@ -42,12 +42,21 @@ + + The SPIR-V bytecode for the closest hit shader stage. + The SPIR-V bytecode for the compute shader stage. The SPIR-V bytecode for the fragment shader stage. + + The SPIR-V bytecode for the miss shader stage. + + + The SPIR-V bytecode for the ray generation shader stage. + The SPIR-V bytecode for the tessellation control shader stage. @@ -57,12 +66,21 @@ The SPIR-V bytecode for the vertex shader stage. + + The compilation error message for the closest hit shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. + The compilation error message for the compute shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. The compilation error message for the fragment shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. + + The compilation error message for the miss shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. + + + The compilation error message for the ray generation shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. + The compilation error message for the tessellation control shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. diff --git a/doc/classes/RDShaderSource.xml b/doc/classes/RDShaderSource.xml index a7b897d56e69..b2626c3d93bf 100644 --- a/doc/classes/RDShaderSource.xml +++ b/doc/classes/RDShaderSource.xml @@ -31,12 +31,21 @@ The language the shader is written in. + + Source code for the shader's closest hit stage. + Source code for the shader's compute stage. Source code for the shader's fragment stage. + + Source code for the shader's miss stage. 
+ + + Source code for the shader's ray generation stage. + Source code for the shader's tessellation control stage. diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml index b7f95587cda6..7cf5ee4df9f7 100644 --- a/doc/classes/RenderingDevice.xml +++ b/doc/classes/RenderingDevice.xml @@ -14,6 +14,13 @@ $DOCS_URL/tutorials/shaders/compute_shaders.html + + + + + Builds the [param acceleration_structure]. + + @@ -22,6 +29,17 @@ This method does nothing. + + + + + + + + Creates a new Bottom Level Acceleration Structure. It can be accessed with the RID that is returned. + Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + + @@ -701,6 +719,102 @@ Limits for various graphics hardware can be found in the [url=https://vulkan.gpuinfo.org/]Vulkan Hardware Database[/url]. + + + + Returns [code]true[/code] if raytracing is supported by this device. + + + + + + Starts a list of raytracing commands created with the [code]raytracing_list_*[/code] methods. The returned value should be passed to other [code]raytracing_list_*[/code] functions. + Multiple raytracing lists cannot be created at the same time; you must finish the previous raytracing list first using [method raytracing_list_end]. + A simple raytracing operation might look like this (code is not a complete example): + [codeblock] + var rd = RenderingDevice.new() + + # Create a BLAS for a mesh. + var blas = rd.blas_create(vertex_array, index_array, transform_buffer) + # Create TLAS with BLASs. + var tlas = rd.tlas_create([blas]) + + # Build acceleration structures. + rd.acceleration_structure_build(blas) + rd.acceleration_structure_build(tlas) + + var raylist = rd.raytracing_list_begin() + + # Bind pipeline and uniforms. + rd.raytracing_list_bind_raytracing_pipeline(raylist, raytracing_pipeline) + rd.raytracing_list_bind_uniform_set(raylist, uniform_set, 0) + + # Trace rays. 
+ var width = get_viewport().size.x + var height = get_viewport().size.y + rd.raytracing_list_trace_rays(raylist, width, height) + + rd.raytracing_list_end() + [/codeblock] + + + + + + + + Binds [param raytracing_pipeline] to the specified [param raytracing_list]. + + + + + + + + + Binds the [param uniform_set] to this [param raytracing_list]. Godot ensures that all textures in the uniform set have the correct Vulkan access masks. If Godot had to change access masks of textures, it will raise a Vulkan image memory barrier. + + + + + + Finishes a list of raytracing commands created with the [code]raytracing_list_*[/code] methods. + + + + + + + + + Sets the push constant data to [param buffer] for the specified [param raytracing_list]. The shader determines how this binary data is used. The buffer's size in bytes must also be specified in [param size_bytes] (this can be obtained by calling the [method PackedByteArray.size] method on the passed [param buffer]). + + + + + + + + + Initializes a raytracing dispatch for the specified [param raytracing_list], assembling a group of [param width] x [param height] rays. + + + + + + + + Creates a new raytracing pipeline. It can be accessed with the RID that is returned. + Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + + + + + + + Returns [code]true[/code] if the raytracing pipeline specified by the [param raytracing_pipeline] RID is valid, [code]false[/code] otherwise. + + @@ -1052,6 +1166,14 @@ [b]Note:[/b] The existing [param texture] requires the [constant TEXTURE_USAGE_CAN_UPDATE_BIT] to be updatable. + + + + + Creates a new Top Level Acceleration Structure. It can be accessed with the RID that is returned. + Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + + @@ -2047,6 +2169,10 @@ + + + + Sampler uniform. @@ -2077,7 +2203,10 @@ Input attachment uniform. - + + Acceleration structure uniform. 
+ + Represents the size of the [enum UniformType] enum. @@ -2385,7 +2514,16 @@ Compute shader stage. This can be used to run arbitrary computing tasks in a shader, performing them on the GPU instead of the CPU. - + + Ray generation shader stage. This can be used to generate primary rays. + + + Miss shader stage. This can be used to specify what happens if a ray does not hit anything in the scene. + + + Closest hit shader stage. This can be used to specify what happens when a ray hits the closest geometry in the scene. + + Represents the size of the [enum ShaderStage] enum. @@ -2403,6 +2541,15 @@ Compute shader stage bit (see also [constant SHADER_STAGE_COMPUTE]). + + Ray generation shader stage bit (see also [constant SHADER_STAGE_RAYGEN]). + + + Miss shader stage bit (see also [constant SHADER_STAGE_MISS]). + + + Closest hit shader stage bit (see also [constant SHADER_STAGE_CLOSEST_HIT]). + Khronos' GLSL shading language (used natively by OpenGL and Vulkan). This is the language used for core Godot shaders. 
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index cd5df56fbb92..e0c896bfc513 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -3032,7 +3032,7 @@ Vector RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec binary_data.vertex_input_mask = shader_refl.vertex_input_mask; binary_data.fragment_output_mask = shader_refl.fragment_output_mask; binary_data.specialization_constants_count = shader_refl.specialization_constants.size(); - binary_data.is_compute = shader_refl.is_compute; + binary_data.pipeline_type = shader_refl.pipeline_type; binary_data.compute_local_size[0] = shader_refl.compute_local_size[0]; binary_data.compute_local_size[1] = shader_refl.compute_local_size[1]; binary_data.compute_local_size[2] = shader_refl.compute_local_size[2]; @@ -3663,8 +3663,8 @@ RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vect r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - r_shader_desc.is_compute = binary_data.is_compute; - shader_info_in.is_compute = binary_data.is_compute; + r_shader_desc.pipeline_type = binary_data.pipeline_type; + shader_info_in.pipeline_type = binary_data.pipeline_type; r_shader_desc.compute_local_size[0] = binary_data.compute_local_size[0]; r_shader_desc.compute_local_size[1] = binary_data.compute_local_size[1]; r_shader_desc.compute_local_size[2] = binary_data.compute_local_size[2]; @@ -4912,10 +4912,13 @@ void RenderingDeviceDriverD3D12::command_bind_push_constants(CommandBufferID p_c if (!shader_info_in->dxil_push_constant_size) { return; } - if (shader_info_in->is_compute) { + if (shader_info_in->pipeline_type == PipelineType::COMPUTE) { cmd_buf_info->cmd_list->SetComputeRoot32BitConstants(0, p_data.size(), p_data.ptr(), p_dst_first_index); - } else { + } else if 
(shader_info_in->pipeline_type == PipelineType::RASTERIZATION) { cmd_buf_info->cmd_list->SetGraphicsRoot32BitConstants(0, p_data.size(), p_data.ptr(), p_dst_first_index); + } else { + // TODO + ERR_FAIL_MSG("Unimplemented!"); } } @@ -5954,6 +5957,61 @@ RDD::PipelineID RenderingDeviceDriverD3D12::compute_pipeline_create(ShaderID p_s return PipelineID(pipeline_info); } +/********************/ +/**** RAYTRACING ****/ +/********************/ + +// ---- ACCELERATION STRUCTURES ---- + +RDD::AccelerationStructureID RenderingDeviceDriverD3D12::blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset, uint32_t p_index_count, BufferID p_instance_buffer, uint64_t p_instance_offset) { + // TODO + ERR_FAIL_V_MSG(AccelerationStructureID(), "Unimplemented!"); +} + +RDD::AccelerationStructureID RenderingDeviceDriverD3D12::tlas_create(const LocalVector &p_blases) { + // TODO + ERR_FAIL_V_MSG(AccelerationStructureID(), "Unimplemented!"); +} + +void RenderingDeviceDriverD3D12::acceleration_structure_free(AccelerationStructureID p_acceleration_structure) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + +// ----- PIPELINE ----- + +RDD::RaytracingPipelineID RenderingDeviceDriverD3D12::raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) { + // TODO + ERR_FAIL_V_MSG(RaytracingPipelineID(), "Unimplemented!"); +} + +void RenderingDeviceDriverD3D12::raytracing_pipeline_free(RDD::RaytracingPipelineID p_pipeline) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + +// ----- COMMANDS ----- + +void RenderingDeviceDriverD3D12::command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + +void RenderingDeviceDriverD3D12::command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) 
{ + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + +void RenderingDeviceDriverD3D12::command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + +void RenderingDeviceDriverD3D12::command_raytracing_trace_rays(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline, ShaderID p_shader, uint32_t p_width, uint32_t p_height) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + /*****************/ /**** QUERIES ****/ /*****************/ diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 5bcef0664210..7cbfaa7bb74a 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -626,7 +626,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { uint32_t fragment_output_mask = 0; uint32_t specialization_constants_count = 0; uint32_t spirv_specialization_constants_ids_mask = 0; - uint32_t is_compute = 0; + PipelineType pipeline_type = PipelineType::RASTERIZATION; uint32_t compute_local_size[3] = {}; uint32_t set_count = 0; uint32_t push_constant_size = 0; @@ -642,7 +642,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { struct ShaderInfo { uint32_t dxil_push_constant_size = 0; uint32_t nir_runtime_data_root_param_idx = UINT32_MAX; - bool is_compute = false; + PipelineType pipeline_type = PipelineType::RASTERIZATION; struct UniformBindingInfo { uint32_t stages = 0; // Actual shader stages using the uniform (0 if totally optimized out). 
@@ -912,6 +912,28 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; + /********************/ + /**** RAYTRACING ****/ + /********************/ + + // ---- ACCELERATION STRUCTURES ---- + + virtual AccelerationStructureID blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset, uint32_t p_index_count, BufferID p_instance_buffer, uint64_t p_instance_offset) override final; + virtual AccelerationStructureID tlas_create(const LocalVector &p_blases) override final; + virtual void acceleration_structure_free(AccelerationStructureID p_acceleration_structure) override final; + + // ----- PIPELINE ----- + + virtual RaytracingPipelineID raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; + virtual void raytracing_pipeline_free(RaytracingPipelineID p_pipeline) override final; + + // ----- COMMANDS ----- + + virtual void command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure) override final; + virtual void command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) override final; + virtual void command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final; + virtual void command_raytracing_trace_rays(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline, ShaderID p_shader, uint32_t p_width, uint32_t p_height) override final; + /*****************/ /**** QUERIES ****/ /*****************/ diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index 0fff49da4106..ab4ffcaa388e 100644 --- 
a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -383,6 +383,26 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; +#pragma mark - Raytracing + + // ----- ACCELERATION STRUCTURE ----- + + virtual AccelerationStructureID blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, BufferID p_transform_buffer, uint64_t p_transform_offset) override final; + virtual AccelerationStructureID tlas_create(const LocalVector &p_blases) override final; + virtual void acceleration_structure_free(AccelerationStructureID p_acceleration_structure) override final; + + // ----- PIPELINE ----- + + virtual RaytracingPipelineID raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; + virtual void raytracing_pipeline_free(RaytracingPipelineID p_pipeline) override final; + + // ----- COMMANDS ----- + + virtual void command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure) override final; + virtual void command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) override final; + virtual void command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final; + virtual void command_raytracing_trace_rays(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline, ShaderID p_shader, uint32_t p_width, uint32_t p_height) override final; + #pragma mark - Queries // ----- TIMESTAMP ----- diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 
1d16ac964da3..e271a0f2dd77 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -1242,6 +1242,12 @@ _FORCE_INLINE_ void read(RD::ShaderStage &p_val) { p_val = (RD::ShaderStage)val; } + _FORCE_INLINE_ void read(RD::PipelineType &p_val) { + uint32_t val; + read(val); + p_val = (RD::PipelineType)val; + } + _FORCE_INLINE_ void read(bool &p_val) { CHECK(sizeof(uint8_t)); @@ -1552,6 +1558,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData { uint32_t fragment_output_mask = UINT32_MAX; uint32_t spirv_specialization_constants_ids_mask = UINT32_MAX; uint32_t flags = NONE; + RD::PipelineType pipeline_type = RD::PipelineType::RASTERIZATION; ComputeSize compute_local_size; PushConstantData push_constant; LocalVector stages; @@ -1626,6 +1633,7 @@ void serialize(BufWriter &p_writer) const { p_writer.write(fragment_output_mask); p_writer.write(spirv_specialization_constants_ids_mask); p_writer.write(flags); + p_writer.write(pipeline_type); p_writer.write(compute_local_size); p_writer.write(push_constant); p_writer.write(VectorView(stages)); @@ -1640,6 +1648,7 @@ void deserialize(BufReader &p_reader) { p_reader.read(fragment_output_mask); p_reader.read(spirv_specialization_constants_ids_mask); p_reader.read(flags); + p_reader.read(pipeline_type); p_reader.read(compute_local_size); p_reader.read(push_constant); p_reader.read(stages); @@ -1677,10 +1686,13 @@ void deserialize(BufReader &p_reader) { ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage); if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) { - r_reflection.is_compute = true; + r_reflection.pipeline_type = PipelineType::COMPUTE; ERR_FAIL_COND_V_MSG(p_spirv.size() != 1, FAILED, "Compute shaders can only receive one stage, dedicated to compute."); } + if (p_spirv[i].shader_stage == SHADER_STAGE_RAYGEN || p_spirv[i].shader_stage == SHADER_STAGE_MISS || p_spirv[i].shader_stage == SHADER_STAGE_CLOSEST_HIT) { + 
r_reflection.pipeline_type = PipelineType::RAYTRACING; + } ERR_FAIL_COND_V_MSG(r_reflection.stages.has_flag(stage_flag), FAILED, "Stage " + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once."); @@ -1689,7 +1701,7 @@ void deserialize(BufReader &p_reader) { Compiler compiler(std::move(pir)); - if (r_reflection.is_compute) { + if (r_reflection.pipeline_type == PipelineType::COMPUTE) { r_reflection.compute_local_size[0] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 0); r_reflection.compute_local_size[1] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 1); r_reflection.compute_local_size[2] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 2); @@ -1986,6 +1998,7 @@ void deserialize(BufReader &p_reader) { .y = spirv_data.compute_local_size[1], .z = spirv_data.compute_local_size[2], }; + bin_data.pipeline_type = spirv_data.pipeline_type; bin_data.push_constant.size = spirv_data.push_constant_size; bin_data.push_constant.stages = (ShaderStageUsage)(uint8_t)spirv_data.push_constant_stages; bin_data.set_needs_view_mask_buffer(shader_meta.has_multiview); @@ -2563,7 +2576,7 @@ void deserialize(BufReader &p_reader) { } MDShader *shader = nullptr; - if (binary_data.is_compute()) { + if (binary_data.pipeline_type == PipelineType::COMPUTE) { MDComputeShader *cs = new MDComputeShader( binary_data.shader_name, uniform_sets, @@ -2615,7 +2628,7 @@ void deserialize(BufReader &p_reader) { r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - r_shader_desc.is_compute = binary_data.is_compute(); + r_shader_desc.pipeline_type = binary_data.pipeline_type; r_shader_desc.compute_local_size[0] = binary_data.compute_local_size.x; r_shader_desc.compute_local_size[1] = binary_data.compute_local_size.y; r_shader_desc.compute_local_size[2] = binary_data.compute_local_size.z; @@ -3730,6 +3743,59 @@ bool 
isArrayTexture(MTLTextureType p_type) { return PipelineID(pipeline); } +#pragma mark - Raytracing + +// ----- ACCELERATION STRUCTURE ----- + +RDD::AccelerationStructureID RenderingDeviceDriverMetal::blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, RDD::BufferID p_transform_buffer, uint64_t p_transform_offset) { + // TODO + ERR_FAIL_V_MSG(AccelerationStructureID(), "Unimplemented!"); +} + +RDD::AccelerationStructureID RenderingDeviceDriverMetal::tlas_create(const LocalVector &p_blases) { + // TODO + ERR_FAIL_V_MSG(AccelerationStructureID(), "Unimplemented!"); +} + +void RenderingDeviceDriverMetal::acceleration_structure_free(RDD::AccelerationStructureID p_acceleration_structure) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + +// ----- PIPELINE ----- + +RDD::RaytracingPipelineID RenderingDeviceDriverMetal::raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) { + // TODO + ERR_FAIL_V_MSG(RaytracingPipelineID(), "Unimplemented!"); +} + +void RenderingDeviceDriverMetal::raytracing_pipeline_free(RDD::RaytracingPipelineID p_pipeline) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + +// ----- COMMANDS ----- + +void RenderingDeviceDriverMetal::command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + +void RenderingDeviceDriverMetal::command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + +void RenderingDeviceDriverMetal::command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + +void 
RenderingDeviceDriverMetal::command_raytracing_trace_rays(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline, ShaderID p_shader, uint32_t p_width, uint32_t p_height) { + // TODO + ERR_FAIL_MSG("Unimplemented!"); +} + #pragma mark - Queries // ----- TIMESTAMP ----- diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index fb63e962de9f..983b1e15300d 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -369,6 +369,15 @@ uint32_t RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_stages_fla if (supported_stages & VK_SHADER_STAGE_COMPUTE_BIT) { flags += SHADER_STAGE_COMPUTE_BIT; } + if (supported_stages & VK_SHADER_STAGE_RAYGEN_BIT_KHR) { + flags += SHADER_STAGE_RAYGEN_BIT; + } + if (supported_stages & VK_SHADER_STAGE_MISS_BIT_KHR) { + flags += SHADER_STAGE_MISS_BIT; + } + if (supported_stages & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) { + flags += SHADER_STAGE_CLOSEST_HIT_BIT; + } return flags; } @@ -513,6 +522,12 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_NV_RAY_TRACING_VALIDATION_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, false); if 
(Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { _register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); @@ -713,6 +728,14 @@ Error RenderingDeviceDriverVulkan::_check_device_features() { return OK; } +static uint32_t _align_up(uint32_t size, uint32_t alignment) { + return (size + (alignment - 1)) & ~(alignment - 1); +} + +static VkDeviceAddress _align_up_address(VkDeviceAddress address, VkDeviceAddress alignment) { + return (address + (alignment - 1)) & ~(alignment - 1); +} + Error RenderingDeviceDriverVulkan::_check_device_capabilities() { // Fill device family and version. device_capabilities.device_family = DEVICE_VULKAN; @@ -734,6 +757,12 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; VkPhysicalDeviceMultiviewFeatures multiview_features = {}; VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; + VkPhysicalDeviceVulkanMemoryModelFeatures memory_model_features = {}; + VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_address_features = {}; + VkPhysicalDeviceAccelerationStructureFeaturesKHR acceleration_structure_features = {}; + VkPhysicalDeviceRayTracingPipelineFeaturesKHR raytracing_pipeline_features = {}; + VkPhysicalDeviceSynchronization2FeaturesKHR sync_2_features = {}; + VkPhysicalDeviceRayTracingValidationFeaturesNV raytracing_validation_features = {}; const bool use_1_2_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_2; if (use_1_2_features) { @@ -770,6 +799,40 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_features = &pipeline_cache_control_features; } + if (enabled_device_extension_names.has(VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) { + memory_model_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES; + memory_model_features.pNext = next_features; + next_features = &memory_model_features; + + 
buffer_address_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES; + buffer_address_features.pNext = next_features; + next_features = &buffer_address_features; + } + + if (enabled_device_extension_names.has(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) { + acceleration_structure_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR; + acceleration_structure_features.pNext = next_features; + next_features = &acceleration_structure_features; + } + + if (enabled_device_extension_names.has(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) { + raytracing_pipeline_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR; + raytracing_pipeline_features.pNext = next_features; + next_features = &raytracing_pipeline_features; + } + + if (enabled_device_extension_names.has(VK_NV_RAY_TRACING_VALIDATION_EXTENSION_NAME)) { + raytracing_validation_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_VALIDATION_FEATURES_NV; + raytracing_validation_features.pNext = next_features; + next_features = &raytracing_validation_features; + } + + if (enabled_device_extension_names.has(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME)) { + sync_2_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES; + sync_2_features.pNext = next_features; + next_features = &sync_2_features; + } + VkPhysicalDeviceFeatures2 device_features_2 = {}; device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; device_features_2.pNext = next_features; @@ -821,6 +884,19 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { device_memory_report_support = true; } #endif + + if (enabled_device_extension_names.has(VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) { + raytracing_capabilities.buffer_device_address_support = buffer_address_features.bufferDeviceAddress; + } + + if (enabled_device_extension_names.has(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) { + 
acceleration_structure_capabilities.acceleration_structure_support = acceleration_structure_features.accelerationStructure; + } + + if (enabled_device_extension_names.has(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) { + raytracing_capabilities.raytracing_pipeline_support = raytracing_pipeline_features.rayTracingPipeline; + raytracing_capabilities.validation = raytracing_validation_features.rayTracingValidation; + } } if (functions.GetPhysicalDeviceProperties2 != nullptr) { @@ -829,6 +905,8 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDeviceMultiviewProperties multiview_properties = {}; VkPhysicalDeviceSubgroupProperties subgroup_properties = {}; VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control_properties = {}; + VkPhysicalDeviceAccelerationStructurePropertiesKHR acceleration_structure_properties = {}; + VkPhysicalDeviceRayTracingPipelinePropertiesKHR raytracing_properties = {}; VkPhysicalDeviceProperties2 physical_device_properties_2 = {}; const bool use_1_1_properties = physical_device_properties.apiVersion >= VK_API_VERSION_1_1; @@ -857,6 +935,18 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_properties = &vrs_properties; } + if (acceleration_structure_capabilities.acceleration_structure_support) { + acceleration_structure_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR; + acceleration_structure_properties.pNext = next_properties; + next_properties = &acceleration_structure_properties; + } + + if (raytracing_capabilities.raytracing_pipeline_support) { + raytracing_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR; + raytracing_properties.pNext = next_properties; + next_properties = &raytracing_properties; + } + physical_device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; physical_device_properties_2.pNext = next_properties; functions.GetPhysicalDeviceProperties2(physical_device, 
&physical_device_properties_2); @@ -924,6 +1014,29 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { if (subgroup_capabilities.quad_operations_in_all_stages) { print_verbose(" quad operations in all stages"); } + + if (acceleration_structure_capabilities.acceleration_structure_support) { + print_verbose("- Vulkan Acceleration Structure supported"); + acceleration_structure_capabilities.min_acceleration_structure_scratch_offset_alignment = acceleration_structure_properties.minAccelerationStructureScratchOffsetAlignment; + print_verbose(" min acceleration structure scratch offset alignment: " + itos(acceleration_structure_capabilities.min_acceleration_structure_scratch_offset_alignment)); + } else { + print_verbose("- Vulkan Acceleration Structure not supported"); + } + + if (raytracing_capabilities.raytracing_pipeline_support) { + raytracing_capabilities.shader_group_handle_size = raytracing_properties.shaderGroupHandleSize; + raytracing_capabilities.shader_group_handle_alignment = raytracing_properties.shaderGroupHandleAlignment; + raytracing_capabilities.shader_group_handle_size_aligned = _align_up(raytracing_capabilities.shader_group_handle_size, raytracing_capabilities.shader_group_handle_alignment); + raytracing_capabilities.shader_group_base_alignment = raytracing_properties.shaderGroupBaseAlignment; + + print_verbose("- Vulkan Raytracing supported"); + print_verbose(" shader group handle size: " + itos(raytracing_capabilities.shader_group_handle_size)); + print_verbose(" shader group handle alignment: " + itos(raytracing_capabilities.shader_group_handle_alignment)); + print_verbose(" shader group handle size aligned: " + itos(raytracing_capabilities.shader_group_handle_size_aligned)); + print_verbose(" shader group base alignment: " + itos(raytracing_capabilities.shader_group_base_alignment)); + } else { + print_verbose("- Vulkan Raytracing not supported"); + } } return OK; @@ -1009,6 +1122,38 @@ Error 
RenderingDeviceDriverVulkan::_initialize_device(const LocalVectorvk_buffer; + return vkGetBufferDeviceAddress(vk_device, &addr_info); +} RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitField p_usage, MemoryAllocationType p_allocation_type) { VkBufferCreateInfo create_info = {}; @@ -2227,6 +2399,8 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPE static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR)); // RDD::BarrierAccessBits == VkAccessFlagBits. 
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT)); @@ -2245,6 +2419,8 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_WRITE_BIT, VK_ACCESS_H static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_READ_BIT, VK_ACCESS_MEMORY_READ_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_MEMORY_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT, VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)); void RenderingDeviceDriverVulkan::command_pipeline_barrier( CommandBufferID p_cmd_buffer, @@ -2257,8 +2433,58 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( for (uint32_t i = 0; i < p_memory_barriers.size(); i++) { vk_memory_barriers[i] = {}; vk_memory_barriers[i].sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - vk_memory_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].src_access); - vk_memory_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].dst_access); + // Remove acceleration structure read bit + vk_memory_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].src_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR; + vk_memory_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].dst_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR; + } + + VkPipelineStageFlags src_stage_flags = _rd_to_vk_pipeline_stages(p_src_stages); + VkPipelineStageFlags dst_stage_flags = _rd_to_vk_pipeline_stages(p_dst_stages); + VkPipelineStageFlags as_src_stages = src_stage_flags; + VkPipelineStageFlags as_dst_stages = dst_stage_flags; + + // If the 
rayQuery feature is not enabled and a memory barrier srcAccessMask includes + // VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, srcStageMask must not include any of the + // VK_PIPELINE_STAGE_*_SHADER_BIT stages except VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR + + // If the rayQuery feature is not enabled and a memory barrier dstAccessMask includes + // VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, dstStageMask must not include any of the + // VK_PIPELINE_STAGE_*_SHADER_BIT stages except VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR + + LocalVector acceleration_structure_barrier_indices; + + for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) { + VkAccessFlags src_access = _rd_to_vk_access_flags(p_buffer_barriers[i].src_access); + if ((src_access & VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR) != 0) { + if ((src_stage_flags & (VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) != 0) { + acceleration_structure_barrier_indices.push_back(i); + as_src_stages &= ~(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + continue; + } + } + + VkAccessFlags dst_access = _rd_to_vk_access_flags(p_buffer_barriers[i].dst_access); + if ((dst_access & VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR) != 0) { + if ((dst_stage_flags & (VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) != 0) { + acceleration_structure_barrier_indices.push_back(i); + as_dst_stages &= ~(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + } + } + } + + uint32_t acceleration_structure_barrier_count = acceleration_structure_barrier_indices.size(); // Counted after collection so the follow-up barrier below is actually recorded. + VkBufferMemoryBarrier *as_barriers = ALLOCA_ARRAY(VkBufferMemoryBarrier, acceleration_structure_barrier_indices.size()); + for (uint32_t j = 0; j < acceleration_structure_barrier_indices.size(); j++) { + uint32_t i = acceleration_structure_barrier_indices[j]; + as_barriers[j] = {}; + as_barriers[j].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + as_barriers[j].srcQueueFamilyIndex =
VK_QUEUE_FAMILY_IGNORED; + as_barriers[j].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + as_barriers[j].srcAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].src_access); + as_barriers[j].dstAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].dst_access); + as_barriers[j].buffer = ((const BufferInfo *)p_buffer_barriers[i].buffer.id)->vk_buffer; + as_barriers[j].offset = p_buffer_barriers[i].offset; + as_barriers[j].size = p_buffer_barriers[i].size; } VkBufferMemoryBarrier *vk_buffer_barriers = ALLOCA_ARRAY(VkBufferMemoryBarrier, p_buffer_barriers.size()); @@ -2267,8 +2493,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( vk_buffer_barriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; vk_buffer_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_buffer_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - vk_buffer_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].src_access); - vk_buffer_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].dst_access); + vk_buffer_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].src_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR; + vk_buffer_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].dst_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR; vk_buffer_barriers[i].buffer = ((const BufferInfo *)p_buffer_barriers[i].buffer.id)->vk_buffer; vk_buffer_barriers[i].offset = p_buffer_barriers[i].offset; vk_buffer_barriers[i].size = p_buffer_barriers[i].size; @@ -2312,12 +2538,23 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( vkCmdPipelineBarrier( (VkCommandBuffer)p_cmd_buffer.id, - _rd_to_vk_pipeline_stages(p_src_stages), - _rd_to_vk_pipeline_stages(p_dst_stages), + src_stage_flags, + dst_stage_flags, 0, p_memory_barriers.size(), vk_memory_barriers, p_buffer_barriers.size(), vk_buffer_barriers, p_texture_barriers.size(), vk_image_barriers); + + if 
(acceleration_structure_barrier_count > 0) { + vkCmdPipelineBarrier( + (VkCommandBuffer)p_cmd_buffer.id, + as_src_stages, + as_dst_stages, + 0, + 0, nullptr, + acceleration_structure_barrier_count, as_barriers, + 0, nullptr); + } } /****************/ @@ -3338,6 +3575,9 @@ static VkShaderStageFlagBits RD_STAGE_TO_VK_SHADER_STAGE_BITS[RDD::SHADER_STAGE_ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, VK_SHADER_STAGE_COMPUTE_BIT, + VK_SHADER_STAGE_RAYGEN_BIT_KHR, + VK_SHADER_STAGE_MISS_BIT_KHR, + VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, }; String RenderingDeviceDriverVulkan::shader_get_binary_cache_key() { @@ -3361,7 +3601,7 @@ Vector RenderingDeviceDriverVulkan::shader_compile_binary_from_spirv(Ve binary_data.vertex_input_mask = shader_refl.vertex_input_mask; binary_data.fragment_output_mask = shader_refl.fragment_output_mask; binary_data.specialization_constants_count = shader_refl.specialization_constants.size(); - binary_data.is_compute = shader_refl.is_compute; + binary_data.pipeline_type = shader_refl.pipeline_type; binary_data.compute_local_size[0] = shader_refl.compute_local_size[0]; binary_data.compute_local_size[1] = shader_refl.compute_local_size[1]; binary_data.compute_local_size[2] = shader_refl.compute_local_size[2]; @@ -3547,7 +3787,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - r_shader_desc.is_compute = binary_data.is_compute; + r_shader_desc.pipeline_type = binary_data.pipeline_type; r_shader_desc.compute_local_size[0] = binary_data.compute_local_size[0]; r_shader_desc.compute_local_size[1] = binary_data.compute_local_size[1]; r_shader_desc.compute_local_size[2] = binary_data.compute_local_size[2]; @@ -3635,6 +3875,9 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec case UNIFORM_TYPE_INPUT_ATTACHMENT: { 
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; } break; + case UNIFORM_TYPE_ACCELERATION_STRUCTURE: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + } break; default: { DEV_ASSERT(false); } @@ -3731,6 +3974,31 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec create_info.pName = "main"; shader_info.vk_stages_create_info.push_back(create_info); + + ShaderStage stage = r_shader_desc.stages[i]; + + if (stage == ShaderStage::SHADER_STAGE_RAYGEN || stage == ShaderStage::SHADER_STAGE_MISS) { + VkRayTracingShaderGroupCreateInfoKHR group_info = {}; + group_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR; + group_info.anyHitShader = VK_SHADER_UNUSED_KHR; + group_info.closestHitShader = VK_SHADER_UNUSED_KHR; + group_info.intersectionShader = VK_SHADER_UNUSED_KHR; + group_info.generalShader = i; + + shader_info.vk_groups_create_info.push_back(group_info); + } + if (stage == ShaderStage::SHADER_STAGE_CLOSEST_HIT) { + VkRayTracingShaderGroupCreateInfoKHR group_info = {}; + group_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR; + group_info.anyHitShader = VK_SHADER_UNUSED_KHR; + group_info.closestHitShader = i; + group_info.intersectionShader = VK_SHADER_UNUSED_KHR; + group_info.generalShader = VK_SHADER_UNUSED_KHR; + + shader_info.vk_groups_create_info.push_back(group_info); + } } // Descriptor sets. 
@@ -3790,6 +4058,58 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec ERR_FAIL_V_MSG(ShaderID(), error_text); } + if (r_shader_desc.pipeline_type == PipelineType::RAYTRACING) { + // Regions + + for (uint32_t i = 0; i < r_shader_desc.stages.size(); i++) { + ShaderStage stage = r_shader_desc.stages[i]; + switch (stage) { + case ShaderStage::SHADER_STAGE_RAYGEN: + shader_info.regions.raygen_count += 1; + break; + case ShaderStage::SHADER_STAGE_MISS: + shader_info.regions.miss_count += 1; + break; + case ShaderStage::SHADER_STAGE_CLOSEST_HIT: + shader_info.regions.closest_hit_count += 1; + break; + default: + // nothing + break; + } + } + + shader_info.regions.group_count = shader_info.regions.raygen_count + shader_info.regions.miss_count + shader_info.regions.closest_hit_count; + + uint32_t handle_size_aligned = raytracing_capabilities.shader_group_handle_size_aligned; + uint32_t base_alignment = raytracing_capabilities.shader_group_base_alignment; + + shader_info.regions.raygen.stride = _align_up(handle_size_aligned * shader_info.regions.raygen_count, base_alignment); + shader_info.regions.raygen.size = shader_info.regions.raygen.stride; // odd but ok + + shader_info.regions.miss.stride = handle_size_aligned; + shader_info.regions.miss.size = _align_up(handle_size_aligned * shader_info.regions.miss_count, base_alignment); + + shader_info.regions.closest_hit.stride = handle_size_aligned; + shader_info.regions.closest_hit.size = _align_up(handle_size_aligned * shader_info.regions.closest_hit_count, base_alignment); + + shader_info.regions.call.stride = 0; + shader_info.regions.call.size = 0; + + uint32_t handles_size = shader_info.regions.group_count * raytracing_capabilities.shader_group_handle_size; + shader_info.regions.handles_data.resize(handles_size); + + // Shader binding table + uint32_t sbt_size = shader_info.regions.raygen.size + shader_info.regions.closest_hit.size + shader_info.regions.miss.size + 
shader_info.regions.call.size; + shader_info.sbt_buffer = buffer_create(sbt_size, BUFFER_USAGE_TRANSFER_FROM_BIT | BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | BUFFER_USAGE_SHADER_BINDING_TABLE_BIT, MEMORY_ALLOCATION_TYPE_CPU); + + // Update regions addresses + shader_info.regions.raygen.deviceAddress = _buffer_get_device_address(shader_info.sbt_buffer); + shader_info.regions.miss.deviceAddress = shader_info.regions.raygen.deviceAddress + shader_info.regions.raygen.size; + shader_info.regions.closest_hit.deviceAddress = shader_info.regions.miss.deviceAddress + shader_info.regions.miss.size; + shader_info.regions.call.deviceAddress = 0; + } + // Bookkeep. ShaderInfo *shader_info_ptr = VersatileResource::allocate(resources_allocator); @@ -3808,6 +4128,10 @@ void RenderingDeviceDriverVulkan::shader_free(ShaderID p_shader) { shader_destroy_modules(p_shader); + if (shader_info->sbt_buffer) { + buffer_free(shader_info->sbt_buffer); + } + VersatileResource::free(resources_allocator, shader_info); } @@ -3910,6 +4234,13 @@ VkDescriptorPool RenderingDeviceDriverVulkan::_descriptor_set_pool_find_or_creat curr_vk_size++; vk_sizes_count++; } + if (p_key.uniform_type[UNIFORM_TYPE_ACCELERATION_STRUCTURE]) { + *curr_vk_size = {}; + curr_vk_size->type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_ACCELERATION_STRUCTURE] * max_descriptor_sets_per_pool; + curr_vk_size++; + vk_sizes_count++; + } DEV_ASSERT(vk_sizes_count <= UNIFORM_TYPE_MAX); } @@ -4127,6 +4458,17 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorViewsType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; + acceleration_structure_write->accelerationStructureCount = 1; + acceleration_structure_write->pAccelerationStructures = &accel_info->vk_acceleration_structure; + + vk_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + vk_writes[i].pNext = acceleration_structure_write; + } break; 
default: { DEV_ASSERT(false); } @@ -5178,6 +5520,236 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( return PipelineID(vk_pipeline); } +/********************/ +/**** RAYTRACING ****/ +/********************/ + +RDD::AccelerationStructureID RenderingDeviceDriverVulkan::blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, BufferID p_transform_buffer, uint64_t p_transform_offset) { +#if !(defined(MACOS_ENABLED) || defined(IOS_ENABLED)) + // Vertex positions is first buffer + const VertexFormatInfo *vf_info = (const VertexFormatInfo *)p_vertex_format.id; + VkDeviceSize buffer_offset = vf_info->vk_attributes[0].offset; + + VkDeviceAddress vertex_address = _buffer_get_device_address(p_vertex_buffer) + buffer_offset; + VkDeviceAddress index_address = _buffer_get_device_address(p_index_buffer) + p_index_offset_bytes; + VkDeviceAddress transform_address = _buffer_get_device_address(p_transform_buffer) + p_transform_offset; + + VkDeviceSize vertex_stride = vf_info->vk_bindings[0].stride; + VkFormat vertex_format = vf_info->vk_attributes[0].format; + uint32_t max_vertex = p_vertex_count ? 
p_vertex_count - 1 : 0; + + AccelerationStructureInfo *accel_info = VersatileResource::allocate(resources_allocator); + + accel_info->geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + accel_info->geometry.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR; + accel_info->geometry.flags = VK_GEOMETRY_OPAQUE_BIT_KHR; + + accel_info->geometry.geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR; + accel_info->geometry.geometry.triangles.vertexFormat = vertex_format; + accel_info->geometry.geometry.triangles.vertexData.deviceAddress = vertex_address; + accel_info->geometry.geometry.triangles.vertexStride = vertex_stride; + accel_info->geometry.geometry.triangles.indexType = p_index_format == INDEX_BUFFER_FORMAT_UINT16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; + accel_info->geometry.geometry.triangles.indexData.deviceAddress = index_address; + // Transform matrix, 3 rows, 4 columns, row-major + accel_info->geometry.geometry.triangles.transformData.deviceAddress = transform_address; + // Number of vertices in vertexData minus one, aka max vertex index + accel_info->geometry.geometry.triangles.maxVertex = max_vertex; + + // Info for building BLAS + uint32_t primitive_count = p_vertex_count / 3; + if (p_index_buffer) { + primitive_count = p_index_count / 3; + } + // The vertex offset is expressed in bytes + uint32_t first_vertex = p_vertex_offset / vertex_stride; + accel_info->range_info.firstVertex = first_vertex; + accel_info->range_info.primitiveCount = primitive_count; + accel_info->range_info.primitiveOffset = 0; + accel_info->range_info.transformOffset = 0; + uint32_t max_primitive_count = accel_info->range_info.primitiveCount; + + accel_info->build_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + accel_info->build_info.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + accel_info->build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + 
accel_info->build_info.pGeometries = &accel_info->geometry; + accel_info->build_info.geometryCount = 1; + accel_info->build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + + VkAccelerationStructureBuildSizesInfoKHR size_info = {}; + size_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + + vkGetAccelerationStructureBuildSizesKHR(vk_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accel_info->build_info, &max_primitive_count, &size_info); + _acceleration_structure_create(VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, size_info, accel_info); + + return AccelerationStructureID(accel_info); +#else + return AccelerationStructureID(); +#endif +} + +RDD::AccelerationStructureID RenderingDeviceDriverVulkan::tlas_create(const LocalVector &p_blases) { +#if !(defined(MACOS_ENABLED) || defined(IOS_ENABLED)) + AccelerationStructureInfo *accel_info = VersatileResource::allocate(resources_allocator); + + for (uint32_t i = 0; i < p_blases.size(); ++i) { + const AccelerationStructureID &blas = p_blases[i]; + AccelerationStructureInfo *blas_info = (AccelerationStructureInfo *)blas.id; + + VkTransformMatrixKHR transform = { { + { 1.0, 0.0, 0.0, 0.0 }, + { 0.0, 1.0, 0.0, 0.0 }, + { 0.0, 0.0, 1.0, 0.0 }, + } }; + + VkAccelerationStructureInstanceKHR instance = {}; + instance.transform = transform; + instance.instanceCustomIndex = i; + instance.mask = 0xFF; + instance.accelerationStructureReference = _buffer_get_device_address(blas_info->buffer); + instance.instanceShaderBindingTableRecordOffset = 0; + instance.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; + + accel_info->instances.push_back(instance); + } + + uint32_t instance_count = accel_info->instances.size(); + VkDeviceAddress instances_buffer_address = 0; + + if (instance_count > 0) { + uint32_t instances_size = instance_count * sizeof(accel_info->instances[0]); + accel_info->instances_buffer = buffer_create(instances_size, 
BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT, MEMORY_ALLOCATION_TYPE_CPU); + uint8_t *data_ptr = buffer_map(accel_info->instances_buffer); + ERR_FAIL_NULL_V(data_ptr, AccelerationStructureID()); + memcpy(data_ptr, accel_info->instances.ptr(), instances_size); + buffer_unmap(accel_info->instances_buffer); + instances_buffer_address = _buffer_get_device_address(accel_info->instances_buffer); + } + + accel_info->geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + accel_info->geometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR; + accel_info->geometry.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR; + accel_info->geometry.geometry.instances.data.deviceAddress = instances_buffer_address; + + accel_info->build_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + accel_info->build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + accel_info->build_info.geometryCount = 1; + accel_info->build_info.pGeometries = &accel_info->geometry; + accel_info->build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + accel_info->build_info.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR; + + VkAccelerationStructureBuildSizesInfoKHR size_info = {}; + size_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + vkGetAccelerationStructureBuildSizesKHR(vk_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accel_info->build_info, &instance_count, &size_info); + accel_info->range_info.primitiveCount = instance_count; + + _acceleration_structure_create(VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, size_info, accel_info); + return AccelerationStructureID(accel_info); +#else + return AccelerationStructureID(); +#endif +} + +void RenderingDeviceDriverVulkan::_acceleration_structure_create(VkAccelerationStructureTypeKHR p_type, VkAccelerationStructureBuildSizesInfoKHR 
p_size_info, AccelerationStructureInfo *r_accel_info) { +#if !(defined(MACOS_ENABLED) || defined(IOS_ENABLED)) + RDD::BufferID buffer = buffer_create(p_size_info.accelerationStructureSize, RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT | RDD::BUFFER_USAGE_STORAGE_BIT | RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU); + r_accel_info->buffer = buffer; + + // Scratch address must be a multiple of minAccelerationStructureScratchOffsetAlignment, so pad the 64-bit scratch size by one alignment unit. + uint64_t padded_scratch_size = p_size_info.buildScratchSize + acceleration_structure_capabilities.min_acceleration_structure_scratch_offset_alignment; + + RDD::BufferID scratch_buffer = buffer_create(padded_scratch_size, RDD::BUFFER_USAGE_STORAGE_BIT | RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU); + r_accel_info->scratch_buffer = scratch_buffer; + VkDeviceAddress scratch_address = _buffer_get_device_address(scratch_buffer); + r_accel_info->build_info.scratchData.deviceAddress = _align_up_address(scratch_address, acceleration_structure_capabilities.min_acceleration_structure_scratch_offset_alignment); + + VkAccelerationStructureCreateInfoKHR blas_create_info = {}; + blas_create_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR; + blas_create_info.type = p_type; + blas_create_info.size = p_size_info.accelerationStructureSize; + blas_create_info.buffer = ((const BufferInfo *)buffer.id)->vk_buffer; + VkResult err = vkCreateAccelerationStructureKHR(vk_device, &blas_create_info, nullptr, &r_accel_info->vk_acceleration_structure); + ERR_FAIL_COND_MSG(err, "vkCreateAccelerationStructureKHR failed with error " + itos(err) + "."); + r_accel_info->build_info.dstAccelerationStructure = r_accel_info->vk_acceleration_structure; +#endif +} + +void RenderingDeviceDriverVulkan::acceleration_structure_free(AccelerationStructureID p_acceleration_structure) { +#if !(defined(MACOS_ENABLED) || defined(IOS_ENABLED)) + AccelerationStructureInfo *accel_info =
(AccelerationStructureInfo *)p_acceleration_structure.id; + if (accel_info->instances_buffer) { + buffer_free(accel_info->instances_buffer); + } + if (accel_info->scratch_buffer) { + buffer_free(accel_info->scratch_buffer); + } + if (accel_info->buffer) { + buffer_free(accel_info->buffer); + } + if (accel_info->vk_acceleration_structure) { + vkDestroyAccelerationStructureKHR(vk_device, accel_info->vk_acceleration_structure, nullptr); + } + VersatileResource::free(resources_allocator, accel_info); +#endif +} + +// ----- COMMANDS ----- + +void RenderingDeviceDriverVulkan::command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure) { +#if !(defined(MACOS_ENABLED) || defined(IOS_ENABLED)) + const AccelerationStructureInfo *accel_info = (const AccelerationStructureInfo *)p_acceleration_structure.id; + const VkAccelerationStructureBuildRangeInfoKHR *range_info_ptr = &accel_info->range_info; + vkCmdBuildAccelerationStructuresKHR((VkCommandBuffer)p_cmd_buffer.id, 1, &accel_info->build_info, &range_info_ptr); +#endif +} + +void RenderingDeviceDriverVulkan::command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) { + vkCmdBindPipeline((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, (VkPipeline)p_pipeline.id); +} + +void RenderingDeviceDriverVulkan::command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; + const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id; + vkCmdBindDescriptorSets((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr); +} + +void RenderingDeviceDriverVulkan::command_raytracing_trace_rays(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline, ShaderID p_shader, 
uint32_t p_width, uint32_t p_height) { +#if !(defined(MACOS_ENABLED) || defined(IOS_ENABLED)) + ShaderInfo *shader_info = (ShaderInfo *)p_shader.id; + + uint32_t handle_size = raytracing_capabilities.shader_group_handle_size; + uint32_t handles_size = shader_info->regions.handles_data.size(); + uint8_t *handles_ptr = shader_info->regions.handles_data.ptr(); + + VkResult err = vkGetRayTracingShaderGroupHandlesKHR(vk_device, (VkPipeline)p_pipeline.id, 0, shader_info->regions.group_count, handles_size, handles_ptr); + ERR_FAIL_COND_MSG(err, "vkGetRayTracingShaderGroupHandlesKHR failed with error " + itos(err) + "."); + + uint8_t *sbt_ptr = buffer_map(shader_info->sbt_buffer); + uint8_t *sbt_data = sbt_ptr; + uint32_t handle_index = 0; + + memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size); + ++handle_index; + + sbt_data = sbt_ptr + shader_info->regions.raygen.size; + for (uint32_t i = 0; i < shader_info->regions.miss_count; ++i) { + memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size); + sbt_data += shader_info->regions.miss.stride; + ++handle_index; + } + + sbt_data = sbt_ptr + shader_info->regions.raygen.size + shader_info->regions.miss.size; + for (uint32_t i = 0; i < shader_info->regions.closest_hit_count; ++i) { + memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size); + sbt_data += shader_info->regions.closest_hit.stride; + ++handle_index; + } + + buffer_unmap(shader_info->sbt_buffer); + + vkCmdTraceRaysKHR((VkCommandBuffer)p_cmd_buffer.id, &shader_info->regions.raygen, &shader_info->regions.miss, &shader_info->regions.closest_hit, &shader_info->regions.call, p_width, p_height, 1); +#endif +} + /*****************/ /**** COMPUTE ****/ /*****************/ @@ -5256,6 +5828,67 @@ RDD::PipelineID RenderingDeviceDriverVulkan::compute_pipeline_create(ShaderID p_ return PipelineID(vk_pipeline); } +RDD::RaytracingPipelineID RenderingDeviceDriverVulkan::raytracing_pipeline_create(ShaderID p_shader, VectorView 
p_specialization_constants) { +#if !(defined(MACOS_ENABLED) || defined(IOS_ENABLED)) + const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; + + VkRayTracingPipelineCreateInfoKHR pipeline_create_info = {}; + pipeline_create_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR; + + // Stages + pipeline_create_info.stageCount = shader_info->vk_stages_create_info.size(); + + VkPipelineShaderStageCreateInfo *vk_pipeline_stages = ALLOCA_ARRAY(VkPipelineShaderStageCreateInfo, pipeline_create_info.stageCount); + + for (uint32_t i = 0; i < pipeline_create_info.stageCount; i++) { + vk_pipeline_stages[i] = shader_info->vk_stages_create_info[i]; + + if (p_specialization_constants.size()) { + VkSpecializationMapEntry *specialization_map_entries = ALLOCA_ARRAY(VkSpecializationMapEntry, p_specialization_constants.size()); + for (uint32_t j = 0; j < p_specialization_constants.size(); j++) { + specialization_map_entries[j] = {}; + specialization_map_entries[j].constantID = p_specialization_constants[j].constant_id; + specialization_map_entries[j].offset = (const char *)&p_specialization_constants[j].int_value - (const char *)p_specialization_constants.ptr(); + specialization_map_entries[j].size = sizeof(uint32_t); + } + + VkSpecializationInfo *specialization_info = ALLOCA_SINGLE(VkSpecializationInfo); + *specialization_info = {}; + specialization_info->dataSize = p_specialization_constants.size() * sizeof(PipelineSpecializationConstant); + specialization_info->pData = p_specialization_constants.ptr(); + specialization_info->mapEntryCount = p_specialization_constants.size(); + specialization_info->pMapEntries = specialization_map_entries; + + vk_pipeline_stages[i].pSpecializationInfo = specialization_info; + } + } + + // Groups + pipeline_create_info.groupCount = pipeline_create_info.stageCount; + VkRayTracingShaderGroupCreateInfoKHR *vk_pipeline_groups = ALLOCA_ARRAY(VkRayTracingShaderGroupCreateInfoKHR, pipeline_create_info.groupCount); + for (uint32_t i 
= 0; i < pipeline_create_info.stageCount; i++) { + vk_pipeline_groups[i] = shader_info->vk_groups_create_info[i]; + } + + // Pipeline + pipeline_create_info.layout = shader_info->vk_pipeline_layout; + pipeline_create_info.pStages = vk_pipeline_stages; + pipeline_create_info.pGroups = vk_pipeline_groups; + pipeline_create_info.maxPipelineRayRecursionDepth = 1; + + VkPipeline vk_pipeline = VK_NULL_HANDLE; + VkResult err = vkCreateRayTracingPipelinesKHR(vk_device, VK_NULL_HANDLE, pipelines_cache.vk_cache, 1, &pipeline_create_info, nullptr, &vk_pipeline); + ERR_FAIL_COND_V_MSG(err, RaytracingPipelineID(), "vkCreateRayTracingPipelinesKHR failed with error " + itos(err) + "."); + + return RaytracingPipelineID(vk_pipeline); +#else + return RaytracingPipelineID(); +#endif +} + +void RenderingDeviceDriverVulkan::raytracing_pipeline_free(RaytracingPipelineID p_pipeline) { + vkDestroyPipeline(vk_device, (VkPipeline)p_pipeline.id, nullptr); +} /*****************/ /**** QUERIES ****/ /*****************/ @@ -5696,6 +6329,10 @@ void RenderingDeviceDriverVulkan::set_object_name(ObjectType p_type, ID p_driver case OBJECT_TYPE_PIPELINE: { _set_object_name(VK_OBJECT_TYPE_PIPELINE, (uint64_t)p_driver_id.id, p_name); } break; + case OBJECT_TYPE_ACCELERATION_STRUCTURE: { + const AccelerationStructureInfo *asi = (const AccelerationStructureInfo *)p_driver_id.id; + _set_object_name(VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, (uint64_t)asi->vk_acceleration_structure, p_name); + } break; default: { DEV_ASSERT(false); } @@ -5874,6 +6511,8 @@ bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) { return vrs_capabilities.attachment_vrs_supported && physical_device_features.shaderStorageImageExtendedFormats; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; + case SUPPORTS_RAYTRACING: + return raytracing_capabilities.buffer_device_address_support && acceleration_structure_capabilities.acceleration_structure_support && 
raytracing_capabilities.raytracing_pipeline_support; default: return false; } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 4eec7547f50f..4343112ed98f 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -100,6 +100,21 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { bool storage_input_output_16 = false; }; + struct AccelerationStructureCapabilities { + bool acceleration_structure_support = false; + uint32_t min_acceleration_structure_scratch_offset_alignment = 0; + }; + + struct RaytracingCapabilities { + bool buffer_device_address_support = false; + bool raytracing_pipeline_support = false; + uint32_t shader_group_handle_size = 0; + uint32_t shader_group_handle_alignment = 0; + uint32_t shader_group_handle_size_aligned = 0; + uint32_t shader_group_base_alignment = 0; + bool validation = false; + }; + struct DeviceFunctions { PFN_vkCreateSwapchainKHR CreateSwapchainKHR = nullptr; PFN_vkDestroySwapchainKHR DestroySwapchainKHR = nullptr; @@ -116,6 +131,10 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { // Debug device fault. PFN_vkGetDeviceFaultInfoEXT GetDeviceFaultInfoEXT = nullptr; + + // Raytracing extensions. + PFN_vkCreateAccelerationStructureKHR CreateAccelerationStructureKHR = nullptr; + PFN_vkCreateRayTracingPipelinesKHR CreateRaytracingPipelinesKHR = nullptr; }; // Debug marker extensions. 
VkDebugReportObjectTypeEXT _convert_to_debug_report_objectType(VkObjectType p_object_type); @@ -138,6 +157,8 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { VRSCapabilities vrs_capabilities; ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; + AccelerationStructureCapabilities acceleration_structure_capabilities; + RaytracingCapabilities raytracing_capabilities; bool pipeline_cache_control_support = false; bool device_fault_support = false; #if defined(VK_TRACK_DEVICE_MEMORY) @@ -198,6 +219,10 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { VkBufferView vk_view = VK_NULL_HANDLE; // For texel buffers. }; +private: + VkDeviceAddress _buffer_get_device_address(BufferID p_buffer); + +public: virtual BufferID buffer_create(uint64_t p_size, BitField p_usage, MemoryAllocationType p_allocation_type) override final; virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) override final; virtual void buffer_free(BufferID p_buffer) override final; @@ -427,7 +452,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { uint64_t vertex_input_mask = 0; uint32_t fragment_output_mask = 0; uint32_t specialization_constants_count = 0; - uint32_t is_compute = 0; + PipelineType pipeline_type = PipelineType::RASTERIZATION; uint32_t compute_local_size[3] = {}; uint32_t set_count = 0; uint32_t push_constant_size = 0; @@ -437,11 +462,28 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { }; }; + struct RaytracingShaderRegions { + VkStridedDeviceAddressRegionKHR raygen; + uint32_t raygen_count = 0; + VkStridedDeviceAddressRegionKHR miss; + uint32_t miss_count = 0; + VkStridedDeviceAddressRegionKHR closest_hit; + uint32_t closest_hit_count = 0; + VkStridedDeviceAddressRegionKHR call; + uint32_t group_count = 0; + + // Size of one shader group handle + LocalVector handles_data; + }; + struct ShaderInfo { VkShaderStageFlags vk_push_constant_stages = 0; 
TightLocalVector vk_stages_create_info; + TightLocalVector vk_groups_create_info; TightLocalVector vk_descriptor_set_layouts; VkPipelineLayout vk_pipeline_layout = VK_NULL_HANDLE; + RaytracingShaderRegions regions; + BufferID sbt_buffer; }; public: @@ -640,6 +682,46 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; + /********************/ + /**** RAYTRACING ****/ + /********************/ + struct AccelerationStructureInfo { + VkAccelerationStructureKHR vk_acceleration_structure = VK_NULL_HANDLE; + // Buffer used for the structure + RDD::BufferID buffer; + // Buffer used for building the structure + RDD::BufferID scratch_buffer; + // Buffer used for instances in a TLAS + RDD::BufferID instances_buffer; + + // Required for building + VkAccelerationStructureGeometryKHR geometry; + LocalVector instances; + VkAccelerationStructureBuildGeometryInfoKHR build_info; + VkAccelerationStructureBuildRangeInfoKHR range_info; + }; + + virtual AccelerationStructureID blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, BufferID p_transform_buffer, uint64_t p_transform_offset) override final; + virtual AccelerationStructureID tlas_create(const LocalVector &p_blases) override final; + virtual void acceleration_structure_free(AccelerationStructureID p_acceleration_structure) override final; + +private: + void _acceleration_structure_create(VkAccelerationStructureTypeKHR p_type, VkAccelerationStructureBuildSizesInfoKHR p_size_info, AccelerationStructureInfo *r_accel_info); + +public: + // ----- PIPELINE ----- + + virtual RaytracingPipelineID raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; + virtual void 
raytracing_pipeline_free(RaytracingPipelineID p_pipeline) override final; + + // ----- COMMANDS ----- + + virtual void command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure) override final; + virtual void command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) override final; + virtual void command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final; + virtual void command_raytracing_trace_rays(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline, ShaderID p_shader, uint32_t p_width, uint32_t p_height) override final; + +public: /*****************/ /**** QUERIES ****/ /*****************/ diff --git a/editor/plugins/shader_file_editor_plugin.cpp b/editor/plugins/shader_file_editor_plugin.cpp index d955f84b170d..5cd55caa7773 100644 --- a/editor/plugins/shader_file_editor_plugin.cpp +++ b/editor/plugins/shader_file_editor_plugin.cpp @@ -264,7 +264,10 @@ ShaderFileEditor::ShaderFileEditor() { "Fragment", "TessControl", "TessEval", - "Compute" + "Compute", + "Raygen", + "Miss", + "ClosestHit", }; stage_hb = memnew(HBoxContainer); diff --git a/gles3_builders.py b/gles3_builders.py index a81d42b42e23..3204fffdb606 100644 --- a/gles3_builders.py +++ b/gles3_builders.py @@ -10,6 +10,9 @@ class GLES3HeaderStruct: def __init__(self): self.vertex_lines = [] self.fragment_lines = [] + self.raygen_lines = [] + self.miss_lines = [] + self.closest_hit_lines = [] self.uniforms = [] self.fbos = [] self.texunits = [] @@ -25,6 +28,9 @@ def __init__(self): self.line_offset = 0 self.vertex_offset = 0 self.fragment_offset = 0 + self.raygen_offset = 0 + self.miss_offset = 0 + self.closest_hit_offset = 0 self.variant_defines = [] self.variant_names = [] self.specialization_names = [] @@ -88,6 +94,27 @@ def include_file_in_gles3_header(filename: str, header_data: GLES3HeaderStruct, 
header_data.fragment_offset = header_data.line_offset continue + if line.find("#[raygen]") != -1: + header_data.reading = "raygen" + line = fs.readline() + header_data.line_offset += 1 + header_data.raygen_offset = header_data.line_offset + continue + + if line.find("#[miss]") != -1: + header_data.reading = "miss" + line = fs.readline() + header_data.line_offset += 1 + header_data.miss_offset = header_data.line_offset + continue + + if line.find("#[closest_hit]") != -1: + header_data.reading = "closest_hit" + line = fs.readline() + header_data.line_offset += 1 + header_data.closest_hit_offset = header_data.line_offset + continue + while line.find("#include ") != -1: includeline = line.replace("#include ", "").strip()[1:-1] @@ -182,6 +209,12 @@ def include_file_in_gles3_header(filename: str, header_data: GLES3HeaderStruct, header_data.vertex_lines += [line] if header_data.reading == "fragment": header_data.fragment_lines += [line] + if header_data.reading == "raygen": + header_data.raygen_lines += [line] + if header_data.reading == "miss": + header_data.miss_lines += [line] + if header_data.reading == "closest_hit": + header_data.closest_hit_lines += [line] line = fs.readline() header_data.line_offset += 1 diff --git a/glsl_builders.py b/glsl_builders.py index 82c15fc93bee..29118240dcc8 100644 --- a/glsl_builders.py +++ b/glsl_builders.py @@ -11,16 +11,25 @@ def __init__(self): self.vertex_lines = [] self.fragment_lines = [] self.compute_lines = [] + self.raygen_lines = [] + self.miss_lines = [] + self.closest_hit_lines = [] self.vertex_included_files = [] self.fragment_included_files = [] self.compute_included_files = [] + self.raygen_included_files = [] + self.miss_included_files = [] + self.closest_hit_included_files = [] self.reading = "" self.line_offset = 0 self.vertex_offset = 0 self.fragment_offset = 0 self.compute_offset = 0 + self.raygen_offset = 0 + self.miss_offset = 0 + self.closest_hit_offset = 0 def include_file_in_rd_header(filename: str, 
header_data: RDHeaderStruct, depth: int) -> RDHeaderStruct: @@ -53,6 +62,27 @@ def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: header_data.compute_offset = header_data.line_offset continue + if line.find("#[raygen]") != -1: + header_data.reading = "raygen" + line = fs.readline() + header_data.line_offset += 1 + header_data.raygen_offset = header_data.line_offset + continue + + if line.find("#[miss]") != -1: + header_data.reading = "miss" + line = fs.readline() + header_data.line_offset += 1 + header_data.miss_offset = header_data.line_offset + continue + + if line.find("#[closest_hit]") != -1: + header_data.reading = "closest_hit" + line = fs.readline() + header_data.line_offset += 1 + header_data.closest_hit_offset = header_data.line_offset + continue + while line.find("#include ") != -1: includeline = line.replace("#include ", "").strip()[1:-1] @@ -74,6 +104,20 @@ def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: header_data.compute_included_files += [included_file] if include_file_in_rd_header(included_file, header_data, depth + 1) is None: print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') + elif included_file not in header_data.raygen_included_files and header_data.reading == "raygen": + header_data.raygen_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') + elif included_file not in header_data.miss_included_files and header_data.reading == "miss": + header_data.miss_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') + elif ( + included_file not in header_data.closest_hit_included_files and header_data.reading == "closest_hit" + ): + header_data.closest_hit_included_files 
+= [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') line = fs.readline() @@ -85,6 +129,12 @@ def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: header_data.fragment_lines += [line] if header_data.reading == "compute": header_data.compute_lines += [line] + if header_data.reading == "raygen": + header_data.raygen_lines += [line] + if header_data.reading == "miss": + header_data.miss_lines += [line] + if header_data.reading == "closest_hit": + header_data.closest_hit_lines += [line] line = fs.readline() header_data.line_offset += 1 @@ -109,7 +159,14 @@ def build_rd_header( out_file_ifdef = out_file_base.replace(".", "_").upper() out_file_class = out_file_base.replace(".glsl.gen.h", "").title().replace("_", "").replace(".", "") + "ShaderRD" - if header_data.compute_lines: + if header_data.raygen_lines: + body_parts = [ + "static const char _raygen_code[] = {\n%s\n\t\t};" % to_raw_cstring(header_data.raygen_lines), + "static const char _miss_code[] = {\n%s\n\t\t};" % to_raw_cstring(header_data.miss_lines), + "static const char _closest_hit_code[] = {\n%s\n\t\t};" % to_raw_cstring(header_data.closest_hit_lines), + f'setup_raytracing(_raygen_code, _miss_code, _closest_hit_code, "{out_file_class}");', + ] + elif header_data.compute_lines: body_parts = [ "static const char _compute_code[] = {\n%s\n\t\t};" % to_raw_cstring(header_data.compute_lines), f'setup(nullptr, nullptr, _compute_code, "{out_file_class}");', diff --git a/modules/glslang/register_types.cpp b/modules/glslang/register_types.cpp index 81505f716a2d..ca8d35a3e2eb 100644 --- a/modules/glslang/register_types.cpp +++ b/modules/glslang/register_types.cpp @@ -48,7 +48,10 @@ static Vector _compile_shader_glsl(RenderingDevice::ShaderStage p_stage EShLangFragment, EShLangTessControl, EShLangTessEvaluation, - EShLangCompute + EShLangCompute, + EShLangRayGen, 
+ EShLangMiss, + EShLangClosestHit, }; int ClientInputSemanticsVersion = 100; // maps to, say, #define VULKAN 100 @@ -130,6 +133,10 @@ static Vector _compile_shader_glsl(RenderingDevice::ShaderStage p_stage preamble += "#define has_VK_KHR_multiview 1\n"; } + if (p_render_device->has_feature(RD::SUPPORTS_RAYTRACING)) { + preamble += "#define has_VK_KHR_ray_tracing_pipeline 1\n"; + } + if (!preamble.empty()) { shader.setPreamble(preamble.c_str()); } diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index cbdbe151c82f..75a8c7020c57 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -65,6 +65,15 @@ void ShaderRD::_add_stage(const char *p_code, StageType p_stage_type) { case STAGE_TYPE_COMPUTE: chunk.type = StageTemplate::Chunk::TYPE_COMPUTE_GLOBALS; break; + case STAGE_TYPE_RAYGEN: + chunk.type = StageTemplate::Chunk::TYPE_RAYGEN_GLOBALS; + break; + case STAGE_TYPE_MISS: + chunk.type = StageTemplate::Chunk::TYPE_MISS_GLOBALS; + break; + case STAGE_TYPE_CLOSEST_HIT: + chunk.type = StageTemplate::Chunk::TYPE_CLOSEST_HIT_GLOBALS; + break; default: { } } @@ -136,9 +145,9 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con if (p_compute_code) { _add_stage(p_compute_code, STAGE_TYPE_COMPUTE); - is_compute = true; + pipeline_type = RD::PipelineType::COMPUTE; } else { - is_compute = false; + pipeline_type = RD::PipelineType::RASTERIZATION; if (p_vertex_code) { _add_stage(p_vertex_code, STAGE_TYPE_VERTEX); } @@ -166,6 +175,39 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con base_sha256 = tohash.as_string().sha256_text(); } +void ShaderRD::setup_raytracing(const char *p_raygen_code, const char *p_miss_code, const char *p_closest_hit_code, const char *p_name) { + name = p_name; + + pipeline_type = RD::PipelineType::RAYTRACING; + if (p_raygen_code) { + _add_stage(p_raygen_code, STAGE_TYPE_RAYGEN); + } + 
if (p_miss_code) { + _add_stage(p_miss_code, STAGE_TYPE_MISS); + } + if (p_closest_hit_code) { + _add_stage(p_closest_hit_code, STAGE_TYPE_CLOSEST_HIT); + } + + StringBuilder tohash; + tohash.append("[GodotVersionNumber]"); + tohash.append(VERSION_NUMBER); + tohash.append("[GodotVersionHash]"); + tohash.append(VERSION_HASH); + tohash.append("[SpirvCacheKey]"); + tohash.append(RenderingDevice::get_singleton()->shader_get_spirv_cache_key()); + tohash.append("[BinaryCacheKey]"); + tohash.append(RenderingDevice::get_singleton()->shader_get_binary_cache_key()); + tohash.append("[Raygen]"); + tohash.append(p_raygen_code ? p_raygen_code : ""); + tohash.append("[Miss]"); + tohash.append(p_miss_code ? p_miss_code : ""); + tohash.append("[ClosestHit]"); + tohash.append(p_closest_hit_code ? p_closest_hit_code : ""); + + base_sha256 = tohash.as_string().sha256_text(); +} + RID ShaderRD::version_create() { //initialize() was never called ERR_FAIL_COND_V(group_to_variant_map.is_empty(), RID()); @@ -248,6 +290,15 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c case StageTemplate::Chunk::TYPE_COMPUTE_GLOBALS: { builder.append(p_version->compute_globals.get_data()); // compute globals } break; + case StageTemplate::Chunk::TYPE_RAYGEN_GLOBALS: { + builder.append(p_version->raygen_globals.get_data()); // raygen globals + } break; + case StageTemplate::Chunk::TYPE_MISS_GLOBALS: { + builder.append(p_version->miss_globals.get_data()); // miss globals + } break; + case StageTemplate::Chunk::TYPE_CLOSEST_HIT_GLOBALS: { + builder.append(p_version->closest_hit_globals.get_data()); // closest_hit globals + } break; case StageTemplate::Chunk::TYPE_CODE: { if (p_version->code_sections.has(chunk.code)) { builder.append(p_version->code_sections[chunk.code].get_data()); @@ -274,7 +325,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX; bool build_ok = true; - if (!is_compute) { 
+ if (pipeline_type == RD::PipelineType::RASTERIZATION) { //vertex stage StringBuilder builder; @@ -291,7 +342,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { } } - if (!is_compute && build_ok) { + if (pipeline_type == RD::PipelineType::RASTERIZATION && build_ok) { //fragment stage current_stage = RD::SHADER_STAGE_FRAGMENT; @@ -309,7 +360,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { } } - if (is_compute) { + if (pipeline_type == RD::PipelineType::COMPUTE) { //compute stage current_stage = RD::SHADER_STAGE_COMPUTE; @@ -328,9 +379,87 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { } } + if (pipeline_type == RD::PipelineType::RAYTRACING) { + { + current_stage = RD::SHADER_STAGE_RAYGEN; + + StringBuilder builder; + _build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_RAYGEN]); + + current_source = builder.as_string(); + + RD::ShaderStageSPIRVData stage; + stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_RAYGEN, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spirv.size() == 0) { + build_ok = false; + } else { + stage.shader_stage = RD::SHADER_STAGE_RAYGEN; + stages.push_back(stage); + } + } + if (build_ok) { + current_stage = RD::SHADER_STAGE_MISS; + + StringBuilder builder; + _build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_MISS]); + + current_source = builder.as_string(); + + RD::ShaderStageSPIRVData stage; + stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_MISS, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spirv.size() == 0) { + build_ok = false; + } else { + stage.shader_stage = RD::SHADER_STAGE_MISS; + stages.push_back(stage); + } + } + if (build_ok) { + current_stage = RD::SHADER_STAGE_CLOSEST_HIT; + + StringBuilder builder; + _build_variant_code(builder, variant, p_data.version, 
stage_templates[STAGE_TYPE_CLOSEST_HIT]); + + current_source = builder.as_string(); + + RD::ShaderStageSPIRVData stage; + stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_CLOSEST_HIT, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spirv.size() == 0) { + build_ok = false; + } else { + stage.shader_stage = RD::SHADER_STAGE_CLOSEST_HIT; + stages.push_back(stage); + } + } + } + if (!build_ok) { MutexLock lock(variant_set_mutex); //properly print the errors - ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_COMPUTE ? "Compute " : (current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment")) + " shader, variant #" + itos(variant) + " (" + variant_defines[variant].text.get_data() + ")."); + String stage_string; + switch (current_stage) { + case RD::SHADER_STAGE_VERTEX: + stage_string = "Vertex"; + break; + case RD::SHADER_STAGE_FRAGMENT: + stage_string = "Fragment"; + break; + case RD::SHADER_STAGE_COMPUTE: + stage_string = "Compute"; + break; + case RD::SHADER_STAGE_RAYGEN: + stage_string = "Raygen"; + break; + case RD::SHADER_STAGE_MISS: + stage_string = "Miss"; + break; + case RD::SHADER_STAGE_CLOSEST_HIT: + stage_string = "ClosestHit"; + break; + default: + stage_string = "Unknown"; + break; + } + ERR_PRINT("Error compiling " + stage_string + " shader, variant #" + itos(variant) + " (" + variant_defines[variant].text.get_data() + ")."); ERR_PRINT(error); #ifdef DEBUG_ENABLED @@ -359,7 +488,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.resize(variant_defines.size()); for (int i = 0; i < source_code.versions.size(); i++) { - if (!is_compute) { + if (pipeline_type == RD::PipelineType::RASTERIZATION) { //vertex stage StringBuilder builder; @@ -372,7 +501,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.write[i].stages.push_back(stage); } - if (!is_compute) { + if 
(pipeline_type == RD::PipelineType::RASTERIZATION) { //fragment stage StringBuilder builder; @@ -385,7 +514,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.write[i].stages.push_back(stage); } - if (is_compute) { + if (pipeline_type == RD::PipelineType::COMPUTE) { //compute stage StringBuilder builder; @@ -397,6 +526,43 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.write[i].stages.push_back(stage); } + + if (pipeline_type == RD::PipelineType::RAYTRACING) { + //raygen stage + + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_RAYGEN]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "raygen"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + if (pipeline_type == RD::PipelineType::RAYTRACING) { + // miss stage + + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_MISS]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "miss"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + if (pipeline_type == RD::PipelineType::RAYTRACING) { + // closest_hit stage + + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_CLOSEST_HIT]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "closest_hit"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } } return source_code; @@ -413,6 +579,12 @@ String ShaderRD::_version_get_sha1(Version *p_version) const { hash_build.append(p_version->fragment_globals.get_data()); hash_build.append("[compute_globals]"); hash_build.append(p_version->compute_globals.get_data()); + hash_build.append("[raygen_globals]"); + hash_build.append(p_version->raygen_globals.get_data()); + 
hash_build.append("[miss_globals]"); + hash_build.append(p_version->miss_globals.get_data()); + hash_build.append("[closest_hit_globals]"); + hash_build.append(p_version->closest_hit_globals.get_data()); Vector code_sections; for (const KeyValue &E : p_version->code_sections) { @@ -613,7 +785,7 @@ void ShaderRD::_compile_ensure_finished(Version *p_version) { } void ShaderRD::version_set_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector &p_custom_defines) { - ERR_FAIL_COND(is_compute); + ERR_FAIL_COND(pipeline_type != RD::PipelineType::RASTERIZATION); Version *version = version_owner.get_or_null(p_version); ERR_FAIL_NULL(version); @@ -648,7 +820,7 @@ void ShaderRD::version_set_code(RID p_version, const HashMap &p_ } void ShaderRD::version_set_compute_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_compute_globals, const Vector &p_custom_defines) { - ERR_FAIL_COND(!is_compute); + ERR_FAIL_COND(pipeline_type != RD::PipelineType::COMPUTE); Version *version = version_owner.get_or_null(p_version); ERR_FAIL_NULL(version); @@ -682,6 +854,41 @@ void ShaderRD::version_set_compute_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_raygen_globals, const String &p_miss_globals, const String &p_closest_hit_globals, const Vector &p_custom_defines) { + ERR_FAIL_COND(pipeline_type != RD::PipelineType::RAYTRACING); + + Version *version = version_owner.get_or_null(p_version); + ERR_FAIL_NULL(version); + + version->raygen_globals = p_raygen_globals.utf8(); + version->miss_globals = p_miss_globals.utf8(); + version->closest_hit_globals = p_closest_hit_globals.utf8(); + version->uniforms = p_uniforms.utf8(); + + version->code_sections.clear(); + for (const KeyValue &E : p_code) { + version->code_sections[StringName(E.key.to_upper())] = E.value.utf8(); + } + + version->custom_defines.clear(); + for (int i = 0; i < 
p_custom_defines.size(); i++) { + version->custom_defines.push_back(p_custom_defines[i].utf8()); + } + + version->dirty = true; + if (version->initialize_needed) { + _initialize_version(version); + for (int i = 0; i < group_enabled.size(); i++) { + if (!group_enabled[i]) { + _allocate_placeholders(version, i); + continue; + } + _compile_version_start(version, i); + } + version->initialize_needed = false; + } +} + bool ShaderRD::version_is_valid(RID p_version) { Version *version = version_owner.get_or_null(p_version); ERR_FAIL_NULL_V(version, false); diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h index 50b5ff212cc2..f6a08168103d 100644 --- a/servers/rendering/renderer_rd/shader_rd.h +++ b/servers/rendering/renderer_rd/shader_rd.h @@ -68,6 +68,9 @@ class ShaderRD { CharString vertex_globals; CharString compute_globals; CharString fragment_globals; + CharString raygen_globals; + CharString miss_globals; + CharString closest_hit_globals; HashMap code_sections; Vector custom_defines; Vector group_compilation_tasks; @@ -106,6 +109,9 @@ class ShaderRD { TYPE_VERTEX_GLOBALS, TYPE_FRAGMENT_GLOBALS, TYPE_COMPUTE_GLOBALS, + TYPE_RAYGEN_GLOBALS, + TYPE_MISS_GLOBALS, + TYPE_CLOSEST_HIT_GLOBALS, TYPE_CODE, TYPE_TEXT }; @@ -117,7 +123,7 @@ class ShaderRD { LocalVector chunks; }; - bool is_compute = false; + RD::PipelineType pipeline_type = RD::PipelineType::RASTERIZATION; String name; @@ -137,6 +143,9 @@ class ShaderRD { STAGE_TYPE_VERTEX, STAGE_TYPE_FRAGMENT, STAGE_TYPE_COMPUTE, + STAGE_TYPE_RAYGEN, + STAGE_TYPE_MISS, + STAGE_TYPE_CLOSEST_HIT, STAGE_TYPE_MAX, }; @@ -155,12 +164,14 @@ class ShaderRD { protected: ShaderRD(); void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); + void setup_raytracing(const char *p_raygen_code, const char *p_miss_code, const char *p_closest_hit_code, const char *p_name); public: RID version_create(); void version_set_code(RID 
p_version, const HashMap &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector &p_custom_defines); void version_set_compute_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_compute_globals, const Vector &p_custom_defines); + void version_set_raytracing_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_raygen_globals, const String &p_miss_globals, const String &p_closest_hit_globals, const Vector &p_custom_defines); _FORCE_INLINE_ RID version_get_shader(RID p_version, int p_variant) { ERR_FAIL_INDEX_V(p_variant, variant_defines.size(), RID()); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index d62fa1f420e7..5ac54f3b64b2 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -232,6 +232,139 @@ RID RenderingDevice::shader_create_from_spirv(const Vector return shader_create_from_bytecode(bytecode); } +/********************************/ +/**** ACCELERATION STRUCTURE ****/ +/********************************/ + +RID RenderingDevice::blas_create(RID p_vertex_array, RID p_index_array, RID p_transform_buffer, uint64_t p_transform_offset) { + ERR_FAIL_COND_V_MSG(!has_feature(SUPPORTS_RAYTRACING), RID(), "The current rendering device has no raytracing support."); + + VertexArray *vertex_array = vertex_array_owner.get_or_null(p_vertex_array); + ERR_FAIL_NULL_V(vertex_array, RID()); + RDD::VertexFormatID vertex_format; + if (vertex_array->description != INVALID_ID) { + ERR_FAIL_COND_V(!vertex_formats.has(vertex_array->description), RID()); + vertex_format = vertex_formats[vertex_array->description].driver_id; + } + _check_transfer_worker_vertex_array(vertex_array); + + // Indices are optional. 
+ IndexArray *index_array = index_array_owner.get_or_null(p_index_array); + RDD::BufferID index_buffer = RDD::BufferID(); + IndexBufferFormat index_format = IndexBufferFormat::INDEX_BUFFER_FORMAT_UINT32; + uint32_t index_offset_bytes = 0; + uint32_t index_count = 0; + if (index_array) { + index_buffer = index_array->driver_id; + index_format = index_array->format; + index_offset_bytes = index_array->offset * (index_array->format == INDEX_BUFFER_FORMAT_UINT16 ? sizeof(uint16_t) : sizeof(uint32_t)); + index_count = index_array->indices; + _check_transfer_worker_index_array(index_array); + } + + Buffer *transform_buffer = storage_buffer_owner.get_or_null(p_transform_buffer); + ERR_FAIL_NULL_V(transform_buffer, RID()); + ERR_FAIL_COND_V_MSG(!transform_buffer->usage.has_flag(RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT), RID(), "Transform buffer provided was not created for shader device address usage."); + ERR_FAIL_COND_V_MSG(!transform_buffer->usage.has_flag(RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT), RID(), "Transform buffer provided was not created for acceleration structure build input."); + _check_transfer_worker_buffer(transform_buffer); + + AccelerationStructure acceleration_structure; + acceleration_structure.type = RDD::ACCELERATION_STRUCTURE_TYPE_BLAS; + acceleration_structure.driver_id = driver->blas_create(vertex_array->buffers[0], vertex_array->offsets[0], vertex_format, vertex_array->vertex_count, index_buffer, index_format, index_offset_bytes, index_count, transform_buffer->driver_id, p_transform_offset); + ERR_FAIL_COND_V(!acceleration_structure.driver_id, RID()); + acceleration_structure.vertex_array = p_vertex_array; + acceleration_structure.index_array = p_index_array; + acceleration_structure.transform_buffer = p_transform_buffer; + + acceleration_structure.draw_tracker = RDG::resource_tracker_create(); + acceleration_structure.draw_tracker->acceleration_structure_driver_id = acceleration_structure.driver_id; + // Assume we 
are going to build this acceleration structure + acceleration_structure.draw_tracker->usage = RDG::RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE; + + RID id = acceleration_structure_owner.make_rid(acceleration_structure); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + return id; +} + +RID RenderingDevice::tlas_create(const Vector &p_blases) { + ERR_FAIL_COND_V_MSG(!has_feature(SUPPORTS_RAYTRACING), RID(), "The current rendering device has no raytracing support."); + + LocalVector blases; + for (Vector::ConstIterator itr = p_blases.begin(); itr != p_blases.end(); ++itr) { + const AccelerationStructure *blas = acceleration_structure_owner.get_or_null(*itr); + ERR_FAIL_NULL_V(blas, RID()); + blases.push_back(blas->driver_id); + } + + AccelerationStructure acceleration_structure; + acceleration_structure.type = RDD::ACCELERATION_STRUCTURE_TYPE_TLAS; + acceleration_structure.driver_id = driver->tlas_create(blases); + ERR_FAIL_COND_V(!acceleration_structure.driver_id, RID()); + acceleration_structure.blases = p_blases; + + acceleration_structure.draw_tracker = RDG::resource_tracker_create(); + acceleration_structure.draw_tracker->acceleration_structure_driver_id = acceleration_structure.driver_id; + // Assume we are going to build this acceleration structure + acceleration_structure.draw_tracker->usage = RDG::RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE; + + RID id = acceleration_structure_owner.make_rid(acceleration_structure); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + return id; +} + +Error RenderingDevice::acceleration_structure_build(RID p_acceleration_structure) { + ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); + + ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, + "Building acceleration structures is forbidden during creation of a draw list."); + ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, + "Building acceleration structures is forbidden during creation of a compute 
list."); + ERR_FAIL_COND_V_MSG(raytracing_list, ERR_INVALID_PARAMETER, + "Building acceleration structures is forbidden during creation of a raytracing list."); + + const AccelerationStructure *accel = acceleration_structure_owner.get_or_null(p_acceleration_structure); + ERR_FAIL_NULL_V_MSG(accel, ERR_INVALID_PARAMETER, "Acceleration structure argument is not valid."); + + Vector src_trackers; + switch (accel->type) { + case RDD::ACCELERATION_STRUCTURE_TYPE_BLAS: { + VertexArray *vertex_array = vertex_array_owner.get_or_null(accel->vertex_array); + ERR_FAIL_NULL_V_MSG(vertex_array, ERR_INVALID_PARAMETER, "Vertex array input is not valid."); + src_trackers.append_array(vertex_array->draw_trackers); + + IndexArray *index_array = index_array_owner.get_or_null(accel->index_array); + if (index_array && index_array->draw_tracker) { + src_trackers.append(index_array->draw_tracker); + } + + Buffer *transform_buffer = storage_buffer_owner.get_or_null(accel->transform_buffer); + ERR_FAIL_NULL_V_MSG(transform_buffer, ERR_INVALID_PARAMETER, "Transform buffer input is not valid."); + if (transform_buffer->draw_tracker) { + src_trackers.append(transform_buffer->draw_tracker); + } + } break; + case RDD::ACCELERATION_STRUCTURE_TYPE_TLAS: { + for (Vector::ConstIterator itr = accel->blases.begin(); itr != accel->blases.end(); ++itr) { + const AccelerationStructure *blas = acceleration_structure_owner.get_or_null(*itr); + ERR_FAIL_NULL_V_MSG(blas, ERR_INVALID_PARAMETER, "BLAS input is not valid."); + if (blas->draw_tracker) { + src_trackers.append(blas->draw_tracker); + } + } + } break; + default: + ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Invalid acceleration structure type"); + } + + draw_graph.add_acceleration_structure_build(accel->driver_id, accel->draw_tracker, src_trackers); + + return OK; +} + /***************************/ /**** BUFFER MANAGEMENT ****/ /***************************/ @@ -745,7 +878,13 @@ RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, const 
Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); + if (p_usage.has_flag(STORAGE_BUFFER_USAGE_SHADER_DEVICE_ADDRESS)) { + buffer.usage.set_flag(RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); + } + if (p_usage.has_flag(STORAGE_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY)) { + buffer.usage.set_flag(RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT); + } + buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_CPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); // Storage buffers are assumed to be mutable. @@ -2915,11 +3054,11 @@ RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); + buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_CPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); // Vertex buffers are assumed to be immutable unless they don't have initial data or they've been marked for storage explicitly. @@ -3087,8 +3226,8 @@ RID RenderingDevice::index_buffer_create(uint32_t p_index_count, IndexBufferForm index_buffer.max_index = 0xFFFFFFFF; #endif index_buffer.size = size_bytes; - index_buffer.usage = (RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT | RDD::BUFFER_USAGE_INDEX_BIT); - index_buffer.driver_id = driver->buffer_create(index_buffer.size, index_buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); + index_buffer.usage = (RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT | RDD::BUFFER_USAGE_INDEX_BIT | RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT); + index_buffer.driver_id = driver->buffer_create(index_buffer.size, index_buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_CPU); ERR_FAIL_COND_V(!index_buffer.driver_id, RID()); // Index buffers are assumed to be immutable unless they don't have initial data. 
@@ -3259,6 +3398,11 @@ RID RenderingDevice::shader_create_from_bytecode_with_samplers(const Vectorstage_bits.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); break; + case SHADER_STAGE_RAYGEN: + case SHADER_STAGE_MISS: + case SHADER_STAGE_CLOSEST_HIT: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT); + break; default: DEV_ASSERT(false && "Unknown shader stage."); break; @@ -3302,7 +3446,7 @@ RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); + buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_CPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); // Uniform buffers are assumed to be immutable unless they don't have initial data. @@ -3677,7 +3821,7 @@ RID RenderingDevice::uniform_set_create(const Collection &p_uniforms, RID p_shad _check_transfer_worker_buffer(buffer); } break; case UNIFORM_TYPE_INPUT_ATTACHMENT: { - ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") supplied for compute shader (this is not allowed)."); + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PipelineType::RASTERIZATION, RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") supplied for non-render shader (this is not allowed)."); if (uniform.get_id_count() != (uint32_t)set_uniform.length) { if (set_uniform.length > 1) { @@ -3703,6 +3847,24 @@ RID RenderingDevice::uniform_set_create(const Collection &p_uniforms, RID p_shad _check_transfer_worker_texture(texture); } } break; + case UNIFORM_TYPE_ACCELERATION_STRUCTURE: { + ERR_FAIL_COND_V_MSG(uniform.get_id_count() != 1, RID(), + "Acceleration structure supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.get_id_count()) + " provided)."); + + RID accel_id = uniform.get_id(0); + AccelerationStructure *accel = acceleration_structure_owner.get_or_null(accel_id); + 
ERR_FAIL_NULL_V_MSG(accel, RID(), "Acceleration Structure supplied (binding: " + itos(uniform.binding) + ") is invalid."); + + if (accel->draw_tracker != nullptr) { + draw_trackers.push_back(accel->draw_tracker); + // Acceleration structure is never going to be writable from raytracing shaders + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ); + } else { + untracked_usage[accel_id] = RDG::RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ; + } + + driver_uniform.ids.push_back(accel->driver_id); + } break; default: { } } @@ -3766,7 +3928,8 @@ RID RenderingDevice::render_pipeline_create(RID p_shader, FramebufferFormatID p_ // Needs a shader. Shader *shader = shader_owner.get_or_null(p_shader); ERR_FAIL_NULL_V(shader, RID()); - ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), "Compute shaders can't be used in render pipelines"); + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PipelineType::RASTERIZATION, RID(), + "Only render shaders can be used in render pipelines"); FramebufferFormat fb_format; { @@ -3956,7 +4119,7 @@ RID RenderingDevice::compute_pipeline_create(RID p_shader, const Vectoris_compute, RID(), + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PipelineType::COMPUTE, RID(), "Non-compute shaders can't be used in compute pipelines"); } @@ -4009,6 +4172,57 @@ bool RenderingDevice::compute_pipeline_is_valid(RID p_pipeline) { return compute_pipeline_owner.owns(p_pipeline); } +RID RenderingDevice::raytracing_pipeline_create(RID p_shader, const Vector &p_specialization_constants) { + _THREAD_SAFE_METHOD_ + + // Needs a shader. 
+ Shader *shader = shader_owner.get_or_null(p_shader); + ERR_FAIL_NULL_V(shader, RID()); + + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PipelineType::RAYTRACING, RID(), + "Only raytracing shaders can be used in raytracing pipelines"); + + for (int i = 0; i < shader->specialization_constants.size(); i++) { + const ShaderSpecializationConstant &sc = shader->specialization_constants[i]; + for (int j = 0; j < p_specialization_constants.size(); j++) { + const PipelineSpecializationConstant &psc = p_specialization_constants[j]; + if (psc.constant_id == sc.constant_id) { + ERR_FAIL_COND_V_MSG(psc.type != sc.type, RID(), "Specialization constant provided for id (" + itos(sc.constant_id) + ") is of the wrong type."); + break; + } + } + } + + RaytracingPipeline pipeline; + pipeline.driver_id = driver->raytracing_pipeline_create(shader->driver_id, p_specialization_constants); + ERR_FAIL_COND_V(!pipeline.driver_id, RID()); + + if (pipeline_cache_enabled) { + _update_pipeline_cache(); + } + + pipeline.shader = p_shader; + pipeline.shader_driver_id = shader->driver_id; + pipeline.shader_layout_hash = shader->layout_hash; + pipeline.set_formats = shader->set_formats; + pipeline.push_constant_size = shader->push_constant_size; + + // Create ID to associate with this pipeline. + RID id = raytracing_pipeline_owner.make_rid(pipeline); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + // Now add all the dependencies. 
+ _add_dependency(id, p_shader); + return id; +} + +bool RenderingDevice::raytracing_pipeline_is_valid(RID p_pipeline) { + _THREAD_SAFE_METHOD_ + + return raytracing_pipeline_owner.owns(p_pipeline); +} + /****************/ /**** SCREEN ****/ /****************/ @@ -4955,6 +5169,266 @@ void RenderingDevice::draw_list_end() { draw_list_bound_textures.clear(); } +/***************************/ +/**** RAYTRACING LISTS ****/ +/**************************/ + +RenderingDevice::RaytracingListID RenderingDevice::raytracing_list_begin() { + _THREAD_SAFE_METHOD_ + + ERR_FAIL_COND_V_MSG(!has_feature(SUPPORTS_RAYTRACING), INVALID_ID, "The current rendering device has no raytracing support."); + ERR_FAIL_COND_V_MSG(raytracing_list != nullptr, INVALID_ID, "Only one draw/compute/raytracing list can be active at the same time."); + + // Lock while raytracing_list is active. + _THREAD_SAFE_LOCK_ + + raytracing_list = memnew(RaytracingList); + + draw_graph.add_raytracing_list_begin(); + + return ID_TYPE_RAYTRACING_LIST; +} + +void RenderingDevice::raytracing_list_bind_raytracing_pipeline(RaytracingListID p_list, RID p_raytracing_pipeline) { + // Must be called within a raytracing list, the class mutex is locked during that time + + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_NULL(raytracing_list); + + RaytracingList *rl = raytracing_list; + + const RaytracingPipeline *pipeline = raytracing_pipeline_owner.get_or_null(p_raytracing_pipeline); + ERR_FAIL_NULL(pipeline); + + if (p_raytracing_pipeline == rl->state.pipeline) { + return; // Redundant state, return. + } + + rl->state.pipeline = p_raytracing_pipeline; + rl->state.pipeline_driver_id = pipeline->driver_id; + + draw_graph.add_raytracing_list_bind_pipeline(pipeline->driver_id); + + if (rl->state.pipeline_shader != pipeline->shader) { + // Shader changed, so descriptor sets may become incompatible. + + uint32_t pcount = pipeline->set_formats.size(); // Formats count in this pipeline. 
+ rl->state.set_count = MAX(rl->state.set_count, pcount); + const uint32_t *pformats = pipeline->set_formats.ptr(); // Pipeline set formats. + + uint32_t first_invalid_set = UINT32_MAX; // All valid by default. + switch (driver->api_trait_get(RDD::API_TRAIT_SHADER_CHANGE_INVALIDATION)) { + case RDD::SHADER_CHANGE_INVALIDATION_ALL_BOUND_UNIFORM_SETS: { + first_invalid_set = 0; + } break; + case RDD::SHADER_CHANGE_INVALIDATION_INCOMPATIBLE_SETS_PLUS_CASCADE: { + for (uint32_t i = 0; i < pcount; i++) { + if (rl->state.sets[i].pipeline_expected_format != pformats[i]) { + first_invalid_set = i; + break; + } + } + } break; + case RDD::SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH: { + if (rl->state.pipeline_shader_layout_hash != pipeline->shader_layout_hash) { + first_invalid_set = 0; + } + } break; + } + + for (uint32_t i = 0; i < pcount; i++) { + rl->state.sets[i].bound = rl->state.sets[i].bound && i < first_invalid_set; + rl->state.sets[i].pipeline_expected_format = pformats[i]; + } + + for (uint32_t i = pcount; i < rl->state.set_count; i++) { + // Unbind the ones above (not used) if exist. + rl->state.sets[i].bound = false; + } + + rl->state.set_count = pcount; // Update set count. + + if (pipeline->push_constant_size) { +#ifdef DEBUG_ENABLED + rl->validation.pipeline_push_constant_supplied = false; +#endif + } + + rl->state.pipeline_shader = pipeline->shader; + rl->state.pipeline_shader_driver_id = pipeline->shader_driver_id; + rl->state.pipeline_shader_layout_hash = pipeline->shader_layout_hash; + } + +#ifdef DEBUG_ENABLED + // Update raytracing pass pipeline info. 
+ rl->validation.pipeline_active = true; + rl->validation.pipeline_push_constant_size = pipeline->push_constant_size; +#endif +} + +void RenderingDevice::raytracing_list_bind_uniform_set(RaytracingListID p_list, RID p_uniform_set, uint32_t p_index) { + // Must be called within a raytracing list, the class mutex is locked during that time + + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_NULL(raytracing_list); + + RaytracingList *rl = raytracing_list; + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(p_index >= driver->limit_get(LIMIT_MAX_BOUND_UNIFORM_SETS) || p_index >= MAX_UNIFORM_SETS, + "Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(driver->limit_get(LIMIT_MAX_BOUND_UNIFORM_SETS)) + ")."); +#endif + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(!rl->validation.active, "Submitted Raytracing Lists can no longer be modified."); +#endif + + UniformSet *uniform_set = uniform_set_owner.get_or_null(p_uniform_set); + ERR_FAIL_NULL(uniform_set); + + if (p_index > rl->state.set_count) { + rl->state.set_count = p_index; + } + + rl->state.sets[p_index].uniform_set_driver_id = uniform_set->driver_id; // Update set pointer. + rl->state.sets[p_index].bound = false; // Needs rebind. + rl->state.sets[p_index].uniform_set_format = uniform_set->format; + rl->state.sets[p_index].uniform_set = p_uniform_set; + +#if 0 + { // Validate that textures bound are not attached as framebuffer bindings. 
+ uint32_t attachable_count = uniform_set->attachable_textures.size(); + const RID *attachable_ptr = uniform_set->attachable_textures.ptr(); + uint32_t bound_count = draw_list_bound_textures.size(); + const RID *bound_ptr = draw_list_bound_textures.ptr(); + for (uint32_t i = 0; i < attachable_count; i++) { + for (uint32_t j = 0; j < bound_count; j++) { + ERR_FAIL_COND_MSG(attachable_ptr[i] == bound_ptr[j], + "Attempted to use the same texture in framebuffer attachment and a uniform set, this is not allowed."); + } + } + } +#endif +} + +void RenderingDevice::raytracing_list_set_push_constant(RaytracingListID p_list, const void *p_data, uint32_t p_data_size) { + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_NULL(raytracing_list); + ERR_FAIL_COND_MSG(p_data_size > MAX_PUSH_CONSTANT_SIZE, "Push constants can't be bigger than 128 bytes to maintain compatibility."); + + RaytracingList *rl = raytracing_list; + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(!rl->validation.active, "Submitted Raytracing Lists can no longer be modified."); +#endif + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(p_data_size != rl->validation.pipeline_push_constant_size, + "This raytracing pipeline requires (" + itos(rl->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")"); +#endif + + draw_graph.add_raytracing_list_set_push_constant(rl->state.pipeline_shader_driver_id, p_data, p_data_size); + + // Store it in the state in case we need to restart the raytracing list. 
+ memcpy(rl->state.push_constant_data, p_data, p_data_size); + rl->state.push_constant_size = p_data_size; + +#ifdef DEBUG_ENABLED + rl->validation.pipeline_push_constant_supplied = true; +#endif +} + +void RenderingDevice::raytracing_list_trace_rays(RaytracingListID p_list, uint32_t p_width, uint32_t p_height) { + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_NULL(raytracing_list); + + RaytracingList *rl = raytracing_list; + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(!rl->validation.active, "Submitted Raytracing Lists can no longer be modified."); +#endif + +#ifdef DEBUG_ENABLED + + ERR_FAIL_COND_MSG(!rl->validation.pipeline_active, "No raytracing pipeline was set before attempting to draw."); + + if (rl->validation.pipeline_push_constant_size > 0) { + // Using push constants, check that they were supplied. + ERR_FAIL_COND_MSG(!rl->validation.pipeline_push_constant_supplied, + "The shader in this pipeline requires a push constant to be set before drawing, but it's not present."); + } + +#endif + +#ifdef DEBUG_ENABLED + for (uint32_t i = 0; i < rl->state.set_count; i++) { + if (rl->state.sets[i].pipeline_expected_format == 0) { + // Nothing expected by this pipeline. + continue; + } + + if (rl->state.sets[i].pipeline_expected_format != rl->state.sets[i].uniform_set_format) { + if (rl->state.sets[i].uniform_set_format == 0) { + ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline."); + } else if (uniform_set_owner.owns(rl->state.sets[i].uniform_set)) { + UniformSet *us = uniform_set_owner.get_or_null(rl->state.sets[i].uniform_set); + ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. 
Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(rl->state.pipeline_shader)); + } else { + ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(rl->state.pipeline_shader)); + } + } + } +#endif + + // Prepare descriptor sets if the API doesn't use pipeline barriers. + if (!driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { + for (uint32_t i = 0; i < rl->state.set_count; i++) { + if (rl->state.sets[i].pipeline_expected_format == 0) { + // Nothing expected by this pipeline. + continue; + } + + draw_graph.add_raytracing_list_uniform_set_prepare_for_use(rl->state.pipeline_shader_driver_id, rl->state.sets[i].uniform_set_driver_id, i); + } + } + + // Bind descriptor sets. + for (uint32_t i = 0; i < rl->state.set_count; i++) { + if (rl->state.sets[i].pipeline_expected_format == 0) { + continue; // Nothing expected by this pipeline. + } + if (!rl->state.sets[i].bound) { + // All good, see if this requires re-binding. 
+ draw_graph.add_raytracing_list_bind_uniform_set(rl->state.pipeline_shader_driver_id, rl->state.sets[i].uniform_set_driver_id, i); + + UniformSet *uniform_set = uniform_set_owner.get_or_null(rl->state.sets[i].uniform_set); + _uniform_set_update_shared(uniform_set); + + draw_graph.add_raytracing_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); + + rl->state.sets[i].bound = true; + } + } + + Shader *shader = shader_owner.get_or_null(rl->state.pipeline_shader); + ERR_FAIL_NULL(shader); + + draw_graph.add_raytracing_list_trace_rays(rl->state.pipeline_driver_id, shader->driver_id, p_width, p_height); + rl->state.trace_count++; +} + +void RenderingDevice::raytracing_list_end() { + ERR_FAIL_NULL(raytracing_list); + + draw_graph.add_raytracing_list_end(); + + memdelete(raytracing_list); + raytracing_list = nullptr; + + // Raytracing_list is no longer active. + _THREAD_SAFE_UNLOCK_ +} + /***********************/ /**** COMPUTE LISTS ****/ /***********************/ @@ -6022,6 +6496,14 @@ void RenderingDevice::_free_internal(RID p_id) { ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id); frames[frame].compute_pipelines_to_dispose_of.push_back(*pipeline); compute_pipeline_owner.free(p_id); + } else if (acceleration_structure_owner.owns(p_id)) { + AccelerationStructure *acceleration_structure = acceleration_structure_owner.get_or_null(p_id); + frames[frame].acceleration_structures_to_dispose_of.push_back(*acceleration_structure); + acceleration_structure_owner.free(p_id); + } else if (raytracing_pipeline_owner.owns(p_id)) { + RaytracingPipeline *pipeline = raytracing_pipeline_owner.get_or_null(p_id); + frames[frame].raytracing_pipelines_to_dispose_of.push_back(*pipeline); + raytracing_pipeline_owner.free(p_id); } else { #ifdef DEV_ENABLED ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id()) + " " + resource_name); @@ -6074,6 +6556,12 @@ void RenderingDevice::set_resource_name(RID p_id, const String &p_name) { } else if 
(compute_pipeline_owner.owns(p_id)) { ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id); driver->set_object_name(RDD::OBJECT_TYPE_PIPELINE, pipeline->driver_id, p_name); + } else if (acceleration_structure_owner.owns(p_id)) { + AccelerationStructure *acceleration_structure = acceleration_structure_owner.get_or_null(p_id); + driver->set_object_name(RDD::OBJECT_TYPE_ACCELERATION_STRUCTURE, acceleration_structure->driver_id, p_name); + } else if (raytracing_pipeline_owner.owns(p_id)) { + RaytracingPipeline *pipeline = raytracing_pipeline_owner.get_or_null(p_id); + driver->set_object_name(RDD::OBJECT_TYPE_PIPELINE, pipeline->driver_id, p_name); } else { ERR_PRINT("Attempted to name invalid ID: " + itos(p_id.get_id())); return; @@ -6183,6 +6671,22 @@ void RenderingDevice::_free_pending_resources(int p_frame) { frames[p_frame].compute_pipelines_to_dispose_of.pop_front(); } + while (frames[p_frame].raytracing_pipelines_to_dispose_of.front()) { + RaytracingPipeline *pipeline = &frames[p_frame].raytracing_pipelines_to_dispose_of.front()->get(); + + driver->raytracing_pipeline_free(pipeline->driver_id); + + frames[p_frame].raytracing_pipelines_to_dispose_of.pop_front(); + } + + // Acceleration structures. + while (frames[p_frame].acceleration_structures_to_dispose_of.front()) { + AccelerationStructure &acceleration_structure = frames[p_frame].acceleration_structures_to_dispose_of.front()->get(); + driver->acceleration_structure_free(acceleration_structure.driver_id); + + frames[p_frame].acceleration_structures_to_dispose_of.pop_front(); + } + // Uniform sets. 
while (frames[p_frame].uniform_sets_to_dispose_of.front()) { UniformSet *uniform_set = &frames[p_frame].uniform_sets_to_dispose_of.front()->get(); @@ -6577,7 +7081,7 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ String rendering_method; if (OS::get_singleton()->get_current_rendering_method() == "mobile") { rendering_method = "Forward Mobile"; - } else { + } else if (OS::get_singleton()->get_current_rendering_method() == "forward_plus") { rendering_method = "Forward+"; } @@ -6850,6 +7354,7 @@ void RenderingDevice::capture_timestamp(const String &p_name) { ERR_FAIL_COND_MSG(draw_list != nullptr && draw_list->state.draw_count > 0, "Capturing timestamps during draw list creation is not allowed. Offending timestamp was: " + p_name); ERR_FAIL_COND_MSG(compute_list != nullptr && compute_list->state.dispatch_count > 0, "Capturing timestamps during compute list creation is not allowed. Offending timestamp was: " + p_name); + ERR_FAIL_COND_MSG(raytracing_list != nullptr && raytracing_list->state.trace_count > 0, "Capturing timestamps during raytracing list creation is not allowed. Offending timestamp was: " + p_name); ERR_FAIL_COND_MSG(frames[frame].timestamp_count >= max_timestamp_query_elements, vformat("Tried capturing more timestamps than the configured maximum (%d). 
You can increase this limit in the project settings under 'Debug/Settings' called 'Max Timestamp Query Elements'.", max_timestamp_query_elements)); draw_graph.add_capture_timestamp(frames[frame].timestamp_pool, frames[frame].timestamp_count); @@ -7177,6 +7682,10 @@ bool RenderingDevice::has_feature(const Features p_feature) const { return driver->has_feature(p_feature); } +bool RenderingDevice::_raytracing_is_supported() const { + return has_feature(SUPPORTS_RAYTRACING); +} + void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("texture_create", "format", "view", "data"), &RenderingDevice::_texture_create, DEFVAL(Array())); ClassDB::bind_method(D_METHOD("texture_create_shared", "view", "with_texture"), &RenderingDevice::_texture_create_shared); @@ -7251,6 +7760,14 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("compute_pipeline_create", "shader", "specialization_constants"), &RenderingDevice::_compute_pipeline_create, DEFVAL(TypedArray())); ClassDB::bind_method(D_METHOD("compute_pipeline_is_valid", "compute_pipeline"), &RenderingDevice::compute_pipeline_is_valid); + ClassDB::bind_method(D_METHOD("raytracing_is_supported"), &RenderingDevice::_raytracing_is_supported); + ClassDB::bind_method(D_METHOD("raytracing_pipeline_create", "shader", "specialization_constants"), &RenderingDevice::_raytracing_pipeline_create, DEFVAL(TypedArray())); + ClassDB::bind_method(D_METHOD("raytracing_pipeline_is_valid", "raytracing_pipeline"), &RenderingDevice::raytracing_pipeline_is_valid); + + ClassDB::bind_method(D_METHOD("blas_create", "vertex_array", "index_array", "transform_buffer", "transform_offset"), &RenderingDevice::blas_create, DEFVAL(0)); + ClassDB::bind_method(D_METHOD("tlas_create", "blases"), &RenderingDevice::_tlas_create); + ClassDB::bind_method(D_METHOD("acceleration_structure_build", "acceleration_structure"), &RenderingDevice::acceleration_structure_build); + ClassDB::bind_method(D_METHOD("screen_get_width", "screen"), 
&RenderingDevice::screen_get_width, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); ClassDB::bind_method(D_METHOD("screen_get_height", "screen"), &RenderingDevice::screen_get_height, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); ClassDB::bind_method(D_METHOD("screen_get_framebuffer_format", "screen"), &RenderingDevice::screen_get_framebuffer_format, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); @@ -7291,6 +7808,13 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("compute_list_add_barrier", "compute_list"), &RenderingDevice::compute_list_add_barrier); ClassDB::bind_method(D_METHOD("compute_list_end"), &RenderingDevice::compute_list_end); + ClassDB::bind_method(D_METHOD("raytracing_list_begin"), &RenderingDevice::raytracing_list_begin); + ClassDB::bind_method(D_METHOD("raytracing_list_bind_raytracing_pipeline", "raytracing_list", "raytracing_pipeline"), &RenderingDevice::raytracing_list_bind_raytracing_pipeline); + ClassDB::bind_method(D_METHOD("raytracing_list_set_push_constant", "raytracing_list", "buffer", "size_bytes"), &RenderingDevice::_raytracing_list_set_push_constant); + ClassDB::bind_method(D_METHOD("raytracing_list_bind_uniform_set", "raytracing_list", "uniform_set", "set_index"), &RenderingDevice::raytracing_list_bind_uniform_set); + ClassDB::bind_method(D_METHOD("raytracing_list_trace_rays", "raytracing_list", "width", "height"), &RenderingDevice::raytracing_list_trace_rays); + ClassDB::bind_method(D_METHOD("raytracing_list_end"), &RenderingDevice::raytracing_list_end); + ClassDB::bind_method(D_METHOD("free_rid", "rid"), &RenderingDevice::free); ClassDB::bind_method(D_METHOD("capture_timestamp", "name"), &RenderingDevice::capture_timestamp); @@ -7674,6 +8198,8 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(INDEX_BUFFER_FORMAT_UINT32); BIND_BITFIELD_FLAG(STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT); + BIND_BITFIELD_FLAG(STORAGE_BUFFER_USAGE_SHADER_DEVICE_ADDRESS); + 
BIND_BITFIELD_FLAG(STORAGE_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY); BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER); //for sampling only (sampler GLSL type) BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER_WITH_TEXTURE); // for sampling only); but includes a texture); (samplerXX GLSL type)); first a sampler then a texture @@ -7685,6 +8211,7 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(UNIFORM_TYPE_UNIFORM_BUFFER); //regular uniform buffer (or UBO). BIND_ENUM_CONSTANT(UNIFORM_TYPE_STORAGE_BUFFER); //storage buffer ("buffer" qualifier) like UBO); but supports storage); for compute mostly BIND_ENUM_CONSTANT(UNIFORM_TYPE_INPUT_ATTACHMENT); //used for sub-pass read/write); for mobile mostly + BIND_ENUM_CONSTANT(UNIFORM_TYPE_ACCELERATION_STRUCTURE); //acceleration structure (TLAS)); for raytracing BIND_ENUM_CONSTANT(UNIFORM_TYPE_MAX); BIND_ENUM_CONSTANT(RENDER_PRIMITIVE_POINTS); @@ -7804,12 +8331,18 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_CONTROL); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_EVALUATION); BIND_ENUM_CONSTANT(SHADER_STAGE_COMPUTE); + BIND_ENUM_CONSTANT(SHADER_STAGE_RAYGEN); + BIND_ENUM_CONSTANT(SHADER_STAGE_MISS); + BIND_ENUM_CONSTANT(SHADER_STAGE_CLOSEST_HIT); BIND_ENUM_CONSTANT(SHADER_STAGE_MAX); BIND_ENUM_CONSTANT(SHADER_STAGE_VERTEX_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_FRAGMENT_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_CONTROL_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_EVALUATION_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_COMPUTE_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_RAYGEN_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_MISS_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_CLOSEST_HIT_BIT); BIND_ENUM_CONSTANT(SHADER_LANGUAGE_GLSL); BIND_ENUM_CONSTANT(SHADER_LANGUAGE_HLSL); @@ -8114,6 +8647,11 @@ Error RenderingDevice::_buffer_update_bind(RID p_buffer, uint32_t p_offset, uint return buffer_update(p_buffer, p_offset, p_size, p_data.ptr()); } +RID RenderingDevice::_tlas_create(const TypedArray 
&p_blases) { + Vector blases = Variant(p_blases); + return tlas_create(blases); +} + static Vector _get_spec_constants(const TypedArray &p_constants) { Vector ret; ret.resize(p_constants.size()); @@ -8182,6 +8720,10 @@ RID RenderingDevice::_compute_pipeline_create(RID p_shader, const TypedArray &p_specialization_constants = TypedArray()) { + return raytracing_pipeline_create(p_shader, _get_spec_constants(p_specialization_constants)); +} + #ifndef DISABLE_DEPRECATED Vector RenderingDevice::_draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const TypedArray &p_storage_textures) { ERR_FAIL_V_MSG(Vector(), "Deprecated. Split draw lists are used automatically by RenderingDevice."); @@ -8201,3 +8743,8 @@ void RenderingDevice::_compute_list_set_push_constant(ComputeListID p_list, cons ERR_FAIL_COND(p_data_size > (uint32_t)p_data.size()); compute_list_set_push_constant(p_list, p_data.ptr(), p_data_size); } + +void RenderingDevice::_raytracing_list_set_push_constant(RaytracingListID p_list, const Vector &p_data, uint32_t p_data_size) { + ERR_FAIL_COND(p_data_size > (uint32_t)p_data.size()); + raytracing_list_set_push_constant(p_list, p_data.ptr(), p_data_size); +} diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index ea268460c034..8512fdaeb968 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -73,6 +73,7 @@ class RenderingDevice : public RenderingDeviceCommons { typedef int64_t DrawListID; typedef int64_t ComputeListID; + typedef int64_t RaytracingListID; typedef String (*ShaderSPIRVGetCacheKeyFunction)(const RenderingDevice *p_render_device); typedef Vector (*ShaderCompileToSPIRVFunction)(ShaderStage p_stage, const String 
&p_source_code, ShaderLanguage p_language, String *r_error, const RenderingDevice *p_render_device); @@ -116,6 +117,7 @@ class RenderingDevice : public RenderingDeviceCommons { ID_TYPE_VERTEX_FORMAT, ID_TYPE_DRAW_LIST, ID_TYPE_COMPUTE_LIST = 4, + ID_TYPE_RAYTRACING_LIST = 5, ID_TYPE_MAX, ID_BASE_SHIFT = 58, // 5 bits for ID types. ID_MASK = (ID_BASE_SHIFT - 1), @@ -128,6 +130,30 @@ class RenderingDevice : public RenderingDeviceCommons { void _add_dependency(RID p_id, RID p_depends_on); void _free_dependencies(RID p_id); +private: + /********************************/ + /**** ACCELERATION STRUCTURE ****/ + /********************************/ + + struct AccelerationStructure { + RDD::AccelerationStructureID driver_id; + RDD::AccelerationStructureType type = RDD::ACCELERATION_STRUCTURE_TYPE_BLAS; + RDG::ResourceTracker *draw_tracker = nullptr; + + RID vertex_array; + RID index_array; + RID transform_buffer; + + Vector blases; + }; + + RID_Owner acceleration_structure_owner; + +public: + RID blas_create(RID p_vertex_array, RID p_index_array, RID p_transform_buffer, uint64_t p_transform_offset); + RID tlas_create(const Vector &blases); + Error acceleration_structure_build(RID p_acceleration_structure); + private: /***************************/ /**** BUFFER MANAGEMENT ****/ @@ -885,6 +911,8 @@ class RenderingDevice : public RenderingDeviceCommons { enum StorageBufferUsage { STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT = 1, + STORAGE_BUFFER_USAGE_SHADER_DEVICE_ADDRESS = (1 << 1), + STORAGE_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY = (1 << 2), }; /*****************/ @@ -1083,6 +1111,17 @@ class RenderingDevice : public RenderingDeviceCommons { RID_Owner compute_pipeline_owner; + struct RaytracingPipeline { + RID shader; + RDD::ShaderID shader_driver_id; + uint32_t shader_layout_hash = 0; + Vector set_formats; + RDD::RaytracingPipelineID driver_id; + uint32_t push_constant_size = 0; + }; + + RID_Owner raytracing_pipeline_owner; + public: RID 
render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, BitField p_dynamic_state_flags = 0, uint32_t p_for_render_pass = 0, const Vector &p_specialization_constants = Vector()); bool render_pipeline_is_valid(RID p_pipeline); @@ -1090,6 +1129,9 @@ class RenderingDevice : public RenderingDeviceCommons { RID compute_pipeline_create(RID p_shader, const Vector &p_specialization_constants = Vector()); bool compute_pipeline_is_valid(RID p_pipeline); + RID raytracing_pipeline_create(RID p_shader, const Vector &p_specialization_constants = Vector()); + bool raytracing_pipeline_is_valid(RID p_pipeline); + private: /****************/ /**** SCREEN ****/ @@ -1244,6 +1286,60 @@ class RenderingDevice : public RenderingDeviceCommons { void draw_list_end(); +private: + /**************************/ + /**** RAYTRACING LISTS ****/ + /**************************/ + + struct RaytracingList { + struct SetState { + uint32_t pipeline_expected_format = 0; + uint32_t uniform_set_format = 0; + RDD::UniformSetID uniform_set_driver_id; + RID uniform_set; + bool bound = false; + }; + + struct State { + SetState sets[MAX_UNIFORM_SETS]; + uint32_t set_count = 0; + RID pipeline; + RDD::RaytracingPipelineID pipeline_driver_id; + RID pipeline_shader; + RDD::ShaderID pipeline_shader_driver_id; + uint32_t pipeline_shader_layout_hash = 0; + uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE] = {}; + uint32_t push_constant_size = 0; + uint32_t trace_count = 0; + } state; + +#ifdef DEBUG_ENABLED + struct Validation { + bool active = true; // Means command buffer was not closed, so you can keep adding things. + Vector set_formats; + Vector set_bound; + Vector set_rids; + // Last pipeline set values. 
+ bool pipeline_active = false; + RID pipeline_shader; + uint32_t invalid_set_from = 0; + uint32_t pipeline_push_constant_size = 0; + bool pipeline_push_constant_supplied = false; + } validation; +#endif + }; + + RaytracingList *raytracing_list = nullptr; + RaytracingList::State raytracing_list_barrier_state; + +public: + RaytracingListID raytracing_list_begin(); + void raytracing_list_bind_raytracing_pipeline(RaytracingListID p_list, RID p_raytracing_pipeline); + void raytracing_list_bind_uniform_set(RaytracingListID p_list, RID p_uniform_set, uint32_t p_index); + void raytracing_list_set_push_constant(RaytracingListID p_list, const void *p_data, uint32_t p_data_size); + void raytracing_list_trace_rays(RaytracingListID p_list, uint32_t p_width, uint32_t p_height); + void raytracing_list_end(); + private: /***********************/ /**** COMPUTE LISTS ****/ @@ -1405,6 +1501,8 @@ class RenderingDevice : public RenderingDeviceCommons { List uniform_sets_to_dispose_of; List render_pipelines_to_dispose_of; List compute_pipelines_to_dispose_of; + List acceleration_structures_to_dispose_of; + List raytracing_pipelines_to_dispose_of; // Pending asynchronous data transfer for buffers. 
LocalVector download_buffer_staging_buffers; @@ -1612,11 +1710,16 @@ class RenderingDevice : public RenderingDeviceCommons { Error _buffer_update_bind(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data); + bool _raytracing_is_supported() const; + RID _tlas_create(const TypedArray &p_blases); + RID _render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const Ref &p_rasterization_state, const Ref &p_multisample_state, const Ref &p_depth_stencil_state, const Ref &p_blend_state, BitField p_dynamic_state_flags, uint32_t p_for_render_pass, const TypedArray &p_specialization_constants); RID _compute_pipeline_create(RID p_shader, const TypedArray &p_specialization_constants); + RID _raytracing_pipeline_create(RID p_shader, const TypedArray &p_specialization_constants); void _draw_list_set_push_constant(DrawListID p_list, const Vector &p_data, uint32_t p_data_size); void _compute_list_set_push_constant(ComputeListID p_list, const Vector &p_data, uint32_t p_data_size); + void _raytracing_list_set_push_constant(RaytracingListID p_list, const Vector &p_data, uint32_t p_data_size); }; VARIANT_ENUM_CAST(RenderingDevice::DeviceType) diff --git a/servers/rendering/rendering_device_binds.cpp b/servers/rendering/rendering_device_binds.cpp index ab611a92fe7a..3698f489d888 100644 --- a/servers/rendering/rendering_device_binds.cpp +++ b/servers/rendering/rendering_device_binds.cpp @@ -41,7 +41,7 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String Vector lines = p_text.split("\n"); bool reading_versions = false; - bool stage_found[RD::SHADER_STAGE_MAX] = { false, false, false, false, false }; + bool stage_found[RD::SHADER_STAGE_MAX] = { false, false, false, false, false, false, false, false }; RD::ShaderStage stage = RD::SHADER_STAGE_MAX; static const char *stage_str[RD::SHADER_STAGE_MAX] = { "vertex", @@ -49,6 +49,9 @@ Error 
RDShaderFile::parse_versions_from_text(const String &p_text, const String "tesselation_control", "tesselation_evaluation", "compute", + "raygen", + "miss", + "closest_hit", }; String stage_code[RD::SHADER_STAGE_MAX]; int stages_found = 0; diff --git a/servers/rendering/rendering_device_binds.h b/servers/rendering/rendering_device_binds.h index 89fed7ffa633..8e7700476d6a 100644 --- a/servers/rendering/rendering_device_binds.h +++ b/servers/rendering/rendering_device_binds.h @@ -267,6 +267,9 @@ class RDShaderSource : public RefCounted { ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_tesselation_control"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_tesselation_evaluation"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_compute"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_raygen"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_RAYGEN); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_miss"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_MISS); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_closest_hit"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_CLOSEST_HIT); ADD_GROUP("Syntax", "source_"); ADD_PROPERTY(PropertyInfo(Variant::INT, "language", PROPERTY_HINT_RANGE, "GLSL,HLSL"), "set_language", "get_language"); } @@ -326,12 +329,18 @@ class RDShaderSPIRV : public Resource { ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_tesselation_control"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_tesselation_evaluation"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, 
"bytecode_compute"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_raygen"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_RAYGEN); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_miss"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_MISS); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_closest_hit"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_CLOSEST_HIT); ADD_GROUP("Compile Error", "compile_error_"); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_vertex"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_VERTEX); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_fragment"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_FRAGMENT); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_tesselation_control"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_tesselation_evaluation"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_compute"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_raygen"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_RAYGEN); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_miss"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_MISS); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_closest_hit"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_CLOSEST_HIT); } }; diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index 284337fe6bc1..1ecc9eea79f9 
100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -527,12 +527,18 @@ class RenderingDeviceCommons : public Object { SHADER_STAGE_TESSELATION_CONTROL, SHADER_STAGE_TESSELATION_EVALUATION, SHADER_STAGE_COMPUTE, + SHADER_STAGE_RAYGEN, + SHADER_STAGE_MISS, + SHADER_STAGE_CLOSEST_HIT, SHADER_STAGE_MAX, SHADER_STAGE_VERTEX_BIT = (1 << SHADER_STAGE_VERTEX), SHADER_STAGE_FRAGMENT_BIT = (1 << SHADER_STAGE_FRAGMENT), SHADER_STAGE_TESSELATION_CONTROL_BIT = (1 << SHADER_STAGE_TESSELATION_CONTROL), SHADER_STAGE_TESSELATION_EVALUATION_BIT = (1 << SHADER_STAGE_TESSELATION_EVALUATION), SHADER_STAGE_COMPUTE_BIT = (1 << SHADER_STAGE_COMPUTE), + SHADER_STAGE_RAYGEN_BIT = (1 << SHADER_STAGE_RAYGEN), + SHADER_STAGE_MISS_BIT = (1 << SHADER_STAGE_MISS), + SHADER_STAGE_CLOSEST_HIT_BIT = (1 << SHADER_STAGE_CLOSEST_HIT), }; struct ShaderStageSPIRVData { @@ -557,6 +563,7 @@ class RenderingDeviceCommons : public Object { UNIFORM_TYPE_UNIFORM_BUFFER, // Regular uniform buffer (or UBO). UNIFORM_TYPE_STORAGE_BUFFER, // Storage buffer ("buffer" qualifier) like UBO, but supports storage, for compute mostly. UNIFORM_TYPE_INPUT_ATTACHMENT, // Used for sub-pass read/write, for mobile mostly. + UNIFORM_TYPE_ACCELERATION_STRUCTURE, // Bounding Volume Hierarchy (Top + Bottom Level acceleration structures), for raytracing only. UNIFORM_TYPE_MAX }; @@ -586,6 +593,12 @@ class RenderingDeviceCommons : public Object { // ----- PIPELINE ----- + enum PipelineType { + RASTERIZATION, + COMPUTE, + RAYTRACING, + }; + enum RenderPrimitive { RENDER_PRIMITIVE_POINTS, RENDER_PRIMITIVE_LINES, @@ -881,6 +894,7 @@ class RenderingDeviceCommons : public Object { SUPPORTS_ATTACHMENT_VRS, // If not supported, a fragment shader with only side effets (i.e., writes to buffers, but doesn't output to attachments), may be optimized down to no-op by the GPU driver. 
SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS, + SUPPORTS_RAYTRACING, }; enum SubgroupOperations { @@ -982,7 +996,7 @@ class RenderingDeviceCommons : public Object { struct ShaderDescription { uint64_t vertex_input_mask = 0; uint32_t fragment_output_mask = 0; - bool is_compute = false; + PipelineType pipeline_type = PipelineType::RASTERIZATION; uint32_t compute_local_size[3] = {}; uint32_t push_constant_size = 0; diff --git a/servers/rendering/rendering_device_driver.cpp b/servers/rendering/rendering_device_driver.cpp index c1a3f34af895..0ccecce084f4 100644 --- a/servers/rendering/rendering_device_driver.cpp +++ b/servers/rendering/rendering_device_driver.cpp @@ -44,10 +44,14 @@ Error RenderingDeviceDriver::_reflect_spirv(VectorView p_s ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage); if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) { - r_reflection.is_compute = true; + r_reflection.pipeline_type = PipelineType::COMPUTE; ERR_FAIL_COND_V_MSG(p_spirv.size() != 1, FAILED, "Compute shaders can only receive one stage, dedicated to compute."); } + if (p_spirv[i].shader_stage == SHADER_STAGE_RAYGEN || p_spirv[i].shader_stage == SHADER_STAGE_MISS || p_spirv[i].shader_stage == SHADER_STAGE_CLOSEST_HIT) { + r_reflection.pipeline_type = PipelineType::RAYTRACING; + } + ERR_FAIL_COND_V_MSG(r_reflection.stages.has_flag(stage_flag), FAILED, "Stage " + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once."); @@ -58,7 +62,7 @@ Error RenderingDeviceDriver::_reflect_spirv(VectorView p_s ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed parsing shader."); - if (r_reflection.is_compute) { + if (r_reflection.pipeline_type == PipelineType::COMPUTE) { r_reflection.compute_local_size[0] = module.entry_points->local_size.x; r_reflection.compute_local_size[1] = module.entry_points->local_size.y; 
r_reflection.compute_local_size[2] = module.entry_points->local_size.z; @@ -136,8 +140,7 @@ Error RenderingDeviceDriver::_reflect_spirv(VectorView p_s need_array_dimensions = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { - ERR_PRINT("Acceleration structure not supported."); - continue; + uniform.type = UNIFORM_TYPE_ACCELERATION_STRUCTURE; } break; } diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 6b4c8eeab11e..c4a22f14ef43 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -159,6 +159,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { DEFINE_ID(QueryPool); DEFINE_ID(Fence); DEFINE_ID(Semaphore); + DEFINE_ID(AccelerationStructure); + DEFINE_ID(RaytracingPipeline); public: /*****************/ @@ -189,6 +191,10 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { BUFFER_USAGE_INDEX_BIT = (1 << 6), BUFFER_USAGE_VERTEX_BIT = (1 << 7), BUFFER_USAGE_INDIRECT_BIT = (1 << 8), + BUFFER_USAGE_SHADER_BINDING_TABLE_BIT = (1 << 10), + BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT = (2 << 16), + BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT = (8 << 16), + BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT = (1 << 20), }; enum { @@ -324,6 +330,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { PIPELINE_STAGE_ALL_GRAPHICS_BIT = (1 << 15), PIPELINE_STAGE_ALL_COMMANDS_BIT = (1 << 16), PIPELINE_STAGE_CLEAR_STORAGE_BIT = (1 << 17), + PIPELINE_STAGE_RAY_TRACING_SHADER_BIT = (2 << 20), + PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT = (2 << 24), }; enum BarrierAccessBits { @@ -348,6 +356,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { BARRIER_ACCESS_RESOLVE_READ_BIT = (1 << 24), BARRIER_ACCESS_RESOLVE_WRITE_BIT = (1 << 25), BARRIER_ACCESS_STORAGE_CLEAR_BIT = (1 << 27), + BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT = (2 << 20), + 
BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT = (4 << 20), }; struct MemoryBarrier { @@ -714,6 +724,33 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) = 0; + /********************/ + /**** RAYTRACING ****/ + /********************/ + + // ----- ACCELERATION STRUCTURE ----- + + enum AccelerationStructureType { + ACCELERATION_STRUCTURE_TYPE_BLAS, + ACCELERATION_STRUCTURE_TYPE_TLAS, + }; + + virtual AccelerationStructureID blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset, uint32_t p_index_count, BufferID p_instance_buffer, uint64_t p_instance_offset) = 0; + virtual AccelerationStructureID tlas_create(const LocalVector &p_blases) = 0; + virtual void acceleration_structure_free(AccelerationStructureID p_acceleration_structure) = 0; + + // ----- PIPELINE ----- + + virtual RaytracingPipelineID raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) = 0; + virtual void raytracing_pipeline_free(RaytracingPipelineID p_pipeline) = 0; + + // ----- COMMANDS ----- + + virtual void command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure) = 0; + virtual void command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) = 0; + virtual void command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) = 0; + virtual void command_raytracing_trace_rays(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline, ShaderID p_shader, uint32_t p_width, uint32_t p_height) = 0; + /*****************/ /**** QUERIES ****/ /*****************/ @@ -760,6 +797,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { 
OBJECT_TYPE_SHADER, OBJECT_TYPE_UNIFORM_SET, OBJECT_TYPE_PIPELINE, + OBJECT_TYPE_ACCELERATION_STRUCTURE, }; struct MultiviewCapabilities { diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index 905c7ecb7974..835c0d8134c2 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -98,6 +98,8 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { case RESOURCE_USAGE_INDEX_BUFFER_READ: case RESOURCE_USAGE_TEXTURE_SAMPLE: case RESOURCE_USAGE_STORAGE_IMAGE_READ: + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT: + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ: return false; case RESOURCE_USAGE_COPY_TO: case RESOURCE_USAGE_RESOLVE_TO: @@ -106,6 +108,7 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE: case RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE: case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE: return true; default: DEV_ASSERT(false && "Invalid resource tracker usage."); @@ -159,15 +162,20 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage return RDD::BARRIER_ACCESS_UNIFORM_READ_BIT; case RESOURCE_USAGE_INDIRECT_BUFFER_READ: return RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT; + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT: + // Acceleration structure build inputs can be either storage buffers with vertices, indices, transforms, or + // other acceleration structures (BLAS) + return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_COPY_READ_BIT | RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT); + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ: + return RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT; case RESOURCE_USAGE_STORAGE_BUFFER_READ: case RESOURCE_USAGE_STORAGE_IMAGE_READ: case RESOURCE_USAGE_TEXTURE_BUFFER_READ: case RESOURCE_USAGE_TEXTURE_SAMPLE: - 
return RDD::BARRIER_ACCESS_SHADER_READ_BIT; case RESOURCE_USAGE_TEXTURE_BUFFER_READ_WRITE: case RESOURCE_USAGE_STORAGE_BUFFER_READ_WRITE: case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE: - return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_SHADER_READ_BIT | RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); + return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_SHADER_READ_BIT | RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); case RESOURCE_USAGE_VERTEX_BUFFER_READ: return RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; case RESOURCE_USAGE_INDEX_BUFFER_READ: @@ -176,6 +184,8 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT | RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE: + return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT | RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT); default: DEV_ASSERT(false && "Invalid usage."); return RDD::BarrierAccessBits(0); @@ -326,6 +336,12 @@ void RenderingDeviceGraph::_check_discardable_attachment_dependency(ResourceTrac } } +RenderingDeviceGraph::RaytracingListInstruction *RenderingDeviceGraph::_allocate_raytracing_list_instruction(uint32_t p_instruction_size) { + uint32_t raytracing_list_data_offset = raytracing_instruction_list.data.size(); + raytracing_instruction_list.data.resize(raytracing_list_data_offset + p_instruction_size); + return reinterpret_cast(&raytracing_instruction_list.data[raytracing_list_data_offset]); +} + void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command) { // Assign the next stages derived from the stages the command requires 
first. r_command->next_stages = r_command->self_stages; @@ -543,6 +559,10 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr // Memory barriers are pushed regardless of buffer barriers being used or not. r_command->memory_barrier.src_access = r_command->memory_barrier.src_access | resource_tracker->usage_access; r_command->memory_barrier.dst_access = r_command->memory_barrier.dst_access | new_usage_access; + } else if (resource_tracker->acceleration_structure_driver_id.id != 0) { + // Make sure the acceleration structure has been built before accessing it from raytracing shaders. + r_command->memory_barrier.src_access = r_command->memory_barrier.src_access | resource_tracker->usage_access; + r_command->memory_barrier.dst_access = r_command->memory_barrier.dst_access | new_usage_access; } else { DEV_ASSERT(false && "Resource tracker does not contain a valid buffer or texture ID."); } @@ -733,6 +753,47 @@ void RenderingDeviceGraph::_add_buffer_barrier_to_command(RDD::BufferID p_buffer } #endif +void RenderingDeviceGraph::_run_raytracing_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(RaytracingListInstruction)) <= p_instruction_data_size); + + const RaytracingListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + switch (instruction->type) { + case RaytracingListInstruction::TYPE_BIND_PIPELINE: { + const RaytracingListBindPipelineInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + driver->command_bind_raytracing_pipeline(p_command_buffer, bind_pipeline_instruction->pipeline); + instruction_data_cursor += sizeof(RaytracingListBindPipelineInstruction); + } break; + case RaytracingListInstruction::TYPE_BIND_UNIFORM_SET: { + const 
RaytracingListBindUniformSetInstruction *bind_uniform_set_instruction = reinterpret_cast(instruction); + driver->command_bind_raytracing_uniform_set(p_command_buffer, bind_uniform_set_instruction->uniform_set, bind_uniform_set_instruction->shader, bind_uniform_set_instruction->set_index); + instruction_data_cursor += sizeof(RaytracingListBindUniformSetInstruction); + } break; + case RaytracingListInstruction::TYPE_TRACE_RAYS: { + const RaytracingListTraceRaysInstruction *trace_rays_instruction = reinterpret_cast(instruction); + driver->command_raytracing_trace_rays(p_command_buffer, trace_rays_instruction->pipeline, trace_rays_instruction->shader, trace_rays_instruction->width, trace_rays_instruction->height); + instruction_data_cursor += sizeof(RaytracingListTraceRaysInstruction); + } break; + case RaytracingListInstruction::TYPE_SET_PUSH_CONSTANT: { + const RaytracingListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + const VectorView push_constant_data_view(reinterpret_cast(set_push_constant_instruction->data()), set_push_constant_instruction->size / sizeof(uint32_t)); + driver->command_bind_push_constants(p_command_buffer, set_push_constant_instruction->shader, 0, push_constant_data_view); + instruction_data_cursor += sizeof(RaytracingListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case RaytracingListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + const RaytracingListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); + driver->command_uniform_set_prepare_for_use(p_command_buffer, uniform_set_prepare_for_use_instruction->uniform_set, uniform_set_prepare_for_use_instruction->shader, uniform_set_prepare_for_use_instruction->set_index); + instruction_data_cursor += sizeof(RaytracingListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown raytracing list 
instruction type."); + return; + } + } +} + void RenderingDeviceGraph::_run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { uint32_t instruction_data_cursor = 0; while (instruction_data_cursor < p_instruction_data_size) { @@ -975,6 +1036,10 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC _run_label_command_change(r_command_buffer, command->label_index, p_level, false, true, &p_sorted_commands[i], p_sorted_commands_count - i, r_current_label_index, r_current_label_level); switch (command->type) { + case RecordedCommand::TYPE_ACCELERATION_STRUCTURE_BUILD: { + const RecordedAccelerationStructureBuildCommand *as_build_command = reinterpret_cast(command); + driver->command_build_acceleration_structure(r_command_buffer, as_build_command->acceleration_structure); + } break; case RecordedCommand::TYPE_BUFFER_CLEAR: { const RecordedBufferClearCommand *buffer_clear_command = reinterpret_cast(command); driver->command_clear_buffer(r_command_buffer, buffer_clear_command->buffer, buffer_clear_command->offset, buffer_clear_command->size); @@ -994,6 +1059,10 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC driver->command_copy_buffer(r_command_buffer, command_buffer_copies[j].source, buffer_update_command->destination, command_buffer_copies[j].region); } } break; + case RecordedCommand::TYPE_RAYTRACING_LIST: { + const RecordedRaytracingListCommand *raytracing_list_command = reinterpret_cast(command); + _run_raytracing_list_command(r_command_buffer, raytracing_list_command->instruction_data(), raytracing_list_command->instruction_data_size); + } break; case RecordedCommand::TYPE_COMPUTE_LIST: { if (device.workarounds.avoid_compute_after_draw && workarounds_state.draw_list_found) { // Avoid compute after draw workaround. Refer to the comment that enables this in the Vulkan driver for more information. 
@@ -1471,6 +1540,46 @@ void RenderingDeviceGraph::_print_draw_list(const uint8_t *p_instruction_data, u } } +void RenderingDeviceGraph::_print_raytracing_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(RaytracingListInstruction)) <= p_instruction_data_size); + + const RaytracingListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + switch (instruction->type) { + case RaytracingListInstruction::TYPE_BIND_PIPELINE: { + const RaytracingListBindPipelineInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + print_line("\tBIND PIPELINE ID", itos(bind_pipeline_instruction->pipeline.id)); + instruction_data_cursor += sizeof(RaytracingListBindPipelineInstruction); + } break; + case RaytracingListInstruction::TYPE_BIND_UNIFORM_SET: { + const RaytracingListBindUniformSetInstruction *bind_uniform_set_instruction = reinterpret_cast(instruction); + print_line("\tBIND UNIFORM SET ID", itos(bind_uniform_set_instruction->uniform_set.id), "SHADER ID", itos(bind_uniform_set_instruction->shader.id)); + instruction_data_cursor += sizeof(RaytracingListBindUniformSetInstruction); + } break; + case RaytracingListInstruction::TYPE_SET_PUSH_CONSTANT: { + const RaytracingListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + print_line("\tSET PUSH CONSTANT SIZE", set_push_constant_instruction->size); + instruction_data_cursor += sizeof(RaytracingListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case RaytracingListInstruction::TYPE_TRACE_RAYS: { + const RaytracingListTraceRaysInstruction *trace_rays_instruction = reinterpret_cast(instruction); + print_line("\tTRACE RAYS PIPELINE ID", trace_rays_instruction->pipeline.id, "SHADER ID", 
trace_rays_instruction->shader.id, "WIDTH", itos(trace_rays_instruction->width), "HEIGHT", itos(trace_rays_instruction->height)); + instruction_data_cursor += sizeof(RaytracingListTraceRaysInstruction); + } break; + case RaytracingListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + const RaytracingListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); + print_line("\tUNIFORM SET PREPARE FOR USE ID", itos(uniform_set_prepare_for_use_instruction->uniform_set.id), "SHADER ID", itos(uniform_set_prepare_for_use_instruction->shader.id), "INDEX", itos(uniform_set_prepare_for_use_instruction->set_index)); + instruction_data_cursor += sizeof(RaytracingListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown raytracing list instruction type."); + return; + } + } +} + void RenderingDeviceGraph::_print_compute_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { uint32_t instruction_data_cursor = 0; while (instruction_data_cursor < p_instruction_data_size) { @@ -1587,6 +1696,24 @@ void RenderingDeviceGraph::begin() { #endif } +void RenderingDeviceGraph::add_acceleration_structure_build(RDD::AccelerationStructureID p_acceleration_structure, ResourceTracker *p_dst_tracker, VectorView p_src_trackers) { + int32_t command_index; + RecordedAccelerationStructureBuildCommand *command = static_cast(_allocate_command(sizeof(RecordedAccelerationStructureBuildCommand), command_index)); + command->type = RecordedCommand::TYPE_ACCELERATION_STRUCTURE_BUILD; + command->self_stages = RDD::PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT; + command->acceleration_structure = p_acceleration_structure; + + LocalVector trackers = { p_dst_tracker }; + LocalVector usages = { RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE }; + + for (uint32_t i = 0; i < p_src_trackers.size(); ++i) { + trackers.push_back(p_src_trackers[i]); + 
usages.push_back(RESOURCE_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT); + } + + _add_command_to_graph(trackers.ptr(), usages.ptr(), usages.size(), command_index, command); +} + void RenderingDeviceGraph::add_buffer_clear(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_offset, uint32_t p_size) { DEV_ASSERT(p_dst_tracker != nullptr); @@ -1658,6 +1785,90 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke _add_command_to_graph(&p_dst_tracker, &buffer_usage, 1, command_index, command); } +void RenderingDeviceGraph::add_raytracing_list_begin() { + raytracing_instruction_list.clear(); + raytracing_instruction_list.index++; +} + +void RenderingDeviceGraph::add_raytracing_list_bind_pipeline(RDD::RaytracingPipelineID p_pipeline) { + RaytracingListBindPipelineInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListBindPipelineInstruction))); + instruction->type = RaytracingListInstruction::TYPE_BIND_PIPELINE; + instruction->pipeline = p_pipeline; + raytracing_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT); +} + +void RenderingDeviceGraph::add_raytracing_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { + RaytracingListBindUniformSetInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListBindUniformSetInstruction))); + instruction->type = RaytracingListInstruction::TYPE_BIND_UNIFORM_SET; + instruction->shader = p_shader; + instruction->uniform_set = p_uniform_set; + instruction->set_index = set_index; +} + +void RenderingDeviceGraph::add_raytracing_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size) { + uint32_t instruction_size = sizeof(RaytracingListSetPushConstantInstruction) + p_data_size; + RaytracingListSetPushConstantInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(instruction_size)); 
+ instruction->type = RaytracingListInstruction::TYPE_SET_PUSH_CONSTANT; + instruction->size = p_data_size; + instruction->shader = p_shader; + memcpy(instruction->data(), p_data, p_data_size); +} + +void RenderingDeviceGraph::add_raytracing_list_trace_rays(RDD::RaytracingPipelineID p_pipeline, RDD::ShaderID p_shader, uint32_t p_width, uint32_t p_height) { + RaytracingListTraceRaysInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListTraceRaysInstruction))); + instruction->type = RaytracingListInstruction::TYPE_TRACE_RAYS; + instruction->pipeline = p_pipeline; + instruction->shader = p_shader; + instruction->width = p_width; + instruction->height = p_height; +} + +void RenderingDeviceGraph::add_raytracing_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { + RaytracingListUniformSetPrepareForUseInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListUniformSetPrepareForUseInstruction))); + instruction->type = RaytracingListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE; + instruction->shader = p_shader; + instruction->uniform_set = p_uniform_set; + instruction->set_index = set_index; +} + +void RenderingDeviceGraph::add_raytracing_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage) { + DEV_ASSERT(p_tracker != nullptr); + + p_tracker->reset_if_outdated(tracking_frame); + + if (p_tracker->raytracing_list_index != raytracing_instruction_list.index) { + raytracing_instruction_list.command_trackers.push_back(p_tracker); + raytracing_instruction_list.command_tracker_usages.push_back(p_usage); + p_tracker->raytracing_list_index = raytracing_instruction_list.index; + p_tracker->raytracing_list_usage = p_usage; + } +#ifdef DEV_ENABLED + else if (p_tracker->raytracing_list_usage != p_usage) { + ERR_FAIL_MSG(vformat("Tracker can't have more than one type of usage in the same raytracing list. 
Raytracing list usage is %d and the requested usage is %d.", p_tracker->raytracing_list_usage, p_usage)); + } +#endif +} + +void RenderingDeviceGraph::add_raytracing_list_usages(VectorView p_trackers, VectorView p_usages) { + DEV_ASSERT(p_trackers.size() == p_usages.size()); + + for (uint32_t i = 0; i < p_trackers.size(); i++) { + add_raytracing_list_usage(p_trackers[i], p_usages[i]); + } +} + +void RenderingDeviceGraph::add_raytracing_list_end() { + int32_t command_index; + uint32_t instruction_data_size = raytracing_instruction_list.data.size(); + uint32_t command_size = sizeof(RecordedRaytracingListCommand) + instruction_data_size; + RecordedRaytracingListCommand *command = static_cast(_allocate_command(command_size, command_index)); + command->type = RecordedCommand::TYPE_RAYTRACING_LIST; + command->self_stages = raytracing_instruction_list.stages; + command->instruction_data_size = instruction_data_size; + memcpy(command->instruction_data(), raytracing_instruction_list.data.ptr(), instruction_data_size); + _add_command_to_graph(raytracing_instruction_list.command_trackers.ptr(), raytracing_instruction_list.command_tracker_usages.ptr(), raytracing_instruction_list.command_trackers.size(), command_index, command); +} + void RenderingDeviceGraph::add_compute_list_begin(RDD::BreadcrumbMarker p_phase, uint32_t p_breadcrumb_data) { compute_instruction_list.clear(); #if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index 97412a331728..0dca3f4bf60c 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -45,6 +45,19 @@ class RenderingDeviceGraph { public: + struct RaytracingListInstruction { + enum Type { + TYPE_NONE, + TYPE_BIND_PIPELINE, + TYPE_BIND_UNIFORM_SET, + TYPE_SET_PUSH_CONSTANT, + TYPE_TRACE_RAYS, + TYPE_UNIFORM_SET_PREPARE_FOR_USE, + }; + + Type type = TYPE_NONE; + }; + struct ComputeListInstruction { 
enum Type { TYPE_NONE, @@ -87,11 +100,13 @@ class RenderingDeviceGraph { struct RecordedCommand { enum Type { TYPE_NONE, + TYPE_ACCELERATION_STRUCTURE_BUILD, TYPE_BUFFER_CLEAR, TYPE_BUFFER_COPY, TYPE_BUFFER_GET_DATA, TYPE_BUFFER_UPDATE, TYPE_COMPUTE_LIST, + TYPE_RAYTRACING_LIST, TYPE_DRAW_LIST, TYPE_TEXTURE_CLEAR, TYPE_TEXTURE_COPY, @@ -147,7 +162,10 @@ class RenderingDeviceGraph { RESOURCE_USAGE_STORAGE_IMAGE_READ, RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE, RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE, - RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE + RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE, + RESOURCE_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT, + RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ, + RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE, }; struct ResourceTracker { @@ -161,7 +179,9 @@ class RenderingDeviceGraph { int32_t draw_list_index = -1; ResourceUsage draw_list_usage = RESOURCE_USAGE_NONE; int32_t compute_list_index = -1; + int32_t raytracing_list_index = -1; ResourceUsage compute_list_usage = RESOURCE_USAGE_NONE; + ResourceUsage raytracing_list_usage = RESOURCE_USAGE_NONE; ResourceUsage usage = RESOURCE_USAGE_NONE; BitField usage_access; RDD::BufferID buffer_driver_id; @@ -177,6 +197,7 @@ class RenderingDeviceGraph { bool in_parent_dirty_list = false; bool write_command_list_enabled = false; bool is_discardable = false; + RDD::AccelerationStructureID acceleration_structure_driver_id; _FORCE_INLINE_ void reset_if_outdated(int64_t new_command_frame) { if (new_command_frame != command_frame) { @@ -188,6 +209,7 @@ class RenderingDeviceGraph { write_command_or_list_index = -1; draw_list_index = -1; compute_list_index = -1; + raytracing_list_index = -1; texture_slice_command_index = -1; write_command_list_enabled = false; } @@ -255,6 +277,10 @@ class RenderingDeviceGraph { #endif }; + struct RaytracingInstructionList : InstructionList { + // No extra contents. 
+ }; + struct DrawInstructionList : InstructionList { FramebufferCache *framebuffer_cache = nullptr; RDD::RenderPassID render_pass; @@ -305,6 +331,10 @@ class RenderingDeviceGraph { bool partial_coverage = false; }; + struct RecordedAccelerationStructureBuildCommand : RecordedCommand { + RDD::AccelerationStructureID acceleration_structure; + }; + struct RecordedBufferClearCommand : RecordedCommand { RDD::BufferID buffer; uint32_t offset = 0; @@ -336,6 +366,18 @@ class RenderingDeviceGraph { } }; + struct RecordedRaytracingListCommand : RecordedCommand { + uint32_t instruction_data_size = 0; + + _FORCE_INLINE_ uint8_t *instruction_data() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const uint8_t *instruction_data() const { + return reinterpret_cast(&this[1]); + } + }; + struct RecordedComputeListCommand : RecordedCommand { uint32_t instruction_data_size = 0; uint32_t breadcrumb = 0; @@ -603,6 +645,47 @@ class RenderingDeviceGraph { uint32_t set_index = 0; }; + struct RaytracingListBuildAccelerationStructureInstruction : RaytracingListInstruction { + RDD::AccelerationStructureID acceleration_structure; + RDD::AccelerationStructureType acceleration_structure_type; + }; + + struct RaytracingListBindPipelineInstruction : RaytracingListInstruction { + RDD::RaytracingPipelineID pipeline; + }; + + struct RaytracingListBindUniformSetInstruction : RaytracingListInstruction { + RDD::UniformSetID uniform_set; + RDD::ShaderID shader; + uint32_t set_index = 0; + }; + + struct RaytracingListSetPushConstantInstruction : RaytracingListInstruction { + uint32_t size = 0; + RDD::ShaderID shader; + + _FORCE_INLINE_ uint8_t *data() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const uint8_t *data() const { + return reinterpret_cast(&this[1]); + } + }; + + struct RaytracingListTraceRaysInstruction : RaytracingListInstruction { + RDD::RaytracingPipelineID pipeline; + RDD::ShaderID shader; + uint32_t width; + uint32_t height; + }; + + struct 
RaytracingListUniformSetPrepareForUseInstruction : RaytracingListInstruction { + RDD::UniformSetID uniform_set; + RDD::ShaderID shader; + uint32_t set_index = 0; + }; + struct ComputeListBindPipelineInstruction : ComputeListInstruction { RDD::PipelineID pipeline; }; @@ -703,6 +786,7 @@ class RenderingDeviceGraph { int32_t command_label_index = -1; DrawInstructionList draw_instruction_list; ComputeInstructionList compute_instruction_list; + RaytracingInstructionList raytracing_instruction_list; uint32_t command_count = 0; uint32_t command_label_count = 0; LocalVector command_list_nodes; @@ -737,6 +821,7 @@ class RenderingDeviceGraph { DrawListInstruction *_allocate_draw_list_instruction(uint32_t p_instruction_size); ComputeListInstruction *_allocate_compute_list_instruction(uint32_t p_instruction_size); void _check_discardable_attachment_dependency(ResourceTracker *p_resource_tracker, int32_t p_previous_command_index, int32_t p_command_index); + RaytracingListInstruction *_allocate_raytracing_list_instruction(uint32_t p_instruction_size); void _add_command_to_graph(ResourceTracker **p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command); void _add_texture_barrier_to_command(RDD::TextureID p_texture_id, BitField p_src_access, BitField p_dst_access, ResourceUsage p_prev_usage, ResourceUsage p_next_usage, RDD::TextureSubresourceRange p_subresources, LocalVector &r_barrier_vector, int32_t &r_barrier_index, int32_t &r_barrier_count); #if USE_BUFFER_BARRIERS @@ -744,6 +829,7 @@ class RenderingDeviceGraph { #endif void _run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _get_draw_list_render_pass_and_framebuffer(const RecordedDrawListCommand *p_draw_list_command, RDD::RenderPassID &r_render_pass, RDD::FramebufferID &r_framebuffer); + void _run_raytracing_list_command(RDD::CommandBufferID p_command_buffer, 
const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb, bool p_split_cmd_buffer); void _run_secondary_command_buffer_task(const SecondaryCommandBuffer *p_secondary); @@ -755,6 +841,7 @@ class RenderingDeviceGraph { void _print_render_commands(const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count); void _print_draw_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _print_compute_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); + void _print_raytracing_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); public: RenderingDeviceGraph(); @@ -762,10 +849,20 @@ class RenderingDeviceGraph { void initialize(RDD *p_driver, RenderingContextDriver::Device p_device, RenderPassCreationFunction p_render_pass_creation_function, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame); void finalize(); void begin(); + void add_acceleration_structure_build(RDD::AccelerationStructureID p_acceleration_structure, ResourceTracker *p_dst_tracker, VectorView p_src_trackers); void add_buffer_clear(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_offset, uint32_t p_size); void add_buffer_copy(RDD::BufferID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, RDD::BufferCopyRegion p_region); void add_buffer_get_data(RDD::BufferID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, RDD::BufferCopyRegion p_region); void 
add_buffer_update(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies); + void add_raytracing_list_begin(); + void add_raytracing_list_bind_pipeline(RDD::RaytracingPipelineID p_pipeline); + void add_raytracing_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); + void add_raytracing_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size); + void add_raytracing_list_trace_rays(RDD::RaytracingPipelineID p_pipeline, RDD::ShaderID p_shader, uint32_t p_width, uint32_t p_height); + void add_raytracing_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); + void add_raytracing_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage); + void add_raytracing_list_usages(VectorView p_trackers, VectorView p_usages); + void add_raytracing_list_end(); void add_compute_list_begin(RDD::BreadcrumbMarker p_phase = RDD::BreadcrumbMarker::NONE, uint32_t p_breadcrumb_data = 0); void add_compute_list_bind_pipeline(RDD::PipelineID p_pipeline); void add_compute_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); diff --git a/servers/rendering/shader_compiler.h b/servers/rendering/shader_compiler.h index 66106d7eb734..16a11f8313b7 100644 --- a/servers/rendering/shader_compiler.h +++ b/servers/rendering/shader_compiler.h @@ -41,6 +41,9 @@ class ShaderCompiler { STAGE_VERTEX, STAGE_FRAGMENT, STAGE_COMPUTE, + STAGE_RAYGEN, + STAGE_MISS, + STAGE_CLOSEST_HIT, STAGE_MAX }; diff --git a/tests/python_build/fixtures/gles3/vertex_fragment_expected_parts.json b/tests/python_build/fixtures/gles3/vertex_fragment_expected_parts.json index 5ac8092ad0fd..d1eb0c757176 100644 --- a/tests/python_build/fixtures/gles3/vertex_fragment_expected_parts.json +++ b/tests/python_build/fixtures/gles3/vertex_fragment_expected_parts.json @@ -25,6 +25,9 @@ "\tfrag_color = vec4(depth);", "}" ], + 
"raygen_lines": [], + "miss_lines": [], + "closest_hit_lines": [], "uniforms": [], "fbos": [], "texunits": [], @@ -34,10 +37,14 @@ "feedbacks": [], "vertex_included_files": [], "fragment_included_files": [], + "reading": "fragment", "line_offset": 33, "vertex_offset": 10, "fragment_offset": 23, + "raygen_offset": 0, + "miss_offset": 0, + "closest_hit_offset": 0, "variant_defines": [ "#define USE_NINEPATCH" ], diff --git a/tests/python_build/fixtures/rd_glsl/compute_expected_parts.json b/tests/python_build/fixtures/rd_glsl/compute_expected_parts.json index 26ba9e4fc423..aa97167b2f2f 100644 --- a/tests/python_build/fixtures/rd_glsl/compute_expected_parts.json +++ b/tests/python_build/fixtures/rd_glsl/compute_expected_parts.json @@ -15,14 +15,23 @@ "\tuint t = BLOCK_SIZE + 1;", "}" ], + "raygen_lines": [], + "miss_lines": [], + "closest_hit_lines": [], "vertex_included_files": [], "fragment_included_files": [], "compute_included_files": [ "tests/python_build/fixtures/rd_glsl/_included.glsl" ], + "raygen_included_files": [], + "miss_included_files": [], + "closest_hit_included_files": [], "reading": "compute", "line_offset": 13, "vertex_offset": 0, "fragment_offset": 0, - "compute_offset": 1 + "compute_offset": 1, + "raygen_offset": 0, + "miss_offset": 0, + "closest_hit_offset": 0 } diff --git a/tests/python_build/fixtures/rd_glsl/vertex_fragment_expected_parts.json b/tests/python_build/fixtures/rd_glsl/vertex_fragment_expected_parts.json index dbf833edeadc..29e5efea21d5 100644 --- a/tests/python_build/fixtures/rd_glsl/vertex_fragment_expected_parts.json +++ b/tests/python_build/fixtures/rd_glsl/vertex_fragment_expected_parts.json @@ -27,14 +27,23 @@ "}" ], "compute_lines": [], + "raygen_lines": [], + "miss_lines": [], + "closest_hit_lines": [], "vertex_included_files": [ "tests/python_build/fixtures/rd_glsl/_included.glsl" ], "fragment_included_files": [], "compute_included_files": [], + "raygen_included_files": [], + "miss_included_files": [], + 
"closest_hit_included_files": [], "reading": "fragment", "line_offset": 25, "vertex_offset": 1, "fragment_offset": 15, - "compute_offset": 0 + "compute_offset": 0, + "raygen_offset": 0, + "miss_offset": 0, + "closest_hit_offset": 0 }