From f91ffc74535a10d261677be957a2bdc5b219ce3b Mon Sep 17 00:00:00 2001 From: thimenesup Date: Thu, 5 Dec 2024 18:32:45 +0100 Subject: [PATCH] Implement Buffer Device Address for Rendering Device Vulkan and DirectX12 --- core/extension/gdextension_compat_hashes.cpp | 6 +-- doc/classes/RenderingDevice.xml | 28 ++++++++++++ .../d3d12/rendering_device_driver_d3d12.cpp | 7 +++ drivers/d3d12/rendering_device_driver_d3d12.h | 1 + drivers/metal/rendering_device_driver_metal.h | 1 + .../metal/rendering_device_driver_metal.mm | 11 +++++ .../vulkan/rendering_device_driver_vulkan.cpp | 38 ++++++++++++++-- .../vulkan/rendering_device_driver_vulkan.h | 2 + .../4.3-stable.expected | 9 ++++ servers/rendering/rendering_device.compat.inc | 16 +++++++ servers/rendering/rendering_device.cpp | 44 ++++++++++++++++--- servers/rendering/rendering_device.h | 14 ++++-- servers/rendering/rendering_device_commons.h | 1 + servers/rendering/rendering_device_driver.h | 3 ++ 14 files changed, 164 insertions(+), 17 deletions(-) diff --git a/core/extension/gdextension_compat_hashes.cpp b/core/extension/gdextension_compat_hashes.cpp index b07f5b185893..3a7bbe221685 100644 --- a/core/extension/gdextension_compat_hashes.cpp +++ b/core/extension/gdextension_compat_hashes.cpp @@ -698,14 +698,14 @@ void GDExtensionCompatHashes::initialize() { { "framebuffer_create", 1884747791, 3284231055 }, { "framebuffer_create_multipass", 452534725, 1750306695 }, { "framebuffer_create_empty", 382373098, 3058360618 }, - { "vertex_buffer_create", 3491282828, 3410049843 }, + { "vertex_buffer_create", 3491282828, 76995708 }, { "vertex_array_create", 3137892244, 3799816279 }, - { "index_buffer_create", 975915977, 3935920523 }, + { "index_buffer_create", 975915977, 2360653086 }, { "shader_compile_spirv_from_source", 3459523685, 1178973306 }, { "shader_compile_binary_from_spirv", 1395027180, 134910450 }, { "shader_create_from_spirv", 3297482566, 342949005 }, { "shader_create_from_bytecode", 2078349841, 1687031350 }, - { "uniform_buffer_create", 1453158401, 34556762 }, + { "uniform_buffer_create", 1453158401, 3410049843 }, { "storage_buffer_create", 1173156076, 2316365934 }, { "texture_buffer_create", 2344087557, 1470338698 }, { "buffer_update", 652628289, 3793150683 }, diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml index b7f95587cda6..e41464bc8467 100644 --- a/doc/classes/RenderingDevice.xml +++ b/doc/classes/RenderingDevice.xml @@ -81,6 +81,15 @@ [/codeblock] + + + + + Returns the address of the given [param buffer] which can be passed to shaders in any way to access underlying data. + Buffer must have been created with this feature enabled. + You must check that the GPU supports this functionality by calling [method has_feature] with [constant SUPPORTS_BUFFER_ADDRESS] as a parameter. + + @@ -672,6 +681,13 @@ This is only used by Vulkan in debug builds. Godot must also be started with the [code]--extra-gpu-memory-tracking[/code] [url=$DOCS_URL/tutorials/editor/command_line_tutorial.html]command line argument[/url]. + + + + + Returns [code]true[/code] if the [param feature] is supported by the GPU. + + @@ -688,9 +704,11 @@ + Creates a new index buffer. It can be accessed with the RID that is returned. Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + Optionally, set [param enable_shader_address] if you wish to use [method buffer_get_device_address] functionality and the GPU supports it. @@ -1056,9 +1074,11 @@ + Creates a new uniform buffer. It can be accessed with the RID that is returned. Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + Optionally, set [param enable_shader_address] if you wish to use [method buffer_get_device_address] functionality and the GPU supports it. @@ -1093,9 +1113,11 @@ + It can be accessed with the RID that is returned. Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + Optionally, set [param enable_shader_address] if you wish to use [method buffer_get_device_address] functionality and the GPU supports it. @@ -2047,6 +2069,9 @@ + + Allows usage of [method buffer_get_device_address] on supported GPUs. + Sampler uniform. @@ -2418,6 +2443,9 @@ Floating-point specialization constant. + + Features support for buffer device address extension. + Maximum number of uniform sets that can be bound at a given time. diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index cd5df56fbb92..f21be1d30474 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -905,6 +905,11 @@ void RenderingDeviceDriverD3D12::buffer_unmap(BufferID p_buffer) { buf_info->resource->Unmap(0, &VOID_RANGE); } +uint64_t RenderingDeviceDriverD3D12::buffer_get_device_address(BufferID p_buffer) { + const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id; + return buf_info->resource->GetGPUVirtualAddress(); +} + /*****************/ /**** TEXTURE ****/ /*****************/ @@ -6271,6 +6276,8 @@ bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) { return vrs_capabilities.ss_image_supported; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; + case SUPPORTS_BUFFER_ADDRESS: + return true; default: return false; } diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 5bcef0664210..50d0f2cddf84 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -284,6 +284,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) override final; virtual uint8_t *buffer_map(BufferID p_buffer) override final; virtual void buffer_unmap(BufferID p_buffer) override final; + virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final; /*****************/ /**** TEXTURE ****/ diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index 52eb0f79300d..ffd12c43b3e4 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -106,6 +106,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) override final; virtual uint8_t *buffer_map(BufferID p_buffer) override final; virtual void buffer_unmap(BufferID p_buffer) override final; + virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final; #pragma mark - Texture diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index dea02f699775..579044f46b88 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -158,6 +158,15 @@ _FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_leve // Nothing to do. } +uint64_t RenderingDeviceDriverMetal::buffer_get_device_address(BufferID p_buffer) { + if (@available(iOS 16.0, macOS 13.0, *)) { + id obj = rid::get(p_buffer); + return obj.gpuAddress; + } else { + return 0; + } +} + #pragma mark - Texture #pragma mark - Format Conversions @@ -3947,6 +3956,8 @@ bool isArrayTexture(MTLTextureType p_type) { return false; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; + case SUPPORTS_BUFFER_ADDRESS: + return false; default: return false; } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index fb63e962de9f..0dd115f81cc4 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -513,6 +513,7 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, false); if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { _register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); @@ -730,6 +731,7 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { void *next_features = nullptr; VkPhysicalDeviceVulkan12Features device_features_vk_1_2 = {}; VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = {}; + VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_address_features = {}; VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = {}; VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; VkPhysicalDeviceMultiviewFeatures multiview_features = {}; @@ -740,10 +742,17 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { device_features_vk_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; device_features_vk_1_2.pNext = next_features; next_features = &device_features_vk_1_2; - } else if (enabled_device_extension_names.has(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { - shader_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR; - shader_features.pNext = next_features; - next_features = &shader_features; + } else { + if (enabled_device_extension_names.has(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { + shader_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR; + shader_features.pNext = next_features; + next_features = &shader_features; + } + if (enabled_device_extension_names.has(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) { + buffer_address_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR; + buffer_address_features.pNext = next_features; + next_features = &buffer_address_features; + } } if (enabled_device_extension_names.has(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { @@ -783,11 +792,17 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { shader_capabilities.shader_float16_is_supported = device_features_vk_1_2.shaderFloat16; shader_capabilities.shader_int8_is_supported = device_features_vk_1_2.shaderInt8; } + if (enabled_device_extension_names.has(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) { + buffer_device_address_support = device_features_vk_1_2.bufferDeviceAddress; + } } else { if (enabled_device_extension_names.has(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { shader_capabilities.shader_float16_is_supported = shader_features.shaderFloat16; shader_capabilities.shader_int8_is_supported = shader_features.shaderInt8; } + if (enabled_device_extension_names.has(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) { + buffer_device_address_support = buffer_address_features.bufferDeviceAddress; + } } if (enabled_device_extension_names.has(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { @@ -1112,6 +1127,9 @@ Error RenderingDeviceDriverVulkan::_initialize_allocator() { if (use_1_3_features) { allocator_info.flags |= VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT; } + if (buffer_device_address_support) { + allocator_info.flags |= VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT; + } VkResult err = vmaCreateAllocator(&allocator_info, &allocator); ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "vmaCreateAllocator failed with error " + itos(err) + "."); @@ -1487,6 +1505,7 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_STORAGE_BIT, VK_BUFFER_USAGE_ static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_INDEX_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_VERTEX_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_INDIRECT_BIT, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)); RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitField p_usage, MemoryAllocationType p_allocation_type) { VkBufferCreateInfo create_info = {}; @@ -1588,6 +1607,15 @@ void RenderingDeviceDriverVulkan::buffer_unmap(BufferID p_buffer) { vmaUnmapMemory(allocator, buf_info->allocation.handle); } +uint64_t RenderingDeviceDriverVulkan::buffer_get_device_address(BufferID p_buffer) { + const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id; + VkBufferDeviceAddressInfo address_info = {}; + address_info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; + address_info.pNext = nullptr; + address_info.buffer = buf_info->vk_buffer; + return vkGetBufferDeviceAddress(vk_device, &address_info); +} + /*****************/ /**** TEXTURE ****/ /*****************/ @@ -5874,6 +5902,8 @@ bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) { return vrs_capabilities.attachment_vrs_supported && physical_device_features.shaderStorageImageExtendedFormats; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; + case SUPPORTS_BUFFER_ADDRESS: + return buffer_device_address_support; default: return false; } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 4eec7547f50f..ea12450a4cd1 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -138,6 +138,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { VRSCapabilities vrs_capabilities; ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; + bool buffer_device_address_support = false; bool pipeline_cache_control_support = false; bool device_fault_support = false; #if defined(VK_TRACK_DEVICE_MEMORY) @@ -204,6 +205,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) override final; virtual uint8_t *buffer_map(BufferID p_buffer) override final; virtual void buffer_unmap(BufferID p_buffer) override final; + virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final; /*****************/ /**** TEXTURE ****/ diff --git a/misc/extension_api_validation/4.3-stable.expected b/misc/extension_api_validation/4.3-stable.expected index f347c95b2c6d..ae31b8801a90 100644 --- a/misc/extension_api_validation/4.3-stable.expected +++ b/misc/extension_api_validation/4.3-stable.expected @@ -261,3 +261,12 @@ Validate extension JSON: Error: Field 'classes/PointLight2D/properties/texture': Property hints modified to disallow resource types that don't work. The types allowed are now more restricted, but this change only impacts the editor and not the actual exposed API. No adjustments should be necessary. Decal properties were previously changed from Texture to Texture2D in 4.2, so we need to silence those warnings too. + + +GH-100062 +-------- +Validate extension JSON: Error: Field 'classes/RenderingDevice/methods/index_buffer_create/arguments': size changed value in new API, from 4 to 5. +Validate extension JSON: Error: Field 'classes/RenderingDevice/methods/uniform_buffer_create/arguments': size changed value in new API, from 2 to 3. +Validate extension JSON: Error: Field 'classes/RenderingDevice/methods/vertex_buffer_create/arguments': size changed value in new API, from 3 to 4. + +Optional argument added. Compatibility method registered. diff --git a/servers/rendering/rendering_device.compat.inc b/servers/rendering/rendering_device.compat.inc index 11aede713346..f3bfe1599de6 100644 --- a/servers/rendering/rendering_device.compat.inc +++ b/servers/rendering/rendering_device.compat.inc @@ -143,6 +143,18 @@ RenderingDevice::FramebufferFormatID RenderingDevice::_screen_get_framebuffer_fo return screen_get_framebuffer_format(DisplayServer::MAIN_WINDOW_ID); } +RID RenderingDevice::_uniform_buffer_create_compat_100062(uint32_t p_size_bytes, const Vector &p_data) { + return uniform_buffer_create(p_size_bytes, p_data, false); +} + +RID RenderingDevice::_vertex_buffer_create_compat_100062(uint32_t p_size_bytes, const Vector &p_data, bool p_use_as_storage) { + return vertex_buffer_create(p_size_bytes, p_data, p_use_as_storage, false); +} + +RID RenderingDevice::_index_buffer_create_compat_100062(uint32_t p_size_indices, IndexBufferFormat p_format, const Vector &p_data, bool p_use_restart_indices) { + return index_buffer_create(p_size_indices, p_format, p_data, p_use_restart_indices, false); +} + void RenderingDevice::_bind_compatibility_methods() { ClassDB::bind_compatibility_method(D_METHOD("shader_create_from_bytecode", "binary_data"), &RenderingDevice::_shader_create_from_bytecode_bind_compat_79606); @@ -166,6 +178,10 @@ void RenderingDevice::_bind_compatibility_methods() { ClassDB::bind_compatibility_method(D_METHOD("draw_list_begin", "framebuffer", "initial_color_action", "final_color_action", "initial_depth_action", "final_depth_action", "clear_color_values", "clear_depth", "clear_stencil", "region"), &RenderingDevice::_draw_list_begin_bind_compat_90993, DEFVAL(Vector()), DEFVAL(1.0), DEFVAL(0), DEFVAL(Rect2())); ClassDB::bind_compatibility_method(D_METHOD("draw_list_begin", "framebuffer", "initial_color_action", "final_color_action", "initial_depth_action", "final_depth_action", "clear_color_values", "clear_depth", "clear_stencil", "region", "breadcrumb"), &RenderingDevice::_draw_list_begin_bind_compat_98670, DEFVAL(Vector()), DEFVAL(1.0), DEFVAL(0), DEFVAL(Rect2()), DEFVAL(0)); + + ClassDB::bind_compatibility_method(D_METHOD("uniform_buffer_create"), &RenderingDevice::_uniform_buffer_create_compat_100062, DEFVAL(Vector())); + ClassDB::bind_compatibility_method(D_METHOD("vertex_buffer_create"), &RenderingDevice::_vertex_buffer_create_compat_100062, DEFVAL(Vector()), DEFVAL(false)); + ClassDB::bind_compatibility_method(D_METHOD("index_buffer_create"), &RenderingDevice::_index_buffer_create_compat_100062, DEFVAL(Vector()), DEFVAL(false)); } #endif diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index d62fa1f420e7..6c0a57efb146 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -736,6 +736,16 @@ Error RenderingDevice::buffer_get_data_async(RID p_buffer, const Callable &p_cal return OK; } +uint64_t RenderingDevice::buffer_get_device_address(RID p_buffer) { + ERR_RENDER_THREAD_GUARD_V(0); + + Buffer *buffer = _get_buffer_from_owner(p_buffer); + ERR_FAIL_NULL_V_MSG(buffer, 0, "Buffer argument is not a valid buffer of any type."); + ERR_FAIL_COND_V_MSG(!buffer->usage.has_flag(RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT), 0, "Buffer was not created with device address flag."); + + return driver->buffer_get_device_address(buffer->driver_id); +} + RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, const Vector &p_data, BitField p_usage) { ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); @@ -745,6 +755,14 @@ RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); @@ -2910,7 +2928,7 @@ bool RenderingDevice::sampler_is_format_supported_for_filter(DataFormat p_format /**** VERTEX BUFFER ****/ /***********************/ -RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, const Vector &p_data, bool p_use_as_storage) { +RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, const Vector &p_data, bool p_use_as_storage, bool p_enable_shader_address) { ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); Buffer buffer; @@ -2919,6 +2937,9 @@ RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); @@ -3049,7 +3070,7 @@ RID RenderingDevice::vertex_array_create(uint32_t p_vertex_count, VertexFormatID return id; } -RID RenderingDevice::index_buffer_create(uint32_t p_index_count, IndexBufferFormat p_format, const Vector &p_data, bool p_use_restart_indices) { +RID RenderingDevice::index_buffer_create(uint32_t p_index_count, IndexBufferFormat p_format, const Vector &p_data, bool p_use_restart_indices, bool p_enable_shader_address) { ERR_FAIL_COND_V(p_index_count == 0, RID()); IndexBuffer index_buffer; @@ -3088,6 +3109,9 @@ RID RenderingDevice::index_buffer_create(uint32_t p_index_count, IndexBufferForm #endif index_buffer.size = size_bytes; index_buffer.usage = (RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT | RDD::BUFFER_USAGE_INDEX_BIT); + if (p_enable_shader_address) { + index_buffer.usage.set_flag(RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); + } index_buffer.driver_id = driver->buffer_create(index_buffer.size, index_buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); ERR_FAIL_COND_V(!index_buffer.driver_id, RID()); @@ -3296,12 +3320,15 @@ uint64_t RenderingDevice::shader_get_vertex_input_attribute_mask(RID p_shader) { /**** UNIFORMS ****/ /******************/ -RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, const Vector &p_data) { +RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, const Vector &p_data, bool p_enable_shader_address) { ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); Buffer buffer; buffer.size = p_size_bytes; buffer.usage = (RDD::BUFFER_USAGE_TRANSFER_TO_BIT | RDD::BUFFER_USAGE_UNIFORM_BIT); + if (p_enable_shader_address) { + buffer.usage.set_flag(RDD::BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); + } buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); @@ -7217,11 +7244,11 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("sampler_create", "state"), &RenderingDevice::_sampler_create); ClassDB::bind_method(D_METHOD("sampler_is_format_supported_for_filter", "format", "sampler_filter"), &RenderingDevice::sampler_is_format_supported_for_filter); - ClassDB::bind_method(D_METHOD("vertex_buffer_create", "size_bytes", "data", "use_as_storage"), &RenderingDevice::vertex_buffer_create, DEFVAL(Vector()), DEFVAL(false)); + ClassDB::bind_method(D_METHOD("vertex_buffer_create", "size_bytes", "data", "use_as_storage", "enable_shader_address"), &RenderingDevice::vertex_buffer_create, DEFVAL(Vector()), DEFVAL(false), DEFVAL(false)); ClassDB::bind_method(D_METHOD("vertex_format_create", "vertex_descriptions"), &RenderingDevice::_vertex_format_create); ClassDB::bind_method(D_METHOD("vertex_array_create", "vertex_count", "vertex_format", "src_buffers", "offsets"), &RenderingDevice::_vertex_array_create, DEFVAL(Vector())); - ClassDB::bind_method(D_METHOD("index_buffer_create", "size_indices", "format", "data", "use_restart_indices"), &RenderingDevice::index_buffer_create, DEFVAL(Vector()), DEFVAL(false)); + ClassDB::bind_method(D_METHOD("index_buffer_create", "size_indices", "format", "data", "use_restart_indices", "enable_shader_address"), &RenderingDevice::index_buffer_create, DEFVAL(Vector()), DEFVAL(false), DEFVAL(false)); ClassDB::bind_method(D_METHOD("index_array_create", "index_buffer", "index_offset", "index_count"), &RenderingDevice::index_array_create); ClassDB::bind_method(D_METHOD("shader_compile_spirv_from_source", "shader_source", "allow_cache"), &RenderingDevice::_shader_compile_spirv_from_source, DEFVAL(true)); @@ -7232,7 +7259,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("shader_get_vertex_input_attribute_mask", "shader"), &RenderingDevice::shader_get_vertex_input_attribute_mask); - ClassDB::bind_method(D_METHOD("uniform_buffer_create", "size_bytes", "data"), &RenderingDevice::uniform_buffer_create, DEFVAL(Vector())); + ClassDB::bind_method(D_METHOD("uniform_buffer_create", "size_bytes", "data", "enable_shader_address"), &RenderingDevice::uniform_buffer_create, DEFVAL(Vector()), DEFVAL(false)); ClassDB::bind_method(D_METHOD("storage_buffer_create", "size_bytes", "data", "usage"), &RenderingDevice::storage_buffer_create, DEFVAL(Vector()), DEFVAL(0)); ClassDB::bind_method(D_METHOD("texture_buffer_create", "size_bytes", "format", "data"), &RenderingDevice::texture_buffer_create, DEFVAL(Vector())); @@ -7244,6 +7271,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("buffer_clear", "buffer", "offset", "size_bytes"), &RenderingDevice::buffer_clear); ClassDB::bind_method(D_METHOD("buffer_get_data", "buffer", "offset_bytes", "size_bytes"), &RenderingDevice::buffer_get_data, DEFVAL(0), DEFVAL(0)); ClassDB::bind_method(D_METHOD("buffer_get_data_async", "buffer", "callback", "offset_bytes", "size_bytes"), &RenderingDevice::buffer_get_data_async, DEFVAL(0), DEFVAL(0)); + ClassDB::bind_method(D_METHOD("buffer_get_device_address", "buffer"), &RenderingDevice::buffer_get_device_address); ClassDB::bind_method(D_METHOD("render_pipeline_create", "shader", "framebuffer_format", "vertex_format", "primitive", "rasterization_state", "multisample_state", "stencil_state", "color_blend_state", "dynamic_state_flags", "for_render_pass", "specialization_constants"), &RenderingDevice::_render_pipeline_create, DEFVAL(0), DEFVAL(0), DEFVAL(TypedArray())); ClassDB::bind_method(D_METHOD("render_pipeline_is_valid", "render_pipeline"), &RenderingDevice::render_pipeline_is_valid); @@ -7300,6 +7328,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("get_captured_timestamp_cpu_time", "index"), &RenderingDevice::get_captured_timestamp_cpu_time); ClassDB::bind_method(D_METHOD("get_captured_timestamp_name", "index"), &RenderingDevice::get_captured_timestamp_name); + ClassDB::bind_method(D_METHOD("has_feature", "feature"), &RenderingDevice::has_feature); ClassDB::bind_method(D_METHOD("limit_get", "limit"), &RenderingDevice::limit_get); ClassDB::bind_method(D_METHOD("get_frame_delay"), &RenderingDevice::get_frame_delay); ClassDB::bind_method(D_METHOD("submit"), &RenderingDevice::submit); @@ -7674,6 +7703,7 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(INDEX_BUFFER_FORMAT_UINT32); BIND_BITFIELD_FLAG(STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT); + BIND_BITFIELD_FLAG(STORAGE_BUFFER_USAGE_SHADER_DEVICE_ADDRESS); BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER); //for sampling only (sampler GLSL type) BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER_WITH_TEXTURE); // for sampling only); but includes a texture); (samplerXX GLSL type)); first a sampler then a texture @@ -7818,6 +7848,8 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT); BIND_ENUM_CONSTANT(PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT); + BIND_ENUM_CONSTANT(SUPPORTS_BUFFER_ADDRESS); + BIND_ENUM_CONSTANT(LIMIT_MAX_BOUND_UNIFORM_SETS); BIND_ENUM_CONSTANT(LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS); BIND_ENUM_CONSTANT(LIMIT_MAX_TEXTURES_PER_UNIFORM_SET); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 8d5219bbf102..48ec4f2e4105 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -224,6 +224,7 @@ class RenderingDevice : public RenderingDeviceCommons { Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size); Vector buffer_get_data(RID p_buffer, uint32_t p_offset = 0, uint32_t p_size = 0); // This causes stall, only use to retrieve large buffers for saving. Error buffer_get_data_async(RID p_buffer, const Callable &p_callback, uint32_t p_offset = 0, uint32_t p_size = 0); + uint64_t buffer_get_device_address(RID p_buffer); /*****************/ /**** TEXTURE ****/ @@ -714,13 +715,13 @@ class RenderingDevice : public RenderingDeviceCommons { RID_Owner index_array_owner; public: - RID vertex_buffer_create(uint32_t p_size_bytes, const Vector &p_data = Vector(), bool p_use_as_storage = false); + RID vertex_buffer_create(uint32_t p_size_bytes, const Vector &p_data = Vector(), bool p_use_as_storage = false, bool p_enable_shader_address = false); // This ID is warranted to be unique for the same formats, does not need to be freed VertexFormatID vertex_format_create(const Vector &p_vertex_descriptions); RID vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector &p_src_buffers, const Vector &p_offsets = Vector()); - RID index_buffer_create(uint32_t p_size_indices, IndexBufferFormat p_format, const Vector &p_data = Vector(), bool p_use_restart_indices = false); + RID index_buffer_create(uint32_t p_size_indices, IndexBufferFormat p_format, const Vector &p_data = Vector(), bool p_use_restart_indices = false, bool p_enable_shader_address = false); RID index_array_create(RID p_index_buffer, uint32_t p_index_offset, uint32_t p_index_count); /****************/ @@ -854,6 +855,10 @@ class RenderingDevice : public RenderingDeviceCommons { DrawListID _draw_list_begin_bind_compat_90993(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region); DrawListID _draw_list_begin_bind_compat_98670(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, uint32_t p_breadcrumb); + + RID _uniform_buffer_create_compat_100062(uint32_t p_size_bytes, const Vector &p_data); + RID _vertex_buffer_create_compat_100062(uint32_t p_size_bytes, const Vector &p_data, bool p_use_as_storage); + RID _index_buffer_create_compat_100062(uint32_t p_size_indices, IndexBufferFormat p_format, const Vector &p_data, bool p_use_restart_indices); #endif public: @@ -886,14 +891,15 @@ class RenderingDevice : public RenderingDeviceCommons { String get_perf_report() const; enum StorageBufferUsage { - STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT = 1, + STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT = (1 << 0), + STORAGE_BUFFER_USAGE_SHADER_DEVICE_ADDRESS = (1 << 1), }; /*****************/ /**** BUFFERS ****/ /*****************/ - RID uniform_buffer_create(uint32_t p_size_bytes, const Vector &p_data = Vector()); + RID uniform_buffer_create(uint32_t p_size_bytes, const Vector &p_data = Vector(), bool p_enable_shader_address = false); RID storage_buffer_create(uint32_t p_size, const Vector &p_data = Vector(), BitField p_usage = 0); RID texture_buffer_create(uint32_t p_size_elements, DataFormat p_format, const Vector &p_data = Vector()); diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index 284337fe6bc1..f9a2346fc102 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -881,6 +881,7 @@ class RenderingDeviceCommons : public Object { SUPPORTS_ATTACHMENT_VRS, // If not supported, a fragment shader with only side effets (i.e., writes to buffers, but doesn't output to attachments), may be optimized down to no-op by the GPU driver. SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS, + SUPPORTS_BUFFER_ADDRESS, }; enum SubgroupOperations { diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 9c0a060d5440..bd1c65b9af71 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -190,6 +190,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { BUFFER_USAGE_INDEX_BIT = (1 << 6), BUFFER_USAGE_VERTEX_BIT = (1 << 7), BUFFER_USAGE_INDIRECT_BIT = (1 << 8), + BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT = (1 << 17), }; enum { @@ -203,6 +204,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) = 0; virtual uint8_t *buffer_map(BufferID p_buffer) = 0; virtual void buffer_unmap(BufferID p_buffer) = 0; + // Only for a buffer with BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT. + virtual uint64_t buffer_get_device_address(BufferID p_buffer) = 0; /*****************/ /**** TEXTURE ****/