From f6f9233295f1bf49ada1da2d103db0ce087b9eda Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Mon, 23 Dec 2024 18:45:59 -0800 Subject: [PATCH 1/4] [naga] Allow abstract scalars in `modf` and `frexp` results. Allow `PredeclaredType::ModfResult` and `PredeclaredType::FrexpResult` to hold any sort of scalar, not just a floating-point scalar. This prepares Naga for implementing the `AbstractFloat` overloads for the `modf` and `frexp` builtin functions. --- naga/src/back/glsl/mod.rs | 13 +++++++----- naga/src/back/hlsl/help.rs | 8 ++++---- naga/src/back/msl/writer.rs | 8 ++++---- naga/src/front/type_gen.rs | 34 +++++++++++++------------------- naga/src/front/wgsl/lower/mod.rs | 16 ++++++--------- naga/src/lib.rs | 4 ++-- naga/src/proc/typifier.rs | 20 +++++-------------- 7 files changed, 43 insertions(+), 60 deletions(-) diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index e34127b3a1..83aeeebdd3 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -747,14 +747,17 @@ impl<'a, W: Write> Writer<'a, W> { // Write functions to create special types. 
for (type_key, struct_ty) in self.module.special_types.predeclared_types.iter() { match type_key { - &crate::PredeclaredType::ModfResult { size, width } - | &crate::PredeclaredType::FrexpResult { size, width } => { + &crate::PredeclaredType::ModfResult { size, scalar } + | &crate::PredeclaredType::FrexpResult { size, scalar } => { let arg_type_name_owner; let arg_type_name = if let Some(size) = size { - arg_type_name_owner = - format!("{}vec{}", if width == 8 { "d" } else { "" }, size as u8); + arg_type_name_owner = format!( + "{}vec{}", + if scalar.width == 8 { "d" } else { "" }, + size as u8 + ); &arg_type_name_owner - } else if width == 8 { + } else if scalar.width == 8 { "double" } else { "float" diff --git a/naga/src/back/hlsl/help.rs b/naga/src/back/hlsl/help.rs index e060529dcf..347addd67e 100644 --- a/naga/src/back/hlsl/help.rs +++ b/naga/src/back/hlsl/help.rs @@ -796,17 +796,17 @@ impl super::Writer<'_, W> { pub(super) fn write_special_functions(&mut self, module: &crate::Module) -> BackendResult { for (type_key, struct_ty) in module.special_types.predeclared_types.iter() { match type_key { - &crate::PredeclaredType::ModfResult { size, width } - | &crate::PredeclaredType::FrexpResult { size, width } => { + &crate::PredeclaredType::ModfResult { size, scalar } + | &crate::PredeclaredType::FrexpResult { size, scalar } => { let arg_type_name_owner; let arg_type_name = if let Some(size) = size { arg_type_name_owner = format!( "{}{}", - if width == 8 { "double" } else { "float" }, + if scalar.width == 8 { "double" } else { "float" }, size as u8 ); &arg_type_name_owner - } else if width == 8 { + } else if scalar.width == 8 { "double" } else { "float" diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index dc444bddb5..a8b34d4574 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -3830,17 +3830,17 @@ impl Writer { // Write functions to create special types. 
for (type_key, struct_ty) in module.special_types.predeclared_types.iter() { match type_key { - &crate::PredeclaredType::ModfResult { size, width } - | &crate::PredeclaredType::FrexpResult { size, width } => { + &crate::PredeclaredType::ModfResult { size, scalar } + | &crate::PredeclaredType::FrexpResult { size, scalar } => { let arg_type_name_owner; let arg_type_name = if let Some(size) = size { arg_type_name_owner = format!( "{NAMESPACE}::{}{}", - if width == 8 { "double" } else { "float" }, + if scalar.width == 8 { "double" } else { "float" }, size as u8 ); &arg_type_name_owner - } else if width == 8 { + } else if scalar.width == 8 { "double" } else { "float" diff --git a/naga/src/front/type_gen.rs b/naga/src/front/type_gen.rs index 1cd9f7f378..737c456bbd 100644 --- a/naga/src/front/type_gen.rs +++ b/naga/src/front/type_gen.rs @@ -298,11 +298,11 @@ impl crate::Module { }, } } - crate::PredeclaredType::ModfResult { size, width } => { + crate::PredeclaredType::ModfResult { size, scalar } => { let float_ty = self.types.insert( crate::Type { name: None, - inner: crate::TypeInner::Scalar(crate::Scalar::float(width)), + inner: crate::TypeInner::Scalar(scalar), }, Span::UNDEFINED, ); @@ -311,23 +311,20 @@ impl crate::Module { let vec_ty = self.types.insert( crate::Type { name: None, - inner: crate::TypeInner::Vector { - size, - scalar: crate::Scalar::float(width), - }, + inner: crate::TypeInner::Vector { size, scalar }, }, Span::UNDEFINED, ); - (vec_ty, size as u32 * width as u32) + (vec_ty, size as u32 * scalar.width as u32) } else { - (float_ty, width as u32) + (float_ty, scalar.width as u32) }; let mut type_name = "__modf_result_".to_string(); if let Some(size) = size { let _ = write!(type_name, "vec{}_", size as u8); } - let _ = write!(type_name, "f{}", width * 8); + let _ = write!(type_name, "f{}", scalar.width * 8); crate::Type { name: Some(type_name), @@ -350,11 +347,11 @@ impl crate::Module { }, } } - crate::PredeclaredType::FrexpResult { size, width } => { + 
crate::PredeclaredType::FrexpResult { size, scalar } => { let float_ty = self.types.insert( crate::Type { name: None, - inner: crate::TypeInner::Scalar(crate::Scalar::float(width)), + inner: crate::TypeInner::Scalar(scalar), }, Span::UNDEFINED, ); @@ -364,7 +361,7 @@ impl crate::Module { name: None, inner: crate::TypeInner::Scalar(crate::Scalar { kind: crate::ScalarKind::Sint, - width, + width: scalar.width, }), }, Span::UNDEFINED, @@ -374,10 +371,7 @@ impl crate::Module { let vec_float_ty = self.types.insert( crate::Type { name: None, - inner: crate::TypeInner::Vector { - size, - scalar: crate::Scalar::float(width), - }, + inner: crate::TypeInner::Vector { size, scalar }, }, Span::UNDEFINED, ); @@ -388,22 +382,22 @@ impl crate::Module { size, scalar: crate::Scalar { kind: crate::ScalarKind::Sint, - width, + width: scalar.width, }, }, }, Span::UNDEFINED, ); - (vec_float_ty, vec_int_ty, size as u32 * width as u32) + (vec_float_ty, vec_int_ty, size as u32 * scalar.width as u32) } else { - (float_ty, int_ty, width as u32) + (float_ty, int_ty, scalar.width as u32) }; let mut type_name = "__frexp_result_".to_string(); if let Some(size) = size { let _ = write!(type_name, "vec{}_", size as u8); } - let _ = write!(type_name, "f{}", width * 8); + let _ = write!(type_name, "f{}", scalar.width * 8); crate::Type { name: Some(type_name), diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index 50d012c4e6..4b3abc0ad9 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -2304,22 +2304,18 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { args.finish()?; if fun == crate::MathFunction::Modf || fun == crate::MathFunction::Frexp { - if let Some((size, width)) = match *resolve_inner!(ctx, arg) { - crate::TypeInner::Scalar(crate::Scalar { width, .. 
}) => { - Some((None, width)) + if let Some((size, scalar)) = match *resolve_inner!(ctx, arg) { + crate::TypeInner::Scalar(scalar) => Some((None, scalar)), + crate::TypeInner::Vector { size, scalar, .. } => { + Some((Some(size), scalar)) } - crate::TypeInner::Vector { - size, - scalar: crate::Scalar { width, .. }, - .. - } => Some((Some(size), width)), _ => None, } { ctx.module.generate_predeclared_type( if fun == crate::MathFunction::Modf { - crate::PredeclaredType::ModfResult { size, width } + crate::PredeclaredType::ModfResult { size, scalar } } else { - crate::PredeclaredType::FrexpResult { size, width } + crate::PredeclaredType::FrexpResult { size, scalar } }, ); } diff --git a/naga/src/lib.rs b/naga/src/lib.rs index 687dc5b441..8db5b676d6 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -2217,11 +2217,11 @@ pub enum PredeclaredType { AtomicCompareExchangeWeakResult(Scalar), ModfResult { size: Option, - width: Bytes, + scalar: Scalar, }, FrexpResult { size: Option, - width: Bytes, + scalar: Scalar, }, } diff --git a/naga/src/proc/typifier.rs b/naga/src/proc/typifier.rs index 1e1a4c93a4..1359289900 100644 --- a/naga/src/proc/typifier.rs +++ b/naga/src/proc/typifier.rs @@ -668,19 +668,9 @@ impl<'a> ResolveContext<'a> { | Mf::Pow | Mf::QuantizeToF16 => res_arg.clone(), Mf::Modf | Mf::Frexp => { - let (size, width) = match res_arg.inner_with(types) { - &Ti::Scalar(crate::Scalar { - kind: crate::ScalarKind::Float, - width, - }) => (None, width), - &Ti::Vector { - scalar: - crate::Scalar { - kind: crate::ScalarKind::Float, - width, - }, - size, - } => (Some(size), width), + let (size, scalar) = match res_arg.inner_with(types) { + &Ti::Scalar(scalar) => (None, scalar), + &Ti::Vector { scalar, size } => (Some(size), scalar), ref other => { return Err(ResolveError::IncompatibleOperands(format!( "{fun:?}({other:?}, _)" @@ -691,9 +681,9 @@ impl<'a> ResolveContext<'a> { .special_types .predeclared_types .get(&if fun == Mf::Modf { - crate::PredeclaredType::ModfResult 
{ size, width } + crate::PredeclaredType::ModfResult { size, scalar } } else { - crate::PredeclaredType::FrexpResult { size, width } + crate::PredeclaredType::FrexpResult { size, scalar } }) .ok_or(ResolveError::MissingSpecialType)?; TypeResolution::Handle(*result) From 78e35c4a7e8ca983d59c9099087d466814ec9acb Mon Sep 17 00:00:00 2001 From: Jamie Nicol Date: Tue, 7 Jan 2025 11:58:46 +0000 Subject: [PATCH 2/4] [naga wgsl-in] Disallow named component expression for matrix types The WGSL spec only allows named component expressions when the base type is a vector or a structure, so this patch removes support for it for matrices. Additionally tests which used this for matrices have been updated to use indexing expressions instead, and a test has been added to ensure a named component expression on a matrix results in an error. --- naga/src/front/wgsl/lower/mod.rs | 4 +--- naga/src/front/wgsl/tests.rs | 8 ++++++++ naga/tests/in/shadow.wgsl | 2 +- naga/tests/in/skybox.wgsl | 2 +- naga/tests/wgsl_errors.rs | 4 ++-- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index 4b3abc0ad9..dcfa38116b 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -2060,11 +2060,9 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { lowered_base.map(|base| crate::Expression::AccessIndex { base, index }) } - crate::TypeInner::Vector { .. } | crate::TypeInner::Matrix { .. } => { + crate::TypeInner::Vector { .. } => { match Components::new(field.name, field.span)? { Components::Swizzle { size, pattern } => { - // Swizzles aren't allowed on matrices, but - // validation will catch that. 
Typed::Plain(crate::Expression::Swizzle { size, vector: ctx.apply_load_rule(lowered_base)?, diff --git a/naga/src/front/wgsl/tests.rs b/naga/src/front/wgsl/tests.rs index 2dbec5a430..54d931efe2 100644 --- a/naga/src/front/wgsl/tests.rs +++ b/naga/src/front/wgsl/tests.rs @@ -399,6 +399,14 @@ fn parse_postfix() { }", ) .unwrap(); + + let err = parse_str( + "fn foo() { + let v = mat4x4().x; + }", + ) + .unwrap_err(); + assert_eq!(err.message(), "invalid field accessor `x`"); } #[test] diff --git a/naga/tests/in/shadow.wgsl b/naga/tests/in/shadow.wgsl index b02cf68775..86c3a4a00b 100644 --- a/naga/tests/in/shadow.wgsl +++ b/naga/tests/in/shadow.wgsl @@ -37,7 +37,7 @@ fn vs_main( let w = u_entity.world; let world_pos = u_entity.world * vec4(position); var out: VertexOutput; - out.world_normal = mat3x3(w.x.xyz, w.y.xyz, w.z.xyz) * vec3(normal.xyz); + out.world_normal = mat3x3(w[0].xyz, w[1].xyz, w[2].xyz) * vec3(normal.xyz); out.world_position = world_pos; out.proj_position = u_globals.view_proj * world_pos; return out; diff --git a/naga/tests/in/skybox.wgsl b/naga/tests/in/skybox.wgsl index f4cc37a44b..4c6a68a120 100644 --- a/naga/tests/in/skybox.wgsl +++ b/naga/tests/in/skybox.wgsl @@ -22,7 +22,7 @@ fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { 1.0, ); - let inv_model_view = transpose(mat3x3(r_data.view.x.xyz, r_data.view.y.xyz, r_data.view.z.xyz)); + let inv_model_view = transpose(mat3x3(r_data.view[0].xyz, r_data.view[1].xyz, r_data.view[2].xyz)); let unprojected = r_data.proj_inv * pos; return VertexOutput(pos, inv_model_view * unprojected.xyz); } diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs index a9c7344ec3..abba829d5f 100644 --- a/naga/tests/wgsl_errors.rs +++ b/naga/tests/wgsl_errors.rs @@ -1238,8 +1238,8 @@ fn pointer_type_equivalence() { fn g() { var m: mat2x2; - let pv: ptr> = &m.x; - let pf: ptr = &m.x.x; + let pv: ptr> = &m[0]; + let pf: ptr = &m[0].x; f(pv, pf); } From fabcba8f9af42c5e6b51da771fd2df0c4a01910f 
Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Tue, 7 Jan 2025 07:52:42 -0500 Subject: [PATCH 3/4] Refine Multi-Draw-Indirect (#6870) --- wgpu-core/src/command/bundle.rs | 28 ++++++++--------- wgpu-core/src/command/render.rs | 41 +++++++++++-------------- wgpu-core/src/command/render_command.rs | 18 +++++------ wgpu-hal/src/gles/adapter.rs | 9 +++--- wgpu-types/src/lib.rs | 11 +++++-- wgpu/src/backend/webgpu.rs | 3 +- 6 files changed, 55 insertions(+), 55 deletions(-) diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index ac7a5280bf..c7f433c3a0 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -484,10 +484,10 @@ impl RenderBundleEncoder { ) .map_pass_err(scope)?; } - RenderCommand::MultiDrawIndirect { + RenderCommand::DrawIndirect { buffer_id, offset, - count: None, + count: 1, indexed, } => { let scope = PassErrorScope::Draw { @@ -504,7 +504,7 @@ impl RenderBundleEncoder { ) .map_pass_err(scope)?; } - RenderCommand::MultiDrawIndirect { .. } + RenderCommand::DrawIndirect { .. } | RenderCommand::MultiDrawIndirectCount { .. 
} => unimplemented!(), RenderCommand::PushDebugGroup { color: _, len: _ } => unimplemented!(), RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(), @@ -887,10 +887,10 @@ fn multi_draw_indirect( state.flush_vertices(); state.flush_binds(used_bind_groups, dynamic_offsets); - state.commands.push(ArcRenderCommand::MultiDrawIndirect { + state.commands.push(ArcRenderCommand::DrawIndirect { buffer, offset, - count: None, + count: 1, indexed, }); Ok(()) @@ -1101,25 +1101,25 @@ impl RenderBundle { ) }; } - Cmd::MultiDrawIndirect { + Cmd::DrawIndirect { buffer, offset, - count: None, + count: 1, indexed: false, } => { let buffer = buffer.try_raw(snatch_guard)?; unsafe { raw.draw_indirect(buffer, *offset, 1) }; } - Cmd::MultiDrawIndirect { + Cmd::DrawIndirect { buffer, offset, - count: None, + count: 1, indexed: true, } => { let buffer = buffer.try_raw(snatch_guard)?; unsafe { raw.draw_indexed_indirect(buffer, *offset, 1) }; } - Cmd::MultiDrawIndirect { .. } | Cmd::MultiDrawIndirectCount { .. } => { + Cmd::DrawIndirect { .. } | Cmd::MultiDrawIndirectCount { .. } => { return Err(ExecutionError::Unimplemented("multi-draw-indirect")) } Cmd::PushDebugGroup { .. } | Cmd::InsertDebugMarker { .. 
} | Cmd::PopDebugGroup => { @@ -1727,10 +1727,10 @@ pub mod bundle_ffi { buffer_id: id::BufferId, offset: BufferAddress, ) { - bundle.base.commands.push(RenderCommand::MultiDrawIndirect { + bundle.base.commands.push(RenderCommand::DrawIndirect { buffer_id, offset, - count: None, + count: 1, indexed: false, }); } @@ -1740,10 +1740,10 @@ pub mod bundle_ffi { buffer_id: id::BufferId, offset: BufferAddress, ) { - bundle.base.commands.push(RenderCommand::MultiDrawIndirect { + bundle.base.commands.push(RenderCommand::DrawIndirect { buffer_id, offset, - count: None, + count: 1, indexed: true, }); } diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 31b4dd8218..abbbcfb46a 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -676,10 +676,9 @@ pub enum RenderPassErrorInner { MissingDownlevelFlags(#[from] MissingDownlevelFlags), #[error("Indirect buffer offset {0:?} is not a multiple of 4")] UnalignedIndirectBufferOffset(BufferAddress), - #[error("Indirect draw uses bytes {offset}..{end_offset} {} which overruns indirect buffer of size {buffer_size}", - count.map_or_else(String::new, |v| format!("(using count {v})")))] + #[error("Indirect draw uses bytes {offset}..{end_offset} using count {count} which overruns indirect buffer of size {buffer_size}")] IndirectBufferOverrun { - count: Option, + count: u32, offset: u64, end_offset: u64, buffer_size: u64, @@ -1787,14 +1786,14 @@ impl Global { ) .map_pass_err(scope)?; } - ArcRenderCommand::MultiDrawIndirect { + ArcRenderCommand::DrawIndirect { buffer, offset, count, indexed, } => { let scope = PassErrorScope::Draw { - kind: if count.is_some() { + kind: if count != 1 { DrawKind::MultiDrawIndirect } else { DrawKind::DrawIndirect @@ -2467,7 +2466,7 @@ fn multi_draw_indirect( cmd_buf: &Arc, indirect_buffer: Arc, offset: u64, - count: Option, + count: u32, indexed: bool, ) -> Result<(), RenderPassErrorInner> { api_log!( @@ -2482,7 +2481,7 @@ fn multi_draw_indirect( 
true => size_of::(), }; - if count.is_some() { + if count != 1 { state .device .require_features(wgt::Features::MULTI_DRAW_INDIRECT)?; @@ -2502,13 +2501,11 @@ fn multi_draw_indirect( indirect_buffer.check_usage(BufferUsages::INDIRECT)?; let indirect_raw = indirect_buffer.try_raw(state.snatch_guard)?; - let actual_count = count.map_or(1, |c| c.get()); - if offset % 4 != 0 { return Err(RenderPassErrorInner::UnalignedIndirectBufferOffset(offset)); } - let end_offset = offset + stride as u64 * actual_count as u64; + let end_offset = offset + stride as u64 * count as u64; if end_offset > indirect_buffer.size { return Err(RenderPassErrorInner::IndirectBufferOverrun { count, @@ -2528,14 +2525,12 @@ fn multi_draw_indirect( match indexed { false => unsafe { - state - .raw_encoder - .draw_indirect(indirect_raw, offset, actual_count); + state.raw_encoder.draw_indirect(indirect_raw, offset, count); }, true => unsafe { state .raw_encoder - .draw_indexed_indirect(indirect_raw, offset, actual_count); + .draw_indexed_indirect(indirect_raw, offset, count); }, } Ok(()) @@ -2599,7 +2594,7 @@ fn multi_draw_indirect_count( let end_offset = offset + stride * max_count as u64; if end_offset > indirect_buffer.size { return Err(RenderPassErrorInner::IndirectBufferOverrun { - count: None, + count: 1, offset, end_offset, buffer_size: indirect_buffer.size, @@ -3103,10 +3098,10 @@ impl Global { }; let base = pass.base_mut(scope)?; - base.commands.push(ArcRenderCommand::MultiDrawIndirect { + base.commands.push(ArcRenderCommand::DrawIndirect { buffer: self.resolve_render_pass_buffer_id(scope, buffer_id)?, offset, - count: None, + count: 1, indexed: false, }); @@ -3125,10 +3120,10 @@ impl Global { }; let base = pass.base_mut(scope)?; - base.commands.push(ArcRenderCommand::MultiDrawIndirect { + base.commands.push(ArcRenderCommand::DrawIndirect { buffer: self.resolve_render_pass_buffer_id(scope, buffer_id)?, offset, - count: None, + count: 1, indexed: true, }); @@ -3148,10 +3143,10 @@ impl Global { 
}; let base = pass.base_mut(scope)?; - base.commands.push(ArcRenderCommand::MultiDrawIndirect { + base.commands.push(ArcRenderCommand::DrawIndirect { buffer: self.resolve_render_pass_buffer_id(scope, buffer_id)?, offset, - count: NonZeroU32::new(count), + count, indexed: false, }); @@ -3171,10 +3166,10 @@ impl Global { }; let base = pass.base_mut(scope)?; - base.commands.push(ArcRenderCommand::MultiDrawIndirect { + base.commands.push(ArcRenderCommand::DrawIndirect { buffer: self.resolve_render_pass_buffer_id(scope, buffer_id)?, offset, - count: NonZeroU32::new(count), + count, indexed: true, }); diff --git a/wgpu-core/src/command/render_command.rs b/wgpu-core/src/command/render_command.rs index d4e2689d27..549d140bb5 100644 --- a/wgpu-core/src/command/render_command.rs +++ b/wgpu-core/src/command/render_command.rs @@ -6,7 +6,7 @@ use crate::{ }; use wgt::{BufferAddress, BufferSize, Color}; -use std::{num::NonZeroU32, sync::Arc}; +use std::sync::Arc; use super::{Rect, RenderBundle}; @@ -82,11 +82,10 @@ pub enum RenderCommand { base_vertex: i32, first_instance: u32, }, - MultiDrawIndirect { + DrawIndirect { buffer_id: id::BufferId, offset: BufferAddress, - /// Count of `None` represents a non-multi call. - count: Option, + count: u32, indexed: bool, }, MultiDrawIndirectCount { @@ -311,16 +310,16 @@ impl RenderCommand { first_instance, }, - RenderCommand::MultiDrawIndirect { + RenderCommand::DrawIndirect { buffer_id, offset, count, indexed, - } => ArcRenderCommand::MultiDrawIndirect { + } => ArcRenderCommand::DrawIndirect { buffer: buffers_guard.get(buffer_id).get().map_err(|e| { RenderPassError { scope: PassErrorScope::Draw { - kind: if count.is_some() { + kind: if count != 1 { DrawKind::MultiDrawIndirect } else { DrawKind::DrawIndirect @@ -459,11 +458,10 @@ pub enum ArcRenderCommand { base_vertex: i32, first_instance: u32, }, - MultiDrawIndirect { + DrawIndirect { buffer: Arc, offset: BufferAddress, - /// Count of `None` represents a non-multi call. 
- count: Option, + count: u32, indexed: bool, }, MultiDrawIndirectCount { diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 15d171f55a..d901324205 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -372,6 +372,8 @@ impl super::Adapter { } else { vertex_shader_storage_textures.min(fragment_shader_storage_textures) }; + let indirect_execution = + supported((3, 1), (4, 3)) || extensions.contains("GL_ARB_multi_draw_indirect"); let mut downlevel_flags = wgt::DownlevelFlags::empty() | wgt::DownlevelFlags::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES @@ -383,10 +385,7 @@ impl super::Adapter { wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE, max_storage_block_size != 0, ); - downlevel_flags.set( - wgt::DownlevelFlags::INDIRECT_EXECUTION, - supported((3, 1), (4, 3)) || extensions.contains("GL_ARB_multi_draw_indirect"), - ); + downlevel_flags.set(wgt::DownlevelFlags::INDIRECT_EXECUTION, indirect_execution); downlevel_flags.set(wgt::DownlevelFlags::BASE_VERTEX, supported((3, 2), (3, 2))); downlevel_flags.set( wgt::DownlevelFlags::INDEPENDENT_BLEND, @@ -471,6 +470,8 @@ impl super::Adapter { wgt::Features::SHADER_EARLY_DEPTH_TEST, supported((3, 1), (4, 2)) || extensions.contains("GL_ARB_shader_image_load_store"), ); + // We emulate MDI with a loop of draw calls. + features.set(wgt::Features::MULTI_DRAW_INDIRECT, indirect_execution); if extensions.contains("GL_ARB_timer_query") { features.set(wgt::Features::TIMESTAMP_QUERY, true); features.set(wgt::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS, true); diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 9d3605f18e..7753de289a 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -638,12 +638,17 @@ bitflags::bitflags! { /// /// Allows multiple indirect calls to be dispatched from a single buffer. 
/// - /// Supported platforms: + /// Natively Supported Platforms: /// - DX12 /// - Vulkan - /// - Metal on Apple3+ or Mac1+ (Emulated on top of `draw_indirect` and `draw_indexed_indirect`) /// - /// This is a native only feature. + /// Emulated Platforms: + /// - Metal + /// - OpenGL + /// - WebGPU + /// + /// Emulation is performed by looping over the individual indirect draw calls in the backend. This is still significantly + /// faster than emulating it yourself, as wgpu only does draw call validation once. /// /// [`RenderPass::multi_draw_indirect`]: ../wgpu/struct.RenderPass.html#method.multi_draw_indirect /// [`RenderPass::multi_draw_indexed_indirect`]: ../wgpu/struct.RenderPass.html#method.multi_draw_indexed_indirect diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 8511aff5ed..73bb72bebf 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -783,7 +783,8 @@ const FEATURES_MAPPING: [(wgt::Features, webgpu_sys::GpuFeatureName); 12] = [ ]; fn map_wgt_features(supported_features: webgpu_sys::GpuSupportedFeatures) -> wgt::Features { - let mut features = wgt::Features::empty(); + // We emulate MDI. 
+ let mut features = wgt::Features::MULTI_DRAW_INDIRECT; for (wgpu_feat, web_feat) in FEATURES_MAPPING { match wasm_bindgen::JsValue::from(web_feat).as_string() { Some(value) if supported_features.has(&value) => features |= wgpu_feat, From a8a91737b2d2f378976e292074c75817593a0224 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Tue, 7 Jan 2025 16:00:56 -0500 Subject: [PATCH 4/4] Switch Binding Arrays on Metal to Argument Buffers (#6751) --- .github/workflows/ci.yml | 2 +- Cargo.lock | 3 +- Cargo.toml | 2 +- benches/benches/bind_groups.rs | 9 + naga/src/back/msl/keywords.rs | 1 + naga/src/back/msl/mod.rs | 2 - naga/src/back/msl/writer.rs | 53 +++-- naga/tests/in/binding-arrays.param.ron | 4 +- naga/tests/out/msl/binding-arrays.msl | 106 ++++----- wgpu-hal/src/metal/adapter.rs | 24 +- wgpu-hal/src/metal/command.rs | 13 ++ wgpu-hal/src/metal/conv.rs | 28 +++ wgpu-hal/src/metal/device.rs | 295 ++++++++++++++++--------- wgpu-hal/src/metal/mod.rs | 36 ++- 14 files changed, 374 insertions(+), 204 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3baefb469b..a10a2d0d9d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,7 +48,7 @@ env: CARGO_INCREMENTAL: false CARGO_TERM_COLOR: always WGPU_DX12_COMPILER: dxc - RUST_LOG: info + RUST_LOG: debug RUST_BACKTRACE: full PKG_CONFIG_ALLOW_CROSS: 1 # allow android to work RUSTFLAGS: -D warnings diff --git a/Cargo.lock b/Cargo.lock index 49e0517467..226cdae506 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2099,8 +2099,7 @@ dependencies = [ [[package]] name = "metal" version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3572083504c43e14aec05447f8a3d57cce0f66d7a3c1b9058572eca4d70ab9" +source = "git+https://github.com/gfx-rs/metal-rs.git?rev=ef768ff9d7#ef768ff9d742ae6a0f4e83ddc8031264e7d460c4" dependencies = [ "bitflags 2.6.0", "block", diff --git a/Cargo.toml b/Cargo.toml index c16b1b000d..d693596e99 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -142,9 +142,9 @@ wgpu-types = { version = "23.0.0", path = "./wgpu-types" } winit = { version = "0.29", features = ["android-native-activity"] } # Metal dependencies +metal = { version = "0.30.0", git = "https://github.com/gfx-rs/metal-rs.git", rev = "ef768ff9d7" } block = "0.1" core-graphics-types = "0.1" -metal = { version = "0.30.0" } objc = "0.2.5" # Vulkan dependencies diff --git a/benches/benches/bind_groups.rs b/benches/benches/bind_groups.rs index 35da49cccb..f14fa9a3b1 100644 --- a/benches/benches/bind_groups.rs +++ b/benches/benches/bind_groups.rs @@ -62,6 +62,15 @@ impl BindGroupState { fn run_bench(ctx: &mut Criterion) { let state = Lazy::new(BindGroupState::new); + if !state + .device_state + .device + .features() + .contains(wgpu::Features::TEXTURE_BINDING_ARRAY) + { + return; + } + let mut group = ctx.benchmark_group("Bind Group Creation"); for count in [5, 50, 500, 5_000, 50_000] { diff --git a/naga/src/back/msl/keywords.rs b/naga/src/back/msl/keywords.rs index 73c457dd34..a4eabab234 100644 --- a/naga/src/back/msl/keywords.rs +++ b/naga/src/back/msl/keywords.rs @@ -341,4 +341,5 @@ pub const RESERVED: &[&str] = &[ "DefaultConstructible", super::writer::FREXP_FUNCTION, super::writer::MODF_FUNCTION, + super::writer::ARGUMENT_BUFFER_WRAPPER_STRUCT, ]; diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs index 0c85c8a9e4..28e99acc5f 100644 --- a/naga/src/back/msl/mod.rs +++ b/naga/src/back/msl/mod.rs @@ -59,8 +59,6 @@ pub struct BindTarget { pub buffer: Option, pub texture: Option, pub sampler: Option, - /// If the binding is an unsized binding array, this overrides the size. 
- pub binding_array_size: Option, pub mutable: bool, } diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index a8b34d4574..2386f1825b 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -36,6 +36,14 @@ const RAY_QUERY_FUN_MAP_INTERSECTION: &str = "_map_intersection_type"; pub(crate) const ATOMIC_COMP_EXCH_FUNCTION: &str = "naga_atomic_compare_exchange_weak_explicit"; pub(crate) const MODF_FUNCTION: &str = "naga_modf"; pub(crate) const FREXP_FUNCTION: &str = "naga_frexp"; +/// For some reason, Metal does not let you have `metal::texture<..>*` as a buffer argument. +/// However, if you put that texture inside a struct, everything is totally fine. This +/// baffles me to no end. +/// +/// As such, we wrap all argument buffers in a struct that has a single generic `` field. +/// This allows `NagaArgumentBufferWrapper>*` to work. The astute among +/// you have noticed that this should be exactly the same to the compiler, and you're correct. +pub(crate) const ARGUMENT_BUFFER_WRAPPER_STRUCT: &str = "NagaArgumentBufferWrapper"; /// Write the Metal name for a Naga numeric type: scalar, vector, or matrix. /// @@ -275,24 +283,17 @@ impl Display for TypeContext<'_> { crate::TypeInner::RayQuery => { write!(out, "{RAY_QUERY_TYPE}") } - crate::TypeInner::BindingArray { base, size } => { + crate::TypeInner::BindingArray { base, .. } => { let base_tyname = Self { handle: base, first_time: false, ..*self }; - if let Some(&super::ResolvedBinding::Resource(super::BindTarget { - binding_array_size: Some(override_size), - .. 
- })) = self.binding - { - write!(out, "{NAMESPACE}::array<{base_tyname}, {override_size}>") - } else if let crate::ArraySize::Constant(size) = size { - write!(out, "{NAMESPACE}::array<{base_tyname}, {size}>") - } else { - unreachable!("metal requires all arrays be constant sized"); - } + write!( + out, + "constant {ARGUMENT_BUFFER_WRAPPER_STRUCT}<{base_tyname}>*" + ) } } } @@ -2552,6 +2553,8 @@ impl Writer { } => true, _ => false, }; + let accessing_wrapped_binding_array = + matches!(*base_ty, crate::TypeInner::BindingArray { .. }); self.put_access_chain(base, policy, context)?; if accessing_wrapped_array { @@ -2588,6 +2591,10 @@ impl Writer { write!(self.out, "]")?; + if accessing_wrapped_binding_array { + write!(self.out, ".{WRAPPED_ARRAY_FIELD}")?; + } + Ok(()) } @@ -3701,7 +3708,18 @@ impl Writer { } fn write_type_defs(&mut self, module: &crate::Module) -> BackendResult { + let mut generated_argument_buffer_wrapper = false; for (handle, ty) in module.types.iter() { + if let crate::TypeInner::BindingArray { .. } = ty.inner { + if !generated_argument_buffer_wrapper { + writeln!(self.out, "template ")?; + writeln!(self.out, "struct {ARGUMENT_BUFFER_WRAPPER_STRUCT} {{")?; + writeln!(self.out, "{}T {WRAPPED_ARRAY_FIELD};", back::INDENT)?; + writeln!(self.out, "}};")?; + generated_argument_buffer_wrapper = true; + } + } + if !ty.needs_alias() { continue; } @@ -5132,13 +5150,10 @@ template let target = options.get_resource_binding_target(ep, br); let good = match target { Some(target) => { - let binding_ty = match module.types[var.ty].inner { - crate::TypeInner::BindingArray { base, .. } => { - &module.types[base].inner - } - ref ty => ty, - }; - match *binding_ty { + // We intentionally don't dereference binding_arrays here, + // so that binding arrays fall to the buffer location. + + match module.types[var.ty].inner { crate::TypeInner::Image { .. } => target.texture.is_some(), crate::TypeInner::Sampler { .. 
} => { target.sampler.is_some() diff --git a/naga/tests/in/binding-arrays.param.ron b/naga/tests/in/binding-arrays.param.ron index 249a4afe2a..96807d825a 100644 --- a/naga/tests/in/binding-arrays.param.ron +++ b/naga/tests/in/binding-arrays.param.ron @@ -19,11 +19,11 @@ restrict_indexing: true ), msl: ( - lang_version: (2, 0), + lang_version: (3, 0), per_entry_point_map: { "main": ( resources: { - (group: 0, binding: 0): (texture: Some(0), binding_array_size: Some(10), mutable: false), + (group: 0, binding: 0): (buffer: Some(0), binding_array_size: Some(10), mutable: false), }, sizes_buffer: None, ) diff --git a/naga/tests/out/msl/binding-arrays.msl b/naga/tests/out/msl/binding-arrays.msl index 75f787a9f2..f62546241a 100644 --- a/naga/tests/out/msl/binding-arrays.msl +++ b/naga/tests/out/msl/binding-arrays.msl @@ -1,4 +1,4 @@ -// language: metal2.0 +// language: metal3.0 #include #include @@ -13,6 +13,10 @@ struct DefaultConstructible { struct UniformIndex { uint index; }; +template +struct NagaArgumentBufferWrapper { + T inner; +}; struct FragmentIn { uint index; }; @@ -25,14 +29,14 @@ struct main_Output { }; fragment main_Output main_( main_Input varyings [[stage_in]] -, metal::array, 10> texture_array_unbounded [[texture(0)]] -, metal::array, 5> texture_array_bounded [[user(fake0)]] -, metal::array, 5> texture_array_2darray [[user(fake0)]] -, metal::array, 5> texture_array_multisampled [[user(fake0)]] -, metal::array, 5> texture_array_depth [[user(fake0)]] -, metal::array, 5> texture_array_storage [[user(fake0)]] -, metal::array samp [[user(fake0)]] -, metal::array samp_comp [[user(fake0)]] +, constant NagaArgumentBufferWrapper>* texture_array_unbounded [[buffer(0)]] +, constant NagaArgumentBufferWrapper>* texture_array_bounded [[user(fake0)]] +, constant NagaArgumentBufferWrapper>* texture_array_2darray [[user(fake0)]] +, constant NagaArgumentBufferWrapper>* texture_array_multisampled [[user(fake0)]] +, constant NagaArgumentBufferWrapper>* texture_array_depth 
[[user(fake0)]] +, constant NagaArgumentBufferWrapper>* texture_array_storage [[user(fake0)]] +, constant NagaArgumentBufferWrapper* samp [[user(fake0)]] +, constant NagaArgumentBufferWrapper* samp_comp [[user(fake0)]] , constant UniformIndex& uni [[user(fake0)]] ) { const FragmentIn fragment_in = { varyings.index }; @@ -45,116 +49,116 @@ fragment main_Output main_( metal::float2 uv = metal::float2(0.0); metal::int2 pix = metal::int2(0); metal::uint2 _e22 = u2_; - u2_ = _e22 + metal::uint2(texture_array_unbounded[0].get_width(), texture_array_unbounded[0].get_height()); + u2_ = _e22 + metal::uint2(texture_array_unbounded[0].inner.get_width(), texture_array_unbounded[0].inner.get_height()); metal::uint2 _e27 = u2_; - u2_ = _e27 + metal::uint2(texture_array_unbounded[uniform_index].get_width(), texture_array_unbounded[uniform_index].get_height()); + u2_ = _e27 + metal::uint2(texture_array_unbounded[uniform_index].inner.get_width(), texture_array_unbounded[uniform_index].inner.get_height()); metal::uint2 _e32 = u2_; - u2_ = _e32 + metal::uint2(texture_array_unbounded[non_uniform_index].get_width(), texture_array_unbounded[non_uniform_index].get_height()); - metal::float4 _e38 = texture_array_bounded[0].gather(samp[0], uv); + u2_ = _e32 + metal::uint2(texture_array_unbounded[non_uniform_index].inner.get_width(), texture_array_unbounded[non_uniform_index].inner.get_height()); + metal::float4 _e38 = texture_array_bounded[0].inner.gather(samp[0].inner, uv); metal::float4 _e39 = v4_; v4_ = _e39 + _e38; - metal::float4 _e45 = texture_array_bounded[uniform_index].gather(samp[uniform_index], uv); + metal::float4 _e45 = texture_array_bounded[uniform_index].inner.gather(samp[uniform_index].inner, uv); metal::float4 _e46 = v4_; v4_ = _e46 + _e45; - metal::float4 _e52 = texture_array_bounded[non_uniform_index].gather(samp[non_uniform_index], uv); + metal::float4 _e52 = texture_array_bounded[non_uniform_index].inner.gather(samp[non_uniform_index].inner, uv); metal::float4 _e53 = 
v4_; v4_ = _e53 + _e52; - metal::float4 _e60 = texture_array_depth[0].gather_compare(samp_comp[0], uv, 0.0); + metal::float4 _e60 = texture_array_depth[0].inner.gather_compare(samp_comp[0].inner, uv, 0.0); metal::float4 _e61 = v4_; v4_ = _e61 + _e60; - metal::float4 _e68 = texture_array_depth[uniform_index].gather_compare(samp_comp[uniform_index], uv, 0.0); + metal::float4 _e68 = texture_array_depth[uniform_index].inner.gather_compare(samp_comp[uniform_index].inner, uv, 0.0); metal::float4 _e69 = v4_; v4_ = _e69 + _e68; - metal::float4 _e76 = texture_array_depth[non_uniform_index].gather_compare(samp_comp[non_uniform_index], uv, 0.0); + metal::float4 _e76 = texture_array_depth[non_uniform_index].inner.gather_compare(samp_comp[non_uniform_index].inner, uv, 0.0); metal::float4 _e77 = v4_; v4_ = _e77 + _e76; - metal::float4 _e82 = (uint(0) < texture_array_unbounded[0].get_num_mip_levels() && metal::all(metal::uint2(pix) < metal::uint2(texture_array_unbounded[0].get_width(0), texture_array_unbounded[0].get_height(0))) ? texture_array_unbounded[0].read(metal::uint2(pix), 0): DefaultConstructible()); + metal::float4 _e82 = (uint(0) < texture_array_unbounded[0].inner.get_num_mip_levels() && metal::all(metal::uint2(pix) < metal::uint2(texture_array_unbounded[0].inner.get_width(0), texture_array_unbounded[0].inner.get_height(0))) ? texture_array_unbounded[0].inner.read(metal::uint2(pix), 0): DefaultConstructible()); metal::float4 _e83 = v4_; v4_ = _e83 + _e82; - metal::float4 _e88 = (uint(0) < texture_array_unbounded[uniform_index].get_num_mip_levels() && metal::all(metal::uint2(pix) < metal::uint2(texture_array_unbounded[uniform_index].get_width(0), texture_array_unbounded[uniform_index].get_height(0))) ? 
texture_array_unbounded[uniform_index].read(metal::uint2(pix), 0): DefaultConstructible()); + metal::float4 _e88 = (uint(0) < texture_array_unbounded[uniform_index].inner.get_num_mip_levels() && metal::all(metal::uint2(pix) < metal::uint2(texture_array_unbounded[uniform_index].inner.get_width(0), texture_array_unbounded[uniform_index].inner.get_height(0))) ? texture_array_unbounded[uniform_index].inner.read(metal::uint2(pix), 0): DefaultConstructible()); metal::float4 _e89 = v4_; v4_ = _e89 + _e88; - metal::float4 _e94 = (uint(0) < texture_array_unbounded[non_uniform_index].get_num_mip_levels() && metal::all(metal::uint2(pix) < metal::uint2(texture_array_unbounded[non_uniform_index].get_width(0), texture_array_unbounded[non_uniform_index].get_height(0))) ? texture_array_unbounded[non_uniform_index].read(metal::uint2(pix), 0): DefaultConstructible()); + metal::float4 _e94 = (uint(0) < texture_array_unbounded[non_uniform_index].inner.get_num_mip_levels() && metal::all(metal::uint2(pix) < metal::uint2(texture_array_unbounded[non_uniform_index].inner.get_width(0), texture_array_unbounded[non_uniform_index].inner.get_height(0))) ? 
texture_array_unbounded[non_uniform_index].inner.read(metal::uint2(pix), 0): DefaultConstructible()); metal::float4 _e95 = v4_; v4_ = _e95 + _e94; uint _e100 = u1_; - u1_ = _e100 + texture_array_2darray[0].get_array_size(); + u1_ = _e100 + texture_array_2darray[0].inner.get_array_size(); uint _e105 = u1_; - u1_ = _e105 + texture_array_2darray[uniform_index].get_array_size(); + u1_ = _e105 + texture_array_2darray[uniform_index].inner.get_array_size(); uint _e110 = u1_; - u1_ = _e110 + texture_array_2darray[non_uniform_index].get_array_size(); + u1_ = _e110 + texture_array_2darray[non_uniform_index].inner.get_array_size(); uint _e115 = u1_; - u1_ = _e115 + texture_array_bounded[0].get_num_mip_levels(); + u1_ = _e115 + texture_array_bounded[0].inner.get_num_mip_levels(); uint _e120 = u1_; - u1_ = _e120 + texture_array_bounded[uniform_index].get_num_mip_levels(); + u1_ = _e120 + texture_array_bounded[uniform_index].inner.get_num_mip_levels(); uint _e125 = u1_; - u1_ = _e125 + texture_array_bounded[non_uniform_index].get_num_mip_levels(); + u1_ = _e125 + texture_array_bounded[non_uniform_index].inner.get_num_mip_levels(); uint _e130 = u1_; - u1_ = _e130 + texture_array_multisampled[0].get_num_samples(); + u1_ = _e130 + texture_array_multisampled[0].inner.get_num_samples(); uint _e135 = u1_; - u1_ = _e135 + texture_array_multisampled[uniform_index].get_num_samples(); + u1_ = _e135 + texture_array_multisampled[uniform_index].inner.get_num_samples(); uint _e140 = u1_; - u1_ = _e140 + texture_array_multisampled[non_uniform_index].get_num_samples(); - metal::float4 _e146 = texture_array_bounded[0].sample(samp[0], uv); + u1_ = _e140 + texture_array_multisampled[non_uniform_index].inner.get_num_samples(); + metal::float4 _e146 = texture_array_bounded[0].inner.sample(samp[0].inner, uv); metal::float4 _e147 = v4_; v4_ = _e147 + _e146; - metal::float4 _e153 = texture_array_bounded[uniform_index].sample(samp[uniform_index], uv); + metal::float4 _e153 = 
texture_array_bounded[uniform_index].inner.sample(samp[uniform_index].inner, uv); metal::float4 _e154 = v4_; v4_ = _e154 + _e153; - metal::float4 _e160 = texture_array_bounded[non_uniform_index].sample(samp[non_uniform_index], uv); + metal::float4 _e160 = texture_array_bounded[non_uniform_index].inner.sample(samp[non_uniform_index].inner, uv); metal::float4 _e161 = v4_; v4_ = _e161 + _e160; - metal::float4 _e168 = texture_array_bounded[0].sample(samp[0], uv, metal::bias(0.0)); + metal::float4 _e168 = texture_array_bounded[0].inner.sample(samp[0].inner, uv, metal::bias(0.0)); metal::float4 _e169 = v4_; v4_ = _e169 + _e168; - metal::float4 _e176 = texture_array_bounded[uniform_index].sample(samp[uniform_index], uv, metal::bias(0.0)); + metal::float4 _e176 = texture_array_bounded[uniform_index].inner.sample(samp[uniform_index].inner, uv, metal::bias(0.0)); metal::float4 _e177 = v4_; v4_ = _e177 + _e176; - metal::float4 _e184 = texture_array_bounded[non_uniform_index].sample(samp[non_uniform_index], uv, metal::bias(0.0)); + metal::float4 _e184 = texture_array_bounded[non_uniform_index].inner.sample(samp[non_uniform_index].inner, uv, metal::bias(0.0)); metal::float4 _e185 = v4_; v4_ = _e185 + _e184; - float _e192 = texture_array_depth[0].sample_compare(samp_comp[0], uv, 0.0); + float _e192 = texture_array_depth[0].inner.sample_compare(samp_comp[0].inner, uv, 0.0); float _e193 = v1_; v1_ = _e193 + _e192; - float _e200 = texture_array_depth[uniform_index].sample_compare(samp_comp[uniform_index], uv, 0.0); + float _e200 = texture_array_depth[uniform_index].inner.sample_compare(samp_comp[uniform_index].inner, uv, 0.0); float _e201 = v1_; v1_ = _e201 + _e200; - float _e208 = texture_array_depth[non_uniform_index].sample_compare(samp_comp[non_uniform_index], uv, 0.0); + float _e208 = texture_array_depth[non_uniform_index].inner.sample_compare(samp_comp[non_uniform_index].inner, uv, 0.0); float _e209 = v1_; v1_ = _e209 + _e208; - float _e216 = 
texture_array_depth[0].sample_compare(samp_comp[0], uv, 0.0); + float _e216 = texture_array_depth[0].inner.sample_compare(samp_comp[0].inner, uv, 0.0); float _e217 = v1_; v1_ = _e217 + _e216; - float _e224 = texture_array_depth[uniform_index].sample_compare(samp_comp[uniform_index], uv, 0.0); + float _e224 = texture_array_depth[uniform_index].inner.sample_compare(samp_comp[uniform_index].inner, uv, 0.0); float _e225 = v1_; v1_ = _e225 + _e224; - float _e232 = texture_array_depth[non_uniform_index].sample_compare(samp_comp[non_uniform_index], uv, 0.0); + float _e232 = texture_array_depth[non_uniform_index].inner.sample_compare(samp_comp[non_uniform_index].inner, uv, 0.0); float _e233 = v1_; v1_ = _e233 + _e232; - metal::float4 _e239 = texture_array_bounded[0].sample(samp[0], uv, metal::gradient2d(uv, uv)); + metal::float4 _e239 = texture_array_bounded[0].inner.sample(samp[0].inner, uv, metal::gradient2d(uv, uv)); metal::float4 _e240 = v4_; v4_ = _e240 + _e239; - metal::float4 _e246 = texture_array_bounded[uniform_index].sample(samp[uniform_index], uv, metal::gradient2d(uv, uv)); + metal::float4 _e246 = texture_array_bounded[uniform_index].inner.sample(samp[uniform_index].inner, uv, metal::gradient2d(uv, uv)); metal::float4 _e247 = v4_; v4_ = _e247 + _e246; - metal::float4 _e253 = texture_array_bounded[non_uniform_index].sample(samp[non_uniform_index], uv, metal::gradient2d(uv, uv)); + metal::float4 _e253 = texture_array_bounded[non_uniform_index].inner.sample(samp[non_uniform_index].inner, uv, metal::gradient2d(uv, uv)); metal::float4 _e254 = v4_; v4_ = _e254 + _e253; - metal::float4 _e261 = texture_array_bounded[0].sample(samp[0], uv, metal::level(0.0)); + metal::float4 _e261 = texture_array_bounded[0].inner.sample(samp[0].inner, uv, metal::level(0.0)); metal::float4 _e262 = v4_; v4_ = _e262 + _e261; - metal::float4 _e269 = texture_array_bounded[uniform_index].sample(samp[uniform_index], uv, metal::level(0.0)); + metal::float4 _e269 = 
texture_array_bounded[uniform_index].inner.sample(samp[uniform_index].inner, uv, metal::level(0.0)); metal::float4 _e270 = v4_; v4_ = _e270 + _e269; - metal::float4 _e277 = texture_array_bounded[non_uniform_index].sample(samp[non_uniform_index], uv, metal::level(0.0)); + metal::float4 _e277 = texture_array_bounded[non_uniform_index].inner.sample(samp[non_uniform_index].inner, uv, metal::level(0.0)); metal::float4 _e278 = v4_; v4_ = _e278 + _e277; metal::float4 _e282 = v4_; - texture_array_storage[0].write(_e282, metal::uint2(pix)); + texture_array_storage[0].inner.write(_e282, metal::uint2(pix)); metal::float4 _e285 = v4_; - texture_array_storage[uniform_index].write(_e285, metal::uint2(pix)); + texture_array_storage[uniform_index].inner.write(_e285, metal::uint2(pix)); metal::float4 _e288 = v4_; - texture_array_storage[non_uniform_index].write(_e288, metal::uint2(pix)); + texture_array_storage[non_uniform_index].inner.write(_e288, metal::uint2(pix)); metal::uint2 _e289 = u2_; uint _e290 = u1_; metal::float2 v2_ = static_cast(_e289 + metal::uint2(_e290)); diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index d343d8881a..21d34001a8 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -377,12 +377,6 @@ const RESOURCE_HEAP_SUPPORT: &[MTLFeatureSet] = &[ MTLFeatureSet::macOS_GPUFamily1_v3, ]; -const ARGUMENT_BUFFER_SUPPORT: &[MTLFeatureSet] = &[ - MTLFeatureSet::iOS_GPUFamily1_v4, - MTLFeatureSet::tvOS_GPUFamily1_v3, - MTLFeatureSet::macOS_GPUFamily1_v3, -]; - const MUTABLE_COMPARISON_SAMPLER_SUPPORT: &[MTLFeatureSet] = &[ MTLFeatureSet::iOS_GPUFamily3_v1, MTLFeatureSet::macOS_GPUFamily1_v1, @@ -610,7 +604,7 @@ impl super::PrivateCapabilities { }, msaa_apple7: family_check && device.supports_family(MTLGPUFamily::Apple7), resource_heaps: Self::supports_any(device, RESOURCE_HEAP_SUPPORT), - argument_buffers: Self::supports_any(device, ARGUMENT_BUFFER_SUPPORT), + argument_buffers: device.argument_buffers_support(), 
shared_textures: !os_is_mac, mutable_comparison_samplers: Self::supports_any( device, @@ -905,18 +899,12 @@ impl super::PrivateCapabilities { features.set( F::TEXTURE_BINDING_ARRAY | F::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING - | F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, - self.msl_version >= MTLLanguageVersion::V2_0 && self.supports_arrays_of_textures, + | F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING + | F::PARTIALLY_BOUND_BINDING_ARRAY, + self.msl_version >= MTLLanguageVersion::V3_0 + && self.supports_arrays_of_textures + && self.argument_buffers as u64 >= metal::MTLArgumentBuffersTier::Tier2 as u64, ); - //// XXX: this is technically not true, as read-only storage images can be used in arrays - //// on precisely the same conditions that sampled textures can. But texel fetch from a - //// sampled texture is a thing; should we bother introducing another feature flag? - if self.msl_version >= MTLLanguageVersion::V2_2 - && self.supports_arrays_of_textures - && self.supports_arrays_of_textures_write - { - features.insert(F::STORAGE_RESOURCE_BINDING_ARRAY); - } features.set( F::SHADER_INT64, self.int64 && self.msl_version >= MTLLanguageVersion::V2_3, diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index c0b8331fb5..a66349cbf4 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -750,6 +750,11 @@ impl crate::CommandEncoder for super::CommandEncoder { Some(res.as_native()), ); } + + // Call useResource on all textures and buffers used indirectly so they are alive + for (resource, use_info) in group.resources_to_use.iter() { + encoder.use_resource_at(resource.as_native(), use_info.uses, use_info.stages); + } } if let Some(ref encoder) = self.state.compute { @@ -807,6 +812,14 @@ impl crate::CommandEncoder for super::CommandEncoder { Some(res.as_native()), ); } + + // Call useResource on all textures and buffers used indirectly so they are alive + for 
(resource, use_info) in group.resources_to_use.iter() { + if !use_info.visible_in_compute { + continue; + } + encoder.use_resource(resource.as_native(), use_info.uses); + } } } diff --git a/wgpu-hal/src/metal/conv.rs b/wgpu-hal/src/metal/conv.rs index c6a213e0d0..ef71f168ca 100644 --- a/wgpu-hal/src/metal/conv.rs +++ b/wgpu-hal/src/metal/conv.rs @@ -331,3 +331,31 @@ pub fn get_blit_option( metal::MTLBlitOption::None } } + +pub fn map_render_stages(stage: wgt::ShaderStages) -> metal::MTLRenderStages { + let mut raw_stages = metal::MTLRenderStages::empty(); + + if stage.contains(wgt::ShaderStages::VERTEX) { + raw_stages |= metal::MTLRenderStages::Vertex; + } + if stage.contains(wgt::ShaderStages::FRAGMENT) { + raw_stages |= metal::MTLRenderStages::Fragment; + } + + raw_stages +} + +pub fn map_resource_usage(ty: &wgt::BindingType) -> metal::MTLResourceUsage { + match ty { + wgt::BindingType::Texture { .. } => metal::MTLResourceUsage::Sample, + wgt::BindingType::StorageTexture { access, .. } => match access { + wgt::StorageTextureAccess::WriteOnly => metal::MTLResourceUsage::Write, + wgt::StorageTextureAccess::ReadOnly => metal::MTLResourceUsage::Read, + wgt::StorageTextureAccess::ReadWrite => { + metal::MTLResourceUsage::Read | metal::MTLResourceUsage::Write + } + }, + wgt::BindingType::Sampler(..) 
=> metal::MTLResourceUsage::empty(), + _ => unreachable!(), + } +} diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 73d6bcc0e2..b64fa7c935 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1,7 +1,6 @@ use parking_lot::Mutex; use std::{ - num::NonZeroU32, - ptr, + ptr::NonNull, sync::{atomic, Arc}, thread, time, }; @@ -10,6 +9,8 @@ use super::conv; use crate::auxil::map_naga_stage; use crate::TlasInstance; +use metal::foreign_types::ForeignType; + type DeviceResult = Result; struct CompiledShader { @@ -384,7 +385,7 @@ impl crate::Device for super::Device { let ptr = buffer.raw.contents().cast::(); assert!(!ptr.is_null()); Ok(crate::BufferMapping { - ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize) }).unwrap(), + ptr: NonNull::new(unsafe { ptr.offset(range.start as isize) }).unwrap(), is_coherent: true, }) } @@ -580,6 +581,9 @@ impl crate::Device for super::Device { if let Some(label) = desc.label { descriptor.set_label(label); } + if self.features.contains(wgt::Features::TEXTURE_BINDING_ARRAY) { + descriptor.set_support_argument_buffers(true); + } let raw = self.shared.device.lock().new_sampler(&descriptor); self.counters.samplers.add(1); @@ -698,36 +702,41 @@ impl crate::Device for super::Device { } let mut target = naga::back::msl::BindTarget::default(); - let count = entry.count.map_or(1, NonZeroU32::get); - target.binding_array_size = entry.count.map(NonZeroU32::get); - match entry.ty { - wgt::BindingType::Buffer { ty, .. } => { - target.buffer = Some(info.counters.buffers as _); - info.counters.buffers += count; - if let wgt::BufferBindingType::Storage { read_only } = ty { - target.mutable = !read_only; + // Bindless path + if let Some(_) = entry.count { + target.buffer = Some(info.counters.buffers as _); + info.counters.buffers += 1; + } else { + match entry.ty { + wgt::BindingType::Buffer { ty, .. 
} => { + target.buffer = Some(info.counters.buffers as _); + info.counters.buffers += 1; + if let wgt::BufferBindingType::Storage { read_only } = ty { + target.mutable = !read_only; + } } + wgt::BindingType::Sampler { .. } => { + target.sampler = + Some(naga::back::msl::BindSamplerTarget::Resource( + info.counters.samplers as _, + )); + info.counters.samplers += 1; + } + wgt::BindingType::Texture { .. } => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += 1; + } + wgt::BindingType::StorageTexture { access, .. } => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += 1; + target.mutable = match access { + wgt::StorageTextureAccess::ReadOnly => false, + wgt::StorageTextureAccess::WriteOnly => true, + wgt::StorageTextureAccess::ReadWrite => true, + }; + } + wgt::BindingType::AccelerationStructure => unimplemented!(), } - wgt::BindingType::Sampler { .. } => { - target.sampler = Some(naga::back::msl::BindSamplerTarget::Resource( - info.counters.samplers as _, - )); - info.counters.samplers += count; - } - wgt::BindingType::Texture { .. } => { - target.texture = Some(info.counters.textures as _); - info.counters.textures += count; - } - wgt::BindingType::StorageTexture { access, .. 
} => { - target.texture = Some(info.counters.textures as _); - info.counters.textures += count; - target.mutable = match access { - wgt::StorageTextureAccess::ReadOnly => false, - wgt::StorageTextureAccess::WriteOnly => true, - wgt::StorageTextureAccess::ReadWrite => true, - }; - } - wgt::BindingType::AccelerationStructure => unimplemented!(), } let br = naga::ResourceBinding { @@ -805,90 +814,162 @@ impl crate::Device for super::Device { super::AccelerationStructure, >, ) -> DeviceResult { - let mut bg = super::BindGroup::default(); - for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) { - let stage_bit = map_naga_stage(stage); - let mut dynamic_offsets_count = 0u32; - let layout_and_entry_iter = desc.entries.iter().map(|entry| { - let layout = desc - .layout - .entries - .iter() - .find(|layout_entry| layout_entry.binding == entry.binding) - .expect("internal error: no layout entry found with binding slot"); - (entry, layout) - }); - for (entry, layout) in layout_and_entry_iter { - let size = layout.count.map_or(1, |c| c.get()); - if let wgt::BindingType::Buffer { - has_dynamic_offset: true, - .. - } = layout.ty - { - dynamic_offsets_count += size; - } - if !layout.visibility.contains(stage_bit) { - continue; - } - match layout.ty { - wgt::BindingType::Buffer { - ty, - has_dynamic_offset, - .. - } => { - let start = entry.resource_index as usize; - let end = start + size as usize; - bg.buffers - .extend(desc.buffers[start..end].iter().map(|source| { - // Given the restrictions on `BufferBinding::offset`, - // this should never be `None`. - let remaining_size = - wgt::BufferSize::new(source.buffer.size - source.offset); - let binding_size = match ty { - wgt::BufferBindingType::Storage { .. 
} => { - source.size.or(remaining_size) - } - _ => None, - }; - super::BufferResource { - ptr: source.buffer.as_raw(), - offset: source.offset, - dynamic_index: if has_dynamic_offset { - Some(dynamic_offsets_count - 1) - } else { - None - }, - binding_size, - binding_location: layout.binding, + objc::rc::autoreleasepool(|| { + let mut bg = super::BindGroup::default(); + for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) { + let stage_bit = map_naga_stage(stage); + let mut dynamic_offsets_count = 0u32; + let layout_and_entry_iter = desc.entries.iter().map(|entry| { + let layout = desc + .layout + .entries + .iter() + .find(|layout_entry| layout_entry.binding == entry.binding) + .expect("internal error: no layout entry found with binding slot"); + (entry, layout) + }); + for (entry, layout) in layout_and_entry_iter { + // Bindless path + if layout.count.is_some() { + let count = entry.count; + + let stages = conv::map_render_stages(layout.visibility); + let uses = conv::map_resource_usage(&layout.ty); + + // Create argument buffer for this array + let buffer = self.shared.device.lock().new_buffer( + 8 * count as u64, + metal::MTLResourceOptions::HazardTrackingModeUntracked + | metal::MTLResourceOptions::StorageModeShared, + ); + + let contents: &mut [metal::MTLResourceID] = unsafe { + std::slice::from_raw_parts_mut(buffer.contents().cast(), count as usize) + }; + + match layout.ty { + wgt::BindingType::Texture { .. } + | wgt::BindingType::StorageTexture { .. 
} => { + let start = entry.resource_index as usize; + let end = start + count as usize; + let textures = &desc.textures[start..end]; + + for (idx, tex) in textures.iter().enumerate() { + contents[idx] = tex.view.raw.gpu_resource_id(); + + let use_info = bg + .resources_to_use + .entry(tex.view.as_raw().cast()) + .or_default(); + use_info.stages |= stages; + use_info.uses |= uses; + use_info.visible_in_compute |= + layout.visibility.contains(wgt::ShaderStages::COMPUTE); } - })); + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + count as usize; + let samplers = &desc.samplers[start..end]; + + for (idx, &sampler) in samplers.iter().enumerate() { + contents[idx] = sampler.raw.gpu_resource_id(); + // Samplers aren't resources like buffers and textures, so don't + // need to be passed to useResource + } + } + _ => { + unimplemented!(); + } + } + + bg.buffers.push(super::BufferResource { + ptr: unsafe { NonNull::new_unchecked(buffer.as_ptr()) }, + offset: 0, + dynamic_index: None, + binding_size: None, + binding_location: layout.binding, + }); counter.buffers += 1; + + bg.argument_buffers.push(buffer) } - wgt::BindingType::Sampler { .. } => { - let start = entry.resource_index as usize; - let end = start + size as usize; - bg.samplers - .extend(desc.samplers[start..end].iter().map(|samp| samp.as_raw())); - counter.samplers += size; - } - wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { - let start = entry.resource_index as usize; - let end = start + size as usize; - bg.textures.extend( - desc.textures[start..end] - .iter() - .map(|tex| tex.view.as_raw()), - ); - counter.textures += size; + // Bindfull path + else { + if let wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } = layout.ty + { + dynamic_offsets_count += 1; + } + if !layout.visibility.contains(stage_bit) { + continue; + } + match layout.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. 
+ } => { + let start = entry.resource_index as usize; + let end = start + 1; + bg.buffers + .extend(desc.buffers[start..end].iter().map(|source| { + // Given the restrictions on `BufferBinding::offset`, + // this should never be `None`. + let remaining_size = wgt::BufferSize::new( + source.buffer.size - source.offset, + ); + let binding_size = match ty { + wgt::BufferBindingType::Storage { .. } => { + source.size.or(remaining_size) + } + _ => None, + }; + super::BufferResource { + ptr: source.buffer.as_raw(), + offset: source.offset, + dynamic_index: if has_dynamic_offset { + Some(dynamic_offsets_count - 1) + } else { + None + }, + binding_size, + binding_location: layout.binding, + } + })); + counter.buffers += 1; + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + 1; + bg.samplers.extend( + desc.samplers[start..end].iter().map(|samp| samp.as_raw()), + ); + counter.samplers += 1; + } + wgt::BindingType::Texture { .. } + | wgt::BindingType::StorageTexture { .. 
} => { + let start = entry.resource_index as usize; + let end = start + 1; + bg.textures.extend( + desc.textures[start..end] + .iter() + .map(|tex| tex.view.as_raw()), + ); + counter.textures += 1; + } + wgt::BindingType::AccelerationStructure => unimplemented!(), + } } - wgt::BindingType::AccelerationStructure => unimplemented!(), } } - } - self.counters.bind_groups.add(1); + self.counters.bind_groups.add(1); - Ok(bg) + Ok(bg) + }) } unsafe fn destroy_bind_group(&self, _group: super::BindGroup) { diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index fc73446528..448349e2b0 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -26,6 +26,7 @@ mod surface; mod time; use std::{ + collections::HashMap, fmt, iter, ops, ptr::NonNull, sync::{atomic, Arc}, @@ -199,7 +200,7 @@ struct PrivateCapabilities { msaa_apple3: bool, msaa_apple7: bool, resource_heaps: bool, - argument_buffers: bool, + argument_buffers: metal::MTLArgumentBuffersTier, shared_textures: bool, mutable_comparison_samplers: bool, sampler_clamp_to_border: bool, @@ -651,10 +652,23 @@ trait AsNative { fn as_native(&self) -> &Self::Native; } +type ResourcePtr = NonNull; type BufferPtr = NonNull; type TexturePtr = NonNull; type SamplerPtr = NonNull; +impl AsNative for ResourcePtr { + type Native = metal::ResourceRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + impl AsNative for BufferPtr { type Native = metal::BufferRef; #[inline] @@ -710,12 +724,32 @@ struct BufferResource { binding_location: u32, } +#[derive(Debug)] +struct UseResourceInfo { + uses: metal::MTLResourceUsage, + stages: metal::MTLRenderStages, + visible_in_compute: bool, +} + +impl Default for UseResourceInfo { + fn default() -> Self { + Self { + uses: metal::MTLResourceUsage::empty(), + stages: metal::MTLRenderStages::empty(), + 
visible_in_compute: false, + } + } +} + #[derive(Debug, Default)] pub struct BindGroup { counters: MultiStageResourceCounters, buffers: Vec, samplers: Vec, textures: Vec, + + argument_buffers: Vec, + resources_to_use: HashMap, } impl crate::DynBindGroup for BindGroup {}