diff --git a/3rdparty/spirv-cross/main.cpp b/3rdparty/spirv-cross/main.cpp index 5e9b0168c..d8aff1523 100644 --- a/3rdparty/spirv-cross/main.cpp +++ b/3rdparty/spirv-cross/main.cpp @@ -676,6 +676,7 @@ struct CLIArguments bool msl_manual_helper_invocation_updates = true; bool msl_check_discarded_frag_stores = false; bool msl_sample_dref_lod_array_as_grad = false; + bool msl_runtime_array_rich_descriptor = false; const char *msl_combined_sampler_suffix = nullptr; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; @@ -864,7 +865,8 @@ static void print_help_msl() "\t\tRequires MSL 2.0 to be enabled.\n" "\t[--msl-argument-buffer-tier]:\n\t\tWhen using Metal argument buffers, indicate the Metal argument buffer tier level supported by the Metal platform.\n" "\t\tUses same values as Metal MTLArgumentBuffersTier enumeration (0 = Tier1, 1 = Tier2).\n" - "\t\tSetting this value also enables msl-argument-buffers.\n" + "\t\tNOTE: Setting this value no longer enables msl-argument-buffers implicitly.\n" + "\t[--msl-runtime-array-rich-descriptor]:\n\t\tWhen declaring a runtime array of SSBOs, declare an array of {ptr, len} pairs to support OpArrayLength.\n" "\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. 
Otherwise, it is emulated as a normal texture.\n" "\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n" "\t\tEmits [[color(N)]] inputs in fragment stage.\n" @@ -1230,6 +1232,7 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.check_discarded_frag_stores = args.msl_check_discarded_frag_stores; msl_opts.sample_dref_lod_array_as_grad = args.msl_sample_dref_lod_array_as_grad; msl_opts.ios_support_base_vertex_instance = true; + msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor; msl_comp->set_msl_options(msl_opts); for (auto &v : args.msl_discrete_descriptor_sets) msl_comp->add_discrete_descriptor_set(v); @@ -1636,10 +1639,8 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-pad-fragment-output", [&args](CLIParser &) { args.msl_pad_fragment_output = true; }); cbs.add("--msl-domain-lower-left", [&args](CLIParser &) { args.msl_domain_lower_left = true; }); cbs.add("--msl-argument-buffers", [&args](CLIParser &) { args.msl_argument_buffers = true; }); - cbs.add("--msl-argument-buffer-tier", [&args](CLIParser &parser) { - args.msl_argument_buffers_tier = parser.next_uint(); - args.msl_argument_buffers = true; - }); + cbs.add("--msl-argument-buffer-tier", + [&args](CLIParser &parser) { args.msl_argument_buffers_tier = parser.next_uint(); }); cbs.add("--msl-discrete-descriptor-set", [&args](CLIParser &parser) { args.msl_discrete_descriptor_sets.push_back(parser.next_uint()); }); cbs.add("--msl-device-argument-buffer", @@ -1789,6 +1790,8 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-combined-sampler-suffix", [&args](CLIParser &parser) { args.msl_combined_sampler_suffix = parser.next_string(); }); + cbs.add("--msl-runtime-array-rich-descriptor", + [&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", 
[&args](CLIParser &parser) { auto old_name = parser.next_string(); diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp index 49cc83868..c1c6a11d7 100644 --- a/3rdparty/spirv-cross/spirv_cross.cpp +++ b/3rdparty/spirv-cross/spirv_cross.cpp @@ -630,6 +630,14 @@ bool Compiler::is_array(const SPIRType &type) const return !type.array.empty(); } +bool Compiler::is_runtime_size_array(const SPIRType &type) +{ + if (type.array.empty()) + return false; + assert(type.array.size() == type.array_size_literal.size()); + return type.array_size_literal.back() && type.array.back() == 0; +} + ShaderResources Compiler::get_shader_resources() const { return get_shader_resources(nullptr); @@ -995,37 +1003,45 @@ ShaderResources Compiler::get_shader_resources(const unordered_set * { res.shader_record_buffers.push_back({ var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); } - // Images - else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && - type.image.sampled == 2) - { - res.storage_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); - } - // Separate images - else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && - type.image.sampled == 1) - { - res.separate_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); - } - // Separate samplers - else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Sampler) - { - res.separate_samplers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); - } - // Textures - else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::SampledImage) - { - res.sampled_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); - } // Atomic counters else if (type.storage == StorageClassAtomicCounter) { res.atomic_counters.push_back({ var.self, var.basetype, type.self, 
get_name(var.self) }); } - // Acceleration structures - else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructure) + else if (type.storage == StorageClassUniformConstant) { - res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + if (type.basetype == SPIRType::Image) + { + // Images + if (type.image.sampled == 2) + { + res.storage_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Separate images + else if (type.image.sampled == 1) + { + res.separate_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + } + // Separate samplers + else if (type.basetype == SPIRType::Sampler) + { + res.separate_samplers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Textures + else if (type.basetype == SPIRType::SampledImage) + { + res.sampled_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Acceleration structures + else if (type.basetype == SPIRType::AccelerationStructure) + { + res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + else + { + res.gl_plain_uniforms.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } } }); diff --git a/3rdparty/spirv-cross/spirv_cross.hpp b/3rdparty/spirv-cross/spirv_cross.hpp index b99b7ae7a..8f7ba4473 100644 --- a/3rdparty/spirv-cross/spirv_cross.hpp +++ b/3rdparty/spirv-cross/spirv_cross.hpp @@ -94,6 +94,7 @@ struct ShaderResources SmallVector sampled_images; SmallVector atomic_counters; SmallVector acceleration_structures; + SmallVector gl_plain_uniforms; // There can only be one push constant block, // but keep the vector in case this restriction is lifted in the future. 
@@ -682,6 +683,7 @@ protected: bool is_vector(const SPIRType &type) const; bool is_matrix(const SPIRType &type) const; bool is_array(const SPIRType &type) const; + static bool is_runtime_size_array(const SPIRType &type); uint32_t expression_type_id(uint32_t id) const; const SPIRType &expression_type(uint32_t id) const; bool expression_is_lvalue(uint32_t id) const; diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index 690bc9d35..56639f0c0 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -1295,7 +1295,10 @@ string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) if (flags.get(DecorationInvariant) && (options.es || options.version >= 120)) res += "invariant "; if (flags.get(DecorationPerPrimitiveEXT)) - res += "perprimitiveEXT "; + { + res += "perprimitiveEXT "; + require_extension_internal("GL_EXT_mesh_shader"); + } if (flags.get(DecorationExplicitInterpAMD)) { @@ -1486,6 +1489,10 @@ const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format) return "rg8i"; case ImageFormatR16i: return "r16i"; + case ImageFormatR64i: + return "r64i"; + case ImageFormatR64ui: + return "r64ui"; default: case ImageFormatUnknown: return nullptr; @@ -3155,6 +3162,10 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) should_force = true; } + // Either glslang bug or oversight, but global invariant position does not work in mesh shaders. + if (get_execution_model() == ExecutionModelMeshEXT && position_invariant) + should_force = true; + return should_force; } @@ -3295,11 +3306,13 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo auto &m = ir.meta[var.self].decoration; if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { + // For mesh/tesc output, Clip/Cull is an array-of-array. Look at innermost array type + // for correct result. 
global_builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) - cull_distance_size = to_array_size_literal(type); + cull_distance_size = to_array_size_literal(type, 0); else if (m.builtin_type == BuiltInClipDistance) - clip_distance_size = to_array_size_literal(type); + clip_distance_size = to_array_size_literal(type, 0); if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) && m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset)) @@ -3403,6 +3416,8 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo auto itr = builtin_xfb_offsets.find(BuiltInPosition); if (itr != end(builtin_xfb_offsets)) statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;"); + else if (position_invariant) + statement("invariant vec4 gl_Position;"); else statement("vec4 gl_Position;"); } @@ -3499,6 +3514,8 @@ void CompilerGLSL::emit_resources() break; } + bool global_invariant_position = position_invariant && (options.es || options.version >= 120); + // Emit custom gl_PerVertex for SSO compatibility. 
if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment) { @@ -3509,11 +3526,13 @@ void CompilerGLSL::emit_resources() case ExecutionModelTessellationEvaluation: emit_declared_builtin_block(StorageClassInput, execution.model); emit_declared_builtin_block(StorageClassOutput, execution.model); + global_invariant_position = false; break; case ExecutionModelVertex: case ExecutionModelMeshEXT: emit_declared_builtin_block(StorageClassOutput, execution.model); + global_invariant_position = false; break; default: @@ -3523,6 +3542,7 @@ void CompilerGLSL::emit_resources() else if (should_force_emit_builtin_block(StorageClassOutput)) { emit_declared_builtin_block(StorageClassOutput, execution.model); + global_invariant_position = false; } else if (execution.geometry_passthrough) { @@ -3543,7 +3563,7 @@ void CompilerGLSL::emit_resources() statement(""); } - if (position_invariant && (options.es || options.version >= 120)) + if (global_invariant_position) { statement("invariant gl_Position;"); statement(""); @@ -10051,8 +10071,14 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice switch (builtin) { - // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. - // case BuiltInClipDistance: + case BuiltInCullDistance: + case BuiltInClipDistance: + if (type->array.size() == 1) // Red herring. Only consider block IO for two-dimensional arrays here. 
+ { + append_index(index, is_literal); + break; + } + // fallthrough case BuiltInPosition: case BuiltInPointSize: if (mesh_shader) @@ -15345,6 +15371,14 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) switch (imagetype.basetype) { + case SPIRType::Int64: + res = "i64"; + require_extension_internal("GL_EXT_shader_image_int64"); + break; + case SPIRType::UInt64: + res = "u64"; + require_extension_internal("GL_EXT_shader_image_int64"); + break; case SPIRType::Int: case SPIRType::Short: case SPIRType::SByte: diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index de8d530a0..363d4852f 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -1354,23 +1354,54 @@ void CompilerMSL::emit_entry_point_declarations() } } - // Emit buffer arrays here. - for (uint32_t array_id : buffer_arrays_discrete) + bool has_runtime_array_declaration = false; + for (SPIRVariable *arg : entry_point_bindings) { - const auto &var = get(array_id); + const auto &var = *arg; const auto &type = get_variable_data_type(var); const auto &buffer_type = get_variable_element_type(var); - string name = to_name(array_id); - statement(get_argument_address_space(var), " ", type_to_glsl(buffer_type), "* ", to_restrict(array_id, true), name, - "[] ="); - begin_scope(); - for (uint32_t i = 0; i < to_array_size_literal(type); ++i) - statement(name, "_", i, ","); - end_scope_decl(); - statement_no_indent(""); + const string name = to_name(var.self); + if (is_runtime_size_array(type)) + { + if (msl_options.argument_buffers_tier < Options::ArgumentBuffersTier::Tier2) + { + SPIRV_CROSS_THROW("Unsized array of descriptors requires argument buffer tier 2"); + } + switch (type.basetype) + { + case SPIRType::Image: + case SPIRType::Sampler: + case SPIRType::AccelerationStructure: + statement("spvDescriptorArray<", type_to_glsl(buffer_type), "> ", name, " {", name, "_};"); + break; + case SPIRType::SampledImage: + 
statement("spvDescriptorArray<", type_to_glsl(buffer_type), "> ", name, " {", name, "_};"); + statement("spvDescriptorArray ", name, "Smplr {", name, "Smplr_};"); + break; + case SPIRType::Struct: + statement("spvDescriptorArray<", get_argument_address_space(var), " ", type_to_glsl(buffer_type), "*> ", + name, " {", name, "_};"); + break; + default: + break; + } + has_runtime_array_declaration = true; + } + else if (!type.array.empty() && type.basetype == SPIRType::Struct) + { + // Emit only buffer arrays here. + statement(get_argument_address_space(var), " ", type_to_glsl(buffer_type), "* ", + to_restrict(var.self, true), name, "[] ="); + begin_scope(); + for (uint32_t i = 0; i < to_array_size_literal(type); ++i) + statement(name, "_", i, ","); + end_scope_decl(); + statement_no_indent(""); + } } - // Discrete descriptors are processed in entry point emission every compiler iteration. - buffer_arrays_discrete.clear(); + + if (has_runtime_array_declaration) + statement_no_indent(""); // Emit buffer aliases here. 
for (auto &var_id : buffer_aliases_discrete) @@ -7217,6 +7248,70 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplVariableDescriptor: + statement("template"); + statement("struct spvDescriptor"); + begin_scope(); + statement("T value;"); + end_scope_decl(); + statement(""); + break; + + case SPVFuncImplVariableSizedDescriptor: + statement("template"); + statement("struct spvBufferDescriptor"); + begin_scope(); + statement("T value;"); + statement("int length;"); + statement("const device T& operator -> () const device"); + begin_scope(); + statement("return value;"); + end_scope(); + statement("const device T& operator * () const device"); + begin_scope(); + statement("return value;"); + end_scope(); + end_scope_decl(); + statement(""); + break; + + case SPVFuncImplVariableDescriptorArray: + statement("template"); + statement("struct spvDescriptorArray"); + begin_scope(); + statement("spvDescriptorArray(const device spvDescriptor* ptr) : ptr(ptr)"); + begin_scope(); + end_scope(); + statement("const device T& operator [] (size_t i) const"); + begin_scope(); + statement("return ptr[i].value;"); + end_scope(); + statement("const device spvDescriptor* ptr;"); + end_scope_decl(); + statement(""); + + if (msl_options.runtime_array_rich_descriptor) + { + statement("template"); + statement("struct spvDescriptorArray"); + begin_scope(); + statement("spvDescriptorArray(const device spvBufferDescriptor* ptr) : ptr(ptr)"); + begin_scope(); + end_scope(); + statement("const device T* operator [] (size_t i) const"); + begin_scope(); + statement("return ptr[i].value;"); + end_scope(); + statement("const int length(int i) const"); + begin_scope(); + statement("return ptr[i].length;"); + end_scope(); + statement("const device spvBufferDescriptor* ptr;"); + end_scope_decl(); + statement(""); + } + break; + default: break; } @@ -8675,9 +8770,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) // Mark that this shader reads from 
this image uint32_t img_id = ops[2]; auto &type = expression_type(img_id); + auto *p_var = maybe_get_backing_variable(img_id); if (type.image.dim != DimSubpassData) { - auto *p_var = maybe_get_backing_variable(img_id); if (p_var && has_decoration(p_var->self, DecorationNonReadable)) { unset_decoration(p_var->self, DecorationNonReadable); @@ -8685,6 +8780,10 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } } + // Metal requires explicit fences to break up RAW hazards, even within the same shader invocation + if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable)) + statement(to_expression(img_id), ".fence();"); + emit_texture_op(instruction, false); break; } @@ -10380,7 +10479,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) // Manufacture automatic sampler arg for SampledImage texture if (arg_type.image.dim != DimBuffer) { - if (arg_type.array.empty()) + if (arg_type.array.empty() || is_runtime_size_array(arg_type)) { decl += join(", ", sampler_type(arg_type, arg.id), " ", to_sampler_expression(name_id)); } @@ -11568,6 +11667,19 @@ string CompilerMSL::to_buffer_size_expression(uint32_t id) { auto buffer_expr = expr.substr(0, index); auto array_expr = expr.substr(index); + if (auto var = maybe_get_backing_variable(id)) + { + auto &var_type = get(var->basetype); + if (is_runtime_size_array(var_type)) + { + if (!msl_options.runtime_array_rich_descriptor) + SPIRV_CROSS_THROW("OpArrayLength requires rich descriptor format"); + + auto last_pos = array_expr.find_last_of(']'); + if (last_pos != std::string::npos) + return buffer_expr + ".length(" + array_expr.substr(1, last_pos - 1) + ")"; + } + } return buffer_expr + buffer_size_name_suffix + array_expr; } } @@ -12628,7 +12740,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) else ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id); - ep_args += " [[" + 
builtin_qualifier(bi_type); + ep_args += string(" [[") + builtin_qualifier(bi_type); if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage)) { if (!msl_options.supports_msl_version(2)) @@ -12944,6 +13056,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) SmallVector resources; + entry_point_bindings.clear(); ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && @@ -12973,10 +13086,6 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) (resource.var->storage == StorageClassUniform || resource.var->storage == StorageClassStorageBuffer)) { - // Possible, but horrible to implement, ignore for now. - if (!type.array.empty()) - SPIRV_CROSS_THROW("Aliasing arrayed discrete descriptors is currently not supported."); - descriptor_alias = resource.var; // Self-reference marks that we should declare the resource, // and it's being used as an alias (so we can emit void* instead). 
@@ -13015,6 +13124,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) plane_count = constexpr_sampler->planes; + entry_point_bindings.push_back(&var); for (uint32_t i = 0; i < plane_count; i++) resources.push_back({ &var, descriptor_alias, to_name(var_id), SPIRType::Image, get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index }); @@ -13035,15 +13145,16 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) if (!descriptor_alias) resource_index = get_metal_resource_index(var, type.basetype); + entry_point_bindings.push_back(&var); resources.push_back({ &var, descriptor_alias, to_name(var_id), type.basetype, resource_index, 0, secondary_index }); } } }); - stable_sort(resources.begin(), resources.end(), [](const Resource &lhs, const Resource &rhs) { - return tie(lhs.basetype, lhs.index) < tie(rhs.basetype, rhs.index); - }); + stable_sort(resources.begin(), resources.end(), + [](const Resource &lhs, const Resource &rhs) + { return tie(lhs.basetype, lhs.index) < tie(rhs.basetype, rhs.index); }); for (auto &r : resources) { @@ -13091,23 +13202,45 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) // later. 
uint32_t array_size = to_array_size_literal(type); - if (array_size == 0) - SPIRV_CROSS_THROW("Unsized arrays of buffers are not supported in MSL."); - - // Allow Metal to use the array template to make arrays a value type is_using_builtin_array = true; - buffer_arrays_discrete.push_back(var_id); - for (uint32_t i = 0; i < array_size; ++i) + if (is_runtime_size_array(type)) { + add_spv_func_and_recompile(SPVFuncImplVariableDescriptorArray); if (!ep_args.empty()) ep_args += ", "; - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id, true) + - r.name + "_" + convert_to_string(i); - ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")"; + const bool ssbo = has_decoration(type.self, DecorationBufferBlock); + if ((var.storage == spv::StorageClassStorageBuffer || ssbo) && + msl_options.runtime_array_rich_descriptor) + { + add_spv_func_and_recompile(SPVFuncImplVariableSizedDescriptor); + ep_args += "const device spvBufferDescriptor<" + get_argument_address_space(var) + " " + + type_to_glsl(type) + "*>* "; + } + else + { + ep_args += "const device spvDescriptor<" + get_argument_address_space(var) + " " + + type_to_glsl(type) + "*>* "; + } + ep_args += to_restrict(var_id, true) + r.name + "_"; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; if (interlocked_resources.count(var_id)) ep_args += ", raster_order_group(0)"; ep_args += "]]"; } + else + { + for (uint32_t i = 0; i < array_size; ++i) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + + to_restrict(var_id, true) + r.name + "_" + convert_to_string(i); + ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + } is_using_builtin_array = false; } else @@ -13127,7 +13260,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) if 
(!ep_args.empty()) ep_args += ", "; ep_args += sampler_type(type, var_id) + " " + r.name; - ep_args += " [[sampler(" + convert_to_string(r.index) + ")]]"; + if (is_runtime_size_array(type)) + ep_args += "_ [[buffer(" + convert_to_string(r.index) + ")]]"; + else + ep_args += " [[sampler(" + convert_to_string(r.index) + ")]]"; break; case SPIRType::Image: { @@ -13141,7 +13277,12 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) ep_args += image_type_glsl(type, var_id) + " " + r.name; if (r.plane > 0) ep_args += join(plane_name_suffix, r.plane); - ep_args += " [[texture(" + convert_to_string(r.index) + ")"; + + if (is_runtime_size_array(type)) + ep_args += "_ [[buffer(" + convert_to_string(r.index) + ")"; + else + ep_args += " [[texture(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) ep_args += ", raster_order_group(0)"; ep_args += "]]"; @@ -13167,9 +13308,22 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) break; } case SPIRType::AccelerationStructure: - ep_args += ", " + type_to_glsl(type, var_id) + " " + r.name; - ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + { + if (is_runtime_size_array(type)) + { + add_spv_func_and_recompile(SPVFuncImplVariableDescriptor); + const auto &parent_type = get(type.parent_type); + ep_args += ", const device spvDescriptor<" + type_to_glsl(parent_type) + ">* " + + to_restrict(var_id, true) + r.name + "_"; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + } + else + { + ep_args += ", " + type_to_glsl(type, var_id) + " " + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + } break; + } default: if (!ep_args.empty()) ep_args += ", "; @@ -13254,25 +13408,27 @@ void CompilerMSL::fix_up_shader_inputs_outputs() { if (buffer_requires_array_length(var.self)) { - entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() { - bool is_array_type = !type.array.empty(); + 
entry_func.fixup_hooks_in.push_back( + [this, &type, &var, var_id]() + { + bool is_array_type = !type.array.empty() && !is_runtime_size_array(type); - uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); - if (descriptor_set_is_argument_buffer(desc_set)) - { - statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id), - is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]), - ".spvBufferSizeConstants", "[", - convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];"); - } - else - { - // If we have an array of images, we need to be able to index into it, so take a pointer instead. - statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id), - is_array_type ? " = &" : " = ", to_name(buffer_size_buffer_id), "[", - convert_to_string(get_metal_resource_index(var, type.basetype)), "];"); - } - }); + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + if (descriptor_set_is_argument_buffer(desc_set)) + { + statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id), + is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]), + ".spvBufferSizeConstants", "[", + convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];"); + } + else + { + // If we have an array of images, we need to be able to index into it, so take a pointer instead. + statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id), + is_array_type ? 
" = &" : " = ", to_name(buffer_size_buffer_id), "[", + convert_to_string(get_metal_resource_index(var, type.basetype)), "];"); + } + }); } } }); @@ -13847,8 +14003,6 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) binding_stride *= to_array_size_literal(type, i); - assert(binding_stride != 0); - // If a binding has not been specified, revert to incrementing resource indices. uint32_t resource_index; @@ -13860,6 +14014,11 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base } else { + if (is_runtime_size_array(type)) + { + basetype = SPIRType::Struct; + binding_stride = 1; + } // Allocate from plain bindings which are allocated per resource type. switch (basetype) { @@ -13962,6 +14121,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) bool type_is_image = type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler; + bool type_is_tlas = type.basetype == SPIRType::AccelerationStructure; // For opaque types we handle const later due to descriptor address spaces. const char *cv_qualifier = (constref && !type_is_image) ? 
"const " : ""; @@ -14012,6 +14172,18 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) else decl = join(cv_qualifier, type_to_glsl(type, arg.id)); } + else if (is_runtime_size_array(type)) + { + const auto *parent_type = &get(type.parent_type); + auto type_name = type_to_glsl(*parent_type, arg.id); + if (type.basetype == SPIRType::AccelerationStructure) + decl = join("spvDescriptorArray<", type_name, ">"); + else if (type_is_image) + decl = join("spvDescriptorArray<", cv_qualifier, type_name, ">"); + else + decl = join("spvDescriptorArray<", address_space, " ", type_name, "*>"); + address_space = "const"; + } else if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) && is_array(type)) { is_using_builtin_array = true; @@ -14033,7 +14205,9 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += join(" ", cv_qualifier); } else + { decl = join(cv_qualifier, type_to_glsl(type, arg.id)); + } } if (!builtin && !is_pointer && @@ -14100,6 +14274,10 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += join("[", array_size, "]"); } } + else if (is_runtime_size_array(type)) + { + decl += " " + to_expression(name_id); + } else { auto array_size_decl = type_to_array_glsl(type); @@ -14123,7 +14301,8 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } } } - else if (!type_is_image && (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct)) + else if (!type_is_image && !type_is_tlas && + (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct)) { // If this is going to be a reference to a variable pointer, the address space // for the reference has to go before the '&', but after the '*'. 
@@ -14143,9 +14322,13 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += to_restrict(name_id, true); decl += to_expression(name_id); } - else if (type_is_image) + else if (type_is_image || type_is_tlas) { - if (type.array.empty()) + if (is_runtime_size_array(type)) + { + decl = address_space + " " + decl + " " + to_expression(name_id); + } + else if (type.array.empty()) { // For non-arrayed types we can just pass opaque descriptors by value. // This fixes problems if descriptors are passed by value from argument buffers and plain descriptors @@ -14570,7 +14753,8 @@ string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uin { // Only allow -> dereference for block types. This is so we get expressions like // buffer[i]->first_member.second_member, rather than buffer[i]->first->second. - bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + const bool is_block = + has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); bool is_buffer_variable = is_block && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer); @@ -14927,7 +15111,14 @@ std::string CompilerMSL::sampler_type(const SPIRType &type, uint32_t id) array_size = get_resource_array_size(id); if (array_size == 0) - SPIRV_CROSS_THROW("Unsized array of samplers is not supported in MSL."); + { + add_spv_func_and_recompile(SPVFuncImplVariableDescriptor); + add_spv_func_and_recompile(SPVFuncImplVariableDescriptorArray); + auto &parent = get(get_pointee_type(type).parent_type); + if (processing_entry_point) + return join("const device spvDescriptor<", sampler_type(parent, id), ">*"); + return join("const spvDescriptorArray<", sampler_type(parent, id), ">"); + } auto &parent = get(get_pointee_type(type).parent_type); return join("array<", sampler_type(parent, id), ", ", array_size, ">"); @@ -14973,7 +15164,12 @@ string 
CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) array_size = get_resource_array_size(id); if (array_size == 0) - SPIRV_CROSS_THROW("Unsized array of images is not supported in MSL."); + { + add_spv_func_and_recompile(SPVFuncImplVariableDescriptor); + add_spv_func_and_recompile(SPVFuncImplVariableDescriptorArray); + auto &parent = get(get_pointee_type(type).parent_type); + return join("const device spvDescriptor<", image_type_glsl(parent, id), ">*"); + } auto &parent = get(get_pointee_type(type).parent_type); return join("array<", image_type_glsl(parent, id), ", ", array_size, ">"); @@ -16026,8 +16222,8 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma) // Handle HLSL-style 0-based vertex/instance index. builtin_declaration = true; bi_arg += builtin_type_decl(builtin); - bi_arg += " " + builtin_to_glsl(builtin, StorageClassInput); - bi_arg += " [[" + builtin_qualifier(builtin) + "]]"; + bi_arg += string(" ") + builtin_to_glsl(builtin, StorageClassInput); + bi_arg += string(" [[") + builtin_qualifier(builtin) + string("]]"); builtin_declaration = false; return bi_arg; @@ -16474,8 +16670,12 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui case OpArrayLength: { auto *var = compiler.maybe_get_backing_variable(args[2]); - if (var) - compiler.buffers_requiring_array_length.insert(var->self); + if (var != nullptr) + { + auto &type = compiler.get(var->basetype); + if (!is_runtime_size_array(type)) + compiler.buffers_requiring_array_length.insert(var->self); + } break; } diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp index 57993978b..26167f673 100644 --- a/3rdparty/spirv-cross/spirv_msl.hpp +++ b/3rdparty/spirv-cross/spirv_msl.hpp @@ -358,6 +358,9 @@ public: // Tier capabilities based on recommendations from Apple engineering. 
ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1; + // Enables specific argument buffer format with extra information to track SSBO-length + bool runtime_array_rich_descriptor = false; + // Ensures vertex and instance indices start at zero. This reflects the behavior of HLSL with SV_VertexID and SV_InstanceID. bool enable_base_index_zero = false; @@ -496,6 +499,12 @@ public: // so it can be enabled only when the bug is present. bool sample_dref_lod_array_as_grad = false; + // MSL doesn't guarantee coherence between writes and subsequent reads of read_write textures. + // This inserts fences before each read of a read_write texture to ensure coherency. + // If you're sure you never rely on this, you can set this to false for a possible performance improvement. + // Note: Only Apple's GPU compiler takes advantage of the lack of coherency, so make sure to test on Apple GPUs if you disable this. + bool readwrite_texture_fences = true; + bool is_ios() const { return platform == iOS; @@ -796,6 +805,9 @@ protected: SPVFuncImplConvertYCbCrBT2020, SPVFuncImplDynamicImageSampler, SPVFuncImplRayQueryIntersectionParams, + SPVFuncImplVariableDescriptor, + SPVFuncImplVariableSizedDescriptor, + SPVFuncImplVariableDescriptorArray, }; // If the underlying resource has been used for comparison then duplicate loads of that resource must be too @@ -1178,12 +1190,13 @@ protected: const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const; std::unordered_set buffers_requiring_array_length; - SmallVector buffer_arrays_discrete; SmallVector> buffer_aliases_argument; SmallVector buffer_aliases_discrete; std::unordered_set atomic_image_vars; // Emulate texture2D atomic operations std::unordered_set pull_model_inputs; + SmallVector entry_point_bindings; + // Must be ordered since array is in a specific order. std::map> buffers_requiring_dynamic_offset;