From b7fb619125ba3059775fcad8d80fb5428585378e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?= =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?= Date: Sun, 1 Nov 2020 21:41:59 -0800 Subject: [PATCH] Updated spirv-cross. --- 3rdparty/spirv-cross/main.cpp | 11 +- 3rdparty/spirv-cross/spirv_common.hpp | 42 --- 3rdparty/spirv-cross/spirv_cross_c.cpp | 4 +- 3rdparty/spirv-cross/spirv_cross_c.h | 4 + 3rdparty/spirv-cross/spirv_glsl.cpp | 118 ++++++- 3rdparty/spirv-cross/spirv_glsl.hpp | 5 + 3rdparty/spirv-cross/spirv_msl.cpp | 472 ++++++++++++++++++++----- 3rdparty/spirv-cross/spirv_msl.hpp | 12 +- 8 files changed, 526 insertions(+), 142 deletions(-) diff --git a/3rdparty/spirv-cross/main.cpp b/3rdparty/spirv-cross/main.cpp index f0660bea2..e8f14be27 100644 --- a/3rdparty/spirv-cross/main.cpp +++ b/3rdparty/spirv-cross/main.cpp @@ -565,6 +565,7 @@ struct CLIArguments bool msl_arrayed_subpass_input = false; uint32_t msl_r32ui_linear_texture_alignment = 4; uint32_t msl_r32ui_alignment_constant_id = 65535; + bool msl_texture_1d_as_2d = false; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; bool glsl_force_flattened_io_blocks = false; @@ -728,7 +729,7 @@ static void print_help_msl() "\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. 
Otherwise, it is emulated as a normal texture.\n" "\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n" "\t\tEmits [[color(N)]] inputs in fragment stage.\n" - "\t\tRequires iOS Metal.\n" + "\t\tRequires an Apple GPU.\n" "\t[--msl-emulate-cube-array]:\n\t\tEmulate cube arrays with 2D array and manual math.\n" "\t[--msl-discrete-descriptor-set ]:\n\t\tWhen using argument buffers, forces a specific descriptor set to be implemented without argument buffers.\n" "\t\tUseful for implementing push descriptors in emulation layers.\n" @@ -774,7 +775,9 @@ static void print_help_msl() "\t[--msl-r32ui-linear-texture-align ]:\n\t\tThe required alignment of linear textures of format MTLPixelFormatR32Uint.\n" "\t\tThis is used to align the row stride for atomic accesses to such images.\n" "\t[--msl-r32ui-linear-texture-align-constant-id ]:\n\t\tThe function constant ID to use for the linear texture alignment.\n" - "\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n"); + "\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n" + "\t[--msl-texture-1d-as-2d]:\n\t\tEmit Image variables of dimension Dim1D as texture2d.\n" + "\t\tIn Metal, 1D textures do not support all features that 2D textures do. 
Use this option if your code relies on these features.\n"); // clang-format on } @@ -991,9 +994,9 @@ static string compile_iteration(const CLIArguments &args, std::vector if (args.msl_ios) { msl_opts.platform = CompilerMSL::Options::iOS; - msl_opts.ios_use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch; msl_opts.emulate_cube_array = args.msl_emulate_cube_array; } + msl_opts.use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch; msl_opts.pad_fragment_output_components = args.msl_pad_fragment_output; msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left; msl_opts.argument_buffers = args.msl_argument_buffers; @@ -1015,6 +1018,7 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.arrayed_subpass_input = args.msl_arrayed_subpass_input; msl_opts.r32ui_linear_texture_alignment = args.msl_r32ui_linear_texture_alignment; msl_opts.r32ui_alignment_constant_id = args.msl_r32ui_alignment_constant_id; + msl_opts.texture_1D_as_2D = args.msl_texture_1d_as_2d; msl_comp->set_msl_options(msl_opts); for (auto &v : args.msl_discrete_descriptor_sets) msl_comp->add_discrete_descriptor_set(v); @@ -1439,6 +1443,7 @@ static int main_inner(int argc, char *argv[]) [&args](CLIParser &parser) { args.msl_r32ui_linear_texture_alignment = parser.next_uint(); }); cbs.add("--msl-r32ui-linear-texture-align-constant-id", [&args](CLIParser &parser) { args.msl_r32ui_alignment_constant_id = parser.next_uint(); }); + cbs.add("--msl-texture-1d-as-2d", [&args](CLIParser &) { args.msl_texture_1d_as_2d = true; }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); diff --git a/3rdparty/spirv-cross/spirv_common.hpp b/3rdparty/spirv-cross/spirv_common.hpp index 53df37616..c69c052b8 100644 --- a/3rdparty/spirv-cross/spirv_common.hpp +++ b/3rdparty/spirv-cross/spirv_common.hpp @@ -357,28 +357,6 @@ 
public: return TypedID(*this); } - bool operator==(const TypedID &other) const - { - return id == other.id; - } - - bool operator!=(const TypedID &other) const - { - return id != other.id; - } - - template - bool operator==(const TypedID &other) const - { - return id == uint32_t(other); - } - - template - bool operator!=(const TypedID &other) const - { - return id != uint32_t(other); - } - private: uint32_t id = 0; }; @@ -403,26 +381,6 @@ public: return id; } - bool operator==(const TypedID &other) const - { - return id == other.id; - } - - bool operator!=(const TypedID &other) const - { - return id != other.id; - } - - bool operator==(const TypedID &other) const - { - return id == uint32_t(other); - } - - bool operator!=(const TypedID &other) const - { - return id != uint32_t(other); - } - private: uint32_t id = 0; }; diff --git a/3rdparty/spirv-cross/spirv_cross_c.cpp b/3rdparty/spirv-cross/spirv_cross_c.cpp index cbe3a5585..59a9c9186 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.cpp +++ b/3rdparty/spirv-cross/spirv_cross_c.cpp @@ -599,8 +599,8 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c options->msl.enable_base_index_zero = value != 0; break; - case SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS: - options->msl.ios_use_framebuffer_fetch_subpasses = value != 0; + case SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS: + options->msl.use_framebuffer_fetch_subpasses = value != 0; break; case SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH: diff --git a/3rdparty/spirv-cross/spirv_cross_c.h b/3rdparty/spirv-cross/spirv_cross_c.h index f52abcda9..d6802a1fd 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.h +++ b/3rdparty/spirv-cross/spirv_cross_c.h @@ -606,7 +606,11 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D = 44 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO = 45 | 
SPVC_COMPILER_OPTION_MSL_BIT, + + /* Obsolete. Use MSL_FRAMEBUFFER_FETCH_SUBPASS instead. */ SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH = 47 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT, diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index e5db12f3b..6a7d7b2ea 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -511,6 +511,7 @@ string CompilerGLSL::compile() { // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers backend.nonuniform_qualifier = ""; + backend.needs_row_major_load_workaround = true; } backend.force_gl_in_out_block = true; backend.supports_extensions = true; @@ -3798,6 +3799,17 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) statement(""); } } + + if (!workaround_ubo_load_overload_types.empty()) + { + for (auto &type_id : workaround_ubo_load_overload_types) + { + auto &type = get(type_id); + statement(type_to_glsl(type), " SPIRV_Cross_workaround_load_row_major(", type_to_glsl(type), + " wrap) { return wrap; }"); + } + statement(""); + } } // Returns a string representation of the ID, usable as a function arg. 
@@ -9496,11 +9508,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (forward && ptr_expression) ptr_expression->need_transpose = old_need_transpose; + bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0; + + if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened) + rewrite_load_for_wrapped_row_major(expr, result_type, ptr); + // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. // However, if we try to load a complex, composite object from a flattened buffer, // we should avoid emitting the same code over and over and lower the result to a temporary. - bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 && - (type.basetype == SPIRType::Struct || (type.columns > 1)); + bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1)); SPIRExpression *e = nullptr; if (!forward && expression_is_non_value_type_array(ptr)) @@ -13253,8 +13269,14 @@ void CompilerGLSL::branch(BlockID from, BlockID to) // and end the chain here. statement("continue;"); } - else if (is_break(to)) + else if (from != to && is_break(to)) { + // We cannot break to ourselves, so check explicitly for from != to. + // This case can trigger if a loop header is all three of these things: + // - Continue block + // - Loop header + // - Break merge target all at once ... + // Very dirty workaround. // Switch constructs are able to break, but they cannot break out of a loop at the same time. // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, @@ -14578,7 +14600,35 @@ void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::str // so we might have to fixup the OpLoad-ed expression late. 
auto start_array_index = expr.find_first_of('['); - auto end_array_index = expr.find_last_of(']'); + + if (start_array_index == string::npos) + return; + + // Check for the edge case that a non-arrayed resource was marked to be nonuniform, + // and the bracket we found is actually part of non-resource related data. + if (expr.find_first_of(',') < start_array_index) + return; + + // We've opened a bracket, track expressions until we can close the bracket. + // This must be our image index. + size_t end_array_index = string::npos; + unsigned bracket_count = 1; + for (size_t index = start_array_index + 1; index < expr.size(); index++) + { + if (expr[index] == ']') + { + if (--bracket_count == 0) + { + end_array_index = index; + break; + } + } + else if (expr[index] == '[') + bracket_count++; + } + + assert(bracket_count == 0); + // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's // nothing we can do here to express that. if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) @@ -15087,3 +15137,63 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result() weights[KHR_shader_subgroup_basic] = big_num; weights[KHR_shader_subgroup_vote] = big_num; } + +void CompilerGLSL::request_workaround_wrapper_overload(TypeID id) +{ + // Must be ordered to maintain deterministic output, so vector is appropriate. + if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) == + end(workaround_ubo_load_overload_types)) + { + force_recompile(); + workaround_ubo_load_overload_types.push_back(id); + } +} + +void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr) +{ + // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic. 
+ // To load these types correctly, we must first wrap them in a dummy function whose only purpose is to + // ensure row_major decoration is actually respected. + auto *var = maybe_get_backing_variable(ptr); + if (!var) + return; + + auto &backing_type = get<SPIRType>(var->basetype); + bool is_ubo = backing_type.basetype == SPIRType::Struct && + backing_type.storage == StorageClassUniform && + has_decoration(backing_type.self, DecorationBlock); + if (!is_ubo) + return; + + auto *type = &get<SPIRType>(loaded_type); + bool rewrite = false; + + if (is_matrix(*type)) + { + // To avoid adding a lot of unnecessary meta tracking to forward the row_major state, + // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state. + // If there is any row-major action going on, we apply the workaround. + // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution. + // If an access chain occurred, the workaround is not required, so loading vectors or scalars doesn't need the workaround. + type = &backing_type; + } + + if (type->basetype == SPIRType::Struct) + { + // If we're loading a struct where any member is a row-major matrix, apply the workaround. 
+ for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++) + { + if (combined_decoration_for_member(*type, i).get(DecorationRowMajor)) + { + rewrite = true; + break; + } + } + } + + if (rewrite) + { + request_workaround_wrapper_overload(loaded_type); + expr = join("SPIRV_Cross_workaround_load_row_major(", expr, ")"); + } +} diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp index d81254561..d52f786e8 100644 --- a/3rdparty/spirv-cross/spirv_glsl.hpp +++ b/3rdparty/spirv-cross/spirv_glsl.hpp @@ -560,6 +560,7 @@ protected: bool support_small_type_sampling_result = false; bool support_case_fallthrough = true; bool use_array_constructor = false; + bool needs_row_major_load_workaround = false; } backend; void emit_struct(SPIRType &type); @@ -784,6 +785,10 @@ protected: // Currently used by NMin/Max/Clamp implementations. std::unordered_map extra_sub_expressions; + SmallVector workaround_ubo_load_overload_types; + void request_workaround_wrapper_overload(TypeID id); + void rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr); + uint32_t statement_count = 0; inline bool is_legacy() const diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index 629b873f6..e82637fc4 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -160,7 +160,7 @@ void CompilerMSL::build_implicit_builtins() bool need_sample_mask = msl_options.additional_fixed_sample_mask != 0xffffffff; if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || - needs_subgroup_invocation_id || need_sample_mask) + needs_subgroup_invocation_id || needs_subgroup_size || need_sample_mask) { bool has_frag_coord = false; bool has_sample_id = false; @@ -197,7 +197,7 @@ void CompilerMSL::build_implicit_builtins() if (var.storage != 
StorageClassInput) return; - if (need_subpass_input && (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses)) + if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses)) { switch (builtin) { @@ -287,7 +287,7 @@ void CompilerMSL::build_implicit_builtins() has_subgroup_invocation_id = true; } - if (need_subgroup_ge_mask && builtin == BuiltInSubgroupSize) + if ((need_subgroup_ge_mask || needs_subgroup_size) && builtin == BuiltInSubgroupSize) { builtin_subgroup_size_id = var.self; mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var.self); @@ -331,7 +331,7 @@ void CompilerMSL::build_implicit_builtins() // Use Metal's native frame-buffer fetch API for subpass inputs. if ((!has_frag_coord || (msl_options.multiview && !has_view_idx) || (msl_options.arrayed_subpass_input && !msl_options.multiview && !has_layer)) && - (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses) && need_subpass_input) + (!msl_options.use_framebuffer_fetch_subpasses) && need_subpass_input) { if (!has_frag_coord) { @@ -593,7 +593,7 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id); } - if (!has_subgroup_size && need_subgroup_ge_mask) + if (!has_subgroup_size && (need_subgroup_ge_mask || needs_subgroup_size)) { uint32_t offset = ir.increase_bound_by(2); uint32_t type_ptr_id = offset; @@ -1265,7 +1265,8 @@ void CompilerMSL::preprocess_op_codes() add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\""); } - // Metal vertex functions that write to resources must disable rasterization and return void. + // Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to + // resources must disable rasterization and return void. 
if (preproc.uses_resource_write) is_rasterization_disabled = true; @@ -1280,6 +1281,8 @@ void CompilerMSL::preprocess_op_codes() if (preproc.needs_subgroup_invocation_id) needs_subgroup_invocation_id = true; + if (preproc.needs_subgroup_size) + needs_subgroup_size = true; } // Move the Private and Workgroup global variables to the entry function. @@ -1372,7 +1375,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: // Use Metal's native frame-buffer fetch API for subpass inputs. auto &type = get(ops[0]); if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && - (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses)) + (!msl_options.use_framebuffer_fetch_subpasses)) { // Implicitly reads gl_FragCoord. assert(builtin_frag_coord_id != 0); @@ -4608,6 +4611,59 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplSubgroupBroadcast: + // Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting + // them as integers. 
+ statement("template<typename T>"); + statement("inline T spvSubgroupBroadcast(T value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return quad_broadcast(value, lane);"); + else + statement("return simd_broadcast(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return !!quad_broadcast((ushort)value, lane);"); + else + statement("return !!simd_broadcast((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template<uint N>"); + statement("inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);"); + else + statement("return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBroadcastFirst: + statement("template<typename T>"); + statement("inline T spvSubgroupBroadcastFirst(T value)"); + begin_scope(); + statement("return simd_broadcast_first(value);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupBroadcastFirst(bool value)"); + begin_scope(); + statement("return !!simd_broadcast_first((ushort)value);"); + end_scope(); + statement(""); + statement("template<uint N>"); + statement("inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)"); + begin_scope(); + statement("return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);"); + end_scope(); + statement(""); + break; + case SPVFuncImplSubgroupBallot: statement("inline uint4 spvSubgroupBallot(bool value)"); begin_scope(); @@ -4631,8 +4687,11 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplSubgroupBallotFindLSB: - statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot)"); + statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)"); begin_scope(); + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, 
min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + statement("ballot &= mask;"); statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + " "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);"); end_scope(); @@ -4640,8 +4699,11 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplSubgroupBallotFindMSB: - statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot)"); + statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)"); begin_scope(); + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + statement("ballot &= mask;"); statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - " "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), " "ballot.z == 0), ballot.w == 0);"); @@ -4650,24 +4712,31 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplSubgroupBallotBitCount: - statement("inline uint spvSubgroupBallotBitCount(uint4 ballot)"); + statement("inline uint spvPopCount4(uint4 ballot)"); begin_scope(); statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);"); end_scope(); statement(""); + statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)"); + begin_scope(); + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + statement("return spvPopCount4(ballot & mask);"); + end_scope(); + statement(""); statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); begin_scope(); statement("uint4 mask = 
uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), " "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), " "uint2(0));"); - statement("return spvSubgroupBallotBitCount(ballot & mask);"); + statement("return spvPopCount4(ballot & mask);"); end_scope(); statement(""); statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); begin_scope(); statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), " "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));"); - statement("return spvSubgroupBallotBitCount(ballot & mask);"); + statement("return spvPopCount4(ballot & mask);"); end_scope(); statement(""); break; @@ -4680,7 +4749,7 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline bool spvSubgroupAllEqual(T value)"); begin_scope(); - statement("return simd_all(value == simd_broadcast_first(value));"); + statement("return simd_all(all(value == simd_broadcast_first(value)));"); end_scope(); statement(""); statement("template<>"); @@ -4689,6 +4758,184 @@ void CompilerMSL::emit_custom_functions() statement("return simd_all(value) || !simd_any(value);"); end_scope(); statement(""); + statement("template"); + statement("inline bool spvSubgroupAllEqual(vec value)"); + begin_scope(); + statement("return simd_all(all(value == (vec)simd_broadcast_first((vec)value)));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupShuffle: + statement("template"); + statement("inline T spvSubgroupShuffle(T value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return quad_shuffle(value, lane);"); + else + statement("return simd_shuffle(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffle(bool value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return 
!!quad_shuffle((ushort)value, lane);"); + else + statement("return !!simd_shuffle((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffle(vec value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return (vec)quad_shuffle((vec)value, lane);"); + else + statement("return (vec)simd_shuffle((vec)value, lane);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupShuffleXor: + statement("template"); + statement("inline T spvSubgroupShuffleXor(T value, ushort mask)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return quad_shuffle_xor(value, mask);"); + else + statement("return simd_shuffle_xor(value, mask);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return !!quad_shuffle_xor((ushort)value, mask);"); + else + statement("return !!simd_shuffle_xor((ushort)value, mask);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleXor(vec value, ushort mask)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return (vec)quad_shuffle_xor((vec)value, mask);"); + else + statement("return (vec)simd_shuffle_xor((vec)value, mask);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupShuffleUp: + statement("template"); + statement("inline T spvSubgroupShuffleUp(T value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return quad_shuffle_up(value, delta);"); + else + statement("return simd_shuffle_up(value, delta);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return !!quad_shuffle_up((ushort)value, delta);"); + else + statement("return 
!!simd_shuffle_up((ushort)value, delta);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleUp(vec value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return (vec)quad_shuffle_up((vec)value, delta);"); + else + statement("return (vec)simd_shuffle_up((vec)value, delta);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupShuffleDown: + statement("template"); + statement("inline T spvSubgroupShuffleDown(T value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return quad_shuffle_down(value, delta);"); + else + statement("return simd_shuffle_down(value, delta);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return !!quad_shuffle_down((ushort)value, delta);"); + else + statement("return !!simd_shuffle_down((ushort)value, delta);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleDown(vec value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios()) + statement("return (vec)quad_shuffle_down((vec)value, delta);"); + else + statement("return (vec)simd_shuffle_down((vec)value, delta);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplQuadBroadcast: + statement("template"); + statement("inline T spvQuadBroadcast(T value, uint lane)"); + begin_scope(); + statement("return quad_broadcast(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvQuadBroadcast(bool value, uint lane)"); + begin_scope(); + statement("return !!quad_broadcast((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvQuadBroadcast(vec value, uint lane)"); + begin_scope(); + statement("return (vec)quad_broadcast((vec)value, lane);"); + end_scope(); + 
statement(""); + break; + + case SPVFuncImplQuadSwap: + // We can implement this easily based on the following table giving + // the target lane ID from the direction and current lane ID: + // Direction + // | 0 | 1 | 2 | + // ---+---+---+---+ + // L 0 | 1 2 3 + // a 1 | 0 3 2 + // n 2 | 3 0 1 + // e 3 | 2 1 0 + // Notice that target = source ^ (direction + 1). + statement("template"); + statement("inline T spvQuadSwap(T value, uint dir)"); + begin_scope(); + statement("return quad_shuffle_xor(value, dir + 1);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvQuadSwap(bool value, uint dir)"); + begin_scope(); + statement("return !!quad_shuffle_xor((ushort)value, dir + 1);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvQuadSwap(vec value, uint dir)"); + begin_scope(); + statement("return (vec)quad_shuffle_xor((vec)value, dir + 1);"); + end_scope(); + statement(""); break; case SPVFuncImplReflectScalar: @@ -7168,7 +7415,7 @@ void CompilerMSL::emit_texture_op(const Instruction &i, bool sparse) if (sparse) SPIRV_CROSS_THROW("Sparse feedback not yet supported in MSL."); - if (msl_options.is_ios() && msl_options.ios_use_framebuffer_fetch_subpasses) + if (msl_options.use_framebuffer_fetch_subpasses) { auto *ops = stream(i); @@ -8265,25 +8512,26 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool break; } - if (args.base.is_fetch && args.offset) + if (args.base.is_fetch && (args.offset || args.coffset)) { + uint32_t offset_expr = args.offset ? args.offset : args.coffset; // Fetch offsets must be applied directly to the coordinate. 
- forward = forward && should_forward(args.offset); - auto &type = expression_type(args.offset); - if (type.basetype != SPIRType::UInt) - tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.offset); + forward = forward && should_forward(offset_expr); + auto &type = expression_type(offset_expr); + if (imgtype.image.dim == Dim1D && msl_options.texture_1D_as_2D) + { + if (type.basetype != SPIRType::UInt) + tex_coords += join(" + uint2(", bitcast_expression(SPIRType::UInt, offset_expr), ", 0)"); + else + tex_coords += join(" + uint2(", to_enclosed_expression(offset_expr), ", 0)"); + } else - tex_coords += " + " + to_enclosed_expression(args.offset); - } - else if (args.base.is_fetch && args.coffset) - { - // Fetch offsets must be applied directly to the coordinate. - forward = forward && should_forward(args.coffset); - auto &type = expression_type(args.coffset); - if (type.basetype != SPIRType::UInt) - tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.coffset); - else - tex_coords += " + " + to_enclosed_expression(args.coffset); + { + if (type.basetype != SPIRType::UInt) + tex_coords += " + " + bitcast_expression(SPIRType::UInt, offset_expr); + else + tex_coords += " + " + to_enclosed_expression(offset_expr); + } } // If projection, use alt coord as divisor @@ -8454,6 +8702,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool string grad_opt; switch (imgtype.image.dim) { + case Dim1D: case Dim2D: grad_opt = "2d"; break; @@ -8489,30 +8738,42 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool // Add offsets string offset_expr; + const SPIRType *offset_type = nullptr; if (args.coffset && !args.base.is_fetch) { forward = forward && should_forward(args.coffset); offset_expr = to_expression(args.coffset); + offset_type = &expression_type(args.coffset); } else if (args.offset && !args.base.is_fetch) { forward = forward && should_forward(args.offset); offset_expr = to_expression(args.offset); 
+ offset_type = &expression_type(args.offset); } if (!offset_expr.empty()) { switch (imgtype.image.dim) { + case Dim1D: + if (!msl_options.texture_1D_as_2D) + break; + if (offset_type->vecsize > 1) + offset_expr = enclose_expression(offset_expr) + ".x"; + + farg_str += join(", int2(", offset_expr, ", 0)"); + break; + case Dim2D: - if (coord_type.vecsize > 2) + if (offset_type->vecsize > 2) offset_expr = enclose_expression(offset_expr) + ".xy"; farg_str += ", " + offset_expr; break; case Dim3D: - if (coord_type.vecsize > 3) + if (offset_type->vecsize > 3) offset_expr = enclose_expression(offset_expr) + ".xyz"; farg_str += ", " + offset_expr; @@ -8532,7 +8793,10 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler) { forward = forward && should_forward(args.component); - farg_str += ", " + to_component_argument(args.component); + + if (const auto *var = maybe_get_backing_variable(img)) + if (!image_is_comparison(get(var->basetype), var->self)) + farg_str += ", " + to_component_argument(args.component); } } @@ -8962,9 +9226,9 @@ string CompilerMSL::to_swizzle_expression(uint32_t id) auto index = expr.find_first_of('['); // If an image is part of an argument buffer translate this to a legal identifier. 
- for (auto &c : expr) - if (c == '.') - c = '_'; + string::size_type period = 0; + while ((period = expr.find_first_of('.', period)) != string::npos && period < index) + expr[period] = '_'; if (index == string::npos) return expr + swizzle_name_suffix; @@ -9828,9 +10092,9 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage)) { if (!msl_options.supports_msl_version(2)) - SPIRV_CROSS_THROW("Post-depth coverage requires Metal 2.0."); - if (!msl_options.is_ios()) - SPIRV_CROSS_THROW("Post-depth coverage is only supported on iOS."); + SPIRV_CROSS_THROW("Post-depth coverage requires MSL 2.0."); + if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Post-depth coverage on Mac requires MSL 2.3."); ep_args += ", post_depth_coverage"; } ep_args += "]]"; @@ -10207,6 +10471,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) } else { + if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Framebuffer fetch on Mac is not supported before MSL 2.3."); ep_args += image_type_glsl(type, var_id) + " " + r.name; ep_args += " [[color(" + convert_to_string(r.index) + ")]]"; } @@ -10449,7 +10715,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); entry_func.fixup_hooks_in.push_back([=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", - to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (", + to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? 
uint4(0, (1 << (", to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ", to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); }); @@ -10461,25 +10727,25 @@ void CompilerMSL::fix_up_shader_inputs_outputs() SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); entry_func.fixup_hooks_in.push_back([=]() { // Case where index < 32, size < 32: - // mask0 = bfe(0xFFFFFFFF, index, size - index); - // mask1 = bfe(0xFFFFFFFF, 0, 0); // Gives 0 + // mask0 = bfi(0, 0xFFFFFFFF, index, size - index); + // mask1 = bfi(0, 0xFFFFFFFF, 0, 0); // Gives 0 // Case where index < 32 but size >= 32: - // mask0 = bfe(0xFFFFFFFF, index, 32 - index); - // mask1 = bfe(0xFFFFFFFF, 0, size - 32); + // mask0 = bfi(0, 0xFFFFFFFF, index, 32 - index); + // mask1 = bfi(0, 0xFFFFFFFF, 0, size - 32); // Case where index >= 32: - // mask0 = bfe(0xFFFFFFFF, 32, 0); // Gives 0 - // mask1 = bfe(0xFFFFFFFF, index - 32, size - index); + // mask0 = bfi(0, 0xFFFFFFFF, 32, 0); // Gives 0 + // mask1 = bfi(0, 0xFFFFFFFF, index - 32, size - index); // This is expressed without branches to avoid divergent // control flow--hence the complicated min/max expressions. // This is further complicated by the fact that if you attempt - // to bfe out-of-bounds on Metal, undefined behavior is the + // to bfi/bfe out-of-bounds on Metal, undefined behavior is the // result. 
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, min(", + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)", to_expression(builtin_subgroup_size_id), ", 32) - (int)", to_expression(builtin_subgroup_invocation_id_id), - ", 0)), extract_bits(0xFFFFFFFF, (uint)max((int)", + ", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)", to_expression(builtin_subgroup_size_id), " - (int)max(", to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));"); @@ -10494,11 +10760,11 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // The same logic applies here, except now the index is one // more than the subgroup invocation ID. statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, min(", + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)", to_expression(builtin_subgroup_size_id), ", 32) - (int)", to_expression(builtin_subgroup_invocation_id_id), - " - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)", + " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)", to_expression(builtin_subgroup_size_id), " - (int)max(", to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));"); @@ -10834,8 +11100,8 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base bool CompilerMSL::type_is_msl_framebuffer_fetch(const SPIRType &type) const { - return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && msl_options.is_ios() && - msl_options.ios_use_framebuffer_fetch_subpasses; + return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + 
msl_options.use_framebuffer_fetch_subpasses; } string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) @@ -11062,6 +11328,11 @@ void CompilerMSL::replace_illegal_names() "fragment", "compute", "bias", + "level", + "gradient2d", + "gradientcube", + "gradient3d", + "min_lod_clamp", "assert", "VARIABLE_TRACEPOINT", "STATIC_DATA_TRACEPOINT", @@ -11850,12 +12121,11 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) break; case OpGroupNonUniformBroadcast: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_broadcast" : "simd_broadcast"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBroadcast"); break; case OpGroupNonUniformBroadcastFirst: - emit_unary_func_op(result_type, id, ops[3], "simd_broadcast_first"); + emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBroadcastFirst"); break; case OpGroupNonUniformBallot: @@ -11871,46 +12141,50 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) break; case OpGroupNonUniformBallotFindLSB: - emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindLSB"); + emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindLSB"); break; case OpGroupNonUniformBallotFindMSB: - emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindMSB"); + emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindMSB"); break; case OpGroupNonUniformBallotBitCount: { auto operation = static_cast(ops[3]); - if (operation == GroupOperationReduce) - emit_unary_func_op(result_type, id, ops[4], "spvSubgroupBallotBitCount"); - else if (operation == GroupOperationInclusiveScan) + switch (operation) + { + case GroupOperationReduce: + emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_size_id, "spvSubgroupBallotBitCount"); + break; + case GroupOperationInclusiveScan: emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id, 
"spvSubgroupBallotInclusiveBitCount"); - else if (operation == GroupOperationExclusiveScan) + break; + case GroupOperationExclusiveScan: emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id, "spvSubgroupBallotExclusiveBitCount"); - else + break; + default: SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } break; } case OpGroupNonUniformShuffle: - emit_binary_func_op(result_type, id, ops[3], ops[4], msl_options.is_ios() ? "quad_shuffle" : "simd_shuffle"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffle"); break; case OpGroupNonUniformShuffleXor: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_shuffle_xor" : "simd_shuffle_xor"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleXor"); break; case OpGroupNonUniformShuffleUp: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_shuffle_up" : "simd_shuffle_up"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleUp"); break; case OpGroupNonUniformShuffleDown: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_shuffle_down" : "simd_shuffle_down"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleDown"); break; case OpGroupNonUniformAll: @@ -12018,26 +12292,11 @@ case OpGroupNonUniform##op: \ #undef MSL_GROUP_OP_CAST case OpGroupNonUniformQuadSwap: - { - // We can implement this easily based on the following table giving - // the target lane ID from the direction and current lane ID: - // Direction - // | 0 | 1 | 2 | - // ---+---+---+---+ - // L 0 | 1 2 3 - // a 1 | 0 3 2 - // n 2 | 3 0 1 - // e 3 | 2 1 0 - // Notice that target = source ^ (direction + 1). 
- uint32_t mask = evaluate_constant_u32(ops[4]) + 1; - uint32_t mask_id = ir.increase_bound_by(1); - set(mask_id, expression_type_id(ops[4]), mask, false); - emit_binary_func_op(result_type, id, ops[3], mask_id, "quad_shuffle_xor"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadSwap"); break; - } case OpGroupNonUniformQuadBroadcast: - emit_binary_func_op(result_type, id, ops[3], ops[4], "quad_broadcast"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadBroadcast"); break; default: @@ -12930,7 +13189,8 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui } case OpImageWrite: - uses_resource_write = true; + if (!compiler.msl_options.supports_msl_version(2, 2)) + uses_resource_write = true; break; case OpStore: @@ -12990,8 +13250,15 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui needs_subgroup_invocation_id = true; break; + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + needs_subgroup_size = true; + break; + case OpGroupNonUniformBallotBitCount: - if (args[3] != GroupOperationReduce) + if (args[3] == GroupOperationReduce) + needs_subgroup_size = true; + else needs_subgroup_invocation_id = true; break; @@ -13035,7 +13302,8 @@ void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id) { auto *p_var = compiler.maybe_get_backing_variable(var_id); StorageClass sc = p_var ? 
p_var->storage : StorageClassMax; - if (sc == StorageClassUniform || sc == StorageClassStorageBuffer) + if (!compiler.msl_options.supports_msl_version(2, 1) && + (sc == StorageClassUniform || sc == StorageClassStorageBuffer)) uses_resource_write = true; } @@ -13174,6 +13442,12 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o break; } + case OpGroupNonUniformBroadcast: + return SPVFuncImplSubgroupBroadcast; + + case OpGroupNonUniformBroadcastFirst: + return SPVFuncImplSubgroupBroadcastFirst; + case OpGroupNonUniformBallot: return SPVFuncImplSubgroupBallot; @@ -13193,6 +13467,24 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpGroupNonUniformAllEqual: return SPVFuncImplSubgroupAllEqual; + case OpGroupNonUniformShuffle: + return SPVFuncImplSubgroupShuffle; + + case OpGroupNonUniformShuffleXor: + return SPVFuncImplSubgroupShuffleXor; + + case OpGroupNonUniformShuffleUp: + return SPVFuncImplSubgroupShuffleUp; + + case OpGroupNonUniformShuffleDown: + return SPVFuncImplSubgroupShuffleDown; + + case OpGroupNonUniformQuadBroadcast: + return SPVFuncImplQuadBroadcast; + + case OpGroupNonUniformQuadSwap: + return SPVFuncImplQuadSwap; + default: break; } diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp index d8b801e71..13271f62a 100644 --- a/3rdparty/spirv-cross/spirv_msl.hpp +++ b/3rdparty/spirv-cross/spirv_msl.hpp @@ -315,7 +315,7 @@ public: bool ios_support_base_vertex_instance = false; // Use Metal's native frame-buffer fetch API for subpass inputs. 
- bool ios_use_framebuffer_fetch_subpasses = false; + bool use_framebuffer_fetch_subpasses = false; // Enables use of "fma" intrinsic for invariant float math bool invariant_float_math = false; @@ -600,12 +600,20 @@ protected: SPVFuncImplTextureSwizzle, SPVFuncImplGatherSwizzle, SPVFuncImplGatherCompareSwizzle, + SPVFuncImplSubgroupBroadcast, + SPVFuncImplSubgroupBroadcastFirst, SPVFuncImplSubgroupBallot, SPVFuncImplSubgroupBallotBitExtract, SPVFuncImplSubgroupBallotFindLSB, SPVFuncImplSubgroupBallotFindMSB, SPVFuncImplSubgroupBallotBitCount, SPVFuncImplSubgroupAllEqual, + SPVFuncImplSubgroupShuffle, + SPVFuncImplSubgroupShuffleXor, + SPVFuncImplSubgroupShuffleUp, + SPVFuncImplSubgroupShuffleDown, + SPVFuncImplQuadBroadcast, + SPVFuncImplQuadSwap, SPVFuncImplReflectScalar, SPVFuncImplRefractScalar, SPVFuncImplFaceForwardScalar, @@ -913,6 +921,7 @@ protected: bool used_swizzle_buffer = false; bool added_builtin_tess_level = false; bool needs_subgroup_invocation_id = false; + bool needs_subgroup_size = false; std::string qual_pos_var_name; std::string stage_in_var_name = "in"; std::string stage_out_var_name = "out"; @@ -984,6 +993,7 @@ protected: bool uses_atomics = false; bool uses_resource_write = false; bool needs_subgroup_invocation_id = false; + bool needs_subgroup_size = false; }; // OpcodeHandler that scans for uses of sampled images