diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp
index edc98f81e..49cc83868 100644
--- a/3rdparty/spirv-cross/spirv_cross.cpp
+++ b/3rdparty/spirv-cross/spirv_cross.cpp
@@ -5445,18 +5445,55 @@ void Compiler::analyze_interlocked_resource_usage()
 }
 
 bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
+{
+    if (!type_is_top_level_array(type))
+        return false;
+
+    // BDA types must have parent type hierarchy.
+    if (!type.parent_type)
+        return false;
+
+    // Punch through all array layers.
+    auto *parent = &get<SPIRType>(type.parent_type);
+    while (type_is_top_level_array(*parent))
+        parent = &get<SPIRType>(parent->parent_type);
+
+    return type_is_top_level_pointer(*parent);
+}
+
+bool Compiler::type_is_top_level_pointer(const SPIRType &type) const
 {
     if (!type.pointer)
         return false;
 
-    // If parent type has same pointer depth, we must have an array of pointers.
-    return type.pointer_depth == get<SPIRType>(type.parent_type).pointer_depth;
+    // Function pointers, should not be hit by valid SPIR-V.
+    // Parent type will be SPIRFunction instead.
+    if (type.basetype == SPIRType::Unknown)
+        return false;
+
+    // Some types are synthesized in-place without a complete type hierarchy and might not have parent types,
+    // but these types are never array-of-pointer or any complicated BDA type, so infer reasonable defaults.
+    if (type.parent_type)
+        return type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth;
+    else
+        return true;
 }
 
 bool Compiler::type_is_top_level_physical_pointer(const SPIRType &type) const
 {
-    return type.pointer && type.storage == StorageClassPhysicalStorageBuffer &&
-           type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth;
+    return type_is_top_level_pointer(type) && type.storage == StorageClassPhysicalStorageBuffer;
+}
+
+bool Compiler::type_is_top_level_array(const SPIRType &type) const
+{
+    if (type.array.empty())
+        return false;
+
+    // If we have both pointer and array, we infer pointer-to-array, as that is the only meaningful thing outside BDA.
+    if (type.parent_type)
+        return type.array.size() > get<SPIRType>(type.parent_type).array.size();
+    else
+        return !type.pointer;
 }
 
 bool Compiler::flush_phi_required(BlockID from, BlockID to) const
diff --git a/3rdparty/spirv-cross/spirv_cross.hpp b/3rdparty/spirv-cross/spirv_cross.hpp
index ea98ee60d..b99b7ae7a 100644
--- a/3rdparty/spirv-cross/spirv_cross.hpp
+++ b/3rdparty/spirv-cross/spirv_cross.hpp
@@ -1145,6 +1145,8 @@ protected:
     bool type_is_array_of_pointers(const SPIRType &type) const;
     bool type_is_top_level_physical_pointer(const SPIRType &type) const;
+    bool type_is_top_level_pointer(const SPIRType &type) const;
+    bool type_is_top_level_array(const SPIRType &type) const;
     bool type_is_block_like(const SPIRType &type) const;
     bool type_is_opaque_value(const SPIRType &type) const;
diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp
index f4ceab6ae..e03111673 100644
--- a/3rdparty/spirv-cross/spirv_glsl.cpp
+++ b/3rdparty/spirv-cross/spirv_glsl.cpp
@@ -30,6 +30,7 @@
 #include <limits>
 #include <locale.h>
 #include <utility>
+#include <vector>
 
 #ifndef _WIN32
 #include <langinfo.h>
@@ -1521,7 +1522,7 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bits
 {
     // If using PhysicalStorageBufferEXT storage class, this is a pointer,
     // and is 64-bit.
-    if (type.storage == StorageClassPhysicalStorageBufferEXT)
+    if (type_is_top_level_physical_pointer(type))
     {
         if (!type.pointer)
             SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
@@ -1536,8 +1537,7 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bits
         else
             SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
     }
-
-    if (!type.array.empty())
+    else if (type_is_top_level_array(type))
     {
         uint32_t minimum_alignment = 1;
         if (packing_is_vec4_padded(packing))
@@ -1643,7 +1643,19 @@ uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const B
 
 uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
 {
-    if (!type.array.empty())
+    // If using PhysicalStorageBufferEXT storage class, this is a pointer,
+    // and is 64-bit.
+    if (type_is_top_level_physical_pointer(type))
+    {
+        if (!type.pointer)
+            SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
+
+        if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
+            return 8;
+        else
+            SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
+    }
+    else if (type_is_top_level_array(type))
     {
         uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
 
@@ -1655,19 +1667,6 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &f
         return packed_size;
     }
 
-    // If using PhysicalStorageBufferEXT storage class, this is a pointer,
-    // and is 64-bit.
-    if (type.storage == StorageClassPhysicalStorageBufferEXT)
-    {
-        if (!type.pointer)
-            SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
-
-        if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
-            return 8;
-        else
-            SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
-    }
-
     uint32_t size = 0;
 
     if (type.basetype == SPIRType::Struct)
@@ -1834,8 +1833,9 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin
         }
 
         // Verify array stride rules.
-        if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
-                                            type_struct_member_array_stride(type, i))
+        if (type_is_top_level_array(memb_type) &&
+            type_to_packed_array_stride(memb_type, member_flags, packing) !=
+                type_struct_member_array_stride(type, i))
         {
             if (failed_validation_index)
                 *failed_validation_index = i;
@@ -3993,6 +3993,169 @@ void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
     }
 }
 
+void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op)
+{
+    std::string result;
+    switch (group_op)
+    {
+    case GroupOperationReduce:
+        result = "reduction";
+        break;
+
+    case GroupOperationExclusiveScan:
+        result = "excl_scan";
+        break;
+
+    case GroupOperationInclusiveScan:
+        result = "incl_scan";
+        break;
+
+    default:
+        SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
+    }
+
+    struct TypeInfo
+    {
+        std::string type;
+        std::string identity;
+    };
+
+    std::vector<TypeInfo> type_infos;
+    switch (op)
+    {
+    case OpGroupNonUniformIAdd:
+    {
+        type_infos.emplace_back(TypeInfo{ "uint", "0u" });
+        type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(0u)" });
+        type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(0u)" });
+        type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(0u)" });
+        type_infos.emplace_back(TypeInfo{ "int", "0" });
+        type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(0)" });
+        type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(0)" });
+        type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(0)" });
+        break;
+    }
+
+    case OpGroupNonUniformFAdd:
+    {
+        type_infos.emplace_back(TypeInfo{ "float", "0.0f" });
+        type_infos.emplace_back(TypeInfo{ "vec2", "vec2(0.0f)" });
+        type_infos.emplace_back(TypeInfo{ "vec3", "vec3(0.0f)" });
+        type_infos.emplace_back(TypeInfo{ "vec4", "vec4(0.0f)" });
+        // ARB_gpu_shader_fp64 is required in GL 4.0, which in turn is required by NV_shader_thread_shuffle.
+        type_infos.emplace_back(TypeInfo{ "double", "0.0LF" });
+        type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(0.0LF)" });
+        type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(0.0LF)" });
+        type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(0.0LF)" });
+        break;
+    }
+
+    case OpGroupNonUniformIMul:
+    {
+        type_infos.emplace_back(TypeInfo{ "uint", "1u" });
+        type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(1u)" });
+        type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(1u)" });
+        type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(1u)" });
+        type_infos.emplace_back(TypeInfo{ "int", "1" });
+        type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(1)" });
+        type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(1)" });
+        type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(1)" });
+        break;
+    }
+
+    case OpGroupNonUniformFMul:
+    {
+        type_infos.emplace_back(TypeInfo{ "float", "1.0f" });
+        type_infos.emplace_back(TypeInfo{ "vec2", "vec2(1.0f)" });
+        type_infos.emplace_back(TypeInfo{ "vec3", "vec3(1.0f)" });
+        type_infos.emplace_back(TypeInfo{ "vec4", "vec4(1.0f)" });
+        type_infos.emplace_back(TypeInfo{ "double", "1.0LF" });
+        type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(1.0LF)" });
+        type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(1.0LF)" });
+        type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(1.0LF)" });
+        break;
+    }
+
+    default:
+        SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
+    }
+
+    const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd;
+    const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul;
+    std::string op_symbol;
+    if (op_is_addition)
+    {
+        op_symbol = "+=";
+    }
+    else if (op_is_multiplication)
+    {
+        op_symbol = "*=";
+    }
+
+    for (const TypeInfo &t : type_infos)
+    {
+        statement(t.type, " ", func, "(", t.type, " v)");
+        begin_scope();
+        statement(t.type, " ", result, " = ", t.identity, ";");
+        statement("uvec4 active_threads = subgroupBallot(true);");
+        statement("if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)");
+        begin_scope();
+        statement("uint total = gl_SubgroupSize / 2u;");
+        statement(result, " = v;");
+        statement("for (uint i = 1u; i <= total; i <<= 1u)");
+        begin_scope();
+        statement("bool valid;");
+        if (group_op == GroupOperationReduce)
+        {
+            statement(t.type, " s = shuffleXorNV(", result, ", i, gl_SubgroupSize, valid);");
+        }
+        else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
+        {
+            statement(t.type, " s = shuffleUpNV(", result, ", i, gl_SubgroupSize, valid);");
+        }
+        if (op_is_addition || op_is_multiplication)
+        {
+            statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
+        }
+        end_scope();
+        if (group_op == GroupOperationExclusiveScan)
+        {
+            statement(result, " = shuffleUpNV(", result, ", 1u, gl_SubgroupSize);");
+            statement("if (subgroupElect())");
+            begin_scope();
+            statement(result, " = ", t.identity, ";");
+            end_scope();
+        }
+        end_scope();
+        statement("else");
+        begin_scope();
+        if (group_op == GroupOperationExclusiveScan)
+        {
+            statement("uint total = subgroupBallotBitCount(gl_SubgroupLtMask);");
+        }
+        else if (group_op == GroupOperationInclusiveScan)
+        {
+            statement("uint total = subgroupBallotBitCount(gl_SubgroupLeMask);");
+        }
+        statement("for (uint i = 0u; i < gl_SubgroupSize; ++i)");
+        begin_scope();
+        statement("bool valid = subgroupBallotBitExtract(active_threads, i);");
+        statement(t.type, " s = shuffleNV(v, i, gl_SubgroupSize);");
+        if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
+        {
+            statement("valid = valid && (i < total);");
+        }
+        if (op_is_addition || op_is_multiplication)
+        {
+            statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
+        }
+        end_scope();
+        end_scope();
+        statement("return ", result, ";");
+        end_scope();
+    }
+}
+
 void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
 {
     static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
@@ -4396,6 +4559,57 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
         statement("#endif");
         statement("");
     }
+
+    auto arithmetic_feature_helper =
+        [&](Supp::Feature feat, std::string func_name, spv::Op op, spv::GroupOperation group_op)
+    {
+        if (shader_subgroup_supporter.is_feature_requested(feat))
+        {
+            auto exts = Supp::get_candidates_for_feature(feat, result);
+            for (auto &e : exts)
+            {
+                const char *name = Supp::get_extension_name(e);
+                statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
+
+                switch (e)
+                {
+                case Supp::NV_shader_thread_shuffle:
+                    emit_subgroup_arithmetic_workaround(func_name, op, group_op);
+                    break;
+                default:
+                    break;
+                }
+            }
+            statement("#endif");
+            statement("");
+        }
+    };
+
+    arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd,
+                              GroupOperationReduce);
+    arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd",
+                              OpGroupNonUniformIAdd, GroupOperationExclusiveScan);
+    arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd",
+                              OpGroupNonUniformIAdd, GroupOperationInclusiveScan);
+    arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd,
+                              GroupOperationReduce);
+    arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd",
+                              OpGroupNonUniformFAdd, GroupOperationExclusiveScan);
+    arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd",
+                              OpGroupNonUniformFAdd, GroupOperationInclusiveScan);
+
+    arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul,
+                              GroupOperationReduce);
+    arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul",
+                              OpGroupNonUniformIMul, GroupOperationExclusiveScan);
+    arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul",
+                              OpGroupNonUniformIMul, GroupOperationInclusiveScan);
+    arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul,
+                              GroupOperationReduce);
+    arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul",
+                              OpGroupNonUniformFMul, GroupOperationExclusiveScan);
+    arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul",
+                              OpGroupNonUniformFMul, GroupOperationInclusiveScan);
 }
 
 if (!workaround_ubo_load_overload_types.empty())
@@ -7109,7 +7323,7 @@ string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID s
     }
 }
 
-bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
+bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops)
 {
     switch (op)
     {
@@ -7128,6 +7342,22 @@ bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
     case OpGroupNonUniformBallotBitExtract:
     case OpGroupNonUniformInverseBallot:
         return true;
+    case OpGroupNonUniformIAdd:
+    case OpGroupNonUniformFAdd:
+    case OpGroupNonUniformIMul:
+    case OpGroupNonUniformFMul:
+    {
+        const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
+        if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan ||
+            operation == GroupOperationExclusiveScan)
+        {
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
     default:
         return false;
     }
@@ -7522,7 +7752,7 @@ bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
 bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
 {
     auto &type = expression_type(ptr);
-    if (type.array.empty())
+    if (!type_is_top_level_array(get_pointee_type(type)))
         return false;
 
     if (!backend.array_is_value_type)
@@ -8725,7 +8955,7 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
     const uint32_t *ops = stream(i);
     auto op = static_cast<Op>(i.op);
 
-    if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
+    if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops))
         SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
 
     // If we need to do implicit bitcasts, make sure we do it with the correct type.
@@ -8793,12 +9023,34 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
         }
         break;
 
-    case OpGroupNonUniformFAdd:
-    case OpGroupNonUniformFMul:
+    // clang-format off
+#define GLSL_GROUP_OP(OP)\
+    case OpGroupNonUniform##OP:\
+    {\
+        auto operation = static_cast<GroupOperation>(ops[3]);\
+        if (operation == GroupOperationClusteredReduce)\
+            require_extension_internal("GL_KHR_shader_subgroup_clustered");\
+        else if (operation == GroupOperationReduce)\
+            request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\
+        else if (operation == GroupOperationExclusiveScan)\
+            request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\
+        else if (operation == GroupOperationInclusiveScan)\
+            request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\
+        else\
+            SPIRV_CROSS_THROW("Invalid group operation.");\
+        break;\
+    }
+
+    GLSL_GROUP_OP(IAdd)
+    GLSL_GROUP_OP(FAdd)
+    GLSL_GROUP_OP(IMul)
+    GLSL_GROUP_OP(FMul)
+
+#undef GLSL_GROUP_OP
+    // clang-format on
+
     case OpGroupNonUniformFMin:
     case OpGroupNonUniformFMax:
-    case OpGroupNonUniformIAdd:
-    case OpGroupNonUniformIMul:
     case OpGroupNonUniformSMin:
     case OpGroupNonUniformSMax:
     case OpGroupNonUniformUMin:
@@ -17667,6 +17919,7 @@ const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candid
     static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
                                                         "GL_KHR_shader_subgroup_basic",
                                                         "GL_KHR_shader_subgroup_vote",
+                                                        "GL_KHR_shader_subgroup_arithmetic",
                                                         "GL_NV_gpu_shader_5",
                                                         "GL_NV_shader_thread_group",
                                                         "GL_NV_shader_thread_shuffle",
@@ -17715,6 +17968,21 @@ CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSub
         return { SubgroupMask };
     case SubgroupBallotBitCount:
         return { SubgroupBallot };
+    case SubgroupArithmeticIAddReduce:
+    case SubgroupArithmeticIAddInclusiveScan:
+    case SubgroupArithmeticFAddReduce:
+    case SubgroupArithmeticFAddInclusiveScan:
+    case SubgroupArithmeticIMulReduce:
+    case SubgroupArithmeticIMulInclusiveScan:
+    case SubgroupArithmeticFMulReduce:
+    case SubgroupArithmeticFMulInclusiveScan:
+        return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract };
+    case SubgroupArithmeticIAddExclusiveScan:
+    case SubgroupArithmeticFAddExclusiveScan:
+    case SubgroupArithmeticIMulExclusiveScan:
+    case SubgroupArithmeticFMulExclusiveScan:
+        return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount,
+                 SubgroupMask, SubgroupElect, SubgroupBallotBitExtract };
     default:
         return {};
     }
@@ -17728,11 +17996,15 @@ CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgr
 
 bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
 {
-    static const bool retval[FeatureCount] = { false, false, false, false, false, false,
-                                               true, // SubgroupBalloFindLSB_MSB
-                                               false, false, false, false,
-                                               true, // SubgroupMemBarrier - replaced with workgroup memory barriers
-                                               false, false, true, false };
+    static const bool retval[FeatureCount] = {
+        false, false, false, false, false, false,
+        true, // SubgroupBallotFindLSB_MSB
+        false, false, false, false,
+        true, // SubgroupMemBarrier - replaced with workgroup memory barriers
+        false, false, true, false,
+        false, false, false, false, false, false, // iadd, fadd
+        false, false, false, false, false, false, // imul, fmul
+    };
 
     return retval[feature];
 }
@@ -17744,7 +18016,11 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgrou
         KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
         KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
         KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
-        KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
+        KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot,
+        KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
+        KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
+        KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
+        KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
     };
 
     return extensions[feature];
@@ -17840,6 +18116,19 @@ CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderS
         return { NV_shader_thread_group };
     case SubgroupBallotBitCount:
         return {};
+    case SubgroupArithmeticIAddReduce:
+    case SubgroupArithmeticIAddExclusiveScan:
+    case SubgroupArithmeticIAddInclusiveScan:
+    case SubgroupArithmeticFAddReduce:
+    case SubgroupArithmeticFAddExclusiveScan:
+    case SubgroupArithmeticFAddInclusiveScan:
+    case SubgroupArithmeticIMulReduce:
+    case SubgroupArithmeticIMulExclusiveScan:
+    case SubgroupArithmeticIMulInclusiveScan:
+    case SubgroupArithmeticFMulReduce:
+    case SubgroupArithmeticFMulExclusiveScan:
+    case SubgroupArithmeticFMulInclusiveScan:
+        return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle };
     default:
         return {};
     }
@@ -17864,6 +18153,7 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
     weights[KHR_shader_subgroup_ballot] = big_num;
     weights[KHR_shader_subgroup_basic] = big_num;
     weights[KHR_shader_subgroup_vote] = big_num;
+    weights[KHR_shader_subgroup_arithmetic] = big_num;
 }
 
 void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp
index 0b40ed801..d6e2477e0 100644
--- a/3rdparty/spirv-cross/spirv_glsl.hpp
+++ b/3rdparty/spirv-cross/spirv_glsl.hpp
@@ -292,6 +292,7 @@ protected:
         KHR_shader_subgroup_ballot,
         KHR_shader_subgroup_basic,
         KHR_shader_subgroup_vote,
+        KHR_shader_subgroup_arithmetic,
         NV_gpu_shader_5,
         NV_shader_thread_group,
         NV_shader_thread_shuffle,
@@ -324,7 +325,18 @@ protected:
         SubgroupInverseBallot_InclBitCount_ExclBitCout = 13,
         SubgroupBallotBitExtract = 14,
         SubgroupBallotBitCount = 15,
-
+        SubgroupArithmeticIAddReduce = 16,
+        SubgroupArithmeticIAddExclusiveScan = 17,
+        SubgroupArithmeticIAddInclusiveScan = 18,
+        SubgroupArithmeticFAddReduce = 19,
+        SubgroupArithmeticFAddExclusiveScan = 20,
+        SubgroupArithmeticFAddInclusiveScan = 21,
+        SubgroupArithmeticIMulReduce = 22,
+        SubgroupArithmeticIMulExclusiveScan = 23,
+        SubgroupArithmeticIMulInclusiveScan = 24,
+        SubgroupArithmeticFMulReduce = 25,
+        SubgroupArithmeticFMulExclusiveScan = 26,
+        SubgroupArithmeticFMulInclusiveScan = 27,
         FeatureCount
     };
@@ -358,7 +370,7 @@ protected:
     };
 
     // TODO remove this function when all subgroup ops are supported (or make it always return true)
-    static bool is_supported_subgroup_op_in_opengl(spv::Op op);
+    static bool is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops);
 
     void reset(uint32_t iteration_count);
     void emit_function(SPIRFunction &func, const Bitset &return_flags);
@@ -627,6 +639,7 @@ protected:
     void emit_struct(SPIRType &type);
     void emit_resources();
     void emit_extension_workarounds(spv::ExecutionModel model);
+    void emit_subgroup_arithmetic_workaround(const std::string &func, spv::Op op, spv::GroupOperation group_op);
     void emit_polyfills(uint32_t polyfills, bool relaxed);
     void emit_buffer_block_native(const SPIRVariable &var);
     void emit_buffer_reference_block(uint32_t type_id, bool forward_declaration);
diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp
index 3791264a4..3f03f1c28 100644
--- a/3rdparty/spirv-cross/spirv_msl.cpp
+++ b/3rdparty/spirv-cross/spirv_msl.cpp
@@ -9603,7 +9603,7 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
 {
     // We only care about assignments of an entire array
     auto &type = expression_type(id_rhs);
-    if (type.array.size() == 0)
+    if (!type_is_top_level_array(get_pointee_type(type)))
         return false;
 
     auto *var = maybe_get<SPIRVariable>(id_lhs);
@@ -14550,7 +14550,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member)
     string type_name;
 
     // Pointer?
-    if (type.pointer)
+    if (type_is_top_level_pointer(type) || type_is_array_of_pointers(type))
     {
         assert(type.pointer_depth > 0);
 
@@ -14573,7 +14573,17 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member)
         while (type_is_pointer(*p_parent_type))
             p_parent_type = &get<SPIRType>(p_parent_type->parent_type);
 
+        // If we're emitting BDA, just use the templated type.
+        // Emitting builtin arrays needs a lot of cooperation with other code to ensure
+        // the C-style nesting works right.
+        // FIXME: This is somewhat of a hack.
+        bool old_is_using_builtin_array = is_using_builtin_array;
+        if (type_is_top_level_physical_pointer(type))
+            is_using_builtin_array = false;
+
         type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id));
+
+        is_using_builtin_array = old_is_using_builtin_array;
     }
 
     switch (type.basetype)
diff --git a/3rdparty/spirv-cross/spirv_reflect.cpp b/3rdparty/spirv-cross/spirv_reflect.cpp
index 0bd224e6c..9fcd3bc09 100644
--- a/3rdparty/spirv-cross/spirv_reflect.cpp
+++ b/3rdparty/spirv-cross/spirv_reflect.cpp
@@ -292,7 +292,7 @@ bool CompilerReflection::type_is_reference(const SPIRType &type) const
 {
     // Physical pointers and arrays of physical pointers need to refer to the pointee's type.
     return type_is_top_level_physical_pointer(type) ||
-           (!type.array.empty() && type_is_top_level_physical_pointer(get<SPIRType>(type.parent_type)));
+           (type_is_array_of_pointers(type) && type.storage == StorageClassPhysicalStorageBuffer);
 }
 
 void CompilerReflection::emit_types()
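Note on the generated code: the GLSL that emit_subgroup_arithmetic_workaround() emits on the GL_NV_shader_thread_shuffle path can be read off the statement() calls in the diff above. As a sketch reconstructed from those calls (not copied from actual compiler output), subgroupAdd for uint with GroupOperationReduce comes out roughly as:

    uint subgroupAdd(uint v)
    {
        uint reduction = 0u;
        uvec4 active_threads = subgroupBallot(true);
        if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)
        {
            // Fully active subgroup: butterfly reduction in log2(gl_SubgroupSize) steps.
            uint total = gl_SubgroupSize / 2u;
            reduction = v;
            for (uint i = 1u; i <= total; i <<= 1u)
            {
                bool valid;
                uint s = shuffleXorNV(reduction, i, gl_SubgroupSize, valid);
                reduction += valid ? s : 0u;
            }
        }
        else
        {
            // Partially active subgroup: linear pass, masking out inactive invocations via the ballot.
            for (uint i = 0u; i < gl_SubgroupSize; ++i)
            {
                bool valid = subgroupBallotBitExtract(active_threads, i);
                uint s = shuffleNV(v, i, gl_SubgroupSize);
                reduction += valid ? s : 0u;
            }
        }
        return reduction;
    }

The subgroupBallot*/gl_Subgroup* names used here are themselves provided by the KHR-emulation defines emitted earlier in emit_extension_workarounds(), which is why the arithmetic features depend on SubgroupSize, SubgroupBallot, and SubgroupBallotBitExtract in get_feature_dependencies().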