diff --git a/3rdparty/spirv-cross/spirv_cfg.cpp b/3rdparty/spirv-cross/spirv_cfg.cpp index 9081a6646..a938634e2 100644 --- a/3rdparty/spirv-cross/spirv_cfg.cpp +++ b/3rdparty/spirv-cross/spirv_cfg.cpp @@ -135,7 +135,9 @@ bool CFG::post_order_visit(uint32_t block_id) break; case SPIRBlock::MultiSelect: - for (auto &target : block.cases) + { + const auto &cases = compiler.get_case_list(block); + for (const auto &target : cases) { if (post_order_visit(target.block)) add_branch(block_id, target.block); @@ -143,7 +145,7 @@ bool CFG::post_order_visit(uint32_t block_id) if (block.default_block && post_order_visit(block.default_block)) add_branch(block_id, block.default_block); break; - + } default: break; } @@ -385,7 +387,9 @@ void DominatorBuilder::lift_continue_block_dominator() break; case SPIRBlock::MultiSelect: - for (auto &target : block.cases) + { + auto &cases = cfg.get_compiler().get_case_list(block); + for (auto &target : cases) { if (cfg.get_visit_order(target.block) > post_order) back_edge_dominator = true; @@ -393,6 +397,7 @@ void DominatorBuilder::lift_continue_block_dominator() if (block.default_block && cfg.get_visit_order(block.default_block) > post_order) back_edge_dominator = true; break; + } default: break; diff --git a/3rdparty/spirv-cross/spirv_common.hpp b/3rdparty/spirv-cross/spirv_common.hpp index e602fbd44..bb2260e4d 100644 --- a/3rdparty/spirv-cross/spirv_common.hpp +++ b/3rdparty/spirv-cross/spirv_common.hpp @@ -854,10 +854,11 @@ struct SPIRBlock : IVariant struct Case { - uint32_t value; + uint64_t value; BlockID block; }; - SmallVector cases; + SmallVector cases_32bit; + SmallVector cases_64bit; // If we have tried to optimize code for this block but failed, // keep track of this. diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp index 07fdb946f..dc8360663 100644 --- a/3rdparty/spirv-cross/spirv_cross.cpp +++ b/3rdparty/spirv-cross/spirv_cross.cpp @@ -1659,6 +1659,39 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc } } +const SmallVector &Compiler::get_case_list(const SPIRBlock &block) const +{ + uint32_t width = 0; + + // First we check if we can get the type directly from the block.condition + // since it can be a SPIRConstant or a SPIRVariable. + if (const auto *constant = maybe_get(block.condition)) + { + const auto &type = get(constant->constant_type); + width = type.width; + } + else if (const auto *var = maybe_get(block.condition)) + { + const auto &type = get(var->basetype); + width = type.width; + } + else + { + auto search = ir.load_type_width.find(block.condition); + if (search == ir.load_type_width.end()) + { + SPIRV_CROSS_THROW("Use of undeclared variable on a switch statement."); + } + + width = search->second; + } + + if (width > 32) + return block.cases_64bit; + + return block.cases_32bit; +} + bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const { handler.set_current_block(block); @@ -3057,12 +3090,15 @@ void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBl break; case SPIRBlock::MultiSelect: + { notify_variable_access(block.condition, block.self); - for (auto &target : block.cases) + auto &cases = compiler.get_case_list(block); + for (auto &target : cases) test_phi(target.block); if (block.default_block) test_phi(block.default_block); break; + } default: break; @@ -4448,16 +4484,13 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ if (length < 4) return false; - uint32_t result_type = args[0]; - uint32_t result_id = args[1]; - auto &type = compiler.get(result_type); - // If the underlying resource has been used for comparison then duplicate loads of that resource must be too. // This image must be a depth image. + uint32_t result_id = args[1]; uint32_t image = args[2]; uint32_t sampler = args[3]; - if (type.image.depth || dref_combined_samplers.count(result_id) != 0) + if (dref_combined_samplers.count(result_id) != 0) { add_hierarchy_to_comparison_ids(image); @@ -4717,9 +4750,11 @@ bool Compiler::is_desktop_only_format(spv::ImageFormat format) return false; } -bool Compiler::image_is_comparison(const SPIRType &type, uint32_t id) const +// An image is determined to be a depth image if it is marked as a depth image and is not also +// explicitly marked with a color format, or if there are any sample/gather compare operations on it. +bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const { - return type.image.depth || (comparison_ids.count(id) != 0); + return (type.image.depth && type.image.format == ImageFormatUnknown) || comparison_ids.count(id); } bool Compiler::type_is_opaque_value(const SPIRType &type) const diff --git a/3rdparty/spirv-cross/spirv_cross.hpp b/3rdparty/spirv-cross/spirv_cross.hpp index d8967963f..c945401d8 100644 --- a/3rdparty/spirv-cross/spirv_cross.hpp +++ b/3rdparty/spirv-cross/spirv_cross.hpp @@ -1107,7 +1107,7 @@ protected: Bitset combined_decoration_for_member(const SPIRType &type, uint32_t index) const; static bool is_desktop_only_format(spv::ImageFormat format); - bool image_is_comparison(const SPIRType &type, uint32_t id) const; + bool is_depth_image(const SPIRType &type, uint32_t id) const; void set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value = 0); uint32_t get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const; @@ -1135,6 +1135,11 @@ protected: bool is_vertex_like_shader() const; + // Get the correct case list for the OpSwitch, since it can be either a + // 32 bit wide condition or a 64 bit, but the type is not embedded in the + // instruction itself. + const SmallVector &get_case_list(const SPIRBlock &block) const; + private: // Used only to implement the old deprecated get_entry_point() interface. const SPIREntryPoint &get_first_entry_point(const std::string &name) const; diff --git a/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp b/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp index d6cea923e..e7fcdff04 100644 --- a/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp +++ b/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp @@ -83,6 +83,7 @@ ParsedIR &ParsedIR::operator=(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT loop_iteration_depth_soft = other.loop_iteration_depth_soft; meta_needing_name_fixup = std::move(other.meta_needing_name_fixup); + load_type_width = std::move(other.load_type_width); } return *this; } @@ -115,7 +116,9 @@ ParsedIR &ParsedIR::operator=(const ParsedIR &other) addressing_model = other.addressing_model; memory_model = other.memory_model; + meta_needing_name_fixup = other.meta_needing_name_fixup; + load_type_width = other.load_type_width; // Very deliberate copying of IDs. There is no default copy constructor, nor a simple default constructor. // Construct object first so we have the correct allocator set-up, then we can copy object into our new pool group. diff --git a/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp b/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp index 8971a9707..138d9dd43 100644 --- a/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp +++ b/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp @@ -78,6 +78,13 @@ public: SmallVector ids_for_constant_or_type; SmallVector ids_for_constant_or_variable; + // We need to keep track of the width the Ops that contains a type for the + // OpSwitch instruction, since this one doesn't contains the type in the + // instruction itself. And in some case we need to cast the condition to + // wider types. We only need the width to do the branch fixup since the + // type check itself can be done at runtime + std::unordered_map load_type_width; + // Declared capabilities and extensions in the SPIR-V module. // Not really used except for reflection at the moment. SmallVector declared_capabilities; diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index 9578502c1..5c21518f3 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -6215,7 +6215,7 @@ string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtyp // GLES has very limited support for shadow samplers. // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, // everything else can just throw - bool is_comparison = image_is_comparison(imgtype, tex); + bool is_comparison = is_depth_image(imgtype, tex); if (is_comparison && is_legacy_es()) { if (op == "texture" || op == "textureProj") @@ -6842,7 +6842,7 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool expr += ")"; // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here. - if (is_legacy() && image_is_comparison(imgtype, img)) + if (is_legacy() && is_depth_image(imgtype, img)) expr += ".r"; // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. @@ -6853,16 +6853,16 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool const auto *combined = maybe_get(img); VariableID image_id = combined ? combined->image : img; - if (combined && image_is_comparison(imgtype, combined->image)) + if (combined && is_depth_image(imgtype, combined->image)) image_is_depth = true; - else if (image_is_comparison(imgtype, img)) + else if (is_depth_image(imgtype, img)) image_is_depth = true; // We must also check the backing variable for the image. // We might have loaded an OpImage, and used that handle for two different purposes. // Once with comparison, once without. auto *image_variable = maybe_get_backing_variable(image_id); - if (image_variable && image_is_comparison(get(image_variable->basetype), image_variable->self)) + if (image_variable && is_depth_image(get(image_variable->basetype), image_variable->self)) image_is_depth = true; if (image_is_depth) @@ -6930,7 +6930,7 @@ string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args) // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. bool workaround_lod_array_shadow_as_grad = false; if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && - image_is_comparison(imgtype, tex) && args.lod) + is_depth_image(imgtype, tex) && args.lod) { if (!expression_is_constant_null(args.lod)) { @@ -7074,7 +7074,7 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. bool workaround_lod_array_shadow_as_grad = ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && - image_is_comparison(imgtype, img) && args.lod != 0; + is_depth_image(imgtype, img) && args.lod != 0; if (args.dref) { @@ -13392,7 +13392,7 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) // "Shadow" state in GLSL only exists for samplers and combined image samplers. if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && - image_is_comparison(type, id)) + is_depth_image(type, id)) { res += "Shadow"; } @@ -14803,19 +14803,24 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // and let the default: block handle it. // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. - for (auto &c : block.cases) + auto &cases = get_case_list(block); + for (auto &c : cases) { + // It's safe to cast to uint32_t since we actually do a check + // previously that we're not using uint64_t as the switch selector. + auto case_value = static_cast(c.value); + if (c.block != block.next_block && c.block != block.default_block) { if (!case_constructs.count(c.block)) block_declaration_order.push_back(c.block); - case_constructs[c.block].push_back(c.value); + case_constructs[c.block].push_back(case_value); } else if (c.block == block.next_block && block.default_block != block.next_block) { // We might have to flush phi inside specific case labels. // If we can piggyback on default:, do so instead. - literals_to_merge.push_back(c.value); + literals_to_merge.push_back(case_value); } } @@ -14935,7 +14940,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate // non-structured exits with the help of a switch block. // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic. - bool degenerate_switch = block.default_block != block.merge_block && block.cases.empty(); + bool degenerate_switch = block.default_block != block.merge_block && block.cases_32bit.empty(); if (degenerate_switch || is_legacy_es()) { @@ -15831,7 +15836,7 @@ void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs() bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const { - return image_is_comparison(get(get(id).basetype), id); + return is_depth_image(get(get(id).basetype), id); } const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c) diff --git a/3rdparty/spirv-cross/spirv_hlsl.cpp b/3rdparty/spirv-cross/spirv_hlsl.cpp index 1f04c400a..bdcb6dd37 100644 --- a/3rdparty/spirv-cross/spirv_hlsl.cpp +++ b/3rdparty/spirv-cross/spirv_hlsl.cpp @@ -2380,7 +2380,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret arg_type.image.dim != DimBuffer) { // Manufacture automatic sampler arg for SampledImage texture - arglist.push_back(join(image_is_comparison(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ", + arglist.push_back(join(is_depth_image(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ", to_sampler_expression(arg.id), type_to_array_glsl(arg_type))); } @@ -2910,7 +2910,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse) { texop += img_expr; - if (image_is_comparison(imgtype, img)) + if (is_depth_image(imgtype, img)) { if (gather) { @@ -3386,7 +3386,7 @@ void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) { // For combined image samplers, also emit a combined image sampler. - if (image_is_comparison(type, var.self)) + if (is_depth_image(type, var.self)) statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type), to_resource_binding_sampler(var), ";"); else diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index 3f53ebd58..fabf2e9cc 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -3364,16 +3364,22 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) // It's not enough to simply avoid marking fragment outputs if the pipeline won't // accept them. We can't put them in the struct at all, or otherwise the compiler // complains that the outputs weren't explicitly marked. + // Frag depth and stencil outputs are incompatible with explicit early fragment tests. + // In GLSL, depth and stencil outputs are just ignored when explicit early fragment tests are required. + // In Metal, it's a compilation error, so we need to exclude them from the output struct. if (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput && !patch && - ((is_builtin && ((bi_type == BuiltInFragDepth && !msl_options.enable_frag_depth_builtin) || - (bi_type == BuiltInFragStencilRefEXT && !msl_options.enable_frag_stencil_ref_builtin))) || + ((is_builtin && ((bi_type == BuiltInFragDepth && (!msl_options.enable_frag_depth_builtin || uses_explicit_early_fragment_test())) || + (bi_type == BuiltInFragStencilRefEXT && (!msl_options.enable_frag_stencil_ref_builtin || uses_explicit_early_fragment_test())))) || (!is_builtin && !(msl_options.enable_frag_output_mask & (1 << location))))) { hidden = true; disabled_frag_outputs.push_back(var_id); - // If a builtin, force it to have the proper name. + // If a builtin, force it to have the proper name, and mark it as not part of the output struct. if (is_builtin) + { set_name(var_id, builtin_to_glsl(bi_type, StorageClassFunction)); + mask_stage_output_by_builtin(bi_type); + } } // Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments. @@ -9393,8 +9399,6 @@ static bool needs_chroma_reconstruction(const MSLConstexprSampler *constexpr_sam string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args) { VariableID img = args.base.img; - auto &imgtype = *args.base.imgtype; - const MSLConstexprSampler *constexpr_sampler = nullptr; bool is_dynamic_img_sampler = false; if (auto *var = maybe_get_backing_variable(img)) @@ -9408,8 +9412,9 @@ string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args) if (msl_options.swizzle_texture_samples && args.base.is_gather && !is_dynamic_img_sampler && (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable)) { - add_spv_func_and_recompile(imgtype.image.depth ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle); - return imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; + bool is_compare = comparison_ids.count(img); + add_spv_func_and_recompile(is_compare ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle); + return is_compare ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; } auto *combined = maybe_get(img); @@ -10021,7 +10026,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool image_var = var->self; } - if (image_var == 0 || !image_is_comparison(expression_type(image_var), image_var)) + if (image_var == 0 || !is_depth_image(expression_type(image_var), image_var)) farg_str += ", " + to_component_argument(args.component); } } @@ -11157,10 +11162,7 @@ string CompilerMSL::func_type_decl(SPIRType &type) execution.output_vertices, ") ]] vertex"); break; case ExecutionModelFragment: - entry_type = execution.flags.get(ExecutionModeEarlyFragmentTests) || - execution.flags.get(ExecutionModePostDepthCoverage) ? - "[[ early_fragment_tests ]] fragment" : - "fragment"; + entry_type = uses_explicit_early_fragment_test() ? "[[ early_fragment_tests ]] fragment" : "fragment"; break; case ExecutionModelTessellationControl: if (!msl_options.supports_msl_version(1, 2)) @@ -11180,6 +11182,12 @@ string CompilerMSL::func_type_decl(SPIRType &type) return entry_type + " " + return_type; } +bool CompilerMSL::uses_explicit_early_fragment_test() +{ + auto &ep_flags = get_entry_point().flags; + return ep_flags.get(ExecutionModeEarlyFragmentTests) || ep_flags.get(ExecutionModePostDepthCoverage); +} + // In MSL, address space qualifiers are required for all pointer or reference variables string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) { @@ -13631,7 +13639,7 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) // Bypass pointers because we need the real image struct auto &img_type = get(type.self).image; - if (image_is_comparison(type, id)) + if (is_depth_image(type, id)) { switch (img_type.dim) { diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp index d1d2ef3e6..f01cceaf7 100644 --- a/3rdparty/spirv-cross/spirv_msl.hpp +++ b/3rdparty/spirv-cross/spirv_msl.hpp @@ -932,6 +932,8 @@ protected: void build_implicit_builtins(); uint32_t build_constant_uint_array_pointer(); void emit_entry_point_declarations() override; + bool uses_explicit_early_fragment_test(); + uint32_t builtin_frag_coord_id = 0; uint32_t builtin_sample_id_id = 0; uint32_t builtin_sample_mask_id = 0; diff --git a/3rdparty/spirv-cross/spirv_parser.cpp b/3rdparty/spirv-cross/spirv_parser.cpp index 58fcec101..da39367db 100644 --- a/3rdparty/spirv-cross/spirv_parser.cpp +++ b/3rdparty/spirv-cross/spirv_parser.cpp @@ -1018,8 +1018,21 @@ void Parser::parse(const Instruction &instruction) current_block->condition = ops[0]; current_block->default_block = ops[1]; - for (uint32_t i = 2; i + 2 <= length; i += 2) - current_block->cases.push_back({ ops[i], ops[i + 1] }); + uint32_t remaining_ops = length - 2; + if ((remaining_ops % 2) == 0) + { + for (uint32_t i = 2; i + 2 <= length; i += 2) + current_block->cases_32bit.push_back({ ops[i], ops[i + 1] }); + } + + if ((remaining_ops % 3) == 0) + { + for (uint32_t i = 2; i + 3 <= length; i += 3) + { + uint64_t value = (static_cast(ops[i]) << 32) | ops[i + 1]; + current_block->cases_64bit.push_back({ value, ops[i + 2] }); + } + } // If we jump to next block, make it break instead since we're inside a switch case block at that point. ir.block_meta[current_block->next_block] |= ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT; @@ -1177,6 +1190,14 @@ void Parser::parse(const Instruction &instruction) // Actual opcodes. default: { + if (length >= 2) + { + const auto *type = maybe_get(ops[0]); + if (type) + { + ir.load_type_width.insert({ ops[1], type->width }); + } + } if (!current_block) SPIRV_CROSS_THROW("Currently no block to insert opcode.");