From 189b020b90f3ddf7e8adba104d0a64da48234540 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?=
 =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?=
Date: Sun, 8 May 2022 09:42:45 -0700
Subject: [PATCH] Updated spirv-cross.

---
 3rdparty/spirv-cross/main.cpp          |   6 +
 3rdparty/spirv-cross/spirv_common.hpp  |   8 +-
 3rdparty/spirv-cross/spirv_cross.cpp   |  48 +--
 3rdparty/spirv-cross/spirv_cross.hpp   |   8 +
 3rdparty/spirv-cross/spirv_glsl.cpp    | 472 ++++++++++++++++++++++---
 3rdparty/spirv-cross/spirv_glsl.hpp    |  15 +
 3rdparty/spirv-cross/spirv_hlsl.cpp    |  60 +++-
 3rdparty/spirv-cross/spirv_msl.cpp     |  50 ++-
 3rdparty/spirv-cross/spirv_parser.cpp  |   2 +
 3rdparty/spirv-cross/spirv_reflect.cpp |  24 +-
 10 files changed, 599 insertions(+), 94 deletions(-)

diff --git a/3rdparty/spirv-cross/main.cpp b/3rdparty/spirv-cross/main.cpp
index 9124ecaf9..c4ff4c4e1 100644
--- a/3rdparty/spirv-cross/main.cpp
+++ b/3rdparty/spirv-cross/main.cpp
@@ -400,6 +400,12 @@ static void print_resources(const Compiler &compiler, const char *tag, const Sma
 			fprintf(stderr, " writeonly");
 		if (mask.get(DecorationNonWritable))
 			fprintf(stderr, " readonly");
+		if (mask.get(DecorationRestrict))
+			fprintf(stderr, " restrict");
+		if (mask.get(DecorationCoherent))
+			fprintf(stderr, " coherent");
+		if (mask.get(DecorationVolatile))
+			fprintf(stderr, " volatile");
 		if (is_sized_block)
 		{
 			fprintf(stderr, " (BlockSize : %u bytes)", block_size);
diff --git a/3rdparty/spirv-cross/spirv_common.hpp b/3rdparty/spirv-cross/spirv_common.hpp
index 4aaa71488..1c8a7253c 100644
--- a/3rdparty/spirv-cross/spirv_common.hpp
+++ b/3rdparty/spirv-cross/spirv_common.hpp
@@ -24,7 +24,11 @@
 #ifndef SPIRV_CROSS_COMMON_HPP
 #define SPIRV_CROSS_COMMON_HPP
 
+#ifndef SPV_ENABLE_UTILITY_CODE
+#define SPV_ENABLE_UTILITY_CODE
+#endif
 #include "spirv.hpp"
+
 #include "spirv_cross_containers.hpp"
 #include "spirv_cross_error_handling.hpp"
 #include <functional>
@@ -638,7 +642,8 @@ struct SPIRExtension : IVariant
 		SPV_AMD_shader_ballot,
 		SPV_AMD_shader_explicit_vertex_parameter,
 		SPV_AMD_shader_trinary_minmax,
-		SPV_AMD_gcn_shader
+		SPV_AMD_gcn_shader,
+		NonSemanticDebugPrintf
 	};
 
 	explicit SPIRExtension(Extension ext_)
@@ -1563,6 +1568,7 @@ struct AccessChainMeta
 	bool storage_is_packed = false;
 	bool storage_is_invariant = false;
 	bool flattened_struct = false;
+	bool relaxed_precision = false;
};

enum ExtendedDecorations
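Context for the new SPV_ENABLE_UTILITY_CODE guard: SPIRV-Headers only compiles the utility function spv::HasResultAndType() (which the spirv_cross.cpp change below starts using) when that macro is defined before spirv.hpp is included. A minimal standalone sketch, not part of the patch:

    #define SPV_ENABLE_UTILITY_CODE
    #include "spirv.hpp"
    #include <cstdio>

    int main()
    {
        bool has_result = false, has_type = false;
        spv::HasResultAndType(spv::OpFAdd, &has_result, &has_type);
        std::printf("OpFAdd:  result=%d, type=%d\n", has_result, has_type); // 1, 1
        spv::HasResultAndType(spv::OpStore, &has_result, &has_type);
        std::printf("OpStore: result=%d, type=%d\n", has_result, has_type); // 0, 0
    }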
diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp
index a68ef7578..0617a1495 100644
--- a/3rdparty/spirv-cross/spirv_cross.cpp
+++ b/3rdparty/spirv-cross/spirv_cross.cpp
@@ -4710,46 +4710,22 @@ bool Compiler::reflection_ssbo_instance_name_is_significant() const
 	return aliased_ssbo_types;
 }
 
-bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args,
-                                          uint32_t length)
+bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op,
+                                          const uint32_t *args, uint32_t length)
 {
-	// Most instructions follow the pattern of <result-type> <result-id> <arguments>.
-	// There are some exceptions.
-	switch (op)
-	{
-	case OpStore:
-	case OpCopyMemory:
-	case OpCopyMemorySized:
-	case OpImageWrite:
-	case OpAtomicStore:
-	case OpAtomicFlagClear:
-	case OpEmitStreamVertex:
-	case OpEndStreamPrimitive:
-	case OpControlBarrier:
-	case OpMemoryBarrier:
-	case OpGroupWaitEvents:
-	case OpRetainEvent:
-	case OpReleaseEvent:
-	case OpSetUserEventStatus:
-	case OpCaptureEventProfilingInfo:
-	case OpCommitReadPipe:
-	case OpCommitWritePipe:
-	case OpGroupCommitReadPipe:
-	case OpGroupCommitWritePipe:
-	case OpLine:
-	case OpNoLine:
+	if (length < 2)
 		return false;
 
-	default:
-		if (length > 1 && maybe_get<SPIRType>(args[0]) != nullptr)
-		{
-			result_type = args[0];
-			result_id = args[1];
-			return true;
-		}
-		else
-			return false;
+	bool has_result_id = false, has_result_type = false;
+	HasResultAndType(op, &has_result_id, &has_result_type);
+	if (has_result_id && has_result_type)
+	{
+		result_type = args[0];
+		result_id = args[1];
+		return true;
 	}
+	else
+		return false;
 }
 
 Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const
diff --git a/3rdparty/spirv-cross/spirv_cross.hpp b/3rdparty/spirv-cross/spirv_cross.hpp
index 4a8811977..1d7e7c480 100644
--- a/3rdparty/spirv-cross/spirv_cross.hpp
+++ b/3rdparty/spirv-cross/spirv_cross.hpp
@@ -24,6 +24,9 @@
 #ifndef SPIRV_CROSS_HPP
 #define SPIRV_CROSS_HPP
 
+#ifndef SPV_ENABLE_UTILITY_CODE
+#define SPV_ENABLE_UTILITY_CODE
+#endif
 #include "spirv.hpp"
 #include "spirv_cfg.hpp"
 #include "spirv_cross_parsed_ir.hpp"
@@ -556,6 +559,11 @@ protected:
 		}
 	}
 
+	uint32_t *stream_mutable(const Instruction &instr) const
+	{
+		return const_cast<uint32_t *>(stream(instr));
+	}
+
 	ParsedIR ir;
 	// Marks variables which have global scope and variables which can alias with other variables
 	// (SSBO, image load store, etc)
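To illustrate the simplified contract: any opcode that HasResultAndType() reports as having both a result type and a result id yields them from args[0..1]; everything else fails uniformly, without the old hand-maintained exception switch. A hypothetical call (the helper is protected, so treat this as pseudo-usage from a Compiler subclass; IDs are made up):

    uint32_t result_type = 0, result_id = 0;
    const uint32_t fadd_args[] = { 10 /* %float */, 11 /* %r */, 12, 13 };
    bool ok = instruction_to_result_type(result_type, result_id, spv::OpFAdd, fadd_args, 4);
    // ok == true, result_type == 10, result_id == 11.

    const uint32_t store_args[] = { 20, 21 };
    ok = instruction_to_result_type(result_type, result_id, spv::OpStore, store_args, 2);
    // ok == false: OpStore has neither a result id nor a result type.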
diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp
index 4051dd654..9779c3941 100644
--- a/3rdparty/spirv-cross/spirv_glsl.cpp
+++ b/3rdparty/spirv-cross/spirv_glsl.cpp
@@ -649,8 +649,9 @@ string CompilerGLSL::compile()
 	backend.supports_extensions = true;
 	backend.use_array_constructor = true;
 	backend.workgroup_size_is_hidden = true;
-
-	backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320);
+	backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics;
+	backend.support_precise_qualifier =
+	    (!options.es && options.version >= 400) || (options.es && options.version >= 320);
 
 	if (is_legacy_es())
 		backend.support_case_fallthrough = false;
@@ -2130,9 +2131,8 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
 	// OpenGL has no concept of push constant blocks, implement it as a uniform struct.
 	auto &type = get<SPIRType>(var.basetype);
 
-	auto &flags = ir.meta[var.self].decoration.decoration_flags;
-	flags.clear(DecorationBinding);
-	flags.clear(DecorationDescriptorSet);
+	unset_decoration(var.self, DecorationBinding);
+	unset_decoration(var.self, DecorationDescriptorSet);
 
 #if 0
 	if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
@@ -2142,14 +2142,13 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
 	// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
 	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
-	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
-	bool block_flag = block_flags.get(DecorationBlock);
-	block_flags.clear(DecorationBlock);
+	bool block_flag = has_decoration(type.self, DecorationBlock);
+	unset_decoration(type.self, DecorationBlock);
 
 	emit_struct(type);
 
 	if (block_flag)
-		block_flags.set(DecorationBlock);
+		set_decoration(type.self, DecorationBlock);
 
 	emit_uniform(var);
 	statement("");
@@ -2986,11 +2985,10 @@ void CompilerGLSL::fixup_image_load_store_access()
 		// Solve this by making the image access as restricted as possible and loosen up if we need to.
 		// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
 
-		auto &flags = ir.meta[var].decoration.decoration_flags;
-		if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
+		if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable))
 		{
-			flags.set(DecorationNonWritable);
-			flags.set(DecorationNonReadable);
+			set_decoration(var, DecorationNonWritable);
+			set_decoration(var, DecorationNonReadable);
 		}
 	});
 }
@@ -4310,6 +4308,73 @@ void CompilerGLSL::force_temporary_and_recompile(uint32_t id)
 	force_recompile();
 }
 
+uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision)
+{
+	// Constants do not have innate precision.
+	if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp)
+		return id;
+
+	// Ignore anything that isn't 32-bit values.
+	auto &type = get<SPIRType>(type_id);
+	if (type.pointer)
+		return id;
+	if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int)
+		return id;
+
+	if (precision == Options::DontCare)
+	{
+		// If precision is consumed as don't care (operations only consisting of constants),
+		// we need to bind the expression to a temporary,
+		// otherwise we have no way of controlling the precision later.
+		auto itr = forced_temporaries.insert(id);
+		if (itr.second)
+			force_recompile_guarantee_forward_progress();
+		return id;
+	}
+
+	auto current_precision = has_decoration(id, DecorationRelaxedPrecision) ? Options::Mediump : Options::Highp;
+	if (current_precision == precision)
+		return id;
+
+	auto itr = temporary_to_mirror_precision_alias.find(id);
+	if (itr == temporary_to_mirror_precision_alias.end())
+	{
+		uint32_t alias_id = ir.increase_bound_by(1);
+		auto &m = ir.meta[alias_id];
+		if (auto *input_m = ir.find_meta(id))
+			m = *input_m;
+
+		const char *prefix;
+		if (precision == Options::Mediump)
+		{
+			set_decoration(alias_id, DecorationRelaxedPrecision);
+			prefix = "mp_copy_";
+		}
+		else
+		{
+			unset_decoration(alias_id, DecorationRelaxedPrecision);
+			prefix = "hp_copy_";
+		}
+
+		auto alias_name = join(prefix, to_name(id));
+		ParsedIR::sanitize_underscores(alias_name);
+		set_name(alias_id, alias_name);
+
+		emit_op(type_id, alias_id, to_expression(id), true);
+		temporary_to_mirror_precision_alias[id] = alias_id;
+		forced_temporaries.insert(id);
+		forced_temporaries.insert(alias_id);
+		force_recompile_guarantee_forward_progress();
+		id = alias_id;
+	}
+	else
+	{
+		id = itr->second;
+	}
+
+	return id;
+}
+
 void CompilerGLSL::handle_invalid_expression(uint32_t id)
 {
 	// We tried to read an invalidated expression.
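The effect of consume_temporary_in_precision_context() is easiest to see in the GLSL it produces. A conceptual sketch (identifiers are illustrative, not actual output):

    // SPIR-V side: %a carries RelaxedPrecision but is consumed by a highp OpFAdd.
    // GLSL side, after the mirror alias is created (note the hp_copy_/mp_copy_ prefixes):
    //
    //     mediump float a = ...;
    //     highp float hp_copy_a = a;      // alias emitted by the function above
    //     highp float r = hp_copy_a + b;  // the highp add now consumes a highp ID
    //
    // The opposite direction, a mediump operation consuming a highp value, goes
    // through an mp_copy_ alias in exactly the same way.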
@@ -4710,7 +4775,20 @@ string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
 		// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
 		// the variable has not been declared yet.
 		if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
-			return to_expression(var.static_expression);
+		{
+			// We might try to load from a loop variable before it has been initialized.
+			// Prefer static expression and fallback to initializer.
+			if (var.static_expression)
+				return to_expression(var.static_expression);
+			else if (var.initializer)
+				return to_expression(var.initializer);
+			else
+			{
+				// We cannot declare the variable yet, so have to fake it.
+				uint32_t undef_id = ir.increase_bound_by(1);
+				return emit_uninitialized_temporary_expression(get_variable_data_type_id(var), undef_id).expression;
+			}
+		}
 		else if (var.deferred_declaration)
 		{
 			var.deferred_declaration = false;
@@ -5679,7 +5757,7 @@ void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t r
 {
 	// If we're declaring temporaries inside continue blocks,
 	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
-	if (current_continue_block && !hoisted_temporaries.count(result_id))
+	if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
 	{
 		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
 		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
@@ -5695,7 +5773,7 @@ void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t r
 	else if (hoisted_temporaries.count(result_id) == 0)
 	{
 		auto &type = get<SPIRType>(result_type);
-		auto &flags = ir.meta[result_id].decoration.decoration_flags;
+		auto &flags = get_decoration_bitset(result_id);
 
 		// The result_id has not been made into an expression yet, so use flags interface.
 		add_local_variable_name(result_id);
@@ -5711,11 +5789,10 @@ void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t r
 string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
 {
 	auto &type = get<SPIRType>(result_type);
-	auto &flags = ir.meta[result_id].decoration.decoration_flags;
 
 	// If we're declaring temporaries inside continue blocks,
 	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
-	if (current_continue_block && !hoisted_temporaries.count(result_id))
+	if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
 	{
 		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
 		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
@@ -5725,7 +5802,7 @@
 		{
 			header.declare_temporary.emplace_back(result_type, result_id);
 			hoisted_temporaries.insert(result_id);
-			force_recompile();
+			force_recompile_guarantee_forward_progress();
 		}
 
 		return join(to_name(result_id), " = ");
@@ -5739,6 +5816,7 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
 	{
 		// The result_id has not been made into an expression yet, so use flags interface.
 		add_local_variable_name(result_id);
+		auto &flags = get_decoration_bitset(result_id);
 		return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
 	}
 }
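A loop variable may be read before the for-loop that declares it, and the new fallback chain in to_expression() picks the best available value. Sketch of the shape this handles (hypothetical GLSL):

    // int i;                  // loop variable; its declaration is deferred to the for-loop
    // x = i;                  // read before the loop header resolves to:
    //                         //   1) static_expression, if the loop assigned one,
    //                         //   2) else the variable's initializer,
    //                         //   3) else a freshly faked uninitialized temporary (undef)
    // for (i = 0; i < n; i++) { ... }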
@@ -8766,6 +8844,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 	bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
 	uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
 	bool is_invariant = has_decoration(base, DecorationInvariant);
+	bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision);
 	bool pending_array_enclose = false;
 	bool dimension_flatten = false;
 
@@ -8953,6 +9032,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 
 			if (has_member_decoration(type->self, index, DecorationInvariant))
 				is_invariant = true;
+			if (has_member_decoration(type->self, index, DecorationRelaxedPrecision))
+				relaxed_precision = true;
 
 			is_packed = member_is_packed_physical_type(*type, index);
 			if (member_is_remapped_physical_type(*type, index))
@@ -9098,6 +9179,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		meta->storage_is_packed = is_packed;
 		meta->storage_is_invariant = is_invariant;
 		meta->storage_physical_type = physical_type;
+		meta->relaxed_precision = relaxed_precision;
 	}
 
 	return expr;
@@ -10021,8 +10103,51 @@ void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
 
 void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
 {
 	current_emitting_block = &block;
+
+	if (backend.requires_relaxed_precision_analysis)
+	{
+		// If PHI variables are consumed in unexpected precision contexts, copy them here.
+		for (auto &phi : block.phi_variables)
+		{
+			auto itr = temporary_to_mirror_precision_alias.find(phi.function_variable);
+			if (itr != temporary_to_mirror_precision_alias.end())
+			{
+				// Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
+				// so it helps to have handle_instruction_precision() on the outside of emit_instruction().
+				EmbeddedInstruction inst;
+				inst.op = OpCopyObject;
+				inst.length = 3;
+				inst.ops.push_back(expression_type_id(itr->first));
+				inst.ops.push_back(itr->second);
+				inst.ops.push_back(itr->first);
+				emit_instruction(inst);
+			}
+		}
+	}
+
 	for (auto &op : block.ops)
+	{
+		auto temporary_copy = handle_instruction_precision(op);
 		emit_instruction(op);
+		if (temporary_copy.dst_id)
+		{
+			// Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
+			// so it helps to have handle_instruction_precision() on the outside of emit_instruction().
+			EmbeddedInstruction inst;
+			inst.op = OpCopyObject;
+			inst.length = 3;
+			inst.ops.push_back(expression_type_id(temporary_copy.src_id));
+			inst.ops.push_back(temporary_copy.dst_id);
+			inst.ops.push_back(temporary_copy.src_id);
+
+			// Never attempt to hoist mirrored temporaries.
+			// They are hoisted in lock-step with their parents.
+			block_temporary_hoisting = true;
+			emit_instruction(inst);
+			block_temporary_hoisting = false;
+		}
+	}
+
 	current_emitting_block = nullptr;
 }
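The PHI path and the per-instruction path above build the same three-word OpCopyObject; condensed into one helper for illustration (EmbeddedInstruction is the spirv-cross type the patch already uses; the helper itself is hypothetical):

    // Builds %dst = OpCopyObject %type %src. EmbeddedInstruction owns its operand
    // list (a SmallVector<uint32_t>), unlike Instruction which indexes the IR stream.
    static EmbeddedInstruction make_copy_object(uint32_t type_id, uint32_t dst_id, uint32_t src_id)
    {
        EmbeddedInstruction inst;
        inst.op = OpCopyObject;
        inst.length = 3;
        inst.ops.push_back(type_id); // result type
        inst.ops.push_back(dst_id);  // result id: the mirror temporary
        inst.ops.push_back(src_id);  // operand: the original temporary
        return inst;
    }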
@@ -10154,6 +10279,233 @@ uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, con
 	}
 }
 
+void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length)
+{
+	// Only GLSL supports RelaxedPrecision directly.
+	// We cannot implement this in HLSL or MSL because it is tied to the type system.
+	// In SPIR-V, everything must masquerade as 32-bit.
+	if (!backend.requires_relaxed_precision_analysis)
+		return;
+
+	auto input_precision = analyze_expression_precision(args, length);
+
+	// For expressions which are loaded or directly forwarded, we inherit mediump implicitly.
+	// For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id.
+	if (input_precision == Options::Mediump)
+		set_decoration(dst_id, DecorationRelaxedPrecision);
+}
+
+CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const
+{
+	// Now, analyze the precision at which the arguments would run.
+	// GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision
+	// for the inputs. Constants do not have inherent precision and do not contribute to this decision.
+	// If all inputs are constants, they inherit precision from outer expressions, including an l-value.
+	// In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with
+	// correct precision.
+	bool expression_has_highp = false;
+	bool expression_has_mediump = false;
+
+	for (uint32_t i = 0; i < length; i++)
+	{
+		uint32_t arg = args[i];
+		if (ir.ids[arg].get_type() == TypeConstant)
+			continue;
+
+		if (has_decoration(arg, DecorationRelaxedPrecision))
+			expression_has_mediump = true;
+		else
+			expression_has_highp = true;
+	}
+
+	if (expression_has_highp)
+		return Options::Highp;
+	else if (expression_has_mediump)
+		return Options::Mediump;
+	else
+		return Options::DontCare;
+}
+
+void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length)
+{
+	if (!backend.requires_relaxed_precision_analysis)
+		return;
+
+	auto &type = get<SPIRType>(type_id);
+
+	// RelaxedPrecision only applies to 32-bit values.
+	if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt)
+		return;
+
+	bool operation_is_highp = !has_decoration(dst_id, DecorationRelaxedPrecision);
+
+	auto input_precision = analyze_expression_precision(args, length);
+	if (input_precision == Options::DontCare)
+	{
+		consume_temporary_in_precision_context(type_id, dst_id, input_precision);
+		return;
+	}
+
+	// In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined.
+	// In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit.
+	// However, if the expression is not, inputs must be expanded to 32-bit first,
+	// since the operation must run at high precision.
+	// This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump,
+	// we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations
+	// and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables.
+	if ((operation_is_highp && input_precision == Options::Mediump) ||
+	    (!operation_is_highp && input_precision == Options::Highp))
+	{
+		auto precision = operation_is_highp ? Options::Highp : Options::Mediump;
+		for (uint32_t i = 0; i < length; i++)
+		{
+			// Rewrites the opcode so that we consume an ID in correct precision context.
+			// This is pretty hacky, but it's the most straight forward way of implementing this without adding
+			// lots of extra passes to rewrite all code blocks.
+			args[i] = consume_temporary_in_precision_context(expression_type_id(args[i]), args[i], precision);
+		}
+	}
+}
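// --- Illustration only (not emitted code): the inference rules above as unit-style
// checks. mediump_id, highp_id and constant_id are hypothetical IDs whose decorations
// match their names.
//
//   uint32_t mixed[] = { mediump_id, highp_id };
//   assert(analyze_expression_precision(mixed, 2) == Options::Highp);    // highest wins
//
//   uint32_t relaxed[] = { mediump_id, mediump_id };
//   assert(analyze_expression_precision(relaxed, 2) == Options::Mediump);
//
//   uint32_t constants[] = { constant_id, constant_id };
//   assert(analyze_expression_precision(constants, 2) == Options::DontCare);
//   // DontCare forces a temporary so an outer consumer can pin the precision later.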
+
+// This is probably not exhaustive ...
+static bool opcode_is_precision_sensitive_operation(Op op)
+{
+	switch (op)
+	{
+	case OpFAdd:
+	case OpFSub:
+	case OpFMul:
+	case OpFNegate:
+	case OpIAdd:
+	case OpISub:
+	case OpIMul:
+	case OpSNegate:
+	case OpFMod:
+	case OpFDiv:
+	case OpFRem:
+	case OpSMod:
+	case OpSDiv:
+	case OpSRem:
+	case OpUMod:
+	case OpUDiv:
+	case OpVectorTimesMatrix:
+	case OpMatrixTimesVector:
+	case OpMatrixTimesMatrix:
+	case OpDPdx:
+	case OpDPdy:
+	case OpDPdxCoarse:
+	case OpDPdyCoarse:
+	case OpDPdxFine:
+	case OpDPdyFine:
+	case OpFwidth:
+	case OpFwidthCoarse:
+	case OpFwidthFine:
+	case OpVectorTimesScalar:
+	case OpMatrixTimesScalar:
+	case OpOuterProduct:
+	case OpFConvert:
+	case OpSConvert:
+	case OpUConvert:
+	case OpConvertSToF:
+	case OpConvertUToF:
+	case OpConvertFToU:
+	case OpConvertFToS:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+// Instructions which just load data but don't do any arithmetic operation should just inherit the decoration.
+// SPIR-V doesn't require this, but it's somewhat implied it has to work this way, relaxed precision is only
+// relevant when operating on the IDs, not when shuffling things around.
+static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count)
+{
+	switch (op)
+	{
+	case OpLoad:
+	case OpAccessChain:
+	case OpInBoundsAccessChain:
+	case OpCompositeExtract:
+	case OpVectorExtractDynamic:
+	case OpSampledImage:
+	case OpImage:
+	case OpCopyObject:
+
+	case OpImageRead:
+	case OpImageFetch:
+	case OpImageSampleImplicitLod:
+	case OpImageSampleProjImplicitLod:
+	case OpImageSampleDrefImplicitLod:
+	case OpImageSampleProjDrefImplicitLod:
+	case OpImageSampleExplicitLod:
+	case OpImageSampleProjExplicitLod:
+	case OpImageSampleDrefExplicitLod:
+	case OpImageSampleProjDrefExplicitLod:
+	case OpImageGather:
+	case OpImageDrefGather:
+	case OpImageSparseRead:
+	case OpImageSparseFetch:
+	case OpImageSparseSampleImplicitLod:
+	case OpImageSparseSampleProjImplicitLod:
+	case OpImageSparseSampleDrefImplicitLod:
+	case OpImageSparseSampleProjDrefImplicitLod:
+	case OpImageSparseSampleExplicitLod:
+	case OpImageSparseSampleProjExplicitLod:
+	case OpImageSparseSampleDrefExplicitLod:
+	case OpImageSparseSampleProjDrefExplicitLod:
+	case OpImageSparseGather:
+	case OpImageSparseDrefGather:
+		arg_count = 1;
+		return true;
+
+	case OpVectorShuffle:
+		arg_count = 2;
+		return true;
+
+	case OpCompositeConstruct:
+		return true;
+
+	default:
+		break;
+	}
+
+	return false;
+}
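// Note on arg_count (illustration): for OpVectorShuffle only ops[2] and ops[3] are
// value IDs; the trailing words are literal lane indices, not IDs, and must not be
// analyzed as expressions. The table clamps the by-reference arg_count before the
// caller forwards precision:
//
//   uint32_t arg_count = length - 2;                           // words after <type, id>
//   if (opcode_is_precision_forwarding_instruction(op, arg_count))
//       forward_relaxed_precision(ops[1], &ops[2], arg_count); // arg_count is now 2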
+
+CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction)
+{
+	auto ops = stream_mutable(instruction);
+	auto opcode = static_cast<Op>(instruction.op);
+	uint32_t length = instruction.length;
+
+	if (backend.requires_relaxed_precision_analysis)
+	{
+		if (length > 2)
+		{
+			uint32_t forwarding_length = length - 2;
+
+			if (opcode_is_precision_sensitive_operation(opcode))
+				analyze_precision_requirements(ops[0], ops[1], &ops[2], forwarding_length);
+			else if (opcode == OpExtInst && length >= 5 && get<SPIRExtension>(ops[2]).ext == SPIRExtension::GLSL)
+				analyze_precision_requirements(ops[0], ops[1], &ops[4], forwarding_length - 2);
+			else if (opcode_is_precision_forwarding_instruction(opcode, forwarding_length))
+				forward_relaxed_precision(ops[1], &ops[2], forwarding_length);
+		}
+
+		uint32_t result_type = 0, result_id = 0;
+		if (instruction_to_result_type(result_type, result_id, opcode, ops, length))
+		{
+			auto itr = temporary_to_mirror_precision_alias.find(ops[1]);
+			if (itr != temporary_to_mirror_precision_alias.end())
+				return { itr->second, itr->first };
+		}
+	}
+
+	return {};
+}
+
 void CompilerGLSL::emit_instruction(const Instruction &instruction)
 {
 	auto ops = stream(instruction);
@@ -10350,6 +10702,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			set_decoration(ops[1], DecorationInvariant);
 		if (meta.flattened_struct)
 			flattened_structs[ops[1]] = true;
+		if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
+			set_decoration(ops[1], DecorationRelaxedPrecision);
 
 		// If we have some expression dependencies in our access chain, this access chain is technically a forwarded
 		// temporary which could be subject to invalidation.
@@ -10714,6 +11068,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
 			inherit_expression_dependencies(id, ops[2]);
 			e->base_expression = ops[2];
+
+			if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
+				set_decoration(ops[1], DecorationRelaxedPrecision);
 		}
 		else
 		{
@@ -10829,8 +11186,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		{
 			// Need a copy.
 			// For pointer types, we copy the pointer itself.
-			statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
-			set<SPIRExpression>(id, to_name(id), result_type, true);
+			emit_op(result_type, id, to_unpacked_expression(rhs), false);
 		}
 		else
 		{
@@ -11971,10 +12327,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto *var = maybe_get_backing_variable(ops[2]);
 		if (var)
 		{
-			auto &flags = ir.meta[var->self].decoration.decoration_flags;
+			auto &flags = get_decoration_bitset(var->self);
 			if (flags.get(DecorationNonReadable))
 			{
-				flags.clear(DecorationNonReadable);
+				unset_decoration(var->self, DecorationNonReadable);
 				force_recompile();
 			}
 		}
@@ -12163,10 +12519,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto *var = maybe_get_backing_variable(ops[0]);
 		if (var)
 		{
-			auto &flags = ir.meta[var->self].decoration.decoration_flags;
-			if (flags.get(DecorationNonWritable))
+			if (has_decoration(var->self, DecorationNonWritable))
 			{
-				flags.clear(DecorationNonWritable);
+				unset_decoration(var->self, DecorationNonWritable);
 				force_recompile();
 			}
 		}
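The decoration loosening above (and in check_atomic_image further down) leans on spirv-cross's multi-pass compilation: clearing a decoration mid-emission marks the pass invalid and the driver loop runs again with the relaxed flags. Roughly, with hypothetical helper names:

    string result;
    uint32_t pass_count = 0;
    do
    {
        reset(pass_count);    // per-pass reset; the exact signature is an assumption here
        result = emit_pass(); // hypothetical stand-in for the emission work
        pass_count++;         // the real loop in compile() also caps passes that cannot
    } while (is_forcing_recompilation()); // guarantee forward progress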
@@ -12410,31 +12765,50 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpExtInst:
 	{
 		uint32_t extension_set = ops[2];
+		auto ext = get<SPIRExtension>(extension_set).ext;
 
-		if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
+		if (ext == SPIRExtension::GLSL)
 		{
 			emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
 		}
-		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
+		else if (ext == SPIRExtension::SPV_AMD_shader_ballot)
 		{
 			emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
 		}
-		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
+		else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
 		{
 			emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
 		}
-		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
+		else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
 		{
 			emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
 		}
-		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
+		else if (ext == SPIRExtension::SPV_AMD_gcn_shader)
 		{
 			emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
 		}
-		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
+		else if (ext == SPIRExtension::SPV_debug_info)
 		{
 			break; // Ignore SPIR-V debug information extended instructions.
 		}
+		else if (ext == SPIRExtension::NonSemanticDebugPrintf)
+		{
+			// Operation 1 is printf.
+			if (ops[3] == 1)
+			{
+				if (!options.vulkan_semantics)
+					SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.");
+				require_extension_internal("GL_EXT_debug_printf");
+				auto &format_string = get<SPIRString>(ops[4]).str;
+				string expr = join("debugPrintfEXT(\"", format_string, "\"");
+				for (uint32_t i = 5; i < length; i++)
+				{
+					expr += ", ";
+					expr += to_expression(ops[i]);
+				}
+				statement(expr, ");");
+			}
+		}
 		else
 		{
 			statement("// unimplemented ext op ", instruction.op);
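For reference, the translation the NonSemanticDebugPrintf branch performs, with illustrative IDs:

    // SPIR-V:
    //   %set = OpExtInstImport "NonSemantic.DebugPrintf"
    //   %fmt = OpString "value = %f"
    //   %r   = OpExtInst %void %set 1 %fmt %val   ; operation 1 is DebugPrintf
    //
    // Emitted Vulkan GLSL (GL_EXT_debug_printf is required automatically):
    //   debugPrintfEXT("value = %f", val);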
@@ -13234,7 +13608,7 @@ void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
 
 string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
 {
-	auto &flags = ir.meta[id].decoration.decoration_flags;
+	auto &flags = get_decoration_bitset(id);
 	string res;
 
 	auto *var = maybe_get<SPIRVariable>(id);
@@ -13363,7 +13737,7 @@ string CompilerGLSL::variable_decl(const SPIRVariable &variable)
 
 const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
 {
-	auto &flags = ir.meta[variable.self].decoration.decoration_flags;
+	auto &flags = get_decoration_bitset(variable.self);
 	if (flags.get(DecorationRelaxedPrecision))
 		return "mediump ";
 	else
@@ -13821,7 +14195,7 @@ void CompilerGLSL::flatten_buffer_block(VariableID id)
 	auto &var = get<SPIRVariable>(id);
 	auto &type = get<SPIRType>(var.basetype);
 	auto name = to_name(type.self, false);
-	auto &flags = ir.meta[type.self].decoration.decoration_flags;
+	auto &flags = get_decoration_bitset(type.self);
 
 	if (!type.array.empty())
 		SPIRV_CROSS_THROW(name + " is an array of UBOs.");
@@ -13851,11 +14225,10 @@ bool CompilerGLSL::check_atomic_image(uint32_t id)
 	auto *var = maybe_get_backing_variable(id);
 	if (var)
 	{
-		auto &flags = ir.meta[var->self].decoration.decoration_flags;
-		if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
+		if (has_decoration(var->self, DecorationNonWritable) || has_decoration(var->self, DecorationNonReadable))
 		{
-			flags.clear(DecorationNonWritable);
-			flags.clear(DecorationNonReadable);
+			unset_decoration(var->self, DecorationNonWritable);
+			unset_decoration(var->self, DecorationNonReadable);
 			force_recompile();
 		}
 	}
@@ -14108,7 +14481,11 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
 
 		// Loop variables are never declared outside their for-loop, so block any implicit declaration.
 		if (var.loop_variable)
+		{
 			var.deferred_declaration = false;
+			// Need to reset the static expression so we can fallback to initializer if need be.
+			var.static_expression = 0;
+		}
 	}
 
 	// Enforce declaration order for regression testing purposes.
@@ -14736,7 +15113,7 @@ void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &tempo
 			continue;
 
 		add_local_variable_name(tmp.second);
-		auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
+		auto &flags = get_decoration_bitset(tmp.second);
 
 		// Not all targets support pointer literals, so don't bother with that case.
 		string initializer;
@@ -14750,6 +15127,21 @@ void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &tempo
 
 		// The temporary might be read from before it's assigned, set up the expression now.
 		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
+
+		// If we have hoisted temporaries in multi-precision contexts, emit that here too ...
+		// We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here.
+		auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(tmp.second);
+		if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end())
+		{
+			uint32_t mirror_id = mirrored_precision_itr->second;
+			auto &mirror_flags = get_decoration_bitset(mirror_id);
+			statement(flags_to_qualifiers_glsl(type, mirror_flags),
+			          variable_decl(type, to_name(mirror_id)),
+			          initializer, ";");
+			// The temporary might be read from before it's assigned, set up the expression now.
+			set<SPIRExpression>(mirror_id, to_name(mirror_id), tmp.first, true);
+			hoisted_temporaries.insert(mirror_id);
+		}
 	}
 }
diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp
index 7297239ff..8d1c71318 100644
--- a/3rdparty/spirv-cross/spirv_glsl.hpp
+++ b/3rdparty/spirv-cross/spirv_glsl.hpp
@@ -368,6 +368,12 @@ protected:
 	bool current_emitting_switch_fallthrough = false;
 
 	virtual void emit_instruction(const Instruction &instr);
+	struct TemporaryCopy
+	{
+		uint32_t dst_id;
+		uint32_t src_id;
+	};
+	TemporaryCopy handle_instruction_precision(const Instruction &instr);
 	void emit_block_instructions(SPIRBlock &block);
 
 	// For relax_nan_checks.
@@ -512,6 +518,7 @@ protected:
 	// on a single line separated by comma.
 	SmallVector<std::string> *redirect_statement = nullptr;
 	const SPIRBlock *current_continue_block = nullptr;
+	bool block_temporary_hoisting = false;
 
 	void begin_scope();
 	void end_scope();
@@ -605,6 +612,7 @@ protected:
 		bool support_precise_qualifier = false;
 		bool support_64bit_switch = false;
 		bool workgroup_size_is_hidden = false;
+		bool requires_relaxed_precision_analysis = false;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -808,6 +816,10 @@ protected:
 	void replace_fragment_outputs();
 	std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t id);
 
+	void forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length);
+	void analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length);
+	Options::Precision analyze_expression_precision(const uint32_t *args, uint32_t length) const;
+
 	uint32_t indent = 0;
 
 	std::unordered_set<uint32_t> emitted_functions;
@@ -901,6 +913,9 @@ protected:
 	void force_temporary_and_recompile(uint32_t id);
 	void find_static_extensions();
 
+	uint32_t consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision);
+	std::unordered_map<uint32_t, uint32_t> temporary_to_mirror_precision_alias;
+
 	std::string emit_for_loop_initializers(const SPIRBlock &block);
 	void emit_while_loop_initializers(const SPIRBlock &block);
 	bool for_loop_initializers_are_same_type(const SPIRBlock &block);
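Tying the new spirv_glsl.hpp state together: temporary_to_mirror_precision_alias pairs a temporary with its other-precision twin, and block_temporary_hoisting keeps the twin from being re-hoisted on its own. Conceptual output for a hoisted pair (names illustrative):

    //     highp float _15;          // hoisted parent, declared in the loop header
    //     mediump float mp_copy_15; // mirror, declared in lock-step by emit_hoisted_temporaries()
    //     for (;;)
    //     {
    //         _15 = ...;
    //         mp_copy_15 = _15;     // CopyObject emitted right after the defining instruction
    //         ...                   // mediump consumers read mp_copy_15
    //     }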
diff --git a/3rdparty/spirv-cross/spirv_hlsl.cpp b/3rdparty/spirv-cross/spirv_hlsl.cpp
index 8bb6b69f0..bae3e61bd 100644
--- a/3rdparty/spirv-cross/spirv_hlsl.cpp
+++ b/3rdparty/spirv-cross/spirv_hlsl.cpp
@@ -643,6 +643,13 @@ void CompilerHLSL::emit_builtin_outputs_in_struct()
 		else
 			SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
 
+	case BuiltInLayer:
+		if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelGeometry)
+			SPIRV_CROSS_THROW("Render target array index output is only supported in GS 5.0 or higher.");
+		type = "uint";
+		semantic = "SV_RenderTargetArrayIndex";
+		break;
+
 	default:
 		SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
 	}
@@ -674,6 +681,11 @@ void CompilerHLSL::emit_builtin_inputs_in_struct()
 		semantic = "SV_VertexID";
 		break;
 
+	case BuiltInPrimitiveId:
+		type = "uint";
+		semantic = "SV_PrimitiveID";
+		break;
+
 	case BuiltInInstanceId:
 	case BuiltInInstanceIndex:
 		if (legacy)
@@ -721,6 +733,13 @@ void CompilerHLSL::emit_builtin_inputs_in_struct()
 		semantic = "SV_IsFrontFace";
 		break;
 
+	case BuiltInViewIndex:
+		if (hlsl_options.shader_model < 61 || (get_entry_point().model != ExecutionModelVertex && get_entry_point().model != ExecutionModelFragment))
+			SPIRV_CROSS_THROW("View Index input is only supported in VS and PS 6.1 or higher.");
+		type = "uint";
+		semantic = "SV_ViewID";
+		break;
+
 	case BuiltInNumWorkgroups:
 	case BuiltInSubgroupSize:
 	case BuiltInSubgroupLocalInvocationId:
@@ -776,6 +795,13 @@ void CompilerHLSL::emit_builtin_inputs_in_struct()
 		else
 			SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
 
+	case BuiltInLayer:
+		if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
+			SPIRV_CROSS_THROW("Render target array index input is only supported in PS 5.0 or higher.");
+		type = "uint";
+		semantic = "SV_RenderTargetArrayIndex";
+		break;
+
 	default:
 		SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
 	}
@@ -1133,6 +1159,12 @@ void CompilerHLSL::emit_builtin_variables()
 			type = "int";
 			break;
 
+		case BuiltInPrimitiveId:
+		case BuiltInViewIndex:
+		case BuiltInLayer:
+			type = "uint";
+			break;
+
 		default:
 			SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));
 		}
@@ -1285,7 +1317,33 @@ void CompilerHLSL::replace_illegal_names()
 {
 	static const unordered_set<string> keywords = {
 		// Additional HLSL specific keywords.
-		"line", "linear", "matrix", "point", "row_major", "sampler", "vector"
+		// From https://docs.microsoft.com/en-US/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords
+		"AppendStructuredBuffer", "asm", "asm_fragment",
+		"BlendState", "bool", "break", "Buffer", "ByteAddressBuffer",
+		"case", "cbuffer", "centroid", "class", "column_major", "compile",
+		"compile_fragment", "CompileShader", "const", "continue", "ComputeShader",
+		"ConsumeStructuredBuffer",
+		"default", "DepthStencilState", "DepthStencilView", "discard", "do",
+		"double", "DomainShader", "dword",
+		"else", "export", "false", "float", "for", "fxgroup",
+		"GeometryShader", "groupshared", "half", "HullShader",
+		"if", "in", "inline", "inout", "InputPatch", "int", "interface",
+		"line", "lineadj", "linear", "LineStream",
+		"matrix", "min16float", "min10float", "min16int", "min16uint",
+		"namespace", "nointerpolation", "noperspective", "NULL",
+		"out", "OutputPatch",
+		"packoffset", "pass", "pixelfragment", "PixelShader", "point",
+		"PointStream", "precise", "RasterizerState", "RenderTargetView",
+		"return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer",
+		"RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D",
+		"RWTexture2DArray", "RWTexture3D", "sample", "sampler", "SamplerState",
+		"SamplerComparisonState", "shared", "snorm", "stateblock", "stateblock_state",
+		"static", "string", "struct", "switch", "StructuredBuffer", "tbuffer",
+		"technique", "technique10", "technique11", "texture", "Texture1D",
+		"Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray",
+		"Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle",
+		"triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned",
+		"vector", "vertexfragment", "VertexShader", "void", "volatile", "while",
 	};
 
 	CompilerGLSL::replace_illegal_names(keywords);
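Sketch of the stage IO structs the new HLSL cases produce (member names follow spirv-cross's gl_* conventions and are shown for illustration only):

    // Geometry shader output (SM 5.0+), BuiltInLayer:
    //     struct SPIRV_Cross_Output { uint gl_Layer : SV_RenderTargetArrayIndex; ... };
    //
    // Pixel shader input (SM 6.1+ needed for ViewIndex):
    //     struct SPIRV_Cross_Input
    //     {
    //         uint gl_PrimitiveID : SV_PrimitiveID;
    //         uint gl_ViewIndex   : SV_ViewID;
    //         uint gl_Layer       : SV_RenderTargetArrayIndex;
    //     };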
diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp
index 04555f43c..cb6eac92f 100644
--- a/3rdparty/spirv-cross/spirv_msl.cpp
+++ b/3rdparty/spirv-cross/spirv_msl.cpp
@@ -1765,6 +1765,45 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 			break;
 		}
 
+		case OpRayQueryInitializeKHR:
+		case OpRayQueryProceedKHR:
+		case OpRayQueryTerminateKHR:
+		case OpRayQueryGenerateIntersectionKHR:
+		case OpRayQueryConfirmIntersectionKHR:
+		{
+			// Ray query accesses memory directly; we need to pass down the object if it uses the Private storage class.
+			uint32_t base_id = ops[0];
+			if (global_var_ids.find(base_id) != global_var_ids.end())
+				added_arg_ids.insert(base_id);
+			break;
+		}
+
+		case OpRayQueryGetRayTMinKHR:
+		case OpRayQueryGetRayFlagsKHR:
+		case OpRayQueryGetWorldRayOriginKHR:
+		case OpRayQueryGetWorldRayDirectionKHR:
+		case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
+		case OpRayQueryGetIntersectionTypeKHR:
+		case OpRayQueryGetIntersectionTKHR:
+		case OpRayQueryGetIntersectionInstanceCustomIndexKHR:
+		case OpRayQueryGetIntersectionInstanceIdKHR:
+		case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
+		case OpRayQueryGetIntersectionGeometryIndexKHR:
+		case OpRayQueryGetIntersectionPrimitiveIndexKHR:
+		case OpRayQueryGetIntersectionBarycentricsKHR:
+		case OpRayQueryGetIntersectionFrontFaceKHR:
+		case OpRayQueryGetIntersectionObjectRayDirectionKHR:
+		case OpRayQueryGetIntersectionObjectRayOriginKHR:
+		case OpRayQueryGetIntersectionObjectToWorldKHR:
+		case OpRayQueryGetIntersectionWorldToObjectKHR:
+		{
+			// Ray query accesses memory directly; we need to pass down the object if it uses the Private storage class.
+			uint32_t base_id = ops[2];
+			if (global_var_ids.find(base_id) != global_var_ids.end())
+				added_arg_ids.insert(base_id);
+			break;
+		}
+
 		default:
 			break;
 		}
@@ -2541,12 +2580,14 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
 	uint32_t mbr_type_id = var_type.member_types[mbr_idx];
 	auto &mbr_type = get<SPIRType>(mbr_type_id);
 
+	bool mbr_is_indexable = false;
 	uint32_t elem_cnt = 1;
 	if (is_matrix(mbr_type))
 	{
 		if (is_array(mbr_type))
 			SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables.");
 
+		mbr_is_indexable = true;
 		elem_cnt = mbr_type.columns;
 	}
 	else if (is_array(mbr_type))
@@ -2554,6 +2595,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
 		if (mbr_type.array.size() != 1)
 			SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables.");
 
+		mbr_is_indexable = true;
 		elem_cnt = to_array_size_literal(mbr_type);
 	}
 
@@ -2589,8 +2631,8 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
 	{
 		for (uint32_t i = 0; i < elem_cnt; i++)
 		{
-			string mbr_name = append_member_name(mbr_name_qual, var_type, mbr_idx) + (elem_cnt == 1 ? "" : join("_", i));
-			string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (elem_cnt == 1 ? "" : join("[", i, "]")));
+			string mbr_name = append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : "");
+			string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (mbr_is_indexable ? join("[", i, "]") : ""));
 			uint32_t sub_mbr_cnt = uint32_t(mbr_type.member_types.size());
 			for (uint32_t sub_mbr_idx = 0; sub_mbr_idx < sub_mbr_cnt; sub_mbr_idx++)
 			{
@@ -2615,7 +2657,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
 		ib_type.member_types.push_back(usable_type->self);
 
 		// Give the member a name
-		string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx) + (elem_cnt == 1 ? "" : join("_", i)), "m");
+		string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : ""), "m");
 		set_member_name(ib_type.self, ib_mbr_idx, mbr_name);
 
 		// Once we determine the location of the first member within nested structures,
@@ -2679,7 +2721,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
 		// Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate.
 		if (!meta.strip_array && meta.allow_local_declaration)
 		{
-			string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (elem_cnt == 1 ? "" : join("[", i, "]")));
+			string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (mbr_is_indexable ? join("[", i, "]") : ""));
 			switch (storage)
 			{
 			case StorageClassInput:
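The mbr_is_indexable flag fixes flattening of single-element composites: a member such as float c[1] (or a one-column matrix) is still an array or matrix type, so the access chain must keep its subscript even when elem_cnt == 1. Illustration:

    // Member: float c[1]; flattened into the interface block:
    //   before: var_chain == "obj.c"     (subscript dropped because elem_cnt == 1; type mismatch)
    //   after:  var_chain == "obj.c[0]"  (mbr_is_indexable is true for any array or matrix,
    //                                     independent of the element count)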
join("[", i, "]") : "")); uint32_t sub_mbr_cnt = uint32_t(mbr_type.member_types.size()); for (uint32_t sub_mbr_idx = 0; sub_mbr_idx < sub_mbr_cnt; sub_mbr_idx++) { @@ -2615,7 +2657,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass ib_type.member_types.push_back(usable_type->self); // Give the member a name - string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx) + (elem_cnt == 1 ? "" : join("_", i)), "m"); + string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : ""), "m"); set_member_name(ib_type.self, ib_mbr_idx, mbr_name); // Once we determine the location of the first member within nested structures, @@ -2679,7 +2721,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. if (!meta.strip_array && meta.allow_local_declaration) { - string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (elem_cnt == 1 ? "" : join("[", i, "]"))); + string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (mbr_is_indexable ? join("[", i, "]") : "")); switch (storage) { case StorageClassInput: diff --git a/3rdparty/spirv-cross/spirv_parser.cpp b/3rdparty/spirv-cross/spirv_parser.cpp index 1296f841f..262aa70ff 100644 --- a/3rdparty/spirv-cross/spirv_parser.cpp +++ b/3rdparty/spirv-cross/spirv_parser.cpp @@ -279,6 +279,8 @@ void Parser::parse(const Instruction &instruction) set(id, SPIRExtension::SPV_AMD_shader_trinary_minmax); else if (ext == "SPV_AMD_gcn_shader") set(id, SPIRExtension::SPV_AMD_gcn_shader); + else if (ext == "NonSemantic.DebugPrintf") + set(id, SPIRExtension::NonSemanticDebugPrintf); else set(id, SPIRExtension::Unsupported); diff --git a/3rdparty/spirv-cross/spirv_reflect.cpp b/3rdparty/spirv-cross/spirv_reflect.cpp index ee2fe930d..0bd224e6c 100644 --- a/3rdparty/spirv-cross/spirv_reflect.cpp +++ b/3rdparty/spirv-cross/spirv_reflect.cpp @@ -587,18 +587,18 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVectoremit_json_key_value("writeonly", true); - if (buffer_flags.get(DecorationNonWritable)) - json_stream->emit_json_key_value("readonly", true); - if (buffer_flags.get(DecorationRestrict)) - json_stream->emit_json_key_value("restrict", true); - if (buffer_flags.get(DecorationCoherent)) - json_stream->emit_json_key_value("coherent", true); - } + Bitset qualifier_mask = ssbo_block ? get_buffer_block_flags(res.id) : mask; + + if (qualifier_mask.get(DecorationNonReadable)) + json_stream->emit_json_key_value("writeonly", true); + if (qualifier_mask.get(DecorationNonWritable)) + json_stream->emit_json_key_value("readonly", true); + if (qualifier_mask.get(DecorationRestrict)) + json_stream->emit_json_key_value("restrict", true); + if (qualifier_mask.get(DecorationCoherent)) + json_stream->emit_json_key_value("coherent", true); + if (qualifier_mask.get(DecorationVolatile)) + json_stream->emit_json_key_value("volatile", true); } emit_type_array(type);