From 939e9233c84427cd7747d1287cb9c97edfa1fc8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?= =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?= Date: Sun, 10 Jul 2022 10:49:20 -0700 Subject: [PATCH] Updated spirv-cross. --- 3rdparty/spirv-cross/main.cpp | 2 +- 3rdparty/spirv-cross/spirv.h | 49 ++++- 3rdparty/spirv-cross/spirv.hpp | 49 ++++- 3rdparty/spirv-cross/spirv_cfg.cpp | 33 +++- 3rdparty/spirv-cross/spirv_cross.cpp | 13 ++ 3rdparty/spirv-cross/spirv_glsl.cpp | 279 ++++++++++++++++++++++----- 3rdparty/spirv-cross/spirv_glsl.hpp | 2 + 3rdparty/spirv-cross/spirv_hlsl.cpp | 20 +- 3rdparty/spirv-cross/spirv_msl.cpp | 179 ++++++++++++++--- 3rdparty/spirv-cross/spirv_msl.hpp | 2 + 10 files changed, 536 insertions(+), 92 deletions(-) diff --git a/3rdparty/spirv-cross/main.cpp b/3rdparty/spirv-cross/main.cpp index 521e98f96..b430ec810 100644 --- a/3rdparty/spirv-cross/main.cpp +++ b/3rdparty/spirv-cross/main.cpp @@ -908,7 +908,7 @@ static void print_help_common() // clang-format off fprintf(stderr, "\nCommon options:\n" "\t[--entry name]:\n\t\tUse a specific entry point. By default, the first entry point in the module is used.\n" - "\t[--stage <stage>]:\n\t\tForces use of a certain shader stage.\n" + "\t[--stage <stage (vert, frag, geom, tesc, tese comp)>]:\n\t\tForces use of a certain shader stage.\n" "\t\tCan disambiguate the entry point if more than one entry point exists with same name, but different stage.\n" "\t[--emit-line-directives]:\n\t\tIf SPIR-V has OpLine directives, aim to emit those accurately in output code as well.\n" "\t[--rename-entry-point <old> <new> <stage>]:\n\t\tRenames an entry point from what is declared in SPIR-V to code output.\n" diff --git a/3rdparty/spirv-cross/spirv.h b/3rdparty/spirv-cross/spirv.h index c15736e25..38f558748 100644 --- a/3rdparty/spirv-cross/spirv.h +++ b/3rdparty/spirv-cross/spirv.h @@ -31,7 +31,7 @@ /* ** Enumeration tokens for SPIR-V, in various styles: -** C, C++, C++11, JSON, Lua, Python, C#, D +** C, C++, C++11, JSON, Lua, Python, C#, D, Beef ** ** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL ** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL @@ -41,6 +41,8 @@ ** - C# will use enum classes in the Specification class located in the "Spv" namespace, ** e.g.: Spv.Specification.SourceLanguage.GLSL ** - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +** - Beef will use enum classes in the Specification class located in the "Spv" namespace, +** e.g.: Spv.Specification.SourceLanguage.GLSL ** ** Some tokens act like mask values, which can be OR'd together, ** while others are mutually exclusive.
The mask-like ones have @@ -70,6 +72,7 @@ typedef enum SpvSourceLanguage_ { SpvSourceLanguageOpenCL_CPP = 4, SpvSourceLanguageHLSL = 5, SpvSourceLanguageCPP_for_OpenCL = 6, + SpvSourceLanguageSYCL = 7, SpvSourceLanguageMax = 0x7fffffff, } SpvSourceLanguage; @@ -184,6 +187,7 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeNoGlobalOffsetINTEL = 5895, SpvExecutionModeNumSIMDWorkitemsINTEL = 5896, SpvExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + SpvExecutionModeNamedBarrierCountINTEL = 6417, SpvExecutionModeMax = 0x7fffffff, } SpvExecutionMode; @@ -546,6 +550,8 @@ typedef enum SpvDecoration_ { SpvDecorationPrefetchINTEL = 5902, SpvDecorationStallEnableINTEL = 5905, SpvDecorationFuseLoopsInFunctionINTEL = 5907, + SpvDecorationAliasScopeINTEL = 5914, + SpvDecorationNoAliasINTEL = 5915, SpvDecorationBufferLocationINTEL = 5921, SpvDecorationIOPipeStorageINTEL = 5944, SpvDecorationFunctionFloatingPointModeINTEL = 6080, @@ -677,6 +683,7 @@ typedef enum SpvBuiltIn_ { SpvBuiltInSMCountNV = 5375, SpvBuiltInWarpIDNV = 5376, SpvBuiltInSMIDNV = 5377, + SpvBuiltInCullMaskKHR = 6021, SpvBuiltInMax = 0x7fffffff, } SpvBuiltIn; @@ -804,6 +811,8 @@ typedef enum SpvMemoryAccessShift_ { SpvMemoryAccessMakePointerVisibleKHRShift = 4, SpvMemoryAccessNonPrivatePointerShift = 5, SpvMemoryAccessNonPrivatePointerKHRShift = 5, + SpvMemoryAccessAliasScopeINTELMaskShift = 16, + SpvMemoryAccessNoAliasINTELMaskShift = 17, SpvMemoryAccessMax = 0x7fffffff, } SpvMemoryAccessShift; @@ -818,6 +827,8 @@ typedef enum SpvMemoryAccessMask_ { SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010, SpvMemoryAccessNonPrivatePointerMask = 0x00000020, SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020, + SpvMemoryAccessAliasScopeINTELMaskMask = 0x00010000, + SpvMemoryAccessNoAliasINTELMaskMask = 0x00020000, } SpvMemoryAccessMask; typedef enum SpvScope_ { @@ -1059,6 +1070,7 @@ typedef enum SpvCapability_ { SpvCapabilityFPGAMemoryAccessesINTEL = 5898, SpvCapabilityFPGAClusterAttributesINTEL = 5904, SpvCapabilityLoopFuseINTEL = 5906, + SpvCapabilityMemoryAccessAliasingINTEL = 5910, SpvCapabilityFPGABufferLocationINTEL = 5920, SpvCapabilityArbitraryPrecisionFixedPointINTEL = 5922, SpvCapabilityUSMStorageClassesINTEL = 5935, @@ -1073,13 +1085,17 @@ typedef enum SpvCapability_ { SpvCapabilityDotProductInput4x8BitPackedKHR = 6018, SpvCapabilityDotProduct = 6019, SpvCapabilityDotProductKHR = 6019, + SpvCapabilityRayCullMaskKHR = 6020, SpvCapabilityBitInstructions = 6025, + SpvCapabilityGroupNonUniformRotateKHR = 6026, SpvCapabilityAtomicFloat32AddEXT = 6033, SpvCapabilityAtomicFloat64AddEXT = 6034, SpvCapabilityLongConstantCompositeINTEL = 6089, SpvCapabilityOptNoneINTEL = 6094, SpvCapabilityAtomicFloat16AddEXT = 6095, SpvCapabilityDebugInfoModuleINTEL = 6114, + SpvCapabilitySplitBarrierINTEL = 6141, + SpvCapabilityGroupUniformArithmeticKHR = 6400, SpvCapabilityMax = 0x7fffffff, } SpvCapability; @@ -1535,6 +1551,7 @@ typedef enum SpvOp_ { SpvOpSubgroupAllKHR = 4428, SpvOpSubgroupAnyKHR = 4429, SpvOpSubgroupAllEqualKHR = 4430, + SpvOpGroupNonUniformRotateKHR = 4431, SpvOpSubgroupReadInvocationKHR = 4432, SpvOpTraceRayKHR = 4445, SpvOpExecuteCallableKHR = 4446, @@ -1801,6 +1818,9 @@ typedef enum SpvOp_ { SpvOpArbitraryFloatPowRINTEL = 5881, SpvOpArbitraryFloatPowNINTEL = 5882, SpvOpLoopControlINTEL = 5887, + SpvOpAliasDomainDeclINTEL = 5911, + SpvOpAliasScopeDeclINTEL = 5912, + SpvOpAliasScopeListDeclINTEL = 5913, SpvOpFixedSqrtINTEL = 5923, SpvOpFixedRecipINTEL = 5924, SpvOpFixedRsqrtINTEL = 5925, @@ -1839,10 +1859,23 @@ typedef enum 
SpvOp_ { SpvOpTypeStructContinuedINTEL = 6090, SpvOpConstantCompositeContinuedINTEL = 6091, SpvOpSpecConstantCompositeContinuedINTEL = 6092, + SpvOpControlBarrierArriveINTEL = 6142, + SpvOpControlBarrierWaitINTEL = 6143, + SpvOpGroupIMulKHR = 6401, + SpvOpGroupFMulKHR = 6402, + SpvOpGroupBitwiseAndKHR = 6403, + SpvOpGroupBitwiseOrKHR = 6404, + SpvOpGroupBitwiseXorKHR = 6405, + SpvOpGroupLogicalAndKHR = 6406, + SpvOpGroupLogicalOrKHR = 6407, + SpvOpGroupLogicalXorKHR = 6408, SpvOpMax = 0x7fffffff, } SpvOp; #ifdef SPV_ENABLE_UTILITY_CODE +#ifndef __cplusplus +#include <stdbool.h> +#endif inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultType) { *hasResult = *hasResultType = false; switch (opcode) { @@ -2197,6 +2230,7 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy case SpvOpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; case SpvOpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; case SpvOpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; case SpvOpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; case SpvOpTraceRayKHR: *hasResult = false; *hasResultType = false; break; case SpvOpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; @@ -2452,6 +2486,9 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy case SpvOpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; case SpvOpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; case SpvOpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; case SpvOpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; case SpvOpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; case SpvOpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; @@ -2490,6 +2527,16 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy case SpvOpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; case SpvOpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; case SpvOpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupIMulKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMulKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseOrKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseXorKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; } } #endif /* SPV_ENABLE_UTILITY_CODE */ diff --git a/3rdparty/spirv-cross/spirv.hpp b/3rdparty/spirv-cross/spirv.hpp index 3d500ebbd..48d93d64c
100644 --- a/3rdparty/spirv-cross/spirv.hpp +++ b/3rdparty/spirv-cross/spirv.hpp @@ -26,7 +26,7 @@ // the Binary Section of the SPIR-V specification. // Enumeration tokens for SPIR-V, in various styles: -// C, C++, C++11, JSON, Lua, Python, C#, D +// C, C++, C++11, JSON, Lua, Python, C#, D, Beef // // - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL // - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL @@ -36,6 +36,8 @@ // - C# will use enum classes in the Specification class located in the "Spv" namespace, // e.g.: Spv.Specification.SourceLanguage.GLSL // - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// - Beef will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL // // Some tokens act like mask values, which can be OR'd together, // while others are mutually exclusive. The mask-like ones have @@ -66,6 +68,7 @@ enum SourceLanguage { SourceLanguageOpenCL_CPP = 4, SourceLanguageHLSL = 5, SourceLanguageCPP_for_OpenCL = 6, + SourceLanguageSYCL = 7, SourceLanguageMax = 0x7fffffff, }; @@ -180,6 +183,7 @@ enum ExecutionMode { ExecutionModeNoGlobalOffsetINTEL = 5895, ExecutionModeNumSIMDWorkitemsINTEL = 5896, ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + ExecutionModeNamedBarrierCountINTEL = 6417, ExecutionModeMax = 0x7fffffff, }; @@ -542,6 +546,8 @@ enum Decoration { DecorationPrefetchINTEL = 5902, DecorationStallEnableINTEL = 5905, DecorationFuseLoopsInFunctionINTEL = 5907, + DecorationAliasScopeINTEL = 5914, + DecorationNoAliasINTEL = 5915, DecorationBufferLocationINTEL = 5921, DecorationIOPipeStorageINTEL = 5944, DecorationFunctionFloatingPointModeINTEL = 6080, @@ -673,6 +679,7 @@ enum BuiltIn { BuiltInSMCountNV = 5375, BuiltInWarpIDNV = 5376, BuiltInSMIDNV = 5377, + BuiltInCullMaskKHR = 6021, BuiltInMax = 0x7fffffff, }; @@ -800,6 +807,8 @@ enum MemoryAccessShift { MemoryAccessMakePointerVisibleKHRShift = 4, MemoryAccessNonPrivatePointerShift = 5, MemoryAccessNonPrivatePointerKHRShift = 5, + MemoryAccessAliasScopeINTELMaskShift = 16, + MemoryAccessNoAliasINTELMaskShift = 17, MemoryAccessMax = 0x7fffffff, }; @@ -814,6 +823,8 @@ enum MemoryAccessMask { MemoryAccessMakePointerVisibleKHRMask = 0x00000010, MemoryAccessNonPrivatePointerMask = 0x00000020, MemoryAccessNonPrivatePointerKHRMask = 0x00000020, + MemoryAccessAliasScopeINTELMaskMask = 0x00010000, + MemoryAccessNoAliasINTELMaskMask = 0x00020000, }; enum Scope { @@ -1055,6 +1066,7 @@ enum Capability { CapabilityFPGAMemoryAccessesINTEL = 5898, CapabilityFPGAClusterAttributesINTEL = 5904, CapabilityLoopFuseINTEL = 5906, + CapabilityMemoryAccessAliasingINTEL = 5910, CapabilityFPGABufferLocationINTEL = 5920, CapabilityArbitraryPrecisionFixedPointINTEL = 5922, CapabilityUSMStorageClassesINTEL = 5935, @@ -1069,13 +1081,17 @@ enum Capability { CapabilityDotProductInput4x8BitPackedKHR = 6018, CapabilityDotProduct = 6019, CapabilityDotProductKHR = 6019, + CapabilityRayCullMaskKHR = 6020, CapabilityBitInstructions = 6025, + CapabilityGroupNonUniformRotateKHR = 6026, CapabilityAtomicFloat32AddEXT = 6033, CapabilityAtomicFloat64AddEXT = 6034, CapabilityLongConstantCompositeINTEL = 6089, CapabilityOptNoneINTEL = 6094, CapabilityAtomicFloat16AddEXT = 6095, CapabilityDebugInfoModuleINTEL = 6114, + CapabilitySplitBarrierINTEL = 6141, + CapabilityGroupUniformArithmeticKHR = 6400, CapabilityMax = 0x7fffffff, }; @@ -1531,6 +1547,7 @@ enum Op { OpSubgroupAllKHR = 4428, OpSubgroupAnyKHR = 4429, 
OpSubgroupAllEqualKHR = 4430, + OpGroupNonUniformRotateKHR = 4431, OpSubgroupReadInvocationKHR = 4432, OpTraceRayKHR = 4445, OpExecuteCallableKHR = 4446, @@ -1797,6 +1814,9 @@ enum Op { OpArbitraryFloatPowRINTEL = 5881, OpArbitraryFloatPowNINTEL = 5882, OpLoopControlINTEL = 5887, + OpAliasDomainDeclINTEL = 5911, + OpAliasScopeDeclINTEL = 5912, + OpAliasScopeListDeclINTEL = 5913, OpFixedSqrtINTEL = 5923, OpFixedRecipINTEL = 5924, OpFixedRsqrtINTEL = 5925, @@ -1835,10 +1855,23 @@ enum Op { OpTypeStructContinuedINTEL = 6090, OpConstantCompositeContinuedINTEL = 6091, OpSpecConstantCompositeContinuedINTEL = 6092, + OpControlBarrierArriveINTEL = 6142, + OpControlBarrierWaitINTEL = 6143, + OpGroupIMulKHR = 6401, + OpGroupFMulKHR = 6402, + OpGroupBitwiseAndKHR = 6403, + OpGroupBitwiseOrKHR = 6404, + OpGroupBitwiseXorKHR = 6405, + OpGroupLogicalAndKHR = 6406, + OpGroupLogicalOrKHR = 6407, + OpGroupLogicalXorKHR = 6408, OpMax = 0x7fffffff, }; #ifdef SPV_ENABLE_UTILITY_CODE +#ifndef __cplusplus +#include <stdbool.h> +#endif inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { *hasResult = *hasResultType = false; switch (opcode) { @@ -2193,6 +2226,7 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; case OpTraceRayKHR: *hasResult = false; *hasResultType = false; break; case OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; @@ -2448,6 +2482,9 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; case OpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; case OpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case OpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case OpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case OpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; case OpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; case OpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; case OpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; @@ -2486,6 +2523,16 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; case OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; + case OpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break; + case OpGroupIMulKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupFMulKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseOrKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseXorKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalAndKHR:
*hasResult = true; *hasResultType = true; break; + case OpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; } } #endif /* SPV_ENABLE_UTILITY_CODE */ diff --git a/3rdparty/spirv-cross/spirv_cfg.cpp b/3rdparty/spirv-cross/spirv_cfg.cpp index a938634e2..932994798 100644 --- a/3rdparty/spirv-cross/spirv_cfg.cpp +++ b/3rdparty/spirv-cross/spirv_cfg.cpp @@ -306,15 +306,36 @@ bool CFG::node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) co bool true_path_ignore = false; bool false_path_ignore = false; - if (ignore_block_id && dom.terminator == SPIRBlock::Select) + + bool merges_to_nothing = dom.merge == SPIRBlock::MergeNone || + (dom.merge == SPIRBlock::MergeSelection && dom.next_block && + compiler.get<SPIRBlock>(dom.next_block).terminator == SPIRBlock::Unreachable) || + (dom.merge == SPIRBlock::MergeLoop && dom.merge_block && + compiler.get<SPIRBlock>(dom.merge_block).terminator == SPIRBlock::Unreachable); + + if (dom.self == from || merges_to_nothing) { - auto &true_block = compiler.get<SPIRBlock>(dom.true_block); - auto &false_block = compiler.get<SPIRBlock>(dom.false_block); - auto &ignore_block = compiler.get<SPIRBlock>(ignore_block_id); - true_path_ignore = compiler.execution_is_branchless(true_block, ignore_block); - false_path_ignore = compiler.execution_is_branchless(false_block, ignore_block); + // We can only ignore inner branchy paths if there is no merge, + // i.e. no code is generated afterwards. E.g. this allows us to elide continue: + // for (;;) { if (cond) { continue; } else { break; } }. + // Codegen here in SPIR-V will be something like either no merge if one path directly breaks, or + // we merge to Unreachable. + if (ignore_block_id && dom.terminator == SPIRBlock::Select) + { + auto &true_block = compiler.get<SPIRBlock>(dom.true_block); + auto &false_block = compiler.get<SPIRBlock>(dom.false_block); + auto &ignore_block = compiler.get<SPIRBlock>(ignore_block_id); + true_path_ignore = compiler.execution_is_branchless(true_block, ignore_block); + false_path_ignore = compiler.execution_is_branchless(false_block, ignore_block); + } } + // Cases where we allow traversal. This serves as a proxy for post-dominance in a loop body. + // TODO: Might want to do full post-dominance analysis, but it's a lot of churn for something like this ... + // - We're the merge block of a selection construct. Jump to header. + // - We're the merge block of a loop. Jump to header. + // - Direct branch. Trivial. + // - Allow cases inside a branch if the header cannot merge execution before loop exit. if ((dom.merge == SPIRBlock::MergeSelection && dom.next_block == to) || (dom.merge == SPIRBlock::MergeLoop && dom.merge_block == to) || (dom.terminator == SPIRBlock::Direct && dom.next_block == to) || diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp index 0617a1495..5463e9cda 100644 --- a/3rdparty/spirv-cross/spirv_cross.cpp +++ b/3rdparty/spirv-cross/spirv_cross.cpp @@ -3229,7 +3229,20 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // Keep track of the types of temporaries, so we can hoist them out as necessary. uint32_t result_type, result_id; if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) + { + // For some opcodes, we will need to override the result id. + // If we need to hoist the temporary, the temporary type is the input, not the result. + // FIXME: This will likely break with OpCopyObject + hoisting, but we'll have to + // solve it if we ever get there ...
+ if (op == OpConvertUToAccelerationStructureKHR) + { + auto itr = result_id_to_type.find(args[2]); + if (itr != result_id_to_type.end()) + result_type = itr->second; + } + result_id_to_type[result_id] = result_type; + } switch (op) { diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index 34020b931..9936c404d 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -405,10 +405,9 @@ void CompilerGLSL::find_static_extensions() } else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) { - if (options.es) - SPIRV_CROSS_THROW("64-bit integers not supported in ES profile."); - if (!options.es) - require_extension_internal("GL_ARB_gpu_shader_int64"); + if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310. + SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); + require_extension_internal("GL_ARB_gpu_shader_int64"); } else if (type.basetype == SPIRType::Half) { @@ -619,6 +618,11 @@ void CompilerGLSL::find_static_extensions() SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders."); require_extension_internal("GL_OVR_multiview2"); } + + // KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR. + for (auto &ext : ir.declared_extensions) + if (ext == "SPV_NV_fragment_shader_barycentric") + barycentric_is_nv = true; } void CompilerGLSL::ray_tracing_khr_fixup_locations() @@ -821,7 +825,20 @@ void CompilerGLSL::emit_header() for (auto &ext : forced_extensions) { - if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") + if (ext == "GL_ARB_gpu_shader_int64") + { + statement("#if defined(GL_ARB_gpu_shader_int64)"); + statement("#extension GL_ARB_gpu_shader_int64 : require"); + if (!options.vulkan_semantics || options.es) + { + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + } + statement("#else"); + statement("#error No extension available for 64-bit integers."); + statement("#endif"); + } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") { // Special case, this extension has a potential fallback to another vendor extension in normal GLSL. // GL_AMD_gpu_shader_half_float is a superset, so try that first. 
@@ -841,13 +858,30 @@ void CompilerGLSL::emit_header() statement("#error No extension available for FP16."); statement("#endif"); } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8") + { + if (options.vulkan_semantics) + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require"); + else + { + statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require"); + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + statement("#else"); + statement("#error No extension available for Int8."); + statement("#endif"); + } + } else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16") { if (options.vulkan_semantics) statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); else { - statement("#if defined(GL_AMD_gpu_shader_int16)"); + statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); + statement("#elif defined(GL_AMD_gpu_shader_int16)"); statement("#extension GL_AMD_gpu_shader_int16 : require"); statement("#elif defined(GL_NV_gpu_shader5)"); statement("#extension GL_NV_gpu_shader5 : require"); @@ -1206,14 +1240,23 @@ string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) res += "__explicitInterpAMD "; } - if (flags.get(DecorationPerVertexNV)) + if (flags.get(DecorationPerVertexKHR)) { if (options.es && options.version < 320) - SPIRV_CROSS_THROW("pervertexNV requires ESSL 320."); + SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320."); else if (!options.es && options.version < 450) - SPIRV_CROSS_THROW("pervertexNV requires GLSL 450."); - require_extension_internal("GL_NV_fragment_shader_barycentric"); - res += "pervertexNV "; + SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + res += "pervertexNV "; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + res += "pervertexEXT "; + } } return res; @@ -5171,8 +5214,8 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, bool inside_bloc } #ifdef _MSC_VER -// sprintf warning. -// We cannot rely on snprintf existing because, ..., MSVC. +// snprintf does not exist or is buggy on older MSVC versions, some of them +// being used by MinGW. Use sprintf instead and disable corresponding warning. #pragma warning(push) #pragma warning(disable : 4996) #endif @@ -5232,7 +5275,11 @@ string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col in_type.width = 32; char print_buffer[32]; +#ifdef _WIN32 sprintf(print_buffer, "0x%xu", c.scalar(col, row)); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%xu", c.scalar(col, row)); +#endif const char *comment = "inf"; if (float_value == -numeric_limits<float>::infinity()) @@ -5299,13 +5346,18 @@ std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32 uint64_t u64_value = c.scalar_u64(col, row); - if (options.es) - SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile."); + if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
+ SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); require_extension_internal("GL_ARB_gpu_shader_int64"); char print_buffer[64]; +#ifdef _WIN32 sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value), backend.long_long_literal_suffix ? "ull" : "ul"); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%llx%s", static_cast<unsigned long long>(u64_value), + backend.long_long_literal_suffix ? "ull" : "ul"); +#endif const char *comment = "inf"; if (double_value == -numeric_limits<double>::infinity()) @@ -6066,6 +6118,16 @@ void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { + auto &type = get<SPIRType>(result_type); + if (type_is_floating_point(type)) + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics."); + if (options.es) + SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL."); + require_extension_internal("GL_EXT_shader_atomic_float"); + } + forced_temporaries.insert(result_id); emit_op(result_type, result_id, join(op, "(", to_non_uniform_aware_expression(op0), ", ", @@ -6340,7 +6402,11 @@ string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtyp switch (imgtype.image.dim) { case spv::Dim1D: - type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; + // Force 2D path for ES. + if (options.es) + type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; + else + type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; break; case spv::Dim2D: type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; break; @@ -7018,8 +7084,8 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool expr += to_function_args(args, forward); expr += ")"; - // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here. - if (is_legacy() && is_depth_image(imgtype, img)) + // texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here. + if (is_legacy() && !options.es && is_depth_image(imgtype, img)) expr += ".r"; // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. @@ -7300,10 +7366,29 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool // Create a composite which merges coord/dref into a single vector. auto type = expression_type(args.coord); type.vecsize = args.coord_components + 1; + if (imgtype.image.dim == Dim1D && options.es) + type.vecsize++; farg_str += ", "; farg_str += type_to_glsl_constructor(type); farg_str += "("; - farg_str += coord_expr; + + if (imgtype.image.dim == Dim1D && options.es) + { + if (imgtype.image.arrayed) + { + farg_str += enclose_expression(coord_expr) + ".x"; + farg_str += ", 0.0, "; + farg_str += enclose_expression(coord_expr) + ".y"; + } + else + { + farg_str += coord_expr; + farg_str += ", 0.0"; + } + } + else + farg_str += coord_expr; + farg_str += ", "; farg_str += to_expression(args.dref); farg_str += ")"; @@ -7311,6 +7396,33 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool } else { + if (imgtype.image.dim == Dim1D && options.es) + { + // Have to fake a second coordinate. + if (type_is_floating_point(coord_type)) + { + // Cannot mix proj and array.
+ if (imgtype.image.arrayed || args.base.is_proj) + { + coord_expr = join("vec3(", enclose_expression(coord_expr), ".x, 0.0, ", + enclose_expression(coord_expr), ".y)"); + } + else + coord_expr = join("vec2(", coord_expr, ", 0.0)"); + } + else + { + if (imgtype.image.arrayed) + { + coord_expr = join("ivec3(", enclose_expression(coord_expr), + ".x, 0, ", + enclose_expression(coord_expr), ".y)"); + } + else + coord_expr = join("ivec2(", coord_expr, ", 0)"); + } + } + farg_str += ", "; farg_str += coord_expr; } @@ -8698,24 +8810,42 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInIncomingRayFlagsKHR: return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV"; - case BuiltInBaryCoordNV: + case BuiltInBaryCoordKHR: { if (options.es && options.version < 320) - SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320."); + SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320."); else if (!options.es && options.version < 450) - SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450."); - require_extension_internal("GL_NV_fragment_shader_barycentric"); - return "gl_BaryCoordNV"; + SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNV"; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + return "gl_BaryCoordEXT"; + } } case BuiltInBaryCoordNoPerspNV: { if (options.es && options.version < 320) - SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320."); + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320."); else if (!options.es && options.version < 450) - SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450."); - require_extension_internal("GL_NV_fragment_shader_barycentric"); - return "gl_BaryCoordNoPerspNV"; + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspNV"; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspEXT"; + } } case BuiltInFragStencilRefEXT: @@ -8860,6 +8990,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (!is_literal) mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index); + check_physical_type_cast(expr, type, physical_type); }; for (uint32_t i = 0; i < count; i++) @@ -9192,6 +9323,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice return expr; } +void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t) +{ +} + void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &) { } @@ -12132,6 +12267,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } case OpAtomicIAdd: + case OpAtomicFAddEXT: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); @@ -12484,6 +12620,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) target_coord_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + // ES needs to emulate 1D images as 2D. 
+ if (type.image.dim == Dim1D && options.es) + coord_expr = join("ivec2(", coord_expr, ", 0)"); + // Plain image load/store. if (sparse) { @@ -12596,6 +12736,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) target_coord_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); + // ES needs to emulate 1D images as 2D. + if (type.image.dim == Dim1D && options.es) + coord_expr = join("ivec2(", coord_expr, ", 0)"); + if (type.image.ms) { uint32_t operands = ops[3]; @@ -13249,9 +13393,30 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) #undef GLSL_RAY_QUERY_GET_OP2 case OpConvertUToAccelerationStructureKHR: + { require_extension_internal("GL_EXT_ray_tracing"); - GLSL_UFOP(accelerationStructureEXT); + + bool elide_temporary = should_forward(ops[2]) && forced_temporaries.count(ops[1]) == 0 && + !hoisted_temporaries.count(ops[1]); + + if (elide_temporary) + { + GLSL_UFOP(accelerationStructureEXT); + } + else + { + // Force this path in subsequent iterations. + forced_temporaries.insert(ops[1]); + + // We cannot declare a temporary acceleration structure in GLSL. + // If we get to this point, we'll have to emit a temporary uvec2, + // and cast to RTAS on demand. + statement(declare_temporary(expression_type_id(ops[2]), ops[1]), to_unpacked_expression(ops[2]), ";"); + // Use raw SPIRExpression interface to block all usage tracking. + set<SPIRExpression>(ops[1], join("accelerationStructureEXT(", to_name(ops[1]), ")"), ops[0], true); + } break; + } case OpConvertUToPtr: { @@ -13956,7 +14121,8 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) switch (type.image.dim) { case Dim1D: - res += "1D"; + // ES doesn't support 1D. Fake it with 2D. + res += options.es ? "2D" : "1D"; break; case Dim2D: res += "2D"; break; @@ -15587,26 +15753,32 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate // non-structured exits with the help of a switch block. // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic. - bool degenerate_switch = block.default_block != block.merge_block && cases.empty(); - if (degenerate_switch || is_legacy_es()) + bool block_like_switch = cases.empty(); + + // If this is true, the switch is completely meaningless, and we should just avoid it. + bool collapsed_switch = block_like_switch && block.default_block == block.next_block; + + if (!collapsed_switch) { - // ESSL 1.0 is not guaranteed to support do/while. - if (is_legacy_es()) + if (block_like_switch || is_legacy_es()) { - uint32_t counter = statement_count; - statement("for (int spvDummy", counter, " = 0; spvDummy", counter, " < 1; spvDummy", counter, "++)"); + // ESSL 1.0 is not guaranteed to support do/while. + if (is_legacy_es()) + { + uint32_t counter = statement_count; + statement("for (int spvDummy", counter, " = 0; spvDummy", counter, " < 1; spvDummy", counter, "++)"); + } + else + statement("do"); } else - statement("do"); + { + emit_block_hints(block); + statement("switch (", to_unpacked_expression(block.condition), ")"); + } + begin_scope(); } - else - { - emit_block_hints(block); - statement("switch (", to_unpacked_expression(block.condition), ")"); - } - begin_scope(); for (size_t i = 0; i < num_blocks; i++) { @@ -15616,7 +15788,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) if (literals.empty()) { // Default case.
- if (!degenerate_switch) + if (!block_like_switch) { if (is_legacy_es()) statement("else"); @@ -15654,10 +15826,10 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) else current_emitting_switch_fallthrough = false; - if (!degenerate_switch) + if (!block_like_switch) begin_scope(); branch(block.self, target_block); - if (!degenerate_switch) + if (!block_like_switch) end_scope(); current_emitting_switch_fallthrough = false; @@ -15671,7 +15843,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there. bool header_merge_requires_phi = flush_phi_required(block.self, block.next_block); bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty(); - if ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty()) + if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty())) { for (auto &case_literal : literals_to_merge) statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":"); @@ -15690,10 +15862,15 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) end_scope(); } - if (degenerate_switch && !is_legacy_es()) - end_scope_decl("while(false)"); + if (!collapsed_switch) + { + if (block_like_switch && !is_legacy_es()) + end_scope_decl("while(false)"); + else + end_scope(); + } else - end_scope(); + flush_phi(block.self, block.next_block); if (block.need_ladder_break) { diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp index c4396991e..dc693787c 100644 --- a/3rdparty/spirv-cross/spirv_glsl.hpp +++ b/3rdparty/spirv-cross/spirv_glsl.hpp @@ -711,6 +711,7 @@ protected: spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base); + virtual void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type); virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, bool &is_packed); @@ -877,6 +878,7 @@ protected: bool requires_transpose_3x3 = false; bool requires_transpose_4x4 = false; bool ray_tracing_is_khr = false; + bool barycentric_is_nv = false; void ray_tracing_khr_fixup_locations(); bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure); diff --git a/3rdparty/spirv-cross/spirv_hlsl.cpp b/3rdparty/spirv-cross/spirv_hlsl.cpp index bae3e61bd..c5a37d2c1 100644 --- a/3rdparty/spirv-cross/spirv_hlsl.cpp +++ b/3rdparty/spirv-cross/spirv_hlsl.cpp @@ -645,9 +645,9 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() case BuiltInLayer: if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelGeometry) - SPIRV_CROSS_THROW("Render target index output is only supported in GS 5.0 or higher."); + SPIRV_CROSS_THROW("Render target array index output is only supported in GS 5.0 or higher."); type = "uint"; - semantic = "SV_RenderTargetIndex"; + semantic = "SV_RenderTargetArrayIndex"; break; default: @@ -797,9 +797,9 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() case BuiltInLayer: if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) - SPIRV_CROSS_THROW("Render target index input is only supported in PS 5.0 or higher."); + SPIRV_CROSS_THROW("Render target array index input is only supported in PS 5.0 or higher."); type = "uint"; - semantic =
"SV_RenderTargetIndex"; + semantic = "SV_RenderTargetArrayIndex"; break; default: @@ -3598,6 +3598,18 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i } return "spvPackFloat2x16"; } + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("Half to UShort requires Shader Model 4."); + return "(" + type_to_glsl(out_type) + ")f32tof16"; + } + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("UShort to Half requires Shader Model 4."); + return "(" + type_to_glsl(out_type) + ")f16tof32"; + } else return ""; } diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index 22ab19006..efd29879d 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -1967,6 +1967,13 @@ void CompilerMSL::mark_packable_structs() mark_as_packable(type); } }); + + // Physical storage buffer pointers can appear outside of the context of a variable, if the address + // is calculated from a ulong or uvec2 and cast to a pointer, so check if they need to be packed too. + ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) { + if (type.basetype == SPIRType::Struct && type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + mark_as_packable(type); + }); } // If the specified type is a struct, it and any nested structs @@ -1980,7 +1987,8 @@ void CompilerMSL::mark_as_packable(SPIRType &type) return; } - if (type.basetype == SPIRType::Struct) + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (type.basetype == SPIRType::Struct && !has_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked)) { set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked); @@ -3175,6 +3183,9 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st return; } + if (storage == StorageClassInput && has_decoration(var.self, DecorationPerVertexKHR)) + SPIRV_CROSS_THROW("PerVertexKHR decoration is not supported in MSL."); + // If variable names alias, they will end up with wrong names in the interface struct, because // there might be aliases in the member name cache and there would be a mismatch in fixup_in code. // Make sure to register the variables as unique resource names ahead of time. @@ -3458,7 +3469,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) bool builtin_is_stage_in_out = builtin_is_gl_in_out || bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex || - bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV || + bi_type == BuiltInBaryCoordKHR || bi_type == BuiltInBaryCoordNoPerspKHR || bi_type == BuiltInFragDepth || bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask; @@ -3515,7 +3526,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) } // Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments.
- if (is_active && (bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV)) + if (is_active && (bi_type == BuiltInBaryCoordKHR || bi_type == BuiltInBaryCoordNoPerspKHR)) { if (has_seen_barycentric) SPIRV_CROSS_THROW("Cannot declare both BaryCoordNV and BaryCoordNoPerspNV in same shader in MSL."); @@ -4036,6 +4047,10 @@ uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t locat void CompilerMSL::mark_struct_members_packed(const SPIRType &type) { + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (has_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked)) + return; + set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked); // Problem case! Struct needs to be placed at an awkward alignment. @@ -4062,8 +4077,9 @@ void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type) uint32_t mbr_cnt = uint32_t(type.member_types.size()); for (uint32_t i = 0; i < mbr_cnt; i++) { + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. auto &mbr_type = get<SPIRType>(type.member_types[i]); - if (mbr_type.basetype == SPIRType::Struct) + if (mbr_type.basetype == SPIRType::Struct && !(mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer)) { auto *struct_type = &mbr_type; while (!struct_type->array.empty()) @@ -4275,8 +4291,10 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in // This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite // match up with what we want. Scalar block layout comes to mind here where we might have to work around the rule // that struct alignment == max alignment of all members and struct size depends on this alignment. + // Can't repack structs, but can repack pointers to structs. auto &mbr_type = get<SPIRType>(ib_type.member_types[index]); - if (mbr_type.basetype == SPIRType::Struct) + bool is_buff_ptr = mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer; + if (mbr_type.basetype == SPIRType::Struct && !is_buff_ptr) SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct."); // Perform remapping here. @@ -4303,7 +4321,9 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in for (uint32_t dim = 0; dim < dimensions; dim++) array_stride /= max(to_array_size_literal(mbr_type, dim), 1u); - uint32_t elems_per_stride = array_stride / (mbr_type.width / 8); + // Pointers are 8 bytes + uint32_t mbr_width_in_bytes = is_buff_ptr ? 8 : (mbr_type.width / 8); + uint32_t elems_per_stride = array_stride / mbr_width_in_bytes; if (elems_per_stride == 3) SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios."); @@ -4313,6 +4333,17 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in auto physical_type = mbr_type; physical_type.vecsize = elems_per_stride; physical_type.parent_type = 0; + + // If this is a physical buffer pointer, replace type with a ulongn vector.
+ if (is_buff_ptr) + { + physical_type.width = 64; + physical_type.basetype = to_unsigned_basetype(physical_type.width); + physical_type.pointer = false; + physical_type.pointer_depth = false; + physical_type.forward_pointer = false; + } + uint32_t type_id = ir.increase_bound_by(1); set<SPIRType>(type_id, physical_type); set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id); @@ -6789,6 +6820,25 @@ void CompilerMSL::emit_specialization_constants_and_structs() // these types for purpose of iterating over them in ir.ids_for_type and friends. auto loop_lock = ir.create_loop_soft_lock(); + // Physical storage buffer pointers can have cyclical references, + // so emit forward declarations of them before other structs. + // Ignore type_id because we want the underlying struct type from the pointer. + ir.for_each_typed_id<SPIRType>([&](uint32_t /* type_id */, const SPIRType &type) { + if (type.basetype == SPIRType::Struct && + type.pointer && type.storage == StorageClassPhysicalStorageBuffer && + declared_structs.count(type.self) == 0) + { + statement("struct ", to_name(type.self), ";"); + declared_structs.insert(type.self); + emitted = true; + } + }); + if (emitted) + statement(""); + + emitted = false; + declared_structs.clear(); + for (auto &id_ : ir.ids_for_constant_or_type) { auto &id = ir.ids[id_]; @@ -7675,6 +7725,23 @@ void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, interface_index); } + +// If the physical type of a physical buffer pointer has been changed +// to a ulong or ulongn vector, add a cast back to the pointer type. +void CompilerMSL::check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) +{ + auto *p_physical_type = maybe_get<SPIRType>(physical_type); + if (p_physical_type && + p_physical_type->storage == StorageClassPhysicalStorageBuffer && + p_physical_type->basetype == to_unsigned_basetype(64)) + { + if (p_physical_type->vecsize > 1) + expr += ".x"; + + expr = join("((", type_to_glsl(*type), ")", expr, ")"); + } +} + // Override for MSL-specific syntax instructions void CompilerMSL::emit_instruction(const Instruction &instruction) { @@ -11072,8 +11139,8 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in case BuiltInSampleId: case BuiltInSampleMask: case BuiltInLayer: - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: quals = builtin_qualifier(builtin); break; @@ -11089,7 +11156,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in else quals = member_location_attribute_qualifier(type, index); - if (builtin == BuiltInBaryCoordNV || builtin == BuiltInBaryCoordNoPerspNV) + if (builtin == BuiltInBaryCoordKHR || builtin == BuiltInBaryCoordNoPerspKHR) { if (has_member_decoration(type.self, index, DecorationFlat) || has_member_decoration(type.self, index, DecorationCentroid) || @@ -11397,6 +11464,7 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo break; case StorageClassStorageBuffer: + case StorageClassPhysicalStorageBuffer: { // For arguments from variable pointers, we use the write count deduction, so // we should not assume any constness here. Only for global SSBOs.
@@ -11555,8 +11623,8 @@ bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type) // Fragment function in case BuiltInSamplePosition: case BuiltInHelperInvocation: - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: return false; case BuiltInViewIndex: return get_execution_model() == ExecutionModelFragment && msl_options.multiview && @@ -13525,7 +13593,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) const char *restrict_kw; auto type_address_space = get_type_address_space(type, id); - auto type_decl = type_to_glsl(get<SPIRType>(type.parent_type), id); + const auto *p_parent_type = &get<SPIRType>(type.parent_type); // Work around C pointer qualifier rules. If glsl_type is a pointer type as well // we'll need to emit the address space to the right. // Prefer emitting thread T *foo over T thread* foo since it's more readable, // but we'll have to emit thread T * thread * T constant bar; for example. if (type_is_pointer_to_pointer(type)) - type_name = join(type_decl, " ", type_address_space, " "); + type_name = join(type_to_glsl(*p_parent_type, id), " ", type_address_space, " "); else - type_name = join(type_address_space, " ", type_decl); + { + // Since this is not a pointer-to-pointer, ensure we've dug down to the base type. + // Some situations chain pointers even though they are not formally pointers-of-pointers. + while (type_is_pointer(*p_parent_type)) + p_parent_type = &get<SPIRType>(p_parent_type->parent_type); + + type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id)); + } switch (type.basetype) { @@ -14320,18 +14395,31 @@ string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in // size (eg. short shift right becomes int), which means chaining integer ops // together may introduce size variations that SPIR-V doesn't know about. if (same_size_cast && !integral_cast) - { return "as_type<" + type_to_glsl(out_type) + ">"; - } else - { return type_to_glsl(out_type); - } } -bool CompilerMSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t) +bool CompilerMSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) { - return false; + auto &out_type = get<SPIRType>(result_type); + auto &in_type = expression_type(op0); + bool uvec2_to_ptr = (in_type.basetype == SPIRType::UInt && in_type.vecsize == 2 && + out_type.pointer && out_type.storage == StorageClassPhysicalStorageBuffer); + bool ptr_to_uvec2 = (in_type.pointer && in_type.storage == StorageClassPhysicalStorageBuffer && + out_type.basetype == SPIRType::UInt && out_type.vecsize == 2); + string expr; + + // Casting between uvec2 and buffer storage pointer per GL_EXT_buffer_reference_uvec2 + if (uvec2_to_ptr) + expr = join("((", type_to_glsl(out_type), ")as_type<uint64_t>(", to_unpacked_expression(op0), "))"); + else if (ptr_to_uvec2) + expr = join("as_type<", type_to_glsl(out_type), ">((uint64_t)", to_unpacked_expression(op0), ")"); + else + return false; + + emit_op(result_type, id, expr, should_forward(op0)); + return true; } // Returns an MSL string identifying the name of a SPIR-V builtin. @@ -14494,8 +14582,8 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) return stage_out_var_name + "."
+ CompilerGLSL::builtin_to_glsl(builtin, storage); break; - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return stage_in_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); break; @@ -14732,16 +14820,14 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) // Shouldn't be reached. SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL."); - case BuiltInBaryCoordNV: - // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. + case BuiltInBaryCoordKHR: if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); return "barycentric_coord, center_perspective"; - case BuiltInBaryCoordNoPerspNV: - // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. + case BuiltInBaryCoordNoPerspKHR: if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) @@ -14831,8 +14917,8 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) case BuiltInHelperInvocation: return "bool"; - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: // Use the type as declared, can be 1, 2 or 3 components. return type_to_glsl(get_variable_data_type(get<SPIRVariable>(id))); @@ -15006,6 +15092,25 @@ uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type, // Returns the byte size of a struct member. uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const { + // Pointers take 8 bytes each + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + { + uint32_t type_size = 8 * (type.vecsize == 3 ? 4 : type.vecsize); + + // Work our way through potentially layered arrays, + // stopping when we hit a pointer that is not also an array. + int32_t dim_idx = (int32_t)type.array.size() - 1; + auto *p_type = &type; + while (!type_is_pointer(*p_type) && dim_idx >= 0) + { + type_size *= to_array_size_literal(*p_type, dim_idx); + p_type = &get<SPIRType>(p_type->parent_type); + dim_idx--; + } + + return type_size; + } + switch (type.basetype) { case SPIRType::Unknown: @@ -15065,6 +15170,10 @@ uint32_t CompilerMSL::get_declared_input_size_msl(const SPIRType &type, uint32_t // Returns the byte alignment of a type. uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const { + // Pointers aligns on multiples of 8 bytes + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + return 8 * (type.vecsize == 3 ? 4 : type.vecsize); + switch (type.basetype) { case SPIRType::Unknown: @@ -16404,6 +16513,20 @@ void CompilerMSL::emit_block_hints(const SPIRBlock &) string CompilerMSL::additional_fixed_sample_mask_str() const { char print_buffer[32]; +#ifdef _MSC_VER + // snprintf does not exist or is buggy on older MSVC versions, some of + // them being used by MinGW. Use sprintf instead and disable + // corresponding warning.
+#pragma warning(push) +#pragma warning(disable : 4996) +#endif +#ifdef _WIN32 sprintf(print_buffer, "0x%x", msl_options.additional_fixed_sample_mask); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%x", msl_options.additional_fixed_sample_mask); +#endif +#ifdef _MSC_VER +#pragma warning(pop) +#endif return print_buffer; } diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp index b1abd12af..c0317c7a0 100644 --- a/3rdparty/spirv-cross/spirv_msl.hpp +++ b/3rdparty/spirv-cross/spirv_msl.hpp @@ -986,6 +986,8 @@ protected: void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, bool &is_packed) override; void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length); + void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override; + bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length); bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr); bool is_out_of_bounds_tessellation_level(uint32_t id_lhs);