From 686190d7ff5957f1999a6bf84801df5943f02f95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?= =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?= Date: Sun, 14 Sep 2025 09:07:27 -0700 Subject: [PATCH] Updated spirv-cross. --- 3rdparty/spirv-cross/main.cpp | 1 + 3rdparty/spirv-cross/spirv.h | 126 ++- 3rdparty/spirv-cross/spirv.hpp | 130 ++- 3rdparty/spirv-cross/spirv_cfg.cpp | 124 ++- 3rdparty/spirv-cross/spirv_cfg.hpp | 26 +- 3rdparty/spirv-cross/spirv_common.hpp | 87 +- 3rdparty/spirv-cross/spirv_cross.cpp | 31 +- 3rdparty/spirv-cross/spirv_cross.hpp | 2 + 3rdparty/spirv-cross/spirv_cross_c.cpp | 8 + 3rdparty/spirv-cross/spirv_cross_c.h | 3 +- .../spirv-cross/spirv_cross_parsed_ir.cpp | 31 +- .../spirv-cross/spirv_cross_parsed_ir.hpp | 1 + 3rdparty/spirv-cross/spirv_glsl.cpp | 764 ++++++++++++++++-- 3rdparty/spirv-cross/spirv_glsl.hpp | 18 +- 3rdparty/spirv-cross/spirv_hlsl.cpp | 4 +- 3rdparty/spirv-cross/spirv_msl.cpp | 386 ++++++--- 3rdparty/spirv-cross/spirv_msl.hpp | 21 +- 3rdparty/spirv-cross/spirv_parser.cpp | 126 ++- 3rdparty/spirv-cross/spirv_reflect.cpp | 11 +- 19 files changed, 1606 insertions(+), 294 deletions(-) diff --git a/3rdparty/spirv-cross/main.cpp b/3rdparty/spirv-cross/main.cpp index ea3ecf441..605901648 100644 --- a/3rdparty/spirv-cross/main.cpp +++ b/3rdparty/spirv-cross/main.cpp @@ -536,6 +536,7 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res print_resources(compiler, "push", res.push_constant_buffers); print_resources(compiler, "counters", res.atomic_counters); print_resources(compiler, "acceleration structures", res.acceleration_structures); + print_resources(compiler, "tensors", res.tensors); print_resources(compiler, "record buffers", res.shader_record_buffers); print_resources(compiler, spv::StorageClassInput, res.builtin_inputs); print_resources(compiler, spv::StorageClassOutput, res.builtin_outputs); diff --git a/3rdparty/spirv-cross/spirv.h b/3rdparty/spirv-cross/spirv.h index 43dd311f0..005d451d3 100644 --- a/3rdparty/spirv-cross/spirv.h +++ b/3rdparty/spirv-cross/spirv.h @@ -1,27 +1,11 @@ /* -** Copyright (c) 2014-2024 The Khronos Group Inc. +** Copyright: 2014-2024 The Khronos Group Inc. +** License: MIT ** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS +** KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS +** SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT +** https://www.khronos.org/registry/ */ /* @@ -176,6 +160,8 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeSignedZeroInfNanPreserve = 4461, SpvExecutionModeRoundingModeRTE = 4462, SpvExecutionModeRoundingModeRTZ = 4463, + SpvExecutionModeNonCoherentTileAttachmentReadQCOM = 4489, + SpvExecutionModeTileShadingRateQCOM = 4490, SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017, SpvExecutionModeStencilRefReplacingEXT = 5027, SpvExecutionModeCoalescingAMDX = 5069, @@ -245,6 +231,7 @@ typedef enum SpvStorageClass_ { SpvStorageClassImage = 11, SpvStorageClassStorageBuffer = 12, SpvStorageClassTileImageEXT = 4172, + SpvStorageClassTileAttachmentQCOM = 4491, SpvStorageClassNodePayloadAMDX = 5068, SpvStorageClassCallableDataKHR = 5328, SpvStorageClassCallableDataNV = 5328, @@ -554,6 +541,7 @@ typedef enum SpvDecoration_ { SpvDecorationMaxByteOffset = 45, SpvDecorationAlignmentId = 46, SpvDecorationMaxByteOffsetId = 47, + SpvDecorationSaturatedToLargestFloat8NormalConversionEXT = 4216, SpvDecorationNoSignedWrap = 4469, SpvDecorationNoUnsignedWrap = 4470, SpvDecorationWeightTextureQCOM = 4487, @@ -723,6 +711,9 @@ typedef enum SpvBuiltIn_ { SpvBuiltInDeviceIndex = 4438, SpvBuiltInViewIndex = 4440, SpvBuiltInShadingRateKHR = 4444, + SpvBuiltInTileOffsetQCOM = 4492, + SpvBuiltInTileDimensionQCOM = 4493, + SpvBuiltInTileApronSizeQCOM = 4494, SpvBuiltInBaryCoordNoPerspAMD = 4992, SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993, SpvBuiltInBaryCoordNoPerspSampleAMD = 4994, @@ -1073,7 +1064,13 @@ typedef enum SpvCapability_ { SpvCapabilityTileImageColorReadAccessEXT = 4166, SpvCapabilityTileImageDepthReadAccessEXT = 4167, SpvCapabilityTileImageStencilReadAccessEXT = 4168, + SpvCapabilityTensorsARM = 4174, + SpvCapabilityStorageTensorArrayDynamicIndexingARM = 4175, + SpvCapabilityStorageTensorArrayNonUniformIndexingARM = 4176, + SpvCapabilityGraphARM = 4191, SpvCapabilityCooperativeMatrixLayoutsARM = 4201, + SpvCapabilityFloat8EXT = 4212, + SpvCapabilityFloat8CooperativeMatrixEXT = 4213, SpvCapabilityFragmentShadingRateKHR = 4422, SpvCapabilitySubgroupBallotKHR = 4423, SpvCapabilityDrawParameters = 4427, @@ -1109,6 +1106,7 @@ typedef enum SpvCapability_ { SpvCapabilityTextureSampleWeightedQCOM = 4484, SpvCapabilityTextureBoxFilterQCOM = 4485, SpvCapabilityTextureBlockMatchQCOM = 4486, + SpvCapabilityTileShadingQCOM = 4495, SpvCapabilityTextureBlockMatch2QCOM = 4498, SpvCapabilityFloat16ImageAMD = 5008, SpvCapabilityImageGatherBiasLodAMD = 5009, @@ -1119,6 +1117,8 @@ typedef enum SpvCapability_ { SpvCapabilityShaderClockKHR = 5055, SpvCapabilityShaderEnqueueAMDX = 5067, SpvCapabilityQuadControlKHR = 5087, + SpvCapabilityInt4TypeINTEL = 5112, + SpvCapabilityInt4CooperativeMatrixINTEL = 5114, SpvCapabilityBFloat16TypeKHR = 5116, SpvCapabilityBFloat16DotProductKHR = 5117, SpvCapabilityBFloat16CooperativeMatrixKHR = 5118, @@ -1287,6 +1287,7 @@ typedef enum SpvCapability_ { SpvCapabilityMaskedGatherScatterINTEL = 6427, SpvCapabilityCacheControlsINTEL = 6441, SpvCapabilityRegisterLimitsINTEL = 6460, + SpvCapabilityBindlessImagesINTEL = 6528, 
SpvCapabilityMax = 0x7fffffff, } SpvCapability; @@ -1463,6 +1464,24 @@ typedef enum SpvTensorAddressingOperandsMask_ { SpvTensorAddressingOperandsDecodeFuncMask = 0x00000002, } SpvTensorAddressingOperandsMask; +typedef enum SpvTensorOperandsShift_ { + SpvTensorOperandsNontemporalARMShift = 0, + SpvTensorOperandsOutOfBoundsValueARMShift = 1, + SpvTensorOperandsMakeElementAvailableARMShift = 2, + SpvTensorOperandsMakeElementVisibleARMShift = 3, + SpvTensorOperandsNonPrivateElementARMShift = 4, + SpvTensorOperandsMax = 0x7fffffff, +} SpvTensorOperandsShift; + +typedef enum SpvTensorOperandsMask_ { + SpvTensorOperandsMaskNone = 0, + SpvTensorOperandsNontemporalARMMask = 0x00000001, + SpvTensorOperandsOutOfBoundsValueARMMask = 0x00000002, + SpvTensorOperandsMakeElementAvailableARMMask = 0x00000004, + SpvTensorOperandsMakeElementVisibleARMMask = 0x00000008, + SpvTensorOperandsNonPrivateElementARMMask = 0x00000010, +} SpvTensorOperandsMask; + typedef enum SpvInitializationModeQualifier_ { SpvInitializationModeQualifierInitOnDeviceReprogramINTEL = 0, SpvInitializationModeQualifierInitOnDeviceResetINTEL = 1, @@ -1549,6 +1568,8 @@ typedef enum SpvRawAccessChainOperandsMask_ { typedef enum SpvFPEncoding_ { SpvFPEncodingBFloat16KHR = 0, + SpvFPEncodingFloat8E4M3EXT = 4214, + SpvFPEncodingFloat8E5M2EXT = 4215, SpvFPEncodingMax = 0x7fffffff, } SpvFPEncoding; @@ -1927,6 +1948,17 @@ typedef enum SpvOp_ { SpvOpColorAttachmentReadEXT = 4160, SpvOpDepthAttachmentReadEXT = 4161, SpvOpStencilAttachmentReadEXT = 4162, + SpvOpTypeTensorARM = 4163, + SpvOpTensorReadARM = 4164, + SpvOpTensorWriteARM = 4165, + SpvOpTensorQuerySizeARM = 4166, + SpvOpGraphConstantARM = 4181, + SpvOpGraphEntryPointARM = 4182, + SpvOpGraphARM = 4183, + SpvOpGraphInputARM = 4184, + SpvOpGraphSetOutputARM = 4185, + SpvOpGraphEndARM = 4186, + SpvOpTypeGraphARM = 4190, SpvOpTerminateInvocation = 4416, SpvOpTypeUntypedPointerKHR = 4417, SpvOpUntypedVariableKHR = 4418, @@ -2385,6 +2417,9 @@ typedef enum SpvOp_ { SpvOpRoundFToTF32INTEL = 6426, SpvOpMaskedGatherINTEL = 6428, SpvOpMaskedScatterINTEL = 6429, + SpvOpConvertHandleToImageINTEL = 6529, + SpvOpConvertHandleToSamplerINTEL = 6530, + SpvOpConvertHandleToSampledImageINTEL = 6531, SpvOpMax = 0x7fffffff, } SpvOp; @@ -2743,6 +2778,17 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy case SpvOpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; case SpvOpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; case SpvOpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeTensorARM: *hasResult = true; *hasResultType = false; break; + case SpvOpTensorReadARM: *hasResult = true; *hasResultType = true; break; + case SpvOpTensorWriteARM: *hasResult = false; *hasResultType = false; break; + case SpvOpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break; + case SpvOpGraphConstantARM: *hasResult = true; *hasResultType = true; break; + case SpvOpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break; + case SpvOpGraphARM: *hasResult = true; *hasResultType = true; break; + case SpvOpGraphInputARM: *hasResult = true; *hasResultType = true; break; + case SpvOpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break; + case SpvOpGraphEndARM: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeGraphARM: *hasResult = true; *hasResultType = false; break; case SpvOpTerminateInvocation: *hasResult = false; *hasResultType = false; break; case 
SpvOpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break; case SpvOpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break; @@ -3190,6 +3236,9 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy case SpvOpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break; case SpvOpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break; case SpvOpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break; } } inline const char* SpvSourceLanguageToString(SpvSourceLanguage value) { @@ -3305,6 +3354,8 @@ inline const char* SpvExecutionModeToString(SpvExecutionMode value) { case SpvExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve"; case SpvExecutionModeRoundingModeRTE: return "RoundingModeRTE"; case SpvExecutionModeRoundingModeRTZ: return "RoundingModeRTZ"; + case SpvExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM"; + case SpvExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM"; case SpvExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD"; case SpvExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT"; case SpvExecutionModeCoalescingAMDX: return "CoalescingAMDX"; @@ -3371,6 +3422,7 @@ inline const char* SpvStorageClassToString(SpvStorageClass value) { case SpvStorageClassImage: return "Image"; case SpvStorageClassStorageBuffer: return "StorageBuffer"; case SpvStorageClassTileImageEXT: return "TileImageEXT"; + case SpvStorageClassTileAttachmentQCOM: return "TileAttachmentQCOM"; case SpvStorageClassNodePayloadAMDX: return "NodePayloadAMDX"; case SpvStorageClassCallableDataKHR: return "CallableDataKHR"; case SpvStorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR"; @@ -3619,6 +3671,7 @@ inline const char* SpvDecorationToString(SpvDecoration value) { case SpvDecorationMaxByteOffset: return "MaxByteOffset"; case SpvDecorationAlignmentId: return "AlignmentId"; case SpvDecorationMaxByteOffsetId: return "MaxByteOffsetId"; + case SpvDecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT"; case SpvDecorationNoSignedWrap: return "NoSignedWrap"; case SpvDecorationNoUnsignedWrap: return "NoUnsignedWrap"; case SpvDecorationWeightTextureQCOM: return "WeightTextureQCOM"; @@ -3778,6 +3831,9 @@ inline const char* SpvBuiltInToString(SpvBuiltIn value) { case SpvBuiltInDeviceIndex: return "DeviceIndex"; case SpvBuiltInViewIndex: return "ViewIndex"; case SpvBuiltInShadingRateKHR: return "ShadingRateKHR"; + case SpvBuiltInTileOffsetQCOM: return "TileOffsetQCOM"; + case SpvBuiltInTileDimensionQCOM: return "TileDimensionQCOM"; + case SpvBuiltInTileApronSizeQCOM: return "TileApronSizeQCOM"; case SpvBuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD"; case SpvBuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD"; case SpvBuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD"; @@ -3958,7 +4014,13 @@ inline const char* SpvCapabilityToString(SpvCapability value) { case SpvCapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT"; case SpvCapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT"; case 
SpvCapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT"; + case SpvCapabilityTensorsARM: return "TensorsARM"; + case SpvCapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM"; + case SpvCapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM"; + case SpvCapabilityGraphARM: return "GraphARM"; case SpvCapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM"; + case SpvCapabilityFloat8EXT: return "Float8EXT"; + case SpvCapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT"; case SpvCapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR"; case SpvCapabilitySubgroupBallotKHR: return "SubgroupBallotKHR"; case SpvCapabilityDrawParameters: return "DrawParameters"; @@ -3992,6 +4054,7 @@ inline const char* SpvCapabilityToString(SpvCapability value) { case SpvCapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM"; case SpvCapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM"; case SpvCapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM"; + case SpvCapabilityTileShadingQCOM: return "TileShadingQCOM"; case SpvCapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM"; case SpvCapabilityFloat16ImageAMD: return "Float16ImageAMD"; case SpvCapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD"; @@ -4002,6 +4065,8 @@ inline const char* SpvCapabilityToString(SpvCapability value) { case SpvCapabilityShaderClockKHR: return "ShaderClockKHR"; case SpvCapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX"; case SpvCapabilityQuadControlKHR: return "QuadControlKHR"; + case SpvCapabilityInt4TypeINTEL: return "Int4TypeINTEL"; + case SpvCapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL"; case SpvCapabilityBFloat16TypeKHR: return "BFloat16TypeKHR"; case SpvCapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR"; case SpvCapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR"; @@ -4144,6 +4209,7 @@ inline const char* SpvCapabilityToString(SpvCapability value) { case SpvCapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL"; case SpvCapabilityCacheControlsINTEL: return "CacheControlsINTEL"; case SpvCapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL"; + case SpvCapabilityBindlessImagesINTEL: return "BindlessImagesINTEL"; default: return "Unknown"; } } @@ -4299,6 +4365,8 @@ inline const char* SpvNamedMaximumNumberOfRegistersToString(SpvNamedMaximumNumbe inline const char* SpvFPEncodingToString(SpvFPEncoding value) { switch (value) { case SpvFPEncodingBFloat16KHR: return "BFloat16KHR"; + case SpvFPEncodingFloat8E4M3EXT: return "Float8E4M3EXT"; + case SpvFPEncodingFloat8E5M2EXT: return "Float8E5M2EXT"; default: return "Unknown"; } } @@ -4683,6 +4751,17 @@ inline const char* SpvOpToString(SpvOp value) { case SpvOpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT"; case SpvOpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT"; case SpvOpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT"; + case SpvOpTypeTensorARM: return "OpTypeTensorARM"; + case SpvOpTensorReadARM: return "OpTensorReadARM"; + case SpvOpTensorWriteARM: return "OpTensorWriteARM"; + case SpvOpTensorQuerySizeARM: return "OpTensorQuerySizeARM"; + case SpvOpGraphConstantARM: return "OpGraphConstantARM"; + case SpvOpGraphEntryPointARM: return "OpGraphEntryPointARM"; + case SpvOpGraphARM: return "OpGraphARM"; + case SpvOpGraphInputARM: return "OpGraphInputARM"; + 
case SpvOpGraphSetOutputARM: return "OpGraphSetOutputARM"; + case SpvOpGraphEndARM: return "OpGraphEndARM"; + case SpvOpTypeGraphARM: return "OpTypeGraphARM"; case SpvOpTerminateInvocation: return "OpTerminateInvocation"; case SpvOpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR"; case SpvOpUntypedVariableKHR: return "OpUntypedVariableKHR"; @@ -5130,6 +5209,9 @@ inline const char* SpvOpToString(SpvOp value) { case SpvOpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL"; case SpvOpMaskedGatherINTEL: return "OpMaskedGatherINTEL"; case SpvOpMaskedScatterINTEL: return "OpMaskedScatterINTEL"; + case SpvOpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL"; + case SpvOpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL"; + case SpvOpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL"; default: return "Unknown"; } } diff --git a/3rdparty/spirv-cross/spirv.hpp b/3rdparty/spirv-cross/spirv.hpp index 5fbba32bf..f7a7bf835 100644 --- a/3rdparty/spirv-cross/spirv.hpp +++ b/3rdparty/spirv-cross/spirv.hpp @@ -1,26 +1,10 @@ -// Copyright (c) 2014-2024 The Khronos Group Inc. +// Copyright: 2014-2024 The Khronos Group Inc. +// License: MIT // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and/or associated documentation files (the "Materials"), -// to deal in the Materials without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Materials, and to permit persons to whom the -// Materials are furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Materials. -// -// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -// -// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -// IN THE MATERIALS. +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS +// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS +// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT +// https://www.khronos.org/registry/ // This header is automatically generated by the same tool that creates // the Binary Section of the SPIR-V specification. 
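// Illustrative sketch (not part of the patch): these generated switch tables are what
// let a SPIR-V consumer walk a module and keep track of result IDs even for opcodes it
// does not otherwise understand, which is why new instructions such as OpTypeTensorARM
// and OpGraphARM must be added to HasResultAndType/OpToString. Assumes <cstdio> plus
// this header; `words`/`word_count` are a raw module that begins with the standard
// 5-word SPIR-V header.
static void dump_result_ids(const uint32_t *words, size_t word_count)
{
    size_t offset = 5; // skip the SPIR-V module header
    while (offset < word_count)
    {
        spv::Op op = static_cast<spv::Op>(words[offset] & 0xffffu);
        uint32_t len = words[offset] >> 16u; // high 16 bits of word 0 hold the instruction length
        bool has_result = false, has_type = false;
        spv::HasResultAndType(op, &has_result, &has_type);
        if (has_result)
            printf("%s -> %%%u\n", spv::OpToString(op), words[offset + (has_type ? 2 : 1)]);
        offset += len ? len : 1; // guard against a malformed zero-length word
    }
}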
@@ -172,6 +156,8 @@ enum ExecutionMode { ExecutionModeSignedZeroInfNanPreserve = 4461, ExecutionModeRoundingModeRTE = 4462, ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeNonCoherentTileAttachmentReadQCOM = 4489, + ExecutionModeTileShadingRateQCOM = 4490, ExecutionModeEarlyAndLateFragmentTestsAMD = 5017, ExecutionModeStencilRefReplacingEXT = 5027, ExecutionModeCoalescingAMDX = 5069, @@ -241,6 +227,7 @@ enum StorageClass { StorageClassImage = 11, StorageClassStorageBuffer = 12, StorageClassTileImageEXT = 4172, + StorageClassTileAttachmentQCOM = 4491, StorageClassNodePayloadAMDX = 5068, StorageClassCallableDataKHR = 5328, StorageClassCallableDataNV = 5328, @@ -550,6 +537,7 @@ enum Decoration { DecorationMaxByteOffset = 45, DecorationAlignmentId = 46, DecorationMaxByteOffsetId = 47, + DecorationSaturatedToLargestFloat8NormalConversionEXT = 4216, DecorationNoSignedWrap = 4469, DecorationNoUnsignedWrap = 4470, DecorationWeightTextureQCOM = 4487, @@ -719,6 +707,9 @@ enum BuiltIn { BuiltInDeviceIndex = 4438, BuiltInViewIndex = 4440, BuiltInShadingRateKHR = 4444, + BuiltInTileOffsetQCOM = 4492, + BuiltInTileDimensionQCOM = 4493, + BuiltInTileApronSizeQCOM = 4494, BuiltInBaryCoordNoPerspAMD = 4992, BuiltInBaryCoordNoPerspCentroidAMD = 4993, BuiltInBaryCoordNoPerspSampleAMD = 4994, @@ -1069,7 +1060,13 @@ enum Capability { CapabilityTileImageColorReadAccessEXT = 4166, CapabilityTileImageDepthReadAccessEXT = 4167, CapabilityTileImageStencilReadAccessEXT = 4168, + CapabilityTensorsARM = 4174, + CapabilityStorageTensorArrayDynamicIndexingARM = 4175, + CapabilityStorageTensorArrayNonUniformIndexingARM = 4176, + CapabilityGraphARM = 4191, CapabilityCooperativeMatrixLayoutsARM = 4201, + CapabilityFloat8EXT = 4212, + CapabilityFloat8CooperativeMatrixEXT = 4213, CapabilityFragmentShadingRateKHR = 4422, CapabilitySubgroupBallotKHR = 4423, CapabilityDrawParameters = 4427, @@ -1105,6 +1102,7 @@ enum Capability { CapabilityTextureSampleWeightedQCOM = 4484, CapabilityTextureBoxFilterQCOM = 4485, CapabilityTextureBlockMatchQCOM = 4486, + CapabilityTileShadingQCOM = 4495, CapabilityTextureBlockMatch2QCOM = 4498, CapabilityFloat16ImageAMD = 5008, CapabilityImageGatherBiasLodAMD = 5009, @@ -1115,6 +1113,8 @@ enum Capability { CapabilityShaderClockKHR = 5055, CapabilityShaderEnqueueAMDX = 5067, CapabilityQuadControlKHR = 5087, + CapabilityInt4TypeINTEL = 5112, + CapabilityInt4CooperativeMatrixINTEL = 5114, CapabilityBFloat16TypeKHR = 5116, CapabilityBFloat16DotProductKHR = 5117, CapabilityBFloat16CooperativeMatrixKHR = 5118, @@ -1283,6 +1283,7 @@ enum Capability { CapabilityMaskedGatherScatterINTEL = 6427, CapabilityCacheControlsINTEL = 6441, CapabilityRegisterLimitsINTEL = 6460, + CapabilityBindlessImagesINTEL = 6528, CapabilityMax = 0x7fffffff, }; @@ -1459,6 +1460,24 @@ enum TensorAddressingOperandsMask { TensorAddressingOperandsDecodeFuncMask = 0x00000002, }; +enum TensorOperandsShift { + TensorOperandsNontemporalARMShift = 0, + TensorOperandsOutOfBoundsValueARMShift = 1, + TensorOperandsMakeElementAvailableARMShift = 2, + TensorOperandsMakeElementVisibleARMShift = 3, + TensorOperandsNonPrivateElementARMShift = 4, + TensorOperandsMax = 0x7fffffff, +}; + +enum TensorOperandsMask { + TensorOperandsMaskNone = 0, + TensorOperandsNontemporalARMMask = 0x00000001, + TensorOperandsOutOfBoundsValueARMMask = 0x00000002, + TensorOperandsMakeElementAvailableARMMask = 0x00000004, + TensorOperandsMakeElementVisibleARMMask = 0x00000008, + TensorOperandsNonPrivateElementARMMask = 0x00000010, +}; + enum 
InitializationModeQualifier { InitializationModeQualifierInitOnDeviceReprogramINTEL = 0, InitializationModeQualifierInitOnDeviceResetINTEL = 1, @@ -1545,6 +1564,8 @@ enum RawAccessChainOperandsMask { enum FPEncoding { FPEncodingBFloat16KHR = 0, + FPEncodingFloat8E4M3EXT = 4214, + FPEncodingFloat8E5M2EXT = 4215, FPEncodingMax = 0x7fffffff, }; @@ -1923,6 +1944,17 @@ enum Op { OpColorAttachmentReadEXT = 4160, OpDepthAttachmentReadEXT = 4161, OpStencilAttachmentReadEXT = 4162, + OpTypeTensorARM = 4163, + OpTensorReadARM = 4164, + OpTensorWriteARM = 4165, + OpTensorQuerySizeARM = 4166, + OpGraphConstantARM = 4181, + OpGraphEntryPointARM = 4182, + OpGraphARM = 4183, + OpGraphInputARM = 4184, + OpGraphSetOutputARM = 4185, + OpGraphEndARM = 4186, + OpTypeGraphARM = 4190, OpTerminateInvocation = 4416, OpTypeUntypedPointerKHR = 4417, OpUntypedVariableKHR = 4418, @@ -2381,6 +2413,9 @@ enum Op { OpRoundFToTF32INTEL = 6426, OpMaskedGatherINTEL = 6428, OpMaskedScatterINTEL = 6429, + OpConvertHandleToImageINTEL = 6529, + OpConvertHandleToSamplerINTEL = 6530, + OpConvertHandleToSampledImageINTEL = 6531, OpMax = 0x7fffffff, }; @@ -2739,6 +2774,17 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; case OpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; case OpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; + case OpTypeTensorARM: *hasResult = true; *hasResultType = false; break; + case OpTensorReadARM: *hasResult = true; *hasResultType = true; break; + case OpTensorWriteARM: *hasResult = false; *hasResultType = false; break; + case OpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break; + case OpGraphConstantARM: *hasResult = true; *hasResultType = true; break; + case OpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break; + case OpGraphARM: *hasResult = true; *hasResultType = true; break; + case OpGraphInputARM: *hasResult = true; *hasResultType = true; break; + case OpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break; + case OpGraphEndARM: *hasResult = false; *hasResultType = false; break; + case OpTypeGraphARM: *hasResult = true; *hasResultType = false; break; case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break; case OpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break; case OpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break; @@ -3186,6 +3232,9 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break; case OpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break; case OpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break; + case OpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break; + case OpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break; + case OpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break; } } inline const char* SourceLanguageToString(SourceLanguage value) { @@ -3301,6 +3350,8 @@ inline const char* ExecutionModeToString(ExecutionMode value) { case ExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve"; case ExecutionModeRoundingModeRTE: return "RoundingModeRTE"; case ExecutionModeRoundingModeRTZ: return "RoundingModeRTZ"; + case ExecutionModeNonCoherentTileAttachmentReadQCOM: return 
"NonCoherentTileAttachmentReadQCOM"; + case ExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM"; case ExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD"; case ExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT"; case ExecutionModeCoalescingAMDX: return "CoalescingAMDX"; @@ -3367,6 +3418,7 @@ inline const char* StorageClassToString(StorageClass value) { case StorageClassImage: return "Image"; case StorageClassStorageBuffer: return "StorageBuffer"; case StorageClassTileImageEXT: return "TileImageEXT"; + case StorageClassTileAttachmentQCOM: return "TileAttachmentQCOM"; case StorageClassNodePayloadAMDX: return "NodePayloadAMDX"; case StorageClassCallableDataKHR: return "CallableDataKHR"; case StorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR"; @@ -3615,6 +3667,7 @@ inline const char* DecorationToString(Decoration value) { case DecorationMaxByteOffset: return "MaxByteOffset"; case DecorationAlignmentId: return "AlignmentId"; case DecorationMaxByteOffsetId: return "MaxByteOffsetId"; + case DecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT"; case DecorationNoSignedWrap: return "NoSignedWrap"; case DecorationNoUnsignedWrap: return "NoUnsignedWrap"; case DecorationWeightTextureQCOM: return "WeightTextureQCOM"; @@ -3774,6 +3827,9 @@ inline const char* BuiltInToString(BuiltIn value) { case BuiltInDeviceIndex: return "DeviceIndex"; case BuiltInViewIndex: return "ViewIndex"; case BuiltInShadingRateKHR: return "ShadingRateKHR"; + case BuiltInTileOffsetQCOM: return "TileOffsetQCOM"; + case BuiltInTileDimensionQCOM: return "TileDimensionQCOM"; + case BuiltInTileApronSizeQCOM: return "TileApronSizeQCOM"; case BuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD"; case BuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD"; case BuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD"; @@ -3954,7 +4010,13 @@ inline const char* CapabilityToString(Capability value) { case CapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT"; case CapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT"; case CapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT"; + case CapabilityTensorsARM: return "TensorsARM"; + case CapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM"; + case CapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM"; + case CapabilityGraphARM: return "GraphARM"; case CapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM"; + case CapabilityFloat8EXT: return "Float8EXT"; + case CapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT"; case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR"; case CapabilitySubgroupBallotKHR: return "SubgroupBallotKHR"; case CapabilityDrawParameters: return "DrawParameters"; @@ -3988,6 +4050,7 @@ inline const char* CapabilityToString(Capability value) { case CapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM"; case CapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM"; case CapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM"; + case CapabilityTileShadingQCOM: return "TileShadingQCOM"; case CapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM"; case CapabilityFloat16ImageAMD: return "Float16ImageAMD"; case CapabilityImageGatherBiasLodAMD: return 
"ImageGatherBiasLodAMD"; @@ -3998,6 +4061,8 @@ inline const char* CapabilityToString(Capability value) { case CapabilityShaderClockKHR: return "ShaderClockKHR"; case CapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX"; case CapabilityQuadControlKHR: return "QuadControlKHR"; + case CapabilityInt4TypeINTEL: return "Int4TypeINTEL"; + case CapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL"; case CapabilityBFloat16TypeKHR: return "BFloat16TypeKHR"; case CapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR"; case CapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR"; @@ -4140,6 +4205,7 @@ inline const char* CapabilityToString(Capability value) { case CapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL"; case CapabilityCacheControlsINTEL: return "CacheControlsINTEL"; case CapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL"; + case CapabilityBindlessImagesINTEL: return "BindlessImagesINTEL"; default: return "Unknown"; } } @@ -4295,6 +4361,8 @@ inline const char* NamedMaximumNumberOfRegistersToString(NamedMaximumNumberOfReg inline const char* FPEncodingToString(FPEncoding value) { switch (value) { case FPEncodingBFloat16KHR: return "BFloat16KHR"; + case FPEncodingFloat8E4M3EXT: return "Float8E4M3EXT"; + case FPEncodingFloat8E5M2EXT: return "Float8E5M2EXT"; default: return "Unknown"; } } @@ -4679,6 +4747,17 @@ inline const char* OpToString(Op value) { case OpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT"; case OpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT"; case OpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT"; + case OpTypeTensorARM: return "OpTypeTensorARM"; + case OpTensorReadARM: return "OpTensorReadARM"; + case OpTensorWriteARM: return "OpTensorWriteARM"; + case OpTensorQuerySizeARM: return "OpTensorQuerySizeARM"; + case OpGraphConstantARM: return "OpGraphConstantARM"; + case OpGraphEntryPointARM: return "OpGraphEntryPointARM"; + case OpGraphARM: return "OpGraphARM"; + case OpGraphInputARM: return "OpGraphInputARM"; + case OpGraphSetOutputARM: return "OpGraphSetOutputARM"; + case OpGraphEndARM: return "OpGraphEndARM"; + case OpTypeGraphARM: return "OpTypeGraphARM"; case OpTerminateInvocation: return "OpTerminateInvocation"; case OpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR"; case OpUntypedVariableKHR: return "OpUntypedVariableKHR"; @@ -5126,6 +5205,9 @@ inline const char* OpToString(Op value) { case OpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL"; case OpMaskedGatherINTEL: return "OpMaskedGatherINTEL"; case OpMaskedScatterINTEL: return "OpMaskedScatterINTEL"; + case OpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL"; + case OpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL"; + case OpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL"; default: return "Unknown"; } } @@ -5186,6 +5268,10 @@ inline TensorAddressingOperandsMask operator|(TensorAddressingOperandsMask a, Te inline TensorAddressingOperandsMask operator&(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) & unsigned(b)); } inline TensorAddressingOperandsMask operator^(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) ^ unsigned(b)); } inline TensorAddressingOperandsMask operator~(TensorAddressingOperandsMask a) { return TensorAddressingOperandsMask(~unsigned(a)); } +inline TensorOperandsMask 
operator|(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) | unsigned(b)); } +inline TensorOperandsMask operator&(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) & unsigned(b)); } +inline TensorOperandsMask operator^(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) ^ unsigned(b)); } +inline TensorOperandsMask operator~(TensorOperandsMask a) { return TensorOperandsMask(~unsigned(a)); } inline MatrixMultiplyAccumulateOperandsMask operator|(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) | unsigned(b)); } inline MatrixMultiplyAccumulateOperandsMask operator&(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) & unsigned(b)); } inline MatrixMultiplyAccumulateOperandsMask operator^(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) ^ unsigned(b)); } diff --git a/3rdparty/spirv-cross/spirv_cfg.cpp b/3rdparty/spirv-cross/spirv_cfg.cpp index 932994798..c68886d98 100644 --- a/3rdparty/spirv-cross/spirv_cfg.cpp +++ b/3rdparty/spirv-cross/spirv_cfg.cpp @@ -81,31 +81,105 @@ bool CFG::is_back_edge(uint32_t to) const // We have a back edge if the visit order is set with the temporary magic value 0. // Crossing edges will have already been recorded with a visit order. auto itr = visit_order.find(to); - return itr != end(visit_order) && itr->second.get() == 0; + return itr != end(visit_order) && itr->second.visited_branches && !itr->second.visited_resolve; } -bool CFG::has_visited_forward_edge(uint32_t to) const +bool CFG::has_visited_branch(uint32_t to) const { - // If > 0, we have visited the edge already, and this is not a back edge branch. auto itr = visit_order.find(to); - return itr != end(visit_order) && itr->second.get() > 0; + return itr != end(visit_order) && itr->second.visited_branches; } -bool CFG::post_order_visit(uint32_t block_id) +void CFG::post_order_visit_entry(uint32_t block) { - // If we have already branched to this block (back edge), stop recursion. - // If our branches are back-edges, we do not record them. - // We have to record crossing edges however. - if (has_visited_forward_edge(block_id)) - return true; - else if (is_back_edge(block_id)) - return false; + visit_stack.push_back(block); - // Block back-edges from recursively revisiting ourselves. - visit_order[block_id].get() = 0; + while (!visit_stack.empty()) + { + bool keep_iterating; + do + { + // Reverse the order to allow for stack-like behavior and preserves the visit order from recursive algorithm. + // Traverse depth first. + uint32_t to_visit = visit_stack.back(); + last_visited_size = visit_stack.size(); + post_order_visit_branches(to_visit); + keep_iterating = last_visited_size != visit_stack.size(); + if (keep_iterating) + std::reverse(visit_stack.begin() + last_visited_size, visit_stack.end()); + } while (keep_iterating); + // We've reached the end of some tree leaf. Resolve the stack. + // Any node which has been visited for real can be popped now. + while (!visit_stack.empty() && visit_order[visit_stack.back()].visited_branches) + { + post_order_visit_resolve(visit_stack.back()); + visit_stack.pop_back(); + } + } +} + +void CFG::visit_branch(uint32_t block_id) +{ + // Prune obvious duplicates. 
+ if (std::find(visit_stack.begin() + last_visited_size, visit_stack.end(), block_id) == visit_stack.end() && + !has_visited_branch(block_id)) + { + visit_stack.push_back(block_id); + } +} + +void CFG::post_order_visit_branches(uint32_t block_id) +{ auto &block = compiler.get(block_id); + auto &visit = visit_order[block_id]; + if (visit.visited_branches) + return; + visit.visited_branches = true; + + if (block.merge == SPIRBlock::MergeLoop) + visit_branch(block.merge_block); + else if (block.merge == SPIRBlock::MergeSelection) + visit_branch(block.next_block); + + // First visit our branch targets. + switch (block.terminator) + { + case SPIRBlock::Direct: + visit_branch(block.next_block); + break; + + case SPIRBlock::Select: + visit_branch(block.true_block); + visit_branch(block.false_block); + break; + + case SPIRBlock::MultiSelect: + { + const auto &cases = compiler.get_case_list(block); + for (const auto &target : cases) + visit_branch(target.block); + if (block.default_block) + visit_branch(block.default_block); + break; + } + + default: + break; + } +} + +void CFG::post_order_visit_resolve(uint32_t block_id) +{ + auto &block = compiler.get(block_id); + + auto &visit_block = visit_order[block_id]; + assert(visit_block.visited_branches); + auto &visited = visit_order[block_id].visited_resolve; + if (visited) + return; + // If this is a loop header, add an implied branch to the merge target. // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. @@ -116,21 +190,21 @@ bool CFG::post_order_visit(uint32_t block_id) // is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis. // For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine, // but for loops, only the header might end up actually branching to merge block. - if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block)) + if (block.merge == SPIRBlock::MergeLoop && !is_back_edge(block.merge_block)) add_branch(block_id, block.merge_block); // First visit our branch targets. switch (block.terminator) { case SPIRBlock::Direct: - if (post_order_visit(block.next_block)) + if (!is_back_edge(block.next_block)) add_branch(block_id, block.next_block); break; case SPIRBlock::Select: - if (post_order_visit(block.true_block)) + if (!is_back_edge(block.true_block)) add_branch(block_id, block.true_block); - if (post_order_visit(block.false_block)) + if (!is_back_edge(block.false_block)) add_branch(block_id, block.false_block); break; @@ -139,10 +213,10 @@ bool CFG::post_order_visit(uint32_t block_id) const auto &cases = compiler.get_case_list(block); for (const auto &target : cases) { - if (post_order_visit(target.block)) + if (!is_back_edge(target.block)) add_branch(block_id, target.block); } - if (block.default_block && post_order_visit(block.default_block)) + if (block.default_block && !is_back_edge(block.default_block)) add_branch(block_id, block.default_block); break; } @@ -157,7 +231,7 @@ bool CFG::post_order_visit(uint32_t block_id) // We can use the variable without a Phi since there is only one possible parent here. // However, in this case, we need to hoist out the inner variable to outside the branch. // Use same strategy as loops. 
- if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block)) + if (block.merge == SPIRBlock::MergeSelection && !is_back_edge(block.next_block)) { // If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup. // Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement @@ -201,10 +275,9 @@ bool CFG::post_order_visit(uint32_t block_id) } } - // Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges. - visit_order[block_id].get() = ++visit_count; + visited = true; + visit_block.order = ++visit_count; post_order.push_back(block_id); - return true; } void CFG::build_post_order_visit_order() @@ -213,11 +286,12 @@ void CFG::build_post_order_visit_order() visit_count = 0; visit_order.clear(); post_order.clear(); - post_order_visit(block); + post_order_visit_entry(block); } void CFG::add_branch(uint32_t from, uint32_t to) { + assert(from && to); const auto add_unique = [](SmallVector &l, uint32_t value) { auto itr = find(begin(l), end(l), value); if (itr == end(l)) diff --git a/3rdparty/spirv-cross/spirv_cfg.hpp b/3rdparty/spirv-cross/spirv_cfg.hpp index 1d85fe0a9..1c21ea070 100644 --- a/3rdparty/spirv-cross/spirv_cfg.hpp +++ b/3rdparty/spirv-cross/spirv_cfg.hpp @@ -68,7 +68,7 @@ public: { auto itr = visit_order.find(block); assert(itr != std::end(visit_order)); - int v = itr->second.get(); + int v = itr->second.order; assert(v > 0); return uint32_t(v); } @@ -114,17 +114,9 @@ public: private: struct VisitOrder { - int &get() - { - return v; - } - - const int &get() const - { - return v; - } - - int v = -1; + int order = -1; + bool visited_resolve = false; + bool visited_branches = false; }; Compiler &compiler; @@ -139,11 +131,17 @@ private: void add_branch(uint32_t from, uint32_t to); void build_post_order_visit_order(); void build_immediate_dominators(); - bool post_order_visit(uint32_t block); + void post_order_visit_branches(uint32_t block); + void post_order_visit_resolve(uint32_t block); + void post_order_visit_entry(uint32_t block); uint32_t visit_count = 0; bool is_back_edge(uint32_t to) const; - bool has_visited_forward_edge(uint32_t to) const; + bool has_visited_branch(uint32_t to) const; + void visit_branch(uint32_t block_id); + + SmallVector visit_stack; + size_t last_visited_size = 0; }; class DominatorBuilder diff --git a/3rdparty/spirv-cross/spirv_common.hpp b/3rdparty/spirv-cross/spirv_common.hpp index a4778c29b..854efe5eb 100644 --- a/3rdparty/spirv-cross/spirv_common.hpp +++ b/3rdparty/spirv-cross/spirv_common.hpp @@ -574,6 +574,7 @@ struct SPIRType : IVariant Sampler, AccelerationStructure, RayQuery, + CoopVecNV, // Keep internal types at the end. ControlPointArray, @@ -581,7 +582,11 @@ struct SPIRType : IVariant Char, // MSL specific type, that is used by 'object'(analog of 'task' from glsl) shader. MeshGridProperties, - BFloat16 + BFloat16, + FloatE4M3, + FloatE5M2, + + Tensor }; // Scalar/vector/matrix support. 
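// Illustrative sketch (not part of the patch) of the two FP8 encodings behind the new
// FloatE4M3 / FloatE5M2 basetypes. FloatE5M2 keeps FP16's 5-bit exponent (bias 15), so
// widening it losslessly is just a shift into the top byte of a half -- the same trick
// the scalar_bf8() helper added to SPIRConstant below relies on. FloatE4M3 (4-bit
// exponent, bias 7, no infinities, S.1111.111 reserved for NaN) instead goes through
// the FP16 path and is rescaled by 2^8 in fe4m3_to_f32(). Assumes the default
// SPIRV_CROSS_NAMESPACE (spirv_cross).
static inline float e5m2_to_f32_sketch(uint8_t v)
{
    // s eeeee mm -> s eeeee mm 00000000 : a valid IEEE half with the identical value.
    return spirv_cross::SPIRConstant::f16_to_f32(uint16_t(v) << 8);
}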
@@ -606,13 +611,29 @@ struct SPIRType : IVariant bool pointer = false; bool forward_pointer = false; - struct + union { - uint32_t use_id = 0; - uint32_t rows_id = 0; - uint32_t columns_id = 0; - uint32_t scope_id = 0; - } cooperative; + struct + { + uint32_t use_id; + uint32_t rows_id; + uint32_t columns_id; + uint32_t scope_id; + } cooperative; + + struct + { + uint32_t component_type_id; + uint32_t component_count_id; + } coopVecNV; + + struct + { + uint32_t type; + uint32_t rank; + uint32_t shape; + } tensor; + } ext; spv::StorageClass storage = spv::StorageClassGeneric; @@ -670,6 +691,12 @@ struct SPIRExtension : IVariant NonSemanticGeneric }; + enum ShaderDebugInfoOps + { + DebugLine = 103, + DebugSource = 35 + }; + explicit SPIRExtension(Extension ext_) : ext(ext_) { @@ -695,6 +722,11 @@ struct SPIREntryPoint FunctionID self = 0; std::string name; std::string orig_name; + std::unordered_map fp_fast_math_defaults; + bool signed_zero_inf_nan_preserve_8 = false; + bool signed_zero_inf_nan_preserve_16 = false; + bool signed_zero_inf_nan_preserve_32 = false; + bool signed_zero_inf_nan_preserve_64 = false; SmallVector interface_variables; Bitset flags; @@ -927,6 +959,7 @@ struct SPIRBlock : IVariant // All access to these variables are dominated by this block, // so before branching anywhere we need to make sure that we declare these variables. SmallVector dominated_variables; + SmallVector rearm_dominated_variables; // These are variables which should be declared in a for loop header, if we // fail to use a classic for-loop, @@ -1238,6 +1271,26 @@ struct SPIRConstant : IVariant return u.f32; } + static inline float fe4m3_to_f32(uint8_t v) + { + if ((v & 0x7f) == 0x7f) + { + union + { + float f32; + uint32_t u32; + } u; + + u.u32 = (v & 0x80) ? 0xffffffffu : 0x7fffffffu; + return u.f32; + } + else + { + // Reuse the FP16 to FP32 code. Cute bit-hackery. + return f16_to_f32((int16_t(int8_t(v)) << 7) & (0xffff ^ 0x4000)) * 256.0f; + } + } + inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const { return m.c[col].id[row]; @@ -1286,6 +1339,16 @@ struct SPIRConstant : IVariant return fp32; } + inline float scalar_floate4m3(uint32_t col = 0, uint32_t row = 0) const + { + return fe4m3_to_f32(scalar_u8(col, row)); + } + + inline float scalar_bf8(uint32_t col = 0, uint32_t row = 0) const + { + return f16_to_f32(scalar_u8(col, row) << 8); + } + inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const { return m.c[col].r[row].f32; @@ -1356,9 +1419,10 @@ struct SPIRConstant : IVariant SPIRConstant() = default; - SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) + SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized, bool replicated_ = false) : constant_type(constant_type_) , specialization(specialized) + , replicated(replicated_) { subconstants.reserve(num_elements); for (uint32_t i = 0; i < num_elements; i++) @@ -1437,6 +1501,9 @@ struct SPIRConstant : IVariant // For composites which are constant arrays, etc. SmallVector subconstants; + // Whether the subconstants are intended to be replicated (e.g. OpConstantCompositeReplicateEXT) + bool replicated = false; + // Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant, // and uses them to initialize the constant. 
This allows the user // to still be able to specialize the value by supplying corresponding @@ -1732,6 +1799,7 @@ struct Meta uint32_t spec_id = 0; uint32_t index = 0; spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax; + spv::FPFastMathModeMask fp_fast_math_mode = spv::FPFastMathModeMaskNone; bool builtin = false; bool qualified_alias_explicit_override = false; @@ -1787,7 +1855,8 @@ private: static inline bool type_is_floating_point(const SPIRType &type) { - return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double; + return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double || + type.basetype == SPIRType::BFloat16 || type.basetype == SPIRType::FloatE5M2 || type.basetype == SPIRType::FloatE4M3; } static inline bool type_is_integral(const SPIRType &type) diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp index 8aa4e5e70..4c1d39d98 100644 --- a/3rdparty/spirv-cross/spirv_cross.cpp +++ b/3rdparty/spirv-cross/spirv_cross.cpp @@ -280,6 +280,9 @@ bool Compiler::block_is_pure(const SPIRBlock &block) // This is a global side effect of the function. return false; + case OpTensorReadARM: + return false; + case OpExtInst: { uint32_t extension_set = ops[2]; @@ -373,6 +376,7 @@ void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_ case OpLoad: case OpCooperativeMatrixLoadKHR: + case OpCooperativeVectorLoadNV: case OpImageRead: { // If we're in a storage class which does not get invalidated, adding dependencies here is no big deal. @@ -1152,6 +1156,11 @@ ShaderResources Compiler::get_shader_resources(const unordered_set * { res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } + // Tensors + else if (type.basetype == SPIRType::Tensor) + { + res.tensors.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } else { res.gl_plain_uniforms.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); @@ -1169,11 +1178,8 @@ bool Compiler::type_is_top_level_block(const SPIRType &type) const return has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); } -bool Compiler::type_is_block_like(const SPIRType &type) const +bool Compiler::type_is_explicit_layout(const SPIRType &type) const { - if (type_is_top_level_block(type)) - return true; - if (type.basetype == SPIRType::Struct) { // Block-like types may have Offset decorations. @@ -1185,6 +1191,14 @@ bool Compiler::type_is_block_like(const SPIRType &type) const return false; } +bool Compiler::type_is_block_like(const SPIRType &type) const +{ + if (type_is_top_level_block(type)) + return true; + else + return type_is_explicit_layout(type); +} + void Compiler::parse_fixup() { // Figure out specialization constants for work group sizes. 
@@ -2370,6 +2384,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar execution.output_primitives = arg0; break; + case ExecutionModeFPFastMathDefault: + execution.fp_fast_math_defaults[arg0] = arg1; + break; + default: break; } @@ -4334,6 +4352,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint case OpCopyObject: case OpLoad: + case OpCooperativeVectorLoadNV: case OpCooperativeMatrixLoadKHR: if (ops[2] == var) return true; @@ -5151,7 +5170,7 @@ bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const bool Compiler::type_is_opaque_value(const SPIRType &type) const { return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image || - type.basetype == SPIRType::Sampler); + type.basetype == SPIRType::Sampler || type.basetype == SPIRType::Tensor); } // Make these member functions so we can easily break on any force_recompile events. @@ -5469,6 +5488,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_ { case OpLoad: case OpCooperativeMatrixLoadKHR: + case OpCooperativeVectorLoadNV: { if (length < 3) return false; @@ -5547,6 +5567,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_ case OpImageWrite: case OpAtomicStore: case OpCooperativeMatrixStoreKHR: + case OpCooperativeVectorStoreNV: { if (length < 1) return false; diff --git a/3rdparty/spirv-cross/spirv_cross.hpp b/3rdparty/spirv-cross/spirv_cross.hpp index b65b5ac77..65e4bedfa 100644 --- a/3rdparty/spirv-cross/spirv_cross.hpp +++ b/3rdparty/spirv-cross/spirv_cross.hpp @@ -95,6 +95,7 @@ struct ShaderResources SmallVector atomic_counters; SmallVector acceleration_structures; SmallVector gl_plain_uniforms; + SmallVector tensors; // There can only be one push constant block, // but keep the vector in case this restriction is lifted in the future. 
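// Illustrative usage sketch (not part of the patch): with the new `tensors` list in
// ShaderResources, OpTypeTensorARM bindings reflect like any other opaque resource.
// Assumes <cstdio>, <spirv_glsl.hpp>, and an input module that actually declares tensors.
static void dump_tensor_bindings(std::vector<uint32_t> spirv_binary)
{
    spirv_cross::CompilerGLSL compiler(std::move(spirv_binary));
    spirv_cross::ShaderResources res = compiler.get_shader_resources();
    for (const spirv_cross::Resource &t : res.tensors)
    {
        uint32_t set = compiler.get_decoration(t.id, spv::DecorationDescriptorSet);
        uint32_t binding = compiler.get_decoration(t.id, spv::DecorationBinding);
        printf("tensor %s: set=%u, binding=%u\n", t.name.c_str(), set, binding);
    }
}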
@@ -1171,6 +1172,7 @@ protected: bool type_contains_recursion(const SPIRType &type); bool type_is_array_of_pointers(const SPIRType &type) const; bool type_is_block_like(const SPIRType &type) const; + bool type_is_explicit_layout(const SPIRType &type) const; bool type_is_top_level_block(const SPIRType &type) const; bool type_is_opaque_value(const SPIRType &type) const; diff --git a/3rdparty/spirv-cross/spirv_cross_c.cpp b/3rdparty/spirv-cross/spirv_cross_c.cpp index 6827f6135..8a4a49279 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.cpp +++ b/3rdparty/spirv-cross/spirv_cross_c.cpp @@ -200,6 +200,7 @@ struct spvc_resources_s : ScratchMemoryAllocation SmallVector separate_samplers; SmallVector acceleration_structures; SmallVector gl_plain_uniforms; + SmallVector tensors; SmallVector builtin_inputs; SmallVector builtin_outputs; @@ -1872,6 +1873,8 @@ bool spvc_resources_s::copy_resources(const ShaderResources &resources) return false; if (!copy_resources(gl_plain_uniforms, resources.gl_plain_uniforms)) return false; + if (!copy_resources(tensors, resources.tensors)) + return false; if (!copy_resources(builtin_inputs, resources.builtin_inputs)) return false; if (!copy_resources(builtin_outputs, resources.builtin_outputs)) @@ -2025,6 +2028,11 @@ spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources, case SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM: list = &resources->gl_plain_uniforms; + break; + + case SPVC_RESOURCE_TYPE_TENSOR: + list = &resources->tensors; + break; default: break; diff --git a/3rdparty/spirv-cross/spirv_cross_c.h b/3rdparty/spirv-cross/spirv_cross_c.h index 4eab9225f..f360711e8 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.h +++ b/3rdparty/spirv-cross/spirv_cross_c.h @@ -40,7 +40,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 66 +#define SPVC_C_API_VERSION_MINOR 67 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -227,6 +227,7 @@ typedef enum spvc_resource_type SPVC_RESOURCE_TYPE_RAY_QUERY = 13, SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER = 14, SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM = 15, + SPVC_RESOURCE_TYPE_TENSOR = 16, SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff } spvc_resource_type; diff --git a/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp b/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp index 397e40f4d..1dcd24e9f 100644 --- a/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp +++ b/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp @@ -452,6 +452,10 @@ void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument) dec.fp_rounding_mode = static_cast(argument); break; + case DecorationFPFastMathMode: + dec.fp_fast_math_mode = static_cast(argument); + break; + default: break; } @@ -523,8 +527,27 @@ void ParsedIR::mark_used_as_array_length(ID id) switch (ids[id].get_type()) { case TypeConstant: - get(id).is_used_as_array_length = true; + { + auto &c = get(id); + c.is_used_as_array_length = true; + + // Mark composite dependencies as well. 
+ for (auto &sub_id: c.m.id) + if (sub_id) + mark_used_as_array_length(sub_id); + + for (uint32_t col = 0; col < c.m.columns; col++) + { + for (auto &sub_id : c.m.c[col].id) + if (sub_id) + mark_used_as_array_length(sub_id); + } + + for (auto &sub_id : c.subconstants) + if (sub_id) + mark_used_as_array_length(sub_id); break; + } case TypeConstantOp: { @@ -643,6 +666,8 @@ uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const return dec.index; case DecorationFPRoundingMode: return dec.fp_rounding_mode; + case DecorationFPFastMathMode: + return dec.fp_fast_math_mode; default: return 1; } @@ -730,6 +755,10 @@ void ParsedIR::unset_decoration(ID id, Decoration decoration) dec.fp_rounding_mode = FPRoundingModeMax; break; + case DecorationFPFastMathMode: + dec.fp_fast_math_mode = FPFastMathModeMaskNone; + break; + case DecorationHlslCounterBufferGOOGLE: { auto &counter = meta[id].hlsl_magic_counter_buffer; diff --git a/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp b/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp index 3892248aa..8c30ef819 100644 --- a/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp +++ b/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp @@ -111,6 +111,7 @@ public: struct Source { + spv::SourceLanguage lang = spv::SourceLanguageUnknown; uint32_t version = 0; bool es = false; bool known = false; diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index ca9d0309d..5392bd796 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -41,6 +41,9 @@ using namespace spv; using namespace SPIRV_CROSS_NAMESPACE; using namespace std; +namespace SPIRV_CROSS_NAMESPACE +{ + enum ExtraSubExpressionType { // Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map. 
@@ -48,6 +51,46 @@ enum ExtraSubExpressionType EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000 }; +struct GlslConstantNameMapping +{ + uint32_t value; + const char *alias; +}; + +#define DEF_GLSL_MAPPING(x) { x, "gl_" #x } +#define DEF_GLSL_MAPPING_EXT(x) { x##KHR, "gl_" #x } +static const GlslConstantNameMapping CoopVecComponentTypeNames[] = { + DEF_GLSL_MAPPING(ComponentTypeFloat16NV), + DEF_GLSL_MAPPING(ComponentTypeFloat32NV), + DEF_GLSL_MAPPING(ComponentTypeFloat64NV), + DEF_GLSL_MAPPING(ComponentTypeSignedInt8NV), + DEF_GLSL_MAPPING(ComponentTypeSignedInt16NV), + DEF_GLSL_MAPPING(ComponentTypeSignedInt32NV), + DEF_GLSL_MAPPING(ComponentTypeSignedInt64NV), + DEF_GLSL_MAPPING(ComponentTypeUnsignedInt8NV), + DEF_GLSL_MAPPING(ComponentTypeUnsignedInt16NV), + DEF_GLSL_MAPPING(ComponentTypeUnsignedInt32NV), + DEF_GLSL_MAPPING(ComponentTypeUnsignedInt64NV), + DEF_GLSL_MAPPING(ComponentTypeSignedInt8PackedNV), + DEF_GLSL_MAPPING(ComponentTypeUnsignedInt8PackedNV), + DEF_GLSL_MAPPING(ComponentTypeFloatE4M3NV), + DEF_GLSL_MAPPING(ComponentTypeFloatE5M2NV), +}; + +static const GlslConstantNameMapping CoopVecMatrixLayoutNames[] = { + DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutRowMajorNV), + DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutColumnMajorNV), + DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutInferencingOptimalNV), + DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutTrainingOptimalNV), +}; + +static const GlslConstantNameMapping CoopMatMatrixLayoutNames[] = { + DEF_GLSL_MAPPING_EXT(CooperativeMatrixLayoutRowMajor), + DEF_GLSL_MAPPING_EXT(CooperativeMatrixLayoutColumnMajor), +}; +#undef DEF_GLSL_MAPPING +#undef DEF_GLSL_MAPPING_EXT + static bool is_unsigned_opcode(Op op) { // Don't have to be exhaustive, only relevant for legacy target checking ... @@ -159,6 +202,7 @@ static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard return packing; } } +} void CompilerGLSL::init() { @@ -617,6 +661,19 @@ void CompilerGLSL::find_static_extensions() ray_tracing_is_khr = true; break; + case CapabilityRayTracingClusterAccelerationStructureNV: + if (options.es || options.version < 460 || !options.vulkan_semantics) + SPIRV_CROSS_THROW("Cluster AS requires Vulkan GLSL 460."); + require_extension_internal("GL_NV_cluster_acceleration_structure"); + ray_tracing_is_khr = true; + break; + + case CapabilityTensorsARM: + if (options.es || options.version < 460 || !options.vulkan_semantics) + SPIRV_CROSS_THROW("Tensor requires Vulkan GLSL 460."); + require_extension_internal("GL_ARM_tensors"); + break; + default: break; } @@ -1529,9 +1586,12 @@ uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPack case SPIRType::Half: case SPIRType::Short: case SPIRType::UShort: + case SPIRType::BFloat16: return 2; case SPIRType::SByte: case SPIRType::UByte: + case SPIRType::FloatE4M3: + case SPIRType::FloatE5M2: return 1; default: @@ -2550,7 +2610,7 @@ void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var) SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint."); auto flags = ir.get_buffer_block_flags(var); - statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[", + statement("uniform ", flags_to_qualifiers_glsl(tmp, 0, flags), type_to_glsl(tmp), " ", buffer_name, "[", buffer_size, "];"); } else @@ -5791,8 +5851,22 @@ string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) case OpCompositeExtract: { - auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], 
uint32_t(cop.arguments.size() - 1), - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + // Trivial vector extracts (of WorkGroupSize typically), + // punch through to the input spec constant if the composite is used as array size. + const auto *c = maybe_get(cop.arguments[0]); + + string expr; + if (c && cop.arguments.size() == 2 && c->is_used_as_array_length && + !backend.supports_spec_constant_array_size && + is_vector(get(c->constant_type))) + { + expr = to_expression(c->specialization_constant_id(0, cop.arguments[1])); + } + else + { + expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + } return expr; } @@ -5940,6 +6014,30 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, require_extension_internal("GL_EXT_null_initializer"); return backend.constant_null_initializer; } + else if (c.replicated && type.op != spv::OpTypeArray) + { + if (type.op == spv::OpTypeMatrix) + { + uint32_t num_elements = type.columns; + // GLSL does not allow the replication constructor for matrices + // mat4(vec4(0.0)) needs to be manually expanded to mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + std::string res; + res += type_to_glsl(type); + res += "("; + for (uint32_t i = 0; i < num_elements; i++) + { + res += to_expression(c.subconstants[0]); + if (i < num_elements - 1) + res += ", "; + } + res += ")"; + return res; + } + else + { + return join(type_to_glsl(type), "(", to_expression(c.subconstants[0]), ")"); + } + } else if (!c.subconstants.empty()) { // Handles Arrays and structures. @@ -5989,8 +6087,16 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, } uint32_t subconstant_index = 0; - for (auto &elem : c.subconstants) + size_t num_elements = c.subconstants.size(); + if (c.replicated) { + if (type.array.size() != 1) + SPIRV_CROSS_THROW("Multidimensional arrays not yet supported as replicated constans"); + num_elements = type.array[0]; + } + for (size_t i = 0; i < num_elements; i++) + { + auto &elem = c.subconstants[c.replicated ? 0 : i]; if (auto *op = maybe_get(elem)) { res += constant_op_expression(*op); @@ -6021,7 +6127,7 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, } } - if (&elem != &c.subconstants.back()) + if (i != num_elements - 1) res += ", "; subconstant_index++; @@ -6095,17 +6201,44 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, #pragma warning(disable : 4996) #endif +string CompilerGLSL::convert_floate4m3_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + float float_value = c.scalar_floate4m3(col, row); + + // There is no infinity in e4m3. + if (std::isnan(float_value)) + { + SPIRType type { OpTypeFloat }; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + res = join(type_to_glsl(type), "(0.0 / 0.0)"); + } + else + { + SPIRType type { OpTypeFloat }; + type.basetype = SPIRType::FloatE4M3; + type.vecsize = 1; + type.columns = 1; + res = join(type_to_glsl(type), "(", format_float(float_value), ")"); + } + + return res; +} + string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { string res; - float float_value = c.scalar_f16(col, row); + bool is_bfloat8 = get(c.constant_type).basetype == SPIRType::FloatE5M2; + float float_value = is_bfloat8 ? c.scalar_bf8(col, row) : c.scalar_f16(col, row); // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots // of complicated workarounds, just value-cast to the half type always. 
if (std::isnan(float_value) || std::isinf(float_value)) { SPIRType type { OpTypeFloat }; - type.basetype = SPIRType::Half; + type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half; type.vecsize = 1; type.columns = 1; @@ -6121,7 +6254,7 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, else { SPIRType type { OpTypeFloat }; - type.basetype = SPIRType::Half; + type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half; type.vecsize = 1; type.columns = 1; res = join(type_to_glsl(type), "(", format_float(float_value), ")"); @@ -6358,6 +6491,29 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t switch (type.basetype) { + case SPIRType::FloatE4M3: + if (splat || swizzle_splat) + { + res += convert_floate4m3_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_floate4m3_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::FloatE5M2: case SPIRType::Half: if (splat || swizzle_splat) { @@ -6722,7 +6878,7 @@ void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t r if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) initializer = join(" = ", to_zero_initialized_expression(result_type)); - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";"); + statement(flags_to_qualifiers_glsl(type, result_id, flags), variable_decl(type, to_name(result_id)), initializer, ";"); } } @@ -6757,7 +6913,7 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) // The result_id has not been made into an expression yet, so use flags interface. add_local_variable_name(result_id); auto &flags = get_decoration_bitset(result_id); - return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); + return join(flags_to_qualifiers_glsl(type, result_id, flags), variable_decl(type, to_name(result_id)), " = "); } } @@ -6830,7 +6986,7 @@ void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint { // Various FP arithmetic opcodes such as add, sub, mul will hit this. 
bool force_temporary_precise = backend.support_precise_qualifier && - has_decoration(result_id, DecorationNoContraction) && + has_legacy_nocontract(result_type, result_id) && type_is_floating_point(get(result_type)); bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise; @@ -9795,6 +9951,22 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i return "bfloat16BitsToUintEXT"; else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::BFloat16) return "bfloat16BitsToIntEXT"; + else if (out_type.basetype == SPIRType::FloatE4M3 && in_type.basetype == SPIRType::UByte) + return "uintBitsToFloate4m3EXT"; + else if (out_type.basetype == SPIRType::FloatE4M3 && in_type.basetype == SPIRType::SByte) + return "intBitsToFloate4m3EXT"; + else if (out_type.basetype == SPIRType::UByte && in_type.basetype == SPIRType::FloatE4M3) + return "floate4m3BitsToUintEXT"; + else if (out_type.basetype == SPIRType::SByte && in_type.basetype == SPIRType::FloatE4M3) + return "floate4m3BitsToIntEXT"; + else if (out_type.basetype == SPIRType::FloatE5M2 && in_type.basetype == SPIRType::UByte) + return "uintBitsToFloate5m2EXT"; + else if (out_type.basetype == SPIRType::FloatE5M2 && in_type.basetype == SPIRType::SByte) + return "intBitsToFloate5m2EXT"; + else if (out_type.basetype == SPIRType::UByte && in_type.basetype == SPIRType::FloatE5M2) + return "floate5m2BitsToUintEXT"; + else if (out_type.basetype == SPIRType::SByte && in_type.basetype == SPIRType::FloatE5M2) + return "floate5m2BitsToIntEXT"; return ""; } @@ -10198,6 +10370,14 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInCullPrimitiveEXT: return "gl_CullPrimitiveEXT"; + case BuiltInClusterIDNV: + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use ClusterIDNV in Vulkan GLSL."); + require_extension_internal("GL_NV_cluster_acceleration_structure"); + return "gl_ClusterIDNV"; + } + default: return join("gl_BuiltIn_", convert_to_string(builtin)); } @@ -10441,8 +10621,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (ptr_chain_array_entry) expr = join("(", expr, ")"); } - // Arrays - else if (!type->array.empty()) + // Arrays and OpTypeCooperativeVectorNV (aka fancy arrays) + else if (!type->array.empty() || type->op == spv::OpTypeCooperativeVectorNV) { // If we are flattening multidimensional arrays, only create opening bracket on first // array index. 
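The hunk above extends bitcast_glsl_op() so that reinterpret casts between the new 8-bit float types (FloatE4M3, FloatE5M2) and 8-bit integers map onto the GL_EXT_float_e4m3 / GL_EXT_float_e5m2 builtins. A minimal sketch of that mapping as a standalone free function; the helper name and signature here are illustrative assumptions, not part of SPIRV-Cross:

// Hypothetical helper condensing the bitcast table added above: pick the GLSL builtin
// for an 8-bit float <-> 8-bit integer reinterpretation.
static const char *fp8_bitcast_glsl_name(bool to_fp8, bool e4m3, bool int_is_signed)
{
    if (to_fp8)
        return e4m3 ? (int_is_signed ? "intBitsToFloate4m3EXT" : "uintBitsToFloate4m3EXT")
                    : (int_is_signed ? "intBitsToFloate5m2EXT" : "uintBitsToFloate5m2EXT");
    return e4m3 ? (int_is_signed ? "floate4m3BitsToIntEXT" : "floate4m3BitsToUintEXT")
                : (int_is_signed ? "floate5m2BitsToIntEXT" : "floate5m2BitsToUintEXT");
}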
@@ -11547,7 +11727,7 @@ void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) { auto &type = get(var.basetype); auto &flags = get_decoration_bitset(var.self); - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + statement(flags_to_qualifiers_glsl(type, var.self, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); flushed_phi_variables.insert(var.self); } } @@ -13761,13 +13941,42 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; } + case OpCooperativeMatrixConvertNV: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("CooperativeMatrixConvertNV requires vulkan semantics."); + require_extension_internal("GL_NV_cooperative_matrix2"); + // fallthrough case OpFConvert: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto func = type_to_glsl_constructor(get(result_type)); - emit_unary_func_op(result_type, id, ops[2], func.c_str()); + auto &type = get(result_type); + + if (type.op == OpTypeCooperativeMatrixKHR && opcode == OpFConvert) + { + auto &expr_type = expression_type(ops[2]); + if (get(type.ext.cooperative.use_id).scalar() != + get(expr_type.ext.cooperative.use_id).scalar()) + { + // Somewhat questionable with spec constant uses. + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("NV_cooperative_matrix2 requires vulkan semantics."); + require_extension_internal("GL_NV_cooperative_matrix2"); + } + } + + if ((type.basetype == SPIRType::FloatE4M3 || type.basetype == SPIRType::FloatE5M2) && + has_decoration(id, spv::DecorationSaturatedToLargestFloat8NormalConversionEXT)) + { + emit_uninitialized_temporary_expression(result_type, id); + statement("saturatedConvertEXT(", to_expression(id), ", ", to_unpacked_expression(ops[2]), ");"); + } + else + { + auto func = type_to_glsl_constructor(type); + emit_unary_func_op(result_type, id, ops[2], func.c_str()); + } break; } @@ -14913,8 +15122,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } + else if (ext == SPIRExtension::NonSemanticShaderDebugInfo) + { + emit_non_semantic_shader_debug_info(ops[0], ops[1], ops[3], &ops[4], length - 4); + } else if (ext == SPIRExtension::SPV_debug_info || - ext == SPIRExtension::NonSemanticShaderDebugInfo || ext == SPIRExtension::NonSemanticGeneric) { break; // Ignore SPIR-V debug information extended instructions. 
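With the change just above, NonSemanticShaderDebugInfo extended instructions are no longer silently dropped; they are forwarded to emit_non_semantic_shader_debug_info(), which (defined further down in this patch) turns DebugLine into #line directives and records DebugSource file-name strings. A rough model of that dispatch, using simplified placeholder types rather than the real SPIRV-Cross signatures:

#include <cstdint>
#include <functional>
#include <string>

// Placeholder enum; the real opcodes come from the NonSemantic.Shader.DebugInfo.100 set.
enum class ShaderDebugOp { DebugLine, DebugSource, Other };

// Sketch mirroring the shape of emit_non_semantic_shader_debug_info() shown later in this patch.
void dispatch_shader_debug_op(ShaderDebugOp op, uint32_t file_id, uint32_t line,
                              const std::function<void(uint32_t, uint32_t)> &emit_line_directive,
                              const std::function<void(uint32_t)> &remember_source_string)
{
    switch (op)
    {
    case ShaderDebugOp::DebugLine:
        emit_line_directive(file_id, line); // becomes a #line directive when emit_line_directives is set
        break;
    case ShaderDebugOp::DebugSource:
        remember_source_string(file_id);    // forward the source file-name string id
        break;
    default:
        break;                              // other debug instructions remain ignored
    }
}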
@@ -15319,7 +15531,81 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject); #undef GLSL_RAY_QUERY_GET_OP #undef GLSL_RAY_QUERY_GET_OP2 + case OpRayQueryGetClusterIdNV: + flush_variable_declaration(ops[2]); + emit_op(ops[0], ops[1], join("rayQueryGetIntersectionClusterIdNV(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); + break; + case OpTensorQuerySizeARM: + flush_variable_declaration(ops[1]); + // tensorSizeARM(tensor, dimension) + emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "tensorSizeARM"); + break; + case OpTensorReadARM: + { + flush_variable_declaration(ops[1]); + emit_uninitialized_temporary_expression(ops[0], ops[1]); + SmallVector args { + to_expression(ops[2]), // tensor + to_expression(ops[3]), // coordinates + to_expression(ops[1]), // out value + }; + if (length > 4) + { + std::string tensor_operands; + if (ops[4] == 0) + tensor_operands = "0x0u"; + else if (ops[4] == spv::TensorOperandsNontemporalARMMask) + tensor_operands = "gl_TensorOperandsNonTemporalARM"; + else if (ops[4] == spv::TensorOperandsOutOfBoundsValueARMMask) + tensor_operands = "gl_TensorOperandsOutOfBoundsValueARM"; + else if (ops[4] == (spv::TensorOperandsNontemporalARMMask | spv::TensorOperandsOutOfBoundsValueARMMask)) + tensor_operands = "gl_TensorOperandsNonTemporalARM | gl_TensorOperandsOutOfBoundsValueARM"; + else + SPIRV_CROSS_THROW("Invalid tensorOperands for tensorReadARM."); + if ((ops[4] & spv::TensorOperandsOutOfBoundsValueARMMask) && length != 6) + SPIRV_CROSS_THROW("gl_TensorOperandsOutOfBoundsValueARM requires an outOfBoundsValue argument."); + args.push_back(tensor_operands); // tensorOperands + } + if (length >= 6) + { + if ((length > 6) || (ops[4] & spv::TensorOperandsOutOfBoundsValueARMMask) == 0) + SPIRV_CROSS_THROW("Too many arguments to tensorReadARM."); + args.push_back(to_expression(ops[5])); // outOfBoundsValue + } + + // tensorRead(tensor, sizeof(type), coordinates, value, operand, ...) 
+ statement("tensorReadARM(", merge(args), ");"); + break; + } + case OpTensorWriteARM: + { + flush_variable_declaration(ops[0]); + + SmallVector args { + to_expression(ops[0]), // tensor + to_expression(ops[1]), // coordinates + to_expression(ops[2]), // out value + }; + + if (length > 3) + { + std::string tensor_operands; + if (ops[3] == 0) + tensor_operands = "0x0u"; + else if (ops[3] == spv::TensorOperandsNontemporalARMMask) + tensor_operands = "gl_TensorOperandsNonTemporalARM"; + else + SPIRV_CROSS_THROW("Invalid tensorOperands for tensorWriteARM."); + args.push_back(tensor_operands); // tensorOperands + } + if (length > 4) + SPIRV_CROSS_THROW("Too many arguments to tensorWriteARM."); + + // tensorWrite(tensor, sizeof(type), coordinates, value) + statement("tensorWriteARM(", merge(args), ");"); + break; + } case OpConvertUToAccelerationStructureKHR: { require_extension_internal("GL_EXT_ray_tracing"); @@ -15465,6 +15751,104 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; } + case OpCooperativeVectorLoadNV: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + emit_uninitialized_temporary_expression(result_type, id); + + statement("coopVecLoadNV(", to_expression(id), ", ", to_expression(ops[2]), ", ", to_expression(ops[3]), ");"); + register_read(id, ops[2], false); + break; + } + + case OpCooperativeVectorStoreNV: + { + uint32_t id = ops[0]; + + statement("coopVecStoreNV(", to_expression(ops[2]), ", ", to_expression(id), ", ", to_expression(ops[1]), ");"); + register_write(ops[2]); + break; + } + + case OpCooperativeVectorOuterProductAccumulateNV: + { + auto buf = ops[0]; + auto offset = ops[1]; + auto v1 = ops[2]; + auto v2 = ops[3]; + auto matrix_layout_id = ops[4]; + auto matrix_iterpretation_id = ops[5]; + auto matrix_stride_id = length >= 6 ? ops[6] : 0; + statement(join("coopVecOuterProductAccumulateNV(", to_expression(v1), ", ", to_expression(v2), ", ", + to_expression(buf), ", ", to_expression(offset), ", ", + matrix_stride_id ? 
to_expression(matrix_stride_id) : "0", + ", ", to_pretty_expression_if_int_constant( + matrix_layout_id, std::begin(CoopVecMatrixLayoutNames), std::end(CoopVecMatrixLayoutNames)), + ", ", to_pretty_expression_if_int_constant( + matrix_iterpretation_id, std::begin(CoopVecComponentTypeNames), std::end(CoopVecComponentTypeNames)), + ");")); + register_write(ops[0]); + break; + } + + case OpCooperativeVectorReduceSumAccumulateNV: + { + auto buf = ops[0]; + auto offset = ops[1]; + auto v1 = ops[2]; + statement(join("coopVecReduceSumAccumulateNV(", to_expression(v1), ", ", to_expression(buf), ", ", + to_expression(offset), ");")); + register_write(ops[0]); + break; + } + + case OpCooperativeVectorMatrixMulNV: + case OpCooperativeVectorMatrixMulAddNV: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + emit_uninitialized_temporary_expression(result_type, id); + + std::string stmt; + switch (opcode) + { + case OpCooperativeVectorMatrixMulAddNV: + stmt += "coopVecMatMulAddNV("; + break; + case OpCooperativeVectorMatrixMulNV: + stmt += "coopVecMatMulNV("; + break; + default: + SPIRV_CROSS_THROW("Invalid op code for coopvec instruction."); + } + for (uint32_t i = 1; i < length; i++) + { + // arguments 3, 6 and in case of MulAddNv also 9 use component type int constants + if (i == 3 || i == 6 || (i == 9 && opcode == OpCooperativeVectorMatrixMulAddNV)) + { + stmt += to_pretty_expression_if_int_constant( + ops[i], std::begin(CoopVecComponentTypeNames), std::end(CoopVecComponentTypeNames)); + } + else if ((i == 12 && opcode == OpCooperativeVectorMatrixMulAddNV) || + (i == 9 && opcode == OpCooperativeVectorMatrixMulNV)) + { + stmt += to_pretty_expression_if_int_constant( + ops[i], std::begin(CoopVecMatrixLayoutNames), std::end(CoopVecMatrixLayoutNames)); + } + else + stmt += to_expression(ops[i]); + + if (i < length - 1) + stmt += ", "; + } + stmt += ");"; + statement(stmt); + break; + } + case OpCooperativeMatrixLengthKHR: { // Need to synthesize a dummy temporary, since the SPIR-V opcode is based on the type. 
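Several of the cooperative-vector and cooperative-matrix cases above route layout and component-type operands through to_pretty_expression_if_int_constant(), whose definition appears near the end of spirv_glsl.cpp in this patch: a non-specialization integer constant is looked up in one of the {value, "gl_..."} tables (CoopVecComponentTypeNames, CoopVecMatrixLayoutNames, CoopMatMatrixLayoutNames) and replaced by its GLSL alias, otherwise the operand falls back to a plain int(...) cast. A self-contained sketch of that lookup, with a simplified signature for illustration only:

#include <cstdint>
#include <string>

struct Mapping { uint32_t value; const char *alias; }; // mirrors GlslConstantNameMapping

static std::string pretty_int_constant(uint32_t value, const Mapping *first, const Mapping *last)
{
    for (const Mapping *m = first; m != last; ++m)
        if (m->value == value)
            return m->alias;                          // e.g. "gl_CooperativeMatrixLayoutRowMajor"
    return "int(" + std::to_string(value) + ")";      // values without an alias keep the integer cast
}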
@@ -15492,27 +15876,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (!is_forcing_recompilation()) split_expr = split_coopmat_pointer(expr); - string layout_expr; - if (const auto *layout = maybe_get(ops[3])) - { - if (!layout->specialization) - { - if (layout->scalar() == spv::CooperativeMatrixLayoutColumnMajorKHR) - layout_expr = "gl_CooperativeMatrixLayoutColumnMajor"; - else - layout_expr = "gl_CooperativeMatrixLayoutRowMajor"; - } - } - - if (layout_expr.empty()) - layout_expr = join("int(", to_expression(ops[3]), ")"); - - statement("coopMatLoad(", - to_expression(id), ", ", - split_expr.first, ", ", - split_expr.second, ", ", - to_expression(ops[4]), ", ", - layout_expr, ");"); + string layout_expr = to_pretty_expression_if_int_constant( + ops[3], std::begin(CoopMatMatrixLayoutNames), std::end(CoopMatMatrixLayoutNames)); + statement("coopMatLoad(", to_expression(id), ", ", split_expr.first, ", ", split_expr.second, ", ", + to_expression(ops[4]), ", ", layout_expr, ");"); register_read(id, ops[2], false); break; @@ -15532,27 +15899,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (!is_forcing_recompilation()) split_expr = split_coopmat_pointer(expr); - string layout_expr; - if (const auto *layout = maybe_get(ops[2])) - { - if (!layout->specialization) - { - if (layout->scalar() == spv::CooperativeMatrixLayoutColumnMajorKHR) - layout_expr = "gl_CooperativeMatrixLayoutColumnMajor"; - else - layout_expr = "gl_CooperativeMatrixLayoutRowMajor"; - } - } + string layout_expr = to_pretty_expression_if_int_constant( + ops[2], std::begin(CoopMatMatrixLayoutNames), std::end(CoopMatMatrixLayoutNames)); - if (layout_expr.empty()) - layout_expr = join("int(", to_expression(ops[2]), ")"); - - statement("coopMatStore(", - to_expression(ops[1]), ", ", - split_expr.first, ", ", - split_expr.second, ", ", - to_expression(ops[3]), ", ", - layout_expr, ");"); + statement("coopMatStore(", to_expression(ops[1]), ", ", split_expr.first, ", ", split_expr.second, ", ", + to_expression(ops[3]), ", ", layout_expr, ");"); // TODO: Do we care about memory operands? @@ -15583,6 +15934,51 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; } + case OpCompositeConstructReplicateEXT: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto &type = get(result_type); + auto value_to_replicate = to_expression(ops[2]); + std::string rhs; + // Matrices don't have a replicating constructor for vectors. Need to manually replicate + if (type.op == spv::OpTypeMatrix || type.op == spv::OpTypeArray) + { + if (type.op == spv::OpTypeArray && type.array.size() != 1) + { + SPIRV_CROSS_THROW( + "Multi-dimensional arrays currently not supported for OpCompositeConstructReplicateEXT"); + } + uint32_t num_elements = type.op == spv::OpTypeMatrix ? 
type.columns : type.array[0]; + if (backend.use_initializer_list && type.op == spv::OpTypeArray) + { + rhs += "{"; + } + else + { + rhs += type_to_glsl_constructor(type); + rhs += "("; + } + for (uint32_t i = 0; i < num_elements; i++) + { + rhs += value_to_replicate; + if (i < num_elements - 1) + rhs += ", "; + } + if (backend.use_initializer_list && type.op == spv::OpTypeArray) + rhs += "}"; + else + rhs += ")"; + } + else + { + rhs = join(type_to_glsl(type), "(", to_expression(ops[2]), ")"); + } + emit_op(result_type, id, rhs, true); + break; + } + default: statement("// unimplemented op ", instruction.op); break; @@ -15800,7 +16196,7 @@ void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type if (is_block) qualifiers = to_interpolation_qualifiers(memberflags); - statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags), + statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, 0, memberflags), variable_decl(membertype, to_member_name(type, index)), ";"); } @@ -15808,7 +16204,7 @@ void CompilerGLSL::emit_struct_padding_target(const SPIRType &) { } -string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) +string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, uint32_t id, const Bitset &flags) { // GL_EXT_buffer_reference variables can be marked as restrict. if (flags.get(DecorationRestrictPointerEXT)) @@ -15816,8 +16212,12 @@ string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset string qual; - if (type_is_floating_point(type) && flags.get(DecorationNoContraction) && backend.support_precise_qualifier) + if (type_is_floating_point(type) && + (flags.get(DecorationNoContraction) || (type.self && has_legacy_nocontract(type.self, id))) && + backend.support_precise_qualifier) + { qual = "precise "; + } // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). 
bool type_supports_precision = @@ -15886,7 +16286,7 @@ string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) if (result_type.width < 32) return "mediump "; } - return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); + return flags_to_qualifiers_glsl(type, id, ir.meta[id].decoration.decoration_flags); } void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var) @@ -15969,6 +16369,13 @@ string CompilerGLSL::to_qualifiers_glsl(uint32_t id) SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL."); } } + else if (type.basetype == SPIRType::Tensor) + { + if (flags.get(DecorationNonWritable)) + res += "readonly "; + if (flags.get(DecorationNonReadable)) + res += "writeonly "; + } res += to_precision_qualifiers_glsl(id); @@ -16403,6 +16810,14 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::RayQuery: return "rayQueryEXT"; + case SPIRType::Tensor: + if (type.ext.tensor.rank == 0) + SPIRV_CROSS_THROW("GLSL tensors must have a Rank."); + if (type.ext.tensor.shape != 0) + SPIRV_CROSS_THROW("GLSL tensors cannot have a Shape."); + return join("tensorARM<", type_to_glsl(get(type.ext.tensor.type)), ", ", + to_expression(type.ext.tensor.rank), ">"); + case SPIRType::Void: return "void"; @@ -16430,6 +16845,17 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) require_extension_internal("GL_ARB_shader_atomic_counters"); } + if (type.op == spv::OpTypeCooperativeVectorNV) + { + require_extension_internal("GL_NV_cooperative_vector"); + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Cooperative vector NV only available in Vulkan."); + + std::string component_type_str = type_to_glsl(get(type.ext.coopVecNV.component_type_id)); + + return join("coopvecNV<", component_type_str, ", ", to_expression(type.ext.coopVecNV.component_count_id), ">"); + } + const SPIRType *coop_type = &type; while (is_pointer(*coop_type) || is_array(*coop_type)) coop_type = &get(coop_type->parent_type); @@ -16440,7 +16866,7 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Cooperative matrix only available in Vulkan."); // GLSL doesn't support this as spec constant, which makes sense ... 
- uint32_t use_type = get(coop_type->cooperative.use_id).scalar(); + uint32_t use_type = get(coop_type->ext.cooperative.use_id).scalar(); const char *use = nullptr; switch (use_type) @@ -16462,7 +16888,7 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) } string scope_expr; - if (const auto *scope = maybe_get(coop_type->cooperative.scope_id)) + if (const auto *scope = maybe_get(coop_type->ext.cooperative.scope_id)) { if (!scope->specialization) { @@ -16477,12 +16903,12 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) } if (scope_expr.empty()) - scope_expr = to_expression(coop_type->cooperative.scope_id); + scope_expr = to_expression(coop_type->ext.cooperative.scope_id); return join("coopmat<", type_to_glsl(get(coop_type->parent_type)), ", ", scope_expr, ", ", - to_expression(coop_type->cooperative.rows_id), ", ", - to_expression(coop_type->cooperative.columns_id), ", ", use, ">"); + to_expression(coop_type->ext.cooperative.rows_id), ", ", + to_expression(coop_type->ext.cooperative.columns_id), ", ", use, ">"); } if (type.vecsize == 1 && type.columns == 1) // Scalar builtin @@ -16512,6 +16938,16 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) SPIRV_CROSS_THROW("bfloat16 requires Vulkan semantics."); require_extension_internal("GL_EXT_bfloat16"); return "bfloat16_t"; + case SPIRType::FloatE4M3: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("floate4m3_t requires Vulkan semantics."); + require_extension_internal("GL_EXT_float_e4m3"); + return "floate4m3_t"; + case SPIRType::FloatE5M2: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("floate5m2_t requires Vulkan semantics."); + require_extension_internal("GL_EXT_float_e5m2"); + return "floate5m2_t"; case SPIRType::Float: return "float"; case SPIRType::Double: @@ -16549,6 +16985,16 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) SPIRV_CROSS_THROW("bfloat16 requires Vulkan semantics."); require_extension_internal("GL_EXT_bfloat16"); return join("bf16vec", type.vecsize); + case SPIRType::FloatE4M3: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("floate4m3_t requires Vulkan semantics."); + require_extension_internal("GL_EXT_float_e4m3"); + return join("fe4m3vec", type.vecsize); + case SPIRType::FloatE5M2: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("floate5m2_t requires Vulkan semantics."); + require_extension_internal("GL_EXT_float_e5m2"); + return join("fe5m2vec", type.vecsize); case SPIRType::Float: return join("vec", type.vecsize); case SPIRType::Double: @@ -16785,7 +17231,7 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret string decl; auto &type = get(func.return_type); - decl += flags_to_qualifiers_glsl(type, return_flags); + decl += flags_to_qualifiers_glsl(type, 0, return_flags); decl += type_to_glsl(type); decl += type_to_array_glsl(type, 0); decl += " "; @@ -17643,7 +18089,7 @@ void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &tempo if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) initializer = join(" = ", to_zero_initialized_expression(tmp.first)); - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";"); + statement(flags_to_qualifiers_glsl(type, tmp.second, flags), variable_decl(type, to_name(tmp.second)), initializer, ";"); hoisted_temporaries.insert(tmp.second); forced_temporaries.insert(tmp.second); @@ -17658,7 +18104,7 @@ void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &tempo { 
uint32_t mirror_id = mirrored_precision_itr->second; auto &mirror_flags = get_decoration_bitset(mirror_id); - statement(flags_to_qualifiers_glsl(type, mirror_flags), + statement(flags_to_qualifiers_glsl(type, mirror_id, mirror_flags), variable_decl(type, to_name(mirror_id)), initializer, ";"); // The temporary might be read from before it's assigned, set up the expression now. @@ -17669,6 +18115,26 @@ void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &tempo } void CompilerGLSL::emit_block_chain(SPIRBlock &block) +{ + SmallVector cleanup_stack; + BlockID next_block = emit_block_chain_inner(block); + + while (next_block != 0) + { + cleanup_stack.push_back(next_block); + next_block = emit_block_chain_inner(get(next_block)); + } + + while (!cleanup_stack.empty()) + { + emit_block_chain_cleanup(get(cleanup_stack.back())); + cleanup_stack.pop_back(); + } + + emit_block_chain_cleanup(block); +} + +BlockID CompilerGLSL::emit_block_chain_inner(SPIRBlock &block) { bool select_branch_to_true_block = false; bool select_branch_to_false_block = false; @@ -17719,12 +18185,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) } // Remember deferred declaration state. We will restore it before returning. - SmallVector rearm_dominated_variables(block.dominated_variables.size()); + assert(block.rearm_dominated_variables.empty()); + block.rearm_dominated_variables.resize(block.dominated_variables.size()); for (size_t i = 0; i < block.dominated_variables.size(); i++) { uint32_t var_id = block.dominated_variables[i]; auto &var = get(var_id); - rearm_dominated_variables[i] = var.deferred_declaration; + block.rearm_dominated_variables[i] = var.deferred_declaration; } // This is the method often used by spirv-opt to implement loops. @@ -18317,6 +18784,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) SPIRV_CROSS_THROW("Unimplemented block terminator."); } + BlockID trailing_block_id = 0; + if (block.next_block && emit_next_block) { // If we hit this case, we're dealing with an unconditional branch, which means we will output @@ -18324,8 +18793,10 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) if (block.merge != SPIRBlock::MergeSelection) { flush_phi(block.self, block.next_block); + // For a direct branch, need to remember to invalidate expressions in the next linear block instead. - get(block.next_block).invalidate_expressions = block.invalidate_expressions; + get(block.next_block).invalidate_expressions.clear(); + std::swap(get(block.next_block).invalidate_expressions, block.invalidate_expressions); } // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. @@ -18346,7 +18817,15 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) branch_to_continue(block.self, block.next_block); } else if (BlockID(block.self) != block.next_block) - emit_block_chain(get(block.next_block)); + { + // Recursing here is quite scary since it's quite easy to stack overflow if + // the SPIR-V is constructed a particular way. + // We have to simulate the tail call ourselves. 
+ if (block.merge != SPIRBlock::MergeLoop) + trailing_block_id = block.next_block; + else + emit_block_chain(get(block.next_block)); + } } } @@ -18388,20 +18867,28 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) if (is_continue(block.merge_block)) branch_to_continue(block.self, block.merge_block); else - emit_block_chain(get(block.merge_block)); + trailing_block_id = block.merge_block; } + return trailing_block_id; +} + +void CompilerGLSL::emit_block_chain_cleanup(SPIRBlock &block) +{ // Forget about control dependent expressions now. block.invalidate_expressions.clear(); - // After we return, we must be out of scope, so if we somehow have to re-emit this function, + // After we return, we must be out of scope, so if we somehow have to re-emit this block, // re-declare variables if necessary. - assert(rearm_dominated_variables.size() == block.dominated_variables.size()); + // We only need one array here for rearm_dominated_variables, + // since it should be impossible for the same block to be remitted in the same chain twice. + assert(block.rearm_dominated_variables.size() == block.dominated_variables.size()); for (size_t i = 0; i < block.dominated_variables.size(); i++) { uint32_t var = block.dominated_variables[i]; - get(var).deferred_declaration = rearm_dominated_variables[i]; + get(var).deferred_declaration = block.rearm_dominated_variables[i]; } + block.rearm_dominated_variables.clear(); // Just like for deferred declaration, we need to forget about loop variable enable // if our block chain is reinstantiated later. @@ -18943,6 +19430,34 @@ void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) } } +void CompilerGLSL::emit_non_semantic_shader_debug_info(uint32_t, uint32_t result_id, uint32_t eop, + const uint32_t *args, uint32_t) +{ + if (!options.emit_line_directives) + return; + + switch (eop) + { + case SPIRExtension::DebugLine: + { + // We're missing line end and columns here, but I don't think we can emit those in any meaningful way. + emit_line_directive(args[0], get(args[1]).scalar()); + break; + } + + case SPIRExtension::DebugSource: + { + // Forward the string declaration here. We ignore the optional text operand. + auto &str = get(args[0]).str; + set(result_id, str); + break; + } + + default: + break; + } +} + void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, SmallVector chain) { @@ -19582,3 +20097,96 @@ std::string CompilerGLSL::format_double(double value) const return convert_to_string(value, current_locale_radix_character); } +std::string CompilerGLSL::to_pretty_expression_if_int_constant( + uint32_t id, + const GlslConstantNameMapping *mapping_start, const GlslConstantNameMapping *mapping_end, + bool register_expression_read) +{ + auto *c = maybe_get(id); + if (c && !c->specialization) + { + auto value = c->scalar(); + auto pretty_name = std::find_if(mapping_start, mapping_end, + [value](const GlslConstantNameMapping &mapping) { return mapping.value == value; }); + if (pretty_name != mapping_end) + return pretty_name->alias; + } + return join("int(", to_expression(id, register_expression_read), ")"); +} + +uint32_t CompilerGLSL::get_fp_fast_math_flags_for_op(uint32_t result_type, uint32_t id) const +{ + uint32_t fp_flags = ~0; + + if (!type_is_floating_point(get(result_type))) + return fp_flags; + + auto &ep = get_entry_point(); + + // Per-operation flag supersedes all defaults. 
+ if (id != 0 && has_decoration(id, DecorationFPFastMathMode)) + return get_decoration(id, DecorationFPFastMathMode); + + // Handle float_controls1 execution modes. + uint32_t width = get(result_type).width; + + bool szinp = false; + + switch (width) + { + case 8: + szinp = ep.signed_zero_inf_nan_preserve_8; + break; + + case 16: + szinp = ep.signed_zero_inf_nan_preserve_16; + break; + + case 32: + szinp = ep.signed_zero_inf_nan_preserve_32; + break; + + case 64: + szinp = ep.signed_zero_inf_nan_preserve_64; + break; + + default: + break; + } + + if (szinp) + fp_flags &= ~(FPFastMathModeNSZMask | FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask); + + // Legacy NoContraction deals with any kind of transform to the expression. + if (id != 0 && has_decoration(id, DecorationNoContraction)) + fp_flags &= ~(FPFastMathModeAllowContractMask | FPFastMathModeAllowTransformMask | FPFastMathModeAllowReassocMask); + + // Handle float_controls2 execution modes. + bool found_default = false; + for (auto &fp_pair : ep.fp_fast_math_defaults) + { + if (get(fp_pair.first).width == width && fp_pair.second) + { + fp_flags &= get(fp_pair.second).scalar(); + found_default = true; + } + } + + // From SPV_KHR_float_controls2: + // "This definition implies that, if the entry point set any FPFastMathDefault execution mode + // then any type for which a default is not set uses no fast math flags + // (although this can still be overridden on a per-operation basis). + // Modules must not mix setting fast math modes explicitly using this extension and relying on older API defaults." + if (!found_default && !ep.fp_fast_math_defaults.empty()) + fp_flags = 0; + + return fp_flags; +} + +bool CompilerGLSL::has_legacy_nocontract(uint32_t result_type, uint32_t id) const +{ + const auto fp_flags = FPFastMathModeAllowContractMask | + FPFastMathModeAllowTransformMask | + FPFastMathModeAllowReassocMask; + return (get_fp_fast_math_flags_for_op(result_type, id) & fp_flags) != fp_flags; +} diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp index cea150f22..776ab8a6c 100644 --- a/3rdparty/spirv-cross/spirv_glsl.hpp +++ b/3rdparty/spirv-cross/spirv_glsl.hpp @@ -32,6 +32,8 @@ namespace SPIRV_CROSS_NAMESPACE { +struct GlslConstantNameMapping; + enum PlsFormat { PlsNone = 0, @@ -426,6 +428,8 @@ protected: const uint32_t *args, uint32_t count); virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count); + void emit_non_semantic_shader_debug_info(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count); virtual void emit_header(); void emit_line_directive(uint32_t file_id, uint32_t line_literal); void build_workgroup_size(SmallVector &arguments, const SpecializationConstant &x, @@ -662,6 +666,7 @@ protected: bool workgroup_size_is_hidden = false; bool requires_relaxed_precision_analysis = false; bool implicit_c_integer_promotion_rules = false; + bool supports_spec_constant_array_size = true; } backend; void emit_struct(SPIRType &type); @@ -685,6 +690,8 @@ protected: void emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, const SmallVector &indices); void emit_block_chain(SPIRBlock &block); + BlockID emit_block_chain_inner(SPIRBlock &block); + void emit_block_chain_cleanup(SPIRBlock &block); void emit_hoisted_temporaries(SmallVector> &temporaries); int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const; void 
emit_constant(const SPIRConstant &constant); @@ -802,6 +809,10 @@ protected: virtual void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist); std::string to_non_uniform_aware_expression(uint32_t id); std::string to_atomic_ptr_expression(uint32_t id); + std::string to_pretty_expression_if_int_constant( + uint32_t id, + const GlslConstantNameMapping *mapping_start, const GlslConstantNameMapping *mapping_end, + bool register_expression_read = true); std::string to_expression(uint32_t id, bool register_expression_read = true); std::string to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type); std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, const SPIRType &type); @@ -830,7 +841,7 @@ protected: void emit_output_variable_initializer(const SPIRVariable &var); std::string to_precision_qualifiers_glsl(uint32_t id); virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var); - std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags); + std::string flags_to_qualifiers_glsl(const SPIRType &type, uint32_t id, const Bitset &flags); const char *format_to_glsl(spv::ImageFormat format); virtual std::string layout_for_member(const SPIRType &type, uint32_t index); virtual std::string to_interpolation_qualifiers(const Bitset &flags); @@ -1017,6 +1028,8 @@ protected: const Instruction *get_next_instruction_in_block(const Instruction &instr); static uint32_t mask_relevant_memory_semantics(uint32_t semantics); + std::string convert_floate4m3_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); + std::string convert_floate5m2_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); @@ -1068,6 +1081,9 @@ protected: std::string format_float(float value) const; std::string format_double(double value) const; + uint32_t get_fp_fast_math_flags_for_op(uint32_t result_type, uint32_t id) const; + bool has_legacy_nocontract(uint32_t result_type, uint32_t id) const; + private: void init(); diff --git a/3rdparty/spirv-cross/spirv_hlsl.cpp b/3rdparty/spirv-cross/spirv_hlsl.cpp index 1ec4cb70f..0db81c46e 100644 --- a/3rdparty/spirv-cross/spirv_hlsl.cpp +++ b/3rdparty/spirv-cross/spirv_hlsl.cpp @@ -3029,7 +3029,7 @@ string CompilerHLSL::get_inner_entry_point_name() const SPIRV_CROSS_THROW("Unsupported execution model."); } -uint32_t CompilerHLSL::input_vertices_from_execution_mode(spirv_cross::SPIREntryPoint &execution) const +uint32_t CompilerHLSL::input_vertices_from_execution_mode(SPIREntryPoint &execution) const { uint32_t input_vertices = 1; @@ -3061,7 +3061,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret auto &type = get(func.return_type); if (type.array.empty()) { - decl += flags_to_qualifiers_glsl(type, return_flags); + decl += flags_to_qualifiers_glsl(type, 0, return_flags); decl += type_to_glsl(type); decl += " "; } diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index 53f74f177..f60431441 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -510,7 +510,7 @@ void CompilerMSL::build_implicit_builtins() has_local_invocation_index = true; } - if 
(need_workgroup_size && builtin == BuiltInLocalInvocationId) + if (need_workgroup_size && builtin == BuiltInWorkgroupSize) { builtin_workgroup_size_id = var.self; mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self); @@ -684,28 +684,6 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id); } - if (need_multiview) - { - // Multiview shaders are not allowed to write to gl_Layer, ostensibly because - // it is implicitly written from gl_ViewIndex, but we have to do that explicitly. - // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but - // gl_Layer is an output in vertex-pipeline shaders. - uint32_t type_ptr_out_id = ir.increase_bound_by(2); - SPIRType uint_type_ptr_out = get_uint_type(); - uint_type_ptr.op = OpTypePointer; - uint_type_ptr_out.pointer = true; - uint_type_ptr_out.pointer_depth++; - uint_type_ptr_out.parent_type = get_uint_type_id(); - uint_type_ptr_out.storage = StorageClassOutput; - auto &ptr_out_type = set(type_ptr_out_id, uint_type_ptr_out); - ptr_out_type.self = get_uint_type_id(); - uint32_t var_id = type_ptr_out_id + 1; - set(var_id, type_ptr_out_id, StorageClassOutput); - set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); - builtin_layer_id = var_id; - mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id); - } - if (need_multiview && !has_view_idx) { uint32_t var_id = ir.increase_bound_by(1); @@ -718,6 +696,28 @@ void CompilerMSL::build_implicit_builtins() } } + if (need_multiview) + { + // Multiview shaders are not allowed to write to gl_Layer, ostensibly because + // it is implicitly written from gl_ViewIndex, but we have to do that explicitly. + // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but + // gl_Layer is an output in vertex-pipeline shaders. + uint32_t type_ptr_out_id = ir.increase_bound_by(2); + SPIRType uint_type_ptr_out = get_uint_type(); + uint_type_ptr_out.op = OpTypePointer; + uint_type_ptr_out.pointer = true; + uint_type_ptr_out.pointer_depth++; + uint_type_ptr_out.parent_type = get_uint_type_id(); + uint_type_ptr_out.storage = StorageClassOutput; + auto &ptr_out_type = set(type_ptr_out_id, uint_type_ptr_out); + ptr_out_type.self = get_uint_type_id(); + uint32_t var_id = type_ptr_out_id + 1; + set(var_id, type_ptr_out_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); + builtin_layer_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id); + } + if ((need_tesc_params && (msl_options.multi_patch_workgroup || !has_invocation_id || !has_primitive_id)) || (need_tese_params && !has_primitive_id) || need_grid_params) { @@ -932,25 +932,55 @@ void CompilerMSL::build_implicit_builtins() if (need_workgroup_size && !has_workgroup_size) { - uint32_t offset = ir.increase_bound_by(2); - uint32_t type_ptr_id = offset; - uint32_t var_id = offset + 1; + auto &execution = get_entry_point(); + // First, check if the workgroup size _constant_ were defined. + // If it were, we don't need to do--in fact, shouldn't do--anything. + builtin_workgroup_size_id = execution.workgroup_size.constant; + if (builtin_workgroup_size_id == 0) + { + uint32_t var_id = ir.increase_bound_by(1); - // Create gl_WorkgroupSize. 
- uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3); - SPIRType uint_type_ptr = get(type_id); - uint_type_ptr.op = OpTypePointer; - uint_type_ptr.pointer = true; - uint_type_ptr.pointer_depth++; - uint_type_ptr.parent_type = type_id; - uint_type_ptr.storage = StorageClassInput; + // Create gl_WorkgroupSize. + uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3); + // If we have LocalSize or LocalSizeId, use those to define the workgroup size. + if (execution.flags.get(ExecutionModeLocalSizeId)) + { + const SPIRConstant *init[] = { &get(execution.workgroup_size.id_x), + &get(execution.workgroup_size.id_y), + &get(execution.workgroup_size.id_z) }; + bool specialized = init[0]->specialization || init[1]->specialization || init[2]->specialization; + set(var_id, type_id, init, 3, specialized); + execution.workgroup_size.constant = var_id; + } + else if (execution.flags.get(ExecutionModeLocalSize)) + { + uint32_t offset = ir.increase_bound_by(3); + const SPIRConstant *init[] = { + &set(offset, get_uint_type_id(), execution.workgroup_size.x, false), + &set(offset + 1, get_uint_type_id(), execution.workgroup_size.y, false), + &set(offset + 2, get_uint_type_id(), execution.workgroup_size.z, false) + }; + set(var_id, type_id, init, 3, false); + execution.workgroup_size.constant = var_id; + } + else + { + uint32_t type_ptr_id = ir.increase_bound_by(1); + SPIRType uint_type_ptr = get(type_id); + uint_type_ptr.op = OpTypePointer; + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassInput; - auto &ptr_type = set(type_ptr_id, uint_type_ptr); - ptr_type.self = type_id; - set(var_id, type_ptr_id, StorageClassInput); - set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize); - builtin_workgroup_size_id = var_id; - mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id); + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + set(var_id, type_ptr_id, StorageClassInput); + mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id); + } + set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize); + builtin_workgroup_size_id = var_id; + } } if (!has_frag_depth && force_frag_depth_passthrough) @@ -1681,6 +1711,7 @@ string CompilerMSL::compile() backend.array_is_value_type_in_buffer_blocks = false; backend.support_pointer_to_pointer = true; backend.implicit_c_integer_promotion_rules = true; + backend.supports_spec_constant_array_size = false; capture_output_to_buffer = msl_options.capture_output_to_buffer; is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; @@ -1841,7 +1872,7 @@ void CompilerMSL::preprocess_op_codes() if (preproc.uses_atomics) { add_header_line("#include "); - add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\""); + add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"", false); } // Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to @@ -1850,6 +1881,10 @@ void CompilerMSL::preprocess_op_codes() (preproc.uses_image_write && !msl_options.supports_msl_version(2, 2))) is_rasterization_disabled = true; + // FIXME: This currently does not consider BDA side effects, so we cannot deduce const device for BDA. 
+ if (preproc.uses_buffer_write || preproc.uses_image_write) + has_descriptor_side_effects = true; + // Tessellation control shaders are run as compute functions in Metal, and so // must capture their output to a buffer. if (is_tesc_shader() || (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)) @@ -5720,22 +5755,44 @@ void CompilerMSL::emit_header() { // This particular line can be overridden during compilation, so make it a flag and not a pragma line. if (suppress_missing_prototypes) - statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\""); + add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"", false); if (suppress_incompatible_pointer_types_discard_qualifiers) - statement("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\""); + add_pragma_line("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"", false); // Disable warning about "sometimes unitialized" when zero-initializing simple threadgroup variables if (suppress_sometimes_unitialized) - statement("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\""); + add_pragma_line("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"", false); // Disable warning about missing braces for array template to make arrays a value type if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0) - statement("#pragma clang diagnostic ignored \"-Wmissing-braces\""); + add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-braces\"", false); + + // Floating point fast math compile declarations + if (msl_options.use_fast_math_pragmas && msl_options.supports_msl_version(3, 2)) + { + uint32_t contract_mask = FPFastMathModeAllowContractMask; + uint32_t relax_mask = (FPFastMathModeNSZMask | FPFastMathModeAllowRecipMask | FPFastMathModeAllowReassocMask); + uint32_t fast_mask = (relax_mask | FPFastMathModeNotNaNMask | FPFastMathModeNotInfMask); + + // FP math mode + uint32_t fp_flags = get_fp_fast_math_flags(true); + const char *math_mode = "safe"; + if ((fp_flags & fast_mask) == fast_mask) // Must have all flags + math_mode = "fast"; + else if ((fp_flags & relax_mask) == relax_mask) // Must have all flags + math_mode = "relaxed"; + + add_pragma_line(join("#pragma metal fp math_mode(", math_mode, ")"), false); + + // FP contraction + const char *contract_mode = ((fp_flags & contract_mask) == contract_mask) ? 
"fast" : "off"; + add_pragma_line(join("#pragma metal fp contract(", contract_mode, ")"), false); + } for (auto &pragma : pragma_lines) statement(pragma); - if (!pragma_lines.empty() || suppress_missing_prototypes) + if (!pragma_lines.empty()) statement(""); statement("#include "); @@ -5755,18 +5812,23 @@ void CompilerMSL::emit_header() statement(""); } -void CompilerMSL::add_pragma_line(const string &line) +void CompilerMSL::add_pragma_line(const string &line, bool recompile_on_unique) { - auto rslt = pragma_lines.insert(line); - if (rslt.second) - force_recompile(); + if (std::find(pragma_lines.begin(), pragma_lines.end(), line) == pragma_lines.end()) + { + pragma_lines.push_back(line); + if (recompile_on_unique) + force_recompile(); + } } void CompilerMSL::add_typedef_line(const string &line) { - auto rslt = typedef_lines.insert(line); - if (rslt.second) + if (std::find(typedef_lines.begin(), typedef_lines.end(), line) == typedef_lines.end()) + { + typedef_lines.push_back(line); force_recompile(); + } } // Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared @@ -8353,9 +8415,22 @@ void CompilerMSL::emit_specialization_constants_and_structs() if (unique_func_constants[constant_id] == c.self) statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id, ")]];"); - statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name, - ") ? ", bitcast_expression(type, sc_tmp_type, sc_tmp_name), " : ", constant_expression(c), - ";"); + // RenderDoc and other instrumentation may reuse the same SpecId with different base types. + // We deduplicate to one [[function_constant(id)]] temp and then initialize all variants from it. + // Metal forbids as_type to/from 'bool', so if either side is Boolean, avoid bitcasting here and + // prefer a value cast via a constructor instead (e.g. uint(tmp) / float(tmp) / bool(tmp)). + // This preserves expected toggle semantics and prevents illegal MSL like as_type(bool_tmp). + { + string sc_true_expr; + if (sc_tmp_type == type.basetype) + sc_true_expr = sc_tmp_name; + else if (sc_tmp_type == SPIRType::Boolean || type.basetype == SPIRType::Boolean) + sc_true_expr = join(sc_type_name, "(", sc_tmp_name, ")"); + else + sc_true_expr = bitcast_expression(type, sc_tmp_type, sc_tmp_name); + statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name, + ") ? ", sc_true_expr, " : ", constant_expression(c), ";"); + } } else if (has_decoration(c.self, DecorationSpecId)) { @@ -9161,7 +9236,12 @@ bool CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, cons // and there is a risk of concurrent write access to other components, // we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect. // The MSL compiler refuses to allow component-level access for any non-packed vector types. - if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup)) + // MSL refuses to take address or reference to vector component, even for packed types, so just force + // through the pointer cast. No much we can do sadly. + // For packed types, we could technically omit this if we know the reference does not have to turn into a pointer + // of some kind, but that requires external analysis passes to figure out, and + // this case is likely rare enough that we don't need to bother. 
+	if (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup)
 	{
 		const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device";
 		expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")");
@@ -9485,21 +9565,21 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		break;
 
 	case OpFMul:
-		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
+		if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
 			MSL_BFOP(spvFMul);
 		else
 			MSL_BOP(*);
 		break;
 
 	case OpFAdd:
-		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
+		if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
 			MSL_BFOP(spvFAdd);
 		else
 			MSL_BOP(+);
 		break;
 
 	case OpFSub:
-		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
+		if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
 			MSL_BFOP(spvFSub);
 		else
 			MSL_BOP(-);
@@ -9997,7 +10077,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 	case OpVectorTimesMatrix:
 	case OpMatrixTimesVector:
 	{
-		if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
+		if (!msl_options.invariant_float_math && !has_legacy_nocontract(ops[0], ops[1]))
 		{
 			CompilerGLSL::emit_instruction(instruction);
 			break;
@@ -10039,7 +10119,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 
 	case OpMatrixTimesMatrix:
 	{
-		if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
+		if (!msl_options.invariant_float_math && !has_legacy_nocontract(ops[0], ops[1]))
 		{
 			CompilerGLSL::emit_instruction(instruction);
 			break;
@@ -10725,13 +10805,13 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
 	auto *lhs_var = maybe_get_backing_variable(lhs_id);
 	if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
 		lhs_is_array_template = true;
-	else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(lhs_var->basetype)))
+	else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_explicit_layout(get<SPIRType>(lhs_var->basetype)))
 		lhs_is_array_template = false;
 
 	auto *rhs_var = maybe_get_backing_variable(rhs_id);
 	if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
 		rhs_is_array_template = true;
-	else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(rhs_var->basetype)))
+	else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_explicit_layout(get<SPIRType>(rhs_var->basetype)))
 		rhs_is_array_template = false;
 
 	// If threadgroup storage qualifiers are *not* used:
@@ -11182,35 +11262,76 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	auto &restype = get<SPIRType>(result_type);
 
+	// Only precise:: preserves NaN in transcendentals (supposedly, cannot find documentation for this).
+	const auto drop_nan_inf = FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask;
+	bool preserve_nan = (get_fp_fast_math_flags_for_op(result_type, id) & drop_nan_inf) != drop_nan_inf;
+	const char *preserve_str = preserve_nan ? "precise" : "fast";
+
+	// TODO: Emit the default behavior to match existing code. Might need to be revisited.
+	// Only fp32 has the precise:: override.
+#define EMIT_PRECISE_OVERRIDE(glsl_op, op) \ + case GLSLstd450##glsl_op: \ + if (restype.basetype == SPIRType::Float && preserve_nan) \ + emit_unary_func_op(result_type, id, args[0], "precise::" op); \ + else \ + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); \ + break + switch (op) { + EMIT_PRECISE_OVERRIDE(Cos, "cos"); + EMIT_PRECISE_OVERRIDE(Sin, "sin"); + EMIT_PRECISE_OVERRIDE(Tan, "tan"); + EMIT_PRECISE_OVERRIDE(Acos, "acos"); + EMIT_PRECISE_OVERRIDE(Asin, "asin"); + EMIT_PRECISE_OVERRIDE(Atan, "atan"); + EMIT_PRECISE_OVERRIDE(Exp, "exp"); + EMIT_PRECISE_OVERRIDE(Exp2, "exp2"); + EMIT_PRECISE_OVERRIDE(Log, "log"); + EMIT_PRECISE_OVERRIDE(Log2, "log2"); + EMIT_PRECISE_OVERRIDE(Sqrt, "sqrt"); +#undef EMIT_PRECISE_OVERRIDE + case GLSLstd450Sinh: if (restype.basetype == SPIRType::Half) { + auto ftype = restype; + ftype.basetype = SPIRType::Float; + // MSL does not have overload for half. Force-cast back to half. - auto expr = join("half(fast::sinh(", to_unpacked_expression(args[0]), "))"); + auto expr = join(type_to_glsl(restype), "(", preserve_str, "::sinh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))"); emit_op(result_type, id, expr, should_forward(args[0])); inherit_expression_dependencies(id, args[0]); } + else if (preserve_nan) + emit_unary_func_op(result_type, id, args[0], "precise::sinh"); else emit_unary_func_op(result_type, id, args[0], "fast::sinh"); break; case GLSLstd450Cosh: if (restype.basetype == SPIRType::Half) { + auto ftype = restype; + ftype.basetype = SPIRType::Float; + // MSL does not have overload for half. Force-cast back to half. - auto expr = join("half(fast::cosh(", to_unpacked_expression(args[0]), "))"); + auto expr = join(type_to_glsl(restype), "(", preserve_str, "::cosh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))"); emit_op(result_type, id, expr, should_forward(args[0])); inherit_expression_dependencies(id, args[0]); } + else if (preserve_nan) + emit_unary_func_op(result_type, id, args[0], "precise::cosh"); else emit_unary_func_op(result_type, id, args[0], "fast::cosh"); break; case GLSLstd450Tanh: if (restype.basetype == SPIRType::Half) { + auto ftype = restype; + ftype.basetype = SPIRType::Float; + // MSL does not have overload for half. Force-cast back to half. - auto expr = join("half(fast::tanh(", to_unpacked_expression(args[0]), "))"); + auto expr = join(type_to_glsl(restype), "(", preserve_str, "::tanh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))"); emit_op(result_type, id, expr, should_forward(args[0])); inherit_expression_dependencies(id, args[0]); } @@ -11221,7 +11342,13 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, if (restype.basetype == SPIRType::Half) { // MSL does not have overload for half. Force-cast back to half. 
- auto expr = join("half(fast::atan2(", to_unpacked_expression(args[0]), ", ", to_unpacked_expression(args[1]), "))"); + auto ftype = restype; + ftype.basetype = SPIRType::Float; + + auto expr = join(type_to_glsl(restype), + "(", preserve_str, "::atan2(", + type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), "), ", + type_to_glsl(ftype), "(", to_unpacked_expression(args[1]), ")))"); emit_op(result_type, id, expr, should_forward(args[0]) && should_forward(args[1])); inherit_expression_dependencies(id, args[0]); inherit_expression_dependencies(id, args[1]); @@ -11230,7 +11357,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2"); break; case GLSLstd450InverseSqrt: - emit_unary_func_op(result_type, id, args[0], "rsqrt"); + if (restype.basetype == SPIRType::Float && preserve_nan) + emit_unary_func_op(result_type, id, args[0], "precise::rsqrt"); + else + emit_unary_func_op(result_type, id, args[0], "rsqrt"); break; case GLSLstd450RoundEven: emit_unary_func_op(result_type, id, args[0], "rint"); @@ -11462,11 +11592,14 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, { auto &exp_type = expression_type(args[0]); // MSL does not support scalar versions here. - // MSL has no implementation for normalize in the fast:: namespace for half2 and half3 + // MSL has no implementation for normalize in the fast:: namespace for half // Returns -1 or 1 for valid input, sign() does the job. + + // precise::normalize asm looks ridiculous. + // Don't think this actually matters unless proven otherwise. if (exp_type.vecsize == 1) emit_unary_func_op(result_type, id, args[0], "sign"); - else if (exp_type.vecsize <= 3 && exp_type.basetype == SPIRType::Half) + else if (exp_type.basetype == SPIRType::Half) emit_unary_func_op(result_type, id, args[0], "normalize"); else emit_unary_func_op(result_type, id, args[0], "fast::normalize"); @@ -11529,7 +11662,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, case GLSLstd450Pow: // powr makes x < 0.0 undefined, just like SPIR-V. - emit_binary_func_op(result_type, id, args[0], args[1], "powr"); + if (restype.basetype == SPIRType::Float && preserve_nan) + emit_binary_func_op(result_type, id, args[0], args[1], "precise::powr"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "powr"); break; default: @@ -13753,6 +13889,24 @@ uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltI return loc; } +bool CompilerMSL::entry_point_returns_stage_output() const +{ + if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation) + return false; + bool ep_should_return_output = !get_is_rasterization_disabled(); + return stage_out_var_id && ep_should_return_output; +} + +bool CompilerMSL::entry_point_requires_const_device_buffers() const +{ + // For fragment, we don't really need it, but it might help avoid pessimization + // if the compiler deduces it needs to use late-Z for whatever reason. 
+ return (get_execution_model() == ExecutionModelFragment && !has_descriptor_side_effects) || + (entry_point_returns_stage_output() && + (get_execution_model() == ExecutionModelVertex || + get_execution_model() == ExecutionModelTessellationEvaluation)); +} + // Returns the type declaration for a function, including the // entry type if the current function is the entry point function string CompilerMSL::func_type_decl(SPIRType &type) @@ -13763,8 +13917,7 @@ string CompilerMSL::func_type_decl(SPIRType &type) return return_type; // If an outgoing interface block has been defined, and it should be returned, override the entry point return type - bool ep_should_return_output = !get_is_rasterization_disabled(); - if (stage_out_var_id && ep_should_return_output) + if (entry_point_returns_stage_output()) return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type, 0); // Prepend a entry type, based on the execution model @@ -13888,16 +14041,14 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo case StorageClassStorageBuffer: { - // For arguments from variable pointers, we use the write count deduction, so - // we should not assume any constness here. Only for global SSBOs. - bool readonly = false; - if (!var || has_decoration(type.self, DecorationBlock)) - readonly = flags.get(DecorationNonWritable); - - if (decoration_flags_signal_coherent(flags)) - readonly = false; - - addr_space = readonly ? "const device" : "device"; + // When dealing with descriptor aliasing, it becomes very problematic to make use of + // readonly qualifiers. + // If rasterization is not disabled in vertex/tese, Metal does not allow side effects and refuses to compile "device", + // even if there are no writes. Just force const device. + if (entry_point_requires_const_device_buffers()) + addr_space = "const device"; + else + addr_space = "device"; break; } @@ -13914,7 +14065,12 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo { bool ssbo = has_decoration(type.self, DecorationBufferBlock); if (ssbo) - addr_space = flags.get(DecorationNonWritable) ? "const device" : "device"; + { + if (entry_point_requires_const_device_buffers()) + addr_space = "const device"; + else + addr_space = "device"; + } else addr_space = "constant"; } @@ -16553,7 +16709,7 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type, uint32_t variable_i default: if (type_is_array_of_pointers(type) || using_builtin_array()) { - const SPIRVariable *var = variable_id ? &get(variable_id) : nullptr; + const SPIRVariable *var = variable_id ? maybe_get(variable_id) : nullptr; if (var && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer) && is_array(get_variable_data_type(*var))) { @@ -16854,6 +17010,8 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb if (p_var && p_var->basevariable) p_var = maybe_get(p_var->basevariable); + bool has_access_qualifier = true; + switch (img_type.access) { case AccessQualifierReadOnly: @@ -16879,12 +17037,21 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb img_type_name += "write"; } + else + { + has_access_qualifier = false; + } break; } } if (p_var && has_decoration(p_var->self, DecorationCoherent) && msl_options.supports_msl_version(3, 2)) + { + // Cannot declare memory_coherence_device without access qualifier. 
+ if (!has_access_qualifier) + img_type_name += ", access::read"; img_type_name += ", memory_coherence_device"; + } } img_type_name += ">"; @@ -17317,13 +17484,18 @@ void CompilerMSL::emit_subgroup_cluster_op_cast(uint32_t result_type, uint32_t r inherit_expression_dependencies(result_id, op0); } +// Note: Metal forbids bitcasting to/from 'bool' using as_type. This function is used widely +// for generating casts in the backend. To avoid generating illegal MSL when the canonical +// function constant type (from deduplicated SpecId) is Boolean, fall back to value-cast in +// that case by returning type_to_glsl(out_type) instead of as_type<...>. string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) { if (out_type.basetype == in_type.basetype) return ""; - assert(out_type.basetype != SPIRType::Boolean); - assert(in_type.basetype != SPIRType::Boolean); + // Avoid bitcasting to/from booleans in MSL; use value cast instead. + if (out_type.basetype == SPIRType::Boolean || in_type.basetype == SPIRType::Boolean) + return type_to_glsl(out_type); bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type) && (out_type.vecsize == in_type.vecsize); bool same_size_cast = (out_type.width * out_type.vecsize) == (in_type.width * in_type.vecsize); @@ -17679,6 +17851,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) case BuiltInGlobalInvocationId: return "thread_position_in_grid"; + case BuiltInWorkgroupSize: + return "threads_per_threadgroup"; + case BuiltInWorkgroupId: return "threadgroup_position_in_grid"; @@ -17864,6 +18039,7 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) case BuiltInLocalInvocationId: case BuiltInNumWorkgroups: case BuiltInWorkgroupId: + case BuiltInWorkgroupSize: return "uint3"; case BuiltInLocalInvocationIndex: case BuiltInNumSubgroups: @@ -18531,11 +18707,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpFAdd: case OpFSub: - if (compiler.msl_options.invariant_float_math || - compiler.has_decoration(args[1], DecorationNoContraction)) - { + if (compiler.msl_options.invariant_float_math || compiler.has_legacy_nocontract(args[0], args[1])) return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub; - } break; case OpFMul: @@ -18543,11 +18716,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpMatrixTimesVector: case OpVectorTimesMatrix: case OpMatrixTimesMatrix: - if (compiler.msl_options.invariant_float_math || - compiler.has_decoration(args[1], DecorationNoContraction)) - { + if (compiler.msl_options.invariant_float_math || compiler.has_legacy_nocontract(args[0], args[1])) return SPVFuncImplFMul; - } break; case OpQuantizeToF16: @@ -19572,7 +19742,7 @@ void CompilerMSL::analyze_argument_buffers() SetBindingPair pair = { desc_set, binding }; if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler || - resource.basetype == SPIRType::SampledImage) + resource.basetype == SPIRType::SampledImage || resource.basetype == SPIRType::AccelerationStructure) { // Drop pointer information when we emit the resources into a struct. 
 			buffer_type.member_types.push_back(get_variable_data_type_id(var));
@@ -19827,6 +19997,30 @@ bool CompilerMSL::specialization_constant_is_macro(uint32_t const_id) const
 	return constant_macro_ids.find(const_id) != constant_macro_ids.end();
 }
 
+// Start with all fast math flags enabled, and selectively disable based on execution modes and float controls
+uint32_t CompilerMSL::get_fp_fast_math_flags(bool incl_ops) const
+{
+	uint32_t fp_flags = ~0;
+	auto &ep = get_entry_point();
+
+	if (ep.flags.get(ExecutionModeSignedZeroInfNanPreserve))
+		fp_flags &= ~(FPFastMathModeNSZMask | FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask);
+
+	if (ep.flags.get(ExecutionModeContractionOff))
+		fp_flags &= ~(FPFastMathModeAllowContractMask);
+
+	for (auto &fp_pair : ep.fp_fast_math_defaults)
+		if (fp_pair.second)
+			fp_flags &= get<SPIRConstant>(fp_pair.second).scalar();
+
+	if (incl_ops)
+		for (auto &p_m : ir.meta)
+			if (p_m.second.decoration.decoration_flags.get(DecorationFPFastMathMode))
+				fp_flags &= p_m.second.decoration.fp_fast_math_mode;
+
+	return fp_flags;
+}
+
 void CompilerMSL::emit_block_hints(const SPIRBlock &)
 {
 }
diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp
index a3d08bfcc..cd767ea15 100644
--- a/3rdparty/spirv-cross/spirv_msl.hpp
+++ b/3rdparty/spirv-cross/spirv_msl.hpp
@@ -542,6 +542,10 @@ public:
 		// The result can be queried with get_is_rasterization_disabled.
 		bool auto_disable_rasterization = false;
 
+		// Use Fast Math pragmas in MSL code, based on SPIR-V float controls and FP ExecutionModes.
+		// Requires MSL 3.2 or above, and has no effect with earlier MSL versions.
+		bool use_fast_math_pragmas = false;
+
 		bool is_ios() const
 		{
 			return platform == iOS;
@@ -767,6 +771,14 @@ public:
 	// These must only be called after a successful call to CompilerMSL::compile().
 	bool specialization_constant_is_macro(uint32_t constant_id) const;
 
+	// Returns a mask of SPIR-V FP Fast Math Mode flags that represents the set of flags that can be applied
+	// across all floating-point types. Each FPFastMathDefault execution mode operation identifies the flags
+	// for one floating-point type, and the value returned here is a bitwise-AND combination across all types.
+	// If incl_ops is enabled, the FPFastMathMode of any SPIR-V operations is also included in the bitwise-AND
+	// to determine the minimal fast-math set that applies to all default execution modes and all operations.
+	// The returned value is also affected by the SignedZeroInfNanPreserve and ContractionOff execution modes.
+	uint32_t get_fp_fast_math_flags(bool incl_ops) const;
+
 protected:
 	// An enum of SPIR-V functions that are implemented in additional
 	// source code that is added to the shader if necessary.
@@ -1047,6 +1059,8 @@ protected: void fix_up_shader_inputs_outputs(); + bool entry_point_returns_stage_output() const; + bool entry_point_requires_const_device_buffers() const; std::string func_type_decl(SPIRType &type); std::string entry_point_args_classic(bool append_comma); std::string entry_point_args_argument_buffer(bool append_comma); @@ -1126,7 +1140,7 @@ protected: uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); const char *get_memory_order(uint32_t spv_mem_sem); - void add_pragma_line(const std::string &line); + void add_pragma_line(const std::string &line, bool recompile_on_unique); void add_typedef_line(const std::string &line); void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, @@ -1209,8 +1223,8 @@ protected: std::unordered_map fragment_output_components; std::unordered_map builtin_to_automatic_input_location; std::unordered_map builtin_to_automatic_output_location; - std::set pragma_lines; - std::set typedef_lines; + std::vector pragma_lines; + std::vector typedef_lines; SmallVector vars_needing_early_declaration; std::unordered_set constant_macro_ids; @@ -1252,6 +1266,7 @@ protected: bool using_builtin_array() const; bool is_rasterization_disabled = false; + bool has_descriptor_side_effects = false; bool capture_output_to_buffer = false; bool needs_swizzle_buffer_def = false; bool used_swizzle_buffer = false; diff --git a/3rdparty/spirv-cross/spirv_parser.cpp b/3rdparty/spirv-cross/spirv_parser.cpp index 745ab6e81..f3ea7e288 100644 --- a/3rdparty/spirv-cross/spirv_parser.cpp +++ b/3rdparty/spirv-cross/spirv_parser.cpp @@ -213,8 +213,8 @@ void Parser::parse(const Instruction &instruction) case OpSource: { - auto lang = static_cast(ops[0]); - switch (lang) + ir.source.lang = static_cast(ops[0]); + switch (ir.source.lang) { case SourceLanguageESSL: ir.source.es = true; @@ -318,6 +318,19 @@ void Parser::parse(const Instruction &instruction) ir.load_type_width.insert({ ops[1], type->width }); } } + else if (op == OpExtInst) + { + // Don't want to deal with ForwardRefs here. + + auto &ext = get(ops[2]); + if (ext.ext == SPIRExtension::NonSemanticShaderDebugInfo) + { + // Parse global ShaderDebugInfo we care about. + // Just forward the string information. 
+				if (ops[3] == SPIRExtension::DebugSource)
+					set<SPIRString>(ops[1], get<SPIRString>(ops[4]).str);
+			}
+		}
 		break;
 	}
 
@@ -369,6 +382,30 @@ void Parser::parse(const Instruction &instruction)
 			execution.output_primitives = ops[2];
 			break;
 
+		case ExecutionModeSignedZeroInfNanPreserve:
+			switch (ops[2])
+			{
+			case 8:
+				execution.signed_zero_inf_nan_preserve_8 = true;
+				break;
+
+			case 16:
+				execution.signed_zero_inf_nan_preserve_16 = true;
+				break;
+
+			case 32:
+				execution.signed_zero_inf_nan_preserve_32 = true;
+				break;
+
+			case 64:
+				execution.signed_zero_inf_nan_preserve_64 = true;
+				break;
+
+			default:
+				SPIRV_CROSS_THROW("Invalid bit-width for SignedZeroInfNanPreserve.");
+			}
+			break;
+
 		default:
 			break;
 		}
@@ -381,13 +418,21 @@ void Parser::parse(const Instruction &instruction)
 		auto mode = static_cast<ExecutionMode>(ops[1]);
 		execution.flags.set(mode);
 
-		if (mode == ExecutionModeLocalSizeId)
+		switch (mode)
 		{
+		case ExecutionModeLocalSizeId:
 			execution.workgroup_size.id_x = ops[2];
 			execution.workgroup_size.id_y = ops[3];
 			execution.workgroup_size.id_z = ops[4];
-		}
+			break;
 
+		case ExecutionModeFPFastMathDefault:
+			execution.fp_fast_math_defaults[ops[2]] = ops[3];
+			break;
+
+		default:
+			break;
+		}
 		break;
 	}
 
@@ -538,7 +583,7 @@ void Parser::parse(const Instruction &instruction)
 		uint32_t width = ops[1];
 		auto &type = set<SPIRType>(id, op);
 
-		if (width != 16 && length > 2)
+		if (width != 16 && width != 8 && length > 2)
 			SPIRV_CROSS_THROW("Unrecognized FP encoding mode for OpTypeFloat.");
 
 		if (width == 64)
@@ -557,6 +602,17 @@ void Parser::parse(const Instruction &instruction)
 			else
 				type.basetype = SPIRType::Half;
 		}
+		else if (width == 8)
+		{
+			if (length < 2)
+				SPIRV_CROSS_THROW("Missing encoding for OpTypeFloat 8.");
+			else if (ops[2] == spv::FPEncodingFloat8E4M3EXT)
+				type.basetype = SPIRType::FloatE4M3;
+			else if (ops[2] == spv::FPEncodingFloat8E5M2EXT)
+				type.basetype = SPIRType::FloatE5M2;
+			else
+				SPIRV_CROSS_THROW("Invalid encoding for OpTypeFloat 8.");
+		}
 		else
 			SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type.");
 		type.width = width;
@@ -614,15 +670,33 @@ void Parser::parse(const Instruction &instruction)
 		auto &matrixbase = set<SPIRType>(id, base);
 		matrixbase.op = op;
-		matrixbase.cooperative.scope_id = ops[2];
-		matrixbase.cooperative.rows_id = ops[3];
-		matrixbase.cooperative.columns_id = ops[4];
-		matrixbase.cooperative.use_id = ops[5];
+		matrixbase.ext.cooperative.scope_id = ops[2];
+		matrixbase.ext.cooperative.rows_id = ops[3];
+		matrixbase.ext.cooperative.columns_id = ops[4];
+		matrixbase.ext.cooperative.use_id = ops[5];
 		matrixbase.self = id;
 		matrixbase.parent_type = ops[1];
 		break;
 	}
 
+	case OpTypeCooperativeVectorNV:
+	{
+		uint32_t id = ops[0];
+		auto &type = set<SPIRType>(id, op);
+
+		type.basetype = SPIRType::CoopVecNV;
+		type.op = op;
+		type.ext.coopVecNV.component_type_id = ops[1];
+		type.ext.coopVecNV.component_count_id = ops[2];
+		type.parent_type = ops[1];
+
+		// CoopVec-NV can be used with integer operations like SMax where
+		// spirv-opt does explicit checks on integer bitwidth.
+		auto component_type = get<SPIRType>(type.ext.coopVecNV.component_type_id);
+		type.width = component_type.width;
+		break;
+	}
+
 	case OpTypeArray:
 	{
 		uint32_t id = ops[0];
@@ -820,6 +894,20 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
+	case OpTypeTensorARM:
+	{
+		uint32_t id = ops[0];
+		auto &type = set<SPIRType>(id, op);
+		type.basetype = SPIRType::Tensor;
+		type.ext.tensor = {};
+		type.ext.tensor.type = ops[1];
+		if (length >= 3)
+			type.ext.tensor.rank = ops[2];
+		if (length >= 4)
+			type.ext.tensor.shape = ops[3];
+		break;
+	}
+
 	// Variable declaration
 	//
All variables are essentially pointers with a storage qualifier. case OpVariable: @@ -866,17 +954,27 @@ void Parser::parse(const Instruction &instruction) break; } - // Constants + // Constants case OpSpecConstant: case OpConstant: + case OpConstantCompositeReplicateEXT: + case OpSpecConstantCompositeReplicateEXT: { uint32_t id = ops[1]; auto &type = get(ops[0]); - - if (type.width > 32) - set(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant); + if (op == OpConstantCompositeReplicateEXT || op == OpSpecConstantCompositeReplicateEXT) + { + auto subconstant = uint32_t(ops[2]); + set(id, ops[0], &subconstant, 1, op == OpSpecConstantCompositeReplicateEXT, true); + } else - set(id, ops[0], ops[2], op == OpSpecConstant); + { + + if (type.width > 32) + set(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant); + else + set(id, ops[0], ops[2], op == OpSpecConstant); + } break; } diff --git a/3rdparty/spirv-cross/spirv_reflect.cpp b/3rdparty/spirv-cross/spirv_reflect.cpp index 552d671a6..ab5a91f9e 100644 --- a/3rdparty/spirv-cross/spirv_reflect.cpp +++ b/3rdparty/spirv-cross/spirv_reflect.cpp @@ -477,6 +477,12 @@ string CompilerReflection::execution_model_to_str(spv::ExecutionModel model) return "rmiss"; case ExecutionModelCallableNV: return "rcall"; + case ExecutionModelMeshNV: + case ExecutionModelMeshEXT: + return "mesh"; + case ExecutionModelTaskNV: + case ExecutionModelTaskEXT: + return "task"; default: return "???"; } @@ -504,7 +510,9 @@ void CompilerReflection::emit_entry_points() json_stream->begin_json_object(); json_stream->emit_json_key_value("name", e.name); json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model)); - if (e.execution_model == ExecutionModelGLCompute) + if (e.execution_model == ExecutionModelGLCompute || e.execution_model == spv::ExecutionModelMeshEXT || + e.execution_model == spv::ExecutionModelMeshNV || e.execution_model == spv::ExecutionModelTaskEXT || + e.execution_model == spv::ExecutionModelTaskNV) { const auto &spv_entry = get_entry_point(e.name, e.execution_model); @@ -547,6 +555,7 @@ void CompilerReflection::emit_resources() emit_resources("push_constants", res.push_constant_buffers); emit_resources("counters", res.atomic_counters); emit_resources("acceleration_structures", res.acceleration_structures); + emit_resources("tensors", res.tensors); } void CompilerReflection::emit_resources(const char *tag, const SmallVector &resources)
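
Editor's note, not part of the patch: the fast-math pragma emission added above is opt-in for anyone driving CompilerMSL directly (for example bgfx's shaderc). The sketch below is illustrative only; the helper name to_msl() and the spirv_words input are hypothetical, while the surrounding calls use the standard SPIRV-Cross C++ API plus the use_fast_math_pragmas option introduced in spirv_msl.hpp above.

    #include <cstdint>
    #include <string>
    #include <vector>
    #include "spirv_msl.hpp" // in-tree include path; adjust for an installed package

    // Sketch: cross-compile a SPIR-V module to MSL with the new fast-math pragmas enabled.
    std::string to_msl(const std::vector<uint32_t> &spirv_words)
    {
        spirv_cross::CompilerMSL compiler(spirv_words);

        auto opts = compiler.get_msl_options();
        opts.set_msl_version(3, 2);        // Pragmas are only emitted for MSL 3.2+.
        opts.use_fast_math_pragmas = true; // Emit "#pragma metal fp math_mode(...)" and "fp contract(...)".
        compiler.set_msl_options(opts);

        // compile() returns the MSL source; the pragma selection is derived from the
        // combined FPFastMathMode mask exposed by the new get_fp_fast_math_flags() query.
        return compiler.compile();
    }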