Updated spirv-cross.

The update pulls in regenerated SPIR-V headers (ARM tensor and graph extensions, QCOM tile shading, FP8 encodings, new INTEL handle-conversion ops), reworks the CFG post-order traversal from recursion to an explicit visit stack, and plumbs tensor resources through reflection in both the C++ and C APIs.
3rdparty/spirv-cross/main.cpp | 1 (vendored)
@@ -536,6 +536,7 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res
print_resources(compiler, "push", res.push_constant_buffers);
print_resources(compiler, "counters", res.atomic_counters);
print_resources(compiler, "acceleration structures", res.acceleration_structures);
print_resources(compiler, "tensors", res.tensors);
print_resources(compiler, "record buffers", res.shader_record_buffers);
print_resources(compiler, spv::StorageClassInput, res.builtin_inputs);
print_resources(compiler, spv::StorageClassOutput, res.builtin_outputs);
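For context, the new "tensors" list is populated by the same reflection path as the other categories; a minimal, hedged sketch of consuming it through the C++ API (the function name and the spirv_words buffer are illustrative, not part of the commit):

#include "spirv_cross.hpp"
#include <cstdio>
#include <vector>

// Enumerate the new tensor resources the same way main.cpp prints them.
// Assumes spirv_words holds a valid module declaring TensorsARM resources.
void list_tensor_resources(const std::vector<uint32_t> &spirv_words)
{
	spirv_cross::Compiler compiler(spirv_words);
	spirv_cross::ShaderResources res = compiler.get_shader_resources();
	for (const spirv_cross::Resource &tensor : res.tensors)
	{
		uint32_t set = compiler.get_decoration(tensor.id, spv::DecorationDescriptorSet);
		uint32_t binding = compiler.get_decoration(tensor.id, spv::DecorationBinding);
		printf("tensor %s (set %u, binding %u)\n", tensor.name.c_str(), set, binding);
	}
}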
3rdparty/spirv-cross/spirv.h | 126 (vendored)
@@ -1,27 +1,11 @@
/*
** Copyright (c) 2014-2024 The Khronos Group Inc.
** Copyright: 2014-2024 The Khronos Group Inc.
** License: MIT
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and/or associated documentation files (the "Materials"),
** to deal in the Materials without restriction, including without limitation
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
** and/or sell copies of the Materials, and to permit persons to whom the
** Materials are furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Materials.
**
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
** IN THE MATERIALS.
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
** KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
** SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
** https://www.khronos.org/registry/
*/

/*
@@ -176,6 +160,8 @@ typedef enum SpvExecutionMode_ {
SpvExecutionModeSignedZeroInfNanPreserve = 4461,
SpvExecutionModeRoundingModeRTE = 4462,
SpvExecutionModeRoundingModeRTZ = 4463,
SpvExecutionModeNonCoherentTileAttachmentReadQCOM = 4489,
SpvExecutionModeTileShadingRateQCOM = 4490,
SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
SpvExecutionModeStencilRefReplacingEXT = 5027,
SpvExecutionModeCoalescingAMDX = 5069,
@@ -245,6 +231,7 @@ typedef enum SpvStorageClass_ {
SpvStorageClassImage = 11,
SpvStorageClassStorageBuffer = 12,
SpvStorageClassTileImageEXT = 4172,
SpvStorageClassTileAttachmentQCOM = 4491,
SpvStorageClassNodePayloadAMDX = 5068,
SpvStorageClassCallableDataKHR = 5328,
SpvStorageClassCallableDataNV = 5328,
@@ -554,6 +541,7 @@ typedef enum SpvDecoration_ {
SpvDecorationMaxByteOffset = 45,
SpvDecorationAlignmentId = 46,
SpvDecorationMaxByteOffsetId = 47,
SpvDecorationSaturatedToLargestFloat8NormalConversionEXT = 4216,
SpvDecorationNoSignedWrap = 4469,
SpvDecorationNoUnsignedWrap = 4470,
SpvDecorationWeightTextureQCOM = 4487,
@@ -723,6 +711,9 @@ typedef enum SpvBuiltIn_ {
SpvBuiltInDeviceIndex = 4438,
SpvBuiltInViewIndex = 4440,
SpvBuiltInShadingRateKHR = 4444,
SpvBuiltInTileOffsetQCOM = 4492,
SpvBuiltInTileDimensionQCOM = 4493,
SpvBuiltInTileApronSizeQCOM = 4494,
SpvBuiltInBaryCoordNoPerspAMD = 4992,
SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993,
SpvBuiltInBaryCoordNoPerspSampleAMD = 4994,
@@ -1073,7 +1064,13 @@ typedef enum SpvCapability_ {
SpvCapabilityTileImageColorReadAccessEXT = 4166,
SpvCapabilityTileImageDepthReadAccessEXT = 4167,
SpvCapabilityTileImageStencilReadAccessEXT = 4168,
SpvCapabilityTensorsARM = 4174,
SpvCapabilityStorageTensorArrayDynamicIndexingARM = 4175,
SpvCapabilityStorageTensorArrayNonUniformIndexingARM = 4176,
SpvCapabilityGraphARM = 4191,
SpvCapabilityCooperativeMatrixLayoutsARM = 4201,
SpvCapabilityFloat8EXT = 4212,
SpvCapabilityFloat8CooperativeMatrixEXT = 4213,
SpvCapabilityFragmentShadingRateKHR = 4422,
SpvCapabilitySubgroupBallotKHR = 4423,
SpvCapabilityDrawParameters = 4427,
@@ -1109,6 +1106,7 @@ typedef enum SpvCapability_ {
SpvCapabilityTextureSampleWeightedQCOM = 4484,
SpvCapabilityTextureBoxFilterQCOM = 4485,
SpvCapabilityTextureBlockMatchQCOM = 4486,
SpvCapabilityTileShadingQCOM = 4495,
SpvCapabilityTextureBlockMatch2QCOM = 4498,
SpvCapabilityFloat16ImageAMD = 5008,
SpvCapabilityImageGatherBiasLodAMD = 5009,
@@ -1119,6 +1117,8 @@ typedef enum SpvCapability_ {
SpvCapabilityShaderClockKHR = 5055,
SpvCapabilityShaderEnqueueAMDX = 5067,
SpvCapabilityQuadControlKHR = 5087,
SpvCapabilityInt4TypeINTEL = 5112,
SpvCapabilityInt4CooperativeMatrixINTEL = 5114,
SpvCapabilityBFloat16TypeKHR = 5116,
SpvCapabilityBFloat16DotProductKHR = 5117,
SpvCapabilityBFloat16CooperativeMatrixKHR = 5118,
@@ -1287,6 +1287,7 @@ typedef enum SpvCapability_ {
SpvCapabilityMaskedGatherScatterINTEL = 6427,
SpvCapabilityCacheControlsINTEL = 6441,
SpvCapabilityRegisterLimitsINTEL = 6460,
SpvCapabilityBindlessImagesINTEL = 6528,
SpvCapabilityMax = 0x7fffffff,
} SpvCapability;

@@ -1463,6 +1464,24 @@ typedef enum SpvTensorAddressingOperandsMask_ {
SpvTensorAddressingOperandsDecodeFuncMask = 0x00000002,
} SpvTensorAddressingOperandsMask;

typedef enum SpvTensorOperandsShift_ {
SpvTensorOperandsNontemporalARMShift = 0,
SpvTensorOperandsOutOfBoundsValueARMShift = 1,
SpvTensorOperandsMakeElementAvailableARMShift = 2,
SpvTensorOperandsMakeElementVisibleARMShift = 3,
SpvTensorOperandsNonPrivateElementARMShift = 4,
SpvTensorOperandsMax = 0x7fffffff,
} SpvTensorOperandsShift;

typedef enum SpvTensorOperandsMask_ {
SpvTensorOperandsMaskNone = 0,
SpvTensorOperandsNontemporalARMMask = 0x00000001,
SpvTensorOperandsOutOfBoundsValueARMMask = 0x00000002,
SpvTensorOperandsMakeElementAvailableARMMask = 0x00000004,
SpvTensorOperandsMakeElementVisibleARMMask = 0x00000008,
SpvTensorOperandsNonPrivateElementARMMask = 0x00000010,
} SpvTensorOperandsMask;
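As with the other Shift/Mask pairs in this header, each mask bit is 1u shifted by the matching shift value, so the two enums stay in lockstep; a small sanity check (illustrative only):

SpvTensorOperandsMask mask = (SpvTensorOperandsMask)(
    (1u << SpvTensorOperandsNontemporalARMShift) |
    (1u << SpvTensorOperandsMakeElementVisibleARMShift));
/* mask == (SpvTensorOperandsNontemporalARMMask |
            SpvTensorOperandsMakeElementVisibleARMMask) == 0x00000009 */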

typedef enum SpvInitializationModeQualifier_ {
SpvInitializationModeQualifierInitOnDeviceReprogramINTEL = 0,
SpvInitializationModeQualifierInitOnDeviceResetINTEL = 1,
@@ -1549,6 +1568,8 @@ typedef enum SpvRawAccessChainOperandsMask_ {

typedef enum SpvFPEncoding_ {
SpvFPEncodingBFloat16KHR = 0,
SpvFPEncodingFloat8E4M3EXT = 4214,
SpvFPEncodingFloat8E5M2EXT = 4215,
SpvFPEncodingMax = 0x7fffffff,
} SpvFPEncoding;

@@ -1927,6 +1948,17 @@ typedef enum SpvOp_ {
SpvOpColorAttachmentReadEXT = 4160,
SpvOpDepthAttachmentReadEXT = 4161,
SpvOpStencilAttachmentReadEXT = 4162,
SpvOpTypeTensorARM = 4163,
SpvOpTensorReadARM = 4164,
SpvOpTensorWriteARM = 4165,
SpvOpTensorQuerySizeARM = 4166,
SpvOpGraphConstantARM = 4181,
SpvOpGraphEntryPointARM = 4182,
SpvOpGraphARM = 4183,
SpvOpGraphInputARM = 4184,
SpvOpGraphSetOutputARM = 4185,
SpvOpGraphEndARM = 4186,
SpvOpTypeGraphARM = 4190,
SpvOpTerminateInvocation = 4416,
SpvOpTypeUntypedPointerKHR = 4417,
SpvOpUntypedVariableKHR = 4418,
@@ -2385,6 +2417,9 @@ typedef enum SpvOp_ {
SpvOpRoundFToTF32INTEL = 6426,
SpvOpMaskedGatherINTEL = 6428,
SpvOpMaskedScatterINTEL = 6429,
SpvOpConvertHandleToImageINTEL = 6529,
SpvOpConvertHandleToSamplerINTEL = 6530,
SpvOpConvertHandleToSampledImageINTEL = 6531,
SpvOpMax = 0x7fffffff,
} SpvOp;

@@ -2743,6 +2778,17 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy
case SpvOpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case SpvOpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case SpvOpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case SpvOpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
case SpvOpTensorReadARM: *hasResult = true; *hasResultType = true; break;
case SpvOpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
case SpvOpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
case SpvOpGraphConstantARM: *hasResult = true; *hasResultType = true; break;
case SpvOpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break;
case SpvOpGraphARM: *hasResult = true; *hasResultType = true; break;
case SpvOpGraphInputARM: *hasResult = true; *hasResultType = true; break;
case SpvOpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break;
case SpvOpGraphEndARM: *hasResult = false; *hasResultType = false; break;
case SpvOpTypeGraphARM: *hasResult = true; *hasResultType = false; break;
case SpvOpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
case SpvOpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
case SpvOpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
@@ -3190,6 +3236,9 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy
case SpvOpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break;
case SpvOpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break;
case SpvOpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break;
case SpvOpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break;
case SpvOpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break;
case SpvOpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break;
}
}
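SpvHasResultAndType is the hook this generated header offers for walking a raw module; a hedged sketch of its use (words/num_words are hypothetical; the instruction layout — word 0 packing the word count in the high 16 bits and the opcode in the low 16, after a 5-word module header — is per the SPIR-V spec):

size_t i = 5; /* skip the 5-word SPIR-V module header */
while (i < num_words)
{
	uint32_t word_count = words[i] >> 16;
	SpvOp opcode = (SpvOp)(words[i] & 0xffffu);
	if (word_count == 0)
		break; /* malformed stream */
	bool has_result = false, has_result_type = false;
	SpvHasResultAndType(opcode, &has_result, &has_result_type);
	if (has_result)
	{
		/* The result ID follows the result type ID, if any. */
		uint32_t result_id = words[i + (has_result_type ? 2 : 1)];
		(void)result_id;
	}
	i += word_count;
}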
inline const char* SpvSourceLanguageToString(SpvSourceLanguage value) {
@@ -3305,6 +3354,8 @@ inline const char* SpvExecutionModeToString(SpvExecutionMode value) {
case SpvExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve";
case SpvExecutionModeRoundingModeRTE: return "RoundingModeRTE";
case SpvExecutionModeRoundingModeRTZ: return "RoundingModeRTZ";
case SpvExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM";
case SpvExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM";
case SpvExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD";
case SpvExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT";
case SpvExecutionModeCoalescingAMDX: return "CoalescingAMDX";
@@ -3371,6 +3422,7 @@ inline const char* SpvStorageClassToString(SpvStorageClass value) {
case SpvStorageClassImage: return "Image";
case SpvStorageClassStorageBuffer: return "StorageBuffer";
case SpvStorageClassTileImageEXT: return "TileImageEXT";
case SpvStorageClassTileAttachmentQCOM: return "TileAttachmentQCOM";
case SpvStorageClassNodePayloadAMDX: return "NodePayloadAMDX";
case SpvStorageClassCallableDataKHR: return "CallableDataKHR";
case SpvStorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR";
@@ -3619,6 +3671,7 @@ inline const char* SpvDecorationToString(SpvDecoration value) {
case SpvDecorationMaxByteOffset: return "MaxByteOffset";
case SpvDecorationAlignmentId: return "AlignmentId";
case SpvDecorationMaxByteOffsetId: return "MaxByteOffsetId";
case SpvDecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
case SpvDecorationNoSignedWrap: return "NoSignedWrap";
case SpvDecorationNoUnsignedWrap: return "NoUnsignedWrap";
case SpvDecorationWeightTextureQCOM: return "WeightTextureQCOM";
@@ -3778,6 +3831,9 @@ inline const char* SpvBuiltInToString(SpvBuiltIn value) {
case SpvBuiltInDeviceIndex: return "DeviceIndex";
case SpvBuiltInViewIndex: return "ViewIndex";
case SpvBuiltInShadingRateKHR: return "ShadingRateKHR";
case SpvBuiltInTileOffsetQCOM: return "TileOffsetQCOM";
case SpvBuiltInTileDimensionQCOM: return "TileDimensionQCOM";
case SpvBuiltInTileApronSizeQCOM: return "TileApronSizeQCOM";
case SpvBuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD";
case SpvBuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD";
case SpvBuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD";
@@ -3958,7 +4014,13 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
case SpvCapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
case SpvCapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
case SpvCapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
case SpvCapabilityTensorsARM: return "TensorsARM";
case SpvCapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM";
case SpvCapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM";
case SpvCapabilityGraphARM: return "GraphARM";
case SpvCapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
case SpvCapabilityFloat8EXT: return "Float8EXT";
case SpvCapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
case SpvCapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
case SpvCapabilitySubgroupBallotKHR: return "SubgroupBallotKHR";
case SpvCapabilityDrawParameters: return "DrawParameters";
@@ -3992,6 +4054,7 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
case SpvCapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM";
case SpvCapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
case SpvCapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
case SpvCapabilityTileShadingQCOM: return "TileShadingQCOM";
case SpvCapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
case SpvCapabilityFloat16ImageAMD: return "Float16ImageAMD";
case SpvCapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
@@ -4002,6 +4065,8 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
case SpvCapabilityShaderClockKHR: return "ShaderClockKHR";
case SpvCapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX";
case SpvCapabilityQuadControlKHR: return "QuadControlKHR";
case SpvCapabilityInt4TypeINTEL: return "Int4TypeINTEL";
case SpvCapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL";
case SpvCapabilityBFloat16TypeKHR: return "BFloat16TypeKHR";
case SpvCapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR";
case SpvCapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR";
@@ -4144,6 +4209,7 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
case SpvCapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL";
case SpvCapabilityCacheControlsINTEL: return "CacheControlsINTEL";
case SpvCapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL";
case SpvCapabilityBindlessImagesINTEL: return "BindlessImagesINTEL";
default: return "Unknown";
}
}
@@ -4299,6 +4365,8 @@ inline const char* SpvNamedMaximumNumberOfRegistersToString(SpvNamedMaximumNumbe
inline const char* SpvFPEncodingToString(SpvFPEncoding value) {
switch (value) {
case SpvFPEncodingBFloat16KHR: return "BFloat16KHR";
case SpvFPEncodingFloat8E4M3EXT: return "Float8E4M3EXT";
case SpvFPEncodingFloat8E5M2EXT: return "Float8E5M2EXT";
default: return "Unknown";
}
}
@@ -4683,6 +4751,17 @@ inline const char* SpvOpToString(SpvOp value) {
case SpvOpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
case SpvOpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
case SpvOpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
case SpvOpTypeTensorARM: return "OpTypeTensorARM";
case SpvOpTensorReadARM: return "OpTensorReadARM";
case SpvOpTensorWriteARM: return "OpTensorWriteARM";
case SpvOpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
case SpvOpGraphConstantARM: return "OpGraphConstantARM";
case SpvOpGraphEntryPointARM: return "OpGraphEntryPointARM";
case SpvOpGraphARM: return "OpGraphARM";
case SpvOpGraphInputARM: return "OpGraphInputARM";
case SpvOpGraphSetOutputARM: return "OpGraphSetOutputARM";
case SpvOpGraphEndARM: return "OpGraphEndARM";
case SpvOpTypeGraphARM: return "OpTypeGraphARM";
case SpvOpTerminateInvocation: return "OpTerminateInvocation";
case SpvOpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
case SpvOpUntypedVariableKHR: return "OpUntypedVariableKHR";
@@ -5130,6 +5209,9 @@ inline const char* SpvOpToString(SpvOp value) {
case SpvOpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL";
case SpvOpMaskedGatherINTEL: return "OpMaskedGatherINTEL";
case SpvOpMaskedScatterINTEL: return "OpMaskedScatterINTEL";
case SpvOpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL";
case SpvOpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL";
case SpvOpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL";
default: return "Unknown";
}
}
3rdparty/spirv-cross/spirv.hpp | 130 (vendored)
@@ -1,26 +1,10 @@
// Copyright (c) 2014-2024 The Khronos Group Inc.
// Copyright: 2014-2024 The Khronos Group Inc.
// License: MIT
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and/or associated documentation files (the "Materials"),
// to deal in the Materials without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Materials, and to permit persons to whom the
// Materials are furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Materials.
//
// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
//
// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
// IN THE MATERIALS.
// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
// https://www.khronos.org/registry/

// This header is automatically generated by the same tool that creates
// the Binary Section of the SPIR-V specification.
@@ -172,6 +156,8 @@ enum ExecutionMode {
ExecutionModeSignedZeroInfNanPreserve = 4461,
ExecutionModeRoundingModeRTE = 4462,
ExecutionModeRoundingModeRTZ = 4463,
ExecutionModeNonCoherentTileAttachmentReadQCOM = 4489,
ExecutionModeTileShadingRateQCOM = 4490,
ExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
ExecutionModeStencilRefReplacingEXT = 5027,
ExecutionModeCoalescingAMDX = 5069,
@@ -241,6 +227,7 @@ enum StorageClass {
StorageClassImage = 11,
StorageClassStorageBuffer = 12,
StorageClassTileImageEXT = 4172,
StorageClassTileAttachmentQCOM = 4491,
StorageClassNodePayloadAMDX = 5068,
StorageClassCallableDataKHR = 5328,
StorageClassCallableDataNV = 5328,
@@ -550,6 +537,7 @@ enum Decoration {
DecorationMaxByteOffset = 45,
DecorationAlignmentId = 46,
DecorationMaxByteOffsetId = 47,
DecorationSaturatedToLargestFloat8NormalConversionEXT = 4216,
DecorationNoSignedWrap = 4469,
DecorationNoUnsignedWrap = 4470,
DecorationWeightTextureQCOM = 4487,
@@ -719,6 +707,9 @@ enum BuiltIn {
BuiltInDeviceIndex = 4438,
BuiltInViewIndex = 4440,
BuiltInShadingRateKHR = 4444,
BuiltInTileOffsetQCOM = 4492,
BuiltInTileDimensionQCOM = 4493,
BuiltInTileApronSizeQCOM = 4494,
BuiltInBaryCoordNoPerspAMD = 4992,
BuiltInBaryCoordNoPerspCentroidAMD = 4993,
BuiltInBaryCoordNoPerspSampleAMD = 4994,
@@ -1069,7 +1060,13 @@ enum Capability {
CapabilityTileImageColorReadAccessEXT = 4166,
CapabilityTileImageDepthReadAccessEXT = 4167,
CapabilityTileImageStencilReadAccessEXT = 4168,
CapabilityTensorsARM = 4174,
CapabilityStorageTensorArrayDynamicIndexingARM = 4175,
CapabilityStorageTensorArrayNonUniformIndexingARM = 4176,
CapabilityGraphARM = 4191,
CapabilityCooperativeMatrixLayoutsARM = 4201,
CapabilityFloat8EXT = 4212,
CapabilityFloat8CooperativeMatrixEXT = 4213,
CapabilityFragmentShadingRateKHR = 4422,
CapabilitySubgroupBallotKHR = 4423,
CapabilityDrawParameters = 4427,
@@ -1105,6 +1102,7 @@ enum Capability {
CapabilityTextureSampleWeightedQCOM = 4484,
CapabilityTextureBoxFilterQCOM = 4485,
CapabilityTextureBlockMatchQCOM = 4486,
CapabilityTileShadingQCOM = 4495,
CapabilityTextureBlockMatch2QCOM = 4498,
CapabilityFloat16ImageAMD = 5008,
CapabilityImageGatherBiasLodAMD = 5009,
@@ -1115,6 +1113,8 @@ enum Capability {
CapabilityShaderClockKHR = 5055,
CapabilityShaderEnqueueAMDX = 5067,
CapabilityQuadControlKHR = 5087,
CapabilityInt4TypeINTEL = 5112,
CapabilityInt4CooperativeMatrixINTEL = 5114,
CapabilityBFloat16TypeKHR = 5116,
CapabilityBFloat16DotProductKHR = 5117,
CapabilityBFloat16CooperativeMatrixKHR = 5118,
@@ -1283,6 +1283,7 @@ enum Capability {
CapabilityMaskedGatherScatterINTEL = 6427,
CapabilityCacheControlsINTEL = 6441,
CapabilityRegisterLimitsINTEL = 6460,
CapabilityBindlessImagesINTEL = 6528,
CapabilityMax = 0x7fffffff,
};

@@ -1459,6 +1460,24 @@ enum TensorAddressingOperandsMask {
TensorAddressingOperandsDecodeFuncMask = 0x00000002,
};

enum TensorOperandsShift {
TensorOperandsNontemporalARMShift = 0,
TensorOperandsOutOfBoundsValueARMShift = 1,
TensorOperandsMakeElementAvailableARMShift = 2,
TensorOperandsMakeElementVisibleARMShift = 3,
TensorOperandsNonPrivateElementARMShift = 4,
TensorOperandsMax = 0x7fffffff,
};

enum TensorOperandsMask {
TensorOperandsMaskNone = 0,
TensorOperandsNontemporalARMMask = 0x00000001,
TensorOperandsOutOfBoundsValueARMMask = 0x00000002,
TensorOperandsMakeElementAvailableARMMask = 0x00000004,
TensorOperandsMakeElementVisibleARMMask = 0x00000008,
TensorOperandsNonPrivateElementARMMask = 0x00000010,
};

enum InitializationModeQualifier {
InitializationModeQualifierInitOnDeviceReprogramINTEL = 0,
InitializationModeQualifierInitOnDeviceResetINTEL = 1,
@@ -1545,6 +1564,8 @@ enum RawAccessChainOperandsMask {

enum FPEncoding {
FPEncodingBFloat16KHR = 0,
FPEncodingFloat8E4M3EXT = 4214,
FPEncodingFloat8E5M2EXT = 4215,
FPEncodingMax = 0x7fffffff,
};
@@ -1923,6 +1944,17 @@ enum Op {
OpColorAttachmentReadEXT = 4160,
OpDepthAttachmentReadEXT = 4161,
OpStencilAttachmentReadEXT = 4162,
OpTypeTensorARM = 4163,
OpTensorReadARM = 4164,
OpTensorWriteARM = 4165,
OpTensorQuerySizeARM = 4166,
OpGraphConstantARM = 4181,
OpGraphEntryPointARM = 4182,
OpGraphARM = 4183,
OpGraphInputARM = 4184,
OpGraphSetOutputARM = 4185,
OpGraphEndARM = 4186,
OpTypeGraphARM = 4190,
OpTerminateInvocation = 4416,
OpTypeUntypedPointerKHR = 4417,
OpUntypedVariableKHR = 4418,
@@ -2381,6 +2413,9 @@ enum Op {
OpRoundFToTF32INTEL = 6426,
OpMaskedGatherINTEL = 6428,
OpMaskedScatterINTEL = 6429,
OpConvertHandleToImageINTEL = 6529,
OpConvertHandleToSamplerINTEL = 6530,
OpConvertHandleToSampledImageINTEL = 6531,
OpMax = 0x7fffffff,
};

@@ -2739,6 +2774,17 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case OpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case OpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case OpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
case OpTensorReadARM: *hasResult = true; *hasResultType = true; break;
case OpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
case OpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
case OpGraphConstantARM: *hasResult = true; *hasResultType = true; break;
case OpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break;
case OpGraphARM: *hasResult = true; *hasResultType = true; break;
case OpGraphInputARM: *hasResult = true; *hasResultType = true; break;
case OpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break;
case OpGraphEndARM: *hasResult = false; *hasResultType = false; break;
case OpTypeGraphARM: *hasResult = true; *hasResultType = false; break;
case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
case OpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
case OpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
@@ -3186,6 +3232,9 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break;
case OpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break;
case OpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break;
case OpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break;
case OpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break;
case OpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break;
}
}
inline const char* SourceLanguageToString(SourceLanguage value) {
@@ -3301,6 +3350,8 @@ inline const char* ExecutionModeToString(ExecutionMode value) {
case ExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve";
case ExecutionModeRoundingModeRTE: return "RoundingModeRTE";
case ExecutionModeRoundingModeRTZ: return "RoundingModeRTZ";
case ExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM";
case ExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM";
case ExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD";
case ExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT";
case ExecutionModeCoalescingAMDX: return "CoalescingAMDX";
@@ -3367,6 +3418,7 @@ inline const char* StorageClassToString(StorageClass value) {
case StorageClassImage: return "Image";
case StorageClassStorageBuffer: return "StorageBuffer";
case StorageClassTileImageEXT: return "TileImageEXT";
case StorageClassTileAttachmentQCOM: return "TileAttachmentQCOM";
case StorageClassNodePayloadAMDX: return "NodePayloadAMDX";
case StorageClassCallableDataKHR: return "CallableDataKHR";
case StorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR";
@@ -3615,6 +3667,7 @@ inline const char* DecorationToString(Decoration value) {
case DecorationMaxByteOffset: return "MaxByteOffset";
case DecorationAlignmentId: return "AlignmentId";
case DecorationMaxByteOffsetId: return "MaxByteOffsetId";
case DecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
case DecorationNoSignedWrap: return "NoSignedWrap";
case DecorationNoUnsignedWrap: return "NoUnsignedWrap";
case DecorationWeightTextureQCOM: return "WeightTextureQCOM";
@@ -3774,6 +3827,9 @@ inline const char* BuiltInToString(BuiltIn value) {
case BuiltInDeviceIndex: return "DeviceIndex";
case BuiltInViewIndex: return "ViewIndex";
case BuiltInShadingRateKHR: return "ShadingRateKHR";
case BuiltInTileOffsetQCOM: return "TileOffsetQCOM";
case BuiltInTileDimensionQCOM: return "TileDimensionQCOM";
case BuiltInTileApronSizeQCOM: return "TileApronSizeQCOM";
case BuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD";
case BuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD";
case BuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD";
@@ -3954,7 +4010,13 @@ inline const char* CapabilityToString(Capability value) {
case CapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
case CapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
case CapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
case CapabilityTensorsARM: return "TensorsARM";
case CapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM";
case CapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM";
case CapabilityGraphARM: return "GraphARM";
case CapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
case CapabilityFloat8EXT: return "Float8EXT";
case CapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
case CapabilitySubgroupBallotKHR: return "SubgroupBallotKHR";
case CapabilityDrawParameters: return "DrawParameters";
@@ -3988,6 +4050,7 @@ inline const char* CapabilityToString(Capability value) {
case CapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM";
case CapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
case CapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
case CapabilityTileShadingQCOM: return "TileShadingQCOM";
case CapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
case CapabilityFloat16ImageAMD: return "Float16ImageAMD";
case CapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
@@ -3998,6 +4061,8 @@ inline const char* CapabilityToString(Capability value) {
case CapabilityShaderClockKHR: return "ShaderClockKHR";
case CapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX";
case CapabilityQuadControlKHR: return "QuadControlKHR";
case CapabilityInt4TypeINTEL: return "Int4TypeINTEL";
case CapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL";
case CapabilityBFloat16TypeKHR: return "BFloat16TypeKHR";
case CapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR";
case CapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR";
@@ -4140,6 +4205,7 @@ inline const char* CapabilityToString(Capability value) {
case CapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL";
case CapabilityCacheControlsINTEL: return "CacheControlsINTEL";
case CapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL";
case CapabilityBindlessImagesINTEL: return "BindlessImagesINTEL";
default: return "Unknown";
}
}
@@ -4295,6 +4361,8 @@ inline const char* NamedMaximumNumberOfRegistersToString(NamedMaximumNumberOfReg
inline const char* FPEncodingToString(FPEncoding value) {
switch (value) {
case FPEncodingBFloat16KHR: return "BFloat16KHR";
case FPEncodingFloat8E4M3EXT: return "Float8E4M3EXT";
case FPEncodingFloat8E5M2EXT: return "Float8E5M2EXT";
default: return "Unknown";
}
}
@@ -4679,6 +4747,17 @@ inline const char* OpToString(Op value) {
case OpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
case OpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
case OpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
case OpTypeTensorARM: return "OpTypeTensorARM";
case OpTensorReadARM: return "OpTensorReadARM";
case OpTensorWriteARM: return "OpTensorWriteARM";
case OpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
case OpGraphConstantARM: return "OpGraphConstantARM";
case OpGraphEntryPointARM: return "OpGraphEntryPointARM";
case OpGraphARM: return "OpGraphARM";
case OpGraphInputARM: return "OpGraphInputARM";
case OpGraphSetOutputARM: return "OpGraphSetOutputARM";
case OpGraphEndARM: return "OpGraphEndARM";
case OpTypeGraphARM: return "OpTypeGraphARM";
case OpTerminateInvocation: return "OpTerminateInvocation";
case OpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
case OpUntypedVariableKHR: return "OpUntypedVariableKHR";
@@ -5126,6 +5205,9 @@ inline const char* OpToString(Op value) {
case OpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL";
case OpMaskedGatherINTEL: return "OpMaskedGatherINTEL";
case OpMaskedScatterINTEL: return "OpMaskedScatterINTEL";
case OpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL";
case OpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL";
case OpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL";
default: return "Unknown";
}
}
@@ -5186,6 +5268,10 @@ inline TensorAddressingOperandsMask operator|(TensorAddressingOperandsMask a, Te
inline TensorAddressingOperandsMask operator&(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) & unsigned(b)); }
inline TensorAddressingOperandsMask operator^(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) ^ unsigned(b)); }
inline TensorAddressingOperandsMask operator~(TensorAddressingOperandsMask a) { return TensorAddressingOperandsMask(~unsigned(a)); }
inline TensorOperandsMask operator|(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) | unsigned(b)); }
inline TensorOperandsMask operator&(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) & unsigned(b)); }
inline TensorOperandsMask operator^(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) ^ unsigned(b)); }
inline TensorOperandsMask operator~(TensorOperandsMask a) { return TensorOperandsMask(~unsigned(a)); }
inline MatrixMultiplyAccumulateOperandsMask operator|(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) | unsigned(b)); }
inline MatrixMultiplyAccumulateOperandsMask operator&(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) & unsigned(b)); }
inline MatrixMultiplyAccumulateOperandsMask operator^(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) ^ unsigned(b)); }
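The operator overloads added here let the new mask enums combine without manual casts, e.g. (illustrative):

spv::TensorOperandsMask m = spv::TensorOperandsNontemporalARMMask |
                            spv::TensorOperandsNonPrivateElementARMMask;
// m == spv::TensorOperandsMask(0x00000011)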
3rdparty/spirv-cross/spirv_cfg.cpp | 124 (vendored)
@@ -81,31 +81,105 @@ bool CFG::is_back_edge(uint32_t to) const
// We have a back edge if the visit order is set with the temporary magic value 0.
// Crossing edges will have already been recorded with a visit order.
auto itr = visit_order.find(to);
return itr != end(visit_order) && itr->second.get() == 0;
return itr != end(visit_order) && itr->second.visited_branches && !itr->second.visited_resolve;
}

bool CFG::has_visited_forward_edge(uint32_t to) const
bool CFG::has_visited_branch(uint32_t to) const
{
// If > 0, we have visited the edge already, and this is not a back edge branch.
auto itr = visit_order.find(to);
return itr != end(visit_order) && itr->second.get() > 0;
return itr != end(visit_order) && itr->second.visited_branches;
}

bool CFG::post_order_visit(uint32_t block_id)
void CFG::post_order_visit_entry(uint32_t block)
{
// If we have already branched to this block (back edge), stop recursion.
// If our branches are back-edges, we do not record them.
// We have to record crossing edges however.
if (has_visited_forward_edge(block_id))
return true;
else if (is_back_edge(block_id))
return false;
visit_stack.push_back(block);

// Block back-edges from recursively revisiting ourselves.
visit_order[block_id].get() = 0;
while (!visit_stack.empty())
{
bool keep_iterating;
do
{
// Reverse the order to allow for stack-like behavior and preserves the visit order from recursive algorithm.
// Traverse depth first.
uint32_t to_visit = visit_stack.back();
last_visited_size = visit_stack.size();
post_order_visit_branches(to_visit);
keep_iterating = last_visited_size != visit_stack.size();
if (keep_iterating)
std::reverse(visit_stack.begin() + last_visited_size, visit_stack.end());
} while (keep_iterating);

// We've reached the end of some tree leaf. Resolve the stack.
// Any node which has been visited for real can be popped now.
while (!visit_stack.empty() && visit_order[visit_stack.back()].visited_branches)
{
post_order_visit_resolve(visit_stack.back());
visit_stack.pop_back();
}
}
}

void CFG::visit_branch(uint32_t block_id)
{
// Prune obvious duplicates.
if (std::find(visit_stack.begin() + last_visited_size, visit_stack.end(), block_id) == visit_stack.end() &&
!has_visited_branch(block_id))
{
visit_stack.push_back(block_id);
}
}

void CFG::post_order_visit_branches(uint32_t block_id)
{
auto &block = compiler.get<SPIRBlock>(block_id);

auto &visit = visit_order[block_id];
if (visit.visited_branches)
return;
visit.visited_branches = true;

if (block.merge == SPIRBlock::MergeLoop)
visit_branch(block.merge_block);
else if (block.merge == SPIRBlock::MergeSelection)
visit_branch(block.next_block);

// First visit our branch targets.
switch (block.terminator)
{
case SPIRBlock::Direct:
visit_branch(block.next_block);
break;

case SPIRBlock::Select:
visit_branch(block.true_block);
visit_branch(block.false_block);
break;

case SPIRBlock::MultiSelect:
{
const auto &cases = compiler.get_case_list(block);
for (const auto &target : cases)
visit_branch(target.block);
if (block.default_block)
visit_branch(block.default_block);
break;
}

default:
break;
}
}

void CFG::post_order_visit_resolve(uint32_t block_id)
{
auto &block = compiler.get<SPIRBlock>(block_id);

auto &visit_block = visit_order[block_id];
assert(visit_block.visited_branches);
auto &visited = visit_order[block_id].visited_resolve;
if (visited)
return;

// If this is a loop header, add an implied branch to the merge target.
// This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
// To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
@@ -116,21 +190,21 @@ bool CFG::post_order_visit(uint32_t block_id)
// is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis.
// For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine,
// but for loops, only the header might end up actually branching to merge block.
if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block))
if (block.merge == SPIRBlock::MergeLoop && !is_back_edge(block.merge_block))
add_branch(block_id, block.merge_block);

// First visit our branch targets.
switch (block.terminator)
{
case SPIRBlock::Direct:
if (post_order_visit(block.next_block))
if (!is_back_edge(block.next_block))
add_branch(block_id, block.next_block);
break;

case SPIRBlock::Select:
if (post_order_visit(block.true_block))
if (!is_back_edge(block.true_block))
add_branch(block_id, block.true_block);
if (post_order_visit(block.false_block))
if (!is_back_edge(block.false_block))
add_branch(block_id, block.false_block);
break;

@@ -139,10 +213,10 @@ bool CFG::post_order_visit(uint32_t block_id)
const auto &cases = compiler.get_case_list(block);
for (const auto &target : cases)
{
if (post_order_visit(target.block))
if (!is_back_edge(target.block))
add_branch(block_id, target.block);
}
if (block.default_block && post_order_visit(block.default_block))
if (block.default_block && !is_back_edge(block.default_block))
add_branch(block_id, block.default_block);
break;
}
@@ -157,7 +231,7 @@ bool CFG::post_order_visit(uint32_t block_id)
// We can use the variable without a Phi since there is only one possible parent here.
// However, in this case, we need to hoist out the inner variable to outside the branch.
// Use same strategy as loops.
if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block))
if (block.merge == SPIRBlock::MergeSelection && !is_back_edge(block.next_block))
{
// If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup.
// Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement
@@ -201,10 +275,9 @@ bool CFG::post_order_visit(uint32_t block_id)
}
}

// Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
visit_order[block_id].get() = ++visit_count;
visited = true;
visit_block.order = ++visit_count;
post_order.push_back(block_id);
return true;
}

void CFG::build_post_order_visit_order()
@@ -213,11 +286,12 @@ void CFG::build_post_order_visit_order()
visit_count = 0;
visit_order.clear();
post_order.clear();
post_order_visit(block);
post_order_visit_entry(block);
}

void CFG::add_branch(uint32_t from, uint32_t to)
{
assert(from && to);
const auto add_unique = [](SmallVector<uint32_t> &l, uint32_t value) {
auto itr = find(begin(l), end(l), value);
if (itr == end(l))
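For orientation: this hunk replaces the recursive post_order_visit with an explicit visit stack split into two phases per block — visited_branches (successors pushed) and visited_resolve (post-order number assigned) — which avoids deep recursion on large CFGs. A generic, hedged sketch of that shape, not the library's exact code:

#include <cstdint>
#include <unordered_map>
#include <vector>

struct Visit { bool expanded = false; bool emitted = false; };

// Iterative post-order over successor lists: children are emitted before
// their parents, and an already-expanded target is treated as a
// back/crossing edge and not pushed again.
void post_order(uint32_t entry,
                const std::unordered_map<uint32_t, std::vector<uint32_t>> &succ,
                std::vector<uint32_t> &out)
{
	std::unordered_map<uint32_t, Visit> state;
	std::vector<uint32_t> stack = { entry };
	while (!stack.empty())
	{
		uint32_t top = stack.back();
		if (!state[top].expanded)
		{
			// Phase 1 (cf. visited_branches): push unexpanded successors.
			state[top].expanded = true;
			auto itr = succ.find(top);
			if (itr != succ.end())
				for (uint32_t next : itr->second)
					if (!state[next].expanded)
						stack.push_back(next);
		}
		else
		{
			// Phase 2 (cf. visited_resolve): successors done, emit once.
			stack.pop_back();
			if (!state[top].emitted)
			{
				state[top].emitted = true;
				out.push_back(top);
			}
		}
	}
}

Under this scheme is_back_edge falls out of the flags: a target that has been expanded but not yet resolved is still on the current DFS path.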
3rdparty/spirv-cross/spirv_cfg.hpp | 26 (vendored)
@@ -68,7 +68,7 @@ public:
{
auto itr = visit_order.find(block);
assert(itr != std::end(visit_order));
int v = itr->second.get();
int v = itr->second.order;
assert(v > 0);
return uint32_t(v);
}
@@ -114,17 +114,9 @@ public:
private:
struct VisitOrder
{
int &get()
{
return v;
}

const int &get() const
{
return v;
}

int v = -1;
int order = -1;
bool visited_resolve = false;
bool visited_branches = false;
};

Compiler &compiler;
@@ -139,11 +131,17 @@ private:
void add_branch(uint32_t from, uint32_t to);
void build_post_order_visit_order();
void build_immediate_dominators();
bool post_order_visit(uint32_t block);
void post_order_visit_branches(uint32_t block);
void post_order_visit_resolve(uint32_t block);
void post_order_visit_entry(uint32_t block);
uint32_t visit_count = 0;

bool is_back_edge(uint32_t to) const;
bool has_visited_forward_edge(uint32_t to) const;
bool has_visited_branch(uint32_t to) const;
void visit_branch(uint32_t block_id);

SmallVector<uint32_t> visit_stack;
size_t last_visited_size = 0;
};

class DominatorBuilder
3rdparty/spirv-cross/spirv_common.hpp | 83 (vendored)
@@ -574,6 +574,7 @@ struct SPIRType : IVariant
Sampler,
AccelerationStructure,
RayQuery,
CoopVecNV,

// Keep internal types at the end.
ControlPointArray,
@@ -581,7 +582,11 @@ struct SPIRType : IVariant
Char,
// MSL specific type, that is used by 'object'(analog of 'task' from glsl) shader.
MeshGridProperties,
BFloat16
BFloat16,
FloatE4M3,
FloatE5M2,

Tensor
};

// Scalar/vector/matrix support.
@@ -606,14 +611,30 @@ struct SPIRType : IVariant
bool pointer = false;
bool forward_pointer = false;

union
{
struct
{
uint32_t use_id = 0;
uint32_t rows_id = 0;
uint32_t columns_id = 0;
uint32_t scope_id = 0;
uint32_t use_id;
uint32_t rows_id;
uint32_t columns_id;
uint32_t scope_id;
} cooperative;

struct
{
uint32_t component_type_id;
uint32_t component_count_id;
} coopVecNV;

struct
{
uint32_t type;
uint32_t rank;
uint32_t shape;
} tensor;
} ext;

spv::StorageClass storage = spv::StorageClassGeneric;

SmallVector<TypeID> member_types;
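A note on why the "= 0" default initializers disappear above: the cooperative fields now share an anonymous union with the coopVecNV and tensor variants, and C++ permits a default member initializer on at most one variant member of a union, so zeroing has to happen elsewhere (a stripped-down illustration, names hypothetical):

union Ext
{
	// Giving "= 0" initializers to the members of more than one of these
	// variants would be ill-formed; the owner must zero the active one.
	struct { uint32_t use_id, rows_id, columns_id, scope_id; } cooperative;
	struct { uint32_t component_type_id, component_count_id; } coopVecNV;
	struct { uint32_t type, rank, shape; } tensor;
};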
@@ -670,6 +691,12 @@ struct SPIRExtension : IVariant
NonSemanticGeneric
};

enum ShaderDebugInfoOps
{
DebugLine = 103,
DebugSource = 35
};

explicit SPIRExtension(Extension ext_)
: ext(ext_)
{
@@ -695,6 +722,11 @@ struct SPIREntryPoint
FunctionID self = 0;
std::string name;
std::string orig_name;
std::unordered_map<uint32_t, uint32_t> fp_fast_math_defaults;
bool signed_zero_inf_nan_preserve_8 = false;
bool signed_zero_inf_nan_preserve_16 = false;
bool signed_zero_inf_nan_preserve_32 = false;
bool signed_zero_inf_nan_preserve_64 = false;
SmallVector<VariableID> interface_variables;

Bitset flags;
@@ -927,6 +959,7 @@ struct SPIRBlock : IVariant
// All access to these variables are dominated by this block,
// so before branching anywhere we need to make sure that we declare these variables.
SmallVector<VariableID> dominated_variables;
SmallVector<bool> rearm_dominated_variables;

// These are variables which should be declared in a for loop header, if we
// fail to use a classic for-loop,
@@ -1238,6 +1271,26 @@ struct SPIRConstant : IVariant
return u.f32;
}

static inline float fe4m3_to_f32(uint8_t v)
{
if ((v & 0x7f) == 0x7f)
{
union
{
float f32;
uint32_t u32;
} u;

u.u32 = (v & 0x80) ? 0xffffffffu : 0x7fffffffu;
return u.f32;
}
else
{
// Reuse the FP16 to FP32 code. Cute bit-hackery.
return f16_to_f32((int16_t(int8_t(v)) << 7) & (0xffff ^ 0x4000)) * 256.0f;
}
}
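The bit trick above leans on the field layouts: E4M3 is sign(1)/exponent(4, bias 7)/mantissa(3), FP16 is sign(1)/exponent(5, bias 15)/mantissa(10). Shifting the sign-extended byte left by 7 lines the sign and the exponent/mantissa fields up under the FP16 layout; the mask (0xffff ^ 0x4000) clears the one exponent bit the sign extension smeared; and the final * 256.0f corrects the bias difference, 2^(15 - 7). Worked check: E4M3 0x38 encodes 1.0; 0x38 << 7 = 0x1C00, which as FP16 is 2^-8, and 2^-8 * 256 = 1.0. The (v & 0x7f) == 0x7f special case exists because E4M3 has no infinities — that bit pattern is NaN. scalar_bf8 below gets away with a plain << 8 because E5M2 is exactly an FP16 with the low 8 mantissa bits truncated.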
inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
{
return m.c[col].id[row];
@@ -1286,6 +1339,16 @@ struct SPIRConstant : IVariant
return fp32;
}

inline float scalar_floate4m3(uint32_t col = 0, uint32_t row = 0) const
{
return fe4m3_to_f32(scalar_u8(col, row));
}

inline float scalar_bf8(uint32_t col = 0, uint32_t row = 0) const
{
return f16_to_f32(scalar_u8(col, row) << 8);
}

inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
{
return m.c[col].r[row].f32;
@@ -1356,9 +1419,10 @@ struct SPIRConstant : IVariant

SPIRConstant() = default;

SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized, bool replicated_ = false)
: constant_type(constant_type_)
, specialization(specialized)
, replicated(replicated_)
{
subconstants.reserve(num_elements);
for (uint32_t i = 0; i < num_elements; i++)
@@ -1437,6 +1501,9 @@ struct SPIRConstant : IVariant
// For composites which are constant arrays, etc.
SmallVector<ConstantID> subconstants;

// Whether the subconstants are intended to be replicated (e.g. OpConstantCompositeReplicateEXT)
bool replicated = false;

// Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant,
// and uses them to initialize the constant. This allows the user
// to still be able to specialize the value by supplying corresponding
@@ -1732,6 +1799,7 @@ struct Meta
uint32_t spec_id = 0;
uint32_t index = 0;
spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
spv::FPFastMathModeMask fp_fast_math_mode = spv::FPFastMathModeMaskNone;
bool builtin = false;
bool qualified_alias_explicit_override = false;

@@ -1787,7 +1855,8 @@ private:

static inline bool type_is_floating_point(const SPIRType &type)
{
return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double;
return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double ||
type.basetype == SPIRType::BFloat16 || type.basetype == SPIRType::FloatE5M2 || type.basetype == SPIRType::FloatE4M3;
}

static inline bool type_is_integral(const SPIRType &type)
3rdparty/spirv-cross/spirv_cross.cpp | 31 (vendored)
@@ -280,6 +280,9 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
// This is a global side effect of the function.
return false;

case OpTensorReadARM:
return false;

case OpExtInst:
{
uint32_t extension_set = ops[2];
@@ -373,6 +376,7 @@ void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_

case OpLoad:
case OpCooperativeMatrixLoadKHR:
case OpCooperativeVectorLoadNV:
case OpImageRead:
{
// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
@@ -1152,6 +1156,11 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *
{
res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
}
// Tensors
else if (type.basetype == SPIRType::Tensor)
{
res.tensors.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
}
else
{
res.gl_plain_uniforms.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
@@ -1169,11 +1178,8 @@ bool Compiler::type_is_top_level_block(const SPIRType &type) const
return has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
}

bool Compiler::type_is_block_like(const SPIRType &type) const
bool Compiler::type_is_explicit_layout(const SPIRType &type) const
{
if (type_is_top_level_block(type))
return true;

if (type.basetype == SPIRType::Struct)
{
// Block-like types may have Offset decorations.
@@ -1185,6 +1191,14 @@ bool Compiler::type_is_block_like(const SPIRType &type) const
return false;
}

bool Compiler::type_is_block_like(const SPIRType &type) const
{
if (type_is_top_level_block(type))
return true;
else
return type_is_explicit_layout(type);
}

void Compiler::parse_fixup()
{
// Figure out specialization constants for work group sizes.
@@ -2370,6 +2384,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
execution.output_primitives = arg0;
break;

case ExecutionModeFPFastMathDefault:
execution.fp_fast_math_defaults[arg0] = arg1;
break;

default:
break;
}
@@ -4334,6 +4352,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint

case OpCopyObject:
case OpLoad:
case OpCooperativeVectorLoadNV:
case OpCooperativeMatrixLoadKHR:
if (ops[2] == var)
return true;
@@ -5151,7 +5170,7 @@ bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const
bool Compiler::type_is_opaque_value(const SPIRType &type) const
{
return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
type.basetype == SPIRType::Sampler);
type.basetype == SPIRType::Sampler || type.basetype == SPIRType::Tensor);
}

// Make these member functions so we can easily break on any force_recompile events.
@@ -5469,6 +5488,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
{
case OpLoad:
case OpCooperativeMatrixLoadKHR:
case OpCooperativeVectorLoadNV:
{
if (length < 3)
return false;
@@ -5547,6 +5567,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
case OpImageWrite:
case OpAtomicStore:
case OpCooperativeMatrixStoreKHR:
case OpCooperativeVectorStoreNV:
{
if (length < 1)
return false;
2
3rdparty/spirv-cross/spirv_cross.hpp
vendored
@@ -95,6 +95,7 @@ struct ShaderResources
SmallVector<Resource> atomic_counters;
SmallVector<Resource> acceleration_structures;
SmallVector<Resource> gl_plain_uniforms;
SmallVector<Resource> tensors;

// There can only be one push constant block,
// but keep the vector in case this restriction is lifted in the future.
@@ -1171,6 +1172,7 @@ protected:
bool type_contains_recursion(const SPIRType &type);
bool type_is_array_of_pointers(const SPIRType &type) const;
bool type_is_block_like(const SPIRType &type) const;
bool type_is_explicit_layout(const SPIRType &type) const;
bool type_is_top_level_block(const SPIRType &type) const;
bool type_is_opaque_value(const SPIRType &type) const;

8
3rdparty/spirv-cross/spirv_cross_c.cpp
vendored
@@ -200,6 +200,7 @@ struct spvc_resources_s : ScratchMemoryAllocation
SmallVector<spvc_reflected_resource> separate_samplers;
SmallVector<spvc_reflected_resource> acceleration_structures;
SmallVector<spvc_reflected_resource> gl_plain_uniforms;
SmallVector<spvc_reflected_resource> tensors;

SmallVector<spvc_reflected_builtin_resource> builtin_inputs;
SmallVector<spvc_reflected_builtin_resource> builtin_outputs;
@@ -1872,6 +1873,8 @@ bool spvc_resources_s::copy_resources(const ShaderResources &resources)
return false;
if (!copy_resources(gl_plain_uniforms, resources.gl_plain_uniforms))
return false;
if (!copy_resources(tensors, resources.tensors))
return false;
if (!copy_resources(builtin_inputs, resources.builtin_inputs))
return false;
if (!copy_resources(builtin_outputs, resources.builtin_outputs))
@@ -2025,6 +2028,11 @@ spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources,

case SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM:
list = &resources->gl_plain_uniforms;
break;

case SPVC_RESOURCE_TYPE_TENSOR:
list = &resources->tensors;
break;

default:
break;

3
3rdparty/spirv-cross/spirv_cross_c.h
vendored
@@ -40,7 +40,7 @@ extern "C" {
/* Bumped if ABI or API breaks backwards compatibility. */
#define SPVC_C_API_VERSION_MAJOR 0
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
#define SPVC_C_API_VERSION_MINOR 66
#define SPVC_C_API_VERSION_MINOR 67
/* Bumped if internal implementation details change. */
#define SPVC_C_API_VERSION_PATCH 0

@@ -227,6 +227,7 @@ typedef enum spvc_resource_type
SPVC_RESOURCE_TYPE_RAY_QUERY = 13,
SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER = 14,
SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM = 15,
SPVC_RESOURCE_TYPE_TENSOR = 16,
SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff
} spvc_resource_type;

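To make the new enum member concrete, here is a minimal sketch of how a client could enumerate tensor resources through the C API once this lands. This is illustrative, not code from the commit: the reflection entry points are the existing spvc API, but the wrapper function, its arguments and the printf formatting are assumptions.

#include <spirv_cross_c.h>
#include <stddef.h>
#include <stdio.h>

/* Sketch: print every tensor resource in a parsed module. Error checking omitted. */
static void list_tensors(const SpvId *words, size_t word_count)
{
	spvc_context ctx = NULL;
	spvc_parsed_ir ir = NULL;
	spvc_compiler comp = NULL;
	spvc_resources res = NULL;
	const spvc_reflected_resource *list = NULL;
	size_t count = 0;

	spvc_context_create(&ctx);
	spvc_context_parse_spirv(ctx, words, word_count, &ir);
	spvc_context_create_compiler(ctx, SPVC_BACKEND_NONE, ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &comp);
	spvc_compiler_create_shader_resources(comp, &res);
	spvc_resources_get_resource_list_for_type(res, SPVC_RESOURCE_TYPE_TENSOR, &list, &count);

	for (size_t i = 0; i < count; i++)
		printf("tensor: %s (id %u)\n", list[i].name, list[i].id);

	spvc_context_destroy(ctx); /* releases everything allocated from the context */
}
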
31
3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp
vendored
@@ -452,6 +452,10 @@ void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument)
dec.fp_rounding_mode = static_cast<FPRoundingMode>(argument);
break;

case DecorationFPFastMathMode:
dec.fp_fast_math_mode = static_cast<FPFastMathModeMask>(argument);
break;

default:
break;
}
@@ -523,8 +527,27 @@ void ParsedIR::mark_used_as_array_length(ID id)
switch (ids[id].get_type())
{
case TypeConstant:
get<SPIRConstant>(id).is_used_as_array_length = true;
{
auto &c = get<SPIRConstant>(id);
c.is_used_as_array_length = true;

// Mark composite dependencies as well.
for (auto &sub_id : c.m.id)
if (sub_id)
mark_used_as_array_length(sub_id);

for (uint32_t col = 0; col < c.m.columns; col++)
{
for (auto &sub_id : c.m.c[col].id)
if (sub_id)
mark_used_as_array_length(sub_id);
}

for (auto &sub_id : c.subconstants)
if (sub_id)
mark_used_as_array_length(sub_id);
break;
}

case TypeConstantOp:
{
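For context, an illustration of why the recursion matters (reasoning inferred from the code above, not text from the commit): a scalar spec constant used directly as an array length was already flagged, but a composite constant, say an OpSpecConstantComposite vector whose components later size arrays through constant-op chains, left its leaf constants unmarked. The added walk covers all three places a SPIRConstant stores children: the top-level c.m.id vector, the per-column c.m.c[col].id vectors, and c.subconstants.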
@@ -643,6 +666,8 @@ uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const
return dec.index;
case DecorationFPRoundingMode:
return dec.fp_rounding_mode;
case DecorationFPFastMathMode:
return dec.fp_fast_math_mode;
default:
return 1;
}
@@ -730,6 +755,10 @@ void ParsedIR::unset_decoration(ID id, Decoration decoration)
dec.fp_rounding_mode = FPRoundingModeMax;
break;

case DecorationFPFastMathMode:
dec.fp_fast_math_mode = FPFastMathModeMaskNone;
break;

case DecorationHlslCounterBufferGOOGLE:
{
auto &counter = meta[id].hlsl_magic_counter_buffer;

@@ -111,6 +111,7 @@ public:

struct Source
{
spv::SourceLanguage lang = spv::SourceLanguageUnknown;
uint32_t version = 0;
bool es = false;
bool known = false;

758
3rdparty/spirv-cross/spirv_glsl.cpp
vendored
File diff suppressed because it is too large
18
3rdparty/spirv-cross/spirv_glsl.hpp
vendored
@@ -32,6 +32,8 @@

namespace SPIRV_CROSS_NAMESPACE
{
struct GlslConstantNameMapping;

enum PlsFormat
{
PlsNone = 0,
@@ -426,6 +428,8 @@ protected:
const uint32_t *args, uint32_t count);
virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
uint32_t count);
void emit_non_semantic_shader_debug_info(uint32_t result_type, uint32_t result_id, uint32_t op,
const uint32_t *args, uint32_t count);
virtual void emit_header();
void emit_line_directive(uint32_t file_id, uint32_t line_literal);
void build_workgroup_size(SmallVector<std::string> &arguments, const SpecializationConstant &x,
@@ -662,6 +666,7 @@ protected:
bool workgroup_size_is_hidden = false;
bool requires_relaxed_precision_analysis = false;
bool implicit_c_integer_promotion_rules = false;
bool supports_spec_constant_array_size = true;
} backend;

void emit_struct(SPIRType &type);
@@ -685,6 +690,8 @@ protected:
void emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
const SmallVector<uint32_t> &indices);
void emit_block_chain(SPIRBlock &block);
BlockID emit_block_chain_inner(SPIRBlock &block);
void emit_block_chain_cleanup(SPIRBlock &block);
void emit_hoisted_temporaries(SmallVector<std::pair<TypeID, ID>> &temporaries);
int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const;
void emit_constant(const SPIRConstant &constant);
@@ -802,6 +809,10 @@ protected:
virtual void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
std::string to_non_uniform_aware_expression(uint32_t id);
std::string to_atomic_ptr_expression(uint32_t id);
std::string to_pretty_expression_if_int_constant(
uint32_t id,
const GlslConstantNameMapping *mapping_start, const GlslConstantNameMapping *mapping_end,
bool register_expression_read = true);
std::string to_expression(uint32_t id, bool register_expression_read = true);
std::string to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type);
std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, const SPIRType &type);
@@ -830,7 +841,7 @@ protected:
void emit_output_variable_initializer(const SPIRVariable &var);
std::string to_precision_qualifiers_glsl(uint32_t id);
virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);
std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
std::string flags_to_qualifiers_glsl(const SPIRType &type, uint32_t id, const Bitset &flags);
const char *format_to_glsl(spv::ImageFormat format);
virtual std::string layout_for_member(const SPIRType &type, uint32_t index);
virtual std::string to_interpolation_qualifiers(const Bitset &flags);
@@ -1017,6 +1028,8 @@ protected:
const Instruction *get_next_instruction_in_block(const Instruction &instr);
static uint32_t mask_relevant_memory_semantics(uint32_t semantics);

std::string convert_floate4m3_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_floate5m2_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
@@ -1068,6 +1081,9 @@ protected:
std::string format_float(float value) const;
std::string format_double(double value) const;

uint32_t get_fp_fast_math_flags_for_op(uint32_t result_type, uint32_t id) const;
bool has_legacy_nocontract(uint32_t result_type, uint32_t id) const;

private:
void init();

4
3rdparty/spirv-cross/spirv_hlsl.cpp
vendored
@@ -3029,7 +3029,7 @@ string CompilerHLSL::get_inner_entry_point_name() const
SPIRV_CROSS_THROW("Unsupported execution model.");
}

uint32_t CompilerHLSL::input_vertices_from_execution_mode(spirv_cross::SPIREntryPoint &execution) const
uint32_t CompilerHLSL::input_vertices_from_execution_mode(SPIREntryPoint &execution) const
{
uint32_t input_vertices = 1;

@@ -3061,7 +3061,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret
auto &type = get<SPIRType>(func.return_type);
if (type.array.empty())
{
decl += flags_to_qualifiers_glsl(type, return_flags);
decl += flags_to_qualifiers_glsl(type, 0, return_flags);
decl += type_to_glsl(type);
decl += " ";
}

334
3rdparty/spirv-cross/spirv_msl.cpp
vendored
@@ -510,7 +510,7 @@ void CompilerMSL::build_implicit_builtins()
has_local_invocation_index = true;
}

if (need_workgroup_size && builtin == BuiltInLocalInvocationId)
if (need_workgroup_size && builtin == BuiltInWorkgroupSize)
{
builtin_workgroup_size_id = var.self;
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self);
@@ -684,6 +684,18 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
}

if (need_multiview && !has_view_idx)
{
uint32_t var_id = ir.increase_bound_by(1);

// Create gl_ViewIndex.
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
builtin_view_idx_id = var_id;
mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
}
}

if (need_multiview)
{
// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
@@ -692,7 +704,7 @@ void CompilerMSL::build_implicit_builtins()
// gl_Layer is an output in vertex-pipeline shaders.
uint32_t type_ptr_out_id = ir.increase_bound_by(2);
SPIRType uint_type_ptr_out = get_uint_type();
uint_type_ptr.op = OpTypePointer;
uint_type_ptr_out.op = OpTypePointer;
uint_type_ptr_out.pointer = true;
uint_type_ptr_out.pointer_depth++;
uint_type_ptr_out.parent_type = get_uint_type_id();
@@ -706,18 +718,6 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
}

if (need_multiview && !has_view_idx)
{
uint32_t var_id = ir.increase_bound_by(1);

// Create gl_ViewIndex.
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
builtin_view_idx_id = var_id;
mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
}
}

if ((need_tesc_params && (msl_options.multi_patch_workgroup || !has_invocation_id || !has_primitive_id)) ||
(need_tese_params && !has_primitive_id) || need_grid_params)
{
@@ -932,12 +932,40 @@ void CompilerMSL::build_implicit_builtins()

if (need_workgroup_size && !has_workgroup_size)
{
uint32_t offset = ir.increase_bound_by(2);
uint32_t type_ptr_id = offset;
uint32_t var_id = offset + 1;
auto &execution = get_entry_point();
// First, check if the workgroup size _constant_ was defined.
// If it was, we don't need to do anything; in fact, we shouldn't.
builtin_workgroup_size_id = execution.workgroup_size.constant;
if (builtin_workgroup_size_id == 0)
{
uint32_t var_id = ir.increase_bound_by(1);

// Create gl_WorkgroupSize.
uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
// If we have LocalSize or LocalSizeId, use those to define the workgroup size.
if (execution.flags.get(ExecutionModeLocalSizeId))
{
const SPIRConstant *init[] = { &get<SPIRConstant>(execution.workgroup_size.id_x),
&get<SPIRConstant>(execution.workgroup_size.id_y),
&get<SPIRConstant>(execution.workgroup_size.id_z) };
bool specialized = init[0]->specialization || init[1]->specialization || init[2]->specialization;
set<SPIRConstant>(var_id, type_id, init, 3, specialized);
execution.workgroup_size.constant = var_id;
}
else if (execution.flags.get(ExecutionModeLocalSize))
{
uint32_t offset = ir.increase_bound_by(3);
const SPIRConstant *init[] = {
&set<SPIRConstant>(offset, get_uint_type_id(), execution.workgroup_size.x, false),
&set<SPIRConstant>(offset + 1, get_uint_type_id(), execution.workgroup_size.y, false),
&set<SPIRConstant>(offset + 2, get_uint_type_id(), execution.workgroup_size.z, false)
};
set<SPIRConstant>(var_id, type_id, init, 3, false);
execution.workgroup_size.constant = var_id;
}
else
{
uint32_t type_ptr_id = ir.increase_bound_by(1);
SPIRType uint_type_ptr = get<SPIRType>(type_id);
uint_type_ptr.op = OpTypePointer;
uint_type_ptr.pointer = true;
@@ -948,9 +976,11 @@ void CompilerMSL::build_implicit_builtins()
auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
ptr_type.self = type_id;
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
}
set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
builtin_workgroup_size_id = var_id;
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
}
}

if (!has_frag_depth && force_frag_depth_passthrough)
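The practical effect, sketched for a compute shader with a local size of 8x8x1 (illustrative output, not captured from this commit; the constant name follows SPIRV-Cross's usual GLSL-style naming): instead of binding a stage input, the LocalSize path now materializes the size as a real constant in the generated MSL:

constant uint3 gl_WorkGroupSize = uint3(8u, 8u, 1u);

With LocalSizeId the same path wires the constant to the referenced (possibly specialization) constants, so the emitted initializer tracks function constants instead of literals.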
@@ -1681,6 +1711,7 @@ string CompilerMSL::compile()
backend.array_is_value_type_in_buffer_blocks = false;
backend.support_pointer_to_pointer = true;
backend.implicit_c_integer_promotion_rules = true;
backend.supports_spec_constant_array_size = false;

capture_output_to_buffer = msl_options.capture_output_to_buffer;
is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
@@ -1841,7 +1872,7 @@ void CompilerMSL::preprocess_op_codes()
if (preproc.uses_atomics)
{
add_header_line("#include <metal_atomic>");
add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"", false);
}

// Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to
@@ -1850,6 +1881,10 @@ void CompilerMSL::preprocess_op_codes()
(preproc.uses_image_write && !msl_options.supports_msl_version(2, 2)))
is_rasterization_disabled = true;

// FIXME: This currently does not consider BDA side effects, so we cannot deduce const device for BDA.
if (preproc.uses_buffer_write || preproc.uses_image_write)
has_descriptor_side_effects = true;

// Tessellation control shaders are run as compute functions in Metal, and so
// must capture their output to a buffer.
if (is_tesc_shader() || (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation))
@@ -5720,22 +5755,44 @@ void CompilerMSL::emit_header()
{
// This particular line can be overridden during compilation, so make it a flag and not a pragma line.
if (suppress_missing_prototypes)
statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"", false);
if (suppress_incompatible_pointer_types_discard_qualifiers)
statement("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"");
add_pragma_line("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"", false);

// Disable warning about "sometimes uninitialized" when zero-initializing simple threadgroup variables
if (suppress_sometimes_unitialized)
statement("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"");
add_pragma_line("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"", false);

// Disable warning about missing braces for array<T> template to make arrays a value type
if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0)
statement("#pragma clang diagnostic ignored \"-Wmissing-braces\"");
add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-braces\"", false);

// Floating point fast math compile declarations
if (msl_options.use_fast_math_pragmas && msl_options.supports_msl_version(3, 2))
{
uint32_t contract_mask = FPFastMathModeAllowContractMask;
uint32_t relax_mask = (FPFastMathModeNSZMask | FPFastMathModeAllowRecipMask | FPFastMathModeAllowReassocMask);
uint32_t fast_mask = (relax_mask | FPFastMathModeNotNaNMask | FPFastMathModeNotInfMask);

// FP math mode
uint32_t fp_flags = get_fp_fast_math_flags(true);
const char *math_mode = "safe";
if ((fp_flags & fast_mask) == fast_mask) // Must have all flags
math_mode = "fast";
else if ((fp_flags & relax_mask) == relax_mask) // Must have all flags
math_mode = "relaxed";

add_pragma_line(join("#pragma metal fp math_mode(", math_mode, ")"), false);

// FP contraction
const char *contract_mode = ((fp_flags & contract_mask) == contract_mask) ? "fast" : "off";
add_pragma_line(join("#pragma metal fp contract(", contract_mode, ")"), false);
}

for (auto &pragma : pragma_lines)
statement(pragma);

if (!pragma_lines.empty() || suppress_missing_prototypes)
if (!pragma_lines.empty())
statement("");

statement("#include <metal_stdlib>");
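Put together, a header produced by this path with use_fast_math_pragmas enabled on MSL 3.2 would look roughly like this (a sketch; the exact pragma set depends on the module's float controls):

#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma metal fp math_mode(relaxed)
#pragma metal fp contract(fast)

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;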
@@ -5755,18 +5812,23 @@ void CompilerMSL::emit_header()
statement("");
}

void CompilerMSL::add_pragma_line(const string &line)
void CompilerMSL::add_pragma_line(const string &line, bool recompile_on_unique)
{
auto rslt = pragma_lines.insert(line);
if (rslt.second)
if (std::find(pragma_lines.begin(), pragma_lines.end(), line) == pragma_lines.end())
{
pragma_lines.push_back(line);
if (recompile_on_unique)
force_recompile();
}
}

void CompilerMSL::add_typedef_line(const string &line)
{
auto rslt = typedef_lines.insert(line);
if (rslt.second)
if (std::find(typedef_lines.begin(), typedef_lines.end(), line) == typedef_lines.end())
{
typedef_lines.push_back(line);
force_recompile();
}
}

// Template structs like spvUnsafeArray<> need to be declared *before* any resources are declared
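The container change is behavioral, not cosmetic: std::set iterates in lexicographic order, so a pragma inserted later could previously be emitted before an earlier one, while a vector guarded by a linear find keeps first-insertion order and still deduplicates. A standalone sketch of the idiom (illustrative, not repository code):

#include <algorithm>
#include <string>
#include <vector>

static std::vector<std::string> lines;

// Append only if not already present; a linear scan is fine for a handful of pragmas.
static void add_line_ordered(const std::string &line)
{
	if (std::find(lines.begin(), lines.end(), line) == lines.end())
		lines.push_back(line);
}

int main()
{
	add_line_ordered("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
	add_line_ordered("#pragma metal fp math_mode(relaxed)");
	add_line_ordered("#pragma clang diagnostic ignored \"-Wmissing-prototypes\""); // dropped as duplicate
	// lines now holds the two unique pragmas, in the order they were first added.
}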
@@ -8353,9 +8415,22 @@ void CompilerMSL::emit_specialization_constants_and_structs()
if (unique_func_constants[constant_id] == c.self)
statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id,
")]];");
// RenderDoc and other instrumentation may reuse the same SpecId with different base types.
// We deduplicate to one [[function_constant(id)]] temp and then initialize all variants from it.
// Metal forbids as_type to/from 'bool', so if either side is Boolean, avoid bitcasting here and
// prefer a value cast via a constructor instead (e.g. uint(tmp) / float(tmp) / bool(tmp)).
// This preserves expected toggle semantics and prevents illegal MSL like as_type<uint>(bool_tmp).
{
string sc_true_expr;
if (sc_tmp_type == type.basetype)
sc_true_expr = sc_tmp_name;
else if (sc_tmp_type == SPIRType::Boolean || type.basetype == SPIRType::Boolean)
sc_true_expr = join(sc_type_name, "(", sc_tmp_name, ")");
else
sc_true_expr = bitcast_expression(type, sc_tmp_type, sc_tmp_name);
statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name,
") ? ", bitcast_expression(type, sc_tmp_type, sc_tmp_name), " : ", constant_expression(c),
";");
") ? ", sc_true_expr, " : ", constant_expression(c), ";");
}
}
else if (has_decoration(c.self, DecorationSpecId))
{
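As an illustration with hypothetical names (not captured output): a SpecId 0 shared by a uint constant and a bool constant would now come out as one deduplicated temp plus per-type initializers, with the bool side taking the constructor cast:

constant uint spvTmp0 [[function_constant(0)]];
constant uint optionValue = is_function_constant_defined(spvTmp0) ? spvTmp0 : 1u;
constant bool optionEnabled = is_function_constant_defined(spvTmp0) ? bool(spvTmp0) : true;

A non-bool variant of a different base type would instead go through bitcast_expression, as before.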
@@ -9161,7 +9236,12 @@ bool CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, cons
// and there is a risk of concurrent write access to other components,
// we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect.
// The MSL compiler refuses to allow component-level access for any non-packed vector types.
if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup))
// MSL refuses to take address or reference to vector component, even for packed types, so just force
// through the pointer cast. Not much we can do, sadly.
// For packed types, we could technically omit this if we know the reference does not have to turn into a pointer
// of some kind, but that requires external analysis passes to figure out, and
// this case is likely rare enough that we don't need to bother.
if (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup)
{
const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device";
expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")");
@@ -9485,21 +9565,21 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
break;

case OpFMul:
if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
MSL_BFOP(spvFMul);
else
MSL_BOP(*);
break;

case OpFAdd:
if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
MSL_BFOP(spvFAdd);
else
MSL_BOP(+);
break;

case OpFSub:
if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
MSL_BFOP(spvFSub);
else
MSL_BOP(-);
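When the invariant path is taken, MSL_BFOP routes the arithmetic through a helper so the Metal compiler cannot re-fuse it. From memory of SPIRV-Cross's MSL support code (a sketch, not text from this diff), the multiply helper looks roughly like:

template<typename T>
[[clang::optnone]]
T spvFMul(T l, T r)
{
	return fma(l, r, T(0));
}

Expressing the product as an fma with a zero addend under optnone pins down rounding that a bare l * r would leave open to contraction.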
@@ -9997,7 +10077,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
case OpVectorTimesMatrix:
case OpMatrixTimesVector:
{
if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
if (!msl_options.invariant_float_math && !has_legacy_nocontract(ops[0], ops[1]))
{
CompilerGLSL::emit_instruction(instruction);
break;
@@ -10039,7 +10119,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)

case OpMatrixTimesMatrix:
{
if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
if (!msl_options.invariant_float_math && !has_legacy_nocontract(ops[0], ops[1]))
{
CompilerGLSL::emit_instruction(instruction);
break;
@@ -10725,13 +10805,13 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
auto *lhs_var = maybe_get_backing_variable(lhs_id);
if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
lhs_is_array_template = true;
else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(lhs_var->basetype)))
else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_explicit_layout(get<SPIRType>(lhs_var->basetype)))
lhs_is_array_template = false;

auto *rhs_var = maybe_get_backing_variable(rhs_id);
if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
rhs_is_array_template = true;
else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(rhs_var->basetype)))
else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_explicit_layout(get<SPIRType>(rhs_var->basetype)))
rhs_is_array_template = false;

// If threadgroup storage qualifiers are *not* used:
@@ -11182,35 +11262,76 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,

auto &restype = get<SPIRType>(result_type);

// Only precise:: preserves NaN in transcendentals (supposedly, cannot find documentation for this).
const auto drop_nan_inf = FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask;
bool preserve_nan = (get_fp_fast_math_flags_for_op(result_type, id) & drop_nan_inf) != drop_nan_inf;
const char *preserve_str = preserve_nan ? "precise" : "fast";

// TODO: Emit the default behavior to match existing code. Might need to be revisited.
// Only fp32 has the precise:: override.
#define EMIT_PRECISE_OVERRIDE(glsl_op, op) \
case GLSLstd450##glsl_op: \
if (restype.basetype == SPIRType::Float && preserve_nan) \
emit_unary_func_op(result_type, id, args[0], "precise::" op); \
else \
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); \
break

switch (op)
{
EMIT_PRECISE_OVERRIDE(Cos, "cos");
EMIT_PRECISE_OVERRIDE(Sin, "sin");
EMIT_PRECISE_OVERRIDE(Tan, "tan");
EMIT_PRECISE_OVERRIDE(Acos, "acos");
EMIT_PRECISE_OVERRIDE(Asin, "asin");
EMIT_PRECISE_OVERRIDE(Atan, "atan");
EMIT_PRECISE_OVERRIDE(Exp, "exp");
EMIT_PRECISE_OVERRIDE(Exp2, "exp2");
EMIT_PRECISE_OVERRIDE(Log, "log");
EMIT_PRECISE_OVERRIDE(Log2, "log2");
EMIT_PRECISE_OVERRIDE(Sqrt, "sqrt");
#undef EMIT_PRECISE_OVERRIDE

case GLSLstd450Sinh:
if (restype.basetype == SPIRType::Half)
{
auto ftype = restype;
ftype.basetype = SPIRType::Float;

// MSL does not have an overload for half. Force-cast back to half.
auto expr = join("half(fast::sinh(", to_unpacked_expression(args[0]), "))");
auto expr = join(type_to_glsl(restype), "(", preserve_str, "::sinh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
emit_op(result_type, id, expr, should_forward(args[0]));
inherit_expression_dependencies(id, args[0]);
}
else if (preserve_nan)
emit_unary_func_op(result_type, id, args[0], "precise::sinh");
else
emit_unary_func_op(result_type, id, args[0], "fast::sinh");
break;
case GLSLstd450Cosh:
if (restype.basetype == SPIRType::Half)
{
auto ftype = restype;
ftype.basetype = SPIRType::Float;

// MSL does not have an overload for half. Force-cast back to half.
auto expr = join("half(fast::cosh(", to_unpacked_expression(args[0]), "))");
auto expr = join(type_to_glsl(restype), "(", preserve_str, "::cosh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
emit_op(result_type, id, expr, should_forward(args[0]));
inherit_expression_dependencies(id, args[0]);
}
else if (preserve_nan)
emit_unary_func_op(result_type, id, args[0], "precise::cosh");
else
emit_unary_func_op(result_type, id, args[0], "fast::cosh");
break;
case GLSLstd450Tanh:
if (restype.basetype == SPIRType::Half)
{
auto ftype = restype;
ftype.basetype = SPIRType::Float;

// MSL does not have an overload for half. Force-cast back to half.
auto expr = join("half(fast::tanh(", to_unpacked_expression(args[0]), "))");
auto expr = join(type_to_glsl(restype), "(", preserve_str, "::tanh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
emit_op(result_type, id, expr, should_forward(args[0]));
inherit_expression_dependencies(id, args[0]);
}
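Expanded, the macro turns each listed opcode into a case like the following (shown for Cos; this is the literal expansion, reproduced for readability):

case GLSLstd450Cos:
if (restype.basetype == SPIRType::Float && preserve_nan)
emit_unary_func_op(result_type, id, args[0], "precise::cos");
else
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;

So precise:: only fires for fp32 results whose fast-math flags still demand NaN/Inf preservation; everything else keeps the existing GLSL-path behavior.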
@@ -11221,7 +11342,13 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
if (restype.basetype == SPIRType::Half)
{
// MSL does not have an overload for half. Force-cast back to half.
auto expr = join("half(fast::atan2(", to_unpacked_expression(args[0]), ", ", to_unpacked_expression(args[1]), "))");
auto ftype = restype;
ftype.basetype = SPIRType::Float;

auto expr = join(type_to_glsl(restype),
"(", preserve_str, "::atan2(",
type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), "), ",
type_to_glsl(ftype), "(", to_unpacked_expression(args[1]), ")))");
emit_op(result_type, id, expr, should_forward(args[0]) && should_forward(args[1]));
inherit_expression_dependencies(id, args[0]);
inherit_expression_dependencies(id, args[1]);
@@ -11230,6 +11357,9 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2");
break;
case GLSLstd450InverseSqrt:
if (restype.basetype == SPIRType::Float && preserve_nan)
emit_unary_func_op(result_type, id, args[0], "precise::rsqrt");
else
emit_unary_func_op(result_type, id, args[0], "rsqrt");
break;
case GLSLstd450RoundEven:
@@ -11462,11 +11592,14 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
{
auto &exp_type = expression_type(args[0]);
// MSL does not support scalar versions here.
// MSL has no implementation for normalize in the fast:: namespace for half2 and half3
// MSL has no implementation for normalize in the fast:: namespace for half
// Returns -1 or 1 for valid input, sign() does the job.

// precise::normalize asm looks ridiculous.
// Don't think this actually matters unless proven otherwise.
if (exp_type.vecsize == 1)
emit_unary_func_op(result_type, id, args[0], "sign");
else if (exp_type.vecsize <= 3 && exp_type.basetype == SPIRType::Half)
else if (exp_type.basetype == SPIRType::Half)
emit_unary_func_op(result_type, id, args[0], "normalize");
else
emit_unary_func_op(result_type, id, args[0], "fast::normalize");
@@ -11529,6 +11662,9 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,

case GLSLstd450Pow:
// powr makes x < 0.0 undefined, just like SPIR-V.
if (restype.basetype == SPIRType::Float && preserve_nan)
emit_binary_func_op(result_type, id, args[0], args[1], "precise::powr");
else
emit_binary_func_op(result_type, id, args[0], args[1], "powr");
break;

@@ -13753,6 +13889,24 @@ uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltI
return loc;
}

bool CompilerMSL::entry_point_returns_stage_output() const
{
if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)
return false;
bool ep_should_return_output = !get_is_rasterization_disabled();
return stage_out_var_id && ep_should_return_output;
}

bool CompilerMSL::entry_point_requires_const_device_buffers() const
{
// For fragment, we don't really need it, but it might help avoid pessimization
// if the compiler deduces it needs to use late-Z for whatever reason.
return (get_execution_model() == ExecutionModelFragment && !has_descriptor_side_effects) ||
(entry_point_returns_stage_output() &&
(get_execution_model() == ExecutionModelVertex ||
get_execution_model() == ExecutionModelTessellationEvaluation));
}

// Returns the type declaration for a function, including the
// entry type if the current function is the entry point function
string CompilerMSL::func_type_decl(SPIRType &type)
@@ -13763,8 +13917,7 @@ string CompilerMSL::func_type_decl(SPIRType &type)
return return_type;

// If an outgoing interface block has been defined, and it should be returned, override the entry point return type
bool ep_should_return_output = !get_is_rasterization_disabled();
if (stage_out_var_id && ep_should_return_output)
if (entry_point_returns_stage_output())
return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type, 0);

// Prepend an entry type, based on the execution model
@@ -13888,16 +14041,14 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo

case StorageClassStorageBuffer:
{
// For arguments from variable pointers, we use the write count deduction, so
// we should not assume any constness here. Only for global SSBOs.
bool readonly = false;
if (!var || has_decoration(type.self, DecorationBlock))
readonly = flags.get(DecorationNonWritable);

if (decoration_flags_signal_coherent(flags))
readonly = false;

addr_space = readonly ? "const device" : "device";
// When dealing with descriptor aliasing, it becomes very problematic to make use of
// readonly qualifiers.
// If rasterization is not disabled in vertex/tese, Metal does not allow side effects and refuses to compile "device",
// even if there are no writes. Just force const device.
if (entry_point_requires_const_device_buffers())
addr_space = "const device";
else
addr_space = "device";
break;
}

@@ -13914,7 +14065,12 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
{
bool ssbo = has_decoration(type.self, DecorationBufferBlock);
if (ssbo)
addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
{
if (entry_point_requires_const_device_buffers())
addr_space = "const device";
else
addr_space = "device";
}
else
addr_space = "constant";
}
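Illustratively (hypothetical shader, names assumed): a vertex entry point compiled while rasterization stays enabled now receives its SSBOs as const device, which Metal accepts even though a writable device binding would be rejected in that configuration:

vertex VertOut main0(const device SSBO& ssbo [[buffer(0)]], uint vid [[vertex_id]])

The trade-off the comments point at is that constness is now decided per entry point rather than per NonWritable flag, because descriptor aliasing makes the per-resource answer unreliable.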
@@ -16553,7 +16709,7 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type, uint32_t variable_i
default:
if (type_is_array_of_pointers(type) || using_builtin_array())
{
const SPIRVariable *var = variable_id ? &get<SPIRVariable>(variable_id) : nullptr;
const SPIRVariable *var = variable_id ? maybe_get<SPIRVariable>(variable_id) : nullptr;
if (var && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer) &&
is_array(get_variable_data_type(*var)))
{
@@ -16854,6 +17010,8 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb
if (p_var && p_var->basevariable)
p_var = maybe_get<SPIRVariable>(p_var->basevariable);

bool has_access_qualifier = true;

switch (img_type.access)
{
case AccessQualifierReadOnly:
@@ -16879,13 +17037,22 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb

img_type_name += "write";
}
else
{
has_access_qualifier = false;
}
break;
}
}

if (p_var && has_decoration(p_var->self, DecorationCoherent) && msl_options.supports_msl_version(3, 2))
{
// Cannot declare memory_coherence_device without access qualifier.
if (!has_access_qualifier)
img_type_name += ", access::read";
img_type_name += ", memory_coherence_device";
}
}

img_type_name += ">";

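A sketch of the resulting declaration for a coherent read-write storage image on MSL 3.2 (variable name and binding assumed):

texture2d<float, access::read_write, memory_coherence_device> coherentImage [[texture(0)]]

and when the image carried no access qualifier at all, the branch above first appends access::read so the coherence argument has a valid preceding qualifier.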
@@ -17317,13 +17484,18 @@ void CompilerMSL::emit_subgroup_cluster_op_cast(uint32_t result_type, uint32_t r
inherit_expression_dependencies(result_id, op0);
}

// Note: Metal forbids bitcasting to/from 'bool' using as_type. This function is used widely
// for generating casts in the backend. To avoid generating illegal MSL when the canonical
// function constant type (from deduplicated SpecId) is Boolean, fall back to value-cast in
// that case by returning type_to_glsl(out_type) instead of as_type<...>.
string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
if (out_type.basetype == in_type.basetype)
return "";

assert(out_type.basetype != SPIRType::Boolean);
assert(in_type.basetype != SPIRType::Boolean);
// Avoid bitcasting to/from booleans in MSL; use value cast instead.
if (out_type.basetype == SPIRType::Boolean || in_type.basetype == SPIRType::Boolean)
return type_to_glsl(out_type);

bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type) && (out_type.vecsize == in_type.vecsize);
bool same_size_cast = (out_type.width * out_type.vecsize) == (in_type.width * in_type.vecsize);
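In emitted-code terms (illustrative one-liners): where as_type<uint>(flag) on a bool would be rejected by the Metal front end, the fallback makes the caller produce a constructor-style value cast instead:

bool flag = true;
uint u = uint(flag); // value cast, what the fallback now yields
// uint v = as_type<uint>(flag); // illegal in MSL: as_type cannot involve bool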
@@ -17679,6 +17851,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
case BuiltInGlobalInvocationId:
return "thread_position_in_grid";

case BuiltInWorkgroupSize:
return "threads_per_threadgroup";

case BuiltInWorkgroupId:
return "threadgroup_position_in_grid";

@@ -17864,6 +18039,7 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
case BuiltInLocalInvocationId:
case BuiltInNumWorkgroups:
case BuiltInWorkgroupId:
case BuiltInWorkgroupSize:
return "uint3";
case BuiltInLocalInvocationIndex:
case BuiltInNumSubgroups:
@@ -18531,11 +18707,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o

case OpFAdd:
case OpFSub:
if (compiler.msl_options.invariant_float_math ||
compiler.has_decoration(args[1], DecorationNoContraction))
{
if (compiler.msl_options.invariant_float_math || compiler.has_legacy_nocontract(args[0], args[1]))
return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub;
}
break;

case OpFMul:
@@ -18543,11 +18716,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
case OpMatrixTimesVector:
case OpVectorTimesMatrix:
case OpMatrixTimesMatrix:
if (compiler.msl_options.invariant_float_math ||
compiler.has_decoration(args[1], DecorationNoContraction))
{
if (compiler.msl_options.invariant_float_math || compiler.has_legacy_nocontract(args[0], args[1]))
return SPVFuncImplFMul;
}
break;

case OpQuantizeToF16:
@@ -19572,7 +19742,7 @@ void CompilerMSL::analyze_argument_buffers()
SetBindingPair pair = { desc_set, binding };

if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler ||
resource.basetype == SPIRType::SampledImage)
resource.basetype == SPIRType::SampledImage || resource.basetype == SPIRType::AccelerationStructure)
{
// Drop pointer information when we emit the resources into a struct.
buffer_type.member_types.push_back(get_variable_data_type_id(var));
@@ -19827,6 +19997,30 @@ bool CompilerMSL::specialization_constant_is_macro(uint32_t const_id) const
return constant_macro_ids.find(const_id) != constant_macro_ids.end();
}

// Start with all fast math flags enabled, and selectively disable based on execution modes and float controls
uint32_t CompilerMSL::get_fp_fast_math_flags(bool incl_ops) const
{
uint32_t fp_flags = ~0;
auto &ep = get_entry_point();

if (ep.flags.get(ExecutionModeSignedZeroInfNanPreserve))
fp_flags &= ~(FPFastMathModeNSZMask | FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask);

if (ep.flags.get(ExecutionModeContractionOff))
fp_flags &= ~(FPFastMathModeAllowContractMask);

for (auto &fp_pair : ep.fp_fast_math_defaults)
if (fp_pair.second)
fp_flags &= get<SPIRConstant>(fp_pair.second).scalar();

if (incl_ops)
for (auto &p_m : ir.meta)
if (p_m.second.decoration.decoration_flags.get(DecorationFPFastMathMode))
fp_flags &= p_m.second.decoration.fp_fast_math_mode;

return fp_flags;
}

void CompilerMSL::emit_block_hints(const SPIRBlock &)
{
}

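To make the AND-combination concrete, here is a small standalone recomputation (mask values per SPIRV-Headers; the module contents are hypothetical):

#include <cstdint>
#include <cstdio>

// FPFastMathMode mask bits as defined in SPIRV-Headers.
enum : uint32_t
{
	NotNaN = 0x1, NotInf = 0x2, NSZ = 0x4, AllowRecip = 0x8,
	AllowContract = 0x10000, AllowReassoc = 0x20000
};

int main()
{
	// Suppose the module declares FPFastMathDefault(fp32) = AllowContract | AllowReassoc
	// and no other float controls; flags start fully enabled and are ANDed down.
	uint32_t fp_flags = ~0u;
	fp_flags &= (AllowContract | AllowReassoc);

	const uint32_t relax_mask = NSZ | AllowRecip | AllowReassoc;
	const uint32_t fast_mask = relax_mask | NotNaN | NotInf;

	const char *math_mode = "safe";
	if ((fp_flags & fast_mask) == fast_mask)
		math_mode = "fast";
	else if ((fp_flags & relax_mask) == relax_mask)
		math_mode = "relaxed";

	// Prints math_mode(safe) and contract(fast) for this module.
	printf("#pragma metal fp math_mode(%s)\n", math_mode);
	printf("#pragma metal fp contract(%s)\n", (fp_flags & AllowContract) ? "fast" : "off");
}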
21
3rdparty/spirv-cross/spirv_msl.hpp
vendored
@@ -542,6 +542,10 @@ public:
// The result can be queried with get_is_rasterization_disabled.
bool auto_disable_rasterization = false;

// Use Fast Math pragmas in MSL code, based on SPIR-V float controls and FP ExecutionModes.
// Requires MSL 3.2 or above, and has no effect with earlier MSL versions.
bool use_fast_math_pragmas = false;

bool is_ios() const
{
return platform == iOS;
@@ -767,6 +771,14 @@ public:
// These must only be called after a successful call to CompilerMSL::compile().
bool specialization_constant_is_macro(uint32_t constant_id) const;

// Returns a mask of SPIR-V FP Fast Math Mode flags that represents the set of flags that can be applied
// across all floating-point types. Each FPFastMathDefault execution mode operation identifies the flags
// for one floating-point type, and the value returned here is a bitwise-AND combination across all types.
// If incl_ops is enabled, the FPFastMathMode of any SPIR-V operations is also included in the bitwise-AND
// to determine the minimal fast-math that applies to all default execution modes and all operations.
// The returned value is also affected by execution modes SignedZeroInfNanPreserve and ContractionOff.
uint32_t get_fp_fast_math_flags(bool incl_ops) const;

protected:
// An enum of SPIR-V functions that are implemented in additional
// source code that is added to the shader if necessary.
@@ -1047,6 +1059,8 @@ protected:

void fix_up_shader_inputs_outputs();

bool entry_point_returns_stage_output() const;
bool entry_point_requires_const_device_buffers() const;
std::string func_type_decl(SPIRType &type);
std::string entry_point_args_classic(bool append_comma);
std::string entry_point_args_argument_buffer(bool append_comma);
@@ -1126,7 +1140,7 @@ protected:
uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0);
const char *get_memory_order(uint32_t spv_mem_sem);
void add_pragma_line(const std::string &line);
void add_pragma_line(const std::string &line, bool recompile_on_unique);
void add_typedef_line(const std::string &line);
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id,
@@ -1209,8 +1223,8 @@ protected:
std::unordered_map<uint32_t, uint32_t> fragment_output_components;
std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_output_location;
std::set<std::string> pragma_lines;
std::set<std::string> typedef_lines;
std::vector<std::string> pragma_lines;
std::vector<std::string> typedef_lines;
SmallVector<uint32_t> vars_needing_early_declaration;
std::unordered_set<uint32_t> constant_macro_ids;

@@ -1252,6 +1266,7 @@ protected:
bool using_builtin_array() const;

bool is_rasterization_disabled = false;
bool has_descriptor_side_effects = false;
bool capture_output_to_buffer = false;
bool needs_swizzle_buffer_def = false;
bool used_swizzle_buffer = false;

116
3rdparty/spirv-cross/spirv_parser.cpp
vendored
@@ -213,8 +213,8 @@ void Parser::parse(const Instruction &instruction)

case OpSource:
{
auto lang = static_cast<SourceLanguage>(ops[0]);
switch (lang)
ir.source.lang = static_cast<SourceLanguage>(ops[0]);
switch (ir.source.lang)
{
case SourceLanguageESSL:
ir.source.es = true;
@@ -318,6 +318,19 @@ void Parser::parse(const Instruction &instruction)
ir.load_type_width.insert({ ops[1], type->width });
}
}
else if (op == OpExtInst)
{
// Don't want to deal with ForwardRefs here.

auto &ext = get<SPIRExtension>(ops[2]);
if (ext.ext == SPIRExtension::NonSemanticShaderDebugInfo)
{
// Parse global ShaderDebugInfo we care about.
// Just forward the string information.
if (ops[3] == SPIRExtension::DebugSource)
set<SPIRString>(ops[1], get<SPIRString>(ops[4]).str);
}
}
break;
}

@@ -369,6 +382,30 @@ void Parser::parse(const Instruction &instruction)
execution.output_primitives = ops[2];
break;

case ExecutionModeSignedZeroInfNanPreserve:
switch (ops[2])
{
case 8:
execution.signed_zero_inf_nan_preserve_8 = true;
break;

case 16:
execution.signed_zero_inf_nan_preserve_16 = true;
break;

case 32:
execution.signed_zero_inf_nan_preserve_32 = true;
break;

case 64:
execution.signed_zero_inf_nan_preserve_64 = true;
break;

default:
SPIRV_CROSS_THROW("Invalid bit-width for SignedZeroInfNanPreserve.");
}
break;

default:
break;
}
@@ -381,13 +418,21 @@ void Parser::parse(const Instruction &instruction)
auto mode = static_cast<ExecutionMode>(ops[1]);
execution.flags.set(mode);

if (mode == ExecutionModeLocalSizeId)
switch (mode)
{
case ExecutionModeLocalSizeId:
execution.workgroup_size.id_x = ops[2];
execution.workgroup_size.id_y = ops[3];
execution.workgroup_size.id_z = ops[4];
}
break;

case ExecutionModeFPFastMathDefault:
execution.fp_fast_math_defaults[ops[2]] = ops[3];
break;

default:
break;
}
break;
}

@@ -538,7 +583,7 @@ void Parser::parse(const Instruction &instruction)
uint32_t width = ops[1];
auto &type = set<SPIRType>(id, op);

if (width != 16 && length > 2)
if (width != 16 && width != 8 && length > 2)
SPIRV_CROSS_THROW("Unrecognized FP encoding mode for OpTypeFloat.");

if (width == 64)
@@ -557,6 +602,17 @@ void Parser::parse(const Instruction &instruction)
else
type.basetype = SPIRType::Half;
}
else if (width == 8)
{
if (length < 2)
SPIRV_CROSS_THROW("Missing encoding for OpTypeFloat 8.");
else if (ops[2] == spv::FPEncodingFloat8E4M3EXT)
type.basetype = SPIRType::FloatE4M3;
else if (ops[2] == spv::FPEncodingFloat8E5M2EXT)
type.basetype = SPIRType::FloatE5M2;
else
SPIRV_CROSS_THROW("Invalid encoding for OpTypeFloat 8.");
}
else
SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type.");
type.width = width;
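For reference (bit layouts of the FP8 formats involved, stated from the format definitions rather than from this commit): FPEncodingFloat8E4M3EXT maps to SPIRType::FloatE4M3, 1 sign, 4 exponent and 3 mantissa bits, while FPEncodingFloat8E5M2EXT maps to SPIRType::FloatE5M2, 1 sign, 5 exponent and 2 mantissa bits. Both are 8 bits wide, matching the width == 8 branch above.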
@@ -614,15 +670,33 @@ void Parser::parse(const Instruction &instruction)
auto &matrixbase = set<SPIRType>(id, base);

matrixbase.op = op;
matrixbase.cooperative.scope_id = ops[2];
matrixbase.cooperative.rows_id = ops[3];
matrixbase.cooperative.columns_id = ops[4];
matrixbase.cooperative.use_id = ops[5];
matrixbase.ext.cooperative.scope_id = ops[2];
matrixbase.ext.cooperative.rows_id = ops[3];
matrixbase.ext.cooperative.columns_id = ops[4];
matrixbase.ext.cooperative.use_id = ops[5];
matrixbase.self = id;
matrixbase.parent_type = ops[1];
break;
}

case OpTypeCooperativeVectorNV:
{
uint32_t id = ops[0];
auto &type = set<SPIRType>(id, op);

type.basetype = SPIRType::CoopVecNV;
type.op = op;
type.ext.coopVecNV.component_type_id = ops[1];
type.ext.coopVecNV.component_count_id = ops[2];
type.parent_type = ops[1];

// CoopVecNV can be used with integer operations like SMax, where spirv-opt does explicit checks on integer bitwidth.
auto component_type = get<SPIRType>(type.ext.coopVecNV.component_type_id);
type.width = component_type.width;
break;
}

case OpTypeArray:
{
uint32_t id = ops[0];
@@ -820,6 +894,20 @@ void Parser::parse(const Instruction &instruction)
break;
}

case OpTypeTensorARM:
{
uint32_t id = ops[0];
auto &type = set<SPIRType>(id, op);
type.basetype = SPIRType::Tensor;
type.ext.tensor = {};
type.ext.tensor.type = ops[1];
if (length >= 3)
type.ext.tensor.rank = ops[2];
if (length >= 4)
type.ext.tensor.shape = ops[3];
break;
}

// Variable declaration
// All variables are essentially pointers with a storage qualifier.
case OpVariable:
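Reading the operand handling back into SPIR-V terms (illustrative; operand order as the code assumes it from SPV_ARM_tensors): for an instruction of the shape %t = OpTypeTensorARM %elem %rank %shape, ops[1] is the element type id, ops[2] the optional rank id and ops[3] the optional shape id, which is why the rank and shape assignments are guarded on the instruction length.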
@@ -869,14 +957,24 @@ void Parser::parse(const Instruction &instruction)
// Constants
case OpSpecConstant:
case OpConstant:
case OpConstantCompositeReplicateEXT:
case OpSpecConstantCompositeReplicateEXT:
{
uint32_t id = ops[1];
auto &type = get<SPIRType>(ops[0]);
if (op == OpConstantCompositeReplicateEXT || op == OpSpecConstantCompositeReplicateEXT)
{
auto subconstant = uint32_t(ops[2]);
set<SPIRConstant>(id, ops[0], &subconstant, 1, op == OpSpecConstantCompositeReplicateEXT, true);
}
else
{

if (type.width > 32)
set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
else
set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
}
break;
}

11
3rdparty/spirv-cross/spirv_reflect.cpp
vendored
@@ -477,6 +477,12 @@ string CompilerReflection::execution_model_to_str(spv::ExecutionModel model)
return "rmiss";
case ExecutionModelCallableNV:
return "rcall";
case ExecutionModelMeshNV:
case ExecutionModelMeshEXT:
return "mesh";
case ExecutionModelTaskNV:
case ExecutionModelTaskEXT:
return "task";
default:
return "???";
}
@@ -504,7 +510,9 @@ void CompilerReflection::emit_entry_points()
json_stream->begin_json_object();
json_stream->emit_json_key_value("name", e.name);
json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model));
if (e.execution_model == ExecutionModelGLCompute)
if (e.execution_model == ExecutionModelGLCompute || e.execution_model == spv::ExecutionModelMeshEXT ||
e.execution_model == spv::ExecutionModelMeshNV || e.execution_model == spv::ExecutionModelTaskEXT ||
e.execution_model == spv::ExecutionModelTaskNV)
{
const auto &spv_entry = get_entry_point(e.name, e.execution_model);

@@ -547,6 +555,7 @@ void CompilerReflection::emit_resources()
emit_resources("push_constants", res.push_constant_buffers);
emit_resources("counters", res.atomic_counters);
emit_resources("acceleration_structures", res.acceleration_structures);
emit_resources("tensors", res.tensors);
}

void CompilerReflection::emit_resources(const char *tag, const SmallVector<Resource> &resources)