Updated spirv-cross.

Бранимир Караџић
2025-09-14 09:07:27 -07:00
parent 6f3fb79c0b
commit 686190d7ff
19 changed files with 1606 additions and 294 deletions

View File

@@ -536,6 +536,7 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res
print_resources(compiler, "push", res.push_constant_buffers);
print_resources(compiler, "counters", res.atomic_counters);
print_resources(compiler, "acceleration structures", res.acceleration_structures);
print_resources(compiler, "tensors", res.tensors);
print_resources(compiler, "record buffers", res.shader_record_buffers);
print_resources(compiler, spv::StorageClassInput, res.builtin_inputs);
print_resources(compiler, spv::StorageClassOutput, res.builtin_outputs);
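For reference, a minimal sketch of how the new tensor list surfaces through the C++ reflection API, mirroring what the dump above prints (this assumes a valid module that actually uses the ARM tensor extension; names are illustrative):

#include "spirv_cross.hpp"
#include <cstdint>
#include <cstdio>
#include <vector>

void dump_tensors(const std::vector<uint32_t> &spirv_words)
{
	spirv_cross::Compiler compiler(spirv_words);
	spirv_cross::ShaderResources res = compiler.get_shader_resources();

	// res.tensors is the SmallVector<Resource> added in this update.
	for (const auto &tensor : res.tensors)
	{
		printf("tensor %s: set = %u, binding = %u\n",
		       compiler.get_name(tensor.id).c_str(),
		       compiler.get_decoration(tensor.id, spv::DecorationDescriptorSet),
		       compiler.get_decoration(tensor.id, spv::DecorationBinding));
	}
}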

View File

@@ -1,27 +1,11 @@
/*
** Copyright (c) 2014-2024 The Khronos Group Inc.
** Copyright: 2014-2024 The Khronos Group Inc.
** License: MIT
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and/or associated documentation files (the "Materials"),
** to deal in the Materials without restriction, including without limitation
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
** and/or sell copies of the Materials, and to permit persons to whom the
** Materials are furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Materials.
**
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
** IN THE MATERIALS.
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
** KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
** SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
** https://www.khronos.org/registry/
*/
/*
@@ -176,6 +160,8 @@ typedef enum SpvExecutionMode_ {
SpvExecutionModeSignedZeroInfNanPreserve = 4461,
SpvExecutionModeRoundingModeRTE = 4462,
SpvExecutionModeRoundingModeRTZ = 4463,
SpvExecutionModeNonCoherentTileAttachmentReadQCOM = 4489,
SpvExecutionModeTileShadingRateQCOM = 4490,
SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
SpvExecutionModeStencilRefReplacingEXT = 5027,
SpvExecutionModeCoalescingAMDX = 5069,
@@ -245,6 +231,7 @@ typedef enum SpvStorageClass_ {
SpvStorageClassImage = 11,
SpvStorageClassStorageBuffer = 12,
SpvStorageClassTileImageEXT = 4172,
SpvStorageClassTileAttachmentQCOM = 4491,
SpvStorageClassNodePayloadAMDX = 5068,
SpvStorageClassCallableDataKHR = 5328,
SpvStorageClassCallableDataNV = 5328,
@@ -554,6 +541,7 @@ typedef enum SpvDecoration_ {
SpvDecorationMaxByteOffset = 45,
SpvDecorationAlignmentId = 46,
SpvDecorationMaxByteOffsetId = 47,
SpvDecorationSaturatedToLargestFloat8NormalConversionEXT = 4216,
SpvDecorationNoSignedWrap = 4469,
SpvDecorationNoUnsignedWrap = 4470,
SpvDecorationWeightTextureQCOM = 4487,
@@ -723,6 +711,9 @@ typedef enum SpvBuiltIn_ {
SpvBuiltInDeviceIndex = 4438,
SpvBuiltInViewIndex = 4440,
SpvBuiltInShadingRateKHR = 4444,
SpvBuiltInTileOffsetQCOM = 4492,
SpvBuiltInTileDimensionQCOM = 4493,
SpvBuiltInTileApronSizeQCOM = 4494,
SpvBuiltInBaryCoordNoPerspAMD = 4992,
SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993,
SpvBuiltInBaryCoordNoPerspSampleAMD = 4994,
@@ -1073,7 +1064,13 @@ typedef enum SpvCapability_ {
SpvCapabilityTileImageColorReadAccessEXT = 4166,
SpvCapabilityTileImageDepthReadAccessEXT = 4167,
SpvCapabilityTileImageStencilReadAccessEXT = 4168,
SpvCapabilityTensorsARM = 4174,
SpvCapabilityStorageTensorArrayDynamicIndexingARM = 4175,
SpvCapabilityStorageTensorArrayNonUniformIndexingARM = 4176,
SpvCapabilityGraphARM = 4191,
SpvCapabilityCooperativeMatrixLayoutsARM = 4201,
SpvCapabilityFloat8EXT = 4212,
SpvCapabilityFloat8CooperativeMatrixEXT = 4213,
SpvCapabilityFragmentShadingRateKHR = 4422,
SpvCapabilitySubgroupBallotKHR = 4423,
SpvCapabilityDrawParameters = 4427,
@@ -1109,6 +1106,7 @@ typedef enum SpvCapability_ {
SpvCapabilityTextureSampleWeightedQCOM = 4484,
SpvCapabilityTextureBoxFilterQCOM = 4485,
SpvCapabilityTextureBlockMatchQCOM = 4486,
SpvCapabilityTileShadingQCOM = 4495,
SpvCapabilityTextureBlockMatch2QCOM = 4498,
SpvCapabilityFloat16ImageAMD = 5008,
SpvCapabilityImageGatherBiasLodAMD = 5009,
@@ -1119,6 +1117,8 @@ typedef enum SpvCapability_ {
SpvCapabilityShaderClockKHR = 5055,
SpvCapabilityShaderEnqueueAMDX = 5067,
SpvCapabilityQuadControlKHR = 5087,
SpvCapabilityInt4TypeINTEL = 5112,
SpvCapabilityInt4CooperativeMatrixINTEL = 5114,
SpvCapabilityBFloat16TypeKHR = 5116,
SpvCapabilityBFloat16DotProductKHR = 5117,
SpvCapabilityBFloat16CooperativeMatrixKHR = 5118,
@@ -1287,6 +1287,7 @@ typedef enum SpvCapability_ {
SpvCapabilityMaskedGatherScatterINTEL = 6427,
SpvCapabilityCacheControlsINTEL = 6441,
SpvCapabilityRegisterLimitsINTEL = 6460,
SpvCapabilityBindlessImagesINTEL = 6528,
SpvCapabilityMax = 0x7fffffff,
} SpvCapability;
@@ -1463,6 +1464,24 @@ typedef enum SpvTensorAddressingOperandsMask_ {
SpvTensorAddressingOperandsDecodeFuncMask = 0x00000002,
} SpvTensorAddressingOperandsMask;
typedef enum SpvTensorOperandsShift_ {
SpvTensorOperandsNontemporalARMShift = 0,
SpvTensorOperandsOutOfBoundsValueARMShift = 1,
SpvTensorOperandsMakeElementAvailableARMShift = 2,
SpvTensorOperandsMakeElementVisibleARMShift = 3,
SpvTensorOperandsNonPrivateElementARMShift = 4,
SpvTensorOperandsMax = 0x7fffffff,
} SpvTensorOperandsShift;
typedef enum SpvTensorOperandsMask_ {
SpvTensorOperandsMaskNone = 0,
SpvTensorOperandsNontemporalARMMask = 0x00000001,
SpvTensorOperandsOutOfBoundsValueARMMask = 0x00000002,
SpvTensorOperandsMakeElementAvailableARMMask = 0x00000004,
SpvTensorOperandsMakeElementVisibleARMMask = 0x00000008,
SpvTensorOperandsNonPrivateElementARMMask = 0x00000010,
} SpvTensorOperandsMask;
typedef enum SpvInitializationModeQualifier_ {
SpvInitializationModeQualifierInitOnDeviceReprogramINTEL = 0,
SpvInitializationModeQualifierInitOnDeviceResetINTEL = 1,
@@ -1549,6 +1568,8 @@ typedef enum SpvRawAccessChainOperandsMask_ {
typedef enum SpvFPEncoding_ {
SpvFPEncodingBFloat16KHR = 0,
SpvFPEncodingFloat8E4M3EXT = 4214,
SpvFPEncodingFloat8E5M2EXT = 4215,
SpvFPEncodingMax = 0x7fffffff,
} SpvFPEncoding;
@@ -1927,6 +1948,17 @@ typedef enum SpvOp_ {
SpvOpColorAttachmentReadEXT = 4160,
SpvOpDepthAttachmentReadEXT = 4161,
SpvOpStencilAttachmentReadEXT = 4162,
SpvOpTypeTensorARM = 4163,
SpvOpTensorReadARM = 4164,
SpvOpTensorWriteARM = 4165,
SpvOpTensorQuerySizeARM = 4166,
SpvOpGraphConstantARM = 4181,
SpvOpGraphEntryPointARM = 4182,
SpvOpGraphARM = 4183,
SpvOpGraphInputARM = 4184,
SpvOpGraphSetOutputARM = 4185,
SpvOpGraphEndARM = 4186,
SpvOpTypeGraphARM = 4190,
SpvOpTerminateInvocation = 4416,
SpvOpTypeUntypedPointerKHR = 4417,
SpvOpUntypedVariableKHR = 4418,
@@ -2385,6 +2417,9 @@ typedef enum SpvOp_ {
SpvOpRoundFToTF32INTEL = 6426,
SpvOpMaskedGatherINTEL = 6428,
SpvOpMaskedScatterINTEL = 6429,
SpvOpConvertHandleToImageINTEL = 6529,
SpvOpConvertHandleToSamplerINTEL = 6530,
SpvOpConvertHandleToSampledImageINTEL = 6531,
SpvOpMax = 0x7fffffff,
} SpvOp;
@@ -2743,6 +2778,17 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy
case SpvOpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case SpvOpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case SpvOpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case SpvOpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
case SpvOpTensorReadARM: *hasResult = true; *hasResultType = true; break;
case SpvOpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
case SpvOpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
case SpvOpGraphConstantARM: *hasResult = true; *hasResultType = true; break;
case SpvOpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break;
case SpvOpGraphARM: *hasResult = true; *hasResultType = true; break;
case SpvOpGraphInputARM: *hasResult = true; *hasResultType = true; break;
case SpvOpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break;
case SpvOpGraphEndARM: *hasResult = false; *hasResultType = false; break;
case SpvOpTypeGraphARM: *hasResult = true; *hasResultType = false; break;
case SpvOpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
case SpvOpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
case SpvOpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
@@ -3190,6 +3236,9 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy
case SpvOpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break;
case SpvOpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break;
case SpvOpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break;
case SpvOpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break;
case SpvOpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break;
case SpvOpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break;
}
}
inline const char* SpvSourceLanguageToString(SpvSourceLanguage value) {
@@ -3305,6 +3354,8 @@ inline const char* SpvExecutionModeToString(SpvExecutionMode value) {
case SpvExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve";
case SpvExecutionModeRoundingModeRTE: return "RoundingModeRTE";
case SpvExecutionModeRoundingModeRTZ: return "RoundingModeRTZ";
case SpvExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM";
case SpvExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM";
case SpvExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD";
case SpvExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT";
case SpvExecutionModeCoalescingAMDX: return "CoalescingAMDX";
@@ -3371,6 +3422,7 @@ inline const char* SpvStorageClassToString(SpvStorageClass value) {
case SpvStorageClassImage: return "Image";
case SpvStorageClassStorageBuffer: return "StorageBuffer";
case SpvStorageClassTileImageEXT: return "TileImageEXT";
case SpvStorageClassTileAttachmentQCOM: return "TileAttachmentQCOM";
case SpvStorageClassNodePayloadAMDX: return "NodePayloadAMDX";
case SpvStorageClassCallableDataKHR: return "CallableDataKHR";
case SpvStorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR";
@@ -3619,6 +3671,7 @@ inline const char* SpvDecorationToString(SpvDecoration value) {
case SpvDecorationMaxByteOffset: return "MaxByteOffset";
case SpvDecorationAlignmentId: return "AlignmentId";
case SpvDecorationMaxByteOffsetId: return "MaxByteOffsetId";
case SpvDecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
case SpvDecorationNoSignedWrap: return "NoSignedWrap";
case SpvDecorationNoUnsignedWrap: return "NoUnsignedWrap";
case SpvDecorationWeightTextureQCOM: return "WeightTextureQCOM";
@@ -3778,6 +3831,9 @@ inline const char* SpvBuiltInToString(SpvBuiltIn value) {
case SpvBuiltInDeviceIndex: return "DeviceIndex";
case SpvBuiltInViewIndex: return "ViewIndex";
case SpvBuiltInShadingRateKHR: return "ShadingRateKHR";
case SpvBuiltInTileOffsetQCOM: return "TileOffsetQCOM";
case SpvBuiltInTileDimensionQCOM: return "TileDimensionQCOM";
case SpvBuiltInTileApronSizeQCOM: return "TileApronSizeQCOM";
case SpvBuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD";
case SpvBuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD";
case SpvBuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD";
@@ -3958,7 +4014,13 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
case SpvCapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
case SpvCapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
case SpvCapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
case SpvCapabilityTensorsARM: return "TensorsARM";
case SpvCapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM";
case SpvCapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM";
case SpvCapabilityGraphARM: return "GraphARM";
case SpvCapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
case SpvCapabilityFloat8EXT: return "Float8EXT";
case SpvCapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
case SpvCapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
case SpvCapabilitySubgroupBallotKHR: return "SubgroupBallotKHR";
case SpvCapabilityDrawParameters: return "DrawParameters";
@@ -3992,6 +4054,7 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
case SpvCapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM";
case SpvCapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
case SpvCapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
case SpvCapabilityTileShadingQCOM: return "TileShadingQCOM";
case SpvCapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
case SpvCapabilityFloat16ImageAMD: return "Float16ImageAMD";
case SpvCapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
@@ -4002,6 +4065,8 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
case SpvCapabilityShaderClockKHR: return "ShaderClockKHR";
case SpvCapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX";
case SpvCapabilityQuadControlKHR: return "QuadControlKHR";
case SpvCapabilityInt4TypeINTEL: return "Int4TypeINTEL";
case SpvCapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL";
case SpvCapabilityBFloat16TypeKHR: return "BFloat16TypeKHR";
case SpvCapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR";
case SpvCapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR";
@@ -4144,6 +4209,7 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
case SpvCapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL";
case SpvCapabilityCacheControlsINTEL: return "CacheControlsINTEL";
case SpvCapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL";
case SpvCapabilityBindlessImagesINTEL: return "BindlessImagesINTEL";
default: return "Unknown";
}
}
@@ -4299,6 +4365,8 @@ inline const char* SpvNamedMaximumNumberOfRegistersToString(SpvNamedMaximumNumbe
inline const char* SpvFPEncodingToString(SpvFPEncoding value) {
switch (value) {
case SpvFPEncodingBFloat16KHR: return "BFloat16KHR";
case SpvFPEncodingFloat8E4M3EXT: return "Float8E4M3EXT";
case SpvFPEncodingFloat8E5M2EXT: return "Float8E5M2EXT";
default: return "Unknown";
}
}
@@ -4683,6 +4751,17 @@ inline const char* SpvOpToString(SpvOp value) {
case SpvOpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
case SpvOpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
case SpvOpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
case SpvOpTypeTensorARM: return "OpTypeTensorARM";
case SpvOpTensorReadARM: return "OpTensorReadARM";
case SpvOpTensorWriteARM: return "OpTensorWriteARM";
case SpvOpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
case SpvOpGraphConstantARM: return "OpGraphConstantARM";
case SpvOpGraphEntryPointARM: return "OpGraphEntryPointARM";
case SpvOpGraphARM: return "OpGraphARM";
case SpvOpGraphInputARM: return "OpGraphInputARM";
case SpvOpGraphSetOutputARM: return "OpGraphSetOutputARM";
case SpvOpGraphEndARM: return "OpGraphEndARM";
case SpvOpTypeGraphARM: return "OpTypeGraphARM";
case SpvOpTerminateInvocation: return "OpTerminateInvocation";
case SpvOpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
case SpvOpUntypedVariableKHR: return "OpUntypedVariableKHR";
@@ -5130,6 +5209,9 @@ inline const char* SpvOpToString(SpvOp value) {
case SpvOpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL";
case SpvOpMaskedGatherINTEL: return "OpMaskedGatherINTEL";
case SpvOpMaskedScatterINTEL: return "OpMaskedScatterINTEL";
case SpvOpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL";
case SpvOpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL";
case SpvOpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL";
default: return "Unknown";
}
}

View File

@@ -1,26 +1,10 @@
// Copyright (c) 2014-2024 The Khronos Group Inc.
// Copyright: 2014-2024 The Khronos Group Inc.
// License: MIT
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and/or associated documentation files (the "Materials"),
// to deal in the Materials without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Materials, and to permit persons to whom the
// Materials are furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Materials.
//
// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
//
// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
// IN THE MATERIALS.
// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
// https://www.khronos.org/registry/
// This header is automatically generated by the same tool that creates
// the Binary Section of the SPIR-V specification.
@@ -172,6 +156,8 @@ enum ExecutionMode {
ExecutionModeSignedZeroInfNanPreserve = 4461,
ExecutionModeRoundingModeRTE = 4462,
ExecutionModeRoundingModeRTZ = 4463,
ExecutionModeNonCoherentTileAttachmentReadQCOM = 4489,
ExecutionModeTileShadingRateQCOM = 4490,
ExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
ExecutionModeStencilRefReplacingEXT = 5027,
ExecutionModeCoalescingAMDX = 5069,
@@ -241,6 +227,7 @@ enum StorageClass {
StorageClassImage = 11,
StorageClassStorageBuffer = 12,
StorageClassTileImageEXT = 4172,
StorageClassTileAttachmentQCOM = 4491,
StorageClassNodePayloadAMDX = 5068,
StorageClassCallableDataKHR = 5328,
StorageClassCallableDataNV = 5328,
@@ -550,6 +537,7 @@ enum Decoration {
DecorationMaxByteOffset = 45,
DecorationAlignmentId = 46,
DecorationMaxByteOffsetId = 47,
DecorationSaturatedToLargestFloat8NormalConversionEXT = 4216,
DecorationNoSignedWrap = 4469,
DecorationNoUnsignedWrap = 4470,
DecorationWeightTextureQCOM = 4487,
@@ -719,6 +707,9 @@ enum BuiltIn {
BuiltInDeviceIndex = 4438,
BuiltInViewIndex = 4440,
BuiltInShadingRateKHR = 4444,
BuiltInTileOffsetQCOM = 4492,
BuiltInTileDimensionQCOM = 4493,
BuiltInTileApronSizeQCOM = 4494,
BuiltInBaryCoordNoPerspAMD = 4992,
BuiltInBaryCoordNoPerspCentroidAMD = 4993,
BuiltInBaryCoordNoPerspSampleAMD = 4994,
@@ -1069,7 +1060,13 @@ enum Capability {
CapabilityTileImageColorReadAccessEXT = 4166,
CapabilityTileImageDepthReadAccessEXT = 4167,
CapabilityTileImageStencilReadAccessEXT = 4168,
CapabilityTensorsARM = 4174,
CapabilityStorageTensorArrayDynamicIndexingARM = 4175,
CapabilityStorageTensorArrayNonUniformIndexingARM = 4176,
CapabilityGraphARM = 4191,
CapabilityCooperativeMatrixLayoutsARM = 4201,
CapabilityFloat8EXT = 4212,
CapabilityFloat8CooperativeMatrixEXT = 4213,
CapabilityFragmentShadingRateKHR = 4422,
CapabilitySubgroupBallotKHR = 4423,
CapabilityDrawParameters = 4427,
@@ -1105,6 +1102,7 @@ enum Capability {
CapabilityTextureSampleWeightedQCOM = 4484,
CapabilityTextureBoxFilterQCOM = 4485,
CapabilityTextureBlockMatchQCOM = 4486,
CapabilityTileShadingQCOM = 4495,
CapabilityTextureBlockMatch2QCOM = 4498,
CapabilityFloat16ImageAMD = 5008,
CapabilityImageGatherBiasLodAMD = 5009,
@@ -1115,6 +1113,8 @@ enum Capability {
CapabilityShaderClockKHR = 5055,
CapabilityShaderEnqueueAMDX = 5067,
CapabilityQuadControlKHR = 5087,
CapabilityInt4TypeINTEL = 5112,
CapabilityInt4CooperativeMatrixINTEL = 5114,
CapabilityBFloat16TypeKHR = 5116,
CapabilityBFloat16DotProductKHR = 5117,
CapabilityBFloat16CooperativeMatrixKHR = 5118,
@@ -1283,6 +1283,7 @@ enum Capability {
CapabilityMaskedGatherScatterINTEL = 6427,
CapabilityCacheControlsINTEL = 6441,
CapabilityRegisterLimitsINTEL = 6460,
CapabilityBindlessImagesINTEL = 6528,
CapabilityMax = 0x7fffffff,
};
@@ -1459,6 +1460,24 @@ enum TensorAddressingOperandsMask {
TensorAddressingOperandsDecodeFuncMask = 0x00000002,
};
enum TensorOperandsShift {
TensorOperandsNontemporalARMShift = 0,
TensorOperandsOutOfBoundsValueARMShift = 1,
TensorOperandsMakeElementAvailableARMShift = 2,
TensorOperandsMakeElementVisibleARMShift = 3,
TensorOperandsNonPrivateElementARMShift = 4,
TensorOperandsMax = 0x7fffffff,
};
enum TensorOperandsMask {
TensorOperandsMaskNone = 0,
TensorOperandsNontemporalARMMask = 0x00000001,
TensorOperandsOutOfBoundsValueARMMask = 0x00000002,
TensorOperandsMakeElementAvailableARMMask = 0x00000004,
TensorOperandsMakeElementVisibleARMMask = 0x00000008,
TensorOperandsNonPrivateElementARMMask = 0x00000010,
};
enum InitializationModeQualifier {
InitializationModeQualifierInitOnDeviceReprogramINTEL = 0,
InitializationModeQualifierInitOnDeviceResetINTEL = 1,
@@ -1545,6 +1564,8 @@ enum RawAccessChainOperandsMask {
enum FPEncoding {
FPEncodingBFloat16KHR = 0,
FPEncodingFloat8E4M3EXT = 4214,
FPEncodingFloat8E5M2EXT = 4215,
FPEncodingMax = 0x7fffffff,
};
@@ -1923,6 +1944,17 @@ enum Op {
OpColorAttachmentReadEXT = 4160,
OpDepthAttachmentReadEXT = 4161,
OpStencilAttachmentReadEXT = 4162,
OpTypeTensorARM = 4163,
OpTensorReadARM = 4164,
OpTensorWriteARM = 4165,
OpTensorQuerySizeARM = 4166,
OpGraphConstantARM = 4181,
OpGraphEntryPointARM = 4182,
OpGraphARM = 4183,
OpGraphInputARM = 4184,
OpGraphSetOutputARM = 4185,
OpGraphEndARM = 4186,
OpTypeGraphARM = 4190,
OpTerminateInvocation = 4416,
OpTypeUntypedPointerKHR = 4417,
OpUntypedVariableKHR = 4418,
@@ -2381,6 +2413,9 @@ enum Op {
OpRoundFToTF32INTEL = 6426,
OpMaskedGatherINTEL = 6428,
OpMaskedScatterINTEL = 6429,
OpConvertHandleToImageINTEL = 6529,
OpConvertHandleToSamplerINTEL = 6530,
OpConvertHandleToSampledImageINTEL = 6531,
OpMax = 0x7fffffff,
};
@@ -2739,6 +2774,17 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case OpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case OpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
case OpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
case OpTensorReadARM: *hasResult = true; *hasResultType = true; break;
case OpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
case OpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
case OpGraphConstantARM: *hasResult = true; *hasResultType = true; break;
case OpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break;
case OpGraphARM: *hasResult = true; *hasResultType = true; break;
case OpGraphInputARM: *hasResult = true; *hasResultType = true; break;
case OpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break;
case OpGraphEndARM: *hasResult = false; *hasResultType = false; break;
case OpTypeGraphARM: *hasResult = true; *hasResultType = false; break;
case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
case OpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
case OpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
@@ -3186,6 +3232,9 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break;
case OpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break;
case OpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break;
case OpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break;
case OpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break;
case OpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break;
}
}
inline const char* SourceLanguageToString(SourceLanguage value) {
@@ -3301,6 +3350,8 @@ inline const char* ExecutionModeToString(ExecutionMode value) {
case ExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve";
case ExecutionModeRoundingModeRTE: return "RoundingModeRTE";
case ExecutionModeRoundingModeRTZ: return "RoundingModeRTZ";
case ExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM";
case ExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM";
case ExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD";
case ExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT";
case ExecutionModeCoalescingAMDX: return "CoalescingAMDX";
@@ -3367,6 +3418,7 @@ inline const char* StorageClassToString(StorageClass value) {
case StorageClassImage: return "Image";
case StorageClassStorageBuffer: return "StorageBuffer";
case StorageClassTileImageEXT: return "TileImageEXT";
case StorageClassTileAttachmentQCOM: return "TileAttachmentQCOM";
case StorageClassNodePayloadAMDX: return "NodePayloadAMDX";
case StorageClassCallableDataKHR: return "CallableDataKHR";
case StorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR";
@@ -3615,6 +3667,7 @@ inline const char* DecorationToString(Decoration value) {
case DecorationMaxByteOffset: return "MaxByteOffset";
case DecorationAlignmentId: return "AlignmentId";
case DecorationMaxByteOffsetId: return "MaxByteOffsetId";
case DecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
case DecorationNoSignedWrap: return "NoSignedWrap";
case DecorationNoUnsignedWrap: return "NoUnsignedWrap";
case DecorationWeightTextureQCOM: return "WeightTextureQCOM";
@@ -3774,6 +3827,9 @@ inline const char* BuiltInToString(BuiltIn value) {
case BuiltInDeviceIndex: return "DeviceIndex";
case BuiltInViewIndex: return "ViewIndex";
case BuiltInShadingRateKHR: return "ShadingRateKHR";
case BuiltInTileOffsetQCOM: return "TileOffsetQCOM";
case BuiltInTileDimensionQCOM: return "TileDimensionQCOM";
case BuiltInTileApronSizeQCOM: return "TileApronSizeQCOM";
case BuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD";
case BuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD";
case BuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD";
@@ -3954,7 +4010,13 @@ inline const char* CapabilityToString(Capability value) {
case CapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
case CapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
case CapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
case CapabilityTensorsARM: return "TensorsARM";
case CapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM";
case CapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM";
case CapabilityGraphARM: return "GraphARM";
case CapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
case CapabilityFloat8EXT: return "Float8EXT";
case CapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
case CapabilitySubgroupBallotKHR: return "SubgroupBallotKHR";
case CapabilityDrawParameters: return "DrawParameters";
@@ -3988,6 +4050,7 @@ inline const char* CapabilityToString(Capability value) {
case CapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM";
case CapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
case CapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
case CapabilityTileShadingQCOM: return "TileShadingQCOM";
case CapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
case CapabilityFloat16ImageAMD: return "Float16ImageAMD";
case CapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
@@ -3998,6 +4061,8 @@ inline const char* CapabilityToString(Capability value) {
case CapabilityShaderClockKHR: return "ShaderClockKHR";
case CapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX";
case CapabilityQuadControlKHR: return "QuadControlKHR";
case CapabilityInt4TypeINTEL: return "Int4TypeINTEL";
case CapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL";
case CapabilityBFloat16TypeKHR: return "BFloat16TypeKHR";
case CapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR";
case CapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR";
@@ -4140,6 +4205,7 @@ inline const char* CapabilityToString(Capability value) {
case CapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL";
case CapabilityCacheControlsINTEL: return "CacheControlsINTEL";
case CapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL";
case CapabilityBindlessImagesINTEL: return "BindlessImagesINTEL";
default: return "Unknown";
}
}
@@ -4295,6 +4361,8 @@ inline const char* NamedMaximumNumberOfRegistersToString(NamedMaximumNumberOfReg
inline const char* FPEncodingToString(FPEncoding value) {
switch (value) {
case FPEncodingBFloat16KHR: return "BFloat16KHR";
case FPEncodingFloat8E4M3EXT: return "Float8E4M3EXT";
case FPEncodingFloat8E5M2EXT: return "Float8E5M2EXT";
default: return "Unknown";
}
}
@@ -4679,6 +4747,17 @@ inline const char* OpToString(Op value) {
case OpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
case OpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
case OpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
case OpTypeTensorARM: return "OpTypeTensorARM";
case OpTensorReadARM: return "OpTensorReadARM";
case OpTensorWriteARM: return "OpTensorWriteARM";
case OpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
case OpGraphConstantARM: return "OpGraphConstantARM";
case OpGraphEntryPointARM: return "OpGraphEntryPointARM";
case OpGraphARM: return "OpGraphARM";
case OpGraphInputARM: return "OpGraphInputARM";
case OpGraphSetOutputARM: return "OpGraphSetOutputARM";
case OpGraphEndARM: return "OpGraphEndARM";
case OpTypeGraphARM: return "OpTypeGraphARM";
case OpTerminateInvocation: return "OpTerminateInvocation";
case OpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
case OpUntypedVariableKHR: return "OpUntypedVariableKHR";
@@ -5126,6 +5205,9 @@ inline const char* OpToString(Op value) {
case OpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL";
case OpMaskedGatherINTEL: return "OpMaskedGatherINTEL";
case OpMaskedScatterINTEL: return "OpMaskedScatterINTEL";
case OpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL";
case OpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL";
case OpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL";
default: return "Unknown";
}
}
@@ -5186,6 +5268,10 @@ inline TensorAddressingOperandsMask operator|(TensorAddressingOperandsMask a, Te
inline TensorAddressingOperandsMask operator&(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) & unsigned(b)); }
inline TensorAddressingOperandsMask operator^(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) ^ unsigned(b)); }
inline TensorAddressingOperandsMask operator~(TensorAddressingOperandsMask a) { return TensorAddressingOperandsMask(~unsigned(a)); }
inline TensorOperandsMask operator|(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) | unsigned(b)); }
inline TensorOperandsMask operator&(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) & unsigned(b)); }
inline TensorOperandsMask operator^(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) ^ unsigned(b)); }
inline TensorOperandsMask operator~(TensorOperandsMask a) { return TensorOperandsMask(~unsigned(a)); }
inline MatrixMultiplyAccumulateOperandsMask operator|(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) | unsigned(b)); }
inline MatrixMultiplyAccumulateOperandsMask operator&(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) & unsigned(b)); }
inline MatrixMultiplyAccumulateOperandsMask operator^(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) ^ unsigned(b)); }

View File

@@ -81,31 +81,105 @@ bool CFG::is_back_edge(uint32_t to) const
// We have a back edge if the visit order is set with the temporary magic value 0.
// Crossing edges will have already been recorded with a visit order.
auto itr = visit_order.find(to);
return itr != end(visit_order) && itr->second.get() == 0;
return itr != end(visit_order) && itr->second.visited_branches && !itr->second.visited_resolve;
}
bool CFG::has_visited_forward_edge(uint32_t to) const
bool CFG::has_visited_branch(uint32_t to) const
{
// If > 0, we have visited the edge already, and this is not a back edge branch.
auto itr = visit_order.find(to);
return itr != end(visit_order) && itr->second.get() > 0;
return itr != end(visit_order) && itr->second.visited_branches;
}
bool CFG::post_order_visit(uint32_t block_id)
void CFG::post_order_visit_entry(uint32_t block)
{
// If we have already branched to this block (back edge), stop recursion.
// If our branches are back-edges, we do not record them.
// We have to record crossing edges however.
if (has_visited_forward_edge(block_id))
return true;
else if (is_back_edge(block_id))
return false;
visit_stack.push_back(block);
// Block back-edges from recursively revisiting ourselves.
visit_order[block_id].get() = 0;
while (!visit_stack.empty())
{
bool keep_iterating;
do
{
// Reverse the order to allow for stack-like behavior and preserves the visit order from recursive algorithm.
// Traverse depth first.
uint32_t to_visit = visit_stack.back();
last_visited_size = visit_stack.size();
post_order_visit_branches(to_visit);
keep_iterating = last_visited_size != visit_stack.size();
if (keep_iterating)
std::reverse(visit_stack.begin() + last_visited_size, visit_stack.end());
} while (keep_iterating);
// We've reached the end of some tree leaf. Resolve the stack.
// Any node which has been visited for real can be popped now.
while (!visit_stack.empty() && visit_order[visit_stack.back()].visited_branches)
{
post_order_visit_resolve(visit_stack.back());
visit_stack.pop_back();
}
}
}
void CFG::visit_branch(uint32_t block_id)
{
// Prune obvious duplicates.
if (std::find(visit_stack.begin() + last_visited_size, visit_stack.end(), block_id) == visit_stack.end() &&
!has_visited_branch(block_id))
{
visit_stack.push_back(block_id);
}
}
void CFG::post_order_visit_branches(uint32_t block_id)
{
auto &block = compiler.get<SPIRBlock>(block_id);
auto &visit = visit_order[block_id];
if (visit.visited_branches)
return;
visit.visited_branches = true;
if (block.merge == SPIRBlock::MergeLoop)
visit_branch(block.merge_block);
else if (block.merge == SPIRBlock::MergeSelection)
visit_branch(block.next_block);
// First visit our branch targets.
switch (block.terminator)
{
case SPIRBlock::Direct:
visit_branch(block.next_block);
break;
case SPIRBlock::Select:
visit_branch(block.true_block);
visit_branch(block.false_block);
break;
case SPIRBlock::MultiSelect:
{
const auto &cases = compiler.get_case_list(block);
for (const auto &target : cases)
visit_branch(target.block);
if (block.default_block)
visit_branch(block.default_block);
break;
}
default:
break;
}
}
void CFG::post_order_visit_resolve(uint32_t block_id)
{
auto &block = compiler.get<SPIRBlock>(block_id);
auto &visit_block = visit_order[block_id];
assert(visit_block.visited_branches);
auto &visited = visit_order[block_id].visited_resolve;
if (visited)
return;
// If this is a loop header, add an implied branch to the merge target.
// This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
// To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
@@ -116,21 +190,21 @@ bool CFG::post_order_visit(uint32_t block_id)
// is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis.
// For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine,
// but for loops, only the header might end up actually branching to merge block.
if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block))
if (block.merge == SPIRBlock::MergeLoop && !is_back_edge(block.merge_block))
add_branch(block_id, block.merge_block);
// First visit our branch targets.
switch (block.terminator)
{
case SPIRBlock::Direct:
if (post_order_visit(block.next_block))
if (!is_back_edge(block.next_block))
add_branch(block_id, block.next_block);
break;
case SPIRBlock::Select:
if (post_order_visit(block.true_block))
if (!is_back_edge(block.true_block))
add_branch(block_id, block.true_block);
if (post_order_visit(block.false_block))
if (!is_back_edge(block.false_block))
add_branch(block_id, block.false_block);
break;
@@ -139,10 +213,10 @@ bool CFG::post_order_visit(uint32_t block_id)
const auto &cases = compiler.get_case_list(block);
for (const auto &target : cases)
{
if (post_order_visit(target.block))
if (!is_back_edge(target.block))
add_branch(block_id, target.block);
}
if (block.default_block && post_order_visit(block.default_block))
if (block.default_block && !is_back_edge(block.default_block))
add_branch(block_id, block.default_block);
break;
}
@@ -157,7 +231,7 @@ bool CFG::post_order_visit(uint32_t block_id)
// We can use the variable without a Phi since there is only one possible parent here.
// However, in this case, we need to hoist out the inner variable to outside the branch.
// Use same strategy as loops.
if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block))
if (block.merge == SPIRBlock::MergeSelection && !is_back_edge(block.next_block))
{
// If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup.
// Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement
@@ -201,10 +275,9 @@ bool CFG::post_order_visit(uint32_t block_id)
}
}
// Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
visit_order[block_id].get() = ++visit_count;
visited = true;
visit_block.order = ++visit_count;
post_order.push_back(block_id);
return true;
}
void CFG::build_post_order_visit_order()
@@ -213,11 +286,12 @@ void CFG::build_post_order_visit_order()
visit_count = 0;
visit_order.clear();
post_order.clear();
post_order_visit(block);
post_order_visit_entry(block);
}
void CFG::add_branch(uint32_t from, uint32_t to)
{
assert(from && to);
const auto add_unique = [](SmallVector<uint32_t> &l, uint32_t value) {
auto itr = find(begin(l), end(l), value);
if (itr == end(l))
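The CFG change above replaces the recursive post_order_visit() with an explicit visit stack and two flags per block: visited_branches is set once a block's successors have been pushed, visited_resolve once the block itself is emitted in post order. A stripped-down sketch of the same two-phase traversal over a plain adjacency list, without the SPIRBlock merge/terminator handling (types and names here are illustrative, not the spirv-cross ones):

#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <vector>

struct Visit
{
	bool branches = false;
	bool resolved = false;
	int order = -1;
};

void post_order(const std::unordered_map<uint32_t, std::vector<uint32_t>> &succ,
                uint32_t entry, std::vector<uint32_t> &out)
{
	std::unordered_map<uint32_t, Visit> visit;
	std::vector<uint32_t> stack = { entry };
	int count = 0;

	while (!stack.empty())
	{
		// Phase 1: expand branch targets depth-first until nothing new is pushed.
		for (;;)
		{
			uint32_t block = stack.back();
			size_t size_before = stack.size();
			if (!visit[block].branches)
			{
				visit[block].branches = true;
				auto itr = succ.find(block);
				if (itr != succ.end())
					for (uint32_t to : itr->second)
						if (!visit[to].branches) // back/crossing edges are not pushed again
							stack.push_back(to);
			}
			if (stack.size() == size_before)
				break;
			// Keep the order a recursive DFS would have visited the successors in.
			std::reverse(stack.begin() + size_before, stack.end());
		}

		// Phase 2: pop finished blocks and hand out post-order numbers.
		while (!stack.empty() && visit[stack.back()].branches)
		{
			uint32_t block = stack.back();
			if (!visit[block].resolved)
			{
				visit[block].resolved = true;
				visit[block].order = ++count;
				out.push_back(block);
			}
			stack.pop_back();
		}
	}
}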

View File

@@ -68,7 +68,7 @@ public:
{
auto itr = visit_order.find(block);
assert(itr != std::end(visit_order));
int v = itr->second.get();
int v = itr->second.order;
assert(v > 0);
return uint32_t(v);
}
@@ -114,17 +114,9 @@ public:
private:
struct VisitOrder
{
int &get()
{
return v;
}
const int &get() const
{
return v;
}
int v = -1;
int order = -1;
bool visited_resolve = false;
bool visited_branches = false;
};
Compiler &compiler;
@@ -139,11 +131,17 @@ private:
void add_branch(uint32_t from, uint32_t to);
void build_post_order_visit_order();
void build_immediate_dominators();
bool post_order_visit(uint32_t block);
void post_order_visit_branches(uint32_t block);
void post_order_visit_resolve(uint32_t block);
void post_order_visit_entry(uint32_t block);
uint32_t visit_count = 0;
bool is_back_edge(uint32_t to) const;
bool has_visited_forward_edge(uint32_t to) const;
bool has_visited_branch(uint32_t to) const;
void visit_branch(uint32_t block_id);
SmallVector<uint32_t> visit_stack;
size_t last_visited_size = 0;
};
class DominatorBuilder

View File

@@ -574,6 +574,7 @@ struct SPIRType : IVariant
Sampler,
AccelerationStructure,
RayQuery,
CoopVecNV,
// Keep internal types at the end.
ControlPointArray,
@@ -581,7 +582,11 @@ struct SPIRType : IVariant
Char,
// MSL specific type, that is used by 'object'(analog of 'task' from glsl) shader.
MeshGridProperties,
BFloat16
BFloat16,
FloatE4M3,
FloatE5M2,
Tensor
};
// Scalar/vector/matrix support.
@@ -606,13 +611,29 @@ struct SPIRType : IVariant
bool pointer = false;
bool forward_pointer = false;
struct
union
{
uint32_t use_id = 0;
uint32_t rows_id = 0;
uint32_t columns_id = 0;
uint32_t scope_id = 0;
} cooperative;
struct
{
uint32_t use_id;
uint32_t rows_id;
uint32_t columns_id;
uint32_t scope_id;
} cooperative;
struct
{
uint32_t component_type_id;
uint32_t component_count_id;
} coopVecNV;
struct
{
uint32_t type;
uint32_t rank;
uint32_t shape;
} tensor;
} ext;
spv::StorageClass storage = spv::StorageClassGeneric;
@@ -670,6 +691,12 @@ struct SPIRExtension : IVariant
NonSemanticGeneric
};
enum ShaderDebugInfoOps
{
DebugLine = 103,
DebugSource = 35
};
explicit SPIRExtension(Extension ext_)
: ext(ext_)
{
@@ -695,6 +722,11 @@ struct SPIREntryPoint
FunctionID self = 0;
std::string name;
std::string orig_name;
std::unordered_map<uint32_t, uint32_t> fp_fast_math_defaults;
bool signed_zero_inf_nan_preserve_8 = false;
bool signed_zero_inf_nan_preserve_16 = false;
bool signed_zero_inf_nan_preserve_32 = false;
bool signed_zero_inf_nan_preserve_64 = false;
SmallVector<VariableID> interface_variables;
Bitset flags;
@@ -927,6 +959,7 @@ struct SPIRBlock : IVariant
// All access to these variables are dominated by this block,
// so before branching anywhere we need to make sure that we declare these variables.
SmallVector<VariableID> dominated_variables;
SmallVector<bool> rearm_dominated_variables;
// These are variables which should be declared in a for loop header, if we
// fail to use a classic for-loop,
@@ -1238,6 +1271,26 @@ struct SPIRConstant : IVariant
return u.f32;
}
static inline float fe4m3_to_f32(uint8_t v)
{
if ((v & 0x7f) == 0x7f)
{
union
{
float f32;
uint32_t u32;
} u;
u.u32 = (v & 0x80) ? 0xffffffffu : 0x7fffffffu;
return u.f32;
}
else
{
// Reuse the FP16 to FP32 code. Cute bit-hackery.
return f16_to_f32((int16_t(int8_t(v)) << 7) & (0xffff ^ 0x4000)) * 256.0f;
}
}
inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
{
return m.c[col].id[row];
@@ -1286,6 +1339,16 @@ struct SPIRConstant : IVariant
return fp32;
}
inline float scalar_floate4m3(uint32_t col = 0, uint32_t row = 0) const
{
return fe4m3_to_f32(scalar_u8(col, row));
}
inline float scalar_bf8(uint32_t col = 0, uint32_t row = 0) const
{
return f16_to_f32(scalar_u8(col, row) << 8);
}
inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
{
return m.c[col].r[row].f32;
@@ -1356,9 +1419,10 @@ struct SPIRConstant : IVariant
SPIRConstant() = default;
SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized, bool replicated_ = false)
: constant_type(constant_type_)
, specialization(specialized)
, replicated(replicated_)
{
subconstants.reserve(num_elements);
for (uint32_t i = 0; i < num_elements; i++)
@@ -1437,6 +1501,9 @@ struct SPIRConstant : IVariant
// For composites which are constant arrays, etc.
SmallVector<ConstantID> subconstants;
// Whether the subconstants are intended to be replicated (e.g. OpConstantCompositeReplicateEXT)
bool replicated = false;
// Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant,
// and uses them to initialize the constant. This allows the user
// to still be able to specialize the value by supplying corresponding
@@ -1732,6 +1799,7 @@ struct Meta
uint32_t spec_id = 0;
uint32_t index = 0;
spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
spv::FPFastMathModeMask fp_fast_math_mode = spv::FPFastMathModeMaskNone;
bool builtin = false;
bool qualified_alias_explicit_override = false;
@@ -1787,7 +1855,8 @@ private:
static inline bool type_is_floating_point(const SPIRType &type)
{
return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double;
return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double ||
type.basetype == SPIRType::BFloat16 || type.basetype == SPIRType::FloatE5M2 || type.basetype == SPIRType::FloatE4M3;
}
static inline bool type_is_integral(const SPIRType &type)
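The fe4m3_to_f32() helper added above reuses f16_to_f32(): sign-extending the byte and shifting left by 7 drops the E4M3 sign, exponent and mantissa into an FP16 bit pattern (the & (0xffff ^ 0x4000) clears the duplicated sign bit that would land in the top exponent position), and the final * 256.0f compensates for the bias difference (FP16 bias 15 vs E4M3 bias 7). The all-ones exponent/mantissa case is routed to NaN because E4M3 has no infinities, and scalar_bf8() decodes E5M2 by simply shifting the byte into the top half of an FP16, since E5M2 shares FP16's exponent width and bias. As a cross-check, here is a small standalone decode written directly against the E4M3 field layout; it illustrates the format and is not spirv-cross code:

#include <cmath>
#include <cstdint>
#include <cstdio>

// E4M3: 1 sign bit, 4 exponent bits (bias 7), 3 mantissa bits,
// no infinities, exponent+mantissa all-ones encodes NaN.
static float decode_e4m3(uint8_t v)
{
	const int sign = (v >> 7) & 1;
	const int exp = (v >> 3) & 0xF;
	const int man = v & 0x7;

	if (exp == 0xF && man == 0x7)
		return sign ? -NAN : NAN;

	float value;
	if (exp == 0)
		value = std::ldexp(float(man) / 8.0f, -6);             // denormal: 0.mmm * 2^-6
	else
		value = std::ldexp(1.0f + float(man) / 8.0f, exp - 7); // normal: 1.mmm * 2^(e-7)

	return sign ? -value : value;
}

int main()
{
	// 0x48 -> exponent 9, mantissa 0 -> 1.0 * 2^(9 - 7) = 4.0
	printf("%f\n", decode_e4m3(0x48));
}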

View File

@@ -280,6 +280,9 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
// This is a global side effect of the function.
return false;
case OpTensorReadARM:
return false;
case OpExtInst:
{
uint32_t extension_set = ops[2];
@@ -373,6 +376,7 @@ void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_
case OpLoad:
case OpCooperativeMatrixLoadKHR:
case OpCooperativeVectorLoadNV:
case OpImageRead:
{
// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
@@ -1152,6 +1156,11 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *
{
res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
}
// Tensors
else if (type.basetype == SPIRType::Tensor)
{
res.tensors.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
}
else
{
res.gl_plain_uniforms.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
@@ -1169,11 +1178,8 @@ bool Compiler::type_is_top_level_block(const SPIRType &type) const
return has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
}
bool Compiler::type_is_block_like(const SPIRType &type) const
bool Compiler::type_is_explicit_layout(const SPIRType &type) const
{
if (type_is_top_level_block(type))
return true;
if (type.basetype == SPIRType::Struct)
{
// Block-like types may have Offset decorations.
@@ -1185,6 +1191,14 @@ bool Compiler::type_is_block_like(const SPIRType &type) const
return false;
}
bool Compiler::type_is_block_like(const SPIRType &type) const
{
if (type_is_top_level_block(type))
return true;
else
return type_is_explicit_layout(type);
}
void Compiler::parse_fixup()
{
// Figure out specialization constants for work group sizes.
@@ -2370,6 +2384,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
execution.output_primitives = arg0;
break;
case ExecutionModeFPFastMathDefault:
execution.fp_fast_math_defaults[arg0] = arg1;
break;
default:
break;
}
@@ -4334,6 +4352,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
case OpCopyObject:
case OpLoad:
case OpCooperativeVectorLoadNV:
case OpCooperativeMatrixLoadKHR:
if (ops[2] == var)
return true;
@@ -5151,7 +5170,7 @@ bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const
bool Compiler::type_is_opaque_value(const SPIRType &type) const
{
return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
type.basetype == SPIRType::Sampler);
type.basetype == SPIRType::Sampler || type.basetype == SPIRType::Tensor);
}
// Make these member functions so we can easily break on any force_recompile events.
@@ -5469,6 +5488,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
{
case OpLoad:
case OpCooperativeMatrixLoadKHR:
case OpCooperativeVectorLoadNV:
{
if (length < 3)
return false;
@@ -5547,6 +5567,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
case OpImageWrite:
case OpAtomicStore:
case OpCooperativeMatrixStoreKHR:
case OpCooperativeVectorStoreNV:
{
if (length < 1)
return false;

View File

@@ -95,6 +95,7 @@ struct ShaderResources
SmallVector<Resource> atomic_counters;
SmallVector<Resource> acceleration_structures;
SmallVector<Resource> gl_plain_uniforms;
SmallVector<Resource> tensors;
// There can only be one push constant block,
// but keep the vector in case this restriction is lifted in the future.
@@ -1171,6 +1172,7 @@ protected:
bool type_contains_recursion(const SPIRType &type);
bool type_is_array_of_pointers(const SPIRType &type) const;
bool type_is_block_like(const SPIRType &type) const;
bool type_is_explicit_layout(const SPIRType &type) const;
bool type_is_top_level_block(const SPIRType &type) const;
bool type_is_opaque_value(const SPIRType &type) const;

View File

@@ -200,6 +200,7 @@ struct spvc_resources_s : ScratchMemoryAllocation
SmallVector<spvc_reflected_resource> separate_samplers;
SmallVector<spvc_reflected_resource> acceleration_structures;
SmallVector<spvc_reflected_resource> gl_plain_uniforms;
SmallVector<spvc_reflected_resource> tensors;
SmallVector<spvc_reflected_builtin_resource> builtin_inputs;
SmallVector<spvc_reflected_builtin_resource> builtin_outputs;
@@ -1872,6 +1873,8 @@ bool spvc_resources_s::copy_resources(const ShaderResources &resources)
return false;
if (!copy_resources(gl_plain_uniforms, resources.gl_plain_uniforms))
return false;
if (!copy_resources(tensors, resources.tensors))
return false;
if (!copy_resources(builtin_inputs, resources.builtin_inputs))
return false;
if (!copy_resources(builtin_outputs, resources.builtin_outputs))
@@ -2025,6 +2028,11 @@ spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources,
case SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM:
list = &resources->gl_plain_uniforms;
break;
case SPVC_RESOURCE_TYPE_TENSOR:
list = &resources->tensors;
break;
default:
break;

View File

@@ -40,7 +40,7 @@ extern "C" {
/* Bumped if ABI or API breaks backwards compatibility. */
#define SPVC_C_API_VERSION_MAJOR 0
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
#define SPVC_C_API_VERSION_MINOR 66
#define SPVC_C_API_VERSION_MINOR 67
/* Bumped if internal implementation details change. */
#define SPVC_C_API_VERSION_PATCH 0
@@ -227,6 +227,7 @@ typedef enum spvc_resource_type
SPVC_RESOURCE_TYPE_RAY_QUERY = 13,
SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER = 14,
SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM = 15,
SPVC_RESOURCE_TYPE_TENSOR = 16,
SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff
} spvc_resource_type;
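On the C API side, the new SPVC_RESOURCE_TYPE_TENSOR value feeds the existing spvc_resources_get_resource_list_for_type() path. A hedged usage sketch, assuming compiler is a valid spvc_compiler for the module being reflected and with error handling elided:

#include "spirv_cross_c.h"
#include <stdio.h>

// Enumerate tensor resources through the C API.
static void dump_tensor_resources(spvc_compiler compiler)
{
	spvc_resources resources = NULL;
	const spvc_reflected_resource *list = NULL;
	size_t count = 0;

	spvc_compiler_create_shader_resources(compiler, &resources);
	spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_TENSOR, &list, &count);

	for (size_t i = 0; i < count; i++)
	{
		printf("tensor %s: set = %u, binding = %u\n", list[i].name,
		       spvc_compiler_get_decoration(compiler, list[i].id, SpvDecorationDescriptorSet),
		       spvc_compiler_get_decoration(compiler, list[i].id, SpvDecorationBinding));
	}
}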

View File

@@ -452,6 +452,10 @@ void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument)
dec.fp_rounding_mode = static_cast<FPRoundingMode>(argument);
break;
case DecorationFPFastMathMode:
dec.fp_fast_math_mode = static_cast<FPFastMathModeMask>(argument);
break;
default:
break;
}
@@ -523,8 +527,27 @@ void ParsedIR::mark_used_as_array_length(ID id)
switch (ids[id].get_type())
{
case TypeConstant:
get<SPIRConstant>(id).is_used_as_array_length = true;
{
auto &c = get<SPIRConstant>(id);
c.is_used_as_array_length = true;
// Mark composite dependencies as well.
for (auto &sub_id: c.m.id)
if (sub_id)
mark_used_as_array_length(sub_id);
for (uint32_t col = 0; col < c.m.columns; col++)
{
for (auto &sub_id : c.m.c[col].id)
if (sub_id)
mark_used_as_array_length(sub_id);
}
for (auto &sub_id : c.subconstants)
if (sub_id)
mark_used_as_array_length(sub_id);
break;
}
case TypeConstantOp:
{
@@ -643,6 +666,8 @@ uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const
return dec.index;
case DecorationFPRoundingMode:
return dec.fp_rounding_mode;
case DecorationFPFastMathMode:
return dec.fp_fast_math_mode;
default:
return 1;
}
@@ -730,6 +755,10 @@ void ParsedIR::unset_decoration(ID id, Decoration decoration)
dec.fp_rounding_mode = FPRoundingModeMax;
break;
case DecorationFPFastMathMode:
dec.fp_fast_math_mode = FPFastMathModeMaskNone;
break;
case DecorationHlslCounterBufferGOOGLE:
{
auto &counter = meta[id].hlsl_magic_counter_buffer;
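With the decoration plumbing above, DecorationFPFastMathMode round-trips through the generic accessors like any other decoration. A small, illustrative query (compiler and result_id are assumed to come from surrounding reflection code):

// Read back the fast-math flags recorded for an instruction's result id.
if (compiler.has_decoration(result_id, spv::DecorationFPFastMathMode))
{
	auto flags = spv::FPFastMathModeMask(compiler.get_decoration(result_id, spv::DecorationFPFastMathMode));
	if (flags & spv::FPFastMathModeNotNaNMask)
	{
		// NotNaN was requested for this operation.
	}
}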

View File

@@ -111,6 +111,7 @@ public:
struct Source
{
spv::SourceLanguage lang = spv::SourceLanguageUnknown;
uint32_t version = 0;
bool es = false;
bool known = false;

File diff suppressed because it is too large

View File

@@ -32,6 +32,8 @@
namespace SPIRV_CROSS_NAMESPACE
{
struct GlslConstantNameMapping;
enum PlsFormat
{
PlsNone = 0,
@@ -426,6 +428,8 @@ protected:
const uint32_t *args, uint32_t count);
virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
uint32_t count);
void emit_non_semantic_shader_debug_info(uint32_t result_type, uint32_t result_id, uint32_t op,
const uint32_t *args, uint32_t count);
virtual void emit_header();
void emit_line_directive(uint32_t file_id, uint32_t line_literal);
void build_workgroup_size(SmallVector<std::string> &arguments, const SpecializationConstant &x,
@@ -662,6 +666,7 @@ protected:
bool workgroup_size_is_hidden = false;
bool requires_relaxed_precision_analysis = false;
bool implicit_c_integer_promotion_rules = false;
bool supports_spec_constant_array_size = true;
} backend;
void emit_struct(SPIRType &type);
@@ -685,6 +690,8 @@ protected:
void emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
const SmallVector<uint32_t> &indices);
void emit_block_chain(SPIRBlock &block);
BlockID emit_block_chain_inner(SPIRBlock &block);
void emit_block_chain_cleanup(SPIRBlock &block);
void emit_hoisted_temporaries(SmallVector<std::pair<TypeID, ID>> &temporaries);
int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const;
void emit_constant(const SPIRConstant &constant);
@@ -802,6 +809,10 @@ protected:
virtual void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
std::string to_non_uniform_aware_expression(uint32_t id);
std::string to_atomic_ptr_expression(uint32_t id);
std::string to_pretty_expression_if_int_constant(
uint32_t id,
const GlslConstantNameMapping *mapping_start, const GlslConstantNameMapping *mapping_end,
bool register_expression_read = true);
std::string to_expression(uint32_t id, bool register_expression_read = true);
std::string to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type);
std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, const SPIRType &type);
@@ -830,7 +841,7 @@ protected:
void emit_output_variable_initializer(const SPIRVariable &var);
std::string to_precision_qualifiers_glsl(uint32_t id);
virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);
std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
std::string flags_to_qualifiers_glsl(const SPIRType &type, uint32_t id, const Bitset &flags);
const char *format_to_glsl(spv::ImageFormat format);
virtual std::string layout_for_member(const SPIRType &type, uint32_t index);
virtual std::string to_interpolation_qualifiers(const Bitset &flags);
@@ -1017,6 +1028,8 @@ protected:
const Instruction *get_next_instruction_in_block(const Instruction &instr);
static uint32_t mask_relevant_memory_semantics(uint32_t semantics);
std::string convert_floate4m3_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_floate5m2_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
@@ -1068,6 +1081,9 @@ protected:
std::string format_float(float value) const;
std::string format_double(double value) const;
uint32_t get_fp_fast_math_flags_for_op(uint32_t result_type, uint32_t id) const;
bool has_legacy_nocontract(uint32_t result_type, uint32_t id) const;
private:
void init();
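The new to_pretty_expression_if_int_constant hook walks a [mapping_start, mapping_end) range of GlslConstantNameMapping entries to print a readable GLSL name instead of a raw integer constant. A minimal sketch of that lookup pattern, assuming a simple value/name pair layout (the struct, table, and helper below are illustrative, not the actual spirv-cross definitions):

#include <cstdint>

// Hypothetical pair of a literal constant value and the GLSL spelling to emit for it.
struct ConstantNameMapping
{
	uint32_t value;
	const char *glsl_name;
};

// Example table for SPIR-V Scope values.
static const ConstantNameMapping scope_names[] = {
	{ 2, "gl_ScopeWorkgroup" },
	{ 3, "gl_ScopeSubgroup" },
};

// Return the pretty name if the constant matches, otherwise nullptr so the caller
// falls back to emitting the plain numeric expression.
static const char *pretty_int_constant(uint32_t value)
{
	for (const auto &m : scope_names)
		if (m.value == value)
			return m.glsl_name;
	return nullptr;
}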

View File

@@ -3029,7 +3029,7 @@ string CompilerHLSL::get_inner_entry_point_name() const
SPIRV_CROSS_THROW("Unsupported execution model.");
}
uint32_t CompilerHLSL::input_vertices_from_execution_mode(spirv_cross::SPIREntryPoint &execution) const
uint32_t CompilerHLSL::input_vertices_from_execution_mode(SPIREntryPoint &execution) const
{
uint32_t input_vertices = 1;
@@ -3061,7 +3061,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret
auto &type = get<SPIRType>(func.return_type);
if (type.array.empty())
{
decl += flags_to_qualifiers_glsl(type, return_flags);
decl += flags_to_qualifiers_glsl(type, 0, return_flags);
decl += type_to_glsl(type);
decl += " ";
}

View File

@@ -510,7 +510,7 @@ void CompilerMSL::build_implicit_builtins()
has_local_invocation_index = true;
}
if (need_workgroup_size && builtin == BuiltInLocalInvocationId)
if (need_workgroup_size && builtin == BuiltInWorkgroupSize)
{
builtin_workgroup_size_id = var.self;
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self);
@@ -684,28 +684,6 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
}
if (need_multiview)
{
// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
// it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
// Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
// gl_Layer is an output in vertex-pipeline shaders.
uint32_t type_ptr_out_id = ir.increase_bound_by(2);
SPIRType uint_type_ptr_out = get_uint_type();
uint_type_ptr.op = OpTypePointer;
uint_type_ptr_out.pointer = true;
uint_type_ptr_out.pointer_depth++;
uint_type_ptr_out.parent_type = get_uint_type_id();
uint_type_ptr_out.storage = StorageClassOutput;
auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
ptr_out_type.self = get_uint_type_id();
uint32_t var_id = type_ptr_out_id + 1;
set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
builtin_layer_id = var_id;
mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
}
if (need_multiview && !has_view_idx)
{
uint32_t var_id = ir.increase_bound_by(1);
@@ -718,6 +696,28 @@ void CompilerMSL::build_implicit_builtins()
}
}
if (need_multiview)
{
// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
// it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
// Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
// gl_Layer is an output in vertex-pipeline shaders.
uint32_t type_ptr_out_id = ir.increase_bound_by(2);
SPIRType uint_type_ptr_out = get_uint_type();
uint_type_ptr_out.op = OpTypePointer;
uint_type_ptr_out.pointer = true;
uint_type_ptr_out.pointer_depth++;
uint_type_ptr_out.parent_type = get_uint_type_id();
uint_type_ptr_out.storage = StorageClassOutput;
auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
ptr_out_type.self = get_uint_type_id();
uint32_t var_id = type_ptr_out_id + 1;
set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
builtin_layer_id = var_id;
mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
}
if ((need_tesc_params && (msl_options.multi_patch_workgroup || !has_invocation_id || !has_primitive_id)) ||
(need_tese_params && !has_primitive_id) || need_grid_params)
{
@@ -932,25 +932,55 @@ void CompilerMSL::build_implicit_builtins()
if (need_workgroup_size && !has_workgroup_size)
{
uint32_t offset = ir.increase_bound_by(2);
uint32_t type_ptr_id = offset;
uint32_t var_id = offset + 1;
auto &execution = get_entry_point();
// First, check if the workgroup size _constant_ was defined.
// If it was, we don't need to do--in fact, shouldn't do--anything.
builtin_workgroup_size_id = execution.workgroup_size.constant;
if (builtin_workgroup_size_id == 0)
{
uint32_t var_id = ir.increase_bound_by(1);
// Create gl_WorkgroupSize.
uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
SPIRType uint_type_ptr = get<SPIRType>(type_id);
uint_type_ptr.op = OpTypePointer;
uint_type_ptr.pointer = true;
uint_type_ptr.pointer_depth++;
uint_type_ptr.parent_type = type_id;
uint_type_ptr.storage = StorageClassInput;
// Create gl_WorkgroupSize.
uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
// If we have LocalSize or LocalSizeId, use those to define the workgroup size.
if (execution.flags.get(ExecutionModeLocalSizeId))
{
const SPIRConstant *init[] = { &get<SPIRConstant>(execution.workgroup_size.id_x),
&get<SPIRConstant>(execution.workgroup_size.id_y),
&get<SPIRConstant>(execution.workgroup_size.id_z) };
bool specialized = init[0]->specialization || init[1]->specialization || init[2]->specialization;
set<SPIRConstant>(var_id, type_id, init, 3, specialized);
execution.workgroup_size.constant = var_id;
}
else if (execution.flags.get(ExecutionModeLocalSize))
{
uint32_t offset = ir.increase_bound_by(3);
const SPIRConstant *init[] = {
&set<SPIRConstant>(offset, get_uint_type_id(), execution.workgroup_size.x, false),
&set<SPIRConstant>(offset + 1, get_uint_type_id(), execution.workgroup_size.y, false),
&set<SPIRConstant>(offset + 2, get_uint_type_id(), execution.workgroup_size.z, false)
};
set<SPIRConstant>(var_id, type_id, init, 3, false);
execution.workgroup_size.constant = var_id;
}
else
{
uint32_t type_ptr_id = ir.increase_bound_by(1);
SPIRType uint_type_ptr = get<SPIRType>(type_id);
uint_type_ptr.op = OpTypePointer;
uint_type_ptr.pointer = true;
uint_type_ptr.pointer_depth++;
uint_type_ptr.parent_type = type_id;
uint_type_ptr.storage = StorageClassInput;
auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
ptr_type.self = type_id;
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
builtin_workgroup_size_id = var_id;
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
ptr_type.self = type_id;
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
}
set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
builtin_workgroup_size_id = var_id;
}
}
if (!has_frag_depth && force_frag_depth_passthrough)
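When neither LocalSize nor LocalSizeId is declared, the path above falls back to a real BuiltInWorkgroupSize input variable, which the qualifier and type additions later in this commit map to threads_per_threadgroup and uint3. A sketch of the kind of MSL that results in that fallback case (entry point, buffer layout and variable names are assumed for illustration):

#include <metal_stdlib>
using namespace metal;

kernel void main0(device float *buf          [[buffer(0)]],
                  uint3 gl_WorkGroupSize     [[threads_per_threadgroup]],
                  uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
	// Example use of the runtime workgroup size fed through the builtin input.
	buf[gl_GlobalInvocationID.x] = float(gl_WorkGroupSize.x);
}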
@@ -1681,6 +1711,7 @@ string CompilerMSL::compile()
backend.array_is_value_type_in_buffer_blocks = false;
backend.support_pointer_to_pointer = true;
backend.implicit_c_integer_promotion_rules = true;
backend.supports_spec_constant_array_size = false;
capture_output_to_buffer = msl_options.capture_output_to_buffer;
is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
@@ -1841,7 +1872,7 @@ void CompilerMSL::preprocess_op_codes()
if (preproc.uses_atomics)
{
add_header_line("#include <metal_atomic>");
add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"", false);
}
// Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to
@@ -1850,6 +1881,10 @@ void CompilerMSL::preprocess_op_codes()
(preproc.uses_image_write && !msl_options.supports_msl_version(2, 2)))
is_rasterization_disabled = true;
// FIXME: This currently does not consider BDA side effects, so we cannot deduce const device for BDA.
if (preproc.uses_buffer_write || preproc.uses_image_write)
has_descriptor_side_effects = true;
// Tessellation control shaders are run as compute functions in Metal, and so
// must capture their output to a buffer.
if (is_tesc_shader() || (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation))
@@ -5720,22 +5755,44 @@ void CompilerMSL::emit_header()
{
// This particular line can be overridden during compilation, so make it a flag and not a pragma line.
if (suppress_missing_prototypes)
statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"", false);
if (suppress_incompatible_pointer_types_discard_qualifiers)
statement("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"");
add_pragma_line("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"", false);
// Disable warning about "sometimes uninitialized" when zero-initializing simple threadgroup variables
if (suppress_sometimes_unitialized)
statement("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"");
add_pragma_line("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"", false);
// Disable warning about missing braces for array<T> template to make arrays a value type
if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0)
statement("#pragma clang diagnostic ignored \"-Wmissing-braces\"");
add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-braces\"", false);
// Floating point fast math compile declarations
if (msl_options.use_fast_math_pragmas && msl_options.supports_msl_version(3, 2))
{
uint32_t contract_mask = FPFastMathModeAllowContractMask;
uint32_t relax_mask = (FPFastMathModeNSZMask | FPFastMathModeAllowRecipMask | FPFastMathModeAllowReassocMask);
uint32_t fast_mask = (relax_mask | FPFastMathModeNotNaNMask | FPFastMathModeNotInfMask);
// FP math mode
uint32_t fp_flags = get_fp_fast_math_flags(true);
const char *math_mode = "safe";
if ((fp_flags & fast_mask) == fast_mask) // Must have all flags
math_mode = "fast";
else if ((fp_flags & relax_mask) == relax_mask) // Must have all flags
math_mode = "relaxed";
add_pragma_line(join("#pragma metal fp math_mode(", math_mode, ")"), false);
// FP contraction
const char *contract_mode = ((fp_flags & contract_mask) == contract_mask) ? "fast" : "off";
add_pragma_line(join("#pragma metal fp contract(", contract_mode, ")"), false);
}
for (auto &pragma : pragma_lines)
statement(pragma);
if (!pragma_lines.empty() || suppress_missing_prototypes)
if (!pragma_lines.empty())
statement("");
statement("#include <metal_stdlib>");
@@ -5755,18 +5812,23 @@ void CompilerMSL::emit_header()
statement("");
}
void CompilerMSL::add_pragma_line(const string &line)
void CompilerMSL::add_pragma_line(const string &line, bool recompile_on_unique)
{
auto rslt = pragma_lines.insert(line);
if (rslt.second)
force_recompile();
if (std::find(pragma_lines.begin(), pragma_lines.end(), line) == pragma_lines.end())
{
pragma_lines.push_back(line);
if (recompile_on_unique)
force_recompile();
}
}
void CompilerMSL::add_typedef_line(const string &line)
{
auto rslt = typedef_lines.insert(line);
if (rslt.second)
if (std::find(typedef_lines.begin(), typedef_lines.end(), line) == typedef_lines.end())
{
typedef_lines.push_back(line);
force_recompile();
}
}
// Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared
@@ -8353,9 +8415,22 @@ void CompilerMSL::emit_specialization_constants_and_structs()
if (unique_func_constants[constant_id] == c.self)
statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id,
")]];");
statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name,
") ? ", bitcast_expression(type, sc_tmp_type, sc_tmp_name), " : ", constant_expression(c),
";");
// RenderDoc and other instrumentation may reuse the same SpecId with different base types.
// We deduplicate to one [[function_constant(id)]] temp and then initialize all variants from it.
// Metal forbids as_type to/from 'bool', so if either side is Boolean, avoid bitcasting here and
// prefer a value cast via a constructor instead (e.g. uint(tmp) / float(tmp) / bool(tmp)).
// This preserves expected toggle semantics and prevents illegal MSL like as_type<uint>(bool_tmp).
{
string sc_true_expr;
if (sc_tmp_type == type.basetype)
sc_true_expr = sc_tmp_name;
else if (sc_tmp_type == SPIRType::Boolean || type.basetype == SPIRType::Boolean)
sc_true_expr = join(sc_type_name, "(", sc_tmp_name, ")");
else
sc_true_expr = bitcast_expression(type, sc_tmp_type, sc_tmp_name);
statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name,
") ? ", sc_true_expr, " : ", constant_expression(c), ";");
}
}
else if (has_decoration(c.self, DecorationSpecId))
{
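When instrumentation reuses one SpecId across different base types, this path emits a single function-constant temporary and derives every variant from it, switching to a value cast whenever bool is involved. Illustrative output for a uint and a bool spec constant sharing SpecId 0 (the temporary and variable names are assumed):

constant uint spvSpecConstTmp0 [[function_constant(0)]];
constant uint flags = is_function_constant_defined(spvSpecConstTmp0) ? spvSpecConstTmp0 : 0u;
constant bool enabled = is_function_constant_defined(spvSpecConstTmp0) ? bool(spvSpecConstTmp0) : false;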
@@ -9161,7 +9236,12 @@ bool CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, cons
// and there is a risk of concurrent write access to other components,
// we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect.
// The MSL compiler refuses to allow component-level access for any non-packed vector types.
if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup))
// MSL refuses to take address or reference to vector component, even for packed types, so just force
// through the pointer cast. Not much we can do, sadly.
// For packed types, we could technically omit this if we know the reference does not have to turn into a pointer
// of some kind, but that requires external analysis passes to figure out, and
// this case is likely rare enough that we don't need to bother.
if (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup)
{
const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device";
expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")");
@@ -9485,21 +9565,21 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
break;
case OpFMul:
if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
MSL_BFOP(spvFMul);
else
MSL_BOP(*);
break;
case OpFAdd:
if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
MSL_BFOP(spvFAdd);
else
MSL_BOP(+);
break;
case OpFSub:
if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
MSL_BFOP(spvFSub);
else
MSL_BOP(-);
@@ -9997,7 +10077,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
case OpVectorTimesMatrix:
case OpMatrixTimesVector:
{
if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
if (!msl_options.invariant_float_math && !has_legacy_nocontract(ops[0], ops[1]))
{
CompilerGLSL::emit_instruction(instruction);
break;
@@ -10039,7 +10119,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
case OpMatrixTimesMatrix:
{
if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
if (!msl_options.invariant_float_math && !has_legacy_nocontract(ops[0], ops[1]))
{
CompilerGLSL::emit_instruction(instruction);
break;
@@ -10725,13 +10805,13 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
auto *lhs_var = maybe_get_backing_variable(lhs_id);
if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
lhs_is_array_template = true;
else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(lhs_var->basetype)))
else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_explicit_layout(get<SPIRType>(lhs_var->basetype)))
lhs_is_array_template = false;
auto *rhs_var = maybe_get_backing_variable(rhs_id);
if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
rhs_is_array_template = true;
else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(rhs_var->basetype)))
else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_explicit_layout(get<SPIRType>(rhs_var->basetype)))
rhs_is_array_template = false;
// If threadgroup storage qualifiers are *not* used:
@@ -11182,35 +11262,76 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
auto &restype = get<SPIRType>(result_type);
// Only precise:: preserves NaN in transcendentals (supposedly, cannot find documentation for this).
const auto drop_nan_inf = FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask;
bool preserve_nan = (get_fp_fast_math_flags_for_op(result_type, id) & drop_nan_inf) != drop_nan_inf;
const char *preserve_str = preserve_nan ? "precise" : "fast";
// TODO: Emit the default behavior to match existing code. Might need to be revisited.
// Only fp32 has the precise:: override.
#define EMIT_PRECISE_OVERRIDE(glsl_op, op) \
case GLSLstd450##glsl_op: \
if (restype.basetype == SPIRType::Float && preserve_nan) \
emit_unary_func_op(result_type, id, args[0], "precise::" op); \
else \
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); \
break
switch (op)
{
EMIT_PRECISE_OVERRIDE(Cos, "cos");
EMIT_PRECISE_OVERRIDE(Sin, "sin");
EMIT_PRECISE_OVERRIDE(Tan, "tan");
EMIT_PRECISE_OVERRIDE(Acos, "acos");
EMIT_PRECISE_OVERRIDE(Asin, "asin");
EMIT_PRECISE_OVERRIDE(Atan, "atan");
EMIT_PRECISE_OVERRIDE(Exp, "exp");
EMIT_PRECISE_OVERRIDE(Exp2, "exp2");
EMIT_PRECISE_OVERRIDE(Log, "log");
EMIT_PRECISE_OVERRIDE(Log2, "log2");
EMIT_PRECISE_OVERRIDE(Sqrt, "sqrt");
#undef EMIT_PRECISE_OVERRIDE
case GLSLstd450Sinh:
if (restype.basetype == SPIRType::Half)
{
auto ftype = restype;
ftype.basetype = SPIRType::Float;
// MSL does not have overload for half. Force-cast back to half.
auto expr = join("half(fast::sinh(", to_unpacked_expression(args[0]), "))");
auto expr = join(type_to_glsl(restype), "(", preserve_str, "::sinh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
emit_op(result_type, id, expr, should_forward(args[0]));
inherit_expression_dependencies(id, args[0]);
}
else if (preserve_nan)
emit_unary_func_op(result_type, id, args[0], "precise::sinh");
else
emit_unary_func_op(result_type, id, args[0], "fast::sinh");
break;
case GLSLstd450Cosh:
if (restype.basetype == SPIRType::Half)
{
auto ftype = restype;
ftype.basetype = SPIRType::Float;
// MSL does not have overload for half. Force-cast back to half.
auto expr = join("half(fast::cosh(", to_unpacked_expression(args[0]), "))");
auto expr = join(type_to_glsl(restype), "(", preserve_str, "::cosh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
emit_op(result_type, id, expr, should_forward(args[0]));
inherit_expression_dependencies(id, args[0]);
}
else if (preserve_nan)
emit_unary_func_op(result_type, id, args[0], "precise::cosh");
else
emit_unary_func_op(result_type, id, args[0], "fast::cosh");
break;
case GLSLstd450Tanh:
if (restype.basetype == SPIRType::Half)
{
auto ftype = restype;
ftype.basetype = SPIRType::Float;
// MSL does not have overload for half. Force-cast back to half.
auto expr = join("half(fast::tanh(", to_unpacked_expression(args[0]), "))");
auto expr = join(type_to_glsl(restype), "(", preserve_str, "::tanh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
emit_op(result_type, id, expr, should_forward(args[0]));
inherit_expression_dependencies(id, args[0]);
}
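For half inputs, sinh/cosh/tanh (and atan2 just below) are widened to float and the result is cast back, since MSL has no half overloads for these; precise:: is used when NaN/Inf must survive. Illustrative generated MSL for a Sinh on a half2 value (variable names assumed):

half2 r_fast     = half2(fast::sinh(float2(h)));     // fast math allowed
half2 r_preserve = half2(precise::sinh(float2(h)));  // NaN/Inf preservation requested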
@@ -11221,7 +11342,13 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
if (restype.basetype == SPIRType::Half)
{
// MSL does not have overload for half. Force-cast back to half.
auto expr = join("half(fast::atan2(", to_unpacked_expression(args[0]), ", ", to_unpacked_expression(args[1]), "))");
auto ftype = restype;
ftype.basetype = SPIRType::Float;
auto expr = join(type_to_glsl(restype),
"(", preserve_str, "::atan2(",
type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), "), ",
type_to_glsl(ftype), "(", to_unpacked_expression(args[1]), ")))");
emit_op(result_type, id, expr, should_forward(args[0]) && should_forward(args[1]));
inherit_expression_dependencies(id, args[0]);
inherit_expression_dependencies(id, args[1]);
@@ -11230,7 +11357,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2");
break;
case GLSLstd450InverseSqrt:
emit_unary_func_op(result_type, id, args[0], "rsqrt");
if (restype.basetype == SPIRType::Float && preserve_nan)
emit_unary_func_op(result_type, id, args[0], "precise::rsqrt");
else
emit_unary_func_op(result_type, id, args[0], "rsqrt");
break;
case GLSLstd450RoundEven:
emit_unary_func_op(result_type, id, args[0], "rint");
@@ -11462,11 +11592,14 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
{
auto &exp_type = expression_type(args[0]);
// MSL does not support scalar versions here.
// MSL has no implementation for normalize in the fast:: namespace for half2 and half3
// MSL has no implementation for normalize in the fast:: namespace for half
// Returns -1 or 1 for valid input, sign() does the job.
// precise::normalize asm looks ridiculous.
// Don't think this actually matters unless proven otherwise.
if (exp_type.vecsize == 1)
emit_unary_func_op(result_type, id, args[0], "sign");
else if (exp_type.vecsize <= 3 && exp_type.basetype == SPIRType::Half)
else if (exp_type.basetype == SPIRType::Half)
emit_unary_func_op(result_type, id, args[0], "normalize");
else
emit_unary_func_op(result_type, id, args[0], "fast::normalize");
@@ -11529,7 +11662,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
case GLSLstd450Pow:
// powr makes x < 0.0 undefined, just like SPIR-V.
emit_binary_func_op(result_type, id, args[0], args[1], "powr");
if (restype.basetype == SPIRType::Float && preserve_nan)
emit_binary_func_op(result_type, id, args[0], args[1], "precise::powr");
else
emit_binary_func_op(result_type, id, args[0], args[1], "powr");
break;
default:
@@ -13753,6 +13889,24 @@ uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltI
return loc;
}
bool CompilerMSL::entry_point_returns_stage_output() const
{
if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)
return false;
bool ep_should_return_output = !get_is_rasterization_disabled();
return stage_out_var_id && ep_should_return_output;
}
bool CompilerMSL::entry_point_requires_const_device_buffers() const
{
// For fragment, we don't really need it, but it might help avoid pessimization
// if the compiler deduces it needs to use late-Z for whatever reason.
return (get_execution_model() == ExecutionModelFragment && !has_descriptor_side_effects) ||
(entry_point_returns_stage_output() &&
(get_execution_model() == ExecutionModelVertex ||
get_execution_model() == ExecutionModelTessellationEvaluation));
}
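The helper above centralizes the read-only decision: rasterizing vertex and tessellation-evaluation entry points, and fragment entry points without descriptor side effects, get const device buffers. A minimal sketch of the resulting entry point for a rasterizing vertex shader (struct and entry names are illustrative):

#include <metal_stdlib>
using namespace metal;

struct Positions { float4 pos[3]; };
struct VertexOut { float4 gl_Position [[position]]; };

vertex VertexOut main0(const device Positions &buf [[buffer(0)]], uint vid [[vertex_id]])
{
	VertexOut out;
	out.gl_Position = buf.pos[vid];
	return out;
}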
// Returns the type declaration for a function, including the
// entry type if the current function is the entry point function
string CompilerMSL::func_type_decl(SPIRType &type)
@@ -13763,8 +13917,7 @@ string CompilerMSL::func_type_decl(SPIRType &type)
return return_type;
// If an outgoing interface block has been defined, and it should be returned, override the entry point return type
bool ep_should_return_output = !get_is_rasterization_disabled();
if (stage_out_var_id && ep_should_return_output)
if (entry_point_returns_stage_output())
return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type, 0);
// Prepend a entry type, based on the execution model
@@ -13888,16 +14041,14 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
case StorageClassStorageBuffer:
{
// For arguments from variable pointers, we use the write count deduction, so
// we should not assume any constness here. Only for global SSBOs.
bool readonly = false;
if (!var || has_decoration(type.self, DecorationBlock))
readonly = flags.get(DecorationNonWritable);
if (decoration_flags_signal_coherent(flags))
readonly = false;
addr_space = readonly ? "const device" : "device";
// When dealing with descriptor aliasing, it becomes very problematic to make use of
// readonly qualifiers.
// If rasterization is not disabled in vertex/tese, Metal does not allow side effects and refuses to compile "device",
// even if there are no writes. Just force const device.
if (entry_point_requires_const_device_buffers())
addr_space = "const device";
else
addr_space = "device";
break;
}
@@ -13914,7 +14065,12 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
{
bool ssbo = has_decoration(type.self, DecorationBufferBlock);
if (ssbo)
addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
{
if (entry_point_requires_const_device_buffers())
addr_space = "const device";
else
addr_space = "device";
}
else
addr_space = "constant";
}
@@ -16553,7 +16709,7 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type, uint32_t variable_i
default:
if (type_is_array_of_pointers(type) || using_builtin_array())
{
const SPIRVariable *var = variable_id ? &get<SPIRVariable>(variable_id) : nullptr;
const SPIRVariable *var = variable_id ? maybe_get<SPIRVariable>(variable_id) : nullptr;
if (var && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer) &&
is_array(get_variable_data_type(*var)))
{
@@ -16854,6 +17010,8 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb
if (p_var && p_var->basevariable)
p_var = maybe_get<SPIRVariable>(p_var->basevariable);
bool has_access_qualifier = true;
switch (img_type.access)
{
case AccessQualifierReadOnly:
@@ -16879,12 +17037,21 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb
img_type_name += "write";
}
else
{
has_access_qualifier = false;
}
break;
}
}
if (p_var && has_decoration(p_var->self, DecorationCoherent) && msl_options.supports_msl_version(3, 2))
{
// Cannot declare memory_coherence_device without access qualifier.
if (!has_access_qualifier)
img_type_name += ", access::read";
img_type_name += ", memory_coherence_device";
}
}
img_type_name += ">";
@@ -17317,13 +17484,18 @@ void CompilerMSL::emit_subgroup_cluster_op_cast(uint32_t result_type, uint32_t r
inherit_expression_dependencies(result_id, op0);
}
// Note: Metal forbids bitcasting to/from 'bool' using as_type. This function is used widely
// for generating casts in the backend. To avoid generating illegal MSL when the canonical
// function constant type (from deduplicated SpecId) is Boolean, fall back to value-cast in
// that case by returning type_to_glsl(out_type) instead of as_type<...>.
string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
if (out_type.basetype == in_type.basetype)
return "";
assert(out_type.basetype != SPIRType::Boolean);
assert(in_type.basetype != SPIRType::Boolean);
// Avoid bitcasting to/from booleans in MSL; use value cast instead.
if (out_type.basetype == SPIRType::Boolean || in_type.basetype == SPIRType::Boolean)
return type_to_glsl(out_type);
bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type) && (out_type.vecsize == in_type.vecsize);
bool same_size_cast = (out_type.width * out_type.vecsize) == (in_type.width * in_type.vecsize);
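With the Boolean fallback above, reinterpreting casts keep using as_type<>, while any cast touching bool degrades to a plain value cast, which is what MSL requires. Illustrative results, assuming a uint source named tmp:

float f = as_type<float>(tmp); // same-width reinterpretation: still a bitcast
bool  b = bool(tmp);           // bool involved: value cast, as_type<bool> would not compile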
@@ -17679,6 +17851,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
case BuiltInGlobalInvocationId:
return "thread_position_in_grid";
case BuiltInWorkgroupSize:
return "threads_per_threadgroup";
case BuiltInWorkgroupId:
return "threadgroup_position_in_grid";
@@ -17864,6 +18039,7 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
case BuiltInLocalInvocationId:
case BuiltInNumWorkgroups:
case BuiltInWorkgroupId:
case BuiltInWorkgroupSize:
return "uint3";
case BuiltInLocalInvocationIndex:
case BuiltInNumSubgroups:
@@ -18531,11 +18707,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
case OpFAdd:
case OpFSub:
if (compiler.msl_options.invariant_float_math ||
compiler.has_decoration(args[1], DecorationNoContraction))
{
if (compiler.msl_options.invariant_float_math || compiler.has_legacy_nocontract(args[0], args[1]))
return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub;
}
break;
case OpFMul:
@@ -18543,11 +18716,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
case OpMatrixTimesVector:
case OpVectorTimesMatrix:
case OpMatrixTimesMatrix:
if (compiler.msl_options.invariant_float_math ||
compiler.has_decoration(args[1], DecorationNoContraction))
{
if (compiler.msl_options.invariant_float_math || compiler.has_legacy_nocontract(args[0], args[1]))
return SPVFuncImplFMul;
}
break;
case OpQuantizeToF16:
@@ -19572,7 +19742,7 @@ void CompilerMSL::analyze_argument_buffers()
SetBindingPair pair = { desc_set, binding };
if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler ||
resource.basetype == SPIRType::SampledImage)
resource.basetype == SPIRType::SampledImage || resource.basetype == SPIRType::AccelerationStructure)
{
// Drop pointer information when we emit the resources into a struct.
buffer_type.member_types.push_back(get_variable_data_type_id(var));
@@ -19827,6 +19997,30 @@ bool CompilerMSL::specialization_constant_is_macro(uint32_t const_id) const
return constant_macro_ids.find(const_id) != constant_macro_ids.end();
}
// Start with all fast math flags enabled, and selectively disable based on execution modes and float controls
uint32_t CompilerMSL::get_fp_fast_math_flags(bool incl_ops) const
{
uint32_t fp_flags = ~0;
auto &ep = get_entry_point();
if (ep.flags.get(ExecutionModeSignedZeroInfNanPreserve))
fp_flags &= ~(FPFastMathModeNSZMask | FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask);
if (ep.flags.get(ExecutionModeContractionOff))
fp_flags &= ~(FPFastMathModeAllowContractMask);
for (auto &fp_pair : ep.fp_fast_math_defaults)
if (fp_pair.second)
fp_flags &= get<SPIRConstant>(fp_pair.second).scalar();
if (incl_ops)
for (auto &p_m : ir.meta)
if (p_m.second.decoration.decoration_flags.get(DecorationFPFastMathMode))
fp_flags &= p_m.second.decoration.fp_fast_math_mode;
return fp_flags;
}
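The function AND-combines the per-type FPFastMathDefault values (plus per-operation decorations when incl_ops is set) and clears bits for SignedZeroInfNanPreserve and ContractionOff. A worked example, assuming a module whose fp32 default allows everything relaxed but whose fp16 default only allows contraction:

uint32_t fp32_defaults = FPFastMathModeNotNaNMask | FPFastMathModeNotInfMask |
                         FPFastMathModeNSZMask | FPFastMathModeAllowContractMask |
                         FPFastMathModeAllowReassocMask;
uint32_t fp16_defaults = FPFastMathModeAllowContractMask;

// Bitwise-AND across the declared defaults: only the contract bit survives, so
// emit_header() above would pick math_mode(safe) together with contract(fast).
uint32_t common = fp32_defaults & fp16_defaults;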
void CompilerMSL::emit_block_hints(const SPIRBlock &)
{
}

View File

@@ -542,6 +542,10 @@ public:
// The result can be queried with get_is_rasterization_disabled.
bool auto_disable_rasterization = false;
// Use Fast Math pragmas in MSL code, based on SPIR-V float controls and FP ExecutionModes.
// Requires MSL 3.2 or above, and has no effect with earlier MSL versions.
bool use_fast_math_pragmas = false;
bool is_ios() const
{
return platform == iOS;
@@ -767,6 +771,14 @@ public:
// These must only be called after a successful call to CompilerMSL::compile().
bool specialization_constant_is_macro(uint32_t constant_id) const;
// Returns a mask of SPIR-V FP Fast Math Mode flags, that represents the set of flags that can be applied
// across all floating-point types. Each FPFastMathDefault execution mode operation identifies the flags
// for one floating-point type, and the value returned here is a bitwise-AND combination across all types.
// If incl_ops is enabled, the FPFastMathMode of any SPIR-V operations is also included in the bitwise-AND
// to determine the minimal fast-math that applies to all default execution modes and all operations.
// The returned value is also affected by execution modes SignedZeroInfNanPreserve and ContractionOff.
uint32_t get_fp_fast_math_flags(bool incl_ops) const;
protected:
// An enum of SPIR-V functions that are implemented in additional
// source code that is added to the shader if necessary.
@@ -1047,6 +1059,8 @@ protected:
void fix_up_shader_inputs_outputs();
bool entry_point_returns_stage_output() const;
bool entry_point_requires_const_device_buffers() const;
std::string func_type_decl(SPIRType &type);
std::string entry_point_args_classic(bool append_comma);
std::string entry_point_args_argument_buffer(bool append_comma);
@@ -1126,7 +1140,7 @@ protected:
uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0);
const char *get_memory_order(uint32_t spv_mem_sem);
void add_pragma_line(const std::string &line);
void add_pragma_line(const std::string &line, bool recompile_on_unique);
void add_typedef_line(const std::string &line);
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id,
@@ -1209,8 +1223,8 @@ protected:
std::unordered_map<uint32_t, uint32_t> fragment_output_components;
std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_output_location;
std::set<std::string> pragma_lines;
std::set<std::string> typedef_lines;
std::vector<std::string> pragma_lines;
std::vector<std::string> typedef_lines;
SmallVector<uint32_t> vars_needing_early_declaration;
std::unordered_set<uint32_t> constant_macro_ids;
@@ -1252,6 +1266,7 @@ protected:
bool using_builtin_array() const;
bool is_rasterization_disabled = false;
bool has_descriptor_side_effects = false;
bool capture_output_to_buffer = false;
bool needs_swizzle_buffer_def = false;
bool used_swizzle_buffer = false;

View File

@@ -213,8 +213,8 @@ void Parser::parse(const Instruction &instruction)
case OpSource:
{
auto lang = static_cast<SourceLanguage>(ops[0]);
switch (lang)
ir.source.lang = static_cast<SourceLanguage>(ops[0]);
switch (ir.source.lang)
{
case SourceLanguageESSL:
ir.source.es = true;
@@ -318,6 +318,19 @@ void Parser::parse(const Instruction &instruction)
ir.load_type_width.insert({ ops[1], type->width });
}
}
else if (op == OpExtInst)
{
// Don't want to deal with ForwardRefs here.
auto &ext = get<SPIRExtension>(ops[2]);
if (ext.ext == SPIRExtension::NonSemanticShaderDebugInfo)
{
// Parse global ShaderDebugInfo we care about.
// Just forward the string information.
if (ops[3] == SPIRExtension::DebugSource)
set<SPIRString>(ops[1], get<SPIRString>(ops[4]).str);
}
}
break;
}
@@ -369,6 +382,30 @@ void Parser::parse(const Instruction &instruction)
execution.output_primitives = ops[2];
break;
case ExecutionModeSignedZeroInfNanPreserve:
switch (ops[2])
{
case 8:
execution.signed_zero_inf_nan_preserve_8 = true;
break;
case 16:
execution.signed_zero_inf_nan_preserve_16 = true;
break;
case 32:
execution.signed_zero_inf_nan_preserve_32 = true;
break;
case 64:
execution.signed_zero_inf_nan_preserve_64 = true;
break;
default:
SPIRV_CROSS_THROW("Invalid bit-width for SignedZeroInfNanPreserve.");
}
break;
default:
break;
}
@@ -381,13 +418,21 @@ void Parser::parse(const Instruction &instruction)
auto mode = static_cast<ExecutionMode>(ops[1]);
execution.flags.set(mode);
if (mode == ExecutionModeLocalSizeId)
switch (mode)
{
case ExecutionModeLocalSizeId:
execution.workgroup_size.id_x = ops[2];
execution.workgroup_size.id_y = ops[3];
execution.workgroup_size.id_z = ops[4];
}
break;
case ExecutionModeFPFastMathDefault:
execution.fp_fast_math_defaults[ops[2]] = ops[3];
break;
default:
break;
}
break;
}
@@ -538,7 +583,7 @@ void Parser::parse(const Instruction &instruction)
uint32_t width = ops[1];
auto &type = set<SPIRType>(id, op);
if (width != 16 && length > 2)
if (width != 16 && width != 8 && length > 2)
SPIRV_CROSS_THROW("Unrecognized FP encoding mode for OpTypeFloat.");
if (width == 64)
@@ -557,6 +602,17 @@ void Parser::parse(const Instruction &instruction)
else
type.basetype = SPIRType::Half;
}
else if (width == 8)
{
if (length < 3)
SPIRV_CROSS_THROW("Missing encoding for OpTypeFloat 8.");
else if (ops[2] == spv::FPEncodingFloat8E4M3EXT)
type.basetype = SPIRType::FloatE4M3;
else if (ops[2] == spv::FPEncodingFloat8E5M2EXT)
type.basetype = SPIRType::FloatE5M2;
else
SPIRV_CROSS_THROW("Invalid encoding for OpTypeFloat 8.");
}
else
SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type.");
type.width = width;
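The two 8-bit encodings map to SPIRType::FloatE4M3 and SPIRType::FloatE5M2. Assuming the EXT encodings follow the common FP8 layouts (E4M3: 1 sign, 4 exponent, 3 mantissa bits, bias 7, no infinities; E5M2: 1/5/2, bias 15, IEEE-style Inf/NaN), a small E4M3 decoder sketch, purely for illustration and not part of spirv-cross:

#include <cmath>
#include <cstdint>
#include <limits>

static float decode_fp8_e4m3(uint8_t v)
{
	const int sign = (v >> 7) & 1;
	const int exp  = (v >> 3) & 0xF;
	const int man  = v & 0x7;
	float result;
	if (exp == 0)
		result = std::ldexp(float(man) / 8.0f, -6);        // subnormal range
	else if (exp == 0xF && man == 0x7)
		result = std::numeric_limits<float>::quiet_NaN();  // the only NaN encoding
	else
		result = std::ldexp(1.0f + float(man) / 8.0f, exp - 7);
	return sign ? -result : result;
}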
@@ -614,15 +670,33 @@ void Parser::parse(const Instruction &instruction)
auto &matrixbase = set<SPIRType>(id, base);
matrixbase.op = op;
matrixbase.cooperative.scope_id = ops[2];
matrixbase.cooperative.rows_id = ops[3];
matrixbase.cooperative.columns_id = ops[4];
matrixbase.cooperative.use_id = ops[5];
matrixbase.ext.cooperative.scope_id = ops[2];
matrixbase.ext.cooperative.rows_id = ops[3];
matrixbase.ext.cooperative.columns_id = ops[4];
matrixbase.ext.cooperative.use_id = ops[5];
matrixbase.self = id;
matrixbase.parent_type = ops[1];
break;
}
case OpTypeCooperativeVectorNV:
{
uint32_t id = ops[0];
auto &type = set<SPIRType>(id, op);
type.basetype = SPIRType::CoopVecNV;
type.op = op;
type.ext.coopVecNV.component_type_id = ops[1];
type.ext.coopVecNV.component_count_id = ops[2];
type.parent_type = ops[1];
// CoopVecNV can be used with integer operations like SMax, where
// spirv-opt does explicit checks on integer bit-width
auto component_type = get<SPIRType>(type.ext.coopVecNV.component_type_id);
type.width = component_type.width;
break;
}
case OpTypeArray:
{
uint32_t id = ops[0];
@@ -820,6 +894,20 @@ void Parser::parse(const Instruction &instruction)
break;
}
case OpTypeTensorARM:
{
uint32_t id = ops[0];
auto &type = set<SPIRType>(id, op);
type.basetype = SPIRType::Tensor;
type.ext.tensor = {};
type.ext.tensor.type = ops[1];
if (length >= 3)
type.ext.tensor.rank = ops[2];
if (length >= 4)
type.ext.tensor.shape = ops[3];
break;
}
// Variable declaration
// All variables are essentially pointers with a storage qualifier.
case OpVariable:
@@ -866,17 +954,27 @@ void Parser::parse(const Instruction &instruction)
break;
}
// Constants
case OpSpecConstant:
case OpConstant:
case OpConstantCompositeReplicateEXT:
case OpSpecConstantCompositeReplicateEXT:
{
uint32_t id = ops[1];
auto &type = get<SPIRType>(ops[0]);
if (type.width > 32)
set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
if (op == OpConstantCompositeReplicateEXT || op == OpSpecConstantCompositeReplicateEXT)
{
auto subconstant = uint32_t(ops[2]);
set<SPIRConstant>(id, ops[0], &subconstant, 1, op == OpSpecConstantCompositeReplicateEXT, true);
}
else
set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
{
if (type.width > 32)
set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
else
set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
}
break;
}

View File

@@ -477,6 +477,12 @@ string CompilerReflection::execution_model_to_str(spv::ExecutionModel model)
return "rmiss";
case ExecutionModelCallableNV:
return "rcall";
case ExecutionModelMeshNV:
case ExecutionModelMeshEXT:
return "mesh";
case ExecutionModelTaskNV:
case ExecutionModelTaskEXT:
return "task";
default:
return "???";
}
@@ -504,7 +510,9 @@ void CompilerReflection::emit_entry_points()
json_stream->begin_json_object();
json_stream->emit_json_key_value("name", e.name);
json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model));
if (e.execution_model == ExecutionModelGLCompute)
if (e.execution_model == ExecutionModelGLCompute || e.execution_model == spv::ExecutionModelMeshEXT ||
e.execution_model == spv::ExecutionModelMeshNV || e.execution_model == spv::ExecutionModelTaskEXT ||
e.execution_model == spv::ExecutionModelTaskNV)
{
const auto &spv_entry = get_entry_point(e.name, e.execution_model);
@@ -547,6 +555,7 @@ void CompilerReflection::emit_resources()
emit_resources("push_constants", res.push_constant_buffers);
emit_resources("counters", res.atomic_counters);
emit_resources("acceleration_structures", res.acceleration_structures);
emit_resources("tensors", res.tensors);
}
void CompilerReflection::emit_resources(const char *tag, const SmallVector<Resource> &resources)