diff --git a/3rdparty/glslang/SPIRV/GLSL.ext.QCOM.h b/3rdparty/glslang/SPIRV/GLSL.ext.QCOM.h index 34d6c943c..a671d79a8 100644 --- a/3rdparty/glslang/SPIRV/GLSL.ext.QCOM.h +++ b/3rdparty/glslang/SPIRV/GLSL.ext.QCOM.h @@ -40,4 +40,7 @@ const char* const E_SPV_QCOM_image_processing = "SPV_QCOM_image_processing"; //SPV_QCOM_image_processing2 const char* const E_SPV_QCOM_image_processing2 = "SPV_QCOM_image_processing2"; +//SPV_QCOM_tile_shading +const char* const E_SPV_QCOM_tile_shading = "SPV_QCOM_tile_shading"; + #endif // #ifndef GLSLextQCOM_H diff --git a/3rdparty/glslang/SPIRV/GlslangToSpv.cpp b/3rdparty/glslang/SPIRV/GlslangToSpv.cpp index 1139b915f..9088f776b 100644 --- a/3rdparty/glslang/SPIRV/GlslangToSpv.cpp +++ b/3rdparty/glslang/SPIRV/GlslangToSpv.cpp @@ -1147,6 +1147,17 @@ spv::BuiltIn TGlslangToSpvTraverser::TranslateBuiltInDecoration(glslang::TBuiltI builder.addCapability(spv::Capability::CoreBuiltinsARM); return spv::BuiltIn::WarpMaxIDARM; + // QCOM builtins + case glslang::EbvTileOffsetQCOM: + builder.addExtension(spv::E_SPV_QCOM_tile_shading); + return spv::BuiltIn::TileOffsetQCOM; + case glslang::EbvTileDimensionQCOM: + builder.addExtension(spv::E_SPV_QCOM_tile_shading); + return spv::BuiltIn::TileDimensionQCOM; + case glslang::EbvTileApronSizeQCOM: + builder.addExtension(spv::E_SPV_QCOM_tile_shading); + return spv::BuiltIn::TileApronSizeQCOM; + default: return spv::BuiltIn::Max; } @@ -1328,6 +1339,12 @@ spv::StorageClass TGlslangToSpvTraverser::TranslateStorageClass(const glslang::T return spv::StorageClass::TileImageEXT; } + if (type.getQualifier().isTileAttachmentQCOM()) { + builder.addExtension(spv::E_SPV_QCOM_tile_shading); + builder.addCapability(spv::Capability::TileShadingQCOM); + return spv::StorageClass::TileAttachmentQCOM; + } + if (glslangIntermediate->getSource() != glslang::EShSourceHlsl || type.getQualifier().storage == glslang::EvqUniform) { if (type.isAtomic()) @@ -1741,6 +1758,12 @@ 
TGlslangToSpvTraverser::TGlslangToSpvTraverser(unsigned int spvVersion, builder.addExtension(spv::E_SPV_EXT_shader_tile_image); } + if (glslangIntermediate->getNonCoherentTileAttachmentReadQCOM()) { + builder.addCapability(spv::Capability::TileShadingQCOM); + builder.addExecutionMode(shaderEntry, spv::ExecutionMode::NonCoherentTileAttachmentReadQCOM); + builder.addExtension(spv::E_SPV_QCOM_tile_shading); + } + if (glslangIntermediate->isDepthReplacing()) builder.addExecutionMode(shaderEntry, spv::ExecutionMode::DepthReplacing); @@ -1822,9 +1845,19 @@ TGlslangToSpvTraverser::TGlslangToSpvTraverser(unsigned int spvVersion, } builder.addExecutionModeId(shaderEntry, spv::ExecutionMode::LocalSizeId, dimConstId); } else { - builder.addExecutionMode(shaderEntry, spv::ExecutionMode::LocalSize, glslangIntermediate->getLocalSize(0), - glslangIntermediate->getLocalSize(1), - glslangIntermediate->getLocalSize(2)); + if (glslangIntermediate->getTileShadingRateQCOM(0) >= 1 || glslangIntermediate->getTileShadingRateQCOM(1) >= 1 || glslangIntermediate->getTileShadingRateQCOM(2) >= 1) { + auto rate_x = glslangIntermediate->getTileShadingRateQCOM(0); + auto rate_y = glslangIntermediate->getTileShadingRateQCOM(1); + auto rate_z = glslangIntermediate->getTileShadingRateQCOM(2); + rate_x = ( rate_x == 0 ? 1 : rate_x ); + rate_y = ( rate_y == 0 ? 1 : rate_y ); + rate_z = ( rate_z == 0 ? 
1 : rate_z ); + builder.addExecutionMode(shaderEntry, spv::ExecutionMode::TileShadingRateQCOM, rate_x, rate_y, rate_z); + } else { + builder.addExecutionMode(shaderEntry, spv::ExecutionMode::LocalSize, glslangIntermediate->getLocalSize(0), + glslangIntermediate->getLocalSize(1), + glslangIntermediate->getLocalSize(2)); + } } if (glslangIntermediate->getLayoutDerivativeModeNone() == glslang::LayoutDerivativeGroupQuads) { builder.addCapability(spv::Capability::ComputeDerivativeGroupQuadsNV); @@ -1835,6 +1868,13 @@ TGlslangToSpvTraverser::TGlslangToSpvTraverser(unsigned int spvVersion, builder.addExecutionMode(shaderEntry, spv::ExecutionMode::DerivativeGroupLinearNV); builder.addExtension(spv::E_SPV_NV_compute_shader_derivatives); } + + if (glslangIntermediate->getNonCoherentTileAttachmentReadQCOM()) { + builder.addCapability(spv::Capability::TileShadingQCOM); + builder.addExecutionMode(shaderEntry, spv::ExecutionMode::NonCoherentTileAttachmentReadQCOM); + builder.addExtension(spv::E_SPV_QCOM_tile_shading); + } + break; } case EShLangTessEvaluation: @@ -3475,20 +3515,6 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt noReturnValue = true; break; - case glslang::EOpHitObjectGetSpherePositionNV: - case glslang::EOpHitObjectGetSphereRadiusNV: - case glslang::EOpHitObjectIsSphereHitNV: - builder.addExtension(spv::E_SPV_NV_linear_swept_spheres); - builder.addCapability(spv::Capability::ShaderInvocationReorderNV); - builder.addCapability(spv::Capability::RayTracingSpheresGeometryNV); - break; - - case glslang::EOpHitObjectIsLSSHitNV: - builder.addExtension(spv::E_SPV_NV_linear_swept_spheres); - builder.addCapability(spv::Capability::ShaderInvocationReorderNV); - builder.addCapability(spv::Capability::RayTracingLinearSweptSpheresGeometryNV); - break; - case glslang::EOpRayQueryGetIntersectionLSSPositionsNV: case glslang::EOpRayQueryGetIntersectionLSSRadiiNV: builder.addExtension(spv::E_SPV_NV_linear_swept_spheres); @@ -3518,7 +3544,6 @@ 
bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt builder.addCapability(spv::Capability::RayQueryPositionFetchKHR); noReturnValue = true; break; - case glslang::EOpImageSampleWeightedQCOM: builder.addCapability(spv::Capability::TextureSampleWeightedQCOM); builder.addExtension(spv::E_SPV_QCOM_image_processing); @@ -3877,8 +3902,8 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt lvalueCoherentFlags |= TranslateCoherent(glslangOperands[arg]->getAsTyped()->getType()); } else { builder.setDebugSourceLocation(node->getLoc().line, node->getLoc().getFilename()); - glslang::TOperator glslangOp = node->getOp(); - if (arg == 1 && + glslang::TOperator glslangOp = node->getOp(); + if (arg == 1 && (glslangOp == glslang::EOpRayQueryGetIntersectionType || glslangOp == glslang::EOpRayQueryGetIntersectionT || glslangOp == glslang::EOpRayQueryGetIntersectionInstanceCustomIndex || @@ -3904,18 +3929,18 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt )) { bool cond = glslangOperands[arg]->getAsConstantUnion()->getConstArray()[0].getBConst(); operands.push_back(builder.makeIntConstant(cond ? 1 : 0)); - } else if ((arg == 10 && glslangOp == glslang::EOpTraceKHR) || - (arg == 11 && glslangOp == glslang::EOpTraceRayMotionNV) || - (arg == 1 && glslangOp == glslang::EOpExecuteCallableKHR) || - (arg == 1 && glslangOp == glslang::EOpHitObjectExecuteShaderNV) || - (arg == 11 && glslangOp == glslang::EOpHitObjectTraceRayNV) || - (arg == 12 && glslangOp == glslang::EOpHitObjectTraceRayMotionNV)) { - const int set = glslangOp == glslang::EOpExecuteCallableKHR ? 
1 : 0; - const int location = glslangOperands[arg]->getAsConstantUnion()->getConstArray()[0].getUConst(); - auto itNode = locationToSymbol[set].find(location); - visitSymbol(itNode->second); - spv::Id symId = getSymbolId(itNode->second); - operands.push_back(symId); + } else if ((arg == 10 && glslangOp == glslang::EOpTraceKHR) || + (arg == 11 && glslangOp == glslang::EOpTraceRayMotionNV) || + (arg == 1 && glslangOp == glslang::EOpExecuteCallableKHR) || + (arg == 1 && glslangOp == glslang::EOpHitObjectExecuteShaderNV) || + (arg == 11 && glslangOp == glslang::EOpHitObjectTraceRayNV) || + (arg == 12 && glslangOp == glslang::EOpHitObjectTraceRayMotionNV)) { + const int set = glslangOp == glslang::EOpExecuteCallableKHR ? 1 : 0; + const int location = glslangOperands[arg]->getAsConstantUnion()->getConstArray()[0].getUConst(); + auto itNode = locationToSymbol[set].find(location); + visitSymbol(itNode->second); + spv::Id symId = getSymbolId(itNode->second); + operands.push_back(symId); } else if ((arg == 12 && glslangOp == glslang::EOpHitObjectRecordHitNV) || (arg == 13 && glslangOp == glslang::EOpHitObjectRecordHitMotionNV) || (arg == 11 && glslangOp == glslang::EOpHitObjectRecordHitWithIndexNV) || @@ -3927,16 +3952,16 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt visitSymbol(itNode->second); spv::Id symId = getSymbolId(itNode->second); operands.push_back(symId); - } else if (glslangOperands[arg]->getAsTyped()->getQualifier().isSpirvLiteral()) { - // Will be translated to a literal value, make a placeholder here - operands.push_back(spv::NoResult); - } else if (glslangOperands[arg]->getAsTyped()->getBasicType() == glslang::EbtFunction) { - spv::Function* function = functionMap[glslangOperands[arg]->getAsSymbolNode()->getMangledName().c_str()]; - assert(function); - operands.push_back(function->getId()); - } else { - operands.push_back(accessChainLoad(glslangOperands[arg]->getAsTyped()->getType())); - } + } else if 
(glslangOperands[arg]->getAsTyped()->getQualifier().isSpirvLiteral()) { + // Will be translated to a literal value, make a placeholder here + operands.push_back(spv::NoResult); + } else if (glslangOperands[arg]->getAsTyped()->getBasicType() == glslang::EbtFunction) { + spv::Function* function = functionMap[glslangOperands[arg]->getAsSymbolNode()->getMangledName().c_str()]; + assert(function); + operands.push_back(function->getId()); + } else { + operands.push_back(accessChainLoad(glslangOperands[arg]->getAsTyped()->getType())); + } } } @@ -4708,12 +4733,8 @@ bool TGlslangToSpvTraverser::visitBranch(glslang::TVisit /* visit */, glslang::T switch (node->getFlowOp()) { case glslang::EOpKill: if (glslangIntermediate->getSpv().spv >= glslang::EShTargetSpv_1_6) { - if (glslangIntermediate->getSource() == glslang::EShSourceHlsl) { - builder.addCapability(spv::Capability::DemoteToHelperInvocation); - builder.createNoResultOp(spv::Op::OpDemoteToHelperInvocationEXT); - } else { - builder.makeStatementTerminator(spv::Op::OpTerminateInvocation, "post-terminate-invocation"); - } + builder.addCapability(spv::Capability::DemoteToHelperInvocation); + builder.createNoResultOp(spv::Op::OpDemoteToHelperInvocationEXT); } else { builder.makeStatementTerminator(spv::Op::OpKill, "post-discard"); } @@ -4812,6 +4833,9 @@ spv::Id TGlslangToSpvTraverser::createSpvVariable(const glslang::TIntermSymbol* builder.addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3); builder.addCapability(spv::Capability::StorageUniformBufferBlock16); break; + case spv::StorageClass::TileAttachmentQCOM: + builder.addCapability(spv::Capability::TileShadingQCOM); + break; default: if (storageClass == spv::StorageClass::Workgroup && node->getType().getBasicType() == glslang::EbtBlock) { @@ -7951,18 +7975,30 @@ spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, OpDe case glslang::EOpHitObjectGetSpherePositionNV: unaryOp = spv::Op::OpHitObjectGetSpherePositionNV; + 
builder.addExtension(spv::E_SPV_NV_linear_swept_spheres); + builder.addCapability(spv::Capability::ShaderInvocationReorderNV); + builder.addCapability(spv::Capability::RayTracingSpheresGeometryNV); break; case glslang::EOpHitObjectGetSphereRadiusNV: unaryOp = spv::Op::OpHitObjectGetSphereRadiusNV; + builder.addExtension(spv::E_SPV_NV_linear_swept_spheres); + builder.addCapability(spv::Capability::ShaderInvocationReorderNV); + builder.addCapability(spv::Capability::RayTracingSpheresGeometryNV); break; case glslang::EOpHitObjectIsSphereHitNV: unaryOp = spv::Op::OpHitObjectIsSphereHitNV; + builder.addExtension(spv::E_SPV_NV_linear_swept_spheres); + builder.addCapability(spv::Capability::ShaderInvocationReorderNV); + builder.addCapability(spv::Capability::RayTracingSpheresGeometryNV); break; case glslang::EOpHitObjectIsLSSHitNV: unaryOp = spv::Op::OpHitObjectIsLSSHitNV; + builder.addExtension(spv::E_SPV_NV_linear_swept_spheres); + builder.addCapability(spv::Capability::ShaderInvocationReorderNV); + builder.addCapability(spv::Capability::RayTracingLinearSweptSpheresGeometryNV); break; case glslang::EOpFetchMicroTriangleVertexPositionNV: diff --git a/3rdparty/glslang/SPIRV/SpvBuilder.cpp b/3rdparty/glslang/SPIRV/SpvBuilder.cpp index 1c2e3874f..0f953ac5a 100644 --- a/3rdparty/glslang/SPIRV/SpvBuilder.cpp +++ b/3rdparty/glslang/SPIRV/SpvBuilder.cpp @@ -3266,8 +3266,12 @@ Id Builder::createTextureCall(Decoration precision, Id resultType, bool sparse, texArgs.push_back(parameters.offset); } if (parameters.offsets) { - addCapability(Capability::ImageGatherExtended); - mask = (ImageOperandsMask)(mask | ImageOperandsMask::ConstOffsets); + if (!isConstant(parameters.offsets) && sourceLang == spv::SourceLanguage::GLSL) { + mask = (ImageOperandsMask)(mask | ImageOperandsMask::Offsets); + } else { + addCapability(Capability::ImageGatherExtended); + mask = (ImageOperandsMask)(mask | ImageOperandsMask::ConstOffsets); + } texArgs.push_back(parameters.offsets); } if (parameters.sample) 
{ diff --git a/3rdparty/glslang/SPIRV/doc.cpp b/3rdparty/glslang/SPIRV/doc.cpp index 429290b3e..db097a7ac 100644 --- a/3rdparty/glslang/SPIRV/doc.cpp +++ b/3rdparty/glslang/SPIRV/doc.cpp @@ -194,6 +194,10 @@ const char* ExecutionModeString(int mode) case (int)ExecutionMode::SignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve"; case (int)ExecutionMode::RoundingModeRTE: return "RoundingModeRTE"; case (int)ExecutionMode::RoundingModeRTZ: return "RoundingModeRTZ"; + + case (int)ExecutionMode::NonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM"; + case (int)ExecutionMode::TileShadingRateQCOM: return "TileShadingRateQCOM"; + case (int)ExecutionMode::EarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD"; case (int)ExecutionMode::StencilRefUnchangedFrontAMD: return "StencilRefUnchangedFrontAMD"; case (int)ExecutionMode::StencilRefLessFrontAMD: return "StencilRefLessFrontAMD"; @@ -249,6 +253,7 @@ const char* StorageClassString(int StorageClass) case 11: return "Image"; case 12: return "StorageBuffer"; + case (int)StorageClass::TileAttachmentQCOM: return "TileAttachmentQCOM"; case (int)StorageClass::RayPayloadKHR: return "RayPayloadKHR"; case (int)StorageClass::HitAttributeKHR: return "HitAttributeKHR"; case (int)StorageClass::IncomingRayPayloadKHR: return "IncomingRayPayloadKHR"; @@ -404,6 +409,10 @@ const char* BuiltInString(int builtIn) case 4444: return "ShadingRateKHR"; case 5014: return "FragStencilRefEXT"; + case (int)BuiltIn::TileOffsetQCOM: return "TileOffsetQCOM"; + case (int)BuiltIn::TileDimensionQCOM: return "TileDimensionQCOM"; + case (int)BuiltIn::TileApronSizeQCOM: return "TileApronSizeQCOM"; + case 4992: return "BaryCoordNoPerspAMD"; case 4993: return "BaryCoordNoPerspCentroidAMD"; case 4994: return "BaryCoordNoPerspSampleAMD"; @@ -638,7 +647,7 @@ const char* ImageChannelDataTypeString(int type) } } -const int ImageOperandsCeiling = 15; +const int ImageOperandsCeiling = 17; const char* ImageOperandsString(int 
format) { @@ -658,6 +667,7 @@ const char* ImageOperandsString(int format) case (int)ImageOperandsShift::SignExtend: return "SignExtend"; case (int)ImageOperandsShift::ZeroExtend: return "ZeroExtend"; case (int)ImageOperandsShift::Nontemporal: return "Nontemporal"; + case (int)ImageOperandsShift::Offsets: return "Offsets"; case ImageOperandsCeiling: default: @@ -1098,6 +1108,7 @@ const char* CapabilityString(int info) case (int)Capability::TextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM"; case (int)Capability::TextureBoxFilterQCOM: return "TextureBoxFilterQCOM"; case (int)Capability::TextureBlockMatchQCOM: return "TextureBlockMatchQCOM"; + case (int)Capability::TileShadingQCOM: return "TileShadingQCOM"; case (int)Capability::TextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM"; case (int)Capability::ReplicatedCompositesEXT: return "CapabilityReplicatedCompositesEXT"; diff --git a/3rdparty/glslang/SPIRV/spirv.hpp11 b/3rdparty/glslang/SPIRV/spirv.hpp11 index 3b1a14757..e9e36c37f 100644 --- a/3rdparty/glslang/SPIRV/spirv.hpp11 +++ b/3rdparty/glslang/SPIRV/spirv.hpp11 @@ -172,6 +172,8 @@ enum class ExecutionMode : unsigned { SignedZeroInfNanPreserve = 4461, RoundingModeRTE = 4462, RoundingModeRTZ = 4463, + NonCoherentTileAttachmentReadQCOM = 4489, + TileShadingRateQCOM = 4490, EarlyAndLateFragmentTestsAMD = 5017, StencilRefReplacingEXT = 5027, CoalescingAMDX = 5069, @@ -241,6 +243,7 @@ enum class StorageClass : unsigned { Image = 11, StorageBuffer = 12, TileImageEXT = 4172, + TileAttachmentQCOM = 4491, NodePayloadAMDX = 5068, CallableDataKHR = 5328, CallableDataNV = 5328, @@ -713,6 +716,9 @@ enum class BuiltIn : unsigned { DeviceIndex = 4438, ViewIndex = 4440, ShadingRateKHR = 4444, + TileOffsetQCOM = 4492, + TileDimensionQCOM = 4493, + TileApronSizeQCOM = 4494, BaryCoordNoPerspAMD = 4992, BaryCoordNoPerspCentroidAMD = 4993, BaryCoordNoPerspSampleAMD = 4994, @@ -1099,6 +1105,7 @@ enum class Capability : unsigned { TextureSampleWeightedQCOM = 4484, 
TextureBoxFilterQCOM = 4485, TextureBlockMatchQCOM = 4486, + TileShadingQCOM = 4495, TextureBlockMatch2QCOM = 4498, Float16ImageAMD = 5008, ImageGatherBiasLodAMD = 5009, @@ -3278,6 +3285,8 @@ inline const char* ExecutionModeToString(ExecutionMode value) { case ExecutionMode::SignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve"; case ExecutionMode::RoundingModeRTE: return "RoundingModeRTE"; case ExecutionMode::RoundingModeRTZ: return "RoundingModeRTZ"; + case ExecutionMode::NonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM"; + case ExecutionMode::TileShadingRateQCOM: return "TileShadingRateQCOM"; case ExecutionMode::EarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD"; case ExecutionMode::StencilRefReplacingEXT: return "StencilRefReplacingEXT"; case ExecutionMode::CoalescingAMDX: return "CoalescingAMDX"; @@ -3344,6 +3353,7 @@ inline const char* StorageClassToString(StorageClass value) { case StorageClass::Image: return "Image"; case StorageClass::StorageBuffer: return "StorageBuffer"; case StorageClass::TileImageEXT: return "TileImageEXT"; + case StorageClass::TileAttachmentQCOM: return "TileAttachmentQCOM"; case StorageClass::NodePayloadAMDX: return "NodePayloadAMDX"; case StorageClass::CallableDataKHR: return "CallableDataKHR"; case StorageClass::IncomingCallableDataKHR: return "IncomingCallableDataKHR"; @@ -3745,6 +3755,9 @@ inline const char* BuiltInToString(BuiltIn value) { case BuiltIn::DeviceIndex: return "DeviceIndex"; case BuiltIn::ViewIndex: return "ViewIndex"; case BuiltIn::ShadingRateKHR: return "ShadingRateKHR"; + case BuiltIn::TileOffsetQCOM: return "TileOffsetQCOM"; + case BuiltIn::TileDimensionQCOM: return "TileDimensionQCOM"; + case BuiltIn::TileApronSizeQCOM: return "TileApronSizeQCOM"; case BuiltIn::BaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD"; case BuiltIn::BaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD"; case BuiltIn::BaryCoordNoPerspSampleAMD: return 
"BaryCoordNoPerspSampleAMD"; @@ -3959,6 +3972,7 @@ inline const char* CapabilityToString(Capability value) { case Capability::TextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM"; case Capability::TextureBoxFilterQCOM: return "TextureBoxFilterQCOM"; case Capability::TextureBlockMatchQCOM: return "TextureBlockMatchQCOM"; + case Capability::TileShadingQCOM: return "TileShadingQCOM"; case Capability::TextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM"; case Capability::Float16ImageAMD: return "Float16ImageAMD"; case Capability::ImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD"; diff --git a/3rdparty/glslang/StandAlone/StandAlone.cpp b/3rdparty/glslang/StandAlone/StandAlone.cpp index 88ac68b76..b1853b024 100644 --- a/3rdparty/glslang/StandAlone/StandAlone.cpp +++ b/3rdparty/glslang/StandAlone/StandAlone.cpp @@ -1954,25 +1954,26 @@ void usage() { printf("Usage: glslang [option]... [file]...\n" "\n" - "'file' can end in . for auto-stage classification, where is:\n" - " .conf to provide a config file that replaces the default configuration\n" - " (see -c option below for generating a template)\n" - " .vert for a vertex shader\n" - " .tesc for a tessellation control shader\n" - " .tese for a tessellation evaluation shader\n" - " .geom for a geometry shader\n" - " .frag for a fragment shader\n" - " .comp for a compute shader\n" - " .mesh for a mesh shader\n" - " .task for a task shader\n" - " .rgen for a ray generation shader\n" - " .rint for a ray intersection shader\n" - " .rahit for a ray any hit shader\n" - " .rchit for a ray closest hit shader\n" - " .rmiss for a ray miss shader\n" - " .rcall for a ray callable shader\n" - " .glsl for .vert.glsl, .tesc.glsl, ..., .comp.glsl compound suffixes\n" - " .hlsl for .vert.hlsl, .tesc.hlsl, ..., .comp.hlsl compound suffixes\n" + "'file' with one of the following three endings can be auto-classified:\n" + "1) ., where is one of:\n" + " vert for a vertex shader\n" + " tesc for a tessellation control shader\n" + " 
tese for a tessellation evaluation shader\n" + " geom for a geometry shader\n" + " frag for a fragment shader\n" + " comp for a compute shader\n" + " mesh for a mesh shader\n" + " task for a task shader\n" + " rgen for a ray generation shader\n" + " rint for a ray intersection shader\n" + " rahit for a ray any hit shader\n" + " rchit for a ray closest hit shader\n" + " rmiss for a ray miss shader\n" + " rcall for a ray callable shader\n" + "2) ..glsl or ..hlsl compound suffix, where stage options are\n" + " described above\n" + "3) .conf, to provide a config file that replaces the default configuration\n" + " (see -c option below for generating a template)\n" "\n" "Options:\n" " -C cascading errors; risk crash from accumulation of error recoveries\n" @@ -2000,7 +2001,8 @@ void usage() " allowing the use of default uniforms, atomic_uints, and\n" " gl_VertexID and gl_InstanceID keywords.\n" " -S uses specified stage rather than parsing the file extension\n" - " choices for are vert, tesc, tese, geom, frag, or comp\n" + " choices for include vert, tesc, tese, geom, frag, comp.\n" + " A full list of options is given above.\n" 
" -U | --undef-macro | --U \n" " undefine a pre-processor macro\n" " -V[ver] create SPIR-V binary, under Vulkan semantics; turns on -l;\n" @@ -2028,7 +2030,7 @@ void usage() " -m memory leak mode\n" " -o save binary to , requires a binary option (e.g., -V)\n" " -q dump reflection query database; requires -l for linking\n" - " -r | --relaxed-errors" + " -r | --relaxed-errors\n" " relaxed GLSL semantic error-checking mode\n" " -s silence syntax and semantic error reporting\n" " -t multi-threaded mode\n" @@ -2053,10 +2055,10 @@ void usage() " --flatten-uniform-arrays | --fua flatten uniform texture/sampler arrays to\n" " scalars\n" " --glsl-version {100 | 110 | 120 | 130 | 140 | 150 |\n" - " 300es | 310es | 320es | 330\n" - " 400 | 410 | 420 | 430 | 440 | 450 | 460}\n" + " 300es | 310es | 320es | 330\n" + " 400 | 410 | 420 | 430 | 440 | 450 | 460}\n" " set GLSL version, overrides #version\n" - " in shader sourcen\n" + " in shader source\n" " --hlsl-offsets allow block offsets to follow HLSL rules\n" " works independently of source language\n" " --hlsl-iomap perform IO mapping in HLSL register space\n" diff --git a/3rdparty/glslang/glslang/Include/BaseTypes.h b/3rdparty/glslang/glslang/Include/BaseTypes.h index 83eef226d..a33ab3f15 100644 --- a/3rdparty/glslang/glslang/Include/BaseTypes.h +++ b/3rdparty/glslang/glslang/Include/BaseTypes.h @@ -344,6 +344,11 @@ enum TBuiltInVariable { EbvPositionFetch, + // SPV_QCOM_tile_shading + EbvTileOffsetQCOM, + EbvTileDimensionQCOM, + EbvTileApronSizeQCOM, + EbvLast }; diff --git a/3rdparty/glslang/glslang/Include/Types.h b/3rdparty/glslang/glslang/Include/Types.h index 1f5abf2e8..d6841bb0d 100644 --- a/3rdparty/glslang/glslang/Include/Types.h +++ b/3rdparty/glslang/glslang/Include/Types.h @@ -85,6 +85,7 @@ struct TSampler { // misnomer now; includes images, textures without sampler, bool image : 1; // image, combined should be false bool combined : 1; // true means texture is combined with a sampler, false means texture with no 
sampler bool sampler : 1; // true means a pure sampler, other fields should be clear() + bool tileQCOM : 1; // is tile shading attachment unsigned int vectorSize : 3; // vector return type size. // Some languages support structures as sample results. Storing the whole structure in the @@ -127,6 +128,8 @@ struct TSampler { // misnomer now; includes images, textures without sampler, bool isShadow() const { return shadow; } bool isArrayed() const { return arrayed; } + bool isTileAttachmentQCOM() const { return tileQCOM; } + void clear() { type = EbtVoid; @@ -139,6 +142,7 @@ struct TSampler { // misnomer now; includes images, textures without sampler, sampler = false; external = false; yuv = false; + tileQCOM = false; #ifdef ENABLE_HLSL clearReturnStruct(); @@ -220,7 +224,8 @@ struct TSampler { // misnomer now; includes images, textures without sampler, isCombined() == right.isCombined() && isPureSampler() == right.isPureSampler() && isExternal() == right.isExternal() && - isYuv() == right.isYuv() + isYuv() == right.isYuv() && + isTileAttachmentQCOM() == right.isTileAttachmentQCOM() #ifdef ENABLE_HLSL && getVectorSize() == right.getVectorSize() && getStructReturnIndex() == right.getStructReturnIndex() @@ -260,6 +265,8 @@ struct TSampler { // misnomer now; includes images, textures without sampler, s.append("attachmentEXT"); else if (isSubpass()) s.append("subpass"); + else if (isTileAttachmentQCOM()) + s.append("attachmentQCOM"); else s.append("image"); } else if (isCombined()) { @@ -850,6 +857,8 @@ public: layoutBufferReferenceAlign = layoutBufferReferenceAlignEnd; layoutFormat = ElfNone; + layoutTileAttachmentQCOM = false; + clearInterstageLayout(); layoutSpecConstantId = layoutSpecConstantIdEnd; @@ -951,6 +960,8 @@ public: bool layoutBindlessSampler; bool layoutBindlessImage; + bool layoutTileAttachmentQCOM; + bool hasUniformLayout() const { return hasMatrix() || @@ -1069,6 +1080,10 @@ public: { return layoutBindlessImage; } + bool isTileAttachmentQCOM() const + { + 
return layoutTileAttachmentQCOM; + } // GL_EXT_spirv_intrinsics bool hasSpirvDecorate() const { return spirvDecorate != nullptr; } @@ -1282,7 +1297,7 @@ public: } }; -// Qualifiers that don't need to be keep per object. They have shader scope, not object scope. +// Qualifiers that don't need to be kept per object. They have shader scope, not object scope. // So, they will not be part of TType, TQualifier, etc. struct TShaderQualifiers { TLayoutGeometry geometry; // geometry/tessellation shader in/out primitives @@ -1312,6 +1327,9 @@ struct TShaderQualifiers { bool layoutDerivativeGroupLinear; // true if layout derivative_group_linearNV set int primitives; // mesh shader "max_primitives"DerivativeGroupLinear; // true if layout derivative_group_linearNV set bool layoutPrimitiveCulling; // true if layout primitive_culling set + bool layoutNonCoherentTileAttachmentReadQCOM; // fragment shaders -- per object + int layoutTileShadingRateQCOM[3]; // compute shader + bool layoutTileShadingRateQCOMNotDefault[3]; // compute shader TLayoutDepth getDepth() const { return layoutDepth; } TLayoutStencil getStencil() const { return layoutStencil; } @@ -1348,6 +1366,13 @@ struct TShaderQualifiers { layoutDerivativeGroupQuads = false; layoutDerivativeGroupLinear = false; layoutPrimitiveCulling = false; + layoutNonCoherentTileAttachmentReadQCOM = false; + layoutTileShadingRateQCOM[0] = 0; + layoutTileShadingRateQCOM[1] = 0; + layoutTileShadingRateQCOM[2] = 0; + layoutTileShadingRateQCOMNotDefault[0] = false; + layoutTileShadingRateQCOMNotDefault[1] = false; + layoutTileShadingRateQCOMNotDefault[2] = false; primitives = TQualifier::layoutNotSet; interlockOrdering = EioNone; } @@ -1417,6 +1442,15 @@ struct TShaderQualifiers { interlockOrdering = src.interlockOrdering; if (src.layoutPrimitiveCulling) layoutPrimitiveCulling = src.layoutPrimitiveCulling; + if (src.layoutNonCoherentTileAttachmentReadQCOM) + layoutNonCoherentTileAttachmentReadQCOM = 
src.layoutNonCoherentTileAttachmentReadQCOM; + for (int i = 0; i < 3; ++i) { + if (src.layoutTileShadingRateQCOM[i] > 1) + layoutTileShadingRateQCOM[i] = src.layoutTileShadingRateQCOM[i]; + } + for (int i = 0; i < 3; ++i) { + layoutTileShadingRateQCOMNotDefault[i] = src.layoutTileShadingRateQCOMNotDefault[i] || layoutTileShadingRateQCOMNotDefault[i]; + } } }; @@ -1465,6 +1499,7 @@ public: bool coopmatNV : 1; bool coopmatKHR : 1; bool coopvecNV : 1; + bool tileAttachmentQCOM: 1; TArraySizes* arraySizes; const TType* userDef; TSourceLoc loc; @@ -1494,6 +1529,7 @@ public: coopmatNV = false; coopmatKHR = false; coopvecNV = false; + tileAttachmentQCOM = false; spirvType = nullptr; } @@ -1554,7 +1590,7 @@ public: explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0, bool isVector = false) : basicType(t), vectorSize(static_cast(vs) & 0b1111), matrixCols(static_cast(mc) & 0b1111), matrixRows(static_cast(mr) & 0b1111), vector1(isVector && vs == 1), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false), - arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr), + tileAttachmentQCOM(false), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr), spirvType(nullptr) { assert(vs >= 0); @@ -1570,7 +1606,7 @@ public: TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0, bool isVector = false) : basicType(t), vectorSize(static_cast(vs) & 0b1111), matrixCols(static_cast(mc) & 0b1111), matrixRows(static_cast(mr) & 0b1111), vector1(isVector && vs == 1), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false), - arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr), + tileAttachmentQCOM(false), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), 
typeName(nullptr), typeParameters(nullptr), spirvType(nullptr) { assert(vs >= 0); @@ -1588,7 +1624,7 @@ public: explicit TType(const TPublicType& p) : basicType(p.basicType), vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), vector1(false), coopmatNV(p.coopmatNV), coopmatKHR(p.coopmatKHR), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(p.coopvecNV), - arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(p.typeParameters), + tileAttachmentQCOM(p.tileAttachmentQCOM), arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(p.typeParameters), spirvType(p.spirvType) { if (basicType == EbtSampler) @@ -1645,7 +1681,7 @@ public: // for construction of sampler types TType(const TSampler& sampler, TStorageQualifier q = EvqUniform, TArraySizes* as = nullptr) : basicType(EbtSampler), vectorSize(1u), matrixCols(0u), matrixRows(0u), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false), - arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr), + tileAttachmentQCOM(false), arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr), sampler(sampler), typeParameters(nullptr), spirvType(nullptr) { qualifier.clear(); @@ -1694,13 +1730,16 @@ public: coopmatKHRUseValid = false; coopvecNV = false; typeParameters = nullptr; + } else if (isTileAttachmentQCOM()) { + tileAttachmentQCOM = false; + typeParameters = nullptr; } } } // for making structures, ... 
TType(TTypeList* userDef, const TString& n) : basicType(EbtStruct), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false), - arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr), + tileAttachmentQCOM(false), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr), spirvType(nullptr) { sampler.clear(); @@ -1710,7 +1749,7 @@ public: // For interface blocks TType(TTypeList* userDef, const TString& n, const TQualifier& q) : basicType(EbtBlock), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false), - qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr), + tileAttachmentQCOM(false), qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr), spirvType(nullptr) { sampler.clear(); @@ -1719,7 +1758,7 @@ public: // for block reference (first parameter must be EbtReference) explicit TType(TBasicType t, const TType &p, const TString& n) : basicType(t), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), - arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr), + tileAttachmentQCOM(false), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr), spirvType(nullptr) { assert(t == EbtReference); @@ -1758,6 +1797,7 @@ public: coopmatKHRuse = copyOf.coopmatKHRuse; coopmatKHRUseValid = copyOf.coopmatKHRUseValid; coopvecNV = copyOf.isCoopVecNV(); + tileAttachmentQCOM = copyOf.tileAttachmentQCOM; } // Make complete copy of the whole type graph rooted at 'copyOf'. 
@@ -1876,7 +1916,7 @@ public: } virtual bool isOpaque() const { return basicType == EbtSampler || basicType == EbtAtomicUint || basicType == EbtAccStruct || basicType == EbtRayQuery - || basicType == EbtHitObjectNV; } + || basicType == EbtHitObjectNV || isTileAttachmentQCOM(); } virtual bool isBuiltIn() const { return getQualifier().builtIn != EbvNone; } virtual bool isAttachmentEXT() const { return basicType == EbtSampler && getSampler().isAttachmentEXT(); } @@ -1894,6 +1934,7 @@ public: bool isCoopMatKHR() const { return coopmatKHR; } bool isCoopVecNV() const { return coopvecNV; } bool isCoopMatOrVec() const { return isCoopMat() || isCoopVecNV(); } + bool isTileAttachmentQCOM() const { return tileAttachmentQCOM; } bool isReference() const { return getBasicType() == EbtReference; } bool isSpirvType() const { return getBasicType() == EbtSpirvType; } int getCoopMatKHRuse() const { return static_cast<int>(coopmatKHRuse); } @@ -2244,7 +2285,7 @@ public: appendStr(" layoutSecondaryViewportRelativeOffset="); appendInt(qualifier.layoutSecondaryViewportRelativeOffset); } - + if (qualifier.layoutShaderRecord) appendStr(" shaderRecordNV"); if (qualifier.layoutFullQuads) @@ -2967,6 +3008,7 @@ protected: uint32_t coopmatKHRuse : 3; // Accepts one of three values: 0, 1, 2 (gl_MatrixUseA, gl_MatrixUseB, gl_MatrixUseAccumulator) bool coopmatKHRUseValid : 1; // True if coopmatKHRuse has been set bool coopvecNV : 1; + bool tileAttachmentQCOM : 1; TQualifier qualifier; TArraySizes* arraySizes; // nullptr unless an array; can be shared across types diff --git a/3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp b/3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp index ad0f0ab90..862f4aba0 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp +++ b/3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp @@ -1506,7 +1506,7 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "\n"); } - if ((profile != EEsProfile &&
version >= 400) || + if ((profile != EEsProfile && version >= 150) || // GL_NV_gpu_shader5 (profile == EEsProfile && version >= 310)) { // GL_OES_gpu_shader5 commonBuiltins.append( @@ -1536,7 +1536,7 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV } if ((profile == EEsProfile && version >= 310) || - (profile != EEsProfile && version >= 400)) { + (profile != EEsProfile && version >= 150)) { // GL_NV_gpu_shader5 commonBuiltins.append( "float frexp(highp float, out highp int);" "vec2 frexp(highp vec2, out highp ivec2);" @@ -1895,7 +1895,7 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV // Bitfield if ((profile == EEsProfile && version >= 310) || - (profile != EEsProfile && version >= 400)) { + (profile != EEsProfile && version >= 150)) { // ARB_gpu_shader5/NV_gpu_shader5 commonBuiltins.append( " int bitfieldExtract( int, int, int);" "ivec2 bitfieldExtract(ivec2, int, int);" @@ -1920,7 +1920,7 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "\n"); } - if (profile != EEsProfile && version >= 400) { + if (profile != EEsProfile && version >= 150) { //GL_ARB_gpu_shader5/GL_NV_gpu_shader5 commonBuiltins.append( " int findLSB( int);" "ivec2 findLSB(ivec2);" @@ -1948,7 +1948,7 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "\n"); } - if (profile != EEsProfile && version >= 400) { + if (profile != EEsProfile && version >= 150) { //GL_ARB_gpu_shader5/GL_NV_gpu_shader5 commonBuiltins.append( " int bitCount( int);" "ivec2 bitCount(ivec2);" @@ -1969,12 +1969,146 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "ivec2 findMSB(highp uvec2);" "ivec3 findMSB(highp uvec3);" "ivec4 findMSB(highp uvec4);" + "\n"); + } + + if (profile != EEsProfile && version >= 150 && version < 450) { //GL_NV_gpu_shader5 + commonBuiltins.append( + "int64_t packInt2x32(ivec2);" + "uint64_t packUint2x32(uvec2);" + "ivec2 
unpackInt2x32(int64_t);" + "uvec2 unpackUint2x32(uint64_t);" + + "uint packFloat2x16(f16vec2);" + "f16vec2 unpackFloat2x16(uint);" + + "int64_t doubleBitsToInt64(double);" + "i64vec2 doubleBitsToInt64(dvec2);" + "i64vec3 doubleBitsToInt64(dvec3);" + "i64vec4 doubleBitsToInt64(dvec4);" + + "uint64_t doubleBitsToUint64(double);" + "u64vec2 doubleBitsToUint64(dvec2);" + "u64vec3 doubleBitsToUint64(dvec3);" + "u64vec4 doubleBitsToUint64(dvec4);" + + "double int64BitsToDouble(int64_t);" + "dvec2 int64BitsToDouble(i64vec2);" + "dvec3 int64BitsToDouble(i64vec3);" + "dvec4 int64BitsToDouble(i64vec4);" + + "double uint64BitsToDouble(uint64_t);" + "dvec2 uint64BitsToDouble(u64vec2);" + "dvec3 uint64BitsToDouble(u64vec3);" + "dvec4 uint64BitsToDouble(u64vec4);" + // Modifications to Vector Relational Functions + // Introduction of explicitly sized types + "bvec2 lessThan(i64vec2, i64vec2);" + "bvec3 lessThan(i64vec3, i64vec3);" + "bvec4 lessThan(i64vec4, i64vec4);" + "bvec2 lessThan(u64vec2, u64vec2);" + "bvec3 lessThan(u64vec3, u64vec3);" + "bvec4 lessThan(u64vec4, u64vec4);" + + "bvec2 lessThanEqual(i64vec2, i64vec2);" + "bvec3 lessThanEqual(i64vec3, i64vec3);" + "bvec4 lessThanEqual(i64vec4, i64vec4);" + "bvec2 lessThanEqual(u64vec2, u64vec2);" + "bvec3 lessThanEqual(u64vec3, u64vec3);" + "bvec4 lessThanEqual(u64vec4, u64vec4);" + + "bvec2 greaterThan(i64vec2, i64vec2);" + "bvec3 greaterThan(i64vec3, i64vec3);" + "bvec4 greaterThan(i64vec4, i64vec4);" + "bvec2 greaterThan(u64vec2, u64vec2);" + "bvec3 greaterThan(u64vec3, u64vec3);" + "bvec4 greaterThan(u64vec4, u64vec4);" + + "bvec2 greaterThanEqual(i64vec2, i64vec2);" + "bvec3 greaterThanEqual(i64vec3, i64vec3);" + "bvec4 greaterThanEqual(i64vec4, i64vec4);" + "bvec2 greaterThanEqual(u64vec2, u64vec2);" + "bvec3 greaterThanEqual(u64vec3, u64vec3);" + "bvec4 greaterThanEqual(u64vec4, u64vec4);" + + "bvec2 equal(i64vec2, i64vec2);" + "bvec3 equal(i64vec3, i64vec3);" + "bvec4 equal(i64vec4, i64vec4);" + "bvec2 equal(u64vec2, 
u64vec2);" + "bvec3 equal(u64vec3, u64vec3);" + "bvec4 equal(u64vec4, u64vec4);" + + "bvec2 notEqual(i64vec2, i64vec2);" + "bvec3 notEqual(i64vec3, i64vec3);" + "bvec4 notEqual(i64vec4, i64vec4);" + "bvec2 notEqual(u64vec2, u64vec2);" + "bvec3 notEqual(u64vec3, u64vec3);" + "bvec4 notEqual(u64vec4, u64vec4);" + + "bvec2 lessThan(f16vec2, f16vec2);" + "bvec3 lessThan(f16vec3, f16vec3);" + "bvec4 lessThan(f16vec4, f16vec4);" + + "bvec2 lessThanEqual(f16vec2, f16vec2);" + "bvec3 lessThanEqual(f16vec3, f16vec3);" + "bvec4 lessThanEqual(f16vec4, f16vec4);" + + "bvec2 greaterThan(f16vec2, f16vec2);" + "bvec3 greaterThan(f16vec3, f16vec3);" + "bvec4 greaterThan(f16vec4, f16vec4);" + + "bvec2 greaterThanEqual(f16vec2, f16vec2);" + "bvec3 greaterThanEqual(f16vec3, f16vec3);" + "bvec4 greaterThanEqual(f16vec4, f16vec4);" + + "bvec2 equal(f16vec2, f16vec2);" + "bvec3 equal(f16vec3, f16vec3);" + "bvec4 equal(f16vec4, f16vec4);" + + "bvec2 notEqual(f16vec2, f16vec2);" + "bvec3 notEqual(f16vec3, f16vec3);" + "bvec4 notEqual(f16vec4, f16vec4);" + + // Dependency on GL_ARB_gpu_shader_fp64 + "bvec2 lessThan(dvec2, dvec2);" + "bvec3 lessThan(dvec3, dvec3);" + "bvec4 lessThan(dvec4, dvec4);" + + "bvec2 lessThanEqual(dvec2, dvec2);" + "bvec3 lessThanEqual(dvec3, dvec3);" + "bvec4 lessThanEqual(dvec4, dvec4);" + + "bvec2 greaterThan(dvec2, dvec2);" + "bvec3 greaterThan(dvec3, dvec3);" + "bvec4 greaterThan(dvec4, dvec4);" + + "bvec2 greaterThanEqual(dvec2, dvec2);" + "bvec3 greaterThanEqual(dvec3, dvec3);" + "bvec4 greaterThanEqual(dvec4, dvec4);" + + "bvec2 equal(dvec2, dvec2);" + "bvec3 equal(dvec3, dvec3);" + "bvec4 equal(dvec4, dvec4);" + + "bvec2 notEqual(dvec2, dvec2);" + "bvec3 notEqual(dvec3, dvec3);" + "bvec4 notEqual(dvec4, dvec4);" + + "\n"); + } + + + if (profile != EEsProfile && version >= 150) { + commonBuiltins.append( + "bool anyThreadNV(bool);" + "bool allThreadsNV(bool);" + "bool allThreadsEqualNV(bool);" "\n"); } if ((profile == EEsProfile && version >= 310) || - 
(profile != EEsProfile && version >= 400)) { + (profile != EEsProfile && version >= 150)) { // NV_gpu_shader5 commonBuiltins.append( " uint uaddCarry(highp uint, highp uint, out lowp uint carry);" "uvec2 uaddCarry(highp uvec2, highp uvec2, out lowp uvec2 carry);" @@ -4485,8 +4619,7 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV // Prototypes for built-in functions seen by geometry shaders only. // //============================================================================ - - if (profile != EEsProfile && (version >= 400 || version == 150)) { + if (profile != EEsProfile && version >= 150) { stageBuiltins[EShLangGeometry].append( "void EmitStreamVertex(int);" "void EndStreamPrimitive(int);" @@ -4986,7 +5119,7 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV // GL_OES_shader_multisample_interpolation if ((profile == EEsProfile && version >= 310) || - (profile != EEsProfile && version >= 400)) { + (profile != EEsProfile && version >= 150)) { // NV_gpu_shader5 stageBuiltins[EShLangFragment].append( "float interpolateAtCentroid(float);" "vec2 interpolateAtCentroid(vec2);" @@ -5498,6 +5631,16 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "\n"); } + // GL_QCOM_tile_shading + if ((profile == EEsProfile && version >= 310) || + (profile != EEsProfile && version >= 460)) { + stageBuiltins[EShLangCompute].append( + "in highp uvec2 gl_TileOffsetQCOM;" // GL_QCOM_tile_shading + "in highp uvec3 gl_TileDimensionQCOM;" // GL_QCOM_tile_shading + "in highp uvec2 gl_TileApronSizeQCOM;" // GL_QCOM_tile_shading + "\n"); + } + //============================================================================ // // Define the interface to the mesh/task shader. 
@@ -5888,7 +6031,7 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "out vec4 gl_ClipVertex;" ); - if (version >= 400) + if (version >= 150) stageBuiltins[EShLangGeometry].append( "in int gl_InvocationID;" ); @@ -6203,7 +6346,7 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV } } - if (version >= 400) + if (version >= 150) stageBuiltins[EShLangFragment].append( "flat in int gl_SampleMaskIn[];" ); @@ -6338,6 +6481,16 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "flat in highp uint gl_ViewID_OVR;" // GL_OVR_multiview, GL_OVR_multiview2 "\n"); } + + // GL_QCOM_tile_shading + if ((profile == EEsProfile && version >= 310) || + (profile != EEsProfile && version >= 460)) { + stageBuiltins[EShLangFragment].append( + "flat in highp uvec2 gl_TileOffsetQCOM;" // GL_QCOM_tile_shading + "flat in highp uvec3 gl_TileDimensionQCOM;" // GL_QCOM_tile_shading + "flat in highp uvec2 gl_TileApronSizeQCOM;" // GL_QCOM_tile_shading + "\n"); + } // GL_ARB_shader_ballot if (profile != EEsProfile && version >= 450) { @@ -8401,6 +8554,11 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion symbolTable.setFunctionExtensions("allInvocationsARB", 1, &E_GL_ARB_shader_group_vote); symbolTable.setFunctionExtensions("allInvocationsEqualARB", 1, &E_GL_ARB_shader_group_vote); } + if (version >= 150) { + symbolTable.setFunctionExtensions("anyThreadNV", 1, &E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("allThreadsNV", 1, &E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("allThreadsEqualNV", 1, &E_GL_NV_gpu_shader5); + } } @@ -8489,8 +8647,11 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion if (version == 310) symbolTable.setFunctionExtensions("textureGatherOffsets", Num_AEP_gpu_shader5, AEP_gpu_shader5); } - if (version == 310) + if (version == 310) { symbolTable.setFunctionExtensions("fma", 
Num_AEP_gpu_shader5, AEP_gpu_shader5); + } else if (profile != EEsProfile && version >= 150) { + symbolTable.setFunctionExtensions("fma", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + } } if (profile == EEsProfile && version < 320) { @@ -8579,6 +8740,9 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion symbolTable.setVariableExtensions("gl_PositionPerViewNV", 1, &E_GL_NVX_multiview_per_view_attributes); symbolTable.setVariableExtensions("gl_ViewportMaskPerViewNV", 1, &E_GL_NVX_multiview_per_view_attributes); + if (profile != EEsProfile && language == EShLangGeometry && version < 400) { + symbolTable.setVariableExtensions("gl_InvocationID", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + } BuiltInVariable("gl_ViewportMask", EbvViewportMaskNV, symbolTable); BuiltInVariable("gl_SecondaryPositionNV", EbvSecondaryPositionNV, symbolTable); BuiltInVariable("gl_SecondaryViewportMaskNV", EbvSecondaryViewportMaskNV, symbolTable); @@ -8677,6 +8841,11 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion BuiltInVariable("gl_SubGroupSizeARB", EbvSubGroupSize, symbolTable); } + // GL_ARB_gpu_shader5/GL_NV_gpu_shader5 + if (profile != EEsProfile && version < 400 && language == EShLangGeometry) { + symbolTable.setFunctionExtensions("EmitStreamVertex", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("EndStreamPrimitive", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + } // GL_KHR_shader_subgroup if ((profile == EEsProfile && version >= 310) || (profile != EEsProfile && version >= 140)) { @@ -8720,18 +8889,18 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion BuiltInVariable("gl_WarpMaxIDARM", EbvWarpMaxIDARM, symbolTable); } - if (language == EShLangGeometry || language == EShLangVertex) { - if ((profile == EEsProfile && version >= 310) || - (profile != EEsProfile && version >= 450)) { - 
symbolTable.setVariableExtensions("gl_PrimitiveShadingRateEXT", 1, &E_GL_EXT_fragment_shading_rate); - BuiltInVariable("gl_PrimitiveShadingRateEXT", EbvPrimitiveShadingRateKHR, symbolTable); + if (language == EShLangGeometry || language == EShLangVertex) { + if ((profile == EEsProfile && version >= 310) || + (profile != EEsProfile && version >= 450)) { + symbolTable.setVariableExtensions("gl_PrimitiveShadingRateEXT", 1, &E_GL_EXT_fragment_shading_rate); + BuiltInVariable("gl_PrimitiveShadingRateEXT", EbvPrimitiveShadingRateKHR, symbolTable); - symbolTable.setVariableExtensions("gl_ShadingRateFlag2VerticalPixelsEXT", 1, &E_GL_EXT_fragment_shading_rate); - symbolTable.setVariableExtensions("gl_ShadingRateFlag4VerticalPixelsEXT", 1, &E_GL_EXT_fragment_shading_rate); - symbolTable.setVariableExtensions("gl_ShadingRateFlag2HorizontalPixelsEXT", 1, &E_GL_EXT_fragment_shading_rate); - symbolTable.setVariableExtensions("gl_ShadingRateFlag4HorizontalPixelsEXT", 1, &E_GL_EXT_fragment_shading_rate); - } - } + symbolTable.setVariableExtensions("gl_ShadingRateFlag2VerticalPixelsEXT", 1, &E_GL_EXT_fragment_shading_rate); + symbolTable.setVariableExtensions("gl_ShadingRateFlag4VerticalPixelsEXT", 1, &E_GL_EXT_fragment_shading_rate); + symbolTable.setVariableExtensions("gl_ShadingRateFlag2HorizontalPixelsEXT", 1, &E_GL_EXT_fragment_shading_rate); + symbolTable.setVariableExtensions("gl_ShadingRateFlag4HorizontalPixelsEXT", 1, &E_GL_EXT_fragment_shading_rate); + } + } break; case EShLangFragment: @@ -8810,6 +8979,9 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion BuiltInVariable("gl_SampleMask", EbvSampleMask, symbolTable); if (profile != EEsProfile && version < 400) { + BuiltInVariable("gl_SampleMaskIn", EbvSampleMask, symbolTable); + symbolTable.setVariableExtensions("gl_SampleMaskIn", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + BuiltInVariable("gl_NumSamples", EbvSampleMask, symbolTable); 
symbolTable.setVariableExtensions("gl_SampleMask", 1, &E_GL_ARB_sample_shading); @@ -8915,6 +9087,37 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion symbolTable.setFunctionExtensions("atomicCounter" , 1, &E_GL_ARB_shader_atomic_counters); } + // E_GL_ARB_gpu_shader5/E_GL_NV_gpu_shader5 + if (profile != EEsProfile && version < 400) { + symbolTable.setFunctionExtensions("bitfieldExtract", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("bitfieldInsert", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("bitfieldReverse", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("bitCount", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("findLSB", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("findMSB", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("uaddCarry", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("usubBorrow", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("umulExtended", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("imulExtended", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("interpolateAtCentroid", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("interpolateAtSample", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + symbolTable.setFunctionExtensions("interpolateAtOffset", Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5); + } + + // E_GL_NV_gpu_shader5 + if (profile != EEsProfile && version < 450) { + symbolTable.setFunctionExtensions("packInt2x32", 1, &E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("packUint2x32", 1, &E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("unpackInt2x32", 1, 
&E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("unpackUint2x32", 1, &E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("packFloat2x16", 1, &E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("unpackFloat2x16", 1, &E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("doubleBitsToInt64", 1, &E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("doubleBitsToUint64", 1, &E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("int64BitsToDouble", 1, &E_GL_NV_gpu_shader5); + symbolTable.setFunctionExtensions("uint64BitsToDouble", 1, &E_GL_NV_gpu_shader5); + } + // E_GL_ARB_shader_atomic_counter_ops if (profile != EEsProfile && version == 450) { symbolTable.setFunctionExtensions("atomicCounterAddARB" , 1, &E_GL_ARB_shader_atomic_counter_ops); @@ -9059,11 +9262,15 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion } if (profile != EEsProfile && version < 330 ) { - const char* bitsConvertExt[2] = {E_GL_ARB_shader_bit_encoding, E_GL_ARB_gpu_shader5}; - symbolTable.setFunctionExtensions("floatBitsToInt", 2, bitsConvertExt); - symbolTable.setFunctionExtensions("floatBitsToUint", 2, bitsConvertExt); - symbolTable.setFunctionExtensions("intBitsToFloat", 2, bitsConvertExt); - symbolTable.setFunctionExtensions("uintBitsToFloat", 2, bitsConvertExt); + const int numBitEncodingExts = 3; + const char* bitEncodingExts[numBitEncodingExts] = { E_GL_ARB_shader_bit_encoding, + E_GL_ARB_gpu_shader5, + E_GL_NV_gpu_shader5}; + symbolTable.setFunctionExtensions("floatBitsToInt", numBitEncodingExts, bitEncodingExts); + symbolTable.setFunctionExtensions("floatBitsToUint", numBitEncodingExts, bitEncodingExts); + symbolTable.setFunctionExtensions("intBitsToFloat", numBitEncodingExts, bitEncodingExts); + symbolTable.setFunctionExtensions("uintBitsToFloat", numBitEncodingExts, bitEncodingExts); + } if (profile != EEsProfile && version < 430 ) { @@ -9084,12 +9291,16 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile 
profile, const SpvVersion // GL_ARB_shading_language_packing if (profile != EEsProfile && version < 400 ) { - symbolTable.setFunctionExtensions("packUnorm2x16", 1, &E_GL_ARB_shading_language_packing); - symbolTable.setFunctionExtensions("unpackUnorm2x16", 1, &E_GL_ARB_shading_language_packing); - symbolTable.setFunctionExtensions("packSnorm4x8", 1, &E_GL_ARB_shading_language_packing); - symbolTable.setFunctionExtensions("packUnorm4x8", 1, &E_GL_ARB_shading_language_packing); - symbolTable.setFunctionExtensions("unpackSnorm4x8", 1, &E_GL_ARB_shading_language_packing); - symbolTable.setFunctionExtensions("unpackUnorm4x8", 1, &E_GL_ARB_shading_language_packing); + const int numPackingExts = 3; + const char* packingExts[numPackingExts] = { E_GL_ARB_shading_language_packing, + E_GL_ARB_gpu_shader5, + E_GL_NV_gpu_shader5}; + symbolTable.setFunctionExtensions("packUnorm2x16", numPackingExts, packingExts); + symbolTable.setFunctionExtensions("unpackUnorm2x16", numPackingExts, packingExts); + symbolTable.setFunctionExtensions("packSnorm4x8", numPackingExts, packingExts); + symbolTable.setFunctionExtensions("packUnorm4x8", numPackingExts, packingExts); + symbolTable.setFunctionExtensions("unpackSnorm4x8", numPackingExts, packingExts); + symbolTable.setFunctionExtensions("unpackUnorm4x8", numPackingExts, packingExts); } if (profile != EEsProfile && version < 420 ) { symbolTable.setFunctionExtensions("packSnorm2x16", 1, &E_GL_ARB_shading_language_packing); @@ -9327,6 +9538,17 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion symbolTable.setFunctionExtensions("textureBlockMatchGatherSSDQCOM", 1, &E_GL_QCOM_image_processing2); symbolTable.setFunctionExtensions("textureBlockMatchGatherSADQCOM", 1, &E_GL_QCOM_image_processing2); } + + if ((profile == EEsProfile && version >= 310) || + (profile != EEsProfile && version >= 460)) { + BuiltInVariable("gl_TileOffsetQCOM", EbvTileOffsetQCOM, symbolTable); + BuiltInVariable("gl_TileDimensionQCOM", 
EbvTileDimensionQCOM, symbolTable); + BuiltInVariable("gl_TileApronSizeQCOM", EbvTileApronSizeQCOM, symbolTable); + + symbolTable.setVariableExtensions("gl_TileOffsetQCOM", 1, &E_GL_QCOM_tile_shading); + symbolTable.setVariableExtensions("gl_TileDimensionQCOM", 1, &E_GL_QCOM_tile_shading); + symbolTable.setVariableExtensions("gl_TileApronSizeQCOM", 1, &E_GL_QCOM_tile_shading); + } break; case EShLangCompute: @@ -9532,6 +9754,17 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion symbolTable.setFunctionExtensions("uintBitsToBFloat16EXT", 1, &E_GL_EXT_bfloat16); } + // E_SPV_QCOM_tile_shading + if ((profile == EEsProfile && version >= 310) || + (profile != EEsProfile && version >= 460)) { + BuiltInVariable("gl_TileOffsetQCOM", EbvTileOffsetQCOM, symbolTable); + BuiltInVariable("gl_TileDimensionQCOM", EbvTileDimensionQCOM, symbolTable); + BuiltInVariable("gl_TileApronSizeQCOM", EbvTileApronSizeQCOM, symbolTable); + + symbolTable.setVariableExtensions("gl_TileOffsetQCOM", 1, &E_GL_QCOM_tile_shading); + symbolTable.setVariableExtensions("gl_TileDimensionQCOM", 1, &E_GL_QCOM_tile_shading); + symbolTable.setVariableExtensions("gl_TileApronSizeQCOM", 1, &E_GL_QCOM_tile_shading); + } break; case EShLangRayGen: @@ -10445,6 +10678,16 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion symbolTable.relateToOperator("allInvocations", EOpAllInvocations); symbolTable.relateToOperator("allInvocationsEqual", EOpAllInvocationsEqual); } + // As per dependency between NV_gpu_shader5 and ARB_shader_group_vote + // anyInvocationARB = anyThreadNV + // allInvocationsARB = allThreadsNV + // allInvocationsEqualARB = allThreadsEqualNV + // Thus we reuse the Op's + if (version >= 150) { + symbolTable.relateToOperator("anyThreadNV", EOpAnyInvocation); + symbolTable.relateToOperator("allThreadsNV", EOpAllInvocations); + symbolTable.relateToOperator("allThreadsEqualNV", EOpAllInvocationsEqual); + } 
symbolTable.relateToOperator("minInvocationsAMD", EOpMinInvocations); symbolTable.relateToOperator("maxInvocationsAMD", EOpMaxInvocations); symbolTable.relateToOperator("addInvocationsAMD", EOpAddInvocations); diff --git a/3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp b/3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp index d162f32c6..638f9c1e1 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp +++ b/3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp @@ -1510,12 +1510,17 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat case EbtInt16: case EbtUint16: return (version >= 400 || numericFeatures.contains(TNumericFeatures::gpu_shader_fp64)) && - numericFeatures.contains(TNumericFeatures::gpu_shader_int16); + (numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types) || + numericFeatures.contains(TNumericFeatures::gpu_shader_int16)); case EbtFloat16: return (version >= 400 || numericFeatures.contains(TNumericFeatures::gpu_shader_fp64)) && - numericFeatures.contains(TNumericFeatures::gpu_shader_half_float); + (numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types) || + numericFeatures.contains(TNumericFeatures::gpu_shader_half_float)); case EbtBFloat16: return true; + case EbtInt8: + case EbtUint8: + return numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); default: return false; } @@ -1528,24 +1533,35 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat return getSource() == EShSourceHlsl; case EbtInt16: case EbtUint16: - return numericFeatures.contains(TNumericFeatures::gpu_shader_int16); + return numericFeatures.contains(TNumericFeatures::gpu_shader_int16) || + numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); case EbtFloat16: return numericFeatures.contains(TNumericFeatures::gpu_shader_half_float) || + numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types) || getSource() == EShSourceHlsl; 
case EbtBFloat16: return true; + case EbtInt8: + case EbtUint8: + return numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); default: return false; } case EbtUint: switch (from) { case EbtInt: - return version >= 400 || getSource() == EShSourceHlsl || IsRequestedExtension(E_GL_ARB_gpu_shader5); + return version >= 400 || getSource() == EShSourceHlsl || + IsRequestedExtension(E_GL_ARB_gpu_shader5) || + numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); case EbtBool: return getSource() == EShSourceHlsl; case EbtInt16: case EbtUint16: - return numericFeatures.contains(TNumericFeatures::gpu_shader_int16); + return numericFeatures.contains(TNumericFeatures::gpu_shader_int16) || + numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); + case EbtInt8: + case EbtUint8: + return numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); default: return false; } @@ -1554,7 +1570,10 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat case EbtBool: return getSource() == EShSourceHlsl; case EbtInt16: - return numericFeatures.contains(TNumericFeatures::gpu_shader_int16); + return numericFeatures.contains(TNumericFeatures::gpu_shader_int16) || + numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); + case EbtInt8: + return numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); default: return false; } @@ -1566,7 +1585,11 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat return true; case EbtInt16: case EbtUint16: - return numericFeatures.contains(TNumericFeatures::gpu_shader_int16); + return numericFeatures.contains(TNumericFeatures::gpu_shader_int16) || + numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); + case EbtInt8: + case EbtUint8: + return numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); default: return false; } @@ -1574,8 +1597,11 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, 
TOperat switch (from) { case EbtInt: return true; + case EbtInt8: + return numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); case EbtInt16: - return numericFeatures.contains(TNumericFeatures::gpu_shader_int16); + return numericFeatures.contains(TNumericFeatures::gpu_shader_int16) || + numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types); default: return false; } diff --git a/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp b/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp index ee3bad930..8ea6e2e3b 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp +++ b/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp @@ -645,9 +645,13 @@ TIntermTyped* TParseContext::handleBracketDereference(const TSourceLoc& loc, TIn if (base->getBasicType() == EbtBlock) { if (base->getQualifier().storage == EvqBuffer) requireProfile(base->getLoc(), ~EEsProfile, "variable indexing buffer block array"); - else if (base->getQualifier().storage == EvqUniform) + else if (base->getQualifier().storage == EvqUniform) { profileRequires(base->getLoc(), EEsProfile, 320, Num_AEP_gpu_shader5, AEP_gpu_shader5, "variable indexing uniform block array"); + profileRequires(base->getLoc(), ECoreProfile, 400, Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5, + "variable indexing uniform block array"); + + } else { // input/output blocks either don't exist or can't be variably indexed } @@ -657,7 +661,8 @@ TIntermTyped* TParseContext::handleBracketDereference(const TSourceLoc& loc, TIn const char* explanation = "variable indexing sampler array"; requireProfile(base->getLoc(), EEsProfile | ECoreProfile | ECompatibilityProfile, explanation); profileRequires(base->getLoc(), EEsProfile, 320, Num_AEP_gpu_shader5, AEP_gpu_shader5, explanation); - profileRequires(base->getLoc(), ECoreProfile | ECompatibilityProfile, 400, nullptr, explanation); + profileRequires(base->getLoc(), ECoreProfile | ECompatibilityProfile, 400, 
Num_AEP_core_gpu_shader5, + AEP_core_gpu_shader5, explanation); } result = intermediate.addIndex(EOpIndexIndirect, base, index, loc); @@ -2389,23 +2394,27 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan feature = featureString.c_str(); profileRequires(loc, EEsProfile, 310, nullptr, feature); int compArg = -1; // track which argument, if any, is the constant component argument + const int numTexGatherExts = 3; + const char* texGatherExts[numTexGatherExts] = { E_GL_ARB_texture_gather, + E_GL_ARB_gpu_shader5, + E_GL_NV_gpu_shader5}; switch (callNode.getOp()) { case EOpTextureGather: // More than two arguments needs gpu_shader5, and rectangular or shadow needs gpu_shader5, // otherwise, need GL_ARB_texture_gather. if (fnCandidate.getParamCount() > 2 || fnCandidate[0].type->getSampler().dim == EsdRect || fnCandidate[0].type->getSampler().shadow) { - profileRequires(loc, ~EEsProfile, 400, E_GL_ARB_gpu_shader5, feature); + profileRequires(loc, ~EEsProfile, 400, Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5, feature); if (! fnCandidate[0].type->getSampler().shadow) compArg = 2; } else - profileRequires(loc, ~EEsProfile, 400, E_GL_ARB_texture_gather, feature); + profileRequires(loc, ~EEsProfile, 400, numTexGatherExts, texGatherExts, feature); break; case EOpTextureGatherOffset: // GL_ARB_texture_gather is good enough for 2D non-shadow textures with no component argument if (fnCandidate[0].type->getSampler().dim == Esd2D && ! fnCandidate[0].type->getSampler().shadow && fnCandidate.getParamCount() == 3) - profileRequires(loc, ~EEsProfile, 400, E_GL_ARB_texture_gather, feature); + profileRequires(loc, ~EEsProfile, 400, numTexGatherExts, texGatherExts, feature); else - profileRequires(loc, ~EEsProfile, 400, E_GL_ARB_gpu_shader5, feature); + profileRequires(loc, ~EEsProfile, 400, Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5, feature); if (! (*argp)[fnCandidate[0].type->getSampler().shadow ? 
3 : 2]->getAsConstantUnion()) profileRequires(loc, EEsProfile, 320, Num_AEP_gpu_shader5, AEP_gpu_shader5, "non-constant offset argument"); @@ -2413,11 +2422,13 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan compArg = 3; break; case EOpTextureGatherOffsets: - profileRequires(loc, ~EEsProfile, 400, E_GL_ARB_gpu_shader5, feature); + profileRequires(loc, ~EEsProfile, 400, Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5, feature); if (! fnCandidate[0].type->getSampler().shadow) compArg = 3; // check for constant offsets - if (! (*argp)[fnCandidate[0].type->getSampler().shadow ? 3 : 2]->getAsConstantUnion()) + if (! (*argp)[fnCandidate[0].type->getSampler().shadow ? 3 : 2]->getAsConstantUnion() + // NV_gpu_shader5 relaxes this limitation and allows for non-constant offsets + && !extensionTurnedOn(E_GL_NV_gpu_shader5)) error(loc, "must be a compile-time constant:", feature, "offsets argument"); break; default: @@ -2595,8 +2606,15 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan arg0->getType().getSampler().shadow; if (f16ShadowCompare) ++arg; + // Allow non-constant offsets for certain texture ops + bool variableOffsetSupport = extensionTurnedOn(E_GL_NV_gpu_shader5) && + (callNode.getOp() == EOpTextureOffset || + callNode.getOp() == EOpTextureFetchOffset || + callNode.getOp() == EOpTextureProjOffset || + callNode.getOp() == EOpTextureLodOffset || + callNode.getOp() == EOpTextureProjLodOffset); if (! 
(*argp)[arg]->getAsTyped()->getQualifier().isConstant()) { - if (!extensionTurnedOn(E_GL_EXT_texture_offset_non_const)) + if (!extensionTurnedOn(E_GL_EXT_texture_offset_non_const) && !variableOffsetSupport) error(loc, "argument must be compile-time constant", "texel offset", ""); } else if ((*argp)[arg]->getAsConstantUnion()) { @@ -2984,7 +3002,7 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan case EOpEmitStreamVertex: case EOpEndStreamPrimitive: if (version == 150) - requireExtensions(loc, 1, &E_GL_ARB_gpu_shader5, "if the version is 150 , the EmitStreamVertex and EndStreamPrimitive only support at extension GL_ARB_gpu_shader5"); + requireExtensions(loc, Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5, "if the verison is 150 , the EmitStreamVertex and EndStreamPrimitive only support at extension GL_ARB_gpu_shader5/GL_NV_gpu_shader5"); intermediate.setMultiStream(); break; @@ -3047,6 +3065,28 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan } } + break; + case EOpLessThan: + case EOpLessThanEqual: + case EOpGreaterThan: + case EOpGreaterThanEqual: + case EOpEqual: + case EOpNotEqual: + if (profile != EEsProfile && version >= 150 && version < 450) { + if ((*argp)[1]->getAsTyped()->getBasicType() == EbtInt64 || + (*argp)[1]->getAsTyped()->getBasicType() == EbtUint64) + requireExtensions(loc, 1, &E_GL_NV_gpu_shader5, fnCandidate.getName().c_str()); + } + break; + case EOpFma: + case EOpFrexp: + case EOpLdexp: + if (profile != EEsProfile && version < 400) { + if ((*argp)[0]->getAsTyped()->getBasicType() == EbtFloat) { + requireExtensions(loc, Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5, fnCandidate.getName().c_str()); + } + } + break; case EOpCooperativeVectorMatMulNV: case EOpCooperativeVectorMatMulAddNV: @@ -3193,7 +3233,7 @@ void TParseContext::nonOpBuiltInCheck(const TSourceLoc& loc, const TFunction& fn compArg = 3; // check for constant offsets int offsetArg = 
fnCandidate[0].type->getSampler().shadow ? 3 : 2; - if (! callNode.getSequence()[offsetArg]->getAsConstantUnion()) + if (! callNode.getSequence()[offsetArg]->getAsConstantUnion() && !extensionTurnedOn(E_GL_NV_gpu_shader5)) error(loc, "must be a compile-time constant:", feature, "offsets argument"); } else if (fnCandidate.getName().compare("textureGather") == 0) { // More than two arguments needs gpu_shader5, and rectangular or shadow needs gpu_shader5, @@ -4123,10 +4163,16 @@ bool TParseContext::constructorTextureSamplerError(const TSourceLoc& loc, const error(loc, "sampler-constructor first argument must be a scalar *texture* type", token, ""); return true; } + // simulate the first argument's impact on the result type, so it can be compared with the encapsulated operator!=() TSampler texture = function.getType().getSampler(); texture.setCombined(false); texture.shadow = false; + if (function[0].type->getSampler().isTileAttachmentQCOM()) { + //TSampler& texture = const_cast(function).getWritableType().getSampler(); + texture.image = true; + texture.tileQCOM = true; + } if (texture != function[0].type->getSampler()) { error(loc, "sampler-constructor first argument must be a *texture* type" " matching the dimensionality and sampled type of the constructor", token, ""); @@ -4218,7 +4264,7 @@ void TParseContext::samplerCheck(const TSourceLoc& loc, const TType& type, const // if (! 
initializer) if (type.getSampler().isAttachmentEXT() && type.getQualifier().storage != EvqTileImageEXT) error(loc, "can only be used in tileImageEXT variables or function parameters:", type.getBasicTypeString().c_str(), identifier.c_str()); - else if (type.getQualifier().storage != EvqTileImageEXT) + else if (type.getQualifier().storage != EvqTileImageEXT) error(loc, "sampler/image types can only be used in uniform variables or function parameters:", type.getBasicTypeString().c_str(), identifier.c_str()); } } @@ -4289,6 +4335,10 @@ void TParseContext::memberQualifierCheck(glslang::TPublicType& publicType) error(publicType.loc, "not allowed on block or structure members", "nonuniformEXT", ""); publicType.qualifier.nonUniform = false; } + if (publicType.qualifier.isPatch()) { + error(publicType.loc, "not allowed on block or structure members", + "patch", ""); + } } // @@ -4429,6 +4479,12 @@ void TParseContext::globalQualifierTypeCheck(const TSourceLoc& loc, const TQuali if (qualifier.isPatch() && qualifier.isInterpolation()) error(loc, "cannot use interpolation qualifiers with patch", "patch", ""); + // Only "patch in" is supported via GL_NV_gpu_shader5 + if (! 
symbolTable.atBuiltInLevel() && qualifier.isPatch() && + (language == EShLangGeometry) && qualifier.storage != EvqVaryingIn && + extensionTurnedOn(E_GL_NV_gpu_shader5)) + error(loc, "only 'patch in' is supported in this stage:", "patch", "geometry"); + if (qualifier.isTaskPayload() && publicType.basicType == EbtBlock) error(loc, "taskPayloadSharedEXT variables should not be declared as interface blocks", "taskPayloadSharedEXT", ""); @@ -4446,8 +4502,11 @@ void TParseContext::globalQualifierTypeCheck(const TSourceLoc& loc, const TQuali requireProfile(loc, ~EEsProfile, "vertex input arrays"); profileRequires(loc, ENoProfile, 150, nullptr, "vertex input arrays"); } - if (publicType.basicType == EbtDouble) - profileRequires(loc, ~EEsProfile, 410, E_GL_ARB_vertex_attrib_64bit, "vertex-shader `double` type input"); + if (publicType.basicType == EbtDouble) { + const char* const float64_attrib[] = {E_GL_NV_gpu_shader5, E_GL_ARB_vertex_attrib_64bit}; + const int Num_float64_attrib = sizeof(float64_attrib) / sizeof(float64_attrib[0]); + profileRequires(loc, ~EEsProfile, 410, Num_float64_attrib, float64_attrib, "vertex-shader `double` type input"); + } if (qualifier.isAuxiliary() || qualifier.isInterpolation() || qualifier.isMemory() || qualifier.invariant) error(loc, "vertex input cannot be further qualified", "", ""); break; @@ -6258,6 +6317,16 @@ void TParseContext::setLayoutQualifier(const TSourceLoc& loc, TPublicType& publi publicType.qualifier.layoutFullQuads = true; return; } + if (id == "non_coherent_attachment_readqcom") { + requireExtensions(loc, 1, &E_GL_QCOM_tile_shading, "tile shading QCOM"); + publicType.shaderQualifiers.layoutNonCoherentTileAttachmentReadQCOM = true; + return; + } + if (id == "tile_attachmentqcom") { + requireExtensions(loc, 1, &E_GL_QCOM_tile_shading, "tile shading QCOM"); + publicType.qualifier.layoutTileAttachmentQCOM = true; + return; + } } if (language == EShLangVertex || language == EShLangTessControl || @@ -6299,6 +6368,11 @@ void 
TParseContext::setLayoutQualifier(const TSourceLoc& loc, TPublicType& publi return; } } + if (id == "tile_attachmentqcom") { + requireExtensions(loc, 1, &E_GL_QCOM_tile_shading, "tile shading QCOM"); + publicType.qualifier.layoutTileAttachmentQCOM = true; + return; + } } if (id == "primitive_culling") { @@ -6540,7 +6614,9 @@ void TParseContext::setLayoutQualifier(const TSourceLoc& loc, TPublicType& publi case EShLangGeometry: if (id == "invocations") { - profileRequires(loc, ECompatibilityProfile | ECoreProfile, 400, nullptr, "invocations"); + profileRequires(loc, ECompatibilityProfile | ECoreProfile, 400, + Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5, "invocations"); + if (value == 0) error(loc, "must be at least 1", "invocations", ""); else @@ -6666,6 +6742,38 @@ void TParseContext::setLayoutQualifier(const TSourceLoc& loc, TPublicType& publi } } } + if (id.compare(0, 18, "shading_rate_xqcom") == 0 || + id.compare(0, 18, "shading_rate_yqcom") == 0 || + id.compare(0, 18, "shading_rate_zqcom") == 0) { + requireExtensions(loc, 1, &E_GL_QCOM_tile_shading, "tile shading QCOM"); + if (nonLiteral) + error(loc, "needs a literal integer", "shading_rate_*QCOM", ""); + if (id.size() == 18 && value == 0) { + error(loc, "must be at least 1", id.c_str(), ""); + return; + } + if (id == "shading_rate_xqcom") { + publicType.shaderQualifiers.layoutTileShadingRateQCOM[0] = value; + publicType.shaderQualifiers.layoutTileShadingRateQCOMNotDefault[0] = true; + if (! IsPow2(value)) + error(loc, "must be a power of 2", id.c_str(), ""); + return; + } + if (id == "shading_rate_yqcom") { + publicType.shaderQualifiers.layoutTileShadingRateQCOM[1] = value; + publicType.shaderQualifiers.layoutTileShadingRateQCOMNotDefault[1] = true; + if (! 
IsPow2(value)) + error(loc, "must be a power of 2", id.c_str(), ""); + return; + } + if (id == "shading_rate_zqcom") { + publicType.shaderQualifiers.layoutTileShadingRateQCOM[2] = value; + publicType.shaderQualifiers.layoutTileShadingRateQCOMNotDefault[2] = true; + if (value <= 0) + error(loc, "must be a positive value", id.c_str(), ""); + return; + } + } break; default: @@ -6759,6 +6867,7 @@ void TParseContext::mergeObjectLayoutQualifiers(TQualifier& dst, const TQualifie dst.pervertexEXT = true; if (src.layoutHitObjectShaderRecordNV) dst.layoutHitObjectShaderRecordNV = true; + dst.layoutTileAttachmentQCOM |= src.layoutTileAttachmentQCOM; } } @@ -7203,8 +7312,8 @@ void TParseContext::layoutQualifierCheck(const TSourceLoc& loc, const TQualifier } if (qualifier.hasBinding()) { - if (! qualifier.isUniformOrBuffer() && !qualifier.isTaskMemory()) - error(loc, "requires uniform or buffer storage qualifier", "binding", ""); + if (! qualifier.isUniformOrBuffer() && !qualifier.isTaskMemory() && !qualifier.isTileAttachmentQCOM()) + error(loc, "requires uniform or buffer or tile image storage qualifier", "binding", ""); } if (qualifier.hasStream()) { if (!qualifier.isPipeOutput()) @@ -7309,6 +7418,15 @@ void TParseContext::checkNoShaderLayouts(const TSourceLoc& loc, const TShaderQua error(loc, message, TQualifier::getInterlockOrderingString(shaderQualifiers.interlockOrdering), ""); if (shaderQualifiers.layoutPrimitiveCulling) error(loc, "can only be applied as standalone", "primitive_culling", ""); + + if (shaderQualifiers.layoutNonCoherentTileAttachmentReadQCOM) + error(loc, message, "non_coherent_attachment_readQCOM", ""); + if (shaderQualifiers.layoutTileShadingRateQCOM[0] >= 1) + error(loc, message, "shading_rate_xQCOM", ""); + if (shaderQualifiers.layoutTileShadingRateQCOM[1] >= 1) + error(loc, message, "shading_rate_yQCOM", ""); + if (shaderQualifiers.layoutTileShadingRateQCOM[2] >= 1) + error(loc, message, "shading_rate_zQCOM", ""); } // Correct and/or advance an 
object's offset layout qualifier. @@ -7398,7 +7516,9 @@ const TFunction* TParseContext::findFunction(const TSourceLoc& loc, const TFunct else if (version < 120) function = findFunctionExact(loc, call, builtIn); else if (version < 400) { - bool needfindFunction400 = extensionTurnedOn(E_GL_ARB_gpu_shader_fp64) || extensionTurnedOn(E_GL_ARB_gpu_shader5); + bool needfindFunction400 = extensionTurnedOn(E_GL_ARB_gpu_shader_fp64) + || extensionTurnedOn(E_GL_ARB_gpu_shader5) + || extensionTurnedOn(E_GL_NV_gpu_shader5); function = needfindFunction400 ? findFunction400(loc, call, builtIn) : findFunction120(loc, call, builtIn); } else if (explicitTypesEnabled) @@ -7581,13 +7701,35 @@ const TFunction* TParseContext::findFunction400(const TSourceLoc& loc, const TFu // Is 'to2' a better conversion than 'to1'? // Ties should not be considered as better. // Assumes 'convertible' already said true. - const auto better = [](const TType& from, const TType& to1, const TType& to2) -> bool { + const auto better = [&](const TType& from, const TType& to1, const TType& to2) -> bool { // 1. exact match if (from == to2) return from != to1; if (from == to1) return false; - + if (extensionTurnedOn(E_GL_NV_gpu_shader5)) { + // This map refers to the conversion table mentioned under the + // section "Modify Section 6.1, Function Definitions, p. 
63" in NV_gpu_shader5 spec + const static std::map> conversionTable = { + {EbtInt8, {EbtInt, EbtInt64}}, + {EbtInt16, {EbtInt, EbtInt64}}, + {EbtInt, {EbtInt64}}, + {EbtUint8, {EbtUint, EbtUint64}}, + {EbtUint16, {EbtUint, EbtUint64}}, + {EbtUint, {EbtUint64}}, + }; + auto source = conversionTable.find(from.getBasicType()); + if (source != conversionTable.end()) { + for (auto destination : source->second) { + if (to2.getBasicType() == destination && + to1.getBasicType() != destination) // to2 is better then to1 + return true; + else if (to1.getBasicType() == destination && + to2.getBasicType() != destination) // This means to1 is better then to2 + return false; + } + } + } // 2. float -> double is better if (from.getBasicType() == EbtFloat) { if (to2.getBasicType() == EbtDouble && to1.getBasicType() != EbtDouble) @@ -10274,6 +10416,12 @@ void TParseContext::updateStandaloneQualifierDefaults(const TSourceLoc& loc, con else error(loc, "can only apply to 'in'", "non_coherent_stencil_attachment_readEXT", ""); } + if (publicType.shaderQualifiers.layoutNonCoherentTileAttachmentReadQCOM) { + if (publicType.qualifier.storage == EvqVaryingIn) + intermediate.setNonCoherentTileAttachmentReadQCOM(); + else + error(loc, "can only apply to 'in'", "non_coherent_attachment_readQCOM", ""); + } if (publicType.shaderQualifiers.hasBlendEquation()) { if (publicType.qualifier.storage != EvqVaryingOut) error(loc, "can only apply to 'out'", "blend equation", ""); @@ -10337,6 +10485,16 @@ void TParseContext::updateStandaloneQualifierDefaults(const TSourceLoc& loc, con return; } + for (int i = 0; i < 3; ++i) { + if (publicType.shaderQualifiers.layoutTileShadingRateQCOMNotDefault[i]) { + if (publicType.qualifier.storage == EvqVaryingIn) { + if (! 
intermediate.setTileShadingRateQCOM(i, publicType.shaderQualifiers.layoutTileShadingRateQCOM[i])) + error(loc, "cannot change previously set size", (i==0?"shading_rate_xQCOM":(i==1?"shading_rate_yQCOM":"shading_rate_zQCOM")), ""); + } else + error(loc, "can only apply to 'in'", (i==0?"shading_rate_xQCOM":(i==1?"shading_rate_yQCOM":"shading_rate_zQCOM")), ""); + } + } + const TQualifier& qualifier = publicType.qualifier; if (qualifier.isAuxiliary() || diff --git a/3rdparty/glslang/glslang/MachineIndependent/Scan.cpp b/3rdparty/glslang/glslang/MachineIndependent/Scan.cpp index d5778d72c..4705d9390 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/Scan.cpp +++ b/3rdparty/glslang/glslang/MachineIndependent/Scan.cpp @@ -1131,12 +1131,16 @@ int TScanContext::tokenizeIdentifier() return es30ReservedFromGLSL(400); - case SAMPLE: + case SAMPLE: + { + const int numLayoutExts = 3; + const char* layoutExts[numLayoutExts] = {E_GL_OES_shader_multisample_interpolation, E_GL_ARB_gpu_shader5, + E_GL_NV_gpu_shader5}; if ((parseContext.isEsProfile() && parseContext.version >= 320) || - parseContext.extensionsTurnedOn(1, &E_GL_OES_shader_multisample_interpolation)) + parseContext.extensionsTurnedOn(numLayoutExts, layoutExts)) return keyword; return es30ReservedFromGLSL(400); - + } case SUBROUTINE: return es30ReservedFromGLSL(400); @@ -1328,6 +1332,7 @@ int TScanContext::tokenizeIdentifier() if (parseContext.symbolTable.atBuiltInLevel() || parseContext.extensionTurnedOn(E_GL_ARB_gpu_shader_int64) || parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types) || + parseContext.extensionTurnedOn(E_GL_NV_gpu_shader5) || parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types_int64)) return keyword; return identifierOrType(); @@ -1344,6 +1349,7 @@ int TScanContext::tokenizeIdentifier() if (parseContext.symbolTable.atBuiltInLevel() || parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types) || 
parseContext.extensionTurnedOn(E_GL_EXT_shader_8bit_storage) || + parseContext.extensionTurnedOn(E_GL_NV_gpu_shader5) || parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types_int8)) return keyword; return identifierOrType(); @@ -1361,6 +1367,7 @@ int TScanContext::tokenizeIdentifier() parseContext.extensionTurnedOn(E_GL_AMD_gpu_shader_int16) || parseContext.extensionTurnedOn(E_GL_EXT_shader_16bit_storage) || parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types) || + parseContext.extensionTurnedOn(E_GL_NV_gpu_shader5) || parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types_int16)) return keyword; return identifierOrType(); @@ -1375,6 +1382,7 @@ int TScanContext::tokenizeIdentifier() afterType = true; if (parseContext.symbolTable.atBuiltInLevel() || parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types) || + parseContext.extensionTurnedOn(E_GL_NV_gpu_shader5) || parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types_int32)) return keyword; return identifierOrType(); @@ -1382,6 +1390,13 @@ int TScanContext::tokenizeIdentifier() case F32VEC2: case F32VEC3: case F32VEC4: + afterType = true; + if (parseContext.symbolTable.atBuiltInLevel() || + parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types) || + parseContext.extensionTurnedOn(E_GL_NV_gpu_shader5) || + parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types_float32)) + return keyword; + return identifierOrType(); case F32MAT2: case F32MAT3: case F32MAT4: @@ -1405,6 +1420,14 @@ int TScanContext::tokenizeIdentifier() case F64VEC2: case F64VEC3: case F64VEC4: + afterType = true; + if (parseContext.symbolTable.atBuiltInLevel() || + parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types) || + (parseContext.extensionTurnedOn(E_GL_NV_gpu_shader5) && + parseContext.extensionTurnedOn(E_GL_ARB_gpu_shader_fp64)) || + 
parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types_float64)) + return keyword; + return identifierOrType(); case F64MAT2: case F64MAT3: case F64MAT4: @@ -1433,6 +1456,7 @@ int TScanContext::tokenizeIdentifier() parseContext.extensionTurnedOn(E_GL_AMD_gpu_shader_half_float) || parseContext.extensionTurnedOn(E_GL_EXT_shader_16bit_storage) || parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types) || + parseContext.extensionTurnedOn(E_GL_NV_gpu_shader5) || parseContext.extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types_float16)) return keyword; @@ -1478,7 +1502,9 @@ int TScanContext::tokenizeIdentifier() if ((parseContext.isEsProfile() && parseContext.version >= 320) || parseContext.extensionsTurnedOn(Num_AEP_texture_cube_map_array, AEP_texture_cube_map_array)) return keyword; - if (parseContext.isEsProfile() || (parseContext.version < 400 && ! parseContext.extensionTurnedOn(E_GL_ARB_texture_cube_map_array))) + if (parseContext.isEsProfile() || (parseContext.version < 400 && + ! parseContext.extensionTurnedOn(E_GL_ARB_texture_cube_map_array) + && ! 
parseContext.extensionsTurnedOn(Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5))) reservedWord(); return keyword; @@ -1759,7 +1785,9 @@ int TScanContext::tokenizeIdentifier() case PRECISE: if ((parseContext.isEsProfile() && (parseContext.version >= 320 || parseContext.extensionsTurnedOn(Num_AEP_gpu_shader5, AEP_gpu_shader5))) || - (!parseContext.isEsProfile() && parseContext.version >= 400)) + (!parseContext.isEsProfile() && + (parseContext.version >= 400 + || parseContext.extensionsTurnedOn(Num_AEP_core_gpu_shader5, AEP_core_gpu_shader5)))) return keyword; if (parseContext.isEsProfile() && parseContext.version == 310) { reservedWord(); diff --git a/3rdparty/glslang/glslang/MachineIndependent/ShaderLang.cpp b/3rdparty/glslang/glslang/MachineIndependent/ShaderLang.cpp index 3cb6687fa..62d72535b 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/ShaderLang.cpp +++ b/3rdparty/glslang/glslang/MachineIndependent/ShaderLang.cpp @@ -2200,6 +2200,7 @@ bool TProgram::buildReflection(int opts) } unsigned TProgram::getLocalSize(int dim) const { return reflection->getLocalSize(dim); } +unsigned TProgram::getTileShadingRateQCOM(int dim) const { return reflection->getTileShadingRateQCOM(dim); } int TProgram::getReflectionIndex(const char* name) const { return reflection->getIndex(name); } int TProgram::getReflectionPipeIOIndex(const char* name, const bool inOrOut) const { return reflection->getPipeIOIndex(name, inOrOut); } diff --git a/3rdparty/glslang/glslang/MachineIndependent/Versions.cpp b/3rdparty/glslang/glslang/MachineIndependent/Versions.cpp index e20d8083b..63ad51a64 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/Versions.cpp +++ b/3rdparty/glslang/glslang/MachineIndependent/Versions.cpp @@ -188,7 +188,7 @@ void TParseVersions::initializeExtensionBehavior() extensionBehavior[E_GL_3DL_array_objects] = EBhDisable; extensionBehavior[E_GL_ARB_shading_language_420pack] = EBhDisable; extensionBehavior[E_GL_ARB_texture_gather] = EBhDisable; - 
extensionBehavior[E_GL_ARB_gpu_shader5] = EBhDisablePartial; + extensionBehavior[E_GL_ARB_gpu_shader5] = EBhDisable; extensionBehavior[E_GL_ARB_separate_shader_objects] = EBhDisable; extensionBehavior[E_GL_ARB_compute_shader] = EBhDisable; extensionBehavior[E_GL_ARB_tessellation_shader] = EBhDisable; @@ -225,6 +225,7 @@ void TParseVersions::initializeExtensionBehavior() extensionBehavior[E_GL_ARB_shading_language_packing] = EBhDisable; extensionBehavior[E_GL_ARB_texture_query_lod] = EBhDisable; extensionBehavior[E_GL_ARB_vertex_attrib_64bit] = EBhDisable; + extensionBehavior[E_GL_NV_gpu_shader5] = EBhDisable; extensionBehavior[E_GL_ARB_draw_instanced] = EBhDisable; extensionBehavior[E_GL_ARB_bindless_texture] = EBhDisable; extensionBehavior[E_GL_ARB_fragment_coord_conventions] = EBhDisable; @@ -322,6 +323,7 @@ void TParseVersions::initializeExtensionBehavior() // QCOM extensionBehavior[E_GL_QCOM_image_processing] = EBhDisable; extensionBehavior[E_GL_QCOM_image_processing2] = EBhDisable; + extensionBehavior[E_GL_QCOM_tile_shading] = EBhDisable; // AEP extensionBehavior[E_GL_ANDROID_extension_pack_es31a] = EBhDisable; @@ -459,6 +461,7 @@ void TParseVersions::getPreamble(std::string& preamble) "#define GL_QCOM_image_processing 1\n" "#define GL_QCOM_image_processing2 1\n" + "#define GL_QCOM_tile_shading 1\n" ; if (version >= 300) { @@ -509,6 +512,7 @@ void TParseVersions::getPreamble(std::string& preamble) "#define GL_ARB_shader_storage_buffer_object 1\n" "#define GL_ARB_texture_query_lod 1\n" "#define GL_ARB_vertex_attrib_64bit 1\n" + "#define GL_NV_gpu_shader5 1\n" "#define GL_ARB_draw_instanced 1\n" "#define GL_ARB_fragment_coord_conventions 1\n" @@ -588,6 +592,7 @@ void TParseVersions::getPreamble(std::string& preamble) "#define GL_QCOM_image_processing 1\n" "#define GL_QCOM_image_processing2 1\n" + "#define GL_QCOM_tile_shading 1\n" "#define GL_EXT_shader_explicit_arithmetic_types 1\n" "#define GL_EXT_shader_explicit_arithmetic_types_int8 1\n" @@ -1077,6 +1082,9 
@@ void TParseVersions::updateExtensionBehavior(int line, const char* extension, co intermediate.updateNumericFeature(TNumericFeatures::gpu_shader_int16, on); else if (strcmp(extension, "GL_AMD_gpu_shader_half_float") == 0) intermediate.updateNumericFeature(TNumericFeatures::gpu_shader_half_float, on); + else if (strcmp(extension, "GL_NV_gpu_shader5") == 0) { + intermediate.updateNumericFeature(TNumericFeatures::nv_gpu_shader5_types, on); + } } void TParseVersions::updateExtensionBehavior(const char* extension, TExtensionBehavior behavior) @@ -1204,6 +1212,7 @@ bool TParseVersions::float16Arithmetic() const char* const extensions[] = { E_GL_AMD_gpu_shader_half_float, E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_float16}; return extensionsTurnedOn(sizeof(extensions)/sizeof(extensions[0]), extensions); } @@ -1213,6 +1222,7 @@ bool TParseVersions::int16Arithmetic() const char* const extensions[] = { E_GL_AMD_gpu_shader_int16, E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_int16}; return extensionsTurnedOn(sizeof(extensions)/sizeof(extensions[0]), extensions); } @@ -1221,6 +1231,7 @@ bool TParseVersions::int8Arithmetic() { const char* const extensions[] = { E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_int8}; return extensionsTurnedOn(sizeof(extensions)/sizeof(extensions[0]), extensions); } @@ -1235,6 +1246,7 @@ void TParseVersions::requireFloat16Arithmetic(const TSourceLoc& loc, const char* const char* const extensions[] = { E_GL_AMD_gpu_shader_half_float, E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_float16}; requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, combined.c_str()); } @@ -1249,6 +1261,7 @@ void TParseVersions::requireInt16Arithmetic(const TSourceLoc& loc, const char* o const char* 
const extensions[] = { E_GL_AMD_gpu_shader_int16, E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_int16}; requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, combined.c_str()); } @@ -1262,6 +1275,7 @@ void TParseVersions::requireInt8Arithmetic(const TSourceLoc& loc, const char* op const char* const extensions[] = { E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_int8}; requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, combined.c_str()); } @@ -1273,6 +1287,7 @@ void TParseVersions::float16ScalarVectorCheck(const TSourceLoc& loc, const char* E_GL_AMD_gpu_shader_half_float, E_GL_EXT_shader_16bit_storage, E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_float16}; requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, op); } @@ -1292,9 +1307,10 @@ void TParseVersions::bfloat16ScalarVectorCheck(const TSourceLoc& loc, const char void TParseVersions::explicitFloat32Check(const TSourceLoc& loc, const char* op, bool builtIn) { if (!builtIn) { - const char* const extensions[2] = {E_GL_EXT_shader_explicit_arithmetic_types, + const char* const extensions[] = {E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_float32}; - requireExtensions(loc, 2, extensions, op); + requireExtensions(loc, sizeof(extensions) / sizeof(extensions[0]), extensions, op); } } @@ -1302,11 +1318,15 @@ void TParseVersions::explicitFloat32Check(const TSourceLoc& loc, const char* op, void TParseVersions::explicitFloat64Check(const TSourceLoc& loc, const char* op, bool builtIn) { if (!builtIn) { - const char* const extensions[2] = {E_GL_EXT_shader_explicit_arithmetic_types, + const char* const extensions[] = {E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, 
E_GL_EXT_shader_explicit_arithmetic_types_float64}; - requireExtensions(loc, 2, extensions, op); + requireExtensions(loc, sizeof(extensions) / sizeof(extensions[0]), extensions, op); requireProfile(loc, ECoreProfile | ECompatibilityProfile, op); - profileRequires(loc, ECoreProfile | ECompatibilityProfile, 400, nullptr, op); + if(extensionTurnedOn(E_GL_ARB_gpu_shader_fp64) && extensionTurnedOn(E_GL_NV_gpu_shader5)) + profileRequires(loc, ECoreProfile | ECompatibilityProfile, 150, nullptr, op); + else + profileRequires(loc, ECoreProfile | ECompatibilityProfile, 400, nullptr, op); } } @@ -1349,6 +1369,7 @@ void TParseVersions::int16ScalarVectorCheck(const TSourceLoc& loc, const char* o E_GL_AMD_gpu_shader_int16, E_GL_EXT_shader_16bit_storage, E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_int16}; requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, op); } @@ -1360,6 +1381,7 @@ void TParseVersions::int8ScalarVectorCheck(const TSourceLoc& loc, const char* op const char* const extensions[] = { E_GL_EXT_shader_8bit_storage, E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_int8}; requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, op); } @@ -1369,9 +1391,10 @@ void TParseVersions::int8ScalarVectorCheck(const TSourceLoc& loc, const char* op void TParseVersions::explicitInt32Check(const TSourceLoc& loc, const char* op, bool builtIn) { if (! 
builtIn) { - const char* const extensions[2] = {E_GL_EXT_shader_explicit_arithmetic_types, + const char* const extensions[] = {E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_int32}; - requireExtensions(loc, 2, extensions, op); + requireExtensions(loc, sizeof(extensions) / sizeof(extensions[0]), extensions, op); } } @@ -1379,11 +1402,15 @@ void TParseVersions::explicitInt32Check(const TSourceLoc& loc, const char* op, b void TParseVersions::int64Check(const TSourceLoc& loc, const char* op, bool builtIn) { if (! builtIn) { - const char* const extensions[3] = {E_GL_ARB_gpu_shader_int64, + const char* const extensions[] = {E_GL_ARB_gpu_shader_int64, E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_int64}; - requireExtensions(loc, 3, extensions, op); + requireExtensions(loc, sizeof(extensions) / sizeof(extensions[0]), extensions, op); requireProfile(loc, ECoreProfile | ECompatibilityProfile, op); + if (extensionTurnedOn(E_GL_NV_gpu_shader5)) + profileRequires(loc, ECoreProfile | ECompatibilityProfile, 150, nullptr, op); + else profileRequires(loc, ECoreProfile | ECompatibilityProfile, 400, nullptr, op); } } @@ -1428,6 +1455,14 @@ void TParseVersions::coopvecCheck(const TSourceLoc& loc, const char* op, bool bu } } +void TParseVersions::intattachmentCheck(const TSourceLoc& loc, const char* op, bool builtIn) +{ + if (!builtIn) { + const char* const extensions[] = {E_GL_QCOM_tile_shading}; + requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, op); + } +} + // Call for any operation removed because SPIR-V is in use. 
void TParseVersions::spvRemoved(const TSourceLoc& loc, const char* op) { diff --git a/3rdparty/glslang/glslang/MachineIndependent/Versions.h b/3rdparty/glslang/glslang/MachineIndependent/Versions.h index bd74a0f9c..bf7bfc0e0 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/Versions.h +++ b/3rdparty/glslang/glslang/MachineIndependent/Versions.h @@ -289,6 +289,7 @@ const char* const E_GL_NV_cooperative_matrix2 = "GL_NV_coopera const char* const E_GL_NV_cooperative_vector = "GL_NV_cooperative_vector"; const char* const E_GL_NV_cluster_acceleration_structure = "GL_NV_cluster_acceleration_structure"; const char* const E_GL_NV_linear_swept_spheres = "GL_NV_linear_swept_spheres"; +const char* const E_GL_NV_gpu_shader5 = "GL_NV_gpu_shader5"; // ARM const char* const E_GL_ARM_shader_core_builtins = "GL_ARM_shader_core_builtins"; @@ -301,6 +302,7 @@ const int Num_viewportEXTs = sizeof(viewportEXTs) / sizeof(viewportEXTs[0]); const char* const E_GL_QCOM_image_processing = "GL_QCOM_image_processing"; const char* const E_GL_QCOM_image_processing2 = "GL_QCOM_image_processing2"; +const char* const E_GL_QCOM_tile_shading = "GL_QCOM_tile_shading"; // AEP const char* const E_GL_ANDROID_extension_pack_es31a = "GL_ANDROID_extension_pack_es31a"; @@ -369,6 +371,9 @@ const int Num_AEP_geometry_point_size = sizeof(AEP_geometry_point_size)/sizeof(A const char* const AEP_gpu_shader5[] = { E_GL_EXT_gpu_shader5, E_GL_OES_gpu_shader5 }; const int Num_AEP_gpu_shader5 = sizeof(AEP_gpu_shader5)/sizeof(AEP_gpu_shader5[0]); +const char* const AEP_core_gpu_shader5[] = { E_GL_ARB_gpu_shader5, E_GL_NV_gpu_shader5}; +const int Num_AEP_core_gpu_shader5 = sizeof(AEP_core_gpu_shader5)/sizeof(AEP_core_gpu_shader5[0]); + const char* const AEP_primitive_bounding_box[] = { E_GL_EXT_primitive_bounding_box, E_GL_OES_primitive_bounding_box }; const int Num_AEP_primitive_bounding_box = sizeof(AEP_primitive_bounding_box)/sizeof(AEP_primitive_bounding_box[0]); diff --git 
a/3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp b/3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp index 866503a79..7ec8ed4a7 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp +++ b/3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp @@ -1447,6 +1447,8 @@ void TIntermediate::output(TInfoSink& infoSink, bool tree) infoSink.debug << "using non_coherent_depth_attachment_readEXT\n"; if (nonCoherentStencilAttachmentReadEXT) infoSink.debug << "using non_coherent_stencil_attachment_readEXT\n"; + if (nonCoherentTileAttachmentReadQCOM) + infoSink.debug << "using non_coherent_attachment_readQCOM\n"; if (depthLayout != EldNone) infoSink.debug << "using " << TQualifier::getLayoutDepthString(depthLayout) << "\n"; if (blendEquations != 0) { @@ -1481,6 +1483,13 @@ void TIntermediate::output(TInfoSink& infoSink, bool tree) localSizeSpecId[2] << ")\n"; } } + if (nonCoherentTileAttachmentReadQCOM) + infoSink.debug << "using non_coherent_attachment_readQCOM\n"; + if (isTileShadingRateQCOMSet()) { + infoSink.debug << "shading_rateQCOM = (" << tileShadingRateQCOM[0] << ", " + << tileShadingRateQCOM[1] << ", " + << tileShadingRateQCOM[2] << ")\n"; + } break; default: diff --git a/3rdparty/glslang/glslang/MachineIndependent/linkValidate.cpp b/3rdparty/glslang/glslang/MachineIndependent/linkValidate.cpp index f53479e01..fa0e66e7c 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/linkValidate.cpp +++ b/3rdparty/glslang/glslang/MachineIndependent/linkValidate.cpp @@ -600,6 +600,7 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit) MERGE_TRUE(nonCoherentColorAttachmentReadEXT); MERGE_TRUE(nonCoherentDepthAttachmentReadEXT); MERGE_TRUE(nonCoherentStencilAttachmentReadEXT); + MERGE_TRUE(nonCoherentTileAttachmentReadQCOM); if (depthLayout == EldNone) depthLayout = unit.depthLayout; diff --git a/3rdparty/glslang/glslang/MachineIndependent/localintermediate.h 
b/3rdparty/glslang/glslang/MachineIndependent/localintermediate.h index 1f4b7681a..0d299bda2 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/localintermediate.h +++ b/3rdparty/glslang/glslang/MachineIndependent/localintermediate.h @@ -268,6 +268,7 @@ public: gpu_shader_fp64 = 1 << 9, gpu_shader_int16 = 1 << 10, gpu_shader_half_float = 1 << 11, + nv_gpu_shader5_types = 1 << 12, } feature; void insert(feature f) { features |= f; } void erase(feature f) { features &= ~f; } @@ -342,6 +343,7 @@ public: numTaskNVBlocks(0), layoutPrimitiveCulling(false), numTaskEXTPayloads(0), + nonCoherentTileAttachmentReadQCOM(false), autoMapBindings(false), autoMapLocations(false), flattenUniformArrays(false), @@ -370,6 +372,12 @@ public: localSizeSpecId[1] = TQualifier::layoutNotSet; localSizeSpecId[2] = TQualifier::layoutNotSet; xfbBuffers.resize(TQualifier::layoutXfbBufferEnd); + tileShadingRateQCOM[0] = 0; + tileShadingRateQCOM[1] = 0; + tileShadingRateQCOM[2] = 0; + tileShadingRateQCOMNotDefault[0] = false; + tileShadingRateQCOMNotDefault[1] = false; + tileShadingRateQCOMNotDefault[2] = false; shiftBinding.fill(0); } @@ -650,6 +658,21 @@ public: bool isEsProfile() const { return profile == EEsProfile; } + bool setTileShadingRateQCOM(int dim, int size) + { + if (tileShadingRateQCOMNotDefault[dim]) + return size == tileShadingRateQCOM[dim]; + tileShadingRateQCOMNotDefault[dim] = true; + tileShadingRateQCOM[dim] = size; + return true; + } + unsigned int getTileShadingRateQCOM(int dim) const { return tileShadingRateQCOM[dim]; } + bool isTileShadingRateQCOMSet() const + { + // Return true if any component has been set (i.e. any component is not default). 
+ return tileShadingRateQCOMNotDefault[0] || tileShadingRateQCOMNotDefault[1] || tileShadingRateQCOMNotDefault[2]; + } + void setShiftBinding(TResourceType res, unsigned int shift) { shiftBinding[res] = shift; @@ -895,6 +918,8 @@ public: bool getNonCoherentDepthAttachmentReadEXT() const { return nonCoherentDepthAttachmentReadEXT; } void setNonCoherentStencilAttachmentReadEXT() { nonCoherentStencilAttachmentReadEXT = true; } bool getNonCoherentStencilAttachmentReadEXT() const { return nonCoherentStencilAttachmentReadEXT; } + void setNonCoherentTileAttachmentReadQCOM() { nonCoherentTileAttachmentReadQCOM = true; } + bool getNonCoherentTileAttachmentReadQCOM() const { return nonCoherentTileAttachmentReadQCOM; } void setPostDepthCoverage() { postDepthCoverage = true; } bool getPostDepthCoverage() const { return postDepthCoverage; } void setEarlyFragmentTests() { earlyFragmentTests = true; } @@ -1107,17 +1132,20 @@ public: // Certain explicit conversions are allowed conditionally bool getArithemeticInt8Enabled() const { return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) || + numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types) || numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_int8); } bool getArithemeticInt16Enabled() const { return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) || numericFeatures.contains(TNumericFeatures::gpu_shader_int16) || + numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types) || numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_int16); } bool getArithemeticFloat16Enabled() const { return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) || numericFeatures.contains(TNumericFeatures::gpu_shader_half_float) || + numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types) || numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_float16); } void 
updateNumericFeature(TNumericFeatures::feature f, bool on) @@ -1238,6 +1266,10 @@ protected: bool layoutPrimitiveCulling; int numTaskEXTPayloads; + bool nonCoherentTileAttachmentReadQCOM; + int tileShadingRateQCOM[3]; + bool tileShadingRateQCOMNotDefault[3]; + // Base shift values std::array shiftBinding; diff --git a/3rdparty/glslang/glslang/MachineIndependent/parseVersions.h b/3rdparty/glslang/glslang/MachineIndependent/parseVersions.h index 1e06c23a8..1deb2db1b 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/parseVersions.h +++ b/3rdparty/glslang/glslang/MachineIndependent/parseVersions.h @@ -124,6 +124,7 @@ public: virtual void coopmatCheck(const TSourceLoc&, const char* op, bool builtIn = false); virtual void tensorLayoutViewCheck(const TSourceLoc&, const char* op, bool builtIn = false); virtual void coopvecCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void intattachmentCheck(const TSourceLoc&, const char *op, bool builtIn = false); bool relaxedErrors() const { return (messages & EShMsgRelaxedErrors) != 0; } bool suppressWarnings() const { return (messages & EShMsgSuppressWarnings) != 0; } bool isForwardCompatible() const { return forwardCompatible; } diff --git a/3rdparty/glslang/glslang/MachineIndependent/preprocessor/PpScanner.cpp b/3rdparty/glslang/glslang/MachineIndependent/preprocessor/PpScanner.cpp index 49dafa59a..df64b7bd5 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/preprocessor/PpScanner.cpp +++ b/3rdparty/glslang/glslang/MachineIndependent/preprocessor/PpScanner.cpp @@ -470,6 +470,7 @@ int TPpContext::tStringInput::scan(TPpToken* ppToken) static const char* const Int64_Extensions[] = { E_GL_ARB_gpu_shader_int64, E_GL_EXT_shader_explicit_arithmetic_types, + E_GL_NV_gpu_shader5, E_GL_EXT_shader_explicit_arithmetic_types_int64 }; static const int Num_Int64_Extensions = sizeof(Int64_Extensions) / sizeof(Int64_Extensions[0]); diff --git a/3rdparty/glslang/glslang/MachineIndependent/reflection.cpp 
b/3rdparty/glslang/glslang/MachineIndependent/reflection.cpp index 7e3160929..3afbe3d8c 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/reflection.cpp +++ b/3rdparty/glslang/glslang/MachineIndependent/reflection.cpp @@ -1138,8 +1138,10 @@ void TReflection::buildAttributeReflection(EShLanguage stage, const TIntermediat { if (stage == EShLangCompute) { // Remember thread dimensions - for (int dim=0; dim<3; ++dim) + for (int dim=0; dim<3; ++dim) { localSize[dim] = intermediate.getLocalSize(dim); + tileShadingRateQCOM[dim] = intermediate.getTileShadingRateQCOM(dim); + } } } @@ -1270,9 +1272,8 @@ void TReflection::dump() indexToPipeOutput[i].dump(); printf("\n"); + static const char* axis[] = { "X", "Y", "Z" }; if (getLocalSize(0) > 1) { - static const char* axis[] = { "X", "Y", "Z" }; - for (int dim=0; dim<3; ++dim) if (getLocalSize(dim) > 1) printf("Local size %s: %u\n", axis[dim], getLocalSize(dim)); @@ -1280,6 +1281,12 @@ void TReflection::dump() printf("\n"); } + if (getTileShadingRateQCOM(0) > 1 || getTileShadingRateQCOM(1) > 1) { + for (int dim=0; dim<3; ++dim) + printf("Tile shading rate QCOM %s: %u\n", axis[dim], getTileShadingRateQCOM(dim)); + printf("\n"); + } + // printf("Live names\n"); // for (TNameToIndex::const_iterator it = nameToIndex.begin(); it != nameToIndex.end(); ++it) // printf("%s: %d\n", it->first.c_str(), it->second); diff --git a/3rdparty/glslang/glslang/MachineIndependent/reflection.h b/3rdparty/glslang/glslang/MachineIndependent/reflection.h index 8315b1128..dccb99175 100644 --- a/3rdparty/glslang/glslang/MachineIndependent/reflection.h +++ b/3rdparty/glslang/glslang/MachineIndependent/reflection.h @@ -58,8 +58,10 @@ public: TReflection(EShReflectionOptions opts, EShLanguage first, EShLanguage last) : options(opts), firstStage(first), lastStage(last), badReflection(TObjectReflection::badReflection()) { - for (int dim=0; dim<3; ++dim) + for (int dim=0; dim<3; ++dim) { localSize[dim] = 0; + tileShadingRateQCOM[dim] = 0; + } } virtual 
~TReflection() {} @@ -168,6 +170,9 @@ public: // Thread local size unsigned getLocalSize(int dim) const { return dim <= 2 ? localSize[dim] : 0; } + // Tile shading rate QCOM + unsigned getTileShadingRateQCOM(int dim) const { return dim <= 2 ? tileShadingRateQCOM[dim] : 0; } + void dump(); protected: @@ -213,6 +218,7 @@ protected: TIndices atomicCounterUniformIndices; unsigned int localSize[3]; + unsigned int tileShadingRateQCOM[3]; }; } // end namespace glslang diff --git a/3rdparty/glslang/glslang/Public/ShaderLang.h b/3rdparty/glslang/glslang/Public/ShaderLang.h index e681a55a4..0536cc58f 100644 --- a/3rdparty/glslang/glslang/Public/ShaderLang.h +++ b/3rdparty/glslang/glslang/Public/ShaderLang.h @@ -895,6 +895,7 @@ public: // call first, to do liveness analysis, index mapping, etc.; returns false on failure GLSLANG_EXPORT bool buildReflection(int opts = EShReflectionDefault); GLSLANG_EXPORT unsigned getLocalSize(int dim) const; // return dim'th local size + GLSLANG_EXPORT unsigned getTileShadingRateQCOM(int dim) const; // return dim'th tile shading rate QCOM GLSLANG_EXPORT int getReflectionIndex(const char *name) const; GLSLANG_EXPORT int getReflectionPipeIOIndex(const char* name, const bool inOrOut) const; GLSLANG_EXPORT int getNumUniformVariables() const;