Updated glslang.

2026-02-17 20:52:36 +01:00 · 2025-09-14 09:07:36 -07:00
parent 686190d7ff
commit f1e0ba4b0b
34 changed files with 8287 additions and 6306 deletions
--- a/3rdparty/glslang/SPIRV/GLSL.ext.ARM.h
+++ b/3rdparty/glslang/SPIRV/GLSL.ext.ARM.h
@@ -1,5 +1,5 @@
 /*
-** Copyright (c) 2022 ARM Limited
+** Copyright (c) 2022, 2025 ARM Limited
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a copy
 ** of this software and/or associated documentation files (the "Materials"),
@@ -28,8 +28,10 @@
 #define GLSLextARM_H

 static const int GLSLextARMVersion = 100;
-static const int GLSLextARMRevision = 1;
+static const int GLSLextARMRevision = 2;

-static const char * const E_SPV_ARM_core_builtins = "SPV_ARM_core_builtins";
+static const char* const E_SPV_ARM_core_builtins = "SPV_ARM_core_builtins";
+static const char* const E_SPV_ARM_cooperative_matrix_layouts = "SPV_ARM_cooperative_matrix_layouts";
+static const char* const E_SPV_ARM_tensors = "SPV_ARM_tensors";

 #endif  // #ifndef GLSLextARM_H
--- a/3rdparty/glslang/SPIRV/GLSL.ext.EXT.h
+++ b/3rdparty/glslang/SPIRV/GLSL.ext.EXT.h
@@ -41,6 +41,6 @@ static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader
 static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64";
 static const char* const E_SPV_EXT_shader_tile_image = "SPV_EXT_shader_tile_image";
 static const char* const E_SPV_EXT_mesh_shader = "SPV_EXT_mesh_shader";
-static const char* const E_SPV_ARM_cooperative_matrix_layouts = "SPV_ARM_cooperative_matrix_layouts";
+static const char* const E_SPV_EXT_float8 = "SPV_EXT_float8";

 #endif  // #ifndef GLSLextEXT_H
--- a/3rdparty/glslang/SPIRV/GLSL.ext.QCOM.h
+++ b/3rdparty/glslang/SPIRV/GLSL.ext.QCOM.h
@@ -39,6 +39,8 @@ static const int GLSLextQCOMRevision = 1;
 const char* const E_SPV_QCOM_image_processing = "SPV_QCOM_image_processing";
 //SPV_QCOM_image_processing2
 const char* const E_SPV_QCOM_image_processing2 = "SPV_QCOM_image_processing2";
+//SPV_QCOM_cooperative_matrix_conversion
+const char* const E_SPV_QCOM_cooperative_matrix_conversion = "SPV_QCOM_cooperative_matrix_conversion";

 //SPV_QCOM_tile_shading
 const char* const E_SPV_QCOM_tile_shading = "SPV_QCOM_tile_shading";
--- a/3rdparty/glslang/SPIRV/GlslangToSpv.cpp
+++ b/3rdparty/glslang/SPIRV/GlslangToSpv.cpp
@@ -1505,6 +1505,10 @@ bool IsDescriptorResource(const glslang::TType& type)
        type.getBasicType() == glslang::EbtAccStruct)
        return type.getQualifier().isUniformOrBuffer();

+    // Tensors are tied to a descriptor.
+    if (type.isTensorARM())
+        return true;
+
    // None of the above.
    return false;
 }
@@ -2399,6 +2403,16 @@ bool TGlslangToSpvTraverser::visitBinary(glslang::TVisit /* visit */, glslang::T
            node->getRight()->traverse(this);
            spv::Id index = accessChainLoad(node->getRight()->getType());

+            // Zero-extend smaller unsigned integer types for array indexing.
+            // SPIR-V OpAccessChain treats indices as signed, so we need to zero-extend
+            // unsigned types to preserve their values (signed types are fine as-is).
+            spv::Id indexType = builder.getTypeId(index);
+            if (builder.isUintType(indexType) && builder.getScalarTypeWidth(indexType) < 32) {
+                // Zero-extend unsigned types to preserve their values
+                spv::Id uintType = builder.makeUintType(32);
+                index = builder.createUnaryOp(spv::Op::OpUConvert, uintType, index);
+            }
+
            addIndirectionIndexCapabilities(node->getLeft()->getType(), node->getRight()->getType());

            // restore the saved access chain
@@ -2855,6 +2869,10 @@ bool TGlslangToSpvTraverser::visitUnary(glslang::TVisit /* visit */, glslang::TI
                one = builder.makeFloat16Constant(1.0F);
            else if (node->getBasicType() == glslang::EbtBFloat16)
                one = builder.makeBFloat16Constant(1.0F);
+            else if (node->getBasicType() == glslang::EbtFloatE5M2)
+                one = builder.makeFloatE5M2Constant(1.0F);
+            else if (node->getBasicType() == glslang::EbtFloatE4M3)
+                one = builder.makeFloatE4M3Constant(1.0F);
            else if (node->getBasicType() == glslang::EbtInt8  || node->getBasicType() == glslang::EbtUint8)
                one = builder.makeInt8Constant(1);
            else if (node->getBasicType() == glslang::EbtInt16 || node->getBasicType() == glslang::EbtUint16)
@@ -3198,6 +3216,14 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
    case glslang::EOpConstructBF16Vec2:
    case glslang::EOpConstructBF16Vec3:
    case glslang::EOpConstructBF16Vec4:
+    case glslang::EOpConstructFloatE5M2:
+    case glslang::EOpConstructFloatE5M2Vec2:
+    case glslang::EOpConstructFloatE5M2Vec3:
+    case glslang::EOpConstructFloatE5M2Vec4:
+    case glslang::EOpConstructFloatE4M3:
+    case glslang::EOpConstructFloatE4M3Vec2:
+    case glslang::EOpConstructFloatE4M3Vec3:
+    case glslang::EOpConstructFloatE4M3Vec4:
    case glslang::EOpConstructBool:
    case glslang::EOpConstructBVec2:
    case glslang::EOpConstructBVec3:
@@ -3240,6 +3266,7 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
    case glslang::EOpConstructCooperativeMatrixNV:
    case glslang::EOpConstructCooperativeMatrixKHR:
    case glslang::EOpConstructCooperativeVectorNV:
+    case glslang::EOpConstructSaturated:
    {
        builder.setDebugSourceLocation(node->getLoc().line, node->getLoc().getFilename());
        std::vector<spv::Id> arguments;
@@ -3277,6 +3304,16 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
            constructed = createCompositeConstruct(resultType(), constituents);
        } else if (isMatrix)
            constructed = builder.createMatrixConstructor(precision, arguments, resultType());
+        else if (node->getOp() == glslang::EOpConstructSaturated) {
+            OpDecorations decorations = { TranslatePrecisionDecoration(node->getOperationPrecision()),
+                                          TranslateNoContractionDecoration(node->getType().getQualifier()),
+                                          TranslateNonUniformDecoration(lvalueCoherentFlags) };
+
+            constructed = createConversion(node->getOp(), decorations, resultType(), arguments[1],
+                                           node->getType().getBasicType(), node->getSequence()[1]->getAsTyped()->getBasicType());
+            builder.addDecoration(constructed, spv::Decoration::SaturatedToLargestFloat8NormalConversionEXT);
+            builder.createStore(constructed, arguments[0]);
+        }
        else
            constructed = builder.createConstructor(precision, arguments, resultType());

@@ -3557,6 +3594,9 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
        builder.addCapability(spv::Capability::TextureBlockMatchQCOM);
        builder.addExtension(spv::E_SPV_QCOM_image_processing);
        break;
+    case glslang::EOpTensorWriteARM:
+        noReturnValue = true;
+        break;

    case glslang::EOpImageBlockMatchWindowSSDQCOM:
    case glslang::EOpImageBlockMatchWindowSADQCOM:
@@ -3807,6 +3847,10 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
            if (arg == 0 || arg == 2)
                lvalue = true;
            break;
+        case glslang::EOpTensorReadARM:
+            if (arg == 2)
+                lvalue = true;
+            break;
        default:
            break;
        }
@@ -4218,6 +4262,24 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
        // store the result to the pointer
        builder.createStore(result, operands[0]);
        result = 0;
+    } else if (node->getOp() == glslang::EOpBitCastArrayQCOM) {
+        builder.addCapability(spv::Capability::CooperativeMatrixConversionQCOM);
+        builder.addExtension(spv::E_SPV_QCOM_cooperative_matrix_conversion);
+        result = builder.createUnaryOp(spv::Op::OpBitCastArrayQCOM, resultType(), operands[0]);
+    } else if (node->getOp() == glslang::EOpCompositeConstructCoopMatQCOM) {
+        builder.addCapability(spv::Capability::CooperativeMatrixConversionQCOM);
+        builder.addExtension(spv::E_SPV_QCOM_cooperative_matrix_conversion);
+        result = builder.createUnaryOp(spv::Op::OpCompositeConstructCoopMatQCOM, resultType(), operands[0]);
+    } else if (node->getOp() == glslang::EOpCompositeExtractCoopMatQCOM) {
+        builder.addCapability(spv::Capability::CooperativeMatrixConversionQCOM);
+        builder.addExtension(spv::E_SPV_QCOM_cooperative_matrix_conversion);
+        result = builder.createUnaryOp(spv::Op::OpCompositeExtractCoopMatQCOM, resultType(), operands[0]);
+    } else if (node->getOp() == glslang::EOpExtractSubArrayQCOM) {
+        builder.addCapability(spv::Capability::CooperativeMatrixConversionQCOM);
+        builder.addExtension(spv::E_SPV_QCOM_cooperative_matrix_conversion);
+
+        std::vector<spv::Id> arguments { operands[0], operands[1] };;
+        result = builder.createOp(spv::Op::OpExtractSubArrayQCOM, resultType(), arguments);
    } else if (node->getOp() == glslang::EOpCooperativeVectorMatMulNV ||
               node->getOp() == glslang::EOpCooperativeVectorMatMulAddNV) {
        auto matrixOperands = spv::CooperativeMatrixOperandsMask::MaskNone;
@@ -4308,6 +4370,66 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
        idImmOps.push_back(spv::IdImmediate(true, operands[0])); // A
        builder.createNoResultOp(spv::Op::OpCooperativeVectorReduceSumAccumulateNV, idImmOps);
        result = 0;
+    } else if (node->getOp() == glslang::EOpTensorReadARM ||
+               node->getOp() == glslang::EOpTensorWriteARM) {
+        const bool isWrite = node->getOp() == glslang::EOpTensorWriteARM;
+        const unsigned int tensorMinOperandCount = 3;
+        assert(operands.size() >= tensorMinOperandCount);
+        std::vector<spv::IdImmediate> idImmOps;
+
+        idImmOps.push_back(spv::IdImmediate(true, operands[0])); // tensor
+        idImmOps.push_back(spv::IdImmediate(true, operands[1])); // coords
+        if (isWrite) {
+            idImmOps.push_back(spv::IdImmediate(true, operands[2])); // value
+        }
+
+        // Analyze the tensor operands
+        spv::IdImmediate tensorOperands = { false, uint32_t(spv::TensorOperandsMask::MaskNone) };
+        bool pushExtraArg = false;
+        if (operands.size() > tensorMinOperandCount) {
+            auto enumVal = builder.getConstantScalar(operands[tensorMinOperandCount]);
+
+            if (enumVal & uint32_t(spv::TensorOperandsMask::NontemporalARM)) {
+                tensorOperands.word |= uint32_t(spv::TensorOperandsMask::NontemporalARM);
+            }
+            if (enumVal & uint32_t(spv::TensorOperandsMask::OutOfBoundsValueARM)) {
+                tensorOperands.word |= uint32_t(spv::TensorOperandsMask::OutOfBoundsValueARM);
+                assert(operands.size() >= tensorMinOperandCount + 2 &&
+                    "TensorOperandsOutOfBoundsValueMask requires an additional value");
+                pushExtraArg = true;
+            }
+        }
+
+        // Append optional tensor operands if the mask was non-zero.
+        if (tensorOperands.word) {
+            idImmOps.push_back(tensorOperands);
+            if (pushExtraArg)
+                idImmOps.push_back(spv::IdImmediate(true, operands[tensorMinOperandCount + 1]));
+        }
+
+        if (isWrite) {
+            builder.createNoResultOp(spv::Op::OpTensorWriteARM, idImmOps);
+            result = 0;
+        } else {
+            // Use the result argument type as the OpTensorReadARM result type.
+            const glslang::TType &resArgType = glslangOperands[2]->getAsTyped()->getType();
+            spv::Id retType = convertGlslangToSpvType(resArgType);
+            result = builder.createOp(spv::Op::OpTensorReadARM, retType, idImmOps);
+            // Store the result to the result argument.
+            builder.createStore(result, operands[2]);
+        }
+    } else if (node->getOp() == glslang::EOpTensorSizeARM) {
+        // Expected operands are (tensor, dimension)
+        assert(operands.size() == 2);
+
+        spv::Id tensorOp = operands[0];
+        spv::Id dimOp = operands[1];
+        assert(builder.isTensorTypeARM(builder.getTypeId(tensorOp)) && "operand #0 must be a tensor");
+
+        std::vector<spv::IdImmediate> idImmOps;
+        idImmOps.push_back(spv::IdImmediate(true, tensorOp));
+        idImmOps.push_back(spv::IdImmediate(true, dimOp));
+        result = builder.createOp(spv::Op::OpTensorQuerySizeARM, resultType(), idImmOps);
    } else if (atomic) {
        // Handle all atomics
        glslang::TBasicType typeProxy = (node->getOp() == glslang::EOpAtomicStore)
@@ -5007,6 +5129,12 @@ spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& ty
    case glslang::EbtBFloat16:
        spvType = builder.makeBFloat16Type();
        break;
+    case glslang::EbtFloatE5M2:
+        spvType = builder.makeFloatE5M2Type();
+        break;
+    case glslang::EbtFloatE4M3:
+        spvType = builder.makeFloatE4M3Type();
+        break;
    case glslang::EbtInt8:
        spvType = builder.makeIntType(8);
        break;
@@ -5257,6 +5385,11 @@ spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& ty
            builder.addCapability(spv::Capability::BFloat16CooperativeMatrixKHR);
        }

+        if (type.getBasicType() == glslang::EbtFloatE5M2 || type.getBasicType() == glslang::EbtFloatE4M3) {
+            builder.addExtension(spv::E_SPV_EXT_float8);
+            builder.addCapability(spv::Capability::Float8CooperativeMatrixEXT);
+        }
+
        if (type.getBasicType() == glslang::EbtFloat16)
            builder.addCapability(spv::Capability::Float16);
        if (type.getBasicType() == glslang::EbtUint8 || type.getBasicType() == glslang::EbtInt8) {
@@ -5266,10 +5399,29 @@ spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& ty
        spv::Id scope = makeArraySizeId(*type.getTypeParameters()->arraySizes, 0);
        spv::Id rows = makeArraySizeId(*type.getTypeParameters()->arraySizes, 1);
        spv::Id cols = makeArraySizeId(*type.getTypeParameters()->arraySizes, 2);
-        spv::Id use = builder.makeUintConstant(type.getCoopMatKHRuse());
+        spv::Id use = makeArraySizeId(*type.getTypeParameters()->arraySizes, 3, true);

        spvType = builder.makeCooperativeMatrixTypeKHR(spvType, scope, rows, cols, use);
    }
+    else if (type.isTensorARM()) {
+        builder.addCapability(spv::Capability::TensorsARM);
+        builder.addExtension(spv::E_SPV_ARM_tensors);
+        if (type.getBasicType() == glslang::EbtInt8 || type.getBasicType() == glslang::EbtUint8) {
+            builder.addCapability(spv::Capability::Int8);
+        } else if (type.getBasicType() == glslang::EbtInt16 ||
+                   type.getBasicType() == glslang::EbtUint16) {
+            builder.addCapability(spv::Capability::Int16);
+        } else if (type.getBasicType() == glslang::EbtInt64 ||
+                   type.getBasicType() == glslang::EbtUint64) {
+            builder.addCapability(spv::Capability::Int64);
+        } else if (type.getBasicType() == glslang::EbtFloat16) {
+            builder.addCapability(spv::Capability::Float16);
+        }
+
+        spv::Id rank = makeArraySizeId(*type.getTypeParameters()->arraySizes, 0);
+
+        spvType = builder.makeTensorTypeARM(spvType, rank);
+    }

    if (type.isCoopVecNV()) {
        builder.addCapability(spv::Capability::CooperativeVectorNV);
@@ -6371,6 +6523,10 @@ void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate&
            if (i == 2)
                lvalue = true;
            break;
+        case glslang::EOpConstructSaturated:
+            if (i == 0)
+                lvalue = true;
+            break;
        default:
            break;
        }
@@ -8124,7 +8280,7 @@ spv::Id TGlslangToSpvTraverser::createConversion(glslang::TOperator op, OpDecora

    int vectorSize = builder.isVectorType(destType) ? builder.getNumTypeComponents(destType) : 0;

-    if (IsOpNumericConv(op)) {
+    if (IsOpNumericConv(op) || op == glslang::EOpConstructSaturated) {
        if (isTypeSignedInt(operandBasicType) && isTypeFloat(resultBasicType)) {
            convOp = spv::Op::OpConvertSToF;
        }
@@ -10583,6 +10739,12 @@ spv::Id TGlslangToSpvTraverser::createSpvConstantFromConstUnionArray(const glsla
            case glslang::EbtBFloat16:
                spvConsts.push_back(builder.makeBFloat16Constant(zero ? 0.0F : (float)consts[nextConst].getDConst()));
                break;
+            case glslang::EbtFloatE5M2:
+                spvConsts.push_back(builder.makeFloatE5M2Constant(zero ? 0.0F : (float)consts[nextConst].getDConst()));
+                break;
+            case glslang::EbtFloatE4M3:
+                spvConsts.push_back(builder.makeFloatE4M3Constant(zero ? 0.0F : (float)consts[nextConst].getDConst()));
+                break;
            default:
                assert(0);
                break;
@@ -10638,6 +10800,12 @@ spv::Id TGlslangToSpvTraverser::createSpvConstantFromConstUnionArray(const glsla
        case glslang::EbtBFloat16:
            scalar = builder.makeBFloat16Constant(zero ? 0.0F : (float)consts[nextConst].getDConst(), specConstant);
            break;
+        case glslang::EbtFloatE5M2:
+            scalar = builder.makeFloatE5M2Constant(zero ? 0.0F : (float)consts[nextConst].getDConst(), specConstant);
+            break;
+        case glslang::EbtFloatE4M3:
+            scalar = builder.makeFloatE4M3Constant(zero ? 0.0F : (float)consts[nextConst].getDConst(), specConstant);
+            break;
        case glslang::EbtReference:
            scalar = builder.makeUint64Constant(zero ? 0 : consts[nextConst].getU64Const(), specConstant);
            scalar = builder.createUnaryOp(spv::Op::OpBitcast, typeId, scalar);
--- a/3rdparty/glslang/SPIRV/SpvBuilder.cpp
+++ b/3rdparty/glslang/SPIRV/SpvBuilder.cpp
@@ -341,6 +341,80 @@ Id Builder::makeBFloat16Type()
    return type->getResultId();
 }

+Id Builder::makeFloatE5M2Type()
+{
+    // Return the id of the 8-bit OpTypeFloat with the Float8E5M2EXT encoding, reusing an existing one if found.
+    Instruction* type;
+    for (int t = 0; t < (int)groupedTypes[enumCast(Op::OpTypeFloat)].size(); ++t) {
+        type = groupedTypes[enumCast(Op::OpTypeFloat)][t];
+        if (type->getNumOperands() != 2) { // only two-operand (width, encoding) float types can match
+            continue;
+        }
+        if (type->getImmediateOperand(0) == (unsigned)8 &&
+            type->getImmediateOperand(1) == FPEncoding::Float8E5M2EXT)
+            return type->getResultId();
+    }
+
+    // Not found: create it with operands (8, Float8E5M2EXT) and record it so the search above finds it next time.
+    type = new Instruction(getUniqueId(), NoType, Op::OpTypeFloat);
+    type->addImmediateOperand(8);
+    type->addImmediateOperand(FPEncoding::Float8E5M2EXT);
+    groupedTypes[enumCast(Op::OpTypeFloat)].push_back(type);
+    constantsTypesGlobals.push_back(std::unique_ptr<Instruction>(type)); // ownership moves to constantsTypesGlobals
+    module.mapInstruction(type);
+
+    addExtension(spv::E_SPV_EXT_float8); // requested only on the creation path; the reuse path returns earlier
+    addCapability(Capability::Float8EXT);
+
+#if 0
+    // XXX not supported: this debug-type path is compiled out; note that 'width' is not declared in this function.
+    if (emitNonSemanticShaderDebugInfo)
+    {
+        auto const debugResultId = makeFloatDebugType(width);
+        debugId[type->getResultId()] = debugResultId;
+    }
+#endif
+
+    return type->getResultId();
+}
+
+Id Builder::makeFloatE4M3Type()
+{
+    // Return the id of the 8-bit OpTypeFloat with the Float8E4M3EXT encoding, reusing an existing one if found.
+    Instruction* type;
+    for (int t = 0; t < (int)groupedTypes[enumCast(Op::OpTypeFloat)].size(); ++t) {
+        type = groupedTypes[enumCast(Op::OpTypeFloat)][t];
+        if (type->getNumOperands() != 2) { // only two-operand (width, encoding) float types can match
+            continue;
+        }
+        if (type->getImmediateOperand(0) == (unsigned)8 &&
+            type->getImmediateOperand(1) == FPEncoding::Float8E4M3EXT)
+            return type->getResultId();
+    }
+
+    // Not found: create it with operands (8, Float8E4M3EXT) and record it so the search above finds it next time.
+    type = new Instruction(getUniqueId(), NoType, Op::OpTypeFloat);
+    type->addImmediateOperand(8);
+    type->addImmediateOperand(FPEncoding::Float8E4M3EXT);
+    groupedTypes[enumCast(Op::OpTypeFloat)].push_back(type);
+    constantsTypesGlobals.push_back(std::unique_ptr<Instruction>(type)); // ownership moves to constantsTypesGlobals
+    module.mapInstruction(type);
+
+    addExtension(spv::E_SPV_EXT_float8); // requested only on the creation path; the reuse path returns earlier
+    addCapability(Capability::Float8EXT);
+
+#if 0
+    // XXX not supported: this debug-type path is compiled out; note that 'width' is not declared in this function.
+    if (emitNonSemanticShaderDebugInfo)
+    {
+        auto const debugResultId = makeFloatDebugType(width);
+        debugId[type->getResultId()] = debugResultId;
+    }
+#endif
+
+    return type->getResultId();
+}
+
 // Make a struct without checking for duplication.
 // See makeStructResultType() for non-decorated structs
 // needed as the result of some instructions, which does
@@ -578,6 +652,26 @@ Id Builder::makeCooperativeVectorTypeNV(Id componentType, Id components)
    return type->getResultId();
 }

+Id Builder::makeTensorTypeARM(Id elementType, Id rank)
+{
+    // Return the id of the OpTypeTensorARM for (elementType, rank), reusing a previously made one if present.
+    for (int t = 0; t < (int)groupedTypes[enumCast(Op::OpTypeTensorARM)].size(); ++t) {
+        const Instruction *type = groupedTypes[enumCast(Op::OpTypeTensorARM)][t];
+        if (type->getIdOperand(0) == elementType && type->getIdOperand(1) == rank)
+            return type->getResultId();
+    }
+
+    // Not found: build it with id operand 0 = element type and id operand 1 = rank, as the search above expects.
+    std::unique_ptr<Instruction> type(new Instruction(getUniqueId(), NoType, Op::OpTypeTensorARM));
+    type->addIdOperand(elementType);
+    type->addIdOperand(rank);
+    groupedTypes[enumCast(Op::OpTypeTensorARM)].push_back(type.get()); // non-owning pointer kept for reuse lookups
+    module.mapInstruction(type.get());
+    Id resultID = type->getResultId(); // read the id before 'type' is moved from on the next line
+    constantsTypesGlobals.push_back(std::move(type));
+    return resultID;
+}
+
 Id Builder::makeGenericType(spv::Op opcode, std::vector<spv::IdImmediate>& operands)
 {
    // try to find it
@@ -1897,6 +1991,62 @@ Id Builder::makeBFloat16Constant(float bf16, bool specConstant)
    return c->getResultId();
 }

+Id Builder::makeFloatE5M2Constant(float fe5m2, bool specConstant) // make (or reuse) a scalar E5M2 constant
+{
+    Op opcode = specConstant ? Op::OpSpecConstant : Op::OpConstant;
+    Id typeId = makeFloatE5M2Type();
+
+    spvutils::HexFloat<spvutils::FloatProxy<float>> fVal(fe5m2);
+    spvutils::HexFloat<spvutils::FloatProxy<spvutils::FloatE5M2>> fe5m2Val(0);
+    fVal.castTo(fe5m2Val, spvutils::kRoundToZero); // narrow the float input to E5M2 with the round-to-zero mode
+
+    unsigned value = fe5m2Val.value().getAsFloat().get_value(); // becomes the constant's immediate operand below
+
+    // Reuse an identical constant if one exists. Applies only to regular constants, because specialization
+    // constants must remain distinct for the purpose of applying a SpecId decoration.
+    if (!specConstant) {
+        Id existing = findScalarConstant(Op::OpTypeFloat, opcode, typeId, value);
+        if (existing)
+            return existing;
+    }
+
+    Instruction* c = new Instruction(getUniqueId(), typeId, opcode);
+    c->addImmediateOperand(value);
+    constantsTypesGlobals.push_back(std::unique_ptr<Instruction>(c)); // ownership moves to constantsTypesGlobals
+    groupedConstants[enumCast(Op::OpTypeFloat)].push_back(c); // presumably what findScalarConstant searches - confirm
+    module.mapInstruction(c);
+
+    return c->getResultId();
+}
+
+Id Builder::makeFloatE4M3Constant(float fe4m3, bool specConstant) // make (or reuse) a scalar E4M3 constant
+{
+    Op opcode = specConstant ? Op::OpSpecConstant : Op::OpConstant;
+    Id typeId = makeFloatE4M3Type();
+
+    spvutils::HexFloat<spvutils::FloatProxy<float>> fVal(fe4m3);
+    spvutils::HexFloat<spvutils::FloatProxy<spvutils::FloatE4M3>> fe4m3Val(0);
+    fVal.castTo(fe4m3Val, spvutils::kRoundToZero); // narrow the float input to E4M3 with the round-to-zero mode
+
+    unsigned value = fe4m3Val.value().getAsFloat().get_value(); // becomes the constant's immediate operand below
+
+    // Reuse an identical constant if one exists. Applies only to regular constants, because specialization
+    // constants must remain distinct for the purpose of applying a SpecId decoration.
+    if (!specConstant) {
+        Id existing = findScalarConstant(Op::OpTypeFloat, opcode, typeId, value);
+        if (existing)
+            return existing;
+    }
+
+    Instruction* c = new Instruction(getUniqueId(), typeId, opcode);
+    c->addImmediateOperand(value);
+    constantsTypesGlobals.push_back(std::unique_ptr<Instruction>(c)); // ownership moves to constantsTypesGlobals
+    groupedConstants[enumCast(Op::OpTypeFloat)].push_back(c); // presumably what findScalarConstant searches - confirm
+    module.mapInstruction(c);
+
+    return c->getResultId();
+}
+
 Id Builder::makeFpConstant(Id type, double d, bool specConstant)
 {
    const int width = getScalarTypeWidth(type);
--- a/3rdparty/glslang/SPIRV/SpvBuilder.h
+++ b/3rdparty/glslang/SPIRV/SpvBuilder.h
@@ -209,6 +209,8 @@ public:
    Id makeUintType(int width) { return makeIntegerType(width, false); }
    Id makeFloatType(int width);
    Id makeBFloat16Type();
+    Id makeFloatE5M2Type();
+    Id makeFloatE4M3Type();
    Id makeStructType(const std::vector<Id>& members, const char* name, bool const compilerGenerated = true);
    Id makeStructResultType(Id type0, Id type1);
    Id makeVectorType(Id component, int size);
@@ -223,6 +225,7 @@ public:
    Id makeCooperativeMatrixTypeNV(Id component, Id scope, Id rows, Id cols);
    Id makeCooperativeMatrixTypeWithSameShape(Id component, Id otherType);
    Id makeCooperativeVectorTypeNV(Id componentType, Id components);
+    Id makeTensorTypeARM(Id elementType, Id rank);
    Id makeGenericType(spv::Op opcode, std::vector<spv::IdImmediate>& operands);

    // SPIR-V NonSemantic Shader DebugInfo Instructions
@@ -320,6 +323,7 @@ public:
    }
    bool isTensorViewType(Id typeId) const { return getTypeClass(typeId) == Op::OpTypeTensorViewNV; }
    bool isCooperativeVectorType(Id typeId) const { return getTypeClass(typeId) == Op::OpTypeCooperativeVectorNV; }
+    bool isTensorTypeARM(Id typeId)    const { return getTypeClass(typeId) == Op::OpTypeTensorARM; }
    bool isAggregateType(Id typeId)    const
        { return isArrayType(typeId) || isStructType(typeId) || isCooperativeMatrixType(typeId); }
    bool isImageType(Id typeId)        const { return getTypeClass(typeId) == Op::OpTypeImage; }
@@ -414,6 +418,8 @@ public:
    Id makeDoubleConstant(double d, bool specConstant = false);
    Id makeFloat16Constant(float f16, bool specConstant = false);
    Id makeBFloat16Constant(float bf16, bool specConstant = false);
+    Id makeFloatE5M2Constant(float fe5m2, bool specConstant = false);
+    Id makeFloatE4M3Constant(float fe4m3, bool specConstant = false);
    Id makeFpConstant(Id type, double d, bool specConstant = false);

    Id importNonSemanticShaderDebugInfoInstructions();
--- a/3rdparty/glslang/SPIRV/disassemble.cpp
+++ b/3rdparty/glslang/SPIRV/disassemble.cpp
@@ -61,7 +61,7 @@ namespace spv {
        #include "GLSL.ext.QCOM.h"
    }
 }
-const char* GlslStd450DebugNames[spv::GLSLstd450Count];
+static const char* GlslStd450DebugNames[spv::GLSLstd450Count];

 namespace spv {

@@ -382,9 +382,22 @@ void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode,
                break;
            case Op::OpTypeFloat:
                switch (stream[word]) {
+                case 8:
                case 16:
-                    if (numOperands > 1 && stream[word+1] == spv::FPEncoding::BFloat16KHR) {
+                    if (numOperands > 1) {
+                        switch (stream[word+1]) {
+                        default:
+                            assert(0); [[fallthrough]];
+                        case (int)spv::FPEncoding::BFloat16KHR:
                            idDescriptor[resultId] = "bfloat16_t";
+                            break;
+                        case (int)spv::FPEncoding::Float8E4M3EXT:
+                            idDescriptor[resultId] = "floate4m3_t";
+                            break;
+                        case (int)spv::FPEncoding::Float8E5M2EXT:
+                            idDescriptor[resultId] = "floate5m2_t";
+                            break;
+                        }
                    } else {
                        idDescriptor[resultId] = "float16_t";
                    }
--- a/3rdparty/glslang/SPIRV/doc.cpp
+++ b/3rdparty/glslang/SPIRV/doc.cpp
@@ -344,6 +344,8 @@ const char* DecorationString(int decoration)
    case (int)Decoration::AliasedPointerEXT:       return "DecorationAliasedPointerEXT";

    case (int)Decoration::HitObjectShaderRecordBufferNV:  return "DecorationHitObjectShaderRecordBufferNV";
+
+    case (int)Decoration::SaturatedToLargestFloat8NormalConversionEXT: return "DecorationSaturatedToLargestFloat8NormalConversionEXT";
    }
 }

@@ -975,7 +977,7 @@ const char* CapabilityString(int info)
    case (int)Capability::SubgroupBallotKHR: return "SubgroupBallotKHR";
    case (int)Capability::DrawParameters:    return "DrawParameters";
    case (int)Capability::SubgroupVoteKHR:   return "SubgroupVoteKHR";
-    case (int)Capability::GroupNonUniformRotateKHR: return "CapabilityGroupNonUniformRotateKHR";
+    case (int)Capability::GroupNonUniformRotateKHR: return "GroupNonUniformRotateKHR";

    case (int)Capability::StorageUniformBufferBlock16: return "StorageUniformBufferBlock16";
    case (int)Capability::StorageUniform16:            return "StorageUniform16";
@@ -1021,7 +1023,7 @@ const char* CapabilityString(int info)
    case (int)Capability::RayTracingPositionFetchKHR:      return "RayTracingPositionFetchKHR";
    case (int)Capability::DisplacementMicromapNV:           return "DisplacementMicromapNV";
    case (int)Capability::RayTracingOpacityMicromapEXT:    return "RayTracingOpacityMicromapEXT";
-    case (int)Capability::RayTracingDisplacementMicromapNV: return "CapabilityRayTracingDisplacementMicromapNV";
+    case (int)Capability::RayTracingDisplacementMicromapNV: return "RayTracingDisplacementMicromapNV";
    case (int)Capability::RayQueryPositionFetchKHR:        return "RayQueryPositionFetchKHR";
    case (int)Capability::ComputeDerivativeGroupQuadsNV:   return "ComputeDerivativeGroupQuadsNV";
    case (int)Capability::ComputeDerivativeGroupLinearNV:  return "ComputeDerivativeGroupLinearNV";
@@ -1069,15 +1071,16 @@ const char* CapabilityString(int info)
    case (int)Capability::CooperativeVectorNV:                     return "CooperativeVectorNV";
    case (int)Capability::CooperativeVectorTrainingNV:             return "CooperativeVectorTrainingNV";

-    case (int)Capability::FragmentShaderSampleInterlockEXT:        return "CapabilityFragmentShaderSampleInterlockEXT";
-    case (int)Capability::FragmentShaderPixelInterlockEXT:         return "CapabilityFragmentShaderPixelInterlockEXT";
-    case (int)Capability::FragmentShaderShadingRateInterlockEXT:   return "CapabilityFragmentShaderShadingRateInterlockEXT";
+    case (int)Capability::FragmentShaderSampleInterlockEXT:        return "FragmentShaderSampleInterlockEXT";
+    case (int)Capability::FragmentShaderPixelInterlockEXT:         return "FragmentShaderPixelInterlockEXT";
+    case (int)Capability::FragmentShaderShadingRateInterlockEXT:   return "FragmentShaderShadingRateInterlockEXT";

    case (int)Capability::TileImageColorReadAccessEXT:           return "TileImageColorReadAccessEXT";
    case (int)Capability::TileImageDepthReadAccessEXT:           return "TileImageDepthReadAccessEXT";
    case (int)Capability::TileImageStencilReadAccessEXT:         return "TileImageStencilReadAccessEXT";

    case (int)Capability::CooperativeMatrixLayoutsARM:             return "CooperativeMatrixLayoutsARM";
+    case (int)Capability::TensorsARM:                              return "TensorsARM";

    case (int)Capability::FragmentShadingRateKHR:                  return "FragmentShadingRateKHR";

@@ -1087,7 +1090,7 @@ const char* CapabilityString(int info)
    case (int)Capability::QuadControlKHR:                          return "QuadControlKHR";
    case (int)Capability::Int64ImageEXT:                           return "Int64ImageEXT";

-    case (int)Capability::IntegerFunctions2INTEL:              return "CapabilityIntegerFunctions2INTEL";
+    case (int)Capability::IntegerFunctions2INTEL:              return "IntegerFunctions2INTEL";

    case (int)Capability::ExpectAssumeKHR:                         return "ExpectAssumeKHR";

@@ -1098,9 +1101,9 @@ const char* CapabilityString(int info)
    case (int)Capability::AtomicFloat32MinMaxEXT:                  return "AtomicFloat32MinMaxEXT";
    case (int)Capability::AtomicFloat64MinMaxEXT:                  return "AtomicFloat64MinMaxEXT";

-    case (int)Capability::WorkgroupMemoryExplicitLayoutKHR:            return "CapabilityWorkgroupMemoryExplicitLayoutKHR";
-    case (int)Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR:  return "CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR";
-    case (int)Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR: return "CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR";
+    case (int)Capability::WorkgroupMemoryExplicitLayoutKHR:            return "WorkgroupMemoryExplicitLayoutKHR";
+    case (int)Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR:  return "WorkgroupMemoryExplicitLayout8BitAccessKHR";
+    case (int)Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR: return "WorkgroupMemoryExplicitLayout16BitAccessKHR";
    case (int)Capability::CoreBuiltinsARM:                             return "CoreBuiltinsARM";

    case (int)Capability::ShaderInvocationReorderNV:                return "ShaderInvocationReorderNV";
@@ -1111,7 +1114,9 @@ const char* CapabilityString(int info)
    case (int)Capability::TileShadingQCOM:                     return "TileShadingQCOM";
    case (int)Capability::TextureBlockMatch2QCOM:              return "TextureBlockMatch2QCOM";

-    case (int)Capability::ReplicatedCompositesEXT:             return "CapabilityReplicatedCompositesEXT";
+    case (int)Capability::CooperativeMatrixConversionQCOM:     return "CooperativeMatrixConversionQCOM";
+
+    case (int)Capability::ReplicatedCompositesEXT:             return "ReplicatedCompositesEXT";

    case (int)Capability::DotProductKHR:                       return "DotProductKHR";
    case (int)Capability::DotProductInputAllKHR:               return "DotProductInputAllKHR";
@@ -1123,9 +1128,12 @@ const char* CapabilityString(int info)
    case (int)Capability::RayTracingSpheresGeometryNV:             return "RayTracingSpheresGeometryNV";
    case (int)Capability::RayTracingLinearSweptSpheresGeometryNV:  return "RayTracingLinearSweptSpheresGeometryNV";

-    case (int)Capability::BFloat16TypeKHR:                     return "CapabilityBFloat16TypeKHR";
-    case (int)Capability::BFloat16DotProductKHR:               return "CapabilityBFloat16DotProductKHR";
-    case (int)Capability::BFloat16CooperativeMatrixKHR:        return "CapabilityBFloat16CooperativeMatrixKHR";
+    case (int)Capability::BFloat16TypeKHR:                     return "BFloat16TypeKHR";
+    case (int)Capability::BFloat16DotProductKHR:               return "BFloat16DotProductKHR";
+    case (int)Capability::BFloat16CooperativeMatrixKHR:        return "BFloat16CooperativeMatrixKHR";
+
+    case (int)Capability::Float8EXT:                           return "Float8EXT";
+    case (int)Capability::Float8CooperativeMatrixEXT:          return "Float8CooperativeMatrixEXT";

    default: return "Bad";
    }
@@ -1621,6 +1629,11 @@ const char* OpcodeString(int op)
    case (int)Op::OpTensorViewSetStrideNV:           return "OpTensorViewSetStrideNV";
    case (int)Op::OpTensorViewSetClipNV:             return "OpTensorViewSetClipNV";

+    case (int)Op::OpTypeTensorARM:                   return "OpTypeTensorARM";
+    case (int)Op::OpTensorReadARM:                   return "OpTensorReadARM";
+    case (int)Op::OpTensorWriteARM:                  return "OpTensorWriteARM";
+    case (int)Op::OpTensorQuerySizeARM:              return "OpTensorQuerySizeARM";
+
    case (int)Op::OpTypeCooperativeVectorNV:         return "OpTypeCooperativeVectorNV";
    case (int)Op::OpCooperativeVectorMatrixMulNV:    return "OpCooperativeVectorMatrixMulNV";
    case (int)Op::OpCooperativeVectorMatrixMulAddNV: return "OpCooperativeVectorMatrixMulAddNV";
@@ -1689,6 +1702,11 @@ const char* OpcodeString(int op)
    case (int)Op::OpImageBlockMatchGatherSSDQCOM:    return "OpImageBlockMatchGatherSSDQCOM";
    case (int)Op::OpImageBlockMatchGatherSADQCOM:    return "OpImageBlockMatchGatherSADQCOM";

+    case (int)Op::OpBitCastArrayQCOM:                return "OpBitCastArrayQCOM";
+    case (int)Op::OpCompositeConstructCoopMatQCOM:   return "OpCompositeConstructCoopMatQCOM";
+    case (int)Op::OpCompositeExtractCoopMatQCOM:     return "OpCompositeExtractCoopMatQCOM";
+    case (int)Op::OpExtractSubArrayQCOM:             return "OpExtractSubArrayQCOM";
+
    case (int)Op::OpConstantCompositeReplicateEXT: return "OpConstantCompositeReplicateEXT";
    case (int)Op::OpSpecConstantCompositeReplicateEXT: return "OpSpecConstantCompositeReplicateEXT";
    case (int)Op::OpCompositeConstructReplicateEXT: return "OpCompositeConstructReplicateEXT";
@@ -1828,6 +1846,10 @@ void Parameterize()
        InstructionDesc[enumCast(Op::OpCooperativeVectorOuterProductAccumulateNV)].setResultAndType(false, false);
        InstructionDesc[enumCast(Op::OpCooperativeVectorReduceSumAccumulateNV)].setResultAndType(false, false);

+        InstructionDesc[enumCast(Op::OpTypeTensorARM)].setResultAndType(true, false);
+        InstructionDesc[enumCast(Op::OpTensorReadARM)].setResultAndType(true, true);
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].setResultAndType(false, false);
+
        // Specific additional context-dependent operands

        ExecutionModeOperands[enumCast(ExecutionMode::Invocations)].push(OperandLiteralNumber, "'Number of <<Invocation,invocations>>'");
@@ -3709,6 +3731,19 @@ void Parameterize()
        InstructionDesc[enumCast(Op::OpImageBlockMatchGatherSADQCOM)].operands.push(OperandImageOperands, "", true);
        InstructionDesc[enumCast(Op::OpImageBlockMatchGatherSADQCOM)].setResultAndType(true, true);

+        InstructionDesc[enumCast(Op::OpBitCastArrayQCOM)].operands.push(OperandId, "'source array'");
+        InstructionDesc[enumCast(Op::OpBitCastArrayQCOM)].setResultAndType(true, true);
+
+        InstructionDesc[enumCast(Op::OpCompositeConstructCoopMatQCOM)].operands.push(OperandId, "'source array'");
+        InstructionDesc[enumCast(Op::OpCompositeConstructCoopMatQCOM)].setResultAndType(true, true);
+
+        InstructionDesc[enumCast(Op::OpCompositeExtractCoopMatQCOM)].operands.push(OperandId, "'source cooperative matrix'");
+        InstructionDesc[enumCast(Op::OpCompositeExtractCoopMatQCOM)].setResultAndType(true, true);
+
+        InstructionDesc[enumCast(Op::OpExtractSubArrayQCOM)].operands.push(OperandId, "'source array'");
+        InstructionDesc[enumCast(Op::OpExtractSubArrayQCOM)].operands.push(OperandId, "'start index'");
+        InstructionDesc[enumCast(Op::OpExtractSubArrayQCOM)].setResultAndType(true, true);
+
        InstructionDesc[enumCast(Op::OpConstantCompositeReplicateEXT)].operands.push(OperandId, "'Value'");
        InstructionDesc[enumCast(Op::OpSpecConstantCompositeReplicateEXT)].operands.push(OperandId, "'Value'");
        InstructionDesc[enumCast(Op::OpCompositeConstructReplicateEXT)].operands.push(OperandId, "'Value'");
@@ -3800,6 +3835,24 @@ void Parameterize()
        InstructionDesc[enumCast(Op::OpSUDotAccSatKHR)].operands.push(OperandId, "'Vector2'");
        InstructionDesc[enumCast(Op::OpSUDotAccSatKHR)].operands.push(OperandId, "'Accumulator'");
        InstructionDesc[enumCast(Op::OpSUDotAccSatKHR)].operands.push(OperandLiteralNumber, "'PackedVectorFormat'");
+
+        InstructionDesc[enumCast(Op::OpTypeTensorARM)].operands.push(OperandId, "'Element Type'");
+        InstructionDesc[enumCast(Op::OpTypeTensorARM)].operands.push(OperandId, "'Rank'");
+
+        InstructionDesc[enumCast(Op::OpTensorReadARM)].operands.push(OperandId, "'Tensor'");
+        InstructionDesc[enumCast(Op::OpTensorReadARM)].operands.push(OperandId, "'Coordinate'");
+        InstructionDesc[enumCast(Op::OpTensorReadARM)].operands.push(OperandLiteralNumber, "'Tensor Operand'", true);
+        InstructionDesc[enumCast(Op::OpTensorReadARM)].operands.push(OperandVariableIds, "'Tensor Operands'");
+
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].operands.push(OperandId, "'Tensor'");
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].operands.push(OperandId, "'Coordinate'");
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].operands.push(OperandId, "'Object'");
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].operands.push(OperandLiteralNumber, "'Tensor Operand'", true);
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].operands.push(OperandVariableIds, "'Tensor Operands'");
+
+        InstructionDesc[enumCast(Op::OpTensorQuerySizeARM)].operands.push(OperandId, "'Tensor'");
+        InstructionDesc[enumCast(Op::OpTensorQuerySizeARM)].operands.push(OperandId, "'Dimension'", true);
+
    });
 }

--- a/3rdparty/glslang/SPIRV/hex_float.h
+++ b/3rdparty/glslang/SPIRV/hex_float.h
@@ -50,6 +50,52 @@ class Float16 {
  uint16_t val;
 };

+// 8-bit float stored as raw bits: 1 sign bit, 5 exponent bits (0x7C), 2 fraction bits (0x03).
+class FloatE5M2 {
+ public:
+  // Zero-initialises so a default-constructed value never holds indeterminate bits.
+  FloatE5M2() : val(0) {}
+  FloatE5M2(uint8_t v) : val(v) {}
+  // Returns the raw 8-bit encoding.
+  uint8_t get_value() const { return val; }
+
+  // Returns true for NaN: exponent bits all set and a non-zero fraction.
+  static bool isNan(const FloatE5M2& f) { return hasMaxExponent(f) && (f.val & 0x3) != 0; }
+  // Returns true for +/-infinity: exponent bits all set and a zero fraction.
+  static bool isInfinity(const FloatE5M2& f) { return hasMaxExponent(f) && (f.val & 0x3) == 0; }
+  // Returns the maximum normal value.
+  static FloatE5M2 max() { return FloatE5M2(0x7B); }
+  // Returns the lowest normal value.
+  static FloatE5M2 lowest() { return FloatE5M2(0xFB); }
+
+ private:
+  static bool hasMaxExponent(const FloatE5M2& f) { return (f.val & 0x7C) == 0x7C; }
+  uint8_t val;
+};
+
+// 8-bit float stored as raw bits: 1 sign bit, 4 exponent bits, 3 fraction bits; it has no infinity.
+class FloatE4M3 {
+ public:
+  // Zero-initialises so a default-constructed value never holds indeterminate bits.
+  FloatE4M3() : val(0) {}
+  FloatE4M3(uint8_t v) : val(v) {}
+  // Returns the raw 8-bit encoding.
+  uint8_t get_value() const { return val; }
+
+  // Returns true for NaN: every exponent and fraction bit set (sign ignored).
+  static bool isNan(const FloatE4M3& f) { return (f.val & 0x7F) == 0x7F; }
+  // Always false: this class never reports a value as infinite.
+  static bool isInfinity(const FloatE4M3&) { return false; }
+
+  // Returns the maximum normal value.
+  static FloatE4M3 max() { return FloatE4M3(0x7E); }
+  // Returns the lowest normal value.
+  static FloatE4M3 lowest() { return FloatE4M3(0xFE); }
+
+ private:
+  uint8_t val;
+};
+
 // To specialize this type, you must override uint_type to define
 // an unsigned integer that can fit your floating point type.
 // You must also add a isNan function that returns true if
@@ -95,6 +141,30 @@ struct FloatProxyTraits<Float16> {
  static Float16 lowest() { return Float16::lowest(); }
 };

+template <>
+struct FloatProxyTraits<FloatE5M2> {  // forwards every query to FloatE5M2's static helpers
+  using uint_type = uint8_t;
+  static bool isNan(FloatE5M2 value) { return FloatE5M2::isNan(value); }
+  // True when the value encodes an infinity of either sign.
+  static bool isInfinity(FloatE5M2 value) { return FloatE5M2::isInfinity(value); }
+  // Largest normal value.
+  static FloatE5M2 max() { return FloatE5M2::max(); }
+  // Lowest normal value.
+  static FloatE5M2 lowest() { return FloatE5M2::lowest(); }
+};
+
+template <>
+struct FloatProxyTraits<FloatE4M3> {  // forwards every query to FloatE4M3's static helpers
+  using uint_type = uint8_t;
+  static bool isNan(FloatE4M3 value) { return FloatE4M3::isNan(value); }
+  // Reports whether the value is an infinity, as decided by FloatE4M3::isInfinity.
+  static bool isInfinity(FloatE4M3 value) { return FloatE4M3::isInfinity(value); }
+  // Largest normal value.
+  static FloatE4M3 max() { return FloatE4M3::max(); }
+  // Lowest normal value.
+  static FloatE4M3 lowest() { return FloatE4M3::lowest(); }
+};
+
 // Since copying a floating point number (especially if it is NaN)
 // does not guarantee that bits are preserved, this class lets us
 // store the type and use it as a float when necessary.
@@ -182,6 +252,7 @@ struct HexFloatTraits {
  // The bias of the exponent. (How much we need to subtract from the stored
  // value to get the correct value.)
  static const uint32_t exponent_bias = 0;
+  static bool supportsInfinity() { return true; }
 };

 // Traits for IEEE float.
@@ -196,6 +267,7 @@ struct HexFloatTraits<FloatProxy<float>> {
  static const uint_type num_exponent_bits = 8;
  static const uint_type num_fraction_bits = 23;
  static const uint_type exponent_bias = 127;
+  static bool supportsInfinity() { return true; }
 };

 // Traits for IEEE double.
@@ -210,6 +282,7 @@ struct HexFloatTraits<FloatProxy<double>> {
  static const uint_type num_exponent_bits = 11;
  static const uint_type num_fraction_bits = 52;
  static const uint_type exponent_bias = 1023;
+  static bool supportsInfinity() { return true; }
 };

 // Traits for IEEE half.
@@ -224,6 +297,33 @@ struct HexFloatTraits<FloatProxy<Float16>> {
  static const uint_type num_exponent_bits = 5;
  static const uint_type num_fraction_bits = 10;
  static const uint_type exponent_bias = 15;
+  static bool supportsInfinity() { return true; }
+};
+
+template <>
+struct HexFloatTraits<FloatProxy<FloatE5M2>> {  // 8-bit float: 1 sign, 5 exponent, 2 fraction bits
+  using uint_type = uint8_t;
+  using int_type = int8_t;
+  using underlying_type = uint8_t;
+  using native_type = uint8_t;
+  static const uint_type num_used_bits = 8;
+  static const uint_type num_exponent_bits = 5;
+  static const uint_type num_fraction_bits = 2;
+  static const uint_type exponent_bias = 15;  // subtracted from the stored exponent
+  static bool supportsInfinity() { return true; }  // overflow converts to +/-inf
+};
+
+template <>
+struct HexFloatTraits<FloatProxy<FloatE4M3>> {  // 8-bit float: 1 sign, 4 exponent, 3 fraction bits
+  using uint_type = uint8_t;
+  using int_type = int8_t;
+  using underlying_type = uint8_t;
+  using native_type = uint8_t;
+  static const uint_type num_used_bits = 8;
+  static const uint_type num_exponent_bits = 4;
+  static const uint_type num_fraction_bits = 3;
+  static const uint_type exponent_bias = 7;  // subtracted from the stored exponent
+  static bool supportsInfinity() { return false; }  // overflow is encoded as NaN instead
 };

 enum round_direction {
@@ -243,6 +343,7 @@ class HexFloat {
  typedef typename Traits::int_type int_type;
  typedef typename Traits::underlying_type underlying_type;
  typedef typename Traits::native_type native_type;
+  using Traits_T = Traits;

  explicit HexFloat(T f) : value_(f) {}

@@ -584,14 +685,22 @@ class HexFloat {
        (getBits() & exponent_mask) == exponent_mask && significand != 0;
    bool is_inf =
        !is_nan &&
-        ((exponent + carried) > static_cast<int_type>(other_T::exponent_bias) ||
+        (((exponent + carried) > static_cast<int_type>(other_T::exponent_bias) && other_T::Traits_T::supportsInfinity()) ||
+         ((exponent + carried) > static_cast<int_type>(other_T::exponent_bias + 1) && !other_T::Traits_T::supportsInfinity()) ||
         (significand == 0 && (getBits() & exponent_mask) == exponent_mask));

    // If we are Nan or Inf we should pass that through.
    if (is_inf) {
+      if (other_T::Traits_T::supportsInfinity()) {
+        // encode as +/-inf
        other.set_value(BitwiseCast<typename other_T::underlying_type>(
            static_cast<typename other_T::uint_type>(
                (negate ? other_T::sign_mask : 0) | other_T::exponent_mask)));
+      } else {
+        // encode as +/-nan
+        other.set_value(BitwiseCast<typename other_T::underlying_type>(
+            static_cast<typename other_T::uint_type>(negate ? ~0 : ~other_T::sign_mask)));
+      }
      return;
    }
    if (is_nan) {
--- a/3rdparty/glslang/SPIRV/spirv.hpp11
+++ b/3rdparty/glslang/SPIRV/spirv.hpp11
@@ -547,6 +547,7 @@ enum class Decoration : unsigned {
    MaxByteOffset = 45,
    AlignmentId = 46,
    MaxByteOffsetId = 47,
+    SaturatedToLargestFloat8NormalConversionEXT = 4216,
    NoSignedWrap = 4469,
    NoUnsignedWrap = 4470,
    WeightTextureQCOM = 4487,
@@ -1069,7 +1070,10 @@ enum class Capability : unsigned {
    TileImageColorReadAccessEXT = 4166,
    TileImageDepthReadAccessEXT = 4167,
    TileImageStencilReadAccessEXT = 4168,
+    TensorsARM = 4174,
    CooperativeMatrixLayoutsARM = 4201,
+    Float8EXT = 4212,
+    Float8CooperativeMatrixEXT = 4213,
    FragmentShadingRateKHR = 4422,
    SubgroupBallotKHR = 4423,
    DrawParameters = 4427,
@@ -1106,6 +1110,7 @@ enum class Capability : unsigned {
    TextureBoxFilterQCOM = 4485,
    TextureBlockMatchQCOM = 4486,
    TileShadingQCOM = 4495,
+    CooperativeMatrixConversionQCOM = 4496,
    TextureBlockMatch2QCOM = 4498,
    Float16ImageAMD = 5008,
    ImageGatherBiasLodAMD = 5009,
@@ -1457,6 +1462,18 @@ enum class TensorAddressingOperandsMask : unsigned {
    DecodeFunc = 0x00000002,
 };

+enum class TensorOperandsShift : unsigned {  // bit positions of the TensorOperandsMask flags
+    NontemporalARM = 0,  // bit index of TensorOperandsMask::NontemporalARM (0x1)
+    OutOfBoundsValueARM = 1,  // bit index of TensorOperandsMask::OutOfBoundsValueARM (0x2)
+    Max = 0x7fffffff,
+};
+
+enum class TensorOperandsMask : unsigned {  // flag values, one bit per TensorOperandsShift entry
+    MaskNone = 0,  // no flag set
+    NontemporalARM = 0x00000001,  // 1 << TensorOperandsShift::NontemporalARM
+    OutOfBoundsValueARM = 0x00000002,  // 1 << TensorOperandsShift::OutOfBoundsValueARM
+};
+
 enum class InitializationModeQualifier : unsigned {
    InitOnDeviceReprogramINTEL = 0,
    InitOnDeviceResetINTEL = 1,
@@ -1543,6 +1560,8 @@ enum class RawAccessChainOperandsMask : unsigned {

 enum class FPEncoding : unsigned {
    BFloat16KHR = 0,
+    Float8E4M3EXT = 4214,
+    Float8E5M2EXT = 4215,
    Max = 0x7fffffff,
 };

@@ -1921,6 +1940,10 @@ enum class Op : unsigned {
    OpColorAttachmentReadEXT = 4160,
    OpDepthAttachmentReadEXT = 4161,
    OpStencilAttachmentReadEXT = 4162,
+    OpTypeTensorARM = 4163,
+    OpTensorReadARM = 4164,
+    OpTensorWriteARM = 4165,
+    OpTensorQuerySizeARM = 4166,
    OpTerminateInvocation = 4416,
    OpTypeUntypedPointerKHR = 4417,
    OpUntypedVariableKHR = 4418,
@@ -1974,10 +1997,14 @@ enum class Op : unsigned {
    OpImageBoxFilterQCOM = 4481,
    OpImageBlockMatchSSDQCOM = 4482,
    OpImageBlockMatchSADQCOM = 4483,
+    OpBitCastArrayQCOM = 4497,
    OpImageBlockMatchWindowSSDQCOM = 4500,
    OpImageBlockMatchWindowSADQCOM = 4501,
    OpImageBlockMatchGatherSSDQCOM = 4502,
    OpImageBlockMatchGatherSADQCOM = 4503,
+    OpCompositeConstructCoopMatQCOM = 4540,
+    OpCompositeExtractCoopMatQCOM = 4541,
+    OpExtractSubArrayQCOM = 4542,
    OpGroupIAddNonUniformAMD = 5000,
    OpGroupFAddNonUniformAMD = 5001,
    OpGroupFMinNonUniformAMD = 5002,
@@ -2730,6 +2757,10 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
    case Op::OpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
    case Op::OpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
    case Op::OpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
+    case Op::OpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
+    case Op::OpTensorReadARM: *hasResult = true; *hasResultType = true; break;
+    case Op::OpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
+    case Op::OpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
    case Op::OpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
    case Op::OpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
    case Op::OpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
@@ -2777,10 +2808,14 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
    case Op::OpImageBoxFilterQCOM: *hasResult = true; *hasResultType = true; break;
    case Op::OpImageBlockMatchSSDQCOM: *hasResult = true; *hasResultType = true; break;
    case Op::OpImageBlockMatchSADQCOM: *hasResult = true; *hasResultType = true; break;
+    case Op::OpBitCastArrayQCOM: *hasResult = true; *hasResultType = true; break;
    case Op::OpImageBlockMatchWindowSSDQCOM: *hasResult = true; *hasResultType = true; break;
    case Op::OpImageBlockMatchWindowSADQCOM: *hasResult = true; *hasResultType = true; break;
    case Op::OpImageBlockMatchGatherSSDQCOM: *hasResult = true; *hasResultType = true; break;
    case Op::OpImageBlockMatchGatherSADQCOM: *hasResult = true; *hasResultType = true; break;
+    case Op::OpCompositeConstructCoopMatQCOM: *hasResult = true; *hasResultType = true; break;
+    case Op::OpCompositeExtractCoopMatQCOM: *hasResult = true; *hasResultType = true; break;
+    case Op::OpExtractSubArrayQCOM: *hasResult = true; *hasResultType = true; break;
    case Op::OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
    case Op::OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
    case Op::OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
@@ -3596,6 +3631,7 @@ inline const char* DecorationToString(Decoration value) {
    case Decoration::MaxByteOffset: return "MaxByteOffset";
    case Decoration::AlignmentId: return "AlignmentId";
    case Decoration::MaxByteOffsetId: return "MaxByteOffsetId";
+    case Decoration::SaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
    case Decoration::NoSignedWrap: return "NoSignedWrap";
    case Decoration::NoUnsignedWrap: return "NoUnsignedWrap";
    case Decoration::WeightTextureQCOM: return "WeightTextureQCOM";
@@ -3938,7 +3974,10 @@ inline const char* CapabilityToString(Capability value) {
    case Capability::TileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
    case Capability::TileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
    case Capability::TileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
+    case Capability::TensorsARM: return "TensorsARM";
    case Capability::CooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
+    case Capability::Float8EXT: return "Float8EXT";
+    case Capability::Float8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
    case Capability::FragmentShadingRateKHR: return "FragmentShadingRateKHR";
    case Capability::SubgroupBallotKHR: return "SubgroupBallotKHR";
    case Capability::DrawParameters: return "DrawParameters";
@@ -3973,6 +4012,7 @@ inline const char* CapabilityToString(Capability value) {
    case Capability::TextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
    case Capability::TextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
    case Capability::TileShadingQCOM: return "TileShadingQCOM";
+    case Capability::CooperativeMatrixConversionQCOM: return "CooperativeMatrixConversionQCOM";
    case Capability::TextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
    case Capability::Float16ImageAMD: return "Float16ImageAMD";
    case Capability::ImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
@@ -4277,6 +4317,8 @@ inline const char* NamedMaximumNumberOfRegistersToString(NamedMaximumNumberOfReg
 inline const char* FPEncodingToString(FPEncoding value) {
    switch (value) {
    case FPEncoding::BFloat16KHR: return "BFloat16KHR";
+    case FPEncoding::Float8E4M3EXT: return "Float8E4M3EXT";
+    case FPEncoding::Float8E5M2EXT: return "Float8E5M2EXT";
    default: return "Unknown";
    }
 }
@@ -4661,6 +4703,10 @@ inline const char* OpToString(Op value) {
    case Op::OpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
    case Op::OpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
    case Op::OpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
+    case Op::OpTypeTensorARM: return "OpTypeTensorARM";
+    case Op::OpTensorReadARM: return "OpTensorReadARM";
+    case Op::OpTensorWriteARM: return "OpTensorWriteARM";
+    case Op::OpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
    case Op::OpTerminateInvocation: return "OpTerminateInvocation";
    case Op::OpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
    case Op::OpUntypedVariableKHR: return "OpUntypedVariableKHR";
@@ -4708,10 +4754,14 @@ inline const char* OpToString(Op value) {
    case Op::OpImageBoxFilterQCOM: return "OpImageBoxFilterQCOM";
    case Op::OpImageBlockMatchSSDQCOM: return "OpImageBlockMatchSSDQCOM";
    case Op::OpImageBlockMatchSADQCOM: return "OpImageBlockMatchSADQCOM";
+    case Op::OpBitCastArrayQCOM: return "OpBitCastArrayQCOM";
    case Op::OpImageBlockMatchWindowSSDQCOM: return "OpImageBlockMatchWindowSSDQCOM";
    case Op::OpImageBlockMatchWindowSADQCOM: return "OpImageBlockMatchWindowSADQCOM";
    case Op::OpImageBlockMatchGatherSSDQCOM: return "OpImageBlockMatchGatherSSDQCOM";
    case Op::OpImageBlockMatchGatherSADQCOM: return "OpImageBlockMatchGatherSADQCOM";
+    case Op::OpCompositeConstructCoopMatQCOM: return "OpCompositeConstructCoopMatQCOM";
+    case Op::OpCompositeExtractCoopMatQCOM: return "OpCompositeExtractCoopMatQCOM";
+    case Op::OpExtractSubArrayQCOM: return "OpExtractSubArrayQCOM";
    case Op::OpGroupIAddNonUniformAMD: return "OpGroupIAddNonUniformAMD";
    case Op::OpGroupFAddNonUniformAMD: return "OpGroupFAddNonUniformAMD";
    case Op::OpGroupFMinNonUniformAMD: return "OpGroupFMinNonUniformAMD";
@@ -5161,6 +5211,10 @@ constexpr TensorAddressingOperandsMask operator|(TensorAddressingOperandsMask a,
 constexpr TensorAddressingOperandsMask operator&(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) & unsigned(b)); }
 constexpr TensorAddressingOperandsMask operator^(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) ^ unsigned(b)); }
 constexpr TensorAddressingOperandsMask operator~(TensorAddressingOperandsMask a) { return TensorAddressingOperandsMask(~unsigned(a)); }
+constexpr TensorOperandsMask operator|(TensorOperandsMask a, TensorOperandsMask b) { return static_cast<TensorOperandsMask>(static_cast<unsigned>(a) | static_cast<unsigned>(b)); }
+constexpr TensorOperandsMask operator&(TensorOperandsMask a, TensorOperandsMask b) { return static_cast<TensorOperandsMask>(static_cast<unsigned>(a) & static_cast<unsigned>(b)); }
+constexpr TensorOperandsMask operator^(TensorOperandsMask a, TensorOperandsMask b) { return static_cast<TensorOperandsMask>(static_cast<unsigned>(a) ^ static_cast<unsigned>(b)); }
+constexpr TensorOperandsMask operator~(TensorOperandsMask a) { return static_cast<TensorOperandsMask>(~static_cast<unsigned>(a)); }
 constexpr MatrixMultiplyAccumulateOperandsMask operator|(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) | unsigned(b)); }
 constexpr MatrixMultiplyAccumulateOperandsMask operator&(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) & unsigned(b)); }
 constexpr MatrixMultiplyAccumulateOperandsMask operator^(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) ^ unsigned(b)); }
--- a/3rdparty/glslang/build_info.h
+++ b/3rdparty/glslang/build_info.h
@@ -35,7 +35,7 @@
 #define GLSLANG_BUILD_INFO

 #define GLSLANG_VERSION_MAJOR 15
-#define GLSLANG_VERSION_MINOR 3
+#define GLSLANG_VERSION_MINOR 4
 #define GLSLANG_VERSION_PATCH 0
 #define GLSLANG_VERSION_FLAVOR ""

--- a/3rdparty/glslang/glslang/Include/BaseTypes.h
+++ b/3rdparty/glslang/glslang/Include/BaseTypes.h
@@ -50,6 +50,8 @@ enum TBasicType {
    EbtDouble,
    EbtFloat16,
    EbtBFloat16,
+    EbtFloatE5M2,
+    EbtFloatE4M3,
    EbtInt8,
    EbtUint8,
    EbtInt16,
@@ -72,6 +74,7 @@ enum TBasicType {
    EbtTensorLayoutNV,
    EbtTensorViewNV,
    EbtCoopvecNV,
+    EbtTensorARM,
    // SPIR-V type defined by spirv_type
    EbtSpirvType,

@@ -609,6 +612,8 @@ __inline bool isTypeFloat(TBasicType type)
    case EbtDouble:
    case EbtFloat16:
    case EbtBFloat16:
+    case EbtFloatE5M2:
+    case EbtFloatE4M3:
        return true;
    default:
        return false;
@@ -620,6 +625,8 @@ __inline uint32_t GetNumBits(TBasicType type)
    switch (type) {
    case EbtInt8:
    case EbtUint8:
+    case EbtFloatE5M2:
+    case EbtFloatE4M3:
        return 8;
    case EbtBFloat16:
    case EbtFloat16:
--- a/3rdparty/glslang/glslang/Include/ConstantUnion.h
+++ b/3rdparty/glslang/glslang/Include/ConstantUnion.h
@@ -899,6 +899,17 @@ public:
        unionArray = new TConstUnionVector(size, val);
    }

+    // Returns a newly allocated array whose elements are copies of this array's.
+    TConstUnionArray* clone() const
+    {
+        TConstUnionArray *copy = new TConstUnionArray(size());
+        // Replace the vector's contents instead of push_back-ing onto it: if the sized
+        // constructor pre-fills size() default slots, appending would double the length.
+        if (size() > 0)
+            *copy->unionArray = *unionArray;
+        return copy;
+    }
+
    int size() const { return unionArray ? (int)unionArray->size() : 0; }
    TConstUnion& operator[](size_t index) { return (*unionArray)[index]; }
    const TConstUnion& operator[](size_t index) const { return (*unionArray)[index]; }
--- a/3rdparty/glslang/glslang/Include/Types.h
+++ b/3rdparty/glslang/glslang/Include/Types.h
@@ -252,6 +252,8 @@ struct TSampler {   // misnomer now; includes images, textures without sampler,
        case EbtUint:   s.append("u");   break;
        case EbtFloat16: s.append("f16"); break;
        case EbtBFloat16: s.append("bf16"); break;
+        case EbtFloatE5M2: s.append("fe5m2"); break;
+        case EbtFloatE4M3: s.append("fe4m3"); break;
        case EbtInt8:   s.append("i8");  break;
        case EbtUint16: s.append("u8");  break;
        case EbtInt16:  s.append("i16"); break;
@@ -1500,6 +1502,7 @@ public:
    bool coopmatKHR : 1;
    bool coopvecNV  : 1;
    bool tileAttachmentQCOM: 1;
+    uint32_t tensorRankARM : 4;
    TArraySizes* arraySizes;
    const TType* userDef;
    TSourceLoc loc;
@@ -1511,7 +1514,8 @@ public:
    bool isCoopmatNV() const { return coopmatNV; }
    bool isCoopmatKHR() const { return coopmatKHR; }
    bool isCoopvecNV() const { return coopvecNV; }
-    bool isCoopmatOrvec() const { return isCoopmat() || isCoopvecNV(); }
+    bool isTensorARM() const { return tensorRankARM != 0; }  // a non-zero rank marks a tensor type
+    bool hasTypeParameter() const { const bool cooperative = isCoopmat() || isCoopvecNV(); return cooperative || isTensorARM(); }

    bool isTensorLayoutNV() const { return basicType == EbtTensorLayoutNV; }
    bool isTensorViewNV() const { return basicType == EbtTensorViewNV; }
@@ -1530,6 +1534,7 @@ public:
        coopmatKHR = false;
        coopvecNV = false;
        tileAttachmentQCOM = false;
+        tensorRankARM = 0;
        spirvType = nullptr;
    }

@@ -1590,7 +1595,7 @@ public:
    explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0,
                   bool isVector = false) :
                            basicType(t), vectorSize(static_cast<uint32_t>(vs) & 0b1111), matrixCols(static_cast<uint32_t>(mc) & 0b1111), matrixRows(static_cast<uint32_t>(mr) & 0b1111), vector1(isVector && vs == 1), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false),
-                            tileAttachmentQCOM(false), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
+                            tileAttachmentQCOM(false), tensorRankARM(0), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
                            spirvType(nullptr)
                            {
                                assert(vs >= 0);
@@ -1606,7 +1611,7 @@ public:
    TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0,
          bool isVector = false) :
                            basicType(t), vectorSize(static_cast<uint32_t>(vs) & 0b1111), matrixCols(static_cast<uint32_t>(mc) & 0b1111), matrixRows(static_cast<uint32_t>(mr) & 0b1111), vector1(isVector && vs == 1), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false),
-                            tileAttachmentQCOM(false), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
+                            tileAttachmentQCOM(false), tensorRankARM(0), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
                            spirvType(nullptr)
                            {
                                assert(vs >= 0);
@@ -1624,7 +1629,7 @@ public:
    explicit TType(const TPublicType& p) :
                            basicType(p.basicType),
                            vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), vector1(false), coopmatNV(p.coopmatNV), coopmatKHR(p.coopmatKHR), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(p.coopvecNV),
-                            tileAttachmentQCOM(p.tileAttachmentQCOM), arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(p.typeParameters),
+                            tileAttachmentQCOM(p.tileAttachmentQCOM), tensorRankARM(p.tensorRankARM), arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(p.typeParameters),
                            spirvType(p.spirvType)
                            {
                                if (basicType == EbtSampler)
@@ -1677,11 +1682,17 @@ public:
                                if (p.isCoopvecNV() && p.typeParameters) {
                                    basicType = p.typeParameters->basicType;
                                }
+                                if (p.isTensorARM() && p.typeParameters) {
+                                    basicType = p.typeParameters->basicType;
+                                    if (p.typeParameters->arraySizes->getNumDims() > 0) {
+                                        tensorRankARM = static_cast<uint32_t>(p.typeParameters->arraySizes->getDimSize(0)) & 0b1111;
+                                    }
+                                }
                            }
    // for construction of sampler types
    TType(const TSampler& sampler, TStorageQualifier q = EvqUniform, TArraySizes* as = nullptr) :
        basicType(EbtSampler), vectorSize(1u), matrixCols(0u), matrixRows(0u), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false),
-        tileAttachmentQCOM(false), arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr),
+        tileAttachmentQCOM(false), tensorRankARM(0), arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr),
        sampler(sampler), typeParameters(nullptr), spirvType(nullptr)
    {
        qualifier.clear();
@@ -1739,7 +1750,7 @@ public:
    // for making structures, ...
    TType(TTypeList* userDef, const TString& n) :
                            basicType(EbtStruct), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false),
-                            tileAttachmentQCOM(false), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr),
+                            tileAttachmentQCOM(false), tensorRankARM(0), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr),
                            spirvType(nullptr)
                            {
                                sampler.clear();
@@ -1749,7 +1760,7 @@ public:
    // For interface blocks
    TType(TTypeList* userDef, const TString& n, const TQualifier& q) :
                            basicType(EbtBlock), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false),
-                            tileAttachmentQCOM(false), qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr),
+                            tileAttachmentQCOM(false), tensorRankARM(0), qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr),
                            spirvType(nullptr)
                            {
                                sampler.clear();
@@ -1758,7 +1769,7 @@ public:
    // for block reference (first parameter must be EbtReference)
    explicit TType(TBasicType t, const TType &p, const TString& n) :
                            basicType(t), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false),
-                            tileAttachmentQCOM(false), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
+                            tileAttachmentQCOM(false), tensorRankARM(0), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
                            spirvType(nullptr)
                            {
                                assert(t == EbtReference);
@@ -1798,6 +1809,7 @@ public:
        coopmatKHRUseValid = copyOf.coopmatKHRUseValid;
        coopvecNV = copyOf.isCoopVecNV();
        tileAttachmentQCOM = copyOf.tileAttachmentQCOM;
+        tensorRankARM = copyOf.tensorRankARM;
    }

    // Make complete copy of the whole type graph rooted at 'copyOf'.
@@ -1837,6 +1849,7 @@ public:
        return *typeName;
    }

+    virtual bool hasFieldName() const { return fieldName != nullptr; } // check before getFieldName(), which asserts
    virtual const TString& getFieldName() const
    {
        assert(fieldName);
@@ -1895,7 +1908,8 @@ public:
    virtual void updateImplicitArraySize(int size) { assert(isArray()); arraySizes->updateImplicitSize(size); }
    virtual void setImplicitlySized(bool isImplicitSized) { arraySizes->setImplicitlySized(isImplicitSized); }
    virtual bool isStruct() const { return basicType == EbtStruct || basicType == EbtBlock; }
-    virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble || basicType == EbtFloat16 || basicType == EbtBFloat16; }
+    virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble || basicType == EbtFloat16 ||
+                                                   basicType == EbtBFloat16 || basicType == EbtFloatE5M2 || basicType == EbtFloatE4M3; } // bfloat16 and both 8-bit float encodings included
    virtual bool isIntegerDomain() const
    {
        switch (basicType) {
@@ -1916,7 +1930,9 @@ public:
    }
    virtual bool isOpaque() const { return basicType == EbtSampler
            || basicType == EbtAtomicUint || basicType == EbtAccStruct || basicType == EbtRayQuery
-            || basicType == EbtHitObjectNV || isTileAttachmentQCOM(); }
+            || basicType == EbtHitObjectNV || isTileAttachmentQCOM()
+            || isTensorARM();
+    }
    virtual bool isBuiltIn() const { return getQualifier().builtIn != EbvNone; }

    virtual bool isAttachmentEXT() const { return basicType == EbtSampler && getSampler().isAttachmentEXT(); }
@@ -1933,8 +1949,10 @@ public:
    bool isCoopMatNV() const { return coopmatNV; }
    bool isCoopMatKHR() const { return coopmatKHR; }
    bool isCoopVecNV() const { return coopvecNV; }
-    bool isCoopMatOrVec() const { return isCoopMat() || isCoopVecNV(); }
    bool isTileAttachmentQCOM() const { return tileAttachmentQCOM; }
+    bool isTensorARM() const { return tensorRankARM != 0; }  // a stored rank of 0 marks a non-tensor type
+    bool hasTypeParameter() const { return isCoopMat() || isCoopVecNV() || isTensorARM(); }  // cooperative matrix/vector or tensor
+    int getTensorRankARM() const { return static_cast<int>(tensorRankARM); }  // 0 when the type is not a tensor
    bool isReference() const { return getBasicType() == EbtReference; }
    bool isSpirvType() const { return getBasicType() == EbtSpirvType; }
    int getCoopMatKHRuse() const { return static_cast<int>(coopmatKHRuse); }
@@ -1996,6 +2014,11 @@ public:

    virtual bool containsNonOpaque() const
    {
+        if (isTensorARM()) {
+            // Tensors have a numerical basicType even though it is Opaque
+            return false;
+        }
+
        const auto nonOpaque = [](const TType* t) {
            switch (t->basicType) {
            case EbtVoid:
@@ -2003,6 +2026,8 @@ public:
            case EbtDouble:
            case EbtFloat16:
            case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
            case EbtInt8:
            case EbtUint8:
            case EbtInt16:
@@ -2039,6 +2064,10 @@ public:
    {
        return containsBasicType(EbtBFloat16);
    }
+    bool contains8BitFloat() const // true if the type contains either 8-bit float basic type (E5M2 or E4M3)
+    {
+        return containsBasicType(EbtFloatE5M2) || containsBasicType(EbtFloatE4M3);
+    }
    bool contains64BitInt() const
    {
        return containsBasicType(EbtInt64) || containsBasicType(EbtUint64);
@@ -2161,6 +2190,8 @@ public:
        case EbtDouble:            return "double";
        case EbtFloat16:           return "float16_t";
        case EbtBFloat16:          return "bfloat16_t";
+        case EbtFloatE5M2:         return "floate5m2_t";
+        case EbtFloatE4M3:         return "floate4m3_t";
        case EbtInt8:              return "int8_t";
        case EbtUint8:             return "uint8_t";
        case EbtInt16:             return "int16_t";
@@ -2180,6 +2211,7 @@ public:
        case EbtTensorLayoutNV:    return "tensorLayoutNV";
        case EbtTensorViewNV:      return "tensorViewNV";
        case EbtCoopvecNV:         return "coopvecNV";
+        case EbtTensorARM:         return "tensorARM";
        default:                   return "unknown type";
        }
    }
@@ -2792,6 +2824,7 @@ public:
              isCoopMatNV() == right.isCoopMatNV() &&
              isCoopMatKHR() == right.isCoopMatKHR() &&
              isCoopVecNV() == right.isCoopVecNV() &&
+               isTensorARM() == right.isTensorARM() &&
               sameStructType(right, lpidx, rpidx) &&
               sameReferenceType(right);
    }
@@ -2839,8 +2872,8 @@ public:
            else
                rv = false;
        } else if (isCoopMatKHR() && right.isCoopMatKHR()) {
-            if (getBasicType() == EbtFloat || getBasicType() == EbtFloat16 || getBasicType() == EbtBFloat16)
-                rv = right.getBasicType() == EbtFloat || right.getBasicType() == EbtFloat16 || right.getBasicType() == EbtBFloat16 || right.getBasicType() == EbtCoopmat;
+            if (isFloatingDomain())
+                rv = right.isFloatingDomain() || right.getBasicType() == EbtCoopmat;
            else if (getBasicType() == EbtUint || getBasicType() == EbtUint8 || getBasicType() == EbtUint16)
                rv = right.getBasicType() == EbtUint || right.getBasicType() == EbtUint8 || right.getBasicType() == EbtUint16 || right.getBasicType() == EbtCoopmat;
            else if (getBasicType() == EbtInt || getBasicType() == EbtInt8 || getBasicType() == EbtInt16)
@@ -2859,9 +2892,18 @@ public:
        if (isTensorViewNV()) {
            return right.isTensorViewNV() && right.typeParameters == nullptr && typeParameters != nullptr;
        }
+        if (isTensorARM()) {
+            return right.isTensorARM() && right.typeParameters == nullptr && typeParameters != nullptr;
+        }
+
        return false;
    }

+    bool sameTensorBaseTypeARM(const TType &right) const { // a side without typeParameters compares as matching
+        if (typeParameters == nullptr || right.typeParameters == nullptr) return true;
+        return tensorRankARM == right.tensorRankARM && getBasicType() == right.getBasicType();
+    }
+
    bool sameCoopVecBaseType(const TType &right) const {
        bool rv = false;

@@ -3009,6 +3051,7 @@ protected:
    bool coopmatKHRUseValid   : 1;  // True if coopmatKHRuse has been set
    bool coopvecNV       : 1;
    bool tileAttachmentQCOM : 1;
+    uint32_t tensorRankARM       : 4;  // 0 means not a tensor; non-zero indicates the tensor rank.
    TQualifier qualifier;

    TArraySizes* arraySizes;    // nullptr unless an array; can be shared across types
--- a/3rdparty/glslang/glslang/Include/intermediate.h
+++ b/3rdparty/glslang/glslang/Include/intermediate.h
@@ -479,6 +479,10 @@ enum TOperator {
    EOpCooperativeVectorOuterProductAccumulateNV,
    EOpCooperativeVectorReduceSumAccumulateNV,

+    EOpTensorReadARM,
+    EOpTensorWriteARM,
+    EOpTensorSizeARM,
+
    EOpBeginInvocationInterlock, // Fragment only
    EOpEndInvocationInterlock, // Fragment only

@@ -615,6 +619,14 @@ enum TOperator {
    EOpConstructBF16Vec2,
    EOpConstructBF16Vec3,
    EOpConstructBF16Vec4,
+    EOpConstructFloatE5M2,
+    EOpConstructFloatE5M2Vec2,
+    EOpConstructFloatE5M2Vec3,
+    EOpConstructFloatE5M2Vec4,
+    EOpConstructFloatE4M3,
+    EOpConstructFloatE4M3Vec2,
+    EOpConstructFloatE4M3Vec3,
+    EOpConstructFloatE4M3Vec4,
    EOpConstructStruct,
    EOpConstructTextureSampler,
    EOpConstructNonuniform,     // expected to be transformed away, not present in final AST
@@ -623,6 +635,7 @@ enum TOperator {
    EOpConstructCooperativeMatrixKHR,
    EOpConstructCooperativeVectorNV,
    EOpConstructAccStruct,
+    EOpConstructSaturated,
    EOpConstructGuardEnd,

    //
@@ -972,6 +985,12 @@ enum TOperator {
    EOpImageBlockMatchGatherSSDQCOM,
    EOpImageBlockMatchGatherSADQCOM,

+    // Cooperative Matrix Conversion
+    EOpBitCastArrayQCOM,
+    EOpExtractSubArrayQCOM,
+    EOpCompositeConstructCoopMatQCOM,
+    EOpCompositeExtractCoopMatQCOM,
+
    // GL_NV_cluster_acceleration_structure
    EOpRayQueryGetIntersectionClusterIdNV,
    EOpHitObjectGetClusterIdNV,
@@ -1095,6 +1114,7 @@ public:
    virtual int getVectorSize() const { return type.getVectorSize(); }
    virtual int getMatrixCols() const { return type.getMatrixCols(); }
    virtual int getMatrixRows() const { return type.getMatrixRows(); }
+    virtual int getTensorRankARM() const { return type.getTensorRankARM(); } // forwards to this node's type
    virtual bool isMatrix() const { return type.isMatrix(); }
    virtual bool isArray()  const { return type.isArray(); }
    virtual bool isVector() const { return type.isVector(); }
--- a/3rdparty/glslang/glslang/MachineIndependent/Constant.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/Constant.cpp
@@ -152,6 +152,8 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TIntermTyped* right
            case EbtFloat:
            case EbtFloat16:
            case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
                if (rightUnionArray[i].getDConst() != 0.0)
                    newConstArray[i].setDConst(leftUnionArray[i].getDConst() / rightUnionArray[i].getDConst());
                else if (leftUnionArray[i].getDConst() > 0.0)
@@ -505,6 +507,8 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType)
            case EbtDouble:
            case EbtFloat16:
            case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
            case EbtFloat:
                valf = unionArray[i].getDConst();
                srcType = CONV_FLOAT;
@@ -554,6 +558,8 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType)
            case EbtDouble:
            case EbtFloat16:
            case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
            case EbtFloat:
                dstType = CONV_FLOAT;
                break;
@@ -625,6 +631,8 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType)
            case EbtDouble:
            case EbtFloat16:
            case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
            case EbtFloat:
                newConstArray[i].setDConst(valf); break;
            case EbtInt8:
@@ -657,6 +665,8 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType)
            case EbtDouble:
            case EbtFloat16:
            case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
            case EbtFloat: newConstArray[i].setDConst(-unionArray[i].getDConst()); break;
            // Note: avoid UBSAN error regarding negating 0x80000000
            case EbtInt:   newConstArray[i].setIConst(
@@ -950,6 +960,8 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode)
                switch(children[0]->getAsTyped()->getBasicType()) {
                case EbtFloat16:
                case EbtBFloat16:
+                case EbtFloatE5M2:
+                case EbtFloatE4M3:
                case EbtFloat:
                case EbtDouble:
                    newConstArray[comp].setDConst(std::min(childConstUnions[0][arg0comp].getDConst(), childConstUnions[1][arg1comp].getDConst()));
@@ -985,6 +997,8 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode)
                switch(children[0]->getAsTyped()->getBasicType()) {
                case EbtFloat16:
                case EbtBFloat16:
+                case EbtFloatE5M2:
+                case EbtFloatE4M3:
                case EbtFloat:
                case EbtDouble:
                    newConstArray[comp].setDConst(std::max(childConstUnions[0][arg0comp].getDConst(), childConstUnions[1][arg1comp].getDConst()));
@@ -1020,6 +1034,8 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode)
                switch(children[0]->getAsTyped()->getBasicType()) {
                case EbtFloat16:
                case EbtBFloat16:
+                case EbtFloatE5M2:
+                case EbtFloatE4M3:
                case EbtFloat:
                case EbtDouble:
                    newConstArray[comp].setDConst(std::min(std::max(childConstUnions[0][arg0comp].getDConst(), childConstUnions[1][arg1comp].getDConst()),
--- a/3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp
@@ -4025,6 +4025,47 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
            "bf16vec3   uintBitsToBFloat16EXT(u16vec3 value);"
            "bf16vec4   uintBitsToBFloat16EXT(u16vec4 value);"

+            "int8_t  floate5m2BitsToIntEXT(floate5m2_t value);"
+            "i8vec2  floate5m2BitsToIntEXT(fe5m2vec2 value);"
+            "i8vec3  floate5m2BitsToIntEXT(fe5m2vec3 value);"
+            "i8vec4  floate5m2BitsToIntEXT(fe5m2vec4 value);"
+
+            "uint8_t floate5m2BitsToUintEXT(floate5m2_t value);"
+            "u8vec2  floate5m2BitsToUintEXT(fe5m2vec2 value);"
+            "u8vec3  floate5m2BitsToUintEXT(fe5m2vec3 value);"
+            "u8vec4  floate5m2BitsToUintEXT(fe5m2vec4 value);"
+
+            "floate5m2_t intBitsToFloate5m2EXT(int8_t value);"
+            "fe5m2vec2   intBitsToFloate5m2EXT(i8vec2 value);"
+            "fe5m2vec3   intBitsToFloate5m2EXT(i8vec3 value);"
+            "fe5m2vec4   intBitsToFloate5m2EXT(i8vec4 value);"
+
+            "floate5m2_t uintBitsToFloate5m2EXT(uint8_t value);"
+            "fe5m2vec2   uintBitsToFloate5m2EXT(u8vec2 value);"
+            "fe5m2vec3   uintBitsToFloate5m2EXT(u8vec3 value);"
+            "fe5m2vec4   uintBitsToFloate5m2EXT(u8vec4 value);"
+
+            "int8_t  floate4m3BitsToIntEXT(floate4m3_t value);"
+            "i8vec2  floate4m3BitsToIntEXT(fe4m3vec2 value);"
+            "i8vec3  floate4m3BitsToIntEXT(fe4m3vec3 value);"
+            "i8vec4  floate4m3BitsToIntEXT(fe4m3vec4 value);"
+
+            "uint8_t floate4m3BitsToUintEXT(floate4m3_t value);"
+            "u8vec2  floate4m3BitsToUintEXT(fe4m3vec2 value);"
+            "u8vec3  floate4m3BitsToUintEXT(fe4m3vec3 value);"
+            "u8vec4  floate4m3BitsToUintEXT(fe4m3vec4 value);"
+
+            "floate4m3_t intBitsToFloate4m3EXT(int8_t value);"
+            "fe4m3vec2   intBitsToFloate4m3EXT(i8vec2 value);"
+            "fe4m3vec3   intBitsToFloate4m3EXT(i8vec3 value);"
+            "fe4m3vec4   intBitsToFloate4m3EXT(i8vec4 value);"
+
+            "floate4m3_t uintBitsToFloate4m3EXT(uint8_t value);"
+            "fe4m3vec2   uintBitsToFloate4m3EXT(u8vec2 value);"
+            "fe4m3vec3   uintBitsToFloate4m3EXT(u8vec3 value);"
+            "fe4m3vec4   uintBitsToFloate4m3EXT(u8vec4 value);"
+
+            "void saturatedConvertEXT();"
            "\n");
    }

@@ -4777,6 +4818,8 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
                "float", "vec2", "vec4",
                "float16_t", "f16vec2", "f16vec4",
                "bfloat16_t", "bf16vec2", "bf16vec4",
+                "floate5m2_t", "fe5m2vec2", "fe5m2vec4",
+                "floate4m3_t", "fe4m3vec2", "fe4m3vec4",
                "double", "dvec2", "dvec4",
                "int8_t", "i8vec2", "i8vec4",
                "int16_t", "i16vec2", "i16vec4",
@@ -4841,6 +4884,31 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
            "\n"
            );

+        {
+            // GL_QCOM_cooperative_matrix_conversion: build the prototype text, then append it in one go.
+            std::stringstream coopMatConvFuncs;
+            const char* const eltTypes[] = {"uint32_t", "uint", "int32_t", "int", "float32_t", "float", "float16_t"};
+            // bitcastQCOM: one overload per (source, destination) element-type pair.
+            for (const char* srcEltTy : eltTypes) {
+                for (const char* dstEltTy : eltTypes) {
+                    coopMatConvFuncs << "void bitcastQCOM(" << srcEltTy << " SrcArr[], " << dstEltTy
+                                     << " DstArr[]);\n";
+                }
+            }
+            coopMatConvFuncs << "\n";
+
+            for (const char* eltTy : {"float32_t", "float16_t", "int8_t", "uint8_t", "uint32_t", "uint", "int32_t", "int"}) {
+                coopMatConvFuncs << "void vectorToCoopmatQCOM(" << eltTy << " SrcVec[], coopmat CM);\n";
+                coopMatConvFuncs << "void coopmatToVectorQCOM(coopmat CM, " << eltTy << " Dstvec[]);\n";
+            }
+
+            for (const char* eltTy : eltTypes) { // extractSubArrayQCOM takes the same element types as bitcastQCOM
+                coopMatConvFuncs << "void extractSubArrayQCOM(" << eltTy << " arr[], uint index, " << eltTy << " subarr[]);\n";
+            }
+
+            commonBuiltins.append(coopMatConvFuncs.str().c_str());
+        }
+
        commonBuiltins.append(
            "tensorLayoutNV createTensorLayoutNV(uint Dim);\n"
            "tensorLayoutNV createTensorLayoutNV(uint Dim, uint Mode);\n"
@@ -4894,6 +4962,29 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
            "tensorViewNV setTensorViewClipNV(tensorViewNV v, uint clipRowOffset, uint clipRowSpan, uint clipColOffset, uint clipColSpan);\n"
            "\n"
        );
+
+        // GL_ARM_tensors builtins: read/write overloads per element type, plus a size query.
+        static const char *tensorDataTypesARM[] = {
+            "bool",
+            "int8_t", "int16_t", "int32_t", "int64_t",
+            "uint8_t", "uint16_t", "uint32_t", "uint64_t",
+            "float16_t", "float32_t", "float64_t",
+        };
+        std::ostringstream tensorDecls;
+        for (const char* elemType : tensorDataTypesARM) {
+            // Overloads whose data parameter is a single element.
+            tensorDecls << "void tensorReadARM(readonly tensorARM t, uint coords[], out " << elemType
+                        << " data, uint tensorOperands = 0U, ...);\n";
+            tensorDecls << "void tensorWriteARM(writeonly tensorARM t, uint coords[], " << elemType
+                        << " data, uint tensorOperands = 0U, ...);\n";
+            // Overloads whose data parameter is an array of elements.
+            tensorDecls << "void tensorReadARM(readonly tensorARM t, uint coords[], " << elemType
+                        << " data[], uint tensorOperands = 0U, ...);\n";
+            tensorDecls << "void tensorWriteARM(writeonly tensorARM t, uint coords[], " << elemType
+                        << " data[], uint tensorOperands = 0U, ...);\n";
+        }
+        tensorDecls << "uint tensorSizeARM(readonly writeonly tensorARM t, uint dim);\n";
+        commonBuiltins.append(tensorDecls.str());
    }

    if (profile != EEsProfile && version >= 450) {
@@ -8285,6 +8376,12 @@ void TBuiltIns::initialize(const TBuiltInResource &resources, int version, EProf
        snprintf(builtInConstant, maxSize, "const int gl_MaxComputeTextureImageUnits = %d;", resources.maxComputeTextureImageUnits);
        s.append(builtInConstant);

+        // GL_ARM_tensors operands.
+        snprintf(builtInConstant, maxSize, "const uint gl_TensorOperandsNonTemporalARM = 0x1U;");
+        s.append(builtInConstant);
+        snprintf(builtInConstant, maxSize, "const uint gl_TensorOperandsOutOfBoundsValueARM = 0x2U;");
+        s.append(builtInConstant);
+
        s.append("\n");
    }

@@ -9706,6 +9803,12 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
            symbolTable.setFunctionExtensions("setTensorViewClipNV",            1, &E_GL_NV_cooperative_matrix2);
        }

+        {
+            // Tie each tensor builtin to the GL_ARM_tensors extension.
+            for (const char* builtinName : {"tensorReadARM", "tensorWriteARM", "tensorSizeARM"})
+                symbolTable.setFunctionExtensions(builtinName, 1, &E_GL_ARM_tensors);
+        }
+
        {
            symbolTable.setFunctionExtensions("coopVecMatMulNV",                    1, &E_GL_NV_cooperative_vector);
            symbolTable.setFunctionExtensions("coopVecMatMulAddNV",                 1, &E_GL_NV_cooperative_vector);
@@ -9713,6 +9816,13 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
            symbolTable.setFunctionExtensions("coopVecReduceSumAccumulateNV",       1, &E_GL_NV_cooperative_vector);
        }

+        {
+            // Tie each conversion builtin to the GL_QCOM_cooperative_matrix_conversion extension.
+            for (const char* builtinName : {"bitcastQCOM", "extractSubArrayQCOM",
+                                            "vectorToCoopmatQCOM", "coopmatToVectorQCOM"})
+                symbolTable.setFunctionExtensions(builtinName, 1, &E_GL_QCOM_cooperative_matrix_conversion);
+        }
+
        if ((profile != EEsProfile && version >= 450) || (profile == EEsProfile && version >= 320)) {
            symbolTable.setFunctionExtensions("dFdx",                   1, &E_GL_NV_compute_shader_derivatives);
            symbolTable.setFunctionExtensions("dFdy",                   1, &E_GL_NV_compute_shader_derivatives);
@@ -9752,6 +9862,19 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
            symbolTable.setFunctionExtensions("bfloat16BitsToUintEXT", 1, &E_GL_EXT_bfloat16);
            symbolTable.setFunctionExtensions("intBitsToBFloat16EXT", 1, &E_GL_EXT_bfloat16);
            symbolTable.setFunctionExtensions("uintBitsToBFloat16EXT", 1, &E_GL_EXT_bfloat16);
+
+            symbolTable.setFunctionExtensions("floate5m2BitsToIntEXT", 1, &E_GL_EXT_float_e5m2);
+            symbolTable.setFunctionExtensions("floate5m2BitsToUintEXT", 1, &E_GL_EXT_float_e5m2);
+            symbolTable.setFunctionExtensions("intBitsToFloate5m2EXT", 1, &E_GL_EXT_float_e5m2);
+            symbolTable.setFunctionExtensions("uintBitsToFloate5m2EXT", 1, &E_GL_EXT_float_e5m2);
+
+            symbolTable.setFunctionExtensions("floate4m3BitsToIntEXT", 1, &E_GL_EXT_float_e4m3);
+            symbolTable.setFunctionExtensions("floate4m3BitsToUintEXT", 1, &E_GL_EXT_float_e4m3);
+            symbolTable.setFunctionExtensions("intBitsToFloate4m3EXT", 1, &E_GL_EXT_float_e4m3);
+            symbolTable.setFunctionExtensions("uintBitsToFloate4m3EXT", 1, &E_GL_EXT_float_e4m3);
+
+            const char *float8exts[] = {E_GL_EXT_float_e5m2, E_GL_EXT_float_e4m3};
+            symbolTable.setFunctionExtensions("saturatedConvertEXT", 2, float8exts);
        }

        // E_SPV_QCOM_tile_shading
@@ -10750,6 +10873,18 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
            symbolTable.relateToOperator("bfloat16BitsToUintEXT", EOpFloatBitsToUint);
            symbolTable.relateToOperator("intBitsToBFloat16EXT",  EOpIntBitsToFloat);
            symbolTable.relateToOperator("uintBitsToBFloat16EXT", EOpUintBitsToFloat);
+
+            symbolTable.relateToOperator("floate5m2BitsToIntEXT",  EOpFloatBitsToInt);
+            symbolTable.relateToOperator("floate5m2BitsToUintEXT", EOpFloatBitsToUint);
+            symbolTable.relateToOperator("intBitsToFloate5m2EXT",  EOpIntBitsToFloat);
+            symbolTable.relateToOperator("uintBitsToFloate5m2EXT", EOpUintBitsToFloat);
+
+            symbolTable.relateToOperator("floate4m3BitsToIntEXT",  EOpFloatBitsToInt);
+            symbolTable.relateToOperator("floate4m3BitsToUintEXT", EOpFloatBitsToUint);
+            symbolTable.relateToOperator("intBitsToFloate4m3EXT",  EOpIntBitsToFloat);
+            symbolTable.relateToOperator("uintBitsToFloate4m3EXT", EOpUintBitsToFloat);
+
+            symbolTable.relateToOperator("saturatedConvertEXT", EOpConstructSaturated);
        }

        // GL_KHR_shader_subgroup
@@ -10997,6 +11132,15 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
        symbolTable.relateToOperator("setTensorViewStrideNV",        EOpTensorViewSetStrideNV);
        symbolTable.relateToOperator("setTensorViewClipNV",          EOpTensorViewSetClipNV);

+        symbolTable.relateToOperator("tensorReadARM",                EOpTensorReadARM);
+        symbolTable.relateToOperator("tensorWriteARM",               EOpTensorWriteARM);
+        symbolTable.relateToOperator("tensorSizeARM",                EOpTensorSizeARM);
+
+        symbolTable.relateToOperator("bitcastQCOM", EOpBitCastArrayQCOM);
+        symbolTable.relateToOperator("extractSubArrayQCOM", EOpExtractSubArrayQCOM);
+        symbolTable.relateToOperator("vectorToCoopmatQCOM", EOpCompositeConstructCoopMatQCOM);
+        symbolTable.relateToOperator("coopmatToVectorQCOM", EOpCompositeExtractCoopMatQCOM);
+
        if (profile != EEsProfile && version >= 460) {
            symbolTable.relateToOperator("fetchMicroTriangleVertexPositionNV", EOpFetchMicroTriangleVertexPositionNV);
            symbolTable.relateToOperator("fetchMicroTriangleVertexBarycentricNV", EOpFetchMicroTriangleVertexBarycentricNV);
--- a/3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp
@@ -400,6 +400,8 @@ TIntermTyped* TIntermediate::addUnaryMath(TOperator op, TIntermTyped* child,
    case EOpConstructDouble: newType = EbtDouble; break;
    case EOpConstructFloat16: newType = EbtFloat16; break;
    case EOpConstructBFloat16: newType = EbtBFloat16; break;
+    case EOpConstructFloatE4M3: newType = EbtFloatE4M3; break;
+    case EOpConstructFloatE5M2: newType = EbtFloatE5M2; break;
    default: break; // some compilers want this
    }

@@ -430,7 +432,9 @@ TIntermTyped* TIntermediate::addUnaryMath(TOperator op, TIntermTyped* child,
        case EOpConstructFloat:
        case EOpConstructDouble:
        case EOpConstructFloat16:
-        case EOpConstructBFloat16: {
+        case EOpConstructBFloat16:
+        case EOpConstructFloatE5M2:
+        case EOpConstructFloatE4M3: {
            TIntermUnary* unary_node = child->getAsUnaryNode();
            if (unary_node != nullptr)
                unary_node->updatePrecision();
@@ -571,9 +575,9 @@ bool TIntermediate::isConversionAllowed(TOperator op, TIntermTyped* node) const

 bool TIntermediate::buildConvertOp(TBasicType dst, TBasicType src, TOperator& newOp) const
 {
-    // bfloat16_t <-> bool not supported
-    if ((src == EbtBFloat16 && dst == EbtBool) ||
-        (dst == EbtBFloat16 && src == EbtBool)) {
+    // (bfloat16_t,fp8) <-> bool not supported
+    if (((src == EbtBFloat16 || src == EbtFloatE5M2 || src == EbtFloatE4M3) && dst == EbtBool) ||
+        ((dst == EbtBFloat16 || dst == EbtFloatE5M2 || dst == EbtFloatE4M3) && src == EbtBool)) {
        return false;
    }

@@ -604,12 +608,15 @@ TIntermTyped* TIntermediate::createConversion(TBasicType convertTo, TIntermTyped
                                node->getBasicType() == EbtInt   || node->getBasicType() == EbtUint   ||
                                node->getBasicType() == EbtInt64 || node->getBasicType() == EbtUint64);

-    bool convertToFloatTypes = (convertTo == EbtFloat16 || convertTo == EbtBFloat16 || convertTo == EbtFloat || convertTo == EbtDouble);
+    bool convertToFloatTypes = (convertTo == EbtFloat16 || convertTo == EbtBFloat16 || convertTo == EbtFloat || convertTo == EbtDouble ||
+                                convertTo == EbtFloatE5M2 || convertTo == EbtFloatE4M3);

    bool convertFromFloatTypes = (node->getBasicType() == EbtFloat16 ||
                                  node->getBasicType() == EbtBFloat16 ||
                                  node->getBasicType() == EbtFloat ||
-                                  node->getBasicType() == EbtDouble);
+                                  node->getBasicType() == EbtDouble ||
+                                  node->getBasicType() == EbtFloatE5M2 ||
+                                  node->getBasicType() == EbtFloatE4M3);

    if (((convertTo == EbtInt8 || convertTo == EbtUint8) && ! convertFromIntTypes) ||
        ((node->getBasicType() == EbtInt8 || node->getBasicType() == EbtUint8) && ! convertToIntTypes)) {
@@ -832,7 +839,8 @@ TIntermTyped* TIntermediate::addConversion(TOperator op, const TType& type, TInt
    // Reject implicit conversions to cooperative matrix types
    if (node->getType().isCoopMat() &&
        op != EOpConstructCooperativeMatrixNV &&
-        op != EOpConstructCooperativeMatrixKHR)
+        op != EOpConstructCooperativeMatrixKHR &&
+        op != glslang::EOpCompositeConstructCoopMatQCOM)
        return nullptr;

    if (node->getType().isTensorLayoutNV() ||
@@ -858,12 +866,15 @@ TIntermTyped* TIntermediate::addConversion(TOperator op, const TType& type, TInt
    case EOpConstructDouble:
    case EOpConstructFloat16:
    case EOpConstructBFloat16:
+    case EOpConstructFloatE5M2:
+    case EOpConstructFloatE4M3:
    case EOpConstructInt8:
    case EOpConstructUint8:
    case EOpConstructInt16:
    case EOpConstructUint16:
    case EOpConstructInt64:
    case EOpConstructUint64:
+    case EOpConstructSaturated:
        break;

    //
@@ -965,6 +976,8 @@ TIntermTyped* TIntermediate::addConversion(TOperator op, const TType& type, TInt
    //  - at the time of this writing (14-Aug-2020), no test results are changed by this.
    switch (op) {
    case EOpConstructBFloat16:
+    case EOpConstructFloatE5M2:
+    case EOpConstructFloatE4M3:
        canPromoteConstant = true;
        break;
    case EOpConstructFloat16:
@@ -1270,6 +1283,8 @@ bool TIntermediate::isFPPromotion(TBasicType from, TBasicType to) const
    if (to == EbtDouble) {
        switch(from) {
        case EbtBFloat16:
+        case EbtFloatE5M2:
+        case EbtFloatE4M3:
        case EbtFloat16:
        case EbtFloat:
            return true;
@@ -1362,7 +1377,7 @@ bool TIntermediate::isIntegralConversion(TBasicType from, TBasicType to) const

 bool TIntermediate::isFPConversion(TBasicType from, TBasicType to) const
 {
-    if (to == EbtFloat && (from == EbtFloat16 || from == EbtBFloat16)) {
+    if (to == EbtFloat && (from == EbtFloat16 || from == EbtBFloat16 || from == EbtFloatE5M2 || from == EbtFloatE4M3)) {
        return true;
    } else {
        return false;
@@ -1517,6 +1532,8 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
                                        (numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types) || 
                                        numericFeatures.contains(TNumericFeatures::gpu_shader_half_float));
            case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
                return true;
            case EbtInt8:
            case EbtUint8:
@@ -1540,6 +1557,8 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
                    numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types) ||
                    getSource() == EShSourceHlsl;
            case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
                return true;
            case EbtInt8:
            case EbtUint8:
@@ -1610,6 +1629,18 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
            case EbtInt16:
            case EbtUint16:
                return numericFeatures.contains(TNumericFeatures::gpu_shader_int16);
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
+                return true;
+            default:
+                break;
+            }
+            return false;
+        case EbtBFloat16:
+            switch (from) {
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
+                return true;
            default:
                break;
            }
@@ -2077,6 +2108,24 @@ TOperator TIntermediate::mapTypeToConstructorOp(const TType& type) const
        default: break; // some compilers want this
        }
        break;
+    case EbtFloatE5M2:
+        switch (type.getVectorSize()) {
+        case 1: op = EOpConstructFloatE5M2;  break;
+        case 2: op = EOpConstructFloatE5M2Vec2;  break;
+        case 3: op = EOpConstructFloatE5M2Vec3;  break;
+        case 4: op = EOpConstructFloatE5M2Vec4;  break;
+        default: break; // some compilers want this
+        }
+        break;
+    case EbtFloatE4M3:
+        switch (type.getVectorSize()) {
+        case 1: op = EOpConstructFloatE4M3;  break;
+        case 2: op = EOpConstructFloatE4M3Vec2;  break;
+        case 3: op = EOpConstructFloatE4M3Vec3;  break;
+        case 4: op = EOpConstructFloatE4M3Vec4;  break;
+        default: break; // some compilers want this
+        }
+        break;
    case EbtInt8:
        switch(type.getVectorSize()) {
        case 1: op = EOpConstructInt8;   break;
@@ -2486,7 +2535,7 @@ TIntermConstantUnion* TIntermediate::addConstantUnion(bool b, const TSourceLoc&

 TIntermConstantUnion* TIntermediate::addConstantUnion(double d, TBasicType baseType, const TSourceLoc& loc, bool literal) const
 {
-    assert(baseType == EbtFloat || baseType == EbtDouble || baseType == EbtFloat16 || baseType == EbtBFloat16);
+    assert(baseType == EbtFloat || baseType == EbtDouble || baseType == EbtFloat16 || baseType == EbtBFloat16 || baseType == EbtFloatE5M2 || baseType == EbtFloatE4M3);

    if (isEsProfile() && (baseType == EbtFloat || baseType == EbtFloat16)) {
        int exponent = 0;
@@ -3741,6 +3790,8 @@ TIntermTyped* TIntermediate::promoteConstantUnion(TBasicType promoteTo, TIntermC
 #define TO_ALL(Get)   \
        switch (promoteTo) { \
        case EbtBFloat16: PROMOTE(setDConst, double, Get); break; \
+        case EbtFloatE5M2: PROMOTE(setDConst, double, Get); break; \
+        case EbtFloatE4M3: PROMOTE(setDConst, double, Get); break; \
        case EbtFloat16: PROMOTE(setDConst, double, Get); break; \
        case EbtFloat: PROMOTE(setDConst, double, Get); break; \
        case EbtDouble: PROMOTE(setDConst, double, Get); break; \
@@ -3763,6 +3814,8 @@ TIntermTyped* TIntermediate::promoteConstantUnion(TBasicType promoteTo, TIntermC
        case EbtBool: TO_ALL(getBConst); break;
        case EbtFloat16: TO_ALL(getDConst); break;
        case EbtBFloat16: TO_ALL(getDConst); break;
+        case EbtFloatE5M2: TO_ALL(getDConst); break;
+        case EbtFloatE4M3: TO_ALL(getDConst); break;
        case EbtDouble: TO_ALL(getDConst); break;
        case EbtInt8: TO_ALL(getI8Const); break;
        case EbtInt16: TO_ALL(getI16Const); break;
--- a/3rdparty/glslang/glslang/MachineIndependent/ParseContextBase.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/ParseContextBase.cpp
@@ -424,7 +424,7 @@ const TFunction* TParseContextBase::selectFunction(
        // to even be a potential match, number of arguments must be >= the number of
        // fixed (non-default) parameters, and <= the total (including parameter with defaults).
        if (call.getParamCount() < candidate.getFixedParamCount() ||
-            call.getParamCount() > candidate.getParamCount())
+            (call.getParamCount() > candidate.getParamCount() && !candidate.isVariadic()))
            continue;

        // see if arguments are convertible
@@ -463,7 +463,8 @@ const TFunction* TParseContextBase::selectFunction(
    const auto betterParam = [&call, &better](const TFunction& can1, const TFunction& can2) -> bool {
        // is call -> can2 better than call -> can1 for any parameter
        bool hasBetterParam = false;
-        for (int param = 0; param < call.getParamCount(); ++param) {
+        const int paramCount = std::min({call.getParamCount(), can1.getParamCount(), can2.getParamCount()});
+        for (int param = 0; param < paramCount; ++param) {
            if (better(*call[param].type, *can1[param].type, *can2[param].type)) {
                hasBetterParam = true;
                break;
@@ -474,7 +475,8 @@ const TFunction* TParseContextBase::selectFunction(

    const auto equivalentParams = [&call, &better](const TFunction& can1, const TFunction& can2) -> bool {
        // is call -> can2 equivalent to call -> can1 for all the call parameters?
-        for (int param = 0; param < call.getParamCount(); ++param) {
+        const int paramCount = std::min({call.getParamCount(), can1.getParamCount(), can2.getParamCount()});
+        for (int param = 0; param < paramCount; ++param) {
            if (better(*call[param].type, *can1[param].type, *can2[param].type) ||
                better(*call[param].type, *can2[param].type, *can1[param].type))
                return false;
--- a/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp
@@ -912,7 +912,8 @@ TIntermTyped* TParseContext::handleBinaryMath(const TSourceLoc& loc, const char*
    if (((left->getType().contains16BitFloat() || right->getType().contains16BitFloat()) && !float16Arithmetic()) ||
        ((left->getType().contains16BitInt() || right->getType().contains16BitInt()) && !int16Arithmetic()) ||
        ((left->getType().contains8BitInt() || right->getType().contains8BitInt()) && !int8Arithmetic()) ||
-        (left->getType().containsBFloat16() || right->getType().containsBFloat16())) {
+        (left->getType().containsBFloat16() || right->getType().containsBFloat16()) ||
+        (left->getType().contains8BitFloat() || right->getType().contains8BitFloat())) {
        allowed = false;
    }

@@ -940,7 +941,8 @@ TIntermTyped* TParseContext::handleUnaryMath(const TSourceLoc& loc, const char*
    if ((childNode->getType().contains16BitFloat() && !float16Arithmetic()) ||
        (childNode->getType().contains16BitInt() && !int16Arithmetic()) ||
        (childNode->getType().contains8BitInt() && !int8Arithmetic()) ||
-        (childNode->getType().containsBFloat16())) {
+        (childNode->getType().containsBFloat16()) ||
+        (childNode->getType().contains8BitFloat())) {
        allowed = false;
    }

@@ -1405,6 +1407,24 @@ TIntermTyped* TParseContext::handleFunctionCall(const TSourceLoc& loc, TFunction
                    // At this early point there is a slight ambiguity between whether an aggregate 'arguments'
                    // is the single argument itself or its children are the arguments.  Only one argument
                    // means take 'arguments' itself as the one argument.
+                    if ((*fnCandidate)[i].defaultValue) {
+                        if (!aggregate) {
+                            // Only one argument was passed (rest are default arguments) so arguments isn't a TIntermAggregate.
+                            // But the function takes at least one more argument, so a TIntermAggregate is needed.
+                            aggregate = new TIntermAggregate;
+                            aggregate->getSequence().push_back(arguments);
+                            arguments = aggregate;
+                        }
+                        if (i >= static_cast<int>(aggregate->getSequence().size())) {
+                            // Append the default value if there are no more arguments left in the aggregate.
+                            TIntermConstantUnion *defaultValue = nullptr;
+                            if (const auto *constUnion = (*fnCandidate)[i].defaultValue->getAsConstantUnion()) {
+                                defaultValue = new TIntermConstantUnion(constUnion->getConstArray(), constUnion->getType());
+                            }
+                            assert(defaultValue && "unsupported default value construct");
+                            aggregate->getSequence().push_back(defaultValue);
+                        }
+                    }
                    TIntermNode* arg = fnCandidate->getParamCount() == 1 ? arguments : (aggregate ? aggregate->getSequence()[i] : arguments);
                    TQualifier& formalQualifier = (*fnCandidate)[i].type->getQualifier();
                    if (formalQualifier.isParamOutput()) {
@@ -1520,6 +1540,8 @@ TIntermTyped* TParseContext::handleFunctionCall(const TSourceLoc& loc, TFunction

            handleCoopMat2FunctionCall(loc, fnCandidate, result, arguments);

+            handleVector2CoopMatConversionCall(loc, fnCandidate, result, arguments);
+
            if (result->getAsTyped()->getType().isCoopVecNV() &&
               !result->getAsTyped()->getType().isParameterized()) {
                if (auto unaryNode = result->getAsUnaryNode())
@@ -1527,6 +1549,11 @@ TIntermTyped* TParseContext::handleFunctionCall(const TSourceLoc& loc, TFunction
                else
                    result->setType(result->getAsAggregate()->getSequence()[0]->getAsTyped()->getType());
            }
+
+            if (fnCandidate->getBuiltInOp() == EOpConstructSaturated) {
+                // result type is taken from the first parameter
+                result->setType(result->getAsAggregate()->getSequence()[0]->getAsTyped()->getType());
+            }
        }
    }

@@ -1756,6 +1783,297 @@ void TParseContext::handleCoopMat2FunctionCall(const TSourceLoc& loc, const TFun
    }
 }

+
+static const uint32_t spv_Scope_Subgroup = 3; // SPIR-V Scope "Subgroup"; the only cooperative matrix scope accepted below
+
+// Finish type checking for the QCOM array <-> cooperative matrix conversion built-ins
+// (EOpBitCastArrayQCOM, EOpExtractSubArrayQCOM, EOpCompositeConstructCoopMatQCOM and
+// EOpCompositeExtractCoopMatQCOM); calls to any other built-in are left untouched.
+//
+// The last call argument is the destination: the result type is derived from it, the argument
+// is popped from 'arguments', and 'result' becomes the assignment "destination = call" (the
+// original call node is kept when that assignment cannot be built).
+//
+// Length checks only run for dimensions without a size node (getDimNode() == nullptr). Problems
+// are reported through error() and processing continues.
+void TParseContext::handleVector2CoopMatConversionCall(const TSourceLoc& loc, const TFunction* fnCandidate,
+                                                       TIntermTyped*& result, TIntermNode* arguments)
+{
+  const uint32_t CM_MatrixUseA = 0;           // == gl_MatrixUseA
+  const uint32_t CM_MatrixUseB = 1;           // == gl_MatrixUseB
+  const uint32_t CM_MatrixUseAccumulator = 2; // == gl_MatrixUseAccumulator
+
+  const TOperator builtinOp = fnCandidate->getBuiltInOp();
+
+  if (!(builtinOp == EOpBitCastArrayQCOM || builtinOp == EOpExtractSubArrayQCOM ||
+        builtinOp == EOpCompositeConstructCoopMatQCOM || builtinOp == EOpCompositeExtractCoopMatQCOM))
+    return;
+
+  TPublicType pubType{};
+  auto* oldResult = result;
+
+  if (builtinOp == EOpBitCastArrayQCOM) {
+    auto srcArr = arguments->getAsAggregate()->getSequence()[0]->getAsTyped();
+    auto& srcTy = srcArr->getType();
+    auto srcArrLen = srcTy.getArraySizes()->getDimSize(0);
+    auto srcLenAsNode = srcTy.getArraySizes()->getDimNode(0);
+
+    auto dstArr = arguments->getAsAggregate()->getSequence()[1]->getAsTyped();
+    auto& dstTy = dstArr->getType();
+    auto dstArrLen = dstTy.getArraySizes()->getDimSize(0);
+    auto dstLenAsNode = dstTy.getArraySizes()->getDimNode(0);
+
+    if (srcLenAsNode == nullptr && dstLenAsNode == nullptr) {
+      // a bit cast must preserve the total number of bits (length * element width)
+      if ((srcArrLen * GetNumBits(srcTy.getBasicType())) != (dstArrLen * GetNumBits(dstTy.getBasicType())))
+        error(loc, "source and target arrays have different bit sizes", "", "");
+    }
+
+    pubType.basicType = dstTy.getBasicType();
+    pubType.vectorSize = 1u;
+    pubType.qualifier.precision = EpqNone;
+    pubType.coopmatNV = false;
+    pubType.coopmatKHR = false;
+    pubType.arraySizes = new TArraySizes;
+    pubType.arraySizes->addInnerSize(dstArrLen, dstLenAsNode);
+    pubType.typeParameters = nullptr;
+  }
+
+  if (builtinOp == EOpExtractSubArrayQCOM) {
+    auto dstArr = arguments->getAsAggregate()->getSequence()[2]->getAsTyped();
+    auto& dstTy = dstArr->getType();
+    auto dstArrLen = dstTy.getArraySizes()->getDimSize(0);
+    auto dstLenAsNode = dstTy.getArraySizes()->getDimNode(0);
+
+    // The destination must span exactly 32 bits (length * GetNumBits of the element type).
+    // NOTE(review): the message says "byte size" although the value checked is in bits -- confirm.
+    if (dstLenAsNode == nullptr && (dstArrLen * GetNumBits(dstTy.getBasicType())) != 32)
+      error(loc, "the byte size of the target array must be 32", "", "");
+
+    pubType.basicType = dstTy.getBasicType();
+    pubType.vectorSize = 1u;
+    pubType.qualifier.precision = EpqNone;
+    pubType.coopmatNV = false;
+    pubType.coopmatKHR = false;
+    pubType.arraySizes = new TArraySizes;
+    pubType.arraySizes->addInnerSize(dstArrLen, dstLenAsNode);
+    pubType.typeParameters = nullptr;
+  }
+
+  if (builtinOp == EOpCompositeConstructCoopMatQCOM) {
+    auto& srcType = arguments->getAsAggregate()->getSequence()[0]->getAsTyped()->getType();
+    auto& dstType = arguments->getAsAggregate()->getSequence()[1]->getAsTyped()->getType();
+
+    glslang::TBasicType srcBasicType = srcType.getBasicType();
+    glslang::TBasicType dstBasicType = dstType.getBasicType();
+
+    if (srcBasicType != EbtUint && srcBasicType != dstBasicType)
+      error(loc, "source and destination element types are not compatible", "", "");
+
+    uint32_t scope = spv_Scope_Subgroup;
+    uint32_t coopMatKHRuse = -1u;
+    uint32_t coopMatNumRows = -1u, coopMatNumCols = -1u;
+    TIntermTyped *nodeNumRows = nullptr, *nodeNumCols = nullptr;
+    const TTypeParameters* dstTypeParameters = dstType.getTypeParameters();
+    if (dstTypeParameters == nullptr || dstTypeParameters->arraySizes == nullptr ||
+        dstTypeParameters->arraySizes->getNumDims() != 4) {
+      error(loc, "destination cooperative matrix has an unsupported type", "", "");
+    } else {
+      auto arraySizes = dstTypeParameters->arraySizes;
+      scope = arraySizes->getDimSize(0);
+      coopMatNumRows = arraySizes->getDimSize(1);
+      nodeNumRows = arraySizes->getDimNode(1);
+      coopMatNumCols = arraySizes->getDimSize(2);
+      nodeNumCols = arraySizes->getDimNode(2);
+      coopMatKHRuse = arraySizes->getDimSize(3);
+    }
+
+    if (scope != spv_Scope_Subgroup) {
+      scope = spv_Scope_Subgroup;
+      error(loc, "cooperative matrix has unsupported scope; gl_SubgroupScope is expected", "", "");
+    }
+
+    if (coopMatKHRuse > CM_MatrixUseAccumulator) {
+      coopMatKHRuse = CM_MatrixUseA;
+      error(loc, "cooperative matrix use must be one of gl_MatrixUseA, gl_MatrixUseB, gl_MatrixUseAccumulator",
+            "", "");
+    }
+
+    uint32_t dstBasicTypeSize = GetNumBits(dstBasicType) / 8;
+
+    unsigned numRows = coopMatNumRows;
+    TIntermTyped* specConstRows = nodeNumRows;
+    unsigned numCols = coopMatNumCols;
+    TIntermTyped* specConstCols = nodeNumCols;
+
+    // input array type
+    uint32_t arrayLen = srcType.getArraySizes()->getDimSize(0);
+    auto arrayDimNode = srcType.getArraySizes()->getDimNode(0);
+
+    if (coopMatKHRuse == CM_MatrixUseA || coopMatKHRuse == CM_MatrixUseAccumulator) {
+      // update numCols
+      if (dstBasicTypeSize != 0 && arrayDimNode == nullptr && specConstCols == nullptr)
+        numCols = arrayLen * (sizeof(uint32_t) / dstBasicTypeSize);
+    } else if (coopMatKHRuse == CM_MatrixUseB) {
+      // update numRows
+      if (dstBasicTypeSize != 0 && arrayDimNode == nullptr && specConstRows == nullptr)
+        numRows = arrayLen * (sizeof(uint32_t) / dstBasicTypeSize);
+    }
+
+    // construct the type
+    // NOTE(review): this local arraySizes is never read below (pubType.arraySizes stays nullptr and
+    // the result reuses dstTypeParameters), so the derived numRows/numCols currently have no effect.
+    TArraySizes* arraySizes = new TArraySizes;
+
+    // add Scope
+    arraySizes->addInnerSize(scope);
+
+    // add the row size
+    arraySizes->addInnerSize(numRows, specConstRows); // copy from source
+    // add the column size
+    arraySizes->addInnerSize(numCols, specConstCols); // copy from source
+    // add cooperative matrix use
+    arraySizes->addInnerSize(coopMatKHRuse);
+
+    pubType.basicType = dstBasicType;
+    pubType.vectorSize = 1u;
+    pubType.qualifier = srcType.getQualifier();
+    pubType.qualifier.precision = EpqNone;
+    pubType.coopmatNV = dstType.isCoopMatNV();
+    pubType.coopmatKHR = dstType.isCoopMatKHR();
+    pubType.arraySizes = nullptr;
+    pubType.typeParameters = const_cast<glslang::TTypeParameters*>(dstTypeParameters);
+  }
+
+  if (builtinOp == EOpCompositeExtractCoopMatQCOM) {
+    auto& srcType = arguments->getAsAggregate()->getSequence()[0]->getAsTyped()->getType();
+    auto& dstType = arguments->getAsAggregate()->getSequence()[1]->getAsTyped()->getType();
+
+    glslang::TBasicType srcBasicType = srcType.getBasicType();
+    glslang::TBasicType dstBasicType = dstType.getBasicType();
+
+    if (dstBasicType != EbtUint && srcBasicType != dstBasicType)
+      error(loc, "source and destination element types are not compatible", "", "");
+
+    uint32_t scope = spv_Scope_Subgroup;
+    unsigned coopMatKHRuse = -1u;
+    const TTypeParameters* srcTypeParameters = srcType.getTypeParameters();
+    const bool srcParamsValid = srcTypeParameters != nullptr && srcTypeParameters->arraySizes != nullptr &&
+                                srcTypeParameters->arraySizes->getNumDims() == 4;
+    if (!srcParamsValid) {
+      error(loc, "source cooperative matrix has an unsupported type", "", "");
+    } else {
+      auto arraySizes = srcTypeParameters->arraySizes;
+      scope = arraySizes->getDimSize(0);
+      coopMatKHRuse = arraySizes->getDimSize(3);
+    }
+
+    if (scope != spv_Scope_Subgroup) {
+      scope = spv_Scope_Subgroup;
+      error(loc, "cooperative matrix has unsupported scope; gl_SubgroupScope is expected", "", "");
+    }
+
+    if (coopMatKHRuse > CM_MatrixUseAccumulator) {
+      coopMatKHRuse = CM_MatrixUseA;
+      error(loc, "cooperative matrix use must be one of gl_MatrixUseA, gl_MatrixUseB, gl_MatrixUseAccumulator",
+            "", "");
+    }
+
+    auto dstArrLen = dstType.getArraySizes()->getDimSize(0);
+    auto dstLenAsNode = dstType.getArraySizes()->getDimNode(0);
+
+    if (dstLenAsNode == nullptr) {
+      bool ok = true;
+      switch (dstBasicType) {
+      case EbtUint:
+      case EbtInt:
+      case EbtFloat:
+        ok = ((coopMatKHRuse == CM_MatrixUseA || coopMatKHRuse == CM_MatrixUseB) && dstArrLen == 8) ||
+             (coopMatKHRuse == CM_MatrixUseAccumulator);
+        break;
+      case EbtFloat16:
+        ok = ((coopMatKHRuse == CM_MatrixUseA || coopMatKHRuse == CM_MatrixUseB) && dstArrLen == 16) ||
+             (coopMatKHRuse == CM_MatrixUseAccumulator);
+        break;
+      case EbtInt8:
+      case EbtUint8:
+        ok = ((coopMatKHRuse == CM_MatrixUseA || coopMatKHRuse == CM_MatrixUseB) && dstArrLen == 32) ||
+             (coopMatKHRuse == CM_MatrixUseAccumulator);
+        break;
+      default:
+        error(loc, "unsupported element type", "", "");
+      }
+      if (!ok)
+        error(loc, "unsupported destination array length", "", "");
+    }
+
+    pubType.basicType = dstBasicType;
+    pubType.vectorSize = 1u;
+    pubType.qualifier.precision = EpqNone;
+    pubType.coopmatNV = false;
+    pubType.coopmatKHR = false;
+
+    pubType.arraySizes = new TArraySizes;
+    pubType.arraySizes->addInnerSize(dstArrLen, dstLenAsNode);
+
+    // Compare the destination length with the source dimension selected by the use: columns
+    // (dimension 2) for A and Accumulator, rows (dimension 1) for B; the use was clamped above.
+    if (srcParamsValid) {
+      const int index = (coopMatKHRuse == CM_MatrixUseB) ? 1 : 2;
+      int32_t numRowsOrCols = srcTypeParameters->arraySizes->getDimSize(index);
+      auto dimNode = srcTypeParameters->arraySizes->getDimNode(index);
+      if (dimNode != nullptr && dstLenAsNode == nullptr) {
+        numRowsOrCols = dstType.getArraySizes()->getDimSize(0);
+        dimNode = nullptr;
+      }
+      if (dimNode == nullptr && dstLenAsNode == nullptr) {
+        const char* msg = nullptr;
+        if (coopMatKHRuse == CM_MatrixUseA && (numRowsOrCols != dstArrLen && dstArrLen != 8)) {
+          msg = "the source matrix's column is not compatible with the destination array";
+        } else if (coopMatKHRuse == CM_MatrixUseB && (numRowsOrCols != dstArrLen && dstArrLen != 8)) {
+          msg = "the source matrix's row is not compatible with the destination array";
+        } else if (coopMatKHRuse == CM_MatrixUseAccumulator &&
+                   (numRowsOrCols != dstArrLen &&
+                    (srcBasicType == EbtFloat16 && numRowsOrCols != 2 * dstArrLen))) {
+          msg = "the source matrix's column is not compatible with the destination array";
+        }
+        if (msg != nullptr)
+          error(loc, msg, "", "");
+      }
+    }
+
+    pubType.typeParameters = nullptr;
+  }
+
+  TType resultType(pubType);
+  if (pubType.typeParameters != nullptr)
+    resultType.copyTypeParameters(*pubType.typeParameters);
+  // need to make StorageQualifier temp
+  resultType.makeTemporary();
+  result->setType(resultType);
+
+  // the RHS of an assignment to be formed
+  auto rhs = result;
+
+  // the LHS of an assignment to be formed; pick the last argument
+  int lhsIdx = (builtinOp == EOpExtractSubArrayQCOM ? 2 : 1);
+  auto lhs = arguments->getAsAggregate()->getSequence()[lhsIdx]->getAsTyped();
+  // pop the last argument from the arguments sequence
+  arguments->getAsAggregate()->getSequence().pop_back();
+
+  // Create OpAssign
+  {
+    arrayObjectCheck(loc, lhs->getType(), "array assignment");
+    storage16BitAssignmentCheck(loc, lhs->getType(), "=");
+    lValueErrorCheck(loc, "assign", lhs);
+    rValueErrorCheck(loc, "assign", rhs);
+    result = addAssign(loc, EOpAssign, lhs, rhs);
+    if (result == nullptr)
+      result = oldResult;
+  }
+}
+
+
 TIntermTyped* TParseContext::handleBuiltInFunctionCall(TSourceLoc loc, TIntermNode* arguments,
                                                       const TFunction& function)
 {
@@ -1873,6 +2191,7 @@ void TParseContext::computeBuiltinPrecisions(TIntermTyped& node, const TFunction
        case EOpDebugPrintf:
        case EOpCooperativeMatrixPerElementOpNV:
        case EOpCooperativeMatrixReduceNV:
+        case EOpConstructSaturated:
            numArgs = 0;
            break;
        default:
@@ -1881,6 +2200,8 @@ void TParseContext::computeBuiltinPrecisions(TIntermTyped& node, const TFunction
        // find the maximum precision from the arguments and parameters
        for (unsigned int arg = 0; arg < numArgs; ++arg) {
            operationPrecision = std::max(operationPrecision, sequence[arg]->getAsTyped()->getQualifier().precision);
+        }
+        for (int arg = 0; arg < function.getParamCount(); ++arg) {
            operationPrecision = std::max(operationPrecision, function[arg].type->getQualifier().precision);
        }
        // compute the result precision
@@ -2077,7 +2398,10 @@ void TParseContext::addInputArgumentConversions(const TFunction& function, TInte
        TIntermTyped* arg = function.getParamCount() == 1 ? arguments->getAsTyped() : (aggregate ? aggregate->getSequence()[i]->getAsTyped() : arguments->getAsTyped());
        if (*function[i].type != arg->getType()) {
            if (function[i].type->getQualifier().isParamInput() &&
-               !function[i].type->isCoopMat()) {
+               !function[i].type->isCoopMat() && !function[i].type->isTensorARM() &&
+               // tensor layout/view type declarations don't do conversions, so we can't convert these parameters either
+               (function.getName() != "createTensorLayoutNV") &&
+               (function.getName() != "createTensorViewNV")) {
                // In-qualified arguments just need an extra node added above the argument to
                // convert to the correct type.
                arg = intermediate.addConversion(EOpFunctionCall, *function[i].type, arg);
@@ -2204,6 +2528,9 @@ void TParseContext::memorySemanticsCheck(const TSourceLoc& loc, const TFunction&
    const int gl_StorageSemanticsImage    = 0x800;
    const int gl_StorageSemanticsOutput   = 0x1000;

+    const int nonRelaxedMemoryOrder = gl_SemanticsAcquire |
+                                      gl_SemanticsRelease |
+                                      gl_SemanticsAcquireRelease;

    unsigned int semantics = 0, storageClassSemantics = 0;
    unsigned int semantics2 = 0, storageClassSemantics2 = 0;
@@ -2270,22 +2597,6 @@ void TParseContext::memorySemanticsCheck(const TSourceLoc& loc, const TFunction&
        break;
    }

-    if ((semantics & gl_SemanticsAcquire) &&
-        (callNode.getOp() == EOpAtomicStore || callNode.getOp() == EOpImageAtomicStore)) {
-        error(loc, "gl_SemanticsAcquire must not be used with (image) atomic store",
-              fnCandidate.getName().c_str(), "");
-    }
-    if ((semantics & gl_SemanticsRelease) &&
-        (callNode.getOp() == EOpAtomicLoad || callNode.getOp() == EOpImageAtomicLoad)) {
-        error(loc, "gl_SemanticsRelease must not be used with (image) atomic load",
-              fnCandidate.getName().c_str(), "");
-    }
-    if ((semantics & gl_SemanticsAcquireRelease) &&
-        (callNode.getOp() == EOpAtomicStore || callNode.getOp() == EOpImageAtomicStore ||
-         callNode.getOp() == EOpAtomicLoad  || callNode.getOp() == EOpImageAtomicLoad)) {
-        error(loc, "gl_SemanticsAcquireRelease must not be used with (image) atomic load/store",
-              fnCandidate.getName().c_str(), "");
-    }
    if (((semantics | semantics2) & ~(gl_SemanticsAcquire |
                                      gl_SemanticsRelease |
                                      gl_SemanticsAcquireRelease |
@@ -2294,6 +2605,7 @@ void TParseContext::memorySemanticsCheck(const TSourceLoc& loc, const TFunction&
                                      gl_SemanticsVolatile))) {
        error(loc, "Invalid semantics value", fnCandidate.getName().c_str(), "");
    }
+
    if (((storageClassSemantics | storageClassSemantics2) & ~(gl_StorageSemanticsBuffer |
                                                              gl_StorageSemanticsShared |
                                                              gl_StorageSemanticsImage |
@@ -2301,58 +2613,102 @@ void TParseContext::memorySemanticsCheck(const TSourceLoc& loc, const TFunction&
        error(loc, "Invalid storage class semantics value", fnCandidate.getName().c_str(), "");
    }

-    if (callNode.getOp() == EOpMemoryBarrier) {
-        if (!IsPow2(semantics & (gl_SemanticsAcquire | gl_SemanticsRelease | gl_SemanticsAcquireRelease))) {
-            error(loc, "Semantics must include exactly one of gl_SemanticsRelease, gl_SemanticsAcquire, or "
-                       "gl_SemanticsAcquireRelease", fnCandidate.getName().c_str(), "");
-        }
-    } else {
-        if (semantics & (gl_SemanticsAcquire | gl_SemanticsRelease | gl_SemanticsAcquireRelease)) {
-            if (!IsPow2(semantics & (gl_SemanticsAcquire | gl_SemanticsRelease | gl_SemanticsAcquireRelease))) {
-                error(loc, "Semantics must not include multiple of gl_SemanticsRelease, gl_SemanticsAcquire, or "
-                           "gl_SemanticsAcquireRelease", fnCandidate.getName().c_str(), "");
-            }
-        }
-        if (semantics2 & (gl_SemanticsAcquire | gl_SemanticsRelease | gl_SemanticsAcquireRelease)) {
-            if (!IsPow2(semantics2 & (gl_SemanticsAcquire | gl_SemanticsRelease | gl_SemanticsAcquireRelease))) {
-                error(loc, "semUnequal must not include multiple of gl_SemanticsRelease, gl_SemanticsAcquire, or "
-                           "gl_SemanticsAcquireRelease", fnCandidate.getName().c_str(), "");
-            }
-        }
-    }
-    if (callNode.getOp() == EOpMemoryBarrier) {
-        if (storageClassSemantics == 0) {
-            error(loc, "Storage class semantics must not be zero", fnCandidate.getName().c_str(), "");
-        }
-    }
-    if (callNode.getOp() == EOpBarrier && semantics != 0 && storageClassSemantics == 0) {
-        error(loc, "Storage class semantics must not be zero", fnCandidate.getName().c_str(), "");
-    }
-    if ((callNode.getOp() == EOpAtomicCompSwap || callNode.getOp() == EOpImageAtomicCompSwap) &&
-        (semantics2 & (gl_SemanticsRelease | gl_SemanticsAcquireRelease))) {
-        error(loc, "semUnequal must not be gl_SemanticsRelease or gl_SemanticsAcquireRelease",
+    if (((semantics & nonRelaxedMemoryOrder) && !IsPow2(semantics & nonRelaxedMemoryOrder)) ||
+        ((semantics2 & nonRelaxedMemoryOrder) && !IsPow2(semantics2 & nonRelaxedMemoryOrder))) {
+        error(loc,
+              "Semantics must not include multiple of gl_SemanticsRelease, gl_SemanticsAcquire, or "
+              "gl_SemanticsAcquireRelease",
              fnCandidate.getName().c_str(), "");
    }
-    if ((semantics & gl_SemanticsMakeAvailable) &&
-        !(semantics & (gl_SemanticsRelease | gl_SemanticsAcquireRelease))) {
+
+    if (((semantics & nonRelaxedMemoryOrder) && !storageClassSemantics) ||
+        ((semantics2 & nonRelaxedMemoryOrder) && !storageClassSemantics2)) {
+        error(loc,
+              "Storage class semantics must not be zero when used with gl_SemanticsRelease, "
+              "gl_SemanticsAcquire, or gl_SemanticsAcquireRelease",
+              fnCandidate.getName().c_str(), "");
+    }
+
+    if ((storageClassSemantics && !(semantics & nonRelaxedMemoryOrder)) ||
+        (storageClassSemantics2 && !(semantics2 & nonRelaxedMemoryOrder))) {
+        error(loc,
+              "Semantics must be gl_SemanticsRelease, gl_SemanticsAcquire, or gl_SemanticsAcquireRelease when used "
+              "with non-zero storage class semantics",
+              fnCandidate.getName().c_str(), "");
+    }
+
+    if (((semantics & gl_SemanticsMakeAvailable) &&
+         !(semantics & (gl_SemanticsRelease | gl_SemanticsAcquireRelease))) ||
+        ((semantics2 & gl_SemanticsMakeAvailable) &&
+         !(semantics2 & (gl_SemanticsRelease | gl_SemanticsAcquireRelease)))) {
        error(loc, "gl_SemanticsMakeAvailable requires gl_SemanticsRelease or gl_SemanticsAcquireRelease",
              fnCandidate.getName().c_str(), "");
    }
-    if ((semantics & gl_SemanticsMakeVisible) &&
-        !(semantics & (gl_SemanticsAcquire | gl_SemanticsAcquireRelease))) {
+
+    if (((semantics & gl_SemanticsMakeVisible) && !(semantics & (gl_SemanticsAcquire | gl_SemanticsAcquireRelease))) ||
+        ((semantics2 & gl_SemanticsMakeVisible) &&
+         !(semantics2 & (gl_SemanticsAcquire | gl_SemanticsAcquireRelease)))) {
        error(loc, "gl_SemanticsMakeVisible requires gl_SemanticsAcquire or gl_SemanticsAcquireRelease",
              fnCandidate.getName().c_str(), "");
    }
-    if ((semantics & gl_SemanticsVolatile) &&
-        (callNode.getOp() == EOpMemoryBarrier || callNode.getOp() == EOpBarrier)) {
+
+    if ((callNode.getOp() == EOpAtomicStore || callNode.getOp() == EOpImageAtomicStore) &&
+        (semantics & gl_SemanticsAcquire)) {
+        error(loc, "gl_SemanticsAcquire must not be used with (image) atomic store", fnCandidate.getName().c_str(), "");
+    }
+
+    if ((callNode.getOp() == EOpAtomicLoad || callNode.getOp() == EOpImageAtomicLoad) &&
+        (semantics & gl_SemanticsRelease)) {
+        error(loc, "gl_SemanticsRelease must not be used with (image) atomic load", fnCandidate.getName().c_str(), "");
+    }
+
+    if ((callNode.getOp() == EOpAtomicStore || callNode.getOp() == EOpImageAtomicStore ||
+         callNode.getOp() == EOpAtomicLoad || callNode.getOp() == EOpImageAtomicLoad) &&
+        (semantics & gl_SemanticsAcquireRelease)) {
+        error(loc, "gl_SemanticsAcquireRelease must not be used with (image) atomic load/store",
+              fnCandidate.getName().c_str(), "");
+    }
+
+    if (callNode.getOp() == EOpMemoryBarrier &&
+        !(semantics & (gl_SemanticsAcquire | gl_SemanticsRelease | gl_SemanticsAcquireRelease))) {
+        error(loc,
+              "Semantics must include exactly one of gl_SemanticsRelease, gl_SemanticsAcquire, or "
+              "gl_SemanticsAcquireRelease when used with memoryBarrier",
+              fnCandidate.getName().c_str(), "");
+    }
+
+    if ((callNode.getOp() == EOpMemoryBarrier || callNode.getOp() == EOpBarrier) &&
+        (semantics & gl_SemanticsVolatile)) {
        error(loc, "gl_SemanticsVolatile must not be used with memoryBarrier or controlBarrier",
              fnCandidate.getName().c_str(), "");
    }
-    if ((callNode.getOp() == EOpAtomicCompSwap || callNode.getOp() == EOpImageAtomicCompSwap) &&
-        ((semantics ^ semantics2) & gl_SemanticsVolatile)) {
+
+    if (callNode.getOp() == EOpAtomicCompSwap || callNode.getOp() == EOpImageAtomicCompSwap) {
+        if (semantics2 & (gl_SemanticsRelease | gl_SemanticsAcquireRelease)) {
+            error(loc, "semUnequal must not be gl_SemanticsRelease or gl_SemanticsAcquireRelease",
+                  fnCandidate.getName().c_str(), "");
+        }
+        if ((semantics2 & gl_SemanticsAcquire) && !(semantics & (gl_SemanticsAcquire | gl_SemanticsAcquireRelease))) {
+            error(loc,
+                  "semUnequal must not be gl_SemanticsAcquire unless semEqual is gl_SemanticsAcquire "
+                  "or gl_SemanticsAcquireRelease",
+                  fnCandidate.getName().c_str(), "");
+        }
+        if ((semantics2 & gl_SemanticsMakeVisible) && !(semantics & gl_SemanticsMakeVisible)) {
+            error(loc,
+                  "semUnequal must not include gl_SemanticsMakeVisible unless semEqual also includes "
+                  "gl_SemanticsMakeVisible",
+                  fnCandidate.getName().c_str(), "");
+        }
+        if (storageClassSemantics2 & ~(storageClassSemantics)) {
+            error(loc, "semStorageUnequal must not include any option that is not present in semStorageEqual",
+                  fnCandidate.getName().c_str(), "");
+        }
+        if ((semantics ^ semantics2) & gl_SemanticsVolatile) {
            error(loc, "semEqual and semUnequal must either both include gl_SemanticsVolatile or neither",
                  fnCandidate.getName().c_str(), "");
        }
+    }
 }

 //
@@ -3123,6 +3479,30 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan
        if (!(*argp)[6]->getAsTyped()->getType().getQualifier().isConstant())
            error(loc, "argument must be compile-time constant", "matrixInterpretation", "");
        break;
+
+    case EOpCooperativeMatrixLoad:
+    case EOpCooperativeMatrixLoadNV:
+    case EOpCooperativeMatrixLoadTensorNV:
+    case EOpCooperativeMatrixStore:
+    case EOpCooperativeMatrixStoreNV:
+    case EOpCooperativeMatrixStoreTensorNV:
+    {
+        const TIntermTyped *arg1 = (*argp)[1]->getAsTyped();
+        const TIntermTyped* base = TIntermediate::traverseLValueBase(arg1, true, true);
+        const char* errMsg = "Only l-values corresponding to storage block or shared variables can be used with "
+                             "cooperative matrix load/store functions.";
+        if (base) {
+            const TType* refType = (base->getType().isReference()) ? base->getType().getReferentType() : nullptr;
+            const TQualifier& qualifier =
+                (refType != nullptr) ? refType->getQualifier() : base->getType().getQualifier();
+            if (qualifier.storage != EvqShared && qualifier.storage != EvqBuffer)
+                error(loc, errMsg, fnCandidate.getName().c_str(), "");
+        } else {
+            error(loc, errMsg, fnCandidate.getName().c_str(), "");
+        }
+    }
+    break;
+
    default:
        break;
    }
@@ -3148,6 +3528,102 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan
        break;
    }

+    case EOpConstructSaturated:
+    {
+        auto &sequence = callNode.getAsAggregate()->getSequence();
+        if (sequence.size() != 2) {
+            error(loc, "requires exactly two parameters", "", "");
+        }
+        auto &op0Type = sequence[0]->getAsTyped()->getType();
+        auto &op1Type = sequence[1]->getAsTyped()->getType();
+        if (op0Type.getBasicType() != EbtFloatE5M2 && op0Type.getBasicType() != EbtFloatE4M3) {
+            error(loc, "first parameter must have floate5m2 or floate4m3 basic type", "", "");
+        }
+        if (op1Type.getBasicType() == EbtFloatE5M2 || op1Type.getBasicType() == EbtFloatE4M3) {
+            error(loc, "second parameter must not have floate5m2 or floate4m3 basic type", "", "");
+        }
+
+        if (!(op0Type.isScalar() || op0Type.isVector() || op0Type.isCoopMatKHR())) {
+            error(loc, "first parameter must be scalar, vector, or cooperative matrix", "", "");
+        }
+        if (!(op1Type.isScalar() || op1Type.isVector() || op1Type.isCoopMatKHR())) {
+            error(loc, "second parameter must be scalar, vector, or cooperative matrix", "", "");
+        }
+        if (!(op0Type.sameElementShape(op1Type) || op0Type.sameCoopMatShape(op1Type))) {
+            error(loc, "types must match other than scalar type and coopmat Use", "", "");
+        }
+        break;
+    }
+    case EOpTensorReadARM:
+    case EOpTensorWriteARM:
+    {
+        const TType &tensorType = (*argp)[0]->getAsTyped()->getType();
+
+        // Check that coordinates argument length matches rank of tensor argument.
+        int tensorRank = tensorType.getTensorRankARM();
+        const TArraySizes *coordArgArrayTy = (*argp)[1]->getAsTyped()->getType().getArraySizes();
+        assert(coordArgArrayTy->getNumDims() == 1 && "expecting 1D coordinate array");
+        if (coordArgArrayTy->getDimSize(0) != tensorRank) {
+            error(loc, "number of coordinates does not match tensor rank", "coord", "");
+        }
+
+        // Check that tensor element type matches data argument.
+        TBasicType eltTy = tensorType.getBasicType();
+        TBasicType argTy = (*argp)[2]->getAsTyped()->getType().getBasicType();
+        if (eltTy != argTy) {
+            error(loc, "", "data", "data argument type (%s) does not match tensor element type (%s)",
+                  TType::getBasicString(argTy), TType::getBasicString(eltTy));
+        }
+
+        // Check optional tensor operands.
+        if (argp->size() > 3) {
+            const TIntermConstantUnion* opArg = (*argp)[3]->getAsConstantUnion();
+            if (!opArg) {
+                error(loc, "tensor operands argument must be a constant integral expression", "tensorOps", "");
+            }
+            const unsigned int ops = opArg ? opArg->getConstArray()[0].getUConst() : 0;
+            const int gl_TensorOperandsOutOfBoundsValueARM = 0x2;
+            if (ops & gl_TensorOperandsOutOfBoundsValueARM) {
+                // Out-of-bounds values can only be used with reads.
+                if (callNode.getOp() != EOpTensorReadARM) {
+                    error(loc, "out-of-bounds value is only valid with tensorReadARM", "tensorOps", "");
+                }
+                // Check that an out-of-bounds value is present.
+                if (argp->size() == 4) {
+                    error(loc, "expecting out-of-bounds value as next argument", "tensorOps", "");
+                } else {
+                    // Check constantness of out-of-bounds value.
+                    const TIntermConstantUnion* oobArg = (*argp)[4]->getAsConstantUnion();
+                    if (!oobArg) {
+                        error(loc, "argument following gl_TensorOperandsOutOfBoundsValueARM must be constant", "vararg",
+                              "");
+                    } else if (oobArg->getType().getBasicType() != tensorType.getBasicType()) {
+                        // The type of the OOB value does not match the tensor type.
+                        error(loc, "", "vararg",
+                            "out-of-bounds value type (%s) does not match tensor element type (%s)",
+                            TType::getBasicString(oobArg->getBasicType()), TType::getBasicString(eltTy));
+
+                    }
+                }
+            }
+        }
+        break;
+    }
+
+    case EOpTensorSizeARM:
+    {
+        unsigned int tensorRank = (*argp)[0]->getAsTyped()->getType().getTensorRankARM();
+        const TIntermConstantUnion *dimArg = (*argp)[1]->getAsConstantUnion();
+        if (dimArg) {
+            if (dimArg->getConstArray()[0].getUConst() >= tensorRank) {
+                error(loc, "dimension argument exceeds tensor rank", "dim", "");
+            }
+        } else {
+            error(loc, "dimension argument must be constant", "dim", "");
+        }
+        break;
+    }
+
    default:
        break;
    }
@@ -4268,6 +4744,9 @@ void TParseContext::samplerCheck(const TSourceLoc& loc, const TType& type, const
                 error(loc, "sampler/image types can only be used in uniform variables or function parameters:", type.getBasicTypeString().c_str(), identifier.c_str());
        }
    }
+    else if (type.isTensorARM() && type.getQualifier().storage != EvqUniform) {
+        error(loc, "tensorARM types can only be used in uniform variables or function parameters:", "tensorARM", identifier.c_str());
+    }
 }

 void TParseContext::atomicUintCheck(const TSourceLoc& loc, const TType& type, const TString& identifier)
@@ -4424,7 +4903,7 @@ void TParseContext::globalQualifierTypeCheck(const TSourceLoc& loc, const TQuali
    if (! symbolTable.atGlobalLevel())
        return;

-    if (!(publicType.userDef && publicType.userDef->isReference()) && !parsingBuiltins) {
+    if (!(publicType.userDef && publicType.userDef->isReference()) && !publicType.isTensorARM() && !parsingBuiltins) {
        if (qualifier.isMemoryQualifierImageAndSSBOOnly() && ! publicType.isImage() && publicType.qualifier.storage != EvqBuffer) {
            error(loc, "memory qualifiers cannot be used on this type", "", "");
        } else if (qualifier.isMemory() && (publicType.basicType != EbtSampler) && !publicType.qualifier.isUniformOrBuffer()) {
@@ -4450,11 +4929,6 @@ void TParseContext::globalQualifierTypeCheck(const TSourceLoc& loc, const TQuali

    // now, knowing it is a shader in/out, do all the in/out semantic checks

-    if (publicType.basicType == EbtBool && !parsingBuiltins) {
-        error(loc, "cannot be bool", GetStorageQualifierString(qualifier.storage), "");
-        return;
-    }
-
    if (isTypeInt(publicType.basicType) || publicType.basicType == EbtDouble) {
        profileRequires(loc, EEsProfile, 300, nullptr, "non-float shader input/output");
        profileRequires(loc, ~EEsProfile, 130, nullptr, "non-float shader input/output");
@@ -4775,7 +5249,7 @@ TPrecisionQualifier TParseContext::getDefaultPrecision(TPublicType& publicType)
        return defaultPrecision[publicType.basicType];
 }

-void TParseContext::precisionQualifierCheck(const TSourceLoc& loc, TBasicType baseType, TQualifier& qualifier, bool isCoopMatOrVec)
+void TParseContext::precisionQualifierCheck(const TSourceLoc& loc, TBasicType baseType, TQualifier& qualifier, bool hasTypeParameter)
 {
    // Built-in symbols are allowed some ambiguous precisions, to be pinned down
    // later by context.
@@ -4785,7 +5259,7 @@ void TParseContext::precisionQualifierCheck(const TSourceLoc& loc, TBasicType ba
    if (baseType == EbtAtomicUint && qualifier.precision != EpqNone && qualifier.precision != EpqHigh)
        error(loc, "atomic counters can only be highp", "atomic_uint", "");

-    if (isCoopMatOrVec)
+    if (hasTypeParameter)
        return;

    if (baseType == EbtFloat || baseType == EbtUint || baseType == EbtInt || baseType == EbtSampler || baseType == EbtAtomicUint) {
@@ -6943,6 +7417,22 @@ void TParseContext::layoutObjectCheck(const TSourceLoc& loc, const TSymbol& symb
            break;
        }
    }
+
+    // Check that an in/out variable or block doesn't contain a boolean member
+    // Don't enforce if redeclaring a builtin, which are allowed to contain bool
+    if (!parsingBuiltins && type.containsBasicType(EbtBool) && !builtInName(symbol.getName())) {
+        switch(qualifier.storage) {
+        case EvqVaryingIn:
+        case EvqVaryingOut:
+        {
+            const char *reason = type.getBasicType() == EbtBool ? "cannot be bool" : "cannot contain bool";
+            error(loc, reason, GetStorageQualifierString(qualifier.storage), "");
+            break;
+        }
+        default:
+            break;
+        }
+    }
 }

 // "For some blocks declared as arrays, the location can only be applied at the block level:
@@ -7214,6 +7704,8 @@ void TParseContext::layoutTypeCheck(const TSourceLoc& loc, const TType& type)
        case EbtDouble:
        case EbtFloat16:
        case EbtBFloat16:
+        case EbtFloatE5M2:
+        case EbtFloatE4M3:
            break;
        default:
            error(loc, "cannot be applied to this type", "constant_id", "");
@@ -7498,6 +7990,12 @@ const TFunction* TParseContext::findFunction(const TSourceLoc& loc, const TFunct
            return symbol->getAsFunction();
    }

+    if (call.getName() == "saturatedConvertEXT") {
+        TSymbol* symbol = symbolTable.find("saturatedConvertEXT(", &builtIn);
+        if (symbol)
+            return symbol->getAsFunction();
+    }
+
    bool explicitTypesEnabled = extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types) ||
                                extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types_int8) ||
                                extensionTurnedOn(E_GL_EXT_shader_explicit_arithmetic_types_int16) ||
@@ -7682,8 +8180,7 @@ const TFunction* TParseContext::findFunction400(const TSourceLoc& loc, const TFu
            TType toElementType(to, 0);
            // Load/store tensor functions allow any element type for the pointer
            if ((op == EOpCooperativeMatrixLoadTensorNV || op == EOpCooperativeMatrixStoreTensorNV) &&
-                param == 1 &&
-                (from.getQualifier().storage == EvqBuffer || from.getQualifier().storage == EvqShared)) {
+                param == 1) {
                return true;
            }
            if (fromElementType == toElementType)
@@ -7695,6 +8192,8 @@ const TFunction* TParseContext::findFunction400(const TSourceLoc& loc, const TFu
            return from.sameCoopMatBaseType(to);
        if (from.isCoopVecNV() && to.isCoopVecNV())
            return from.sameCoopVecBaseType(to);
+        if (from.isTensorARM() && to.isTensorARM())
+            return from.sameTensorBaseTypeARM(to);
        return intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType());
    };

@@ -7794,8 +8293,7 @@ const TFunction* TParseContext::findFunctionExplicitTypes(const TSourceLoc& loc,
            TType toElementType(to, 0);
            // Load/store tensor functions allow any element type for the pointer
            if ((op == EOpCooperativeMatrixLoadTensorNV || op == EOpCooperativeMatrixStoreTensorNV) &&
-                param == 1 &&
-                (from.getQualifier().storage == EvqBuffer || from.getQualifier().storage == EvqShared)) {
+                param == 1) {
                return true;
            }
            if (fromElementType == toElementType)
@@ -7807,6 +8305,8 @@ const TFunction* TParseContext::findFunctionExplicitTypes(const TSourceLoc& loc,
            return from.sameCoopMatBaseType(to);
        if (from.isCoopVecNV() && to.isCoopVecNV())
            return from.sameCoopVecBaseType(to);
+        if (from.isTensorARM() && to.isTensorARM())
+            return from.sameTensorBaseTypeARM(to);
        return intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType());
    };

@@ -7955,6 +8455,8 @@ void TParseContext::typeParametersCheck(const TSourceLoc& loc, const TPublicType
        case EbtFloat:
        case EbtFloat16:
        case EbtBFloat16:
+        case EbtFloatE5M2:
+        case EbtFloatE4M3:
        case EbtInt:
        case EbtInt8:
        case EbtInt16:
@@ -8011,6 +8513,24 @@ void TParseContext::typeParametersCheck(const TSourceLoc& loc, const TPublicType
            }
        }
    }
+    if (publicType.isTensorARM()) {
+        if (publicType.typeParameters == nullptr) {
+            error(loc, "tensor type is missing type parameters", "", "");
+            return;
+        }
+        if (publicType.typeParameters->arraySizes == nullptr) {
+            error(loc, "tensor type is missing rank information", "", "");
+            return;
+        }
+        if (publicType.typeParameters->arraySizes->getNumDims() != 1) {
+            error(loc, "tensor type requires exactly 1 rank specifier", "", "");
+            return;
+        }
+        if (publicType.typeParameters->arraySizes->getDimSize(0) < 1) {
+            error(loc, "tensor rank must be greater than or equal to 1", "", "");
+            return;
+        }
+    }
 }

 bool TParseContext::vkRelaxedRemapUniformVariable(const TSourceLoc& loc, TString& identifier, const TPublicType& publicType,
@@ -8106,6 +8626,8 @@ static void ForEachOpaque(const TType& type, const TString& path, Function callb
                 ++flatIndex)
            {
                TString subscriptPath = path;
+                if (path != "")
+                {
                    for (size_t dimIndex = 0; dimIndex < indices.size(); ++dimIndex)
                    {
                        int index = indices[dimIndex];
@@ -8113,6 +8635,7 @@ static void ForEachOpaque(const TType& type, const TString& path, Function callb
                        subscriptPath.append(String(index));
                        subscriptPath.append("]");
                    }
+                }

                recursion(type, subscriptPath, true, recursion);

@@ -8133,8 +8656,11 @@ static void ForEachOpaque(const TType& type, const TString& path, Function callb
            for (const TTypeLoc& typeLoc : types)
            {
                TString nextPath = path;
+                if (path != "")
+                {
                    nextPath.append(".");
                    nextPath.append(typeLoc.type->getFieldName());
+                }

                recursion(*(typeLoc.type), nextPath, false, recursion);
            }
@@ -8192,9 +8718,13 @@ void TParseContext::vkRelaxedRemapFunctionParameter(TFunction* function, TParame
    if (!param.type->isStruct() || !param.type->containsOpaque())
        return;

-    ForEachOpaque(*param.type, (param.name ? *param.name : param.type->getFieldName()),
+    TString fieldName = param.name
+        ? *param.name
+        : param.type->hasFieldName() ? param.type->getFieldName() : "";
+
+    ForEachOpaque(*param.type, fieldName,
                  [function, param, newParams](const TType& type, const TString& path) {
-                      TString* memberName = NewPoolTString(path.c_str());
+                      TString* memberName = path != "" ? NewPoolTString(path.c_str()) : nullptr;

                      TType* memberType = new TType();
                      memberType->shallowCopy(type);
@@ -8433,6 +8963,29 @@ TIntermNode* TParseContext::declareVariable(const TSourceLoc& loc, TString& iden
        } else if (publicType.typeParameters->arraySizes->getDimSize(0) <= 0) {
            error(loc, "expected positive number of components", identifier.c_str(), "");
        }
+    } else if (type.isTensorARM()) {
+        intermediate.setUseStorageBuffer();
+
+        if (!publicType.typeParameters || publicType.typeParameters->arraySizes->getNumDims() != 1) {
+            error(loc, "expected two type parameters", identifier.c_str(), "");
+        }
+        if (publicType.typeParameters) {
+            if (publicType.typeParameters->basicType != EbtBool &&
+                publicType.typeParameters->basicType != EbtInt8 &&
+                publicType.typeParameters->basicType != EbtInt16 &&
+                publicType.typeParameters->basicType != EbtInt &&
+                publicType.typeParameters->basicType != EbtInt64 &&
+                publicType.typeParameters->basicType != EbtUint8 &&
+                publicType.typeParameters->basicType != EbtUint16 &&
+                publicType.typeParameters->basicType != EbtUint &&
+                publicType.typeParameters->basicType != EbtUint64 &&
+                publicType.typeParameters->basicType != EbtFloat16 &&
+                publicType.typeParameters->basicType != EbtFloat &&
+                publicType.typeParameters->basicType != EbtDouble) {
+                error(loc, "expected bool, integer or floating point type parameter", identifier.c_str(), "");
+            }
+
+        }
    } else {
        if (publicType.typeParameters && publicType.typeParameters->arraySizes->getNumDims() != 0) {
            error(loc, "unexpected type parameters", identifier.c_str(), "");
@@ -8469,6 +9022,10 @@ TIntermNode* TParseContext::declareVariable(const TSourceLoc& loc, TString& iden
        (type.getQualifier().storage == EvqVaryingIn || type.getQualifier().storage == EvqVaryingOut))
        error(loc, "qualifier", "bfloat16 types not allowed as input/output", "");

+    if ((type.getBasicType() == EbtFloatE5M2 || type.getBasicType() == EbtFloatE4M3) &&
+        (type.getQualifier().storage == EvqVaryingIn || type.getQualifier().storage == EvqVaryingOut))
+        error(loc, "qualifier", "fp8 types not allowed as input/output", "");
+
    if (type.getQualifier().storage == EvqtaskPayloadSharedEXT)
        intermediate.addTaskPayloadEXTCount();
    if (type.getQualifier().storage == EvqShared && type.containsCoopMat())
@@ -9145,6 +9702,20 @@ TIntermTyped* TParseContext::constructBuiltIn(const TType& type, TOperator op, T
        basicOp = EOpConstructBFloat16;
        break;

+    case EOpConstructFloatE5M2Vec2:
+    case EOpConstructFloatE5M2Vec3:
+    case EOpConstructFloatE5M2Vec4:
+    case EOpConstructFloatE5M2:
+        basicOp = EOpConstructFloatE5M2;
+        break;
+
+    case EOpConstructFloatE4M3Vec2:
+    case EOpConstructFloatE4M3Vec3:
+    case EOpConstructFloatE4M3Vec4:
+    case EOpConstructFloatE4M3:
+        basicOp = EOpConstructFloatE4M3;
+        break;
+
    case EOpConstructI8Vec2:
    case EOpConstructI8Vec3:
    case EOpConstructI8Vec4:
@@ -9370,6 +9941,34 @@ TIntermTyped* TParseContext::constructBuiltIn(const TType& type, TOperator op, T
    return intermediate.setAggregateOperator(newNode, op, type, loc);
 }

+void TParseContext::makeVariadic(TFunction *F, const TSourceLoc &loc) { // Marks F variadic via setVariadic(), but only while parsingBuiltins is set.
+    if (parsingBuiltins) {
+        F->setVariadic(); // built-in declaration: record the variadic flag on the function
+    } else {
+        error(loc, "variadic argument specifier is only available for builtins", "...", ""); // otherwise F is left untouched and an error is reported at loc
+    }
+}
+
+TParameter TParseContext::getParamWithDefault(const TPublicType& ty, TString* identifier, TIntermTyped* initializer,
+                                              const TSourceLoc& loc) // Builds the TParameter for a parameter declared with a default value.
+{ // The three checks below are independent: each one that fails reports its own error and clears the initializer.
+    if (!parsingBuiltins) { // default values are rejected outside built-in parsing
+        error(loc, "default argument values are only available for builtins", "=", "");
+        initializer = nullptr;
+    }
+    if (ty.arraySizes) { // array-typed parameters may not carry a default
+        error(loc, "array arguments cannot be default-initialized", identifier->c_str(), "");
+        initializer = nullptr;
+    }
+    if (ty.basicType == EbtVoid) { // a void parameter is an error
+        error(loc, "illegal use of type 'void'", identifier->c_str(), "");
+        initializer = nullptr;
+    }
+    reservedErrorCheck(loc, *identifier); // identifier is dereferenced unconditionally here, so callers must pass a non-null pointer
+    TParameter param = {identifier, new TType(ty), initializer}; // initializer is nullptr here if any check above failed
+    return param; // a parameter is returned even when errors were reported
+}
+
 // This function tests for the type of the parameters to the structure or array constructor. Raises
 // an error message if the expected type does not match the parameter passed to the constructor.
 //
--- a/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.h
+++ b/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.h
@@ -407,7 +407,7 @@ public:
    void setDefaultPrecision(const TSourceLoc&, TPublicType&, TPrecisionQualifier);
    int computeSamplerTypeIndex(TSampler&);
    TPrecisionQualifier getDefaultPrecision(TPublicType&);
-    void precisionQualifierCheck(const TSourceLoc&, TBasicType, TQualifier&, bool isCoopMatOrVec);
+    void precisionQualifierCheck(const TSourceLoc&, TBasicType, TQualifier&, bool hasTypeParameter);
    void parameterTypeCheck(const TSourceLoc&, TStorageQualifier qualifier, const TType& type);
    bool containsFieldWithBasicType(const TType& type ,TBasicType basicType);
    TSymbol* redeclareBuiltinVariable(const TSourceLoc&, const TString&, const TQualifier&, const TShaderQualifiers&);
@@ -450,6 +450,9 @@ public:
    TIntermTyped* addConstructor(const TSourceLoc&, TIntermNode*, const TType&);
    TIntermTyped* constructAggregate(TIntermNode*, const TType&, int, const TSourceLoc&);
    TIntermTyped* constructBuiltIn(const TType&, TOperator, TIntermTyped*, const TSourceLoc&, bool subset);
+    void makeVariadic(TFunction *F, const TSourceLoc &loc);
+    TParameter getParamWithDefault(const TPublicType& ty, TString* identifier, TIntermTyped* initializer,
+                                   const TSourceLoc& loc);
    void inheritMemoryQualifiers(const TQualifier& from, TQualifier& to);
    void declareBlock(const TSourceLoc&, TTypeList& typeList, const TString* instanceName = nullptr, TArraySizes* arraySizes = nullptr);
    void blockStorageRemap(const TSourceLoc&, const TString*, TQualifier&);
@@ -510,6 +513,7 @@ protected:
    TIntermTyped* convertInitializerList(const TSourceLoc&, const TType&, TIntermTyped* initializer);
    void finish() override;
    void handleCoopMat2FunctionCall(const TSourceLoc& loc, const TFunction* fnCandidate, TIntermTyped* result, TIntermNode* arguments);
+    void handleVector2CoopMatConversionCall(const TSourceLoc& loc, const TFunction* fnCandidate, TIntermTyped* &result, TIntermNode* arguments);

    virtual const char* getGlobalUniformBlockName() const override;
    virtual void finalizeGlobalUniformBlockLayout(TVariable&) override;
--- a/3rdparty/glslang/glslang/MachineIndependent/Scan.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/Scan.cpp
@@ -542,6 +542,16 @@ const std::unordered_map<const char*, int, str_hash, str_eq> KeywordMap {
    {"bf16vec3",BF16VEC3},
    {"bf16vec4",BF16VEC4},

+    {"floate5m2_t",FLOATE5M2_T},
+    {"fe5m2vec2",FE5M2VEC2},
+    {"fe5m2vec3",FE5M2VEC3},
+    {"fe5m2vec4",FE5M2VEC4},
+
+    {"floate4m3_t",FLOATE4M3_T},
+    {"fe4m3vec2",FE4M3VEC2},
+    {"fe4m3vec3",FE4M3VEC3},
+    {"fe4m3vec4",FE4M3VEC4},
+
    {"float32_t",FLOAT32_T},
    {"f32vec2",F32VEC2},
    {"f32vec3",F32VEC3},
@@ -759,6 +769,8 @@ const std::unordered_map<const char*, int, str_hash, str_eq> KeywordMap {
    {"hitObjectNV",HITOBJECTNV},
    {"hitObjectAttributeNV",HITOBJECTATTRNV},

+    {"tensorARM",TENSORARM},
+
    {"__function",FUNCTION},
    {"tensorLayoutNV",TENSORLAYOUTNV},
    {"tensorViewNV",TENSORVIEWNV},
@@ -824,12 +836,22 @@ int TScanContext::tokenize(TPpContext* pp, TParserToken& token)
        loc = ppToken.loc;
        parserToken->sType.lex.loc = loc;
        switch (token) {
-        case ';':  afterType = false; afterBuffer = false; return SEMICOLON;
-        case ',':  afterType = false;   return COMMA;
+        case ';':  afterType = false; afterBuffer = false; inDeclaratorList = false; afterDeclarator = false; angleBracketDepth = 0; squareBracketDepth = 0; parenDepth = 0; return SEMICOLON;
+        case ',':
+            // If we just processed a declarator (identifier after a type), this comma
+            // indicates that we're in a declarator list. Note that 'afterDeclarator' is
+            // only set when we are not inside a template parameter list, array expression,
+            // or function parameter list.
+            if (afterDeclarator) {
+                inDeclaratorList = true;
+            }
+            afterType = false;
+            afterDeclarator = false;
+            return COMMA;
        case ':':                       return COLON;
-        case '=':  afterType = false;   return EQUAL;
-        case '(':  afterType = false;   return LEFT_PAREN;
-        case ')':  afterType = false;   return RIGHT_PAREN;
+        case '=':  afterType = false; inDeclaratorList = false; afterDeclarator = false; return EQUAL;
+        case '(':  afterType = false; inDeclaratorList = false; afterDeclarator = false; parenDepth++; return LEFT_PAREN;
+        case ')':  afterType = false; inDeclaratorList = false; afterDeclarator = false; if (parenDepth > 0) parenDepth--; return RIGHT_PAREN;
        case '.':  field = true;        return DOT;
        case '!':                       return BANG;
        case '-':                       return DASH;
@@ -838,16 +860,16 @@ int TScanContext::tokenize(TPpContext* pp, TParserToken& token)
        case '*':                       return STAR;
        case '/':                       return SLASH;
        case '%':                       return PERCENT;
-        case '<':                       return LEFT_ANGLE;
-        case '>':                       return RIGHT_ANGLE;
+        case '<':                       angleBracketDepth++; return LEFT_ANGLE;
+        case '>':                       if (angleBracketDepth > 0) angleBracketDepth--; return RIGHT_ANGLE;
        case '|':                       return VERTICAL_BAR;
        case '^':                       return CARET;
        case '&':                       return AMPERSAND;
        case '?':                       return QUESTION;
-        case '[':                       return LEFT_BRACKET;
-        case ']':                       return RIGHT_BRACKET;
-        case '{':  afterStruct = false; afterBuffer = false; return LEFT_BRACE;
-        case '}':                       return RIGHT_BRACE;
+        case '[':                       squareBracketDepth++; return LEFT_BRACKET;
+        case ']':                       if (squareBracketDepth > 0) squareBracketDepth--; return RIGHT_BRACKET;
+        case '{':  afterStruct = false; afterBuffer = false; inDeclaratorList = false; afterDeclarator = false; angleBracketDepth = 0; squareBracketDepth = 0; parenDepth = 0; return LEFT_BRACE;
+        case '}':  inDeclaratorList = false; afterDeclarator = false; angleBracketDepth = 0; squareBracketDepth = 0; parenDepth = 0; return RIGHT_BRACE;
        case '\\':
            parseContext.error(loc, "illegal use of escape character", "\\", "");
            break;
@@ -1494,6 +1516,28 @@ int TScanContext::tokenizeIdentifier()

        return identifierOrType();

+    case FLOATE5M2_T:
+    case FE5M2VEC2:
+    case FE5M2VEC3:
+    case FE5M2VEC4:
+        afterType = true;
+        if (parseContext.symbolTable.atBuiltInLevel() ||
+            parseContext.extensionTurnedOn(E_GL_EXT_float_e5m2))
+            return keyword;
+
+        return identifierOrType();
+
+    case FLOATE4M3_T:
+    case FE4M3VEC2:
+    case FE4M3VEC3:
+    case FE4M3VEC4:
+        afterType = true;
+        if (parseContext.symbolTable.atBuiltInLevel() ||
+            parseContext.extensionTurnedOn(E_GL_EXT_float_e4m3))
+            return keyword;
+
+        return identifierOrType();
+
    case SAMPLERCUBEARRAY:
    case SAMPLERCUBEARRAYSHADOW:
    case ISAMPLERCUBEARRAY:
@@ -1824,6 +1868,12 @@ int TScanContext::tokenizeIdentifier()
            parseContext.extensionTurnedOn(E_GL_NV_integer_cooperative_matrix))
            return keyword;
        return identifierOrType();
+    case TENSORARM:
+        afterType = true;
+        if (parseContext.symbolTable.atBuiltInLevel() ||
+            parseContext.extensionTurnedOn(E_GL_ARM_tensors))
+            return keyword;
+        return identifierOrType();

    case COOPMAT:
        afterType = true;
@@ -1895,14 +1945,29 @@ int TScanContext::identifierOrType()
    if (field)
        return IDENTIFIER;

+    // If we see an identifier right after a type, this might be a declarator.
+    // But not in template parameters (inside angle brackets), array expressions (inside square brackets),
+    // or function parameters (inside parentheses)
+    if (afterType && angleBracketDepth == 0 && squareBracketDepth == 0 && parenDepth == 0) {
+        afterDeclarator = true;
+        afterType = false;
+        return IDENTIFIER;
+    }
+
    parserToken->sType.lex.symbol = parseContext.symbolTable.find(*parserToken->sType.lex.string);
    if ((afterType == false && afterStruct == false) && parserToken->sType.lex.symbol != nullptr) {
        if (const TVariable* variable = parserToken->sType.lex.symbol->getAsVariable()) {
            if (variable->isUserType() &&
                // treat redeclaration of forward-declared buffer/uniform reference as an identifier
                !(variable->getType().isReference() && afterBuffer)) {
-                afterType = true;

+                // If we're in a declarator list (like "float a, B;"), treat struct names as IDENTIFIER
+                // to fix GitHub issue #3931
+                if (inDeclaratorList) {
+                    return IDENTIFIER;
+                }
+                
+                afterType = true;
                return TYPE_NAME;
            }
        }
--- a/3rdparty/glslang/glslang/MachineIndependent/ScanContext.h
+++ b/3rdparty/glslang/glslang/MachineIndependent/ScanContext.h
@@ -53,7 +53,7 @@ public:
    explicit TScanContext(TParseContextBase& pc) :
        parseContext(pc),
        afterType(false), afterStruct(false),
-        field(false), afterBuffer(false) { }
+        field(false), afterBuffer(false), inDeclaratorList(false), afterDeclarator(false), angleBracketDepth(0), squareBracketDepth(0), parenDepth(0) { }
    virtual ~TScanContext() { }

    static void fillInKeywordMap();
@@ -82,6 +82,11 @@ protected:
    bool afterStruct;         // true if we've recognized the STRUCT keyword, so can only be looking for an identifier
    bool field;               // true if we're on a field, right after a '.'
    bool afterBuffer;         // true if we've recognized the BUFFER keyword
+    bool inDeclaratorList;    // true if we detected we're in a declarator list like "float a, b;"
+    bool afterDeclarator;     // true if we just saw an identifier after a type (potential declarator)
+    int angleBracketDepth;    // track nesting level of < > to detect template parameters
+    int squareBracketDepth;   // track nesting level of [ ] to detect array expressions
+    int parenDepth;           // track nesting level of ( ) to detect function parameters
    TSourceLoc loc;
    TParserToken* parserToken;
    TPpToken* ppToken;
--- a/3rdparty/glslang/glslang/MachineIndependent/SymbolTable.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/SymbolTable.cpp
@@ -55,7 +55,9 @@ namespace glslang {
 //
 void TType::buildMangledName(TString& mangledName) const
 {
-    if (isMatrix())
+    if (isTensorARM())
+        mangledName += 'T';
+    else if (isMatrix())
        mangledName += 'm';
    else if (isVector())
        mangledName += 'v';
@@ -71,6 +73,8 @@ void TType::buildMangledName(TString& mangledName) const
    case EbtDouble:             mangledName += 'd';      break;
    case EbtFloat16:            mangledName += "f16";    break;
    case EbtBFloat16:           mangledName += "bf16";   break;
+    case EbtFloatE5M2:          mangledName += "fe5m2";  break;
+    case EbtFloatE4M3:          mangledName += "fe4m3";  break;
    case EbtInt8:               mangledName += "i8";     break;
    case EbtUint8:              mangledName += "u8";     break;
    case EbtInt16:              mangledName += "i16";    break;
@@ -421,6 +425,7 @@ TFunction::TFunction(const TFunction& copyOf) : TSymbol(copyOf)
    defined = copyOf.defined;
    prototyped = copyOf.prototyped;
    implicitThis = copyOf.implicitThis;
+    variadic = copyOf.variadic;
    illegalImplicitThis = copyOf.illegalImplicitThis;
    defaultParamCount = copyOf.defaultParamCount;
    spirvInst = copyOf.spirvInst;
--- a/3rdparty/glslang/glslang/MachineIndependent/SymbolTable.h
+++ b/3rdparty/glslang/glslang/MachineIndependent/SymbolTable.h
@@ -232,6 +232,13 @@ struct TParameter {
            name = nullptr;
        type = param.type->clone();
        defaultValue = param.defaultValue;
+        if (defaultValue) {
+            // The defaultValue of a builtin is created in a TPoolAllocator that no longer exists
+            // when parsing the user program, so make a deep copy.
+            if (const auto *constUnion = defaultValue->getAsConstantUnion()) {
+                defaultValue = new TIntermConstantUnion(*constUnion->getConstArray().clone(), constUnion->getType());
+            }
+        }
        return *this;
    }
    TBuiltInVariable getDeclaredBuiltIn() const { return type->getQualifier().declaredBuiltIn; }
@@ -245,12 +252,12 @@ public:
    explicit TFunction(TOperator o) :
        TSymbol(nullptr),
        op(o),
-        defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0) { }
+        defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), variadic(false), defaultParamCount(0) { }
    TFunction(const TString *name, const TType& retType, TOperator tOp = EOpNull) :
        TSymbol(name),
        mangledName(*name + '('),
        op(tOp),
-        defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0),
+        defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), variadic(false), defaultParamCount(0),
        linkType(ELinkNone)
    {
        returnType.shallowCopy(retType);
@@ -268,6 +275,7 @@ public:
    virtual void addParameter(TParameter& p)
    {
        assert(writable);
+        assert(!variadic && "cannot add more parameters if function is marked variadic");
        parameters.push_back(p);
        p.type->appendMangledName(mangledName);

@@ -310,6 +318,13 @@ public:
    virtual bool hasImplicitThis() const { return implicitThis; }
    virtual void setIllegalImplicitThis() { assert(writable); illegalImplicitThis = true; }
    virtual bool hasIllegalImplicitThis() const { return illegalImplicitThis; }
+    virtual void setVariadic() { // marks this function variadic; asserts it is writable and not already marked
+        assert(writable);
+        assert(!variadic && "function was already marked variadic");
+        variadic = true;         // from here on addParameter() asserts, so call this after the last parameter
+        mangledName += 'z';      // the variadic marker is appended to the mangled name
+    }
+    virtual bool isVariadic() const { return variadic; } // reports the flag set by setVariadic()

    // Return total number of parameters
    virtual int getParamCount() const { return static_cast<int>(parameters.size()); }
@@ -352,6 +367,7 @@ protected:
                               // even if it finds member variables in the symbol table.
                               // This is important for a static member function that has member variables in scope,
                               // but is not allowed to use them, or see hidden symbols instead.
+    bool variadic;
    int  defaultParamCount;

    TSpirvInstruction spirvInst; // SPIR-V instruction qualifiers
--- a/3rdparty/glslang/glslang/MachineIndependent/Versions.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/Versions.cpp
@@ -319,11 +319,13 @@ void TParseVersions::initializeExtensionBehavior()

    // ARM
    extensionBehavior[E_GL_ARM_shader_core_builtins]                 = EBhDisable;
+    extensionBehavior[E_GL_ARM_tensors]                              = EBhDisable;

    // QCOM
    extensionBehavior[E_GL_QCOM_image_processing]                    = EBhDisable;
    extensionBehavior[E_GL_QCOM_image_processing2]                   = EBhDisable;
    extensionBehavior[E_GL_QCOM_tile_shading]                        = EBhDisable;
+    extensionBehavior[E_GL_QCOM_cooperative_matrix_conversion]       = EBhDisable;

    // AEP
    extensionBehavior[E_GL_ANDROID_extension_pack_es31a]             = EBhDisable;
@@ -381,6 +383,8 @@ void TParseVersions::initializeExtensionBehavior()
    extensionBehavior[E_GL_EXT_texture_offset_non_const]    = EBhDisable;
    extensionBehavior[E_GL_EXT_nontemporal_keyword]         = EBhDisable;
    extensionBehavior[E_GL_EXT_bfloat16]                    = EBhDisable;
+    extensionBehavior[E_GL_EXT_float_e4m3]                  = EBhDisable;
+    extensionBehavior[E_GL_EXT_float_e5m2]                  = EBhDisable;

    // OVR extensions
    extensionBehavior[E_GL_OVR_multiview]                = EBhDisable;
@@ -462,6 +466,7 @@ void TParseVersions::getPreamble(std::string& preamble)
            "#define GL_QCOM_image_processing 1\n"
            "#define GL_QCOM_image_processing2 1\n"
            "#define GL_QCOM_tile_shading 1\n"
+            "#define GL_QCOM_cooperative_matrix_conversion 1\n"
            ;

            if (version >= 300) {
@@ -593,6 +598,7 @@ void TParseVersions::getPreamble(std::string& preamble)
            "#define GL_QCOM_image_processing 1\n"
            "#define GL_QCOM_image_processing2 1\n"
            "#define GL_QCOM_tile_shading 1\n"
+            "#define GL_QCOM_cooperative_matrix_conversion 1\n"

            "#define GL_EXT_shader_explicit_arithmetic_types 1\n"
            "#define GL_EXT_shader_explicit_arithmetic_types_int8 1\n"
@@ -619,6 +625,8 @@ void TParseVersions::getPreamble(std::string& preamble)

            "#define GL_EXT_integer_dot_product 1\n"
            "#define GL_EXT_bfloat16 1\n"
+            "#define GL_EXT_float_e5m2 1\n"
+            "#define GL_EXT_float_e4m3 1\n"
            ;

        if (spvVersion.spv == 0) {
@@ -1303,6 +1311,26 @@ void TParseVersions::bfloat16ScalarVectorCheck(const TSourceLoc& loc, const char
    }
 }

+// Call for any use of floate5m2 scalar/vector types: requests GL_EXT_float_e5m2 via requireExtensions().
+void TParseVersions::floate5m2ScalarVectorCheck(const TSourceLoc& loc, const char* op, bool builtIn)
+{
+    if (builtIn)
+        return; // built-in declarations skip the extension requirement
+
+    const char* const required[] = { E_GL_EXT_float_e5m2 };
+    requireExtensions(loc, sizeof(required)/sizeof(required[0]), required, op);
+}
+
+// Call for any use of floate4m3 scalar/vector types: requests GL_EXT_float_e4m3 via requireExtensions().
+void TParseVersions::floate4m3ScalarVectorCheck(const TSourceLoc& loc, const char* op, bool builtIn)
+{
+    if (builtIn)
+        return; // built-in declarations skip the extension requirement
+
+    const char* const required[] = { E_GL_EXT_float_e4m3 };
+    requireExtensions(loc, sizeof(required)/sizeof(required[0]), required, op);
+}
+
 // Call for any operation needing GLSL float32 data-type support.
 void TParseVersions::explicitFloat32Check(const TSourceLoc& loc, const char* op, bool builtIn)
 {
@@ -1439,6 +1467,14 @@ void TParseVersions::coopmatCheck(const TSourceLoc& loc, const char* op, bool bu
    }
 }

+void TParseVersions::coopmatConverisonCheckQCOM(const TSourceLoc& loc, const char* op, bool builtIn)
+{
+    if (!builtIn) { // built-in declarations skip the extension requirement
+        const char* const extensions[] = {E_GL_KHR_cooperative_matrix}; // NOTE(review): not E_GL_QCOM_cooperative_matrix_conversion -- confirm intended
+        requireExtensions(loc, sizeof(extensions) / sizeof(extensions[0]), extensions, op);
+    }
+}
+
 void TParseVersions::tensorLayoutViewCheck(const TSourceLoc& loc, const char* op, bool builtIn)
 {
    if (!builtIn) {
@@ -1463,6 +1499,14 @@ void TParseVersions::intattachmentCheck(const TSourceLoc& loc, const char* op, b
    }
 }

+void TParseVersions::tensorCheckARM(const TSourceLoc& loc, const char* op, bool builtIn)
+{
+    if (builtIn)
+        return; // built-in declarations skip the GL_ARM_tensors requirement
+    const char* const required[] = { E_GL_ARM_tensors };
+    requireExtensions(loc, sizeof(required)/sizeof(required[0]), required, op);
+}
+
 // Call for any operation removed because SPIR-V is in use.
 void TParseVersions::spvRemoved(const TSourceLoc& loc, const char* op)
 {
--- a/3rdparty/glslang/glslang/MachineIndependent/Versions.h
+++ b/3rdparty/glslang/glslang/MachineIndependent/Versions.h
@@ -293,6 +293,7 @@ const char* const E_GL_NV_gpu_shader5                           = "GL_NV_gpu_sha

 // ARM
 const char* const E_GL_ARM_shader_core_builtins                 = "GL_ARM_shader_core_builtins";
+const char* const E_GL_ARM_tensors                              = "GL_ARM_tensors";

 // Arrays of extensions for the above viewportEXTs duplications

@@ -303,6 +304,7 @@ const int Num_viewportEXTs = sizeof(viewportEXTs) / sizeof(viewportEXTs[0]);
 const char* const E_GL_QCOM_image_processing                    = "GL_QCOM_image_processing";
 const char* const E_GL_QCOM_image_processing2                   = "GL_QCOM_image_processing2";
 const char* const E_GL_QCOM_tile_shading                        = "GL_QCOM_tile_shading";
+const char* const E_GL_QCOM_cooperative_matrix_conversion       = "GL_QCOM_cooperative_matrix_conversion";

 // AEP
 const char* const E_GL_ANDROID_extension_pack_es31a             = "GL_ANDROID_extension_pack_es31a";
@@ -359,6 +361,8 @@ const char* const E_GL_EXT_texture_shadow_lod = "GL_EXT_texture_shadow_lod";
 const char* const E_GL_EXT_integer_dot_product                    = "GL_EXT_integer_dot_product";

 const char* const E_GL_EXT_bfloat16 = "GL_EXT_bfloat16";
+const char* const E_GL_EXT_float_e5m2 = "GL_EXT_float_e5m2";
+const char* const E_GL_EXT_float_e4m3 = "GL_EXT_float_e4m3";

 // Arrays of extensions for the above AEP duplications

--- a/3rdparty/glslang/glslang/MachineIndependent/glslang.y
+++ b/3rdparty/glslang/glslang/MachineIndependent/glslang.y
@@ -146,7 +146,7 @@ extern int yylex(YYSTYPE*, TParseContext&);
 %token <lex> UTEXTURE2D UTEXTURE3D UTEXTURECUBE UTEXTURE2DARRAY

 %token <lex> ATTRIBUTE VARYING
-%token <lex> BFLOAT16_T FLOAT16_T FLOAT32_T DOUBLE FLOAT64_T
+%token <lex> FLOATE5M2_T FLOATE4M3_T BFLOAT16_T FLOAT16_T FLOAT32_T DOUBLE FLOAT64_T
 %token <lex> INT64_T UINT64_T INT32_T UINT32_T INT16_T UINT16_T INT8_T UINT8_T
 %token <lex> I64VEC2 I64VEC3 I64VEC4
 %token <lex> U64VEC2 U64VEC3 U64VEC4
@@ -158,6 +158,8 @@ extern int yylex(YYSTYPE*, TParseContext&);
 %token <lex> U8VEC2  U8VEC3  U8VEC4
 %token <lex> DVEC2 DVEC3 DVEC4 DMAT2 DMAT3 DMAT4
 %token <lex> BF16VEC2 BF16VEC3 BF16VEC4
+%token <lex> FE5M2VEC2 FE5M2VEC3 FE5M2VEC4
+%token <lex> FE4M3VEC2 FE4M3VEC3 FE4M3VEC4
 %token <lex> F16VEC2 F16VEC3 F16VEC4 F16MAT2 F16MAT3 F16MAT4
 %token <lex> F32VEC2 F32VEC3 F32VEC4 F32MAT2 F32MAT3 F32MAT4
 %token <lex> F64VEC2 F64VEC3 F64VEC4 F64MAT2 F64MAT3 F64MAT4
@@ -182,6 +184,7 @@ extern int yylex(YYSTYPE*, TParseContext&);
 %token <lex> COOPVECNV
 %token <lex> HITOBJECTNV HITOBJECTATTRNV
 %token <lex> TENSORLAYOUTNV TENSORVIEWNV
+%token <lex> TENSORARM

 // combined image/sampler
 %token <lex> SAMPLERCUBEARRAY SAMPLERCUBEARRAYSHADOW
@@ -925,15 +928,9 @@ declaration
        parseContext.updateStandaloneQualifierDefaults($1.loc, $1);
        $$ = 0;
    }
-    | type_qualifier IDENTIFIER SEMICOLON {
+    | type_qualifier identifier_list SEMICOLON {
        parseContext.checkNoShaderLayouts($1.loc, $1.shaderQualifiers);
-        parseContext.addQualifierToExisting($1.loc, $1.qualifier, *$2.string);
-        $$ = 0;
-    }
-    | type_qualifier IDENTIFIER identifier_list SEMICOLON {
-        parseContext.checkNoShaderLayouts($1.loc, $1.shaderQualifiers);
-        $3->push_back($2.string);
-        parseContext.addQualifierToExisting($1.loc, $1.qualifier, *$3);
+        parseContext.addQualifierToExisting($1.loc, $1.qualifier, *$2);
        $$ = 0;
    }
    ;
@@ -950,9 +947,9 @@ block_structure
    }

 identifier_list
-    : COMMA IDENTIFIER {
+    : IDENTIFIER {
        $$ = new TIdentifierList;
-        $$->push_back($2.string);
+        $$->push_back($1.string);
    }
    | identifier_list COMMA IDENTIFIER {
        $$ = $1;
@@ -1037,6 +1034,10 @@ function_header_with_parameters
                parseContext.vkRelaxedRemapFunctionParameter($1, $3.param);
        }
    }
+    | function_header_with_parameters COMMA DOT DOT DOT {
+        $$ = $1;
+        parseContext.makeVariadic($1, $3.loc);
+    }
    ;

 function_header
@@ -1097,6 +1098,11 @@ parameter_declarator
        $$.loc = $2.loc;
        $$.param = param;
    }
+    | type_specifier IDENTIFIER EQUAL initializer {
+        TParameter param = parseContext.getParamWithDefault($1, $2.string, $4, $3.loc);
+        $$.loc = $2.loc;
+        $$.param = param;
+    }
    ;

 parameter_declaration
@@ -1107,7 +1113,7 @@ parameter_declaration
        $$ = $2;
        if ($1.qualifier.precision != EpqNone)
            $$.param.type->getQualifier().precision = $1.qualifier.precision;
-        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->isCoopMatOrVec());
+        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->hasTypeParameter());

        parseContext.checkNoShaderLayouts($1.loc, $1.shaderQualifiers);
        parseContext.parameterTypeCheck($2.loc, $1.qualifier.storage, *$$.param.type);
@@ -1119,7 +1125,7 @@ parameter_declaration

        parseContext.parameterTypeCheck($1.loc, EvqIn, *$1.param.type);
        parseContext.paramCheckFixStorage($1.loc, EvqTemporary, *$$.param.type);
-        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->isCoopMatOrVec());
+        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->hasTypeParameter());
    }
    //
    // Without name
@@ -1128,7 +1134,7 @@ parameter_declaration
        $$ = $2;
        if ($1.qualifier.precision != EpqNone)
            $$.param.type->getQualifier().precision = $1.qualifier.precision;
-        parseContext.precisionQualifierCheck($1.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->isCoopMatOrVec());
+        parseContext.precisionQualifierCheck($1.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->hasTypeParameter());

        parseContext.checkNoShaderLayouts($1.loc, $1.shaderQualifiers);
        parseContext.parameterTypeCheck($2.loc, $1.qualifier.storage, *$$.param.type);
@@ -1139,7 +1145,7 @@ parameter_declaration

        parseContext.parameterTypeCheck($1.loc, EvqIn, *$1.param.type);
        parseContext.paramCheckFixStorage($1.loc, EvqTemporary, *$$.param.type);
-        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->isCoopMatOrVec());
+        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->hasTypeParameter());
    }
    ;

@@ -1214,7 +1220,7 @@ fully_specified_type
            parseContext.profileRequires($1.loc, ENoProfile, 120, E_GL_3DL_array_objects, "arrayed type");
            parseContext.profileRequires($1.loc, EEsProfile, 300, 0, "arrayed type");
        }
-        parseContext.precisionQualifierCheck($$.loc, $$.basicType, $$.qualifier, $$.isCoopmatOrvec());
+        parseContext.precisionQualifierCheck($$.loc, $$.basicType, $$.qualifier, $$.hasTypeParameter());
    }
    | type_qualifier type_specifier  {
        parseContext.globalQualifierFixCheck($1.loc, $1.qualifier, false, &$2);
@@ -1231,7 +1237,7 @@ fully_specified_type
        parseContext.checkNoShaderLayouts($2.loc, $1.shaderQualifiers);
        $2.shaderQualifiers.merge($1.shaderQualifiers);
        parseContext.mergeQualifiers($2.loc, $2.qualifier, $1.qualifier, true);
-        parseContext.precisionQualifierCheck($2.loc, $2.basicType, $2.qualifier, $2.isCoopmatOrvec());
+        parseContext.precisionQualifierCheck($2.loc, $2.basicType, $2.qualifier, $2.hasTypeParameter());

        $$ = $2;

@@ -1943,6 +1949,16 @@ type_specifier_nonarray
        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
        $$.basicType = EbtBFloat16;
    }
+    | FLOATE5M2_T {
+        parseContext.floate5m2ScalarVectorCheck($1.loc, "floate5m2_t", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE5M2;
+    }
+    | FLOATE4M3_T {
+        parseContext.floate4m3ScalarVectorCheck($1.loc, "floate4m3_t", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE4M3;
+    }
    | FLOAT16_T {
        parseContext.float16ScalarVectorCheck($1.loc, "float16_t", parseContext.symbolTable.atBuiltInLevel());
        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
@@ -2040,6 +2056,42 @@ type_specifier_nonarray
        $$.basicType = EbtBFloat16;
        $$.setVector(4);
    }
+    | FE5M2VEC2 {
+        parseContext.floate5m2ScalarVectorCheck($1.loc, "fe5m2 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE5M2;
+        $$.setVector(2);
+    }
+    | FE5M2VEC3 {
+        parseContext.floate5m2ScalarVectorCheck($1.loc, "fe5m2 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE5M2;
+        $$.setVector(3);
+    }
+    | FE5M2VEC4 {
+        parseContext.floate5m2ScalarVectorCheck($1.loc, "fe5m2 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE5M2;
+        $$.setVector(4);
+    }
+    | FE4M3VEC2 {
+        parseContext.floate4m3ScalarVectorCheck($1.loc, "fe4m3 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE4M3;
+        $$.setVector(2);
+    }
+    | FE4M3VEC3 {
+        parseContext.floate4m3ScalarVectorCheck($1.loc, "fe4m3 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE4M3;
+        $$.setVector(3);
+    }
+    | FE4M3VEC4 {
+        parseContext.floate4m3ScalarVectorCheck($1.loc, "fe4m3 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE4M3;
+        $$.setVector(4);
+    }
    | F16VEC2 {
        parseContext.float16ScalarVectorCheck($1.loc, "half float vector", parseContext.symbolTable.atBuiltInLevel());
        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
@@ -3585,6 +3637,12 @@ type_specifier_nonarray
        $$.basicType = EbtCoopvecNV;
        $$.coopvecNV = true;
    }
+    | TENSORARM {
+        parseContext.tensorCheckARM($1.loc, "tensorARM", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.tensorRankARM = 1; // placeholder value
+        $$.basicType = EbtTensorARM;
+    }
    | spirv_type_specifier {
        parseContext.requireExtensions($1.loc, 1, &E_GL_EXT_spirv_intrinsics, "SPIR-V type specifier");
        $$ = $1;
@@ -3686,7 +3744,7 @@ struct_declaration
        $$ = $2;

        parseContext.voidErrorCheck($1.loc, (*$2)[0].type->getFieldName(), $1.basicType);
-        parseContext.precisionQualifierCheck($1.loc, $1.basicType, $1.qualifier, $1.isCoopmatOrvec());
+        parseContext.precisionQualifierCheck($1.loc, $1.basicType, $1.qualifier, $1.hasTypeParameter());

        for (unsigned int i = 0; i < $$->size(); ++i) {
            TType type($1);
@@ -3710,7 +3768,7 @@ struct_declaration
        parseContext.memberQualifierCheck($1);
        parseContext.voidErrorCheck($2.loc, (*$3)[0].type->getFieldName(), $2.basicType);
        parseContext.mergeQualifiers($2.loc, $2.qualifier, $1.qualifier, true);
-        parseContext.precisionQualifierCheck($2.loc, $2.basicType, $2.qualifier, $2.isCoopmatOrvec());
+        parseContext.precisionQualifierCheck($2.loc, $2.basicType, $2.qualifier, $2.hasTypeParameter());

        for (unsigned int i = 0; i < $$->size(); ++i) {
            TType type($2);
--- a/3rdparty/glslang/glslang/MachineIndependent/glslang_tab.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/glslang_tab.cpp
--- a/3rdparty/glslang/glslang/MachineIndependent/glslang_tab.cpp.h
+++ b/3rdparty/glslang/glslang/MachineIndependent/glslang_tab.cpp.h
@@ -114,417 +114,426 @@ extern int yydebug;
    UTEXTURE2DARRAY = 315,         /* UTEXTURE2DARRAY  */
    ATTRIBUTE = 316,               /* ATTRIBUTE  */
    VARYING = 317,                 /* VARYING  */
-    BFLOAT16_T = 318,              /* BFLOAT16_T  */
-    FLOAT16_T = 319,               /* FLOAT16_T  */
-    FLOAT32_T = 320,               /* FLOAT32_T  */
-    DOUBLE = 321,                  /* DOUBLE  */
-    FLOAT64_T = 322,               /* FLOAT64_T  */
-    INT64_T = 323,                 /* INT64_T  */
-    UINT64_T = 324,                /* UINT64_T  */
-    INT32_T = 325,                 /* INT32_T  */
-    UINT32_T = 326,                /* UINT32_T  */
-    INT16_T = 327,                 /* INT16_T  */
-    UINT16_T = 328,                /* UINT16_T  */
-    INT8_T = 329,                  /* INT8_T  */
-    UINT8_T = 330,                 /* UINT8_T  */
-    I64VEC2 = 331,                 /* I64VEC2  */
-    I64VEC3 = 332,                 /* I64VEC3  */
-    I64VEC4 = 333,                 /* I64VEC4  */
-    U64VEC2 = 334,                 /* U64VEC2  */
-    U64VEC3 = 335,                 /* U64VEC3  */
-    U64VEC4 = 336,                 /* U64VEC4  */
-    I32VEC2 = 337,                 /* I32VEC2  */
-    I32VEC3 = 338,                 /* I32VEC3  */
-    I32VEC4 = 339,                 /* I32VEC4  */
-    U32VEC2 = 340,                 /* U32VEC2  */
-    U32VEC3 = 341,                 /* U32VEC3  */
-    U32VEC4 = 342,                 /* U32VEC4  */
-    I16VEC2 = 343,                 /* I16VEC2  */
-    I16VEC3 = 344,                 /* I16VEC3  */
-    I16VEC4 = 345,                 /* I16VEC4  */
-    U16VEC2 = 346,                 /* U16VEC2  */
-    U16VEC3 = 347,                 /* U16VEC3  */
-    U16VEC4 = 348,                 /* U16VEC4  */
-    I8VEC2 = 349,                  /* I8VEC2  */
-    I8VEC3 = 350,                  /* I8VEC3  */
-    I8VEC4 = 351,                  /* I8VEC4  */
-    U8VEC2 = 352,                  /* U8VEC2  */
-    U8VEC3 = 353,                  /* U8VEC3  */
-    U8VEC4 = 354,                  /* U8VEC4  */
-    DVEC2 = 355,                   /* DVEC2  */
-    DVEC3 = 356,                   /* DVEC3  */
-    DVEC4 = 357,                   /* DVEC4  */
-    DMAT2 = 358,                   /* DMAT2  */
-    DMAT3 = 359,                   /* DMAT3  */
-    DMAT4 = 360,                   /* DMAT4  */
-    BF16VEC2 = 361,                /* BF16VEC2  */
-    BF16VEC3 = 362,                /* BF16VEC3  */
-    BF16VEC4 = 363,                /* BF16VEC4  */
-    F16VEC2 = 364,                 /* F16VEC2  */
-    F16VEC3 = 365,                 /* F16VEC3  */
-    F16VEC4 = 366,                 /* F16VEC4  */
-    F16MAT2 = 367,                 /* F16MAT2  */
-    F16MAT3 = 368,                 /* F16MAT3  */
-    F16MAT4 = 369,                 /* F16MAT4  */
-    F32VEC2 = 370,                 /* F32VEC2  */
-    F32VEC3 = 371,                 /* F32VEC3  */
-    F32VEC4 = 372,                 /* F32VEC4  */
-    F32MAT2 = 373,                 /* F32MAT2  */
-    F32MAT3 = 374,                 /* F32MAT3  */
-    F32MAT4 = 375,                 /* F32MAT4  */
-    F64VEC2 = 376,                 /* F64VEC2  */
-    F64VEC3 = 377,                 /* F64VEC3  */
-    F64VEC4 = 378,                 /* F64VEC4  */
-    F64MAT2 = 379,                 /* F64MAT2  */
-    F64MAT3 = 380,                 /* F64MAT3  */
-    F64MAT4 = 381,                 /* F64MAT4  */
-    DMAT2X2 = 382,                 /* DMAT2X2  */
-    DMAT2X3 = 383,                 /* DMAT2X3  */
-    DMAT2X4 = 384,                 /* DMAT2X4  */
-    DMAT3X2 = 385,                 /* DMAT3X2  */
-    DMAT3X3 = 386,                 /* DMAT3X3  */
-    DMAT3X4 = 387,                 /* DMAT3X4  */
-    DMAT4X2 = 388,                 /* DMAT4X2  */
-    DMAT4X3 = 389,                 /* DMAT4X3  */
-    DMAT4X4 = 390,                 /* DMAT4X4  */
-    F16MAT2X2 = 391,               /* F16MAT2X2  */
-    F16MAT2X3 = 392,               /* F16MAT2X3  */
-    F16MAT2X4 = 393,               /* F16MAT2X4  */
-    F16MAT3X2 = 394,               /* F16MAT3X2  */
-    F16MAT3X3 = 395,               /* F16MAT3X3  */
-    F16MAT3X4 = 396,               /* F16MAT3X4  */
-    F16MAT4X2 = 397,               /* F16MAT4X2  */
-    F16MAT4X3 = 398,               /* F16MAT4X3  */
-    F16MAT4X4 = 399,               /* F16MAT4X4  */
-    F32MAT2X2 = 400,               /* F32MAT2X2  */
-    F32MAT2X3 = 401,               /* F32MAT2X3  */
-    F32MAT2X4 = 402,               /* F32MAT2X4  */
-    F32MAT3X2 = 403,               /* F32MAT3X2  */
-    F32MAT3X3 = 404,               /* F32MAT3X3  */
-    F32MAT3X4 = 405,               /* F32MAT3X4  */
-    F32MAT4X2 = 406,               /* F32MAT4X2  */
-    F32MAT4X3 = 407,               /* F32MAT4X3  */
-    F32MAT4X4 = 408,               /* F32MAT4X4  */
-    F64MAT2X2 = 409,               /* F64MAT2X2  */
-    F64MAT2X3 = 410,               /* F64MAT2X3  */
-    F64MAT2X4 = 411,               /* F64MAT2X4  */
-    F64MAT3X2 = 412,               /* F64MAT3X2  */
-    F64MAT3X3 = 413,               /* F64MAT3X3  */
-    F64MAT3X4 = 414,               /* F64MAT3X4  */
-    F64MAT4X2 = 415,               /* F64MAT4X2  */
-    F64MAT4X3 = 416,               /* F64MAT4X3  */
-    F64MAT4X4 = 417,               /* F64MAT4X4  */
-    ATOMIC_UINT = 418,             /* ATOMIC_UINT  */
-    ACCSTRUCTNV = 419,             /* ACCSTRUCTNV  */
-    ACCSTRUCTEXT = 420,            /* ACCSTRUCTEXT  */
-    RAYQUERYEXT = 421,             /* RAYQUERYEXT  */
-    FCOOPMATNV = 422,              /* FCOOPMATNV  */
-    ICOOPMATNV = 423,              /* ICOOPMATNV  */
-    UCOOPMATNV = 424,              /* UCOOPMATNV  */
-    COOPMAT = 425,                 /* COOPMAT  */
-    COOPVECNV = 426,               /* COOPVECNV  */
-    HITOBJECTNV = 427,             /* HITOBJECTNV  */
-    HITOBJECTATTRNV = 428,         /* HITOBJECTATTRNV  */
-    TENSORLAYOUTNV = 429,          /* TENSORLAYOUTNV  */
-    TENSORVIEWNV = 430,            /* TENSORVIEWNV  */
-    SAMPLERCUBEARRAY = 431,        /* SAMPLERCUBEARRAY  */
-    SAMPLERCUBEARRAYSHADOW = 432,  /* SAMPLERCUBEARRAYSHADOW  */
-    ISAMPLERCUBEARRAY = 433,       /* ISAMPLERCUBEARRAY  */
-    USAMPLERCUBEARRAY = 434,       /* USAMPLERCUBEARRAY  */
-    SAMPLER1D = 435,               /* SAMPLER1D  */
-    SAMPLER1DARRAY = 436,          /* SAMPLER1DARRAY  */
-    SAMPLER1DARRAYSHADOW = 437,    /* SAMPLER1DARRAYSHADOW  */
-    ISAMPLER1D = 438,              /* ISAMPLER1D  */
-    SAMPLER1DSHADOW = 439,         /* SAMPLER1DSHADOW  */
-    SAMPLER2DRECT = 440,           /* SAMPLER2DRECT  */
-    SAMPLER2DRECTSHADOW = 441,     /* SAMPLER2DRECTSHADOW  */
-    ISAMPLER2DRECT = 442,          /* ISAMPLER2DRECT  */
-    USAMPLER2DRECT = 443,          /* USAMPLER2DRECT  */
-    SAMPLERBUFFER = 444,           /* SAMPLERBUFFER  */
-    ISAMPLERBUFFER = 445,          /* ISAMPLERBUFFER  */
-    USAMPLERBUFFER = 446,          /* USAMPLERBUFFER  */
-    SAMPLER2DMS = 447,             /* SAMPLER2DMS  */
-    ISAMPLER2DMS = 448,            /* ISAMPLER2DMS  */
-    USAMPLER2DMS = 449,            /* USAMPLER2DMS  */
-    SAMPLER2DMSARRAY = 450,        /* SAMPLER2DMSARRAY  */
-    ISAMPLER2DMSARRAY = 451,       /* ISAMPLER2DMSARRAY  */
-    USAMPLER2DMSARRAY = 452,       /* USAMPLER2DMSARRAY  */
-    SAMPLEREXTERNALOES = 453,      /* SAMPLEREXTERNALOES  */
-    SAMPLEREXTERNAL2DY2YEXT = 454, /* SAMPLEREXTERNAL2DY2YEXT  */
-    ISAMPLER1DARRAY = 455,         /* ISAMPLER1DARRAY  */
-    USAMPLER1D = 456,              /* USAMPLER1D  */
-    USAMPLER1DARRAY = 457,         /* USAMPLER1DARRAY  */
-    F16SAMPLER1D = 458,            /* F16SAMPLER1D  */
-    F16SAMPLER2D = 459,            /* F16SAMPLER2D  */
-    F16SAMPLER3D = 460,            /* F16SAMPLER3D  */
-    F16SAMPLER2DRECT = 461,        /* F16SAMPLER2DRECT  */
-    F16SAMPLERCUBE = 462,          /* F16SAMPLERCUBE  */
-    F16SAMPLER1DARRAY = 463,       /* F16SAMPLER1DARRAY  */
-    F16SAMPLER2DARRAY = 464,       /* F16SAMPLER2DARRAY  */
-    F16SAMPLERCUBEARRAY = 465,     /* F16SAMPLERCUBEARRAY  */
-    F16SAMPLERBUFFER = 466,        /* F16SAMPLERBUFFER  */
-    F16SAMPLER2DMS = 467,          /* F16SAMPLER2DMS  */
-    F16SAMPLER2DMSARRAY = 468,     /* F16SAMPLER2DMSARRAY  */
-    F16SAMPLER1DSHADOW = 469,      /* F16SAMPLER1DSHADOW  */
-    F16SAMPLER2DSHADOW = 470,      /* F16SAMPLER2DSHADOW  */
-    F16SAMPLER1DARRAYSHADOW = 471, /* F16SAMPLER1DARRAYSHADOW  */
-    F16SAMPLER2DARRAYSHADOW = 472, /* F16SAMPLER2DARRAYSHADOW  */
-    F16SAMPLER2DRECTSHADOW = 473,  /* F16SAMPLER2DRECTSHADOW  */
-    F16SAMPLERCUBESHADOW = 474,    /* F16SAMPLERCUBESHADOW  */
-    F16SAMPLERCUBEARRAYSHADOW = 475, /* F16SAMPLERCUBEARRAYSHADOW  */
-    IMAGE1D = 476,                 /* IMAGE1D  */
-    IIMAGE1D = 477,                /* IIMAGE1D  */
-    UIMAGE1D = 478,                /* UIMAGE1D  */
-    IMAGE2D = 479,                 /* IMAGE2D  */
-    IIMAGE2D = 480,                /* IIMAGE2D  */
-    UIMAGE2D = 481,                /* UIMAGE2D  */
-    IMAGE3D = 482,                 /* IMAGE3D  */
-    IIMAGE3D = 483,                /* IIMAGE3D  */
-    UIMAGE3D = 484,                /* UIMAGE3D  */
-    IMAGE2DRECT = 485,             /* IMAGE2DRECT  */
-    IIMAGE2DRECT = 486,            /* IIMAGE2DRECT  */
-    UIMAGE2DRECT = 487,            /* UIMAGE2DRECT  */
-    IMAGECUBE = 488,               /* IMAGECUBE  */
-    IIMAGECUBE = 489,              /* IIMAGECUBE  */
-    UIMAGECUBE = 490,              /* UIMAGECUBE  */
-    IMAGEBUFFER = 491,             /* IMAGEBUFFER  */
-    IIMAGEBUFFER = 492,            /* IIMAGEBUFFER  */
-    UIMAGEBUFFER = 493,            /* UIMAGEBUFFER  */
-    IMAGE1DARRAY = 494,            /* IMAGE1DARRAY  */
-    IIMAGE1DARRAY = 495,           /* IIMAGE1DARRAY  */
-    UIMAGE1DARRAY = 496,           /* UIMAGE1DARRAY  */
-    IMAGE2DARRAY = 497,            /* IMAGE2DARRAY  */
-    IIMAGE2DARRAY = 498,           /* IIMAGE2DARRAY  */
-    UIMAGE2DARRAY = 499,           /* UIMAGE2DARRAY  */
-    IMAGECUBEARRAY = 500,          /* IMAGECUBEARRAY  */
-    IIMAGECUBEARRAY = 501,         /* IIMAGECUBEARRAY  */
-    UIMAGECUBEARRAY = 502,         /* UIMAGECUBEARRAY  */
-    IMAGE2DMS = 503,               /* IMAGE2DMS  */
-    IIMAGE2DMS = 504,              /* IIMAGE2DMS  */
-    UIMAGE2DMS = 505,              /* UIMAGE2DMS  */
-    IMAGE2DMSARRAY = 506,          /* IMAGE2DMSARRAY  */
-    IIMAGE2DMSARRAY = 507,         /* IIMAGE2DMSARRAY  */
-    UIMAGE2DMSARRAY = 508,         /* UIMAGE2DMSARRAY  */
-    F16IMAGE1D = 509,              /* F16IMAGE1D  */
-    F16IMAGE2D = 510,              /* F16IMAGE2D  */
-    F16IMAGE3D = 511,              /* F16IMAGE3D  */
-    F16IMAGE2DRECT = 512,          /* F16IMAGE2DRECT  */
-    F16IMAGECUBE = 513,            /* F16IMAGECUBE  */
-    F16IMAGE1DARRAY = 514,         /* F16IMAGE1DARRAY  */
-    F16IMAGE2DARRAY = 515,         /* F16IMAGE2DARRAY  */
-    F16IMAGECUBEARRAY = 516,       /* F16IMAGECUBEARRAY  */
-    F16IMAGEBUFFER = 517,          /* F16IMAGEBUFFER  */
-    F16IMAGE2DMS = 518,            /* F16IMAGE2DMS  */
-    F16IMAGE2DMSARRAY = 519,       /* F16IMAGE2DMSARRAY  */
-    I64IMAGE1D = 520,              /* I64IMAGE1D  */
-    U64IMAGE1D = 521,              /* U64IMAGE1D  */
-    I64IMAGE2D = 522,              /* I64IMAGE2D  */
-    U64IMAGE2D = 523,              /* U64IMAGE2D  */
-    I64IMAGE3D = 524,              /* I64IMAGE3D  */
-    U64IMAGE3D = 525,              /* U64IMAGE3D  */
-    I64IMAGE2DRECT = 526,          /* I64IMAGE2DRECT  */
-    U64IMAGE2DRECT = 527,          /* U64IMAGE2DRECT  */
-    I64IMAGECUBE = 528,            /* I64IMAGECUBE  */
-    U64IMAGECUBE = 529,            /* U64IMAGECUBE  */
-    I64IMAGEBUFFER = 530,          /* I64IMAGEBUFFER  */
-    U64IMAGEBUFFER = 531,          /* U64IMAGEBUFFER  */
-    I64IMAGE1DARRAY = 532,         /* I64IMAGE1DARRAY  */
-    U64IMAGE1DARRAY = 533,         /* U64IMAGE1DARRAY  */
-    I64IMAGE2DARRAY = 534,         /* I64IMAGE2DARRAY  */
-    U64IMAGE2DARRAY = 535,         /* U64IMAGE2DARRAY  */
-    I64IMAGECUBEARRAY = 536,       /* I64IMAGECUBEARRAY  */
-    U64IMAGECUBEARRAY = 537,       /* U64IMAGECUBEARRAY  */
-    I64IMAGE2DMS = 538,            /* I64IMAGE2DMS  */
-    U64IMAGE2DMS = 539,            /* U64IMAGE2DMS  */
-    I64IMAGE2DMSARRAY = 540,       /* I64IMAGE2DMSARRAY  */
-    U64IMAGE2DMSARRAY = 541,       /* U64IMAGE2DMSARRAY  */
-    TEXTURECUBEARRAY = 542,        /* TEXTURECUBEARRAY  */
-    ITEXTURECUBEARRAY = 543,       /* ITEXTURECUBEARRAY  */
-    UTEXTURECUBEARRAY = 544,       /* UTEXTURECUBEARRAY  */
-    TEXTURE1D = 545,               /* TEXTURE1D  */
-    ITEXTURE1D = 546,              /* ITEXTURE1D  */
-    UTEXTURE1D = 547,              /* UTEXTURE1D  */
-    TEXTURE1DARRAY = 548,          /* TEXTURE1DARRAY  */
-    ITEXTURE1DARRAY = 549,         /* ITEXTURE1DARRAY  */
-    UTEXTURE1DARRAY = 550,         /* UTEXTURE1DARRAY  */
-    TEXTURE2DRECT = 551,           /* TEXTURE2DRECT  */
-    ITEXTURE2DRECT = 552,          /* ITEXTURE2DRECT  */
-    UTEXTURE2DRECT = 553,          /* UTEXTURE2DRECT  */
-    TEXTUREBUFFER = 554,           /* TEXTUREBUFFER  */
-    ITEXTUREBUFFER = 555,          /* ITEXTUREBUFFER  */
-    UTEXTUREBUFFER = 556,          /* UTEXTUREBUFFER  */
-    TEXTURE2DMS = 557,             /* TEXTURE2DMS  */
-    ITEXTURE2DMS = 558,            /* ITEXTURE2DMS  */
-    UTEXTURE2DMS = 559,            /* UTEXTURE2DMS  */
-    TEXTURE2DMSARRAY = 560,        /* TEXTURE2DMSARRAY  */
-    ITEXTURE2DMSARRAY = 561,       /* ITEXTURE2DMSARRAY  */
-    UTEXTURE2DMSARRAY = 562,       /* UTEXTURE2DMSARRAY  */
-    F16TEXTURE1D = 563,            /* F16TEXTURE1D  */
-    F16TEXTURE2D = 564,            /* F16TEXTURE2D  */
-    F16TEXTURE3D = 565,            /* F16TEXTURE3D  */
-    F16TEXTURE2DRECT = 566,        /* F16TEXTURE2DRECT  */
-    F16TEXTURECUBE = 567,          /* F16TEXTURECUBE  */
-    F16TEXTURE1DARRAY = 568,       /* F16TEXTURE1DARRAY  */
-    F16TEXTURE2DARRAY = 569,       /* F16TEXTURE2DARRAY  */
-    F16TEXTURECUBEARRAY = 570,     /* F16TEXTURECUBEARRAY  */
-    F16TEXTUREBUFFER = 571,        /* F16TEXTUREBUFFER  */
-    F16TEXTURE2DMS = 572,          /* F16TEXTURE2DMS  */
-    F16TEXTURE2DMSARRAY = 573,     /* F16TEXTURE2DMSARRAY  */
-    SUBPASSINPUT = 574,            /* SUBPASSINPUT  */
-    SUBPASSINPUTMS = 575,          /* SUBPASSINPUTMS  */
-    ISUBPASSINPUT = 576,           /* ISUBPASSINPUT  */
-    ISUBPASSINPUTMS = 577,         /* ISUBPASSINPUTMS  */
-    USUBPASSINPUT = 578,           /* USUBPASSINPUT  */
-    USUBPASSINPUTMS = 579,         /* USUBPASSINPUTMS  */
-    F16SUBPASSINPUT = 580,         /* F16SUBPASSINPUT  */
-    F16SUBPASSINPUTMS = 581,       /* F16SUBPASSINPUTMS  */
-    SPIRV_INSTRUCTION = 582,       /* SPIRV_INSTRUCTION  */
-    SPIRV_EXECUTION_MODE = 583,    /* SPIRV_EXECUTION_MODE  */
-    SPIRV_EXECUTION_MODE_ID = 584, /* SPIRV_EXECUTION_MODE_ID  */
-    SPIRV_DECORATE = 585,          /* SPIRV_DECORATE  */
-    SPIRV_DECORATE_ID = 586,       /* SPIRV_DECORATE_ID  */
-    SPIRV_DECORATE_STRING = 587,   /* SPIRV_DECORATE_STRING  */
-    SPIRV_TYPE = 588,              /* SPIRV_TYPE  */
-    SPIRV_STORAGE_CLASS = 589,     /* SPIRV_STORAGE_CLASS  */
-    SPIRV_BY_REFERENCE = 590,      /* SPIRV_BY_REFERENCE  */
-    SPIRV_LITERAL = 591,           /* SPIRV_LITERAL  */
-    ATTACHMENTEXT = 592,           /* ATTACHMENTEXT  */
-    IATTACHMENTEXT = 593,          /* IATTACHMENTEXT  */
-    UATTACHMENTEXT = 594,          /* UATTACHMENTEXT  */
-    LEFT_OP = 595,                 /* LEFT_OP  */
-    RIGHT_OP = 596,                /* RIGHT_OP  */
-    INC_OP = 597,                  /* INC_OP  */
-    DEC_OP = 598,                  /* DEC_OP  */
-    LE_OP = 599,                   /* LE_OP  */
-    GE_OP = 600,                   /* GE_OP  */
-    EQ_OP = 601,                   /* EQ_OP  */
-    NE_OP = 602,                   /* NE_OP  */
-    AND_OP = 603,                  /* AND_OP  */
-    OR_OP = 604,                   /* OR_OP  */
-    XOR_OP = 605,                  /* XOR_OP  */
-    MUL_ASSIGN = 606,              /* MUL_ASSIGN  */
-    DIV_ASSIGN = 607,              /* DIV_ASSIGN  */
-    ADD_ASSIGN = 608,              /* ADD_ASSIGN  */
-    MOD_ASSIGN = 609,              /* MOD_ASSIGN  */
-    LEFT_ASSIGN = 610,             /* LEFT_ASSIGN  */
-    RIGHT_ASSIGN = 611,            /* RIGHT_ASSIGN  */
-    AND_ASSIGN = 612,              /* AND_ASSIGN  */
-    XOR_ASSIGN = 613,              /* XOR_ASSIGN  */
-    OR_ASSIGN = 614,               /* OR_ASSIGN  */
-    SUB_ASSIGN = 615,              /* SUB_ASSIGN  */
-    STRING_LITERAL = 616,          /* STRING_LITERAL  */
-    LEFT_PAREN = 617,              /* LEFT_PAREN  */
-    RIGHT_PAREN = 618,             /* RIGHT_PAREN  */
-    LEFT_BRACKET = 619,            /* LEFT_BRACKET  */
-    RIGHT_BRACKET = 620,           /* RIGHT_BRACKET  */
-    LEFT_BRACE = 621,              /* LEFT_BRACE  */
-    RIGHT_BRACE = 622,             /* RIGHT_BRACE  */
-    DOT = 623,                     /* DOT  */
-    COMMA = 624,                   /* COMMA  */
-    COLON = 625,                   /* COLON  */
-    EQUAL = 626,                   /* EQUAL  */
-    SEMICOLON = 627,               /* SEMICOLON  */
-    BANG = 628,                    /* BANG  */
-    DASH = 629,                    /* DASH  */
-    TILDE = 630,                   /* TILDE  */
-    PLUS = 631,                    /* PLUS  */
-    STAR = 632,                    /* STAR  */
-    SLASH = 633,                   /* SLASH  */
-    PERCENT = 634,                 /* PERCENT  */
-    LEFT_ANGLE = 635,              /* LEFT_ANGLE  */
-    RIGHT_ANGLE = 636,             /* RIGHT_ANGLE  */
-    VERTICAL_BAR = 637,            /* VERTICAL_BAR  */
-    CARET = 638,                   /* CARET  */
-    AMPERSAND = 639,               /* AMPERSAND  */
-    QUESTION = 640,                /* QUESTION  */
-    INVARIANT = 641,               /* INVARIANT  */
-    HIGH_PRECISION = 642,          /* HIGH_PRECISION  */
-    MEDIUM_PRECISION = 643,        /* MEDIUM_PRECISION  */
-    LOW_PRECISION = 644,           /* LOW_PRECISION  */
-    PRECISION = 645,               /* PRECISION  */
-    PACKED = 646,                  /* PACKED  */
-    RESOURCE = 647,                /* RESOURCE  */
-    SUPERP = 648,                  /* SUPERP  */
-    FLOATCONSTANT = 649,           /* FLOATCONSTANT  */
-    INTCONSTANT = 650,             /* INTCONSTANT  */
-    UINTCONSTANT = 651,            /* UINTCONSTANT  */
-    BOOLCONSTANT = 652,            /* BOOLCONSTANT  */
-    IDENTIFIER = 653,              /* IDENTIFIER  */
-    TYPE_NAME = 654,               /* TYPE_NAME  */
-    CENTROID = 655,                /* CENTROID  */
-    IN = 656,                      /* IN  */
-    OUT = 657,                     /* OUT  */
-    INOUT = 658,                   /* INOUT  */
-    STRUCT = 659,                  /* STRUCT  */
-    VOID = 660,                    /* VOID  */
-    WHILE = 661,                   /* WHILE  */
-    BREAK = 662,                   /* BREAK  */
-    CONTINUE = 663,                /* CONTINUE  */
-    DO = 664,                      /* DO  */
-    ELSE = 665,                    /* ELSE  */
-    FOR = 666,                     /* FOR  */
-    IF = 667,                      /* IF  */
-    DISCARD = 668,                 /* DISCARD  */
-    RETURN = 669,                  /* RETURN  */
-    SWITCH = 670,                  /* SWITCH  */
-    CASE = 671,                    /* CASE  */
-    DEFAULT = 672,                 /* DEFAULT  */
-    TERMINATE_INVOCATION = 673,    /* TERMINATE_INVOCATION  */
-    TERMINATE_RAY = 674,           /* TERMINATE_RAY  */
-    IGNORE_INTERSECTION = 675,     /* IGNORE_INTERSECTION  */
-    UNIFORM = 676,                 /* UNIFORM  */
-    SHARED = 677,                  /* SHARED  */
-    BUFFER = 678,                  /* BUFFER  */
-    TILEIMAGEEXT = 679,            /* TILEIMAGEEXT  */
-    FLAT = 680,                    /* FLAT  */
-    SMOOTH = 681,                  /* SMOOTH  */
-    LAYOUT = 682,                  /* LAYOUT  */
-    DOUBLECONSTANT = 683,          /* DOUBLECONSTANT  */
-    INT16CONSTANT = 684,           /* INT16CONSTANT  */
-    UINT16CONSTANT = 685,          /* UINT16CONSTANT  */
-    FLOAT16CONSTANT = 686,         /* FLOAT16CONSTANT  */
-    INT32CONSTANT = 687,           /* INT32CONSTANT  */
-    UINT32CONSTANT = 688,          /* UINT32CONSTANT  */
-    INT64CONSTANT = 689,           /* INT64CONSTANT  */
-    UINT64CONSTANT = 690,          /* UINT64CONSTANT  */
-    SUBROUTINE = 691,              /* SUBROUTINE  */
-    DEMOTE = 692,                  /* DEMOTE  */
-    FUNCTION = 693,                /* FUNCTION  */
-    PAYLOADNV = 694,               /* PAYLOADNV  */
-    PAYLOADINNV = 695,             /* PAYLOADINNV  */
-    HITATTRNV = 696,               /* HITATTRNV  */
-    CALLDATANV = 697,              /* CALLDATANV  */
-    CALLDATAINNV = 698,            /* CALLDATAINNV  */
-    PAYLOADEXT = 699,              /* PAYLOADEXT  */
-    PAYLOADINEXT = 700,            /* PAYLOADINEXT  */
-    HITATTREXT = 701,              /* HITATTREXT  */
-    CALLDATAEXT = 702,             /* CALLDATAEXT  */
-    CALLDATAINEXT = 703,           /* CALLDATAINEXT  */
-    PATCH = 704,                   /* PATCH  */
-    SAMPLE = 705,                  /* SAMPLE  */
-    NONUNIFORM = 706,              /* NONUNIFORM  */
-    COHERENT = 707,                /* COHERENT  */
-    VOLATILE = 708,                /* VOLATILE  */
-    RESTRICT = 709,                /* RESTRICT  */
-    READONLY = 710,                /* READONLY  */
-    WRITEONLY = 711,               /* WRITEONLY  */
-    NONTEMPORAL = 712,             /* NONTEMPORAL  */
-    DEVICECOHERENT = 713,          /* DEVICECOHERENT  */
-    QUEUEFAMILYCOHERENT = 714,     /* QUEUEFAMILYCOHERENT  */
-    WORKGROUPCOHERENT = 715,       /* WORKGROUPCOHERENT  */
-    SUBGROUPCOHERENT = 716,        /* SUBGROUPCOHERENT  */
-    NONPRIVATE = 717,              /* NONPRIVATE  */
-    SHADERCALLCOHERENT = 718,      /* SHADERCALLCOHERENT  */
-    NOPERSPECTIVE = 719,           /* NOPERSPECTIVE  */
-    EXPLICITINTERPAMD = 720,       /* EXPLICITINTERPAMD  */
-    PERVERTEXEXT = 721,            /* PERVERTEXEXT  */
-    PERVERTEXNV = 722,             /* PERVERTEXNV  */
-    PERPRIMITIVENV = 723,          /* PERPRIMITIVENV  */
-    PERVIEWNV = 724,               /* PERVIEWNV  */
-    PERTASKNV = 725,               /* PERTASKNV  */
-    PERPRIMITIVEEXT = 726,         /* PERPRIMITIVEEXT  */
-    TASKPAYLOADWORKGROUPEXT = 727, /* TASKPAYLOADWORKGROUPEXT  */
-    PRECISE = 728                  /* PRECISE  */
+    FLOATE5M2_T = 318,             /* FLOATE5M2_T  */
+    FLOATE4M3_T = 319,             /* FLOATE4M3_T  */
+    BFLOAT16_T = 320,              /* BFLOAT16_T  */
+    FLOAT16_T = 321,               /* FLOAT16_T  */
+    FLOAT32_T = 322,               /* FLOAT32_T  */
+    DOUBLE = 323,                  /* DOUBLE  */
+    FLOAT64_T = 324,               /* FLOAT64_T  */
+    INT64_T = 325,                 /* INT64_T  */
+    UINT64_T = 326,                /* UINT64_T  */
+    INT32_T = 327,                 /* INT32_T  */
+    UINT32_T = 328,                /* UINT32_T  */
+    INT16_T = 329,                 /* INT16_T  */
+    UINT16_T = 330,                /* UINT16_T  */
+    INT8_T = 331,                  /* INT8_T  */
+    UINT8_T = 332,                 /* UINT8_T  */
+    I64VEC2 = 333,                 /* I64VEC2  */
+    I64VEC3 = 334,                 /* I64VEC3  */
+    I64VEC4 = 335,                 /* I64VEC4  */
+    U64VEC2 = 336,                 /* U64VEC2  */
+    U64VEC3 = 337,                 /* U64VEC3  */
+    U64VEC4 = 338,                 /* U64VEC4  */
+    I32VEC2 = 339,                 /* I32VEC2  */
+    I32VEC3 = 340,                 /* I32VEC3  */
+    I32VEC4 = 341,                 /* I32VEC4  */
+    U32VEC2 = 342,                 /* U32VEC2  */
+    U32VEC3 = 343,                 /* U32VEC3  */
+    U32VEC4 = 344,                 /* U32VEC4  */
+    I16VEC2 = 345,                 /* I16VEC2  */
+    I16VEC3 = 346,                 /* I16VEC3  */
+    I16VEC4 = 347,                 /* I16VEC4  */
+    U16VEC2 = 348,                 /* U16VEC2  */
+    U16VEC3 = 349,                 /* U16VEC3  */
+    U16VEC4 = 350,                 /* U16VEC4  */
+    I8VEC2 = 351,                  /* I8VEC2  */
+    I8VEC3 = 352,                  /* I8VEC3  */
+    I8VEC4 = 353,                  /* I8VEC4  */
+    U8VEC2 = 354,                  /* U8VEC2  */
+    U8VEC3 = 355,                  /* U8VEC3  */
+    U8VEC4 = 356,                  /* U8VEC4  */
+    DVEC2 = 357,                   /* DVEC2  */
+    DVEC3 = 358,                   /* DVEC3  */
+    DVEC4 = 359,                   /* DVEC4  */
+    DMAT2 = 360,                   /* DMAT2  */
+    DMAT3 = 361,                   /* DMAT3  */
+    DMAT4 = 362,                   /* DMAT4  */
+    BF16VEC2 = 363,                /* BF16VEC2  */
+    BF16VEC3 = 364,                /* BF16VEC3  */
+    BF16VEC4 = 365,                /* BF16VEC4  */
+    FE5M2VEC2 = 366,               /* FE5M2VEC2  */
+    FE5M2VEC3 = 367,               /* FE5M2VEC3  */
+    FE5M2VEC4 = 368,               /* FE5M2VEC4  */
+    FE4M3VEC2 = 369,               /* FE4M3VEC2  */
+    FE4M3VEC3 = 370,               /* FE4M3VEC3  */
+    FE4M3VEC4 = 371,               /* FE4M3VEC4  */
+    F16VEC2 = 372,                 /* F16VEC2  */
+    F16VEC3 = 373,                 /* F16VEC3  */
+    F16VEC4 = 374,                 /* F16VEC4  */
+    F16MAT2 = 375,                 /* F16MAT2  */
+    F16MAT3 = 376,                 /* F16MAT3  */
+    F16MAT4 = 377,                 /* F16MAT4  */
+    F32VEC2 = 378,                 /* F32VEC2  */
+    F32VEC3 = 379,                 /* F32VEC3  */
+    F32VEC4 = 380,                 /* F32VEC4  */
+    F32MAT2 = 381,                 /* F32MAT2  */
+    F32MAT3 = 382,                 /* F32MAT3  */
+    F32MAT4 = 383,                 /* F32MAT4  */
+    F64VEC2 = 384,                 /* F64VEC2  */
+    F64VEC3 = 385,                 /* F64VEC3  */
+    F64VEC4 = 386,                 /* F64VEC4  */
+    F64MAT2 = 387,                 /* F64MAT2  */
+    F64MAT3 = 388,                 /* F64MAT3  */
+    F64MAT4 = 389,                 /* F64MAT4  */
+    DMAT2X2 = 390,                 /* DMAT2X2  */
+    DMAT2X3 = 391,                 /* DMAT2X3  */
+    DMAT2X4 = 392,                 /* DMAT2X4  */
+    DMAT3X2 = 393,                 /* DMAT3X2  */
+    DMAT3X3 = 394,                 /* DMAT3X3  */
+    DMAT3X4 = 395,                 /* DMAT3X4  */
+    DMAT4X2 = 396,                 /* DMAT4X2  */
+    DMAT4X3 = 397,                 /* DMAT4X3  */
+    DMAT4X4 = 398,                 /* DMAT4X4  */
+    F16MAT2X2 = 399,               /* F16MAT2X2  */
+    F16MAT2X3 = 400,               /* F16MAT2X3  */
+    F16MAT2X4 = 401,               /* F16MAT2X4  */
+    F16MAT3X2 = 402,               /* F16MAT3X2  */
+    F16MAT3X3 = 403,               /* F16MAT3X3  */
+    F16MAT3X4 = 404,               /* F16MAT3X4  */
+    F16MAT4X2 = 405,               /* F16MAT4X2  */
+    F16MAT4X3 = 406,               /* F16MAT4X3  */
+    F16MAT4X4 = 407,               /* F16MAT4X4  */
+    F32MAT2X2 = 408,               /* F32MAT2X2  */
+    F32MAT2X3 = 409,               /* F32MAT2X3  */
+    F32MAT2X4 = 410,               /* F32MAT2X4  */
+    F32MAT3X2 = 411,               /* F32MAT3X2  */
+    F32MAT3X3 = 412,               /* F32MAT3X3  */
+    F32MAT3X4 = 413,               /* F32MAT3X4  */
+    F32MAT4X2 = 414,               /* F32MAT4X2  */
+    F32MAT4X3 = 415,               /* F32MAT4X3  */
+    F32MAT4X4 = 416,               /* F32MAT4X4  */
+    F64MAT2X2 = 417,               /* F64MAT2X2  */
+    F64MAT2X3 = 418,               /* F64MAT2X3  */
+    F64MAT2X4 = 419,               /* F64MAT2X4  */
+    F64MAT3X2 = 420,               /* F64MAT3X2  */
+    F64MAT3X3 = 421,               /* F64MAT3X3  */
+    F64MAT3X4 = 422,               /* F64MAT3X4  */
+    F64MAT4X2 = 423,               /* F64MAT4X2  */
+    F64MAT4X3 = 424,               /* F64MAT4X3  */
+    F64MAT4X4 = 425,               /* F64MAT4X4  */
+    ATOMIC_UINT = 426,             /* ATOMIC_UINT  */
+    ACCSTRUCTNV = 427,             /* ACCSTRUCTNV  */
+    ACCSTRUCTEXT = 428,            /* ACCSTRUCTEXT  */
+    RAYQUERYEXT = 429,             /* RAYQUERYEXT  */
+    FCOOPMATNV = 430,              /* FCOOPMATNV  */
+    ICOOPMATNV = 431,              /* ICOOPMATNV  */
+    UCOOPMATNV = 432,              /* UCOOPMATNV  */
+    COOPMAT = 433,                 /* COOPMAT  */
+    COOPVECNV = 434,               /* COOPVECNV  */
+    HITOBJECTNV = 435,             /* HITOBJECTNV  */
+    HITOBJECTATTRNV = 436,         /* HITOBJECTATTRNV  */
+    TENSORLAYOUTNV = 437,          /* TENSORLAYOUTNV  */
+    TENSORVIEWNV = 438,            /* TENSORVIEWNV  */
+    TENSORARM = 439,               /* TENSORARM  */
+    SAMPLERCUBEARRAY = 440,        /* SAMPLERCUBEARRAY  */
+    SAMPLERCUBEARRAYSHADOW = 441,  /* SAMPLERCUBEARRAYSHADOW  */
+    ISAMPLERCUBEARRAY = 442,       /* ISAMPLERCUBEARRAY  */
+    USAMPLERCUBEARRAY = 443,       /* USAMPLERCUBEARRAY  */
+    SAMPLER1D = 444,               /* SAMPLER1D  */
+    SAMPLER1DARRAY = 445,          /* SAMPLER1DARRAY  */
+    SAMPLER1DARRAYSHADOW = 446,    /* SAMPLER1DARRAYSHADOW  */
+    ISAMPLER1D = 447,              /* ISAMPLER1D  */
+    SAMPLER1DSHADOW = 448,         /* SAMPLER1DSHADOW  */
+    SAMPLER2DRECT = 449,           /* SAMPLER2DRECT  */
+    SAMPLER2DRECTSHADOW = 450,     /* SAMPLER2DRECTSHADOW  */
+    ISAMPLER2DRECT = 451,          /* ISAMPLER2DRECT  */
+    USAMPLER2DRECT = 452,          /* USAMPLER2DRECT  */
+    SAMPLERBUFFER = 453,           /* SAMPLERBUFFER  */
+    ISAMPLERBUFFER = 454,          /* ISAMPLERBUFFER  */
+    USAMPLERBUFFER = 455,          /* USAMPLERBUFFER  */
+    SAMPLER2DMS = 456,             /* SAMPLER2DMS  */
+    ISAMPLER2DMS = 457,            /* ISAMPLER2DMS  */
+    USAMPLER2DMS = 458,            /* USAMPLER2DMS  */
+    SAMPLER2DMSARRAY = 459,        /* SAMPLER2DMSARRAY  */
+    ISAMPLER2DMSARRAY = 460,       /* ISAMPLER2DMSARRAY  */
+    USAMPLER2DMSARRAY = 461,       /* USAMPLER2DMSARRAY  */
+    SAMPLEREXTERNALOES = 462,      /* SAMPLEREXTERNALOES  */
+    SAMPLEREXTERNAL2DY2YEXT = 463, /* SAMPLEREXTERNAL2DY2YEXT  */
+    ISAMPLER1DARRAY = 464,         /* ISAMPLER1DARRAY  */
+    USAMPLER1D = 465,              /* USAMPLER1D  */
+    USAMPLER1DARRAY = 466,         /* USAMPLER1DARRAY  */
+    F16SAMPLER1D = 467,            /* F16SAMPLER1D  */
+    F16SAMPLER2D = 468,            /* F16SAMPLER2D  */
+    F16SAMPLER3D = 469,            /* F16SAMPLER3D  */
+    F16SAMPLER2DRECT = 470,        /* F16SAMPLER2DRECT  */
+    F16SAMPLERCUBE = 471,          /* F16SAMPLERCUBE  */
+    F16SAMPLER1DARRAY = 472,       /* F16SAMPLER1DARRAY  */
+    F16SAMPLER2DARRAY = 473,       /* F16SAMPLER2DARRAY  */
+    F16SAMPLERCUBEARRAY = 474,     /* F16SAMPLERCUBEARRAY  */
+    F16SAMPLERBUFFER = 475,        /* F16SAMPLERBUFFER  */
+    F16SAMPLER2DMS = 476,          /* F16SAMPLER2DMS  */
+    F16SAMPLER2DMSARRAY = 477,     /* F16SAMPLER2DMSARRAY  */
+    F16SAMPLER1DSHADOW = 478,      /* F16SAMPLER1DSHADOW  */
+    F16SAMPLER2DSHADOW = 479,      /* F16SAMPLER2DSHADOW  */
+    F16SAMPLER1DARRAYSHADOW = 480, /* F16SAMPLER1DARRAYSHADOW  */
+    F16SAMPLER2DARRAYSHADOW = 481, /* F16SAMPLER2DARRAYSHADOW  */
+    F16SAMPLER2DRECTSHADOW = 482,  /* F16SAMPLER2DRECTSHADOW  */
+    F16SAMPLERCUBESHADOW = 483,    /* F16SAMPLERCUBESHADOW  */
+    F16SAMPLERCUBEARRAYSHADOW = 484, /* F16SAMPLERCUBEARRAYSHADOW  */
+    IMAGE1D = 485,                 /* IMAGE1D  */
+    IIMAGE1D = 486,                /* IIMAGE1D  */
+    UIMAGE1D = 487,                /* UIMAGE1D  */
+    IMAGE2D = 488,                 /* IMAGE2D  */
+    IIMAGE2D = 489,                /* IIMAGE2D  */
+    UIMAGE2D = 490,                /* UIMAGE2D  */
+    IMAGE3D = 491,                 /* IMAGE3D  */
+    IIMAGE3D = 492,                /* IIMAGE3D  */
+    UIMAGE3D = 493,                /* UIMAGE3D  */
+    IMAGE2DRECT = 494,             /* IMAGE2DRECT  */
+    IIMAGE2DRECT = 495,            /* IIMAGE2DRECT  */
+    UIMAGE2DRECT = 496,            /* UIMAGE2DRECT  */
+    IMAGECUBE = 497,               /* IMAGECUBE  */
+    IIMAGECUBE = 498,              /* IIMAGECUBE  */
+    UIMAGECUBE = 499,              /* UIMAGECUBE  */
+    IMAGEBUFFER = 500,             /* IMAGEBUFFER  */
+    IIMAGEBUFFER = 501,            /* IIMAGEBUFFER  */
+    UIMAGEBUFFER = 502,            /* UIMAGEBUFFER  */
+    IMAGE1DARRAY = 503,            /* IMAGE1DARRAY  */
+    IIMAGE1DARRAY = 504,           /* IIMAGE1DARRAY  */
+    UIMAGE1DARRAY = 505,           /* UIMAGE1DARRAY  */
+    IMAGE2DARRAY = 506,            /* IMAGE2DARRAY  */
+    IIMAGE2DARRAY = 507,           /* IIMAGE2DARRAY  */
+    UIMAGE2DARRAY = 508,           /* UIMAGE2DARRAY  */
+    IMAGECUBEARRAY = 509,          /* IMAGECUBEARRAY  */
+    IIMAGECUBEARRAY = 510,         /* IIMAGECUBEARRAY  */
+    UIMAGECUBEARRAY = 511,         /* UIMAGECUBEARRAY  */
+    IMAGE2DMS = 512,               /* IMAGE2DMS  */
+    IIMAGE2DMS = 513,              /* IIMAGE2DMS  */
+    UIMAGE2DMS = 514,              /* UIMAGE2DMS  */
+    IMAGE2DMSARRAY = 515,          /* IMAGE2DMSARRAY  */
+    IIMAGE2DMSARRAY = 516,         /* IIMAGE2DMSARRAY  */
+    UIMAGE2DMSARRAY = 517,         /* UIMAGE2DMSARRAY  */
+    F16IMAGE1D = 518,              /* F16IMAGE1D  */
+    F16IMAGE2D = 519,              /* F16IMAGE2D  */
+    F16IMAGE3D = 520,              /* F16IMAGE3D  */
+    F16IMAGE2DRECT = 521,          /* F16IMAGE2DRECT  */
+    F16IMAGECUBE = 522,            /* F16IMAGECUBE  */
+    F16IMAGE1DARRAY = 523,         /* F16IMAGE1DARRAY  */
+    F16IMAGE2DARRAY = 524,         /* F16IMAGE2DARRAY  */
+    F16IMAGECUBEARRAY = 525,       /* F16IMAGECUBEARRAY  */
+    F16IMAGEBUFFER = 526,          /* F16IMAGEBUFFER  */
+    F16IMAGE2DMS = 527,            /* F16IMAGE2DMS  */
+    F16IMAGE2DMSARRAY = 528,       /* F16IMAGE2DMSARRAY  */
+    I64IMAGE1D = 529,              /* I64IMAGE1D  */
+    U64IMAGE1D = 530,              /* U64IMAGE1D  */
+    I64IMAGE2D = 531,              /* I64IMAGE2D  */
+    U64IMAGE2D = 532,              /* U64IMAGE2D  */
+    I64IMAGE3D = 533,              /* I64IMAGE3D  */
+    U64IMAGE3D = 534,              /* U64IMAGE3D  */
+    I64IMAGE2DRECT = 535,          /* I64IMAGE2DRECT  */
+    U64IMAGE2DRECT = 536,          /* U64IMAGE2DRECT  */
+    I64IMAGECUBE = 537,            /* I64IMAGECUBE  */
+    U64IMAGECUBE = 538,            /* U64IMAGECUBE  */
+    I64IMAGEBUFFER = 539,          /* I64IMAGEBUFFER  */
+    U64IMAGEBUFFER = 540,          /* U64IMAGEBUFFER  */
+    I64IMAGE1DARRAY = 541,         /* I64IMAGE1DARRAY  */
+    U64IMAGE1DARRAY = 542,         /* U64IMAGE1DARRAY  */
+    I64IMAGE2DARRAY = 543,         /* I64IMAGE2DARRAY  */
+    U64IMAGE2DARRAY = 544,         /* U64IMAGE2DARRAY  */
+    I64IMAGECUBEARRAY = 545,       /* I64IMAGECUBEARRAY  */
+    U64IMAGECUBEARRAY = 546,       /* U64IMAGECUBEARRAY  */
+    I64IMAGE2DMS = 547,            /* I64IMAGE2DMS  */
+    U64IMAGE2DMS = 548,            /* U64IMAGE2DMS  */
+    I64IMAGE2DMSARRAY = 549,       /* I64IMAGE2DMSARRAY  */
+    U64IMAGE2DMSARRAY = 550,       /* U64IMAGE2DMSARRAY  */
+    TEXTURECUBEARRAY = 551,        /* TEXTURECUBEARRAY  */
+    ITEXTURECUBEARRAY = 552,       /* ITEXTURECUBEARRAY  */
+    UTEXTURECUBEARRAY = 553,       /* UTEXTURECUBEARRAY  */
+    TEXTURE1D = 554,               /* TEXTURE1D  */
+    ITEXTURE1D = 555,              /* ITEXTURE1D  */
+    UTEXTURE1D = 556,              /* UTEXTURE1D  */
+    TEXTURE1DARRAY = 557,          /* TEXTURE1DARRAY  */
+    ITEXTURE1DARRAY = 558,         /* ITEXTURE1DARRAY  */
+    UTEXTURE1DARRAY = 559,         /* UTEXTURE1DARRAY  */
+    TEXTURE2DRECT = 560,           /* TEXTURE2DRECT  */
+    ITEXTURE2DRECT = 561,          /* ITEXTURE2DRECT  */
+    UTEXTURE2DRECT = 562,          /* UTEXTURE2DRECT  */
+    TEXTUREBUFFER = 563,           /* TEXTUREBUFFER  */
+    ITEXTUREBUFFER = 564,          /* ITEXTUREBUFFER  */
+    UTEXTUREBUFFER = 565,          /* UTEXTUREBUFFER  */
+    TEXTURE2DMS = 566,             /* TEXTURE2DMS  */
+    ITEXTURE2DMS = 567,            /* ITEXTURE2DMS  */
+    UTEXTURE2DMS = 568,            /* UTEXTURE2DMS  */
+    TEXTURE2DMSARRAY = 569,        /* TEXTURE2DMSARRAY  */
+    ITEXTURE2DMSARRAY = 570,       /* ITEXTURE2DMSARRAY  */
+    UTEXTURE2DMSARRAY = 571,       /* UTEXTURE2DMSARRAY  */
+    F16TEXTURE1D = 572,            /* F16TEXTURE1D  */
+    F16TEXTURE2D = 573,            /* F16TEXTURE2D  */
+    F16TEXTURE3D = 574,            /* F16TEXTURE3D  */
+    F16TEXTURE2DRECT = 575,        /* F16TEXTURE2DRECT  */
+    F16TEXTURECUBE = 576,          /* F16TEXTURECUBE  */
+    F16TEXTURE1DARRAY = 577,       /* F16TEXTURE1DARRAY  */
+    F16TEXTURE2DARRAY = 578,       /* F16TEXTURE2DARRAY  */
+    F16TEXTURECUBEARRAY = 579,     /* F16TEXTURECUBEARRAY  */
+    F16TEXTUREBUFFER = 580,        /* F16TEXTUREBUFFER  */
+    F16TEXTURE2DMS = 581,          /* F16TEXTURE2DMS  */
+    F16TEXTURE2DMSARRAY = 582,     /* F16TEXTURE2DMSARRAY  */
+    SUBPASSINPUT = 583,            /* SUBPASSINPUT  */
+    SUBPASSINPUTMS = 584,          /* SUBPASSINPUTMS  */
+    ISUBPASSINPUT = 585,           /* ISUBPASSINPUT  */
+    ISUBPASSINPUTMS = 586,         /* ISUBPASSINPUTMS  */
+    USUBPASSINPUT = 587,           /* USUBPASSINPUT  */
+    USUBPASSINPUTMS = 588,         /* USUBPASSINPUTMS  */
+    F16SUBPASSINPUT = 589,         /* F16SUBPASSINPUT  */
+    F16SUBPASSINPUTMS = 590,       /* F16SUBPASSINPUTMS  */
+    SPIRV_INSTRUCTION = 591,       /* SPIRV_INSTRUCTION  */
+    SPIRV_EXECUTION_MODE = 592,    /* SPIRV_EXECUTION_MODE  */
+    SPIRV_EXECUTION_MODE_ID = 593, /* SPIRV_EXECUTION_MODE_ID  */
+    SPIRV_DECORATE = 594,          /* SPIRV_DECORATE  */
+    SPIRV_DECORATE_ID = 595,       /* SPIRV_DECORATE_ID  */
+    SPIRV_DECORATE_STRING = 596,   /* SPIRV_DECORATE_STRING  */
+    SPIRV_TYPE = 597,              /* SPIRV_TYPE  */
+    SPIRV_STORAGE_CLASS = 598,     /* SPIRV_STORAGE_CLASS  */
+    SPIRV_BY_REFERENCE = 599,      /* SPIRV_BY_REFERENCE  */
+    SPIRV_LITERAL = 600,           /* SPIRV_LITERAL  */
+    ATTACHMENTEXT = 601,           /* ATTACHMENTEXT  */
+    IATTACHMENTEXT = 602,          /* IATTACHMENTEXT  */
+    UATTACHMENTEXT = 603,          /* UATTACHMENTEXT  */
+    LEFT_OP = 604,                 /* LEFT_OP  */
+    RIGHT_OP = 605,                /* RIGHT_OP  */
+    INC_OP = 606,                  /* INC_OP  */
+    DEC_OP = 607,                  /* DEC_OP  */
+    LE_OP = 608,                   /* LE_OP  */
+    GE_OP = 609,                   /* GE_OP  */
+    EQ_OP = 610,                   /* EQ_OP  */
+    NE_OP = 611,                   /* NE_OP  */
+    AND_OP = 612,                  /* AND_OP  */
+    OR_OP = 613,                   /* OR_OP  */
+    XOR_OP = 614,                  /* XOR_OP  */
+    MUL_ASSIGN = 615,              /* MUL_ASSIGN  */
+    DIV_ASSIGN = 616,              /* DIV_ASSIGN  */
+    ADD_ASSIGN = 617,              /* ADD_ASSIGN  */
+    MOD_ASSIGN = 618,              /* MOD_ASSIGN  */
+    LEFT_ASSIGN = 619,             /* LEFT_ASSIGN  */
+    RIGHT_ASSIGN = 620,            /* RIGHT_ASSIGN  */
+    AND_ASSIGN = 621,              /* AND_ASSIGN  */
+    XOR_ASSIGN = 622,              /* XOR_ASSIGN  */
+    OR_ASSIGN = 623,               /* OR_ASSIGN  */
+    SUB_ASSIGN = 624,              /* SUB_ASSIGN  */
+    STRING_LITERAL = 625,          /* STRING_LITERAL  */
+    LEFT_PAREN = 626,              /* LEFT_PAREN  */
+    RIGHT_PAREN = 627,             /* RIGHT_PAREN  */
+    LEFT_BRACKET = 628,            /* LEFT_BRACKET  */
+    RIGHT_BRACKET = 629,           /* RIGHT_BRACKET  */
+    LEFT_BRACE = 630,              /* LEFT_BRACE  */
+    RIGHT_BRACE = 631,             /* RIGHT_BRACE  */
+    DOT = 632,                     /* DOT  */
+    COMMA = 633,                   /* COMMA  */
+    COLON = 634,                   /* COLON  */
+    EQUAL = 635,                   /* EQUAL  */
+    SEMICOLON = 636,               /* SEMICOLON  */
+    BANG = 637,                    /* BANG  */
+    DASH = 638,                    /* DASH  */
+    TILDE = 639,                   /* TILDE  */
+    PLUS = 640,                    /* PLUS  */
+    STAR = 641,                    /* STAR  */
+    SLASH = 642,                   /* SLASH  */
+    PERCENT = 643,                 /* PERCENT  */
+    LEFT_ANGLE = 644,              /* LEFT_ANGLE  */
+    RIGHT_ANGLE = 645,             /* RIGHT_ANGLE  */
+    VERTICAL_BAR = 646,            /* VERTICAL_BAR  */
+    CARET = 647,                   /* CARET  */
+    AMPERSAND = 648,               /* AMPERSAND  */
+    QUESTION = 649,                /* QUESTION  */
+    INVARIANT = 650,               /* INVARIANT  */
+    HIGH_PRECISION = 651,          /* HIGH_PRECISION  */
+    MEDIUM_PRECISION = 652,        /* MEDIUM_PRECISION  */
+    LOW_PRECISION = 653,           /* LOW_PRECISION  */
+    PRECISION = 654,               /* PRECISION  */
+    PACKED = 655,                  /* PACKED  */
+    RESOURCE = 656,                /* RESOURCE  */
+    SUPERP = 657,                  /* SUPERP  */
+    FLOATCONSTANT = 658,           /* FLOATCONSTANT  */
+    INTCONSTANT = 659,             /* INTCONSTANT  */
+    UINTCONSTANT = 660,            /* UINTCONSTANT  */
+    BOOLCONSTANT = 661,            /* BOOLCONSTANT  */
+    IDENTIFIER = 662,              /* IDENTIFIER  */
+    TYPE_NAME = 663,               /* TYPE_NAME  */
+    CENTROID = 664,                /* CENTROID  */
+    IN = 665,                      /* IN  */
+    OUT = 666,                     /* OUT  */
+    INOUT = 667,                   /* INOUT  */
+    STRUCT = 668,                  /* STRUCT  */
+    VOID = 669,                    /* VOID  */
+    WHILE = 670,                   /* WHILE  */
+    BREAK = 671,                   /* BREAK  */
+    CONTINUE = 672,                /* CONTINUE  */
+    DO = 673,                      /* DO  */
+    ELSE = 674,                    /* ELSE  */
+    FOR = 675,                     /* FOR  */
+    IF = 676,                      /* IF  */
+    DISCARD = 677,                 /* DISCARD  */
+    RETURN = 678,                  /* RETURN  */
+    SWITCH = 679,                  /* SWITCH  */
+    CASE = 680,                    /* CASE  */
+    DEFAULT = 681,                 /* DEFAULT  */
+    TERMINATE_INVOCATION = 682,    /* TERMINATE_INVOCATION  */
+    TERMINATE_RAY = 683,           /* TERMINATE_RAY  */
+    IGNORE_INTERSECTION = 684,     /* IGNORE_INTERSECTION  */
+    UNIFORM = 685,                 /* UNIFORM  */
+    SHARED = 686,                  /* SHARED  */
+    BUFFER = 687,                  /* BUFFER  */
+    TILEIMAGEEXT = 688,            /* TILEIMAGEEXT  */
+    FLAT = 689,                    /* FLAT  */
+    SMOOTH = 690,                  /* SMOOTH  */
+    LAYOUT = 691,                  /* LAYOUT  */
+    DOUBLECONSTANT = 692,          /* DOUBLECONSTANT  */
+    INT16CONSTANT = 693,           /* INT16CONSTANT  */
+    UINT16CONSTANT = 694,          /* UINT16CONSTANT  */
+    FLOAT16CONSTANT = 695,         /* FLOAT16CONSTANT  */
+    INT32CONSTANT = 696,           /* INT32CONSTANT  */
+    UINT32CONSTANT = 697,          /* UINT32CONSTANT  */
+    INT64CONSTANT = 698,           /* INT64CONSTANT  */
+    UINT64CONSTANT = 699,          /* UINT64CONSTANT  */
+    SUBROUTINE = 700,              /* SUBROUTINE  */
+    DEMOTE = 701,                  /* DEMOTE  */
+    FUNCTION = 702,                /* FUNCTION  */
+    PAYLOADNV = 703,               /* PAYLOADNV  */
+    PAYLOADINNV = 704,             /* PAYLOADINNV  */
+    HITATTRNV = 705,               /* HITATTRNV  */
+    CALLDATANV = 706,              /* CALLDATANV  */
+    CALLDATAINNV = 707,            /* CALLDATAINNV  */
+    PAYLOADEXT = 708,              /* PAYLOADEXT  */
+    PAYLOADINEXT = 709,            /* PAYLOADINEXT  */
+    HITATTREXT = 710,              /* HITATTREXT  */
+    CALLDATAEXT = 711,             /* CALLDATAEXT  */
+    CALLDATAINEXT = 712,           /* CALLDATAINEXT  */
+    PATCH = 713,                   /* PATCH  */
+    SAMPLE = 714,                  /* SAMPLE  */
+    NONUNIFORM = 715,              /* NONUNIFORM  */
+    COHERENT = 716,                /* COHERENT  */
+    VOLATILE = 717,                /* VOLATILE  */
+    RESTRICT = 718,                /* RESTRICT  */
+    READONLY = 719,                /* READONLY  */
+    WRITEONLY = 720,               /* WRITEONLY  */
+    NONTEMPORAL = 721,             /* NONTEMPORAL  */
+    DEVICECOHERENT = 722,          /* DEVICECOHERENT  */
+    QUEUEFAMILYCOHERENT = 723,     /* QUEUEFAMILYCOHERENT  */
+    WORKGROUPCOHERENT = 724,       /* WORKGROUPCOHERENT  */
+    SUBGROUPCOHERENT = 725,        /* SUBGROUPCOHERENT  */
+    NONPRIVATE = 726,              /* NONPRIVATE  */
+    SHADERCALLCOHERENT = 727,      /* SHADERCALLCOHERENT  */
+    NOPERSPECTIVE = 728,           /* NOPERSPECTIVE  */
+    EXPLICITINTERPAMD = 729,       /* EXPLICITINTERPAMD  */
+    PERVERTEXEXT = 730,            /* PERVERTEXEXT  */
+    PERVERTEXNV = 731,             /* PERVERTEXNV  */
+    PERPRIMITIVENV = 732,          /* PERPRIMITIVENV  */
+    PERVIEWNV = 733,               /* PERVIEWNV  */
+    PERTASKNV = 734,               /* PERTASKNV  */
+    PERPRIMITIVEEXT = 735,         /* PERPRIMITIVEEXT  */
+    TASKPAYLOADWORKGROUPEXT = 736, /* TASKPAYLOADWORKGROUPEXT  */
+    PRECISE = 737                  /* PRECISE  */
  };
  typedef enum yytokentype yytoken_kind_t;
 #endif
@@ -572,7 +581,7 @@ union YYSTYPE
        glslang::TTypeParameters* typeParameters;
    } interm;

-#line 576 "MachineIndependent/glslang_tab.cpp.h"
+#line 585 "MachineIndependent/glslang_tab.cpp.h"

 };
 typedef union YYSTYPE YYSTYPE;
--- a/3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp
@@ -629,6 +629,14 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
    case EOpConstructBF16Vec2:  out.debug << "Construct bf16vec2";   break;
    case EOpConstructBF16Vec3:  out.debug << "Construct bf16vec3";   break;
    case EOpConstructBF16Vec4:  out.debug << "Construct bf16vec4";   break;
+    case EOpConstructFloatE5M2:  out.debug << "Construct floate5m2_t"; break;
+    case EOpConstructFloatE5M2Vec2:  out.debug << "Construct fe5m2vec2";   break;
+    case EOpConstructFloatE5M2Vec3:  out.debug << "Construct fe5m2vec3";   break;
+    case EOpConstructFloatE5M2Vec4:  out.debug << "Construct fe5m2vec4";   break;
+    case EOpConstructFloatE4M3:  out.debug << "Construct floate4m3_t"; break;
+    case EOpConstructFloatE4M3Vec2:  out.debug << "Construct fe4m3vec2";   break;
+    case EOpConstructFloatE4M3Vec3:  out.debug << "Construct fe4m3vec3";   break;
+    case EOpConstructFloatE4M3Vec4:  out.debug << "Construct fe4m3vec4";   break;
    case EOpConstructFloat16:   out.debug << "Construct float16_t"; break;
    case EOpConstructF16Vec2:   out.debug << "Construct f16vec2";   break;
    case EOpConstructF16Vec3:   out.debug << "Construct f16vec3";   break;
@@ -650,6 +658,11 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
    case EOpConstructCooperativeVectorNV:  out.debug << "Construct cooperative vector NV";  break;
    case EOpConstructAccStruct: out.debug << "Construct acceleration structure"; break;

+    case EOpBitCastArrayQCOM:              out.debug << "Bitcast To Array QCOM"; break;
+    case EOpExtractSubArrayQCOM:           out.debug << "Extract Subarray QCOM"; break;
+    case EOpCompositeConstructCoopMatQCOM:   out.debug << "Construct Cooperative Matrix QCOM"; break;
+    case EOpCompositeExtractCoopMatQCOM:     out.debug << "Extract Cooperative Matrix QCOM"; break;
+
    case EOpLessThan:         out.debug << "Compare Less Than";             break;
    case EOpGreaterThan:      out.debug << "Compare Greater Than";          break;
    case EOpLessThanEqual:    out.debug << "Compare Less Than or Equal";    break;
@@ -975,6 +988,10 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
    case EOpCooperativeVectorOuterProductAccumulateNV: out.debug << "Cooperative vector outer product accumulate NV"; break;
    case EOpCooperativeVectorReduceSumAccumulateNV: out.debug << "Cooperative vector reduce sum accumulate NV"; break;

+    case EOpTensorReadARM:   out.debug << "Read from tensor";  break;
+    case EOpTensorWriteARM:  out.debug << "Write to tensor";  break;
+    case EOpTensorSizeARM:   out.debug << "Get tensor size";  break;
+
    case EOpIsHelperInvocation: out.debug << "IsHelperInvocation"; break;
    case EOpDebugPrintf:  out.debug << "Debug printf";  break;

@@ -1164,6 +1181,8 @@ static void OutputConstantUnion(TInfoSink& out, const TIntermTyped* node, const
        case EbtDouble:
        case EbtFloat16:
        case EbtBFloat16:
+        case EbtFloatE5M2:
+        case EbtFloatE4M3:
            OutputDouble(out, constUnion[i].getDConst(), extra);
            out.debug << "\n";
            break;
--- a/3rdparty/glslang/glslang/MachineIndependent/linkValidate.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/linkValidate.cpp
@@ -535,6 +535,9 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
            error(infoSink, "number of invocations must match between compilation units");
    }

+    // The GLSL specification requires that at least one compilation unit
+    // must declare the vertices layout, but not all units need to do so.
+    // However, all declarations must match.
    if (vertices == TQualifier::layoutNotSet)
        vertices = unit.vertices;
    else if (unit.vertices != TQualifier::layoutNotSet && vertices != unit.vertices) {
@@ -545,20 +548,30 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
        else
            assert(0);
    }
+
+    // The mesh shader extension requires that at least one compilation unit
+    // must declare the max_primitives layout, but not all units need to do so.
+    // However, all declarations must match.
    if (primitives == TQualifier::layoutNotSet)
        primitives = unit.primitives;
-    else if (primitives != unit.primitives) {
+    else if (unit.primitives != TQualifier::layoutNotSet && primitives != unit.primitives) {
        if (language == EShLangMesh)
            error(infoSink, "Contradictory layout max_primitives values");
        else
            assert(0);
    }

+    // The GLSL specification requires that at least one compilation unit
+    // must declare the input primitive layout, but not all units need to do so.
+    // However, all declarations must match.
    if (inputPrimitive == ElgNone)
        inputPrimitive = unit.inputPrimitive;
    else if (unit.inputPrimitive != ElgNone && inputPrimitive != unit.inputPrimitive)
        error(infoSink, "Contradictory input layout primitives");

+    // The GLSL specification requires that at least one compilation unit
+    // must declare the output primitive layout, but not all units need to do so.
+    // However, all declarations must match.
    if (outputPrimitive == ElgNone)
        outputPrimitive = unit.outputPrimitive;
    else if (unit.outputPrimitive != ElgNone && outputPrimitive != unit.outputPrimitive)
@@ -567,19 +580,27 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
    if (originUpperLeft != unit.originUpperLeft || pixelCenterInteger != unit.pixelCenterInteger)
        error(infoSink, "gl_FragCoord redeclarations must match across shaders");

+    // The GLSL specification requires that at least one compilation unit
+    // must declare the vertex spacing layout, but not all units need to do so.
+    // However, all declarations must match.
    if (vertexSpacing == EvsNone)
        vertexSpacing = unit.vertexSpacing;
-    else if (vertexSpacing != unit.vertexSpacing)
+    else if (unit.vertexSpacing != EvsNone && vertexSpacing != unit.vertexSpacing)
        error(infoSink, "Contradictory input vertex spacing");

+    // The GLSL specification requires that at least one compilation unit
+    // must declare the triangle ordering layout, but not all units need to do so.
+    // However, all declarations must match.
    if (vertexOrder == EvoNone)
        vertexOrder = unit.vertexOrder;
-    else if (vertexOrder != unit.vertexOrder)
+    else if (unit.vertexOrder != EvoNone && vertexOrder != unit.vertexOrder)
        error(infoSink, "Contradictory triangle ordering");

    MERGE_TRUE(pointMode);

    for (int i = 0; i < 3; ++i) {
+        // The GLSL specification requires that all workgroup size declarations must match,
+        // but not all units have to declare the layout.
        if (unit.localSizeNotDefault[i]) {
            if (!localSizeNotDefault[i]) {
                localSize[i] = unit.localSize[i];
@@ -589,9 +610,11 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
                error(infoSink, "Contradictory local size");
        }

+        // The GLSL specification requires that all workgroup size specialization
+        // id declarations must match, but not all units have to declare the layout.
        if (localSizeSpecId[i] == TQualifier::layoutNotSet)
            localSizeSpecId[i] = unit.localSizeSpecId[i];
-        else if (localSizeSpecId[i] != unit.localSizeSpecId[i])
+        else if (unit.localSizeSpecId[i] != TQualifier::layoutNotSet && localSizeSpecId[i] != unit.localSizeSpecId[i])
            error(infoSink, "Contradictory local size specialization ids");
    }

@@ -602,9 +625,11 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
    MERGE_TRUE(nonCoherentStencilAttachmentReadEXT);
    MERGE_TRUE(nonCoherentTileAttachmentReadQCOM);

+    // The GLSL specification requires that all depth layout redeclarations must match,
+    // but not all units have to declare the layout.
    if (depthLayout == EldNone)
        depthLayout = unit.depthLayout;
-    else if (depthLayout != unit.depthLayout)
+    else if (unit.depthLayout != EldNone && depthLayout != unit.depthLayout)
        error(infoSink, "Contradictory depth layouts");

    MERGE_TRUE(depthReplacing);
@@ -615,9 +640,11 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
    MERGE_TRUE(xfbMode);

    for (size_t b = 0; b < xfbBuffers.size(); ++b) {
+        // The GLSL specification requires that all xfb_stride declarations for
+        // the same buffer must match, but not all units have to declare the layout.
        if (xfbBuffers[b].stride == TQualifier::layoutXfbStrideEnd)
            xfbBuffers[b].stride = unit.xfbBuffers[b].stride;
-        else if (xfbBuffers[b].stride != unit.xfbBuffers[b].stride)
+        else if (unit.xfbBuffers[b].stride != TQualifier::layoutXfbStrideEnd && xfbBuffers[b].stride != unit.xfbBuffers[b].stride)
            error(infoSink, "Contradictory xfb_stride");
        xfbBuffers[b].implicitStride = std::max(xfbBuffers[b].implicitStride, unit.xfbBuffers[b].implicitStride);
        if (unit.xfbBuffers[b].contains64BitType)
@@ -2386,6 +2413,8 @@ int TIntermediate::getBaseAlignmentScalar(const TType& type, int& size)
    case EbtDouble:  size = 8; return 8;
    case EbtFloat16: size = 2; return 2;
    case EbtBFloat16: size = 2; return 2;
+    case EbtFloatE5M2:
+    case EbtFloatE4M3:
    case EbtInt8:
    case EbtUint8:   size = 1; return 1;
    case EbtInt16:
--- a/3rdparty/glslang/glslang/MachineIndependent/parseVersions.h
+++ b/3rdparty/glslang/glslang/MachineIndependent/parseVersions.h
@@ -104,6 +104,8 @@ public:
    virtual void float16Check(const TSourceLoc&, const char* op, bool builtIn = false);
    virtual void float16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false);
    virtual void bfloat16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false);
+    virtual void floate5m2ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false);
+    virtual void floate4m3ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false);
    virtual bool float16Arithmetic();
    virtual void requireFloat16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc);
    virtual void int16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false);
@@ -122,9 +124,11 @@ public:
    virtual void fcoopmatCheckNV(const TSourceLoc&, const char* op, bool builtIn = false);
    virtual void intcoopmatCheckNV(const TSourceLoc&, const char *op, bool builtIn = false);
    virtual void coopmatCheck(const TSourceLoc&, const char* op, bool builtIn = false);
+    virtual void coopmatConverisonCheckQCOM(const TSourceLoc& loc, const char* op, bool builtIn = false);
    virtual void tensorLayoutViewCheck(const TSourceLoc&, const char* op, bool builtIn = false);
    virtual void coopvecCheck(const TSourceLoc&, const char* op, bool builtIn = false);
    virtual void intattachmentCheck(const TSourceLoc&, const char *op, bool builtIn = false);
+    virtual void tensorCheckARM(const TSourceLoc&, const char *op, bool builtIn = false);
    bool relaxedErrors()    const { return (messages & EShMsgRelaxedErrors) != 0; }
    bool suppressWarnings() const { return (messages & EShMsgSuppressWarnings) != 0; }
    bool isForwardCompatible() const { return forwardCompatible; }
--- a/3rdparty/glslang/glslang/ResourceLimits/ResourceLimits.cpp
+++ b/3rdparty/glslang/glslang/ResourceLimits/ResourceLimits.cpp
@@ -39,9 +39,9 @@

 #include "glslang/Public/ResourceLimits.h"

-TBuiltInResource Resources;
+static TBuiltInResource Resources;

-const TBuiltInResource DefaultTBuiltInResource = {
+static const TBuiltInResource DefaultTBuiltInResource = {
    /* .MaxLights = */ 32,
    /* .MaxClipPlanes = */ 6,
    /* .MaxTextureUnits = */ 32,