diff --git a/3rdparty/glslang/SPIRV/GLSL.ext.KHR.h b/3rdparty/glslang/SPIRV/GLSL.ext.KHR.h
index 344dd398c..4614a3c67 100644
--- a/3rdparty/glslang/SPIRV/GLSL.ext.KHR.h
+++ b/3rdparty/glslang/SPIRV/GLSL.ext.KHR.h
@@ -32,5 +32,5 @@ static const char* const E_SPV_KHR_shader_ballot                = "SPV_KHR_shade
 
 // SPV_KHR_shader_draw_parameters
 static const char* const E_SPV_KHR_shader_draw_parameters       = "SPV_KHR_shader_draw_parameters";
-
+static const char* const E_SPV_KHR_subgroup_vote                = "SPV_KHR_subgroup_vote";
 #endif  // #ifndef GLSLextKHR_H
diff --git a/3rdparty/glslang/SPIRV/GlslangToSpv.cpp b/3rdparty/glslang/SPIRV/GlslangToSpv.cpp
index 5c439faa3..81243f9f9 100755
--- a/3rdparty/glslang/SPIRV/GlslangToSpv.cpp
+++ b/3rdparty/glslang/SPIRV/GlslangToSpv.cpp
@@ -161,7 +161,7 @@ protected:
     spv::Id makeSmearedConstant(spv::Id constant, int vectorSize);
     spv::Id createAtomicOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector<spv::Id>& operands, glslang::TBasicType typeProxy);
     spv::Id createInvocationsOperation(glslang::TOperator op, spv::Id typeId, std::vector<spv::Id>& operands, glslang::TBasicType typeProxy);
-    spv::Id CreateInvocationsVectorOperation(spv::Op op, spv::Id typeId, std::vector<spv::Id>& operands);
+    spv::Id CreateInvocationsVectorOperation(spv::Op op, spv::GroupOperation groupOperation, spv::Id typeId, std::vector<spv::Id>& operands);
     spv::Id createMiscOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector<spv::Id>& operands, glslang::TBasicType typeProxy);
     spv::Id createNoArgOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId);
     spv::Id getSymbolId(const glslang::TIntermSymbol* node);
@@ -1115,6 +1115,9 @@ bool TGlslangToSpvTraverser::visitBinary(glslang::TVisit /* visit */, glslang::T
             builder.accessChainPushSwizzle(swizzle, convertGlslangToSpvType(node->getLeft()->getType()));
         }
         return false;
+    case glslang::EOpMatrixSwizzle:
+        logger->missingFunctionality("matrix swizzle");
+        return true;
     case glslang::EOpLogicalOr:
     case glslang::EOpLogicalAnd:
         {
@@ -2012,7 +2015,6 @@ spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& ty
 #ifdef AMD_EXTENSIONS
     case glslang::EbtFloat16:
         builder.addExtension(spv::E_SPV_AMD_gpu_shader_half_float);
-        builder.addCapability(spv::CapabilityFloat16);
         spvType = builder.makeFloatType(16);
         break;
 #endif
@@ -3740,6 +3742,18 @@ spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, spv:
     case glslang::EOpMinInvocationsNonUniform:
     case glslang::EOpMaxInvocationsNonUniform:
     case glslang::EOpAddInvocationsNonUniform:
+    case glslang::EOpMinInvocationsInclusiveScan:
+    case glslang::EOpMaxInvocationsInclusiveScan:
+    case glslang::EOpAddInvocationsInclusiveScan:
+    case glslang::EOpMinInvocationsInclusiveScanNonUniform:
+    case glslang::EOpMaxInvocationsInclusiveScanNonUniform:
+    case glslang::EOpAddInvocationsInclusiveScanNonUniform:
+    case glslang::EOpMinInvocationsExclusiveScan:
+    case glslang::EOpMaxInvocationsExclusiveScan:
+    case glslang::EOpAddInvocationsExclusiveScan:
+    case glslang::EOpMinInvocationsExclusiveScanNonUniform:
+    case glslang::EOpMaxInvocationsExclusiveScanNonUniform:
+    case glslang::EOpAddInvocationsExclusiveScanNonUniform:
 #endif
     {
         std::vector<spv::Id> operands;
@@ -4127,26 +4141,64 @@ spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op
 #endif
 
     spv::Op opCode = spv::OpNop;
-
     std::vector<spv::Id> spvGroupOperands;
+    spv::GroupOperation groupOperation = spv::GroupOperationMax;
+
     if (op == glslang::EOpBallot || op == glslang::EOpReadFirstInvocation ||
         op == glslang::EOpReadInvocation) {
         builder.addExtension(spv::E_SPV_KHR_shader_ballot);
         builder.addCapability(spv::CapabilitySubgroupBallotKHR);
+    } else if (op == glslang::EOpAnyInvocation ||
+        op == glslang::EOpAllInvocations ||
+        op == glslang::EOpAllInvocationsEqual) {
+        builder.addExtension(spv::E_SPV_KHR_subgroup_vote);
+        builder.addCapability(spv::CapabilitySubgroupVoteKHR);
     } else {
         builder.addCapability(spv::CapabilityGroups);
 #ifdef AMD_EXTENSIONS
         if (op == glslang::EOpMinInvocationsNonUniform ||
             op == glslang::EOpMaxInvocationsNonUniform ||
-            op == glslang::EOpAddInvocationsNonUniform)
+            op == glslang::EOpAddInvocationsNonUniform ||
+            op == glslang::EOpMinInvocationsInclusiveScanNonUniform ||
+            op == glslang::EOpMaxInvocationsInclusiveScanNonUniform ||
+            op == glslang::EOpAddInvocationsInclusiveScanNonUniform ||
+            op == glslang::EOpMinInvocationsExclusiveScanNonUniform ||
+            op == glslang::EOpMaxInvocationsExclusiveScanNonUniform ||
+            op == glslang::EOpAddInvocationsExclusiveScanNonUniform)
             builder.addExtension(spv::E_SPV_AMD_shader_ballot);
 #endif
 
         spvGroupOperands.push_back(builder.makeUintConstant(spv::ScopeSubgroup));
 #ifdef AMD_EXTENSIONS
-        if (op == glslang::EOpMinInvocations || op == glslang::EOpMaxInvocations || op == glslang::EOpAddInvocations ||
-            op == glslang::EOpMinInvocationsNonUniform || op == glslang::EOpMaxInvocationsNonUniform || op == glslang::EOpAddInvocationsNonUniform)
-            spvGroupOperands.push_back(spv::GroupOperationReduce);
+        switch (op) {
+        case glslang::EOpMinInvocations:
+        case glslang::EOpMaxInvocations:
+        case glslang::EOpAddInvocations:
+        case glslang::EOpMinInvocationsNonUniform:
+        case glslang::EOpMaxInvocationsNonUniform:
+        case glslang::EOpAddInvocationsNonUniform:
+            groupOperation = spv::GroupOperationReduce;
+            spvGroupOperands.push_back(groupOperation);
+            break;
+        case glslang::EOpMinInvocationsInclusiveScan:
+        case glslang::EOpMaxInvocationsInclusiveScan:
+        case glslang::EOpAddInvocationsInclusiveScan:
+        case glslang::EOpMinInvocationsInclusiveScanNonUniform:
+        case glslang::EOpMaxInvocationsInclusiveScanNonUniform:
+        case glslang::EOpAddInvocationsInclusiveScanNonUniform:
+            groupOperation = spv::GroupOperationInclusiveScan;
+            spvGroupOperands.push_back(groupOperation);
+            break;
+        case glslang::EOpMinInvocationsExclusiveScan:
+        case glslang::EOpMaxInvocationsExclusiveScan:
+        case glslang::EOpAddInvocationsExclusiveScan:
+        case glslang::EOpMinInvocationsExclusiveScanNonUniform:
+        case glslang::EOpMaxInvocationsExclusiveScanNonUniform:
+        case glslang::EOpAddInvocationsExclusiveScanNonUniform:
+            groupOperation = spv::GroupOperationExclusiveScan;
+            spvGroupOperands.push_back(groupOperation);
+            break;
+        }
 #endif
     }
 
@@ -4155,24 +4207,18 @@ spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op
 
     switch (op) {
     case glslang::EOpAnyInvocation:
-        opCode = spv::OpGroupAny;
+        opCode = spv::OpSubgroupAnyKHR;
         break;
     case glslang::EOpAllInvocations:
-        opCode = spv::OpGroupAll;
+        opCode = spv::OpSubgroupAllKHR;
         break;
     case glslang::EOpAllInvocationsEqual:
-    {
-        spv::Id groupAll = builder.createOp(spv::OpGroupAll, typeId, spvGroupOperands);
-        spv::Id groupAny = builder.createOp(spv::OpGroupAny, typeId, spvGroupOperands);
-
-        return builder.createBinOp(spv::OpLogicalOr, typeId, groupAll,
-                                   builder.createUnaryOp(spv::OpLogicalNot, typeId, groupAny));
-    }
-
+        opCode = spv::OpSubgroupAllEqualKHR;
+        break;
     case glslang::EOpReadInvocation:
         opCode = spv::OpSubgroupReadInvocationKHR;
         if (builder.isVectorType(typeId))
-            return CreateInvocationsVectorOperation(opCode, typeId, operands);
+            return CreateInvocationsVectorOperation(opCode, groupOperation, typeId, operands);
         break;
     case glslang::EOpReadFirstInvocation:
         opCode = spv::OpSubgroupFirstInvocationKHR;
@@ -4202,7 +4248,15 @@ spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op
     case glslang::EOpMinInvocations:
     case glslang::EOpMaxInvocations:
     case glslang::EOpAddInvocations:
-        if (op == glslang::EOpMinInvocations) {
+    case glslang::EOpMinInvocationsInclusiveScan:
+    case glslang::EOpMaxInvocationsInclusiveScan:
+    case glslang::EOpAddInvocationsInclusiveScan:
+    case glslang::EOpMinInvocationsExclusiveScan:
+    case glslang::EOpMaxInvocationsExclusiveScan:
+    case glslang::EOpAddInvocationsExclusiveScan:
+        if (op == glslang::EOpMinInvocations ||
+            op == glslang::EOpMinInvocationsInclusiveScan ||
+            op == glslang::EOpMinInvocationsExclusiveScan) {
             if (isFloat)
                 opCode = spv::OpGroupFMin;
             else {
@@ -4211,7 +4265,9 @@ spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op
                 else
                     opCode = spv::OpGroupSMin;
             }
-        } else if (op == glslang::EOpMaxInvocations) {
+        } else if (op == glslang::EOpMaxInvocations ||
+                   op == glslang::EOpMaxInvocationsInclusiveScan ||
+                   op == glslang::EOpMaxInvocationsExclusiveScan) {
             if (isFloat)
                 opCode = spv::OpGroupFMax;
             else {
@@ -4228,13 +4284,21 @@ spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op
         }
 
         if (builder.isVectorType(typeId))
-            return CreateInvocationsVectorOperation(opCode, typeId, operands);
+            return CreateInvocationsVectorOperation(opCode, groupOperation, typeId, operands);
 
         break;
     case glslang::EOpMinInvocationsNonUniform:
     case glslang::EOpMaxInvocationsNonUniform:
     case glslang::EOpAddInvocationsNonUniform:
-        if (op == glslang::EOpMinInvocationsNonUniform) {
+    case glslang::EOpMinInvocationsInclusiveScanNonUniform:
+    case glslang::EOpMaxInvocationsInclusiveScanNonUniform:
+    case glslang::EOpAddInvocationsInclusiveScanNonUniform:
+    case glslang::EOpMinInvocationsExclusiveScanNonUniform:
+    case glslang::EOpMaxInvocationsExclusiveScanNonUniform:
+    case glslang::EOpAddInvocationsExclusiveScanNonUniform:
+        if (op == glslang::EOpMinInvocationsNonUniform ||
+            op == glslang::EOpMinInvocationsInclusiveScanNonUniform ||
+            op == glslang::EOpMinInvocationsExclusiveScanNonUniform) {
             if (isFloat)
                 opCode = spv::OpGroupFMinNonUniformAMD;
             else {
@@ -4244,7 +4308,9 @@ spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op
                     opCode = spv::OpGroupSMinNonUniformAMD;
             }
         }
-        else if (op == glslang::EOpMaxInvocationsNonUniform) {
+        else if (op == glslang::EOpMaxInvocationsNonUniform ||
+                 op == glslang::EOpMaxInvocationsInclusiveScanNonUniform ||
+                 op == glslang::EOpMaxInvocationsExclusiveScanNonUniform) {
             if (isFloat)
                 opCode = spv::OpGroupFMaxNonUniformAMD;
             else {
@@ -4262,7 +4328,7 @@ spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op
         }
 
         if (builder.isVectorType(typeId))
-            return CreateInvocationsVectorOperation(opCode, typeId, operands);
+            return CreateInvocationsVectorOperation(opCode, groupOperation, typeId, operands);
 
         break;
 #endif
@@ -4276,7 +4342,7 @@ spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op
 }
 
 // Create group invocation operations on a vector
-spv::Id TGlslangToSpvTraverser::CreateInvocationsVectorOperation(spv::Op op, spv::Id typeId, std::vector<spv::Id>& operands)
+spv::Id TGlslangToSpvTraverser::CreateInvocationsVectorOperation(spv::Op op, spv::GroupOperation groupOperation, spv::Id typeId, std::vector<spv::Id>& operands)
 {
 #ifdef AMD_EXTENSIONS
     assert(op == spv::OpGroupFMin || op == spv::OpGroupUMin || op == spv::OpGroupSMin ||
@@ -4320,7 +4386,7 @@ spv::Id TGlslangToSpvTraverser::CreateInvocationsVectorOperation(spv::Op op, spv
             spvGroupOperands.push_back(operands[1]);
         } else {
             spvGroupOperands.push_back(builder.makeUintConstant(spv::ScopeSubgroup));
-            spvGroupOperands.push_back(spv::GroupOperationReduce);
+            spvGroupOperands.push_back(groupOperation);
             spvGroupOperands.push_back(scalar);
         }
 
diff --git a/3rdparty/glslang/SPIRV/SpvBuilder.cpp b/3rdparty/glslang/SPIRV/SpvBuilder.cpp
index 720eac2ff..1b04a6cf7 100644
--- a/3rdparty/glslang/SPIRV/SpvBuilder.cpp
+++ b/3rdparty/glslang/SPIRV/SpvBuilder.cpp
@@ -2354,7 +2354,7 @@ void Builder::dump(std::vector<unsigned int>& out) const
 
     for (auto it = extensions.cbegin(); it != extensions.cend(); ++it) {
         Instruction extInst(0, 0, OpExtension);
-        extInst.addStringOperand(*it);
+        extInst.addStringOperand(it->c_str());
         extInst.dump(out);
     }
 
diff --git a/3rdparty/glslang/SPIRV/SpvBuilder.h b/3rdparty/glslang/SPIRV/SpvBuilder.h
index 331f0e00f..a97b44b8b 100755
--- a/3rdparty/glslang/SPIRV/SpvBuilder.h
+++ b/3rdparty/glslang/SPIRV/SpvBuilder.h
@@ -555,7 +555,7 @@ public:
 
     SourceLanguage source;
     int sourceVersion;
-    std::set<const char*> extensions;
+    std::set<std::string> extensions;
     std::vector<const char*> sourceExtensions;
     AddressingModel addressModel;
     MemoryModel memoryModel;
diff --git a/3rdparty/glslang/SPIRV/doc.cpp b/3rdparty/glslang/SPIRV/doc.cpp
index de401c73f..0bb9e0177 100755
--- a/3rdparty/glslang/SPIRV/doc.cpp
+++ b/3rdparty/glslang/SPIRV/doc.cpp
@@ -819,6 +819,7 @@ const char* CapabilityString(int info)
 
     case 4423: return "SubgroupBallotKHR";
     case 4427: return "DrawParameters";
+    case 4431: return "SubgroupVoteKHR";
 
 #ifdef NV_EXTENSIONS
     case 5251: return "GeometryShaderPassthroughNV";
@@ -1158,6 +1159,9 @@ const char* OpcodeString(int op)
 
     case 4421: return "OpSubgroupBallotKHR";
     case 4422: return "OpSubgroupFirstInvocationKHR";
+    case 4428: return "OpSubgroupAllKHR";       // names must track the spv::Op values in spirv.hpp
+    case 4429: return "OpSubgroupAnyKHR";
+    case 4430: return "OpSubgroupAllEqualKHR";
     case 4432: return "OpSubgroupReadInvocationKHR";
 
 #ifdef AMD_EXTENSIONS
@@ -2771,6 +2775,18 @@ void Parameterize()
 
     InstructionDesc[OpSubgroupFirstInvocationKHR].operands.push(OperandId, "'Value'");
 
+    InstructionDesc[OpSubgroupAnyKHR].capabilities.push_back(CapabilitySubgroupVoteKHR);
+    InstructionDesc[OpSubgroupAnyKHR].operands.push(OperandScope, "'Execution'");
+    InstructionDesc[OpSubgroupAnyKHR].operands.push(OperandId, "'Predicate'");
+
+    InstructionDesc[OpSubgroupAllKHR].capabilities.push_back(CapabilitySubgroupVoteKHR);
+    InstructionDesc[OpSubgroupAllKHR].operands.push(OperandScope, "'Execution'");
+    InstructionDesc[OpSubgroupAllKHR].operands.push(OperandId, "'Predicate'");
+
+    InstructionDesc[OpSubgroupAllEqualKHR].capabilities.push_back(CapabilitySubgroupVoteKHR);
+    InstructionDesc[OpSubgroupAllEqualKHR].operands.push(OperandScope, "'Execution'");
+    InstructionDesc[OpSubgroupAllEqualKHR].operands.push(OperandId, "'Predicate'");
+
     InstructionDesc[OpSubgroupReadInvocationKHR].capabilities.push_back(CapabilityGroups);
     InstructionDesc[OpSubgroupReadInvocationKHR].operands.push(OperandId, "'Value'");
     InstructionDesc[OpSubgroupReadInvocationKHR].operands.push(OperandId, "'Index'");
diff --git a/3rdparty/glslang/SPIRV/spirv.hpp b/3rdparty/glslang/SPIRV/spirv.hpp
index 088b1af10..c7b3d5faa 100755
--- a/3rdparty/glslang/SPIRV/spirv.hpp
+++ b/3rdparty/glslang/SPIRV/spirv.hpp
@@ -605,6 +605,7 @@ enum Capability {
     CapabilityMultiViewport = 57,
     CapabilitySubgroupBallotKHR = 4423,
     CapabilityDrawParameters = 4427,
+    CapabilitySubgroupVoteKHR = 4431,
     CapabilityMax = 0x7fffffff,
 };
 
@@ -906,6 +907,9 @@ enum Op {
     OpSubgroupBallotKHR = 4421,
     OpSubgroupFirstInvocationKHR = 4422,
     OpSubgroupReadInvocationKHR = 4432,
+    OpSubgroupAllKHR = 4428,
+    OpSubgroupAnyKHR = 4429,
+    OpSubgroupAllEqualKHR = 4430,
     OpMax = 0x7fffffff,
 };
 
diff --git a/3rdparty/glslang/Test/baseResults/120.frag.out b/3rdparty/glslang/Test/baseResults/120.frag.out
index 79898aafc..5df3f987b 100644
--- a/3rdparty/glslang/Test/baseResults/120.frag.out
+++ b/3rdparty/glslang/Test/baseResults/120.frag.out
@@ -15,11 +15,9 @@ ERROR: 0:63: 'bitwise-or assign' : not supported for this version or the enabled
 ERROR: 0:63: 'assign' :  cannot convert from 'temp bool' to 'temp float'
 ERROR: 0:79: ':' :  wrong operand types: no operation ':' exists that takes a left-hand operand of type 'temp 4-component vector of float' and a right operand of type 'temp 4X4 matrix of float' (or there is no acceptable conversion)
 ERROR: 0:79: 'assign' :  cannot convert from 'temp 4X4 matrix of float' to 'fragColor 4-component vector of float FragColor'
-ERROR: 0:82: 'xr' : illegal - vector component fields not from the same set 
-ERROR: 0:83: 'xyxyx' : illegal vector field selection 
-ERROR: 0:83: 'scalar swizzle' : not supported for this version or the enabled extensions 
-ERROR: 0:83: 'xy' : vector field selection out of range 
-ERROR: 0:84: 'z' : vector field selection out of range 
+ERROR: 0:82: 'xr' : vector swizzle selectors not from the same set 
+ERROR: 0:83: 'xyxyx' : vector swizzle too long 
+ERROR: 0:84: 'z' : vector swizzle selection out of range 
 ERROR: 0:85: 'assign' :  l-value required 
 ERROR: 0:91: 'int' : overloaded functions must have the same return type 
 ERROR: 0:91: 'main' : function already has a body 
@@ -52,7 +50,7 @@ ERROR: 0:191: 'shadow2DProjGradARB' : required extension not requested: GL_ARB_s
 ERROR: 0:209: 'shadow2DRectProjGradARB' : no matching overloaded function found 
 ERROR: 0:209: 'assign' :  cannot convert from 'const float' to 'temp 4-component vector of float'
 ERROR: 0:212: 'sampler2DRect' : Reserved word. 
-ERROR: 53 compilation errors.  No code generated.
+ERROR: 51 compilation errors.  No code generated.
 
 
 Shader version: 120
@@ -251,10 +249,23 @@ ERROR: node is still EOpNull!
 0:82        'gl_FragColor' (fragColor 4-component vector of float FragColor)
 0:82        Constant:
 0:82          0 (const int)
-0:83      direct index (temp float)
-0:83        'gl_FragColor' (fragColor 4-component vector of float FragColor)
-0:83        Constant:
-0:83          0 (const int)
+0:83      vector swizzle (temp 2-component vector of float)
+0:83        vector swizzle (temp 4-component vector of float)
+0:83          'gl_FragColor' (fragColor 4-component vector of float FragColor)
+0:83          Sequence
+0:83            Constant:
+0:83              0 (const int)
+0:83            Constant:
+0:83              1 (const int)
+0:83            Constant:
+0:83              0 (const int)
+0:83            Constant:
+0:83              1 (const int)
+0:83        Sequence
+0:83          Constant:
+0:83            0 (const int)
+0:83          Constant:
+0:83            1 (const int)
 0:84      direct index (temp float)
 0:84        'centTexCoord' (centroid smooth in 2-component vector of float)
 0:84        Constant:
diff --git a/3rdparty/glslang/Test/baseResults/420.vert.out b/3rdparty/glslang/Test/baseResults/420.vert.out
index a70b44e78..b8e1306d7 100644
--- a/3rdparty/glslang/Test/baseResults/420.vert.out
+++ b/3rdparty/glslang/Test/baseResults/420.vert.out
@@ -18,9 +18,9 @@ ERROR: 0:40: 'j' : undeclared identifier
 ERROR: 0:40: '=' :  cannot convert from 'temp float' to 'temp int'
 ERROR: 0:44: 'jj' : undeclared identifier 
 ERROR: 0:44: '=' :  cannot convert from 'temp float' to 'temp int'
-ERROR: 0:54: 'y' : vector field selection out of range 
-ERROR: 0:62: 'xxxxx' : illegal vector field selection 
-ERROR: 0:63: 'xxy' : vector field selection out of range 
+ERROR: 0:54: 'y' : vector swizzle selection out of range 
+ERROR: 0:62: 'xxxxx' : vector swizzle too long 
+ERROR: 0:63: 'xxy' : vector swizzle selection out of range 
 ERROR: 0:66: 'binding' : cannot declare a default, include a type or full declaration 
 ERROR: 0:69: 'location/component/index' : cannot declare a default, use a full declaration 
 ERROR: 0:70: 'input block' : not supported in this stage: vertex
@@ -124,8 +124,10 @@ ERROR: node is still EOpNull!
 0:61          'smeared' (temp 3-component vector of float)
 0:61          Construct vec3 (temp 3-component vector of float)
 0:61            'f' (temp float)
-0:62      'f' (temp float)
-0:63      'f' (temp float)
+0:62      Construct vec4 (temp 4-component vector of float)
+0:62        'f' (temp float)
+0:63      Construct vec2 (temp 2-component vector of float)
+0:63        'f' (temp float)
 0:88  Function Definition: bar23444( (global void)
 0:88    Function Parameters: 
 0:?     Sequence
diff --git a/3rdparty/glslang/Test/baseResults/cppComplexExpr.vert.out b/3rdparty/glslang/Test/baseResults/cppComplexExpr.vert.out
index 352dcac18..90d62c126 100644
--- a/3rdparty/glslang/Test/baseResults/cppComplexExpr.vert.out
+++ b/3rdparty/glslang/Test/baseResults/cppComplexExpr.vert.out
@@ -1,6 +1,8 @@
 cppComplexExpr.vert
-ERROR: 0:46: 'xyxwx' : illegal vector field selection 
-ERROR: 0:46: 'xyxwx' : illegal vector field selection 
+ERROR: 0:46: 'xyxwx' : vector swizzle too long 
+ERROR: 0:46: 'xyxwx' : vector swizzle too long 
+ERROR: 0:46: 'return' : cannot convert return value to function return type 
+WARNING: 0:46: 'return' : type conversion on return values was not explicitly allowed until version 420 
 ERROR: 0:66: '#define' : Macro redefined; different substitutions: BIG
 ERROR: 0:81: 'preprocessor evaluation' : bad expression 
 ERROR: 0:81: '#if' : unexpected tokens following directive 
@@ -47,7 +49,7 @@ ERROR: 0:0: 'preprocessor evaluation' : division by 0
 ERROR: 0:3: 'preprocessor evaluation' : bad expression 
 ERROR: 0:3: 'preprocessor evaluation' : division by 0 
 ERROR: 0:10001: '' : missing #endif 
-ERROR: 48 compilation errors.  No code generated.
+ERROR: 49 compilation errors.  No code generated.
 
 
 Shader version: 300
@@ -80,19 +82,33 @@ ERROR: node is still EOpNull!
 0:44    Function Parameters: 
 0:46    Sequence
 0:46      Branch: Return with expression
-0:46        add (temp highp float)
-0:46          add (temp highp float)
-0:46            direct index (temp highp float)
+0:46        add (temp highp 4-component vector of float)
+0:46          add (temp highp 4-component vector of float)
+0:46            vector swizzle (temp highp 4-component vector of float)
 0:46              'gl_Position' (gl_Position highp 4-component vector of float Position)
-0:46              Constant:
-0:46                0 (const int)
+0:46              Sequence
+0:46                Constant:
+0:46                  0 (const int)
+0:46                Constant:
+0:46                  1 (const int)
+0:46                Constant:
+0:46                  0 (const int)
+0:46                Constant:
+0:46                  3 (const int)
 0:46            Constant:
 0:46              3.000000
-0:46          add (temp highp float)
-0:46            direct index (temp highp float)
+0:46          add (temp highp 4-component vector of float)
+0:46            vector swizzle (temp highp 4-component vector of float)
 0:46              'gl_Position' (gl_Position highp 4-component vector of float Position)
-0:46              Constant:
-0:46                0 (const int)
+0:46              Sequence
+0:46                Constant:
+0:46                  0 (const int)
+0:46                Constant:
+0:46                  1 (const int)
+0:46                Constant:
+0:46                  0 (const int)
+0:46                Constant:
+0:46                  3 (const int)
 0:46            Constant:
 0:46              3.000000
 0:47      Branch: Return with expression
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.entry-in.frag.out b/3rdparty/glslang/Test/baseResults/hlsl.entry-in.frag.out
index 686b355c4..9dfbe4194 100755
--- a/3rdparty/glslang/Test/baseResults/hlsl.entry-in.frag.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.entry-in.frag.out
@@ -4,48 +4,96 @@ gl_FragCoord origin is upper left
 0:? Sequence
 0:8  Function Definition: fun(struct-InParam-vf2-vf4-vi21; (temp float)
 0:8    Function Parameters: 
-0:8      'p' (in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:8      'p' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
 0:?     Sequence
 0:9      Branch: Return with expression
 0:9        add (temp float)
 0:9          direct index (temp float)
 0:9            v: direct index for structure (temp 2-component vector of float)
-0:9              'p' (in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:9              'p' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
 0:9              Constant:
 0:9                0 (const int)
 0:9            Constant:
 0:9              1 (const int)
 0:9          direct index (temp float)
-0:9            fragCoord: direct index for structure (temp 4-component vector of float FragCoord)
-0:9              'p' (in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:9            fragCoord: direct index for structure (temp 4-component vector of float)
+0:9              'p' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
 0:9              Constant:
 0:9                1 (const int)
 0:9            Constant:
 0:9              0 (const int)
 0:13  Function Definition: PixelShaderFunction(struct-InParam-vf2-vf4-vi21; (temp 4-component vector of float)
 0:13    Function Parameters: 
-0:13      'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:13      'i' (in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
 0:?     Sequence
-0:15      move second child to first child (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
-0:15        'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
-0:15        'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:15      Sequence
+0:15        move second child to first child (temp 2-component vector of float)
+0:15          v: direct index for structure (temp 2-component vector of float)
+0:15            'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
+0:15            Constant:
+0:15              0 (const int)
+0:15          v: direct index for structure (temp 2-component vector of float)
+0:15            'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 2-component vector of int i2})
+0:15            Constant:
+0:15              0 (const int)
+0:15        move second child to first child (temp 4-component vector of float)
+0:15          fragCoord: direct index for structure (temp 4-component vector of float)
+0:15            'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
+0:15            Constant:
+0:15              1 (const int)
+0:?           'i_fragCoord' (in 4-component vector of float FragCoord)
+0:15        move second child to first child (temp 2-component vector of int)
+0:15          i2: direct index for structure (temp 2-component vector of int)
+0:15            'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
+0:15            Constant:
+0:15              2 (const int)
+0:15          i2: direct index for structure (temp 2-component vector of int)
+0:15            'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 2-component vector of int i2})
+0:15            Constant:
+0:15              1 (const int)
 0:16      Sequence
 0:16        move second child to first child (temp float)
 0:16          'ret1' (temp float)
 0:16          Function Call: fun(struct-InParam-vf2-vf4-vi21; (temp float)
-0:16            'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:16            'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
 0:17      Sequence
 0:17        move second child to first child (temp float)
 0:17          'ret2' (temp float)
 0:17          Function Call: fun(struct-InParam-vf2-vf4-vi21; (temp float)
-0:17            'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:17            Comma (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:17              Sequence
+0:17                move second child to first child (temp 2-component vector of float)
+0:17                  v: direct index for structure (temp 2-component vector of float)
+0:17                    'aggShadow' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:17                    Constant:
+0:17                      0 (const int)
+0:17                  v: direct index for structure (temp 2-component vector of float)
+0:17                    'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 2-component vector of int i2})
+0:17                    Constant:
+0:17                      0 (const int)
+0:17                move second child to first child (temp 4-component vector of float)
+0:17                  fragCoord: direct index for structure (temp 4-component vector of float FragCoord)
+0:17                    'aggShadow' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:17                    Constant:
+0:17                      1 (const int)
+0:?                   'i_fragCoord' (in 4-component vector of float FragCoord)
+0:17                move second child to first child (temp 2-component vector of int)
+0:17                  i2: direct index for structure (temp 2-component vector of int)
+0:17                    'aggShadow' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:17                    Constant:
+0:17                      2 (const int)
+0:17                  i2: direct index for structure (temp 2-component vector of int)
+0:17                    'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 2-component vector of int i2})
+0:17                    Constant:
+0:17                      1 (const int)
+0:17              'aggShadow' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
 0:19      Sequence
 0:19        move second child to first child (temp 4-component vector of float)
 0:?           '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
 0:19          vector-scale (temp 4-component vector of float)
 0:19            vector-scale (temp 4-component vector of float)
-0:19              fragCoord: direct index for structure (temp 4-component vector of float FragCoord)
-0:19                'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:19              fragCoord: direct index for structure (temp 4-component vector of float)
+0:19                'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
 0:19                Constant:
 0:19                  1 (const int)
 0:19              'ret1' (temp float)
@@ -53,7 +101,8 @@ gl_FragCoord origin is upper left
 0:19        Branch: Return
 0:?   Linker Objects
 0:?     '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
-0:?     'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:?     'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 2-component vector of int i2})
+0:?     'i_fragCoord' (in 4-component vector of float FragCoord)
 
 
 Linked fragment stage:
@@ -64,48 +113,96 @@ gl_FragCoord origin is upper left
 0:? Sequence
 0:8  Function Definition: fun(struct-InParam-vf2-vf4-vi21; (temp float)
 0:8    Function Parameters: 
-0:8      'p' (in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:8      'p' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
 0:?     Sequence
 0:9      Branch: Return with expression
 0:9        add (temp float)
 0:9          direct index (temp float)
 0:9            v: direct index for structure (temp 2-component vector of float)
-0:9              'p' (in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:9              'p' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
 0:9              Constant:
 0:9                0 (const int)
 0:9            Constant:
 0:9              1 (const int)
 0:9          direct index (temp float)
-0:9            fragCoord: direct index for structure (temp 4-component vector of float FragCoord)
-0:9              'p' (in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:9            fragCoord: direct index for structure (temp 4-component vector of float)
+0:9              'p' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
 0:9              Constant:
 0:9                1 (const int)
 0:9            Constant:
 0:9              0 (const int)
 0:13  Function Definition: PixelShaderFunction(struct-InParam-vf2-vf4-vi21; (temp 4-component vector of float)
 0:13    Function Parameters: 
-0:13      'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:13      'i' (in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
 0:?     Sequence
-0:15      move second child to first child (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
-0:15        'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
-0:15        'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:15      Sequence
+0:15        move second child to first child (temp 2-component vector of float)
+0:15          v: direct index for structure (temp 2-component vector of float)
+0:15            'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
+0:15            Constant:
+0:15              0 (const int)
+0:15          v: direct index for structure (temp 2-component vector of float)
+0:15            'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 2-component vector of int i2})
+0:15            Constant:
+0:15              0 (const int)
+0:15        move second child to first child (temp 4-component vector of float)
+0:15          fragCoord: direct index for structure (temp 4-component vector of float)
+0:15            'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
+0:15            Constant:
+0:15              1 (const int)
+0:?           'i_fragCoord' (in 4-component vector of float FragCoord)
+0:15        move second child to first child (temp 2-component vector of int)
+0:15          i2: direct index for structure (temp 2-component vector of int)
+0:15            'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
+0:15            Constant:
+0:15              2 (const int)
+0:15          i2: direct index for structure (temp 2-component vector of int)
+0:15            'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 2-component vector of int i2})
+0:15            Constant:
+0:15              1 (const int)
 0:16      Sequence
 0:16        move second child to first child (temp float)
 0:16          'ret1' (temp float)
 0:16          Function Call: fun(struct-InParam-vf2-vf4-vi21; (temp float)
-0:16            'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:16            'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
 0:17      Sequence
 0:17        move second child to first child (temp float)
 0:17          'ret2' (temp float)
 0:17          Function Call: fun(struct-InParam-vf2-vf4-vi21; (temp float)
-0:17            'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:17            Comma (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:17              Sequence
+0:17                move second child to first child (temp 2-component vector of float)
+0:17                  v: direct index for structure (temp 2-component vector of float)
+0:17                    'aggShadow' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:17                    Constant:
+0:17                      0 (const int)
+0:17                  v: direct index for structure (temp 2-component vector of float)
+0:17                    'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 2-component vector of int i2})
+0:17                    Constant:
+0:17                      0 (const int)
+0:17                move second child to first child (temp 4-component vector of float)
+0:17                  fragCoord: direct index for structure (temp 4-component vector of float FragCoord)
+0:17                    'aggShadow' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:17                    Constant:
+0:17                      1 (const int)
+0:?                   'i_fragCoord' (in 4-component vector of float FragCoord)
+0:17                move second child to first child (temp 2-component vector of int)
+0:17                  i2: direct index for structure (temp 2-component vector of int)
+0:17                    'aggShadow' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:17                    Constant:
+0:17                      2 (const int)
+0:17                  i2: direct index for structure (temp 2-component vector of int)
+0:17                    'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 2-component vector of int i2})
+0:17                    Constant:
+0:17                      1 (const int)
+0:17              'aggShadow' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
 0:19      Sequence
 0:19        move second child to first child (temp 4-component vector of float)
 0:?           '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
 0:19          vector-scale (temp 4-component vector of float)
 0:19            vector-scale (temp 4-component vector of float)
-0:19              fragCoord: direct index for structure (temp 4-component vector of float FragCoord)
-0:19                'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:19              fragCoord: direct index for structure (temp 4-component vector of float)
+0:19                'local' (temp structure{temp 2-component vector of float v, temp 4-component vector of float fragCoord, temp 2-component vector of int i2})
 0:19                Constant:
 0:19                  1 (const int)
 0:19              'ret1' (temp float)
@@ -113,16 +210,17 @@ gl_FragCoord origin is upper left
 0:19        Branch: Return
 0:?   Linker Objects
 0:?     '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
-0:?     'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 4-component vector of float FragCoord fragCoord, temp 2-component vector of int i2})
+0:?     'i' (layout(location=0 ) in structure{temp 2-component vector of float v, temp 2-component vector of int i2})
+0:?     'i_fragCoord' (in 4-component vector of float FragCoord)
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 52
+// Id's are bound by 78
 
                               Capability Shader
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Fragment 4  "PixelShaderFunction" 32 43
+                              EntryPoint Fragment 4  "PixelShaderFunction" 33 40 70
                               ExecutionMode 4 OriginUpperLeft
                               Name 4  "PixelShaderFunction"
                               Name 11  "InParam"
@@ -132,15 +230,25 @@ gl_FragCoord origin is upper left
                               Name 15  "fun(struct-InParam-vf2-vf4-vi21;"
                               Name 14  "p"
                               Name 30  "local"
-                              Name 32  "i"
-                              Name 34  "ret1"
-                              Name 35  "param"
-                              Name 38  "ret2"
-                              Name 39  "param"
-                              Name 43  "@entryPointOutput"
-                              MemberDecorate 11(InParam) 1 BuiltIn FragCoord
-                              Decorate 32(i) Location 0
-                              Decorate 43(@entryPointOutput) Location 0
+                              Name 31  "InParam"
+                              MemberName 31(InParam) 0  "v"
+                              MemberName 31(InParam) 1  "i2"
+                              Name 33  "i"
+                              Name 40  "i_fragCoord"
+                              Name 50  "ret1"
+                              Name 51  "param"
+                              Name 54  "ret2"
+                              Name 55  "InParam"
+                              MemberName 55(InParam) 0  "v"
+                              MemberName 55(InParam) 1  "fragCoord"
+                              MemberName 55(InParam) 2  "i2"
+                              Name 57  "aggShadow"
+                              Name 66  "param"
+                              Name 70  "@entryPointOutput"
+                              Decorate 33(i) Location 0
+                              Decorate 40(i_fragCoord) BuiltIn FragCoord
+                              MemberDecorate 55(InParam) 1 BuiltIn FragCoord
+                              Decorate 70(@entryPointOutput) Location 0
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -157,35 +265,66 @@ gl_FragCoord origin is upper left
               20:             TypePointer Function 6(float)
               23:      9(int) Constant 1
               24:     18(int) Constant 0
-              31:             TypePointer Input 11(InParam)
-           32(i):     31(ptr) Variable Input
-              42:             TypePointer Output 8(fvec4)
-43(@entryPointOutput):     42(ptr) Variable Output
-              44:             TypePointer Function 8(fvec4)
+     31(InParam):             TypeStruct 7(fvec2) 10(ivec2)
+              32:             TypePointer Input 31(InParam)
+           33(i):     32(ptr) Variable Input
+              34:             TypePointer Input 7(fvec2)
+              37:             TypePointer Function 7(fvec2)
+              39:             TypePointer Input 8(fvec4)
+ 40(i_fragCoord):     39(ptr) Variable Input
+              42:             TypePointer Function 8(fvec4)
+              44:      9(int) Constant 2
+              45:             TypePointer Input 10(ivec2)
+              48:             TypePointer Function 10(ivec2)
+     55(InParam):             TypeStruct 7(fvec2) 8(fvec4) 10(ivec2)
+              56:             TypePointer Function 55(InParam)
+              69:             TypePointer Output 8(fvec4)
+70(@entryPointOutput):     69(ptr) Variable Output
 4(PixelShaderFunction):           2 Function None 3
                5:             Label
        30(local):     12(ptr) Variable Function
-        34(ret1):     20(ptr) Variable Function
-       35(param):     12(ptr) Variable Function
-        38(ret2):     20(ptr) Variable Function
-       39(param):     12(ptr) Variable Function
-              33: 11(InParam) Load 32(i)
-                              Store 30(local) 33
-              36: 11(InParam) Load 30(local)
-                              Store 35(param) 36
-              37:    6(float) FunctionCall 15(fun(struct-InParam-vf2-vf4-vi21;) 35(param)
-                              Store 34(ret1) 37
-              40: 11(InParam) Load 32(i)
-                              Store 39(param) 40
-              41:    6(float) FunctionCall 15(fun(struct-InParam-vf2-vf4-vi21;) 39(param)
-                              Store 38(ret2) 41
-              45:     44(ptr) AccessChain 30(local) 23
-              46:    8(fvec4) Load 45
-              47:    6(float) Load 34(ret1)
-              48:    8(fvec4) VectorTimesScalar 46 47
-              49:    6(float) Load 38(ret2)
-              50:    8(fvec4) VectorTimesScalar 48 49
-                              Store 43(@entryPointOutput) 50
+        50(ret1):     20(ptr) Variable Function
+       51(param):     12(ptr) Variable Function
+        54(ret2):     20(ptr) Variable Function
+   57(aggShadow):     56(ptr) Variable Function
+       66(param):     56(ptr) Variable Function
+              35:     34(ptr) AccessChain 33(i) 17
+              36:    7(fvec2) Load 35
+              38:     37(ptr) AccessChain 30(local) 17
+                              Store 38 36
+              41:    8(fvec4) Load 40(i_fragCoord)
+              43:     42(ptr) AccessChain 30(local) 23
+                              Store 43 41
+              46:     45(ptr) AccessChain 33(i) 23
+              47:   10(ivec2) Load 46
+              49:     48(ptr) AccessChain 30(local) 44
+                              Store 49 47
+              52: 11(InParam) Load 30(local)
+                              Store 51(param) 52
+              53:    6(float) FunctionCall 15(fun(struct-InParam-vf2-vf4-vi21;) 51(param)
+                              Store 50(ret1) 53
+              58:     34(ptr) AccessChain 33(i) 17
+              59:    7(fvec2) Load 58
+              60:     37(ptr) AccessChain 57(aggShadow) 17
+                              Store 60 59
+              61:    8(fvec4) Load 40(i_fragCoord)
+              62:     42(ptr) AccessChain 57(aggShadow) 23
+                              Store 62 61
+              63:     45(ptr) AccessChain 33(i) 23
+              64:   10(ivec2) Load 63
+              65:     48(ptr) AccessChain 57(aggShadow) 44
+                              Store 65 64
+              67: 55(InParam) Load 57(aggShadow)
+                              Store 66(param) 67
+              68:    6(float) FunctionCall 15(fun(struct-InParam-vf2-vf4-vi21;) 66(param)
+                              Store 54(ret2) 68
+              71:     42(ptr) AccessChain 30(local) 23
+              72:    8(fvec4) Load 71
+              73:    6(float) Load 50(ret1)
+              74:    8(fvec4) VectorTimesScalar 72 73
+              75:    6(float) Load 54(ret2)
+              76:    8(fvec4) VectorTimesScalar 74 75
+                              Store 70(@entryPointOutput) 76
                               Return
                               FunctionEnd
 15(fun(struct-InParam-vf2-vf4-vi21;):    6(float) Function None 13
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.gather.basic.dx10.vert.out b/3rdparty/glslang/Test/baseResults/hlsl.gather.basic.dx10.vert.out
index 3226769da..bea5142b3 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.gather.basic.dx10.vert.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.gather.basic.dx10.vert.out
@@ -102,7 +102,7 @@ Shader version: 450
 0:?     'g_tTexcdf4' (uniform textureCube)
 0:?     'g_tTexcdi4' (uniform itextureCube)
 0:?     'g_tTexcdu4' (uniform utextureCube)
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 
 Linked vertex stage:
@@ -211,17 +211,17 @@ Shader version: 450
 0:?     'g_tTexcdf4' (uniform textureCube)
 0:?     'g_tTexcdi4' (uniform itextureCube)
 0:?     'g_tTexcdu4' (uniform utextureCube)
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 121
+// Id's are bound by 124
 
                               Capability Shader
                               Capability Sampled1D
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Vertex 4  "main" 97
+                              EntryPoint Vertex 4  "main" 97 123
                               Name 4  "main"
                               Name 9  "txval20"
                               Name 12  "g_tTex2df4"
@@ -248,6 +248,9 @@ Shader version: 450
                               Name 114  "g_tTex3df4"
                               Name 117  "g_tTex3di4"
                               Name 120  "g_tTex3du4"
+                              Name 121  "PerVertex_out"
+                              MemberName 121(PerVertex_out) 0  "Pos"
+                              Name 123  "PerVertex_out"
                               Decorate 12(g_tTex2df4) DescriptorSet 0
                               Decorate 16(g_sSamp) DescriptorSet 0
                               Decorate 16(g_sSamp) Binding 0
@@ -267,6 +270,8 @@ Shader version: 450
                               Decorate 114(g_tTex3df4) DescriptorSet 0
                               Decorate 117(g_tTex3di4) DescriptorSet 0
                               Decorate 120(g_tTex3du4) DescriptorSet 0
+                              MemberDecorate 121(PerVertex_out) 0 BuiltIn Position
+                              Decorate 121(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -349,6 +354,9 @@ Shader version: 450
              118:             TypeImage 41(int) 3D sampled format:Unknown
              119:             TypePointer UniformConstant 118
  120(g_tTex3du4):    119(ptr) Variable UniformConstant
+121(PerVertex_out):             TypeStruct 7(fvec4)
+             122:             TypePointer Output 121(PerVertex_out)
+123(PerVertex_out):    122(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
       9(txval20):      8(ptr) Variable Function
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.getdimensions.dx10.vert.out b/3rdparty/glslang/Test/baseResults/hlsl.getdimensions.dx10.vert.out
index 77316b123..85a25af4d 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.getdimensions.dx10.vert.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.getdimensions.dx10.vert.out
@@ -48,7 +48,7 @@ Shader version: 450
 0:?   Linker Objects
 0:?     'g_sSamp' (layout(binding=0 ) uniform sampler)
 0:?     'g_tTex1df4' (layout(binding=0 ) uniform texture1D)
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 
 Linked vertex stage:
@@ -103,18 +103,18 @@ Shader version: 450
 0:?   Linker Objects
 0:?     'g_sSamp' (layout(binding=0 ) uniform sampler)
 0:?     'g_tTex1df4' (layout(binding=0 ) uniform texture1D)
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 43
+// Id's are bound by 46
 
                               Capability Shader
                               Capability Sampled1D
                               Capability ImageQuery
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Vertex 4  "main" 36
+                              EntryPoint Vertex 4  "main" 36 45
                               Name 4  "main"
                               Name 8  "sizeQueryTemp"
                               Name 12  "g_tTex1df4"
@@ -126,11 +126,16 @@ Shader version: 450
                               Name 29  "vsout"
                               Name 36  "Pos"
                               Name 42  "g_sSamp"
+                              Name 43  "PerVertex_out"
+                              MemberName 43(PerVertex_out) 0  "Pos"
+                              Name 45  "PerVertex_out"
                               Decorate 12(g_tTex1df4) DescriptorSet 0
                               Decorate 12(g_tTex1df4) Binding 0
                               Decorate 36(Pos) BuiltIn Position
                               Decorate 42(g_sSamp) DescriptorSet 0
                               Decorate 42(g_sSamp) Binding 0
+                              MemberDecorate 43(PerVertex_out) 0 BuiltIn Position
+                              Decorate 43(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeInt 32 0
@@ -153,6 +158,9 @@ Shader version: 450
               40:             TypeSampler
               41:             TypePointer UniformConstant 40
      42(g_sSamp):     41(ptr) Variable UniformConstant
+43(PerVertex_out):             TypeStruct 26(fvec4)
+              44:             TypePointer Output 43(PerVertex_out)
+45(PerVertex_out):     44(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
 8(sizeQueryTemp):      7(ptr) Variable Function
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.load.basic.dx10.vert.out b/3rdparty/glslang/Test/baseResults/hlsl.load.basic.dx10.vert.out
index 7441d49e2..a3e45e4f4 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.load.basic.dx10.vert.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.load.basic.dx10.vert.out
@@ -218,7 +218,7 @@ Shader version: 450
 0:?     'g_tTexcdi4a' (uniform itextureCubeArray)
 0:?     'g_tTexcdu4a' (uniform utextureCubeArray)
 0:?     'anon@0' (layout(row_major std140 ) uniform block{layout(offset=0 ) uniform int c1, layout(offset=8 ) uniform 2-component vector of int c2, layout(offset=16 ) uniform 3-component vector of int c3, layout(offset=32 ) uniform 4-component vector of int c4, layout(offset=48 ) uniform int o1, layout(offset=56 ) uniform 2-component vector of int o2, layout(offset=64 ) uniform 3-component vector of int o3, layout(offset=80 ) uniform 4-component vector of int o4})
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 
 Linked vertex stage:
@@ -443,18 +443,18 @@ Shader version: 450
 0:?     'g_tTexcdi4a' (uniform itextureCubeArray)
 0:?     'g_tTexcdu4a' (uniform utextureCubeArray)
 0:?     'anon@0' (layout(row_major std140 ) uniform block{layout(offset=0 ) uniform int c1, layout(offset=8 ) uniform 2-component vector of int c2, layout(offset=16 ) uniform 3-component vector of int c3, layout(offset=32 ) uniform 4-component vector of int c4, layout(offset=48 ) uniform int o1, layout(offset=56 ) uniform 2-component vector of int o2, layout(offset=64 ) uniform 3-component vector of int o3, layout(offset=80 ) uniform 4-component vector of int o4})
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 166
+// Id's are bound by 169
 
                               Capability Shader
                               Capability Sampled1D
                               Capability SampledCubeArray
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Vertex 4  "main" 123
+                              EntryPoint Vertex 4  "main" 123 168
                               Name 4  "main"
                               Name 9  "g_tTex1df4"
                               Name 15  "$Global"
@@ -492,6 +492,9 @@ Shader version: 450
                               Name 159  "g_tTexcdf4a"
                               Name 162  "g_tTexcdi4a"
                               Name 165  "g_tTexcdu4a"
+                              Name 166  "PerVertex_out"
+                              MemberName 166(PerVertex_out) 0  "Pos"
+                              Name 168  "PerVertex_out"
                               Decorate 9(g_tTex1df4) DescriptorSet 0
                               Decorate 9(g_tTex1df4) Binding 0
                               MemberDecorate 15($Global) 0 Offset 0
@@ -527,6 +530,8 @@ Shader version: 450
                               Decorate 159(g_tTexcdf4a) DescriptorSet 0
                               Decorate 162(g_tTexcdi4a) DescriptorSet 0
                               Decorate 165(g_tTexcdu4a) DescriptorSet 0
+                              MemberDecorate 166(PerVertex_out) 0 BuiltIn Position
+                              Decorate 166(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -624,6 +629,9 @@ Shader version: 450
              163:             TypeImage 19(int) Cube array sampled format:Unknown
              164:             TypePointer UniformConstant 163
 165(g_tTexcdu4a):    164(ptr) Variable UniformConstant
+166(PerVertex_out):             TypeStruct 27(fvec4)
+             167:             TypePointer Output 166(PerVertex_out)
+168(PerVertex_out):    167(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
       116(vsout):    115(ptr) Variable Function
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.matrixSwizzle.vert.out b/3rdparty/glslang/Test/baseResults/hlsl.matrixSwizzle.vert.out
new file mode 100755
index 000000000..cf79f7245
--- /dev/null
+++ b/3rdparty/glslang/Test/baseResults/hlsl.matrixSwizzle.vert.out
@@ -0,0 +1,818 @@
+hlsl.matrixSwizzle.vert
+Shader version: 450
+0:? Sequence
+0:2  Function Definition: ShaderFunction(f1; (temp void)
+0:2    Function Parameters: 
+0:2      'inf' (layout(location=0 ) in float)
+0:?     Sequence
+0:7      move second child to first child (temp float)
+0:7        direct index (temp float)
+0:7          direct index (temp 4-component vector of float)
+0:7            'm' (temp 3X4 matrix of float)
+0:7            Constant:
+0:7              2 (const int)
+0:7          Constant:
+0:7            3 (const int)
+0:7        Constant:
+0:7          1.000000
+0:8      move second child to first child (temp float)
+0:8        direct index (temp float)
+0:8          direct index (temp 4-component vector of float)
+0:8            'm' (temp 3X4 matrix of float)
+0:8            Constant:
+0:8              2 (const int)
+0:8          Constant:
+0:8            3 (const int)
+0:8        Constant:
+0:8          2.000000
+0:9      move second child to first child (temp float)
+0:9        direct index (temp float)
+0:9          direct index (temp 4-component vector of float)
+0:9            'm' (temp 3X4 matrix of float)
+0:9            Constant:
+0:9              2 (const int)
+0:9          Constant:
+0:9            3 (const int)
+0:9        Constant:
+0:9          2.000000
+0:11      move second child to first child (temp 4-component vector of float)
+0:11        direct index (temp 4-component vector of float)
+0:11          'm' (temp 3X4 matrix of float)
+0:11          Constant:
+0:11            0 (const int)
+0:11        Constant:
+0:11          3.000000
+0:11          3.000000
+0:11          3.000000
+0:11          3.000000
+0:12      move second child to first child (temp 4-component vector of float)
+0:12        direct index (temp 4-component vector of float)
+0:12          'm' (temp 3X4 matrix of float)
+0:12          Constant:
+0:12            1 (const int)
+0:12        Constant:
+0:12          3.000000
+0:12          3.000000
+0:12          3.000000
+0:12          3.000000
+0:13      move second child to first child (temp 4-component vector of float)
+0:13        direct index (temp 4-component vector of float)
+0:13          'm' (temp 3X4 matrix of float)
+0:13          Constant:
+0:13            1 (const int)
+0:13        Constant:
+0:13          3.000000
+0:13          3.000000
+0:13          3.000000
+0:13          3.000000
+0:?       Sequence
+0:18        move second child to first child (temp float)
+0:18          direct index (temp float)
+0:18            direct index (temp 4-component vector of float)
+0:18              'm' (temp 3X4 matrix of float)
+0:18              Constant:
+0:18                0 (const int)
+0:18            Constant:
+0:18              0 (const int)
+0:18          direct index (temp float)
+0:18            'f3' (temp 3-component vector of float)
+0:18            Constant:
+0:18              0 (const int)
+0:18        move second child to first child (temp float)
+0:18          direct index (temp float)
+0:18            direct index (temp 4-component vector of float)
+0:18              'm' (temp 3X4 matrix of float)
+0:18              Constant:
+0:18                1 (const int)
+0:18            Constant:
+0:18              1 (const int)
+0:18          direct index (temp float)
+0:18            'f3' (temp 3-component vector of float)
+0:18            Constant:
+0:18              1 (const int)
+0:18        move second child to first child (temp float)
+0:18          direct index (temp float)
+0:18            direct index (temp 4-component vector of float)
+0:18              'm' (temp 3X4 matrix of float)
+0:18              Constant:
+0:18                1 (const int)
+0:18            Constant:
+0:18              2 (const int)
+0:18          direct index (temp float)
+0:18            'f3' (temp 3-component vector of float)
+0:18            Constant:
+0:18              2 (const int)
+0:19      Sequence
+0:19        move second child to first child (temp 3-component vector of float)
+0:19          'intermVec' (temp 3-component vector of float)
+0:19          Constant:
+0:19            5.000000
+0:19            5.000000
+0:19            5.000000
+0:19        move second child to first child (temp float)
+0:19          direct index (temp float)
+0:19            direct index (temp 4-component vector of float)
+0:19              'm' (temp 3X4 matrix of float)
+0:19              Constant:
+0:19                1 (const int)
+0:19            Constant:
+0:19              0 (const int)
+0:19          direct index (temp float)
+0:19            'intermVec' (temp 3-component vector of float)
+0:19            Constant:
+0:19              0 (const int)
+0:19        move second child to first child (temp float)
+0:19          direct index (temp float)
+0:19            direct index (temp 4-component vector of float)
+0:19              'm' (temp 3X4 matrix of float)
+0:19              Constant:
+0:19                0 (const int)
+0:19            Constant:
+0:19              1 (const int)
+0:19          direct index (temp float)
+0:19            'intermVec' (temp 3-component vector of float)
+0:19            Constant:
+0:19              1 (const int)
+0:19        move second child to first child (temp float)
+0:19          direct index (temp float)
+0:19            direct index (temp 4-component vector of float)
+0:19              'm' (temp 3X4 matrix of float)
+0:19              Constant:
+0:19                2 (const int)
+0:19            Constant:
+0:19              0 (const int)
+0:19          direct index (temp float)
+0:19            'intermVec' (temp 3-component vector of float)
+0:19            Constant:
+0:19              2 (const int)
+0:20      Sequence
+0:20        move second child to first child (temp 3-component vector of float)
+0:20          'intermVec' (temp 3-component vector of float)
+0:20          vector-scale (temp 3-component vector of float)
+0:20            Constant:
+0:20              2.000000
+0:20            'f3' (temp 3-component vector of float)
+0:20        move second child to first child (temp float)
+0:20          direct index (temp float)
+0:20            direct index (temp 4-component vector of float)
+0:20              'm' (temp 3X4 matrix of float)
+0:20              Constant:
+0:20                0 (const int)
+0:20            Constant:
+0:20              0 (const int)
+0:20          direct index (temp float)
+0:20            'intermVec' (temp 3-component vector of float)
+0:20            Constant:
+0:20              0 (const int)
+0:20        move second child to first child (temp float)
+0:20          direct index (temp float)
+0:20            direct index (temp 4-component vector of float)
+0:20              'm' (temp 3X4 matrix of float)
+0:20              Constant:
+0:20                0 (const int)
+0:20            Constant:
+0:20              1 (const int)
+0:20          direct index (temp float)
+0:20            'intermVec' (temp 3-component vector of float)
+0:20            Constant:
+0:20              1 (const int)
+0:20        move second child to first child (temp float)
+0:20          direct index (temp float)
+0:20            direct index (temp 4-component vector of float)
+0:20              'm' (temp 3X4 matrix of float)
+0:20              Constant:
+0:20                1 (const int)
+0:20            Constant:
+0:20              0 (const int)
+0:20          direct index (temp float)
+0:20            'intermVec' (temp 3-component vector of float)
+0:20            Constant:
+0:20              2 (const int)
+0:23      move second child to first child (temp 3-component vector of float)
+0:23        'f3' (temp 3-component vector of float)
+0:23        matrix swizzle (temp 3-component vector of float)
+0:23          'm' (temp 3X4 matrix of float)
+0:23          Sequence
+0:23            Constant:
+0:23              1 (const int)
+0:23            Constant:
+0:23              0 (const int)
+0:23            Constant:
+0:23              0 (const int)
+0:23            Constant:
+0:23              1 (const int)
+0:23            Constant:
+0:23              2 (const int)
+0:23            Constant:
+0:23              0 (const int)
+0:27  Function Definition: createMat3x3(vf3;vf3;vf3; (temp 3X3 matrix of float)
+0:27    Function Parameters: 
+0:27      'a' (in 3-component vector of float)
+0:27      'b' (in 3-component vector of float)
+0:27      'c' (in 3-component vector of float)
+0:?     Sequence
+0:?       Sequence
+0:29        move second child to first child (temp float)
+0:29          direct index (temp float)
+0:29            direct index (temp 3-component vector of float)
+0:29              'm' (temp 3X3 matrix of float)
+0:29              Constant:
+0:29                0 (const int)
+0:29            Constant:
+0:29              0 (const int)
+0:29          direct index (temp float)
+0:29            'a' (in 3-component vector of float)
+0:29            Constant:
+0:29              0 (const int)
+0:29        move second child to first child (temp float)
+0:29          direct index (temp float)
+0:29            direct index (temp 3-component vector of float)
+0:29              'm' (temp 3X3 matrix of float)
+0:29              Constant:
+0:29                1 (const int)
+0:29            Constant:
+0:29              0 (const int)
+0:29          direct index (temp float)
+0:29            'a' (in 3-component vector of float)
+0:29            Constant:
+0:29              1 (const int)
+0:29        move second child to first child (temp float)
+0:29          direct index (temp float)
+0:29            direct index (temp 3-component vector of float)
+0:29              'm' (temp 3X3 matrix of float)
+0:29              Constant:
+0:29                2 (const int)
+0:29            Constant:
+0:29              0 (const int)
+0:29          direct index (temp float)
+0:29            'a' (in 3-component vector of float)
+0:29            Constant:
+0:29              2 (const int)
+0:?       Sequence
+0:30        move second child to first child (temp float)
+0:30          direct index (temp float)
+0:30            direct index (temp 3-component vector of float)
+0:30              'm' (temp 3X3 matrix of float)
+0:30              Constant:
+0:30                0 (const int)
+0:30            Constant:
+0:30              1 (const int)
+0:30          direct index (temp float)
+0:30            'b' (in 3-component vector of float)
+0:30            Constant:
+0:30              0 (const int)
+0:30        move second child to first child (temp float)
+0:30          direct index (temp float)
+0:30            direct index (temp 3-component vector of float)
+0:30              'm' (temp 3X3 matrix of float)
+0:30              Constant:
+0:30                1 (const int)
+0:30            Constant:
+0:30              1 (const int)
+0:30          direct index (temp float)
+0:30            'b' (in 3-component vector of float)
+0:30            Constant:
+0:30              1 (const int)
+0:30        move second child to first child (temp float)
+0:30          direct index (temp float)
+0:30            direct index (temp 3-component vector of float)
+0:30              'm' (temp 3X3 matrix of float)
+0:30              Constant:
+0:30                2 (const int)
+0:30            Constant:
+0:30              1 (const int)
+0:30          direct index (temp float)
+0:30            'b' (in 3-component vector of float)
+0:30            Constant:
+0:30              2 (const int)
+0:?       Sequence
+0:31        move second child to first child (temp float)
+0:31          direct index (temp float)
+0:31            direct index (temp 3-component vector of float)
+0:31              'm' (temp 3X3 matrix of float)
+0:31              Constant:
+0:31                0 (const int)
+0:31            Constant:
+0:31              2 (const int)
+0:31          direct index (temp float)
+0:31            'c' (in 3-component vector of float)
+0:31            Constant:
+0:31              0 (const int)
+0:31        move second child to first child (temp float)
+0:31          direct index (temp float)
+0:31            direct index (temp 3-component vector of float)
+0:31              'm' (temp 3X3 matrix of float)
+0:31              Constant:
+0:31                1 (const int)
+0:31            Constant:
+0:31              2 (const int)
+0:31          direct index (temp float)
+0:31            'c' (in 3-component vector of float)
+0:31            Constant:
+0:31              1 (const int)
+0:31        move second child to first child (temp float)
+0:31          direct index (temp float)
+0:31            direct index (temp 3-component vector of float)
+0:31              'm' (temp 3X3 matrix of float)
+0:31              Constant:
+0:31                2 (const int)
+0:31            Constant:
+0:31              2 (const int)
+0:31          direct index (temp float)
+0:31            'c' (in 3-component vector of float)
+0:31            Constant:
+0:31              2 (const int)
+0:32      Branch: Return with expression
+0:32        'm' (temp 3X3 matrix of float)
+0:?   Linker Objects
+0:?     'inf' (layout(location=0 ) in float)
+
+
+Linked vertex stage:
+
+
+Shader version: 450
+0:? Sequence
+0:2  Function Definition: ShaderFunction(f1; (temp void)
+0:2    Function Parameters: 
+0:2      'inf' (layout(location=0 ) in float)
+0:?     Sequence
+0:7      move second child to first child (temp float)
+0:7        direct index (temp float)
+0:7          direct index (temp 4-component vector of float)
+0:7            'm' (temp 3X4 matrix of float)
+0:7            Constant:
+0:7              2 (const int)
+0:7          Constant:
+0:7            3 (const int)
+0:7        Constant:
+0:7          1.000000
+0:8      move second child to first child (temp float)
+0:8        direct index (temp float)
+0:8          direct index (temp 4-component vector of float)
+0:8            'm' (temp 3X4 matrix of float)
+0:8            Constant:
+0:8              2 (const int)
+0:8          Constant:
+0:8            3 (const int)
+0:8        Constant:
+0:8          2.000000
+0:9      move second child to first child (temp float)
+0:9        direct index (temp float)
+0:9          direct index (temp 4-component vector of float)
+0:9            'm' (temp 3X4 matrix of float)
+0:9            Constant:
+0:9              2 (const int)
+0:9          Constant:
+0:9            3 (const int)
+0:9        Constant:
+0:9          2.000000
+0:11      move second child to first child (temp 4-component vector of float)
+0:11        direct index (temp 4-component vector of float)
+0:11          'm' (temp 3X4 matrix of float)
+0:11          Constant:
+0:11            0 (const int)
+0:11        Constant:
+0:11          3.000000
+0:11          3.000000
+0:11          3.000000
+0:11          3.000000
+0:12      move second child to first child (temp 4-component vector of float)
+0:12        direct index (temp 4-component vector of float)
+0:12          'm' (temp 3X4 matrix of float)
+0:12          Constant:
+0:12            1 (const int)
+0:12        Constant:
+0:12          3.000000
+0:12          3.000000
+0:12          3.000000
+0:12          3.000000
+0:13      move second child to first child (temp 4-component vector of float)
+0:13        direct index (temp 4-component vector of float)
+0:13          'm' (temp 3X4 matrix of float)
+0:13          Constant:
+0:13            1 (const int)
+0:13        Constant:
+0:13          3.000000
+0:13          3.000000
+0:13          3.000000
+0:13          3.000000
+0:?       Sequence
+0:18        move second child to first child (temp float)
+0:18          direct index (temp float)
+0:18            direct index (temp 4-component vector of float)
+0:18              'm' (temp 3X4 matrix of float)
+0:18              Constant:
+0:18                0 (const int)
+0:18            Constant:
+0:18              0 (const int)
+0:18          direct index (temp float)
+0:18            'f3' (temp 3-component vector of float)
+0:18            Constant:
+0:18              0 (const int)
+0:18        move second child to first child (temp float)
+0:18          direct index (temp float)
+0:18            direct index (temp 4-component vector of float)
+0:18              'm' (temp 3X4 matrix of float)
+0:18              Constant:
+0:18                1 (const int)
+0:18            Constant:
+0:18              1 (const int)
+0:18          direct index (temp float)
+0:18            'f3' (temp 3-component vector of float)
+0:18            Constant:
+0:18              1 (const int)
+0:18        move second child to first child (temp float)
+0:18          direct index (temp float)
+0:18            direct index (temp 4-component vector of float)
+0:18              'm' (temp 3X4 matrix of float)
+0:18              Constant:
+0:18                1 (const int)
+0:18            Constant:
+0:18              2 (const int)
+0:18          direct index (temp float)
+0:18            'f3' (temp 3-component vector of float)
+0:18            Constant:
+0:18              2 (const int)
+0:19      Sequence
+0:19        move second child to first child (temp 3-component vector of float)
+0:19          'intermVec' (temp 3-component vector of float)
+0:19          Constant:
+0:19            5.000000
+0:19            5.000000
+0:19            5.000000
+0:19        move second child to first child (temp float)
+0:19          direct index (temp float)
+0:19            direct index (temp 4-component vector of float)
+0:19              'm' (temp 3X4 matrix of float)
+0:19              Constant:
+0:19                1 (const int)
+0:19            Constant:
+0:19              0 (const int)
+0:19          direct index (temp float)
+0:19            'intermVec' (temp 3-component vector of float)
+0:19            Constant:
+0:19              0 (const int)
+0:19        move second child to first child (temp float)
+0:19          direct index (temp float)
+0:19            direct index (temp 4-component vector of float)
+0:19              'm' (temp 3X4 matrix of float)
+0:19              Constant:
+0:19                0 (const int)
+0:19            Constant:
+0:19              1 (const int)
+0:19          direct index (temp float)
+0:19            'intermVec' (temp 3-component vector of float)
+0:19            Constant:
+0:19              1 (const int)
+0:19        move second child to first child (temp float)
+0:19          direct index (temp float)
+0:19            direct index (temp 4-component vector of float)
+0:19              'm' (temp 3X4 matrix of float)
+0:19              Constant:
+0:19                2 (const int)
+0:19            Constant:
+0:19              0 (const int)
+0:19          direct index (temp float)
+0:19            'intermVec' (temp 3-component vector of float)
+0:19            Constant:
+0:19              2 (const int)
+0:20      Sequence
+0:20        move second child to first child (temp 3-component vector of float)
+0:20          'intermVec' (temp 3-component vector of float)
+0:20          vector-scale (temp 3-component vector of float)
+0:20            Constant:
+0:20              2.000000
+0:20            'f3' (temp 3-component vector of float)
+0:20        move second child to first child (temp float)
+0:20          direct index (temp float)
+0:20            direct index (temp 4-component vector of float)
+0:20              'm' (temp 3X4 matrix of float)
+0:20              Constant:
+0:20                0 (const int)
+0:20            Constant:
+0:20              0 (const int)
+0:20          direct index (temp float)
+0:20            'intermVec' (temp 3-component vector of float)
+0:20            Constant:
+0:20              0 (const int)
+0:20        move second child to first child (temp float)
+0:20          direct index (temp float)
+0:20            direct index (temp 4-component vector of float)
+0:20              'm' (temp 3X4 matrix of float)
+0:20              Constant:
+0:20                0 (const int)
+0:20            Constant:
+0:20              1 (const int)
+0:20          direct index (temp float)
+0:20            'intermVec' (temp 3-component vector of float)
+0:20            Constant:
+0:20              1 (const int)
+0:20        move second child to first child (temp float)
+0:20          direct index (temp float)
+0:20            direct index (temp 4-component vector of float)
+0:20              'm' (temp 3X4 matrix of float)
+0:20              Constant:
+0:20                1 (const int)
+0:20            Constant:
+0:20              0 (const int)
+0:20          direct index (temp float)
+0:20            'intermVec' (temp 3-component vector of float)
+0:20            Constant:
+0:20              2 (const int)
+0:23      move second child to first child (temp 3-component vector of float)
+0:23        'f3' (temp 3-component vector of float)
+0:23        matrix swizzle (temp 3-component vector of float)
+0:23          'm' (temp 3X4 matrix of float)
+0:23          Sequence
+0:23            Constant:
+0:23              1 (const int)
+0:23            Constant:
+0:23              0 (const int)
+0:23            Constant:
+0:23              0 (const int)
+0:23            Constant:
+0:23              1 (const int)
+0:23            Constant:
+0:23              2 (const int)
+0:23            Constant:
+0:23              0 (const int)
+0:27  Function Definition: createMat3x3(vf3;vf3;vf3; (temp 3X3 matrix of float)
+0:27    Function Parameters: 
+0:27      'a' (in 3-component vector of float)
+0:27      'b' (in 3-component vector of float)
+0:27      'c' (in 3-component vector of float)
+0:?     Sequence
+0:?       Sequence
+0:29        move second child to first child (temp float)
+0:29          direct index (temp float)
+0:29            direct index (temp 3-component vector of float)
+0:29              'm' (temp 3X3 matrix of float)
+0:29              Constant:
+0:29                0 (const int)
+0:29            Constant:
+0:29              0 (const int)
+0:29          direct index (temp float)
+0:29            'a' (in 3-component vector of float)
+0:29            Constant:
+0:29              0 (const int)
+0:29        move second child to first child (temp float)
+0:29          direct index (temp float)
+0:29            direct index (temp 3-component vector of float)
+0:29              'm' (temp 3X3 matrix of float)
+0:29              Constant:
+0:29                1 (const int)
+0:29            Constant:
+0:29              0 (const int)
+0:29          direct index (temp float)
+0:29            'a' (in 3-component vector of float)
+0:29            Constant:
+0:29              1 (const int)
+0:29        move second child to first child (temp float)
+0:29          direct index (temp float)
+0:29            direct index (temp 3-component vector of float)
+0:29              'm' (temp 3X3 matrix of float)
+0:29              Constant:
+0:29                2 (const int)
+0:29            Constant:
+0:29              0 (const int)
+0:29          direct index (temp float)
+0:29            'a' (in 3-component vector of float)
+0:29            Constant:
+0:29              2 (const int)
+0:?       Sequence
+0:30        move second child to first child (temp float)
+0:30          direct index (temp float)
+0:30            direct index (temp 3-component vector of float)
+0:30              'm' (temp 3X3 matrix of float)
+0:30              Constant:
+0:30                0 (const int)
+0:30            Constant:
+0:30              1 (const int)
+0:30          direct index (temp float)
+0:30            'b' (in 3-component vector of float)
+0:30            Constant:
+0:30              0 (const int)
+0:30        move second child to first child (temp float)
+0:30          direct index (temp float)
+0:30            direct index (temp 3-component vector of float)
+0:30              'm' (temp 3X3 matrix of float)
+0:30              Constant:
+0:30                1 (const int)
+0:30            Constant:
+0:30              1 (const int)
+0:30          direct index (temp float)
+0:30            'b' (in 3-component vector of float)
+0:30            Constant:
+0:30              1 (const int)
+0:30        move second child to first child (temp float)
+0:30          direct index (temp float)
+0:30            direct index (temp 3-component vector of float)
+0:30              'm' (temp 3X3 matrix of float)
+0:30              Constant:
+0:30                2 (const int)
+0:30            Constant:
+0:30              1 (const int)
+0:30          direct index (temp float)
+0:30            'b' (in 3-component vector of float)
+0:30            Constant:
+0:30              2 (const int)
+0:?       Sequence
+0:31        move second child to first child (temp float)
+0:31          direct index (temp float)
+0:31            direct index (temp 3-component vector of float)
+0:31              'm' (temp 3X3 matrix of float)
+0:31              Constant:
+0:31                0 (const int)
+0:31            Constant:
+0:31              2 (const int)
+0:31          direct index (temp float)
+0:31            'c' (in 3-component vector of float)
+0:31            Constant:
+0:31              0 (const int)
+0:31        move second child to first child (temp float)
+0:31          direct index (temp float)
+0:31            direct index (temp 3-component vector of float)
+0:31              'm' (temp 3X3 matrix of float)
+0:31              Constant:
+0:31                1 (const int)
+0:31            Constant:
+0:31              2 (const int)
+0:31          direct index (temp float)
+0:31            'c' (in 3-component vector of float)
+0:31            Constant:
+0:31              1 (const int)
+0:31        move second child to first child (temp float)
+0:31          direct index (temp float)
+0:31            direct index (temp 3-component vector of float)
+0:31              'm' (temp 3X3 matrix of float)
+0:31              Constant:
+0:31                2 (const int)
+0:31            Constant:
+0:31              2 (const int)
+0:31          direct index (temp float)
+0:31            'c' (in 3-component vector of float)
+0:31            Constant:
+0:31              2 (const int)
+0:32      Branch: Return with expression
+0:32        'm' (temp 3X3 matrix of float)
+0:?   Linker Objects
+0:?     'inf' (layout(location=0 ) in float)
+
+Missing functionality: matrix swizzle
+// Module Version 10000
+// Generated by (magic number): 80001
+// Id's are bound by 109
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Vertex 4  "ShaderFunction" 108
+                              Name 4  "ShaderFunction"
+                              Name 14  "createMat3x3(vf3;vf3;vf3;"
+                              Name 11  "a"
+                              Name 12  "b"
+                              Name 13  "c"
+                              Name 19  "m"
+                              Name 38  "f3"
+                              Name 51  "intermVec"
+                              Name 63  "intermVec"
+                              Name 76  "m"
+                              Name 108  "inf"
+                              Decorate 108(inf) Location 0
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 3
+               8:             TypePointer Function 7(fvec3)
+               9:             TypeMatrix 7(fvec3) 3
+              10:             TypeFunction 9 8(ptr) 8(ptr) 8(ptr)
+              16:             TypeVector 6(float) 4
+              17:             TypeMatrix 16(fvec4) 3
+              18:             TypePointer Function 17
+              20:             TypeInt 32 1
+              21:     20(int) Constant 2
+              22:    6(float) Constant 1065353216
+              23:             TypeInt 32 0
+              24:     23(int) Constant 3
+              25:             TypePointer Function 6(float)
+              27:    6(float) Constant 1073741824
+              30:     20(int) Constant 0
+              31:    6(float) Constant 1077936128
+              32:   16(fvec4) ConstantComposite 31 31 31 31
+              33:             TypePointer Function 16(fvec4)
+              35:     20(int) Constant 1
+              39:     23(int) Constant 0
+              43:     23(int) Constant 1
+              47:     23(int) Constant 2
+              52:    6(float) Constant 1084227584
+              53:    7(fvec3) ConstantComposite 52 52 52
+              75:             TypePointer Function 9
+             107:             TypePointer Input 6(float)
+        108(inf):    107(ptr) Variable Input
+4(ShaderFunction):           2 Function None 3
+               5:             Label
+           19(m):     18(ptr) Variable Function
+          38(f3):      8(ptr) Variable Function
+   51(intermVec):      8(ptr) Variable Function
+   63(intermVec):      8(ptr) Variable Function
+              26:     25(ptr) AccessChain 19(m) 21 24
+                              Store 26 22
+              28:     25(ptr) AccessChain 19(m) 21 24
+                              Store 28 27
+              29:     25(ptr) AccessChain 19(m) 21 24
+                              Store 29 27
+              34:     33(ptr) AccessChain 19(m) 30
+                              Store 34 32
+              36:     33(ptr) AccessChain 19(m) 35
+                              Store 36 32
+              37:     33(ptr) AccessChain 19(m) 35
+                              Store 37 32
+              40:     25(ptr) AccessChain 38(f3) 39
+              41:    6(float) Load 40
+              42:     25(ptr) AccessChain 19(m) 30 39
+                              Store 42 41
+              44:     25(ptr) AccessChain 38(f3) 43
+              45:    6(float) Load 44
+              46:     25(ptr) AccessChain 19(m) 35 43
+                              Store 46 45
+              48:     25(ptr) AccessChain 38(f3) 47
+              49:    6(float) Load 48
+              50:     25(ptr) AccessChain 19(m) 35 47
+                              Store 50 49
+                              Store 51(intermVec) 53
+              54:     25(ptr) AccessChain 51(intermVec) 39
+              55:    6(float) Load 54
+              56:     25(ptr) AccessChain 19(m) 35 39
+                              Store 56 55
+              57:     25(ptr) AccessChain 51(intermVec) 43
+              58:    6(float) Load 57
+              59:     25(ptr) AccessChain 19(m) 30 43
+                              Store 59 58
+              60:     25(ptr) AccessChain 51(intermVec) 47
+              61:    6(float) Load 60
+              62:     25(ptr) AccessChain 19(m) 21 39
+                              Store 62 61
+              64:    7(fvec3) Load 38(f3)
+              65:    7(fvec3) VectorTimesScalar 64 27
+                              Store 63(intermVec) 65
+              66:     25(ptr) AccessChain 63(intermVec) 39
+              67:    6(float) Load 66
+              68:     25(ptr) AccessChain 19(m) 30 39
+                              Store 68 67
+              69:     25(ptr) AccessChain 63(intermVec) 43
+              70:    6(float) Load 69
+              71:     25(ptr) AccessChain 19(m) 30 43
+                              Store 71 70
+              72:     25(ptr) AccessChain 63(intermVec) 47
+              73:    6(float) Load 72
+              74:     25(ptr) AccessChain 19(m) 35 39
+                              Store 74 73
+                              Store 38(f3) 30
+                              Return
+                              FunctionEnd
+14(createMat3x3(vf3;vf3;vf3;):           9 Function None 10
+           11(a):      8(ptr) FunctionParameter
+           12(b):      8(ptr) FunctionParameter
+           13(c):      8(ptr) FunctionParameter
+              15:             Label
+           76(m):     75(ptr) Variable Function
+              77:     25(ptr) AccessChain 11(a) 39
+              78:    6(float) Load 77
+              79:     25(ptr) AccessChain 76(m) 30 39
+                              Store 79 78
+              80:     25(ptr) AccessChain 11(a) 43
+              81:    6(float) Load 80
+              82:     25(ptr) AccessChain 76(m) 35 39
+                              Store 82 81
+              83:     25(ptr) AccessChain 11(a) 47
+              84:    6(float) Load 83
+              85:     25(ptr) AccessChain 76(m) 21 39
+                              Store 85 84
+              86:     25(ptr) AccessChain 12(b) 39
+              87:    6(float) Load 86
+              88:     25(ptr) AccessChain 76(m) 30 43
+                              Store 88 87
+              89:     25(ptr) AccessChain 12(b) 43
+              90:    6(float) Load 89
+              91:     25(ptr) AccessChain 76(m) 35 43
+                              Store 91 90
+              92:     25(ptr) AccessChain 12(b) 47
+              93:    6(float) Load 92
+              94:     25(ptr) AccessChain 76(m) 21 43
+                              Store 94 93
+              95:     25(ptr) AccessChain 13(c) 39
+              96:    6(float) Load 95
+              97:     25(ptr) AccessChain 76(m) 30 47
+                              Store 97 96
+              98:     25(ptr) AccessChain 13(c) 43
+              99:    6(float) Load 98
+             100:     25(ptr) AccessChain 76(m) 35 47
+                              Store 100 99
+             101:     25(ptr) AccessChain 13(c) 47
+             102:    6(float) Load 101
+             103:     25(ptr) AccessChain 76(m) 21 47
+                              Store 103 102
+             104:           9 Load 76(m)
+                              ReturnValue 104
+                              FunctionEnd
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.samplegrad.basic.dx10.vert.out b/3rdparty/glslang/Test/baseResults/hlsl.samplegrad.basic.dx10.vert.out
index d7ea38621..1f438cee4 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.samplegrad.basic.dx10.vert.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.samplegrad.basic.dx10.vert.out
@@ -239,7 +239,7 @@ Shader version: 450
 0:?     'g_tTexcdf4' (uniform textureCube)
 0:?     'g_tTexcdi4' (uniform itextureCube)
 0:?     'g_tTexcdu4' (uniform utextureCube)
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 
 Linked vertex stage:
@@ -485,17 +485,17 @@ Shader version: 450
 0:?     'g_tTexcdf4' (uniform textureCube)
 0:?     'g_tTexcdi4' (uniform itextureCube)
 0:?     'g_tTexcdu4' (uniform utextureCube)
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 161
+// Id's are bound by 164
 
                               Capability Shader
                               Capability Sampled1D
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Vertex 4  "main" 156
+                              EntryPoint Vertex 4  "main" 156 163
                               Name 4  "main"
                               Name 9  "txval10"
                               Name 12  "g_tTex1df4"
@@ -527,6 +527,9 @@ Shader version: 450
                               Name 150  "vsout"
                               Name 156  "Pos"
                               Name 160  "g_tTex1df4a"
+                              Name 161  "PerVertex_out"
+                              MemberName 161(PerVertex_out) 0  "Pos"
+                              Name 163  "PerVertex_out"
                               Decorate 12(g_tTex1df4) DescriptorSet 0
                               Decorate 12(g_tTex1df4) Binding 0
                               Decorate 16(g_sSamp) DescriptorSet 0
@@ -545,6 +548,8 @@ Shader version: 450
                               Decorate 156(Pos) BuiltIn Position
                               Decorate 160(g_tTex1df4a) DescriptorSet 0
                               Decorate 160(g_tTex1df4a) Binding 1
+                              MemberDecorate 161(PerVertex_out) 0 BuiltIn Position
+                              Decorate 161(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -637,6 +642,9 @@ Shader version: 450
              155:             TypePointer Output 7(fvec4)
         156(Pos):    155(ptr) Variable Output
 160(g_tTex1df4a):     11(ptr) Variable UniformConstant
+161(PerVertex_out):             TypeStruct 7(fvec4)
+             162:             TypePointer Output 161(PerVertex_out)
+163(PerVertex_out):    162(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
       9(txval10):      8(ptr) Variable Function
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.samplelevel.basic.dx10.vert.out b/3rdparty/glslang/Test/baseResults/hlsl.samplelevel.basic.dx10.vert.out
index 35a9ede21..a811ed256 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.samplelevel.basic.dx10.vert.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.samplelevel.basic.dx10.vert.out
@@ -185,7 +185,7 @@ Shader version: 450
 0:?     'g_tTexcdf4' (uniform textureCube)
 0:?     'g_tTexcdi4' (uniform itextureCube)
 0:?     'g_tTexcdu4' (uniform utextureCube)
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 
 Linked vertex stage:
@@ -377,17 +377,17 @@ Shader version: 450
 0:?     'g_tTexcdf4' (uniform textureCube)
 0:?     'g_tTexcdi4' (uniform itextureCube)
 0:?     'g_tTexcdu4' (uniform utextureCube)
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 157
+// Id's are bound by 160
 
                               Capability Shader
                               Capability Sampled1D
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Vertex 4  "main" 152
+                              EntryPoint Vertex 4  "main" 152 159
                               Name 4  "main"
                               Name 9  "txval10"
                               Name 12  "g_tTex1df4"
@@ -419,6 +419,9 @@ Shader version: 450
                               Name 146  "vsout"
                               Name 152  "Pos"
                               Name 156  "g_tTex1df4a"
+                              Name 157  "PerVertex_out"
+                              MemberName 157(PerVertex_out) 0  "Pos"
+                              Name 159  "PerVertex_out"
                               Decorate 12(g_tTex1df4) DescriptorSet 0
                               Decorate 12(g_tTex1df4) Binding 0
                               Decorate 16(g_sSamp) DescriptorSet 0
@@ -437,6 +440,8 @@ Shader version: 450
                               Decorate 152(Pos) BuiltIn Position
                               Decorate 156(g_tTex1df4a) DescriptorSet 0
                               Decorate 156(g_tTex1df4a) Binding 1
+                              MemberDecorate 157(PerVertex_out) 0 BuiltIn Position
+                              Decorate 157(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -525,6 +530,9 @@ Shader version: 450
              151:             TypePointer Output 7(fvec4)
         152(Pos):    151(ptr) Variable Output
 156(g_tTex1df4a):     11(ptr) Variable UniformConstant
+157(PerVertex_out):             TypeStruct 7(fvec4)
+             158:             TypePointer Output 157(PerVertex_out)
+159(PerVertex_out):    158(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
       9(txval10):      8(ptr) Variable Function
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.struct.frag.out b/3rdparty/glslang/Test/baseResults/hlsl.struct.frag.out
index e2b2f728a..3c94ffaed 100755
--- a/3rdparty/glslang/Test/baseResults/hlsl.struct.frag.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.struct.frag.out
@@ -9,7 +9,7 @@ gl_FragCoord origin is upper left
 0:34  Function Definition: PixelShaderFunction(vf4;struct-IN_S-vf4-b1-vf1-vf2-b1-b1-b1-vf41; (temp 4-component vector of float)
 0:34    Function Parameters: 
 0:34      'input' (layout(location=0 ) in 4-component vector of float)
-0:34      's' (layout(location=1 ) in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, temp bool Face ff1, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
+0:34      's' (in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, temp bool Face ff1, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
 0:?     Sequence
 0:39      Compare Equal (temp bool)
 0:39        's3' (temp structure{temp 3-component vector of bool b3})
@@ -20,9 +20,9 @@ gl_FragCoord origin is upper left
 0:40          Constant:
 0:40            0 (const int)
 0:40        ff4: direct index for structure (layout(binding=0 offset=4 ) temp 4-component vector of float)
-0:40          's' (layout(location=1 ) in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, temp bool Face ff1, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
+0:40          's' (layout(location=1 ) in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
 0:40          Constant:
-0:40            7 (const int)
+0:40            6 (const int)
 0:42      Sequence
 0:42        move second child to first child (temp 4-component vector of float)
 0:?           '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
@@ -31,9 +31,10 @@ gl_FragCoord origin is upper left
 0:?   Linker Objects
 0:?     '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
 0:?     'input' (layout(location=0 ) in 4-component vector of float)
-0:?     's' (layout(location=1 ) in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, temp bool Face ff1, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
+0:?     's' (layout(location=1 ) in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
 0:?     's2' (global structure{temp 4-component vector of float i})
 0:?     'anon@0' (layout(row_major std140 ) uniform block{layout(offset=0 ) uniform structure{temp bool b, temp bool c, temp 4-component vector of float a, temp 4-component vector of float d} s1, layout(binding=5 offset=1620 ) uniform float ff5, layout(binding=8 offset=1636 ) uniform float ff6})
+0:?     's_ff1' (in bool Face)
 
 
 Linked fragment stage:
@@ -45,7 +46,7 @@ gl_FragCoord origin is upper left
 0:34  Function Definition: PixelShaderFunction(vf4;struct-IN_S-vf4-b1-vf1-vf2-b1-b1-b1-vf41; (temp 4-component vector of float)
 0:34    Function Parameters: 
 0:34      'input' (layout(location=0 ) in 4-component vector of float)
-0:34      's' (layout(location=1 ) in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, temp bool Face ff1, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
+0:34      's' (in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, temp bool Face ff1, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
 0:?     Sequence
 0:39      Compare Equal (temp bool)
 0:39        's3' (temp structure{temp 3-component vector of bool b3})
@@ -56,9 +57,9 @@ gl_FragCoord origin is upper left
 0:40          Constant:
 0:40            0 (const int)
 0:40        ff4: direct index for structure (layout(binding=0 offset=4 ) temp 4-component vector of float)
-0:40          's' (layout(location=1 ) in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, temp bool Face ff1, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
+0:40          's' (layout(location=1 ) in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
 0:40          Constant:
-0:40            7 (const int)
+0:40            6 (const int)
 0:42      Sequence
 0:42        move second child to first child (temp 4-component vector of float)
 0:?           '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
@@ -67,18 +68,19 @@ gl_FragCoord origin is upper left
 0:?   Linker Objects
 0:?     '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
 0:?     'input' (layout(location=0 ) in 4-component vector of float)
-0:?     's' (layout(location=1 ) in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, temp bool Face ff1, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
+0:?     's' (layout(location=1 ) in structure{smooth temp 4-component vector of float a, flat temp bool b, centroid noperspective temp 1-component vector of float c, centroid sample temp 2-component vector of float d, layout(offset=4 ) temp bool ff2, layout(binding=0 offset=4 ) temp bool ff3, layout(binding=0 offset=4 ) temp 4-component vector of float ff4})
 0:?     's2' (global structure{temp 4-component vector of float i})
 0:?     'anon@0' (layout(row_major std140 ) uniform block{layout(offset=0 ) uniform structure{temp bool b, temp bool c, temp 4-component vector of float a, temp 4-component vector of float d} s1, layout(binding=5 offset=1620 ) uniform float ff5, layout(binding=8 offset=1636 ) uniform float ff6})
+0:?     's_ff1' (in bool Face)
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 44
+// Id's are bound by 46
 
                               Capability Shader
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Fragment 4  "PixelShaderFunction" 27 35 36
+                              EntryPoint Fragment 4  "PixelShaderFunction" 27 35 36 45
                               ExecutionMode 4 OriginUpperLeft
                               Name 4  "PixelShaderFunction"
                               Name 8  "FS"
@@ -92,10 +94,9 @@ gl_FragCoord origin is upper left
                               MemberName 25(IN_S) 1  "b"
                               MemberName 25(IN_S) 2  "c"
                               MemberName 25(IN_S) 3  "d"
-                              MemberName 25(IN_S) 4  "ff1"
-                              MemberName 25(IN_S) 5  "ff2"
-                              MemberName 25(IN_S) 6  "ff3"
-                              MemberName 25(IN_S) 7  "ff4"
+                              MemberName 25(IN_S) 4  "ff2"
+                              MemberName 25(IN_S) 5  "ff3"
+                              MemberName 25(IN_S) 6  "ff4"
                               Name 27  "s"
                               Name 35  "@entryPointOutput"
                               Name 36  "input"
@@ -109,7 +110,7 @@ gl_FragCoord origin is upper left
                               MemberName 41($Global) 1  "ff5"
                               MemberName 41($Global) 2  "ff6"
                               Name 43  ""
-                              MemberDecorate 25(IN_S) 4 BuiltIn FrontFacing
+                              Name 45  "s_ff1"
                               Decorate 27(s) Location 1
                               Decorate 35(@entryPointOutput) Location 0
                               Decorate 36(input) Location 0
@@ -122,6 +123,7 @@ gl_FragCoord origin is upper left
                               MemberDecorate 41($Global) 2 Offset 1636
                               Decorate 41($Global) Block
                               Decorate 43 DescriptorSet 0
+                              Decorate 45(s_ff1) BuiltIn FrontFacing
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeBool
@@ -136,10 +138,10 @@ gl_FragCoord origin is upper left
               22:             TypeInt 32 1
               23:     22(int) Constant 0
               24:             TypeVector 17(float) 2
-        25(IN_S):             TypeStruct 18(fvec4) 6(bool) 17(float) 24(fvec2) 6(bool) 6(bool) 6(bool) 18(fvec4)
+        25(IN_S):             TypeStruct 18(fvec4) 6(bool) 17(float) 24(fvec2) 6(bool) 6(bool) 18(fvec4)
               26:             TypePointer Input 25(IN_S)
            27(s):     26(ptr) Variable Input
-              28:     22(int) Constant 7
+              28:     22(int) Constant 6
               29:             TypePointer Input 18(fvec4)
               32:             TypePointer Private 18(fvec4)
               34:             TypePointer Output 18(fvec4)
@@ -150,6 +152,8 @@ gl_FragCoord origin is upper left
      41($Global):             TypeStruct 40(myS) 17(float) 17(float)
               42:             TypePointer Uniform 41($Global)
               43:     42(ptr) Variable Uniform
+              44:             TypePointer Input 6(bool)
+       45(s_ff1):     44(ptr) Variable Input
 4(PixelShaderFunction):           2 Function None 3
                5:             Label
           10(s3):      9(ptr) Variable Function
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.struct.split-1.vert.out b/3rdparty/glslang/Test/baseResults/hlsl.struct.split-1.vert.out
index 62431ac37..a1c0c0c81 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.struct.split-1.vert.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.struct.split-1.vert.out
@@ -59,7 +59,7 @@ Shader version: 450
 0:?     'Pos_in' (in 4-component vector of float Position)
 0:?     'x1_in' (layout(location=1 ) in int)
 0:?     'Pos_loose' (in 4-component vector of float Position)
-0:?     'Pos_out' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos_out})
 
 
 Linked vertex stage:
@@ -125,16 +125,16 @@ Shader version: 450
 0:?     'Pos_in' (in 4-component vector of float Position)
 0:?     'x1_in' (layout(location=1 ) in int)
 0:?     'Pos_loose' (in 4-component vector of float Position)
-0:?     'Pos_out' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos_out})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 46
+// Id's are bound by 49
 
                               Capability Shader
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Vertex 4  "main" 14 20 22 28 33 39
+                              EntryPoint Vertex 4  "main" 14 20 22 28 33 39 48
                               Name 4  "main"
                               Name 9  "VS_OUTPUT"
                               MemberName 9(VS_OUTPUT) 0  "x0_out"
@@ -150,12 +150,17 @@ Shader version: 450
                               MemberName 31(VS_OUTPUT) 1  "x1_out"
                               Name 33  "@entryPointOutput"
                               Name 39  "Pos_out"
+                              Name 46  "PerVertex_out"
+                              MemberName 46(PerVertex_out) 0  "Pos_out"
+                              Name 48  "PerVertex_out"
                               Decorate 14(x0_in) Location 0
                               Decorate 20(Pos_in) BuiltIn Position
                               Decorate 22(Pos_loose) BuiltIn Position
                               Decorate 28(x1_in) Location 1
                               Decorate 33(@entryPointOutput) Location 0
                               Decorate 39(Pos_out) BuiltIn Position
+                              MemberDecorate 46(PerVertex_out) 0 BuiltIn Position
+                              Decorate 46(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeInt 32 1
@@ -180,6 +185,9 @@ Shader version: 450
               36:             TypePointer Output 6(int)
               38:             TypePointer Output 8(fvec4)
      39(Pos_out):     38(ptr) Variable Output
+46(PerVertex_out):             TypeStruct 8(fvec4)
+              47:             TypePointer Output 46(PerVertex_out)
+48(PerVertex_out):     47(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
        11(vsout):     10(ptr) Variable Function
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.array.geom.out b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.array.geom.out
index e8921b68e..bc5b00d1c 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.array.geom.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.array.geom.out
@@ -64,7 +64,7 @@ output primitive = triangle_strip
 0:?   Linker Objects
 0:?     'v' (layout(location=0 ) in 1-element array of uint)
 0:?     'OutputStream' (layout(location=0 ) out structure{temp 2-component vector of float TexCoord, temp 3-component vector of float TerrainPos, temp uint VertexID})
-0:?     'OutputStream.Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position OutputStream_Pos})
 
 
 Linked geometry stage:
@@ -135,16 +135,16 @@ output primitive = triangle_strip
 0:?   Linker Objects
 0:?     'v' (layout(location=0 ) in 1-element array of uint)
 0:?     'OutputStream' (layout(location=0 ) out structure{temp 2-component vector of float TexCoord, temp 3-component vector of float TerrainPos, temp uint VertexID})
-0:?     'OutputStream.Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position OutputStream_Pos})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 89
+// Id's are bound by 90
 
                               Capability Geometry
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Geometry 4  "main" 83 86 88
+                              EntryPoint Geometry 4  "main" 83 86 89
                               ExecutionMode 4 InputPoints
                               ExecutionMode 4 Invocations 1
                               ExecutionMode 4 OutputTriangleStrip
@@ -175,11 +175,14 @@ output primitive = triangle_strip
                               MemberName 84(PSInput) 1  "TerrainPos"
                               MemberName 84(PSInput) 2  "VertexID"
                               Name 86  "OutputStream"
-                              Name 88  "OutputStream.Pos"
+                              Name 87  "PerVertex_out"
+                              MemberName 87(PerVertex_out) 0  "OutputStream_Pos"
+                              Name 89  "PerVertex_out"
                               MemberDecorate 14(PSInput) 0 BuiltIn Position
                               Decorate 83(v) Location 0
                               Decorate 86(OutputStream) Location 0
-                              Decorate 88(OutputStream.Pos) BuiltIn Position
+                              MemberDecorate 87(PerVertex_out) 0 BuiltIn Position
+                              Decorate 87(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -221,8 +224,9 @@ output primitive = triangle_strip
      84(PSInput):             TypeStruct 8(fvec2) 9(fvec3) 10(int)
               85:             TypePointer Output 84(PSInput)
 86(OutputStream):     85(ptr) Variable Output
-              87:             TypePointer Output 7(fvec4)
-88(OutputStream.Pos):     87(ptr) Variable Output
+87(PerVertex_out):             TypeStruct 7(fvec4)
+              88:             TypePointer Output 87(PerVertex_out)
+89(PerVertex_out):     88(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
          13(Out):     12(ptr) Variable Function
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.assign.frag.out b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.assign.frag.out
new file mode 100644
index 000000000..e214c5120
--- /dev/null
+++ b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.assign.frag.out
@@ -0,0 +1,196 @@
+hlsl.struct.split.assign.frag
+Shader version: 450
+gl_FragCoord origin is upper left
+0:? Sequence
+0:7  Function Definition: main(i1;struct-S-f1-vf41[3]; (temp 4-component vector of float)
+0:7    Function Parameters: 
+0:7      'i' (layout(location=0 ) in int)
+0:7      'input' (in 3-element array of structure{temp float f, temp 4-component vector of float FragCoord pos})
+0:?     Sequence
+0:9      Sequence
+0:9        move second child to first child (temp float)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (layout(location=1 ) in structure{temp float f})
+0:9              'input' (layout(location=1 ) in 3-element array of structure{temp float f})
+0:9              Constant:
+0:9                0 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (temp structure{temp float f, temp 4-component vector of float pos})
+0:9              'a' (temp 3-element array of structure{temp float f, temp 4-component vector of float pos})
+0:9              Constant:
+0:9                0 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:9        move second child to first child (temp float)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (layout(location=1 ) in structure{temp float f})
+0:9              'input' (layout(location=1 ) in 3-element array of structure{temp float f})
+0:9              Constant:
+0:9                1 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (temp structure{temp float f, temp 4-component vector of float pos})
+0:9              'a' (temp 3-element array of structure{temp float f, temp 4-component vector of float pos})
+0:9              Constant:
+0:9                1 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:9        move second child to first child (temp float)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (layout(location=1 ) in structure{temp float f})
+0:9              'input' (layout(location=1 ) in 3-element array of structure{temp float f})
+0:9              Constant:
+0:9                2 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (temp structure{temp float f, temp 4-component vector of float pos})
+0:9              'a' (temp 3-element array of structure{temp float f, temp 4-component vector of float pos})
+0:9              Constant:
+0:9                2 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:11      Sequence
+0:11        Branch: Return
+0:?   Linker Objects
+0:?     '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
+0:?     'i' (layout(location=0 ) in int)
+0:?     'input' (layout(location=1 ) in 3-element array of structure{temp float f})
+0:?     'input_pos' (in 3-element array of 4-component vector of float FragCoord)
+
+
+Linked fragment stage:
+
+
+Shader version: 450
+gl_FragCoord origin is upper left
+0:? Sequence
+0:7  Function Definition: main(i1;struct-S-f1-vf41[3]; (temp 4-component vector of float)
+0:7    Function Parameters: 
+0:7      'i' (layout(location=0 ) in int)
+0:7      'input' (in 3-element array of structure{temp float f, temp 4-component vector of float FragCoord pos})
+0:?     Sequence
+0:9      Sequence
+0:9        move second child to first child (temp float)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (layout(location=1 ) in structure{temp float f})
+0:9              'input' (layout(location=1 ) in 3-element array of structure{temp float f})
+0:9              Constant:
+0:9                0 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (temp structure{temp float f, temp 4-component vector of float pos})
+0:9              'a' (temp 3-element array of structure{temp float f, temp 4-component vector of float pos})
+0:9              Constant:
+0:9                0 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:9        move second child to first child (temp float)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (layout(location=1 ) in structure{temp float f})
+0:9              'input' (layout(location=1 ) in 3-element array of structure{temp float f})
+0:9              Constant:
+0:9                1 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (temp structure{temp float f, temp 4-component vector of float pos})
+0:9              'a' (temp 3-element array of structure{temp float f, temp 4-component vector of float pos})
+0:9              Constant:
+0:9                1 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:9        move second child to first child (temp float)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (layout(location=1 ) in structure{temp float f})
+0:9              'input' (layout(location=1 ) in 3-element array of structure{temp float f})
+0:9              Constant:
+0:9                2 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:9          f: direct index for structure (temp float)
+0:9            direct index (temp structure{temp float f, temp 4-component vector of float pos})
+0:9              'a' (temp 3-element array of structure{temp float f, temp 4-component vector of float pos})
+0:9              Constant:
+0:9                2 (const int)
+0:9            Constant:
+0:9              0 (const int)
+0:11      Sequence
+0:11        Branch: Return
+0:?   Linker Objects
+0:?     '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
+0:?     'i' (layout(location=0 ) in int)
+0:?     'input' (layout(location=1 ) in 3-element array of structure{temp float f})
+0:?     'input_pos' (in 3-element array of 4-component vector of float FragCoord)
+
+// Module Version 10000
+// Generated by (magic number): 80001
+// Id's are bound by 41
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Fragment 4  "main" 12 35 37 40
+                              ExecutionMode 4 OriginUpperLeft
+                              Name 4  "main"
+                              Name 7  "S"
+                              MemberName 7(S) 0  "f"
+                              Name 12  "input"
+                              Name 16  "S"
+                              MemberName 16(S) 0  "f"
+                              MemberName 16(S) 1  "pos"
+                              Name 19  "a"
+                              Name 35  "@entryPointOutput"
+                              Name 37  "i"
+                              Name 40  "input_pos"
+                              Decorate 12(input) Location 1
+                              Decorate 35(@entryPointOutput) Location 0
+                              Decorate 37(i) Location 0
+                              Decorate 40(input_pos) BuiltIn FragCoord
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+            7(S):             TypeStruct 6(float)
+               8:             TypeInt 32 0
+               9:      8(int) Constant 3
+              10:             TypeArray 7(S) 9
+              11:             TypePointer Input 10
+       12(input):     11(ptr) Variable Input
+              13:             TypeInt 32 1
+              14:     13(int) Constant 0
+              15:             TypeVector 6(float) 4
+           16(S):             TypeStruct 6(float) 15(fvec4)
+              17:             TypeArray 16(S) 9
+              18:             TypePointer Function 17
+              20:             TypePointer Function 6(float)
+              23:             TypePointer Input 6(float)
+              25:     13(int) Constant 1
+              29:     13(int) Constant 2
+              34:             TypePointer Output 15(fvec4)
+35(@entryPointOutput):     34(ptr) Variable Output
+              36:             TypePointer Input 13(int)
+           37(i):     36(ptr) Variable Input
+              38:             TypeArray 15(fvec4) 9
+              39:             TypePointer Input 38
+   40(input_pos):     39(ptr) Variable Input
+         4(main):           2 Function None 3
+               5:             Label
+           19(a):     18(ptr) Variable Function
+              21:     20(ptr) AccessChain 19(a) 14 14
+              22:    6(float) Load 21
+              24:     23(ptr) AccessChain 12(input) 14 14
+                              Store 24 22
+              26:     20(ptr) AccessChain 19(a) 25 14
+              27:    6(float) Load 26
+              28:     23(ptr) AccessChain 12(input) 25 14
+                              Store 28 27
+              30:     20(ptr) AccessChain 19(a) 29 14
+              31:    6(float) Load 30
+              32:     23(ptr) AccessChain 12(input) 29 14
+                              Store 32 31
+                              Return
+                              FunctionEnd
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.call.vert.out b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.call.vert.out
index d7a19e448..40245f441 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.call.vert.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.call.vert.out
@@ -92,7 +92,7 @@ Shader version: 450
 0:?     'x0_in' (layout(location=0 ) in int)
 0:?     'Pos_in' (in 4-component vector of float Position)
 0:?     'x1_in' (layout(location=1 ) in int)
-0:?     'Pos_out' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos_out})
 
 
 Linked vertex stage:
@@ -191,16 +191,16 @@ Shader version: 450
 0:?     'x0_in' (layout(location=0 ) in int)
 0:?     'Pos_in' (in 4-component vector of float Position)
 0:?     'x1_in' (layout(location=1 ) in int)
-0:?     'Pos_out' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos_out})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 69
+// Id's are bound by 72
 
                               Capability Shader
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Vertex 4  "main" 28 33 37 56 62
+                              EntryPoint Vertex 4  "main" 28 33 37 56 62 71
                               Name 4  "main"
                               Name 9  "VS_INPUT"
                               MemberName 9(VS_INPUT) 0  "x0_in"
@@ -229,11 +229,16 @@ Shader version: 450
                               MemberName 54(VS_OUTPUT) 1  "x1_out"
                               Name 56  "@entryPointOutput"
                               Name 62  "Pos_out"
+                              Name 69  "PerVertex_out"
+                              MemberName 69(PerVertex_out) 0  "Pos_out"
+                              Name 71  "PerVertex_out"
                               Decorate 28(x0_in) Location 0
                               Decorate 33(Pos_in) BuiltIn Position
                               Decorate 37(x1_in) Location 1
                               Decorate 56(@entryPointOutput) Location 0
                               Decorate 62(Pos_out) BuiltIn Position
+                              MemberDecorate 69(PerVertex_out) 0 BuiltIn Position
+                              Decorate 69(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeInt 32 1
@@ -262,6 +267,9 @@ Shader version: 450
               59:             TypePointer Output 6(int)
               61:             TypePointer Output 8(fvec4)
      62(Pos_out):     61(ptr) Variable Output
+69(PerVertex_out):             TypeStruct 8(fvec4)
+              70:             TypePointer Output 69(PerVertex_out)
+71(PerVertex_out):     70(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
        25(vsout):     12(ptr) Variable Function
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.nested.geom.out b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.nested.geom.out
index 3dd8d9f88..a1db67d97 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.nested.geom.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.nested.geom.out
@@ -37,7 +37,7 @@ output primitive = triangle_strip
 0:30      Sequence
 0:30        Sequence
 0:30          move second child to first child (temp 4-component vector of float)
-0:?             'ts.psIn.pos' (out 4-component vector of float Position)
+0:?             'ts_psIn_pos' (out 4-component vector of float Position)
 0:30            pos: direct index for structure (temp 4-component vector of float)
 0:30              psIn: direct index for structure (temp structure{temp 4-component vector of float pos, temp 2-component vector of float tc})
 0:30                'o' (temp structure{temp structure{temp 4-component vector of float pos, temp 2-component vector of float tc} psIn, temp structure{temp 2-element array of float m0_array, temp int m1} contains_no_builtin_io})
@@ -73,8 +73,8 @@ output primitive = triangle_strip
 0:?   Linker Objects
 0:?     'tin' (layout(location=0 ) in 3-element array of structure{temp 2-component vector of float tc})
 0:?     'ts' (layout(location=0 ) out structure{temp structure{temp 2-component vector of float tc} psIn, temp structure{temp 2-element array of float m0_array, temp int m1} contains_no_builtin_io})
-0:?     'tin.pos' (in 3-element array of 4-component vector of float Position)
-0:?     'ts.psIn.pos' (out 4-component vector of float Position)
+0:?     'PerVertex_in' (in 3-element array of block{in 4-component vector of float Position tin_pos})
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position ts_psIn_pos})
 
 
 Linked geometry stage:
@@ -118,7 +118,7 @@ output primitive = triangle_strip
 0:30      Sequence
 0:30        Sequence
 0:30          move second child to first child (temp 4-component vector of float)
-0:?             'ts.psIn.pos' (out 4-component vector of float Position)
+0:?             'ts_psIn_pos' (out 4-component vector of float Position)
 0:30            pos: direct index for structure (temp 4-component vector of float)
 0:30              psIn: direct index for structure (temp structure{temp 4-component vector of float pos, temp 2-component vector of float tc})
 0:30                'o' (temp structure{temp structure{temp 4-component vector of float pos, temp 2-component vector of float tc} psIn, temp structure{temp 2-element array of float m0_array, temp int m1} contains_no_builtin_io})
@@ -154,17 +154,17 @@ output primitive = triangle_strip
 0:?   Linker Objects
 0:?     'tin' (layout(location=0 ) in 3-element array of structure{temp 2-component vector of float tc})
 0:?     'ts' (layout(location=0 ) out structure{temp structure{temp 2-component vector of float tc} psIn, temp structure{temp 2-element array of float m0_array, temp int m1} contains_no_builtin_io})
-0:?     'tin.pos' (in 3-element array of 4-component vector of float Position)
-0:?     'ts.psIn.pos' (out 4-component vector of float Position)
+0:?     'PerVertex_in' (in 3-element array of block{in 4-component vector of float Position tin_pos})
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position ts_psIn_pos})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 64
+// Id's are bound by 68
 
                               Capability Geometry
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Geometry 4  "main" 33 40 60 63
+                              EntryPoint Geometry 4  "main" 33 40 60 64 67
                               ExecutionMode 4 Triangles
                               ExecutionMode 4 Invocations 1
                               ExecutionMode 4 OutputTriangleStrip
@@ -180,7 +180,7 @@ output primitive = triangle_strip
                               MemberName 15(GS_OUT) 0  "psIn"
                               MemberName 15(GS_OUT) 1  "contains_no_builtin_io"
                               Name 17  "o"
-                              Name 33  "ts.psIn.pos"
+                              Name 33  "ts_psIn_pos"
                               Name 36  "PS_IN"
                               MemberName 36(PS_IN) 0  "tc"
                               Name 37  "STRUCT_WITH_NO_BUILTIN_INTERSTAGE_IO"
@@ -193,11 +193,19 @@ output primitive = triangle_strip
                               Name 56  "PS_IN"
                               MemberName 56(PS_IN) 0  "tc"
                               Name 60  "tin"
-                              Name 63  "tin.pos"
-                              Decorate 33(ts.psIn.pos) BuiltIn Position
+                              Name 61  "PerVertex_in"
+                              MemberName 61(PerVertex_in) 0  "tin_pos"
+                              Name 64  "PerVertex_in"
+                              Name 65  "PerVertex_out"
+                              MemberName 65(PerVertex_out) 0  "ts_psIn_pos"
+                              Name 67  "PerVertex_out"
+                              Decorate 33(ts_psIn_pos) BuiltIn Position
                               Decorate 40(ts) Location 0
                               Decorate 60(tin) Location 0
-                              Decorate 63(tin.pos) BuiltIn Position
+                              MemberDecorate 61(PerVertex_in) 0 BuiltIn Position
+                              Decorate 61(PerVertex_in) Block
+                              MemberDecorate 65(PerVertex_out) 0 BuiltIn Position
+                              Decorate 65(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -224,7 +232,7 @@ output primitive = triangle_strip
               29:    8(fvec2) ConstantComposite 27 28
               30:             TypePointer Function 8(fvec2)
               32:             TypePointer Output 7(fvec4)
- 33(ts.psIn.pos):     32(ptr) Variable Output
+ 33(ts_psIn_pos):     32(ptr) Variable Output
        36(PS_IN):             TypeStruct 8(fvec2)
 37(STRUCT_WITH_NO_BUILTIN_INTERSTAGE_IO):             TypeStruct 12 13(int)
       38(GS_OUT):             TypeStruct 36(PS_IN) 37(STRUCT_WITH_NO_BUILTIN_INTERSTAGE_IO)
@@ -240,9 +248,13 @@ output primitive = triangle_strip
               58:             TypeArray 56(PS_IN) 57
               59:             TypePointer Input 58
          60(tin):     59(ptr) Variable Input
-              61:             TypeArray 7(fvec4) 57
-              62:             TypePointer Input 61
-     63(tin.pos):     62(ptr) Variable Input
+61(PerVertex_in):             TypeStruct 7(fvec4)
+              62:             TypeArray 61(PerVertex_in) 57
+              63:             TypePointer Input 62
+64(PerVertex_in):     63(ptr) Variable Input
+65(PerVertex_out):             TypeStruct 7(fvec4)
+              66:             TypePointer Output 65(PerVertex_out)
+67(PerVertex_out):     66(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
            17(o):     16(ptr) Variable Function
@@ -252,7 +264,7 @@ output primitive = triangle_strip
                               Store 31 29
               34:     24(ptr) AccessChain 17(o) 18 18
               35:    7(fvec4) Load 34
-                              Store 33(ts.psIn.pos) 35
+                              Store 33(ts_psIn_pos) 35
               41:     30(ptr) AccessChain 17(o) 18 26
               42:    8(fvec2) Load 41
               44:     43(ptr) AccessChain 40(ts) 18 18
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.trivial.geom.out b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.trivial.geom.out
index 8448ac4a8..2ab8fdf71 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.trivial.geom.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.trivial.geom.out
@@ -29,12 +29,12 @@ output primitive = triangle_strip
 0:18                Constant:
 0:18                  0 (const int)
 0:18              indirect index (temp 4-component vector of float Position)
-0:18                'i.pos' (in 3-element array of 4-component vector of float Position)
+0:18                'i_pos' (in 3-element array of 4-component vector of float Position)
 0:18                'x' (temp int)
 0:19            Sequence
 0:19              Sequence
 0:19                move second child to first child (temp 4-component vector of float)
-0:?                   'ts.pos' (out 4-component vector of float Position)
+0:?                   'ts_pos' (out 4-component vector of float Position)
 0:19                  pos: direct index for structure (temp 4-component vector of float)
 0:19                    'o' (temp structure{temp 4-component vector of float pos})
 0:19                    Constant:
@@ -44,8 +44,8 @@ output primitive = triangle_strip
 0:17          Pre-Increment (temp int)
 0:17            'x' (temp int)
 0:?   Linker Objects
-0:?     'i.pos' (in 3-element array of 4-component vector of float Position)
-0:?     'ts.pos' (out 4-component vector of float Position)
+0:?     'PerVertex_in' (in 3-element array of block{in 4-component vector of float Position i_pos})
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position ts_pos})
 
 
 Linked geometry stage:
@@ -81,12 +81,12 @@ output primitive = triangle_strip
 0:18                Constant:
 0:18                  0 (const int)
 0:18              indirect index (temp 4-component vector of float Position)
-0:18                'i.pos' (in 3-element array of 4-component vector of float Position)
+0:18                'i_pos' (in 3-element array of 4-component vector of float Position)
 0:18                'x' (temp int)
 0:19            Sequence
 0:19              Sequence
 0:19                move second child to first child (temp 4-component vector of float)
-0:?                   'ts.pos' (out 4-component vector of float Position)
+0:?                   'ts_pos' (out 4-component vector of float Position)
 0:19                  pos: direct index for structure (temp 4-component vector of float)
 0:19                    'o' (temp structure{temp 4-component vector of float pos})
 0:19                    Constant:
@@ -96,17 +96,17 @@ output primitive = triangle_strip
 0:17          Pre-Increment (temp int)
 0:17            'x' (temp int)
 0:?   Linker Objects
-0:?     'i.pos' (in 3-element array of 4-component vector of float Position)
-0:?     'ts.pos' (out 4-component vector of float Position)
+0:?     'PerVertex_in' (in 3-element array of block{in 4-component vector of float Position i_pos})
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position ts_pos})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 42
+// Id's are bound by 49
 
                               Capability Geometry
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Geometry 4  "main" 28 36
+                              EntryPoint Geometry 4  "main" 28 36 45 48
                               ExecutionMode 4 Triangles
                               ExecutionMode 4 Invocations 1
                               ExecutionMode 4 OutputTriangleStrip
@@ -116,10 +116,20 @@ output primitive = triangle_strip
                               Name 21  "GS_OUT"
                               MemberName 21(GS_OUT) 0  "pos"
                               Name 23  "o"
-                              Name 28  "i.pos"
-                              Name 36  "ts.pos"
-                              Decorate 28(i.pos) BuiltIn Position
-                              Decorate 36(ts.pos) BuiltIn Position
+                              Name 28  "i_pos"
+                              Name 36  "ts_pos"
+                              Name 42  "PerVertex_in"
+                              MemberName 42(PerVertex_in) 0  "i_pos"
+                              Name 45  "PerVertex_in"
+                              Name 46  "PerVertex_out"
+                              MemberName 46(PerVertex_out) 0  "ts_pos"
+                              Name 48  "PerVertex_out"
+                              Decorate 28(i_pos) BuiltIn Position
+                              Decorate 36(ts_pos) BuiltIn Position
+                              MemberDecorate 42(PerVertex_in) 0 BuiltIn Position
+                              Decorate 42(PerVertex_in) Block
+                              MemberDecorate 46(PerVertex_out) 0 BuiltIn Position
+                              Decorate 46(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeInt 32 1
@@ -135,12 +145,19 @@ output primitive = triangle_strip
               25:     24(int) Constant 3
               26:             TypeArray 20(fvec4) 25
               27:             TypePointer Input 26
-       28(i.pos):     27(ptr) Variable Input
+       28(i_pos):     27(ptr) Variable Input
               30:             TypePointer Input 20(fvec4)
               33:             TypePointer Function 20(fvec4)
               35:             TypePointer Output 20(fvec4)
-      36(ts.pos):     35(ptr) Variable Output
+      36(ts_pos):     35(ptr) Variable Output
               40:      6(int) Constant 1
+42(PerVertex_in):             TypeStruct 20(fvec4)
+              43:             TypeArray 42(PerVertex_in) 25
+              44:             TypePointer Input 43
+45(PerVertex_in):     44(ptr) Variable Input
+46(PerVertex_out):             TypeStruct 20(fvec4)
+              47:             TypePointer Output 46(PerVertex_out)
+48(PerVertex_out):     47(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
             8(x):      7(ptr) Variable Function
@@ -156,13 +173,13 @@ output primitive = triangle_strip
                               BranchConditional 18 11 12
               11:               Label
               29:      6(int)   Load 8(x)
-              31:     30(ptr)   AccessChain 28(i.pos) 29
+              31:     30(ptr)   AccessChain 28(i_pos) 29
               32:   20(fvec4)   Load 31
               34:     33(ptr)   AccessChain 23(o) 9
                                 Store 34 32
               37:     33(ptr)   AccessChain 23(o) 9
               38:   20(fvec4)   Load 37
-                                Store 36(ts.pos) 38
+                                Store 36(ts_pos) 38
                                 EmitVertex
                                 Branch 13
               13:               Label
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.trivial.vert.out b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.trivial.vert.out
index bdf156c4f..29fb9f502 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.struct.split.trivial.vert.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.struct.split.trivial.vert.out
@@ -26,7 +26,7 @@ Shader version: 450
 0:?   Linker Objects
 0:?     'Pos_in' (in 4-component vector of float Position)
 0:?     'Pos_loose' (in 4-component vector of float Position)
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 
 Linked vertex stage:
@@ -59,16 +59,16 @@ Shader version: 450
 0:?   Linker Objects
 0:?     'Pos_in' (in 4-component vector of float Position)
 0:?     'Pos_loose' (in 4-component vector of float Position)
-0:?     'Pos' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position Pos})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 26
+// Id's are bound by 29
 
                               Capability Shader
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Vertex 4  "main" 14 16 22
+                              EntryPoint Vertex 4  "main" 14 16 22 28
                               Name 4  "main"
                               Name 8  "VS_OUTPUT"
                               MemberName 8(VS_OUTPUT) 0  "Pos"
@@ -76,9 +76,14 @@ Shader version: 450
                               Name 14  "Pos_in"
                               Name 16  "Pos_loose"
                               Name 22  "Pos"
+                              Name 26  "PerVertex_out"
+                              MemberName 26(PerVertex_out) 0  "Pos"
+                              Name 28  "PerVertex_out"
                               Decorate 14(Pos_in) BuiltIn Position
                               Decorate 16(Pos_loose) BuiltIn Position
                               Decorate 22(Pos) BuiltIn Position
+                              MemberDecorate 26(PerVertex_out) 0 BuiltIn Position
+                              Decorate 26(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -93,6 +98,9 @@ Shader version: 450
               19:             TypePointer Function 7(fvec4)
               21:             TypePointer Output 7(fvec4)
          22(Pos):     21(ptr) Variable Output
+26(PerVertex_out):             TypeStruct 7(fvec4)
+              27:             TypePointer Output 26(PerVertex_out)
+28(PerVertex_out):     27(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
        10(vsout):      9(ptr) Variable Function
diff --git a/3rdparty/glslang/Test/baseResults/hlsl.structarray.flatten.geom.out b/3rdparty/glslang/Test/baseResults/hlsl.structarray.flatten.geom.out
index 0922bae86..626ab8dde 100644
--- a/3rdparty/glslang/Test/baseResults/hlsl.structarray.flatten.geom.out
+++ b/3rdparty/glslang/Test/baseResults/hlsl.structarray.flatten.geom.out
@@ -49,7 +49,7 @@ output primitive = triangle_strip
 0:22      Sequence
 0:22        Sequence
 0:22          move second child to first child (temp 4-component vector of float)
-0:?             'outStream.position' (out 4-component vector of float Position)
+0:?             'outStream_position' (out 4-component vector of float Position)
 0:22            position: direct index for structure (temp 4-component vector of float)
 0:22              'vout' (temp structure{temp 4-component vector of float position, temp 4-component vector of float color, temp 2-component vector of float uv})
 0:22              Constant:
@@ -76,7 +76,7 @@ output primitive = triangle_strip
 0:?   Linker Objects
 0:?     'vin' (layout(location=0 ) in 2-element array of structure{temp 4-component vector of float position, temp 4-component vector of float color, temp 2-component vector of float uv})
 0:?     'outStream' (layout(location=0 ) out structure{temp 4-component vector of float color, temp 2-component vector of float uv})
-0:?     'outStream.position' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position outStream_position})
 
 
 Linked geometry stage:
@@ -132,7 +132,7 @@ output primitive = triangle_strip
 0:22      Sequence
 0:22        Sequence
 0:22          move second child to first child (temp 4-component vector of float)
-0:?             'outStream.position' (out 4-component vector of float Position)
+0:?             'outStream_position' (out 4-component vector of float Position)
 0:22            position: direct index for structure (temp 4-component vector of float)
 0:22              'vout' (temp structure{temp 4-component vector of float position, temp 4-component vector of float color, temp 2-component vector of float uv})
 0:22              Constant:
@@ -159,16 +159,16 @@ output primitive = triangle_strip
 0:?   Linker Objects
 0:?     'vin' (layout(location=0 ) in 2-element array of structure{temp 4-component vector of float position, temp 4-component vector of float color, temp 2-component vector of float uv})
 0:?     'outStream' (layout(location=0 ) out structure{temp 4-component vector of float color, temp 2-component vector of float uv})
-0:?     'outStream.position' (out 4-component vector of float Position)
+0:?     'PerVertex_out' (out block{out 4-component vector of float Position outStream_position})
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 49
+// Id's are bound by 52
 
                               Capability Geometry
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Geometry 4  "main" 19 36 41
+                              EntryPoint Geometry 4  "main" 19 36 41 51
                               ExecutionMode 4 InputLines
                               ExecutionMode 4 Invocations 1
                               ExecutionMode 4 OutputTriangleStrip
@@ -184,14 +184,19 @@ output primitive = triangle_strip
                               MemberName 14(VertexData) 1  "color"
                               MemberName 14(VertexData) 2  "uv"
                               Name 19  "vin"
-                              Name 36  "outStream.position"
+                              Name 36  "outStream_position"
                               Name 39  "PS_IN"
                               MemberName 39(PS_IN) 0  "color"
                               MemberName 39(PS_IN) 1  "uv"
                               Name 41  "outStream"
+                              Name 49  "PerVertex_out"
+                              MemberName 49(PerVertex_out) 0  "outStream_position"
+                              Name 51  "PerVertex_out"
                               Decorate 19(vin) Location 0
-                              Decorate 36(outStream.position) BuiltIn Position
+                              Decorate 36(outStream_position) BuiltIn Position
                               Decorate 41(outStream) Location 0
+                              MemberDecorate 49(PerVertex_out) 0 BuiltIn Position
+                              Decorate 49(PerVertex_out) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -214,11 +219,14 @@ output primitive = triangle_strip
               29:             TypePointer Function 8(fvec2)
               31:     12(int) Constant 0
               35:             TypePointer Output 7(fvec4)
-36(outStream.position):     35(ptr) Variable Output
+36(outStream_position):     35(ptr) Variable Output
        39(PS_IN):             TypeStruct 7(fvec4) 8(fvec2)
               40:             TypePointer Output 39(PS_IN)
    41(outStream):     40(ptr) Variable Output
               47:             TypePointer Output 8(fvec2)
+49(PerVertex_out):             TypeStruct 7(fvec4)
+              50:             TypePointer Output 49(PerVertex_out)
+51(PerVertex_out):     50(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
         11(vout):     10(ptr) Variable Function
@@ -236,7 +244,7 @@ output primitive = triangle_strip
                               Store 34 33
               37:     23(ptr) AccessChain 11(vout) 31
               38:    7(fvec4) Load 37
-                              Store 36(outStream.position) 38
+                              Store 36(outStream_position) 38
               42:     23(ptr) AccessChain 11(vout) 13
               43:    7(fvec4) Load 42
               44:     35(ptr) AccessChain 41(outStream) 31
diff --git a/3rdparty/glslang/Test/baseResults/specExamples.frag.out b/3rdparty/glslang/Test/baseResults/specExamples.frag.out
index a66144a7c..f660e66f7 100644
--- a/3rdparty/glslang/Test/baseResults/specExamples.frag.out
+++ b/3rdparty/glslang/Test/baseResults/specExamples.frag.out
@@ -22,14 +22,14 @@ ERROR: 0:172: '[]' : scalar integer expression required
 ERROR: 0:175: 'x' : undeclared identifier 
 ERROR: 0:175: '[]' : scalar integer expression required 
 ERROR: 0:175: 'b' :  left of '[' is not of type array, matrix, or vector  
-ERROR: 0:175: 'a' : vector field selection out of range 
+ERROR: 0:175: 'a' : vector swizzle selection out of range 
 ERROR: 0:175: 'length' : does not operate on this type: const float
 ERROR: 0:175: '' : function call, method, or subroutine call expected 
 ERROR: 0:175: '' : no matching overloaded function found 
 ERROR: 0:178: '[]' : scalar integer expression required 
 ERROR: 0:178: 's' : undeclared identifier 
 ERROR: 0:178: 's' :  left of '[' is not of type array, matrix, or vector  
-ERROR: 0:178: 'a' : vector field selection out of range 
+ERROR: 0:178: 'a' : vector swizzle selection out of range 
 ERROR: 0:178: 'length' : does not operate on this type: const float
 ERROR: 0:178: '' : function call, method, or subroutine call expected 
 ERROR: 0:178: '' : no matching overloaded function found 
diff --git a/3rdparty/glslang/Test/baseResults/spv.shaderBallotAMD.comp.out b/3rdparty/glslang/Test/baseResults/spv.shaderBallotAMD.comp.out
new file mode 100644
index 000000000..bb7f8c162
--- /dev/null
+++ b/3rdparty/glslang/Test/baseResults/spv.shaderBallotAMD.comp.out
@@ -0,0 +1,1217 @@
+spv.shaderBallotAMD.comp
+Warning, version 450 is not yet complete; most version-specific features are present, but some are missing.
+
+// Module Version 10000
+// Generated by (magic number): 80001
+// Id's are bound by 1048
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Float64
+                              Capability Int64
+                              Capability Groups
+                              Extension  "SPV_AMD_gpu_shader_half_float"
+                              Extension  "SPV_AMD_shader_ballot"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main"
+                              ExecutionMode 4 LocalSize 8 8 1
+                              Source GLSL 450
+                              SourceExtension  "GL_AMD_gpu_shader_half_float"
+                              SourceExtension  "GL_AMD_shader_ballot"
+                              SourceExtension  "GL_ARB_gpu_shader_int64"
+                              Name 4  "main"
+                              Name 18  "Buffers"
+                              MemberName 18(Buffers) 0  "i"
+                              MemberName 18(Buffers) 1  "uv"
+                              MemberName 18(Buffers) 2  "fv"
+                              MemberName 18(Buffers) 3  "dv"
+                              MemberName 18(Buffers) 4  "i64"
+                              MemberName 18(Buffers) 5  "u64v"
+                              MemberName 18(Buffers) 6  "f16v"
+                              Name 20  ""
+                              MemberDecorate 18(Buffers) 0 Offset 0
+                              MemberDecorate 18(Buffers) 1 Offset 8
+                              MemberDecorate 18(Buffers) 2 Offset 16
+                              MemberDecorate 18(Buffers) 3 Offset 32
+                              MemberDecorate 18(Buffers) 4 Offset 64
+                              MemberDecorate 18(Buffers) 5 Offset 80
+                              MemberDecorate 18(Buffers) 6 Offset 96
+                              Decorate 18(Buffers) BufferBlock
+                              Decorate 20 DescriptorSet 0
+                              Decorate 20 Binding 0
+                              Decorate 1047 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 1
+               7:             TypeInt 32 0
+               8:             TypeVector 7(int) 2
+               9:             TypeFloat 32
+              10:             TypeVector 9(float) 3
+              11:             TypeFloat 64
+              12:             TypeVector 11(float) 4
+              13:             TypeInt 64 1
+              14:             TypeInt 64 0
+              15:             TypeVector 14(int) 2
+              16:             TypeFloat 16
+              17:             TypeVector 16(float) 3
+     18(Buffers):             TypeStruct 6(int) 8(ivec2) 10(fvec3) 12(fvec4) 13(int) 15(ivec2) 17(fvec3)
+              19:             TypePointer Uniform 18(Buffers)
+              20:     19(ptr) Variable Uniform
+              21:      6(int) Constant 0
+              22:             TypePointer Uniform 6(int)
+              25:      7(int) Constant 3
+              28:      6(int) Constant 1
+              29:             TypePointer Uniform 8(ivec2)
+              38:      6(int) Constant 2
+              39:             TypePointer Uniform 10(fvec3)
+              50:      6(int) Constant 3
+              51:             TypePointer Uniform 12(fvec4)
+              64:      6(int) Constant 4
+              65:             TypePointer Uniform 13(int)
+              70:      6(int) Constant 5
+              71:             TypePointer Uniform 15(ivec2)
+              80:      6(int) Constant 6
+              81:             TypePointer Uniform 17(fvec3)
+            1044:             TypeVector 7(int) 3
+            1045:      7(int) Constant 8
+            1046:      7(int) Constant 1
+            1047: 1044(ivec3) ConstantComposite 1045 1045 1046
+         4(main):           2 Function None 3
+               5:             Label
+              23:     22(ptr) AccessChain 20 21
+              24:      6(int) Load 23
+              26:      6(int) GroupSMin 25 Reduce 24
+              27:     22(ptr) AccessChain 20 21
+                              Store 27 26
+              30:     29(ptr) AccessChain 20 28
+              31:    8(ivec2) Load 30
+              32:      7(int) CompositeExtract 31 0
+              33:      7(int) GroupUMin 25 Reduce 32
+              34:      7(int) CompositeExtract 31 1
+              35:      7(int) GroupUMin 25 Reduce 34
+              36:    8(ivec2) CompositeConstruct 33 35
+              37:     29(ptr) AccessChain 20 28
+                              Store 37 36
+              40:     39(ptr) AccessChain 20 38
+              41:   10(fvec3) Load 40
+              42:    9(float) CompositeExtract 41 0
+              43:    9(float) GroupFMin 25 Reduce 42
+              44:    9(float) CompositeExtract 41 1
+              45:    9(float) GroupFMin 25 Reduce 44
+              46:    9(float) CompositeExtract 41 2
+              47:    9(float) GroupFMin 25 Reduce 46
+              48:   10(fvec3) CompositeConstruct 43 45 47
+              49:     39(ptr) AccessChain 20 38
+                              Store 49 48
+              52:     51(ptr) AccessChain 20 50
+              53:   12(fvec4) Load 52
+              54:   11(float) CompositeExtract 53 0
+              55:   11(float) GroupFMin 25 Reduce 54
+              56:   11(float) CompositeExtract 53 1
+              57:   11(float) GroupFMin 25 Reduce 56
+              58:   11(float) CompositeExtract 53 2
+              59:   11(float) GroupFMin 25 Reduce 58
+              60:   11(float) CompositeExtract 53 3
+              61:   11(float) GroupFMin 25 Reduce 60
+              62:   12(fvec4) CompositeConstruct 55 57 59 61
+              63:     51(ptr) AccessChain 20 50
+                              Store 63 62
+              66:     65(ptr) AccessChain 20 64
+              67:     13(int) Load 66
+              68:     13(int) GroupSMin 25 Reduce 67
+              69:     65(ptr) AccessChain 20 64
+                              Store 69 68
+              72:     71(ptr) AccessChain 20 70
+              73:   15(ivec2) Load 72
+              74:     14(int) CompositeExtract 73 0
+              75:     14(int) GroupUMin 25 Reduce 74
+              76:     14(int) CompositeExtract 73 1
+              77:     14(int) GroupUMin 25 Reduce 76
+              78:   15(ivec2) CompositeConstruct 75 77
+              79:     71(ptr) AccessChain 20 70
+                              Store 79 78
+              82:     81(ptr) AccessChain 20 80
+              83:   17(fvec3) Load 82
+              84:   16(float) CompositeExtract 83 0
+              85:   16(float) GroupFMin 25 Reduce 84
+              86:   16(float) CompositeExtract 83 1
+              87:   16(float) GroupFMin 25 Reduce 86
+              88:   16(float) CompositeExtract 83 2
+              89:   16(float) GroupFMin 25 Reduce 88
+              90:   17(fvec3) CompositeConstruct 85 87 89
+              91:     81(ptr) AccessChain 20 80
+                              Store 91 90
+              92:     22(ptr) AccessChain 20 21
+              93:      6(int) Load 92
+              94:      6(int) GroupSMax 25 Reduce 93
+              95:     22(ptr) AccessChain 20 21
+                              Store 95 94
+              96:     29(ptr) AccessChain 20 28
+              97:    8(ivec2) Load 96
+              98:      7(int) CompositeExtract 97 0
+              99:      7(int) GroupUMax 25 Reduce 98
+             100:      7(int) CompositeExtract 97 1
+             101:      7(int) GroupUMax 25 Reduce 100
+             102:    8(ivec2) CompositeConstruct 99 101
+             103:     29(ptr) AccessChain 20 28
+                              Store 103 102
+             104:     39(ptr) AccessChain 20 38
+             105:   10(fvec3) Load 104
+             106:    9(float) CompositeExtract 105 0
+             107:    9(float) GroupFMax 25 Reduce 106
+             108:    9(float) CompositeExtract 105 1
+             109:    9(float) GroupFMax 25 Reduce 108
+             110:    9(float) CompositeExtract 105 2
+             111:    9(float) GroupFMax 25 Reduce 110
+             112:   10(fvec3) CompositeConstruct 107 109 111
+             113:     39(ptr) AccessChain 20 38
+                              Store 113 112
+             114:     51(ptr) AccessChain 20 50
+             115:   12(fvec4) Load 114
+             116:   11(float) CompositeExtract 115 0
+             117:   11(float) GroupFMax 25 Reduce 116
+             118:   11(float) CompositeExtract 115 1
+             119:   11(float) GroupFMax 25 Reduce 118
+             120:   11(float) CompositeExtract 115 2
+             121:   11(float) GroupFMax 25 Reduce 120
+             122:   11(float) CompositeExtract 115 3
+             123:   11(float) GroupFMax 25 Reduce 122
+             124:   12(fvec4) CompositeConstruct 117 119 121 123
+             125:     51(ptr) AccessChain 20 50
+                              Store 125 124
+             126:     65(ptr) AccessChain 20 64
+             127:     13(int) Load 126
+             128:     13(int) GroupSMax 25 Reduce 127
+             129:     65(ptr) AccessChain 20 64
+                              Store 129 128
+             130:     71(ptr) AccessChain 20 70
+             131:   15(ivec2) Load 130
+             132:     14(int) CompositeExtract 131 0
+             133:     14(int) GroupUMax 25 Reduce 132
+             134:     14(int) CompositeExtract 131 1
+             135:     14(int) GroupUMax 25 Reduce 134
+             136:   15(ivec2) CompositeConstruct 133 135
+             137:     71(ptr) AccessChain 20 70
+                              Store 137 136
+             138:     81(ptr) AccessChain 20 80
+             139:   17(fvec3) Load 138
+             140:   16(float) CompositeExtract 139 0
+             141:   16(float) GroupFMax 25 Reduce 140
+             142:   16(float) CompositeExtract 139 1
+             143:   16(float) GroupFMax 25 Reduce 142
+             144:   16(float) CompositeExtract 139 2
+             145:   16(float) GroupFMax 25 Reduce 144
+             146:   17(fvec3) CompositeConstruct 141 143 145
+             147:     81(ptr) AccessChain 20 80
+                              Store 147 146
+             148:     22(ptr) AccessChain 20 21
+             149:      6(int) Load 148
+             150:      6(int) GroupIAdd 25 Reduce 149
+             151:     22(ptr) AccessChain 20 21
+                              Store 151 150
+             152:     29(ptr) AccessChain 20 28
+             153:    8(ivec2) Load 152
+             154:      7(int) CompositeExtract 153 0
+             155:      7(int) GroupIAdd 25 Reduce 154
+             156:      7(int) CompositeExtract 153 1
+             157:      7(int) GroupIAdd 25 Reduce 156
+             158:    8(ivec2) CompositeConstruct 155 157
+             159:     29(ptr) AccessChain 20 28
+                              Store 159 158
+             160:     39(ptr) AccessChain 20 38
+             161:   10(fvec3) Load 160
+             162:    9(float) CompositeExtract 161 0
+             163:    9(float) GroupFAdd 25 Reduce 162
+             164:    9(float) CompositeExtract 161 1
+             165:    9(float) GroupFAdd 25 Reduce 164
+             166:    9(float) CompositeExtract 161 2
+             167:    9(float) GroupFAdd 25 Reduce 166
+             168:   10(fvec3) CompositeConstruct 163 165 167
+             169:     39(ptr) AccessChain 20 38
+                              Store 169 168
+             170:     51(ptr) AccessChain 20 50
+             171:   12(fvec4) Load 170
+             172:   11(float) CompositeExtract 171 0
+             173:   11(float) GroupFAdd 25 Reduce 172
+             174:   11(float) CompositeExtract 171 1
+             175:   11(float) GroupFAdd 25 Reduce 174
+             176:   11(float) CompositeExtract 171 2
+             177:   11(float) GroupFAdd 25 Reduce 176
+             178:   11(float) CompositeExtract 171 3
+             179:   11(float) GroupFAdd 25 Reduce 178
+             180:   12(fvec4) CompositeConstruct 173 175 177 179
+             181:     51(ptr) AccessChain 20 50
+                              Store 181 180
+             182:     65(ptr) AccessChain 20 64
+             183:     13(int) Load 182
+             184:     13(int) GroupIAdd 25 Reduce 183
+             185:     65(ptr) AccessChain 20 64
+                              Store 185 184
+             186:     71(ptr) AccessChain 20 70
+             187:   15(ivec2) Load 186
+             188:     14(int) CompositeExtract 187 0
+             189:     14(int) GroupIAdd 25 Reduce 188
+             190:     14(int) CompositeExtract 187 1
+             191:     14(int) GroupIAdd 25 Reduce 190
+             192:   15(ivec2) CompositeConstruct 189 191
+             193:     71(ptr) AccessChain 20 70
+                              Store 193 192
+             194:     81(ptr) AccessChain 20 80
+             195:   17(fvec3) Load 194
+             196:   16(float) CompositeExtract 195 0
+             197:   16(float) GroupFAdd 25 Reduce 196
+             198:   16(float) CompositeExtract 195 1
+             199:   16(float) GroupFAdd 25 Reduce 198
+             200:   16(float) CompositeExtract 195 2
+             201:   16(float) GroupFAdd 25 Reduce 200
+             202:   17(fvec3) CompositeConstruct 197 199 201
+             203:     81(ptr) AccessChain 20 80
+                              Store 203 202
+             204:     22(ptr) AccessChain 20 21
+             205:      6(int) Load 204
+             206:      6(int) GroupSMinNonUniformAMD 25 Reduce 205
+             207:     22(ptr) AccessChain 20 21
+                              Store 207 206
+             208:     29(ptr) AccessChain 20 28
+             209:    8(ivec2) Load 208
+             210:      7(int) CompositeExtract 209 0
+             211:      7(int) GroupUMinNonUniformAMD 25 Reduce 210
+             212:      7(int) CompositeExtract 209 1
+             213:      7(int) GroupUMinNonUniformAMD 25 Reduce 212
+             214:    8(ivec2) CompositeConstruct 211 213
+             215:     29(ptr) AccessChain 20 28
+                              Store 215 214
+             216:     39(ptr) AccessChain 20 38
+             217:   10(fvec3) Load 216
+             218:    9(float) CompositeExtract 217 0
+             219:    9(float) GroupFMinNonUniformAMD 25 Reduce 218
+             220:    9(float) CompositeExtract 217 1
+             221:    9(float) GroupFMinNonUniformAMD 25 Reduce 220
+             222:    9(float) CompositeExtract 217 2
+             223:    9(float) GroupFMinNonUniformAMD 25 Reduce 222
+             224:   10(fvec3) CompositeConstruct 219 221 223
+             225:     39(ptr) AccessChain 20 38
+                              Store 225 224
+             226:     51(ptr) AccessChain 20 50
+             227:   12(fvec4) Load 226
+             228:   11(float) CompositeExtract 227 0
+             229:   11(float) GroupFMinNonUniformAMD 25 Reduce 228
+             230:   11(float) CompositeExtract 227 1
+             231:   11(float) GroupFMinNonUniformAMD 25 Reduce 230
+             232:   11(float) CompositeExtract 227 2
+             233:   11(float) GroupFMinNonUniformAMD 25 Reduce 232
+             234:   11(float) CompositeExtract 227 3
+             235:   11(float) GroupFMinNonUniformAMD 25 Reduce 234
+             236:   12(fvec4) CompositeConstruct 229 231 233 235
+             237:     51(ptr) AccessChain 20 50
+                              Store 237 236
+             238:     65(ptr) AccessChain 20 64
+             239:     13(int) Load 238
+             240:     13(int) GroupSMinNonUniformAMD 25 Reduce 239
+             241:     65(ptr) AccessChain 20 64
+                              Store 241 240
+             242:     71(ptr) AccessChain 20 70
+             243:   15(ivec2) Load 242
+             244:     14(int) CompositeExtract 243 0
+             245:     14(int) GroupUMinNonUniformAMD 25 Reduce 244
+             246:     14(int) CompositeExtract 243 1
+             247:     14(int) GroupUMinNonUniformAMD 25 Reduce 246
+             248:   15(ivec2) CompositeConstruct 245 247
+             249:     71(ptr) AccessChain 20 70
+                              Store 249 248
+             250:     81(ptr) AccessChain 20 80
+             251:   17(fvec3) Load 250
+             252:   16(float) CompositeExtract 251 0
+             253:   16(float) GroupFMinNonUniformAMD 25 Reduce 252
+             254:   16(float) CompositeExtract 251 1
+             255:   16(float) GroupFMinNonUniformAMD 25 Reduce 254
+             256:   16(float) CompositeExtract 251 2
+             257:   16(float) GroupFMinNonUniformAMD 25 Reduce 256
+             258:   17(fvec3) CompositeConstruct 253 255 257
+             259:     81(ptr) AccessChain 20 80
+                              Store 259 258
+             260:     22(ptr) AccessChain 20 21
+             261:      6(int) Load 260
+             262:      6(int) GroupSMaxNonUniformAMD 25 Reduce 261
+             263:     22(ptr) AccessChain 20 21
+                              Store 263 262
+             264:     29(ptr) AccessChain 20 28
+             265:    8(ivec2) Load 264
+             266:      7(int) CompositeExtract 265 0
+             267:      7(int) GroupUMaxNonUniformAMD 25 Reduce 266
+             268:      7(int) CompositeExtract 265 1
+             269:      7(int) GroupUMaxNonUniformAMD 25 Reduce 268
+             270:    8(ivec2) CompositeConstruct 267 269
+             271:     29(ptr) AccessChain 20 28
+                              Store 271 270
+             272:     39(ptr) AccessChain 20 38
+             273:   10(fvec3) Load 272
+             274:    9(float) CompositeExtract 273 0
+             275:    9(float) GroupFMaxNonUniformAMD 25 Reduce 274
+             276:    9(float) CompositeExtract 273 1
+             277:    9(float) GroupFMaxNonUniformAMD 25 Reduce 276
+             278:    9(float) CompositeExtract 273 2
+             279:    9(float) GroupFMaxNonUniformAMD 25 Reduce 278
+             280:   10(fvec3) CompositeConstruct 275 277 279
+             281:     39(ptr) AccessChain 20 38
+                              Store 281 280
+             282:     51(ptr) AccessChain 20 50
+             283:   12(fvec4) Load 282
+             284:   11(float) CompositeExtract 283 0
+             285:   11(float) GroupFMaxNonUniformAMD 25 Reduce 284
+             286:   11(float) CompositeExtract 283 1
+             287:   11(float) GroupFMaxNonUniformAMD 25 Reduce 286
+             288:   11(float) CompositeExtract 283 2
+             289:   11(float) GroupFMaxNonUniformAMD 25 Reduce 288
+             290:   11(float) CompositeExtract 283 3
+             291:   11(float) GroupFMaxNonUniformAMD 25 Reduce 290
+             292:   12(fvec4) CompositeConstruct 285 287 289 291
+             293:     51(ptr) AccessChain 20 50
+                              Store 293 292
+             294:     65(ptr) AccessChain 20 64
+             295:     13(int) Load 294
+             296:     13(int) GroupSMaxNonUniformAMD 25 Reduce 295
+             297:     65(ptr) AccessChain 20 64
+                              Store 297 296
+             298:     71(ptr) AccessChain 20 70
+             299:   15(ivec2) Load 298
+             300:     14(int) CompositeExtract 299 0
+             301:     14(int) GroupUMaxNonUniformAMD 25 Reduce 300
+             302:     14(int) CompositeExtract 299 1
+             303:     14(int) GroupUMaxNonUniformAMD 25 Reduce 302
+             304:   15(ivec2) CompositeConstruct 301 303
+             305:     71(ptr) AccessChain 20 70
+                              Store 305 304
+             306:     81(ptr) AccessChain 20 80
+             307:   17(fvec3) Load 306
+             308:   16(float) CompositeExtract 307 0
+             309:   16(float) GroupFMaxNonUniformAMD 25 Reduce 308
+             310:   16(float) CompositeExtract 307 1
+             311:   16(float) GroupFMaxNonUniformAMD 25 Reduce 310
+             312:   16(float) CompositeExtract 307 2
+             313:   16(float) GroupFMaxNonUniformAMD 25 Reduce 312
+             314:   17(fvec3) CompositeConstruct 309 311 313
+             315:     81(ptr) AccessChain 20 80
+                              Store 315 314
+             316:     22(ptr) AccessChain 20 21
+             317:      6(int) Load 316
+             318:      6(int) GroupIAddNonUniformAMD 25 Reduce 317
+             319:     22(ptr) AccessChain 20 21
+                              Store 319 318
+             320:     29(ptr) AccessChain 20 28
+             321:    8(ivec2) Load 320
+             322:      7(int) CompositeExtract 321 0
+             323:      7(int) GroupIAddNonUniformAMD 25 Reduce 322
+             324:      7(int) CompositeExtract 321 1
+             325:      7(int) GroupIAddNonUniformAMD 25 Reduce 324
+             326:    8(ivec2) CompositeConstruct 323 325
+             327:     29(ptr) AccessChain 20 28
+                              Store 327 326
+             328:     39(ptr) AccessChain 20 38
+             329:   10(fvec3) Load 328
+             330:    9(float) CompositeExtract 329 0
+             331:    9(float) GroupFAddNonUniformAMD 25 Reduce 330
+             332:    9(float) CompositeExtract 329 1
+             333:    9(float) GroupFAddNonUniformAMD 25 Reduce 332
+             334:    9(float) CompositeExtract 329 2
+             335:    9(float) GroupFAddNonUniformAMD 25 Reduce 334
+             336:   10(fvec3) CompositeConstruct 331 333 335
+             337:     39(ptr) AccessChain 20 38
+                              Store 337 336
+             338:     51(ptr) AccessChain 20 50
+             339:   12(fvec4) Load 338
+             340:   11(float) CompositeExtract 339 0
+             341:   11(float) GroupFAddNonUniformAMD 25 Reduce 340
+             342:   11(float) CompositeExtract 339 1
+             343:   11(float) GroupFAddNonUniformAMD 25 Reduce 342
+             344:   11(float) CompositeExtract 339 2
+             345:   11(float) GroupFAddNonUniformAMD 25 Reduce 344
+             346:   11(float) CompositeExtract 339 3
+             347:   11(float) GroupFAddNonUniformAMD 25 Reduce 346
+             348:   12(fvec4) CompositeConstruct 341 343 345 347
+             349:     51(ptr) AccessChain 20 50
+                              Store 349 348
+             350:     65(ptr) AccessChain 20 64
+             351:     13(int) Load 350
+             352:     13(int) GroupIAddNonUniformAMD 25 Reduce 351
+             353:     65(ptr) AccessChain 20 64
+                              Store 353 352
+             354:     71(ptr) AccessChain 20 70
+             355:   15(ivec2) Load 354
+             356:     14(int) CompositeExtract 355 0
+             357:     14(int) GroupIAddNonUniformAMD 25 Reduce 356
+             358:     14(int) CompositeExtract 355 1
+             359:     14(int) GroupIAddNonUniformAMD 25 Reduce 358
+             360:   15(ivec2) CompositeConstruct 357 359
+             361:     71(ptr) AccessChain 20 70
+                              Store 361 360
+             362:     81(ptr) AccessChain 20 80
+             363:   17(fvec3) Load 362
+             364:   16(float) CompositeExtract 363 0
+             365:   16(float) GroupFAddNonUniformAMD 25 Reduce 364
+             366:   16(float) CompositeExtract 363 1
+             367:   16(float) GroupFAddNonUniformAMD 25 Reduce 366
+             368:   16(float) CompositeExtract 363 2
+             369:   16(float) GroupFAddNonUniformAMD 25 Reduce 368
+             370:   17(fvec3) CompositeConstruct 365 367 369
+             371:     81(ptr) AccessChain 20 80
+                              Store 371 370
+             372:     22(ptr) AccessChain 20 21
+             373:      6(int) Load 372
+             374:      6(int) GroupSMin 25 InclusiveScan 373
+             375:     22(ptr) AccessChain 20 21
+                              Store 375 374
+             376:     29(ptr) AccessChain 20 28
+             377:    8(ivec2) Load 376
+             378:      7(int) CompositeExtract 377 0
+             379:      7(int) GroupUMin 25 InclusiveScan 378
+             380:      7(int) CompositeExtract 377 1
+             381:      7(int) GroupUMin 25 InclusiveScan 380
+             382:    8(ivec2) CompositeConstruct 379 381
+             383:     29(ptr) AccessChain 20 28
+                              Store 383 382
+             384:     39(ptr) AccessChain 20 38
+             385:   10(fvec3) Load 384
+             386:    9(float) CompositeExtract 385 0
+             387:    9(float) GroupFMin 25 InclusiveScan 386
+             388:    9(float) CompositeExtract 385 1
+             389:    9(float) GroupFMin 25 InclusiveScan 388
+             390:    9(float) CompositeExtract 385 2
+             391:    9(float) GroupFMin 25 InclusiveScan 390
+             392:   10(fvec3) CompositeConstruct 387 389 391
+             393:     39(ptr) AccessChain 20 38
+                              Store 393 392
+             394:     51(ptr) AccessChain 20 50
+             395:   12(fvec4) Load 394
+             396:   11(float) CompositeExtract 395 0
+             397:   11(float) GroupFMin 25 InclusiveScan 396
+             398:   11(float) CompositeExtract 395 1
+             399:   11(float) GroupFMin 25 InclusiveScan 398
+             400:   11(float) CompositeExtract 395 2
+             401:   11(float) GroupFMin 25 InclusiveScan 400
+             402:   11(float) CompositeExtract 395 3
+             403:   11(float) GroupFMin 25 InclusiveScan 402
+             404:   12(fvec4) CompositeConstruct 397 399 401 403
+             405:     51(ptr) AccessChain 20 50
+                              Store 405 404
+             406:     65(ptr) AccessChain 20 64
+             407:     13(int) Load 406
+             408:     13(int) GroupSMin 25 InclusiveScan 407
+             409:     65(ptr) AccessChain 20 64
+                              Store 409 408
+             410:     71(ptr) AccessChain 20 70
+             411:   15(ivec2) Load 410
+             412:     14(int) CompositeExtract 411 0
+             413:     14(int) GroupUMin 25 InclusiveScan 412
+             414:     14(int) CompositeExtract 411 1
+             415:     14(int) GroupUMin 25 InclusiveScan 414
+             416:   15(ivec2) CompositeConstruct 413 415
+             417:     71(ptr) AccessChain 20 70
+                              Store 417 416
+             418:     81(ptr) AccessChain 20 80
+             419:   17(fvec3) Load 418
+             420:   16(float) CompositeExtract 419 0
+             421:   16(float) GroupFMin 25 InclusiveScan 420
+             422:   16(float) CompositeExtract 419 1
+             423:   16(float) GroupFMin 25 InclusiveScan 422
+             424:   16(float) CompositeExtract 419 2
+             425:   16(float) GroupFMin 25 InclusiveScan 424
+             426:   17(fvec3) CompositeConstruct 421 423 425
+             427:     81(ptr) AccessChain 20 80
+                              Store 427 426
+             428:     22(ptr) AccessChain 20 21
+             429:      6(int) Load 428
+             430:      6(int) GroupSMax 25 InclusiveScan 429
+             431:     22(ptr) AccessChain 20 21
+                              Store 431 430
+             432:     29(ptr) AccessChain 20 28
+             433:    8(ivec2) Load 432
+             434:      7(int) CompositeExtract 433 0
+             435:      7(int) GroupUMax 25 InclusiveScan 434
+             436:      7(int) CompositeExtract 433 1
+             437:      7(int) GroupUMax 25 InclusiveScan 436
+             438:    8(ivec2) CompositeConstruct 435 437
+             439:     29(ptr) AccessChain 20 28
+                              Store 439 438
+             440:     39(ptr) AccessChain 20 38
+             441:   10(fvec3) Load 440
+             442:    9(float) CompositeExtract 441 0
+             443:    9(float) GroupFMax 25 InclusiveScan 442
+             444:    9(float) CompositeExtract 441 1
+             445:    9(float) GroupFMax 25 InclusiveScan 444
+             446:    9(float) CompositeExtract 441 2
+             447:    9(float) GroupFMax 25 InclusiveScan 446
+             448:   10(fvec3) CompositeConstruct 443 445 447
+             449:     39(ptr) AccessChain 20 38
+                              Store 449 448
+             450:     51(ptr) AccessChain 20 50
+             451:   12(fvec4) Load 450
+             452:   11(float) CompositeExtract 451 0
+             453:   11(float) GroupFMax 25 InclusiveScan 452
+             454:   11(float) CompositeExtract 451 1
+             455:   11(float) GroupFMax 25 InclusiveScan 454
+             456:   11(float) CompositeExtract 451 2
+             457:   11(float) GroupFMax 25 InclusiveScan 456
+             458:   11(float) CompositeExtract 451 3
+             459:   11(float) GroupFMax 25 InclusiveScan 458
+             460:   12(fvec4) CompositeConstruct 453 455 457 459
+             461:     51(ptr) AccessChain 20 50
+                              Store 461 460
+             462:     65(ptr) AccessChain 20 64
+             463:     13(int) Load 462
+             464:     13(int) GroupSMax 25 InclusiveScan 463
+             465:     65(ptr) AccessChain 20 64
+                              Store 465 464
+             466:     71(ptr) AccessChain 20 70
+             467:   15(ivec2) Load 466
+             468:     14(int) CompositeExtract 467 0
+             469:     14(int) GroupUMax 25 InclusiveScan 468
+             470:     14(int) CompositeExtract 467 1
+             471:     14(int) GroupUMax 25 InclusiveScan 470
+             472:   15(ivec2) CompositeConstruct 469 471
+             473:     71(ptr) AccessChain 20 70
+                              Store 473 472
+             474:     81(ptr) AccessChain 20 80
+             475:   17(fvec3) Load 474
+             476:   16(float) CompositeExtract 475 0
+             477:   16(float) GroupFMax 25 InclusiveScan 476
+             478:   16(float) CompositeExtract 475 1
+             479:   16(float) GroupFMax 25 InclusiveScan 478
+             480:   16(float) CompositeExtract 475 2
+             481:   16(float) GroupFMax 25 InclusiveScan 480
+             482:   17(fvec3) CompositeConstruct 477 479 481
+             483:     81(ptr) AccessChain 20 80
+                              Store 483 482
+             484:     22(ptr) AccessChain 20 21
+             485:      6(int) Load 484
+             486:      6(int) GroupIAdd 25 InclusiveScan 485
+             487:     22(ptr) AccessChain 20 21
+                              Store 487 486
+             488:     29(ptr) AccessChain 20 28
+             489:    8(ivec2) Load 488
+             490:      7(int) CompositeExtract 489 0
+             491:      7(int) GroupIAdd 25 InclusiveScan 490
+             492:      7(int) CompositeExtract 489 1
+             493:      7(int) GroupIAdd 25 InclusiveScan 492
+             494:    8(ivec2) CompositeConstruct 491 493
+             495:     29(ptr) AccessChain 20 28
+                              Store 495 494
+             496:     39(ptr) AccessChain 20 38
+             497:   10(fvec3) Load 496
+             498:    9(float) CompositeExtract 497 0
+             499:    9(float) GroupFAdd 25 InclusiveScan 498
+             500:    9(float) CompositeExtract 497 1
+             501:    9(float) GroupFAdd 25 InclusiveScan 500
+             502:    9(float) CompositeExtract 497 2
+             503:    9(float) GroupFAdd 25 InclusiveScan 502
+             504:   10(fvec3) CompositeConstruct 499 501 503
+             505:     39(ptr) AccessChain 20 38
+                              Store 505 504
+             506:     51(ptr) AccessChain 20 50
+             507:   12(fvec4) Load 506
+             508:   11(float) CompositeExtract 507 0
+             509:   11(float) GroupFAdd 25 InclusiveScan 508
+             510:   11(float) CompositeExtract 507 1
+             511:   11(float) GroupFAdd 25 InclusiveScan 510
+             512:   11(float) CompositeExtract 507 2
+             513:   11(float) GroupFAdd 25 InclusiveScan 512
+             514:   11(float) CompositeExtract 507 3
+             515:   11(float) GroupFAdd 25 InclusiveScan 514
+             516:   12(fvec4) CompositeConstruct 509 511 513 515
+             517:     51(ptr) AccessChain 20 50
+                              Store 517 516
+             518:     65(ptr) AccessChain 20 64
+             519:     13(int) Load 518
+             520:     13(int) GroupIAdd 25 InclusiveScan 519
+             521:     65(ptr) AccessChain 20 64
+                              Store 521 520
+             522:     71(ptr) AccessChain 20 70
+             523:   15(ivec2) Load 522
+             524:     14(int) CompositeExtract 523 0
+             525:     14(int) GroupIAdd 25 InclusiveScan 524
+             526:     14(int) CompositeExtract 523 1
+             527:     14(int) GroupIAdd 25 InclusiveScan 526
+             528:   15(ivec2) CompositeConstruct 525 527
+             529:     71(ptr) AccessChain 20 70
+                              Store 529 528
+             530:     81(ptr) AccessChain 20 80
+             531:   17(fvec3) Load 530
+             532:   16(float) CompositeExtract 531 0
+             533:   16(float) GroupFAdd 25 InclusiveScan 532
+             534:   16(float) CompositeExtract 531 1
+             535:   16(float) GroupFAdd 25 InclusiveScan 534
+             536:   16(float) CompositeExtract 531 2
+             537:   16(float) GroupFAdd 25 InclusiveScan 536
+             538:   17(fvec3) CompositeConstruct 533 535 537
+             539:     81(ptr) AccessChain 20 80
+                              Store 539 538
+             540:     22(ptr) AccessChain 20 21
+             541:      6(int) Load 540
+             542:      6(int) GroupSMin 25 ExclusiveScan 541
+             543:     22(ptr) AccessChain 20 21
+                              Store 543 542
+             544:     29(ptr) AccessChain 20 28
+             545:    8(ivec2) Load 544
+             546:      7(int) CompositeExtract 545 0
+             547:      7(int) GroupUMin 25 ExclusiveScan 546
+             548:      7(int) CompositeExtract 545 1
+             549:      7(int) GroupUMin 25 ExclusiveScan 548
+             550:    8(ivec2) CompositeConstruct 547 549
+             551:     29(ptr) AccessChain 20 28
+                              Store 551 550
+             552:     39(ptr) AccessChain 20 38
+             553:   10(fvec3) Load 552
+             554:    9(float) CompositeExtract 553 0
+             555:    9(float) GroupFMin 25 ExclusiveScan 554
+             556:    9(float) CompositeExtract 553 1
+             557:    9(float) GroupFMin 25 ExclusiveScan 556
+             558:    9(float) CompositeExtract 553 2
+             559:    9(float) GroupFMin 25 ExclusiveScan 558
+             560:   10(fvec3) CompositeConstruct 555 557 559
+             561:     39(ptr) AccessChain 20 38
+                              Store 561 560
+             562:     51(ptr) AccessChain 20 50
+             563:   12(fvec4) Load 562
+             564:   11(float) CompositeExtract 563 0
+             565:   11(float) GroupFMin 25 ExclusiveScan 564
+             566:   11(float) CompositeExtract 563 1
+             567:   11(float) GroupFMin 25 ExclusiveScan 566
+             568:   11(float) CompositeExtract 563 2
+             569:   11(float) GroupFMin 25 ExclusiveScan 568
+             570:   11(float) CompositeExtract 563 3
+             571:   11(float) GroupFMin 25 ExclusiveScan 570
+             572:   12(fvec4) CompositeConstruct 565 567 569 571
+             573:     51(ptr) AccessChain 20 50
+                              Store 573 572
+             574:     65(ptr) AccessChain 20 64
+             575:     13(int) Load 574
+             576:     13(int) GroupSMin 25 ExclusiveScan 575
+             577:     65(ptr) AccessChain 20 64
+                              Store 577 576
+             578:     71(ptr) AccessChain 20 70
+             579:   15(ivec2) Load 578
+             580:     14(int) CompositeExtract 579 0
+             581:     14(int) GroupUMin 25 ExclusiveScan 580
+             582:     14(int) CompositeExtract 579 1
+             583:     14(int) GroupUMin 25 ExclusiveScan 582
+             584:   15(ivec2) CompositeConstruct 581 583
+             585:     71(ptr) AccessChain 20 70
+                              Store 585 584
+             586:     81(ptr) AccessChain 20 80
+             587:   17(fvec3) Load 586
+             588:   16(float) CompositeExtract 587 0
+             589:   16(float) GroupFMin 25 ExclusiveScan 588
+             590:   16(float) CompositeExtract 587 1
+             591:   16(float) GroupFMin 25 ExclusiveScan 590
+             592:   16(float) CompositeExtract 587 2
+             593:   16(float) GroupFMin 25 ExclusiveScan 592
+             594:   17(fvec3) CompositeConstruct 589 591 593
+             595:     81(ptr) AccessChain 20 80
+                              Store 595 594
+             596:     22(ptr) AccessChain 20 21
+             597:      6(int) Load 596
+             598:      6(int) GroupSMax 25 ExclusiveScan 597
+             599:     22(ptr) AccessChain 20 21
+                              Store 599 598
+             600:     29(ptr) AccessChain 20 28
+             601:    8(ivec2) Load 600
+             602:      7(int) CompositeExtract 601 0
+             603:      7(int) GroupUMax 25 ExclusiveScan 602
+             604:      7(int) CompositeExtract 601 1
+             605:      7(int) GroupUMax 25 ExclusiveScan 604
+             606:    8(ivec2) CompositeConstruct 603 605
+             607:     29(ptr) AccessChain 20 28
+                              Store 607 606
+             608:     39(ptr) AccessChain 20 38
+             609:   10(fvec3) Load 608
+             610:    9(float) CompositeExtract 609 0
+             611:    9(float) GroupFMax 25 ExclusiveScan 610
+             612:    9(float) CompositeExtract 609 1
+             613:    9(float) GroupFMax 25 ExclusiveScan 612
+             614:    9(float) CompositeExtract 609 2
+             615:    9(float) GroupFMax 25 ExclusiveScan 614
+             616:   10(fvec3) CompositeConstruct 611 613 615
+             617:     39(ptr) AccessChain 20 38
+                              Store 617 616
+             618:     51(ptr) AccessChain 20 50
+             619:   12(fvec4) Load 618
+             620:   11(float) CompositeExtract 619 0
+             621:   11(float) GroupFMax 25 ExclusiveScan 620
+             622:   11(float) CompositeExtract 619 1
+             623:   11(float) GroupFMax 25 ExclusiveScan 622
+             624:   11(float) CompositeExtract 619 2
+             625:   11(float) GroupFMax 25 ExclusiveScan 624
+             626:   11(float) CompositeExtract 619 3
+             627:   11(float) GroupFMax 25 ExclusiveScan 626
+             628:   12(fvec4) CompositeConstruct 621 623 625 627
+             629:     51(ptr) AccessChain 20 50
+                              Store 629 628
+             630:     65(ptr) AccessChain 20 64
+             631:     13(int) Load 630
+             632:     13(int) GroupSMax 25 ExclusiveScan 631
+             633:     65(ptr) AccessChain 20 64
+                              Store 633 632
+             634:     71(ptr) AccessChain 20 70
+             635:   15(ivec2) Load 634
+             636:     14(int) CompositeExtract 635 0
+             637:     14(int) GroupUMax 25 ExclusiveScan 636
+             638:     14(int) CompositeExtract 635 1
+             639:     14(int) GroupUMax 25 ExclusiveScan 638
+             640:   15(ivec2) CompositeConstruct 637 639
+             641:     71(ptr) AccessChain 20 70
+                              Store 641 640
+             642:     81(ptr) AccessChain 20 80
+             643:   17(fvec3) Load 642
+             644:   16(float) CompositeExtract 643 0
+             645:   16(float) GroupFMax 25 ExclusiveScan 644
+             646:   16(float) CompositeExtract 643 1
+             647:   16(float) GroupFMax 25 ExclusiveScan 646
+             648:   16(float) CompositeExtract 643 2
+             649:   16(float) GroupFMax 25 ExclusiveScan 648
+             650:   17(fvec3) CompositeConstruct 645 647 649
+             651:     81(ptr) AccessChain 20 80
+                              Store 651 650
+             652:     22(ptr) AccessChain 20 21
+             653:      6(int) Load 652
+             654:      6(int) GroupIAdd 25 ExclusiveScan 653
+             655:     22(ptr) AccessChain 20 21
+                              Store 655 654
+             656:     29(ptr) AccessChain 20 28
+             657:    8(ivec2) Load 656
+             658:      7(int) CompositeExtract 657 0
+             659:      7(int) GroupIAdd 25 ExclusiveScan 658
+             660:      7(int) CompositeExtract 657 1
+             661:      7(int) GroupIAdd 25 ExclusiveScan 660
+             662:    8(ivec2) CompositeConstruct 659 661
+             663:     29(ptr) AccessChain 20 28
+                              Store 663 662
+             664:     39(ptr) AccessChain 20 38
+             665:   10(fvec3) Load 664
+             666:    9(float) CompositeExtract 665 0
+             667:    9(float) GroupFAdd 25 ExclusiveScan 666
+             668:    9(float) CompositeExtract 665 1
+             669:    9(float) GroupFAdd 25 ExclusiveScan 668
+             670:    9(float) CompositeExtract 665 2
+             671:    9(float) GroupFAdd 25 ExclusiveScan 670
+             672:   10(fvec3) CompositeConstruct 667 669 671
+             673:     39(ptr) AccessChain 20 38
+                              Store 673 672
+             674:     51(ptr) AccessChain 20 50
+             675:   12(fvec4) Load 674
+             676:   11(float) CompositeExtract 675 0
+             677:   11(float) GroupFAdd 25 ExclusiveScan 676
+             678:   11(float) CompositeExtract 675 1
+             679:   11(float) GroupFAdd 25 ExclusiveScan 678
+             680:   11(float) CompositeExtract 675 2
+             681:   11(float) GroupFAdd 25 ExclusiveScan 680
+             682:   11(float) CompositeExtract 675 3
+             683:   11(float) GroupFAdd 25 ExclusiveScan 682
+             684:   12(fvec4) CompositeConstruct 677 679 681 683
+             685:     51(ptr) AccessChain 20 50
+                              Store 685 684
+             686:     65(ptr) AccessChain 20 64
+             687:     13(int) Load 686
+             688:     13(int) GroupIAdd 25 ExclusiveScan 687
+             689:     65(ptr) AccessChain 20 64
+                              Store 689 688
+             690:     71(ptr) AccessChain 20 70
+             691:   15(ivec2) Load 690
+             692:     14(int) CompositeExtract 691 0
+             693:     14(int) GroupIAdd 25 ExclusiveScan 692
+             694:     14(int) CompositeExtract 691 1
+             695:     14(int) GroupIAdd 25 ExclusiveScan 694
+             696:   15(ivec2) CompositeConstruct 693 695
+             697:     71(ptr) AccessChain 20 70
+                              Store 697 696
+             698:     81(ptr) AccessChain 20 80
+             699:   17(fvec3) Load 698
+             700:   16(float) CompositeExtract 699 0
+             701:   16(float) GroupFAdd 25 ExclusiveScan 700
+             702:   16(float) CompositeExtract 699 1
+             703:   16(float) GroupFAdd 25 ExclusiveScan 702
+             704:   16(float) CompositeExtract 699 2
+             705:   16(float) GroupFAdd 25 ExclusiveScan 704
+             706:   17(fvec3) CompositeConstruct 701 703 705
+             707:     81(ptr) AccessChain 20 80
+                              Store 707 706
+             708:     22(ptr) AccessChain 20 21
+             709:      6(int) Load 708
+             710:      6(int) GroupSMinNonUniformAMD 25 InclusiveScan 709
+             711:     22(ptr) AccessChain 20 21
+                              Store 711 710
+             712:     29(ptr) AccessChain 20 28
+             713:    8(ivec2) Load 712
+             714:      7(int) CompositeExtract 713 0
+             715:      7(int) GroupUMinNonUniformAMD 25 InclusiveScan 714
+             716:      7(int) CompositeExtract 713 1
+             717:      7(int) GroupUMinNonUniformAMD 25 InclusiveScan 716
+             718:    8(ivec2) CompositeConstruct 715 717
+             719:     29(ptr) AccessChain 20 28
+                              Store 719 718
+             720:     39(ptr) AccessChain 20 38
+             721:   10(fvec3) Load 720
+             722:    9(float) CompositeExtract 721 0
+             723:    9(float) GroupFMinNonUniformAMD 25 InclusiveScan 722
+             724:    9(float) CompositeExtract 721 1
+             725:    9(float) GroupFMinNonUniformAMD 25 InclusiveScan 724
+             726:    9(float) CompositeExtract 721 2
+             727:    9(float) GroupFMinNonUniformAMD 25 InclusiveScan 726
+             728:   10(fvec3) CompositeConstruct 723 725 727
+             729:     39(ptr) AccessChain 20 38
+                              Store 729 728
+             730:     51(ptr) AccessChain 20 50
+             731:   12(fvec4) Load 730
+             732:   11(float) CompositeExtract 731 0
+             733:   11(float) GroupFMinNonUniformAMD 25 InclusiveScan 732
+             734:   11(float) CompositeExtract 731 1
+             735:   11(float) GroupFMinNonUniformAMD 25 InclusiveScan 734
+             736:   11(float) CompositeExtract 731 2
+             737:   11(float) GroupFMinNonUniformAMD 25 InclusiveScan 736
+             738:   11(float) CompositeExtract 731 3
+             739:   11(float) GroupFMinNonUniformAMD 25 InclusiveScan 738
+             740:   12(fvec4) CompositeConstruct 733 735 737 739
+             741:     51(ptr) AccessChain 20 50
+                              Store 741 740
+             742:     65(ptr) AccessChain 20 64
+             743:     13(int) Load 742
+             744:     13(int) GroupSMinNonUniformAMD 25 InclusiveScan 743
+             745:     65(ptr) AccessChain 20 64
+                              Store 745 744
+             746:     71(ptr) AccessChain 20 70
+             747:   15(ivec2) Load 746
+             748:     14(int) CompositeExtract 747 0
+             749:     14(int) GroupUMinNonUniformAMD 25 InclusiveScan 748
+             750:     14(int) CompositeExtract 747 1
+             751:     14(int) GroupUMinNonUniformAMD 25 InclusiveScan 750
+             752:   15(ivec2) CompositeConstruct 749 751
+             753:     71(ptr) AccessChain 20 70
+                              Store 753 752
+             754:     81(ptr) AccessChain 20 80
+             755:   17(fvec3) Load 754
+             756:   16(float) CompositeExtract 755 0
+             757:   16(float) GroupFMinNonUniformAMD 25 InclusiveScan 756
+             758:   16(float) CompositeExtract 755 1
+             759:   16(float) GroupFMinNonUniformAMD 25 InclusiveScan 758
+             760:   16(float) CompositeExtract 755 2
+             761:   16(float) GroupFMinNonUniformAMD 25 InclusiveScan 760
+             762:   17(fvec3) CompositeConstruct 757 759 761
+             763:     81(ptr) AccessChain 20 80
+                              Store 763 762
+             764:     22(ptr) AccessChain 20 21
+             765:      6(int) Load 764
+             766:      6(int) GroupSMaxNonUniformAMD 25 InclusiveScan 765
+             767:     22(ptr) AccessChain 20 21
+                              Store 767 766
+             768:     29(ptr) AccessChain 20 28
+             769:    8(ivec2) Load 768
+             770:      7(int) CompositeExtract 769 0
+             771:      7(int) GroupUMaxNonUniformAMD 25 InclusiveScan 770
+             772:      7(int) CompositeExtract 769 1
+             773:      7(int) GroupUMaxNonUniformAMD 25 InclusiveScan 772
+             774:    8(ivec2) CompositeConstruct 771 773
+             775:     29(ptr) AccessChain 20 28
+                              Store 775 774
+             776:     39(ptr) AccessChain 20 38
+             777:   10(fvec3) Load 776
+             778:    9(float) CompositeExtract 777 0
+             779:    9(float) GroupFMaxNonUniformAMD 25 InclusiveScan 778
+             780:    9(float) CompositeExtract 777 1
+             781:    9(float) GroupFMaxNonUniformAMD 25 InclusiveScan 780
+             782:    9(float) CompositeExtract 777 2
+             783:    9(float) GroupFMaxNonUniformAMD 25 InclusiveScan 782
+             784:   10(fvec3) CompositeConstruct 779 781 783
+             785:     39(ptr) AccessChain 20 38
+                              Store 785 784
+             786:     51(ptr) AccessChain 20 50
+             787:   12(fvec4) Load 786
+             788:   11(float) CompositeExtract 787 0
+             789:   11(float) GroupFMaxNonUniformAMD 25 InclusiveScan 788
+             790:   11(float) CompositeExtract 787 1
+             791:   11(float) GroupFMaxNonUniformAMD 25 InclusiveScan 790
+             792:   11(float) CompositeExtract 787 2
+             793:   11(float) GroupFMaxNonUniformAMD 25 InclusiveScan 792
+             794:   11(float) CompositeExtract 787 3
+             795:   11(float) GroupFMaxNonUniformAMD 25 InclusiveScan 794
+             796:   12(fvec4) CompositeConstruct 789 791 793 795
+             797:     51(ptr) AccessChain 20 50
+                              Store 797 796
+             798:     65(ptr) AccessChain 20 64
+             799:     13(int) Load 798
+             800:     13(int) GroupSMaxNonUniformAMD 25 InclusiveScan 799
+             801:     65(ptr) AccessChain 20 64
+                              Store 801 800
+             802:     71(ptr) AccessChain 20 70
+             803:   15(ivec2) Load 802
+             804:     14(int) CompositeExtract 803 0
+             805:     14(int) GroupUMaxNonUniformAMD 25 InclusiveScan 804
+             806:     14(int) CompositeExtract 803 1
+             807:     14(int) GroupUMaxNonUniformAMD 25 InclusiveScan 806
+             808:   15(ivec2) CompositeConstruct 805 807
+             809:     71(ptr) AccessChain 20 70
+                              Store 809 808
+             810:     81(ptr) AccessChain 20 80
+             811:   17(fvec3) Load 810
+             812:   16(float) CompositeExtract 811 0
+             813:   16(float) GroupFMaxNonUniformAMD 25 InclusiveScan 812
+             814:   16(float) CompositeExtract 811 1
+             815:   16(float) GroupFMaxNonUniformAMD 25 InclusiveScan 814
+             816:   16(float) CompositeExtract 811 2
+             817:   16(float) GroupFMaxNonUniformAMD 25 InclusiveScan 816
+             818:   17(fvec3) CompositeConstruct 813 815 817
+             819:     81(ptr) AccessChain 20 80
+                              Store 819 818
+             820:     22(ptr) AccessChain 20 21
+             821:      6(int) Load 820
+             822:      6(int) GroupIAddNonUniformAMD 25 InclusiveScan 821
+             823:     22(ptr) AccessChain 20 21
+                              Store 823 822
+             824:     29(ptr) AccessChain 20 28
+             825:    8(ivec2) Load 824
+             826:      7(int) CompositeExtract 825 0
+             827:      7(int) GroupIAddNonUniformAMD 25 InclusiveScan 826
+             828:      7(int) CompositeExtract 825 1
+             829:      7(int) GroupIAddNonUniformAMD 25 InclusiveScan 828
+             830:    8(ivec2) CompositeConstruct 827 829
+             831:     29(ptr) AccessChain 20 28
+                              Store 831 830
+             832:     39(ptr) AccessChain 20 38
+             833:   10(fvec3) Load 832
+             834:    9(float) CompositeExtract 833 0
+             835:    9(float) GroupFAddNonUniformAMD 25 InclusiveScan 834
+             836:    9(float) CompositeExtract 833 1
+             837:    9(float) GroupFAddNonUniformAMD 25 InclusiveScan 836
+             838:    9(float) CompositeExtract 833 2
+             839:    9(float) GroupFAddNonUniformAMD 25 InclusiveScan 838
+             840:   10(fvec3) CompositeConstruct 835 837 839
+             841:     39(ptr) AccessChain 20 38
+                              Store 841 840
+             842:     51(ptr) AccessChain 20 50
+             843:   12(fvec4) Load 842
+             844:   11(float) CompositeExtract 843 0
+             845:   11(float) GroupFAddNonUniformAMD 25 InclusiveScan 844
+             846:   11(float) CompositeExtract 843 1
+             847:   11(float) GroupFAddNonUniformAMD 25 InclusiveScan 846
+             848:   11(float) CompositeExtract 843 2
+             849:   11(float) GroupFAddNonUniformAMD 25 InclusiveScan 848
+             850:   11(float) CompositeExtract 843 3
+             851:   11(float) GroupFAddNonUniformAMD 25 InclusiveScan 850
+             852:   12(fvec4) CompositeConstruct 845 847 849 851
+             853:     51(ptr) AccessChain 20 50
+                              Store 853 852
+             854:     65(ptr) AccessChain 20 64
+             855:     13(int) Load 854
+             856:     13(int) GroupIAddNonUniformAMD 25 InclusiveScan 855
+             857:     65(ptr) AccessChain 20 64
+                              Store 857 856
+             858:     71(ptr) AccessChain 20 70
+             859:   15(ivec2) Load 858
+             860:     14(int) CompositeExtract 859 0
+             861:     14(int) GroupIAddNonUniformAMD 25 InclusiveScan 860
+             862:     14(int) CompositeExtract 859 1
+             863:     14(int) GroupIAddNonUniformAMD 25 InclusiveScan 862
+             864:   15(ivec2) CompositeConstruct 861 863
+             865:     71(ptr) AccessChain 20 70
+                              Store 865 864
+             866:     81(ptr) AccessChain 20 80
+             867:   17(fvec3) Load 866
+             868:   16(float) CompositeExtract 867 0
+             869:   16(float) GroupFAddNonUniformAMD 25 InclusiveScan 868
+             870:   16(float) CompositeExtract 867 1
+             871:   16(float) GroupFAddNonUniformAMD 25 InclusiveScan 870
+             872:   16(float) CompositeExtract 867 2
+             873:   16(float) GroupFAddNonUniformAMD 25 InclusiveScan 872
+             874:   17(fvec3) CompositeConstruct 869 871 873
+             875:     81(ptr) AccessChain 20 80
+                              Store 875 874
+             876:     22(ptr) AccessChain 20 21
+             877:      6(int) Load 876
+             878:      6(int) GroupSMinNonUniformAMD 25 ExclusiveScan 877
+             879:     22(ptr) AccessChain 20 21
+                              Store 879 878
+             880:     29(ptr) AccessChain 20 28
+             881:    8(ivec2) Load 880
+             882:      7(int) CompositeExtract 881 0
+             883:      7(int) GroupUMinNonUniformAMD 25 ExclusiveScan 882
+             884:      7(int) CompositeExtract 881 1
+             885:      7(int) GroupUMinNonUniformAMD 25 ExclusiveScan 884
+             886:    8(ivec2) CompositeConstruct 883 885
+             887:     29(ptr) AccessChain 20 28
+                              Store 887 886
+             888:     39(ptr) AccessChain 20 38
+             889:   10(fvec3) Load 888
+             890:    9(float) CompositeExtract 889 0
+             891:    9(float) GroupFMinNonUniformAMD 25 ExclusiveScan 890
+             892:    9(float) CompositeExtract 889 1
+             893:    9(float) GroupFMinNonUniformAMD 25 ExclusiveScan 892
+             894:    9(float) CompositeExtract 889 2
+             895:    9(float) GroupFMinNonUniformAMD 25 ExclusiveScan 894
+             896:   10(fvec3) CompositeConstruct 891 893 895
+             897:     39(ptr) AccessChain 20 38
+                              Store 897 896
+             898:     51(ptr) AccessChain 20 50
+             899:   12(fvec4) Load 898
+             900:   11(float) CompositeExtract 899 0
+             901:   11(float) GroupFMinNonUniformAMD 25 ExclusiveScan 900
+             902:   11(float) CompositeExtract 899 1
+             903:   11(float) GroupFMinNonUniformAMD 25 ExclusiveScan 902
+             904:   11(float) CompositeExtract 899 2
+             905:   11(float) GroupFMinNonUniformAMD 25 ExclusiveScan 904
+             906:   11(float) CompositeExtract 899 3
+             907:   11(float) GroupFMinNonUniformAMD 25 ExclusiveScan 906
+             908:   12(fvec4) CompositeConstruct 901 903 905 907
+             909:     51(ptr) AccessChain 20 50
+                              Store 909 908
+             910:     65(ptr) AccessChain 20 64
+             911:     13(int) Load 910
+             912:     13(int) GroupSMinNonUniformAMD 25 ExclusiveScan 911
+             913:     65(ptr) AccessChain 20 64
+                              Store 913 912
+             914:     71(ptr) AccessChain 20 70
+             915:   15(ivec2) Load 914
+             916:     14(int) CompositeExtract 915 0
+             917:     14(int) GroupUMinNonUniformAMD 25 ExclusiveScan 916
+             918:     14(int) CompositeExtract 915 1
+             919:     14(int) GroupUMinNonUniformAMD 25 ExclusiveScan 918
+             920:   15(ivec2) CompositeConstruct 917 919
+             921:     71(ptr) AccessChain 20 70
+                              Store 921 920
+             922:     81(ptr) AccessChain 20 80
+             923:   17(fvec3) Load 922
+             924:   16(float) CompositeExtract 923 0
+             925:   16(float) GroupFMinNonUniformAMD 25 ExclusiveScan 924
+             926:   16(float) CompositeExtract 923 1
+             927:   16(float) GroupFMinNonUniformAMD 25 ExclusiveScan 926
+             928:   16(float) CompositeExtract 923 2
+             929:   16(float) GroupFMinNonUniformAMD 25 ExclusiveScan 928
+             930:   17(fvec3) CompositeConstruct 925 927 929
+             931:     81(ptr) AccessChain 20 80
+                              Store 931 930
+             932:     22(ptr) AccessChain 20 21
+             933:      6(int) Load 932
+             934:      6(int) GroupSMaxNonUniformAMD 25 ExclusiveScan 933
+             935:     22(ptr) AccessChain 20 21
+                              Store 935 934
+             936:     29(ptr) AccessChain 20 28
+             937:    8(ivec2) Load 936
+             938:      7(int) CompositeExtract 937 0
+             939:      7(int) GroupUMaxNonUniformAMD 25 ExclusiveScan 938
+             940:      7(int) CompositeExtract 937 1
+             941:      7(int) GroupUMaxNonUniformAMD 25 ExclusiveScan 940
+             942:    8(ivec2) CompositeConstruct 939 941
+             943:     29(ptr) AccessChain 20 28
+                              Store 943 942
+             944:     39(ptr) AccessChain 20 38
+             945:   10(fvec3) Load 944
+             946:    9(float) CompositeExtract 945 0
+             947:    9(float) GroupFMaxNonUniformAMD 25 ExclusiveScan 946
+             948:    9(float) CompositeExtract 945 1
+             949:    9(float) GroupFMaxNonUniformAMD 25 ExclusiveScan 948
+             950:    9(float) CompositeExtract 945 2
+             951:    9(float) GroupFMaxNonUniformAMD 25 ExclusiveScan 950
+             952:   10(fvec3) CompositeConstruct 947 949 951
+             953:     39(ptr) AccessChain 20 38
+                              Store 953 952
+             954:     51(ptr) AccessChain 20 50
+             955:   12(fvec4) Load 954
+             956:   11(float) CompositeExtract 955 0
+             957:   11(float) GroupFMaxNonUniformAMD 25 ExclusiveScan 956
+             958:   11(float) CompositeExtract 955 1
+             959:   11(float) GroupFMaxNonUniformAMD 25 ExclusiveScan 958
+             960:   11(float) CompositeExtract 955 2
+             961:   11(float) GroupFMaxNonUniformAMD 25 ExclusiveScan 960
+             962:   11(float) CompositeExtract 955 3
+             963:   11(float) GroupFMaxNonUniformAMD 25 ExclusiveScan 962
+             964:   12(fvec4) CompositeConstruct 957 959 961 963
+             965:     51(ptr) AccessChain 20 50
+                              Store 965 964
+             966:     65(ptr) AccessChain 20 64
+             967:     13(int) Load 966
+             968:     13(int) GroupSMaxNonUniformAMD 25 ExclusiveScan 967
+             969:     65(ptr) AccessChain 20 64
+                              Store 969 968
+             970:     71(ptr) AccessChain 20 70
+             971:   15(ivec2) Load 970
+             972:     14(int) CompositeExtract 971 0
+             973:     14(int) GroupUMaxNonUniformAMD 25 ExclusiveScan 972
+             974:     14(int) CompositeExtract 971 1
+             975:     14(int) GroupUMaxNonUniformAMD 25 ExclusiveScan 974
+             976:   15(ivec2) CompositeConstruct 973 975
+             977:     71(ptr) AccessChain 20 70
+                              Store 977 976
+             978:     81(ptr) AccessChain 20 80
+             979:   17(fvec3) Load 978
+             980:   16(float) CompositeExtract 979 0
+             981:   16(float) GroupFMaxNonUniformAMD 25 ExclusiveScan 980
+             982:   16(float) CompositeExtract 979 1
+             983:   16(float) GroupFMaxNonUniformAMD 25 ExclusiveScan 982
+             984:   16(float) CompositeExtract 979 2
+             985:   16(float) GroupFMaxNonUniformAMD 25 ExclusiveScan 984
+             986:   17(fvec3) CompositeConstruct 981 983 985
+             987:     81(ptr) AccessChain 20 80
+                              Store 987 986
+             988:     22(ptr) AccessChain 20 21
+             989:      6(int) Load 988
+             990:      6(int) GroupIAddNonUniformAMD 25 ExclusiveScan 989
+             991:     22(ptr) AccessChain 20 21
+                              Store 991 990
+             992:     29(ptr) AccessChain 20 28
+             993:    8(ivec2) Load 992
+             994:      7(int) CompositeExtract 993 0
+             995:      7(int) GroupIAddNonUniformAMD 25 ExclusiveScan 994
+             996:      7(int) CompositeExtract 993 1
+             997:      7(int) GroupIAddNonUniformAMD 25 ExclusiveScan 996
+             998:    8(ivec2) CompositeConstruct 995 997
+             999:     29(ptr) AccessChain 20 28
+                              Store 999 998
+            1000:     39(ptr) AccessChain 20 38
+            1001:   10(fvec3) Load 1000
+            1002:    9(float) CompositeExtract 1001 0
+            1003:    9(float) GroupFAddNonUniformAMD 25 ExclusiveScan 1002
+            1004:    9(float) CompositeExtract 1001 1
+            1005:    9(float) GroupFAddNonUniformAMD 25 ExclusiveScan 1004
+            1006:    9(float) CompositeExtract 1001 2
+            1007:    9(float) GroupFAddNonUniformAMD 25 ExclusiveScan 1006
+            1008:   10(fvec3) CompositeConstruct 1003 1005 1007
+            1009:     39(ptr) AccessChain 20 38
+                              Store 1009 1008
+            1010:     51(ptr) AccessChain 20 50
+            1011:   12(fvec4) Load 1010
+            1012:   11(float) CompositeExtract 1011 0
+            1013:   11(float) GroupFAddNonUniformAMD 25 ExclusiveScan 1012
+            1014:   11(float) CompositeExtract 1011 1
+            1015:   11(float) GroupFAddNonUniformAMD 25 ExclusiveScan 1014
+            1016:   11(float) CompositeExtract 1011 2
+            1017:   11(float) GroupFAddNonUniformAMD 25 ExclusiveScan 1016
+            1018:   11(float) CompositeExtract 1011 3
+            1019:   11(float) GroupFAddNonUniformAMD 25 ExclusiveScan 1018
+            1020:   12(fvec4) CompositeConstruct 1013 1015 1017 1019
+            1021:     51(ptr) AccessChain 20 50
+                              Store 1021 1020
+            1022:     65(ptr) AccessChain 20 64
+            1023:     13(int) Load 1022
+            1024:     13(int) GroupIAddNonUniformAMD 25 ExclusiveScan 1023
+            1025:     65(ptr) AccessChain 20 64
+                              Store 1025 1024
+            1026:     71(ptr) AccessChain 20 70
+            1027:   15(ivec2) Load 1026
+            1028:     14(int) CompositeExtract 1027 0
+            1029:     14(int) GroupIAddNonUniformAMD 25 ExclusiveScan 1028
+            1030:     14(int) CompositeExtract 1027 1
+            1031:     14(int) GroupIAddNonUniformAMD 25 ExclusiveScan 1030
+            1032:   15(ivec2) CompositeConstruct 1029 1031
+            1033:     71(ptr) AccessChain 20 70
+                              Store 1033 1032
+            1034:     81(ptr) AccessChain 20 80
+            1035:   17(fvec3) Load 1034
+            1036:   16(float) CompositeExtract 1035 0
+            1037:   16(float) GroupFAddNonUniformAMD 25 ExclusiveScan 1036
+            1038:   16(float) CompositeExtract 1035 1
+            1039:   16(float) GroupFAddNonUniformAMD 25 ExclusiveScan 1038
+            1040:   16(float) CompositeExtract 1035 2
+            1041:   16(float) GroupFAddNonUniformAMD 25 ExclusiveScan 1040
+            1042:   17(fvec3) CompositeConstruct 1037 1039 1041
+            1043:     81(ptr) AccessChain 20 80
+                              Store 1043 1042
+                              Return
+                              FunctionEnd
diff --git a/3rdparty/glslang/Test/baseResults/spv.shaderGroupVote.comp.out b/3rdparty/glslang/Test/baseResults/spv.shaderGroupVote.comp.out
index f8bfae804..464787f6d 100644
--- a/3rdparty/glslang/Test/baseResults/spv.shaderGroupVote.comp.out
+++ b/3rdparty/glslang/Test/baseResults/spv.shaderGroupVote.comp.out
@@ -3,10 +3,11 @@ Warning, version 450 is not yet complete; most version-specific features are pre
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 37
+// Id's are bound by 33
 
                               Capability Shader
-                              Capability Groups
+                              Capability SubgroupVoteKHR
+                              Extension  "SPV_KHR_subgroup_vote"
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
                               EntryPoint GLCompute 4  "main"
@@ -22,7 +23,7 @@ Warning, version 450 is not yet complete; most version-specific features are pre
                               Decorate 10(Buffers) BufferBlock
                               Decorate 12 DescriptorSet 0
                               Decorate 12 Binding 0
-                              Decorate 36 BuiltIn WorkgroupSize
+                              Decorate 32 BuiltIn WorkgroupSize
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeBool
@@ -35,11 +36,10 @@ Warning, version 450 is not yet complete; most version-specific features are pre
               14:     13(int) Constant 0
               15:             TypePointer Uniform 9(int)
               18:      9(int) Constant 0
-              21:      9(int) Constant 3
-              31:      9(int) Constant 1
-              34:             TypeVector 9(int) 3
-              35:      9(int) Constant 4
-              36:   34(ivec3) ConstantComposite 35 35 31
+              27:      9(int) Constant 1
+              30:             TypeVector 9(int) 3
+              31:      9(int) Constant 4
+              32:   30(ivec3) ConstantComposite 31 31 27
          4(main):           2 Function None 3
                5:             Label
            8(b1):      7(ptr) Variable Function
@@ -48,20 +48,17 @@ Warning, version 450 is not yet complete; most version-specific features are pre
               19:     6(bool) INotEqual 17 18
                               Store 8(b1) 19
               20:     6(bool) Load 8(b1)
-              22:     6(bool) GroupAny 21 20
-                              Store 8(b1) 22
-              23:     6(bool) Load 8(b1)
-              24:     6(bool) GroupAll 21 23
-                              Store 8(b1) 24
-              25:     6(bool) Load 8(b1)
-              26:     6(bool) GroupAll 21 25
-              27:     6(bool) GroupAny 21 25
-              28:     6(bool) LogicalNot 27
-              29:     6(bool) LogicalOr 26 28
-                              Store 8(b1) 29
-              30:     6(bool) Load 8(b1)
-              32:      9(int) Select 30 31 18
-              33:     15(ptr) AccessChain 12 14
-                              Store 33 32
+              21:     6(bool) SubgroupAllKHR 20
+                              Store 8(b1) 21
+              22:     6(bool) Load 8(b1)
+              23:     6(bool) SubgroupAnyKHR 22
+                              Store 8(b1) 23
+              24:     6(bool) Load 8(b1)
+              25:     6(bool) SubgroupAllEqualKHR 24
+                              Store 8(b1) 25
+              26:     6(bool) Load 8(b1)
+              28:      9(int) Select 26 27 18
+              29:     15(ptr) AccessChain 12 14
+                              Store 29 28
                               Return
                               FunctionEnd
diff --git a/3rdparty/glslang/Test/baseResults/tokenLength.vert.out b/3rdparty/glslang/Test/baseResults/tokenLength.vert.out
index 11bdd4b59..f12874b67 100644
--- a/3rdparty/glslang/Test/baseResults/tokenLength.vert.out
+++ b/3rdparty/glslang/Test/baseResults/tokenLength.vert.out
@@ -11,8 +11,6 @@ ERROR: 0:34: '' : octal literal too big
 ERROR: 0:35: '' : numeric literal too long 
 ERROR: 0:35: '' : numeric literal too big 
 ERROR: 0:36: '' : float literal too long 
-ERROR: 0:36: '' : float literal too long 
-ERROR: 0:36: '' : float literal too long 
 WARNING: 0:39: '#extension' : extension not supported: a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhooooooooooooooooooooooooooooooohhhhhhhhhhhhhhhhh01234
 ERROR: 0:40: '' : name too long 
 WARNING: 0:40: '#extension' : extension not supported: a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhooooooooooooooooooooooooooooooohhhhhhhhhhhhhhhhh01234
@@ -29,7 +27,7 @@ ERROR: 0:62: 'preprocessor evaluation' : undefined macro in expression not allow
 ERROR: 0:67: '' : numeric literal too long 
 ERROR: 0:70: '' : name too long 
 ERROR: 0:70: 'preprocessor evaluation' : undefined macro in expression not allowed in es profile A000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
-ERROR: 28 compilation errors.  No code generated.
+ERROR: 26 compilation errors.  No code generated.
 
 
 Shader version: 300
@@ -73,7 +71,7 @@ ERROR: node is still EOpNull!
 0:23    move second child to first child (temp highp float)
 0:23      'E3' (global highp float)
 0:23      Constant:
-0:23        12.000000
+0:23        1.012346
 0:25  Function Definition: main( (global void)
 0:25    Function Parameters: 
 0:27    Sequence
@@ -104,7 +102,7 @@ ERROR: node is still EOpNull!
 0:36    move second child to first child (temp highp float)
 0:36      'superF' (global highp float)
 0:36      Constant:
-0:36        inf
+0:36        1.012346
 0:?   Linker Objects
 0:?     'BCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789' (in highp float)
 0:?     'ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789' (in highp float)
@@ -169,7 +167,7 @@ ERROR: node is still EOpNull!
 0:23    move second child to first child (temp highp float)
 0:23      'E3' (global highp float)
 0:23      Constant:
-0:23        12.000000
+0:23        1.012346
 0:25  Function Definition: main( (global void)
 0:25    Function Parameters: 
 0:27    Sequence
@@ -200,7 +198,7 @@ ERROR: node is still EOpNull!
 0:36    move second child to first child (temp highp float)
 0:36      'superF' (global highp float)
 0:36      Constant:
-0:36        inf
+0:36        1.012346
 0:?   Linker Objects
 0:?     'BCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789' (in highp float)
 0:?     'ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789' (in highp float)
diff --git a/3rdparty/glslang/Test/hlsl.matrixSwizzle.vert b/3rdparty/glslang/Test/hlsl.matrixSwizzle.vert
new file mode 100644
index 000000000..c06996b31
--- /dev/null
+++ b/3rdparty/glslang/Test/hlsl.matrixSwizzle.vert
@@ -0,0 +1,33 @@
+void ShaderFunction(float inf) : COLOR0
+{
+    float3x4 m;
+
+    // tests that convert to non-matrix swizzles
+
+    m._34  = 1.0; // AST should have a normal component select
+    m._m23 = 2.0; // same code
+    m[2][3] = 2.0; // same code
+
+    m._11_12_13_14 = float4(3.0);      // AST should have normal column selection (first row)
+    m._m10_m11_m12_m13 = float4(3.0);  // AST should have normal column selection (second row)
+    m[1] = float4(3.0);                // same code
+
+    // tests that stay as matrix swizzles
+
+    float3 f3;
+    m._11_22_23 = f3;
+    m._21_12_31 = float3(5.0);
+    m._11_12_21 = 2 * f3;
+
+    // r-value
+    f3 = m._21_12_31;
+}
+
+float3x3 createMat3x3(float3 a, float3 b, float3 c)
+{
+    float3x3 m;
+    m._11_21_31 = a;
+    m._12_22_32 = b;
+    m._13_23_33 = c;
+    return m;
+}
diff --git a/3rdparty/glslang/Test/hlsl.struct.split.assign.frag b/3rdparty/glslang/Test/hlsl.struct.split.assign.frag
new file mode 100644
index 000000000..e7fe02887
--- /dev/null
+++ b/3rdparty/glslang/Test/hlsl.struct.split.assign.frag
@@ -0,0 +1,12 @@
+struct S {
+    float f;
+    float4 pos : SV_Position;
+};
+
+float4 main(int i, S input[3]) : COLOR0
+{
+    S a[3];
+    input = a;
+
+    return float3(1.0);
+}
diff --git a/3rdparty/glslang/Test/spv.shaderBallotAMD.comp b/3rdparty/glslang/Test/spv.shaderBallotAMD.comp
new file mode 100644
index 000000000..d6d370aea
--- /dev/null
+++ b/3rdparty/glslang/Test/spv.shaderBallotAMD.comp
@@ -0,0 +1,165 @@
+#version 450
+
+#extension GL_ARB_gpu_shader_int64: enable
+#extension GL_AMD_gpu_shader_half_float: enable
+#extension GL_AMD_shader_ballot: enable
+
+layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(binding = 0) buffer Buffers
+{
+    int     i;
+    uvec2   uv;
+    vec3    fv;
+    dvec4   dv;
+    int64_t i64;
+    u64vec2 u64v;
+    f16vec3 f16v;
+};
+
+void main()
+{
+	i    = minInvocationsAMD(i);
+    uv   = minInvocationsAMD(uv);
+    fv   = minInvocationsAMD(fv);
+    dv   = minInvocationsAMD(dv);
+    i64  = minInvocationsAMD(i64);
+    u64v = minInvocationsAMD(u64v);
+    f16v = minInvocationsAMD(f16v);
+
+    i    = maxInvocationsAMD(i);
+    uv   = maxInvocationsAMD(uv);
+    fv   = maxInvocationsAMD(fv);
+    dv   = maxInvocationsAMD(dv);
+    i64  = maxInvocationsAMD(i64);
+    u64v = maxInvocationsAMD(u64v);
+    f16v = maxInvocationsAMD(f16v);
+
+    i    = addInvocationsAMD(i);
+    uv   = addInvocationsAMD(uv);
+    fv   = addInvocationsAMD(fv);
+    dv   = addInvocationsAMD(dv);
+    i64  = addInvocationsAMD(i64);
+    u64v = addInvocationsAMD(u64v);
+    f16v = addInvocationsAMD(f16v);
+
+	i    = minInvocationsNonUniformAMD(i);
+    uv   = minInvocationsNonUniformAMD(uv);
+    fv   = minInvocationsNonUniformAMD(fv);
+    dv   = minInvocationsNonUniformAMD(dv);
+    i64  = minInvocationsNonUniformAMD(i64);
+    u64v = minInvocationsNonUniformAMD(u64v);
+    f16v = minInvocationsNonUniformAMD(f16v);
+
+    i    = maxInvocationsNonUniformAMD(i);
+    uv   = maxInvocationsNonUniformAMD(uv);
+    fv   = maxInvocationsNonUniformAMD(fv);
+    dv   = maxInvocationsNonUniformAMD(dv);
+    i64  = maxInvocationsNonUniformAMD(i64);
+    u64v = maxInvocationsNonUniformAMD(u64v);
+    f16v = maxInvocationsNonUniformAMD(f16v);
+
+    i    = addInvocationsNonUniformAMD(i);
+    uv   = addInvocationsNonUniformAMD(uv);
+    fv   = addInvocationsNonUniformAMD(fv);
+    dv   = addInvocationsNonUniformAMD(dv);
+    i64  = addInvocationsNonUniformAMD(i64);
+    u64v = addInvocationsNonUniformAMD(u64v);
+    f16v = addInvocationsNonUniformAMD(f16v);
+
+    i    = minInvocationsInclusiveScanAMD(i);
+    uv   = minInvocationsInclusiveScanAMD(uv);
+    fv   = minInvocationsInclusiveScanAMD(fv);
+    dv   = minInvocationsInclusiveScanAMD(dv);
+    i64  = minInvocationsInclusiveScanAMD(i64);
+    u64v = minInvocationsInclusiveScanAMD(u64v);
+    f16v = minInvocationsInclusiveScanAMD(f16v);
+
+    i    = maxInvocationsInclusiveScanAMD(i);
+    uv   = maxInvocationsInclusiveScanAMD(uv);
+    fv   = maxInvocationsInclusiveScanAMD(fv);
+    dv   = maxInvocationsInclusiveScanAMD(dv);
+    i64  = maxInvocationsInclusiveScanAMD(i64);
+    u64v = maxInvocationsInclusiveScanAMD(u64v);
+    f16v = maxInvocationsInclusiveScanAMD(f16v);
+
+    i    = addInvocationsInclusiveScanAMD(i);
+    uv   = addInvocationsInclusiveScanAMD(uv);
+    fv   = addInvocationsInclusiveScanAMD(fv);
+    dv   = addInvocationsInclusiveScanAMD(dv);
+    i64  = addInvocationsInclusiveScanAMD(i64);
+    u64v = addInvocationsInclusiveScanAMD(u64v);
+    f16v = addInvocationsInclusiveScanAMD(f16v);
+
+    i    = minInvocationsExclusiveScanAMD(i);
+    uv   = minInvocationsExclusiveScanAMD(uv);
+    fv   = minInvocationsExclusiveScanAMD(fv);
+    dv   = minInvocationsExclusiveScanAMD(dv);
+    i64  = minInvocationsExclusiveScanAMD(i64);
+    u64v = minInvocationsExclusiveScanAMD(u64v);
+    f16v = minInvocationsExclusiveScanAMD(f16v);
+
+    i    = maxInvocationsExclusiveScanAMD(i);
+    uv   = maxInvocationsExclusiveScanAMD(uv);
+    fv   = maxInvocationsExclusiveScanAMD(fv);
+    dv   = maxInvocationsExclusiveScanAMD(dv);
+    i64  = maxInvocationsExclusiveScanAMD(i64);
+    u64v = maxInvocationsExclusiveScanAMD(u64v);
+    f16v = maxInvocationsExclusiveScanAMD(f16v);
+
+    i    = addInvocationsExclusiveScanAMD(i);
+    uv   = addInvocationsExclusiveScanAMD(uv);
+    fv   = addInvocationsExclusiveScanAMD(fv);
+    dv   = addInvocationsExclusiveScanAMD(dv);
+    i64  = addInvocationsExclusiveScanAMD(i64);
+    u64v = addInvocationsExclusiveScanAMD(u64v);
+    f16v = addInvocationsExclusiveScanAMD(f16v);
+
+    i    = minInvocationsInclusiveScanNonUniformAMD(i);
+    uv   = minInvocationsInclusiveScanNonUniformAMD(uv);
+    fv   = minInvocationsInclusiveScanNonUniformAMD(fv);
+    dv   = minInvocationsInclusiveScanNonUniformAMD(dv);
+    i64  = minInvocationsInclusiveScanNonUniformAMD(i64);
+    u64v = minInvocationsInclusiveScanNonUniformAMD(u64v);
+    f16v = minInvocationsInclusiveScanNonUniformAMD(f16v);
+
+    i    = maxInvocationsInclusiveScanNonUniformAMD(i);
+    uv   = maxInvocationsInclusiveScanNonUniformAMD(uv);
+    fv   = maxInvocationsInclusiveScanNonUniformAMD(fv);
+    dv   = maxInvocationsInclusiveScanNonUniformAMD(dv);
+    i64  = maxInvocationsInclusiveScanNonUniformAMD(i64);
+    u64v = maxInvocationsInclusiveScanNonUniformAMD(u64v);
+    f16v = maxInvocationsInclusiveScanNonUniformAMD(f16v);
+
+    i    = addInvocationsInclusiveScanNonUniformAMD(i);
+    uv   = addInvocationsInclusiveScanNonUniformAMD(uv);
+    fv   = addInvocationsInclusiveScanNonUniformAMD(fv);
+    dv   = addInvocationsInclusiveScanNonUniformAMD(dv);
+    i64  = addInvocationsInclusiveScanNonUniformAMD(i64);
+    u64v = addInvocationsInclusiveScanNonUniformAMD(u64v);
+    f16v = addInvocationsInclusiveScanNonUniformAMD(f16v);
+
+    i    = minInvocationsExclusiveScanNonUniformAMD(i);
+    uv   = minInvocationsExclusiveScanNonUniformAMD(uv);
+    fv   = minInvocationsExclusiveScanNonUniformAMD(fv);
+    dv   = minInvocationsExclusiveScanNonUniformAMD(dv);
+    i64  = minInvocationsExclusiveScanNonUniformAMD(i64);
+    u64v = minInvocationsExclusiveScanNonUniformAMD(u64v);
+    f16v = minInvocationsExclusiveScanNonUniformAMD(f16v);
+
+    i    = maxInvocationsExclusiveScanNonUniformAMD(i);
+    uv   = maxInvocationsExclusiveScanNonUniformAMD(uv);
+    fv   = maxInvocationsExclusiveScanNonUniformAMD(fv);
+    dv   = maxInvocationsExclusiveScanNonUniformAMD(dv);
+    i64  = maxInvocationsExclusiveScanNonUniformAMD(i64);
+    u64v = maxInvocationsExclusiveScanNonUniformAMD(u64v);
+    f16v = maxInvocationsExclusiveScanNonUniformAMD(f16v);
+
+    i    = addInvocationsExclusiveScanNonUniformAMD(i);
+    uv   = addInvocationsExclusiveScanNonUniformAMD(uv);
+    fv   = addInvocationsExclusiveScanNonUniformAMD(fv);
+    dv   = addInvocationsExclusiveScanNonUniformAMD(dv);
+    i64  = addInvocationsExclusiveScanNonUniformAMD(i64);
+    u64v = addInvocationsExclusiveScanNonUniformAMD(u64v);
+    f16v = addInvocationsExclusiveScanNonUniformAMD(f16v);
+}
diff --git a/3rdparty/glslang/glslang/Include/Types.h b/3rdparty/glslang/glslang/Include/Types.h
index 684408a86..f5a6e8f7d 100644
--- a/3rdparty/glslang/glslang/Include/Types.h
+++ b/3rdparty/glslang/glslang/Include/Types.h
@@ -1319,9 +1319,18 @@ public:
     virtual bool isImage() const   { return basicType == EbtSampler && getSampler().isImage(); }
     virtual bool isSubpass() const { return basicType == EbtSampler && getSampler().isSubpass(); }
 
-    // Return true if this is interstage IO
-    virtual bool isBuiltInInterstageIO() const
+    virtual bool isBuiltInInterstageIO(EShLanguage language) const
     {
+        return isPerVertexAndBuiltIn(language) || isLooseAndBuiltIn(language);
+    }
+
+    // Return true if this is an interstage IO builtin
+    virtual bool isPerVertexAndBuiltIn(EShLanguage language) const
+    {
+        if (language == EShLangFragment)
+            return false;
+
+        // Any non-fragment stage
         switch (getQualifier().builtIn) {
         case EbvPosition:
         case EbvPointSize:
@@ -1333,6 +1342,15 @@ public:
         }
     }
 
+    // Return true if this is a loose builtin
+    virtual bool isLooseAndBuiltIn(EShLanguage language) const
+    {
+        if (getQualifier().builtIn == EbvNone)
+            return false;
+
+        return !isPerVertexAndBuiltIn(language);
+    }
+    
     // Recursively checks if the type contains the given basic type
     virtual bool containsBasicType(TBasicType checkType) const
     {
@@ -1401,33 +1419,20 @@ public:
     }
 
     // Recursively checks if the type contains an interstage IO builtin
-    virtual bool containsBuiltInInterstageIO() const
+    virtual bool containsBuiltInInterstageIO(EShLanguage language) const
     {
-        if (isBuiltInInterstageIO())
+        if (isBuiltInInterstageIO(language))
             return true;
 
         if (! structure)
             return false;
         for (unsigned int i = 0; i < structure->size(); ++i) {
-            if ((*structure)[i].type->containsBuiltInInterstageIO())
+            if ((*structure)[i].type->containsBuiltInInterstageIO(language))
                 return true;
         }
         return false;
     }
 
-    // Recursively checks whether a struct contains only interstage IO
-    virtual bool containsOnlyBuiltInInterstageIO() const
-    {
-        if (! structure)
-            return isBuiltInInterstageIO();
-
-        for (unsigned int i = 0; i < structure->size(); ++i) {
-            if (!(*structure)[i].type->containsOnlyBuiltInInterstageIO())
-                return false;
-        }
-        return true;
-    }
-
     virtual bool containsNonOpaque() const
     {
         // list all non-opaque types
diff --git a/3rdparty/glslang/glslang/Include/intermediate.h b/3rdparty/glslang/glslang/Include/intermediate.h
index 78c187eb6..dc87ba905 100644
--- a/3rdparty/glslang/glslang/Include/intermediate.h
+++ b/3rdparty/glslang/glslang/Include/intermediate.h
@@ -335,6 +335,18 @@ enum TOperator {
     EOpMinInvocationsNonUniform,
     EOpMaxInvocationsNonUniform,
     EOpAddInvocationsNonUniform,
+    EOpMinInvocationsInclusiveScan,
+    EOpMaxInvocationsInclusiveScan,
+    EOpAddInvocationsInclusiveScan,
+    EOpMinInvocationsInclusiveScanNonUniform,
+    EOpMaxInvocationsInclusiveScanNonUniform,
+    EOpAddInvocationsInclusiveScanNonUniform,
+    EOpMinInvocationsExclusiveScan,
+    EOpMaxInvocationsExclusiveScan,
+    EOpAddInvocationsExclusiveScan,
+    EOpMinInvocationsExclusiveScanNonUniform,
+    EOpMaxInvocationsExclusiveScanNonUniform,
+    EOpAddInvocationsExclusiveScanNonUniform,
     EOpSwizzleInvocations,
     EOpSwizzleInvocationsMasked,
     EOpWriteInvocation,
@@ -626,6 +638,9 @@ enum TOperator {
     // geometry methods
     EOpMethodAppend,                     // Geometry shader methods
     EOpMethodRestartStrip,               // ...
+
+    // matrix
+    EOpMatrixSwizzle,                    // select multiple matrix components (non-column)
 };
 
 class TIntermTraverser;
diff --git a/3rdparty/glslang/glslang/Include/revision.h b/3rdparty/glslang/glslang/Include/revision.h
index 047e4ed64..5ef393d39 100644
--- a/3rdparty/glslang/glslang/Include/revision.h
+++ b/3rdparty/glslang/glslang/Include/revision.h
@@ -2,5 +2,5 @@
 // For the version, it uses the latest git tag followed by the number of commits.
 // For the date, it uses the current date (when then script is run).
 
-#define GLSLANG_REVISION "Overload400-PrecQual.1760"
-#define GLSLANG_DATE "11-Jan-2017"
+#define GLSLANG_REVISION "Overload400-PrecQual.1773"
+#define GLSLANG_DATE "19-Jan-2017"
diff --git a/3rdparty/glslang/glslang/MachineIndependent/Constant.cpp b/3rdparty/glslang/glslang/MachineIndependent/Constant.cpp
index c709a7859..fff8fd26d 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/Constant.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/Constant.cpp
@@ -974,20 +974,20 @@ TIntermTyped* TIntermediate::foldDereference(TIntermTyped* node, int index, cons
 // Make a constant vector node or constant scalar node, representing a given
 // constant vector and constant swizzle into it.
 //
-TIntermTyped* TIntermediate::foldSwizzle(TIntermTyped* node, TVectorFields& fields, const TSourceLoc& loc)
+TIntermTyped* TIntermediate::foldSwizzle(TIntermTyped* node, TSwizzleSelectors<TVectorSelector>& selectors, const TSourceLoc& loc)
 {
     const TConstUnionArray& unionArray = node->getAsConstantUnion()->getConstArray();
-    TConstUnionArray constArray(fields.num);
+    TConstUnionArray constArray(selectors.size());
 
-    for (int i = 0; i < fields.num; i++)
-        constArray[i] = unionArray[fields.offsets[i]];
+    for (int i = 0; i < selectors.size(); i++)
+        constArray[i] = unionArray[selectors[i]];
 
     TIntermTyped* result = addConstantUnion(constArray, node->getType(), loc);
 
     if (result == 0)
         result = node;
     else
-        result->setType(TType(node->getBasicType(), EvqConst, fields.num));
+        result->setType(TType(node->getBasicType(), EvqConst, selectors.size()));
 
     return result;
 }
diff --git a/3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp b/3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp
index 2ed238120..6ea7b26b2 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp
@@ -1587,6 +1587,96 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "uvec3 minInvocationsAMD(uvec3);"
             "uvec4 minInvocationsAMD(uvec4);"
 
+            "double minInvocationsAMD(double);"
+            "dvec2  minInvocationsAMD(dvec2);"
+            "dvec3  minInvocationsAMD(dvec3);"
+            "dvec4  minInvocationsAMD(dvec4);"
+
+            "int64_t minInvocationsAMD(int64_t);"
+            "i64vec2 minInvocationsAMD(i64vec2);"
+            "i64vec3 minInvocationsAMD(i64vec3);"
+            "i64vec4 minInvocationsAMD(i64vec4);"
+
+            "uint64_t minInvocationsAMD(uint64_t);"
+            "u64vec2  minInvocationsAMD(u64vec2);"
+            "u64vec3  minInvocationsAMD(u64vec3);"
+            "u64vec4  minInvocationsAMD(u64vec4);"
+
+            "float16_t minInvocationsAMD(float16_t);"
+            "f16vec2   minInvocationsAMD(f16vec2);"
+            "f16vec3   minInvocationsAMD(f16vec3);"
+            "f16vec4   minInvocationsAMD(f16vec4);"
+
+            "float minInvocationsInclusiveScanAMD(float);"
+            "vec2  minInvocationsInclusiveScanAMD(vec2);"
+            "vec3  minInvocationsInclusiveScanAMD(vec3);"
+            "vec4  minInvocationsInclusiveScanAMD(vec4);"
+
+            "int   minInvocationsInclusiveScanAMD(int);"
+            "ivec2 minInvocationsInclusiveScanAMD(ivec2);"
+            "ivec3 minInvocationsInclusiveScanAMD(ivec3);"
+            "ivec4 minInvocationsInclusiveScanAMD(ivec4);"
+
+            "uint  minInvocationsInclusiveScanAMD(uint);"
+            "uvec2 minInvocationsInclusiveScanAMD(uvec2);"
+            "uvec3 minInvocationsInclusiveScanAMD(uvec3);"
+            "uvec4 minInvocationsInclusiveScanAMD(uvec4);"
+
+            "double minInvocationsInclusiveScanAMD(double);"
+            "dvec2  minInvocationsInclusiveScanAMD(dvec2);"
+            "dvec3  minInvocationsInclusiveScanAMD(dvec3);"
+            "dvec4  minInvocationsInclusiveScanAMD(dvec4);"
+
+            "int64_t minInvocationsInclusiveScanAMD(int64_t);"
+            "i64vec2 minInvocationsInclusiveScanAMD(i64vec2);"
+            "i64vec3 minInvocationsInclusiveScanAMD(i64vec3);"
+            "i64vec4 minInvocationsInclusiveScanAMD(i64vec4);"
+
+            "uint64_t minInvocationsInclusiveScanAMD(uint64_t);"
+            "u64vec2  minInvocationsInclusiveScanAMD(u64vec2);"
+            "u64vec3  minInvocationsInclusiveScanAMD(u64vec3);"
+            "u64vec4  minInvocationsInclusiveScanAMD(u64vec4);"
+
+            "float16_t minInvocationsInclusiveScanAMD(float16_t);"
+            "f16vec2   minInvocationsInclusiveScanAMD(f16vec2);"
+            "f16vec3   minInvocationsInclusiveScanAMD(f16vec3);"
+            "f16vec4   minInvocationsInclusiveScanAMD(f16vec4);"
+
+            "float minInvocationsExclusiveScanAMD(float);"
+            "vec2  minInvocationsExclusiveScanAMD(vec2);"
+            "vec3  minInvocationsExclusiveScanAMD(vec3);"
+            "vec4  minInvocationsExclusiveScanAMD(vec4);"
+
+            "int   minInvocationsExclusiveScanAMD(int);"
+            "ivec2 minInvocationsExclusiveScanAMD(ivec2);"
+            "ivec3 minInvocationsExclusiveScanAMD(ivec3);"
+            "ivec4 minInvocationsExclusiveScanAMD(ivec4);"
+
+            "uint  minInvocationsExclusiveScanAMD(uint);"
+            "uvec2 minInvocationsExclusiveScanAMD(uvec2);"
+            "uvec3 minInvocationsExclusiveScanAMD(uvec3);"
+            "uvec4 minInvocationsExclusiveScanAMD(uvec4);"
+
+            "double minInvocationsExclusiveScanAMD(double);"
+            "dvec2  minInvocationsExclusiveScanAMD(dvec2);"
+            "dvec3  minInvocationsExclusiveScanAMD(dvec3);"
+            "dvec4  minInvocationsExclusiveScanAMD(dvec4);"
+
+            "int64_t minInvocationsExclusiveScanAMD(int64_t);"
+            "i64vec2 minInvocationsExclusiveScanAMD(i64vec2);"
+            "i64vec3 minInvocationsExclusiveScanAMD(i64vec3);"
+            "i64vec4 minInvocationsExclusiveScanAMD(i64vec4);"
+
+            "uint64_t minInvocationsExclusiveScanAMD(uint64_t);"
+            "u64vec2  minInvocationsExclusiveScanAMD(u64vec2);"
+            "u64vec3  minInvocationsExclusiveScanAMD(u64vec3);"
+            "u64vec4  minInvocationsExclusiveScanAMD(u64vec4);"
+
+            "float16_t minInvocationsExclusiveScanAMD(float16_t);"
+            "f16vec2   minInvocationsExclusiveScanAMD(f16vec2);"
+            "f16vec3   minInvocationsExclusiveScanAMD(f16vec3);"
+            "f16vec4   minInvocationsExclusiveScanAMD(f16vec4);"
+
             "float maxInvocationsAMD(float);"
             "vec2  maxInvocationsAMD(vec2);"
             "vec3  maxInvocationsAMD(vec3);"
@@ -1602,6 +1692,96 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "uvec3 maxInvocationsAMD(uvec3);"
             "uvec4 maxInvocationsAMD(uvec4);"
 
+            "double maxInvocationsAMD(double);"
+            "dvec2  maxInvocationsAMD(dvec2);"
+            "dvec3  maxInvocationsAMD(dvec3);"
+            "dvec4  maxInvocationsAMD(dvec4);"
+
+            "int64_t maxInvocationsAMD(int64_t);"
+            "i64vec2 maxInvocationsAMD(i64vec2);"
+            "i64vec3 maxInvocationsAMD(i64vec3);"
+            "i64vec4 maxInvocationsAMD(i64vec4);"
+
+            "uint64_t maxInvocationsAMD(uint64_t);"
+            "u64vec2  maxInvocationsAMD(u64vec2);"
+            "u64vec3  maxInvocationsAMD(u64vec3);"
+            "u64vec4  maxInvocationsAMD(u64vec4);"
+
+            "float16_t maxInvocationsAMD(float16_t);"
+            "f16vec2   maxInvocationsAMD(f16vec2);"
+            "f16vec3   maxInvocationsAMD(f16vec3);"
+            "f16vec4   maxInvocationsAMD(f16vec4);"
+
+            "float maxInvocationsInclusiveScanAMD(float);"
+            "vec2  maxInvocationsInclusiveScanAMD(vec2);"
+            "vec3  maxInvocationsInclusiveScanAMD(vec3);"
+            "vec4  maxInvocationsInclusiveScanAMD(vec4);"
+
+            "int   maxInvocationsInclusiveScanAMD(int);"
+            "ivec2 maxInvocationsInclusiveScanAMD(ivec2);"
+            "ivec3 maxInvocationsInclusiveScanAMD(ivec3);"
+            "ivec4 maxInvocationsInclusiveScanAMD(ivec4);"
+
+            "uint  maxInvocationsInclusiveScanAMD(uint);"
+            "uvec2 maxInvocationsInclusiveScanAMD(uvec2);"
+            "uvec3 maxInvocationsInclusiveScanAMD(uvec3);"
+            "uvec4 maxInvocationsInclusiveScanAMD(uvec4);"
+
+            "double maxInvocationsInclusiveScanAMD(double);"
+            "dvec2  maxInvocationsInclusiveScanAMD(dvec2);"
+            "dvec3  maxInvocationsInclusiveScanAMD(dvec3);"
+            "dvec4  maxInvocationsInclusiveScanAMD(dvec4);"
+
+            "int64_t maxInvocationsInclusiveScanAMD(int64_t);"
+            "i64vec2 maxInvocationsInclusiveScanAMD(i64vec2);"
+            "i64vec3 maxInvocationsInclusiveScanAMD(i64vec3);"
+            "i64vec4 maxInvocationsInclusiveScanAMD(i64vec4);"
+
+            "uint64_t maxInvocationsInclusiveScanAMD(uint64_t);"
+            "u64vec2  maxInvocationsInclusiveScanAMD(u64vec2);"
+            "u64vec3  maxInvocationsInclusiveScanAMD(u64vec3);"
+            "u64vec4  maxInvocationsInclusiveScanAMD(u64vec4);"
+
+            "float16_t maxInvocationsInclusiveScanAMD(float16_t);"
+            "f16vec2   maxInvocationsInclusiveScanAMD(f16vec2);"
+            "f16vec3   maxInvocationsInclusiveScanAMD(f16vec3);"
+            "f16vec4   maxInvocationsInclusiveScanAMD(f16vec4);"
+
+            "float maxInvocationsExclusiveScanAMD(float);"
+            "vec2  maxInvocationsExclusiveScanAMD(vec2);"
+            "vec3  maxInvocationsExclusiveScanAMD(vec3);"
+            "vec4  maxInvocationsExclusiveScanAMD(vec4);"
+
+            "int   maxInvocationsExclusiveScanAMD(int);"
+            "ivec2 maxInvocationsExclusiveScanAMD(ivec2);"
+            "ivec3 maxInvocationsExclusiveScanAMD(ivec3);"
+            "ivec4 maxInvocationsExclusiveScanAMD(ivec4);"
+
+            "uint  maxInvocationsExclusiveScanAMD(uint);"
+            "uvec2 maxInvocationsExclusiveScanAMD(uvec2);"
+            "uvec3 maxInvocationsExclusiveScanAMD(uvec3);"
+            "uvec4 maxInvocationsExclusiveScanAMD(uvec4);"
+
+            "double maxInvocationsExclusiveScanAMD(double);"
+            "dvec2  maxInvocationsExclusiveScanAMD(dvec2);"
+            "dvec3  maxInvocationsExclusiveScanAMD(dvec3);"
+            "dvec4  maxInvocationsExclusiveScanAMD(dvec4);"
+
+            "int64_t maxInvocationsExclusiveScanAMD(int64_t);"
+            "i64vec2 maxInvocationsExclusiveScanAMD(i64vec2);"
+            "i64vec3 maxInvocationsExclusiveScanAMD(i64vec3);"
+            "i64vec4 maxInvocationsExclusiveScanAMD(i64vec4);"
+
+            "uint64_t maxInvocationsExclusiveScanAMD(uint64_t);"
+            "u64vec2  maxInvocationsExclusiveScanAMD(u64vec2);"
+            "u64vec3  maxInvocationsExclusiveScanAMD(u64vec3);"
+            "u64vec4  maxInvocationsExclusiveScanAMD(u64vec4);"
+
+            "float16_t maxInvocationsExclusiveScanAMD(float16_t);"
+            "f16vec2   maxInvocationsExclusiveScanAMD(f16vec2);"
+            "f16vec3   maxInvocationsExclusiveScanAMD(f16vec3);"
+            "f16vec4   maxInvocationsExclusiveScanAMD(f16vec4);"
+
             "float addInvocationsAMD(float);"
             "vec2  addInvocationsAMD(vec2);"
             "vec3  addInvocationsAMD(vec3);"
@@ -1617,6 +1797,96 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "uvec3 addInvocationsAMD(uvec3);"
             "uvec4 addInvocationsAMD(uvec4);"
 
+            "double  addInvocationsAMD(double);"
+            "dvec2   addInvocationsAMD(dvec2);"
+            "dvec3   addInvocationsAMD(dvec3);"
+            "dvec4   addInvocationsAMD(dvec4);"
+
+            "int64_t addInvocationsAMD(int64_t);"
+            "i64vec2 addInvocationsAMD(i64vec2);"
+            "i64vec3 addInvocationsAMD(i64vec3);"
+            "i64vec4 addInvocationsAMD(i64vec4);"
+
+            "uint64_t addInvocationsAMD(uint64_t);"
+            "u64vec2  addInvocationsAMD(u64vec2);"
+            "u64vec3  addInvocationsAMD(u64vec3);"
+            "u64vec4  addInvocationsAMD(u64vec4);"
+
+            "float16_t addInvocationsAMD(float16_t);"
+            "f16vec2   addInvocationsAMD(f16vec2);"
+            "f16vec3   addInvocationsAMD(f16vec3);"
+            "f16vec4   addInvocationsAMD(f16vec4);"
+
+            "float addInvocationsInclusiveScanAMD(float);"
+            "vec2  addInvocationsInclusiveScanAMD(vec2);"
+            "vec3  addInvocationsInclusiveScanAMD(vec3);"
+            "vec4  addInvocationsInclusiveScanAMD(vec4);"
+
+            "int   addInvocationsInclusiveScanAMD(int);"
+            "ivec2 addInvocationsInclusiveScanAMD(ivec2);"
+            "ivec3 addInvocationsInclusiveScanAMD(ivec3);"
+            "ivec4 addInvocationsInclusiveScanAMD(ivec4);"
+
+            "uint  addInvocationsInclusiveScanAMD(uint);"
+            "uvec2 addInvocationsInclusiveScanAMD(uvec2);"
+            "uvec3 addInvocationsInclusiveScanAMD(uvec3);"
+            "uvec4 addInvocationsInclusiveScanAMD(uvec4);"
+
+            "double  addInvocationsInclusiveScanAMD(double);"
+            "dvec2   addInvocationsInclusiveScanAMD(dvec2);"
+            "dvec3   addInvocationsInclusiveScanAMD(dvec3);"
+            "dvec4   addInvocationsInclusiveScanAMD(dvec4);"
+
+            "int64_t addInvocationsInclusiveScanAMD(int64_t);"
+            "i64vec2 addInvocationsInclusiveScanAMD(i64vec2);"
+            "i64vec3 addInvocationsInclusiveScanAMD(i64vec3);"
+            "i64vec4 addInvocationsInclusiveScanAMD(i64vec4);"
+
+            "uint64_t addInvocationsInclusiveScanAMD(uint64_t);"
+            "u64vec2  addInvocationsInclusiveScanAMD(u64vec2);"
+            "u64vec3  addInvocationsInclusiveScanAMD(u64vec3);"
+            "u64vec4  addInvocationsInclusiveScanAMD(u64vec4);"
+
+            "float16_t addInvocationsInclusiveScanAMD(float16_t);"
+            "f16vec2   addInvocationsInclusiveScanAMD(f16vec2);"
+            "f16vec3   addInvocationsInclusiveScanAMD(f16vec3);"
+            "f16vec4   addInvocationsInclusiveScanAMD(f16vec4);"
+
+            "float addInvocationsExclusiveScanAMD(float);"
+            "vec2  addInvocationsExclusiveScanAMD(vec2);"
+            "vec3  addInvocationsExclusiveScanAMD(vec3);"
+            "vec4  addInvocationsExclusiveScanAMD(vec4);"
+
+            "int   addInvocationsExclusiveScanAMD(int);"
+            "ivec2 addInvocationsExclusiveScanAMD(ivec2);"
+            "ivec3 addInvocationsExclusiveScanAMD(ivec3);"
+            "ivec4 addInvocationsExclusiveScanAMD(ivec4);"
+
+            "uint  addInvocationsExclusiveScanAMD(uint);"
+            "uvec2 addInvocationsExclusiveScanAMD(uvec2);"
+            "uvec3 addInvocationsExclusiveScanAMD(uvec3);"
+            "uvec4 addInvocationsExclusiveScanAMD(uvec4);"
+
+            "double  addInvocationsExclusiveScanAMD(double);"
+            "dvec2   addInvocationsExclusiveScanAMD(dvec2);"
+            "dvec3   addInvocationsExclusiveScanAMD(dvec3);"
+            "dvec4   addInvocationsExclusiveScanAMD(dvec4);"
+
+            "int64_t addInvocationsExclusiveScanAMD(int64_t);"
+            "i64vec2 addInvocationsExclusiveScanAMD(i64vec2);"
+            "i64vec3 addInvocationsExclusiveScanAMD(i64vec3);"
+            "i64vec4 addInvocationsExclusiveScanAMD(i64vec4);"
+
+            "uint64_t addInvocationsExclusiveScanAMD(uint64_t);"
+            "u64vec2  addInvocationsExclusiveScanAMD(u64vec2);"
+            "u64vec3  addInvocationsExclusiveScanAMD(u64vec3);"
+            "u64vec4  addInvocationsExclusiveScanAMD(u64vec4);"
+
+            "float16_t addInvocationsExclusiveScanAMD(float16_t);"
+            "f16vec2   addInvocationsExclusiveScanAMD(f16vec2);"
+            "f16vec3   addInvocationsExclusiveScanAMD(f16vec3);"
+            "f16vec4   addInvocationsExclusiveScanAMD(f16vec4);"
+
             "float minInvocationsNonUniformAMD(float);"
             "vec2  minInvocationsNonUniformAMD(vec2);"
             "vec3  minInvocationsNonUniformAMD(vec3);"
@@ -1632,6 +1902,96 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "uvec3 minInvocationsNonUniformAMD(uvec3);"
             "uvec4 minInvocationsNonUniformAMD(uvec4);"
 
+            "double minInvocationsNonUniformAMD(double);"
+            "dvec2  minInvocationsNonUniformAMD(dvec2);"
+            "dvec3  minInvocationsNonUniformAMD(dvec3);"
+            "dvec4  minInvocationsNonUniformAMD(dvec4);"
+
+            "int64_t minInvocationsNonUniformAMD(int64_t);"
+            "i64vec2 minInvocationsNonUniformAMD(i64vec2);"
+            "i64vec3 minInvocationsNonUniformAMD(i64vec3);"
+            "i64vec4 minInvocationsNonUniformAMD(i64vec4);"
+
+            "uint64_t minInvocationsNonUniformAMD(uint64_t);"
+            "u64vec2  minInvocationsNonUniformAMD(u64vec2);"
+            "u64vec3  minInvocationsNonUniformAMD(u64vec3);"
+            "u64vec4  minInvocationsNonUniformAMD(u64vec4);"
+
+            "float16_t minInvocationsNonUniformAMD(float16_t);"
+            "f16vec2   minInvocationsNonUniformAMD(f16vec2);"
+            "f16vec3   minInvocationsNonUniformAMD(f16vec3);"
+            "f16vec4   minInvocationsNonUniformAMD(f16vec4);"
+
+            "float minInvocationsInclusiveScanNonUniformAMD(float);"
+            "vec2  minInvocationsInclusiveScanNonUniformAMD(vec2);"
+            "vec3  minInvocationsInclusiveScanNonUniformAMD(vec3);"
+            "vec4  minInvocationsInclusiveScanNonUniformAMD(vec4);"
+
+            "int   minInvocationsInclusiveScanNonUniformAMD(int);"
+            "ivec2 minInvocationsInclusiveScanNonUniformAMD(ivec2);"
+            "ivec3 minInvocationsInclusiveScanNonUniformAMD(ivec3);"
+            "ivec4 minInvocationsInclusiveScanNonUniformAMD(ivec4);"
+
+            "uint  minInvocationsInclusiveScanNonUniformAMD(uint);"
+            "uvec2 minInvocationsInclusiveScanNonUniformAMD(uvec2);"
+            "uvec3 minInvocationsInclusiveScanNonUniformAMD(uvec3);"
+            "uvec4 minInvocationsInclusiveScanNonUniformAMD(uvec4);"
+
+            "double minInvocationsInclusiveScanNonUniformAMD(double);"
+            "dvec2  minInvocationsInclusiveScanNonUniformAMD(dvec2);"
+            "dvec3  minInvocationsInclusiveScanNonUniformAMD(dvec3);"
+            "dvec4  minInvocationsInclusiveScanNonUniformAMD(dvec4);"
+
+            "int64_t minInvocationsInclusiveScanNonUniformAMD(int64_t);"
+            "i64vec2 minInvocationsInclusiveScanNonUniformAMD(i64vec2);"
+            "i64vec3 minInvocationsInclusiveScanNonUniformAMD(i64vec3);"
+            "i64vec4 minInvocationsInclusiveScanNonUniformAMD(i64vec4);"
+
+            "uint64_t minInvocationsInclusiveScanNonUniformAMD(uint64_t);"
+            "u64vec2  minInvocationsInclusiveScanNonUniformAMD(u64vec2);"
+            "u64vec3  minInvocationsInclusiveScanNonUniformAMD(u64vec3);"
+            "u64vec4  minInvocationsInclusiveScanNonUniformAMD(u64vec4);"
+
+            "float16_t minInvocationsInclusiveScanNonUniformAMD(float16_t);"
+            "f16vec2   minInvocationsInclusiveScanNonUniformAMD(f16vec2);"
+            "f16vec3   minInvocationsInclusiveScanNonUniformAMD(f16vec3);"
+            "f16vec4   minInvocationsInclusiveScanNonUniformAMD(f16vec4);"
+
+            "float minInvocationsExclusiveScanNonUniformAMD(float);"
+            "vec2  minInvocationsExclusiveScanNonUniformAMD(vec2);"
+            "vec3  minInvocationsExclusiveScanNonUniformAMD(vec3);"
+            "vec4  minInvocationsExclusiveScanNonUniformAMD(vec4);"
+
+            "int   minInvocationsExclusiveScanNonUniformAMD(int);"
+            "ivec2 minInvocationsExclusiveScanNonUniformAMD(ivec2);"
+            "ivec3 minInvocationsExclusiveScanNonUniformAMD(ivec3);"
+            "ivec4 minInvocationsExclusiveScanNonUniformAMD(ivec4);"
+
+            "uint  minInvocationsExclusiveScanNonUniformAMD(uint);"
+            "uvec2 minInvocationsExclusiveScanNonUniformAMD(uvec2);"
+            "uvec3 minInvocationsExclusiveScanNonUniformAMD(uvec3);"
+            "uvec4 minInvocationsExclusiveScanNonUniformAMD(uvec4);"
+
+            "double minInvocationsExclusiveScanNonUniformAMD(double);"
+            "dvec2  minInvocationsExclusiveScanNonUniformAMD(dvec2);"
+            "dvec3  minInvocationsExclusiveScanNonUniformAMD(dvec3);"
+            "dvec4  minInvocationsExclusiveScanNonUniformAMD(dvec4);"
+
+            "int64_t minInvocationsExclusiveScanNonUniformAMD(int64_t);"
+            "i64vec2 minInvocationsExclusiveScanNonUniformAMD(i64vec2);"
+            "i64vec3 minInvocationsExclusiveScanNonUniformAMD(i64vec3);"
+            "i64vec4 minInvocationsExclusiveScanNonUniformAMD(i64vec4);"
+
+            "uint64_t minInvocationsExclusiveScanNonUniformAMD(uint64_t);"
+            "u64vec2  minInvocationsExclusiveScanNonUniformAMD(u64vec2);"
+            "u64vec3  minInvocationsExclusiveScanNonUniformAMD(u64vec3);"
+            "u64vec4  minInvocationsExclusiveScanNonUniformAMD(u64vec4);"
+
+            "float16_t minInvocationsExclusiveScanNonUniformAMD(float16_t);"
+            "f16vec2   minInvocationsExclusiveScanNonUniformAMD(f16vec2);"
+            "f16vec3   minInvocationsExclusiveScanNonUniformAMD(f16vec3);"
+            "f16vec4   minInvocationsExclusiveScanNonUniformAMD(f16vec4);"
+
             "float maxInvocationsNonUniformAMD(float);"
             "vec2  maxInvocationsNonUniformAMD(vec2);"
             "vec3  maxInvocationsNonUniformAMD(vec3);"
@@ -1647,6 +2007,96 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "uvec3 maxInvocationsNonUniformAMD(uvec3);"
             "uvec4 maxInvocationsNonUniformAMD(uvec4);"
 
+            "double maxInvocationsNonUniformAMD(double);"
+            "dvec2  maxInvocationsNonUniformAMD(dvec2);"
+            "dvec3  maxInvocationsNonUniformAMD(dvec3);"
+            "dvec4  maxInvocationsNonUniformAMD(dvec4);"
+
+            "int64_t maxInvocationsNonUniformAMD(int64_t);"
+            "i64vec2 maxInvocationsNonUniformAMD(i64vec2);"
+            "i64vec3 maxInvocationsNonUniformAMD(i64vec3);"
+            "i64vec4 maxInvocationsNonUniformAMD(i64vec4);"
+
+            "uint64_t maxInvocationsNonUniformAMD(uint64_t);"
+            "u64vec2  maxInvocationsNonUniformAMD(u64vec2);"
+            "u64vec3  maxInvocationsNonUniformAMD(u64vec3);"
+            "u64vec4  maxInvocationsNonUniformAMD(u64vec4);"
+
+            "float16_t maxInvocationsNonUniformAMD(float16_t);"
+            "f16vec2   maxInvocationsNonUniformAMD(f16vec2);"
+            "f16vec3   maxInvocationsNonUniformAMD(f16vec3);"
+            "f16vec4   maxInvocationsNonUniformAMD(f16vec4);"
+
+            "float maxInvocationsInclusiveScanNonUniformAMD(float);"
+            "vec2  maxInvocationsInclusiveScanNonUniformAMD(vec2);"
+            "vec3  maxInvocationsInclusiveScanNonUniformAMD(vec3);"
+            "vec4  maxInvocationsInclusiveScanNonUniformAMD(vec4);"
+
+            "int   maxInvocationsInclusiveScanNonUniformAMD(int);"
+            "ivec2 maxInvocationsInclusiveScanNonUniformAMD(ivec2);"
+            "ivec3 maxInvocationsInclusiveScanNonUniformAMD(ivec3);"
+            "ivec4 maxInvocationsInclusiveScanNonUniformAMD(ivec4);"
+
+            "uint  maxInvocationsInclusiveScanNonUniformAMD(uint);"
+            "uvec2 maxInvocationsInclusiveScanNonUniformAMD(uvec2);"
+            "uvec3 maxInvocationsInclusiveScanNonUniformAMD(uvec3);"
+            "uvec4 maxInvocationsInclusiveScanNonUniformAMD(uvec4);"
+
+            "double maxInvocationsInclusiveScanNonUniformAMD(double);"
+            "dvec2  maxInvocationsInclusiveScanNonUniformAMD(dvec2);"
+            "dvec3  maxInvocationsInclusiveScanNonUniformAMD(dvec3);"
+            "dvec4  maxInvocationsInclusiveScanNonUniformAMD(dvec4);"
+
+            "int64_t maxInvocationsInclusiveScanNonUniformAMD(int64_t);"
+            "i64vec2 maxInvocationsInclusiveScanNonUniformAMD(i64vec2);"
+            "i64vec3 maxInvocationsInclusiveScanNonUniformAMD(i64vec3);"
+            "i64vec4 maxInvocationsInclusiveScanNonUniformAMD(i64vec4);"
+
+            "uint64_t maxInvocationsInclusiveScanNonUniformAMD(uint64_t);"
+            "u64vec2  maxInvocationsInclusiveScanNonUniformAMD(u64vec2);"
+            "u64vec3  maxInvocationsInclusiveScanNonUniformAMD(u64vec3);"
+            "u64vec4  maxInvocationsInclusiveScanNonUniformAMD(u64vec4);"
+
+            "float16_t maxInvocationsInclusiveScanNonUniformAMD(float16_t);"
+            "f16vec2   maxInvocationsInclusiveScanNonUniformAMD(f16vec2);"
+            "f16vec3   maxInvocationsInclusiveScanNonUniformAMD(f16vec3);"
+            "f16vec4   maxInvocationsInclusiveScanNonUniformAMD(f16vec4);"
+
+            "float maxInvocationsExclusiveScanNonUniformAMD(float);"
+            "vec2  maxInvocationsExclusiveScanNonUniformAMD(vec2);"
+            "vec3  maxInvocationsExclusiveScanNonUniformAMD(vec3);"
+            "vec4  maxInvocationsExclusiveScanNonUniformAMD(vec4);"
+
+            "int   maxInvocationsExclusiveScanNonUniformAMD(int);"
+            "ivec2 maxInvocationsExclusiveScanNonUniformAMD(ivec2);"
+            "ivec3 maxInvocationsExclusiveScanNonUniformAMD(ivec3);"
+            "ivec4 maxInvocationsExclusiveScanNonUniformAMD(ivec4);"
+
+            "uint  maxInvocationsExclusiveScanNonUniformAMD(uint);"
+            "uvec2 maxInvocationsExclusiveScanNonUniformAMD(uvec2);"
+            "uvec3 maxInvocationsExclusiveScanNonUniformAMD(uvec3);"
+            "uvec4 maxInvocationsExclusiveScanNonUniformAMD(uvec4);"
+
+            "double maxInvocationsExclusiveScanNonUniformAMD(double);"
+            "dvec2  maxInvocationsExclusiveScanNonUniformAMD(dvec2);"
+            "dvec3  maxInvocationsExclusiveScanNonUniformAMD(dvec3);"
+            "dvec4  maxInvocationsExclusiveScanNonUniformAMD(dvec4);"
+
+            "int64_t maxInvocationsExclusiveScanNonUniformAMD(int64_t);"
+            "i64vec2 maxInvocationsExclusiveScanNonUniformAMD(i64vec2);"
+            "i64vec3 maxInvocationsExclusiveScanNonUniformAMD(i64vec3);"
+            "i64vec4 maxInvocationsExclusiveScanNonUniformAMD(i64vec4);"
+
+            "uint64_t maxInvocationsExclusiveScanNonUniformAMD(uint64_t);"
+            "u64vec2  maxInvocationsExclusiveScanNonUniformAMD(u64vec2);"
+            "u64vec3  maxInvocationsExclusiveScanNonUniformAMD(u64vec3);"
+            "u64vec4  maxInvocationsExclusiveScanNonUniformAMD(u64vec4);"
+
+            "float16_t maxInvocationsExclusiveScanNonUniformAMD(float16_t);"
+            "f16vec2   maxInvocationsExclusiveScanNonUniformAMD(f16vec2);"
+            "f16vec3   maxInvocationsExclusiveScanNonUniformAMD(f16vec3);"
+            "f16vec4   maxInvocationsExclusiveScanNonUniformAMD(f16vec4);"
+
             "float addInvocationsNonUniformAMD(float);"
             "vec2  addInvocationsNonUniformAMD(vec2);"
             "vec3  addInvocationsNonUniformAMD(vec3);"
@@ -1662,6 +2112,96 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "uvec3 addInvocationsNonUniformAMD(uvec3);"
             "uvec4 addInvocationsNonUniformAMD(uvec4);"
 
+            "double addInvocationsNonUniformAMD(double);"
+            "dvec2  addInvocationsNonUniformAMD(dvec2);"
+            "dvec3  addInvocationsNonUniformAMD(dvec3);"
+            "dvec4  addInvocationsNonUniformAMD(dvec4);"
+
+            "int64_t addInvocationsNonUniformAMD(int64_t);"
+            "i64vec2 addInvocationsNonUniformAMD(i64vec2);"
+            "i64vec3 addInvocationsNonUniformAMD(i64vec3);"
+            "i64vec4 addInvocationsNonUniformAMD(i64vec4);"
+
+            "uint64_t addInvocationsNonUniformAMD(uint64_t);"
+            "u64vec2  addInvocationsNonUniformAMD(u64vec2);"
+            "u64vec3  addInvocationsNonUniformAMD(u64vec3);"
+            "u64vec4  addInvocationsNonUniformAMD(u64vec4);"
+
+            "float16_t addInvocationsNonUniformAMD(float16_t);"
+            "f16vec2   addInvocationsNonUniformAMD(f16vec2);"
+            "f16vec3   addInvocationsNonUniformAMD(f16vec3);"
+            "f16vec4   addInvocationsNonUniformAMD(f16vec4);"
+
+            "float addInvocationsInclusiveScanNonUniformAMD(float);"
+            "vec2  addInvocationsInclusiveScanNonUniformAMD(vec2);"
+            "vec3  addInvocationsInclusiveScanNonUniformAMD(vec3);"
+            "vec4  addInvocationsInclusiveScanNonUniformAMD(vec4);"
+
+            "int   addInvocationsInclusiveScanNonUniformAMD(int);"
+            "ivec2 addInvocationsInclusiveScanNonUniformAMD(ivec2);"
+            "ivec3 addInvocationsInclusiveScanNonUniformAMD(ivec3);"
+            "ivec4 addInvocationsInclusiveScanNonUniformAMD(ivec4);"
+
+            "uint  addInvocationsInclusiveScanNonUniformAMD(uint);"
+            "uvec2 addInvocationsInclusiveScanNonUniformAMD(uvec2);"
+            "uvec3 addInvocationsInclusiveScanNonUniformAMD(uvec3);"
+            "uvec4 addInvocationsInclusiveScanNonUniformAMD(uvec4);"
+
+            "double addInvocationsInclusiveScanNonUniformAMD(double);"
+            "dvec2  addInvocationsInclusiveScanNonUniformAMD(dvec2);"
+            "dvec3  addInvocationsInclusiveScanNonUniformAMD(dvec3);"
+            "dvec4  addInvocationsInclusiveScanNonUniformAMD(dvec4);"
+
+            "int64_t addInvocationsInclusiveScanNonUniformAMD(int64_t);"
+            "i64vec2 addInvocationsInclusiveScanNonUniformAMD(i64vec2);"
+            "i64vec3 addInvocationsInclusiveScanNonUniformAMD(i64vec3);"
+            "i64vec4 addInvocationsInclusiveScanNonUniformAMD(i64vec4);"
+
+            "uint64_t addInvocationsInclusiveScanNonUniformAMD(uint64_t);"
+            "u64vec2  addInvocationsInclusiveScanNonUniformAMD(u64vec2);"
+            "u64vec3  addInvocationsInclusiveScanNonUniformAMD(u64vec3);"
+            "u64vec4  addInvocationsInclusiveScanNonUniformAMD(u64vec4);"
+
+            "float16_t addInvocationsInclusiveScanNonUniformAMD(float16_t);"
+            "f16vec2   addInvocationsInclusiveScanNonUniformAMD(f16vec2);"
+            "f16vec3   addInvocationsInclusiveScanNonUniformAMD(f16vec3);"
+            "f16vec4   addInvocationsInclusiveScanNonUniformAMD(f16vec4);"
+
+            "float addInvocationsExclusiveScanNonUniformAMD(float);"
+            "vec2  addInvocationsExclusiveScanNonUniformAMD(vec2);"
+            "vec3  addInvocationsExclusiveScanNonUniformAMD(vec3);"
+            "vec4  addInvocationsExclusiveScanNonUniformAMD(vec4);"
+
+            "int   addInvocationsExclusiveScanNonUniformAMD(int);"
+            "ivec2 addInvocationsExclusiveScanNonUniformAMD(ivec2);"
+            "ivec3 addInvocationsExclusiveScanNonUniformAMD(ivec3);"
+            "ivec4 addInvocationsExclusiveScanNonUniformAMD(ivec4);"
+
+            "uint  addInvocationsExclusiveScanNonUniformAMD(uint);"
+            "uvec2 addInvocationsExclusiveScanNonUniformAMD(uvec2);"
+            "uvec3 addInvocationsExclusiveScanNonUniformAMD(uvec3);"
+            "uvec4 addInvocationsExclusiveScanNonUniformAMD(uvec4);"
+
+            "double addInvocationsExclusiveScanNonUniformAMD(double);"
+            "dvec2  addInvocationsExclusiveScanNonUniformAMD(dvec2);"
+            "dvec3  addInvocationsExclusiveScanNonUniformAMD(dvec3);"
+            "dvec4  addInvocationsExclusiveScanNonUniformAMD(dvec4);"
+
+            "int64_t addInvocationsExclusiveScanNonUniformAMD(int64_t);"
+            "i64vec2 addInvocationsExclusiveScanNonUniformAMD(i64vec2);"
+            "i64vec3 addInvocationsExclusiveScanNonUniformAMD(i64vec3);"
+            "i64vec4 addInvocationsExclusiveScanNonUniformAMD(i64vec4);"
+
+            "uint64_t addInvocationsExclusiveScanNonUniformAMD(uint64_t);"
+            "u64vec2  addInvocationsExclusiveScanNonUniformAMD(u64vec2);"
+            "u64vec3  addInvocationsExclusiveScanNonUniformAMD(u64vec3);"
+            "u64vec4  addInvocationsExclusiveScanNonUniformAMD(u64vec4);"
+
+            "float16_t addInvocationsExclusiveScanNonUniformAMD(float16_t);"
+            "f16vec2   addInvocationsExclusiveScanNonUniformAMD(f16vec2);"
+            "f16vec3   addInvocationsExclusiveScanNonUniformAMD(f16vec3);"
+            "f16vec4   addInvocationsExclusiveScanNonUniformAMD(f16vec4);"
+
             "float swizzleInvocationsAMD(float, uvec4);"
             "vec2  swizzleInvocationsAMD(vec2,  uvec4);"
             "vec3  swizzleInvocationsAMD(vec3,  uvec4);"
@@ -4324,6 +4864,19 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
             symbolTable.setFunctionExtensions("swizzleInvocationsWithPatternAMD", 1, &E_GL_AMD_shader_ballot);
             symbolTable.setFunctionExtensions("writeInvocationAMD",               1, &E_GL_AMD_shader_ballot);
             symbolTable.setFunctionExtensions("mbcntAMD",                         1, &E_GL_AMD_shader_ballot);
+
+            symbolTable.setFunctionExtensions("minInvocationsInclusiveScanAMD",             1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("maxInvocationsInclusiveScanAMD",             1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("addInvocationsInclusiveScanAMD",             1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("minInvocationsInclusiveScanNonUniformAMD",   1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("maxInvocationsInclusiveScanNonUniformAMD",   1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("addInvocationsInclusiveScanNonUniformAMD",   1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("minInvocationsExclusiveScanAMD",             1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("maxInvocationsExclusiveScanAMD",             1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("addInvocationsExclusiveScanAMD",             1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("minInvocationsExclusiveScanNonUniformAMD",   1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("maxInvocationsExclusiveScanNonUniformAMD",   1, &E_GL_AMD_shader_ballot);
+            symbolTable.setFunctionExtensions("addInvocationsExclusiveScanNonUniformAMD",   1, &E_GL_AMD_shader_ballot);
         }
 
         if (profile != EEsProfile) {
@@ -4986,16 +5539,28 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
             symbolTable.relateToOperator("allInvocationsEqualARB",          EOpAllInvocationsEqual);
 
 #ifdef AMD_EXTENSIONS
-            symbolTable.relateToOperator("minInvocationsAMD",               EOpMinInvocations);
-            symbolTable.relateToOperator("maxInvocationsAMD",               EOpMaxInvocations);
-            symbolTable.relateToOperator("addInvocationsAMD",               EOpAddInvocations);
-            symbolTable.relateToOperator("minInvocationsNonUniformAMD",     EOpMinInvocationsNonUniform);
-            symbolTable.relateToOperator("maxInvocationsNonUniformAMD",     EOpMaxInvocationsNonUniform);
-            symbolTable.relateToOperator("addInvocationsNonUniformAMD",     EOpAddInvocationsNonUniform);
-            symbolTable.relateToOperator("swizzleInvocationsAMD",           EOpSwizzleInvocations);
-            symbolTable.relateToOperator("swizzleInvocationsMaskedAMD",     EOpSwizzleInvocationsMasked);
-            symbolTable.relateToOperator("writeInvocationAMD",              EOpWriteInvocation);
-            symbolTable.relateToOperator("mbcntAMD",                        EOpMbcnt);
+            symbolTable.relateToOperator("minInvocationsAMD",                           EOpMinInvocations);
+            symbolTable.relateToOperator("maxInvocationsAMD",                           EOpMaxInvocations);
+            symbolTable.relateToOperator("addInvocationsAMD",                           EOpAddInvocations);
+            symbolTable.relateToOperator("minInvocationsNonUniformAMD",                 EOpMinInvocationsNonUniform);
+            symbolTable.relateToOperator("maxInvocationsNonUniformAMD",                 EOpMaxInvocationsNonUniform);
+            symbolTable.relateToOperator("addInvocationsNonUniformAMD",                 EOpAddInvocationsNonUniform);
+            symbolTable.relateToOperator("minInvocationsInclusiveScanAMD",              EOpMinInvocationsInclusiveScan);
+            symbolTable.relateToOperator("maxInvocationsInclusiveScanAMD",              EOpMaxInvocationsInclusiveScan);
+            symbolTable.relateToOperator("addInvocationsInclusiveScanAMD",              EOpAddInvocationsInclusiveScan);
+            symbolTable.relateToOperator("minInvocationsInclusiveScanNonUniformAMD",    EOpMinInvocationsInclusiveScanNonUniform);
+            symbolTable.relateToOperator("maxInvocationsInclusiveScanNonUniformAMD",    EOpMaxInvocationsInclusiveScanNonUniform);
+            symbolTable.relateToOperator("addInvocationsInclusiveScanNonUniformAMD",    EOpAddInvocationsInclusiveScanNonUniform);
+            symbolTable.relateToOperator("minInvocationsExclusiveScanAMD",              EOpMinInvocationsExclusiveScan);
+            symbolTable.relateToOperator("maxInvocationsExclusiveScanAMD",              EOpMaxInvocationsExclusiveScan);
+            symbolTable.relateToOperator("addInvocationsExclusiveScanAMD",              EOpAddInvocationsExclusiveScan);
+            symbolTable.relateToOperator("minInvocationsExclusiveScanNonUniformAMD",    EOpMinInvocationsExclusiveScanNonUniform);
+            symbolTable.relateToOperator("maxInvocationsExclusiveScanNonUniformAMD",    EOpMaxInvocationsExclusiveScanNonUniform);
+            symbolTable.relateToOperator("addInvocationsExclusiveScanNonUniformAMD",    EOpAddInvocationsExclusiveScanNonUniform);
+            symbolTable.relateToOperator("swizzleInvocationsAMD",                       EOpSwizzleInvocations);
+            symbolTable.relateToOperator("swizzleInvocationsMaskedAMD",                 EOpSwizzleInvocationsMasked);
+            symbolTable.relateToOperator("writeInvocationAMD",                          EOpWriteInvocation);
+            symbolTable.relateToOperator("mbcntAMD",                                    EOpMbcnt);
 
             symbolTable.relateToOperator("min3",    EOpMin3);
             symbolTable.relateToOperator("max3",    EOpMax3);
diff --git a/3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp b/3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp
index e89bc31bd..8d68517a3 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp
@@ -1392,18 +1392,35 @@ TIntermConstantUnion* TIntermediate::addConstantUnion(double d, TBasicType baseT
     return addConstantUnion(unionArray, TType(baseType, EvqConst), loc, literal);
 }
 
-TIntermTyped* TIntermediate::addSwizzle(TVectorFields& fields, const TSourceLoc& loc)
+// Append one vector swizzle selector to the given sequence as a constant node
+void TIntermediate::pushSelector(TIntermSequence& sequence, const TVectorSelector& selector, const TSourceLoc& loc)
+{
+    // the selector value itself is what gets wrapped in the constant node
+    sequence.push_back(addConstantUnion(selector, loc));
+}
+
+// Append one matrix swizzle selector to the given sequence: two constant
+// nodes, coord1 followed by coord2
+void TIntermediate::pushSelector(TIntermSequence& sequence, const TMatrixSelector& selector, const TSourceLoc& loc)
+{
+    // one constant node per coordinate, kept in the selector's own order
+    sequence.push_back(addConstantUnion(selector.coord1, loc));
+    sequence.push_back(addConstantUnion(selector.coord2, loc));
+}
+
+// Make an EOpSequence aggregate node whose sequence holds the constant nodes for all selectors, in order.
+template TIntermTyped* TIntermediate::addSwizzle<TVectorSelector>(TSwizzleSelectors<TVectorSelector>& selector, const TSourceLoc& loc);
+template TIntermTyped* TIntermediate::addSwizzle<TMatrixSelector>(TSwizzleSelectors<TMatrixSelector>& selector, const TSourceLoc& loc);
+template<typename selectorType>
+TIntermTyped* TIntermediate::addSwizzle(TSwizzleSelectors<selectorType>& selector, const TSourceLoc& loc)
 {
     TIntermAggregate* node = new TIntermAggregate(EOpSequence);
 
     node->setLoc(loc);
-    TIntermConstantUnion* constIntNode;
     TIntermSequence &sequenceVector = node->getSequence();
 
-    for (int i = 0; i < fields.num; i++) {
-        constIntNode = addConstantUnion(fields.offsets[i], loc);
-        sequenceVector.push_back(constIntNode);
-    }
+    for (int i = 0; i < selector.size(); i++)
+        pushSelector(sequenceVector, selector[i], loc);
 
     return node;
 }
@@ -1425,10 +1442,10 @@ const TIntermTyped* TIntermediate::findLValueBase(const TIntermTyped* node, bool
         if (binary == nullptr)
             return node;
         TOperator op = binary->getOp();
-        if (op != EOpIndexDirect && op != EOpIndexIndirect && op != EOpIndexDirectStruct && op != EOpVectorSwizzle)
+        if (op != EOpIndexDirect && op != EOpIndexIndirect && op != EOpIndexDirectStruct && op != EOpVectorSwizzle && op != EOpMatrixSwizzle)
             return nullptr;
         if (! swizzleOkay) {
-            if (op == EOpVectorSwizzle)
+            if (op == EOpVectorSwizzle || op == EOpMatrixSwizzle)
                 return nullptr;
             if ((op == EOpIndexDirect || op == EOpIndexIndirect) &&
                 (binary->getLeft()->getType().isVector() || binary->getLeft()->getType().isScalar()) &&
diff --git a/3rdparty/glslang/glslang/MachineIndependent/ParseContextBase.cpp b/3rdparty/glslang/glslang/MachineIndependent/ParseContextBase.cpp
index 2b1a7a3ab..24c8e6095 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/ParseContextBase.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/ParseContextBase.cpp
@@ -129,6 +129,7 @@ bool TParseContextBase::lValueErrorCheck(const TSourceLoc& loc, const char* op,
         case EOpIndexIndirect:     // fall through
         case EOpIndexDirectStruct: // fall through
         case EOpVectorSwizzle:
+        case EOpMatrixSwizzle:
             return lValueErrorCheck(loc, op, binaryNode->getLeft());
         default:
             break;
@@ -208,6 +209,7 @@ void TParseContextBase::rValueErrorCheck(const TSourceLoc& loc, const char* op,
         case EOpIndexIndirect:
         case EOpIndexDirectStruct:
         case EOpVectorSwizzle:
+        case EOpMatrixSwizzle:
             rValueErrorCheck(loc, op, binaryNode->getLeft());
         default:
             break;
@@ -431,6 +433,108 @@ const TFunction* TParseContextBase::selectFunction(
     return incumbent;
 }
 
+//
+// Look at a '.' field selector string and change it into numerical selectors
+// for a vector or scalar with 'vecSize' components, reporting problems at 'loc'.
+//
+// Always return some form of swizzle, so the result is always usable.
+//
+void TParseContextBase::parseSwizzleSelector(const TSourceLoc& loc, const TString& compString, int vecSize,
+                                             TSwizzleSelectors<TVectorSelector>& selector)
+{
+    // Too long?
+    if (compString.size() > MaxSwizzleSelectors)
+        error(loc, "vector swizzle too long", compString.c_str(), "");
+
+    // Use this to test that all swizzle characters are from the same swizzle-namespace-set.
+    // Entry k describes selector[k]; this assumes 'selector' arrives empty.
+    enum {
+        exyzw,
+        ergba,
+        estpq,
+    } fieldSet[MaxSwizzleSelectors];
+
+    // Decode the swizzle string. Unknown characters are reported and skipped, so fieldSet
+    // is filled per selector slot (not per character); the size() guard keeps that index in range.
+    int size = std::min(MaxSwizzleSelectors, (int)compString.size());
+    for (int i = 0; i < size && selector.size() < MaxSwizzleSelectors; ++i) {
+        switch (compString[i])  {
+        case 'x':
+            fieldSet[selector.size()] = exyzw;
+            selector.push_back(0);
+            break;
+        case 'r':
+            fieldSet[selector.size()] = ergba;
+            selector.push_back(0);
+            break;
+        case 's':
+            fieldSet[selector.size()] = estpq;
+            selector.push_back(0);
+            break;
+
+        case 'y':
+            fieldSet[selector.size()] = exyzw;
+            selector.push_back(1);
+            break;
+        case 'g':
+            fieldSet[selector.size()] = ergba;
+            selector.push_back(1);
+            break;
+        case 't':
+            fieldSet[selector.size()] = estpq;
+            selector.push_back(1);
+            break;
+
+        case 'z':
+            fieldSet[selector.size()] = exyzw;
+            selector.push_back(2);
+            break;
+        case 'b':
+            fieldSet[selector.size()] = ergba;
+            selector.push_back(2);
+            break;
+        case 'p':
+            fieldSet[selector.size()] = estpq;
+            selector.push_back(2);
+            break;
+
+        case 'w':
+            fieldSet[selector.size()] = exyzw;
+            selector.push_back(3);
+            break;
+        case 'a':
+            fieldSet[selector.size()] = ergba;
+            selector.push_back(3);
+            break;
+        case 'q':
+            fieldSet[selector.size()] = estpq;
+            selector.push_back(3);
+            break;
+        default:
+            error(loc, "unknown swizzle selection", compString.c_str(), "");
+            break;
+        }
+    }
+
+    // Additional error checking; the first offending selector truncates the result there.
+    for (int i = 0; i < selector.size(); ++i) {
+        if (selector[i] >= vecSize) {
+            error(loc, "vector swizzle selection out of range",  compString.c_str(), "");
+            selector.resize(i);
+            break;
+        }
+        if (i > 0 && fieldSet[i] != fieldSet[i-1]) {
+            error(loc, "vector swizzle selectors not from the same set", compString.c_str(), "");
+            selector.resize(i);
+            break;
+        }
+    }
+
+    // Ensure it is valid.
+    if (selector.size() == 0)
+        selector.push_back(0);
+}
+
 //
 // Make the passed-in variable information become a member of the
 // global uniform block.  If this doesn't exist yet, make it.
diff --git a/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp b/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp
index cfb56b04d..29c5c9feb 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp
@@ -258,106 +258,6 @@ void TParseContext::handlePragma(const TSourceLoc& loc, const TVector<TString>&
     }
 }
 
-///////////////////////////////////////////////////////////////////////
-//
-// Sub- vector and matrix fields
-//
-////////////////////////////////////////////////////////////////////////
-
-//
-// Look at a '.' field selector string and change it into offsets
-// for a vector or scalar
-//
-// Returns true if there is no error.
-//
-bool TParseContext::parseVectorFields(const TSourceLoc& loc, const TString& compString, int vecSize, TVectorFields& fields)
-{
-    fields.num = (int)compString.size();
-    if (fields.num > 4) {
-        error(loc, "illegal vector field selection", compString.c_str(), "");
-        return false;
-    }
-
-    enum {
-        exyzw,
-        ergba,
-        estpq,
-    } fieldSet[4];
-
-    for (int i = 0; i < fields.num; ++i) {
-        switch (compString[i])  {
-        case 'x':
-            fields.offsets[i] = 0;
-            fieldSet[i] = exyzw;
-            break;
-        case 'r':
-            fields.offsets[i] = 0;
-            fieldSet[i] = ergba;
-            break;
-        case 's':
-            fields.offsets[i] = 0;
-            fieldSet[i] = estpq;
-            break;
-        case 'y':
-            fields.offsets[i] = 1;
-            fieldSet[i] = exyzw;
-            break;
-        case 'g':
-            fields.offsets[i] = 1;
-            fieldSet[i] = ergba;
-            break;
-        case 't':
-            fields.offsets[i] = 1;
-            fieldSet[i] = estpq;
-            break;
-        case 'z':
-            fields.offsets[i] = 2;
-            fieldSet[i] = exyzw;
-            break;
-        case 'b':
-            fields.offsets[i] = 2;
-            fieldSet[i] = ergba;
-            break;
-        case 'p':
-            fields.offsets[i] = 2;
-            fieldSet[i] = estpq;
-            break;
-
-        case 'w':
-            fields.offsets[i] = 3;
-            fieldSet[i] = exyzw;
-            break;
-        case 'a':
-            fields.offsets[i] = 3;
-            fieldSet[i] = ergba;
-            break;
-        case 'q':
-            fields.offsets[i] = 3;
-            fieldSet[i] = estpq;
-            break;
-        default:
-            error(loc, "illegal vector field selection", compString.c_str(), "");
-            return false;
-        }
-    }
-
-    for (int i = 0; i < fields.num; ++i) {
-        if (fields.offsets[i] >= vecSize) {
-            error(loc, "vector field selection out of range",  compString.c_str(), "");
-            return false;
-        }
-
-        if (i > 0) {
-            if (fieldSet[i] != fieldSet[i-1]) {
-                error(loc, "illegal - vector component fields not from the same set", compString.c_str(), "");
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
 //
 // Handle seeing a variable identifier in the grammar.
 //
@@ -781,17 +681,14 @@ TIntermTyped* TParseContext::handleDotDereference(const TSourceLoc& loc, TInterm
             profileRequires(loc, ~EEsProfile, 420, E_GL_ARB_shading_language_420pack, dotFeature);
         }
 
-        TVectorFields fields;
-        if (! parseVectorFields(loc, field, base->getVectorSize(), fields)) {
-            fields.num = 1;
-            fields.offsets[0] = 0;
-        }
+        TSwizzleSelectors<TVectorSelector> selectors;
+        parseSwizzleSelector(loc, field, base->getVectorSize(), selectors);
 
         if (base->isScalar()) {
-            if (fields.num == 1)
+            if (selectors.size() == 1)
                 return result;
             else {
-                TType type(base->getBasicType(), EvqTemporary, fields.num);
+                TType type(base->getBasicType(), EvqTemporary, selectors.size());
                 // Swizzle operations propagate specialization-constantness
                 if (base->getQualifier().isSpecConstant())
                     type.getQualifier().makeSpecConstant();
@@ -800,17 +697,16 @@ TIntermTyped* TParseContext::handleDotDereference(const TSourceLoc& loc, TInterm
         }
 
         if (base->getType().getQualifier().isFrontEndConstant())
-            result = intermediate.foldSwizzle(base, fields, loc);
+            result = intermediate.foldSwizzle(base, selectors, loc);
         else {
-            if (fields.num == 1) {
-                TIntermTyped* index = intermediate.addConstantUnion(fields.offsets[0], loc);
+            if (selectors.size() == 1) {
+                TIntermTyped* index = intermediate.addConstantUnion(selectors[0], loc);
                 result = intermediate.addIndex(EOpIndexDirect, base, index, loc);
                 result->setType(TType(base->getBasicType(), EvqTemporary, base->getType().getQualifier().precision));
             } else {
-                TString vectorString = field;
-                TIntermTyped* index = intermediate.addSwizzle(fields, loc);
+                TIntermTyped* index = intermediate.addSwizzle(selectors, loc);
                 result = intermediate.addIndex(EOpVectorSwizzle, base, index, loc);
-                result->setType(TType(base->getBasicType(), EvqTemporary, base->getType().getQualifier().precision, (int)vectorString.size()));
+                result->setType(TType(base->getBasicType(), EvqTemporary, base->getType().getQualifier().precision, selectors.size()));
             }
             // Swizzle operations propagate specialization-constantness
             if (base->getType().getQualifier().isSpecConstant())
@@ -5093,7 +4989,7 @@ void TParseContext::inheritGlobalDefaults(TQualifier& dst) const
 //
 TVariable* TParseContext::makeInternalVariable(const char* name, const TType& type) const
 {
-    TString* nameString = new TString(name);
+    TString* nameString = NewPoolTString(name);
     TVariable* variable = new TVariable(nameString, type);
     symbolTable.makeInternalVariable(*variable);
 
diff --git a/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.h b/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.h
index 8d169e4ee..2e09a5ae9 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.h
+++ b/3rdparty/glslang/glslang/MachineIndependent/ParseHelper.h
@@ -170,6 +170,9 @@ protected:
         std::function<bool(const TType&, const TType&, const TType&)>,
         /* output */ bool& tie);
 
+    virtual void parseSwizzleSelector(const TSourceLoc&, const TString&, int size,
+                                      TSwizzleSelectors<TVectorSelector>&);
+
     // Manage the global uniform block (default uniforms in GLSL, $Global in HLSL)
     TVariable* globalUniformBlock;   // the actual block, inserted into the symbol table
     int firstNewMember;              // the index of the first member not yet inserted into the symbol table
@@ -284,7 +287,6 @@ public:
     void handlePrecisionQualifier(const TSourceLoc&, TQualifier&, TPrecisionQualifier);
     void checkPrecisionQualifier(const TSourceLoc&, TPrecisionQualifier);
 
-    bool parseVectorFields(const TSourceLoc&, const TString&, int vecSize, TVectorFields&);
     void assignError(const TSourceLoc&, const char* op, TString left, TString right);
     void unaryOpError(const TSourceLoc&, const char* op, TString operand);
     void binaryOpError(const TSourceLoc&, const char* op, TString left, TString right);
diff --git a/3rdparty/glslang/glslang/MachineIndependent/Scan.cpp b/3rdparty/glslang/glslang/MachineIndependent/Scan.cpp
index d2f728441..0e64364e2 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/Scan.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/Scan.cpp
@@ -721,7 +721,7 @@ int TScanContext::tokenize(TPpContext* pp, TParserToken& token)
 
         default:
             char buf[2];
-            buf[0] = token;
+            buf[0] = (char)token;
             buf[1] = 0;
             parseContext.error(loc, "unexpected token", buf, "");
             break;
diff --git a/3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp b/3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp
index 12f967d47..b8a6969d1 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp
@@ -146,6 +146,7 @@ bool TOutputTraverser::visitBinary(TVisit /* visit */, TIntermBinary* node)
         out.debug << (*node->getLeft()->getType().getStruct())[node->getRight()->getAsConstantUnion()->getConstArray()[0].getIConst()].type->getFieldName();
         out.debug << ": direct index for structure";      break;
     case EOpVectorSwizzle: out.debug << "vector swizzle"; break;
+    case EOpMatrixSwizzle: out.debug << "matrix swizzle"; break;
 
     case EOpAdd:    out.debug << "add";                     break;
     case EOpSub:    out.debug << "subtract";                break;
@@ -376,6 +377,21 @@ bool TOutputTraverser::visitUnary(TVisit /* visit */, TIntermUnary* node)
     case EOpMinInvocationsNonUniform:   out.debug << "minInvocationsNonUniform";    break;
     case EOpMaxInvocationsNonUniform:   out.debug << "maxInvocationsNonUniform";    break;
     case EOpAddInvocationsNonUniform:   out.debug << "addInvocationsNonUniform";    break;
+
+    case EOpMinInvocationsInclusiveScan:            out.debug << "minInvocationsInclusiveScan";             break;
+    case EOpMaxInvocationsInclusiveScan:            out.debug << "maxInvocationsInclusiveScan";             break;
+    case EOpAddInvocationsInclusiveScan:            out.debug << "addInvocationsInclusiveScan";             break;
+    case EOpMinInvocationsInclusiveScanNonUniform:  out.debug << "minInvocationsInclusiveScanNonUniform";   break;
+    case EOpMaxInvocationsInclusiveScanNonUniform:  out.debug << "maxInvocationsInclusiveScanNonUniform";   break;
+    case EOpAddInvocationsInclusiveScanNonUniform:  out.debug << "addInvocationsInclusiveScanNonUniform";   break;
+
+    case EOpMinInvocationsExclusiveScan:            out.debug << "minInvocationsExclusiveScan";             break;
+    case EOpMaxInvocationsExclusiveScan:            out.debug << "maxInvocationsExclusiveScan";             break;
+    case EOpAddInvocationsExclusiveScan:            out.debug << "addInvocationsExclusiveScan";             break;
+    case EOpMinInvocationsExclusiveScanNonUniform:  out.debug << "minInvocationsExclusiveScanNonUniform";   break;
+    case EOpMaxInvocationsExclusiveScanNonUniform:  out.debug << "maxInvocationsExclusiveScanNonUniform";   break;
+    case EOpAddInvocationsExclusiveScanNonUniform:  out.debug << "addInvocationsExclusiveScanNonUniform";   break;
+
     case EOpMbcnt:                      out.debug << "mbcnt";                       break;
 
     case EOpCubeFaceIndex:          out.debug << "cubeFaceIndex";         break;
diff --git a/3rdparty/glslang/glslang/MachineIndependent/localintermediate.h b/3rdparty/glslang/glslang/MachineIndependent/localintermediate.h
index 24370029f..ec58d50d4 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/localintermediate.h
+++ b/3rdparty/glslang/glslang/MachineIndependent/localintermediate.h
@@ -47,19 +47,40 @@ class TInfoSink;
 
 namespace glslang {
 
-struct TVectorFields {
-    TVectorFields() { }
+struct TMatrixSelector {
+    int coord1;  // stay agnostic about column/row; this is parse order
+    int coord2;
+};
 
-    TVectorFields(int c0, int c1, int c2, int c3) : num(4)
+typedef int TVectorSelector;
+
+const int MaxSwizzleSelectors = 4;
+
+template<typename selectorType>
+class TSwizzleSelectors {
+public:
+    TSwizzleSelectors() : size_(0) { }
+
+    void push_back(selectorType comp)
     {
-        offsets[0] = c0;
-        offsets[1] = c1;
-        offsets[2] = c2;
-        offsets[3] = c3;
+        if (size_ < MaxSwizzleSelectors)
+            components[size_++] = comp;
     }
-
-    int offsets[4];
-    int num;
+    void resize(int s)
+    {
+        assert(s <= size_);
+        size_ = s;
+    }
+    int size() const { return size_; }
+    selectorType operator[](int i) const
+    {
+        assert(i < MaxSwizzleSelectors);
+        return components[i];
+    }
+    
+private:
+    int size_;
+    selectorType components[MaxSwizzleSelectors];
 };
 
 //
@@ -248,7 +269,7 @@ public:
     TIntermAggregate* addForLoop(TIntermNode*, TIntermNode*, TIntermTyped*, TIntermTyped*, bool testFirst, const TSourceLoc&);
     TIntermBranch* addBranch(TOperator, const TSourceLoc&);
     TIntermBranch* addBranch(TOperator, TIntermTyped*, const TSourceLoc&);
-    TIntermTyped* addSwizzle(TVectorFields&, const TSourceLoc&);
+    template<typename selectorType> TIntermTyped* addSwizzle(TSwizzleSelectors<selectorType>&, const TSourceLoc&);
 
     // Low level functions to add nodes (no conversions or other higher level transformations)
     // If a type is provided, the node's type will be set to it.
@@ -264,7 +285,7 @@ public:
     TIntermTyped* fold(TIntermAggregate* aggrNode);
     TIntermTyped* foldConstructor(TIntermAggregate* aggrNode);
     TIntermTyped* foldDereference(TIntermTyped* node, int index, const TSourceLoc&);
-    TIntermTyped* foldSwizzle(TIntermTyped* node, TVectorFields& fields, const TSourceLoc&);
+    TIntermTyped* foldSwizzle(TIntermTyped* node, TSwizzleSelectors<TVectorSelector>& fields, const TSourceLoc&);
 
     // Tree ops
     static const TIntermTyped* findLValueBase(const TIntermTyped*, bool swizzleOkay);
@@ -417,6 +438,8 @@ protected:
     bool promoteBinary(TIntermBinary&);
     void addSymbolLinkageNode(TIntermAggregate*& linkage, TSymbolTable&, const TString&);
     bool promoteAggregate(TIntermAggregate&);
+    void pushSelector(TIntermSequence&, const TVectorSelector&, const TSourceLoc&);
+    void pushSelector(TIntermSequence&, const TMatrixSelector&, const TSourceLoc&);
 
     const EShLanguage language;  // stage, known at construction time
     EShSource source;            // source language, known a bit later
diff --git a/3rdparty/glslang/glslang/MachineIndependent/preprocessor/Pp.cpp b/3rdparty/glslang/glslang/MachineIndependent/preprocessor/Pp.cpp
index 49c78ef88..abce3b564 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/preprocessor/Pp.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/preprocessor/Pp.cpp
@@ -968,7 +968,7 @@ int TPpContext::scanHeaderName(TPpToken* ppToken, char delimit)
 
         // found a character to expand the name with
         if (len < MaxTokenLength)
-            ppToken->name[len++] = ch;
+            ppToken->name[len++] = (char)ch;
         else
             tooLong = true;
     } while (true);
diff --git a/3rdparty/glslang/glslang/MachineIndependent/preprocessor/PpScanner.cpp b/3rdparty/glslang/glslang/MachineIndependent/preprocessor/PpScanner.cpp
index ffd4cdd98..83e6c89cc 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/preprocessor/PpScanner.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/preprocessor/PpScanner.cpp
@@ -100,36 +100,26 @@ namespace glslang {
 int TPpContext::lFloatConst(int len, int ch, TPpToken* ppToken)
 {
     bool HasDecimalOrExponent = false;
-    int declen;
-    int str_len;
     int isDouble = 0;
 #ifdef AMD_EXTENSIONS
     int isFloat16 = 0;
     bool enableFloat16 = parseContext.version >= 450 && parseContext.extensionTurnedOn(E_GL_AMD_gpu_shader_half_float);
 #endif
 
-    declen = 0;
+    const auto saveName = [&](int ch) {
+        if (len <= MaxTokenLength)
+            ppToken->name[len++] = static_cast<char>(ch);
+    };
+
+    // Decimal:
 
-    str_len=len;
-    char* str = ppToken->name;
     if (ch == '.') {
         HasDecimalOrExponent = true;
-        str[len++] = (char)ch;
+        saveName(ch);
         ch = getChar();
         while (ch >= '0' && ch <= '9') {
-            if (len < MaxTokenLength) {
-                declen++;
-                if (len > 0 || ch != '0') {
-                    str[len] = (char)ch;
-                    len++;
-                    str_len++;
-                }
-                ch = getChar();
-            } else {
-                parseContext.ppError(ppToken->loc, "float literal too long", "", "");
-                len = 1;
-                str_len = 1;
-            }
+            saveName(ch);
+            ch = getChar();
         }
     }
 
@@ -137,101 +127,74 @@ int TPpContext::lFloatConst(int len, int ch, TPpToken* ppToken)
 
     if (ch == 'e' || ch == 'E') {
         HasDecimalOrExponent = true;
-        if (len >= MaxTokenLength) {
-            parseContext.ppError(ppToken->loc, "float literal too long", "", "");
-            len = 1;
-            str_len = 1;
-        } else {
-            str[len++] = (char)ch;
+        saveName(ch);
+        ch = getChar();
+        if (ch == '+' || ch == '-') {
+            saveName(ch);
             ch = getChar();
-            if (ch == '+') {
-                str[len++] = (char)ch;
-                ch = getChar();
-            } else if (ch == '-') {
-                str[len++] = (char)ch;
+        }
+        if (ch >= '0' && ch <= '9') {
+            while (ch >= '0' && ch <= '9') {
+                saveName(ch);
                 ch = getChar();
             }
-            if (ch >= '0' && ch <= '9') {
-                while (ch >= '0' && ch <= '9') {
-                    if (len < MaxTokenLength) {
-                        str[len++] = (char)ch;
-                        ch = getChar();
-                    } else {
-                        parseContext.ppError(ppToken->loc, "float literal too long", "", "");
-                        len = 1;
-                        str_len = 1;
-                    }
-                }
-            } else {
-                parseContext.ppError(ppToken->loc, "bad character in float exponent", "", "");
-            }
+        } else {
+            parseContext.ppError(ppToken->loc, "bad character in float exponent", "", "");
         }
     }
 
-    if (len == 0) {
-        ppToken->dval = 0.0;
-        strcpy(str, "0.0");
-    } else {
-        if (ch == 'l' || ch == 'L') {
-            parseContext.doubleCheck(ppToken->loc, "double floating-point suffix");
-            if (! HasDecimalOrExponent)
-                parseContext.ppError(ppToken->loc, "float literal needs a decimal point or exponent", "", "");
-            int ch2 = getChar();
-            if (ch2 != 'f' && ch2 != 'F') {
-                ungetChar();
-                ungetChar();
-            } else {
-                if (len < MaxTokenLength) {
-                    str[len++] = (char)ch;
-                    str[len++] = (char)ch2;
-                    isDouble = 1;
-                } else {
-                    parseContext.ppError(ppToken->loc, "float literal too long", "", "");
-                    len = 1,str_len=1;
-                }
-            }
-#ifdef AMD_EXTENSIONS
-        } else if (enableFloat16 && (ch == 'h' || ch == 'H')) {
-            parseContext.float16Check(ppToken->loc, "half floating-point suffix");
-            if (!HasDecimalOrExponent)
-                parseContext.ppError(ppToken->loc, "float literal needs a decimal point or exponent", "", "");
-            int ch2 = getChar();
-            if (ch2 != 'f' && ch2 != 'F') {
-                ungetChar();
-                ungetChar();
-            }
-            else {
-                if (len < MaxTokenLength) {
-                    str[len++] = (char)ch;
-                    str[len++] = (char)ch2;
-                    isFloat16 = 1;
-                }
-                else {
-                    parseContext.ppError(ppToken->loc, "float literal too long", "", "");
-                    len = 1, str_len = 1;
-                }
-            }
-#endif
-        } else if (ch == 'f' || ch == 'F') {
-            parseContext.profileRequires(ppToken->loc,  EEsProfile, 300, nullptr, "floating-point suffix");
-            if (! parseContext.relaxedErrors())
-                parseContext.profileRequires(ppToken->loc, ~EEsProfile, 120, nullptr, "floating-point suffix");
-            if (! HasDecimalOrExponent)
-                parseContext.ppError(ppToken->loc, "float literal needs a decimal point or exponent", "", "");
-            if (len < MaxTokenLength)
-                str[len++] = (char)ch;
-            else {
-                parseContext.ppError(ppToken->loc, "float literal too long", "", "");
-                len = 1,str_len=1;
-            }
-        } else
+    // Suffix:
+
+    if (ch == 'l' || ch == 'L') {
+        parseContext.doubleCheck(ppToken->loc, "double floating-point suffix");
+        if (! HasDecimalOrExponent)
+            parseContext.ppError(ppToken->loc, "float literal needs a decimal point or exponent", "", "");
+        int ch2 = getChar();
+        if (ch2 != 'f' && ch2 != 'F') {
             ungetChar();
+            ungetChar();
+        } else {
+            saveName(ch);
+            saveName(ch2);
+            isDouble = 1;
+        }
+#ifdef AMD_EXTENSIONS
+    } else if (enableFloat16 && (ch == 'h' || ch == 'H')) {
+        parseContext.float16Check(ppToken->loc, "half floating-point suffix");
+        if (!HasDecimalOrExponent)
+            parseContext.ppError(ppToken->loc, "float literal needs a decimal point or exponent", "", "");
+        int ch2 = getChar();
+        if (ch2 != 'f' && ch2 != 'F') {
+            ungetChar();
+            ungetChar();
+        } else {
+            saveName(ch);
+            saveName(ch2);
+            isFloat16 = 1;
+        }
+#endif
+    } else if (ch == 'f' || ch == 'F') {
+        parseContext.profileRequires(ppToken->loc,  EEsProfile, 300, nullptr, "floating-point suffix");
+        if (! parseContext.relaxedErrors())
+            parseContext.profileRequires(ppToken->loc, ~EEsProfile, 120, nullptr, "floating-point suffix");
+        if (! HasDecimalOrExponent)
+            parseContext.ppError(ppToken->loc, "float literal needs a decimal point or exponent", "", "");
+        saveName(ch);
+    } else
+        ungetChar();
 
-        str[len]='\0';
+    // Patch up the name, length, etc.
 
-        ppToken->dval = strtod(str, nullptr);
+    if (len > MaxTokenLength) {
+        len = MaxTokenLength;
+        parseContext.ppError(ppToken->loc, "float literal too long", "", "");
     }
+    ppToken->name[len] = '\0';
 
+    // Get the numerical value
+    ppToken->dval = strtod(ppToken->name, nullptr);
+
+    // Return the right token type
     if (isDouble)
         return PpAtomConstDouble;
 #ifdef AMD_EXTENSIONS
diff --git a/3rdparty/glslang/glslang/MachineIndependent/propagateNoContraction.cpp b/3rdparty/glslang/glslang/MachineIndependent/propagateNoContraction.cpp
index bcf40f928..ae95688ae 100644
--- a/3rdparty/glslang/glslang/MachineIndependent/propagateNoContraction.cpp
+++ b/3rdparty/glslang/glslang/MachineIndependent/propagateNoContraction.cpp
@@ -90,6 +90,7 @@ bool isDereferenceOperation(glslang::TOperator op)
     case glslang::EOpIndexDirectStruct:
     case glslang::EOpIndexIndirect:
     case glslang::EOpVectorSwizzle:
+    case glslang::EOpMatrixSwizzle:
         return true;
     default:
         return false;
diff --git a/3rdparty/glslang/glslang/Public/ShaderLang.h b/3rdparty/glslang/glslang/Public/ShaderLang.h
index 60e539b4a..6c3fb11cb 100644
--- a/3rdparty/glslang/glslang/Public/ShaderLang.h
+++ b/3rdparty/glslang/glslang/Public/ShaderLang.h
@@ -374,16 +374,16 @@ public:
         // the C++ specification.
 
         // For the "system" or <>-style includes; search the "system" paths.
-        virtual IncludeResult* includeSystem(const char* headerName,
-                                             const char* includerName,
-                                             size_t inclusionDepth) { return nullptr; }
+        virtual IncludeResult* includeSystem(const char* /*headerName*/,
+                                             const char* /*includerName*/,
+                                             size_t /*inclusionDepth*/) { return nullptr; }
 
         // For the "local"-only aspect of a "" include. Should not search in the
         // "system" paths, because on returning a failure, the parser will
         // call includeSystem() to look in the "system" locations.
-        virtual IncludeResult* includeLocal(const char* headerName,
-                                            const char* includerName,
-                                            size_t inclusionDepth) { return nullptr; }
+        virtual IncludeResult* includeLocal(const char* /*headerName*/,
+                                            const char* /*includerName*/,
+                                            size_t /*inclusionDepth*/) { return nullptr; }
 
         // Signals that the parser will no longer use the contents of the
         // specified IncludeResult.
diff --git a/3rdparty/glslang/gtests/Hlsl.FromFile.cpp b/3rdparty/glslang/gtests/Hlsl.FromFile.cpp
index e9e301c09..b703398cf 100644
--- a/3rdparty/glslang/gtests/Hlsl.FromFile.cpp
+++ b/3rdparty/glslang/gtests/Hlsl.FromFile.cpp
@@ -154,6 +154,7 @@ INSTANTIATE_TEST_CASE_P(
         {"hlsl.logical.binary.frag", "main"},
         {"hlsl.logical.binary.vec.frag", "main"},
         {"hlsl.matNx1.frag", "main"},
+        {"hlsl.matrixSwizzle.vert", "ShaderFunction"},
         {"hlsl.mintypes.frag", "main"},
         {"hlsl.multiEntry.vert", "RealEntrypoint"},
         {"hlsl.multiReturn.frag", "main"},
@@ -210,6 +211,7 @@ INSTANTIATE_TEST_CASE_P(
         {"hlsl.string.frag", "main"},
         {"hlsl.struct.split-1.vert", "main"},
         {"hlsl.struct.split.array.geom", "main"},
+        {"hlsl.struct.split.assign.frag", "main"},
         {"hlsl.struct.split.call.vert", "main"},
         {"hlsl.struct.split.nested.geom", "main"},
         {"hlsl.struct.split.trivial.geom", "main"},
diff --git a/3rdparty/glslang/gtests/Spv.FromFile.cpp b/3rdparty/glslang/gtests/Spv.FromFile.cpp
index c946ef7ee..f317cdbb5 100644
--- a/3rdparty/glslang/gtests/Spv.FromFile.cpp
+++ b/3rdparty/glslang/gtests/Spv.FromFile.cpp
@@ -358,6 +358,7 @@ INSTANTIATE_TEST_CASE_P(
     Glsl, CompileVulkanToSpirvTestAMD,
     ::testing::ValuesIn(std::vector<std::string>({
         "spv.float16.frag",
+        "spv.shaderBallotAMD.comp"
     })),
     FileNameAsCustomTestSuffix
 );
diff --git a/3rdparty/glslang/hlsl/hlslParseHelper.cpp b/3rdparty/glslang/hlsl/hlslParseHelper.cpp
index fbb91cfc0..6fb6c784c 100755
--- a/3rdparty/glslang/hlsl/hlslParseHelper.cpp
+++ b/3rdparty/glslang/hlsl/hlslParseHelper.cpp
@@ -502,97 +502,87 @@ void HlslParseContext::handlePragma(const TSourceLoc& loc, const TVector<TString
 }
 
 //
-// Look at a '.' field selector string and change it into offsets
-// for a vector or scalar
+// Look at a '.' matrix selector string and change it into components
+// for a matrix. There are two types:
+//
+//   _21    second row, first column (one based)
+//   _m21   third row, second column (zero based)
 //
 // Returns true if there is no error.
 //
-bool HlslParseContext::parseVectorFields(const TSourceLoc& loc, const TString& compString, int vecSize, TVectorFields& fields)
+bool HlslParseContext::parseMatrixSwizzleSelector(const TSourceLoc& loc, const TString& fields, int cols, int rows,
+                                                  TSwizzleSelectors<TMatrixSelector>& components)
 {
-    fields.num = (int)compString.size();
-    if (fields.num > 4) {
-        error(loc, "illegal vector field selection", compString.c_str(), "");
-        return false;
+    int startPos[MaxSwizzleSelectors];
+    int numComps = 0;
+    TString compString = fields;
+
+    // Record the position just after each '_'. Bounds are tested as "c + N > size()"
+    // because size() is unsigned: "size() - 3" wraps for short selectors such as "_".
+    for (size_t c = 0; c < compString.size(); ++c) {
+        if (compString[c] == '_') {
+            if (numComps >= MaxSwizzleSelectors) {
+                error(loc, "matrix component swizzle has too many components", compString.c_str(), "");
+                return false;
+            }
+            if (c + 3 > compString.size() ||
+                    ((compString[c+1] == 'm' || compString[c+1] == 'M') && c + 4 > compString.size())) {
+                error(loc, "matrix component swizzle missing", compString.c_str(), "");
+                return false;
+            }
+            startPos[numComps++] = (int)c + 1;
+        }
+    }
 
-    enum {
-        exyzw,
-        ergba,
-        estpq,
-    } fieldSet[4];
-
-        for (int i = 0; i < fields.num; ++i) {
-            switch (compString[i])  {
-            case 'x':
-                fields.offsets[i] = 0;
-                fieldSet[i] = exyzw;
-                break;
-            case 'r':
-                fields.offsets[i] = 0;
-                fieldSet[i] = ergba;
-                break;
-            case 's':
-                fields.offsets[i] = 0;
-                fieldSet[i] = estpq;
-                break;
-            case 'y':
-                fields.offsets[i] = 1;
-                fieldSet[i] = exyzw;
-                break;
-            case 'g':
-                fields.offsets[i] = 1;
-                fieldSet[i] = ergba;
-                break;
-            case 't':
-                fields.offsets[i] = 1;
-                fieldSet[i] = estpq;
-                break;
-            case 'z':
-                fields.offsets[i] = 2;
-                fieldSet[i] = exyzw;
-                break;
-            case 'b':
-                fields.offsets[i] = 2;
-                fieldSet[i] = ergba;
-                break;
-            case 'p':
-                fields.offsets[i] = 2;
-                fieldSet[i] = estpq;
-                break;
-
-            case 'w':
-                fields.offsets[i] = 3;
-                fieldSet[i] = exyzw;
-                break;
-            case 'a':
-                fields.offsets[i] = 3;
-                fieldSet[i] = ergba;
-                break;
-            case 'q':
-                fields.offsets[i] = 3;
-                fieldSet[i] = estpq;
-                break;
-            default:
-                error(loc, "illegal vector field selection", compString.c_str(), "");
-                return false;
-            }
+    // Process each component
+    for (int i = 0; i < numComps; ++i) {
+        int pos = startPos[i];
+        int bias = -1;
+        if (compString[pos] == 'm' || compString[pos] == 'M') {
+            bias = 0;
+            ++pos;
         }
-
-        for (int i = 0; i < fields.num; ++i) {
-            if (fields.offsets[i] >= vecSize) {
-                error(loc, "vector field selection out of range", compString.c_str(), "");
-                return false;
-            }
-
-            if (i > 0) {
-                if (fieldSet[i] != fieldSet[i - 1]) {
-                    error(loc, "illegal - vector component fields not from the same set", compString.c_str(), "");
-                    return false;
-                }
-            }
+        TMatrixSelector comp;
+        comp.coord1 = compString[pos+0] - '0' + bias;
+        comp.coord2 = compString[pos+1] - '0' + bias;
+        if (comp.coord1 < 0 || comp.coord1 >= cols) {
+            error(loc, "matrix row component out of range", compString.c_str(), "");
+            return false;
         }
+        if (comp.coord2 < 0 || comp.coord2 >= rows) {
+            error(loc, "matrix column component out of range", compString.c_str(), "");
+            return false;
+        }
+        components.push_back(comp);
+    }
 
-        return true;
+    return true;
+}
+
+// If the 'selector' components express a whole column of a matrix, return the column.
+// That means all coord1 values match and the coord2 values are 0..rows-1, in order.
+//
+// Otherwise, return -1.
+//
+int HlslParseContext::getMatrixComponentsColumn(int rows, const TSwizzleSelectors<TMatrixSelector>& selector)
+{
+    int col = -1;
+
+    // right number of comps?
+    if (selector.size() != rows)
+        return -1;
+
+    // all comps in the same column?
+    // rows in order?
+    col = selector[0].coord1;
+    for (int i = 0; i < rows; ++i) {
+        if (col != selector[i].coord1)
+            return -1;
+        if (i != selector[i].coord2)
+            return -1;
+    }
+
+    return col;
 }
 
 //
@@ -850,44 +840,76 @@ TIntermTyped* HlslParseContext::handleDotDereference(const TSourceLoc& loc, TInt
 
     TIntermTyped* result = base;
     if (base->isVector() || base->isScalar()) {
-        TVectorFields fields;
-        if (! parseVectorFields(loc, field, base->getVectorSize(), fields)) {
-            fields.num = 1;
-            fields.offsets[0] = 0;
-        }
+        TSwizzleSelectors<TVectorSelector> selectors;
+        parseSwizzleSelector(loc, field, base->getVectorSize(), selectors);
 
         if (base->isScalar()) {
-            if (fields.num == 1)
+            if (selectors.size() == 1)
                 return result;
             else {
-                TType type(base->getBasicType(), EvqTemporary, fields.num);
+                TType type(base->getBasicType(), EvqTemporary, selectors.size());
                 return addConstructor(loc, base, type);
             }
         }
         if (base->getVectorSize() == 1) {
             TType scalarType(base->getBasicType(), EvqTemporary, 1);
-            if (fields.num == 1)
+            if (selectors.size() == 1)
                 return addConstructor(loc, base, scalarType);
             else {
-                TType vectorType(base->getBasicType(), EvqTemporary, fields.num);
+                TType vectorType(base->getBasicType(), EvqTemporary, selectors.size());
                 return addConstructor(loc, addConstructor(loc, base, scalarType), vectorType);
             }
         }
 
         if (base->getType().getQualifier().isFrontEndConstant())
-            result = intermediate.foldSwizzle(base, fields, loc);
+            result = intermediate.foldSwizzle(base, selectors, loc);
         else {
-            if (fields.num == 1) {
-                TIntermTyped* index = intermediate.addConstantUnion(fields.offsets[0], loc);
+            if (selectors.size() == 1) {
+                TIntermTyped* index = intermediate.addConstantUnion(selectors[0], loc);
                 result = intermediate.addIndex(EOpIndexDirect, base, index, loc);
                 result->setType(TType(base->getBasicType(), EvqTemporary));
             } else {
-                TString vectorString = field;
-                TIntermTyped* index = intermediate.addSwizzle(fields, loc);
+                TIntermTyped* index = intermediate.addSwizzle(selectors, loc);
                 result = intermediate.addIndex(EOpVectorSwizzle, base, index, loc);
-                result->setType(TType(base->getBasicType(), EvqTemporary, base->getType().getQualifier().precision, (int)vectorString.size()));
+                result->setType(TType(base->getBasicType(), EvqTemporary, base->getType().getQualifier().precision, selectors.size()));
             }
         }
+    } else if (base->isMatrix()) {
+        TSwizzleSelectors<TMatrixSelector> selectors;
+        if (! parseMatrixSwizzleSelector(loc, field, base->getMatrixCols(), base->getMatrixRows(), selectors))
+            return result;
+
+        if (selectors.size() == 1) {
+            // Representable by m[c][r]
+            if (base->getType().getQualifier().isFrontEndConstant()) {
+                result = intermediate.foldDereference(base, selectors[0].coord1, loc);
+                result = intermediate.foldDereference(result, selectors[0].coord2, loc);
+            } else {
+                result = intermediate.addIndex(EOpIndexDirect, base, intermediate.addConstantUnion(selectors[0].coord1, loc), loc);
+                TType dereferencedCol(base->getType(), 0);
+                result->setType(dereferencedCol);
+                result = intermediate.addIndex(EOpIndexDirect, result, intermediate.addConstantUnion(selectors[0].coord2, loc), loc);
+                TType dereferenced(dereferencedCol, 0);
+                result->setType(dereferenced);
+            }
+        } else {
+            int column = getMatrixComponentsColumn(base->getMatrixRows(), selectors);
+            if (column >= 0) {
+                // Representable by m[c]
+                if (base->getType().getQualifier().isFrontEndConstant())
+                    result = intermediate.foldDereference(base, column, loc);
+                else {
+                    result = intermediate.addIndex(EOpIndexDirect, base, intermediate.addConstantUnion(column, loc), loc);
+                    TType dereferenced(base->getType(), 0);
+                    result->setType(dereferenced);
+                }
+            } else {
+                // general case, not a column, not a single component
+                TIntermTyped* index = intermediate.addSwizzle(selectors, loc);
+                result = intermediate.addIndex(EOpMatrixSwizzle, base, index, loc);
+                result->setType(TType(base->getBasicType(), EvqTemporary, base->getType().getQualifier().precision, selectors.size()));
+           }
+        }
     } else if (base->getBasicType() == EbtStruct || base->getBasicType() == EbtBlock) {
         const TTypeList* fields = base->getType().getStruct();
         bool fieldFound = false;
@@ -933,7 +955,7 @@ bool HlslParseContext::shouldSplit(const TType& type)
     const TStorageQualifier qualifier = type.getQualifier().storage;
 
     // If it contains interstage IO, but not ONLY interstage IO, split the struct.
-    return type.isStruct() && type.containsBuiltInInterstageIO() &&
+    return type.isStruct() && type.containsBuiltInInterstageIO(language) &&
         (qualifier == EvqVaryingIn || qualifier == EvqVaryingOut);
 }
 
@@ -990,13 +1012,13 @@ TType& HlslParseContext::split(TType& type, TString name, const TType* outerStru
 
         // Get iterator to (now at end) set of builtin iterstage IO members
         const auto firstIo = std::stable_partition(userStructure->begin(), userStructure->end(),
-                                                   [](const TTypeLoc& t) {return !t.type->isBuiltInInterstageIO();});
+                                                   [this](const TTypeLoc& t) {return !t.type->isBuiltInInterstageIO(language);});
 
         // Move those to the builtin IO.  However, we also propagate arrayness (just one level is handled
         // now) to this variable.
         for (auto ioType = firstIo; ioType != userStructure->end(); ++ioType) {
             const TType& memberType = *ioType->type;
-            TVariable* ioVar = makeInternalVariable(name + (name.empty() ? "" : ".") + memberType.getFieldName(), memberType);
+            TVariable* ioVar = makeInternalVariable(name + (name.empty() ? "" : "_") + memberType.getFieldName(), memberType);
 
             if (arraySizes)
                 ioVar->getWritableType().newArraySizes(*arraySizes);
@@ -1013,7 +1035,7 @@ TType& HlslParseContext::split(TType& type, TString name, const TType* outerStru
         // Recurse further into the members.
         for (unsigned int i = 0; i < userStructure->size(); ++i)
             split(*(*userStructure)[i].type,
-                  name + (name.empty() ? "" : ".") + (*userStructure)[i].type->getFieldName(),
+                  name + (name.empty() ? "" : "_") + (*userStructure)[i].type->getFieldName(),
                   outerStructType);
     }
 
@@ -1320,7 +1342,7 @@ TIntermTyped* HlslParseContext::splitAccessStruct(const TSourceLoc& loc, TInterm
 
     const TType& memberType = *members[member].type;
 
-    if (memberType.isBuiltInInterstageIO()) {
+    if (memberType.isBuiltInInterstageIO(language)) {
         // It's one of the interstage IO variables we split off.
         TIntermTyped* builtIn = intermediate.addSymbol(*interstageBuiltInIo[tInterstageIoData(memberType, base->getType())], loc);
 
@@ -1344,7 +1366,7 @@ TIntermTyped* HlslParseContext::splitAccessStruct(const TSourceLoc& loc, TInterm
 
         int newMember = 0;
         for (int m=0; m<member; ++m)
-            if (!members[m].type->isBuiltInInterstageIO())
+            if (!members[m].type->isBuiltInInterstageIO(language))
                 ++newMember;
 
         member = newMember;
@@ -1437,6 +1459,9 @@ TFunction& HlslParseContext::handleFunctionDeclarator(const TSourceLoc& loc, TFu
 // Add interstage IO variables to the linkage in canonical order.
 void HlslParseContext::addInterstageIoToLinkage()
 {
+    TSourceLoc loc;
+    loc.init();
+
     std::vector<tInterstageIoData> io;
     io.reserve(interstageBuiltInIo.size());
 
@@ -1446,8 +1471,75 @@ void HlslParseContext::addInterstageIoToLinkage()
     // Our canonical order is the TBuiltInVariable numeric order.
     std::sort(io.begin(), io.end());
 
-    for (int idx = 0; idx < int(io.size()); ++idx)
-        trackLinkageDeferred(*interstageBuiltInIo[io[idx]]);
+    // We have to (potentially) track two IO blocks, one in, one out.  E.g., a GS may have a
+    // PerVertex block in both directions, possibly with different members.
+    static const TStorageQualifier ioType[2] = { EvqVaryingIn, EvqVaryingOut };
+    static const char* blockName[2] = { "PerVertex_in", "PerVertex_out" };
+
+    TTypeList*   ioBlockTypes[2] = { nullptr, nullptr };
+    TArraySizes* ioBlockArray[2] = { nullptr, nullptr };
+
+    for (int idx = 0; idx < int(io.size()); ++idx) {
+        TVariable* var = interstageBuiltInIo[io[idx]];
+
+        // Add the loose interstage IO to the linkage
+        if (var->getType().isLooseAndBuiltIn(language))
+            trackLinkageDeferred(*var);
+
+        // Add the PerVertex interstage IO to the IO block
+        if (var->getType().isPerVertexAndBuiltIn(language)) {
+            int blockId = 0;
+            switch (var->getType().getQualifier().storage) {
+            case EvqVaryingIn:  blockId = 0; break;
+            case EvqVaryingOut: blockId = 1; break;
+            default: assert(0 && "Invalid storage qualifier");
+            }
+
+            // Lazy creation of type list only if we end up needing it.
+            if (ioBlockTypes[blockId] == nullptr)
+                ioBlockTypes[blockId] = new TTypeList();
+
+            TTypeLoc member = { new TType(EbtVoid), loc };
+            member.type->shallowCopy(var->getType());
+            member.type->setFieldName(var->getName());
+
+            // We may have collected these from different parts of different structures.  If their
+            // array dimensions are not the same, we don't know what to do, so issue an error.
+            if (member.type->isArray()) {
+                if (ioBlockArray[blockId] == nullptr) {
+                    ioBlockArray[blockId] = &member.type->getArraySizes();
+                } else  {
+                    if (*ioBlockArray[blockId] != member.type->getArraySizes())
+                        error(loc, "PerVertex block array dimension mismatch", "", "");
+                }
+                member.type->clearArraySizes();
+            }
+
+            ioBlockTypes[blockId]->push_back(member);
+        }
+    }
+
+    // If there were PerVertex items, add the block to the linkage.  Handle in and out separately.
+    for (int blockId = 0; blockId <= 1; ++blockId) {
+        if (ioBlockTypes[blockId] != nullptr) {
+            const TString* instanceName = NewPoolTString(blockName[blockId]);
+            TQualifier     blockQualifier;
+
+            blockQualifier.clear();
+            blockQualifier.storage = ioType[blockId];
+
+            TType blockType(ioBlockTypes[blockId], *instanceName, blockQualifier);
+
+            if (ioBlockArray[blockId] != nullptr)
+                blockType.newArraySizes(*ioBlockArray[blockId]);
+
+            TVariable* ioBlock = new TVariable(instanceName, blockType);
+            if (!symbolTable.insert(*ioBlock))
+                error(loc, "block instance name redefinition", ioBlock->getName().c_str(), "");
+            else
+                trackLinkageDeferred(*ioBlock);
+        }
+    }
 }
 
 //
@@ -1480,20 +1572,9 @@ TIntermAggregate* HlslParseContext::handleFunctionDefinition(const TSourceLoc& l
         currentFunctionType = new TType(EbtVoid);
     functionReturnsValue = false;
 
-    inEntryPoint = function.getName().compare(intermediate.getEntryPointName().c_str()) == 0;
-    if (inEntryPoint) {
-        intermediate.setEntryPointMangledName(function.getMangledName().c_str());
-        intermediate.incrementEntryPointCount();
-        remapEntryPointIO(function);
-        if (entryPointOutput) {
-            if (shouldFlatten(entryPointOutput->getType()))
-                flatten(loc, *entryPointOutput);
-            if (shouldSplit(entryPointOutput->getType()))
-                split(*entryPointOutput);
-            assignLocations(*entryPointOutput);
-        }
-    } else
-        remapNonEntryPointIO(function);
+    // Entry points need different I/O and other handling, transform it so the
+    // rest of this function doesn't care.
+    transformEntryPoint(loc, function, attributes);
 
     // Insert the $Global constant buffer.
     // TODO: this design fails if new members are declared between function definitions.
@@ -1557,23 +1638,47 @@ TIntermAggregate* HlslParseContext::handleFunctionDefinition(const TSourceLoc& l
     controlFlowNestingLevel = 0;
     postEntryPointReturn = false;
 
-    // Handle function attributes
-    if (inEntryPoint) {
-        const TIntermAggregate* numThreads = attributes[EatNumThreads];
-        if (numThreads != nullptr) {
-            const TIntermSequence& sequence = numThreads->getSequence();
+    return paramNodes;
+}
 
-            for (int lid = 0; lid < int(sequence.size()); ++lid)
-                intermediate.setLocalSize(lid, sequence[lid]->getAsConstantUnion()->getConstArray()[0].getIConst());
-        }
+//
+// Do all special handling for the entry point.
+//
+void HlslParseContext::transformEntryPoint(const TSourceLoc& loc, TFunction& function, const TAttributeMap& attributes)
+{
+    inEntryPoint = function.getName().compare(intermediate.getEntryPointName().c_str()) == 0;
 
-        const TIntermAggregate* maxVertexCount = attributes[EatMaxVertexCount];
-        if (maxVertexCount != nullptr) {
-            intermediate.setVertices(maxVertexCount->getSequence()[0]->getAsConstantUnion()->getConstArray()[0].getIConst());
-        }
+    if (!inEntryPoint) {
+        remapNonEntryPointIO(function);
+        return;
     }
 
-    return paramNodes;
+    // entry point logic...
+
+    intermediate.setEntryPointMangledName(function.getMangledName().c_str());
+    intermediate.incrementEntryPointCount();
+
+    // Handle parameters and return value
+    remapEntryPointIO(function);
+    if (entryPointOutput) {
+        if (shouldFlatten(entryPointOutput->getType()))
+            flatten(loc, *entryPointOutput);
+        if (shouldSplit(entryPointOutput->getType()))
+            split(*entryPointOutput);
+        assignLocations(*entryPointOutput);
+    }
+
+    // Handle function attributes
+    const TIntermAggregate* numThreads = attributes[EatNumThreads];
+    if (numThreads != nullptr) {
+        const TIntermSequence& sequence = numThreads->getSequence();
+
+        for (int lid = 0; lid < int(sequence.size()); ++lid)
+            intermediate.setLocalSize(lid, sequence[lid]->getAsConstantUnion()->getConstArray()[0].getIConst());
+    }
+    const TIntermAggregate* maxVertexCount = attributes[EatMaxVertexCount];
+    if (maxVertexCount != nullptr)
+        intermediate.setVertices(maxVertexCount->getSequence()[0]->getAsConstantUnion()->getConstArray()[0].getIConst());
 }
 
 void HlslParseContext::handleFunctionBody(const TSourceLoc& loc, TFunction& function, TIntermNode* functionBody, TIntermNode*& node)
@@ -1700,13 +1805,19 @@ void HlslParseContext::handleFunctionArgument(TFunction* function,
 }
 
 // Some simple source assignments need to be flattened to a sequence
-// of AST assignments.  Catch these and flatten, otherwise, pass through
+// of AST assignments. Catch these and flatten, otherwise, pass through
 // to intermediate.addAssign().
-TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op, TIntermTyped* left, TIntermTyped* right) const
+//
+// Also, assignment to matrix swizzles requires multiple component assignments,
+// intercept those as well.
+TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op, TIntermTyped* left, TIntermTyped* right)
 {
     if (left == nullptr || right == nullptr)
         return nullptr;
 
+    if (left->getAsOperator() && left->getAsOperator()->getOp() == EOpMatrixSwizzle)
+        return handleAssignToMatrixSwizzle(loc, op, left, right);
+
     const bool isSplitLeft    = wasSplit(left);
     const bool isSplitRight   = wasSplit(right);
 
@@ -1714,7 +1825,7 @@ TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op
     const bool isFlattenRight = wasFlattened(right);
 
     // OK to do a single assign if both are split, or both are unsplit.  But if one is and the other
-    // isn't, we fall back to a memberwise copy.
+    // isn't, we fall back to a member-wise copy.
     if (! isFlattenLeft && ! isFlattenRight && !isSplitLeft && !isSplitRight)
         return intermediate.addAssign(op, left, right, loc);
 
@@ -1729,10 +1840,6 @@ TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op
     // If the RHS is a simple symbol node, we'll copy it for each member.
     TIntermSymbol* cloneSymNode = nullptr;
 
-    // Array structs are not yet handled in flattening.  (Compilation error upstream, so
-    // this should never fire).
-    assert(!(left->getType().isStruct() && left->getType().isArray()));
-
     int memberCount = 0;
 
     // Track how many items there are to copy.
@@ -1786,7 +1893,7 @@ TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op
         const TOperator op = node->getType().isArray() ? EOpIndexDirect : EOpIndexDirectStruct;
         const TType derefType(node->getType(), member);
 
-        if (split && derefType.isBuiltInInterstageIO()) {
+        if (split && derefType.isBuiltInInterstageIO(language)) {
             // copy from interstage IO builtin if needed
             subTree = intermediate.addSymbol(*interstageBuiltInIo.find(tInterstageIoData(derefType, outer->getType()))->second);
         } else if (flattened && isFinalFlattening(derefType)) {
@@ -1823,10 +1930,13 @@ TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op
                 TIntermTyped* subLeft  = getMember(true,  left,  element, left, element);
                 TIntermTyped* subRight = getMember(false, right, element, right, element);
 
+                TIntermTyped* subSplitLeft =  isSplitLeft  ? getMember(true,  left,  element, splitLeft, element) : subLeft;
+                TIntermTyped* subSplitRight = isSplitRight ? getMember(false, right, element, splitRight, element) : subRight; 
+
                 if (isFinalFlattening(dereferencedType))
                     assignList = intermediate.growAggregate(assignList, intermediate.addAssign(op, subLeft, subRight, loc), loc);
                 else
-                    traverse(subLeft, subRight, splitLeft, splitRight);
+                    traverse(subLeft, subRight, subSplitLeft, subSplitRight);
             }
         } else if (left->getType().isStruct()) {
             // struct case
@@ -1855,14 +1965,14 @@ TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op
                 // recurse into it if there's something for splitting to do.  That can save a lot of AST verbosity for
                 // a bunch of memberwise copies.
                 if (isFinalFlattening(typeL) || (!isFlattenLeft && !isFlattenRight &&
-                                                 !typeL.containsBuiltInInterstageIO() && !typeR.containsBuiltInInterstageIO())) {
+                                                 !typeL.containsBuiltInInterstageIO(language) && !typeR.containsBuiltInInterstageIO(language))) {
                     assignList = intermediate.growAggregate(assignList, intermediate.addAssign(op, subSplitLeft, subSplitRight, loc), loc);
                 } else {
                     traverse(subLeft, subRight, subSplitLeft, subSplitRight);
                 }
 
-                memberL += (typeL.isBuiltInInterstageIO() ? 0 : 1);
-                memberR += (typeR.isBuiltInInterstageIO() ? 0 : 1);
+                memberL += (typeL.isBuiltInInterstageIO(language) ? 0 : 1);
+                memberR += (typeR.isBuiltInInterstageIO(language) ? 0 : 1);
             }
         } else {
             assert(0);  // we should never be called on a non-flattenable thing, because
@@ -1891,6 +2001,65 @@ TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op
     return assignList;
 }
 
+// An assignment to matrix swizzle must be decomposed into individual assignments.
+// These must be selected component-wise from the RHS and stored component-wise
+// into the LHS.
+TIntermTyped* HlslParseContext::handleAssignToMatrixSwizzle(const TSourceLoc& loc, TOperator op, TIntermTyped* left, TIntermTyped* right)
+{
+    assert(left->getAsOperator() && left->getAsOperator()->getOp() == EOpMatrixSwizzle);
+
+    if (op != EOpAssign)
+        error(loc, "only simple assignment to non-simple matrix swizzle is supported", "assign", "");
+
+    // isolate the matrix and swizzle nodes
+    TIntermTyped* matrix = left->getAsBinaryNode()->getLeft()->getAsTyped();
+    const TIntermSequence& swizzle = left->getAsBinaryNode()->getRight()->getAsAggregate()->getSequence();
+
+    // if the RHS isn't already a simple vector, let's store into one
+    TIntermSymbol* vector = right->getAsSymbolNode();
+    TIntermTyped* vectorAssign = nullptr;
+    if (vector == nullptr) {
+        // create a new intermediate vector variable to assign to
+        TType vectorType(matrix->getBasicType(), EvqTemporary, matrix->getQualifier().precision, swizzle.size()/2);
+        vector = intermediate.addSymbol(*makeInternalVariable("intermVec", vectorType), loc);
+
+        // assign the right to the new vector
+        vectorAssign = handleAssign(loc, op, vector, right);
+    }
+
+    // Assign the vector components to the matrix components.
+    // Store this as a sequence, so a single aggregate node represents this
+    // entire operation.
+    TIntermAggregate* result = intermediate.makeAggregate(vectorAssign);
+    TType columnType(matrix->getType(), 0);
+    TType componentType(columnType, 0);
+    TType indexType(EbtInt);
+    for (int i = 0; i < (int)swizzle.size(); i += 2) {
+        // the right component, single index into the RHS vector
+        TIntermTyped* rightComp = intermediate.addIndex(EOpIndexDirect, vector,
+                                    intermediate.addConstantUnion(i/2, loc), loc);
+
+        // the left component, double index into the LHS matrix
+        TIntermTyped* leftComp = intermediate.addIndex(EOpIndexDirect, matrix,
+                                    intermediate.addConstantUnion(swizzle[i]->getAsConstantUnion()->getConstArray(),
+                                                                  indexType, loc),
+                                    loc);
+        leftComp->setType(columnType);
+        leftComp = intermediate.addIndex(EOpIndexDirect, leftComp,
+                                    intermediate.addConstantUnion(swizzle[i+1]->getAsConstantUnion()->getConstArray(),
+                                                                  indexType, loc),
+                                    loc);
+        leftComp->setType(componentType);
+
+        // Add the assignment to the aggregate
+        result = intermediate.growAggregate(result, intermediate.addAssign(op, leftComp, rightComp, loc));
+    }
+
+    result->setOp(EOpSequence);
+
+    return result;
+}
+
 //
 // HLSL atomic operations have slightly different arguments than
 // GLSL/AST/SPIRV.  The semantics are converted below in decomposeIntrinsic.
@@ -2281,14 +2450,16 @@ void HlslParseContext::decomposeSampleMethods(const TSourceLoc& loc, TIntermType
                 coordSwizzle = argCoord;
             } else {
                 // Extract coordinate
-                TVectorFields coordFields(0,1,2,3);
-                coordFields.num = argCoord->getType().getVectorSize() - (isMS ? 0 : 1);
+                int swizzleSize = argCoord->getType().getVectorSize() - (isMS ? 0 : 1);
+                TSwizzleSelectors<TVectorSelector> coordFields;
+                for (int i = 0; i < swizzleSize; ++i)
+                    coordFields.push_back(i);
                 TIntermTyped* coordIdx = intermediate.addSwizzle(coordFields, loc);
                 coordSwizzle = intermediate.addIndex(EOpVectorSwizzle, argCoord, coordIdx, loc);
-                coordSwizzle->setType(TType(coordBaseType, EvqTemporary, coordFields.num));
+                coordSwizzle->setType(TType(coordBaseType, EvqTemporary, coordFields.size()));
 
                 // Extract LOD
-                TIntermTyped* lodIdx = intermediate.addConstantUnion(coordFields.num, loc, true);
+                TIntermTyped* lodIdx = intermediate.addConstantUnion(coordFields.size(), loc, true);
                 lodComponent = intermediate.addIndex(EOpIndexDirect, argCoord, lodIdx, loc);
                 lodComponent->setType(TType(coordBaseType, EvqTemporary, 1));
             }
@@ -2994,8 +3165,12 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*&
         {
             // ivec4 ( x.zyxw * 255.001953 );
             TIntermTyped* arg0 = node->getAsUnaryNode()->getOperand();
-            TVectorFields fields(2,1,0,3);
-            TIntermTyped* swizzleIdx = intermediate.addSwizzle(fields, loc);
+            TSwizzleSelectors<TVectorSelector> selectors;
+            selectors.push_back(2);
+            selectors.push_back(1);
+            selectors.push_back(0);
+            selectors.push_back(3);
+            TIntermTyped* swizzleIdx = intermediate.addSwizzle(selectors, loc);
             TIntermTyped* swizzled = intermediate.addIndex(EOpVectorSwizzle, arg0, swizzleIdx, loc);
             swizzled->setType(arg0->getType());
             swizzled->getWritableType().getQualifier().makeTemporary();
@@ -4940,6 +5115,39 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, TFunction
             // but it is allowed to promote its other arguments.
             if (arg == 0)
                 return false;
+            break;
+        case EOpMethodSample:
+        case EOpMethodSampleBias:
+        case EOpMethodSampleCmp:
+        case EOpMethodSampleCmpLevelZero:
+        case EOpMethodSampleGrad:
+        case EOpMethodSampleLevel:
+        case EOpMethodLoad:
+        case EOpMethodGetDimensions:
+        case EOpMethodGetSamplePosition:
+        case EOpMethodGather:
+        case EOpMethodCalculateLevelOfDetail:
+        case EOpMethodCalculateLevelOfDetailUnclamped:
+        case EOpMethodGatherRed:
+        case EOpMethodGatherGreen:
+        case EOpMethodGatherBlue:
+        case EOpMethodGatherAlpha:
+        case EOpMethodGatherCmp:
+        case EOpMethodGatherCmpRed:
+        case EOpMethodGatherCmpGreen:
+        case EOpMethodGatherCmpBlue:
+        case EOpMethodGatherCmpAlpha:
+        case EOpMethodAppend:
+        case EOpMethodRestartStrip:
+            // These are method calls: the object's type (argument 0) cannot be converted.
+            // It matches only if the sampler type, arrayed, shadow, ms, and dim all agree (is that sufficient?)
+            if (arg == 0)
+                return from.getSampler().type == to.getSampler().type &&
+                       from.getSampler().arrayed == to.getSampler().arrayed &&
+                       from.getSampler().shadow == to.getSampler().shadow &&
+                       from.getSampler().ms == to.getSampler().ms &&
+                       from.getSampler().dim == to.getSampler().dim;
+            break;
         default:
             break;
         }
@@ -5163,7 +5371,7 @@ TType* HlslParseContext::sanitizeType(TType* type)
             sanitizedType->clearArraySizes();
         return sanitizedType;
     } else {
-        if (type->containsBuiltInInterstageIO()) {
+        if (type->containsBuiltInInterstageIO(language)) {
             // This means the type contains interstage IO, but we've never encountered it before.
             // Copy it, sanitize it, and remember it in the sanitizedTypeMap
             TType* sanitizedType = type->clone();
@@ -5265,7 +5473,7 @@ void HlslParseContext::inheritGlobalDefaults(TQualifier& dst) const
 //
 TVariable* HlslParseContext::makeInternalVariable(const char* name, const TType& type) const
 {
-    TString* nameString = new TString(name);
+    TString* nameString = NewPoolTString(name);
     TVariable* variable = new TVariable(nameString, type);
     symbolTable.makeInternalVariable(*variable);
 
@@ -6272,7 +6480,7 @@ void HlslParseContext::renameShaderFunction(TString*& name) const
     // Replace the entry point name given in the shader with the real entry point name,
     // if there is a substitution.
     if (name != nullptr && *name == sourceEntryPointName)
-        name = new TString(intermediate.getEntryPointName().c_str());
+        name = NewPoolTString(intermediate.getEntryPointName().c_str());
 }
 
 // post-processing
diff --git a/3rdparty/glslang/hlsl/hlslParseHelper.h b/3rdparty/glslang/hlsl/hlslParseHelper.h
index d56bd4260..1bfca8f87 100755
--- a/3rdparty/glslang/hlsl/hlslParseHelper.h
+++ b/3rdparty/glslang/hlsl/hlslParseHelper.h
@@ -73,12 +73,14 @@ public:
     void assignLocations(TVariable& variable);
     TFunction& handleFunctionDeclarator(const TSourceLoc&, TFunction& function, bool prototype);
     TIntermAggregate* handleFunctionDefinition(const TSourceLoc&, TFunction&, const TAttributeMap&);
+    void transformEntryPoint(const TSourceLoc&, TFunction&, const TAttributeMap&);
     void handleFunctionBody(const TSourceLoc&, TFunction&, TIntermNode* functionBody, TIntermNode*& node);
     void remapEntryPointIO(TFunction& function);
     void remapNonEntryPointIO(TFunction& function);
     TIntermNode* handleReturnValue(const TSourceLoc&, TIntermTyped*);
     void handleFunctionArgument(TFunction*, TIntermTyped*& arguments, TIntermTyped* newArg);
-    TIntermTyped* handleAssign(const TSourceLoc&, TOperator, TIntermTyped* left, TIntermTyped* right) const;
+    TIntermTyped* handleAssign(const TSourceLoc&, TOperator, TIntermTyped* left, TIntermTyped* right);
+    TIntermTyped* handleAssignToMatrixSwizzle(const TSourceLoc&, TOperator, TIntermTyped* left, TIntermTyped* right);
     TIntermTyped* handleFunctionCall(const TSourceLoc&, TFunction*, TIntermTyped*);
     void decomposeIntrinsic(const TSourceLoc&, TIntermTyped*& node, TIntermNode* arguments);
     void decomposeSampleMethods(const TSourceLoc&, TIntermTyped*& node, TIntermNode* arguments);
@@ -96,7 +98,8 @@ public:
 
     TIntermAggregate* handleSamplerTextureCombine(const TSourceLoc& loc, TIntermTyped* argTex, TIntermTyped* argSampler);
 
-    bool parseVectorFields(const TSourceLoc&, const TString&, int vecSize, TVectorFields&);
+    bool parseMatrixSwizzleSelector(const TSourceLoc&, const TString&, int cols, int rows, TSwizzleSelectors<TMatrixSelector>&);
+    int getMatrixComponentsColumn(int rows, const TSwizzleSelectors<TMatrixSelector>&);
     void assignError(const TSourceLoc&, const char* op, TString left, TString right);
     void unaryOpError(const TSourceLoc&, const char* op, TString operand);
     void binaryOpError(const TSourceLoc&, const char* op, TString left, TString right);