Updated spirv-cross.

This commit is contained in:
Бранимир Караџић
2019-07-03 08:10:12 -07:00
parent 8580112ecb
commit a87051c9cc
78 changed files with 2630 additions and 53 deletions

View File

@@ -287,7 +287,7 @@ if (SPIRV_CROSS_STATIC)
endif()
set(spirv-cross-abi-major 0)
set(spirv-cross-abi-minor 14)
set(spirv-cross-abi-minor 16)
set(spirv-cross-abi-patch 0)
if (SPIRV_CROSS_SHARED)
@@ -457,6 +457,10 @@ if (SPIRV_CROSS_CLI)
target_link_libraries(spirv-cross-msl-constexpr-test spirv-cross-c)
set_target_properties(spirv-cross-msl-constexpr-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}")
add_executable(spirv-cross-msl-resource-binding-test tests-other/msl_resource_bindings.cpp)
target_link_libraries(spirv-cross-msl-resource-binding-test spirv-cross-c)
set_target_properties(spirv-cross-msl-resource-binding-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}")
if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang"))
target_compile_options(spirv-cross-c-api-test PRIVATE -std=c89 -Wall -Wextra)
endif()
@@ -469,6 +473,8 @@ if (SPIRV_CROSS_CLI)
COMMAND $<TARGET_FILE:spirv-cross-small-vector-test>)
add_test(NAME spirv-cross-msl-constexpr-test
COMMAND $<TARGET_FILE:spirv-cross-msl-constexpr-test> ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_constexpr_test.spv)
add_test(NAME spirv-cross-msl-resource-binding-test
COMMAND $<TARGET_FILE:spirv-cross-msl-resource-binding-test> ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_resource_binding.spv)
add_test(NAME spirv-cross-test
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --parallel
${spirv-cross-externals}

View File

@@ -514,6 +514,7 @@ struct CLIArguments
bool msl_domain_lower_left = false;
bool msl_argument_buffers = false;
bool msl_texture_buffer_native = false;
bool msl_multiview = false;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
bool emit_line_directives = false;
@@ -592,6 +593,7 @@ static void print_help()
"\t[--msl-argument-buffers]\n"
"\t[--msl-texture-buffer-native]\n"
"\t[--msl-discrete-descriptor-set <index>]\n"
"\t[--msl-multiview]\n"
"\t[--hlsl]\n"
"\t[--reflect]\n"
"\t[--shader-model]\n"
@@ -750,6 +752,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left;
msl_opts.argument_buffers = args.msl_argument_buffers;
msl_opts.texture_buffer_native = args.msl_texture_buffer_native;
msl_opts.multiview = args.msl_multiview;
msl_comp->set_msl_options(msl_opts);
for (auto &v : args.msl_discrete_descriptor_sets)
msl_comp->add_discrete_descriptor_set(v);
@@ -1069,6 +1072,7 @@ static int main_inner(int argc, char *argv[])
cbs.add("--msl-discrete-descriptor-set",
[&args](CLIParser &parser) { args.msl_discrete_descriptor_sets.push_back(parser.next_uint()); });
cbs.add("--msl-texture-buffer-native", [&args](CLIParser &) { args.msl_texture_buffer_native = true; });
cbs.add("--msl-multiview", [&args](CLIParser &) { args.msl_multiview = true; });
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();

View File

@@ -0,0 +1,29 @@
Texture2D<float4> uTexture : register(t0);
SamplerState _uTexture_sampler : register(s0);
static min16float4 FragColor;
static min16float2 UV;
struct SPIRV_Cross_Input
{
min16float2 UV : TEXCOORD0;
};
struct SPIRV_Cross_Output
{
min16float4 FragColor : SV_Target0;
};
void frag_main()
{
FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV));
}
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
UV = stage_input.UV;
frag_main();
SPIRV_Cross_Output stage_output;
stage_output.FragColor = FragColor;
return stage_output;
}

View File

@@ -0,0 +1,48 @@
RWByteAddressBuffer _21 : register(u0);
ByteAddressBuffer _26 : register(t1);
void comp_main()
{
float2x2 _32 = float2x2(asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).y);
_21.Store2(0, asuint(_32[0]));
_21.Store2(8, asuint(_32[1]));
float2x3 _41 = float2x3(asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).y);
_21.Store3(16, asuint(_41[0]));
_21.Store3(32, asuint(_41[1]));
float2x4 _50 = float2x4(asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).y);
_21.Store4(48, asuint(_50[0]));
_21.Store4(64, asuint(_50[1]));
float3x2 _58 = float3x2(asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).z);
_21.Store2(80, asuint(_58[0]));
_21.Store2(88, asuint(_58[1]));
_21.Store2(96, asuint(_58[2]));
float3x3 _66 = float3x3(asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).z);
_21.Store3(112, asuint(_66[0]));
_21.Store3(128, asuint(_66[1]));
_21.Store3(144, asuint(_66[2]));
float3x4 _74 = float3x4(asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).z);
_21.Store4(160, asuint(_74[0]));
_21.Store4(176, asuint(_74[1]));
_21.Store4(192, asuint(_74[2]));
float4x2 _82 = float4x2(asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).w);
_21.Store2(208, asuint(_82[0]));
_21.Store2(216, asuint(_82[1]));
_21.Store2(224, asuint(_82[2]));
_21.Store2(232, asuint(_82[3]));
float4x3 _90 = float4x3(asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).w);
_21.Store3(240, asuint(_90[0]));
_21.Store3(256, asuint(_90[1]));
_21.Store3(272, asuint(_90[2]));
_21.Store3(288, asuint(_90[3]));
float4x4 _98 = float4x4(asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).w);
_21.Store4(304, asuint(_98[0]));
_21.Store4(320, asuint(_98[1]));
_21.Store4(336, asuint(_98[2]));
_21.Store4(352, asuint(_98[3]));
}
[numthreads(1, 1, 1)]
void main()
{
comp_main();
}

View File

@@ -0,0 +1,14 @@
RWByteAddressBuffer _9 : register(u0);
void comp_main()
{
_9.Store(8, asuint(distance(asfloat(_9.Load(0)), asfloat(_9.Load(4)))));
_9.Store(12, asuint(length(asfloat(_9.Load(0)))));
_9.Store(16, asuint(sign(asfloat(_9.Load(0)))));
}
[numthreads(1, 1, 1)]
void main()
{
comp_main();
}

View File

@@ -0,0 +1,49 @@
static float FragColor;
static float3 vRefract;
struct SPIRV_Cross_Input
{
float3 vRefract : TEXCOORD0;
};
struct SPIRV_Cross_Output
{
float FragColor : SV_Target0;
};
float SPIRV_Cross_Reflect(float i, float n)
{
return i - 2.0 * dot(n, i) * n;
}
float SPIRV_Cross_Refract(float i, float n, float eta)
{
float NoI = n * i;
float NoI2 = NoI * NoI;
float k = 1.0 - eta * eta * (1.0 - NoI2);
if (k < 0.0)
{
return 0.0;
}
else
{
return eta * i - (eta * NoI + sqrt(k)) * n;
}
}
void frag_main()
{
FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z);
FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y);
FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
FragColor += reflect(vRefract.xy, vRefract.zy).y;
}
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
vRefract = stage_input.vRefract;
frag_main();
SPIRV_Cross_Output stage_output;
stage_output.FragColor = FragColor;
return stage_output;
}

View File

@@ -0,0 +1,22 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
half4 FragColor [[color(0)]];
};
struct main0_in
{
half2 UV [[user(locn0)]];
};
fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
{
main0_out out = {};
out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV)));
return out;
}

View File

@@ -0,0 +1,38 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float2x2 m22;
float2x3 m23;
float2x4 m24;
float3x2 m32;
float3x3 m33;
float3x4 m34;
float4x2 m42;
float4x3 m43;
float4x4 m44;
};
struct ReadSSBO
{
float2 v2;
float3 v3;
float4 v4;
};
kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]])
{
_21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y);
_21.m23 = float2x3(_26.v3 * _26.v2.x, _26.v3 * _26.v2.y);
_21.m24 = float2x4(_26.v4 * _26.v2.x, _26.v4 * _26.v2.y);
_21.m32 = float3x2(_26.v2 * _26.v3.x, _26.v2 * _26.v3.y, _26.v2 * _26.v3.z);
_21.m33 = float3x3(_26.v3 * _26.v3.x, _26.v3 * _26.v3.y, _26.v3 * _26.v3.z);
_21.m34 = float3x4(_26.v4 * _26.v3.x, _26.v4 * _26.v3.y, _26.v4 * _26.v3.z);
_21.m42 = float4x2(_26.v2 * _26.v4.x, _26.v2 * _26.v4.y, _26.v2 * _26.v4.z, _26.v2 * _26.v4.w);
_21.m43 = float4x3(_26.v3 * _26.v4.x, _26.v3 * _26.v4.y, _26.v3 * _26.v4.z, _26.v3 * _26.v4.w);
_21.m44 = float4x4(_26.v4 * _26.v4.x, _26.v4 * _26.v4.y, _26.v4 * _26.v4.z, _26.v4 * _26.v4.w);
}

View File

@@ -0,0 +1,21 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float a;
float b;
float c;
float d;
float e;
};
kernel void main0(device SSBO& _9 [[buffer(0)]])
{
_9.c = abs(_9.a - _9.b);
_9.d = abs(_9.a);
_9.e = sign(_9.a);
}

View File

@@ -0,0 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float FragColor [[color(0)]];
};
struct main0_in
{
float3 vRefract [[user(locn0)]];
};
template<typename T>
inline T spvReflect(T i, T n)
{
return i - T(2) * i * n * n;
}
template<typename T>
inline T spvRefract(T i, T n, T eta)
{
T NoI = n * i;
T NoI2 = NoI * NoI;
T k = T(1) - eta * eta * (T(1) - NoI2);
if (k < T(0))
{
return T(0);
}
else
{
return eta * i - (eta * NoI + sqrt(k)) * n;
}
}
fragment main0_out main0(main0_in in [[stage_in]])
{
main0_out out = {};
out.FragColor = spvRefract(in.vRefract.x, in.vRefract.y, in.vRefract.z);
out.FragColor += spvReflect(in.vRefract.x, in.vRefract.y);
out.FragColor += refract(in.vRefract.xy, in.vRefract.yz, in.vRefract.z).y;
out.FragColor += reflect(in.vRefract.xy, in.vRefract.zy).y;
return out;
}

View File

@@ -0,0 +1,18 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
uint2 FragColor [[color(0)]];
};
fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
{
main0_out out = {};
out.FragColor.x = gl_SubgroupSize;
out.FragColor.y = gl_SubgroupInvocationID;
return out;
}

View File

@@ -66,7 +66,7 @@ inline bool spvSubgroupAllEqual(bool value)
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
{
uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));

View File

@@ -0,0 +1,32 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 FragColor [[color(0)]];
};
struct main0_in
{
float4 vColor [[user(locn0)]];
float2 vTex_0 [[user(locn1)]];
float2 vTex_1 [[user(locn2)]];
float2 vTex_2 [[user(locn3)]];
float2 vTex_3 [[user(locn4)]];
};
fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d<float> uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]])
{
main0_out out = {};
float2 vTex[4] = {};
vTex[0] = in.vTex_0;
vTex[1] = in.vTex_1;
vTex[2] = in.vTex_2;
vTex[3] = in.vTex_3;
gl_ViewIndex += spvViewMask[0];
out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]);
return out;
}

View File

@@ -0,0 +1,89 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float FragColor [[color(0)]];
};
inline uint4 spvSubgroupBallot(bool value)
{
simd_vote vote = simd_ballot(value);
// simd_ballot() returns a 64-bit integer-like object, but
// SPIR-V callers expect a uint4. We must convert.
// FIXME: This won't include higher bits if Apple ever supports
// 128 lanes in an SIMD-group.
return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
}
inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
{
return !!extract_bits(ballot[bit / 32], bit % 32, 1);
}
inline uint spvSubgroupBallotFindLSB(uint4 ballot)
{
return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
}
inline uint spvSubgroupBallotFindMSB(uint4 ballot)
{
return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
}
inline uint spvSubgroupBallotBitCount(uint4 ballot)
{
return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
}
inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
{
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
return spvSubgroupBallotBitCount(ballot & mask);
}
inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
{
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
return spvSubgroupBallotBitCount(ballot & mask);
}
template<typename T>
inline bool spvSubgroupAllEqual(T value)
{
return simd_all(value == simd_broadcast_first(value));
}
template<>
inline bool spvSubgroupAllEqual(bool value)
{
return simd_all(value) || !simd_any(value);
}
fragment main0_out main0()
{
main0_out out = {};
uint gl_SubgroupSize = simd_sum(1);
uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1);
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
out.FragColor = float(gl_SubgroupSize);
out.FragColor = float(gl_SubgroupInvocationID);
out.FragColor = float4(gl_SubgroupEqMask).x;
out.FragColor = float4(gl_SubgroupGeMask).x;
out.FragColor = float4(gl_SubgroupGtMask).x;
out.FragColor = float4(gl_SubgroupLeMask).x;
out.FragColor = float4(gl_SubgroupLtMask).x;
uint4 _63 = spvSubgroupBallot(true);
float4 _147 = simd_prefix_inclusive_product(simd_product(float4(20.0)));
int4 _149 = simd_prefix_inclusive_product(simd_product(int4(20)));
return out;
}

View File

@@ -0,0 +1,31 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct MVPs
{
float4x4 MVP[2];
};
struct main0_out
{
float4 gl_Position [[position]];
uint gl_Layer [[render_target_array_index]];
};
struct main0_in
{
float4 Position [[attribute(0)]];
};
vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
{
main0_out out = {};
uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1];
gl_InstanceIndex /= spvViewMask[1];
out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
out.gl_Layer = gl_ViewIndex - spvViewMask[0];
return out;
}

View File

@@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct MVPs
{
float4x4 MVP[2];
};
struct main0_out
{
float4 gl_Position [[position]];
uint gl_Layer [[render_target_array_index]];
};
struct main0_in
{
float4 Position [[attribute(0)]];
};
vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
{
main0_out out = {};
const uint gl_ViewIndex = 0;
out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
return out;
}

View File

@@ -0,0 +1,25 @@
#version 450
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture;
void main()
{
int _30;
_30 = 7;
int _27_copy;
for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy)
{
if (5.0 > float(_27))
{
break;
}
else
{
continue;
}
continue;
}
imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0));
}

View File

@@ -0,0 +1,19 @@
#version 450
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#else
#error No extension available for FP16.
#endif
layout(binding = 0) uniform sampler2D uTexture;
layout(location = 0) out f16vec4 FragColor;
layout(location = 0) in f16vec2 UV;
void main()
{
FragColor = f16vec4(texture(uTexture, UV));
}

View File

@@ -0,0 +1,20 @@
#version 450
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#else
#error No extension available for FP16.
#endif
#extension GL_EXT_shader_16bit_storage : require
layout(set = 0, binding = 0) uniform sampler2D uTexture;
layout(location = 0) out f16vec4 FragColor;
layout(location = 0) in f16vec2 UV;
void main()
{
FragColor = f16vec4(texture(uTexture, UV));
}

View File

@@ -0,0 +1,36 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) writeonly buffer SSBO
{
mat2 m22;
mat2x3 m23;
mat2x4 m24;
mat3x2 m32;
mat3 m33;
mat3x4 m34;
mat4x2 m42;
mat4x3 m43;
mat4 m44;
} _21;
layout(binding = 1, std430) readonly buffer ReadSSBO
{
vec2 v2;
vec3 v3;
vec4 v4;
} _26;
void main()
{
_21.m22 = outerProduct(_26.v2, _26.v2);
_21.m23 = outerProduct(_26.v3, _26.v2);
_21.m24 = outerProduct(_26.v4, _26.v2);
_21.m32 = outerProduct(_26.v2, _26.v3);
_21.m33 = outerProduct(_26.v3, _26.v3);
_21.m34 = outerProduct(_26.v4, _26.v3);
_21.m42 = outerProduct(_26.v2, _26.v4);
_21.m43 = outerProduct(_26.v3, _26.v4);
_21.m44 = outerProduct(_26.v4, _26.v4);
}

View File

@@ -0,0 +1,19 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
{
float a;
float b;
float c;
float d;
float e;
} _9;
void main()
{
_9.c = distance(_9.a, _9.b);
_9.d = length(_9.a);
_9.e = normalize(_9.a);
}

View File

@@ -0,0 +1,13 @@
#version 450
layout(location = 0) out float FragColor;
layout(location = 0) in vec3 vRefract;
void main()
{
FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
FragColor += reflect(vRefract.x, vRefract.y);
FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
FragColor += reflect(vRefract.xy, vRefract.zy).y;
}

View File

@@ -0,0 +1,19 @@
#version 450
layout(location = 0) flat in int vIndex;
layout(location = 0) out vec4 FragColor;
void main()
{
for (;;)
{
if (vIndex != 1)
{
FragColor = vec4(1.0);
break;
}
FragColor = vec4(10.0);
break;
}
}

View File

@@ -30,16 +30,23 @@ void frag_main()
case 0:
{
_30 = 3;
j = _30;
_31 = 0;
j = _31;
break;
}
default:
{
j = _30;
_31 = 0;
j = _31;
break;
}
case 1:
case 11:
{
j = _31;
break;
}
case 2:
{
@@ -59,6 +66,8 @@ void frag_main()
}
case 4:
{
i = 0;
break;
}
case 5:
{

View File

@@ -0,0 +1,29 @@
Texture2D<float4> uTexture : register(t0);
SamplerState _uTexture_sampler : register(s0);
static min16float4 FragColor;
static min16float2 UV;
struct SPIRV_Cross_Input
{
min16float2 UV : TEXCOORD0;
};
struct SPIRV_Cross_Output
{
min16float4 FragColor : SV_Target0;
};
void frag_main()
{
FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV));
}
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
UV = stage_input.UV;
frag_main();
SPIRV_Cross_Output stage_output;
stage_output.FragColor = FragColor;
return stage_output;
}

View File

@@ -0,0 +1,48 @@
RWByteAddressBuffer _21 : register(u0);
ByteAddressBuffer _26 : register(t1);
void comp_main()
{
float2x2 _32 = float2x2(asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).y);
_21.Store2(0, asuint(_32[0]));
_21.Store2(8, asuint(_32[1]));
float2x3 _41 = float2x3(asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).y);
_21.Store3(16, asuint(_41[0]));
_21.Store3(32, asuint(_41[1]));
float2x4 _50 = float2x4(asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).y);
_21.Store4(48, asuint(_50[0]));
_21.Store4(64, asuint(_50[1]));
float3x2 _58 = float3x2(asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).z);
_21.Store2(80, asuint(_58[0]));
_21.Store2(88, asuint(_58[1]));
_21.Store2(96, asuint(_58[2]));
float3x3 _66 = float3x3(asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).z);
_21.Store3(112, asuint(_66[0]));
_21.Store3(128, asuint(_66[1]));
_21.Store3(144, asuint(_66[2]));
float3x4 _74 = float3x4(asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).z);
_21.Store4(160, asuint(_74[0]));
_21.Store4(176, asuint(_74[1]));
_21.Store4(192, asuint(_74[2]));
float4x2 _82 = float4x2(asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).w);
_21.Store2(208, asuint(_82[0]));
_21.Store2(216, asuint(_82[1]));
_21.Store2(224, asuint(_82[2]));
_21.Store2(232, asuint(_82[3]));
float4x3 _90 = float4x3(asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).w);
_21.Store3(240, asuint(_90[0]));
_21.Store3(256, asuint(_90[1]));
_21.Store3(272, asuint(_90[2]));
_21.Store3(288, asuint(_90[3]));
float4x4 _98 = float4x4(asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).w);
_21.Store4(304, asuint(_98[0]));
_21.Store4(320, asuint(_98[1]));
_21.Store4(336, asuint(_98[2]));
_21.Store4(352, asuint(_98[3]));
}
[numthreads(1, 1, 1)]
void main()
{
comp_main();
}

View File

@@ -0,0 +1,14 @@
RWByteAddressBuffer _9 : register(u0);
void comp_main()
{
_9.Store(8, asuint(distance(asfloat(_9.Load(0)), asfloat(_9.Load(4)))));
_9.Store(12, asuint(length(asfloat(_9.Load(0)))));
_9.Store(16, asuint(sign(asfloat(_9.Load(0)))));
}
[numthreads(1, 1, 1)]
void main()
{
comp_main();
}

View File

@@ -0,0 +1,49 @@
static float FragColor;
static float3 vRefract;
struct SPIRV_Cross_Input
{
float3 vRefract : TEXCOORD0;
};
struct SPIRV_Cross_Output
{
float FragColor : SV_Target0;
};
float SPIRV_Cross_Reflect(float i, float n)
{
return i - 2.0 * dot(n, i) * n;
}
float SPIRV_Cross_Refract(float i, float n, float eta)
{
float NoI = n * i;
float NoI2 = NoI * NoI;
float k = 1.0 - eta * eta * (1.0 - NoI2);
if (k < 0.0)
{
return 0.0;
}
else
{
return eta * i - (eta * NoI + sqrt(k)) * n;
}
}
void frag_main()
{
FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z);
FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y);
FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
FragColor += reflect(vRefract.xy, vRefract.zy).y;
}
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
vRefract = stage_input.vRefract;
frag_main();
SPIRV_Cross_Output stage_output;
stage_output.FragColor = FragColor;
return stage_output;
}

View File

@@ -0,0 +1,22 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
half4 FragColor [[color(0)]];
};
struct main0_in
{
half2 UV [[user(locn0)]];
};
fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
{
main0_out out = {};
out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV)));
return out;
}

View File

@@ -0,0 +1,38 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float2x2 m22;
float2x3 m23;
float2x4 m24;
float3x2 m32;
float3x3 m33;
float3x4 m34;
float4x2 m42;
float4x3 m43;
float4x4 m44;
};
struct ReadSSBO
{
float2 v2;
float3 v3;
float4 v4;
};
kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]])
{
_21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y);
_21.m23 = float2x3(_26.v3 * _26.v2.x, _26.v3 * _26.v2.y);
_21.m24 = float2x4(_26.v4 * _26.v2.x, _26.v4 * _26.v2.y);
_21.m32 = float3x2(_26.v2 * _26.v3.x, _26.v2 * _26.v3.y, _26.v2 * _26.v3.z);
_21.m33 = float3x3(_26.v3 * _26.v3.x, _26.v3 * _26.v3.y, _26.v3 * _26.v3.z);
_21.m34 = float3x4(_26.v4 * _26.v3.x, _26.v4 * _26.v3.y, _26.v4 * _26.v3.z);
_21.m42 = float4x2(_26.v2 * _26.v4.x, _26.v2 * _26.v4.y, _26.v2 * _26.v4.z, _26.v2 * _26.v4.w);
_21.m43 = float4x3(_26.v3 * _26.v4.x, _26.v3 * _26.v4.y, _26.v3 * _26.v4.z, _26.v3 * _26.v4.w);
_21.m44 = float4x4(_26.v4 * _26.v4.x, _26.v4 * _26.v4.y, _26.v4 * _26.v4.z, _26.v4 * _26.v4.w);
}

View File

@@ -0,0 +1,21 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float a;
float b;
float c;
float d;
float e;
};
kernel void main0(device SSBO& _9 [[buffer(0)]])
{
_9.c = abs(_9.a - _9.b);
_9.d = abs(_9.a);
_9.e = sign(_9.a);
}

View File

@@ -0,0 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float FragColor [[color(0)]];
};
struct main0_in
{
float3 vRefract [[user(locn0)]];
};
template<typename T>
inline T spvReflect(T i, T n)
{
return i - T(2) * i * n * n;
}
template<typename T>
inline T spvRefract(T i, T n, T eta)
{
T NoI = n * i;
T NoI2 = NoI * NoI;
T k = T(1) - eta * eta * (T(1) - NoI2);
if (k < T(0))
{
return T(0);
}
else
{
return eta * i - (eta * NoI + sqrt(k)) * n;
}
}
fragment main0_out main0(main0_in in [[stage_in]])
{
main0_out out = {};
out.FragColor = spvRefract(in.vRefract.x, in.vRefract.y, in.vRefract.z);
out.FragColor += spvReflect(in.vRefract.x, in.vRefract.y);
out.FragColor += refract(in.vRefract.xy, in.vRefract.yz, in.vRefract.z).y;
out.FragColor += reflect(in.vRefract.xy, in.vRefract.zy).y;
return out;
}

View File

@@ -0,0 +1,18 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
uint2 FragColor [[color(0)]];
};
fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
{
main0_out out = {};
out.FragColor.x = gl_SubgroupSize;
out.FragColor.y = gl_SubgroupInvocationID;
return out;
}

View File

@@ -66,7 +66,7 @@ inline bool spvSubgroupAllEqual(bool value)
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
{
uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));

View File

@@ -0,0 +1,32 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 FragColor [[color(0)]];
};
struct main0_in
{
float4 vColor [[user(locn0)]];
float2 vTex_0 [[user(locn1)]];
float2 vTex_1 [[user(locn2)]];
float2 vTex_2 [[user(locn3)]];
float2 vTex_3 [[user(locn4)]];
};
fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d<float> uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]])
{
main0_out out = {};
float2 vTex[4] = {};
vTex[0] = in.vTex_0;
vTex[1] = in.vTex_1;
vTex[2] = in.vTex_2;
vTex[3] = in.vTex_3;
gl_ViewIndex += spvViewMask[0];
out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]);
return out;
}

View File

@@ -0,0 +1,143 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float FragColor [[color(0)]];
};
inline uint4 spvSubgroupBallot(bool value)
{
simd_vote vote = simd_ballot(value);
// simd_ballot() returns a 64-bit integer-like object, but
// SPIR-V callers expect a uint4. We must convert.
// FIXME: This won't include higher bits if Apple ever supports
// 128 lanes in an SIMD-group.
return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
}
inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
{
return !!extract_bits(ballot[bit / 32], bit % 32, 1);
}
inline uint spvSubgroupBallotFindLSB(uint4 ballot)
{
return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
}
inline uint spvSubgroupBallotFindMSB(uint4 ballot)
{
return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
}
inline uint spvSubgroupBallotBitCount(uint4 ballot)
{
return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
}
inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
{
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
return spvSubgroupBallotBitCount(ballot & mask);
}
inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
{
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
return spvSubgroupBallotBitCount(ballot & mask);
}
template<typename T>
inline bool spvSubgroupAllEqual(T value)
{
return simd_all(value == simd_broadcast_first(value));
}
template<>
inline bool spvSubgroupAllEqual(bool value)
{
return simd_all(value) || !simd_any(value);
}
fragment main0_out main0()
{
main0_out out = {};
uint gl_SubgroupSize = simd_sum(1);
uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1);
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
out.FragColor = float(gl_SubgroupSize);
out.FragColor = float(gl_SubgroupInvocationID);
bool elected = simd_is_first();
out.FragColor = float4(gl_SubgroupEqMask).x;
out.FragColor = float4(gl_SubgroupGeMask).x;
out.FragColor = float4(gl_SubgroupGtMask).x;
out.FragColor = float4(gl_SubgroupLeMask).x;
out.FragColor = float4(gl_SubgroupLtMask).x;
float4 broadcasted = simd_broadcast(float4(10.0), 8u);
float3 first = simd_broadcast_first(float3(20.0));
uint4 ballot_value = spvSubgroupBallot(true);
bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID);
bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u);
uint bit_count = spvSubgroupBallotBitCount(ballot_value);
uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
uint lsb = spvSubgroupBallotFindLSB(ballot_value);
uint msb = spvSubgroupBallotFindMSB(ballot_value);
uint shuffled = simd_shuffle(10u, 8u);
uint shuffled_xor = simd_shuffle_xor(30u, 8u);
uint shuffled_up = simd_shuffle_up(20u, 4u);
uint shuffled_down = simd_shuffle_down(20u, 4u);
bool has_all = simd_all(true);
bool has_any = simd_any(true);
bool has_equal = spvSubgroupAllEqual(0);
has_equal = spvSubgroupAllEqual(true);
float4 added = simd_sum(float4(20.0));
int4 iadded = simd_sum(int4(20));
float4 multiplied = simd_product(float4(20.0));
int4 imultiplied = simd_product(int4(20));
float4 lo = simd_min(float4(20.0));
float4 hi = simd_max(float4(20.0));
int4 slo = simd_min(int4(20));
int4 shi = simd_max(int4(20));
uint4 ulo = simd_min(uint4(20u));
uint4 uhi = simd_max(uint4(20u));
uint4 anded = simd_and(ballot_value);
uint4 ored = simd_or(ballot_value);
uint4 xored = simd_xor(ballot_value);
added = simd_prefix_inclusive_sum(added);
iadded = simd_prefix_inclusive_sum(iadded);
multiplied = simd_prefix_inclusive_product(multiplied);
imultiplied = simd_prefix_inclusive_product(imultiplied);
added = simd_prefix_exclusive_sum(multiplied);
multiplied = simd_prefix_exclusive_product(multiplied);
iadded = simd_prefix_exclusive_sum(imultiplied);
imultiplied = simd_prefix_exclusive_product(imultiplied);
added = quad_sum(added);
multiplied = quad_product(multiplied);
iadded = quad_sum(iadded);
imultiplied = quad_product(imultiplied);
lo = quad_min(lo);
hi = quad_max(hi);
ulo = quad_min(ulo);
uhi = quad_max(uhi);
slo = quad_min(slo);
shi = quad_max(shi);
anded = quad_and(anded);
ored = quad_or(ored);
xored = quad_xor(xored);
float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u);
float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u);
float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u);
float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u);
return out;
}

View File

@@ -0,0 +1,31 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct MVPs
{
float4x4 MVP[2];
};
struct main0_out
{
float4 gl_Position [[position]];
uint gl_Layer [[render_target_array_index]];
};
struct main0_in
{
float4 Position [[attribute(0)]];
};
vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
{
main0_out out = {};
uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1];
gl_InstanceIndex /= spvViewMask[1];
out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
out.gl_Layer = gl_ViewIndex - spvViewMask[0];
return out;
}

View File

@@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct MVPs
{
float4x4 MVP[2];
};
struct main0_out
{
float4 gl_Position [[position]];
uint gl_Layer [[render_target_array_index]];
};
struct main0_in
{
float4 Position [[attribute(0)]];
};
vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
{
main0_out out = {};
const uint gl_ViewIndex = 0;
out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
return out;
}

View File

@@ -0,0 +1,25 @@
#version 450
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture;
void main()
{
int _30;
_30 = 7;
int _27_copy;
for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy)
{
if (5.0 > float(_27))
{
break;
}
else
{
continue;
}
continue;
}
imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0));
}

View File

@@ -0,0 +1,19 @@
#version 450
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#else
#error No extension available for FP16.
#endif
layout(binding = 0) uniform sampler2D uTexture;
layout(location = 0) out f16vec4 FragColor;
layout(location = 0) in f16vec2 UV;
void main()
{
FragColor = f16vec4(texture(uTexture, UV));
}

View File

@@ -0,0 +1,20 @@
#version 450
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#else
#error No extension available for FP16.
#endif
#extension GL_EXT_shader_16bit_storage : require
layout(set = 0, binding = 0) uniform sampler2D uTexture;
layout(location = 0) out f16vec4 FragColor;
layout(location = 0) in f16vec2 UV;
void main()
{
FragColor = f16vec4(texture(uTexture, UV));
}

View File

@@ -0,0 +1,36 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) writeonly buffer SSBO
{
mat2 m22;
mat2x3 m23;
mat2x4 m24;
mat3x2 m32;
mat3 m33;
mat3x4 m34;
mat4x2 m42;
mat4x3 m43;
mat4 m44;
} _21;
layout(binding = 1, std430) readonly buffer ReadSSBO
{
vec2 v2;
vec3 v3;
vec4 v4;
} _26;
void main()
{
_21.m22 = outerProduct(_26.v2, _26.v2);
_21.m23 = outerProduct(_26.v3, _26.v2);
_21.m24 = outerProduct(_26.v4, _26.v2);
_21.m32 = outerProduct(_26.v2, _26.v3);
_21.m33 = outerProduct(_26.v3, _26.v3);
_21.m34 = outerProduct(_26.v4, _26.v3);
_21.m42 = outerProduct(_26.v2, _26.v4);
_21.m43 = outerProduct(_26.v3, _26.v4);
_21.m44 = outerProduct(_26.v4, _26.v4);
}

View File

@@ -0,0 +1,19 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
{
float a;
float b;
float c;
float d;
float e;
} _9;
void main()
{
_9.c = distance(_9.a, _9.b);
_9.d = length(_9.a);
_9.e = normalize(_9.a);
}

View File

@@ -0,0 +1,13 @@
#version 450
layout(location = 0) out float FragColor;
layout(location = 0) in vec3 vRefract;
void main()
{
FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
FragColor += reflect(vRefract.x, vRefract.y);
FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
FragColor += reflect(vRefract.xy, vRefract.zy).y;
}

View File

@@ -0,0 +1,20 @@
#version 450
layout(location = 0) flat in int vIndex;
layout(location = 0) out vec4 FragColor;
void main()
{
int v;
if (vIndex != 1)
{
FragColor = vec4(1.0);
return;
}
else
{
v = 10;
}
FragColor = vec4(float(v));
}

View File

@@ -0,0 +1,46 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 25
; Schema: 0
OpCapability Shader
OpCapability StorageInputOutput16
OpExtension "SPV_KHR_16bit_storage"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %FragColor %UV
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 450
OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
OpName %main "main"
OpName %FragColor "FragColor"
OpName %uTexture "uTexture"
OpName %UV "UV"
OpDecorate %FragColor Location 0
OpDecorate %uTexture DescriptorSet 0
OpDecorate %uTexture Binding 0
OpDecorate %UV Location 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%half = OpTypeFloat 16
%float = OpTypeFloat 32
%v4half = OpTypeVector %half 4
%v4float = OpTypeVector %float 4
%_ptr_Output_v4half = OpTypePointer Output %v4half
%FragColor = OpVariable %_ptr_Output_v4half Output
%11 = OpTypeImage %float 2D 0 0 0 1 Unknown
%12 = OpTypeSampledImage %11
%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
%uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
%v2half = OpTypeVector %half 2
%_ptr_Input_v2half = OpTypePointer Input %v2half
%UV = OpVariable %_ptr_Input_v2half Input
%main = OpFunction %void None %3
%5 = OpLabel
%15 = OpLoad %12 %uTexture
%19 = OpLoad %v2half %UV
%23 = OpImageSampleImplicitLod %v4float %15 %19
%24 = OpFConvert %v4half %23
OpStore %FragColor %24
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1,37 @@
#version 450
layout(local_size_x = 1) in;
layout(set = 0, binding = 0, std430) writeonly buffer SSBO
{
mat2 m22;
mat2x3 m23;
mat2x4 m24;
mat3x2 m32;
mat3 m33;
mat3x4 m34;
mat4x2 m42;
mat4x3 m43;
mat4 m44;
};
layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO
{
vec2 v2;
vec3 v3;
vec4 v4;
};
void main()
{
m22 = outerProduct(v2, v2);
m23 = outerProduct(v3, v2);
m24 = outerProduct(v4, v2);
m32 = outerProduct(v2, v3);
m33 = outerProduct(v3, v3);
m34 = outerProduct(v4, v3);
m42 = outerProduct(v2, v4);
m43 = outerProduct(v3, v4);
m44 = outerProduct(v4, v4);
}

View File

@@ -0,0 +1,18 @@
#version 450
layout(local_size_x = 1) in;
layout(std430, set = 0, binding = 0) buffer SSBO
{
float a;
float b;
float c;
float d;
float e;
};
void main()
{
c = distance(a, b);
d = length(a);
e = normalize(a);
}

View File

@@ -0,0 +1,11 @@
#version 450
layout(location = 0) out float FragColor;
layout(location = 0) in vec3 vRefract;
void main()
{
FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
FragColor += reflect(vRefract.x, vRefract.y);
FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
FragColor += reflect(vRefract.xy, vRefract.zy).y;
}

View File

@@ -0,0 +1,46 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 25
; Schema: 0
OpCapability Shader
OpCapability StorageInputOutput16
OpExtension "SPV_KHR_16bit_storage"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %FragColor %UV
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 450
OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
OpName %main "main"
OpName %FragColor "FragColor"
OpName %uTexture "uTexture"
OpName %UV "UV"
OpDecorate %FragColor Location 0
OpDecorate %uTexture DescriptorSet 0
OpDecorate %uTexture Binding 0
OpDecorate %UV Location 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%half = OpTypeFloat 16
%float = OpTypeFloat 32
%v4half = OpTypeVector %half 4
%v4float = OpTypeVector %float 4
%_ptr_Output_v4half = OpTypePointer Output %v4half
%FragColor = OpVariable %_ptr_Output_v4half Output
%11 = OpTypeImage %float 2D 0 0 0 1 Unknown
%12 = OpTypeSampledImage %11
%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
%uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
%v2half = OpTypeVector %half 2
%_ptr_Input_v2half = OpTypePointer Input %v2half
%UV = OpVariable %_ptr_Input_v2half Input
%main = OpFunction %void None %3
%5 = OpLabel
%15 = OpLoad %12 %uTexture
%19 = OpLoad %v2half %UV
%23 = OpImageSampleImplicitLod %v4float %15 %19
%24 = OpFConvert %v4half %23
OpStore %FragColor %24
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1,37 @@
#version 450
layout(local_size_x = 1) in;
layout(set = 0, binding = 0, std430) writeonly buffer SSBO
{
mat2 m22;
mat2x3 m23;
mat2x4 m24;
mat3x2 m32;
mat3 m33;
mat3x4 m34;
mat4x2 m42;
mat4x3 m43;
mat4 m44;
};
layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO
{
vec2 v2;
vec3 v3;
vec4 v4;
};
void main()
{
m22 = outerProduct(v2, v2);
m23 = outerProduct(v3, v2);
m24 = outerProduct(v4, v2);
m32 = outerProduct(v2, v3);
m33 = outerProduct(v3, v3);
m34 = outerProduct(v4, v3);
m42 = outerProduct(v2, v4);
m43 = outerProduct(v3, v4);
m44 = outerProduct(v4, v4);
}

View File

@@ -0,0 +1,18 @@
#version 450
layout(local_size_x = 1) in;
layout(std430, set = 0, binding = 0) buffer SSBO
{
float a;
float b;
float c;
float d;
float e;
};
void main()
{
c = distance(a, b);
d = length(a);
e = normalize(a);
}

View File

@@ -0,0 +1,11 @@
#version 450
layout(location = 0) out float FragColor;
layout(location = 0) in vec3 vRefract;
void main()
{
FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
FragColor += reflect(vRefract.x, vRefract.y);
FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
FragColor += reflect(vRefract.xy, vRefract.zy).y;
}

View File

@@ -0,0 +1,10 @@
#version 450
#extension GL_KHR_shader_subgroup_basic : require
layout(location = 0) out uvec2 FragColor;
void main()
{
FragColor.x = gl_SubgroupSize;
FragColor.y = gl_SubgroupInvocationID;
}

View File

@@ -0,0 +1,14 @@
#version 310 es
#extension GL_EXT_multiview : require
precision mediump float;
layout(location = 0) in vec4 vColor;
layout(location = 1) in vec2 vTex[4];
layout(binding = 0) uniform sampler2D uTex;
layout(location = 0) out vec4 FragColor;
void main()
{
FragColor = vColor * texture(uTex, vTex[gl_ViewIndex]);
}

View File

@@ -0,0 +1,119 @@
#version 450
#extension GL_KHR_shader_subgroup_basic : require
#extension GL_KHR_shader_subgroup_ballot : require
#extension GL_KHR_shader_subgroup_vote : require
#extension GL_KHR_shader_subgroup_shuffle : require
#extension GL_KHR_shader_subgroup_shuffle_relative : require
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_KHR_shader_subgroup_clustered : require
#extension GL_KHR_shader_subgroup_quad : require
layout(location = 0) out float FragColor;
void main()
{
// basic
FragColor = float(gl_SubgroupSize);
FragColor = float(gl_SubgroupInvocationID);
subgroupBarrier();
subgroupMemoryBarrier();
subgroupMemoryBarrierBuffer();
subgroupMemoryBarrierImage();
bool elected = subgroupElect();
// ballot
FragColor = float(gl_SubgroupEqMask);
FragColor = float(gl_SubgroupGeMask);
FragColor = float(gl_SubgroupGtMask);
FragColor = float(gl_SubgroupLeMask);
FragColor = float(gl_SubgroupLtMask);
vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
vec3 first = subgroupBroadcastFirst(vec3(20.0));
uvec4 ballot_value = subgroupBallot(true);
bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
uint bit_count = subgroupBallotBitCount(ballot_value);
uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
uint lsb = subgroupBallotFindLSB(ballot_value);
uint msb = subgroupBallotFindMSB(ballot_value);
// shuffle
uint shuffled = subgroupShuffle(10u, 8u);
uint shuffled_xor = subgroupShuffleXor(30u, 8u);
// shuffle relative
uint shuffled_up = subgroupShuffleUp(20u, 4u);
uint shuffled_down = subgroupShuffleDown(20u, 4u);
// vote
bool has_all = subgroupAll(true);
bool has_any = subgroupAny(true);
bool has_equal = subgroupAllEqual(0);
has_equal = subgroupAllEqual(true);
// arithmetic
vec4 added = subgroupAdd(vec4(20.0));
ivec4 iadded = subgroupAdd(ivec4(20));
vec4 multiplied = subgroupMul(vec4(20.0));
ivec4 imultiplied = subgroupMul(ivec4(20));
vec4 lo = subgroupMin(vec4(20.0));
vec4 hi = subgroupMax(vec4(20.0));
ivec4 slo = subgroupMin(ivec4(20));
ivec4 shi = subgroupMax(ivec4(20));
uvec4 ulo = subgroupMin(uvec4(20));
uvec4 uhi = subgroupMax(uvec4(20));
uvec4 anded = subgroupAnd(ballot_value);
uvec4 ored = subgroupOr(ballot_value);
uvec4 xored = subgroupXor(ballot_value);
added = subgroupInclusiveAdd(added);
iadded = subgroupInclusiveAdd(iadded);
multiplied = subgroupInclusiveMul(multiplied);
imultiplied = subgroupInclusiveMul(imultiplied);
//lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal
//hi = subgroupInclusiveMax(hi);
//slo = subgroupInclusiveMin(slo);
//shi = subgroupInclusiveMax(shi);
//ulo = subgroupInclusiveMin(ulo);
//uhi = subgroupInclusiveMax(uhi);
//anded = subgroupInclusiveAnd(anded);
//ored = subgroupInclusiveOr(ored);
//xored = subgroupInclusiveXor(ored);
//added = subgroupExclusiveAdd(lo);
added = subgroupExclusiveAdd(multiplied);
multiplied = subgroupExclusiveMul(multiplied);
iadded = subgroupExclusiveAdd(imultiplied);
imultiplied = subgroupExclusiveMul(imultiplied);
//lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal
//hi = subgroupExclusiveMax(hi);
//ulo = subgroupExclusiveMin(ulo);
//uhi = subgroupExclusiveMax(uhi);
//slo = subgroupExclusiveMin(slo);
//shi = subgroupExclusiveMax(shi);
//anded = subgroupExclusiveAnd(anded);
//ored = subgroupExclusiveOr(ored);
//xored = subgroupExclusiveXor(ored);
// clustered
added = subgroupClusteredAdd(added, 4u);
multiplied = subgroupClusteredMul(multiplied, 4u);
iadded = subgroupClusteredAdd(iadded, 4u);
imultiplied = subgroupClusteredMul(imultiplied, 4u);
lo = subgroupClusteredMin(lo, 4u);
hi = subgroupClusteredMax(hi, 4u);
ulo = subgroupClusteredMin(ulo, 4u);
uhi = subgroupClusteredMax(uhi, 4u);
slo = subgroupClusteredMin(slo, 4u);
shi = subgroupClusteredMax(shi, 4u);
anded = subgroupClusteredAnd(anded, 4u);
ored = subgroupClusteredOr(ored, 4u);
xored = subgroupClusteredXor(xored, 4u);
// quad
vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
}

View File

@@ -0,0 +1,14 @@
#version 310 es
#extension GL_EXT_multiview : require
layout(std140, binding = 0) uniform MVPs
{
mat4 MVP[2];
};
layout(location = 0) in vec4 Position;
void main()
{
gl_Position = MVP[gl_ViewIndex] * Position;
}

View File

@@ -0,0 +1,14 @@
#version 310 es
#extension GL_EXT_multiview : require
layout(std140, binding = 0) uniform MVPs
{
mat4 MVP[2];
};
layout(location = 0) in vec4 Position;
void main()
{
gl_Position = MVP[gl_ViewIndex] * Position;
}

View File

@@ -0,0 +1,68 @@
; SPIR-V
; Version: 1.0
; Generator: Google spiregg; 0
; Bound: 42
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %cs_test "main" %gl_GlobalInvocationID %gl_LocalInvocationIndex
OpExecutionMode %cs_test LocalSize 8 8 1
OpSource HLSL 600
OpName %type_2d_image "type.2d.image"
OpName %outImageTexture "outImageTexture"
OpName %cs_test "cs_test"
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
OpDecorate %outImageTexture DescriptorSet 0
OpDecorate %outImageTexture Binding 1
%float = OpTypeFloat 32
%float_5 = OpConstant %float 5
%float_1 = OpConstant %float 1
%int = OpTypeInt 32 1
%int_7 = OpConstant %int 7
%int_0 = OpConstant %int 0
%int_1 = OpConstant %int 1
%type_2d_image = OpTypeImage %float 2D 2 0 0 2 Rgba32f
%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image
%uint = OpTypeInt 32 0
%v3uint = OpTypeVector %uint 3
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
%_ptr_Input_uint = OpTypePointer Input %uint
%void = OpTypeVoid
%19 = OpTypeFunction %void
%v2uint = OpTypeVector %uint 2
%v4float = OpTypeVector %float 4
%bool = OpTypeBool
%outImageTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
%cs_test = OpFunction %void None %19
%23 = OpLabel
%24 = OpLoad %v3uint %gl_GlobalInvocationID
%25 = OpVectorShuffle %v2uint %24 %24 0 1
OpBranch %26
%26 = OpLabel
%27 = OpPhi %int %int_7 %23 %28 %29
%30 = OpPhi %int %int_7 %23 %27 %29
%31 = OpSGreaterThanEqual %bool %27 %int_0
OpLoopMerge %32 %29 None
OpBranchConditional %31 %33 %32
%33 = OpLabel
%34 = OpConvertSToF %float %27
%35 = OpFOrdGreaterThan %bool %float_5 %34
OpSelectionMerge %29 None
OpBranchConditional %35 %36 %29
%36 = OpLabel
OpBranch %32
%29 = OpLabel
%28 = OpISub %int %27 %int_1
OpBranch %26
%32 = OpLabel
%37 = OpISub %int %30 %int_1
%38 = OpConvertSToF %float %37
%39 = OpConvertSToF %float %30
%40 = OpCompositeConstruct %v4float %38 %39 %float_1 %float_1
%41 = OpLoad %type_2d_image %outImageTexture
OpImageWrite %41 %25 %40 None
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1,46 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 25
; Schema: 0
OpCapability Shader
OpCapability StorageInputOutput16
OpExtension "SPV_KHR_16bit_storage"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %FragColor %UV
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 450
OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
OpName %main "main"
OpName %FragColor "FragColor"
OpName %uTexture "uTexture"
OpName %UV "UV"
OpDecorate %FragColor Location 0
OpDecorate %uTexture DescriptorSet 0
OpDecorate %uTexture Binding 0
OpDecorate %UV Location 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%half = OpTypeFloat 16
%float = OpTypeFloat 32
%v4half = OpTypeVector %half 4
%v4float = OpTypeVector %float 4
%_ptr_Output_v4half = OpTypePointer Output %v4half
%FragColor = OpVariable %_ptr_Output_v4half Output
%11 = OpTypeImage %float 2D 0 0 0 1 Unknown
%12 = OpTypeSampledImage %11
%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
%uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
%v2half = OpTypeVector %half 2
%_ptr_Input_v2half = OpTypePointer Input %v2half
%UV = OpVariable %_ptr_Input_v2half Input
%main = OpFunction %void None %3
%5 = OpLabel
%15 = OpLoad %12 %uTexture
%19 = OpLoad %v2half %UV
%23 = OpImageSampleImplicitLod %v4float %15 %19
%24 = OpFConvert %v4half %23
OpStore %FragColor %24
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1,37 @@
#version 450
layout(local_size_x = 1) in;
layout(set = 0, binding = 0, std430) writeonly buffer SSBO
{
mat2 m22;
mat2x3 m23;
mat2x4 m24;
mat3x2 m32;
mat3 m33;
mat3x4 m34;
mat4x2 m42;
mat4x3 m43;
mat4 m44;
};
layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO
{
vec2 v2;
vec3 v3;
vec4 v4;
};
void main()
{
m22 = outerProduct(v2, v2);
m23 = outerProduct(v3, v2);
m24 = outerProduct(v4, v2);
m32 = outerProduct(v2, v3);
m33 = outerProduct(v3, v3);
m34 = outerProduct(v4, v3);
m42 = outerProduct(v2, v4);
m43 = outerProduct(v3, v4);
m44 = outerProduct(v4, v4);
}

View File

@@ -0,0 +1,18 @@
#version 450
layout(local_size_x = 1) in;
layout(std430, set = 0, binding = 0) buffer SSBO
{
float a;
float b;
float c;
float d;
float e;
};
void main()
{
c = distance(a, b);
d = length(a);
e = normalize(a);
}

View File

@@ -0,0 +1,11 @@
#version 450
layout(location = 0) out float FragColor;
layout(location = 0) in vec3 vRefract;
void main()
{
FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
FragColor += reflect(vRefract.x, vRefract.y);
FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
FragColor += reflect(vRefract.xy, vRefract.zy).y;
}

View File

@@ -0,0 +1,18 @@
#version 450
layout(location = 0) out vec4 FragColor;
layout(location = 0) flat in int vIndex;
void main()
{
int v;
if (vIndex != 1)
{
FragColor = vec4(1.0);
return;
}
else
{
v = 10;
}
FragColor = vec4(v);
}

View File

@@ -74,8 +74,14 @@ bool CFG::is_back_edge(uint32_t to) const
// We have a back edge if the visit order is set with the temporary magic value 0.
// Crossing edges will have already been recorded with a visit order.
auto itr = visit_order.find(to);
assert(itr != end(visit_order));
return itr->second.get() == 0;
return itr != end(visit_order) && itr->second.get() == 0;
}
bool CFG::has_visited_forward_edge(uint32_t to) const
{
// If > 0, we have visited the edge already, and this is not a back edge branch.
auto itr = visit_order.find(to);
return itr != end(visit_order) && itr->second.get() > 0;
}
bool CFG::post_order_visit(uint32_t block_id)
@@ -83,8 +89,10 @@ bool CFG::post_order_visit(uint32_t block_id)
// If we have already branched to this block (back edge), stop recursion.
// If our branches are back-edges, we do not record them.
// We have to record crossing edges however.
if (visit_order[block_id].get() >= 0)
return !is_back_edge(block_id);
if (has_visited_forward_edge(block_id))
return true;
else if (is_back_edge(block_id))
return false;
// Block back-edges from recursively revisiting ourselves.
visit_order[block_id].get() = 0;
@@ -123,9 +131,39 @@ bool CFG::post_order_visit(uint32_t block_id)
// This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
// To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
// This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator.
if (block.merge == SPIRBlock::MergeLoop)
if (post_order_visit(block.merge_block))
add_branch(block_id, block.merge_block);
// We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG.
if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block))
add_branch(block_id, block.merge_block);
// If this is a selection merge, add an implied branch to the merge target.
// This is needed to avoid cases where an inner branch dominates the outer branch.
// This can happen if one of the branches exit early, e.g.:
// if (cond) { ...; break; } else { var = 100 } use_var(var);
// We can use the variable without a Phi since there is only one possible parent here.
// However, in this case, we need to hoist out the inner variable to outside the branch.
// Use same strategy as loops.
if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block))
{
// If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup.
// Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement
// will be hoisted out to outside the selection merge.
// If size > 1, the variable will be automatically hoisted, so we should not mess with it.
// Adding fake branches unconditionally breaks parameter preservation analysis,
// which looks at how variables are accessed through the CFG.
auto pred_itr = preceding_edges.find(block.next_block);
if (pred_itr != end(preceding_edges))
{
auto &pred = pred_itr->second;
if (pred.size() == 1 && *pred.begin() != block_id)
add_branch(block_id, block.next_block);
}
else
{
// If the merge block does not have any preceding edges, i.e. unreachable, hallucinate it.
// We're going to do code-gen for it, and domination analysis requires that we have at least one preceding edge.
add_branch(block_id, block.next_block);
}
}
// Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
visit_order[block_id].get() = ++visit_count;

View File

@@ -127,6 +127,7 @@ private:
uint32_t visit_count = 0;
bool is_back_edge(uint32_t to) const;
bool has_visited_forward_edge(uint32_t to) const;
};
class DominatorBuilder

View File

@@ -258,7 +258,7 @@ public:
size_t get_declared_struct_size_runtime_array(const SPIRType &struct_type, size_t array_size) const;
// Returns the effective size of a buffer block struct member.
virtual size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const;
size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const;
// Returns a set of all global variables which are statically accessed
// by the control flow graph from the current entry point.

View File

@@ -1623,6 +1623,16 @@ spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler c
return SPVC_SUCCESS;
}
spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type struct_type, unsigned index, size_t *size)
{
SPVC_BEGIN_SAFE_SCOPE
{
*size = compiler->compiler->get_declared_struct_member_size(*static_cast<const SPIRType *>(struct_type), index);
}
SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT)
return SPVC_SUCCESS;
}
spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset)
{
SPVC_BEGIN_SAFE_SCOPE

View File

@@ -33,7 +33,7 @@ extern "C" {
/* Bumped if ABI or API breaks backwards compatibility. */
#define SPVC_C_API_VERSION_MAJOR 0
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
#define SPVC_C_API_VERSION_MINOR 14
#define SPVC_C_API_VERSION_MINOR 16
/* Bumped if internal implementation details change. */
#define SPVC_C_API_VERSION_PATCH 0
@@ -303,6 +303,7 @@ SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *b
#define SPVC_MSL_PUSH_CONSTANT_BINDING (0)
#define SPVC_MSL_SWIZZLE_BUFFER_BINDING (~(1u))
#define SPVC_MSL_BUFFER_SIZE_BUFFER_BINDING (~(2u))
#define SPVC_MSL_ARGUMENT_BUFFER_BINDING (~(3u))
/* Obsolete. Sticks around for backwards compatibility. */
#define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1
@@ -659,6 +660,7 @@ SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_typ
SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size);
SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler,
spvc_type struct_type, size_t array_size, size_t *size);
SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type type, unsigned index, size_t *size);
SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler,
spvc_type type, unsigned index, unsigned *offset);

View File

@@ -292,7 +292,13 @@ static const char *vector_swizzle(int vecsize, int index)
{ ".x", ".y", ".z", ".w" },
{ ".xy", ".yz", ".zw", nullptr },
{ ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
// This array ends up being compiled as all nullptrs, tripping the assertions below.
{ "", nullptr, nullptr, "$" },
#else
{ "", nullptr, nullptr, nullptr },
#endif
};
assert(vecsize >= 1 && vecsize <= 4);
@@ -4596,7 +4602,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
SmallVector<uint32_t> inherited_expressions;
uint32_t result_type = ops[0];
uint32_t result_type_id = ops[0];
uint32_t id = ops[1];
uint32_t img = ops[2];
uint32_t coord = ops[3];
@@ -4607,6 +4613,8 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
bool fetch = false;
const uint32_t *opt = nullptr;
auto &result_type = get<SPIRType>(result_type_id);
inherited_expressions.push_back(coord);
switch (op)
@@ -4765,14 +4773,21 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
image_is_depth = true;
if (image_is_depth)
expr = remap_swizzle(get<SPIRType>(result_type), 1, expr);
expr = remap_swizzle(result_type, 1, expr);
}
if (!backend.support_small_type_sampling_result && result_type.width < 32)
{
// Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
// Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
}
// Deals with reads from MSL. We might need to downconvert to fewer components.
if (op == OpImageRead)
expr = remap_swizzle(get<SPIRType>(result_type), 4, expr);
expr = remap_swizzle(result_type, 4, expr);
emit_op(result_type, id, expr, forward);
emit_op(result_type_id, id, expr, forward);
for (auto &inherit : inherited_expressions)
inherit_expression_dependencies(id, inherit);
@@ -7101,18 +7116,23 @@ string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
return expr;
}
void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
{
if (var.allocate_temporary_copy)
{
auto &type = get<SPIRType>(var.basetype);
auto &flags = get_decoration_bitset(var.self);
statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
}
}
void CompilerGLSL::flush_variable_declaration(uint32_t id)
{
auto *var = maybe_get<SPIRVariable>(id);
if (var && var->deferred_declaration)
{
statement(variable_decl_function_local(*var), ";");
if (var->allocate_temporary_copy)
{
auto &type = get<SPIRType>(var->basetype);
auto &flags = ir.meta[id].decoration.decoration_flags;
statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", id, "_copy")), ";");
}
emit_variable_temporary_copies(*var);
var->deferred_declaration = false;
}
}
@@ -9867,7 +9887,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
{
return flags_to_qualifiers_glsl(expression_type(id), ir.meta[id].decoration.decoration_flags);
auto &type = expression_type(id);
bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
{
// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
auto &result_type = get<SPIRType>(type.image.type);
if (result_type.width < 32)
return "mediump ";
}
return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
}
string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
@@ -10083,15 +10112,22 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
switch (imagetype.basetype)
{
case SPIRType::Int:
case SPIRType::Short:
case SPIRType::SByte:
res = "i";
break;
case SPIRType::UInt:
case SPIRType::UShort:
case SPIRType::UByte:
res = "u";
break;
default:
break;
}
// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
// We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter.
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
return res + "subpassInput" + (type.image.ms ? "MS" : "");
@@ -11319,8 +11355,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
// If we have loop variables, stop masking out access to the variable now.
for (auto var : block.loop_variables)
get<SPIRVariable>(var).loop_variable_enable = true;
for (auto var_id : block.loop_variables)
{
auto &var = get<SPIRVariable>(var_id);
var.loop_variable_enable = true;
// We're not going to declare the variable directly, so emit a copy here.
emit_variable_temporary_copies(var);
}
// Remember deferred declaration state. We will restore it before returning.
SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
@@ -11661,7 +11702,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
}
auto &case_block = get<SPIRBlock>(target_block);
if (i + 1 < num_blocks &&
if (backend.support_case_fallthrough && i + 1 < num_blocks &&
execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
{
// We will fall through here, so just terminate the block chain early.

View File

@@ -404,6 +404,8 @@ protected:
bool array_is_value_type = true;
bool comparison_image_samples_scalar = false;
bool native_pointers = false;
bool support_small_type_sampling_result = false;
bool support_case_fallthrough = true;
} backend;
void emit_struct(SPIRType &type);
@@ -432,6 +434,7 @@ protected:
bool flush_phi_required(uint32_t from, uint32_t to);
void flush_variable_declaration(uint32_t id);
void flush_undeclared_variables(SPIRBlock &block);
void emit_variable_temporary_copies(const SPIRVariable &var);
bool should_dereference(uint32_t id);
bool should_forward(uint32_t id);

View File

@@ -1742,6 +1742,36 @@ void CompilerHLSL::emit_resources()
end_scope();
statement("");
}
if (requires_scalar_reflect)
{
// FP16/FP64? No templates in HLSL.
statement("float SPIRV_Cross_Reflect(float i, float n)");
begin_scope();
statement("return i - 2.0 * dot(n, i) * n;");
end_scope();
statement("");
}
if (requires_scalar_refract)
{
// FP16/FP64? No templates in HLSL.
statement("float SPIRV_Cross_Refract(float i, float n, float eta)");
begin_scope();
statement("float NoI = n * i;");
statement("float NoI2 = NoI * NoI;");
statement("float k = 1.0 - eta * eta * (1.0 - NoI2);");
statement("if (k < 0.0)");
begin_scope();
statement("return 0.0;");
end_scope();
statement("else");
begin_scope();
statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
end_scope();
end_scope();
statement("");
}
}
string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
@@ -3245,6 +3275,45 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
break;
}
case GLSLstd450Normalize:
// HLSL does not support scalar versions here.
if (expression_type(args[0]).vecsize == 1)
{
// Returns -1 or 1 for valid input, sign() does the job.
emit_unary_func_op(result_type, id, args[0], "sign");
}
else
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
case GLSLstd450Reflect:
if (get<SPIRType>(result_type).vecsize == 1)
{
if (!requires_scalar_reflect)
{
requires_scalar_reflect = true;
force_recompile();
}
emit_binary_func_op(result_type, id, args[0], args[1], "SPIRV_Cross_Reflect");
}
else
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
case GLSLstd450Refract:
if (get<SPIRType>(result_type).vecsize == 1)
{
if (!requires_scalar_refract)
{
requires_scalar_refract = true;
force_recompile();
}
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_Refract");
}
else
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
default:
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
@@ -3954,6 +4023,31 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
break;
}
case OpOuterProduct:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t a = ops[2];
uint32_t b = ops[3];
auto &type = get<SPIRType>(result_type);
string expr = type_to_glsl_constructor(type);
expr += "(";
for (uint32_t col = 0; col < type.columns; col++)
{
expr += to_enclosed_expression(a);
expr += " * ";
expr += to_extract_component_expression(b, col);
if (col + 1 < type.columns)
expr += ", ";
}
expr += ")";
emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
inherit_expression_dependencies(id, a);
inherit_expression_dependencies(id, b);
break;
}
case OpFMod:
{
if (!requires_op_fmod)
@@ -4700,6 +4794,7 @@ string CompilerHLSL::compile()
backend.can_declare_arrays_inline = false;
backend.can_return_array = false;
backend.nonuniform_qualifier = "NonUniformResourceIndex";
backend.support_case_fallthrough = false;
fixup_type_alias();
reorder_type_alias();

View File

@@ -167,6 +167,8 @@ private:
void replace_illegal_names() override;
Options hlsl_options;
// TODO: Refactor this to be more similar to MSL, maybe have some common system in place?
bool requires_op_fmod = false;
bool requires_fp16_packing = false;
bool requires_explicit_fp16_packing = false;
@@ -179,6 +181,8 @@ private:
bool requires_inverse_2x2 = false;
bool requires_inverse_3x3 = false;
bool requires_inverse_4x4 = false;
bool requires_scalar_reflect = false;
bool requires_scalar_refract = false;
uint64_t required_textureSizeVariants = 0;
void require_texture_query_variant(const SPIRType &type);

View File

@@ -105,8 +105,10 @@ void CompilerMSL::build_implicit_builtins()
active_input_builtins.get(BuiltInSubgroupLtMask);
bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) ||
active_input_builtins.get(BuiltInSubgroupGtMask));
bool need_multiview = get_execution_model() == ExecutionModelVertex &&
(msl_options.multiview || active_input_builtins.get(BuiltInViewIndex));
if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
needs_subgroup_invocation_id)
need_multiview || needs_subgroup_invocation_id)
{
bool has_frag_coord = false;
bool has_sample_id = false;
@@ -118,6 +120,7 @@ void CompilerMSL::build_implicit_builtins()
bool has_primitive_id = false;
bool has_subgroup_invocation_id = false;
bool has_subgroup_size = false;
bool has_view_idx = false;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin)
@@ -189,6 +192,22 @@ void CompilerMSL::build_implicit_builtins()
builtin_subgroup_size_id = var.self;
has_subgroup_size = true;
}
if (need_multiview)
{
if (builtin == BuiltInInstanceIndex)
{
// The view index here is derived from the instance index.
builtin_instance_idx_id = var.self;
has_instance_idx = true;
}
if (builtin == BuiltInViewIndex)
{
builtin_view_idx_id = var.self;
has_view_idx = true;
}
}
});
if (!has_frag_coord && need_subpass_input)
@@ -246,7 +265,8 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var_id);
}
if (need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance))
if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) ||
(need_multiview && (!has_instance_idx || !has_view_idx)))
{
uint32_t offset = ir.increase_bound_by(2);
uint32_t type_id = offset;
@@ -265,7 +285,7 @@ void CompilerMSL::build_implicit_builtins()
auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
ptr_type.self = type_id;
if (!has_vertex_idx)
if (need_vertex_params && !has_vertex_idx)
{
uint32_t var_id = ir.increase_bound_by(1);
@@ -276,7 +296,7 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var_id);
}
if (!has_base_vertex)
if (need_vertex_params && !has_base_vertex)
{
uint32_t var_id = ir.increase_bound_by(1);
@@ -287,7 +307,7 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var_id);
}
if (!has_instance_idx)
if (!has_instance_idx) // Needed by both multiview and tessellation
{
uint32_t var_id = ir.increase_bound_by(1);
@@ -296,9 +316,30 @@ void CompilerMSL::build_implicit_builtins()
set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex);
builtin_instance_idx_id = var_id;
mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id);
if (need_multiview)
{
// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
// it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
// Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
// gl_Layer is an output in vertex-pipeline shaders.
uint32_t type_ptr_out_id = ir.increase_bound_by(2);
SPIRType uint_type_ptr_out;
uint_type_ptr_out = uint_type;
uint_type_ptr_out.pointer = true;
uint_type_ptr_out.parent_type = type_id;
uint_type_ptr_out.storage = StorageClassOutput;
auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
ptr_out_type.self = type_id;
var_id = type_ptr_out_id + 1;
set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
builtin_layer_id = var_id;
mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
}
}
if (!has_base_instance)
if (need_vertex_params && !has_base_instance)
{
uint32_t var_id = ir.increase_bound_by(1);
@@ -308,6 +349,17 @@ void CompilerMSL::build_implicit_builtins()
builtin_base_instance_id = var_id;
mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
}
if (need_multiview && !has_view_idx)
{
uint32_t var_id = ir.increase_bound_by(1);
// Create gl_ViewIndex.
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
builtin_view_idx_id = var_id;
mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
}
}
if (need_tesc_params && (!has_invocation_id || !has_primitive_id))
@@ -428,6 +480,17 @@ void CompilerMSL::build_implicit_builtins()
set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.buffer_size_buffer_index);
buffer_size_buffer_id = var_id;
}
if (needs_view_mask_buffer())
{
uint32_t var_id = build_constant_uint_array_pointer();
set_name(var_id, "spvViewMask");
// This should never match anything.
set_decoration(var_id, DecorationDescriptorSet, ~(4u));
set_decoration(var_id, DecorationBinding, msl_options.view_mask_buffer_index);
set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index);
view_mask_buffer_id = var_id;
}
}
void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id)
@@ -708,6 +771,7 @@ string CompilerMSL::compile()
backend.comparison_image_samples_scalar = true;
backend.native_pointers = true;
backend.nonuniform_qualifier = "";
backend.support_small_type_sampling_result = true;
capture_output_to_buffer = msl_options.capture_output_to_buffer;
is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
@@ -731,6 +795,10 @@ string CompilerMSL::compile()
active_interface_variables.insert(swizzle_buffer_id);
if (buffer_size_buffer_id)
active_interface_variables.insert(buffer_size_buffer_id);
if (view_mask_buffer_id)
active_interface_variables.insert(view_mask_buffer_id);
if (builtin_layer_id)
active_interface_variables.insert(builtin_layer_id);
// Create structs to hold input, output and uniform variables.
// Do output first to ensure out. is declared at top of entry function.
@@ -1020,7 +1088,6 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
added_out = true;
}
type_id = get<SPIRVariable>(arg_id).basetype;
p_type = &get<SPIRType>(type_id);
uint32_t next_id = ir.increase_bound_by(1);
func.add_parameter(type_id, next_id, true);
set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
@@ -2370,7 +2437,7 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
// Increment the current offset to be positioned immediately after the current member.
// Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here.
if (mbr_idx + 1 < mbr_cnt)
curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx));
curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size_msl(ib_type, mbr_idx));
}
}
@@ -3122,6 +3189,36 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
case SPVFuncImplReflectScalar:
// Metal does not support scalar versions of these functions.
statement("template<typename T>");
statement("inline T spvReflect(T i, T n)");
begin_scope();
statement("return i - T(2) * i * n * n;");
end_scope();
statement("");
break;
case SPVFuncImplRefractScalar:
// Metal does not support scalar versions of these functions.
statement("template<typename T>");
statement("inline T spvRefract(T i, T n, T eta)");
begin_scope();
statement("T NoI = n * i;");
statement("T NoI2 = NoI * NoI;");
statement("T k = T(1) - eta * eta * (T(1) - NoI2);");
statement("if (k < T(0))");
begin_scope();
statement("return T(0);");
end_scope();
statement("else");
begin_scope();
statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
end_scope();
end_scope();
statement("");
break;
default:
break;
}
@@ -4096,7 +4193,30 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
break;
}
// OpOuterProduct
case OpOuterProduct:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t a = ops[2];
uint32_t b = ops[3];
auto &type = get<SPIRType>(result_type);
string expr = type_to_glsl_constructor(type);
expr += "(";
for (uint32_t col = 0; col < type.columns; col++)
{
expr += to_enclosed_expression(a);
expr += " * ";
expr += to_extract_component_expression(b, col);
if (col + 1 < type.columns)
expr += ", ";
}
expr += ")";
emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
inherit_expression_dependencies(id, a);
inherit_expression_dependencies(id, b);
break;
}
case OpIAddCarry:
case OpISubBorrow:
@@ -4582,6 +4702,57 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
// GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
// GLSLstd450InterpolateAtOffset
case GLSLstd450Distance:
// MSL does not support scalar versions here.
if (expression_type(args[0]).vecsize == 1)
{
// Equivalent to length(a - b) -> abs(a - b).
emit_op(result_type, id,
join("abs(", to_unpacked_expression(args[0]), " - ", to_unpacked_expression(args[1]), ")"),
should_forward(args[0]) && should_forward(args[1]));
inherit_expression_dependencies(id, args[0]);
inherit_expression_dependencies(id, args[1]);
}
else
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
case GLSLstd450Length:
// MSL does not support scalar versions here.
if (expression_type(args[0]).vecsize == 1)
{
// Equivalent to abs().
emit_unary_func_op(result_type, id, args[0], "abs");
}
else
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
case GLSLstd450Normalize:
// MSL does not support scalar versions here.
if (expression_type(args[0]).vecsize == 1)
{
// Returns -1 or 1 for valid input, sign() does the job.
emit_unary_func_op(result_type, id, args[0], "sign");
}
else
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
case GLSLstd450Reflect:
if (get<SPIRType>(result_type).vecsize == 1)
emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect");
else
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
case GLSLstd450Refract:
if (get<SPIRType>(result_type).vecsize == 1)
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract");
else
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
default:
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
@@ -4766,6 +4937,21 @@ string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool
return fname;
}
string CompilerMSL::convert_to_f32(const string &expr, uint32_t components)
{
SPIRType t;
t.basetype = SPIRType::Float;
t.vecsize = components;
t.columns = 1;
return join(type_to_glsl_constructor(t), "(", expr, ")");
}
static inline bool sampling_type_needs_f32_conversion(const SPIRType &type)
{
// Double is not supported to begin with, but doesn't hurt to check for completion.
return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double;
}
// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y,
@@ -4804,6 +4990,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
if (is_fetch)
tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
else if (sampling_type_needs_f32_conversion(coord_type))
tex_coords = convert_to_f32(tex_coords, 1);
alt_coord_component = 1;
break;
@@ -4839,6 +5027,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
if (is_fetch)
tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
else if (sampling_type_needs_f32_conversion(coord_type))
tex_coords = convert_to_f32(tex_coords, 2);
alt_coord_component = 2;
break;
@@ -4849,6 +5039,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
if (is_fetch)
tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
else if (sampling_type_needs_f32_conversion(coord_type))
tex_coords = convert_to_f32(tex_coords, 3);
alt_coord_component = 3;
break;
@@ -4866,6 +5058,9 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
tex_coords = enclose_expression(tex_coords) + ".xyz";
}
if (sampling_type_needs_f32_conversion(coord_type))
tex_coords = convert_to_f32(tex_coords, 3);
alt_coord_component = 3;
break;
@@ -4896,7 +5091,12 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
// If projection, use alt coord as divisor
if (is_proj)
tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
{
if (sampling_type_needs_f32_conversion(coord_type))
tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1);
else
tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
}
if (!farg_str.empty())
farg_str += ", ";
@@ -4930,11 +5130,19 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
forward = forward && should_forward(dref);
farg_str += ", ";
auto &dref_type = expression_type(dref);
string dref_expr;
if (is_proj)
farg_str +=
to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, alt_coord_component);
dref_expr =
join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component));
else
farg_str += to_expression(dref);
dref_expr = to_expression(dref);
if (sampling_type_needs_f32_conversion(dref_type))
dref_expr = convert_to_f32(dref_expr, 1);
farg_str += dref_expr;
if (msl_options.is_macos() && (grad_x || grad_y))
{
@@ -5626,6 +5834,10 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
{
switch (builtin)
{
case BuiltInViewIndex:
if (!msl_options.multiview)
break;
/* fallthrough */
case BuiltInFrontFacing:
case BuiltInPointCoord:
case BuiltInFragCoord:
@@ -6013,7 +6225,12 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance && bi_type != BuiltInSubgroupEqMask &&
bi_type != BuiltInBaryCoordNV && bi_type != BuiltInBaryCoordNoPerspNV &&
bi_type != BuiltInSubgroupGeMask && bi_type != BuiltInSubgroupGtMask &&
bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask)
bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask &&
((get_execution_model() == ExecutionModelFragment && msl_options.multiview) ||
bi_type != BuiltInViewIndex) &&
(get_execution_model() == ExecutionModelGLCompute ||
(get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) ||
(bi_type != BuiltInSubgroupLocalInvocationId && bi_type != BuiltInSubgroupSize)))
{
if (!ep_args.empty())
ep_args += ", ";
@@ -6090,6 +6307,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
{
string ep_args = entry_point_arg_stage_in();
Bitset claimed_bindings;
for (uint32_t i = 0; i < kMaxArgumentBuffers; i++)
{
@@ -6104,12 +6322,30 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
if (!ep_args.empty())
ep_args += ", ";
ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
ep_args += " [[buffer(" + convert_to_string(i) + ")]]";
// Check if the argument buffer binding itself has been remapped.
uint32_t buffer_binding;
auto itr = resource_bindings.find({ get_entry_point().model, i, kArgumentBufferBinding });
if (itr != end(resource_bindings))
{
buffer_binding = itr->second.first.msl_buffer;
itr->second.second = true;
}
else
{
// As a fallback, directly map desc set <-> binding.
// If that was taken, take the next buffer binding.
if (claimed_bindings.get(i))
buffer_binding = next_metal_resource_index_buffer;
else
buffer_binding = i;
}
// Makes it more practical for testing, since the push constant block can occupy the first available
// buffer slot if it's not bound explicitly.
next_metal_resource_index_buffer = i + 1;
claimed_bindings.set(buffer_binding);
ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";
next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
}
entry_point_args_discrete_descriptors(ep_args);
@@ -6401,6 +6637,50 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); });
}
break;
case BuiltInSubgroupLocalInvocationId:
// This is natively supported in compute shaders.
if (get_execution_model() == ExecutionModelGLCompute)
break;
// This is natively supported in fragment shaders in MSL 2.2.
if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
break;
if (msl_options.is_ios())
SPIRV_CROSS_THROW(
"SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS.");
if (!msl_options.supports_msl_version(2, 1))
SPIRV_CROSS_THROW(
"SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1.");
// Shaders other than compute shaders don't support the SIMD-group
// builtins directly, but we can emulate them using the SIMD-group
// functions. This might break if some of the subgroup terminated
// before reaching the entry point.
entry_func.fixup_hooks_in.push_back([=]() {
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
" = simd_prefix_exclusive_sum(1);");
});
break;
case BuiltInSubgroupSize:
// This is natively supported in compute shaders.
if (get_execution_model() == ExecutionModelGLCompute)
break;
// This is natively supported in fragment shaders in MSL 2.2.
if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
break;
if (msl_options.is_ios())
SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS.");
if (!msl_options.supports_msl_version(2, 1))
SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1.");
entry_func.fixup_hooks_in.push_back(
[=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); });
break;
case BuiltInSubgroupEqMask:
if (msl_options.is_ios())
SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
@@ -6408,7 +6688,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
entry_func.fixup_hooks_in.push_back([=]() {
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
builtin_subgroup_invocation_id_id, " > 32 ? uint4(0, (1 << (",
to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (",
to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
});
@@ -6489,6 +6769,44 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));");
});
break;
case BuiltInViewIndex:
if (!msl_options.multiview)
{
// According to the Vulkan spec, when not running under a multiview
// render pass, ViewIndex is 0.
entry_func.fixup_hooks_in.push_back([=]() {
statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;");
});
}
else if (get_execution_model() == ExecutionModelFragment)
{
// Because we adjusted the view index in the vertex shader, we have to
// adjust it back here.
entry_func.fixup_hooks_in.push_back([=]() {
statement(to_expression(var_id), " += ", to_expression(view_mask_buffer_id), "[0];");
});
}
else if (get_execution_model() == ExecutionModelVertex)
{
// Metal provides no special support for multiview, so we smuggle
// the view index in the instance index.
entry_func.fixup_hooks_in.push_back([=]() {
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
to_expression(view_mask_buffer_id), "[0] + ", to_expression(builtin_instance_idx_id),
" % ", to_expression(view_mask_buffer_id), "[1];");
statement(to_expression(builtin_instance_idx_id), " /= ", to_expression(view_mask_buffer_id),
"[1];");
});
// In addition to setting the variable itself, we also need to
// set the render_target_array_index with it on output. We have to
// offset this by the base view index, because Metal isn't in on
// our little game here.
entry_func.fixup_hooks_out.push_back([=]() {
statement(to_expression(builtin_layer_id), " = ", to_expression(var_id), " - ",
to_expression(view_mask_buffer_id), "[0];");
});
}
break;
default:
break;
}
@@ -7790,6 +8108,12 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
case BuiltInSamplePosition:
// Shouldn't be reached.
SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL.");
case BuiltInViewIndex:
if (execution.model != ExecutionModelFragment)
SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders.");
// The ViewIndex was implicitly used in the prior stages to set the render_target_array_index,
// so we can get it from there.
return "render_target_array_index";
// Fragment function out
case BuiltInFragDepth:
@@ -7820,7 +8144,18 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
return "thread_index_in_threadgroup";
case BuiltInSubgroupSize:
return "thread_execution_width";
if (execution.model == ExecutionModelFragment)
{
if (!msl_options.supports_msl_version(2, 2))
SPIRV_CROSS_THROW("threads_per_simdgroup requires Metal 2.2 in fragment shaders.");
return "threads_per_simdgroup";
}
else
{
// thread_execution_width is an alias for threads_per_simdgroup, and it's only available since 1.0,
// but not in fragment.
return "thread_execution_width";
}
case BuiltInNumSubgroups:
if (!msl_options.supports_msl_version(2))
@@ -7833,9 +8168,18 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup";
case BuiltInSubgroupLocalInvocationId:
if (!msl_options.supports_msl_version(2))
SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup";
if (execution.model == ExecutionModelFragment)
{
if (!msl_options.supports_msl_version(2, 2))
SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders.");
return "thread_index_in_simdgroup";
}
else
{
if (!msl_options.supports_msl_version(2))
SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup";
}
case BuiltInSubgroupEqMask:
case BuiltInSubgroupGeMask:
@@ -7937,6 +8281,8 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
return "uint";
case BuiltInSamplePosition:
return "float2";
case BuiltInViewIndex:
return "uint";
// Fragment function out
case BuiltInFragDepth:
@@ -7992,7 +8338,7 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
}
// Returns the byte size of a struct member.
size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
size_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const
{
auto &type = get<SPIRType>(struct_type.member_types[index]);
@@ -8358,7 +8704,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
uint32_t extension_set = args[2];
if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
{
GLSLstd450 op_450 = static_cast<GLSLstd450>(args[3]);
auto op_450 = static_cast<GLSLstd450>(args[3]);
switch (op_450)
{
case GLSLstd450Radians:
@@ -8373,6 +8719,22 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
return SPVFuncImplFindUMsb;
case GLSLstd450SSign:
return SPVFuncImplSSign;
case GLSLstd450Reflect:
{
auto &type = compiler.get<SPIRType>(args[0]);
if (type.vecsize == 1)
return SPVFuncImplReflectScalar;
else
return SPVFuncImplNone;
}
case GLSLstd450Refract:
{
auto &type = compiler.get<SPIRType>(args[0]);
if (type.vecsize == 1)
return SPVFuncImplRefractScalar;
else
return SPVFuncImplNone;
}
case GLSLstd450MatrixInverse:
{
auto &mat_type = compiler.get<SPIRType>(args[0]);
@@ -8519,6 +8881,9 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp
case BuiltInViewportIndex:
case BuiltInFragStencilRefEXT:
case BuiltInPrimitiveId:
case BuiltInSubgroupSize:
case BuiltInSubgroupLocalInvocationId:
case BuiltInViewIndex:
expected_type = SPIRType::UInt;
break;
@@ -8561,6 +8926,7 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr
case BuiltInViewportIndex:
case BuiltInFragStencilRefEXT:
case BuiltInPrimitiveId:
case BuiltInViewIndex:
expected_type = SPIRType::UInt;
break;

View File

@@ -161,6 +161,12 @@ static const uint32_t kSwizzleBufferBinding = ~(1u);
// element to indicate the buffer binding for buffer size buffers to support OpArrayLength.
static const uint32_t kBufferSizeBufferBinding = ~(2u);
// Special constant used in a MSLResourceBinding binding
// element to indicate the buffer binding used for the argument buffer itself.
// This buffer binding should be kept as small as possible as all automatic bindings for buffers
// will start at max(kArgumentBufferBinding) + 1.
static const uint32_t kArgumentBufferBinding = ~(3u);
static const uint32_t kMaxArgumentBuffers = 8;
// Decompiles SPIR-V to Metal Shading Language
@@ -185,12 +191,14 @@ public:
uint32_t shader_patch_output_buffer_index = 27;
uint32_t shader_tess_factor_buffer_index = 26;
uint32_t buffer_size_buffer_index = 25;
uint32_t view_mask_buffer_index = 24;
uint32_t shader_input_wg_index = 0;
bool enable_point_size_builtin = true;
bool disable_rasterization = false;
bool capture_output_to_buffer = false;
bool swizzle_texture_samples = false;
bool tess_domain_origin_lower_left = false;
bool multiview = false;
// Enable use of MSL 2.0 indirect argument buffers.
// MSL 2.0 must also be enabled.
@@ -262,6 +270,13 @@ public:
return !buffers_requiring_array_length.empty();
}
// Provide feedback to calling API to allow it to pass a buffer
// containing the view mask for the current multiview subpass.
bool needs_view_mask_buffer() const
{
return msl_options.multiview;
}
// Provide feedback to calling API to allow it to pass an output
// buffer if the shader needs it.
bool needs_output_buffer() const
@@ -386,6 +401,8 @@ protected:
SPVFuncImplSubgroupBallotFindMSB,
SPVFuncImplSubgroupBallotBitCount,
SPVFuncImplSubgroupAllEqual,
SPVFuncImplReflectScalar,
SPVFuncImplRefractScalar,
SPVFuncImplArrayCopyMultidimMax = 6
};
@@ -406,7 +423,7 @@ protected:
std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override;
std::string sampler_type(const SPIRType &type);
std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override;
size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const override;
size_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const;
std::string to_func_call_arg(uint32_t id) override;
std::string to_name(uint32_t id, bool allow_alias = true) const override;
std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
@@ -520,12 +537,15 @@ protected:
uint32_t builtin_base_vertex_id = 0;
uint32_t builtin_instance_idx_id = 0;
uint32_t builtin_base_instance_id = 0;
uint32_t builtin_view_idx_id = 0;
uint32_t builtin_layer_id = 0;
uint32_t builtin_invocation_id_id = 0;
uint32_t builtin_primitive_id_id = 0;
uint32_t builtin_subgroup_invocation_id_id = 0;
uint32_t builtin_subgroup_size_id = 0;
uint32_t swizzle_buffer_id = 0;
uint32_t buffer_size_buffer_id = 0;
uint32_t view_mask_buffer_id = 0;
void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
@@ -538,6 +558,8 @@ protected:
void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id);
std::string convert_to_f32(const std::string &expr, uint32_t components);
Options msl_options;
std::set<SPVFuncImpl> spv_function_implementations;
std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_location;

View File

@@ -203,6 +203,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
msl_args.append('3')
if '.line.' in shader:
msl_args.append('--emit-line-directives')
if '.multiview.' in shader:
msl_args.append('--msl-multiview')
subprocess.check_call(msl_args)

Binary file not shown.

View File

@@ -0,0 +1,86 @@
// Testbench for MSL resource binding APIs.
// It does not validate output at the moment, but it's useful for ad-hoc testing.
#include <spirv_cross_c.h>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#define SPVC_CHECKED_CALL(x) do { \
if ((x) != SPVC_SUCCESS) { \
fprintf(stderr, "Failed at line %d.\n", __LINE__); \
exit(1); \
} \
} while(0)
static std::vector<SpvId> read_file(const char *path)
{
long len;
FILE *file = fopen(path, "rb");
if (!file)
return {};
fseek(file, 0, SEEK_END);
len = ftell(file);
rewind(file);
std::vector<SpvId> buffer(len / sizeof(SpvId));
if (fread(buffer.data(), 1, len, file) != (size_t)len)
{
fclose(file);
return {};
}
fclose(file);
return buffer;
}
int main(int argc, char **argv)
{
if (argc != 2)
return EXIT_FAILURE;
auto buffer = read_file(argv[1]);
if (buffer.empty())
return EXIT_FAILURE;
spvc_context ctx;
spvc_parsed_ir parsed_ir;
spvc_compiler compiler;
SPVC_CHECKED_CALL(spvc_context_create(&ctx));
SPVC_CHECKED_CALL(spvc_context_parse_spirv(ctx, buffer.data(), buffer.size(), &parsed_ir));
SPVC_CHECKED_CALL(spvc_context_create_compiler(ctx, SPVC_BACKEND_MSL, parsed_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler));
SPVC_CHECKED_CALL(spvc_compiler_msl_add_discrete_descriptor_set(compiler, 3));
spvc_compiler_options opts;
SPVC_CHECKED_CALL(spvc_compiler_create_compiler_options(compiler, &opts));
SPVC_CHECKED_CALL(spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS, SPVC_TRUE));
SPVC_CHECKED_CALL(spvc_compiler_options_set_uint(opts, SPVC_COMPILER_OPTION_MSL_VERSION, 20000));
SPVC_CHECKED_CALL(spvc_compiler_install_compiler_options(compiler, opts));
spvc_msl_resource_binding binding;
spvc_msl_resource_binding_init(&binding);
binding.binding = SPVC_MSL_ARGUMENT_BUFFER_BINDING;
binding.stage = SpvExecutionModelFragment;
binding.desc_set = 0;
binding.msl_buffer = 2;
SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, &binding));
binding.desc_set = 1;
binding.msl_buffer = 3;
SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, &binding));
const char *str;
SPVC_CHECKED_CALL(spvc_compiler_compile(compiler, &str));
fprintf(stderr, "Output:\n%s\n", str);
if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 0, SPVC_MSL_ARGUMENT_BUFFER_BINDING))
return EXIT_FAILURE;
if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 1, SPVC_MSL_ARGUMENT_BUFFER_BINDING))
return EXIT_FAILURE;
}