From aa9ab978a1a7185d436ede6b20f4e889dd8be6ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?= =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?= Date: Sun, 29 Nov 2020 18:54:52 -0800 Subject: [PATCH] Updated spirv-cross. --- 3rdparty/spirv-cross/main.cpp | 27 +- 3rdparty/spirv-cross/spirv_cross.cpp | 73 +- 3rdparty/spirv-cross/spirv_cross_c.cpp | 16 + 3rdparty/spirv-cross/spirv_cross_c.h | 7 +- .../spirv-cross/spirv_cross_containers.hpp | 3 +- .../spirv-cross/spirv_cross_parsed_ir.cpp | 16 +- .../spirv-cross/spirv_cross_parsed_ir.hpp | 1 + 3rdparty/spirv-cross/spirv_glsl.cpp | 183 +++-- 3rdparty/spirv-cross/spirv_glsl.hpp | 1 - 3rdparty/spirv-cross/spirv_hlsl.cpp | 173 ++--- 3rdparty/spirv-cross/spirv_msl.cpp | 670 +++++++++++++----- 3rdparty/spirv-cross/spirv_msl.hpp | 30 + 12 files changed, 855 insertions(+), 345 deletions(-) diff --git a/3rdparty/spirv-cross/main.cpp b/3rdparty/spirv-cross/main.cpp index e00a7f9bd..59f4bd502 100644 --- a/3rdparty/spirv-cross/main.cpp +++ b/3rdparty/spirv-cross/main.cpp @@ -566,6 +566,10 @@ struct CLIArguments uint32_t msl_r32ui_linear_texture_alignment = 4; uint32_t msl_r32ui_alignment_constant_id = 65535; bool msl_texture_1d_as_2d = false; + bool msl_ios_use_simdgroup_functions = false; + bool msl_emulate_subgroups = false; + uint32_t msl_fixed_subgroup_size = 0; + bool msl_force_sample_rate_shading = false; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; bool glsl_force_flattened_io_blocks = false; @@ -779,7 +783,16 @@ static void print_help_msl() "\t[--msl-r32ui-linear-texture-align-constant-id ]:\n\t\tThe function constant ID to use for the linear texture alignment.\n" "\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n" "\t[--msl-texture-1d-as-2d]:\n\t\tEmit Image variables of dimension Dim1D as texture2d.\n" - "\t\tIn Metal, 1D textures do not support all features that 2D textures do. Use this option if your code relies on these features.\n"); + "\t\tIn Metal, 1D textures do not support all features that 2D textures do. Use this option if your code relies on these features.\n" + "\t[--msl-ios-use-simdgroup-functions]:\n\t\tUse simd_*() functions for subgroup ops instead of quad_*().\n" + "\t\tRecent Apple GPUs support SIMD-groups larger than a quad. 
Use this option to take advantage of this support.\n" + "\t[--msl-emulate-subgroups]:\n\t\tAssume subgroups of size 1.\n" + "\t\tIntended for Vulkan Portability implementations where Metal support for SIMD-groups is insufficient for true subgroups.\n" + "\t[--msl-fixed-subgroup-size ]:\n\t\tAssign a constant to the SubgroupSize builtin.\n" + "\t\tIntended for Vulkan Portability implementations where VK_EXT_subgroup_size_control is not supported or disabled.\n" + "\t\tIf 0, assume variable subgroup size as actually exposed by Metal.\n" + "\t[--msl-force-sample-rate-shading]:\n\t\tForce fragment shaders to run per sample.\n" + "\t\tThis adds a [[sample_id]] parameter if none is already present.\n"); // clang-format on } @@ -1021,6 +1034,10 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.r32ui_linear_texture_alignment = args.msl_r32ui_linear_texture_alignment; msl_opts.r32ui_alignment_constant_id = args.msl_r32ui_alignment_constant_id; msl_opts.texture_1D_as_2D = args.msl_texture_1d_as_2d; + msl_opts.ios_use_simdgroup_functions = args.msl_ios_use_simdgroup_functions; + msl_opts.emulate_subgroups = args.msl_emulate_subgroups; + msl_opts.fixed_subgroup_size = args.msl_fixed_subgroup_size; + msl_opts.force_sample_rate_shading = args.msl_force_sample_rate_shading; msl_comp->set_msl_options(msl_opts); for (auto &v : args.msl_discrete_descriptor_sets) msl_comp->add_discrete_descriptor_set(v); @@ -1370,7 +1387,8 @@ static int main_inner(int argc, char *argv[]) cbs.add("--hlsl-nonwritable-uav-texture-as-srv", [&args](CLIParser &) { args.hlsl_nonwritable_uav_texture_as_srv = true; }); cbs.add("--hlsl-enable-16bit-types", [&args](CLIParser &) { args.hlsl_enable_16bit_types = true; }); - cbs.add("--hlsl-flatten-matrix-vertex-input-semantics", [&args](CLIParser &) { args.hlsl_flatten_matrix_vertex_input_semantics = true; }); + cbs.add("--hlsl-flatten-matrix-vertex-input-semantics", + [&args](CLIParser &) { args.hlsl_flatten_matrix_vertex_input_semantics = true; }); cbs.add("--vulkan-semantics", [&args](CLIParser &) { args.vulkan_semantics = true; }); cbs.add("-V", [&args](CLIParser &) { args.vulkan_semantics = true; }); cbs.add("--flatten-multidimensional-arrays", [&args](CLIParser &) { args.flatten_multidimensional_arrays = true; }); @@ -1448,6 +1466,11 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-r32ui-linear-texture-align-constant-id", [&args](CLIParser &parser) { args.msl_r32ui_alignment_constant_id = parser.next_uint(); }); cbs.add("--msl-texture-1d-as-2d", [&args](CLIParser &) { args.msl_texture_1d_as_2d = true; }); + cbs.add("--msl-ios-use-simdgroup-functions", [&args](CLIParser &) { args.msl_ios_use_simdgroup_functions = true; }); + cbs.add("--msl-emulate-subgroups", [&args](CLIParser &) { args.msl_emulate_subgroups = true; }); + cbs.add("--msl-fixed-subgroup-size", + [&args](CLIParser &parser) { args.msl_fixed_subgroup_size = parser.next_uint(); }); + cbs.add("--msl-force-sample-rate-shading", [&args](CLIParser &) { args.msl_force_sample_rate_shading = true; }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp index c2d8147ef..21d2f2fcd 100644 --- a/3rdparty/spirv-cross/spirv_cross.cpp +++ b/3rdparty/spirv-cross/spirv_cross.cpp @@ -1682,8 +1682,13 @@ size_t 
Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, si uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const { auto &result_type = get(spec.basetype); - if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int && result_type.basetype != SPIRType::Boolean) - SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n"); + if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int && + result_type.basetype != SPIRType::Boolean) + { + SPIRV_CROSS_THROW( + "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n"); + } + if (!is_scalar(result_type)) SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n"); @@ -1692,7 +1697,11 @@ uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const const auto eval_u32 = [&](uint32_t id) -> uint32_t { auto &type = expression_type(id); if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != SPIRType::Boolean) - SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n"); + { + SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating " + "specialization constants.\n"); + } + if (!is_scalar(type)) SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n"); if (const auto *c = this->maybe_get(id)) @@ -1701,37 +1710,41 @@ uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const return evaluate_spec_constant_u32(this->get(id)); }; -#define binary_spec_op(op, binary_op) \ - case Op##op: value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); break -#define binary_spec_op_cast(op, binary_op, type) \ - case Op##op: value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); break +#define binary_spec_op(op, binary_op) \ + case Op##op: \ + value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \ + break +#define binary_spec_op_cast(op, binary_op, type) \ + case Op##op: \ + value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \ + break // Support the basic opcodes which are typically used when computing array sizes. 
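// Note: for readers of this hunk, the two macros above expand literally into
// plain switch cases; using binary_spec_op(IAdd, +) and
// binary_spec_op_cast(SLessThan, <, int32_t) as representatives:
//
//   case OpIAdd:
//       value = eval_u32(spec.arguments[0]) + eval_u32(spec.arguments[1]);
//       break;
//   case OpSLessThan:
//       value = uint32_t(int32_t(eval_u32(spec.arguments[0])) <
//                        int32_t(eval_u32(spec.arguments[1])));
//       break;
//
// The signed variants round-trip through int32_t so the 32-bit pattern is
// reinterpreted as signed rather than value-converted, matching the
// S-prefixed SPIR-V opcodes.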
switch (spec.opcode) { - binary_spec_op(IAdd, +); - binary_spec_op(ISub, -); - binary_spec_op(IMul, *); - binary_spec_op(BitwiseAnd, &); - binary_spec_op(BitwiseOr, |); - binary_spec_op(BitwiseXor, ^); - binary_spec_op(LogicalAnd, &); - binary_spec_op(LogicalOr, |); - binary_spec_op(ShiftLeftLogical, <<); - binary_spec_op(ShiftRightLogical, >>); - binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t); - binary_spec_op(LogicalEqual, ==); - binary_spec_op(LogicalNotEqual, !=); - binary_spec_op(IEqual, ==); - binary_spec_op(INotEqual, !=); - binary_spec_op(ULessThan, <); - binary_spec_op(ULessThanEqual, <=); - binary_spec_op(UGreaterThan, >); - binary_spec_op(UGreaterThanEqual, >=); - binary_spec_op_cast(SLessThan, <, int32_t); - binary_spec_op_cast(SLessThanEqual, <=, int32_t); - binary_spec_op_cast(SGreaterThan, >, int32_t); - binary_spec_op_cast(SGreaterThanEqual, >=, int32_t); + binary_spec_op(IAdd, +); + binary_spec_op(ISub, -); + binary_spec_op(IMul, *); + binary_spec_op(BitwiseAnd, &); + binary_spec_op(BitwiseOr, |); + binary_spec_op(BitwiseXor, ^); + binary_spec_op(LogicalAnd, &); + binary_spec_op(LogicalOr, |); + binary_spec_op(ShiftLeftLogical, <<); + binary_spec_op(ShiftRightLogical, >>); + binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t); + binary_spec_op(LogicalEqual, ==); + binary_spec_op(LogicalNotEqual, !=); + binary_spec_op(IEqual, ==); + binary_spec_op(INotEqual, !=); + binary_spec_op(ULessThan, <); + binary_spec_op(ULessThanEqual, <=); + binary_spec_op(UGreaterThan, >); + binary_spec_op(UGreaterThanEqual, >=); + binary_spec_op_cast(SLessThan, <, int32_t); + binary_spec_op_cast(SLessThanEqual, <=, int32_t); + binary_spec_op_cast(SGreaterThan, >, int32_t); + binary_spec_op_cast(SGreaterThanEqual, >=, int32_t); #undef binary_spec_op #undef binary_spec_op_cast diff --git a/3rdparty/spirv-cross/spirv_cross_c.cpp b/3rdparty/spirv-cross/spirv_cross_c.cpp index 5506d8d06..d90d40abb 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.cpp +++ b/3rdparty/spirv-cross/spirv_cross_c.cpp @@ -678,6 +678,22 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_MSL_R32UI_ALIGNMENT_CONSTANT_ID: options->msl.r32ui_alignment_constant_id = value; break; + + case SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS: + options->msl.ios_use_simdgroup_functions = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS: + options->msl.emulate_subgroups = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE: + options->msl.fixed_subgroup_size = value; + break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_SAMPLE_RATE_SHADING: + options->msl.force_sample_rate_shading = value != 0; + break; #endif default: diff --git a/3rdparty/spirv-cross/spirv_cross_c.h b/3rdparty/spirv-cross/spirv_cross_c.h index 7ccec0aae..88f04aaa2 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.h +++ b/3rdparty/spirv-cross/spirv_cross_c.h @@ -33,7 +33,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 42 +#define SPVC_C_API_VERSION_MINOR 44 /* Bumped if internal implementation details change. 
*/ #define SPVC_C_API_VERSION_PATCH 0 @@ -647,6 +647,11 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_HLSL_FLATTEN_MATRIX_VERTEX_INPUT_SEMANTICS = 71 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS = 72 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS = 73 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE = 74 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_SAMPLE_RATE_SHADING = 75 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; diff --git a/3rdparty/spirv-cross/spirv_cross_containers.hpp b/3rdparty/spirv-cross/spirv_cross_containers.hpp index 892a489b0..357ae6227 100644 --- a/3rdparty/spirv-cross/spirv_cross_containers.hpp +++ b/3rdparty/spirv-cross/spirv_cross_containers.hpp @@ -63,7 +63,8 @@ public: private: #if defined(_MSC_VER) && _MSC_VER < 1900 // MSVC 2013 workarounds, sigh ... - union { + union + { char aligned_char[sizeof(T) * N]; double dummy_aligner; } u; diff --git a/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp b/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp index f409d65fb..1a67fa472 100644 --- a/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp +++ b/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp @@ -526,6 +526,17 @@ void ParsedIR::mark_used_as_array_length(ID id) } } +Bitset ParsedIR::get_buffer_block_type_flags(const SPIRType &type) const +{ + if (type.member_types.empty()) + return {}; + + Bitset all_members_flags = get_member_decoration_bitset(type.self, 0); + for (uint32_t i = 1; i < uint32_t(type.member_types.size()); i++) + all_members_flags.merge_and(get_member_decoration_bitset(type.self, i)); + return all_members_flags; +} + Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const { auto &type = get(var.basetype); @@ -542,10 +553,7 @@ Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const if (type.member_types.empty()) return base_flags; - Bitset all_members_flags = get_member_decoration_bitset(type.self, 0); - for (uint32_t i = 1; i < uint32_t(type.member_types.size()); i++) - all_members_flags.merge_and(get_member_decoration_bitset(type.self, i)); - + auto all_members_flags = get_buffer_block_type_flags(type); base_flags.merge_or(all_members_flags); return base_flags; } diff --git a/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp b/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp index 36d6ac7b2..c6c71706d 100644 --- a/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp +++ b/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp @@ -139,6 +139,7 @@ public: void mark_used_as_array_length(ID id); uint32_t increase_bound_by(uint32_t count); Bitset get_buffer_block_flags(const SPIRVariable &var) const; + Bitset get_buffer_block_type_flags(const SPIRType &type) const; void add_typed_id(Types type, ID id); void remove_typed_id(Types type, ID id); diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index df14e5297..62538c4f2 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -1718,8 +1718,7 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) } else if (var.storage == StorageClassOutput) { - if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && - flags.get(DecorationOffset)) + if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset)) { // XFB for standalone variables, we can emit all decorations. 
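// Note: as an illustration of the branch below, a standalone output variable
// carrying, say, XfbBuffer = 1, XfbStride = 16 and Offset = 4 (hypothetical
// values) would be emitted as
//
//   layout(xfb_buffer = 1, xfb_stride = 16, xfb_offset = 4) out vec4 v;
//
// which is why all three decorations must be present before this path fires.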
attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer))); @@ -2013,6 +2012,9 @@ void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_decl block_names.insert(buffer_name); block_ssbo_names.insert(buffer_name); + + // Ensure we emit the correct name when emitting non-forward pointer type. + ir.meta[type.self].decoration.alias = buffer_name; } else if (type.basetype != SPIRType::Struct) buffer_name = type_to_glsl(type); @@ -2022,7 +2024,20 @@ void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_decl if (!forward_declaration) { if (type.basetype == SPIRType::Struct) - statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true), ") buffer ", buffer_name); + { + auto flags = ir.get_buffer_block_type_flags(type); + string decorations; + if (flags.get(DecorationRestrict)) + decorations += " restrict"; + if (flags.get(DecorationCoherent)) + decorations += " coherent"; + if (flags.get(DecorationNonReadable)) + decorations += " writeonly"; + if (flags.get(DecorationNonWritable)) + decorations += " readonly"; + statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true), + ")", decorations, " buffer ", buffer_name); + } else statement("layout(buffer_reference) buffer ", buffer_name); @@ -2306,6 +2321,12 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) { auto &type = get(var.basetype); + if (var.storage == StorageClassInput && type.basetype == SPIRType::Double && + !options.es && options.version < 410) + { + require_extension_internal("GL_ARB_vertex_attrib_64bit"); + } + // Either make it plain in/out or in/out blocks depending on what shader is doing ... bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); const char *qual = to_storage_qualifiers_glsl(var); @@ -3399,10 +3420,8 @@ void CompilerGLSL::emit_resources() void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) { - static const char *workaround_types[] = { - "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4", - "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" - }; + static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4", + "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" }; if (!options.vulkan_semantics) { @@ -3809,7 +3828,7 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) for (auto &type_id : workaround_ubo_load_overload_types) { auto &type = get(type_id); - statement(type_to_glsl(type), " SPIRV_Cross_workaround_load_row_major(", type_to_glsl(type), + statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type), " wrap) { return wrap; }"); } statement(""); @@ -3817,7 +3836,7 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) if (requires_transpose_2x2) { - statement("mat2 SPIRV_Cross_Transpose(mat2 m)"); + statement("mat2 spvTranspose(mat2 m)"); begin_scope(); statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);"); end_scope(); @@ -3826,7 +3845,7 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) if (requires_transpose_3x3) { - statement("mat3 SPIRV_Cross_Transpose(mat3 m)"); + statement("mat3 spvTranspose(mat3 m)"); begin_scope(); statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);"); end_scope(); @@ -3835,9 +3854,10 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) if 
(requires_transpose_4x4) { - statement("mat4 SPIRV_Cross_Transpose(mat4 m)"); + statement("mat4 spvTranspose(mat4 m)"); begin_scope(); - statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);"); + statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], " + "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);"); end_scope(); statement(""); } @@ -5726,7 +5746,8 @@ string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtyp // GLES has very limited support for shadow samplers. // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, // everything else can just throw - if (image_is_comparison(imgtype, tex) && is_legacy_es()) + bool is_comparison = image_is_comparison(imgtype, tex); + if (is_comparison && is_legacy_es()) { if (op == "texture" || op == "textureProj") require_extension_internal("GL_EXT_shadow_samplers"); @@ -5734,8 +5755,20 @@ string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtyp SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES")); } - bool is_es_and_depth = is_legacy_es() && image_is_comparison(imgtype, tex); - std::string type_prefix = image_is_comparison(imgtype, tex) ? "shadow" : "texture"; + if (op == "textureSize") + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("textureSize not supported in legacy ES"); + if (is_comparison) + SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL"); + require_extension_internal("GL_EXT_gpu_shader4"); + } + + if (op == "texelFetch" && is_legacy_es()) + SPIRV_CROSS_THROW("texelFetch not supported in legacy ES"); + + bool is_es_and_depth = is_legacy_es() && is_comparison; + std::string type_prefix = is_comparison ? "shadow" : "texture"; if (op == "texture") return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type); @@ -5754,6 +5787,10 @@ string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtyp is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? 
"ProjGradARB" : "ProjGrad"); else if (op == "textureProjLodOffset") return join(type_prefix, type, "ProjLodOffset"); + else if (op == "textureSize") + return join("textureSize", type); + else if (op == "texelFetch") + return join("texelFetch", type); else { SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); @@ -6162,6 +6199,10 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool opt = &ops[5]; length -= 5; gather = true; + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("textureGather requires ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400."); break; case OpImageGather: @@ -6170,6 +6211,14 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool opt = &ops[5]; length -= 5; gather = true; + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("textureGather requires ESSL 310."); + else if (!options.es && options.version < 400) + { + if (!expression_is_constant_null(comp)) + SPIRV_CROSS_THROW("textureGather with component requires GLSL 400."); + require_extension_internal("GL_ARB_texture_gather"); + } break; case OpImageFetch: @@ -6438,7 +6487,7 @@ string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args) if (args.is_sparse_feedback || args.has_min_lod) fname += "ARB"; - return is_legacy() ? legacy_tex_op(fname, imgtype, tex) : fname; + return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname; } std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) @@ -6685,7 +6734,7 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool farg_str += to_expression(args.bias); } - if (args.component) + if (args.component && !expression_is_constant_null(args.component)) { forward = forward && should_forward(args.component); farg_str += ", "; @@ -10488,7 +10537,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto &type = get(ops[0]); if (type.vecsize > 1) - GLSL_UFOP(not); + GLSL_UFOP(not ); else GLSL_UOP(!); break; @@ -11151,8 +11200,19 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { uint32_t result_type = ops[0]; uint32_t id = ops[1]; + uint32_t img = ops[2]; - auto expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ", ", + std::string fname = "textureSize"; + if (is_legacy_desktop()) + { + auto &type = expression_type(img); + auto &imgtype = get(type.self); + fname = legacy_tex_op(fname, imgtype, img); + } + else if (is_legacy_es()) + SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100."); + + auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ", bitcast_expression(SPIRType::Int, ops[3]), ")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); @@ -11413,13 +11473,24 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) string expr; if (type.image.sampled == 2) { + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_shader_image_size"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize."); + // The size of an image is always constant. expr = join("imageSize(", to_expression(ops[2]), ")"); } else { // This path is hit for samplerBuffers and multisampled images which do not have LOD. 
- expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ")"); + std::string fname = "textureSize"; + if (is_legacy()) + { + auto &imgtype = get(type.self); + fname = legacy_tex_op(fname, imgtype, ops[2]); + } + expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")"); } auto &restype = get(ops[0]); @@ -12199,7 +12270,7 @@ string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &ex } else SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose."); - return join("SPIRV_Cross_Transpose(", exp_str, ")"); + return join("spvTranspose(", exp_str, ")"); } else return join("transpose(", exp_str, ")"); @@ -14177,8 +14248,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) if (is_legacy_es()) { uint32_t counter = statement_count; - statement("for (int SPIRV_Cross_Dummy", counter, " = 0; SPIRV_Cross_Dummy", counter, - " < 1; SPIRV_Cross_Dummy", counter, "++)"); + statement("for (int spvDummy", counter, " = 0; spvDummy", counter, + " < 1; spvDummy", counter, "++)"); } else statement("do"); @@ -14304,7 +14375,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // The backend is responsible for setting this up, and redirection the return values as appropriate. if (ir.ids[block.return_value].get_type() != TypeUndef) { - emit_array_copy("SPIRV_Cross_return_value", block.return_value, StorageClassFunction, + emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction, get_expression_effective_storage_class(block.return_value)); } @@ -15006,17 +15077,15 @@ bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c) { - static const char * const retval[CandidateCount] = { - "GL_KHR_shader_subgroup_ballot", - "GL_KHR_shader_subgroup_basic", - "GL_KHR_shader_subgroup_vote", - "GL_NV_gpu_shader_5", - "GL_NV_shader_thread_group", - "GL_NV_shader_thread_shuffle", - "GL_ARB_shader_ballot", - "GL_ARB_shader_group_vote", - "GL_AMD_gcn_shader" - }; + static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot", + "GL_KHR_shader_subgroup_basic", + "GL_KHR_shader_subgroup_vote", + "GL_NV_gpu_shader_5", + "GL_NV_shader_thread_group", + "GL_NV_shader_thread_shuffle", + "GL_ARB_shader_ballot", + "GL_ARB_shader_group_vote", + "GL_AMD_gcn_shader" }; return retval[c]; } @@ -15046,8 +15115,8 @@ const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extens } } -CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector -CompilerGLSL::ShaderSubgroupSupportHelper::get_feature_dependencies(Feature feature) +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_feature_dependencies(Feature feature) { switch (feature) { @@ -15064,27 +15133,25 @@ CompilerGLSL::ShaderSubgroupSupportHelper::get_feature_dependencies(Feature feat } } -CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask -CompilerGLSL::ShaderSubgroupSupportHelper::get_feature_dependency_mask(Feature feature) +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper:: + get_feature_dependency_mask(Feature feature) { return build_mask(get_feature_dependencies(feature)); } bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature) { - static const bool retval[FeatureCount] = { - false, false, false, false, false, false, - true, // SubgroupBalloFindLSB_MSB - false, false, false, 
false, - true, // SubgroupMemBarrier - replaced with workgroup memory barriers - false, false, true, false - }; + static const bool retval[FeatureCount] = { false, false, false, false, false, false, + true, // SubgroupBalloFindLSB_MSB + false, false, false, false, + true, // SubgroupMemBarrier - replaced with workgroup memory barriers + false, false, true, false }; return retval[feature]; } -CompilerGLSL::ShaderSubgroupSupportHelper::Candidate -CompilerGLSL::ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(Feature feature) +CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper:: + get_KHR_extension_for_feature(Feature feature) { static const Candidate extensions[FeatureCount] = { KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, @@ -15106,8 +15173,7 @@ bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature fea return (feature_mask & (1u << feature)) != 0; } -CompilerGLSL::ShaderSubgroupSupportHelper::Result -CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const +CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const { Result res; @@ -15137,8 +15203,8 @@ CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const return res; } -CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector -CompilerGLSL::ShaderSubgroupSupportHelper::get_candidates_for_feature(Feature ft, const Result &r) +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_candidates_for_feature(Feature ft, const Result &r) { auto c = get_candidates_for_feature(ft); auto cmp = [&r](Candidate a, Candidate b) { @@ -15150,8 +15216,8 @@ CompilerGLSL::ShaderSubgroupSupportHelper::get_candidates_for_feature(Feature ft return c; } -CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector -CompilerGLSL::ShaderSubgroupSupportHelper::get_candidates_for_feature(Feature feature) +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_candidates_for_feature(Feature feature) { switch (feature) { @@ -15192,8 +15258,8 @@ CompilerGLSL::ShaderSubgroupSupportHelper::get_candidates_for_feature(Feature fe } } -CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask -CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(const SmallVector &features) +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask( + const SmallVector &features) { FeatureMask mask = 0; for (Feature f : features) @@ -15234,8 +15300,7 @@ void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID return; auto &backing_type = get(var->basetype); - bool is_ubo = backing_type.basetype == SPIRType::Struct && - backing_type.storage == StorageClassUniform && + bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform && has_decoration(backing_type.self, DecorationBlock); if (!is_ubo) return; @@ -15269,6 +15334,6 @@ void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID if (rewrite) { request_workaround_wrapper_overload(loaded_type); - expr = join("SPIRV_Cross_workaround_load_row_major(", expr, ")"); + expr = join("spvWorkaroundRowMajor(", expr, ")"); } } diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp index 9b39cb09b..fe0f76d0e 100644 --- a/3rdparty/spirv-cross/spirv_glsl.hpp +++ 
b/3rdparty/spirv-cross/spirv_glsl.hpp @@ -243,7 +243,6 @@ public: // - Images which are statically used at least once with Dref opcodes. bool variable_is_depth_or_compare(VariableID id) const; - protected: struct ShaderSubgroupSupportHelper { diff --git a/3rdparty/spirv-cross/spirv_hlsl.cpp b/3rdparty/spirv-cross/spirv_hlsl.cpp index 9d465e5ab..8a1520bac 100644 --- a/3rdparty/spirv-cross/spirv_hlsl.cpp +++ b/3rdparty/spirv-cross/spirv_hlsl.cpp @@ -1234,8 +1234,7 @@ void CompilerHLSL::declare_undefined_values() if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); - statement("static ", variable_decl(type, to_name(undef.self), undef.self), - initializer, ";"); + statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); emitted = true; }); @@ -1529,14 +1528,14 @@ void CompilerHLSL::emit_resources() if (requires_fp16_packing) { // HLSL does not pack into a single word sadly :( - statement("uint SPIRV_Cross_packHalf2x16(float2 value)"); + statement("uint spvPackHalf2x16(float2 value)"); begin_scope(); statement("uint2 Packed = f32tof16(value);"); statement("return Packed.x | (Packed.y << 16);"); end_scope(); statement(""); - statement("float2 SPIRV_Cross_unpackHalf2x16(uint value)"); + statement("float2 spvUnpackHalf2x16(uint value)"); begin_scope(); statement("return f16tof32(uint2(value & 0xffff, value >> 16));"); end_scope(); @@ -1545,13 +1544,13 @@ void CompilerHLSL::emit_resources() if (requires_uint2_packing) { - statement("uint64_t SPIRV_Cross_packUint2x32(uint2 value)"); + statement("uint64_t spvPackUint2x32(uint2 value)"); begin_scope(); statement("return (uint64_t(value.y) << 32) | uint64_t(value.x);"); end_scope(); statement(""); - statement("uint2 SPIRV_Cross_unpackUint2x32(uint64_t value)"); + statement("uint2 spvUnpackUint2x32(uint64_t value)"); begin_scope(); statement("uint2 Unpacked;"); statement("Unpacked.x = uint(value & 0xffffffff);"); @@ -1564,14 +1563,14 @@ void CompilerHLSL::emit_resources() if (requires_explicit_fp16_packing) { // HLSL does not pack into a single word sadly :( - statement("uint SPIRV_Cross_packFloat2x16(min16float2 value)"); + statement("uint spvPackFloat2x16(min16float2 value)"); begin_scope(); statement("uint2 Packed = f32tof16(value);"); statement("return Packed.x | (Packed.y << 16);"); end_scope(); statement(""); - statement("min16float2 SPIRV_Cross_unpackFloat2x16(uint value)"); + statement("min16float2 spvUnpackFloat2x16(uint value)"); begin_scope(); statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));"); end_scope(); @@ -1581,14 +1580,14 @@ void CompilerHLSL::emit_resources() // HLSL does not seem to have builtins for these operation, so roll them by hand ... 
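// Note: a small usage sketch of the helpers emitted above, assuming the
// usual SM 4/5 semantics of f32tof16/f16tof32 (emitted HLSL, hypothetical
// values):
//
//   float2 v = float2(1.0, -2.0);
//   uint p = spvPackHalf2x16(v);       // v.x in the low 16 bits, v.y high
//   float2 r = spvUnpackHalf2x16(p);   // round-trips via half precision
//
// These stand in for GLSL's packHalf2x16/unpackHalf2x16, which HLSL lacks
// as single intrinsics.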
if (requires_unorm8_packing) { - statement("uint SPIRV_Cross_packUnorm4x8(float4 value)"); + statement("uint spvPackUnorm4x8(float4 value)"); begin_scope(); statement("uint4 Packed = uint4(round(saturate(value) * 255.0));"); statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);"); end_scope(); statement(""); - statement("float4 SPIRV_Cross_unpackUnorm4x8(uint value)"); + statement("float4 spvUnpackUnorm4x8(uint value)"); begin_scope(); statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);"); statement("return float4(Packed) / 255.0;"); @@ -1598,14 +1597,14 @@ void CompilerHLSL::emit_resources() if (requires_snorm8_packing) { - statement("uint SPIRV_Cross_packSnorm4x8(float4 value)"); + statement("uint spvPackSnorm4x8(float4 value)"); begin_scope(); statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;"); statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));"); end_scope(); statement(""); - statement("float4 SPIRV_Cross_unpackSnorm4x8(uint value)"); + statement("float4 spvUnpackSnorm4x8(uint value)"); begin_scope(); statement("int SignedValue = int(value);"); statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;"); @@ -1616,14 +1615,14 @@ void CompilerHLSL::emit_resources() if (requires_unorm16_packing) { - statement("uint SPIRV_Cross_packUnorm2x16(float2 value)"); + statement("uint spvPackUnorm2x16(float2 value)"); begin_scope(); statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));"); statement("return Packed.x | (Packed.y << 16);"); end_scope(); statement(""); - statement("float2 SPIRV_Cross_unpackUnorm2x16(uint value)"); + statement("float2 spvUnpackUnorm2x16(uint value)"); begin_scope(); statement("uint2 Packed = uint2(value & 0xffff, value >> 16);"); statement("return float2(Packed) / 65535.0;"); @@ -1633,14 +1632,14 @@ void CompilerHLSL::emit_resources() if (requires_snorm16_packing) { - statement("uint SPIRV_Cross_packSnorm2x16(float2 value)"); + statement("uint spvPackSnorm2x16(float2 value)"); begin_scope(); statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;"); statement("return uint(Packed.x | (Packed.y << 16));"); end_scope(); statement(""); - statement("float2 SPIRV_Cross_unpackSnorm2x16(uint value)"); + statement("float2 spvUnpackSnorm2x16(uint value)"); begin_scope(); statement("int SignedValue = int(value);"); statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;"); @@ -1654,7 +1653,7 @@ void CompilerHLSL::emit_resources() static const char *types[] = { "uint", "uint2", "uint3", "uint4" }; for (auto &type : types) { - statement(type, " SPIRV_Cross_bitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); + statement(type, " spvBitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); begin_scope(); statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));"); statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);"); @@ -1668,7 +1667,7 @@ void CompilerHLSL::emit_resources() static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" }; for (auto &type : unsigned_types) { - statement(type, " SPIRV_Cross_bitfieldUExtract(", type, " Base, uint Offset, uint Count)"); + statement(type, " spvBitfieldUExtract(", type, " Base, uint Offset, uint Count)"); begin_scope(); statement("uint Mask = Count == 32 ? 
0xffffffff : ((1 << Count) - 1);"); statement("return (Base >> Offset) & Mask;"); @@ -1680,7 +1679,7 @@ void CompilerHLSL::emit_resources() static const char *signed_types[] = { "int", "int2", "int3", "int4" }; for (auto &type : signed_types) { - statement(type, " SPIRV_Cross_bitfieldSExtract(", type, " Base, int Offset, int Count)"); + statement(type, " spvBitfieldSExtract(", type, " Base, int Offset, int Count)"); begin_scope(); statement("int Mask = Count == 32 ? -1 : ((1 << Count) - 1);"); statement(type, " Masked = (Base >> Offset) & Mask;"); @@ -1695,7 +1694,7 @@ void CompilerHLSL::emit_resources() { statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("float2x2 SPIRV_Cross_Inverse(float2x2 m)"); + statement("float2x2 spvInverse(float2x2 m)"); begin_scope(); statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); @@ -1719,29 +1718,29 @@ void CompilerHLSL::emit_resources() if (requires_inverse_3x3) { statement("// Returns the determinant of a 2x2 matrix."); - statement("float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)"); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); statement_no_indent(""); statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("float3x3 SPIRV_Cross_Inverse(float3x3 m)"); + statement("float3x3 spvInverse(float3x3 m)"); begin_scope(); statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); - statement("adj[0][0] = SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); - statement("adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); - statement("adj[0][2] = SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); + statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); + statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); + statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); statement_no_indent(""); - statement("adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); - statement("adj[1][1] = SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); - statement("adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); + statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); + statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); + statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); statement_no_indent(""); - statement("adj[2][0] = SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); - statement("adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); - statement("adj[2][2] = SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); + statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); + statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); + statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); statement_no_indent(""); statement("// Calculate the determinant as a combination of the 
cofactors of the first row."); statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);"); @@ -1758,7 +1757,7 @@ void CompilerHLSL::emit_resources() if (!requires_inverse_3x3) { statement("// Returns the determinant of a 2x2 matrix."); - statement("float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)"); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); @@ -1766,71 +1765,71 @@ void CompilerHLSL::emit_resources() } statement("// Returns the determinant of a 3x3 matrix."); - statement("float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " "float c2, float c3)"); begin_scope(); - statement("return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * " - "SPIRV_Cross_Det2x2(a2, a3, " + statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * " + "spvDet2x2(a2, a3, " "b2, b3);"); end_scope(); statement_no_indent(""); statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("float4x4 SPIRV_Cross_Inverse(float4x4 m)"); + statement("float4x4 spvInverse(float4x4 m)"); begin_scope(); statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); statement( - "adj[0][0] = SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " "m[3][3]);"); statement( - "adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " "m[3][3]);"); statement( - "adj[0][2] = SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " + "adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " "m[3][3]);"); statement( - "adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " + "adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " "m[2][3]);"); statement_no_indent(""); statement( - "adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " "m[3][3]);"); statement( - "adj[1][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " "m[3][3]);"); statement( - "adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " + "adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " "m[3][3]);"); statement( - "adj[1][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " + "adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], 
m[1][3], m[2][0], m[2][2], " "m[2][3]);"); statement_no_indent(""); statement( - "adj[2][0] = SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " "m[3][3]);"); statement( - "adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " "m[3][3]);"); statement( - "adj[2][2] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " + "adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " "m[3][3]);"); statement( - "adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " + "adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " "m[2][3]);"); statement_no_indent(""); statement( - "adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " "m[3][2]);"); statement( - "adj[3][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " "m[3][2]);"); statement( - "adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " + "adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " "m[3][2]);"); statement( - "adj[3][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " + "adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " "m[2][2]);"); statement_no_indent(""); statement("// Calculate the determinant as a combination of the cofactors of the first row."); @@ -1847,7 +1846,7 @@ void CompilerHLSL::emit_resources() if (requires_scalar_reflect) { // FP16/FP64? No templates in HLSL. - statement("float SPIRV_Cross_Reflect(float i, float n)"); + statement("float spvReflect(float i, float n)"); begin_scope(); statement("return i - 2.0 * dot(n, i) * n;"); end_scope(); @@ -1857,7 +1856,7 @@ void CompilerHLSL::emit_resources() if (requires_scalar_refract) { // FP16/FP64? No templates in HLSL. - statement("float SPIRV_Cross_Refract(float i, float n, float eta)"); + statement("float spvRefract(float i, float n, float eta)"); begin_scope(); statement("float NoI = n * i;"); statement("float NoI2 = NoI * NoI;"); @@ -1877,7 +1876,7 @@ void CompilerHLSL::emit_resources() if (requires_scalar_faceforward) { // FP16/FP64? No templates in HLSL. - statement("float SPIRV_Cross_FaceForward(float n, float i, float nref)"); + statement("float spvFaceForward(float n, float i, float nref)"); begin_scope(); statement("return i * nref < 0.0 ? n : -n;"); end_scope(); @@ -1916,7 +1915,7 @@ void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char if ((variant_mask & mask) == 0) continue; - statement(ret_types[index], " SPIRV_Cross_", (uav ? "image" : "texture"), "Size(", (uav ? "RW" : ""), + statement(ret_types[index], " spv", (uav ? "Image" : "Texture"), "Size(", (uav ? "RW" : ""), dims[index], "<", type_qualifier, types[type_index], vecsize_qualifier, "> Tex, ", (uav ? 
"" : "uint Level, "), "out uint Param)"); begin_scope(); @@ -2282,7 +2281,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret out_argument += "out "; out_argument += type_to_glsl(type); out_argument += " "; - out_argument += "SPIRV_Cross_return_value"; + out_argument += "spvReturnValue"; out_argument += type_to_array_glsl(type); arglist.push_back(move(out_argument)); } @@ -2996,7 +2995,10 @@ void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse) if (dref) { if (imgtype.image.dim != spv::Dim1D && imgtype.image.dim != spv::Dim2D) - SPIRV_CROSS_THROW("Depth comparison is only supported for 1D and 2D textures in HLSL shader model 2/3."); + { + SPIRV_CROSS_THROW( + "Depth comparison is only supported for 1D and 2D textures in HLSL shader model 2/3."); + } if (grad_x || grad_y) SPIRV_CROSS_THROW("Depth comparison is not supported for grad sampling in HLSL shader model 2/3."); @@ -3023,8 +3025,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse) } else if (proj) { - coord_expr = "float4(" + coord_expr + ", " + - to_extract_component_expression(coord, coord_components) + ")"; + coord_expr = "float4(" + coord_expr + ", " + to_extract_component_expression(coord, coord_components) + ")"; } else if (dref) { @@ -3409,7 +3410,7 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i requires_explicit_fp16_packing = true; force_recompile(); } - return "SPIRV_Cross_unpackFloat2x16"; + return "spvUnpackFloat2x16"; } else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) { @@ -3418,7 +3419,7 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i requires_explicit_fp16_packing = true; force_recompile(); } - return "SPIRV_Cross_packFloat2x16"; + return "spvPackFloat2x16"; } else return ""; @@ -3483,7 +3484,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_fp16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packHalf2x16"); + emit_unary_func_op(result_type, id, args[0], "spvPackHalf2x16"); break; case GLSLstd450UnpackHalf2x16: @@ -3492,7 +3493,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_fp16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackHalf2x16"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackHalf2x16"); break; case GLSLstd450PackSnorm4x8: @@ -3501,7 +3502,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packSnorm4x8"); + emit_unary_func_op(result_type, id, args[0], "spvPackSnorm4x8"); break; case GLSLstd450UnpackSnorm4x8: @@ -3510,7 +3511,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackSnorm4x8"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm4x8"); break; case GLSLstd450PackUnorm4x8: @@ -3519,7 +3520,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packUnorm4x8"); + emit_unary_func_op(result_type, id, args[0], "spvPackUnorm4x8"); break; case 
GLSLstd450UnpackUnorm4x8: @@ -3528,7 +3529,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackUnorm4x8"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm4x8"); break; case GLSLstd450PackSnorm2x16: @@ -3537,7 +3538,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packSnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvPackSnorm2x16"); break; case GLSLstd450UnpackSnorm2x16: @@ -3546,7 +3547,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackSnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm2x16"); break; case GLSLstd450PackUnorm2x16: @@ -3555,7 +3556,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packUnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvPackUnorm2x16"); break; case GLSLstd450UnpackUnorm2x16: @@ -3564,7 +3565,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackUnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm2x16"); break; case GLSLstd450PackDouble2x32: @@ -3613,7 +3614,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, force_recompile(); } } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_Inverse"); + emit_unary_func_op(result_type, id, args[0], "spvInverse"); break; } @@ -3636,7 +3637,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_scalar_reflect = true; force_recompile(); } - emit_binary_func_op(result_type, id, args[0], args[1], "SPIRV_Cross_Reflect"); + emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); } else CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); @@ -3650,7 +3651,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_scalar_refract = true; force_recompile(); } - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_Refract"); + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract"); } else CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); @@ -3664,7 +3665,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_scalar_faceforward = true; force_recompile(); } - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_FaceForward"); + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward"); } else CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); @@ -4701,9 +4702,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) } if (bitcast_type == CompilerHLSL::TypePackUint2x32) - emit_unary_func_op(ops[0], ops[1], ops[2], "SPIRV_Cross_packUint2x32"); + emit_unary_func_op(ops[0], ops[1], ops[2], "spvPackUint2x32"); else - emit_unary_func_op(ops[0], ops[1], ops[2], 
"SPIRV_Cross_unpackUint2x32"); + emit_unary_func_op(ops[0], ops[1], ops[2], "spvUnpackUint2x32"); } break; @@ -5088,7 +5089,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); statement("uint ", dummy_samples_levels, ";"); - auto expr = join("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", ", + auto expr = join("spvTextureSize(", to_expression(ops[2]), ", ", bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")"); auto &restype = get(ops[0]); @@ -5114,9 +5115,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) string expr; if (uav) - expr = join("SPIRV_Cross_imageSize(", to_expression(ops[2]), ", ", dummy_samples_levels, ")"); + expr = join("spvImageSize(", to_expression(ops[2]), ", ", dummy_samples_levels, ")"); else - expr = join("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); + expr = join("spvTextureSize(", to_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::UInt, expr); @@ -5146,9 +5147,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) statement(variable_decl(type, to_name(id)), ";"); if (uav) - statement("SPIRV_Cross_imageSize(", to_expression(ops[2]), ", ", to_name(id), ");"); + statement("spvImageSize(", to_expression(ops[2]), ", ", to_name(id), ");"); else - statement("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", 0u, ", to_name(id), ");"); + statement("spvTextureSize(", to_expression(ops[2]), ", 0u, ", to_name(id), ");"); auto &restype = get(ops[0]); auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id)); @@ -5389,7 +5390,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) force_recompile(); } - auto expr = join("SPIRV_Cross_bitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + auto expr = join("spvBitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ")"); bool forward = @@ -5411,9 +5412,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) } if (opcode == OpBitFieldSExtract) - HLSL_TFOP(SPIRV_Cross_bitfieldSExtract); + HLSL_TFOP(spvBitfieldSExtract); else - HLSL_TFOP(SPIRV_Cross_bitfieldUExtract); + HLSL_TFOP(spvBitfieldUExtract); break; } diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index e6033d82a..a9ec650c5 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -168,9 +168,12 @@ void CompilerMSL::build_implicit_builtins() active_input_builtins.get(BuiltInBaseVertex) || active_input_builtins.get(BuiltInInstanceId) || active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance)); bool need_sample_mask = msl_options.additional_fixed_sample_mask != 0xffffffff; + bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId); + bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups); if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || needs_sample_id || - needs_subgroup_invocation_id || needs_subgroup_size || need_sample_mask) + needs_subgroup_invocation_id || needs_subgroup_size || 
need_sample_mask || need_local_invocation_index || + need_workgroup_size) { bool has_frag_coord = false; bool has_sample_id = false; @@ -184,6 +187,8 @@ void CompilerMSL::build_implicit_builtins() bool has_subgroup_size = false; bool has_view_idx = false; bool has_layer = false; + bool has_local_invocation_index = false; + bool has_workgroup_size = false; uint32_t workgroup_id_type = 0; // FIXME: Investigate the fact that there are no checks for the entry point interface variables. @@ -191,7 +196,6 @@ void CompilerMSL::build_implicit_builtins() if (!ir.meta[var.self].decoration.builtin) return; - // Use Metal's native frame-buffer fetch API for subpass inputs. BuiltIn builtin = ir.meta[var.self].decoration.builtin_type; if (var.storage == StorageClassOutput) @@ -207,6 +211,7 @@ void CompilerMSL::build_implicit_builtins() if (var.storage != StorageClassInput) return; + // Use Metal's native frame-buffer fetch API for subpass inputs. if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses)) { switch (builtin) @@ -330,6 +335,20 @@ void CompilerMSL::build_implicit_builtins() } } + if (need_local_invocation_index && builtin == BuiltInLocalInvocationIndex) + { + builtin_local_invocation_index_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var.self); + has_local_invocation_index = true; + } + + if (need_workgroup_size && builtin == BuiltInLocalInvocationId) + { + builtin_workgroup_size_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self); + has_workgroup_size = true; + } + // The base workgroup needs to have the same type and vector size // as the workgroup or invocation ID, so keep track of the type that // was used. @@ -681,6 +700,48 @@ void CompilerMSL::build_implicit_builtins() builtin_sample_mask_id = var_id; mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var_id); } + + if (need_local_invocation_index && !has_local_invocation_index) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_LocalInvocationIndex. + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLocalInvocationIndex); + builtin_local_invocation_index_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var_id); + } + + if (need_workgroup_size && !has_workgroup_size) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_WorkgroupSize. 
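+ // (Illustrative note: the block below mirrors the gl_LocalInvocationIndex case above, building an
+ // Input pointer, here to a three-component uint vector, decorated BuiltInWorkgroupSize. When
+ // subgroups are emulated, the entry-point fixup hooks later multiply its components together to
+ // synthesize gl_NumSubgroups.)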
+ uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3); + SPIRType uint_type_ptr = get<SPIRType>(type_id); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassInput; + + auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize); + builtin_workgroup_size_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id); + } } if (needs_swizzle_buffer_def) @@ -1293,7 +1354,12 @@ void CompilerMSL::preprocess_op_codes() needs_subgroup_invocation_id = true; if (preproc.needs_subgroup_size) needs_subgroup_size = true; - if (preproc.needs_sample_id) + // build_implicit_builtins() hasn't run yet, and in fact, this needs to execute + // before then so that gl_SampleID will get added; so we also need to check if + // that function would add gl_FragCoord. + if (preproc.needs_sample_id || msl_options.force_sample_rate_shading || + (is_sample_rate() && (active_input_builtins.get(BuiltInFragCoord) || + (need_subpass_input && !msl_options.use_framebuffer_fetch_subpasses)))) needs_sample_id = true; } @@ -1489,6 +1555,38 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: break; } } + break; + } + + case OpGroupNonUniformInverseBallot: + { + added_arg_ids.insert(builtin_subgroup_invocation_id_id); + break; + } + + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + { + added_arg_ids.insert(builtin_subgroup_size_id); + break; + } + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast<GroupOperation>(ops[3]); + switch (operation) + { + case GroupOperationReduce: + added_arg_ids.insert(builtin_subgroup_size_id); + break; + case GroupOperationInclusiveScan: + case GroupOperationExclusiveScan: + added_arg_ids.insert(builtin_subgroup_invocation_id_id); + break; + default: + break; + } + break; } default: @@ -4755,7 +4853,7 @@ void CompilerMSL::emit_custom_functions() statement("template<typename T>"); statement("inline T spvSubgroupBroadcast(T value, ushort lane)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return quad_broadcast(value, lane);"); else statement("return simd_broadcast(value, lane);"); @@ -4764,7 +4862,7 @@ void CompilerMSL::emit_custom_functions() statement("template<>"); statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return !!quad_broadcast((ushort)value, lane);"); else statement("return !!simd_broadcast((ushort)value, lane);"); @@ -4773,7 +4871,7 @@ void CompilerMSL::emit_custom_functions() statement("template<uint N>"); statement("inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);"); else statement("return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);"); @@ -4785,19 +4883,28 @@ void CompilerMSL::emit_custom_functions() statement("template<typename T>"); statement("inline T spvSubgroupBroadcastFirst(T value)"); begin_scope(); - statement("return simd_broadcast_first(value);"); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_broadcast_first(value);"); + else + statement("return
simd_broadcast_first(value);"); end_scope(); statement(""); statement("template<>"); statement("inline bool spvSubgroupBroadcastFirst(bool value)"); begin_scope(); - statement("return !!simd_broadcast_first((ushort)value);"); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return !!quad_broadcast_first((ushort)value);"); + else + statement("return !!simd_broadcast_first((ushort)value);"); end_scope(); statement(""); statement("template"); statement("inline vec spvSubgroupBroadcastFirst(vec value)"); begin_scope(); - statement("return (vec)simd_broadcast_first((vec)value);"); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return (vec)quad_broadcast_first((vec)value);"); + else + statement("return (vec)simd_broadcast_first((vec)value);"); end_scope(); statement(""); break; @@ -4805,13 +4912,26 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplSubgroupBallot: statement("inline uint4 spvSubgroupBallot(bool value)"); begin_scope(); - statement("simd_vote vote = simd_ballot(value);"); - statement("// simd_ballot() returns a 64-bit integer-like object, but"); - statement("// SPIR-V callers expect a uint4. We must convert."); - statement("// FIXME: This won't include higher bits if Apple ever supports"); - statement("// 128 lanes in an SIMD-group."); - statement("return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> " - "32) & 0xFFFFFFFF), 0, 0);"); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + { + statement("return uint4((quad_vote::vote_t)quad_ballot(value), 0, 0, 0);"); + } + else if (msl_options.is_ios()) + { + // The current simd_vote on iOS uses a 32-bit integer-like object. + statement("return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0);"); + } + else + { + statement("simd_vote vote = simd_ballot(value);"); + statement("// simd_ballot() returns a 64-bit integer-like object, but"); + statement("// SPIR-V callers expect a uint4. 
We must convert."); + statement("// FIXME: This won't include higher bits if Apple ever supports"); + statement("// 128 lanes in an SIMD-group."); + statement( + "return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> " + "32) & 0xFFFFFFFF), 0, 0);"); + } end_scope(); statement(""); break; @@ -4827,8 +4947,15 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplSubgroupBallotFindLSB: statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)"); begin_scope(); - statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " - "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } statement("ballot &= mask;"); statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + " "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);"); @@ -4839,8 +4966,15 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplSubgroupBallotFindMSB: statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)"); begin_scope(); - statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " - "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } statement("ballot &= mask;"); statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - " "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), " @@ -4857,23 +4991,44 @@ void CompilerMSL::emit_custom_functions() statement(""); statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)"); begin_scope(); - statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " - "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } statement("return spvPopCount4(ballot & mask);"); end_scope(); statement(""); statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); begin_scope(); - statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), " - "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), " - "uint2(0));"); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), " + 
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), " + "uint2(0));"); + } statement("return spvPopCount4(ballot & mask);"); end_scope(); statement(""); statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); begin_scope(); - statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), " - "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));"); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));"); + } statement("return spvPopCount4(ballot & mask);"); end_scope(); statement(""); @@ -4887,19 +5042,28 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline bool spvSubgroupAllEqual(T value)"); begin_scope(); - statement("return simd_all(all(value == simd_broadcast_first(value)));"); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_all(all(value == quad_broadcast_first(value)));"); + else + statement("return simd_all(all(value == simd_broadcast_first(value)));"); end_scope(); statement(""); statement("template<>"); statement("inline bool spvSubgroupAllEqual(bool value)"); begin_scope(); - statement("return simd_all(value) || !simd_any(value);"); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_all(value) || !quad_any(value);"); + else + statement("return simd_all(value) || !simd_any(value);"); end_scope(); statement(""); statement("template"); statement("inline bool spvSubgroupAllEqual(vec value)"); begin_scope(); - statement("return simd_all(all(value == (vec)simd_broadcast_first((vec)value)));"); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_all(all(value == (vec)quad_broadcast_first((vec)value)));"); + else + statement("return simd_all(all(value == (vec)simd_broadcast_first((vec)value)));"); end_scope(); statement(""); break; @@ -4908,7 +5072,7 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline T spvSubgroupShuffle(T value, ushort lane)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return quad_shuffle(value, lane);"); else statement("return simd_shuffle(value, lane);"); @@ -4917,7 +5081,7 @@ void CompilerMSL::emit_custom_functions() statement("template<>"); statement("inline bool spvSubgroupShuffle(bool value, ushort lane)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return !!quad_shuffle((ushort)value, lane);"); else statement("return !!simd_shuffle((ushort)value, lane);"); @@ -4926,7 +5090,7 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline vec spvSubgroupShuffle(vec value, ushort lane)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return (vec)quad_shuffle((vec)value, lane);"); else statement("return (vec)simd_shuffle((vec)value, lane);"); @@ -4938,7 +5102,7 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline T spvSubgroupShuffleXor(T 
value, ushort mask)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return quad_shuffle_xor(value, mask);"); else statement("return simd_shuffle_xor(value, mask);"); @@ -4947,7 +5111,7 @@ void CompilerMSL::emit_custom_functions() statement("template<>"); statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return !!quad_shuffle_xor((ushort)value, mask);"); else statement("return !!simd_shuffle_xor((ushort)value, mask);"); @@ -4956,7 +5120,7 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline vec spvSubgroupShuffleXor(vec value, ushort mask)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return (vec)quad_shuffle_xor((vec)value, mask);"); else statement("return (vec)simd_shuffle_xor((vec)value, mask);"); @@ -4968,7 +5132,7 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline T spvSubgroupShuffleUp(T value, ushort delta)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return quad_shuffle_up(value, delta);"); else statement("return simd_shuffle_up(value, delta);"); @@ -4977,7 +5141,7 @@ void CompilerMSL::emit_custom_functions() statement("template<>"); statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return !!quad_shuffle_up((ushort)value, delta);"); else statement("return !!simd_shuffle_up((ushort)value, delta);"); @@ -4986,7 +5150,7 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline vec spvSubgroupShuffleUp(vec value, ushort delta)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return (vec)quad_shuffle_up((vec)value, delta);"); else statement("return (vec)simd_shuffle_up((vec)value, delta);"); @@ -4998,7 +5162,7 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline T spvSubgroupShuffleDown(T value, ushort delta)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return quad_shuffle_down(value, delta);"); else statement("return simd_shuffle_down(value, delta);"); @@ -5007,7 +5171,7 @@ void CompilerMSL::emit_custom_functions() statement("template<>"); statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return !!quad_shuffle_down((ushort)value, delta);"); else statement("return !!simd_shuffle_down((ushort)value, delta);"); @@ -5016,7 +5180,7 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline vec spvSubgroupShuffleDown(vec value, ushort delta)"); begin_scope(); - if (msl_options.is_ios()) + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) statement("return (vec)quad_shuffle_down((vec)value, delta);"); else statement("return (vec)simd_shuffle_down((vec)value, delta);"); @@ -6697,8 +6861,8 @@ void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t // 
Assume an access chain into a struct variable. assert(var_type.basetype == SPIRType::Struct); auto &c = get<SPIRConstant>(ops[3 + var_type.array.size()]); - interface_index = get_extended_member_decoration(var->self, c.scalar(), - SPIRVCrossDecorationInterfaceMemberIndex); + interface_index = + get_extended_member_decoration(var->self, c.scalar(), SPIRVCrossDecorationInterfaceMemberIndex); } // Accumulate indices. We'll have to skip over the one for the struct, if present, because we already accounted // for that getting the base index. @@ -7645,6 +7809,10 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin // Use the wider of the two scopes (smaller value) exe_scope = min(exe_scope, mem_scope); + if (msl_options.emulate_subgroups && exe_scope >= ScopeSubgroup && !id_mem_sem) + // In this case, we assume a "subgroup" size of 1. The barrier, then, is a noop. + return; + string bar_stmt; if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2)) bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier"; @@ -8105,7 +8273,8 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, } emit_op(result_type, id, join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index), - ".interpolate_at_centroid()", component), should_forward(args[0])); + ".interpolate_at_centroid()", component), + should_forward(args[0])); break; } @@ -8326,7 +8495,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) // Fake arrays returns by writing to an out array instead. decl += "thread "; decl += type_to_glsl(type); - decl += " (&SPIRV_Cross_return_value)"; + decl += " (&spvReturnValue)"; decl += type_to_array_glsl(type); if (!func.arguments.empty()) decl += ", "; @@ -8913,8 +9082,8 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool } else if (!msl_options.supports_msl_version(2, 3)) { - SPIRV_CROSS_THROW( - "Using non-constant 0.0 bias() qualifier for sample_compare. This is not supported on macOS prior to MSL 2.3."); + SPIRV_CROSS_THROW("Using non-constant 0.0 bias() qualifier for sample_compare. This is not supported " "on macOS prior to MSL 2.3."); } } } @@ -8978,13 +9147,8 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool if (args.min_lod) { - if (msl_options.is_macos()) - { - if (!msl_options.supports_msl_version(2, 2)) - SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2+ and up on macOS."); - } - else if (msl_options.is_ios()) - SPIRV_CROSS_THROW("min_lod_clamp() is not supported on iOS."); + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2 and up."); forward = forward && should_forward(args.min_lod); farg_str += ", min_lod_clamp(" + to_expression(args.min_lod) + ")"; @@ -9824,9 +9988,11 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in case BuiltInPrimitiveId: if (msl_options.multi_patch_workgroup) return ""; - /* fallthrough */ + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage case BuiltInSubgroupSize: // FIXME: Should work in any stage + if (msl_options.emulate_subgroups) + return ""; return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ?
"" : " "); case BuiltInPatchVertices: return ""; @@ -10034,15 +10200,18 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { switch (builtin) { + case BuiltInNumSubgroups: + case BuiltInSubgroupId: + case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage + case BuiltInSubgroupSize: // FIXME: Should work in any stage + if (msl_options.emulate_subgroups) + break; + /* fallthrough */ case BuiltInGlobalInvocationId: case BuiltInWorkgroupId: case BuiltInNumWorkgroups: case BuiltInLocalInvocationId: case BuiltInLocalInvocationIndex: - case BuiltInNumSubgroups: - case BuiltInSubgroupId: - case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage - case BuiltInSubgroupSize: // FIXME: Should work in any stage return string(" [[") + builtin_qualifier(builtin) + "]]"; default: @@ -10308,6 +10477,10 @@ bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type) case BuiltInViewIndex: return get_execution_model() == ExecutionModelFragment && msl_options.multiview && msl_options.multiview_layered_rendering; + // Compute function in + case BuiltInSubgroupId: + case BuiltInNumSubgroups: + return !msl_options.emulate_subgroups; // Any stage function in case BuiltInDeviceIndex: case BuiltInSubgroupEqMask: @@ -10316,15 +10489,27 @@ bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type) case BuiltInSubgroupLeMask: case BuiltInSubgroupLtMask: return false; - case BuiltInSubgroupLocalInvocationId: case BuiltInSubgroupSize: - return get_execution_model() == ExecutionModelGLCompute || - (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)); + if (msl_options.fixed_subgroup_size != 0) + return false; + /* fallthrough */ + case BuiltInSubgroupLocalInvocationId: + return !msl_options.emulate_subgroups; default: return true; } } +// Returns true if this is a fragment shader that runs per sample, and false otherwise. 
+bool CompilerMSL::is_sample_rate() const +{ + auto &caps = get_declared_capabilities(); + return get_execution_model() == ExecutionModelFragment && + (msl_options.force_sample_rate_shading || + std::find(caps.begin(), caps.end(), CapabilitySampleRateShading) != caps.end() || + (msl_options.use_framebuffer_fetch_subpasses && need_subpass_input)); +} + void CompilerMSL::entry_point_args_builtin(string &ep_args) { // Builtin variables @@ -10867,7 +11052,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() uint32_t var_id = var.self; BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type; - if (var.storage == StorageClassInput && is_builtin_variable(var)) + if (var.storage == StorageClassInput && is_builtin_variable(var) && active_input_builtins.get(bi_type)) { switch (bi_type) { @@ -10877,9 +11062,18 @@ void CompilerMSL::fix_up_shader_inputs_outputs() to_expression(builtin_sample_id_id), ");"); }); break; + case BuiltInFragCoord: + if (is_sample_rate()) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), ".xy += get_sample_position(", + to_expression(builtin_sample_id_id), ") - 0.5;"); + }); + } + break; case BuiltInHelperInvocation: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("simd_is_helper_thread() is only supported on macOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.3 on iOS."); else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS."); @@ -10930,67 +11124,78 @@ void CompilerMSL::fix_up_shader_inputs_outputs() entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); }); } break; - case BuiltInSubgroupLocalInvocationId: - // This is natively supported in compute shaders. - if (get_execution_model() == ExecutionModelGLCompute) + case BuiltInSubgroupId: + if (!msl_options.emulate_subgroups) break; - - // This is natively supported in fragment shaders in MSL 2.2. - if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) - break; - - if (msl_options.is_ios()) - SPIRV_CROSS_THROW( - "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS."); - - if (!msl_options.supports_msl_version(2, 1)) - SPIRV_CROSS_THROW( - "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1."); - - // Shaders other than compute shaders don't support the SIMD-group - // builtins directly, but we can emulate them using the SIMD-group - // functions. This might break if some of the subgroup terminated - // before reaching the entry point. + // For subgroup emulation, this is the same as the local invocation index. entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = simd_prefix_exclusive_sum(1);"); + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_local_invocation_index_id), ";"); }); break; - case BuiltInSubgroupSize: - // This is natively supported in compute shaders. - if (get_execution_model() == ExecutionModelGLCompute) + case BuiltInNumSubgroups: + if (!msl_options.emulate_subgroups) break; - - // This is natively supported in fragment shaders in MSL 2.2. - if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) + // For subgroup emulation, this is the same as the workgroup size. 
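+ // For example, with a local size of 8x8x1 the hook emits the equivalent of:
+ //   uint gl_NumSubgroups = size.x * size.y * size.z; // = 64
+ // where "size" stands in for the implicit WorkgroupSize builtin resolved via to_expression().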
+ entry_func.fixup_hooks_in.push_back([=]() { + auto &type = expression_type(builtin_workgroup_size_id); + string size_expr = to_expression(builtin_workgroup_size_id); + if (type.vecsize >= 3) + size_expr = join(size_expr, ".x * ", size_expr, ".y * ", size_expr, ".z"); + else if (type.vecsize == 2) + size_expr = join(size_expr, ".x * ", size_expr, ".y"); + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", size_expr, ";"); + }); + break; + case BuiltInSubgroupLocalInvocationId: + if (!msl_options.emulate_subgroups) break; - - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS."); - - if (!msl_options.supports_msl_version(2, 1)) - SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1."); - + // For subgroup emulation, assume subgroups of size 1. entry_func.fixup_hooks_in.push_back( - [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); }); + [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); }); + break; + case BuiltInSubgroupSize: + if (msl_options.emulate_subgroups) + { + // For subgroup emulation, assume subgroups of size 1. + entry_func.fixup_hooks_in.push_back( + [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 1;"); }); + } + else if (msl_options.fixed_subgroup_size != 0) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.fixed_subgroup_size, ";"); + }); + } break; case BuiltInSubgroupEqMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", - to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? uint4(0, (1 << (", - to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ", - to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", "uint4(1 << ", + to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? 
uint4(0, (1 << (", + to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ", + to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + } }); break; case BuiltInSubgroupGeMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + if (msl_options.fixed_subgroup_size != 0) + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { // Case where index < 32, size < 32: // mask0 = bfi(0, 0xFFFFFFFF, index, size - index); @@ -11006,60 +11211,149 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // This is further complicated by the fact that if you attempt // to bfi/bfe out-of-bounds on Metal, undefined behavior is the // result. - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", - to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)", - to_expression(builtin_subgroup_size_id), ", 32) - (int)", - to_expression(builtin_subgroup_invocation_id_id), - ", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)", - to_expression(builtin_subgroup_size_id), " - (int)max(", - to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));"); + if (msl_options.fixed_subgroup_size > 32) + { + // Don't use the subgroup size variable with fixed subgroup sizes, + // since the variables could be defined in the wrong order. + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(32 - (int)", + to_expression(builtin_subgroup_invocation_id_id), + ", 0)), insert_bits(0u, 0xFFFFFFFF," + " (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), ", + msl_options.fixed_subgroup_size, " - max(", + to_expression(builtin_subgroup_invocation_id_id), + ", 32u)), uint2(0));"); + } + else if (msl_options.fixed_subgroup_size != 0) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), ", ", + msl_options.fixed_subgroup_size, " - ", + to_expression(builtin_subgroup_invocation_id_id), + "), uint3(0));"); + } + else if (msl_options.is_ios()) + { + // On iOS, the SIMD-group size will currently never exceed 32. 
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), ", ", + to_expression(builtin_subgroup_size_id), " - ", + to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)", + to_expression(builtin_subgroup_size_id), ", 32) - (int)", + to_expression(builtin_subgroup_invocation_id_id), + ", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)", + to_expression(builtin_subgroup_size_id), " - (int)max(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));"); + } }); break; case BuiltInSubgroupGtMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { // The same logic applies here, except now the index is one // more than the subgroup invocation ID. - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", - to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)", - to_expression(builtin_subgroup_size_id), ", 32) - (int)", - to_expression(builtin_subgroup_invocation_id_id), - " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)", - to_expression(builtin_subgroup_size_id), " - (int)max(", - to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));"); + if (msl_options.fixed_subgroup_size > 32) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(32 - (int)", + to_expression(builtin_subgroup_invocation_id_id), + " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), ", + msl_options.fixed_subgroup_size, " - max(", + to_expression(builtin_subgroup_invocation_id_id), + " + 1, 32u)), uint2(0));"); + } + else if (msl_options.fixed_subgroup_size != 0) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1, ", + msl_options.fixed_subgroup_size, " - ", + to_expression(builtin_subgroup_invocation_id_id), + " - 1), uint3(0));"); + } + else if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1, ", + to_expression(builtin_subgroup_size_id), " - ", + to_expression(builtin_subgroup_invocation_id_id), " - 1), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 
32u), (uint)max(min((int)", + to_expression(builtin_subgroup_size_id), ", 32) - (int)", + to_expression(builtin_subgroup_invocation_id_id), + " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)", + to_expression(builtin_subgroup_size_id), " - (int)max(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));"); + } }); break; case BuiltInSubgroupLeMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, 0, min(", - to_expression(builtin_subgroup_invocation_id_id), - " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));"); + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, min(", + to_expression(builtin_subgroup_invocation_id_id), + " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));"); + } }); break; case BuiltInSubgroupLtMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, 0, min(", - to_expression(builtin_subgroup_invocation_id_id), - ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));"); + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, ", + to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, min(", + to_expression(builtin_subgroup_invocation_id_id), + ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));"); + } }); break; case BuiltInViewIndex: @@ -11224,7 +11518,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() break; } } - else if (var.storage == StorageClassOutput && is_builtin_variable(var)) + else if (var.storage == StorageClassOutput && is_builtin_variable(var) && active_output_builtins.get(bi_type)) { if (bi_type == BuiltInSampleMask && get_execution_model() == ExecutionModelFragment && 
msl_options.additional_fixed_sample_mask != 0xffffffff) @@ -11985,7 +12279,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::ControlPointArray: return join("patch_control_point<", type_to_glsl(get<SPIRType>(type.parent_type), id), ">"); - + case SPIRType::Interpolant: return join("interpolant<", type_to_glsl(get<SPIRType>(type.parent_type), id), ", interpolation::", has_decoration(type.self, DecorationNoPerspective) ? "no_perspective" : "perspective", ">"); @@ -12348,10 +12642,23 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) const uint32_t *ops = stream(i); auto op = static_cast<Op>(i.op); - // Metal 2.0 is required. iOS only supports quad ops. macOS only supports - // broadcast and shuffle on 10.13 (2.0), with full support in 10.14 (2.1). - // Note that iOS makes no distinction between a quad-group and a subgroup; - // all subgroups are quad-groups there. + if (msl_options.emulate_subgroups) + { + // In this mode, only the GroupNonUniform cap is supported. The only op + // we need to handle, then, is OpGroupNonUniformElect. + if (op != OpGroupNonUniformElect) + SPIRV_CROSS_THROW("Subgroup emulation does not support operations other than Elect."); + // In this mode, the subgroup size is assumed to be one, so every invocation + // is elected. + emit_op(ops[0], ops[1], "true", true); + return; + } + + // Metal 2.0 is required. iOS only supports quad ops on 11.0 (2.0), with + // full support in 13.0 (2.2). macOS only supports broadcast and shuffle on + // 10.13 (2.0), with full support in 10.14 (2.1). + // Note that Apple GPUs before A13 make no distinction between a quad-group + // and a SIMD-group; all SIMD-groups are quad-groups on those. if (!msl_options.supports_msl_version(2)) SPIRV_CROSS_THROW("Subgroups are only supported in Metal 2.0 and up."); @@ -12360,12 +12667,32 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); - if (msl_options.is_ios()) + if (msl_options.is_ios() && (!msl_options.supports_msl_version(2, 3) || !msl_options.ios_use_simdgroup_functions)) { switch (op) { default: - SPIRV_CROSS_THROW("iOS only supports quad-group operations."); + SPIRV_CROSS_THROW("Subgroup ops beyond broadcast, ballot, and shuffle on iOS require Metal 2.3 and up."); + case OpGroupNonUniformBroadcastFirst: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("BroadcastFirst on iOS requires Metal 2.2 and up."); + break; + case OpGroupNonUniformElect: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Elect on iOS requires Metal 2.2 and up."); + break; + case OpGroupNonUniformAny: + case OpGroupNonUniformAll: + case OpGroupNonUniformAllEqual: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformBallotBitCount: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Ballot ops on iOS require Metal 2.2 and up."); + break; case OpGroupNonUniformBroadcast: case OpGroupNonUniformShuffle: case OpGroupNonUniformShuffleXor: @@ -12402,7 +12729,10 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) switch (op) { case OpGroupNonUniformElect: - emit_op(result_type, id, "simd_is_first()", true); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + emit_op(result_type, id, "quad_is_first()", false); + else + emit_op(result_type, id,
"simd_is_first()", false); break; case OpGroupNonUniformBroadcast: @@ -12473,11 +12803,17 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) break; case OpGroupNonUniformAll: - emit_unary_func_op(result_type, id, ops[3], "simd_all"); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + emit_unary_func_op(result_type, id, ops[3], "quad_all"); + else + emit_unary_func_op(result_type, id, ops[3], "simd_all"); break; case OpGroupNonUniformAny: - emit_unary_func_op(result_type, id, ops[3], "simd_any"); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + emit_unary_func_op(result_type, id, ops[3], "quad_any"); + else + emit_unary_func_op(result_type, id, ops[3], "simd_any"); break; case OpGroupNonUniformAllEqual: @@ -12872,8 +13208,8 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) case ExecutionModelTessellationEvaluation: return "patch_id"; case ExecutionModelFragment: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("PrimitiveId is not supported in fragment on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("PrimitiveId on iOS requires MSL 2.3."); else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("PrimitiveId on macOS requires MSL 2.2."); return "primitive_id"; @@ -12941,6 +13277,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) return "thread_index_in_threadgroup"; case BuiltInSubgroupSize: + if (msl_options.emulate_subgroups || msl_options.fixed_subgroup_size != 0) + // Shouldn't be reached. + SPIRV_CROSS_THROW("Emitting threads_per_simdgroup attribute with fixed subgroup size??"); if (execution.model == ExecutionModelFragment) { if (!msl_options.supports_msl_version(2, 2)) @@ -12955,16 +13294,25 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) } case BuiltInNumSubgroups: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. + SPIRV_CROSS_THROW("NumSubgroups is handled specially with emulation."); if (!msl_options.supports_msl_version(2)) SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); return msl_options.is_ios() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup"; case BuiltInSubgroupId: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. + SPIRV_CROSS_THROW("SubgroupId is handled specially with emulation."); if (!msl_options.supports_msl_version(2)) SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup"; case BuiltInSubgroupLocalInvocationId: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. + SPIRV_CROSS_THROW("SubgroupLocalInvocationId is handled specially with emulation."); if (execution.model == ExecutionModelFragment) { if (!msl_options.supports_msl_version(2, 2)) @@ -12988,16 +13336,16 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) case BuiltInBaryCoordNV: // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Barycentrics not supported on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); return "barycentric_coord, center_perspective"; case BuiltInBaryCoordNoPerspNV: // TODO: AMD barycentrics as well? 
Seem to have different swizzle and 2 components rather than 3. - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Barycentrics not supported on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); return "barycentric_coord, center_no_perspective"; diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp index a26047e04..61d4fbeab 100644 --- a/3rdparty/spirv-cross/spirv_msl.hpp +++ b/3rdparty/spirv-cross/spirv_msl.hpp @@ -364,6 +364,28 @@ public: // and will be addressed using the current ViewIndex. bool arrayed_subpass_input = false; + // Whether to use SIMD-group or quadgroup functions to implement group non-uniform + // operations. Some GPUs on iOS do not support the SIMD-group functions, only the + // quadgroup functions. + bool ios_use_simdgroup_functions = false; + + // If set, the subgroup size will be assumed to be one, and subgroup-related + // builtins and operations will be emitted accordingly. This mode is intended to + // be used by MoltenVK on hardware/software configurations which do not provide + // sufficient support for subgroups. + bool emulate_subgroups = false; + + // If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control, + // allows the SIMD-group size (aka thread execution width) to vary depending on + // register usage and requirements. In certain circumstances--for example, a pipeline + // in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT-- + // this is undesirable. This fixes the value of the SubgroupSize builtin, instead of + // mapping it to the Metal builtin [[thread_execution_width]]. If the thread + // execution width is reduced, the extra invocations will appear to be inactive. + // If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped + // to the Metal [[thread_execution_width]] builtin. + uint32_t fixed_subgroup_size = 0; + enum class IndexType { None = 0, @@ -379,6 +401,11 @@ public: // different shaders for these three scenarios. IndexType vertex_index_type = IndexType::None; + // If set, a dummy [[sample_id]] input is added to a fragment shader if none is present. + // This will force the shader to run at sample rate, assuming Metal does not optimize + // the extra threads away. + bool force_sample_rate_shading = false; + bool is_ios() const { return platform == iOS; @@ -776,6 +803,7 @@ protected: std::string to_sampler_expression(uint32_t id); std::string to_swizzle_expression(uint32_t id); std::string to_buffer_size_expression(uint32_t id); + bool is_sample_rate() const; bool is_direct_input_builtin(spv::BuiltIn builtin); std::string builtin_qualifier(spv::BuiltIn builtin); std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0); @@ -853,6 +881,8 @@ protected: uint32_t builtin_subgroup_size_id = 0; uint32_t builtin_dispatch_base_id = 0; uint32_t builtin_stage_input_size_id = 0; + uint32_t builtin_local_invocation_index_id = 0; + uint32_t builtin_workgroup_size_id = 0; uint32_t swizzle_buffer_id = 0; uint32_t buffer_size_buffer_id = 0; uint32_t view_mask_buffer_id = 0;
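For reference, a minimal sketch of driving the options introduced above through the public API. This is not part of the patch itself; the SPIR-V input and the chosen option values are placeholders, and only the options added here are shown.

    #include "spirv_msl.hpp"

    #include <utility>
    #include <vector>

    std::string compile_with_new_options(std::vector<uint32_t> spirv)
    {
        spirv_cross::CompilerMSL compiler(std::move(spirv));

        // Copy the current options, toggle the new ones, and write them back.
        spirv_cross::CompilerMSL::Options opts = compiler.get_msl_options();
        opts.ios_use_simdgroup_functions = true; // prefer simd_*() over quad_*() on recent iOS GPUs
        opts.emulate_subgroups = false;          // or true: treat every subgroup as size 1
        opts.fixed_subgroup_size = 32;           // pin gl_SubgroupSize instead of [[thread_execution_width]]
        opts.force_sample_rate_shading = true;   // add a dummy [[sample_id]] to fragment shaders
        compiler.set_msl_options(opts);

        return compiler.compile();
    }

The same switches are exposed on the CLI as --msl-ios-use-simdgroup-functions, --msl-emulate-subgroups, --msl-fixed-subgroup-size, and --msl-force-sample-rate-shading.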