From 6ca5060ce8fe121dc95d4cb99b5242f447f78030 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?= =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?= Date: Fri, 14 May 2021 19:37:10 -0700 Subject: [PATCH] Updated spirv-cross. --- 3rdparty/spirv-cross/main.cpp | 95 ++ 3rdparty/spirv-cross/spirv_common.hpp | 24 + 3rdparty/spirv-cross/spirv_cross.cpp | 120 +- 3rdparty/spirv-cross/spirv_cross.hpp | 27 +- 3rdparty/spirv-cross/spirv_cross_c.cpp | 100 ++ 3rdparty/spirv-cross/spirv_cross_c.h | 26 +- 3rdparty/spirv-cross/spirv_glsl.cpp | 598 ++++++---- 3rdparty/spirv-cross/spirv_glsl.hpp | 43 +- 3rdparty/spirv-cross/spirv_hlsl.cpp | 148 +-- 3rdparty/spirv-cross/spirv_msl.cpp | 1479 +++++++++++++++++------- 3rdparty/spirv-cross/spirv_msl.hpp | 66 +- 11 files changed, 1976 insertions(+), 750 deletions(-) diff --git a/3rdparty/spirv-cross/main.cpp b/3rdparty/spirv-cross/main.cpp index 2b99022e7..953abfadc 100644 --- a/3rdparty/spirv-cross/main.cpp +++ b/3rdparty/spirv-cross/main.cpp @@ -285,6 +285,61 @@ static bool write_string_to_file(const char *path, const char *string) #pragma warning(pop) #endif +static void print_resources(const Compiler &compiler, spv::StorageClass storage, + const SmallVector &resources) +{ + fprintf(stderr, "%s\n", storage == StorageClassInput ? "builtin inputs" : "builtin outputs"); + fprintf(stderr, "=============\n\n"); + for (auto &res : resources) + { + bool active = compiler.has_active_builtin(res.builtin, storage); + const char *basetype = "?"; + auto &type = compiler.get_type(res.value_type_id); + switch (type.basetype) + { + case SPIRType::Float: basetype = "float"; break; + case SPIRType::Int: basetype = "int"; break; + case SPIRType::UInt: basetype = "uint"; break; + default: break; + } + + uint32_t array_size = 0; + bool array_size_literal = false; + if (!type.array.empty()) + { + array_size = type.array.front(); + array_size_literal = type.array_size_literal.front(); + } + + string type_str = basetype; + if (type.vecsize > 1) + type_str += std::to_string(type.vecsize); + + if (array_size) + { + if (array_size_literal) + type_str += join("[", array_size, "]"); + else + type_str += join("[", array_size, " (spec constant ID)]"); + } + + string builtin_str; + switch (res.builtin) + { + case spv::BuiltInPosition: builtin_str = "Position"; break; + case spv::BuiltInPointSize: builtin_str = "PointSize"; break; + case spv::BuiltInCullDistance: builtin_str = "CullDistance"; break; + case spv::BuiltInClipDistance: builtin_str = "ClipDistance"; break; + case spv::BuiltInTessLevelInner: builtin_str = "TessLevelInner"; break; + case spv::BuiltInTessLevelOuter: builtin_str = "TessLevelOuter"; break; + default: builtin_str = string("builtin #") + to_string(res.builtin); + } + + fprintf(stderr, "Builtin %s (%s) (active: %s).\n", builtin_str.c_str(), type_str.c_str(), active ? 
"yes" : "no"); + } + fprintf(stderr, "=============\n\n"); +} + static void print_resources(const Compiler &compiler, const char *tag, const SmallVector &resources) { fprintf(stderr, "%s\n", tag); @@ -475,6 +530,8 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res print_resources(compiler, "push", res.push_constant_buffers); print_resources(compiler, "counters", res.atomic_counters); print_resources(compiler, "acceleration structures", res.acceleration_structures); + print_resources(compiler, spv::StorageClassInput, res.builtin_inputs); + print_resources(compiler, spv::StorageClassOutput, res.builtin_outputs); } static void print_push_constant_resources(const Compiler &compiler, const SmallVector &res) @@ -621,6 +678,8 @@ struct CLIArguments SmallVector variable_type_remaps; SmallVector interface_variable_renames; SmallVector hlsl_attr_remap; + SmallVector> masked_stage_outputs; + SmallVector masked_stage_builtins; string entry; string entry_stage; @@ -845,6 +904,11 @@ static void print_help_common() "\t\tGLSL: Rewrites [0, w] Z range (D3D/Metal/Vulkan) to GL-style [-w, w].\n" "\t\tHLSL/MSL: Rewrites [-w, w] Z range (GL) to D3D/Metal/Vulkan-style [0, w].\n" "\t[--flip-vert-y]:\n\t\tInverts gl_Position.y (or equivalent) at the end of a vertex shader. This is equivalent to using negative viewport height.\n" + "\t[--mask-stage-output-location ]:\n" + "\t\tIf a stage output variable with matching location and component is active, optimize away the variable if applicable.\n" + "\t[--mask-stage-output-builtin ]:\n" + "\t\tIf a stage output variable with matching builtin is active, " + "optimize away the variable if it can affect cross-stage linking correctness.\n" ); // clang-format on } @@ -1103,6 +1167,11 @@ static string compile_iteration(const CLIArguments &args, std::vector compiler->set_variable_type_remap_callback(move(remap_cb)); } + for (auto &masked : args.masked_stage_outputs) + compiler->mask_stage_output_by_location(masked.first, masked.second); + for (auto &masked : args.masked_stage_builtins) + compiler->mask_stage_output_by_builtin(masked); + for (auto &rename : args.entry_point_rename) compiler->rename_entry_point(rename.old_name, rename.new_name, rename.execution_model); @@ -1346,6 +1415,7 @@ static string compile_iteration(const CLIArguments &args, std::vector if (args.dump_resources) { + compiler->update_active_builtins(); print_resources(*compiler, res); print_push_constant_resources(*compiler, res.push_constant_buffers); print_spec_constants(*compiler); @@ -1571,6 +1641,31 @@ static int main_inner(int argc, char *argv[]) cbs.add("--no-support-nonzero-baseinstance", [&](CLIParser &) { args.support_nonzero_baseinstance = false; }); cbs.add("--emit-line-directives", [&args](CLIParser &) { args.emit_line_directives = true; }); + cbs.add("--mask-stage-output-location", [&](CLIParser &parser) { + uint32_t location = parser.next_uint(); + uint32_t component = parser.next_uint(); + args.masked_stage_outputs.push_back({ location, component }); + }); + + cbs.add("--mask-stage-output-builtin", [&](CLIParser &parser) { + BuiltIn masked_builtin = BuiltInMax; + std::string builtin = parser.next_string(); + if (builtin == "Position") + masked_builtin = BuiltInPosition; + else if (builtin == "PointSize") + masked_builtin = BuiltInPointSize; + else if (builtin == "CullDistance") + masked_builtin = BuiltInCullDistance; + else if (builtin == "ClipDistance") + masked_builtin = BuiltInClipDistance; + else + { + print_help(); + exit(EXIT_FAILURE); + } + 
args.masked_stage_builtins.push_back(masked_builtin); + }); + cbs.default_handler = [&args](const char *value) { args.input = value; }; cbs.add("-", [&args](CLIParser &) { args.input = "-"; }); cbs.error_handler = [] { print_help(); }; diff --git a/3rdparty/spirv-cross/spirv_common.hpp b/3rdparty/spirv-cross/spirv_common.hpp index a7a84188f..f0024d7ed 100644 --- a/3rdparty/spirv-cross/spirv_common.hpp +++ b/3rdparty/spirv-cross/spirv_common.hpp @@ -1763,6 +1763,22 @@ struct SetBindingPair } }; +struct LocationComponentPair +{ + uint32_t location; + uint32_t component; + + inline bool operator==(const LocationComponentPair &other) const + { + return location == other.location && component == other.component; + } + + inline bool operator<(const LocationComponentPair &other) const + { + return location < other.location || (location == other.location && component < other.component); + } +}; + struct StageSetBinding { spv::ExecutionModel model; @@ -1785,6 +1801,14 @@ struct InternalHasher return (hash_set * 0x10001b31) ^ hash_binding; } + inline size_t operator()(const LocationComponentPair &value) const + { + // Quality of hash doesn't really matter here. + auto hash_set = std::hash()(value.location); + auto hash_binding = std::hash()(value.component); + return (hash_set * 0x10001b31) ^ hash_binding; + } + inline size_t operator()(const StageSetBinding &value) const { // Quality of hash doesn't really matter here. diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp index f024fa78b..e948820d2 100644 --- a/3rdparty/spirv-cross/spirv_cross.cpp +++ b/3rdparty/spirv-cross/spirv_cross.cpp @@ -284,31 +284,6 @@ SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain) return var; } -StorageClass Compiler::get_expression_effective_storage_class(uint32_t ptr) -{ - auto *var = maybe_get_backing_variable(ptr); - - // If the expression has been lowered to a temporary, we need to use the Generic storage class. - // We're looking for the effective storage class of a given expression. - // An access chain or forwarded OpLoads from such access chains - // will generally have the storage class of the underlying variable, but if the load was not forwarded - // we have lost any address space qualifiers. - bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get(ptr).access_chain && - (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0); - - if (var && !forced_temporary) - { - // Normalize SSBOs to StorageBuffer here. - if (var->storage == StorageClassUniform && - has_decoration(get(var->basetype).self, DecorationBufferBlock)) - return StorageClassStorageBuffer; - else - return var->storage; - } - else - return expression_type(ptr).storage; -} - void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded) { auto &e = get(expr); @@ -853,19 +828,79 @@ ShaderResources Compiler::get_shader_resources(const unordered_set * // It is possible for uniform storage classes to be passed as function parameters, so detect // that. To detect function parameters, check of StorageClass of variable is function scope. - if (var.storage == StorageClassFunction || !type.pointer || is_builtin_variable(var)) + if (var.storage == StorageClassFunction || !type.pointer) return; if (active_variables && active_variables->find(var.self) == end(*active_variables)) return; + // In SPIR-V 1.4 and up, every global must be present in the entry point interface list, + // not just IO variables. 
+ bool active_in_entry_point = true; + if (ir.get_spirv_version() < 0x10400) + { + if (var.storage == StorageClassInput || var.storage == StorageClassOutput) + active_in_entry_point = interface_variable_exists_in_entry_point(var.self); + } + else + active_in_entry_point = interface_variable_exists_in_entry_point(var.self); + + if (!active_in_entry_point) + return; + + bool is_builtin = is_builtin_variable(var); + + if (is_builtin) + { + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + + auto &list = var.storage == StorageClassInput ? res.builtin_inputs : res.builtin_outputs; + BuiltInResource resource; + + if (has_decoration(type.self, DecorationBlock)) + { + resource.resource = { var.self, var.basetype, type.self, + get_remapped_declared_block_name(var.self, false) }; + + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + resource.value_type_id = type.member_types[i]; + resource.builtin = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)); + list.push_back(resource); + } + } + else + { + bool strip_array = + !has_decoration(var.self, DecorationPatch) && ( + get_execution_model() == ExecutionModelTessellationControl || + (get_execution_model() == ExecutionModelTessellationEvaluation && + var.storage == StorageClassInput)); + + resource.resource = { var.self, var.basetype, type.self, get_name(var.self) }; + + if (strip_array && !type.array.empty()) + resource.value_type_id = get_variable_data_type(var).parent_type; + else + resource.value_type_id = get_variable_data_type_id(var); + + assert(resource.value_type_id); + + resource.builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + list.push_back(std::move(resource)); + } + return; + } + // Input - if (var.storage == StorageClassInput && interface_variable_exists_in_entry_point(var.self)) + if (var.storage == StorageClassInput) { if (has_decoration(type.self, DecorationBlock)) { res.stage_inputs.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); + { var.self, var.basetype, type.self, + get_remapped_declared_block_name(var.self, false) }); } else res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); @@ -876,12 +911,12 @@ ShaderResources Compiler::get_shader_resources(const unordered_set * res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Outputs - else if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self)) + else if (var.storage == StorageClassOutput) { if (has_decoration(type.self, DecorationBlock)) { res.stage_outputs.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); } else res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); @@ -3185,6 +3220,29 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 break; } + case OpSelect: + { + // In case of variable pointers, we might access a variable here. + // We cannot prove anything about these accesses however. + for (uint32_t i = 1; i < length; i++) + { + if (i >= 3) + { + auto *var = compiler.maybe_get_backing_variable(args[i]); + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + // Assume we can get partial writes to this variable. 
+ partial_write_variables_to_block[var->self].insert(current_block->self); + } + } + + // Might try to copy a Phi variable here. + notify_variable_access(args[i], current_block->self); + } + break; + } + case OpExtInst: { for (uint32_t i = 4; i < length; i++) @@ -4071,7 +4129,7 @@ void Compiler::update_active_builtins() } // Returns whether this shader uses a builtin of the storage class -bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) +bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const { const Bitset *flags; switch (storage) diff --git a/3rdparty/spirv-cross/spirv_cross.hpp b/3rdparty/spirv-cross/spirv_cross.hpp index 84e23ca30..764c6c47b 100644 --- a/3rdparty/spirv-cross/spirv_cross.hpp +++ b/3rdparty/spirv-cross/spirv_cross.hpp @@ -59,6 +59,27 @@ struct Resource std::string name; }; +struct BuiltInResource +{ + // This is mostly here to support reflection of builtins such as Position/PointSize/CullDistance/ClipDistance. + // This needs to be different from Resource since we can collect builtins from blocks. + // A builtin present here does not necessarily mean it's considered an active builtin, + // since variable ID "activeness" is only tracked on OpVariable level, not Block members. + // For that, update_active_builtins() -> has_active_builtin() can be used to further refine the reflection. + spv::BuiltIn builtin; + + // This is the actual value type of the builtin. + // Typically float4, float, array for the gl_PerVertex builtins. + // If the builtin is a control point, the control point array type will be stripped away here as appropriate. + TypeID value_type_id; + + // This refers to the base resource which contains the builtin. + // If resource is a Block, it can hold multiple builtins, or it might not be a block. + // For advanced reflection scenarios, all information in builtin/value_type_id can be deduced, + // it's just more convenient this way. + Resource resource; +}; + struct ShaderResources { SmallVector uniform_buffers; @@ -79,6 +100,9 @@ struct ShaderResources // these correspond to separate texture2D and samplers respectively. SmallVector separate_images; SmallVector separate_samplers; + + SmallVector builtin_inputs; + SmallVector builtin_outputs; }; struct CombinedImageSampler @@ -324,7 +348,7 @@ public: // Traverses all reachable opcodes and sets active_builtins to a bitmask of all builtin variables which are accessed in the shader. void update_active_builtins(); - bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage); + bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage) const; // Query and modify OpExecutionMode. 
const Bitset &get_execution_mode_bitset() const; @@ -647,7 +671,6 @@ protected: bool expression_is_lvalue(uint32_t id) const; bool variable_storage_is_aliased(const SPIRVariable &var); SPIRVariable *maybe_get_backing_variable(uint32_t chain); - spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); void register_read(uint32_t expr, uint32_t chain, bool forwarded); void register_write(uint32_t chain); diff --git a/3rdparty/spirv-cross/spirv_cross_c.cpp b/3rdparty/spirv-cross/spirv_cross_c.cpp index 730bfa3d6..866c25d10 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.cpp +++ b/3rdparty/spirv-cross/spirv_cross_c.cpp @@ -197,8 +197,11 @@ struct spvc_resources_s : ScratchMemoryAllocation SmallVector separate_images; SmallVector separate_samplers; SmallVector acceleration_structures; + SmallVector builtin_inputs; + SmallVector builtin_outputs; bool copy_resources(SmallVector &outputs, const SmallVector &inputs); + bool copy_resources(SmallVector &outputs, const SmallVector &inputs); bool copy_resources(const ShaderResources &resources); }; @@ -818,6 +821,44 @@ spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spv #endif } +spvc_result spvc_compiler_mask_stage_output_by_location(spvc_compiler compiler, + unsigned location, unsigned component) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast(compiler->compiler.get())->mask_stage_output_by_location(location, component); + return SPVC_SUCCESS; +#else + (void)location; + (void)component; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_mask_stage_output_by_builtin(spvc_compiler compiler, SpvBuiltIn builtin) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast(compiler->compiler.get())->mask_stage_output_by_builtin(spv::BuiltIn(builtin)); + return SPVC_SUCCESS; +#else + (void)builtin; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result spvc_compiler_hlsl_set_root_constants_layout(spvc_compiler compiler, const spvc_hlsl_root_constants *constant_info, size_t count) @@ -1551,6 +1592,30 @@ bool spvc_resources_s::copy_resources(SmallVector &outp return true; } +bool spvc_resources_s::copy_resources(SmallVector &outputs, + const SmallVector &inputs) +{ + for (auto &i : inputs) + { + spvc_reflected_builtin_resource br; + + br.value_type_id = i.value_type_id; + br.builtin = SpvBuiltIn(i.builtin); + + auto &r = br.resource; + r.base_type_id = i.resource.base_type_id; + r.type_id = i.resource.type_id; + r.id = i.resource.id; + r.name = context->allocate_name(i.resource.name); + if (!r.name) + return false; + + outputs.push_back(br); + } + + return true; +} + bool spvc_resources_s::copy_resources(const ShaderResources &resources) { if (!copy_resources(uniform_buffers, resources.uniform_buffers)) @@ -1577,6 +1642,10 @@ bool spvc_resources_s::copy_resources(const ShaderResources &resources) return false; if 
(!copy_resources(acceleration_structures, resources.acceleration_structures)) return false; + if (!copy_resources(builtin_inputs, resources.builtin_inputs)) + return false; + if (!copy_resources(builtin_outputs, resources.builtin_outputs)) + return false; return true; } @@ -1735,6 +1804,37 @@ spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources, return SPVC_SUCCESS; } +spvc_result spvc_resources_get_builtin_resource_list_for_type( + spvc_resources resources, spvc_builtin_resource_type type, + const spvc_reflected_builtin_resource **resource_list, + size_t *resource_size) +{ + const SmallVector *list = nullptr; + switch (type) + { + case SPVC_BUILTIN_RESOURCE_TYPE_STAGE_INPUT: + list = &resources->builtin_inputs; + break; + + case SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT: + list = &resources->builtin_outputs; + break; + + default: + break; + } + + if (!list) + { + resources->context->report_error("Invalid argument."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + *resource_size = list->size(); + *resource_list = list->data(); + return SPVC_SUCCESS; +} + void spvc_compiler_set_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration, unsigned argument) { compiler->compiler->set_decoration(id, static_cast(decoration), argument); diff --git a/3rdparty/spirv-cross/spirv_cross_c.h b/3rdparty/spirv-cross/spirv_cross_c.h index 12f988019..1495a7eac 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.h +++ b/3rdparty/spirv-cross/spirv_cross_c.h @@ -40,7 +40,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 46 +#define SPVC_C_API_VERSION_MINOR 47 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -99,6 +99,13 @@ typedef struct spvc_reflected_resource const char *name; } spvc_reflected_resource; +typedef struct spvc_reflected_builtin_resource +{ + SpvBuiltIn builtin; + spvc_type_id value_type_id; + spvc_reflected_resource resource; +} spvc_reflected_builtin_resource; + /* See C++ API. */ typedef struct spvc_entry_point { @@ -221,6 +228,14 @@ typedef enum spvc_resource_type SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff } spvc_resource_type; +typedef enum spvc_builtin_resource_type +{ + SPVC_BUILTIN_RESOURCE_TYPE_UNKNOWN = 0, + SPVC_BUILTIN_RESOURCE_TYPE_STAGE_INPUT = 1, + SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT = 2, + SPVC_BUILTIN_RESOURCE_TYPE_INT_MAX = 0x7fffffff +} spvc_builtin_resource_type; + /* Maps to spirv_cross::SPIRType::BaseType. */ typedef enum spvc_basetype { @@ -722,6 +737,10 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler com SPVC_PUBLIC_API spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spvc_variable_id id); +SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_location(spvc_compiler compiler, + unsigned location, unsigned component); +SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_builtin(spvc_compiler compiler, SpvBuiltIn builtin); + /* * HLSL specifics. * Maps to C++ API. 
@@ -805,6 +824,11 @@ SPVC_PUBLIC_API spvc_result spvc_resources_get_resource_list_for_type(spvc_resou const spvc_reflected_resource **resource_list, size_t *resource_size); +SPVC_PUBLIC_API spvc_result spvc_resources_get_builtin_resource_list_for_type( + spvc_resources resources, spvc_builtin_resource_type type, + const spvc_reflected_builtin_resource **resource_list, + size_t *resource_size); + /* * Decorations. * Maps to C++ API. diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index 31ff85042..a8d0925fc 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -559,18 +559,19 @@ string CompilerGLSL::compile() { ir.fixup_reserved_names(); - if (options.vulkan_semantics) - backend.allow_precision_qualifiers = true; - else + if (!options.vulkan_semantics) { // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers backend.nonuniform_qualifier = ""; backend.needs_row_major_load_workaround = true; } + backend.allow_precision_qualifiers = options.vulkan_semantics || options.es; backend.force_gl_in_out_block = true; backend.supports_extensions = true; backend.use_array_constructor = true; + backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320); + if (is_legacy_es()) backend.support_case_fallthrough = false; @@ -764,6 +765,8 @@ void CompilerGLSL::emit_header() { statement("#if defined(GL_AMD_gpu_shader_int16)"); statement("#extension GL_AMD_gpu_shader_int16 : require"); + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); statement("#else"); statement("#error No extension available for Int16."); statement("#endif"); @@ -4395,6 +4398,16 @@ string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool uses_ return to_unpacked_expression(id); } +string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id) +{ + string expr = to_expression(id); + + if (has_decoration(id, DecorationNonUniform)) + convert_non_uniform_expression(expr, id); + + return expr; +} + string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) { auto itr = invalid_expressions.find(id); @@ -5533,7 +5546,12 @@ void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint3 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { - bool forward = should_forward(op0) && should_forward(op1); + // Various FP arithmetic opcodes such as add, sub, mul will hit this. 
+ bool force_temporary_precise = backend.support_precise_qualifier && + has_decoration(result_id, DecorationNoContraction) && + type_is_floating_point(get(result_type)); + bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise; + emit_op(result_type, result_id, join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); @@ -5712,6 +5730,27 @@ void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, inherit_expression_dependencies(result_id, op1); } +void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + forced_temporaries.insert(result_id); + emit_op(result_type, result_id, + join(op, "(", to_non_uniform_aware_expression(op0), ", ", + to_unpacked_expression(op1), ")"), false); + flush_all_atomic_capable_variables(); +} + +void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, + uint32_t op0, uint32_t op1, uint32_t op2, + const char *op) +{ + forced_temporaries.insert(result_id); + emit_op(result_type, result_id, + join(op, "(", to_non_uniform_aware_expression(op0), ", ", + to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false); + flush_all_atomic_capable_variables(); +} + void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) { @@ -6214,7 +6253,7 @@ string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID s { // Keep track of the array indices we have used to load the image. // We'll need to use the same array index into the combined image sampler array. - auto image_expr = to_expression(image_id); + auto image_expr = to_non_uniform_aware_expression(image_id); string array_expr; auto array_index = image_expr.find_first_of('['); if (array_index != string::npos) @@ -6442,20 +6481,8 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool auto &result_type = get(result_type_id); inherited_expressions.push_back(coord); - - // Make sure non-uniform decoration is back-propagated to where it needs to be. - if (has_decoration(img, DecorationNonUniformEXT)) - { - // In Vulkan GLSL, we cannot back-propgate nonuniform qualifiers if we - // use a combined image sampler constructor. - // We're only interested in back-propagating if we can trace back through access chains. - // If not, we will apply nonuniform to the sampled image expression itself. - auto *backing = maybe_get_backing_variable(img); - if (backing) - propagate_nonuniform_qualifier(img); - else - nonuniform_expression = true; - } + if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img)) + nonuniform_expression = true; switch (op) { @@ -6794,7 +6821,7 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) // Don't need to consider Shadow state since the dummy sampler is always non-shadow. 
auto sampled_type = type; sampled_type.basetype = SPIRType::SampledImage; - return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ", + return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ", to_expression(dummy_sampler_id), ")"); } else @@ -6814,7 +6841,7 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) } } - return to_expression(id); + return to_non_uniform_aware_expression(id); } // Returns the function args for a texture sampling function for the specified image and sampling characteristics. @@ -6827,7 +6854,7 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool if (args.base.is_fetch) farg_str = convert_separate_image_to_expression(img); else - farg_str = to_expression(img); + farg_str = to_non_uniform_aware_expression(img); if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos) { @@ -8317,12 +8344,35 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) SPIRV_CROSS_THROW("Stencil export not supported in GLES."); } + case BuiltInPrimitiveShadingRateKHR: + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL."); + require_extension_internal("GL_EXT_fragment_shading_rate"); + return "gl_PrimitiveShadingRateEXT"; + } + + case BuiltInShadingRateKHR: + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL."); + require_extension_internal("GL_EXT_fragment_shading_rate"); + return "gl_ShadingRateEXT"; + } + case BuiltInDeviceIndex: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Need Vulkan semantics for device group support."); require_extension_internal("GL_EXT_device_group"); return "gl_DeviceIndex"; + case BuiltInFullyCoveredEXT: + if (!options.es) + require_extension_internal("GL_NV_conservative_raster_underestimation"); + else + SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation."); + return "gl_FragFullyCoveredNV"; + default: return join("gl_BuiltIn_", convert_to_string(builtin)); } @@ -8345,7 +8395,7 @@ const char *CompilerGLSL::index_to_swizzle(uint32_t index) } } -void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType *type, +void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/, AccessChainFlags flags, bool & /*access_chain_is_arrayed*/, uint32_t index) { @@ -8354,27 +8404,19 @@ void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_ expr += "["; - // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier. 
- bool nonuniform_index = - has_decoration(index, DecorationNonUniformEXT) && - (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock)); - if (nonuniform_index) - { - expr += backend.nonuniform_qualifier; - expr += "("; - } - if (index_is_literal) expr += convert_to_string(index); else expr += to_expression(index, register_expression_read); - if (nonuniform_index) - expr += ")"; - expr += "]"; } +bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t) +{ + return true; +} + string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta) { @@ -8584,7 +8626,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice SPIRV_CROSS_THROW("Member index is out of bounds!"); BuiltIn builtin; - if (is_member_builtin(*type, index, &builtin)) + if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base)) { if (access_chain_is_arrayed) { @@ -8652,13 +8694,30 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice // Internally, access chain implementation can also be used on composites, // ignore scalar access workarounds in this case. - StorageClass effective_storage; - if (expression_type(base).pointer) - effective_storage = get_expression_effective_storage_class(base); - else - effective_storage = StorageClassGeneric; + StorageClass effective_storage = StorageClassGeneric; + bool ignore_potential_sliced_writes = false; + if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0) + { + if (expression_type(base).pointer) + effective_storage = get_expression_effective_storage_class(base); - if (!row_major_matrix_needs_conversion) + // Special consideration for control points. + // Control points can only be written by InvocationID, so there is no need + // to consider scalar access chains here. + // Cleans up some cases where it's very painful to determine the accurate storage class + // since blocks can be partially masked ... + auto *var = maybe_get_backing_variable(base); + if (var && var->storage == StorageClassOutput && + get_execution_model() == ExecutionModelTessellationControl && + !has_decoration(var->self, DecorationPatch)) + { + ignore_potential_sliced_writes = true; + } + } + else + ignore_potential_sliced_writes = true; + + if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) { // On some backends, we might not be able to safely access individual scalars in a vector. // To work around this, we might have to cast the access chain reference to something which can, @@ -8698,7 +8757,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice expr += "]"; } - if (row_major_matrix_needs_conversion) + if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) { prepare_access_chain_for_scalar_access(expr, get(type->parent_type), effective_storage, is_packed); @@ -9682,6 +9741,8 @@ void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_ex if (!unroll_array_to_complex_store(lhs_expression, rhs_expression)) { auto lhs = to_dereferenced_expression(lhs_expression); + if (has_decoration(lhs_expression, DecorationNonUniform)) + convert_non_uniform_expression(lhs, lhs_expression); // We might need to cast in order to store to a builtin. 
cast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression)); @@ -9857,12 +9918,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Also, loading from gl_SampleMask array needs special unroll. unroll_array_from_complex_load(id, ptr, expr); - // Shouldn't need to check for ID, but current glslang codegen requires it in some cases - // when loading Image/Sampler descriptors. It does not hurt to check ID as well. - if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT)) + if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform)) { - propagate_nonuniform_qualifier(ptr); - convert_non_uniform_expression(type, expr); + // If we're loading something non-opaque, we need to handle non-uniform descriptor access. + convert_non_uniform_expression(expr, ptr); } if (forward && ptr_expression) @@ -9885,7 +9944,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // it is an array, and our backend does not support arrays as value types. // Emit the temporary, and copy it explicitly. e = &emit_uninitialized_temporary_expression(result_type, id); - emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr)); + emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr)); } else e = &emit_op(result_type, id, expr, forward, !usage_tracking); @@ -9966,9 +10025,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (expr.expression_dependencies.empty()) forwarded_temporaries.erase(ops[1]); - if (has_decoration(ops[1], DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(ops[1]); - break; } @@ -10006,6 +10062,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + if (has_decoration(ops[2], DecorationNonUniform)) + convert_non_uniform_expression(e, ops[2]); set(id, join(type_to_glsl(get(result_type)), "(", e, ".length())"), result_type, true); break; @@ -10307,14 +10365,16 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Including the base will prevent this and would trigger multiple reads // from expression causing it to be forced to an actual temporary in GLSL. auto expr = access_chain_internal(ops[2], &ops[3], length, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta); + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT | + ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2])); inherit_expression_dependencies(id, ops[2]); e->base_expression = ops[2]; } else { - auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + auto expr = access_chain_internal(ops[2], &ops[3], length, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2])); inherit_expression_dependencies(id, ops[2]); } @@ -11221,9 +11281,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Ignore semantics for now, probably only relevant to CL. uint32_t val = ops[5]; const char *op = check_atomic_image(ptr) ? 
"imageAtomicExchange" : "atomicExchange"; - forced_temporaries.insert(id); - emit_binary_func_op(result_type, id, ptr, val, op); - flush_all_atomic_capable_variables(); + + emit_atomic_func_op(result_type, id, ptr, val, op); break; } @@ -11236,9 +11295,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t comp = ops[7]; const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap"; - forced_temporaries.insert(id); - emit_trinary_func_op(result_type, id, ptr, comp, val, op); - flush_all_atomic_capable_variables(); + emit_atomic_func_op(result_type, id, ptr, comp, val, op); break; } @@ -11253,7 +11310,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) (atomic_image && get(type.image.type).basetype == SPIRType::UInt); const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; const char *increment = unsigned_type ? "0u" : "0"; - emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false); + emit_op(ops[0], ops[1], + join(op, "(", + to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); flush_all_atomic_capable_variables(); break; } @@ -11266,7 +11325,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Ignore semantics for now, probably only relevant to CL. uint32_t val = ops[3]; const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; - statement(op, "(", to_expression(ptr), ", ", to_expression(val), ");"); + statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");"); flush_all_atomic_capable_variables(); break; } @@ -11301,7 +11360,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) else increment = "-1"; - emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false); + emit_op(ops[0], ops[1], + join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); } flush_all_atomic_capable_variables(); @@ -11311,9 +11371,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpAtomicIAdd: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } @@ -11321,7 +11379,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; forced_temporaries.insert(ops[1]); - auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); + auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); flush_all_atomic_capable_variables(); break; @@ -11331,9 +11389,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpAtomicUMin: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } @@ -11341,36 +11397,28 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpAtomicUMax: { const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicMax" : "atomicMax"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } case OpAtomicAnd: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } case OpAtomicOr: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } case OpAtomicXor: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } @@ -11465,16 +11513,33 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpImageQueryLod: { + const char *op = nullptr; if (!options.es && options.version < 400) { require_extension_internal("GL_ARB_texture_query_lod"); // For some reason, the ARB spec is all-caps. - GLSL_BFOP(textureQueryLOD); + op = "textureQueryLOD"; } else if (options.es) SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile."); else - GLSL_BFOP(textureQueryLod); + op = "textureQueryLod"; + + auto sampler_expr = to_expression(ops[2]); + if (has_decoration(ops[2], DecorationNonUniform)) + { + if (maybe_get_backing_variable(ops[2])) + convert_non_uniform_expression(sampler_expr, ops[2]); + else if (*backend.nonuniform_qualifier != '\0') + sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")"); + } + + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"), + forward); + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); register_control_dependent_expression(ops[1]); break; } @@ -11504,7 +11569,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) string expr; if (type.image.sampled == 2) - expr = join("imageSamples(", to_expression(ops[2]), ")"); + expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")"); else expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")"); @@ -11615,10 +11680,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) "operand mask was used."); uint32_t samples = ops[5]; - imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")"); + imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")"); } else - imgexpr = join("subpassLoad(", to_expression(ops[2]), ")"); + imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")"); } else { @@ -11630,13 +11695,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) "operand mask was used."); uint32_t samples = ops[5]; - imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", + imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", to_expression(samples), ")"); } else { // Implement subpass loads 
via texture barrier style sampling. - imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); + imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); } } imgexpr = remap_swizzle(get(result_type), 4, imgexpr); @@ -11667,12 +11732,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) "operand mask was used."); uint32_t samples = ops[5]; - statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ", + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");"); } else { - statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ", + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(sparse_texel_id), ");"); } imgexpr = join(type_to_glsl(get(result_type)), "(", to_expression(sparse_code_id), ", ", @@ -11689,10 +11754,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t samples = ops[5]; imgexpr = - join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); + join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); } else - imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")"); + imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")"); } if (!sparse) @@ -11733,9 +11798,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); auto expr = join(to_expression(ops[2]), ", ", coord_expr); - if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ops[2], DecorationNonUniformEXT)) - convert_non_uniform_expression(expression_type(ops[2]), expr); - auto &e = set(id, expr, result_type, true); // When using the pointer, we need to know which variable it is actually loaded from. @@ -11778,11 +11840,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (operands != ImageOperandsSampleMask || length != 5) SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); uint32_t samples = ops[4]; - statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", + statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); } else - statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", + statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); if (var && variable_storage_is_aliased(*var)) @@ -11807,7 +11869,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize."); // The size of an image is always constant. 
- expr = join("imageSize(", to_expression(ops[2]), ")"); + expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")"); } else { @@ -12332,9 +12394,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) flush_control_dependent_expressions(current_emitting_block->self); break; case OpTraceNV: - if (has_decoration(ops[0], DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(ops[0]); - statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); @@ -12343,9 +12403,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpTraceRayKHR: if (!has_decoration(ops[10], DecorationLocation)) SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR."); - if (has_decoration(ops[0], DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(ops[0]); - statement("traceRayEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");"); @@ -12644,6 +12702,11 @@ string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uin return join(type_name, " ", name, type_to_array_glsl(type)); } +bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const +{ + return var.storage == storage; +} + // Emit a structure member. Subclasses may override to modify output, // or to dynamically add a padding member if needed. void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, @@ -12671,7 +12734,7 @@ void CompilerGLSL::emit_struct_padding_target(const SPIRType &) { } -const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) +string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) { // GL_EXT_buffer_reference variables can be marked as restrict. if (flags.get(DecorationRestrictPointerEXT)) @@ -12683,6 +12746,11 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B type.basetype != SPIRType::Sampler) return ""; + string qual; + + if (flags.get(DecorationNoContraction) && backend.support_precise_qualifier) + qual = "precise "; + if (options.es) { auto &execution = get_entry_point(); @@ -12697,7 +12765,7 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B options.fragment.default_int_precision == Options::Mediump && execution.model == ExecutionModelFragment; - return implied_fmediump || implied_imediump ? "" : "mediump "; + qual += (implied_fmediump || implied_imediump) ? 
"" : "mediump "; } else { @@ -12711,7 +12779,7 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment)); - return implied_fhighp || implied_ihighp ? "" : "highp "; + qual += (implied_fhighp || implied_ihighp) ? "" : "highp "; } } else if (backend.allow_precision_qualifiers) @@ -12719,18 +12787,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. // The default is highp however, so only emit mediump in the rare case that a shader has these. if (flags.get(DecorationRelaxedPrecision)) - return "mediump "; - else - return ""; + qual += "mediump "; } - else - return ""; + + return qual; } -const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) +string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) { auto &type = expression_type(id); - bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es; + bool use_precision_qualifiers = backend.allow_precision_qualifiers; if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) { // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. @@ -12787,10 +12853,24 @@ string CompilerGLSL::to_qualifiers_glsl(uint32_t id) res += "coherent "; if (flags.get(DecorationRestrict)) res += "restrict "; + if (flags.get(DecorationNonWritable)) res += "readonly "; + + bool formatted_load = type.image.format == ImageFormatUnknown; if (flags.get(DecorationNonReadable)) + { res += "writeonly "; + formatted_load = false; + } + + if (formatted_load) + { + if (!options.es) + require_extension_internal("GL_EXT_shader_image_load_formatted"); + else + SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL."); + } } res += to_precision_qualifiers_glsl(id); @@ -12869,7 +12949,7 @@ string CompilerGLSL::variable_decl(const SPIRVariable &variable) else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); } - else if (variable.initializer) + else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup)) { uint32_t expr = variable.initializer; if (ir.ids[expr].get_type() != TypeUndef) @@ -13559,7 +13639,7 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) auto &var = get(v); var.deferred_declaration = false; - if (var.storage == StorageClassWorkgroup) + if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup)) { // Special variable type which cannot have initializer, // need to be declared as standalone variables. @@ -14761,7 +14841,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // The backend is responsible for setting this up, and redirection the return values as appropriate. 
if (ir.ids[block.return_value].get_type() != TypeUndef) { - emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction, + emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction, get_expression_effective_storage_class(block.return_value)); } @@ -14980,7 +15060,7 @@ uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); } -void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass) +void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass) { statement(lhs, " = ", to_expression(rhs_id), ";"); } @@ -15120,6 +15200,8 @@ void CompilerGLSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, case BuiltInFragStencilRefEXT: case BuiltInInstanceCustomIndexNV: case BuiltInSampleMask: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInShadingRateKHR: expected_type = SPIRType::Int; break; @@ -15164,6 +15246,8 @@ void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, case BuiltInViewportIndex: case BuiltInFragStencilRefEXT: case BuiltInSampleMask: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInShadingRateKHR: expected_type = SPIRType::Int; break; @@ -15179,64 +15263,62 @@ void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, } } -void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr) +void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id) { if (*backend.nonuniform_qualifier == '\0') return; - // Handle SPV_EXT_descriptor_indexing. - if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage || - type.basetype == SPIRType::Image || type.basetype == SPIRType::AccelerationStructure) + auto *var = maybe_get_backing_variable(ptr_id); + if (!var) + return; + + if (var->storage != StorageClassUniformConstant && + var->storage != StorageClassStorageBuffer && + var->storage != StorageClassUniform) + return; + + auto &backing_type = get(var->basetype); + if (backing_type.array.empty()) + return; + + // If we get here, we know we're accessing an arrayed resource which + // might require nonuniform qualifier. + + auto start_array_index = expr.find_first_of('['); + + if (start_array_index == string::npos) + return; + + // We've opened a bracket, track expressions until we can close the bracket. + // This must be our resource index. + size_t end_array_index = string::npos; + unsigned bracket_count = 1; + for (size_t index = start_array_index + 1; index < expr.size(); index++) { - // The image/sampler ID must be declared as non-uniform. - // However, it is not legal GLSL to have - // nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier - // to the array indexing, like - // samplers[nonuniformEXT(index)]. - // While the access chain will generally be nonuniformEXT, it's not necessarily so, - // so we might have to fixup the OpLoad-ed expression late. - - auto start_array_index = expr.find_first_of('['); - - if (start_array_index == string::npos) - return; - - // Check for the edge case that a non-arrayed resource was marked to be nonuniform, - // and the bracket we found is actually part of non-resource related data. - if (expr.find_first_of(',') < start_array_index) - return; - - // We've opened a bracket, track expressions until we can close the bracket. - // This must be our image index. 
- size_t end_array_index = string::npos; - unsigned bracket_count = 1; - for (size_t index = start_array_index + 1; index < expr.size(); index++) + if (expr[index] == ']') { - if (expr[index] == ']') + if (--bracket_count == 0) { - if (--bracket_count == 0) - { - end_array_index = index; - break; - } + end_array_index = index; + break; } - else if (expr[index] == '[') - bracket_count++; } - - assert(bracket_count == 0); - - // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's - // nothing we can do here to express that. - if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) - return; - - start_array_index++; - - expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(", - expr.substr(start_array_index, end_array_index - start_array_index), ")", - expr.substr(end_array_index, string::npos)); + else if (expr[index] == '[') + bracket_count++; } + + assert(bracket_count == 0); + + // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's + // nothing we can do here to express that. + if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) + return; + + start_array_index++; + + expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(", + expr.substr(start_array_index, end_array_index - start_array_index), ")", + expr.substr(end_array_index, string::npos)); } void CompilerGLSL::emit_block_hints(const SPIRBlock &) @@ -15342,40 +15424,6 @@ void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) } } -void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id) -{ - // SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen, - // we need to know NonUniformEXT a little earlier, when the resource is actually loaded. - // Back-propagate the qualifier based on the expression dependency chain. - - if (!has_decoration(id, DecorationNonUniformEXT)) - { - set_decoration(id, DecorationNonUniformEXT); - force_recompile(); - } - - auto *e = maybe_get(id); - auto *combined = maybe_get(id); - auto *chain = maybe_get(id); - if (e) - { - for (auto &expr : e->expression_dependencies) - propagate_nonuniform_qualifier(expr); - for (auto &expr : e->implied_read_expressions) - propagate_nonuniform_qualifier(expr); - } - else if (combined) - { - propagate_nonuniform_qualifier(combined->image); - propagate_nonuniform_qualifier(combined->sampler); - } - else if (chain) - { - for (auto &expr : chain->implied_read_expressions) - propagate_nonuniform_qualifier(expr); - } -} - void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, SmallVector chain) { @@ -15788,3 +15836,149 @@ void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID expr = join("spvWorkaroundRowMajor(", expr, ")"); } } + +void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component) +{ + masked_output_locations.insert({ location, component }); +} + +void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin) +{ + masked_output_builtins.insert(builtin); +} + +bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const +{ + auto &type = get(var.basetype); + bool is_block = has_decoration(type.self, DecorationBlock); + // Blocks by themselves are never masked. Must be masked per-member. 
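// A rough standalone sketch (not the library routine itself) of the index-wrapping rule
// used by convert_non_uniform_expression above: nonuniformEXT(samplers[i]) is not legal
// GLSL, so the qualifier is wrapped around the first top-level array index instead,
// producing samplers[nonuniformEXT(i)].
#include <string>

static std::string wrap_first_index_nonuniform(const std::string &expr)
{
	size_t open = expr.find_first_of('[');
	if (open == std::string::npos)
		return expr;

	unsigned bracket_count = 1;
	size_t close = std::string::npos;
	for (size_t i = open + 1; i < expr.size(); i++)
	{
		if (expr[i] == '[')
			bracket_count++;
		else if (expr[i] == ']' && --bracket_count == 0)
		{
			close = i;
			break;
		}
	}

	if (close == std::string::npos)
		return expr;

	// e.g. "samplers[idx]" -> "samplers[nonuniformEXT(idx)]"
	return expr.substr(0, open + 1) + "nonuniformEXT(" +
	       expr.substr(open + 1, close - open - 1) + ")" + expr.substr(close);
}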
+ if (is_block) + return false; + + bool is_builtin = has_decoration(var.self, DecorationBuiltIn); + + if (is_builtin) + { + return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn))); + } + else + { + if (!has_decoration(var.self, DecorationLocation)) + return false; + + return is_stage_output_location_masked( + get_decoration(var.self, DecorationLocation), + get_decoration(var.self, DecorationComponent)); + } +} + +bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const +{ + auto &type = get(var.basetype); + bool is_block = has_decoration(type.self, DecorationBlock); + if (!is_block) + return false; + + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(type, index, &builtin)) + { + return is_stage_output_builtin_masked(builtin); + } + else + { + uint32_t location = get_declared_member_location(var, index, strip_array); + uint32_t component = get_member_decoration(type.self, index, DecorationComponent); + return is_stage_output_location_masked(location, component); + } +} + +bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const +{ + return masked_output_locations.count({ location, component }) != 0; +} + +bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const +{ + return masked_output_builtins.count(builtin) != 0; +} + +uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const +{ + auto &block_type = get(var.basetype); + if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation)) + return get_member_decoration(block_type.self, mbr_idx, DecorationLocation); + else + return get_accumulated_member_location(var, mbr_idx, strip_array); +} + +uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const +{ + auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + uint32_t location = get_decoration(var.self, DecorationLocation); + + for (uint32_t i = 0; i < mbr_idx; i++) + { + auto &mbr_type = get(type.member_types[i]); + + // Start counting from any place we have a new location decoration. + if (has_member_decoration(type.self, mbr_idx, DecorationLocation)) + location = get_member_decoration(type.self, mbr_idx, DecorationLocation); + + uint32_t location_count = type_to_location_count(mbr_type); + location += location_count; + } + + return location; +} + +StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr) +{ + auto *var = maybe_get_backing_variable(ptr); + + // If the expression has been lowered to a temporary, we need to use the Generic storage class. + // We're looking for the effective storage class of a given expression. + // An access chain or forwarded OpLoads from such access chains + // will generally have the storage class of the underlying variable, but if the load was not forwarded + // we have lost any address space qualifiers. + bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get(ptr).access_chain && + (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0); + + if (var && !forced_temporary) + { + if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)) + return StorageClassWorkgroup; + if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer)) + return StorageClassStorageBuffer; + + // Normalize SSBOs to StorageBuffer here. 
+ if (var->storage == StorageClassUniform && + has_decoration(get(var->basetype).self, DecorationBufferBlock)) + return StorageClassStorageBuffer; + else + return var->storage; + } + else + return expression_type(ptr).storage; +} + +uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const +{ + uint32_t count; + if (type.basetype == SPIRType::Struct) + { + uint32_t mbr_count = uint32_t(type.member_types.size()); + count = 0; + for (uint32_t i = 0; i < mbr_count; i++) + count += type_to_location_count(get(type.member_types[i])); + } + else + { + count = type.columns > 1 ? type.columns : 1; + } + + uint32_t dim_count = uint32_t(type.array.size()); + for (uint32_t i = 0; i < dim_count; i++) + count *= to_array_size_literal(type, i); + + return count; +} diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp index 0e0303824..a3501ca99 100644 --- a/3rdparty/spirv-cross/spirv_glsl.hpp +++ b/3rdparty/spirv-cross/spirv_glsl.hpp @@ -65,7 +65,8 @@ enum AccessChainFlagBits ACCESS_CHAIN_PTR_CHAIN_BIT = 1 << 2, ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3, ACCESS_CHAIN_LITERAL_MSB_FORCE_ID = 1 << 4, - ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT = 1 << 5 + ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT = 1 << 5, + ACCESS_CHAIN_FORCE_COMPOSITE_BIT = 1 << 6 }; typedef uint32_t AccessChainFlags; @@ -250,6 +251,16 @@ public: // - Images which are statically used at least once with Dref opcodes. bool variable_is_depth_or_compare(VariableID id) const; + // If a shader output is active in this stage, but inactive in a subsequent stage, + // this can be signalled here. This can be used to work around certain cross-stage matching problems + // which plagues MSL and HLSL in certain scenarios. + // An output which matches one of these will not be emitted in stage output interfaces, but rather treated as a private + // variable. + // This option is only meaningful for MSL and HLSL, since GLSL matches by location directly. + // Masking builtins only takes effect if the builtin in question is part of the stage output interface. 
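// A short usage sketch of the masking API declared below (compiler construction is
// assumed; CompilerHLSL works the same way). Masking a vertex output that the next
// stage never reads keeps it out of the stage-output struct and demotes it to a
// plain private variable.
spirv_cross::CompilerMSL msl(std::move(spirv_words));
msl.mask_stage_output_by_location(1, 0);                  // location = 1, component = 0
msl.mask_stage_output_by_builtin(spv::BuiltInPointSize);  // e.g. when no point primitives are drawn
std::string msl_source = msl.compile();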
+ void mask_stage_output_by_location(uint32_t location, uint32_t component); + void mask_stage_output_by_builtin(spv::BuiltIn builtin); + protected: struct ShaderSubgroupSupportHelper { @@ -375,6 +386,7 @@ protected: virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector); virtual void emit_fixup(); virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0); + virtual bool variable_decl_is_remapped_storage(const SPIRVariable &var, spv::StorageClass storage) const; virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id); struct TextureFunctionBaseArguments @@ -569,6 +581,7 @@ protected: bool use_array_constructor = false; bool needs_row_major_load_workaround = false; bool support_pointer_to_pointer = false; + bool support_precise_qualifier = false; } backend; void emit_struct(SPIRType &type); @@ -616,6 +629,8 @@ protected: void emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op); void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op); void emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type); @@ -661,6 +676,9 @@ protected: std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta); + spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); + virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base); + virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, bool &is_packed); @@ -691,6 +709,7 @@ protected: void emit_uninitialized_temporary(uint32_t type, uint32_t id); SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id); void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist); + std::string to_non_uniform_aware_expression(uint32_t id); std::string to_expression(uint32_t id, bool register_expression_read = true); std::string to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset); std::string to_rerolled_array_expression(const std::string &expr, const SPIRType &type); @@ -716,17 +735,17 @@ protected: virtual std::string to_qualifiers_glsl(uint32_t id); void fixup_io_block_patch_qualifiers(const SPIRVariable &var); void emit_output_variable_initializer(const SPIRVariable &var); - const char *to_precision_qualifiers_glsl(uint32_t id); + std::string to_precision_qualifiers_glsl(uint32_t id); virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var); - const char *flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags); + std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags); const char *format_to_glsl(spv::ImageFormat format); virtual std::string layout_for_member(const SPIRType &type, uint32_t index); virtual std::string to_interpolation_qualifiers(const Bitset &flags); std::string layout_for_variable(const SPIRVariable &variable); std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id); virtual 
bool skip_argument(uint32_t id) const; - virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage, - spv::StorageClass rhs_storage); + virtual void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id, + spv::StorageClass lhs_storage, spv::StorageClass rhs_storage); virtual void emit_block_hints(const SPIRBlock &block); virtual std::string to_initializer_expression(const SPIRVariable &var); virtual std::string to_zero_initialized_expression(uint32_t type_id); @@ -741,6 +760,7 @@ protected: uint32_t type_to_packed_alignment(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); uint32_t type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); uint32_t type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); + uint32_t type_to_location_count(const SPIRType &type) const; std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg); virtual std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type); @@ -881,7 +901,7 @@ protected: virtual void cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type); void unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr); bool unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id); - void convert_non_uniform_expression(const SPIRType &type, std::string &expr); + void convert_non_uniform_expression(std::string &expr, uint32_t ptr_id); void handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id); void disallow_forwarding_in_expression_chain(const SPIRExpression &expr); @@ -900,10 +920,17 @@ protected: void fixup_type_alias(); void reorder_type_alias(); - void propagate_nonuniform_qualifier(uint32_t id); - static const char *vector_swizzle(int vecsize, int index); + bool is_stage_output_location_masked(uint32_t location, uint32_t component) const; + bool is_stage_output_builtin_masked(spv::BuiltIn builtin) const; + bool is_stage_output_variable_masked(const SPIRVariable &var) const; + bool is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const; + uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const; + uint32_t get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const; + std::unordered_set masked_output_locations; + std::unordered_set masked_output_builtins; + private: void init(); }; diff --git a/3rdparty/spirv-cross/spirv_hlsl.cpp b/3rdparty/spirv-cross/spirv_hlsl.cpp index c0b6d1d9a..9435c4e06 100644 --- a/3rdparty/spirv-cross/spirv_hlsl.cpp +++ b/3rdparty/spirv-cross/spirv_hlsl.cpp @@ -570,7 +570,7 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() switch (builtin) { case BuiltInPosition: - type = "float4"; + type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4"; semantic = legacy ? "POSITION" : "SV_Position"; break; @@ -818,8 +818,8 @@ string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags) res += "patch "; // Seems to be different in actual HLSL. if (flags.get(DecorationSample)) res += "sample "; - if (flags.get(DecorationInvariant)) - res += "invariant "; // Not supported? + if (flags.get(DecorationInvariant) && backend.support_precise_qualifier) + res += "precise "; // Not supported? 
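// Emitted-HLSL illustration of the invariant-to-precise mapping above (output shape
// assumed from the surrounding code): HLSL has no `invariant` keyword, so an invariant
// gl_Position comes out roughly as
//
//   struct SPIRV_Cross_Output
//   {
//       precise float4 gl_Position : SV_Position;
//   };
//
// and only when the target shader model supports the `precise` qualifier.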
return res; } @@ -865,24 +865,10 @@ void CompilerHLSL::emit_io_block(const SPIRVariable &var) begin_scope(); type.member_name_cache.clear(); - uint32_t base_location = get_decoration(var.self, DecorationLocation); - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { - string semantic; - if (has_member_decoration(type.self, i, DecorationLocation)) - { - uint32_t location = get_member_decoration(type.self, i, DecorationLocation); - semantic = join(" : ", to_semantic(location, execution.model, var.storage)); - } - else - { - // If the block itself has a location, but not its members, use the implicit location. - // There could be a conflict if the block members partially specialize the locations. - // It is unclear how SPIR-V deals with this. Assume this does not happen for now. - uint32_t location = base_location + i; - semantic = join(" : ", to_semantic(location, execution.model, var.storage)); - } + uint32_t location = get_accumulated_member_location(var, i, false); + string semantic = join(" : ", to_semantic(location, execution.model, var.storage)); add_member_name(type, i); @@ -2249,7 +2235,7 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) string CompilerHLSL::to_sampler_expression(uint32_t id) { - auto expr = join("_", to_expression(id)); + auto expr = join("_", to_non_uniform_aware_expression(id)); auto index = expr.find_first_of('['); if (index == string::npos) { @@ -2754,14 +2740,17 @@ void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse) bool proj = false; const uint32_t *opt = nullptr; auto *combined_image = maybe_get(img); - auto img_expr = to_expression(combined_image ? combined_image->image : img); + + if (combined_image && has_decoration(img, DecorationNonUniform)) + { + set_decoration(combined_image->image, DecorationNonUniform); + set_decoration(combined_image->sampler, DecorationNonUniform); + } + + auto img_expr = to_non_uniform_aware_expression(combined_image ? combined_image->image : img); inherited_expressions.push_back(coord); - // Make sure non-uniform decoration is back-propagated to where it needs to be. 
- if (has_decoration(img, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(img); - switch (op) { case OpImageSampleDrefImplicitLod: @@ -3016,7 +3005,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse) { string sampler_expr; if (combined_image) - sampler_expr = to_expression(combined_image->sampler); + sampler_expr = to_non_uniform_aware_expression(combined_image->sampler); else sampler_expr = to_sampler_expression(img); expr += sampler_expr; @@ -3812,6 +3801,10 @@ void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIR SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported, unless SM 6.2 and " "native 16-bit types are enabled."); + string base = chain.base; + if (has_decoration(chain.self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain.self); + bool templated_load = hlsl_options.shader_model >= 62; string load_expr; @@ -3844,7 +3837,7 @@ void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIR if (templated_load) load_op = "Load"; - load_expr = join(chain.base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")"); + load_expr = join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")"); } else if (type.columns == 1) { @@ -3866,7 +3859,7 @@ void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIR for (uint32_t r = 0; r < type.vecsize; r++) { - load_expr += join(chain.base, ".Load", template_expr, "(", chain.dynamic_index, + load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index, chain.static_index + r * chain.matrix_stride, ")"); if (r + 1 < type.vecsize) load_expr += ", "; @@ -3915,7 +3908,7 @@ void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIR for (uint32_t c = 0; c < type.columns; c++) { - load_expr += join(chain.base, ".", load_op, template_expr, "(", chain.dynamic_index, + load_expr += join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index + c * chain.matrix_stride, ")"); if (c + 1 < type.columns) load_expr += ", "; @@ -3944,7 +3937,7 @@ void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIR { for (uint32_t r = 0; r < type.vecsize; r++) { - load_expr += join(chain.base, ".Load", template_expr, "(", chain.dynamic_index, + load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index, chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")"); if ((r + 1 < type.vecsize) || (c + 1 < type.columns)) @@ -3981,9 +3974,6 @@ void CompilerHLSL::emit_load(const Instruction &instruction) uint32_t id = ops[1]; uint32_t ptr = ops[2]; - if (has_decoration(ptr, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(ptr); - auto &type = get(result_type); bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct; @@ -4122,9 +4112,6 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val // Make sure we trigger a read of the constituents in the access chain. 
track_expression_read(chain.self); - if (has_decoration(chain.self, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(chain.self); - SPIRType target_type; target_type.basetype = SPIRType::UInt; target_type.vecsize = type.vecsize; @@ -4148,6 +4135,10 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val bool templated_store = hlsl_options.shader_model >= 62; + auto base = chain.base; + if (has_decoration(chain.self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain.self); + string template_expr; if (templated_store) template_expr = join("<", type_to_glsl(type), ">"); @@ -4183,7 +4174,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val } else store_op = "Store"; - statement(chain.base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ", + statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ", store_expr, ");"); } else if (type.columns == 1) @@ -4214,7 +4205,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val store_expr = join(bitcast_op, "(", store_expr, ")"); } - statement(chain.base, ".Store", template_expr, "(", chain.dynamic_index, + statement(base, ".Store", template_expr, "(", chain.dynamic_index, chain.static_index + chain.matrix_stride * r, ", ", store_expr, ");"); } } @@ -4258,7 +4249,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val store_expr = join(bitcast_op, "(", store_expr, ")"); } - statement(chain.base, ".", store_op, template_expr, "(", chain.dynamic_index, + statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index + c * chain.matrix_stride, ", ", store_expr, ");"); } } @@ -4282,7 +4273,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val auto bitcast_op = bitcast_glsl_op(target_type, type); if (!bitcast_op.empty()) store_expr = join(bitcast_op, "(", store_expr, ")"); - statement(chain.base, ".Store", template_expr, "(", chain.dynamic_index, + statement(base, ".Store", template_expr, "(", chain.dynamic_index, chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");"); } } @@ -4384,9 +4375,6 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) inherit_expression_dependencies(ops[1], ops[i]); add_implied_read_expression(e, ops[i]); } - - if (has_decoration(ops[1], DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(ops[1]); } else { @@ -4486,13 +4474,16 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) if (data_type.storage == StorageClassImage || !chain) { - statement(atomic_op, "(", to_expression(ops[0]), ", ", to_expression(ops[3]), ", ", to_expression(tmp_id), - ");"); + statement(atomic_op, "(", to_non_uniform_aware_expression(ops[0]), ", ", + to_expression(ops[3]), ", ", to_expression(tmp_id), ");"); } else { + string base = chain->base; + if (has_decoration(chain->self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain->self); // RWByteAddress buffer is always uint in its underlying type. 
- statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", + statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", to_expression(ops[3]), ", ", to_expression(tmp_id), ");"); } } @@ -4510,14 +4501,17 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) SPIRType::BaseType expr_type; if (data_type.storage == StorageClassImage || !chain) { - statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");"); + statement(atomic_op, "(", to_non_uniform_aware_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");"); expr_type = data_type.basetype; } else { // RWByteAddress buffer is always uint in its underlying type. + string base = chain->base; + if (has_decoration(chain->self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain->self); expr_type = SPIRType::UInt; - statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, + statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, ", ", to_name(id), ");"); } @@ -4618,13 +4612,35 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) } case OpGroupNonUniformShuffle: - SPIRV_CROSS_THROW("Cannot trivially implement Shuffle in HLSL."); + emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt"); + break; case OpGroupNonUniformShuffleXor: - SPIRV_CROSS_THROW("Cannot trivially implement ShuffleXor in HLSL."); + { + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() ^ ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } case OpGroupNonUniformShuffleUp: - SPIRV_CROSS_THROW("Cannot trivially implement ShuffleUp in HLSL."); + { + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() - ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } case OpGroupNonUniformShuffleDown: - SPIRV_CROSS_THROW("Cannot trivially implement ShuffleDown in HLSL."); + { + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() + ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } case OpGroupNonUniformAll: emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue"); @@ -5150,7 +5166,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); statement("uint ", dummy_samples_levels, ";"); - auto expr = join("spvTextureSize(", to_expression(ops[2]), ", ", + auto expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", ", bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")"); auto &restype = get(ops[0]); @@ -5176,9 +5192,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) string expr; if (uav) - expr = join("spvImageSize(", to_expression(ops[2]), ", ", dummy_samples_levels, ")"); + expr = join("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", dummy_samples_levels, ")"); else - expr = join("spvTextureSize(", to_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); + expr = 
join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::UInt, expr); @@ -5208,9 +5224,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) statement(variable_decl(type, to_name(id)), ";"); if (uav) - statement("spvImageSize(", to_expression(ops[2]), ", ", to_name(id), ");"); + statement("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", to_name(id), ");"); else - statement("spvTextureSize(", to_expression(ops[2]), ", 0u, ", to_name(id), ");"); + statement("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", to_name(id), ");"); auto &restype = get(ops[0]); auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id)); @@ -5241,16 +5257,16 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) if (operands != ImageOperandsSampleMask || instruction.length != 6) SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); uint32_t sample = ops[5]; - imgexpr = join(to_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")"); + imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")"); } else - imgexpr = join(to_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))"); + imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))"); pure = true; } else { - imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"); + imgexpr = join(to_non_uniform_aware_expression(ops[2]), "[", to_expression(ops[3]), "]"); // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", // except that the underlying type changes how the data is interpreted. @@ -5299,7 +5315,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr); } - statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); + statement(to_non_uniform_aware_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); if (var && variable_storage_is_aliased(*var)) flush_all_aliased_variables(); break; @@ -5311,10 +5327,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) uint32_t id = ops[1]; auto expr = to_expression(ops[2]); - if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ops[2], DecorationNonUniformEXT)) - convert_non_uniform_expression(expression_type(ops[2]), expr); expr += join("[", to_expression(ops[3]), "]"); - auto &e = set(id, expr, result_type, true); // When using the pointer, we need to know which variable it is actually loaded from. @@ -5492,7 +5505,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpArrayLength: { - auto *var = maybe_get(ops[2]); + auto *var = maybe_get_backing_variable(ops[2]); if (!var) SPIRV_CROSS_THROW("Array length must point directly to an SSBO block."); @@ -5502,7 +5515,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) // This must be 32-bit uint, so we're good to go. 
emit_uninitialized_temporary_expression(ops[0], ops[1]); - statement(to_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");"); + statement(to_non_uniform_aware_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");"); uint32_t offset = type_struct_member_offset(type, ops[3]); uint32_t stride = type_struct_member_array_stride(type, ops[3]); statement(to_expression(ops[1]), " = (", to_expression(ops[1]), " - ", offset, ") / ", stride, ";"); @@ -5718,6 +5731,9 @@ string CompilerHLSL::compile() backend.nonuniform_qualifier = "NonUniformResourceIndex"; backend.support_case_fallthrough = false; + // SM 4.1 does not support precise for some reason. + backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40; + fixup_type_alias(); reorder_type_alias(); build_function_control_flow_graphs_and_analyze(); diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index b428afdf6..5570a211e 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -67,6 +67,52 @@ void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding) { StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding }; resource_bindings[tuple] = { binding, false }; + + // If we might need to pad argument buffer members to positionally align + // arg buffer indexes, also maintain a lookup by argument buffer index. + if (msl_options.pad_argument_buffer_resources) + { + StageSetBinding arg_idx_tuple = { binding.stage, binding.desc_set, k_unknown_component }; + +#define ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(rez) \ + arg_idx_tuple.binding = binding.msl_##rez; \ + resource_arg_buff_idx_to_binding_number[arg_idx_tuple] = binding.binding + + switch (binding.basetype) + { + case SPIRType::Void: + case SPIRType::Boolean: + case SPIRType::SByte: + case SPIRType::UByte: + case SPIRType::Short: + case SPIRType::UShort: + case SPIRType::Int: + case SPIRType::UInt: + case SPIRType::Int64: + case SPIRType::UInt64: + case SPIRType::AtomicCounter: + case SPIRType::Half: + case SPIRType::Float: + case SPIRType::Double: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(buffer); + break; + case SPIRType::Image: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(texture); + break; + case SPIRType::Sampler: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(sampler); + break; + case SPIRType::SampledImage: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(texture); + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(sampler); + break; + default: + SPIRV_CROSS_THROW("Unexpected argument buffer resource base type. When padding argument buffer elements, " + "all descriptor set resources must be supplied with a base type by the app."); + break; + } +#undef ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP + } } void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index) @@ -100,7 +146,9 @@ void CompilerMSL::set_argument_buffer_device_address_space(uint32_t desc_set, bo bool CompilerMSL::is_msl_shader_input_used(uint32_t location) { - return location_inputs_in_use.count(location) != 0; + // Don't report internal location allocations to app. 
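// A rough sketch of supplying a base type with a binding, which the argument-buffer
// padding logic above relies on when msl_options.pad_argument_buffer_resources is
// enabled. A CompilerMSL instance `msl` and the binding values are assumed.
spirv_cross::MSLResourceBinding rb = {};
rb.stage = spv::ExecutionModelFragment;
rb.desc_set = 0;
rb.binding = 3;
rb.basetype = spirv_cross::SPIRType::SampledImage; // lets the index lookup pick texture vs. sampler vs. buffer
rb.msl_texture = 3;
rb.msl_sampler = 3;
msl.add_msl_resource_binding(rb);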
+ return location_inputs_in_use.count(location) != 0 && + location_inputs_in_use_fallback.count(location) == 0; } uint32_t CompilerMSL::get_automatic_builtin_input_location(spv::BuiltIn builtin) const @@ -1685,8 +1733,11 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: // Add the global variables as arguments to the function if (func_id != ir.default_entry_point) { - bool added_in = false; - bool added_out = false; + bool control_point_added_in = false; + bool control_point_added_out = false; + bool patch_added_in = false; + bool patch_added_out = false; + for (uint32_t arg_id : added_arg_ids) { auto &var = get(arg_id); @@ -1694,34 +1745,72 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: auto *p_type = &get(type_id); BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn)); - if (((is_tessellation_shader() && var.storage == StorageClassInput) || - (get_execution_model() == ExecutionModelTessellationControl && var.storage == StorageClassOutput)) && - !(has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type)) && - (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || - bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || - p_type->basetype == SPIRType::Struct)) + bool is_patch = has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type); + bool is_block = has_decoration(p_type->self, DecorationBlock); + bool is_control_point_storage = + !is_patch && + ((is_tessellation_shader() && var.storage == StorageClassInput) || + (get_execution_model() == ExecutionModelTessellationControl && var.storage == StorageClassOutput)); + bool is_patch_block_storage = is_patch && is_block && var.storage == StorageClassOutput; + bool is_builtin = is_builtin_variable(var); + bool variable_is_stage_io = + !is_builtin || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || + p_type->basetype == SPIRType::Struct; + bool is_redirected_to_global_stage_io = (is_control_point_storage || is_patch_block_storage) && + variable_is_stage_io; + + // If output is masked it is not considered part of the global stage IO interface. + if (is_redirected_to_global_stage_io && var.storage == StorageClassOutput) + is_redirected_to_global_stage_io = !is_stage_output_variable_masked(var); + + if (is_redirected_to_global_stage_io) { // Tessellation control shaders see inputs and per-vertex outputs as arrays. // Similarly, tessellation evaluation shaders see per-vertex inputs as arrays. // We collected them into a structure; we must pass the array of this // structure to the function. std::string name; + if (is_patch) + name = var.storage == StorageClassInput ? patch_stage_in_var_name : patch_stage_out_var_name; + else + name = var.storage == StorageClassInput ? "gl_in" : "gl_out"; + + if (var.storage == StorageClassOutput && has_decoration(p_type->self, DecorationBlock)) + { + // If we're redirecting a block, we might still need to access the original block + // variable if we're masking some members. + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(p_type->member_types.size()); mbr_idx++) + { + if (is_stage_output_block_member_masked(var, mbr_idx, true)) + { + func.add_parameter(var.basetype, var.self, true); + break; + } + } + } + + // Tessellation control shaders see inputs and per-vertex outputs as arrays. + // Similarly, tessellation evaluation shaders see per-vertex inputs as arrays. 
+ // We collected them into a structure; we must pass the array of this + // structure to the function. if (var.storage == StorageClassInput) { + auto &added_in = is_patch ? patch_added_in : control_point_added_in; if (added_in) continue; - name = "gl_in"; - arg_id = stage_in_ptr_var_id; + arg_id = is_patch ? patch_stage_in_var_id : stage_in_ptr_var_id; added_in = true; } else if (var.storage == StorageClassOutput) { + auto &added_out = is_patch ? patch_added_out : control_point_added_out; if (added_out) continue; - name = "gl_out"; - arg_id = stage_out_ptr_var_id; + arg_id = is_patch ? patch_stage_out_var_id : stage_out_ptr_var_id; added_out = true; } + type_id = get(arg_id).basetype; uint32_t next_id = ir.increase_bound_by(1); func.add_parameter(type_id, next_id, true); @@ -1729,7 +1818,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: set_name(next_id, name); } - else if (is_builtin_variable(var) && p_type->basetype == SPIRType::Struct) + else if (is_builtin && has_decoration(p_type->self, DecorationBlock)) { // Get the pointee type type_id = get_pointee_type_id(type_id); @@ -1739,7 +1828,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: for (auto &mbr_type_id : p_type->member_types) { BuiltIn builtin = BuiltInMax; - bool is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin); + is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin); if (is_builtin && has_active_builtin(builtin, var.storage)) { // Add a arg variable with the same type and decorations as the member @@ -1825,28 +1914,20 @@ void CompilerMSL::mark_as_packable(SPIRType &type) } } -uint32_t CompilerMSL::type_to_location_count(const SPIRType &type) const -{ - // In MSL, we cannot place structs in any context where we need locations. 
- assert(type.basetype != SPIRType::Struct); - - uint32_t dim = 1; - for (uint32_t i = 0; i < type.array.size(); i++) - dim *= to_array_size_literal(type, i); - - uint32_t count = dim * type.columns; - return count; -} - // If a shader input exists at the location, it is marked as being used by this shader -void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, StorageClass storage) +void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, + StorageClass storage, bool fallback) { if (storage != StorageClassInput) return; uint32_t count = type_to_location_count(type); for (uint32_t i = 0; i < count; i++) + { location_inputs_in_use.insert(location + i); + if (fallback) + location_inputs_in_use_fallback.insert(location + i); + } } uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const @@ -2066,10 +2147,11 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co { entry_func.fixup_hooks_in.push_back([=, &var]() { uint32_t index = get_extended_decoration(var.self, SPIRVCrossDecorationInterfaceMemberIndex); + auto invocation = to_tesc_invocation_id(); statement(to_expression(stage_out_ptr_var_id), "[", - builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "].", + invocation, "].", to_member_name(ib_type, index), " = ", to_expression(var.initializer), "[", - builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];"); + invocation, "];"); }); } else @@ -2087,7 +2169,9 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co uint32_t locn = get_decoration(var.self, DecorationLocation); if (storage == StorageClassInput) { - type_id = ensure_correct_input_type(var.basetype, locn, location_meta ? location_meta->num_components : 0); + type_id = ensure_correct_input_type(var.basetype, locn, + location_meta ? location_meta->num_components : 0, + meta.strip_array); if (!location_meta) var.basetype = type_id; @@ -2257,8 +2341,8 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage uint32_t locn = get_decoration(var.self, DecorationLocation) + i; if (storage == StorageClassInput) { - var.basetype = ensure_correct_input_type(var.basetype, locn); - uint32_t mbr_type_id = ensure_correct_input_type(usable_type->self, locn); + var.basetype = ensure_correct_input_type(var.basetype, locn, 0, meta.strip_array); + uint32_t mbr_type_id = ensure_correct_input_type(usable_type->self, locn, 0, meta.strip_array); if (storage == StorageClassInput && pull_model_inputs.count(var.self)) ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); else @@ -2273,11 +2357,11 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); mark_location_as_used_by_shader(locn, *usable_type, storage); } - else if (is_builtin && builtin == BuiltInClipDistance) + else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)) { - // Declare the ClipDistance as [[user(clipN)]]. - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, BuiltInClipDistance); - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, i); + // Declare the Clip/CullDistance as [[user(clip/cullN)]]. 
+ set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, i); } if (get_decoration_bitset(var.self).get(DecorationIndex)) @@ -2351,34 +2435,6 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage } } -uint32_t CompilerMSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) -{ - auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); - uint32_t location = get_decoration(var.self, DecorationLocation); - - for (uint32_t i = 0; i < mbr_idx; i++) - { - auto &mbr_type = get(type.member_types[i]); - - // Start counting from any place we have a new location decoration. - if (has_member_decoration(type.self, mbr_idx, DecorationLocation)) - location = get_member_decoration(type.self, mbr_idx, DecorationLocation); - - uint32_t location_count = 1; - - if (mbr_type.columns > 1) - location_count = mbr_type.columns; - - if (!mbr_type.array.empty()) - for (uint32_t j = 0; j < uint32_t(mbr_type.array.size()); j++) - location_count *= to_array_size_literal(mbr_type, j); - - location += location_count; - } - - return location; -} - void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var, uint32_t mbr_idx, InterfaceBlockMeta &meta) @@ -2474,11 +2530,11 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); mark_location_as_used_by_shader(locn, *usable_type, storage); } - else if (is_builtin && builtin == BuiltInClipDistance) + else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)) { - // Declare the ClipDistance as [[user(clipN)]]. - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, BuiltInClipDistance); - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, i); + // Declare the Clip/CullDistance as [[user(clip/cullN)]]. + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, i); } if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent)) @@ -2501,7 +2557,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. - if (!meta.strip_array) + if (!meta.strip_array && meta.allow_local_declaration) { switch (storage) { @@ -2602,7 +2658,7 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor // so redirect to qualified name. set_member_qualified_name(var_type.self, mbr_idx, qual_var_name); } - else if (!meta.strip_array) + else if (!meta.strip_array && meta.allow_local_declaration) { // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. 
switch (storage) @@ -2631,7 +2687,7 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); if (storage == StorageClassInput) { - mbr_type_id = ensure_correct_input_type(mbr_type_id, locn); + mbr_type_id = ensure_correct_input_type(mbr_type_id, locn, 0, meta.strip_array); var_type.member_types[mbr_idx] = mbr_type_id; if (storage == StorageClassInput && pull_model_inputs.count(var.self)) ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); @@ -2648,7 +2704,7 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor uint32_t locn = get_accumulated_member_location(var, mbr_idx, meta.strip_array); if (storage == StorageClassInput) { - mbr_type_id = ensure_correct_input_type(mbr_type_id, locn); + mbr_type_id = ensure_correct_input_type(mbr_type_id, locn, 0, meta.strip_array); var_type.member_types[mbr_idx] = mbr_type_id; if (storage == StorageClassInput && pull_model_inputs.count(var.self)) ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); @@ -2691,14 +2747,12 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor { entry_func.fixup_hooks_in.push_back([=, &var]() { auto &type = this->get(var.basetype); - uint32_t index = get_extended_decoration(var.self, SPIRVCrossDecorationInterfaceMemberIndex); - index += mbr_idx; - - AccessChainMeta chain_meta; - auto constant_chain = access_chain_internal(var.initializer, &builtin_invocation_id_id, 1, 0, &chain_meta); + uint32_t index = get_extended_member_decoration(var.self, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex); + auto invocation = to_tesc_invocation_id(); + auto constant_chain = join(to_expression(var.initializer), "[", invocation, "]"); statement(to_expression(stage_out_ptr_var_id), "[", - builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "].", + invocation, "].", to_member_name(ib_type, index), " = ", constant_chain, ".", to_member_name(type, mbr_idx), ";"); }); @@ -2743,100 +2797,19 @@ void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_ BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); // Force the variable to have the proper name. - set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction)); + string var_name = builtin_to_glsl(builtin, StorageClassFunction); + set_name(var.self, var_name); - if (get_execution_mode_bitset().get(ExecutionModeTriangles)) - { - // Triangles are tricky, because we want only one member in the struct. + // We need to declare the variable early and at entry-point scope. + entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + bool triangles = get_execution_mode_bitset().get(ExecutionModeTriangles); + string mbr_name; - // We need to declare the variable early and at entry-point scope. - entry_func.add_local_variable(var.self); - vars_needing_early_declaration.push_back(var.self); - - string mbr_name = "gl_TessLevel"; - - // If we already added the other one, we can skip this step. - if (!added_builtin_tess_level) - { - // Add a reference to the variable type to the interface struct. 
- uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); - - uint32_t type_id = build_extended_vector_type(var_type.self, 4); - - ib_type.member_types.push_back(type_id); - - // Give the member a name - set_member_name(ib_type.self, ib_mbr_idx, mbr_name); - - // We cannot decorate both, but the important part is that - // it's marked as builtin so we can get automatic attribute assignment if needed. - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); - - // There is no qualified alias since we need to flatten the internal array on return. - if (get_decoration_bitset(var.self).get(DecorationLocation)) - { - uint32_t locn = get_decoration(var.self, DecorationLocation); - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, var_type, StorageClassInput); - } - else if (inputs_by_builtin.count(builtin)) - { - uint32_t locn = inputs_by_builtin[builtin].location; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, var_type, StorageClassInput); - } - - added_builtin_tess_level = true; - } - - switch (builtin) - { - case BuiltInTessLevelOuter: - entry_func.fixup_hooks_in.push_back([=, &var]() { - statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".x;"); - statement(to_name(var.self), "[1] = ", ib_var_ref, ".", mbr_name, ".y;"); - statement(to_name(var.self), "[2] = ", ib_var_ref, ".", mbr_name, ".z;"); - }); - break; - - case BuiltInTessLevelInner: - entry_func.fixup_hooks_in.push_back( - [=, &var]() { statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".w;"); }); - break; - - default: - assert(false); - break; - } - } - else - { - // Add a reference to the variable type to the interface struct. - uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); - - uint32_t type_id = build_extended_vector_type(var_type.self, builtin == BuiltInTessLevelOuter ? 4 : 2); - // Change the type of the variable, too. - uint32_t ptr_type_id = ir.increase_bound_by(1); - auto &new_var_type = set(ptr_type_id, get(type_id)); - new_var_type.pointer = true; - new_var_type.pointer_depth++; - new_var_type.storage = StorageClassInput; - new_var_type.parent_type = type_id; - var.basetype = ptr_type_id; - - ib_type.member_types.push_back(type_id); - - // Give the member a name - string mbr_name = to_expression(var.self); - set_member_name(ib_type.self, ib_mbr_idx, mbr_name); - - // Since vectors can be indexed like arrays, there is no need to unpack this. We can - // just refer to the vector directly. So give it a qualified alias. - string qual_var_name = ib_var_ref + "." + mbr_name; - ir.meta[var.self].decoration.qualified_alias = qual_var_name; - - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + // Add a reference to the variable type to the interface struct. + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + const auto mark_locations = [&](const SPIRType &new_var_type) { if (get_decoration_bitset(var.self).get(DecorationLocation)) { uint32_t locn = get_decoration(var.self, DecorationLocation); @@ -2849,6 +2822,170 @@ void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_ set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput); } + }; + + if (triangles) + { + // Triangles are tricky, because we want only one member in the struct. 
+ mbr_name = "gl_TessLevel"; + + // If we already added the other one, we can skip this step. + if (!added_builtin_tess_level) + { + uint32_t type_id = build_extended_vector_type(var_type.self, 4); + + ib_type.member_types.push_back(type_id); + + // Give the member a name + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // We cannot decorate both, but the important part is that + // it's marked as builtin so we can get automatic attribute assignment if needed. + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + + mark_locations(var_type); + added_builtin_tess_level = true; + } + } + else + { + mbr_name = var_name; + + uint32_t type_id = build_extended_vector_type(var_type.self, builtin == BuiltInTessLevelOuter ? 4 : 2); + + uint32_t ptr_type_id = ir.increase_bound_by(1); + auto &new_var_type = set(ptr_type_id, get(type_id)); + new_var_type.pointer = true; + new_var_type.pointer_depth++; + new_var_type.storage = StorageClassInput; + new_var_type.parent_type = type_id; + + ib_type.member_types.push_back(type_id); + + // Give the member a name + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + + mark_locations(new_var_type); + } + + if (builtin == BuiltInTessLevelOuter) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(var_name, "[0] = ", ib_var_ref, ".", mbr_name, ".x;"); + statement(var_name, "[1] = ", ib_var_ref, ".", mbr_name, ".y;"); + statement(var_name, "[2] = ", ib_var_ref, ".", mbr_name, ".z;"); + if (!triangles) + statement(var_name, "[3] = ", ib_var_ref, ".", mbr_name, ".w;"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + if (triangles) + { + statement(var_name, "[0] = ", ib_var_ref, ".", mbr_name, ".w;"); + } + else + { + statement(var_name, "[0] = ", ib_var_ref, ".", mbr_name, ".x;"); + statement(var_name, "[1] = ", ib_var_ref, ".", mbr_name, ".y;"); + } + }); + } +} + +bool CompilerMSL::variable_storage_requires_stage_io(spv::StorageClass storage) const +{ + if (storage == StorageClassOutput) + return !capture_output_to_buffer; + else if (storage == StorageClassInput) + return !(get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup); + else + return false; +} + +string CompilerMSL::to_tesc_invocation_id() +{ + if (msl_options.multi_patch_workgroup) + { + // n.b. builtin_invocation_id_id here is the dispatch global invocation ID, + // not the TC invocation ID. + return join(to_expression(builtin_invocation_id_id), ".x % ", get_entry_point().output_vertices); + } + else + return builtin_to_glsl(BuiltInInvocationId, StorageClassInput); +} + +void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array) +{ + auto &entry_func = get(ir.default_entry_point); + bool threadgroup_storage = variable_decl_is_remapped_storage(masked_var, StorageClassWorkgroup); + + if (threadgroup_storage && msl_options.multi_patch_workgroup) + { + // We need one threadgroup block per patch, so fake this. 
+ entry_func.fixup_hooks_in.push_back([this, &masked_var]() { + auto &type = get_variable_data_type(masked_var); + add_local_variable_name(masked_var.self); + + bool old_is_builtin = is_using_builtin_array; + is_using_builtin_array = true; + + const uint32_t max_control_points_per_patch = 32u; + uint32_t max_num_instances = + (max_control_points_per_patch + get_entry_point().output_vertices - 1u) / + get_entry_point().output_vertices; + statement("threadgroup ", type_to_glsl(type), " ", + "spvStorage", to_name(masked_var.self), "[", max_num_instances, "]", + type_to_array_glsl(type), ";"); + + // Assign a threadgroup slice to each PrimitiveID. + // We assume here that workgroup size is rounded to 32, + // since that's the maximum number of control points per patch. + // We cannot size the array based on fixed dispatch parameters, + // since Metal does not allow that. :( + // FIXME: We will likely need an option to support passing down target workgroup size, + // so we can emit appropriate size here. + statement("threadgroup ", type_to_glsl(type), " ", + "(&", to_name(masked_var.self), ")", + type_to_array_glsl(type), " = spvStorage", to_name(masked_var.self), "[", + "(", to_expression(builtin_invocation_id_id), ".x / ", + get_entry_point().output_vertices, ") % ", + max_num_instances, "];"); + + is_using_builtin_array = old_is_builtin; + }); + } + else + { + entry_func.add_local_variable(masked_var.self); + } + + if (!threadgroup_storage) + { + vars_needing_early_declaration.push_back(masked_var.self); + } + else if (masked_var.initializer) + { + // Cannot directly initialize threadgroup variables. Need fixup hooks. + ID initializer = masked_var.initializer; + if (strip_array) + { + entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() { + auto invocation = to_tesc_invocation_id(); + statement(to_expression(masked_var.self), "[", + invocation, "] = ", + to_expression(initializer), "[", + invocation, "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() { + statement(to_expression(masked_var.self), " = ", to_expression(initializer), ";"); + }); + } } } @@ -2862,11 +2999,25 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var); bool is_builtin = is_builtin_variable(var); auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + bool is_block = has_decoration(var_type.self, DecorationBlock); + + // If stage variables are masked out, emit them as plain variables instead. + // For builtins, we query them one by one later. + // IO blocks are not masked here, we need to mask them per-member instead. + if (storage == StorageClassOutput && is_stage_output_variable_masked(var)) + { + // If we ignore an output, we must still emit it, since it might be used by app. + // Instead, just emit it as early declaration. 
+ emit_local_masked_variable(var, meta.strip_array); + return; + } if (var_type.basetype == SPIRType::Struct) { - if (!is_builtin_type(var_type) && (!capture_output_to_buffer || storage == StorageClassInput) && - !meta.strip_array) + bool block_requires_flattening = variable_storage_requires_stage_io(storage) || is_block; + bool needs_local_declaration = !is_builtin && block_requires_flattening && meta.allow_local_declaration; + + if (needs_local_declaration) { // For I/O blocks or structs, we will need to pass the block itself around // to functions if they are used globally in leaf functions. @@ -2874,11 +3025,10 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st // we unflatten I/O blocks while running the shader, // and pass the actual struct type down to leaf functions. // We then unflatten inputs, and flatten outputs in the "fixup" stages. - entry_func.add_local_variable(var.self); - vars_needing_early_declaration.push_back(var.self); + emit_local_masked_variable(var, meta.strip_array); } - if (capture_output_to_buffer && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock)) + if (!block_requires_flattening) { // In Metal tessellation shaders, the interface block itself is arrayed. This makes things // very complicated, since stage-in structures in MSL don't support nested structures. @@ -2889,6 +3039,8 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st } else { + bool masked_block = false; + // Flatten the struct members into the interface struct for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) { @@ -2896,18 +3048,48 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); auto &mbr_type = get(var_type.member_types[mbr_idx]); - if (!is_builtin || has_active_builtin(builtin, storage)) + if (storage == StorageClassOutput && is_stage_output_block_member_masked(var, mbr_idx, meta.strip_array)) + { + if (is_block) + masked_block = true; + + // Non-builtin block output variables are just ignored, since they will still access + // the block variable as-is. They're just not flattened. + if (is_builtin && !meta.strip_array) + { + // Emit a fake variable instead. 
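+ // For example, masking gl_Position in an output block should still yield a local
+ // `float4 gl_Position;` declaration, so stores through the block keep compiling;
+ // they just never reach the stage-out interface (illustrative summary of the code below).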
+ uint32_t ids = ir.increase_bound_by(2); + uint32_t ptr_type_id = ids + 0; + uint32_t var_id = ids + 1; + + auto ptr_type = mbr_type; + ptr_type.pointer = true; + ptr_type.pointer_depth++; + ptr_type.parent_type = var_type.member_types[mbr_idx]; + ptr_type.storage = StorageClassOutput; + + uint32_t initializer = 0; + if (var.initializer) + if (auto *c = maybe_get(var.initializer)) + initializer = c->subconstants[mbr_idx]; + + set(ptr_type_id, ptr_type); + set(var_id, ptr_type_id, StorageClassOutput, initializer); + entry_func.add_local_variable(var_id); + vars_needing_early_declaration.push_back(var_id); + set_name(var_id, builtin_to_glsl(builtin, StorageClassOutput)); + set_decoration(var_id, DecorationBuiltIn, builtin); + } + } + else if (!is_builtin || has_active_builtin(builtin, storage)) { bool is_composite_type = is_matrix(mbr_type) || is_array(mbr_type); bool attribute_load_store = storage == StorageClassInput && get_execution_model() != ExecutionModelFragment; - bool storage_is_stage_io = - (storage == StorageClassInput && !(get_execution_model() == ExecutionModelTessellationControl && - msl_options.multi_patch_workgroup)) || - storage == StorageClassOutput; + bool storage_is_stage_io = variable_storage_requires_stage_io(storage); - // ClipDistance always needs to be declared as user attributes. - if (builtin == BuiltInClipDistance) + // Clip/CullDistance always need to be declared as user attributes. + if (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance) is_builtin = false; if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type) @@ -2921,6 +3103,29 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st } } } + + // If we're redirecting a block, we might still need to access the original block + // variable if we're masking some members. + if (masked_block && !needs_local_declaration && + (!is_builtin_variable(var) || get_execution_model() == ExecutionModelTessellationControl)) + { + if (is_builtin_variable(var)) + { + // Ensure correct names for the block members if we're actually going to + // declare gl_PerVertex. + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) + { + set_member_name(var_type.self, mbr_idx, builtin_to_glsl( + BuiltIn(get_member_decoration(var_type.self, mbr_idx, DecorationBuiltIn)), + StorageClassOutput)); + } + + set_name(var_type.self, "gl_PerVertex"); + set_name(var.self, "gl_out_masked"); + stage_out_masked_builtin_type_id = var_type.self; + } + emit_local_masked_variable(var, meta.strip_array); + } } } else if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && @@ -2934,14 +3139,11 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st if (!is_builtin || has_active_builtin(builtin, storage)) { bool is_composite_type = is_matrix(var_type) || is_array(var_type); - bool storage_is_stage_io = - (storage == StorageClassInput && - !(get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup)) || - (storage == StorageClassOutput && !capture_output_to_buffer); + bool storage_is_stage_io = variable_storage_requires_stage_io(storage); bool attribute_load_store = storage == StorageClassInput && get_execution_model() != ExecutionModelFragment; - // ClipDistance always needs to be declared as user attributes. - if (builtin == BuiltInClipDistance) + // Clip/CullDistance always needs to be declared as user attributes. 
+ if (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance) is_builtin = false; // MSL does not allow matrices or arrays in input or output variables, so need to handle it specially. @@ -2979,10 +3181,16 @@ void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t auto &var = get(var_id); auto &type = get_variable_element_type(var); - if (storage == StorageClassInput && type.basetype == SPIRType::Struct) - { - uint32_t mbr_idx = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); + bool flatten_composites = variable_storage_requires_stage_io(var.storage); + bool is_block = has_decoration(type.self, DecorationBlock); + + uint32_t mbr_idx = uint32_t(-1); + if (type.basetype == SPIRType::Struct && (flatten_composites || is_block)) + mbr_idx = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); + + if (mbr_idx != uint32_t(-1)) + { // Only set the lowest InterfaceMemberIndex for each variable member. // IB struct members will be emitted in-order w.r.t. interface member index. if (!has_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex)) @@ -3024,23 +3232,49 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) auto &type = this->get(var.basetype); bool is_builtin = is_builtin_variable(var); - auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + bool is_block = has_decoration(type.self, DecorationBlock); + + auto bi_type = BuiltInMax; + bool builtin_is_gl_in_out = false; + if (is_builtin && !is_block) + { + bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + builtin_is_gl_in_out = bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance; + } + + if (is_builtin && is_block) + builtin_is_gl_in_out = true; + uint32_t location = get_decoration(var_id, DecorationLocation); + bool builtin_is_stage_in_out = builtin_is_gl_in_out || + bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex || + bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV || + bi_type == BuiltInFragDepth || + bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask; + // These builtins are part of the stage in/out structs. bool is_interface_block_builtin = - (bi_type == BuiltInPosition || bi_type == BuiltInPointSize || bi_type == BuiltInClipDistance || - bi_type == BuiltInCullDistance || bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex || - bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV || bi_type == BuiltInFragDepth || - bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask) || - (get_execution_model() == ExecutionModelTessellationEvaluation && - (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)); + builtin_is_stage_in_out || + (get_execution_model() == ExecutionModelTessellationEvaluation && + (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)); bool is_active = interface_variable_exists_in_entry_point(var.self); if (is_builtin && is_active) { // Only emit the builtin if it's active in this entry point. Interface variable list might lie. - is_active = has_active_builtin(bi_type, storage); + if (is_block) + { + // If any builtin is active, the block is active. 
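+ // For example, a gl_PerVertex output block in which only gl_Position is statically
+ // used still counts as active here.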
+ uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t i = 0; !is_active && i < mbr_cnt; i++) + is_active = has_active_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)), storage); + } + else + { + is_active = has_active_builtin(bi_type, storage); + } } bool filter_patch_decoration = (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch; @@ -3048,7 +3282,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) bool hidden = is_hidden_variable(var, incl_builtins); // ClipDistance is never hidden, we need to emulate it when used as an input. - if (bi_type == BuiltInClipDistance) + if (bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance) hidden = false; // It's not enough to simply avoid marking fragment outputs if the pipeline won't @@ -3283,7 +3517,11 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) && !patch; + // Fixing up flattened stores in TESC is impossible since the memory is group shared either via + // device (not masked) or threadgroup (masked) storage classes and it's race condition city. meta.strip_array = strip_array; + meta.allow_local_declaration = !strip_array && !(get_execution_model() == ExecutionModelTessellationControl && + storage == StorageClassOutput); add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, meta); } @@ -3294,7 +3532,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) // the struct containing them is the correct size and layout. for (auto &input : inputs_by_location) { - if (is_msl_shader_input_used(input.first)) + if (location_inputs_in_use.count(input.first) != 0) continue; // Create a fake variable to put at the location. @@ -3336,18 +3574,18 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) auto &fake_var = set(var_id, ptr_type_id, storage); set_decoration(var_id, DecorationLocation, input.first); meta.strip_array = true; + meta.allow_local_declaration = false; add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta); } } // Sort the members of the structure by their locations. - MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Location); + MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::LocationThenBuiltInType); member_sorter.sort(); // The member indices were saved to the original variables, but after the members // were sorted, those indices are now likely incorrect. Fix those up now. - if (!patch) - fix_up_interface_member_indices(storage, ib_type_id); + fix_up_interface_member_indices(storage, ib_type_id); // For patch inputs, add one more member, holding the array of control point data. if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && patch && @@ -3461,10 +3699,16 @@ uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn buil // Ensure that the type is compatible with the shader input. // If it is, simply return the given type ID. // Otherwise, create a new type, and return its ID. -uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t num_components) +uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t num_components, bool strip_array) { auto &type = get(type_id); + uint32_t max_array_dimensions = strip_array ? 
1 : 0; + + // Struct and array types must match exactly. + if (type.basetype == SPIRType::Struct || type.array.size() > max_array_dimensions) + return type_id; + auto p_va = inputs_by_location.find(location); if (p_va == end(inputs_by_location)) { @@ -4599,6 +4843,16 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + // "fsub" intrinsic support + case SPVFuncImplFSub: + statement("template"); + statement("T spvFSub(T l, T r)"); + begin_scope(); + statement("return fma(T(-1), r, l);"); + end_scope(); + statement(""); + break; + // "fmul' intrinsic support case SPVFuncImplFMul: statement("template"); @@ -6361,6 +6615,10 @@ void CompilerMSL::emit_specialization_constants_and_structs() if (patch_stage_in_var_id && get_patch_stage_in_struct_type().self == type_id) is_declarable_struct = false; + // Special case. Declare builtin struct anyways if we need to emit a threadgroup version of it. + if (stage_out_masked_builtin_type_id == type_id) + is_declarable_struct = true; + // Align and emit declarable structs...but avoid declaring each more than once. if (is_declarable_struct && declared_structs.count(type_id) == 0) { @@ -6406,16 +6664,22 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id if (ptr_type.storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationEvaluation) return false; - bool multi_patch_tess_ctl = get_execution_model() == ExecutionModelTessellationControl && - msl_options.multi_patch_workgroup && ptr_type.storage == StorageClassInput; - bool flat_matrix = is_matrix(result_type) && ptr_type.storage == StorageClassInput && !multi_patch_tess_ctl; - bool flat_struct = result_type.basetype == SPIRType::Struct && ptr_type.storage == StorageClassInput; - bool flat_data_type = flat_matrix || is_array(result_type) || flat_struct; - if (!flat_data_type) - return false; - if (has_decoration(ptr, DecorationPatch)) return false; + bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable; + + bool flattened_io = variable_storage_requires_stage_io(ptr_type.storage); + + bool flat_data_type = flattened_io && + (is_matrix(result_type) || is_array(result_type) || result_type.basetype == SPIRType::Struct); + + // Edge case, even with multi-patch workgroups, we still need to unroll load + // if we're loading control points directly. + if (ptr_is_io_variable && is_array(result_type)) + flat_data_type = true; + + if (!flat_data_type) + return false; // Now, we must unflatten a composite type and take care of interleaving array access with gl_in/gl_out. // Lots of painful code duplication since we *really* should not unroll these kinds of loads in entry point fixup @@ -6424,12 +6688,31 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id uint32_t interface_index = get_extended_decoration(ptr, SPIRVCrossDecorationInterfaceMemberIndex); auto *var = maybe_get_backing_variable(ptr); - bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable; auto &expr_type = get_pointee_type(ptr_type.self); const auto &iface_type = expression_type(stage_in_ptr_var_id); - if (result_type.array.size() > 2) + if (!flattened_io) + { + // Simplest case for multi-patch workgroups, just unroll array as-is. 
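+ // Sketch of the expression built below for a three-control-point load of a float4
+ // per-vertex input (the member name "m" is illustrative):
+ //
+ //   spvUnsafeArray<float4, 3>({ gl_in[0].m, gl_in[1].m, gl_in[2].m })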
+ if (interface_index == uint32_t(-1)) + return false; + + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, uint32_t(result_type.array.size()) - 1); + + for (uint32_t i = 0; i < num_control_points; i++) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else if (result_type.array.size() > 2) { SPIRV_CROSS_THROW("Cannot load tessellation IO variables with more than 2 dimensions."); } @@ -6439,7 +6722,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id SPIRV_CROSS_THROW("Loading an array-of-array must be loaded directly from an IO variable."); if (interface_index == uint32_t(-1)) SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); - if (result_type.basetype == SPIRType::Struct || flat_matrix) + if (result_type.basetype == SPIRType::Struct || is_matrix(result_type)) SPIRV_CROSS_THROW("Cannot load array-of-array of composite type in tessellation IO."); expr += type_to_glsl(result_type) + "({ "; @@ -6453,44 +6736,19 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id expr += type_to_glsl(sub_type) + "({ "; interface_index = base_interface_index; uint32_t array_size = to_array_size_literal(result_type, 0); - if (multi_patch_tess_ctl) + for (uint32_t j = 0; j < array_size; j++, interface_index++) { - for (uint32_t j = 0; j < array_size; j++) - { - const uint32_t indices[3] = { i, interface_index, j }; + const uint32_t indices[2] = { i, interface_index }; - AccessChainMeta meta; - expr += - access_chain_internal(stage_in_ptr_var_id, indices, 3, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); - // If the expression has more vector components than the result type, insert - // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might - // happen if we replace the type of an input variable. 
- if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct && - expr_type.vecsize > sub_type.vecsize) - expr += vector_swizzle(sub_type.vecsize, 0); + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct && + expr_type.vecsize > sub_type.vecsize) + expr += vector_swizzle(sub_type.vecsize, 0); - if (j + 1 < array_size) - expr += ", "; - } - } - else - { - for (uint32_t j = 0; j < array_size; j++, interface_index++) - { - const uint32_t indices[2] = { i, interface_index }; - - AccessChainMeta meta; - expr += - access_chain_internal(stage_in_ptr_var_id, indices, 2, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); - if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct && - expr_type.vecsize > sub_type.vecsize) - expr += vector_swizzle(sub_type.vecsize, 0); - - if (j + 1 < array_size) - expr += ", "; - } + if (j + 1 < array_size) + expr += ", "; } expr += " })"; if (i + 1 < num_control_points) @@ -6498,7 +6756,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id } expr += " })"; } - else if (flat_struct) + else if (result_type.basetype == SPIRType::Struct) { bool is_array_of_struct = is_array(result_type); if (is_array_of_struct && !ptr_is_io_variable) @@ -6531,7 +6789,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id const auto &mbr_type = get(struct_type.member_types[j]); const auto &expr_mbr_type = get(expr_type.member_types[j]); - if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput && !multi_patch_tess_ctl) + if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput) { expr += type_to_glsl(mbr_type) + "("; for (uint32_t k = 0; k < mbr_type.columns; k++, interface_index++) @@ -6541,8 +6799,8 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id const uint32_t indices[2] = { i, interface_index }; AccessChainMeta meta; expr += access_chain_internal( - stage_in_ptr_var_id, indices, 2, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); } else expr += to_expression(ptr) + "." 
+ to_member_name(iface_type, interface_index); @@ -6558,48 +6816,23 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id { expr += type_to_glsl(mbr_type) + "({ "; uint32_t array_size = to_array_size_literal(mbr_type, 0); - if (multi_patch_tess_ctl) + for (uint32_t k = 0; k < array_size; k++, interface_index++) { - for (uint32_t k = 0; k < array_size; k++) + if (is_array_of_struct) { - if (is_array_of_struct) - { - const uint32_t indices[3] = { i, interface_index, k }; - AccessChainMeta meta; - expr += access_chain_internal( - stage_in_ptr_var_id, indices, 3, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); - } - else - expr += join(to_expression(ptr), ".", to_member_name(iface_type, interface_index), "[", - k, "]"); - if (expr_mbr_type.vecsize > mbr_type.vecsize) - expr += vector_swizzle(mbr_type.vecsize, 0); - - if (k + 1 < array_size) - expr += ", "; + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); } - } - else - { - for (uint32_t k = 0; k < array_size; k++, interface_index++) - { - if (is_array_of_struct) - { - const uint32_t indices[2] = { i, interface_index }; - AccessChainMeta meta; - expr += access_chain_internal( - stage_in_ptr_var_id, indices, 2, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); - } - else - expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); - if (expr_mbr_type.vecsize > mbr_type.vecsize) - expr += vector_swizzle(mbr_type.vecsize, 0); + else + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); - if (k + 1 < array_size) - expr += ", "; - } + if (k + 1 < array_size) + expr += ", "; } expr += " })"; } @@ -6629,7 +6862,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id if (is_array_of_struct) expr += " })"; } - else if (flat_matrix) + else if (is_matrix(result_type)) { bool is_array_of_matrix = is_array(result_type); if (is_array_of_matrix && !ptr_is_io_variable) @@ -6655,9 +6888,8 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id const uint32_t indices[2] = { i, interface_index }; AccessChainMeta meta; - expr += - access_chain_internal(stage_in_ptr_var_id, indices, 2, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); if (expr_type.vecsize > result_type.vecsize) expr += vector_swizzle(result_type.vecsize, 0); if (j + 1 < result_type.columns) @@ -6748,48 +6980,102 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l bool patch = false; bool flat_data = false; bool ptr_is_chain = false; - bool multi_patch = get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup; + bool flatten_composites = false; + + bool is_block = false; + + if (var) + is_block = has_decoration(get_variable_data_type(*var).self, DecorationBlock); if (var) { + flatten_composites = variable_storage_requires_stage_io(var->storage); patch = has_decoration(ops[2], DecorationPatch) || is_patch_block(get_variable_data_type(*var)); // Should match strip_array in add_interface_block. 
flat_data = var->storage == StorageClassInput || (var->storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationControl); + // Patch inputs are treated as normal block IO variables, so they don't deal with this path at all. + if (patch && (!is_block || var->storage == StorageClassInput)) + flat_data = false; + // We might have a chained access chain, where // we first take the access chain to the control point, and then we chain into a member or something similar. // In this case, we need to skip gl_in/gl_out remapping. + // Also, skip ptr chain for patches. ptr_is_chain = var->self != ID(ops[2]); } - BuiltIn bi_type = BuiltIn(get_decoration(ops[2], DecorationBuiltIn)); - if (var && flat_data && !patch && - (!is_builtin_variable(*var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || - bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || - get_variable_data_type(*var).basetype == SPIRType::Struct)) + bool builtin_variable = false; + bool variable_is_flat = false; + + if (var && flat_data) { + builtin_variable = is_builtin_variable(*var); + + BuiltIn bi_type = BuiltInMax; + if (builtin_variable && !is_block) + bi_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + + variable_is_flat = !builtin_variable || is_block || + bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance; + } + + if (variable_is_flat) + { + // If output is masked, it is emitted as a "normal" variable, just go through normal code paths. + // Only check this for the first level of access chain. + // Dealing with this for partial access chains should be possible, but awkward. + if (var->storage == StorageClassOutput && !ptr_is_chain) + { + bool masked = false; + if (is_block) + { + uint32_t relevant_member_index = patch ? 3 : 4; + // FIXME: This won't work properly if the application first access chains into gl_out element, + // then access chains into the member. Super weird, but theoretically possible ... + if (length > relevant_member_index) + { + uint32_t mbr_idx = get(ops[relevant_member_index]).scalar(); + masked = is_stage_output_block_member_masked(*var, mbr_idx, true); + } + } + else if (var) + masked = is_stage_output_variable_masked(*var); + + if (masked) + return false; + } + AccessChainMeta meta; SmallVector indices; uint32_t next_id = ir.increase_bound_by(1); indices.reserve(length - 3 + 1); - uint32_t first_non_array_index = ptr_is_chain ? 3 : 4; - VariableID stage_var_id = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id; + uint32_t first_non_array_index = (ptr_is_chain ? 3 : 4) - (patch ? 1 : 0); + + VariableID stage_var_id; + if (patch) + stage_var_id = var->storage == StorageClassInput ? patch_stage_in_var_id : patch_stage_out_var_id; + else + stage_var_id = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id; + VariableID ptr = ptr_is_chain ? VariableID(ops[2]) : stage_var_id; - if (!ptr_is_chain) + if (!ptr_is_chain && !patch) { // Index into gl_in/gl_out with first array index. 
- indices.push_back(ops[3]); + indices.push_back(ops[first_non_array_index - 1]); } auto &result_ptr_type = get(ops[0]); uint32_t const_mbr_id = next_id++; - uint32_t index = get_extended_decoration(var->self, SPIRVCrossDecorationInterfaceMemberIndex); - if (var->storage == StorageClassInput || has_decoration(get_variable_element_type(*var).self, DecorationBlock)) + uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex); + + if (flatten_composites || is_block) { uint32_t i = first_non_array_index; auto *type = &get_variable_element_type(*var); @@ -6797,19 +7083,20 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l { // Maybe this is a struct type in the input class, in which case // we put it as a decoration on the corresponding member. - index = get_extended_member_decoration(var->self, get_constant(ops[first_non_array_index]).scalar(), + uint32_t mbr_idx = get_constant(ops[first_non_array_index]).scalar(); + index = get_extended_member_decoration(var->self, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex); assert(index != uint32_t(-1)); i++; - type = &get(type->member_types[get_constant(ops[first_non_array_index]).scalar()]); + type = &get(type->member_types[mbr_idx]); } // In this case, we're poking into flattened structures and arrays, so now we have to // combine the following indices. If we encounter a non-constant index, // we're hosed. - for (; i < length; ++i) + for (; flatten_composites && i < length; ++i) { - if ((multi_patch || (!is_array(*type) && !is_matrix(*type))) && type->basetype != SPIRType::Struct) + if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct) break; auto *c = maybe_get(ops[i]); @@ -6820,7 +7107,16 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l // We're in flattened space, so just increment the member index into IO block. // We can only do this once in the current implementation, so either: // Struct, Matrix or 1-dimensional array for a control point. - index += c->scalar(); + if (type->basetype == SPIRType::Struct && var->storage == StorageClassOutput) + { + // Need to consider holes, since individual block members might be masked away. + uint32_t mbr_idx = c->scalar(); + for (uint32_t j = 0; j < mbr_idx; j++) + if (!is_stage_output_block_member_masked(*var, j, true)) + index++; + } + else + index += c->scalar(); if (type->parent_type) type = &get(type->parent_type); @@ -6828,31 +7124,48 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l type = &get(type->member_types[c->scalar()]); } - if ((!multi_patch && (is_matrix(result_ptr_type) || is_array(result_ptr_type))) || - result_ptr_type.basetype == SPIRType::Struct) - { - // We're not going to emit the actual member name, we let any further OpLoad take care of that. - // Tag the access chain with the member index we're referencing. - set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index); - } - else + // We're not going to emit the actual member name, we let any further OpLoad take care of that. + // Tag the access chain with the member index we're referencing. + bool defer_access_chain = flatten_composites && (is_matrix(result_ptr_type) || is_array(result_ptr_type) || + result_ptr_type.basetype == SPIRType::Struct); + + if (!defer_access_chain) { // Access the appropriate member of gl_in/gl_out. set(const_mbr_id, get_uint_type_id(), index, false); indices.push_back(const_mbr_id); + // Member index is now irrelevant. 
+ index = uint32_t(-1); + // Append any straggling access chain indices. if (i < length) indices.insert(indices.end(), ops + i, ops + length); } + else + { + // We must have consumed the entire access chain if we're deferring it. + assert(i == length); + } + + if (index != uint32_t(-1)) + set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index); + else + unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex); } else { - assert(index != uint32_t(-1)); - set(const_mbr_id, get_uint_type_id(), index, false); - indices.push_back(const_mbr_id); + if (index != uint32_t(-1)) + { + set(const_mbr_id, get_uint_type_id(), index, false); + indices.push_back(const_mbr_id); + } - indices.insert(indices.end(), ops + 4, ops + length); + // Member index is now irrelevant. + index = uint32_t(-1); + unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex); + + indices.insert(indices.end(), ops + first_non_array_index, ops + length); } // We use the pointer to the base of the input/output array here, @@ -6862,7 +7175,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l if (!ptr_is_chain) { // This is the start of an access chain, use ptr_chain to index into control point array. - e = access_chain(ptr, indices.data(), uint32_t(indices.size()), result_ptr_type, &meta, true); + e = access_chain(ptr, indices.data(), uint32_t(indices.size()), result_ptr_type, &meta, !patch); } else { @@ -6878,7 +7191,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l // First one is the gl_in/gl_out struct itself, then an index into that array. // If we have traversed further, we use a normal access chain formulation. auto *ptr_expr = maybe_get(ptr); - if (ptr_expr && ptr_expr->implied_read_expressions.size() == 2) + if (flatten_composites && ptr_expr && ptr_expr->implied_read_expressions.size() == 2) { e = join(to_expression(ptr), access_chain_internal(stage_var_id, indices.data(), uint32_t(indices.size()), @@ -7011,6 +7324,21 @@ void CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, cons } } +bool CompilerMSL::access_chain_needs_stage_io_builtin_translation(uint32_t base) +{ + auto *var = maybe_get_backing_variable(base); + if (!var || !is_tessellation_shader()) + return true; + + // We only need to rewrite builtin access chains when accessing flattened builtins like gl_ClipDistance_N. + // Avoid overriding it back to just gl_ClipDistance. + // This can only happen in scenarios where we cannot flatten/unflatten access chains, so, the only case + // where this triggers is evaluation shader inputs. + bool redirect_builtin = get_execution_model() == ExecutionModelTessellationEvaluation ? + var->storage == StorageClassOutput : false; + return redirect_builtin; +} + // Sets the interface member index for an access chain to a pull-model interpolant. 
void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length) { @@ -7261,19 +7589,26 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; case OpFMul: - if (msl_options.invariant_float_math) + if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction)) MSL_BFOP(spvFMul); else MSL_BOP(*); break; case OpFAdd: - if (msl_options.invariant_float_math) + if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction)) MSL_BFOP(spvFAdd); else MSL_BOP(+); break; + case OpFSub: + if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction)) + MSL_BFOP(spvFSub); + else + MSL_BOP(-); + break; + // Atomics case OpAtomicExchange: { @@ -7715,7 +8050,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) case OpVectorTimesMatrix: case OpMatrixTimesVector: { - if (!msl_options.invariant_float_math) + if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction)) { CompilerGLSL::emit_instruction(instruction); break; @@ -7757,7 +8092,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) case OpMatrixTimesMatrix: { - if (!msl_options.invariant_float_math) + if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction)) { CompilerGLSL::emit_instruction(instruction); break; @@ -8046,20 +8381,46 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin flush_all_active_variables(); } -void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass lhs_storage, - StorageClass rhs_storage) +static bool storage_class_array_is_thread(StorageClass storage) +{ + switch (storage) + { + case StorageClassInput: + case StorageClassOutput: + case StorageClassGeneric: + case StorageClassFunction: + case StorageClassPrivate: + return true; + + default: + return false; + } +} + +void CompilerMSL::emit_array_copy(const string &lhs, uint32_t lhs_id, uint32_t rhs_id, + StorageClass lhs_storage, StorageClass rhs_storage) { // Allow Metal to use the array template to make arrays a value type. // This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback. - bool lhs_thread = (lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction || - lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate); - bool rhs_thread = (rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction || - rhs_storage == StorageClassOutput || - rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate); + bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage); + bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage); + + bool lhs_is_array_template = lhs_is_thread_storage; + bool rhs_is_array_template = rhs_is_thread_storage; + + // Special considerations for stage IO variables. + // If the variable is actually backed by non-user visible device storage, we use array templates for those. 
+ auto *lhs_var = maybe_get_backing_variable(lhs_id); + if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage)) + lhs_is_array_template = true; + + auto *rhs_var = maybe_get_backing_variable(rhs_id); + if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage)) + rhs_is_array_template = true; // If threadgroup storage qualifiers are *not* used: // Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier. - if (lhs_thread && rhs_thread && !using_builtin_array()) + if (lhs_is_array_template && rhs_is_array_template && !using_builtin_array()) { statement(lhs, " = ", to_expression(rhs_id), ";"); } @@ -8101,15 +8462,15 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla add_spv_func_and_recompile(SPVFuncImplArrayCopy); const char *tag = nullptr; - if (lhs_thread && is_constant) + if (lhs_is_thread_storage && is_constant) tag = "FromConstantToStack"; else if (lhs_storage == StorageClassWorkgroup && is_constant) tag = "FromConstantToThreadGroup"; - else if (lhs_thread && rhs_thread) + else if (lhs_is_thread_storage && rhs_is_thread_storage) tag = "FromStackToStack"; - else if (lhs_storage == StorageClassWorkgroup && rhs_thread) + else if (lhs_storage == StorageClassWorkgroup && rhs_is_thread_storage) tag = "FromStackToThreadGroup"; - else if (lhs_thread && rhs_storage == StorageClassWorkgroup) + else if (lhs_is_thread_storage && rhs_storage == StorageClassWorkgroup) tag = "FromThreadGroupToStack"; else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup) tag = "FromThreadGroupToThreadGroup"; @@ -8119,19 +8480,21 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla tag = "FromConstantToDevice"; else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup) tag = "FromThreadGroupToDevice"; - else if (lhs_storage == StorageClassStorageBuffer && rhs_thread) + else if (lhs_storage == StorageClassStorageBuffer && rhs_is_thread_storage) tag = "FromStackToDevice"; else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer) tag = "FromDeviceToThreadGroup"; - else if (lhs_thread && rhs_storage == StorageClassStorageBuffer) + else if (lhs_is_thread_storage && rhs_storage == StorageClassStorageBuffer) tag = "FromDeviceToStack"; else SPIRV_CROSS_THROW("Unknown storage class used for copying arrays."); // Pass internal array of spvUnsafeArray<> into wrapper functions - if (lhs_thread && !msl_options.force_native_arrays) + if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);"); + if (lhs_is_array_template && !msl_options.force_native_arrays) statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");"); - else if (rhs_thread && !msl_options.force_native_arrays) + else if (rhs_is_array_template && !msl_options.force_native_arrays) statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);"); else statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); @@ -8198,8 +8561,9 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs) if (p_v_lhs) flush_variable_declaration(p_v_lhs->self); - 
emit_array_copy(to_expression(id_lhs), id_rhs, get_expression_effective_storage_class(id_lhs), - get_expression_effective_storage_class(id_rhs)); + auto lhs_storage = get_expression_effective_storage_class(id_lhs); + auto rhs_storage = get_expression_effective_storage_class(id_rhs); + emit_array_copy(to_expression(id_lhs), id_lhs, id_rhs, lhs_storage, rhs_storage); register_write(id_lhs); return true; @@ -8651,7 +9015,8 @@ void CompilerMSL::emit_interface_block(uint32_t ib_var_id) { auto &ib_var = get(ib_var_id); auto &ib_type = get_variable_data_type(ib_var); - assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty()); + //assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty()); + assert(ib_type.basetype == SPIRType::Struct); emit_struct(ib_type); } } @@ -10045,8 +10410,10 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ // we need flat arrays, but if we're somehow declaring gl_PerVertex for constant array reasons, we want // template array types to be declared. bool is_ib_in_out = - ((stage_out_var_id && get_stage_out_struct_type().self == type.self) || - (stage_in_var_id && get_stage_in_struct_type().self == type.self)); + ((stage_out_var_id && get_stage_out_struct_type().self == type.self && + variable_storage_requires_stage_io(StorageClassOutput)) || + (stage_in_var_id && get_stage_in_struct_type().self == type.self && + variable_storage_requires_stage_io(StorageClassInput))); if (is_ib_in_out && is_member_builtin(type, index, &builtin)) is_using_builtin_array = true; array_type = type_to_array_glsl(physical_type); @@ -10168,8 +10535,14 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); case BuiltInClipDistance: - if (has_member_decoration(type.self, index, DecorationLocation)) - return join(" [[user(clip", get_member_decoration(type.self, index, DecorationLocation), ")]]"); + if (has_member_decoration(type.self, index, DecorationIndex)) + return join(" [[user(clip", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + else + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); + + case BuiltInCullDistance: + if (has_member_decoration(type.self, index, DecorationIndex)) + return join(" [[user(cull", get_member_decoration(type.self, index, DecorationIndex), ")]]"); else return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); @@ -10290,7 +10663,9 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in break; case BuiltInClipDistance: - return join(" [[user(clip", get_member_decoration(type.self, index, DecorationLocation), ")]]"); + return join(" [[user(clip", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + case BuiltInCullDistance: + return join(" [[user(cull", get_member_decoration(type.self, index, DecorationIndex), ")]]"); default: break; @@ -10482,12 +10857,16 @@ uint32_t CompilerMSL::get_or_allocate_builtin_input_member_location(spv::BuiltIn auto &mbr_type = get(get(type_id).member_types[index]); uint32_t count = type_to_location_count(mbr_type); - // This should always be 1. 
- if (count != 1) - return k_unknown_location; - loc = 0; - while (location_inputs_in_use.count(loc) != 0) + + const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool { + for (uint32_t i = 0; i < location_count; i++) + if (location_inputs_in_use.count(location + i) != 0) + return true; + return false; + }; + + while (location_range_in_use(loc, count)) loc++; set_member_decoration(type_id, index, DecorationLocation, loc); @@ -10503,7 +10882,7 @@ uint32_t CompilerMSL::get_or_allocate_builtin_input_member_location(spv::BuiltIn else builtin_to_automatic_input_location[builtin] = loc; - mark_location_as_used_by_shader(loc, mbr_type, StorageClassInput); + mark_location_as_used_by_shader(loc, mbr_type, StorageClassInput, true); return loc; } @@ -10640,7 +11019,25 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo case StorageClassOutput: if (capture_output_to_buffer) - addr_space = "device"; + { + if (var && type.storage == StorageClassOutput) + { + bool is_masked = is_stage_output_variable_masked(*var); + + if (is_masked) + { + if (is_tessellation_shader()) + addr_space = "threadgroup"; + else + addr_space = "thread"; + } + else if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)) + addr_space = "threadgroup"; + } + + if (!addr_space) + addr_space = "device"; + } break; default: @@ -11416,7 +11813,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() entry_func.fixup_hooks_in.push_back([=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = min(", to_expression(builtin_invocation_id_id), ".x / ", this->get_entry_point().output_vertices, - ", spvIndirectParams[1]);"); + ", spvIndirectParams[1] - 1);"); }); break; case BuiltInPatchVertices: @@ -12003,7 +12400,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) auto &var = get(arg.id); auto &type = get_variable_data_type(var); auto &var_type = get(arg.type); - StorageClass storage = var_type.storage; + StorageClass type_storage = var_type.storage; bool is_pointer = var_type.pointer; // If we need to modify the name of the variable, make sure we use the original variable. @@ -12038,17 +12435,41 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) // Allow Metal to use the array template to make arrays a value type string address_space = get_argument_address_space(var); - bool builtin = is_builtin_variable(var); + bool builtin = has_decoration(var.self, DecorationBuiltIn); auto builtin_type = BuiltIn(get_decoration(arg.id, DecorationBuiltIn)); - is_using_builtin_array = builtin; + if (address_space == "threadgroup") is_using_builtin_array = true; if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)) decl = join(cv_qualifier, type_to_glsl(type, arg.id)); else if (builtin) - decl = join(cv_qualifier, builtin_type_decl(builtin_type, arg.id)); - else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type)) + { + // Only use templated array for Clip/Cull distance when feasible. + // In other scenarios, we need need to override array length for tess levels (if used as outputs), + // or we need to emit the expected type for builtins (uint vs int). 
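+ // For instance, a gl_ClipDistance parameter can keep the spvUnsafeArray<float, N>
+ // template form, whereas gl_TessLevelOuter used as an output needs its builtin array
+ // length, and integer builtins need their exact uint/int type (illustrative; the actual
+ // declarations come from builtin_type_decl / type_to_glsl).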
+ auto storage = get(var.basetype).storage; + + if (storage == StorageClassInput && + (builtin_type == BuiltInTessLevelInner || builtin_type == BuiltInTessLevelOuter)) + { + is_using_builtin_array = false; + } + else if (builtin_type != BuiltInClipDistance && builtin_type != BuiltInCullDistance) + { + is_using_builtin_array = true; + } + + if (storage == StorageClassOutput && variable_storage_requires_stage_io(storage) && + !is_stage_output_builtin_masked(builtin_type)) + is_using_builtin_array = true; + + if (is_using_builtin_array) + decl = join(cv_qualifier, builtin_type_decl(builtin_type, arg.id)); + else + decl = join(cv_qualifier, type_to_glsl(type, arg.id)); + } + else if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) && is_array(type)) { is_using_builtin_array = true; decl += join(cv_qualifier, type_to_glsl(type, arg.id), "*"); @@ -12072,10 +12493,10 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl = join(cv_qualifier, type_to_glsl(type, arg.id)); } - bool opaque_handle = storage == StorageClassUniformConstant; + bool opaque_handle = type_storage == StorageClassUniformConstant; if (!builtin && !opaque_handle && !is_pointer && - (storage == StorageClassFunction || storage == StorageClassGeneric)) + (type_storage == StorageClassFunction || type_storage == StorageClassGeneric)) { // If the argument is a pure value and not an opaque type, we will pass by value. if (msl_options.force_native_arrays && is_array(type)) @@ -12116,7 +12537,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) if (msl_options.argument_buffers) { uint32_t desc_set = get_decoration(name_id, DecorationDescriptorSet); - if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && + if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) && descriptor_set_is_argument_buffer(desc_set)) { // An awkward case where we need to emit *more* address space declarations (yay!). @@ -12158,7 +12579,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } else { - decl += " (&"; + auto array_size_decl = type_to_array_glsl(type); + if (array_size_decl.empty()) + decl += "& "; + else + decl += " (&"; + const char *restrict_kw = to_restrict(name_id); if (*restrict_kw) { @@ -12166,8 +12592,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += restrict_kw; } decl += to_expression(name_id); - decl += ")"; - decl += type_to_array_glsl(type); + + if (!array_size_decl.empty()) + { + decl += ")"; + decl += array_size_decl; + } } } else if (!opaque_handle && (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct)) @@ -12612,8 +13042,10 @@ string CompilerMSL::to_qualifiers_glsl(uint32_t id) { string quals; + auto *var = maybe_get(id); auto &type = expression_type(id); - if (type.storage == StorageClassWorkgroup) + + if (type.storage == StorageClassWorkgroup || (var && variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))) quals += "threadgroup "; return quals; @@ -12794,18 +13226,55 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type) } } -// Threadgroup arrays can't have a wrapper type +bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const +{ + if (variable.storage == storage) + return true; + + if (storage == StorageClassWorkgroup) + { + auto model = get_execution_model(); + + // Specially masked IO block variable. 
+ // Normally, we will never access IO blocks directly here. + // The only scenario which that should occur is with a masked IO block. + if (model == ExecutionModelTessellationControl && variable.storage == StorageClassOutput && + has_decoration(get(variable.basetype).self, DecorationBlock)) + { + return true; + } + + return variable.storage == StorageClassOutput && + model == ExecutionModelTessellationControl && + is_stage_output_variable_masked(variable); + } + else if (storage == StorageClassStorageBuffer) + { + // We won't be able to catch writes to control point outputs here since variable + // refers to a function local pointer. + // This is fine, as there cannot be concurrent writers to that memory anyways, + // so we just ignore that case. + + return (variable.storage == StorageClassOutput || variable.storage == StorageClassInput) && + !variable_storage_requires_stage_io(variable.storage) && + (variable.storage != StorageClassOutput || !is_stage_output_variable_masked(variable)); + } + else + { + return false; + } +} + std::string CompilerMSL::variable_decl(const SPIRVariable &variable) { - if (variable.storage == StorageClassWorkgroup) - { + bool old_is_using_builtin_array = is_using_builtin_array; + + // Threadgroup arrays can't have a wrapper type. + if (variable_decl_is_remapped_storage(variable, StorageClassWorkgroup)) is_using_builtin_array = true; - } + std::string expr = CompilerGLSL::variable_decl(variable); - if (variable.storage == StorageClassWorkgroup) - { - is_using_builtin_array = false; - } + is_using_builtin_array = old_is_using_builtin_array; return expr; } @@ -13372,7 +13841,6 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) { switch (builtin) { - // Handle HLSL-style 0-based vertex/instance index. // Override GLSL compiler strictness case BuiltInVertexId: @@ -13505,9 +13973,9 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInSampleMask: if (get_execution_model() == ExecutionModelTessellationControl) break; - if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && + !is_stage_output_builtin_masked(builtin)) return stage_out_var_name + "." 
+ CompilerGLSL::builtin_to_glsl(builtin, storage); - break; case BuiltInBaryCoordNV: @@ -13517,31 +13985,21 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) break; case BuiltInTessLevelOuter: - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (get_execution_model() == ExecutionModelTessellationControl && + storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) { - if (storage != StorageClassOutput && !get_entry_point().flags.get(ExecutionModeTriangles) && - current_function && (current_function->self == ir.default_entry_point)) - return join(patch_stage_in_var_name, ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); - else - break; - } - if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "].edgeTessellationFactor"); + } break; case BuiltInTessLevelInner: - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (get_execution_model() == ExecutionModelTessellationControl && + storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) { - if (storage != StorageClassOutput && !get_entry_point().flags.get(ExecutionModeTriangles) && - current_function && (current_function->self == ir.default_entry_point)) - return join(patch_stage_in_var_name, ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); - else - break; - } - if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "].insideTessellationFactor"); + } break; default: @@ -13795,6 +14253,7 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) // Vertex function out case BuiltInClipDistance: + case BuiltInCullDistance: return "float"; case BuiltInPointSize: return "float"; @@ -14414,9 +14873,11 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o return SPVFuncImplMod; case OpFAdd: - if (compiler.msl_options.invariant_float_math) + case OpFSub: + if (compiler.msl_options.invariant_float_math || + compiler.has_decoration(args[1], DecorationNoContraction)) { - return SPVFuncImplFAdd; + return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub; } break; @@ -14425,7 +14886,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpMatrixTimesVector: case OpVectorTimesMatrix: case OpMatrixTimesMatrix: - if (compiler.msl_options.invariant_float_math) + if (compiler.msl_options.invariant_float_math || + compiler.has_decoration(args[1], DecorationNoContraction)) { return SPVFuncImplFMul; } @@ -14638,30 +15100,23 @@ void CompilerMSL::MemberSorter::sort() } } -// Sort first by builtin status (put builtins at end), then by the sorting aspect. bool CompilerMSL::MemberSorter::operator()(uint32_t mbr_idx1, uint32_t mbr_idx2) { auto &mbr_meta1 = meta.members[mbr_idx1]; auto &mbr_meta2 = meta.members[mbr_idx2]; - if (mbr_meta1.builtin != mbr_meta2.builtin) - return mbr_meta2.builtin; - else - switch (sort_aspect) - { - case Location: + + if (sort_aspect == LocationThenBuiltInType) + { + // Sort first by builtin status (put builtins at end), then by the sorting aspect. 
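+ // E.g. user members stay in ascending location order, and builtin members are moved
+ // to the end ordered by their BuiltIn enum value, which gives the usual
+ // Position, PointSize, ClipDistance, CullDistance layout for gl_PerVertex.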
+ if (mbr_meta1.builtin != mbr_meta2.builtin) + return mbr_meta2.builtin; + else if (mbr_meta1.builtin) + return mbr_meta1.builtin_type < mbr_meta2.builtin_type; + else return mbr_meta1.location < mbr_meta2.location; - case LocationReverse: - return mbr_meta1.location > mbr_meta2.location; - case Offset: - return mbr_meta1.offset < mbr_meta2.offset; - case OffsetThenLocationReverse: - return (mbr_meta1.offset < mbr_meta2.offset) || - ((mbr_meta1.offset == mbr_meta2.offset) && (mbr_meta1.location > mbr_meta2.location)); - case Alphabetical: - return mbr_meta1.alias < mbr_meta2.alias; - default: - return false; - } + } + else + return mbr_meta1.offset < mbr_meta2.offset; } CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) @@ -15094,10 +15549,63 @@ void CompilerMSL::analyze_argument_buffers() }); uint32_t member_index = 0; + uint32_t next_arg_buff_index = 0; for (auto &resource : resources) { auto &var = *resource.var; auto &type = get_variable_data_type(var); + + // If needed, synthesize and add padding members. + // member_index and next_arg_buff_index are incremented when padding members are added. + if (msl_options.pad_argument_buffer_resources) + { + while (resource.index > next_arg_buff_index) + { + auto &rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index); + switch (rez_bind.basetype) + { + case SPIRType::Void: + case SPIRType::Boolean: + case SPIRType::SByte: + case SPIRType::UByte: + case SPIRType::Short: + case SPIRType::UShort: + case SPIRType::Int: + case SPIRType::UInt: + case SPIRType::Int64: + case SPIRType::UInt64: + case SPIRType::AtomicCounter: + case SPIRType::Half: + case SPIRType::Float: + case SPIRType::Double: + add_argument_buffer_padding_buffer_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::Image: + add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::Sampler: + add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::SampledImage: + if (next_arg_buff_index == rez_bind.msl_sampler) + add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + else + add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + default: + break; + } + } + + // Adjust the number of slots consumed by current member itself. + // If actual member is an array, allow runtime array resolution as well. + uint32_t elem_cnt = type.array.empty() ? 1 : to_array_size_literal(type); + if (elem_cnt == 0) + elem_cnt = get_resource_array_size(var.self); + + next_arg_buff_index += elem_cnt; + } + string mbr_name = ensure_valid_name(resource.name, "m"); if (resource.plane > 0) mbr_name += join(plane_name_suffix, resource.plane); @@ -15196,6 +15704,125 @@ void CompilerMSL::analyze_argument_buffers() } } +// Return the resource type of the app-provided resources for the descriptor set, +// that matches the resource index of the argument buffer index. +// This is a two-step lookup, first lookup the resource binding number from the argument buffer index, +// then lookup the resource binding using the binding number. 
+MSLResourceBinding &CompilerMSL::get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx)
+{
+	auto stage = get_entry_point().model;
+	StageSetBinding arg_idx_tuple = { stage, desc_set, arg_idx };
+	auto arg_itr = resource_arg_buff_idx_to_binding_number.find(arg_idx_tuple);
+	if (arg_itr != end(resource_arg_buff_idx_to_binding_number))
+	{
+		StageSetBinding bind_tuple = { stage, desc_set, arg_itr->second };
+		auto bind_itr = resource_bindings.find(bind_tuple);
+		if (bind_itr != end(resource_bindings))
+			return bind_itr->second.first;
+	}
+	SPIRV_CROSS_THROW("Argument buffer resource base type could not be determined. When padding argument buffer "
+	                  "elements, all descriptor set resources must be supplied with a base type by the app.");
+}
+
+// Adds an argument buffer padding argument buffer type as one or more members of the struct type at the member index.
+// Metal does not support arrays of buffers, so these are emitted as multiple struct members.
+void CompilerMSL::add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx,
+                                                          uint32_t &arg_buff_index, MSLResourceBinding &rez_bind)
+{
+	if (!argument_buffer_padding_buffer_type_id)
+	{
+		uint32_t buff_type_id = ir.increase_bound_by(2);
+		auto &buff_type = set<SPIRType>(buff_type_id);
+		buff_type.basetype = rez_bind.basetype;
+		buff_type.storage = StorageClassUniformConstant;
+
+		uint32_t ptr_type_id = buff_type_id + 1;
+		auto &ptr_type = set<SPIRType>(ptr_type_id);
+		ptr_type = buff_type;
+		ptr_type.pointer = true;
+		ptr_type.pointer_depth++;
+		ptr_type.parent_type = buff_type_id;
+
+		argument_buffer_padding_buffer_type_id = ptr_type_id;
+	}
+
+	for (uint32_t rez_idx = 0; rez_idx < rez_bind.count; rez_idx++)
+		add_argument_buffer_padding_type(argument_buffer_padding_buffer_type_id, struct_type, mbr_idx, arg_buff_index, 1);
+}
+
+// Adds an argument buffer padding argument image type as a member of the struct type at the member index.
+void CompilerMSL::add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx,
+                                                         uint32_t &arg_buff_index, MSLResourceBinding &rez_bind)
+{
+	if (!argument_buffer_padding_image_type_id)
+	{
+		uint32_t base_type_id = ir.increase_bound_by(2);
+		auto &base_type = set<SPIRType>(base_type_id);
+		base_type.basetype = SPIRType::Float;
+		base_type.width = 32;
+
+		uint32_t img_type_id = base_type_id + 1;
+		auto &img_type = set<SPIRType>(img_type_id);
+		img_type.basetype = SPIRType::Image;
+		img_type.storage = StorageClassUniformConstant;
+
+		img_type.image.type = base_type_id;
+		img_type.image.dim = Dim2D;
+		img_type.image.depth = false;
+		img_type.image.arrayed = false;
+		img_type.image.ms = false;
+		img_type.image.sampled = 1;
+		img_type.image.format = ImageFormatUnknown;
+		img_type.image.access = AccessQualifierMax;
+
+		argument_buffer_padding_image_type_id = img_type_id;
+	}
+
+	add_argument_buffer_padding_type(argument_buffer_padding_image_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count);
+}
+
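These helpers only affect how the final argument buffer struct is laid out. As a rough illustration (not output produced verbatim by this patch; the struct and resource names below are hypothetical), a descriptor set whose sampler at argument index 1 is never referenced by the current shader could be emitted in MSL roughly as:

struct spvDescriptorSetBuffer0
{
	texture2d<float> uTexture [[id(0)]];
	sampler _m1_pad [[id(1)]];    // synthesized padding member for the unused sampler; name follows join("_m", arg_buff_index, "_pad")
	constant UBO* uUBO [[id(2)]]; // keeps id(2) in every shader sharing this descriptor set
};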
+// Adds an argument buffer padding argument sampler type as a member of the struct type at the member index.
+void CompilerMSL::add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx,
+                                                           uint32_t &arg_buff_index, MSLResourceBinding &rez_bind)
+{
+	if (!argument_buffer_padding_sampler_type_id)
+	{
+		uint32_t samp_type_id = ir.increase_bound_by(1);
+		auto &samp_type = set<SPIRType>(samp_type_id);
+		samp_type.basetype = SPIRType::Sampler;
+		samp_type.storage = StorageClassUniformConstant;
+
+		argument_buffer_padding_sampler_type_id = samp_type_id;
+	}
+
+	add_argument_buffer_padding_type(argument_buffer_padding_sampler_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count);
+}
+
+// Adds the argument buffer padding argument type as a member of the struct type at the member index.
+// Advances both arg_buff_index and mbr_idx to next argument slots.
+void CompilerMSL::add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx,
+                                                   uint32_t &arg_buff_index, uint32_t count)
+{
+	uint32_t type_id = mbr_type_id;
+	if (count > 1)
+	{
+		uint32_t ary_type_id = ir.increase_bound_by(1);
+		auto &ary_type = set<SPIRType>(ary_type_id);
+		ary_type = get<SPIRType>(type_id);
+		ary_type.array.push_back(count);
+		ary_type.array_size_literal.push_back(true);
+		ary_type.parent_type = type_id;
+		type_id = ary_type_id;
+	}
+
+	set_member_name(struct_type.self, mbr_idx, join("_m", arg_buff_index, "_pad"));
+	set_extended_member_decoration(struct_type.self, mbr_idx, SPIRVCrossDecorationResourceIndexPrimary, arg_buff_index);
+	struct_type.member_types.push_back(type_id);
+
+	arg_buff_index += count;
+	mbr_idx++;
+}
+
 void CompilerMSL::activate_argument_buffer_resources()
 {
 	// For ABI compatibility, force-enable all resources which are part of argument buffers.
diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp
index 52e96761e..95e0c966c 100644
--- a/3rdparty/spirv-cross/spirv_msl.hpp
+++ b/3rdparty/spirv-cross/spirv_msl.hpp
@@ -71,15 +71,23 @@ struct MSLShaderInput
 // resources consumed by this binding, if the binding represents an array of resources.
 // If the resource array is a run-time-sized array, which are legal in GLSL or SPIR-V, this value
 // will be used to declare the array size in MSL, which does not support run-time-sized arrays.
-// For resources that are not held in a run-time-sized array, the count field does not need to be populated.
+// If pad_argument_buffer_resources is enabled, the basetype and count values are used to
+// specify the base type and array size of the resource in the argument buffer, if that resource
+// is not defined and used by the shader. With pad_argument_buffer_resources enabled, this
+// information will be used to pad the argument buffer structure, in order to align that
+// structure consistently for all uses, across all shaders, of the descriptor set represented
+// by the argument buffer. If pad_argument_buffer_resources is disabled, basetype does not
+// need to be populated, and if the resource is also not a run-time-sized array, the count
+// field does not need to be populated.
 // If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set,
 // and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we
 // remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure.
-// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will become a
-// [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used.
+// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will
+// become a [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used.
 struct MSLResourceBinding
 {
 	spv::ExecutionModel stage = spv::ExecutionModelMax;
+	SPIRType::BaseType basetype = SPIRType::Unknown;
 	uint32_t desc_set = 0;
 	uint32_t binding = 0;
 	uint32_t count = 0;
@@ -346,6 +354,19 @@ public:
 	// and would otherwise declare a different IAB.
 	bool force_active_argument_buffer_resources = false;

+	// Aligns each resource in an argument buffer to its assigned index value, id(N),
+	// by adding synthetic padding members in the argument buffer struct for any resources
+	// in the argument buffer that are not defined and used by the shader. This allows
+	// the shader to index into the correct argument in a descriptor set argument buffer
+	// that is shared across shaders, where not all resources in the argument buffer are
+	// defined in each shader. For this to work, an MSLResourceBinding must be provided for
+	// all descriptors in any descriptor set held in an argument buffer in the shader, and
+	// that MSLResourceBinding must have the basetype and count members populated correctly.
+	// The implementation here assumes any inline blocks in the argument buffer are provided
+	// in a Metal buffer, and doesn't take into consideration inline blocks that are
+	// optionally embedded directly into the argument buffer via add_inline_uniform_block().
+	bool pad_argument_buffer_resources = false;
+
 	// Forces the use of plain arrays, which works around certain driver bugs on certain versions
 	// of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210.
 	// May reduce performance in scenarios where arrays are copied around as value-types.
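A minimal host-side sketch of how an application might drive this option (illustrative only, not part of the patch; the shader, set, and binding values are assumptions). Every descriptor in the set gets an MSLResourceBinding with basetype and count filled in, so the compiler can synthesize padding for the ones this particular shader never references:

	// spirv_binary is assumed to be a std::vector<uint32_t> holding the SPIR-V module.
	spirv_cross::CompilerMSL compiler(std::move(spirv_binary));

	auto opts = compiler.get_msl_options();
	opts.argument_buffers = true;
	opts.pad_argument_buffer_resources = true;
	compiler.set_msl_options(opts);

	// Describe a descriptor this shader does not use, so a padding member can be emitted for it.
	spirv_cross::MSLResourceBinding unused_sampler = {};
	unused_sampler.stage = spv::ExecutionModelFragment;
	unused_sampler.desc_set = 0;
	unused_sampler.binding = 1;
	unused_sampler.basetype = spirv_cross::SPIRType::Sampler;
	unused_sampler.count = 1;
	unused_sampler.msl_sampler = 1; // argument buffer slot reserved for it
	compiler.add_msl_resource_binding(unused_sampler);

	std::string msl_source = compiler.compile();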
@@ -634,6 +655,7 @@ protected:
 		SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations
 		SPVFuncImplFMul,
 		SPVFuncImplFAdd,
+		SPVFuncImplFSub,
 		SPVFuncImplCubemapTo2DArrayFace,
 		SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type
 		SPVFuncImplInverse4x4,
@@ -715,6 +737,8 @@ protected:
 	// Threadgroup arrays can't have a wrapper type
 	std::string variable_decl(const SPIRVariable &variable) override;

+	bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override;
+
 	// GCC workaround of lambdas calling protected functions (for older GCC versions)
 	std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override;

@@ -780,8 +804,11 @@ protected:
 		};
 		std::unordered_map<uint32_t, LocationMeta> location_meta;
 		bool strip_array = false;
+		bool allow_local_declaration = false;
 	};

+	std::string to_tesc_invocation_id();
+	void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array);
 	void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type,
 	                                     SPIRVariable &var, InterfaceBlockMeta &meta);
 	void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
@@ -794,14 +821,15 @@ protected:
 	void add_composite_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
 	                                                      SPIRType &ib_type, SPIRVariable &var, uint32_t index, InterfaceBlockMeta &meta);
-	uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array);
 	void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var);
 	void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id);

-	void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, spv::StorageClass storage);
+	void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type,
+	                                     spv::StorageClass storage, bool fallback = false);
 	uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin);
-	uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t num_components = 0);
+	uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location,
+	                                   uint32_t num_components, bool strip_array);

 	void emit_custom_templates();
 	void emit_custom_functions();
@@ -886,8 +914,8 @@ protected:
 	void add_pragma_line(const std::string &line);
 	void add_typedef_line(const std::string &line);
 	void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
-	void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
-	                     spv::StorageClass rhs_storage) override;
+	void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id,
+	                     spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override;
 	void build_implicit_builtins();
 	uint32_t build_constant_uint_array_pointer();
 	void emit_entry_point_declarations() override;
@@ -913,6 +941,9 @@
 	uint32_t view_mask_buffer_id = 0;
 	uint32_t dynamic_offsets_buffer_id = 0;
 	uint32_t uint_type_id = 0;
+	uint32_t argument_buffer_padding_buffer_type_id = 0;
+	uint32_t argument_buffer_padding_image_type_id = 0;
+	uint32_t argument_buffer_padding_sampler_type_id = 0;

 	bool does_shader_write_sample_mask = false;

@@ -922,6 +953,7 @@
 	void analyze_sampled_image_usage();

+	bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override;
 	void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage,
 	                                            bool &is_packed) override;
 	void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length);
@@ -941,6 +973,7 @@
 	std::map<uint32_t, MSLShaderInput> inputs_by_location;
 	std::unordered_map<uint32_t, MSLShaderInput> inputs_by_builtin;
 	std::unordered_set<uint32_t> location_inputs_in_use;
+	std::unordered_set<uint32_t> location_inputs_in_use_fallback;
 	std::unordered_map<uint32_t, uint32_t> fragment_output_components;
 	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
 	std::set<std::string> pragma_lines;
@@ -948,7 +981,7 @@
 	SmallVector<uint32_t> vars_needing_early_declaration;
 	std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings;
-	uint32_t type_to_location_count(const SPIRType &type) const;
+	std::unordered_map<StageSetBinding, uint32_t, InternalHasher> resource_arg_buff_idx_to_binding_number;
 	uint32_t next_metal_resource_index_buffer = 0;
 	uint32_t next_metal_resource_index_texture = 0;
@@ -962,6 +995,7 @@
 	VariableID patch_stage_out_var_id = 0;
 	VariableID stage_in_ptr_var_id = 0;
 	VariableID stage_out_ptr_var_id = 0;
+	VariableID stage_out_masked_builtin_type_id = 0;

 	// Handle HLSL-style 0-based vertex/instance index.
 	enum class TriState
@@ -1027,6 +1061,11 @@
 	void analyze_argument_buffers();
 	bool descriptor_set_is_argument_buffer(uint32_t desc_set) const;
+	MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx);
+	void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
+	void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
+	void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
+	void add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, uint32_t count);
 	uint32_t get_target_components_for_fragment_location(uint32_t location) const;
 	uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components,
@@ -1044,6 +1083,8 @@
 	bool type_is_pointer_to_pointer(const SPIRType &type) const;
 	bool is_supported_argument_buffer_type(const SPIRType &type) const;

+	bool variable_storage_requires_stage_io(spv::StorageClass storage) const;
+
 	// OpcodeHandler that handles several MSL preprocessing operations.
 	struct OpCodePreprocessor : OpcodeHandler
 	{
@@ -1087,11 +1128,8 @@
 	{
 		enum SortAspect
 		{
-			Location,
-			LocationReverse,
-			Offset,
-			OffsetThenLocationReverse,
-			Alphabetical
+			LocationThenBuiltInType,
+			Offset
 		};

 		void sort();
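For context, a minimal sketch (not part of the patch) of how the reduced SortAspect set is driven through the constructor shown earlier; ib_type and ib_meta are assumed to refer to an interface block's SPIRType and its corresponding Meta entry:

	// Order user members by location, then group builtins at the end, ordered by builtin type.
	CompilerMSL::MemberSorter member_sorter(ib_type, ib_meta, CompilerMSL::MemberSorter::LocationThenBuiltInType);
	member_sorter.sort();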