diff --git a/3rdparty/spirv-cross/spirv_common.hpp b/3rdparty/spirv-cross/spirv_common.hpp
index 0cdbda23d..51fffe7a1 100644
--- a/3rdparty/spirv-cross/spirv_common.hpp
+++ b/3rdparty/spirv-cross/spirv_common.hpp
@@ -220,7 +220,7 @@ static inline std::string convert_to_string(int32_t value)
 	// INT_MIN is ... special on some backends. If we use a decimal literal, and negate it, we
 	// could accidentally promote the literal to long first, then negate.
 	// To workaround it, emit int(0x80000000) instead.
-	if (value == std::numeric_limits<int32_t>::min())
+	if (value == (std::numeric_limits<int32_t>::min)())
 		return "int(0x80000000)";
 	else
 		return std::to_string(value);
@@ -231,7 +231,7 @@ static inline std::string convert_to_string(int64_t value, const std::string &in
 	// INT64_MIN is ... special on some backends.
 	// If we use a decimal literal, and negate it, we might overflow the representable numbers.
 	// To workaround it, emit int(0x80000000) instead.
-	if (value == std::numeric_limits<int64_t>::min())
+	if (value == (std::numeric_limits<int64_t>::min)())
 		return join(int64_type, "(0x8000000000000000u", (long_long_literal_suffix ? "ll" : "l"), ")");
 	else
 		return std::to_string(value) + (long_long_literal_suffix ? "ll" : "l");
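
Editor's note on the hunk above: wrapping the call as `(std::numeric_limits<int32_t>::min)()` suppresses function-style macro expansion, so the code survives headers (notably Windows.h) that define `min`/`max` macros. A minimal self-contained sketch of both issues in play; the helper names here are ours, not SPIRV-Cross API:

```cpp
#include <cstdint>
#include <limits>
#include <string>

// A function-style macro only expands when the identifier is immediately
// followed by '('; the extra parentheses around the callee block that.
static int32_t min_int32()
{
	return (std::numeric_limits<int32_t>::min)();
}

static std::string int_literal(int32_t value)
{
	// "-2147483648" parses as negate(2147483648); the positive literal does
	// not fit in int32, which is why the hex form is emitted instead.
	if (value == min_int32())
		return "int(0x80000000)";
	return std::to_string(value);
}
```
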
diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp
index 4ab985efd..3f0fec334 100644
--- a/3rdparty/spirv-cross/spirv_cross.cpp
+++ b/3rdparty/spirv-cross/spirv_cross.cpp
@@ -640,6 +640,13 @@ bool Compiler::is_physical_pointer(const SPIRType &type) const
 	return type.op == OpTypePointer && type.storage == StorageClassPhysicalStorageBuffer;
 }
 
+bool Compiler::is_physical_pointer_to_buffer_block(const SPIRType &type) const
+{
+	return is_physical_pointer(type) && get_pointee_type(type).self == type.parent_type &&
+	       (has_decoration(type.self, DecorationBlock) ||
+	        has_decoration(type.self, DecorationBufferBlock));
+}
+
 bool Compiler::is_runtime_size_array(const SPIRType &type)
 {
 	return type.op == OpTypeRuntimeArray;
@@ -5024,8 +5031,7 @@ void Compiler::PhysicalStorageBufferPointerHandler::mark_aligned_access(uint32_t
 bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint32_t type_id) const
 {
 	auto &type = compiler.get<SPIRType>(type_id);
-	return type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer &&
-	       type.pointer_depth == 1 && !compiler.type_is_array_of_pointers(type);
+	return compiler.is_physical_pointer(type);
 }
 
 uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const
@@ -5055,7 +5061,8 @@ void Compiler::PhysicalStorageBufferPointerHandler::setup_meta_chain(uint32_t ty
 		access_chain_to_physical_block[var_id] = &meta;
 
 		auto &type = compiler.get<SPIRType>(type_id);
-		if (type.basetype != SPIRType::Struct)
+
+		if (!compiler.is_physical_pointer_to_buffer_block(type))
 			non_block_types.insert(type_id);
 
 		if (meta.alignment == 0)
@@ -5114,9 +5121,7 @@ bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t
 uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_id(uint32_t type_id) const
 {
 	auto *type = &compiler.get<SPIRType>(type_id);
-	while (type->pointer &&
-	       type->storage == StorageClassPhysicalStorageBufferEXT &&
-	       !type_is_bda_block_entry(type_id))
+	while (compiler.is_physical_pointer(*type) && !type_is_bda_block_entry(type_id))
 	{
 		type_id = type->parent_type;
 		type = &compiler.get<SPIRType>(type_id);
@@ -5131,12 +5136,10 @@ void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from
 	for (auto &member : type.member_types)
 	{
 		auto &subtype = compiler.get<SPIRType>(member);
-		if (subtype.basetype != SPIRType::Struct && subtype.pointer &&
-		    subtype.storage == spv::StorageClassPhysicalStorageBufferEXT)
-		{
+
+		if (compiler.is_physical_pointer(subtype) && !compiler.is_physical_pointer_to_buffer_block(subtype))
 			non_block_types.insert(get_base_non_block_type_id(member));
-		}
-		else if (subtype.basetype == SPIRType::Struct && !subtype.pointer)
+		else if (subtype.basetype == SPIRType::Struct && !compiler.is_pointer(subtype))
 			analyze_non_block_types_from_block(subtype);
 	}
 }
@@ -5149,9 +5152,14 @@ void Compiler::analyze_non_block_pointer_types()
 	// Analyze any block declaration we have to make. It might contain
 	// physical pointers to POD types which we never used, and thus never added to the list.
 	// We'll need to add those pointer types to the set of types we declare.
-	ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
-		if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
+	ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) {
+		// Only analyze the raw block struct, not any pointer-to-struct, since that's just redundant.
+		if (type.self == id &&
+		    (has_decoration(type.self, DecorationBlock) ||
+		     has_decoration(type.self, DecorationBufferBlock)))
+		{
 			handler.analyze_non_block_types_from_block(type);
+		}
 	});
 
 	physical_storage_non_block_pointer_types.reserve(handler.non_block_types.size());
diff --git a/3rdparty/spirv-cross/spirv_cross.hpp b/3rdparty/spirv-cross/spirv_cross.hpp
index 9fe6c41c4..2ba602502 100644
--- a/3rdparty/spirv-cross/spirv_cross.hpp
+++ b/3rdparty/spirv-cross/spirv_cross.hpp
@@ -685,6 +685,7 @@ protected:
 	bool is_array(const SPIRType &type) const;
 	bool is_pointer(const SPIRType &type) const;
 	bool is_physical_pointer(const SPIRType &type) const;
+	bool is_physical_pointer_to_buffer_block(const SPIRType &type) const;
 	static bool is_runtime_size_array(const SPIRType &type);
 	uint32_t expression_type_id(uint32_t id) const;
 	const SPIRType &expression_type(uint32_t id) const;
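
The new `is_physical_pointer_to_buffer_block` predicate separates the two kinds of PhysicalStorageBuffer pointers this handler tracks: pointers to Block/BufferBlock-decorated structs, which become named buffer-reference blocks, and "benign" pointers to plain data, which land in `non_block_types` and are wrapped later. A rough classification sketch with a simplified stand-in type (not the SPIRV-Cross API):

```cpp
// Simplified stand-in for SPIRType; illustrative only.
struct TypeDesc
{
	bool is_physical_pointer;  // OpTypePointer in PhysicalStorageBuffer storage
	bool pointee_is_block;     // pointee struct decorated Block/BufferBlock
};

enum class BDAKind { NotBDA, BufferBlock, WrappedPOD };

// Mirrors the split used above: block pointees are emitted as proper
// buffer_reference blocks, everything else becomes a { T value; } wrapper.
static BDAKind classify(const TypeDesc &t)
{
	if (!t.is_physical_pointer)
		return BDAKind::NotBDA;
	return t.pointee_is_block ? BDAKind::BufferBlock : BDAKind::WrappedPOD;
}
```
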
diff --git a/3rdparty/spirv-cross/spirv_cross_containers.hpp b/3rdparty/spirv-cross/spirv_cross_containers.hpp
index e79b32093..c496cb75b 100644
--- a/3rdparty/spirv-cross/spirv_cross_containers.hpp
+++ b/3rdparty/spirv-cross/spirv_cross_containers.hpp
@@ -576,6 +576,7 @@ public:
 		if (!ptr)
 			return nullptr;
 
+		vacants.reserve(num_objects);
 		for (unsigned i = 0; i < num_objects; i++)
 			vacants.push_back(&ptr[i]);
 
diff --git a/3rdparty/spirv-cross/spirv_cross_error_handling.hpp b/3rdparty/spirv-cross/spirv_cross_error_handling.hpp
index e96ebb9a7..91e6cf4f8 100644
--- a/3rdparty/spirv-cross/spirv_cross_error_handling.hpp
+++ b/3rdparty/spirv-cross/spirv_cross_error_handling.hpp
@@ -66,6 +66,11 @@ public:
 	    : std::runtime_error(str)
 	{
 	}
+
+	explicit CompilerError(const char *str)
+	    : std::runtime_error(str)
+	{
+	}
 };
 
 #define SPIRV_CROSS_THROW(x) throw CompilerError(x)
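
The added `const char *` constructor is a small allocation-avoidance fix: `SPIRV_CROSS_THROW` is almost always fed a string literal, and forwarding the literal directly lets `std::runtime_error` manage the copy without first materializing a temporary `std::string`. A minimal illustration of the overload resolution (class name invented):

```cpp
#include <stdexcept>
#include <string>

class Error : public std::runtime_error
{
public:
	explicit Error(const std::string &str) : std::runtime_error(str) {}
	// String literals bind here, skipping the std::string temporary.
	explicit Error(const char *str) : std::runtime_error(str) {}
};

int main()
{
	try { throw Error("bad SPIR-V"); } // resolves to the const char* overload
	catch (const Error &e) { return e.what()[0] == 'b' ? 0 : 1; }
}
```
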
diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp
index 0efc44aee..0d37ba2e8 100644
--- a/3rdparty/spirv-cross/spirv_glsl.cpp
+++ b/3rdparty/spirv-cross/spirv_glsl.cpp
@@ -1768,7 +1768,9 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin
 	for (uint32_t i = 0; i < type.member_types.size(); i++)
 	{
 		auto &memb_type = get<SPIRType>(type.member_types[i]);
-		auto member_flags = ir.meta[type.self].members[i].decoration_flags;
+
+		auto *type_meta = ir.find_meta(type.self);
+		auto member_flags = type_meta ? type_meta->members[i].decoration_flags : Bitset{};
 
 		// Verify alignment rules.
 		uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
@@ -2145,11 +2147,11 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 	// If SPIR-V does not comply with either layout, we cannot really work around it.
 	if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
 	{
-		attr.push_back(buffer_to_packing_standard(type, false));
+		attr.push_back(buffer_to_packing_standard(type, false, true));
 	}
 	else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
 	{
-		attr.push_back(buffer_to_packing_standard(type, true));
+		attr.push_back(buffer_to_packing_standard(type, true, true));
 	}
 
 	// For images, the type itself adds a layout qualifer.
@@ -2170,7 +2172,9 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 	return res;
 }
 
-string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
+string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type,
+                                                bool support_std430_without_scalar_layout,
+                                                bool support_enhanced_layouts)
 {
 	if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
 		return "std430";
@@ -2182,6 +2186,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool suppo
 		return "scalar";
 	}
 	else if (support_std430_without_scalar_layout &&
+	         support_enhanced_layouts &&
 	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
 	{
 		if (options.es && !options.vulkan_semantics)
@@ -2193,7 +2198,8 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool suppo
 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
 		return "std430";
 	}
-	else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
+	else if (support_enhanced_layouts &&
+	         buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
 	{
 		// Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference,
 		// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
@@ -2207,7 +2213,9 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool suppo
 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
 		return "std140";
 	}
-	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
+	else if (options.vulkan_semantics &&
+	         support_enhanced_layouts &&
+	         buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
 	{
 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
 		require_extension_internal("GL_EXT_scalar_block_layout");
@@ -2221,6 +2229,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool suppo
 		return "std430";
 	}
 	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
+	         support_enhanced_layouts &&
 	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
 	{
 		// UBOs can support std430 with GL_EXT_scalar_block_layout.
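
The new `support_enhanced_layouts` flag lets callers request a packing standard while forbidding the `*EnhancedLayout` fallbacks, which rely on explicit `layout(offset = ...)` decorations that cannot be emitted everywhere (the buffer-reference path below turns them off). A condensed sketch of the resulting decision ladder; all names here are invented for illustration:

```cpp
// Illustrative only; does not mirror the SPIRV-Cross API exactly.
static const char *pick_packing(bool fits_std430, bool fits_std140,
                                bool fits_enhanced_std430,
                                bool allow_std430, bool allow_enhanced)
{
	if (allow_std430 && fits_std430)
		return "std430";
	if (fits_std140)
		return "std140";
	// Enhanced layouts are the fallback of last resort: they require
	// emitting layout(offset) on members, so some callers opt out.
	if (allow_enhanced && allow_std430 && fits_enhanced_std430)
		return "std430"; // plus explicit offsets
	return nullptr;      // no representable layout
}
```
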
@@ -2319,7 +2328,7 @@ void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_de
 	auto &type = get<SPIRType>(type_id);
 	string buffer_name;
 
-	if (forward_declaration)
+	if (forward_declaration && is_physical_pointer_to_buffer_block(type))
 	{
 		// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
 		// Allow aliased name since we might be declaring the block twice. Once with buffer reference (forward declared) and one proper declaration.
@@ -2352,10 +2361,10 @@ void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_de
 		// Ensure we emit the correct name when emitting non-forward pointer type.
 		ir.meta[type.self].decoration.alias = buffer_name;
 	}
-	else if (type.basetype != SPIRType::Struct)
-		buffer_name = type_to_glsl(type);
 	else
-		buffer_name = to_name(type.self, false);
+	{
+		buffer_name = type_to_glsl(type);
+	}
 
 	if (!forward_declaration)
 	{
@@ -2364,13 +2373,13 @@ void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_de
 		if (itr != physical_storage_type_to_alignment.end())
 			alignment = itr->second.alignment;
 
-		if (type.basetype == SPIRType::Struct)
+		if (is_physical_pointer_to_buffer_block(type))
 		{
 			SmallVector<string> attributes;
 			attributes.push_back("buffer_reference");
 			if (alignment)
 				attributes.push_back(join("buffer_reference_align = ", alignment));
-			attributes.push_back(buffer_to_packing_standard(type, true));
+			attributes.push_back(buffer_to_packing_standard(type, true, true));
 
 			auto flags = ir.get_buffer_block_type_flags(type);
 			string decorations;
@@ -2385,14 +2394,32 @@ void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_de
 			statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name);
 		}
-		else if (alignment)
-			statement("layout(buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name);
 		else
-			statement("layout(buffer_reference) buffer ", buffer_name);
+		{
+			string packing_standard;
+			if (type.basetype == SPIRType::Struct)
+			{
+				// The non-block type is embedded in a block, so we cannot use enhanced layouts :(
+				packing_standard = buffer_to_packing_standard(type, true, false) + ", ";
+			}
+			else if (is_array(get_pointee_type(type)))
+			{
+				SPIRType wrap_type{OpTypeStruct};
+				wrap_type.self = ir.increase_bound_by(1);
+				wrap_type.member_types.push_back(get_pointee_type_id(type_id));
+				ir.set_member_decoration(wrap_type.self, 0, DecorationOffset, 0);
+				packing_standard = buffer_to_packing_standard(wrap_type, true, false) + ", ";
+			}
+
+			if (alignment)
+				statement("layout(", packing_standard, "buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name);
+			else
+				statement("layout(", packing_standard, "buffer_reference) buffer ", buffer_name);
+		}
 		begin_scope();
 
-		if (type.basetype == SPIRType::Struct)
+		if (is_physical_pointer_to_buffer_block(type))
 		{
 			type.member_name_cache.clear();
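
For non-block pointees the non-forward declaration now carries an explicit packing standard, computed either from the embedded POD struct or from a synthesized single-member wrapper struct when the pointee is an array (so its ArrayStride can be checked against std140/std430 rules). The generated GLSL ends up roughly as below; the lone `value` member is what `dereference_expression` appends `.value` to:

```cpp
// Approximate shape of the emitted GLSL for a physical pointer to a plain
// uint (alignment assumed for the example). Array or embedded-struct
// pointees additionally get the std430/std140 qualifier computed above,
// e.g. "layout(std430, buffer_reference, ...)".
static const char *expected_output = R"(
layout(buffer_reference, buffer_reference_align = 4) buffer uintPointer
{
    uint value;
};
)";
```
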
@@ -3705,31 +3732,34 @@ void CompilerGLSL::emit_resources()
 
 	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
 	{
-		for (auto type : physical_storage_non_block_pointer_types)
-		{
-			emit_buffer_reference_block(type, false);
-		}
-
 		// Output buffer reference blocks.
 		// Do this in two stages, one with forward declaration,
 		// and one without. Buffer reference blocks can reference themselves
 		// to support things like linked lists.
-		ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
-			if (type.basetype == SPIRType::Struct && type.pointer &&
-			    type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
-			    type.storage == StorageClassPhysicalStorageBufferEXT)
+		ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) {
+			if (is_physical_pointer(type))
 			{
-				emit_buffer_reference_block(self, true);
+				bool emit_type = true;
+				if (!is_physical_pointer_to_buffer_block(type))
+				{
+					// Only forward-declare if we intend to emit it in the non_block_pointer types.
+					// Otherwise, these are just "benign" pointer types that exist as a result of access chains.
+					emit_type = std::find(physical_storage_non_block_pointer_types.begin(),
+					                      physical_storage_non_block_pointer_types.end(),
+					                      id) != physical_storage_non_block_pointer_types.end();
+				}
+
+				if (emit_type)
+					emit_buffer_reference_block(id, true);
 			}
 		});
 
-		ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
-			if (type.basetype == SPIRType::Struct &&
-			    type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
-			    type.storage == StorageClassPhysicalStorageBufferEXT)
-			{
-				emit_buffer_reference_block(self, false);
-			}
+		for (auto type : physical_storage_non_block_pointer_types)
+			emit_buffer_reference_block(type, false);
+
+		ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) {
+			if (is_physical_pointer_to_buffer_block(type))
+				emit_buffer_reference_block(id, false);
 		});
 	}
 
@@ -5011,11 +5041,8 @@ string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std
 		return expr.substr(1);
 	else if (backend.native_pointers)
 		return join('*', expr);
-	else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
-	         expr_type.pointer_depth == 1)
-	{
+	else if (is_physical_pointer(expr_type) && !is_physical_pointer_to_buffer_block(expr_type))
 		return join(enclose_expression(expr), ".value");
-	}
 	else
 		return expr;
 }
@@ -15695,17 +15722,29 @@ string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
 // depend on a specific object's use of that type.
 string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 {
-	if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
+	if (is_physical_pointer(type) && !is_physical_pointer_to_buffer_block(type))
 	{
 		// Need to create a magic type name which compacts the entire type information.
-		string name = type_to_glsl(get_pointee_type(type));
-		for (size_t i = 0; i < type.array.size(); i++)
+		auto *parent = &get_pointee_type(type);
+		string name = type_to_glsl(*parent);
+
+		uint32_t array_stride = get_decoration(type.parent_type, DecorationArrayStride);
+
+		// Resolve all array dimensions in one go since once we lose the pointer type,
+		// array information is left to to_array_type_glsl. The base type loses array information.
+		while (is_array(*parent))
 		{
-			if (type.array_size_literal[i])
-				name += join(type.array[i], "_");
+			if (parent->array_size_literal.back())
+				name += join(type.array.back(), "_");
 			else
-				name += join("id", type.array[i], "_");
+				name += join("id", type.array.back(), "_");
+
+			name += "stride_" + std::to_string(array_stride);
+
+			array_stride = get_decoration(parent->parent_type, DecorationArrayStride);
+			parent = &get<SPIRType>(parent->parent_type);
 		}
+
 		name += "Pointer";
 		return name;
 	}
diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp
index 4cba0bb63..3ce044e39 100644
--- a/3rdparty/spirv-cross/spirv_glsl.hpp
+++ b/3rdparty/spirv-cross/spirv_glsl.hpp
@@ -833,7 +833,9 @@ protected:
 	bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
 	                                uint32_t *failed_index = nullptr, uint32_t start_offset = 0,
 	                                uint32_t end_offset = ~(0u));
-	std::string buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout);
+	std::string buffer_to_packing_standard(const SPIRType &type,
+	                                       bool support_std430_without_scalar_layout,
+	                                       bool support_enhanced_layouts);
 
 	uint32_t type_to_packed_base_size(const SPIRType &type, BufferPackingStandard packing);
 	uint32_t type_to_packed_alignment(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing);
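
The reworked mangling folds every array dimension and its ArrayStride into the pointer's type name, so two pointer-to-array types that differ only in stride no longer alias. A worked example of the naming loop above, with assumed input:

```cpp
#include <string>

// Assumed input: physical pointer to uint[4] whose parent type carries
// ArrayStride 16. Mirrors one iteration of the naming loop above.
static std::string mangled_name_example()
{
	std::string name = "uint"; // type_to_glsl() of the pointee base type
	name += "4_";              // literal array dimension
	name += "stride_16";       // ArrayStride for this dimension
	name += "Pointer";
	return name;               // "uint4_stride_16Pointer"
}
```
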
diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp
index de81738e7..e73751d7f 100644
--- a/3rdparty/spirv-cross/spirv_msl.cpp
+++ b/3rdparty/spirv-cross/spirv_msl.cpp
@@ -1938,10 +1938,14 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 			// When using the pointer, we need to know which variable it is actually loaded from.
 			uint32_t base_id = ops[2];
 			auto *var = maybe_get_backing_variable(base_id);
-			if (var && atomic_image_vars_emulated.count(var->self))
+			if (var)
 			{
-				if (!get<SPIRType>(var->basetype).array.empty())
-					SPIRV_CROSS_THROW("Cannot emulate array of storage images with atomics. Use MSL 3.1 for native support.");
+				if (atomic_image_vars_emulated.count(var->self) &&
+				    !get<SPIRType>(var->basetype).array.empty())
+				{
+					SPIRV_CROSS_THROW(
+					    "Cannot emulate array of storage images with atomics. Use MSL 3.1 for native support.");
+				}
 
 				if (global_var_ids.find(base_id) != global_var_ids.end())
 					added_arg_ids.insert(base_id);
@@ -7412,19 +7416,28 @@ void CompilerMSL::emit_custom_functions()
 			break;
 
 		case SPVFuncImplVariableDescriptorArray:
-			statement("template<typename T>");
-			statement("struct spvDescriptorArray");
-			begin_scope();
-			statement("spvDescriptorArray(const device spvDescriptor<T>* ptr) : ptr(ptr)");
-			begin_scope();
-			end_scope();
-			statement("const device T& operator [] (size_t i) const");
-			begin_scope();
-			statement("return ptr[i].value;");
-			end_scope();
-			statement("const device spvDescriptor<T>* ptr;");
-			end_scope_decl();
-			statement("");
+			if (spv_function_implementations.count(SPVFuncImplVariableDescriptor) != 0)
+			{
+				statement("template<typename T>");
+				statement("struct spvDescriptorArray");
+				begin_scope();
+				statement("spvDescriptorArray(const device spvDescriptor<T>* ptr) : ptr(ptr)");
+				begin_scope();
+				end_scope();
+				statement("const device T& operator [] (size_t i) const");
+				begin_scope();
+				statement("return ptr[i].value;");
+				end_scope();
+				statement("const device spvDescriptor<T>* ptr;");
+				end_scope_decl();
+				statement("");
+			}
+			else
+			{
+				statement("template<typename T>");
+				statement("struct spvDescriptorArray;");
+				statement("");
+			}
 
 			if (msl_options.runtime_array_rich_descriptor &&
 			    spv_function_implementations.count(SPVFuncImplVariableSizedDescriptor) != 0)
@@ -7458,6 +7471,22 @@ void CompilerMSL::emit_custom_functions()
 			statement("");
 			break;
 
+		case SPVFuncImplReduceAdd:
+			// Metal doesn't support __builtin_reduce_add or simd_reduce_add, so we need this.
+			// Metal also doesn't support the other vector builtins, which would have been useful to make this a single template.
+
+			statement("template <typename T>");
+			statement("T reduce_add(vec<T, 2> v) { return v.x + v.y; }");
+
+			statement("template <typename T>");
+			statement("T reduce_add(vec<T, 3> v) { return v.x + v.y + v.z; }");
+
+			statement("template <typename T>");
+			statement("T reduce_add(vec<T, 4> v) { return v.x + v.y + v.z + v.w; }");
+
+			statement("");
+			break;
+
 		default:
 			break;
 		}
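
Stitched together from the `statement()` calls above, the helper block injected into the generated MSL source reads as follows (`vec<T, N>` is Metal's built-in templated vector type, so plain component sums cover every width):

```cpp
// As emitted into the .metal output by SPVFuncImplReduceAdd.
template <typename T>
T reduce_add(vec<T, 2> v) { return v.x + v.y; }

template <typename T>
T reduce_add(vec<T, 3> v) { return v.x + v.y + v.z; }

template <typename T>
T reduce_add(vec<T, 4> v) { return v.x + v.y + v.z + v.w; }
```
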
@@ -9641,6 +9670,132 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
+	case OpSDot:
+	case OpUDot:
+	case OpSUDot:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t vec1 = ops[2];
+		uint32_t vec2 = ops[3];
+
+		auto &input_type1 = expression_type(vec1);
+		auto &input_type2 = expression_type(vec2);
+
+		string vec1input, vec2input;
+		auto input_size = input_type1.vecsize;
+		if (instruction.length == 5)
+		{
+			if (ops[4] == PackedVectorFormatPackedVectorFormat4x8Bit)
+			{
+				string type = opcode == OpSDot || opcode == OpSUDot ? "char4" : "uchar4";
+				vec1input = join("as_type<", type, ">(", to_expression(vec1), ")");
+				type = opcode == OpSDot ? "char4" : "uchar4";
+				vec2input = join("as_type<", type, ">(", to_expression(vec2), ")");
+				input_size = 4;
+			}
+			else
+				SPIRV_CROSS_THROW("Packed vector formats other than 4x8Bit for integer dot product is not supported.");
+		}
+		else
+		{
+			// Inputs are sign or zero-extended to their target width.
+			SPIRType::BaseType vec1_expected_type =
+			    opcode != OpUDot ?
+			        to_signed_basetype(input_type1.width) :
+			        to_unsigned_basetype(input_type1.width);
+
+			SPIRType::BaseType vec2_expected_type =
+			    opcode != OpSDot ?
+			        to_unsigned_basetype(input_type2.width) :
+			        to_signed_basetype(input_type2.width);
+
+			vec1input = bitcast_expression(vec1_expected_type, vec1);
+			vec2input = bitcast_expression(vec2_expected_type, vec2);
+		}
+
+		auto &type = get<SPIRType>(result_type);
+
+		// We'll get the appropriate sign-extend or zero-extend, no matter which type we cast to here.
+		// The addition in reduce_add is sign-invariant.
+		auto result_type_cast = join(type_to_glsl(type), input_size);
+
+		string exp = join("reduce_add(",
+		                  result_type_cast, "(", vec1input, ") * ",
+		                  result_type_cast, "(", vec2input, "))");
+
+		emit_op(result_type, id, exp, should_forward(vec1) && should_forward(vec2));
+		inherit_expression_dependencies(id, vec1);
+		inherit_expression_dependencies(id, vec2);
+		break;
+	}
+
+	case OpSDotAccSat:
+	case OpUDotAccSat:
+	case OpSUDotAccSat:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t vec1 = ops[2];
+		uint32_t vec2 = ops[3];
+		uint32_t acc = ops[4];
+
+		auto input_type1 = expression_type(vec1);
+		auto input_type2 = expression_type(vec2);
+
+		string vec1input, vec2input;
+		if (instruction.length == 6)
+		{
+			if (ops[5] == PackedVectorFormatPackedVectorFormat4x8Bit)
+			{
+				string type = opcode == OpSDotAccSat || opcode == OpSUDotAccSat ? "char4" : "uchar4";
+				vec1input = join("as_type<", type, ">(", to_expression(vec1), ")");
+				type = opcode == OpSDotAccSat ? "char4" : "uchar4";
+				vec2input = join("as_type<", type, ">(", to_expression(vec2), ")");
+				input_type1.vecsize = 4;
+				input_type2.vecsize = 4;
+			}
+			else
+				SPIRV_CROSS_THROW("Packed vector formats other than 4x8Bit for integer dot product is not supported.");
+		}
+		else
+		{
+			// Inputs are sign or zero-extended to their target width.
+			SPIRType::BaseType vec1_expected_type =
+			    opcode != OpUDotAccSat ?
+			        to_signed_basetype(input_type1.width) :
+			        to_unsigned_basetype(input_type1.width);
+
+			SPIRType::BaseType vec2_expected_type =
+			    opcode != OpSDotAccSat ?
+			        to_unsigned_basetype(input_type2.width) :
+			        to_signed_basetype(input_type2.width);
+
+			vec1input = bitcast_expression(vec1_expected_type, vec1);
+			vec2input = bitcast_expression(vec2_expected_type, vec2);
+		}
+
+		auto &type = get<SPIRType>(result_type);
+
+		SPIRType::BaseType pre_saturate_type =
+		    opcode != OpUDotAccSat ?
+		        to_signed_basetype(type.width) :
+		        to_unsigned_basetype(type.width);
+
+		input_type1.basetype = pre_saturate_type;
+		input_type2.basetype = pre_saturate_type;
+
+		string exp = join(type_to_glsl(type), "(addsat(reduce_add(",
+		                  type_to_glsl(input_type1), "(", vec1input, ") * ",
+		                  type_to_glsl(input_type2), "(", vec2input, ")), ",
+		                  bitcast_expression(pre_saturate_type, acc), "))");
+
+		emit_op(result_type, id, exp, should_forward(vec1) && should_forward(vec2));
+		inherit_expression_dependencies(id, vec1);
+		inherit_expression_dependencies(id, vec2);
+		break;
+	}
+
 	default:
 		CompilerGLSL::emit_instruction(instruction);
 		break;
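
Worked example of the lowering above: an `OpUDot` on two 32-bit operands holding 4x8Bit packed lanes reinterprets each operand, widens to the result type, multiplies component-wise, and reduces. Operand names below are illustrative:

```cpp
// a, b : uint, each packing four 8-bit lanes (PackedVectorFormat4x8Bit).
// The emitted MSL expression has this shape:
//
//   reduce_add(uint4(as_type<uchar4>(a)) * uint4(as_type<uchar4>(b)))
//
// Widening happens before the multiply, so 8-bit lane products cannot
// overflow, and reduce_add's summation is sign-invariant as the comment
// in the case body notes.
```
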
@@ -10052,6 +10207,9 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
 	// Emulate texture2D atomic operations
 	if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image)
 	{
+		auto &flags = ir.get_decoration_bitset(var->self);
+		if (decoration_flags_signal_volatile(flags))
+			exp += "volatile ";
 		exp += "device";
 	}
 	else
@@ -12708,6 +12866,11 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 	return get_type_address_space(type, argument.self, true);
 }
 
+bool CompilerMSL::decoration_flags_signal_volatile(const Bitset &flags)
+{
+	return flags.get(DecorationVolatile) || flags.get(DecorationCoherent);
+}
+
 string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument)
 {
 	// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
@@ -12817,7 +12980,7 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
 		addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : "";
 	}
 
-	return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
+	return join(decoration_flags_signal_volatile(flags) ? "volatile " : "", addr_space);
 }
 
 const char *CompilerMSL::to_restrict(uint32_t id, bool space)
@@ -13551,7 +13714,9 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 			// Emulate texture2D atomic operations
 			if (atomic_image_vars_emulated.count(var.self))
 			{
-				ep_args += ", device atomic_" + type_to_glsl(get<SPIRType>(basetype.image.type), 0);
+				auto &flags = ir.get_decoration_bitset(var.self);
+				const char *cv_flags = decoration_flags_signal_volatile(flags) ? "volatile " : "";
+				ep_args += join(", ", cv_flags, "device atomic_", type_to_glsl(get<SPIRType>(basetype.image.type), 0));
 				ep_args += "* " + r.name + "_atomic";
 				ep_args += " [[buffer(" + convert_to_string(r.secondary_index) + ")";
 				if (interlocked_resources.count(var_id))
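
With `decoration_flags_signal_volatile` threaded through, a storage image decorated Coherent or Volatile that needs atomic emulation now receives a `volatile device` pointer rather than a plain `device` one, both in the entry point and in called functions. A sketch of the resulting entry-point argument; all names here are invented for the example:

```cpp
// Hypothetical generated MSL for an emulated image atomic when the
// Coherent/Volatile decoration is present:
//
//   kernel void main0(..., volatile device atomic_uint* img_atomic [[buffer(1)]])
//
// and without the decoration:
//
//   kernel void main0(..., device atomic_uint* img_atomic [[buffer(1)]])
```
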
"volatile " : ""; + decl += join(", ", cv_flags, "device atomic_", type_to_glsl(get(var_type.image.type), 0)); decl += "* " + to_expression(name_id) + "_atomic"; } @@ -17266,6 +17433,14 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpGroupNonUniformQuadSwap: return SPVFuncImplQuadSwap; + case OpSDot: + case OpUDot: + case OpSUDot: + case OpSDotAccSat: + case OpUDotAccSat: + case OpSUDotAccSat: + return SPVFuncImplReduceAdd; + default: break; } diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp index 06744b873..ed9180972 100644 --- a/3rdparty/spirv-cross/spirv_msl.hpp +++ b/3rdparty/spirv-cross/spirv_msl.hpp @@ -823,7 +823,8 @@ protected: SPVFuncImplVariableDescriptor, SPVFuncImplVariableSizedDescriptor, SPVFuncImplVariableDescriptorArray, - SPVFuncImplPaddedStd140 + SPVFuncImplPaddedStd140, + SPVFuncImplReduceAdd }; // If the underlying resource has been used for comparison then duplicate loads of that resource must be too @@ -1044,6 +1045,7 @@ protected: bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const; std::string get_argument_address_space(const SPIRVariable &argument); std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); + static bool decoration_flags_signal_volatile(const Bitset &flags); const char *to_restrict(uint32_t id, bool space); SPIRType &get_stage_in_struct_type(); SPIRType &get_stage_out_struct_type();