diff --git a/3rdparty/spirv-tools/Android.mk b/3rdparty/spirv-tools/Android.mk index d2fe6a66b..94281162a 100644 --- a/3rdparty/spirv-tools/Android.mk +++ b/3rdparty/spirv-tools/Android.mk @@ -75,6 +75,7 @@ SPVTOOLS_SRC_FILES := \ SPVTOOLS_OPT_SRC_FILES := \ source/opt/aggressive_dead_code_elim_pass.cpp \ + source/opt/amd_ext_to_khr.cpp \ source/opt/basic_block.cpp \ source/opt/block_merge_pass.cpp \ source/opt/block_merge_util.cpp \ diff --git a/3rdparty/spirv-tools/BUILD.gn b/3rdparty/spirv-tools/BUILD.gn index 6237af388..d62aaabe0 100644 --- a/3rdparty/spirv-tools/BUILD.gn +++ b/3rdparty/spirv-tools/BUILD.gn @@ -451,6 +451,8 @@ static_library("spvtools_opt") { sources = [ "source/opt/aggressive_dead_code_elim_pass.cpp", "source/opt/aggressive_dead_code_elim_pass.h", + "source/opt/amd_ext_to_khr.cpp", + "source/opt/amd_ext_to_khr.h", "source/opt/basic_block.cpp", "source/opt/basic_block.h", "source/opt/block_merge_pass.cpp", @@ -658,6 +660,7 @@ static_library("spvtools_opt") { deps = [ ":spvtools", + ":spvtools_vendor_tables_spv-amd-shader-ballot", ] public_deps = [ ":spvtools_headers", @@ -729,6 +732,8 @@ static_library("spvtools_reduce") { "source/reduce/remove_instruction_reduction_opportunity.h", "source/reduce/remove_opname_instruction_reduction_opportunity_finder.cpp", "source/reduce/remove_opname_instruction_reduction_opportunity_finder.h", + "source/reduce/remove_relaxed_precision_decoration_opportunity_finder.cpp", + "source/reduce/remove_relaxed_precision_decoration_opportunity_finder.h", "source/reduce/remove_selection_reduction_opportunity.cpp", "source/reduce/remove_selection_reduction_opportunity.h", "source/reduce/remove_selection_reduction_opportunity_finder.cpp", diff --git a/3rdparty/spirv-tools/include/generated/build-version.inc b/3rdparty/spirv-tools/include/generated/build-version.inc index 287af1d6f..a050519d2 100644 --- a/3rdparty/spirv-tools/include/generated/build-version.inc +++ b/3rdparty/spirv-tools/include/generated/build-version.inc 
@@ -1 +1 @@ -"v2019.5-dev", "SPIRV-Tools v2019.5-dev v2019.4-16-g8336d192" +"v2019.5-dev", "SPIRV-Tools v2019.5-dev v2019.4-25-g65e362b7" diff --git a/3rdparty/spirv-tools/include/generated/core.insts-unified1.inc b/3rdparty/spirv-tools/include/generated/core.insts-unified1.inc index ad5451592..9287ea511 100644 --- a/3rdparty/spirv-tools/include/generated/core.insts-unified1.inc +++ b/3rdparty/spirv-tools/include/generated/core.insts-unified1.inc @@ -425,12 +425,12 @@ static const spv_opcode_desc_t kOpcodeTableEntries[] = { {"ImageSampleFootprintNV", SpvOpImageSampleFootprintNV, 1, pygen_variable_caps_ImageFootprintNV, 7, {SPV_OPERAND_TYPE_TYPE_ID, SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_OPTIONAL_IMAGE}, 1, 1, 1, pygen_variable_exts_SPV_NV_shader_image_footprint, 0xffffffffu, 0xffffffffu}, {"GroupNonUniformPartitionNV", SpvOpGroupNonUniformPartitionNV, 1, pygen_variable_caps_GroupNonUniformPartitionedNV, 3, {SPV_OPERAND_TYPE_TYPE_ID, SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID}, 1, 1, 1, pygen_variable_exts_SPV_NV_shader_subgroup_partitioned, 0xffffffffu, 0xffffffffu}, {"WritePackedPrimitiveIndices4x8NV", SpvOpWritePackedPrimitiveIndices4x8NV, 1, pygen_variable_caps_MeshShadingNV, 2, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 0, 0, 1, pygen_variable_exts_SPV_NV_mesh_shader, 0xffffffffu, 0xffffffffu}, - {"ReportIntersectionNV", SpvOpReportIntersectionNV, 1, pygen_variable_caps_RayTracingNV, 4, {SPV_OPERAND_TYPE_TYPE_ID, SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 1, 1, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu}, - {"IgnoreIntersectionNV", SpvOpIgnoreIntersectionNV, 1, pygen_variable_caps_RayTracingNV, 0, {}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu}, - {"TerminateRayNV", SpvOpTerminateRayNV, 1, pygen_variable_caps_RayTracingNV, 0, {}, 0, 0, 1, 
pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu}, - {"TraceNV", SpvOpTraceNV, 1, pygen_variable_caps_RayTracingNV, 11, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu}, - {"TypeAccelerationStructureNV", SpvOpTypeAccelerationStructureNV, 1, pygen_variable_caps_RayTracingNV, 1, {SPV_OPERAND_TYPE_RESULT_ID}, 1, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu}, - {"ExecuteCallableNV", SpvOpExecuteCallableNV, 1, pygen_variable_caps_RayTracingNV, 2, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu}, + {"ReportIntersectionNV", SpvOpReportIntersectionNV, 1, pygen_variable_caps_RayTracingNV, 4, {SPV_OPERAND_TYPE_TYPE_ID, SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 1, 1, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu}, + {"IgnoreIntersectionNV", SpvOpIgnoreIntersectionNV, 1, pygen_variable_caps_RayTracingNV, 0, {}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu}, + {"TerminateRayNV", SpvOpTerminateRayNV, 1, pygen_variable_caps_RayTracingNV, 0, {}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu}, + {"TraceNV", SpvOpTraceNV, 1, pygen_variable_caps_RayTracingNV, 11, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu}, + {"TypeAccelerationStructureNV", SpvOpTypeAccelerationStructureNV, 1, 
pygen_variable_caps_RayTracingNV, 1, {SPV_OPERAND_TYPE_RESULT_ID}, 1, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu}, + {"ExecuteCallableNV", SpvOpExecuteCallableNV, 1, pygen_variable_caps_RayTracingNV, 2, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu}, {"TypeCooperativeMatrixNV", SpvOpTypeCooperativeMatrixNV, 1, pygen_variable_caps_CooperativeMatrixNV, 5, {SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_SCOPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 1, 0, 1, pygen_variable_exts_SPV_NV_cooperative_matrix, 0xffffffffu, 0xffffffffu}, {"CooperativeMatrixLoadNV", SpvOpCooperativeMatrixLoadNV, 1, pygen_variable_caps_CooperativeMatrixNV, 6, {SPV_OPERAND_TYPE_TYPE_ID, SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS}, 1, 1, 1, pygen_variable_exts_SPV_NV_cooperative_matrix, 0xffffffffu, 0xffffffffu}, {"CooperativeMatrixStoreNV", SpvOpCooperativeMatrixStoreNV, 1, pygen_variable_caps_CooperativeMatrixNV, 5, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS}, 0, 0, 1, pygen_variable_exts_SPV_NV_cooperative_matrix, 0xffffffffu, 0xffffffffu}, diff --git a/3rdparty/spirv-tools/include/generated/operand.kinds-unified1.inc b/3rdparty/spirv-tools/include/generated/operand.kinds-unified1.inc index 4f9a9731c..21795b1fc 100644 --- a/3rdparty/spirv-tools/include/generated/operand.kinds-unified1.inc +++ b/3rdparty/spirv-tools/include/generated/operand.kinds-unified1.inc @@ -546,7 +546,7 @@ static const spv_operand_desc_t pygen_variable_DecorationEntries[] = { {"PerViewNV", 5272, 1, pygen_variable_caps_MeshShadingNV, 1, pygen_variable_exts_SPV_NV_mesh_shader, {}, 0xffffffffu, 0xffffffffu}, {"PerTaskNV", 5273, 1, pygen_variable_caps_MeshShadingNV, 1, pygen_variable_exts_SPV_NV_mesh_shader, {}, 0xffffffffu, 
0xffffffffu}, {"PerVertexNV", 5285, 1, pygen_variable_caps_FragmentBarycentricNV, 1, pygen_variable_exts_SPV_NV_fragment_shader_barycentric, {}, 0xffffffffu, 0xffffffffu}, - {"NonUniformEXT", 5300, 1, pygen_variable_caps_ShaderNonUniformEXT, 0, nullptr, {}, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu}, + {"NonUniformEXT", 5300, 1, pygen_variable_caps_ShaderNonUniformEXT, 0, nullptr, {}, 0xffffffffu, 0xffffffffu}, {"RestrictPointerEXT", 5355, 1, pygen_variable_caps_PhysicalStorageBufferAddressesEXT, 1, pygen_variable_exts_SPV_EXT_physical_storage_buffer, {}, 0xffffffffu, 0xffffffffu}, {"AliasedPointerEXT", 5356, 1, pygen_variable_caps_PhysicalStorageBufferAddressesEXT, 1, pygen_variable_exts_SPV_EXT_physical_storage_buffer, {}, 0xffffffffu, 0xffffffffu}, {"CounterBuffer", 5634, 0, nullptr, 1, pygen_variable_exts_SPV_GOOGLE_hlsl_functionality1, {SPV_OPERAND_TYPE_ID}, SPV_SPIRV_VERSION_WORD(1,4), 0xffffffffu}, diff --git a/3rdparty/spirv-tools/include/spirv-tools/optimizer.hpp b/3rdparty/spirv-tools/include/spirv-tools/optimizer.hpp index cec1f1683..4e54b1a1c 100644 --- a/3rdparty/spirv-tools/include/spirv-tools/optimizer.hpp +++ b/3rdparty/spirv-tools/include/spirv-tools/optimizer.hpp @@ -823,6 +823,11 @@ Optimizer::PassToken CreateDescriptorScalarReplacementPass(); // function that has a single OpKill. This allows more code to be inlined. Optimizer::PassToken CreateWrapOpKillPass(); +// Replaces the extensions VK_AMD_shader_ballot,VK_AMD_gcn_shader, and +// VK_AMD_shader_trinary_minmax with equivalent code using core instructions and +// capabilities. 
+Optimizer::PassToken CreateAmdExtToKhrPass(); + } // namespace spvtools #endif // INCLUDE_SPIRV_TOOLS_OPTIMIZER_HPP_ diff --git a/3rdparty/spirv-tools/source/enum_set.h b/3rdparty/spirv-tools/source/enum_set.h index e4ef297cd..2e7046d4e 100644 --- a/3rdparty/spirv-tools/source/enum_set.h +++ b/3rdparty/spirv-tools/source/enum_set.h @@ -69,6 +69,26 @@ class EnumSet { return *this; } + friend bool operator==(const EnumSet& a, const EnumSet& b) { + if (a.mask_ != b.mask_) { + return false; + } + + if (a.overflow_ == nullptr && b.overflow_ == nullptr) { + return true; + } + + if (a.overflow_ == nullptr || b.overflow_ == nullptr) { + return false; + } + + return *a.overflow_ == *b.overflow_; + } + + friend bool operator!=(const EnumSet& a, const EnumSet& b) { + return !(a == b); + } + // Adds the given enum value to the set. This has no effect if the // enum value is already in the set. void Add(EnumType c) { AddWord(ToWord(c)); } diff --git a/3rdparty/spirv-tools/source/opt/CMakeLists.txt b/3rdparty/spirv-tools/source/opt/CMakeLists.txt index bf7fe1370..2309ca919 100644 --- a/3rdparty/spirv-tools/source/opt/CMakeLists.txt +++ b/3rdparty/spirv-tools/source/opt/CMakeLists.txt @@ -13,6 +13,7 @@ # limitations under the License. 
set(SPIRV_TOOLS_OPT_SOURCES aggressive_dead_code_elim_pass.h + amd_ext_to_khr.h basic_block.h block_merge_pass.h block_merge_util.h @@ -117,6 +118,7 @@ set(SPIRV_TOOLS_OPT_SOURCES wrap_opkill.h aggressive_dead_code_elim_pass.cpp + amd_ext_to_khr.cpp basic_block.cpp block_merge_pass.cpp block_merge_util.cpp diff --git a/3rdparty/spirv-tools/source/opt/aggressive_dead_code_elim_pass.cpp b/3rdparty/spirv-tools/source/opt/aggressive_dead_code_elim_pass.cpp index 04bfea1df..761ff7c33 100644 --- a/3rdparty/spirv-tools/source/opt/aggressive_dead_code_elim_pass.cpp +++ b/3rdparty/spirv-tools/source/opt/aggressive_dead_code_elim_pass.cpp @@ -664,6 +664,9 @@ Pass::Status AggressiveDCEPass::ProcessImpl() { // been marked, it is safe to remove dead global values. modified |= ProcessGlobalValues(); + // Sanity check. + assert(to_kill_.size() == 0 || modified); + // Kill all dead instructions. for (auto inst : to_kill_) { context()->KillInst(inst); @@ -846,6 +849,7 @@ bool AggressiveDCEPass::ProcessGlobalValues() { if (!IsDead(ptr_ty_inst)) continue; } to_kill_.push_back(&val); + modified = true; } } diff --git a/3rdparty/spirv-tools/source/opt/amd_ext_to_khr.cpp b/3rdparty/spirv-tools/source/opt/amd_ext_to_khr.cpp new file mode 100644 index 000000000..1cb5ba5a4 --- /dev/null +++ b/3rdparty/spirv-tools/source/opt/amd_ext_to_khr.cpp @@ -0,0 +1,539 @@ +// Copyright (c) 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "source/opt/amd_ext_to_khr.h" + +#include "ir_builder.h" +#include "source/opt/ir_context.h" +#include "spv-amd-shader-ballot.insts.inc" +#include "type_manager.h" + +namespace spvtools { +namespace opt { + +namespace { + +enum ExtOpcodes { + AmdShaderBallotSwizzleInvocationsAMD = 1, + AmdShaderBallotSwizzleInvocationsMaskedAMD = 2, + AmdShaderBallotWriteInvocationAMD = 3, + AmdShaderBallotMbcntAMD = 4 +}; + +analysis::Type* GetUIntType(IRContext* ctx) { + analysis::Integer int_type(32, false); + return ctx->get_type_mgr()->GetRegisteredType(&int_type); +} + +// Returns a folding rule that will replace the opcode with |opcode| and add +// the capabilities required. The folding rule assumes it is folding an +// OpGroup*NonUniformAMD instruction from the SPV_AMD_shader_ballot extension. +FoldingRule ReplaceGroupNonuniformOperationOpCode(SpvOp new_opcode) { + switch (new_opcode) { + case SpvOpGroupNonUniformIAdd: + case SpvOpGroupNonUniformFAdd: + case SpvOpGroupNonUniformUMin: + case SpvOpGroupNonUniformSMin: + case SpvOpGroupNonUniformFMin: + case SpvOpGroupNonUniformUMax: + case SpvOpGroupNonUniformSMax: + case SpvOpGroupNonUniformFMax: + break; + default: + assert( + false && + "Should be replacing with a group non uniform arithmetic operation."); + } + + return [new_opcode](IRContext* ctx, Instruction* inst, + const std::vector&) { + switch (inst->opcode()) { + case SpvOpGroupIAddNonUniformAMD: + case SpvOpGroupFAddNonUniformAMD: + case SpvOpGroupUMinNonUniformAMD: + case SpvOpGroupSMinNonUniformAMD: + case SpvOpGroupFMinNonUniformAMD: + case SpvOpGroupUMaxNonUniformAMD: + case SpvOpGroupSMaxNonUniformAMD: + case SpvOpGroupFMaxNonUniformAMD: + break; + default: + assert(false && + "Should be replacing a group non uniform arithmetic operation."); + } + + ctx->AddCapability(SpvCapabilityGroupNonUniformArithmetic); + inst->SetOpcode(new_opcode); + return true; + }; +} + +// Returns a folding rule that will replace the SwizzleInvocationsAMD extended +// 
instruction in the SPV_AMD_shader_ballot extension. +// +// The instruction +// +// %offset = OpConstantComposite %v3uint %x %y %z %w +// %result = OpExtInst %type %1 SwizzleInvocationsAMD %data %offset +// +// is replaced with +// +// potentially new constants and types +// +// clang-format off +// %uint_max = OpConstant %uint 0xFFFFFFFF +// %v4uint = OpTypeVector %uint 4 +// %ballot_value = OpConstantComposite %v4uint %uint_max %uint_max %uint_max %uint_max +// %null = OpConstantNull %type +// clang-format on +// +// and the following code in the function body +// +// clang-format off +// %id = OpLoad %uint %SubgroupLocalInvocationId +// %quad_idx = OpBitwiseAnd %uint %id %uint_3 +// %quad_ldr = OpBitwiseXor %uint %id %quad_idx +// %my_offset = OpVectorExtractDynamic %uint %offset %quad_idx +// %target_inv = OpIAdd %uint %quad_ldr %my_offset +// %is_active = OpGroupNonUniformBallotBitExtract %bool %uint_3 %ballot_value %target_inv +// %shuffle = OpGroupNonUniformShuffle %type %uint_3 %data %target_inv +// %result = OpSelect %type %is_active %shuffle %null +// clang-format on +// +// Also adding the capabilities and builtins that are needed. +FoldingRule ReplaceSwizzleInvocations() { + return [](IRContext* ctx, Instruction* inst, + const std::vector&) { + analysis::TypeManager* type_mgr = ctx->get_type_mgr(); + analysis::ConstantManager* const_mgr = ctx->get_constant_mgr(); + + ctx->AddExtension("SPV_KHR_shader_ballot"); + ctx->AddCapability(SpvCapabilityGroupNonUniformBallot); + ctx->AddCapability(SpvCapabilityGroupNonUniformShuffle); + + InstructionBuilder ir_builder( + ctx, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + + uint32_t data_id = inst->GetSingleWordInOperand(2); + uint32_t offset_id = inst->GetSingleWordInOperand(3); + + // Get the subgroup invocation id. 
+ uint32_t var_id = + ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId); + assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable."); + Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id); + Instruction* var_ptr_type = + ctx->get_def_use_mgr()->GetDef(var_inst->type_id()); + uint32_t uint_type_id = var_ptr_type->GetSingleWordInOperand(1); + + Instruction* id = ir_builder.AddLoad(uint_type_id, var_id); + + uint32_t quad_mask = ir_builder.GetUintConstantId(3); + + // This gives the offset in the group of 4 of this invocation. + Instruction* quad_idx = ir_builder.AddBinaryOp( + uint_type_id, SpvOpBitwiseAnd, id->result_id(), quad_mask); + + // Get the invocation id of the first invocation in the group of 4. + Instruction* quad_ldr = ir_builder.AddBinaryOp( + uint_type_id, SpvOpBitwiseXor, id->result_id(), quad_idx->result_id()); + + // Get the offset of the target invocation from the offset vector. + Instruction* my_offset = + ir_builder.AddBinaryOp(uint_type_id, SpvOpVectorExtractDynamic, + offset_id, quad_idx->result_id()); + + // Determine the index of the invocation to read from. 
+ Instruction* target_inv = ir_builder.AddBinaryOp( + uint_type_id, SpvOpIAdd, quad_ldr->result_id(), my_offset->result_id()); + + // Do the group operations + uint32_t uint_max_id = ir_builder.GetUintConstantId(0xFFFFFFFF); + uint32_t subgroup_scope = ir_builder.GetUintConstantId(SpvScopeSubgroup); + const auto* ballot_value_const = const_mgr->GetConstant( + type_mgr->GetUIntVectorType(4), + {uint_max_id, uint_max_id, uint_max_id, uint_max_id}); + Instruction* ballot_value = + const_mgr->GetDefiningInstruction(ballot_value_const); + Instruction* is_active = ir_builder.AddNaryOp( + type_mgr->GetBoolTypeId(), SpvOpGroupNonUniformBallotBitExtract, + {subgroup_scope, ballot_value->result_id(), target_inv->result_id()}); + Instruction* shuffle = ir_builder.AddNaryOp( + inst->type_id(), SpvOpGroupNonUniformShuffle, + {subgroup_scope, data_id, target_inv->result_id()}); + + // Create the null constant to use in the select. + const auto* null = const_mgr->GetConstant( + type_mgr->GetType(inst->type_id()), std::vector()); + Instruction* null_inst = const_mgr->GetDefiningInstruction(null); + + // Build the select. + inst->SetOpcode(SpvOpSelect); + Instruction::OperandList new_operands; + new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_active->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {shuffle->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {null_inst->result_id()}}); + + inst->SetInOperands(std::move(new_operands)); + ctx->UpdateDefUse(inst); + return true; + }; +} + +// Returns a folding rule that will replace the SwizzleInvocationsMaskedAMD +// extended instruction in the SPV_AMD_shader_ballot extension. 
+// +// The instruction +// +// %mask = OpConstantComposite %v3uint %uint_x %uint_y %uint_z +// %result = OpExtInst %uint %1 SwizzleInvocationsMaskedAMD %data %mask +// +// is replaced with +// +// potentially new constants and types +// +// clang-format off +// %uint_mask_extend = OpConstant %uint 0xFFFFFFE0 +// %uint_max = OpConstant %uint 0xFFFFFFFF +// %v4uint = OpTypeVector %uint 4 +// %ballot_value = OpConstantComposite %v4uint %uint_max %uint_max %uint_max %uint_max +// clang-format on +// +// and the following code in the function body +// +// clang-format off +// %id = OpLoad %uint %SubgroupLocalInvocationId +// %and_mask = OpBitwiseOr %uint %uint_x %uint_mask_extend +// %and = OpBitwiseAnd %uint %id %and_mask +// %or = OpBitwiseOr %uint %and %uint_y +// %target_inv = OpBitwiseXor %uint %or %uint_z +// %is_active = OpGroupNonUniformBallotBitExtract %bool %uint_3 %ballot_value %target_inv +// %shuffle = OpGroupNonUniformShuffle %type %uint_3 %data %target_inv +// %result = OpSelect %type %is_active %shuffle %uint_0 +// clang-format on +// +// Also adding the capabilities and builtins that are needed. 
+FoldingRule ReplaceSwizzleInvocationsMasked() { + return [](IRContext* ctx, Instruction* inst, + const std::vector&) { + analysis::TypeManager* type_mgr = ctx->get_type_mgr(); + analysis::DefUseManager* def_use_mgr = ctx->get_def_use_mgr(); + analysis::ConstantManager* const_mgr = ctx->get_constant_mgr(); + + // ctx->AddCapability(SpvCapabilitySubgroupBallotKHR); + ctx->AddCapability(SpvCapabilityGroupNonUniformBallot); + ctx->AddCapability(SpvCapabilityGroupNonUniformShuffle); + + InstructionBuilder ir_builder( + ctx, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + + // Get the operands to inst, and the components of the mask + uint32_t data_id = inst->GetSingleWordInOperand(2); + + Instruction* mask_inst = + def_use_mgr->GetDef(inst->GetSingleWordInOperand(3)); + assert(mask_inst->opcode() == SpvOpConstantComposite && + "The mask is suppose to be a vector constant."); + assert(mask_inst->NumInOperands() == 3 && + "The mask is suppose to have 3 components."); + + uint32_t uint_x = mask_inst->GetSingleWordInOperand(0); + uint32_t uint_y = mask_inst->GetSingleWordInOperand(1); + uint32_t uint_z = mask_inst->GetSingleWordInOperand(2); + + // Get the subgroup invocation id. + uint32_t var_id = + ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId); + ctx->AddExtension("SPV_KHR_shader_ballot"); + assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable."); + Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id); + Instruction* var_ptr_type = + ctx->get_def_use_mgr()->GetDef(var_inst->type_id()); + uint32_t uint_type_id = var_ptr_type->GetSingleWordInOperand(1); + + Instruction* id = ir_builder.AddLoad(uint_type_id, var_id); + + // Do the bitwise operations. 
+ uint32_t mask_extended = ir_builder.GetUintConstantId(0xFFFFFFE0); + Instruction* and_mask = ir_builder.AddBinaryOp(uint_type_id, SpvOpBitwiseOr, + uint_x, mask_extended); + Instruction* and_result = ir_builder.AddBinaryOp( + uint_type_id, SpvOpBitwiseAnd, id->result_id(), and_mask->result_id()); + Instruction* or_result = ir_builder.AddBinaryOp( + uint_type_id, SpvOpBitwiseOr, and_result->result_id(), uint_y); + Instruction* target_inv = ir_builder.AddBinaryOp( + uint_type_id, SpvOpBitwiseXor, or_result->result_id(), uint_z); + + // Do the group operations + uint32_t uint_max_id = ir_builder.GetUintConstantId(0xFFFFFFFF); + uint32_t subgroup_scope = ir_builder.GetUintConstantId(SpvScopeSubgroup); + const auto* ballot_value_const = const_mgr->GetConstant( + type_mgr->GetUIntVectorType(4), + {uint_max_id, uint_max_id, uint_max_id, uint_max_id}); + Instruction* ballot_value = + const_mgr->GetDefiningInstruction(ballot_value_const); + Instruction* is_active = ir_builder.AddNaryOp( + type_mgr->GetBoolTypeId(), SpvOpGroupNonUniformBallotBitExtract, + {subgroup_scope, ballot_value->result_id(), target_inv->result_id()}); + Instruction* shuffle = ir_builder.AddNaryOp( + inst->type_id(), SpvOpGroupNonUniformShuffle, + {subgroup_scope, data_id, target_inv->result_id()}); + + // Create the null constant to use in the select. + const auto* null = const_mgr->GetConstant( + type_mgr->GetType(inst->type_id()), std::vector()); + Instruction* null_inst = const_mgr->GetDefiningInstruction(null); + + // Build the select. 
+ inst->SetOpcode(SpvOpSelect); + Instruction::OperandList new_operands; + new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_active->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {shuffle->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {null_inst->result_id()}}); + + inst->SetInOperands(std::move(new_operands)); + ctx->UpdateDefUse(inst); + return true; + }; +} + +// Returns a folding rule that will replace the WriteInvocationAMD extended +// instruction in the SPV_AMD_shader_ballot extension. +// +// The instruction +// +// clang-format off +// %result = OpExtInst %type %1 WriteInvocationAMD %input_value %write_value %invocation_index +// clang-format on +// +// with +// +// %id = OpLoad %uint %SubgroupLocalInvocationId +// %cmp = OpIEqual %bool %id %invocation_index +// %result = OpSelect %type %cmp %write_value %input_value +// +// Also adding the capabilities and builtins that are needed. +FoldingRule ReplaceWriteInvocation() { + return [](IRContext* ctx, Instruction* inst, + const std::vector&) { + uint32_t var_id = + ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId); + ctx->AddCapability(SpvCapabilitySubgroupBallotKHR); + ctx->AddExtension("SPV_KHR_shader_ballot"); + assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable."); + Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id); + Instruction* var_ptr_type = + ctx->get_def_use_mgr()->GetDef(var_inst->type_id()); + + InstructionBuilder ir_builder( + ctx, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + Instruction* t = + ir_builder.AddLoad(var_ptr_type->GetSingleWordInOperand(1), var_id); + analysis::Bool bool_type; + uint32_t bool_type_id = ctx->get_type_mgr()->GetTypeInstruction(&bool_type); + Instruction* cmp = + ir_builder.AddBinaryOp(bool_type_id, SpvOpIEqual, t->result_id(), + inst->GetSingleWordInOperand(4)); + + // Build a select. 
+ inst->SetOpcode(SpvOpSelect); + Instruction::OperandList new_operands; + new_operands.push_back({SPV_OPERAND_TYPE_ID, {cmp->result_id()}}); + new_operands.push_back(inst->GetInOperand(3)); + new_operands.push_back(inst->GetInOperand(2)); + + inst->SetInOperands(std::move(new_operands)); + ctx->UpdateDefUse(inst); + return true; + }; +} + +// Returns a folding rule that will replace the MbcntAMD extended instruction in +// the SPV_AMD_shader_ballot extension. +// +// The instruction +// +// %result = OpExtInst %uint %1 MbcntAMD %mask +// +// with +// +// Get SubgroupLtMask and convert the first 64-bits into a uint64_t because +// AMD's shader compiler expects a 64-bit integer mask. +// +// %var = OpLoad %v4uint %SubgroupLtMaskKHR +// %shuffle = OpVectorShuffle %v2uint %var %var 0 1 +// %cast = OpBitcast %ulong %shuffle +// +// Perform the mask and count the bits. +// +// %and = OpBitwiseAnd %ulong %cast %mask +// %result = OpBitCount %uint %and +// +// Also adding the capabilities and builtins that are needed. +FoldingRule ReplaceMbcnt() { + return [](IRContext* context, Instruction* inst, + const std::vector&) { + analysis::TypeManager* type_mgr = context->get_type_mgr(); + analysis::DefUseManager* def_use_mgr = context->get_def_use_mgr(); + + uint32_t var_id = context->GetBuiltinInputVarId(SpvBuiltInSubgroupLtMask); + assert(var_id != 0 && "Could not get SubgroupLtMask variable."); + context->AddCapability(SpvCapabilityGroupNonUniformBallot); + Instruction* var_inst = def_use_mgr->GetDef(var_id); + Instruction* var_ptr_type = def_use_mgr->GetDef(var_inst->type_id()); + Instruction* var_type = + def_use_mgr->GetDef(var_ptr_type->GetSingleWordInOperand(1)); + assert(var_type->opcode() == SpvOpTypeVector && + "Variable is suppose to be a vector of 4 ints"); + + // Get the type for the shuffle. 
+ analysis::Vector temp_type(GetUIntType(context), 2); + const analysis::Type* shuffle_type = + context->get_type_mgr()->GetRegisteredType(&temp_type); + uint32_t shuffle_type_id = type_mgr->GetTypeInstruction(shuffle_type); + + uint32_t mask_id = inst->GetSingleWordInOperand(2); + Instruction* mask_inst = def_use_mgr->GetDef(mask_id); + + // Testing with amd's shader compiler shows that a 64-bit mask is expected. + assert(type_mgr->GetType(mask_inst->type_id())->AsInteger() != nullptr); + assert(type_mgr->GetType(mask_inst->type_id())->AsInteger()->width() == 64); + + InstructionBuilder ir_builder( + context, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + Instruction* load = ir_builder.AddLoad(var_type->result_id(), var_id); + Instruction* shuffle = ir_builder.AddVectorShuffle( + shuffle_type_id, load->result_id(), load->result_id(), {0, 1}); + Instruction* bitcast = ir_builder.AddUnaryOp( + mask_inst->type_id(), SpvOpBitcast, shuffle->result_id()); + Instruction* t = ir_builder.AddBinaryOp( + mask_inst->type_id(), SpvOpBitwiseAnd, bitcast->result_id(), mask_id); + + inst->SetOpcode(SpvOpBitCount); + inst->SetInOperands({{SPV_OPERAND_TYPE_ID, {t->result_id()}}}); + context->UpdateDefUse(inst); + return true; + }; +} + +class AmdExtFoldingRules : public FoldingRules { + public: + explicit AmdExtFoldingRules(IRContext* ctx) : FoldingRules(ctx) {} + + protected: + virtual void AddFoldingRules() override { + rules_[SpvOpGroupIAddNonUniformAMD].push_back( + ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformIAdd)); + rules_[SpvOpGroupFAddNonUniformAMD].push_back( + ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformFAdd)); + rules_[SpvOpGroupUMinNonUniformAMD].push_back( + ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformUMin)); + rules_[SpvOpGroupSMinNonUniformAMD].push_back( + ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformSMin)); + rules_[SpvOpGroupFMinNonUniformAMD].push_back( + 
ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformFMin)); + rules_[SpvOpGroupUMaxNonUniformAMD].push_back( + ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformUMax)); + rules_[SpvOpGroupSMaxNonUniformAMD].push_back( + ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformSMax)); + rules_[SpvOpGroupFMaxNonUniformAMD].push_back( + ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformFMax)); + + uint32_t extension_id = + context()->module()->GetExtInstImportId("SPV_AMD_shader_ballot"); + + ext_rules_[{extension_id, AmdShaderBallotSwizzleInvocationsAMD}].push_back( + ReplaceSwizzleInvocations()); + ext_rules_[{extension_id, AmdShaderBallotSwizzleInvocationsMaskedAMD}] + .push_back(ReplaceSwizzleInvocationsMasked()); + ext_rules_[{extension_id, AmdShaderBallotWriteInvocationAMD}].push_back( + ReplaceWriteInvocation()); + ext_rules_[{extension_id, AmdShaderBallotMbcntAMD}].push_back( + ReplaceMbcnt()); + } +}; + +class AmdExtConstFoldingRules : public ConstantFoldingRules { + public: + AmdExtConstFoldingRules(IRContext* ctx) : ConstantFoldingRules(ctx) {} + + protected: + virtual void AddFoldingRules() override {} +}; + +} // namespace + +Pass::Status AmdExtensionToKhrPass::Process() { + bool changed = false; + + // Traverse the body of the functions to replace instructions that require + // the extensions. + InstructionFolder folder( + context(), + std::unique_ptr(new AmdExtFoldingRules(context())), + MakeUnique(context())); + for (Function& func : *get_module()) { + func.ForEachInst([&changed, &folder](Instruction* inst) { + if (folder.FoldInstruction(inst)) { + changed = true; + } + }); + } + + // Now that instruction that require the extensions have been removed, we can + // remove the extension instructions. 
+ std::vector to_be_killed; + for (Instruction& inst : context()->module()->extensions()) { + if (inst.opcode() == SpvOpExtension) { + if (!strcmp("SPV_AMD_shader_ballot", + reinterpret_cast( + &(inst.GetInOperand(0).words[0])))) { + to_be_killed.push_back(&inst); + } + } + } + + for (Instruction& inst : context()->ext_inst_imports()) { + if (inst.opcode() == SpvOpExtInstImport) { + if (!strcmp("SPV_AMD_shader_ballot", + reinterpret_cast( + &(inst.GetInOperand(0).words[0])))) { + to_be_killed.push_back(&inst); + } + } + } + + for (Instruction* inst : to_be_killed) { + context()->KillInst(inst); + changed = true; + } + + // The replacements that take place use instructions that are missing before + // SPIR-V 1.3. If we changed something, we will have to make sure the version + // is at least SPIR-V 1.3 to make sure those instruction can be used. + if (changed) { + uint32_t version = get_module()->version(); + if (version < 0x00010300 /*1.3*/) { + get_module()->set_version(0x00010300); + } + } + return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange; +} + +} // namespace opt +} // namespace spvtools diff --git a/3rdparty/spirv-tools/source/opt/amd_ext_to_khr.h b/3rdparty/spirv-tools/source/opt/amd_ext_to_khr.h new file mode 100644 index 000000000..fd3dab4e7 --- /dev/null +++ b/3rdparty/spirv-tools/source/opt/amd_ext_to_khr.h @@ -0,0 +1,51 @@ +// Copyright (c) 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef SOURCE_OPT_AMD_EXT_TO_KHR_H_ +#define SOURCE_OPT_AMD_EXT_TO_KHR_H_ + +#include "source/opt/ir_context.h" +#include "source/opt/module.h" +#include "source/opt/pass.h" + +namespace spvtools { +namespace opt { + +// Replaces the extensions VK_AMD_shader_ballot, VK_AMD_gcn_shader, and +// VK_AMD_shader_trinary_minmax with equivalant code using core instructions and +// capabilities. +class AmdExtensionToKhrPass : public Pass { + public: + const char* name() const override { return "amd-ext-to-khr"; } + Status Process() override; + + IRContext::Analysis GetPreservedAnalyses() override { + return IRContext::kAnalysisInstrToBlockMapping | + IRContext::kAnalysisDecorations | IRContext::kAnalysisCombinators | + IRContext::kAnalysisCFG | IRContext::kAnalysisDominatorAnalysis | + IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisNameMap | + IRContext::kAnalysisScalarEvolution | + IRContext::kAnalysisRegisterPressure | + IRContext::kAnalysisValueNumberTable | + IRContext::kAnalysisStructuredCFG | + IRContext::kAnalysisBuiltinVarId | + IRContext::kAnalysisIdToFuncMapping | IRContext::kAnalysisTypes | + IRContext::kAnalysisDefUse | IRContext::kAnalysisConstants; + } +}; + +} // namespace opt +} // namespace spvtools + +#endif // SOURCE_OPT_AMD_EXT_TO_KHR_H_ diff --git a/3rdparty/spirv-tools/source/opt/const_folding_rules.cpp b/3rdparty/spirv-tools/source/opt/const_folding_rules.cpp index 23a799821..06a1a81e6 100644 --- a/3rdparty/spirv-tools/source/opt/const_folding_rules.cpp +++ b/3rdparty/spirv-tools/source/opt/const_folding_rules.cpp @@ -809,9 +809,62 @@ ConstantFoldingRule FoldFClampFeedingCompare(uint32_t cmp_opcode) { }; } +ConstantFoldingRule FoldFMix() { + return [](IRContext* context, Instruction* inst, + const std::vector& constants) + -> const analysis::Constant* { + analysis::ConstantManager* const_mgr = context->get_constant_mgr(); + assert(inst->opcode() == SpvOpExtInst && + "Expecting an extended instruction."); + 
assert(inst->GetSingleWordInOperand(0) == + context->get_feature_mgr()->GetExtInstImportId_GLSLstd450() && + "Expecting a GLSLstd450 extended instruction."); + assert(inst->GetSingleWordInOperand(1) == GLSLstd450FMix && + "Expecting and FMix instruction."); + + if (!inst->IsFloatingPointFoldingAllowed()) { + return nullptr; + } + + // Make sure all FMix operands are constants. + for (uint32_t i = 1; i < 4; i++) { + if (constants[i] == nullptr) { + return nullptr; + } + } + + const analysis::Constant* one; + if (constants[1]->type()->AsFloat()->width() == 32) { + one = const_mgr->GetConstant(constants[1]->type(), + utils::FloatProxy(1.0f).GetWords()); + } else { + one = const_mgr->GetConstant(constants[1]->type(), + utils::FloatProxy(1.0).GetWords()); + } + + const analysis::Constant* temp1 = + FOLD_FPARITH_OP(-)(constants[1]->type(), one, constants[3], const_mgr); + if (temp1 == nullptr) { + return nullptr; + } + + const analysis::Constant* temp2 = FOLD_FPARITH_OP(*)( + constants[1]->type(), constants[1], temp1, const_mgr); + if (temp2 == nullptr) { + return nullptr; + } + const analysis::Constant* temp3 = FOLD_FPARITH_OP(*)( + constants[2]->type(), constants[2], constants[3], const_mgr); + if (temp3 == nullptr) { + return nullptr; + } + return FOLD_FPARITH_OP(+)(temp2->type(), temp2, temp3, const_mgr); + }; +} + } // namespace -ConstantFoldingRules::ConstantFoldingRules() { +void ConstantFoldingRules::AddFoldingRules() { // Add all folding rules to the list for the opcodes to which they apply. // Note that the order in which rules are added to the list matters. If a rule // applies to the instruction, the rest of the rules will not be attempted. 
@@ -877,6 +930,14 @@ ConstantFoldingRules::ConstantFoldingRules() { rules_[SpvOpFNegate].push_back(FoldFNegate()); rules_[SpvOpQuantizeToF16].push_back(FoldQuantizeToF16()); + + // Add rules for GLSLstd450 + FeatureManager* feature_manager = context_->get_feature_mgr(); + uint32_t ext_inst_glslstd450_id = + feature_manager->GetExtInstImportId_GLSLstd450(); + if (ext_inst_glslstd450_id != 0) { + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FMix}].push_back(FoldFMix()); + } } } // namespace opt } // namespace spvtools diff --git a/3rdparty/spirv-tools/source/opt/const_folding_rules.h b/3rdparty/spirv-tools/source/opt/const_folding_rules.h index c1865792b..41ee2aa22 100644 --- a/3rdparty/spirv-tools/source/opt/const_folding_rules.h +++ b/3rdparty/spirv-tools/source/opt/const_folding_rules.h @@ -53,24 +53,74 @@ using ConstantFoldingRule = std::function& constants)>; class ConstantFoldingRules { + protected: + // The |Key| and |Value| structs are used to by-pass a "decorated name length + // exceeded, name was truncated" warning on VS2013 and VS2015. + struct Key { + uint32_t instruction_set; + uint32_t opcode; + }; + + friend bool operator<(const Key& a, const Key& b) { + if (a.instruction_set < b.instruction_set) { + return true; + } + if (a.instruction_set > b.instruction_set) { + return false; + } + return a.opcode < b.opcode; + } + + struct Value { + std::vector value; + void push_back(ConstantFoldingRule rule) { value.push_back(rule); } + }; + public: - ConstantFoldingRules(); + ConstantFoldingRules(IRContext* ctx) : context_(ctx) {} + virtual ~ConstantFoldingRules() = default; // Returns true if there is at least 1 folding rule for |opcode|. - bool HasFoldingRule(SpvOp opcode) const { return rules_.count(opcode); } + bool HasFoldingRule(const Instruction* inst) const { + return !GetRulesForInstruction(inst).empty(); + } - // Returns an vector of constant folding rules for |opcode|. 
- const std::vector& GetRulesForOpcode( - SpvOp opcode) const { - auto it = rules_.find(opcode); - if (it != rules_.end()) { - return it->second; + // Returns true if there is at least 1 folding rule for |inst|. + const std::vector& GetRulesForInstruction( + const Instruction* inst) const { + if (inst->opcode() != SpvOpExtInst) { + auto it = rules_.find(inst->opcode()); + if (it != rules_.end()) { + return it->second.value; + } + } else { + uint32_t ext_inst_id = inst->GetSingleWordInOperand(0); + uint32_t ext_opcode = inst->GetSingleWordInOperand(1); + auto it = ext_rules_.find({ext_inst_id, ext_opcode}); + if (it != ext_rules_.end()) { + return it->second.value; + } } return empty_vector_; } + // Add the folding rules. + virtual void AddFoldingRules(); + + protected: + // |rules[opcode]| is the set of rules that can be applied to instructions + // with |opcode| as the opcode. + std::unordered_map rules_; + + // The folding rules for extended instructions. + std::map ext_rules_; + private: - std::unordered_map> rules_; + // The context that the instruction to be folded will be a part of. + IRContext* context_; + + // The empty set of rules to be used as the default return value in + // |GetRulesForInstruction|. 
std::vector empty_vector_; }; diff --git a/3rdparty/spirv-tools/source/opt/feature_manager.cpp b/3rdparty/spirv-tools/source/opt/feature_manager.cpp index b7fc16a50..63d50b6d7 100644 --- a/3rdparty/spirv-tools/source/opt/feature_manager.cpp +++ b/3rdparty/spirv-tools/source/opt/feature_manager.cpp @@ -31,12 +31,19 @@ void FeatureManager::Analyze(Module* module) { void FeatureManager::AddExtensions(Module* module) { for (auto ext : module->extensions()) { - const std::string name = - reinterpret_cast(ext.GetInOperand(0u).words.data()); - Extension extension; - if (GetExtensionFromString(name.c_str(), &extension)) { - extensions_.Add(extension); - } + AddExtension(&ext); + } +} + +void FeatureManager::AddExtension(Instruction* ext) { + assert(ext->opcode() == SpvOpExtension && + "Expecting an extension instruction."); + + const std::string name = + reinterpret_cast(ext->GetInOperand(0u).words.data()); + Extension extension; + if (GetExtensionFromString(name.c_str(), &extension)) { + extensions_.Add(extension); } } @@ -63,5 +70,27 @@ void FeatureManager::AddExtInstImportIds(Module* module) { extinst_importid_GLSLstd450_ = module->GetExtInstImportId("GLSL.std.450"); } +bool operator==(const FeatureManager& a, const FeatureManager& b) { + // We check that the addresses of the grammars are the same because they + // are large objects, and this is faster. It can be changed if needed as a + // later time. 
+ if (&a.grammar_ != &b.grammar_) { + return false; + } + + if (a.capabilities_ != b.capabilities_) { + return false; + } + + if (a.extensions_ != b.extensions_) { + return false; + } + + if (a.extinst_importid_GLSLstd450_ != b.extinst_importid_GLSLstd450_) { + return false; + } + + return true; +} } // namespace opt } // namespace spvtools diff --git a/3rdparty/spirv-tools/source/opt/feature_manager.h b/3rdparty/spirv-tools/source/opt/feature_manager.h index 80b2cccf6..761a20888 100644 --- a/3rdparty/spirv-tools/source/opt/feature_manager.h +++ b/3rdparty/spirv-tools/source/opt/feature_manager.h @@ -45,14 +45,22 @@ class FeatureManager { return extinst_importid_GLSLstd450_; } - private: - // Analyzes |module| and records enabled extensions. - void AddExtensions(Module* module); + friend bool operator==(const FeatureManager& a, const FeatureManager& b); + friend bool operator!=(const FeatureManager& a, const FeatureManager& b) { + return !(a == b); + } // Adds the given |capability| and all implied capabilities into the current // FeatureManager. void AddCapability(SpvCapability capability); + // Add the extension |ext| to the feature manager. + void AddExtension(Instruction* ext); + + private: + // Analyzes |module| and records enabled extensions. + void AddExtensions(Module* module); + // Analyzes |module| and records enabled capabilities. 
void AddCapabilities(Module* module); diff --git a/3rdparty/spirv-tools/source/opt/fold.cpp b/3rdparty/spirv-tools/source/opt/fold.cpp index 944f43870..276e8358a 100644 --- a/3rdparty/spirv-tools/source/opt/fold.cpp +++ b/3rdparty/spirv-tools/source/opt/fold.cpp @@ -234,13 +234,12 @@ bool InstructionFolder::FoldInstructionInternal(Instruction* inst) const { return true; } - SpvOp opcode = inst->opcode(); analysis::ConstantManager* const_manager = context_->get_constant_mgr(); - std::vector constants = const_manager->GetOperandConstants(inst); - for (const FoldingRule& rule : GetFoldingRules().GetRulesForOpcode(opcode)) { + for (const FoldingRule& rule : + GetFoldingRules().GetRulesForInstruction(inst)) { if (rule(context_, inst, constants)) { return true; } @@ -623,7 +622,7 @@ Instruction* InstructionFolder::FoldInstructionToConstant( analysis::ConstantManager* const_mgr = context_->get_constant_mgr(); if (!inst->IsFoldableByFoldScalar() && - !GetConstantFoldingRules().HasFoldingRule(inst->opcode())) { + !GetConstantFoldingRules().HasFoldingRule(inst)) { return nullptr; } // Collect the values of the constant parameters. @@ -641,19 +640,16 @@ Instruction* InstructionFolder::FoldInstructionToConstant( } }); - if (GetConstantFoldingRules().HasFoldingRule(inst->opcode())) { - const analysis::Constant* folded_const = nullptr; - for (auto rule : - GetConstantFoldingRules().GetRulesForOpcode(inst->opcode())) { - folded_const = rule(context_, inst, constants); - if (folded_const != nullptr) { - Instruction* const_inst = - const_mgr->GetDefiningInstruction(folded_const, inst->type_id()); - assert(const_inst->type_id() == inst->type_id()); - // May be a new instruction that needs to be analysed. 
- context_->UpdateDefUse(const_inst); - return const_inst; - } + const analysis::Constant* folded_const = nullptr; + for (auto rule : GetConstantFoldingRules().GetRulesForInstruction(inst)) { + folded_const = rule(context_, inst, constants); + if (folded_const != nullptr) { + Instruction* const_inst = + const_mgr->GetDefiningInstruction(folded_const, inst->type_id()); + assert(const_inst->type_id() == inst->type_id()); + // May be a new instruction that needs to be analysed. + context_->UpdateDefUse(const_inst); + return const_inst; } } diff --git a/3rdparty/spirv-tools/source/opt/fold.h b/3rdparty/spirv-tools/source/opt/fold.h index 0dc7c0ebb..9e7c4705e 100644 --- a/3rdparty/spirv-tools/source/opt/fold.h +++ b/3rdparty/spirv-tools/source/opt/fold.h @@ -28,7 +28,23 @@ namespace opt { class InstructionFolder { public: - explicit InstructionFolder(IRContext* context) : context_(context) {} + explicit InstructionFolder(IRContext* context) + : context_(context), + const_folding_rules_(new ConstantFoldingRules(context)), + folding_rules_(new FoldingRules(context)) { + folding_rules_->AddFoldingRules(); + const_folding_rules_->AddFoldingRules(); + } + + explicit InstructionFolder( + IRContext* context, std::unique_ptr&& folding_rules, + std::unique_ptr&& constant_folding_rules) + : context_(context), + const_folding_rules_(std::move(constant_folding_rules)), + folding_rules_(std::move(folding_rules)) { + folding_rules_->AddFoldingRules(); + const_folding_rules_->AddFoldingRules(); + } // Returns the result of folding a scalar instruction with the given |opcode| // and |operands|. Each entry in |operands| is a pointer to an @@ -95,18 +111,18 @@ class InstructionFolder { bool FoldInstruction(Instruction* inst) const; // Return true if this opcode has a const folding rule associtated with it. 
- bool HasConstFoldingRule(SpvOp opcode) const { - return GetConstantFoldingRules().HasFoldingRule(opcode); + bool HasConstFoldingRule(const Instruction* inst) const { + return GetConstantFoldingRules().HasFoldingRule(inst); } private: // Returns a reference to the ConstnatFoldingRules instance. const ConstantFoldingRules& GetConstantFoldingRules() const { - return const_folding_rules; + return *const_folding_rules_; } // Returns a reference to the FoldingRules instance. - const FoldingRules& GetFoldingRules() const { return folding_rules; } + const FoldingRules& GetFoldingRules() const { return *folding_rules_; } // Returns the single-word result from performing the given unary operation on // the operand value which is passed in as a 32-bit word. @@ -159,10 +175,10 @@ class InstructionFolder { IRContext* context_; // Folding rules used by |FoldInstructionToConstant| and |FoldInstruction|. - ConstantFoldingRules const_folding_rules; + std::unique_ptr const_folding_rules_; // Folding rules used by |FoldInstruction|. - FoldingRules folding_rules; + std::unique_ptr folding_rules_; }; } // namespace opt diff --git a/3rdparty/spirv-tools/source/opt/folding_rules.cpp b/3rdparty/spirv-tools/source/opt/folding_rules.cpp index 18d51498f..a125dda62 100644 --- a/3rdparty/spirv-tools/source/opt/folding_rules.cpp +++ b/3rdparty/spirv-tools/source/opt/folding_rules.cpp @@ -2200,7 +2200,7 @@ FoldingRule RemoveRedundantOperands() { } // namespace -FoldingRules::FoldingRules() { +void FoldingRules::AddFoldingRules() { // Add all folding rules to the list for the opcodes to which they apply. // Note that the order in which rules are added to the list matters. If a rule // applies to the instruction, the rest of the rules will not be attempted. 
@@ -2216,8 +2216,6 @@ FoldingRules::FoldingRules() { rules_[SpvOpEntryPoint].push_back(RemoveRedundantOperands()); - rules_[SpvOpExtInst].push_back(RedundantFMix()); - rules_[SpvOpFAdd].push_back(RedundantFAdd()); rules_[SpvOpFAdd].push_back(MergeAddNegateArithmetic()); rules_[SpvOpFAdd].push_back(MergeAddAddArithmetic()); @@ -2271,6 +2269,15 @@ FoldingRules::FoldingRules() { rules_[SpvOpUDiv].push_back(MergeDivNegateArithmetic()); rules_[SpvOpVectorShuffle].push_back(VectorShuffleFeedingShuffle()); + + FeatureManager* feature_manager = context_->get_feature_mgr(); + // Add rules for GLSLstd450 + uint32_t ext_inst_glslstd450_id = + feature_manager->GetExtInstImportId_GLSLstd450(); + if (ext_inst_glslstd450_id != 0) { + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FMix}].push_back( + RedundantFMix()); + } } } // namespace opt } // namespace spvtools diff --git a/3rdparty/spirv-tools/source/opt/folding_rules.h b/3rdparty/spirv-tools/source/opt/folding_rules.h index 33fdbffe9..f1a86395c 100644 --- a/3rdparty/spirv-tools/source/opt/folding_rules.h +++ b/3rdparty/spirv-tools/source/opt/folding_rules.h @@ -58,19 +58,58 @@ using FoldingRule = std::function; - const std::vector& GetRulesForOpcode(SpvOp opcode) const { - auto it = rules_.find(opcode); - if (it != rules_.end()) { - return it->second; + explicit FoldingRules(IRContext* ctx) : context_(ctx) {} + virtual ~FoldingRules() = default; + + const FoldingRuleSet& GetRulesForInstruction(Instruction* inst) const { + if (inst->opcode() != SpvOpExtInst) { + auto it = rules_.find(inst->opcode()); + if (it != rules_.end()) { + return it->second; + } + } else { + uint32_t ext_inst_id = inst->GetSingleWordInOperand(0); + uint32_t ext_opcode = inst->GetSingleWordInOperand(1); + auto it = ext_rules_.find({ext_inst_id, ext_opcode}); + if (it != ext_rules_.end()) { + return it->second; + } } return empty_vector_; } + IRContext* context() { return context_; } + + // Adds the folding rules for the object. 
+ virtual void AddFoldingRules(); + + protected: + // The folding rules for core instructions. + std::unordered_map rules_; + + // The folding rules for extended instructions. + struct Key { + uint32_t instruction_set; + uint32_t opcode; + }; + + friend bool operator<(const Key& a, const Key& b) { + if (a.instruction_set < b.instruction_set) { + return true; + } + if (a.instruction_set > b.instruction_set) { + return false; + } + return a.opcode < b.opcode; + } + + std::map ext_rules_; + private: - std::unordered_map> rules_; - std::vector empty_vector_; + IRContext* context_; + FoldingRuleSet empty_vector_; }; } // namespace opt diff --git a/3rdparty/spirv-tools/source/opt/inst_buff_addr_check_pass.cpp b/3rdparty/spirv-tools/source/opt/inst_buff_addr_check_pass.cpp index 662937924..03221ef48 100644 --- a/3rdparty/spirv-tools/source/opt/inst_buff_addr_check_pass.cpp +++ b/3rdparty/spirv-tools/source/opt/inst_buff_addr_check_pass.cpp @@ -341,7 +341,7 @@ uint32_t InstBuffAddrCheckPass::GenSearchAndTest(Instruction* ref_inst, std::initializer_list{ {SPV_OPERAND_TYPE_CAPABILITY, {SpvCapabilityInt64}}})); get_def_use_mgr()->AnalyzeInstDefUse(&*cap_int64_inst); - get_module()->AddCapability(std::move(cap_int64_inst)); + context()->AddCapability(std::move(cap_int64_inst)); } // Convert reference pointer to uint64 uint32_t ref_ptr_id = ref_inst->GetSingleWordInOperand(0); diff --git a/3rdparty/spirv-tools/source/opt/instruction.cpp b/3rdparty/spirv-tools/source/opt/instruction.cpp index ec736406e..49f91426d 100644 --- a/3rdparty/spirv-tools/source/opt/instruction.cpp +++ b/3rdparty/spirv-tools/source/opt/instruction.cpp @@ -469,7 +469,7 @@ bool Instruction::IsOpaqueType() const { bool Instruction::IsFoldable() const { return IsFoldableByFoldScalar() || - context()->get_instruction_folder().HasConstFoldingRule(opcode()); + context()->get_instruction_folder().HasConstFoldingRule(this); } bool Instruction::IsFoldableByFoldScalar() const { diff --git 
a/3rdparty/spirv-tools/source/opt/instrument_pass.cpp b/3rdparty/spirv-tools/source/opt/instrument_pass.cpp index 418759b67..246cdbb4e 100644 --- a/3rdparty/spirv-tools/source/opt/instrument_pass.cpp +++ b/3rdparty/spirv-tools/source/opt/instrument_pass.cpp @@ -451,15 +451,7 @@ analysis::Type* InstrumentPass::GetUintRuntimeArrayType(uint32_t width) { void InstrumentPass::AddStorageBufferExt() { if (storage_buffer_ext_defined_) return; if (!get_feature_mgr()->HasExtension(kSPV_KHR_storage_buffer_storage_class)) { - const std::string ext_name("SPV_KHR_storage_buffer_storage_class"); - const auto num_chars = ext_name.size(); - // Compute num words, accommodate the terminating null character. - const auto num_words = (num_chars + 1 + 3) / 4; - std::vector ext_words(num_words, 0u); - std::memcpy(ext_words.data(), ext_name.data(), num_chars); - context()->AddExtension(std::unique_ptr( - new Instruction(context(), SpvOpExtension, 0u, 0u, - {{SPV_OPERAND_TYPE_LITERAL_STRING, ext_words}}))); + context()->AddExtension("SPV_KHR_storage_buffer_storage_class"); } storage_buffer_ext_defined_ = true; } diff --git a/3rdparty/spirv-tools/source/opt/ir_builder.h b/3rdparty/spirv-tools/source/opt/ir_builder.h index f12dc95d1..a0ca40cee 100644 --- a/3rdparty/spirv-tools/source/opt/ir_builder.h +++ b/3rdparty/spirv-tools/source/opt/ir_builder.h @@ -482,6 +482,26 @@ class InstructionBuilder { return AddInstruction(std::move(new_inst)); } + Instruction* AddVectorShuffle(uint32_t result_type, uint32_t vec1, + uint32_t vec2, + const std::vector& components) { + std::vector operands; + operands.push_back({SPV_OPERAND_TYPE_ID, {vec1}}); + operands.push_back({SPV_OPERAND_TYPE_ID, {vec2}}); + for (uint32_t id : components) { + operands.push_back({SPV_OPERAND_TYPE_LITERAL_INTEGER, {id}}); + } + + uint32_t result_id = GetContext()->TakeNextId(); + if (result_id == 0) { + return nullptr; + } + + std::unique_ptr new_inst(new Instruction( + GetContext(), SpvOpVectorShuffle, result_type, result_id, 
operands)); + return AddInstruction(std::move(new_inst)); + } + // Inserts the new instruction before the insertion point. Instruction* AddInstruction(std::unique_ptr&& insn) { Instruction* insn_ptr = &*insert_before_.InsertBefore(std::move(insn)); diff --git a/3rdparty/spirv-tools/source/opt/ir_context.cpp b/3rdparty/spirv-tools/source/opt/ir_context.cpp index 3e32980f2..1c747b7a3 100644 --- a/3rdparty/spirv-tools/source/opt/ir_context.cpp +++ b/3rdparty/spirv-tools/source/opt/ir_context.cpp @@ -156,14 +156,20 @@ Instruction* IRContext::KillInst(Instruction* inst) { decoration_mgr_->RemoveDecoration(inst); } } - if (type_mgr_ && IsTypeInst(inst->opcode())) { type_mgr_->RemoveId(inst->result_id()); } - if (constant_mgr_ && IsConstantInst(inst->opcode())) { constant_mgr_->RemoveId(inst->result_id()); } + if (inst->opcode() == SpvOpCapability || inst->opcode() == SpvOpExtension) { + // We reset the feature manager, instead of updating it, because it is just + // as much work. We would have to remove all capabilities implied by this + // capability that are not also implied by the remaining OpCapability + // instructions. We could update extensions, but we will see if it is + // needed. 
+ ResetFeatureManager(); + } RemoveFromIdToName(inst); @@ -252,7 +258,6 @@ bool IRContext::IsConsistent() { #ifndef SPIRV_CHECK_CONTEXT return true; #endif - if (AreAnalysesValid(kAnalysisDefUse)) { analysis::DefUseManager new_def_use(module()); if (*get_def_use_mgr() != new_def_use) { @@ -286,6 +291,15 @@ bool IRContext::IsConsistent() { return false; } } + + if (feature_mgr_ != nullptr) { + FeatureManager current(grammar_); + current.Analyze(module()); + + if (current != *feature_mgr_) { + return false; + } + } return true; } @@ -687,7 +701,8 @@ uint32_t IRContext::GetBuiltinInputVarId(uint32_t builtin) { case SpvBuiltInVertexIndex: case SpvBuiltInInstanceIndex: case SpvBuiltInPrimitiveId: - case SpvBuiltInInvocationId: { + case SpvBuiltInInvocationId: + case SpvBuiltInSubgroupLocalInvocationId: { analysis::Integer uint_ty(32, false); reg_type = type_mgr->GetRegisteredType(&uint_ty); break; @@ -707,6 +722,13 @@ uint32_t IRContext::GetBuiltinInputVarId(uint32_t builtin) { reg_type = type_mgr->GetRegisteredType(&v3float_ty); break; } + case SpvBuiltInSubgroupLtMask: { + analysis::Integer uint_ty(32, false); + analysis::Type* reg_uint_ty = type_mgr->GetRegisteredType(&uint_ty); + analysis::Vector v4uint_ty(reg_uint_ty, 4); + reg_type = type_mgr->GetRegisteredType(&v4uint_ty); + break; + } default: { assert(false && "unhandled builtin"); return 0; diff --git a/3rdparty/spirv-tools/source/opt/ir_context.h b/3rdparty/spirv-tools/source/opt/ir_context.h index 05df9c037..e297fb1f5 100644 --- a/3rdparty/spirv-tools/source/opt/ir_context.h +++ b/3rdparty/spirv-tools/source/opt/ir_context.h @@ -190,9 +190,13 @@ class IRContext { // Clears all debug instructions (excluding OpLine & OpNoLine). inline void debug_clear(); + // Add |capability| to the module, if it is not already enabled. + inline void AddCapability(SpvCapability capability); + // Appends a capability instruction to this module. 
inline void AddCapability(std::unique_ptr&& c); // Appends an extension instruction to this module. + inline void AddExtension(const std::string& ext_name); inline void AddExtension(std::unique_ptr&& e); // Appends an extended instruction set instruction to this module. inline void AddExtInstImport(std::unique_ptr&& e); @@ -487,6 +491,8 @@ class IRContext { return feature_mgr_.get(); } + void ResetFeatureManager() { feature_mgr_.reset(nullptr); } + // Returns the grammar for this context. const AssemblyGrammar& grammar() const { return grammar_; } @@ -923,15 +929,45 @@ IteratorRange IRContext::debugs3() const { void IRContext::debug_clear() { module_->debug_clear(); } +void IRContext::AddCapability(SpvCapability capability) { + if (!get_feature_mgr()->HasCapability(capability)) { + std::unique_ptr capability_inst(new Instruction( + this, SpvOpCapability, 0, 0, + {{SPV_OPERAND_TYPE_CAPABILITY, {static_cast(capability)}}})); + AddCapability(std::move(capability_inst)); + } +} + void IRContext::AddCapability(std::unique_ptr&& c) { AddCombinatorsForCapability(c->GetSingleWordInOperand(0)); + if (feature_mgr_ != nullptr) { + feature_mgr_->AddCapability( + static_cast(c->GetSingleWordInOperand(0))); + } + if (AreAnalysesValid(kAnalysisDefUse)) { + get_def_use_mgr()->AnalyzeInstDefUse(c.get()); + } module()->AddCapability(std::move(c)); } +void IRContext::AddExtension(const std::string& ext_name) { + const auto num_chars = ext_name.size(); + // Compute num words, accommodate the terminating null character. 
+ const auto num_words = (num_chars + 1 + 3) / 4; + std::vector ext_words(num_words, 0u); + std::memcpy(ext_words.data(), ext_name.data(), num_chars); + AddExtension(std::unique_ptr( + new Instruction(this, SpvOpExtension, 0u, 0u, + {{SPV_OPERAND_TYPE_LITERAL_STRING, ext_words}}))); +} + void IRContext::AddExtension(std::unique_ptr&& e) { if (AreAnalysesValid(kAnalysisDefUse)) { get_def_use_mgr()->AnalyzeInstDefUse(e.get()); } + if (feature_mgr_ != nullptr) { + feature_mgr_->AddExtension(&*e); + } module()->AddExtension(std::move(e)); } diff --git a/3rdparty/spirv-tools/source/opt/module.h b/3rdparty/spirv-tools/source/opt/module.h index ede0bbbf3..cf7c274de 100644 --- a/3rdparty/spirv-tools/source/opt/module.h +++ b/3rdparty/spirv-tools/source/opt/module.h @@ -133,6 +133,8 @@ class Module { inline uint32_t version() const { return header_.version; } + inline void set_version(uint32_t v) { header_.version = v; } + // Iterators for capabilities instructions contained in this module. inline inst_iterator capability_begin(); inline inst_iterator capability_end(); diff --git a/3rdparty/spirv-tools/source/opt/optimizer.cpp b/3rdparty/spirv-tools/source/opt/optimizer.cpp index cbdda2d92..635b075ff 100644 --- a/3rdparty/spirv-tools/source/opt/optimizer.cpp +++ b/3rdparty/spirv-tools/source/opt/optimizer.cpp @@ -14,6 +14,7 @@ #include "spirv-tools/optimizer.hpp" +#include #include #include #include @@ -492,6 +493,8 @@ bool Optimizer::RegisterPassFromFlag(const std::string& flag) { RegisterPass(CreateGraphicsRobustAccessPass()); } else if (pass_name == "wrap-opkill") { RegisterPass(CreateWrapOpKillPass()); + } else if (pass_name == "amd-ext-to-khr") { + RegisterPass(CreateAmdExtToKhrPass()); } else { Errorf(consumer(), nullptr, {}, "Unknown flag '--%s'. 
Use --help for a list of valid flags", @@ -549,26 +552,25 @@ bool Optimizer::Run(const uint32_t* original_binary, impl_->pass_manager.SetTargetEnv(impl_->target_env); auto status = impl_->pass_manager.Run(context.get()); - bool binary_changed = false; - if (status == opt::Pass::Status::SuccessWithChange) { - binary_changed = true; - } else if (status == opt::Pass::Status::SuccessWithoutChange) { - if (optimized_binary->size() != original_binary_size || - (memcmp(optimized_binary->data(), original_binary, - original_binary_size) != 0)) { - binary_changed = true; - Log(consumer(), SPV_MSG_WARNING, nullptr, {}, - "Binary unexpectedly changed despite optimizer saying there was no " - "change"); - } + if (status == opt::Pass::Status::Failure) { + return false; } - if (binary_changed) { - optimized_binary->clear(); - context->module()->ToBinary(optimized_binary, /* skip_nop = */ true); - } + optimized_binary->clear(); + context->module()->ToBinary(optimized_binary, /* skip_nop = */ true); - return status != opt::Pass::Status::Failure; +#ifndef NDEBUG + if (status == opt::Pass::Status::SuccessWithoutChange) { + auto changed = optimized_binary->size() != original_binary_size || + memcmp(optimized_binary->data(), original_binary, + original_binary_size) != 0; + assert(!changed && + "Binary unexpectedly changed despite optimizer saying there was no " + "change"); + } +#endif // !NDEBUG + + return true; } Optimizer& Optimizer::SetPrintAll(std::ostream* out) { @@ -919,4 +921,9 @@ Optimizer::PassToken CreateWrapOpKillPass() { return MakeUnique(MakeUnique()); } +Optimizer::PassToken CreateAmdExtToKhrPass() { + return MakeUnique( + MakeUnique()); +} + } // namespace spvtools diff --git a/3rdparty/spirv-tools/source/opt/pass.h b/3rdparty/spirv-tools/source/opt/pass.h index 0667c3dd1..686e9fc1d 100644 --- a/3rdparty/spirv-tools/source/opt/pass.h +++ b/3rdparty/spirv-tools/source/opt/pass.h @@ -26,6 +26,7 @@ #include "source/opt/ir_context.h" #include "source/opt/module.h" #include 
"spirv-tools/libspirv.hpp" +#include "types.h" namespace spvtools { namespace opt { diff --git a/3rdparty/spirv-tools/source/opt/passes.h b/3rdparty/spirv-tools/source/opt/passes.h index 1dede0ef2..d53af8ff2 100644 --- a/3rdparty/spirv-tools/source/opt/passes.h +++ b/3rdparty/spirv-tools/source/opt/passes.h @@ -18,6 +18,7 @@ // A single header to include all passes. #include "source/opt/aggressive_dead_code_elim_pass.h" +#include "source/opt/amd_ext_to_khr.h" #include "source/opt/block_merge_pass.h" #include "source/opt/ccp_pass.h" #include "source/opt/cfg_cleanup_pass.h" diff --git a/3rdparty/spirv-tools/source/opt/simplification_pass.cpp b/3rdparty/spirv-tools/source/opt/simplification_pass.cpp index 5780e5da7..7b0887c55 100644 --- a/3rdparty/spirv-tools/source/opt/simplification_pass.cpp +++ b/3rdparty/spirv-tools/source/opt/simplification_pass.cpp @@ -49,7 +49,7 @@ bool SimplificationPass::SimplifyFunction(Function* function) { cfg()->ForEachBlockInReversePostOrder( function->entry().get(), [&modified, &process_phis, &work_list, &in_work_list, &inst_to_kill, - folder, this](BasicBlock* bb) { + &folder, this](BasicBlock* bb) { for (Instruction* inst = &*bb->begin(); inst; inst = inst->NextNode()) { if (inst->opcode() == SpvOpPhi) { process_phis.insert(inst); diff --git a/3rdparty/spirv-tools/source/opt/type_manager.h b/3rdparty/spirv-tools/source/opt/type_manager.h index bec72d2fc..8fcf8aa63 100644 --- a/3rdparty/spirv-tools/source/opt/type_manager.h +++ b/3rdparty/spirv-tools/source/opt/type_manager.h @@ -139,6 +139,61 @@ class TypeManager { const Type* GetMemberType(const Type* parent_type, const std::vector& access_chain); + Type* GetUIntType() { + Integer int_type(32, false); + return GetRegisteredType(&int_type); + } + + uint32_t GetUIntTypeId() { return GetTypeInstruction(GetUIntType()); } + + Type* GetSIntType() { + Integer int_type(32, true); + return GetRegisteredType(&int_type); + } + + uint32_t GetSIntTypeId() { return 
GetTypeInstruction(GetSIntType()); } + + Type* GetFloatType() { + Float float_type(32); + return GetRegisteredType(&float_type); + } + + uint32_t GetFloatTypeId() { return GetTypeInstruction(GetFloatType()); } + + Type* GetUIntVectorType(uint32_t size) { + Vector vec_type(GetUIntType(), size); + return GetRegisteredType(&vec_type); + } + + uint32_t GetUIntVectorTypeId(uint32_t size) { + return GetTypeInstruction(GetUIntVectorType(size)); + } + + Type* GetSIntVectorType(uint32_t size) { + Vector vec_type(GetSIntType(), size); + return GetRegisteredType(&vec_type); + } + + uint32_t GetSIntVectorTypeId(uint32_t size) { + return GetTypeInstruction(GetSIntVectorType(size)); + } + + Type* GetFloatVectorType(uint32_t size) { + Vector vec_type(GetFloatType(), size); + return GetRegisteredType(&vec_type); + } + + uint32_t GetFloatVectorTypeId(uint32_t size) { + return GetTypeInstruction(GetFloatVectorType(size)); + } + + Type* GetBoolType() { + Bool bool_type; + return GetRegisteredType(&bool_type); + } + + uint32_t GetBoolTypeId() { return GetTypeInstruction(GetBoolType()); } + private: using TypeToIdMap = std::unordered_map; diff --git a/3rdparty/spirv-tools/source/opt/upgrade_memory_model.cpp b/3rdparty/spirv-tools/source/opt/upgrade_memory_model.cpp index ef9f62035..f3bee9eeb 100644 --- a/3rdparty/spirv-tools/source/opt/upgrade_memory_model.cpp +++ b/3rdparty/spirv-tools/source/opt/upgrade_memory_model.cpp @@ -53,7 +53,7 @@ void UpgradeMemoryModel::UpgradeMemoryModelInstruction() { // 2. Add the OpCapability. // 3. Modify the memory model. 
Instruction* memory_model = get_module()->GetMemoryModel(); - get_module()->AddCapability(MakeUnique( + context()->AddCapability(MakeUnique( context(), SpvOpCapability, 0, 0, std::initializer_list{ {SPV_OPERAND_TYPE_CAPABILITY, {SpvCapabilityVulkanMemoryModelKHR}}})); @@ -61,7 +61,7 @@ void UpgradeMemoryModel::UpgradeMemoryModelInstruction() { std::vector words(extension.size() / 4 + 1, 0); char* dst = reinterpret_cast(words.data()); strncpy(dst, extension.c_str(), extension.size()); - get_module()->AddExtension( + context()->AddExtension( MakeUnique(context(), SpvOpExtension, 0, 0, std::initializer_list{ {SPV_OPERAND_TYPE_LITERAL_STRING, words}})); diff --git a/3rdparty/spirv-tools/test/opt/CMakeLists.txt b/3rdparty/spirv-tools/test/opt/CMakeLists.txt index 7c92f8e82..47ce41f0c 100644 --- a/3rdparty/spirv-tools/test/opt/CMakeLists.txt +++ b/3rdparty/spirv-tools/test/opt/CMakeLists.txt @@ -17,6 +17,7 @@ add_subdirectory(loop_optimizations) add_spvtools_unittest(TARGET opt SRCS aggressive_dead_code_elim_test.cpp + amd_ext_to_khr.cpp assembly_builder_test.cpp block_merge_test.cpp ccp_test.cpp diff --git a/3rdparty/spirv-tools/test/opt/amd_ext_to_khr.cpp b/3rdparty/spirv-tools/test/opt/amd_ext_to_khr.cpp new file mode 100644 index 000000000..7a6d4b463 --- /dev/null +++ b/3rdparty/spirv-tools/test/opt/amd_ext_to_khr.cpp @@ -0,0 +1,338 @@ +// Copyright (c) 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "gmock/gmock.h" + +#include "test/opt/pass_fixture.h" +#include "test/opt/pass_utils.h" + +namespace spvtools { +namespace opt { +namespace { + +using AmdExtToKhrTest = PassTest<::testing::Test>; + +using ::testing::HasSubstr; + +std::string GetTest(std::string op_code, std::string new_op_code) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_ballot" +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[undef:%\w+]] = OpUndef %uint +; CHECK-NEXT: )" + new_op_code + + R"( %uint %uint_3 Reduce [[undef]] + OpCapability Shader + OpCapability Groups + OpExtension "SPV_AMD_shader_ballot" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %uint + %8 = )" + op_code + + R"( %uint %uint_3 Reduce %7 + OpReturn + OpFunctionEnd + +)"; + return text; +} + +TEST_F(AmdExtToKhrTest, ReplaceGroupIAddNonUniformAMD) { + std::string text = + GetTest("OpGroupIAddNonUniformAMD", "OpGroupNonUniformIAdd"); + SinglePassRunAndMatch(text, true); +} +TEST_F(AmdExtToKhrTest, ReplaceGroupFAddNonUniformAMD) { + std::string text = + GetTest("OpGroupFAddNonUniformAMD", "OpGroupNonUniformFAdd"); + SinglePassRunAndMatch(text, true); +} +TEST_F(AmdExtToKhrTest, ReplaceGroupUMinNonUniformAMD) { + std::string text = + GetTest("OpGroupUMinNonUniformAMD", "OpGroupNonUniformUMin"); + SinglePassRunAndMatch(text, true); +} +TEST_F(AmdExtToKhrTest, ReplaceGroupSMinNonUniformAMD) { + std::string text = + GetTest("OpGroupSMinNonUniformAMD", "OpGroupNonUniformSMin"); + SinglePassRunAndMatch(text, true); +} +TEST_F(AmdExtToKhrTest, ReplaceGroupFMinNonUniformAMD) { + std::string text = + GetTest("OpGroupFMinNonUniformAMD", "OpGroupNonUniformFMin"); + SinglePassRunAndMatch(text, true); +} +TEST_F(AmdExtToKhrTest, 
ReplaceGroupUMaxNonUniformAMD) { + std::string text = + GetTest("OpGroupUMaxNonUniformAMD", "OpGroupNonUniformUMax"); + SinglePassRunAndMatch(text, true); +} +TEST_F(AmdExtToKhrTest, ReplaceGroupSMaxNonUniformAMD) { + std::string text = + GetTest("OpGroupSMaxNonUniformAMD", "OpGroupNonUniformSMax"); + SinglePassRunAndMatch(text, true); +} +TEST_F(AmdExtToKhrTest, ReplaceGroupFMaxNonUniformAMD) { + std::string text = + GetTest("OpGroupFMaxNonUniformAMD", "OpGroupNonUniformFMax"); + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceMbcntAMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_ballot" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_ballot" +; CHECK: OpDecorate [[var:%\w+]] BuiltIn SubgroupLtMask +; CHECK: [[var]] = OpVariable %_ptr_Input_v4uint Input +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[ld:%\w+]] = OpLoad %v4uint [[var]] +; CHECK-NEXT: [[shuffle:%\w+]] = OpVectorShuffle %v2uint [[ld]] [[ld]] 0 1 +; CHECK-NEXT: [[bitcast:%\w+]] = OpBitcast %ulong [[shuffle]] +; CHECK-NEXT: [[and:%\w+]] = OpBitwiseAnd %ulong [[bitcast]] %ulong_0 +; CHECK-NEXT: [[result:%\w+]] = OpBitCount %uint [[and]] + OpCapability Shader + OpCapability Int64 + OpExtension "SPV_AMD_shader_ballot" + %1 = OpExtInstImport "SPV_AMD_shader_ballot" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "func" + OpExecutionMode %2 OriginUpperLeft + %void = OpTypeVoid + %4 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %ulong = OpTypeInt 64 0 + %ulong_0 = OpConstant %ulong 0 + %2 = OpFunction %void None %4 + %8 = OpLabel + %9 = OpExtInst %uint %1 MbcntAMD %ulong_0 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceSwizzleInvocationsAMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_ballot" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_ballot" +; CHECK: OpDecorate [[var:%\w+]] 
BuiltIn SubgroupLocalInvocationId +; CHECK: [[subgroup:%\w+]] = OpConstant %uint 3 +; CHECK: [[offset:%\w+]] = OpConstantComposite %v4uint +; CHECK: [[var]] = OpVariable %_ptr_Input_uint Input +; CHECK: [[uint_max:%\w+]] = OpConstant %uint 4294967295 +; CHECK: [[ballot_value:%\w+]] = OpConstantComposite %v4uint [[uint_max]] [[uint_max]] [[uint_max]] [[uint_max]] +; CHECK: [[null:%\w+]] = OpConstantNull [[type:%\w+]] +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[data:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[id:%\w+]] = OpLoad %uint [[var]] +; CHECK-NEXT: [[quad_idx:%\w+]] = OpBitwiseAnd %uint [[id]] %uint_3 +; CHECK-NEXT: [[quad_ldr:%\w+]] = OpBitwiseXor %uint [[id]] [[quad_idx]] +; CHECK-NEXT: [[my_offset:%\w+]] = OpVectorExtractDynamic %uint [[offset]] [[quad_idx]] +; CHECK-NEXT: [[target_inv:%\w+]] = OpIAdd %uint [[quad_ldr]] [[my_offset]] +; CHECK-NEXT: [[is_active:%\w+]] = OpGroupNonUniformBallotBitExtract %bool [[subgroup]] [[ballot_value]] [[target_inv]] +; CHECK-NEXT: [[shuffle:%\w+]] = OpGroupNonUniformShuffle [[type]] [[subgroup]] [[data]] [[target_inv]] +; CHECK-NEXT: [[result:%\w+]] = OpSelect [[type]] [[is_active]] [[shuffle]] [[null]] + OpCapability Shader + OpExtension "SPV_AMD_shader_ballot" + %ext = OpExtInstImport "SPV_AMD_shader_ballot" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %uint_x = OpConstant %uint 1 + %uint_y = OpConstant %uint 2 + %uint_z = OpConstant %uint 3 + %uint_w = OpConstant %uint 0 + %v4uint = OpTypeVector %uint 4 + %offset = OpConstantComposite %v4uint %uint_x %uint_y %uint_z %uint_x + %1 = OpFunction %void None %3 + %6 = OpLabel + %data = OpUndef %uint + %9 = OpExtInst %uint %ext SwizzleInvocationsAMD %data %offset + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} +TEST_F(AmdExtToKhrTest, ReplaceSwizzleInvocationsMaskedAMD) { + const std::string text = 
R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_ballot" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_ballot" +; CHECK: OpDecorate [[var:%\w+]] BuiltIn SubgroupLocalInvocationId +; CHECK: [[x:%\w+]] = OpConstant %uint 19 +; CHECK: [[y:%\w+]] = OpConstant %uint 12 +; CHECK: [[z:%\w+]] = OpConstant %uint 16 +; CHECK: [[var]] = OpVariable %_ptr_Input_uint Input +; CHECK: [[mask_extend:%\w+]] = OpConstant %uint 4294967264 +; CHECK: [[uint_max:%\w+]] = OpConstant %uint 4294967295 +; CHECK: [[subgroup:%\w+]] = OpConstant %uint 3 +; CHECK: [[ballot_value:%\w+]] = OpConstantComposite %v4uint [[uint_max]] [[uint_max]] [[uint_max]] [[uint_max]] +; CHECK: [[null:%\w+]] = OpConstantNull [[type:%\w+]] +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[data:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[id:%\w+]] = OpLoad %uint [[var]] +; CHECK-NEXT: [[and_mask:%\w+]] = OpBitwiseOr %uint [[x]] [[mask_extend]] +; CHECK-NEXT: [[and:%\w+]] = OpBitwiseAnd %uint [[id]] [[and_mask]] +; CHECK-NEXT: [[or:%\w+]] = OpBitwiseOr %uint [[and]] [[y]] +; CHECK-NEXT: [[target_inv:%\w+]] = OpBitwiseXor %uint [[or]] [[z]] +; CHECK-NEXT: [[is_active:%\w+]] = OpGroupNonUniformBallotBitExtract %bool [[subgroup]] [[ballot_value]] [[target_inv]] +; CHECK-NEXT: [[shuffle:%\w+]] = OpGroupNonUniformShuffle [[type]] [[subgroup]] [[data]] [[target_inv]] +; CHECK-NEXT: [[result:%\w+]] = OpSelect [[type]] [[is_active]] [[shuffle]] [[null]] + OpCapability Shader + OpExtension "SPV_AMD_shader_ballot" + %ext = OpExtInstImport "SPV_AMD_shader_ballot" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %uint_x = OpConstant %uint 19 + %uint_y = OpConstant %uint 12 + %uint_z = OpConstant %uint 16 + %v3uint = OpTypeVector %uint 3 + %mask = OpConstantComposite %v3uint %uint_x %uint_y %uint_z + %1 = OpFunction %void None %3 + %6 = OpLabel + %data = OpUndef 
%uint + %9 = OpExtInst %uint %ext SwizzleInvocationsMaskedAMD %data %mask + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} +TEST_F(AmdExtToKhrTest, ReplaceWriteInvocationAMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_ballot" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_ballot" +; CHECK: OpDecorate [[var:%\w+]] BuiltIn SubgroupLocalInvocationId +; CHECK: [[var]] = OpVariable %_ptr_Input_uint Input +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[input_val:%\w+]] = OpUndef %uint +; CHECK-NEXT: [[write_val:%\w+]] = OpUndef %uint +; CHECK-NEXT: [[ld:%\w+]] = OpLoad %uint [[var]] +; CHECK-NEXT: [[cmp:%\w+]] = OpIEqual %bool [[ld]] %uint_3 +; CHECK-NEXT: [[result:%\w+]] = OpSelect %uint [[cmp]] [[write_val]] [[input_val]] + OpCapability Shader + OpExtension "SPV_AMD_shader_ballot" + %ext = OpExtInstImport "SPV_AMD_shader_ballot" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %uint + %8 = OpUndef %uint + %9 = OpExtInst %uint %ext WriteInvocationAMD %7 %8 %uint_3 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, SetVersion) { + const std::string text = R"( + OpCapability Shader + OpCapability Int64 + OpExtension "SPV_AMD_shader_ballot" + %1 = OpExtInstImport "SPV_AMD_shader_ballot" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "func" + OpExecutionMode %2 OriginUpperLeft + %void = OpTypeVoid + %4 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %ulong = OpTypeInt 64 0 + %ulong_0 = OpConstant %ulong 0 + %2 = OpFunction %void None %4 + %8 = OpLabel + %9 = OpExtInst %uint %1 MbcntAMD %ulong_0 + OpReturn + OpFunctionEnd +)"; + + // Set the version to 1.1 and make sure it is upgraded to 1.3. 
+ SetTargetEnv(SPV_ENV_UNIVERSAL_1_1); + SetDisassembleOptions(0); + auto result = SinglePassRunAndDisassemble( + text, /* skip_nop = */ true, /* skip_validation = */ false); + + EXPECT_EQ(Pass::Status::SuccessWithChange, std::get<1>(result)); + const std::string& output = std::get<0>(result); + EXPECT_THAT(output, HasSubstr("Version: 1.3")); +} + +TEST_F(AmdExtToKhrTest, SetVersion1) { + const std::string text = R"( + OpCapability Shader + OpCapability Int64 + OpExtension "SPV_AMD_shader_ballot" + %1 = OpExtInstImport "SPV_AMD_shader_ballot" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "func" + OpExecutionMode %2 OriginUpperLeft + %void = OpTypeVoid + %4 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %ulong = OpTypeInt 64 0 + %ulong_0 = OpConstant %ulong 0 + %2 = OpFunction %void None %4 + %8 = OpLabel + %9 = OpExtInst %uint %1 MbcntAMD %ulong_0 + OpReturn + OpFunctionEnd +)"; + + // Set the version to 1.4 and make sure it stays the same. + SetTargetEnv(SPV_ENV_UNIVERSAL_1_4); + SetDisassembleOptions(0); + auto result = SinglePassRunAndDisassemble( + text, /* skip_nop = */ true, /* skip_validation = */ false); + + EXPECT_EQ(Pass::Status::SuccessWithChange, std::get<1>(result)); + const std::string& output = std::get<0>(result); + EXPECT_THAT(output, HasSubstr("Version: 1.4")); +} + +} // namespace +} // namespace opt +} // namespace spvtools diff --git a/3rdparty/spirv-tools/test/opt/fold_test.cpp b/3rdparty/spirv-tools/test/opt/fold_test.cpp index a9f30890f..b5998c722 100644 --- a/3rdparty/spirv-tools/test/opt/fold_test.cpp +++ b/3rdparty/spirv-tools/test/opt/fold_test.cpp @@ -210,6 +210,7 @@ OpName %main "main" %float_2049 = OpConstant %float 2049 %float_n2049 = OpConstant %float -2049 %float_0p5 = OpConstant %float 0.5 +%float_0p2 = OpConstant %float 0.2 %float_pi = OpConstant %float 1.5555 %float_1e16 = OpConstant %float 1e16 %float_n1e16 = OpConstant %float -1e16 @@ -1465,24 +1466,14 @@ INSTANTIATE_TEST_SUITE_P(FloatConstantFoldingTest, 
FloatInstructionFoldingTest, "OpReturn\n" + "OpFunctionEnd", 2, std::numeric_limits::quiet_NaN()), - // Test case 20: QuantizeToF16 inf + // Test case 20: FMix 1.0 4.0 0.2 InstructionFoldingCase( Header() + "%main = OpFunction %void None %void_func\n" + "%main_lab = OpLabel\n" + - "%2 = OpFDiv %float %float_1 %float_0\n" + - "%3 = OpQuantizeToF16 %float %3\n" + + "%2 = OpExtInst %float %1 FMix %float_1 %float_4 %float_0p2\n" + "OpReturn\n" + "OpFunctionEnd", - 2, std::numeric_limits::infinity()), - // Test case 21: QuantizeToF16 -inf - InstructionFoldingCase( - Header() + "%main = OpFunction %void None %void_func\n" + - "%main_lab = OpLabel\n" + - "%2 = OpFDiv %float %float_n1 %float_0\n" + - "%3 = OpQuantizeToF16 %float %3\n" + - "OpReturn\n" + - "OpFunctionEnd", - 2, -std::numeric_limits::infinity()) + 2, 1.6f) )); // clang-format on diff --git a/3rdparty/spirv-tools/test/opt/inst_buff_addr_check_test.cpp b/3rdparty/spirv-tools/test/opt/inst_buff_addr_check_test.cpp index f859ee556..c31266e6f 100644 --- a/3rdparty/spirv-tools/test/opt/inst_buff_addr_check_test.cpp +++ b/3rdparty/spirv-tools/test/opt/inst_buff_addr_check_test.cpp @@ -383,7 +383,6 @@ OpTypeForwardPointer %_ptr_PhysicalStorageBufferEXT_blockType PhysicalStorageBuf R"(OpCapability Shader OpCapability PhysicalStorageBufferAddressesEXT OpCapability Int64 -OpCapability Int64 OpExtension "SPV_EXT_physical_storage_buffer" OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" diff --git a/3rdparty/spirv-tools/tools/opt/opt.cpp b/3rdparty/spirv-tools/tools/opt/opt.cpp index c18b64c50..b229c8413 100644 --- a/3rdparty/spirv-tools/tools/opt/opt.cpp +++ b/3rdparty/spirv-tools/tools/opt/opt.cpp @@ -109,6 +109,11 @@ NOTE: The optimizer is a work in progress. 
Options (in lexicographical order):)", program, program); printf(R"( + --amd-ext-to-khr + Replaces the extensions VK_AMD_shader_ballot, VK_AMD_gcn_shader, + and VK_AMD_shader_trinary_minmax with equivalent code using core + instructions and capabilities.)"); + printf(R"( --ccp Apply the conditional constant propagation transform. This will propagate constant values throughout the program, and simplify