diff --git a/3rdparty/spirv-tools/Android.mk b/3rdparty/spirv-tools/Android.mk index 94281162a..361cfce7a 100644 --- a/3rdparty/spirv-tools/Android.mk +++ b/3rdparty/spirv-tools/Android.mk @@ -89,6 +89,7 @@ SPVTOOLS_OPT_SRC_FILES := \ source/opt/composite.cpp \ source/opt/const_folding_rules.cpp \ source/opt/constants.cpp \ + source/opt/convert_to_half_pass.cpp \ source/opt/copy_prop_arrays.cpp \ source/opt/dead_branch_elim_pass.cpp \ source/opt/dead_insert_elim_pass.cpp \ @@ -153,6 +154,7 @@ SPVTOOLS_OPT_SRC_FILES := \ source/opt/reduce_load_size.cpp \ source/opt/redundancy_elimination.cpp \ source/opt/register_pressure.cpp \ + source/opt/relax_float_ops_pass.cpp \ source/opt/remove_duplicates_pass.cpp \ source/opt/replace_invalid_opc.cpp \ source/opt/scalar_analysis.cpp \ diff --git a/3rdparty/spirv-tools/BUILD.gn b/3rdparty/spirv-tools/BUILD.gn index d62aaabe0..492f2856d 100644 --- a/3rdparty/spirv-tools/BUILD.gn +++ b/3rdparty/spirv-tools/BUILD.gn @@ -479,6 +479,8 @@ static_library("spvtools_opt") { "source/opt/const_folding_rules.h", "source/opt/constants.cpp", "source/opt/constants.h", + "source/opt/convert_to_half_pass.cpp", + "source/opt/convert_to_half_pass.h", "source/opt/copy_prop_arrays.cpp", "source/opt/copy_prop_arrays.h", "source/opt/dead_branch_elim_pass.cpp", @@ -611,6 +613,8 @@ static_library("spvtools_opt") { "source/opt/reflect.h", "source/opt/register_pressure.cpp", "source/opt/register_pressure.h", + "source/opt/relax_float_ops_pass.cpp", + "source/opt/relax_float_ops_pass.h", "source/opt/remove_duplicates_pass.cpp", "source/opt/remove_duplicates_pass.h", "source/opt/replace_invalid_opc.cpp", @@ -766,7 +770,7 @@ static_library("spvtools_reduce") { } group("SPIRV-Tools") { - deps = [ + public_deps = [ ":spvtools", ":spvtools_link", ":spvtools_opt", @@ -846,6 +850,8 @@ if (build_with_chromium) { "//testing/gmock", "//testing/gtest", "//testing/gtest:gtest_main", + "//third_party/googletest:gmock", + "//third_party/googletest:gtest", ] if 
(is_clang) { diff --git a/3rdparty/spirv-tools/CHANGES b/3rdparty/spirv-tools/CHANGES index 57afc6387..10062f93b 100644 --- a/3rdparty/spirv-tools/CHANGES +++ b/3rdparty/spirv-tools/CHANGES @@ -1,7 +1,25 @@ Revision history for SPIRV-Tools v2019.5-dev 2019-08-08 - - Start v2019.5-dev + - General: + - Optimizer + - Add descriptor array scalar replacement (#2742) + - Add pass to wrap OpKill in a function call (#2790) + - Fold FMix during constant folding. (#2818) + - Add pass to replace AMD shader ballot extension (#2811) + - Add pass to make Float32 operation relax precision (#2808) + - Add pass to make relax precision operation Float16 (#2808) + Fixes: + Instrument: Fix version 2 output record write for tess eval shaders. (#2782) + Instrument: Add support for Buffer Device Address extension (#2792) + Fix check for changed binary in API call. (#2798) + - Validator + Fixes: + Fix validation of constant matrices (#2794) + Update "remquor" validation + - Reduce + - Remove relaxed precision decorations (#2797) + Fixes: v2019.4 2019-08-08 - General: diff --git a/3rdparty/spirv-tools/include/generated/build-version.inc b/3rdparty/spirv-tools/include/generated/build-version.inc index a050519d2..2dda9bc16 100644 --- a/3rdparty/spirv-tools/include/generated/build-version.inc +++ b/3rdparty/spirv-tools/include/generated/build-version.inc @@ -1 +1 @@ -"v2019.5-dev", "SPIRV-Tools v2019.5-dev v2019.4-25-g65e362b7" +"v2019.5-dev", "SPIRV-Tools v2019.5-dev v2019.4-37-g76261e2a" diff --git a/3rdparty/spirv-tools/include/spirv-tools/optimizer.hpp b/3rdparty/spirv-tools/include/spirv-tools/optimizer.hpp index 4e54b1a1c..509051de8 100644 --- a/3rdparty/spirv-tools/include/spirv-tools/optimizer.hpp +++ b/3rdparty/spirv-tools/include/spirv-tools/optimizer.hpp @@ -68,6 +68,11 @@ class Optimizer { // The constructed instance will have an empty message consumer, which just // ignores all messages from the library. Use SetMessageConsumer() to supply // one if messages are of concern. 
+ // + // For collections of passes that are meant to transform the input into + // another execution environment, then the source environment should be + // supplied. e.g. for VulkanToWebGPUPasses the environment should be + // SPV_ENV_VULKAN_1_1 not SPV_ENV_WEBGPU_0. explicit Optimizer(spv_target_env env); // Disables copy/move constructor/assignment operations. @@ -674,6 +679,22 @@ Optimizer::PassToken CreateLoopUnrollPass(bool fully_unroll, int factor = 0); // processed (see IsSSATargetVar for details). Optimizer::PassToken CreateSSARewritePass(); +// Create pass to convert relaxed precision instructions to half precision. +// This pass converts as many relaxed float32 arithmetic operations to half as +// possible. It converts any float32 operands to half if needed. It converts +// any resulting half precision values back to float32 as needed. No variables +// are changed. No image operations are changed. +// +// Best if run late since it will generate better code with unneeded function +// scope loads and stores and composite inserts and extracts removed. Also best +// if followed by instruction simplification, redundancy elimination and DCE. +Optimizer::PassToken CreateConvertRelaxedToHalfPass(); + +// Create relax float ops pass. +// This pass decorates all float32 result instructions with RelaxedPrecision +// if not already so decorated. +Optimizer::PassToken CreateRelaxFloatOpsPass(); + // Create copy propagate arrays pass. // This pass looks to copy propagate memory references for arrays. It looks // for specific code patterns to recognize array copies. 
diff --git a/3rdparty/spirv-tools/source/CMakeLists.txt b/3rdparty/spirv-tools/source/CMakeLists.txt index cb63ff0a2..37348e9a4 100644 --- a/3rdparty/spirv-tools/source/CMakeLists.txt +++ b/3rdparty/spirv-tools/source/CMakeLists.txt @@ -339,7 +339,9 @@ spvtools_pch(SPIRV_SOURCES pch_source) add_library(${SPIRV_TOOLS} ${SPIRV_SOURCES}) spvtools_default_compile_options(${SPIRV_TOOLS}) target_include_directories(${SPIRV_TOOLS} - PUBLIC ${spirv-tools_SOURCE_DIR}/include + PUBLIC + $ + $ PRIVATE ${spirv-tools_BINARY_DIR} PRIVATE ${SPIRV_HEADER_INCLUDE_DIR} ) @@ -350,7 +352,9 @@ add_dependencies( ${SPIRV_TOOLS} core_tables enum_string_mapping extinst_tables add_library(${SPIRV_TOOLS}-shared SHARED ${SPIRV_SOURCES}) spvtools_default_compile_options(${SPIRV_TOOLS}-shared) target_include_directories(${SPIRV_TOOLS}-shared - PUBLIC ${spirv-tools_SOURCE_DIR}/include + PUBLIC + $ + $ PRIVATE ${spirv-tools_BINARY_DIR} PRIVATE ${SPIRV_HEADER_INCLUDE_DIR} ) @@ -372,10 +376,11 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") endif() if(ENABLE_SPIRV_TOOLS_INSTALL) - install(TARGETS ${SPIRV_TOOLS} ${SPIRV_TOOLS}-shared + install(TARGETS ${SPIRV_TOOLS} ${SPIRV_TOOLS}-shared EXPORT ${SPIRV_TOOLS}Targets RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(EXPORT ${SPIRV_TOOLS}Targets DESTINATION lib/cmake) endif(ENABLE_SPIRV_TOOLS_INSTALL) if(MSVC) diff --git a/3rdparty/spirv-tools/source/fuzz/CMakeLists.txt b/3rdparty/spirv-tools/source/fuzz/CMakeLists.txt index 49ee843ab..7b5deb897 100644 --- a/3rdparty/spirv-tools/source/fuzz/CMakeLists.txt +++ b/3rdparty/spirv-tools/source/fuzz/CMakeLists.txt @@ -118,8 +118,12 @@ if(SPIRV_BUILD_FUZZER) endif() target_include_directories(SPIRV-Tools-fuzz - PUBLIC ${spirv-tools_SOURCE_DIR}/include - PUBLIC ${SPIRV_HEADER_INCLUDE_DIR} + PUBLIC + $ + $ + PUBLIC + $ + $ PRIVATE ${spirv-tools_BINARY_DIR} PRIVATE ${CMAKE_BINARY_DIR}) @@ -133,10 +137,11 @@ 
if(SPIRV_BUILD_FUZZER) spvtools_check_symbol_exports(SPIRV-Tools-fuzz) if(ENABLE_SPIRV_TOOLS_INSTALL) - install(TARGETS SPIRV-Tools-fuzz + install(TARGETS SPIRV-Tools-fuzz EXPORT SPIRV-Tools-fuzzTargets RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(EXPORT SPIRV-Tools-fuzzTargets DESTINATION lib/cmake) endif(ENABLE_SPIRV_TOOLS_INSTALL) endif(SPIRV_BUILD_FUZZER) diff --git a/3rdparty/spirv-tools/source/fuzz/data_descriptor.h b/3rdparty/spirv-tools/source/fuzz/data_descriptor.h index 731bd2191..856c653c7 100644 --- a/3rdparty/spirv-tools/source/fuzz/data_descriptor.h +++ b/3rdparty/spirv-tools/source/fuzz/data_descriptor.h @@ -36,4 +36,4 @@ struct DataDescriptorEquals { } // namespace fuzz } // namespace spvtools -#endif // #define SOURCE_FUZZ_DATA_DESCRIPTOR_H_ +#endif // SOURCE_FUZZ_DATA_DESCRIPTOR_H_ diff --git a/3rdparty/spirv-tools/source/fuzz/fact_manager.h b/3rdparty/spirv-tools/source/fuzz/fact_manager.h index f6ea2477d..e2fccbb7a 100644 --- a/3rdparty/spirv-tools/source/fuzz/fact_manager.h +++ b/3rdparty/spirv-tools/source/fuzz/fact_manager.h @@ -128,4 +128,4 @@ class FactManager { } // namespace fuzz } // namespace spvtools -#endif // #define SOURCE_FUZZ_FACT_MANAGER_H_ +#endif // SOURCE_FUZZ_FACT_MANAGER_H_ diff --git a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass.h b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass.h index 4d0861e6f..cd1b19443 100644 --- a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass.h +++ b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass.h @@ -58,4 +58,4 @@ class FuzzerPass { } // namespace fuzz } // namespace spvtools -#endif // #define SOURCE_FUZZ_FUZZER_PASS_H_ +#endif // SOURCE_FUZZ_FUZZER_PASS_H_ diff --git a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_dead_breaks.h b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_dead_breaks.h index ad1985653..12a5095f0 100644 --- a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_dead_breaks.h +++ 
b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_dead_breaks.h @@ -35,4 +35,4 @@ class FuzzerPassAddDeadBreaks : public FuzzerPass { } // namespace fuzz } // namespace spvtools -#endif // #define SOURCE_FUZZ_FUZZER_PASS_ADD_DEAD_BREAKS_H_ +#endif // SOURCE_FUZZ_FUZZER_PASS_ADD_DEAD_BREAKS_H_ diff --git a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_dead_continues.h b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_dead_continues.h index 6cadc9765..d067f1c4d 100644 --- a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_dead_continues.h +++ b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_dead_continues.h @@ -36,4 +36,4 @@ class FuzzerPassAddDeadContinues : public FuzzerPass { } // namespace fuzz } // namespace spvtools -#endif // #define SOURCE_FUZZ_FUZZER_PASS_ADD_DEAD_CONTINUES_H_ +#endif // SOURCE_FUZZ_FUZZER_PASS_ADD_DEAD_CONTINUES_H_ diff --git a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_useful_constructs.h b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_useful_constructs.h index a8ac9a303..7dc00f13e 100644 --- a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_useful_constructs.h +++ b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_add_useful_constructs.h @@ -43,4 +43,4 @@ class FuzzerPassAddUsefulConstructs : public FuzzerPass { } // namespace fuzz } // namespace spvtools -#endif // #define SOURCE_FUZZ_FUZZER_PASS_ADD_USEFUL_CONSTRUCTS_ +#endif // SOURCE_FUZZ_FUZZER_PASS_ADD_USEFUL_CONSTRUCTS_ diff --git a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_obfuscate_constants.h b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_obfuscate_constants.h index 03477a564..f34717b2e 100644 --- a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_obfuscate_constants.h +++ b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_obfuscate_constants.h @@ -104,4 +104,4 @@ class FuzzerPassObfuscateConstants : public FuzzerPass { } // namespace fuzz } // namespace spvtools -#endif // #define SOURCE_FUZZ_FUZZER_PASS_OBFUSCATE_CONSTANTS_ +#endif // 
SOURCE_FUZZ_FUZZER_PASS_OBFUSCATE_CONSTANTS_ diff --git a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_permute_blocks.h b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_permute_blocks.h index d8aed72bb..6735e952b 100644 --- a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_permute_blocks.h +++ b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_permute_blocks.h @@ -36,4 +36,4 @@ class FuzzerPassPermuteBlocks : public FuzzerPass { } // namespace fuzz } // namespace spvtools -#endif // #define SOURCE_FUZZ_FUZZER_PASS_PERMUTE_BLOCKS_ +#endif // SOURCE_FUZZ_FUZZER_PASS_PERMUTE_BLOCKS_ diff --git a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_split_blocks.h b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_split_blocks.h index 951022b2f..6e56dde95 100644 --- a/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_split_blocks.h +++ b/3rdparty/spirv-tools/source/fuzz/fuzzer_pass_split_blocks.h @@ -36,4 +36,4 @@ class FuzzerPassSplitBlocks : public FuzzerPass { } // namespace fuzz } // namespace spvtools -#endif // #define SOURCE_FUZZ_FUZZER_PASS_SPLIT_BLOCKS_ +#endif // SOURCE_FUZZ_FUZZER_PASS_SPLIT_BLOCKS_ diff --git a/3rdparty/spirv-tools/source/fuzz/uniform_buffer_element_descriptor.h b/3rdparty/spirv-tools/source/fuzz/uniform_buffer_element_descriptor.h index d35de5762..f5d7320a2 100644 --- a/3rdparty/spirv-tools/source/fuzz/uniform_buffer_element_descriptor.h +++ b/3rdparty/spirv-tools/source/fuzz/uniform_buffer_element_descriptor.h @@ -49,4 +49,4 @@ opt::Instruction* FindUniformVariable( } // namespace fuzz } // namespace spvtools -#endif // #define SOURCE_FUZZ_UNIFORM_BUFFER_ELEMENT_DESCRIPTOR_H_ +#endif // SOURCE_FUZZ_UNIFORM_BUFFER_ELEMENT_DESCRIPTOR_H_ diff --git a/3rdparty/spirv-tools/source/link/CMakeLists.txt b/3rdparty/spirv-tools/source/link/CMakeLists.txt index 8ca4df39f..b8c4332b8 100644 --- a/3rdparty/spirv-tools/source/link/CMakeLists.txt +++ b/3rdparty/spirv-tools/source/link/CMakeLists.txt @@ -17,8 +17,12 @@ add_library(SPIRV-Tools-link 
spvtools_default_compile_options(SPIRV-Tools-link) target_include_directories(SPIRV-Tools-link - PUBLIC ${spirv-tools_SOURCE_DIR}/include - PUBLIC ${SPIRV_HEADER_INCLUDE_DIR} + PUBLIC + $ + $ + PUBLIC + $ + $ PRIVATE ${spirv-tools_BINARY_DIR} ) # We need the IR functionnalities from the optimizer @@ -29,8 +33,9 @@ set_property(TARGET SPIRV-Tools-link PROPERTY FOLDER "SPIRV-Tools libraries") spvtools_check_symbol_exports(SPIRV-Tools-link) if(ENABLE_SPIRV_TOOLS_INSTALL) - install(TARGETS SPIRV-Tools-link + install(TARGETS SPIRV-Tools-link EXPORT SPIRV-Tools-linkTargets RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(EXPORT SPIRV-Tools-linkTargets DESTINATION lib/cmake) endif(ENABLE_SPIRV_TOOLS_INSTALL) diff --git a/3rdparty/spirv-tools/source/opt/CMakeLists.txt b/3rdparty/spirv-tools/source/opt/CMakeLists.txt index 2309ca919..b18610a90 100644 --- a/3rdparty/spirv-tools/source/opt/CMakeLists.txt +++ b/3rdparty/spirv-tools/source/opt/CMakeLists.txt @@ -27,6 +27,7 @@ set(SPIRV_TOOLS_OPT_SOURCES composite.h const_folding_rules.h constants.h + convert_to_half_pass.h copy_prop_arrays.h dead_branch_elim_pass.h dead_insert_elim_pass.h @@ -93,6 +94,7 @@ set(SPIRV_TOOLS_OPT_SOURCES redundancy_elimination.h reflect.h register_pressure.h + relax_float_ops_pass.h remove_duplicates_pass.h replace_invalid_opc.h scalar_analysis.h @@ -132,6 +134,7 @@ set(SPIRV_TOOLS_OPT_SOURCES composite.cpp const_folding_rules.cpp constants.cpp + convert_to_half_pass.cpp copy_prop_arrays.cpp dead_branch_elim_pass.cpp dead_insert_elim_pass.cpp @@ -196,6 +199,7 @@ set(SPIRV_TOOLS_OPT_SOURCES reduce_load_size.cpp redundancy_elimination.cpp register_pressure.cpp + relax_float_ops_pass.cpp remove_duplicates_pass.cpp replace_invalid_opc.cpp scalar_analysis.cpp @@ -231,8 +235,12 @@ add_library(SPIRV-Tools-opt ${SPIRV_TOOLS_OPT_SOURCES}) spvtools_default_compile_options(SPIRV-Tools-opt) 
target_include_directories(SPIRV-Tools-opt - PUBLIC ${spirv-tools_SOURCE_DIR}/include - PUBLIC ${SPIRV_HEADER_INCLUDE_DIR} + PUBLIC + $ + $ + PUBLIC + $ + $ PRIVATE ${spirv-tools_BINARY_DIR} ) # We need the assembling and disassembling functionalities in the main library. @@ -243,8 +251,9 @@ set_property(TARGET SPIRV-Tools-opt PROPERTY FOLDER "SPIRV-Tools libraries") spvtools_check_symbol_exports(SPIRV-Tools-opt) if(ENABLE_SPIRV_TOOLS_INSTALL) - install(TARGETS SPIRV-Tools-opt + install(TARGETS SPIRV-Tools-opt EXPORT SPIRV-Tools-optTargets RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(EXPORT SPIRV-Tools-optTargets DESTINATION lib/cmake) endif(ENABLE_SPIRV_TOOLS_INSTALL) diff --git a/3rdparty/spirv-tools/source/opt/amd_ext_to_khr.cpp b/3rdparty/spirv-tools/source/opt/amd_ext_to_khr.cpp index 1cb5ba5a4..e9b7f8613 100644 --- a/3rdparty/spirv-tools/source/opt/amd_ext_to_khr.cpp +++ b/3rdparty/spirv-tools/source/opt/amd_ext_to_khr.cpp @@ -14,6 +14,9 @@ #include "source/opt/amd_ext_to_khr.h" +#include +#include + #include "ir_builder.h" #include "source/opt/ir_context.h" #include "spv-amd-shader-ballot.insts.inc" @@ -24,22 +27,125 @@ namespace opt { namespace { -enum ExtOpcodes { +enum AmdShaderBallotExtOpcodes { AmdShaderBallotSwizzleInvocationsAMD = 1, AmdShaderBallotSwizzleInvocationsMaskedAMD = 2, AmdShaderBallotWriteInvocationAMD = 3, AmdShaderBallotMbcntAMD = 4 }; +enum AmdShaderTrinaryMinMaxExtOpCodes { + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9 +}; + +enum AmdGcnShader { CubeFaceCoordAMD = 2, CubeFaceIndexAMD = 1, TimeAMD = 3 }; + analysis::Type* GetUIntType(IRContext* ctx) { analysis::Integer int_type(32, false); return ctx->get_type_mgr()->GetRegisteredType(&int_type); } +bool NotImplementedYet(IRContext*, Instruction*, + const std::vector&) { + assert(false && "Not 
implemented."); + return false; +} + +// Returns a folding rule that replaces |op(a,b,c)| by |op(op(a,b),c)|, where +// |op| is either min or max. |opcode| is the binary opcode in the GLSLstd450 +// extended instruction set that corresponds to the trinary instruction being +// replaced. +template +bool ReplaceTrinaryMinMax(IRContext* ctx, Instruction* inst, + const std::vector&) { + uint32_t glsl405_ext_inst_id = + ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450(); + if (glsl405_ext_inst_id == 0) { + ctx->AddExtInstImport("GLSL.std.450"); + glsl405_ext_inst_id = + ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450(); + } + + InstructionBuilder ir_builder( + ctx, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + + uint32_t op1 = inst->GetSingleWordInOperand(2); + uint32_t op2 = inst->GetSingleWordInOperand(3); + uint32_t op3 = inst->GetSingleWordInOperand(4); + + Instruction* temp = ir_builder.AddNaryExtendedInstruction( + inst->type_id(), glsl405_ext_inst_id, opcode, {op1, op2}); + + Instruction::OperandList new_operands; + new_operands.push_back({SPV_OPERAND_TYPE_ID, {glsl405_ext_inst_id}}); + new_operands.push_back({SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER, + {static_cast(opcode)}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {temp->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {op3}}); + + inst->SetInOperands(std::move(new_operands)); + ctx->UpdateDefUse(inst); + return true; +} + +// Returns a folding rule that replaces |mid(a,b,c)| by |clamp(a, min(b,c), +// max(b,c)|. The three parameters are the opcode that correspond to the min, +// max, and clamp operations for the type of the instruction being replaced. 
+template +bool ReplaceTrinaryMid(IRContext* ctx, Instruction* inst, + const std::vector&) { + uint32_t glsl405_ext_inst_id = + ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450(); + if (glsl405_ext_inst_id == 0) { + ctx->AddExtInstImport("GLSL.std.450"); + glsl405_ext_inst_id = + ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450(); + } + + InstructionBuilder ir_builder( + ctx, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + + uint32_t op1 = inst->GetSingleWordInOperand(2); + uint32_t op2 = inst->GetSingleWordInOperand(3); + uint32_t op3 = inst->GetSingleWordInOperand(4); + + Instruction* min = ir_builder.AddNaryExtendedInstruction( + inst->type_id(), glsl405_ext_inst_id, static_cast(min_opcode), + {op2, op3}); + Instruction* max = ir_builder.AddNaryExtendedInstruction( + inst->type_id(), glsl405_ext_inst_id, static_cast(max_opcode), + {op2, op3}); + + Instruction::OperandList new_operands; + new_operands.push_back({SPV_OPERAND_TYPE_ID, {glsl405_ext_inst_id}}); + new_operands.push_back({SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER, + {static_cast(clamp_opcode)}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {op1}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {min->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {max->result_id()}}); + + inst->SetInOperands(std::move(new_operands)); + ctx->UpdateDefUse(inst); + return true; +} + // Returns a folding rule that will replace the opcode with |opcode| and add // the capabilities required. The folding rule assumes it is folding an // OpGroup*NonUniformAMD instruction from the SPV_AMD_shader_ballot extension. 
-FoldingRule ReplaceGroupNonuniformOperationOpCode(SpvOp new_opcode) { +template +bool ReplaceGroupNonuniformOperationOpCode( + IRContext* ctx, Instruction* inst, + const std::vector&) { switch (new_opcode) { case SpvOpGroupNonUniformIAdd: case SpvOpGroupNonUniformFAdd: @@ -56,27 +162,24 @@ FoldingRule ReplaceGroupNonuniformOperationOpCode(SpvOp new_opcode) { "Should be replacing with a group non uniform arithmetic operation."); } - return [new_opcode](IRContext* ctx, Instruction* inst, - const std::vector&) { - switch (inst->opcode()) { - case SpvOpGroupIAddNonUniformAMD: - case SpvOpGroupFAddNonUniformAMD: - case SpvOpGroupUMinNonUniformAMD: - case SpvOpGroupSMinNonUniformAMD: - case SpvOpGroupFMinNonUniformAMD: - case SpvOpGroupUMaxNonUniformAMD: - case SpvOpGroupSMaxNonUniformAMD: - case SpvOpGroupFMaxNonUniformAMD: - break; - default: - assert(false && - "Should be replacing a group non uniform arithmetic operation."); - } + switch (inst->opcode()) { + case SpvOpGroupIAddNonUniformAMD: + case SpvOpGroupFAddNonUniformAMD: + case SpvOpGroupUMinNonUniformAMD: + case SpvOpGroupSMinNonUniformAMD: + case SpvOpGroupFMinNonUniformAMD: + case SpvOpGroupUMaxNonUniformAMD: + case SpvOpGroupSMaxNonUniformAMD: + case SpvOpGroupFMaxNonUniformAMD: + break; + default: + assert(false && + "Should be replacing a group non uniform arithmetic operation."); + } - ctx->AddCapability(SpvCapabilityGroupNonUniformArithmetic); - inst->SetOpcode(new_opcode); - return true; - }; + ctx->AddCapability(SpvCapabilityGroupNonUniformArithmetic); + inst->SetOpcode(new_opcode); + return true; } // Returns a folding rule that will replace the SwizzleInvocationsAMD extended @@ -112,84 +215,82 @@ FoldingRule ReplaceGroupNonuniformOperationOpCode(SpvOp new_opcode) { // clang-format on // // Also adding the capabilities and builtins that are needed. 
-FoldingRule ReplaceSwizzleInvocations() { - return [](IRContext* ctx, Instruction* inst, - const std::vector&) { - analysis::TypeManager* type_mgr = ctx->get_type_mgr(); - analysis::ConstantManager* const_mgr = ctx->get_constant_mgr(); +bool ReplaceSwizzleInvocations(IRContext* ctx, Instruction* inst, + const std::vector&) { + analysis::TypeManager* type_mgr = ctx->get_type_mgr(); + analysis::ConstantManager* const_mgr = ctx->get_constant_mgr(); - ctx->AddExtension("SPV_KHR_shader_ballot"); - ctx->AddCapability(SpvCapabilityGroupNonUniformBallot); - ctx->AddCapability(SpvCapabilityGroupNonUniformShuffle); + ctx->AddExtension("SPV_KHR_shader_ballot"); + ctx->AddCapability(SpvCapabilityGroupNonUniformBallot); + ctx->AddCapability(SpvCapabilityGroupNonUniformShuffle); - InstructionBuilder ir_builder( - ctx, inst, - IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + InstructionBuilder ir_builder( + ctx, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); - uint32_t data_id = inst->GetSingleWordInOperand(2); - uint32_t offset_id = inst->GetSingleWordInOperand(3); + uint32_t data_id = inst->GetSingleWordInOperand(2); + uint32_t offset_id = inst->GetSingleWordInOperand(3); - // Get the subgroup invocation id. - uint32_t var_id = - ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId); - assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable."); - Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id); - Instruction* var_ptr_type = - ctx->get_def_use_mgr()->GetDef(var_inst->type_id()); - uint32_t uint_type_id = var_ptr_type->GetSingleWordInOperand(1); + // Get the subgroup invocation id. 
+ uint32_t var_id = + ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId); + assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable."); + Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id); + Instruction* var_ptr_type = + ctx->get_def_use_mgr()->GetDef(var_inst->type_id()); + uint32_t uint_type_id = var_ptr_type->GetSingleWordInOperand(1); - Instruction* id = ir_builder.AddLoad(uint_type_id, var_id); + Instruction* id = ir_builder.AddLoad(uint_type_id, var_id); - uint32_t quad_mask = ir_builder.GetUintConstantId(3); + uint32_t quad_mask = ir_builder.GetUintConstantId(3); - // This gives the offset in the group of 4 of this invocation. - Instruction* quad_idx = ir_builder.AddBinaryOp( - uint_type_id, SpvOpBitwiseAnd, id->result_id(), quad_mask); + // This gives the offset in the group of 4 of this invocation. + Instruction* quad_idx = ir_builder.AddBinaryOp(uint_type_id, SpvOpBitwiseAnd, + id->result_id(), quad_mask); - // Get the invocation id of the first invocation in the group of 4. - Instruction* quad_ldr = ir_builder.AddBinaryOp( - uint_type_id, SpvOpBitwiseXor, id->result_id(), quad_idx->result_id()); + // Get the invocation id of the first invocation in the group of 4. + Instruction* quad_ldr = ir_builder.AddBinaryOp( + uint_type_id, SpvOpBitwiseXor, id->result_id(), quad_idx->result_id()); - // Get the offset of the target invocation from the offset vector. - Instruction* my_offset = - ir_builder.AddBinaryOp(uint_type_id, SpvOpVectorExtractDynamic, - offset_id, quad_idx->result_id()); + // Get the offset of the target invocation from the offset vector. + Instruction* my_offset = + ir_builder.AddBinaryOp(uint_type_id, SpvOpVectorExtractDynamic, offset_id, + quad_idx->result_id()); - // Determine the index of the invocation to read from. - Instruction* target_inv = ir_builder.AddBinaryOp( - uint_type_id, SpvOpIAdd, quad_ldr->result_id(), my_offset->result_id()); + // Determine the index of the invocation to read from. 
+ Instruction* target_inv = ir_builder.AddBinaryOp( + uint_type_id, SpvOpIAdd, quad_ldr->result_id(), my_offset->result_id()); - // Do the group operations - uint32_t uint_max_id = ir_builder.GetUintConstantId(0xFFFFFFFF); - uint32_t subgroup_scope = ir_builder.GetUintConstantId(SpvScopeSubgroup); - const auto* ballot_value_const = const_mgr->GetConstant( - type_mgr->GetUIntVectorType(4), - {uint_max_id, uint_max_id, uint_max_id, uint_max_id}); - Instruction* ballot_value = - const_mgr->GetDefiningInstruction(ballot_value_const); - Instruction* is_active = ir_builder.AddNaryOp( - type_mgr->GetBoolTypeId(), SpvOpGroupNonUniformBallotBitExtract, - {subgroup_scope, ballot_value->result_id(), target_inv->result_id()}); - Instruction* shuffle = ir_builder.AddNaryOp( - inst->type_id(), SpvOpGroupNonUniformShuffle, - {subgroup_scope, data_id, target_inv->result_id()}); + // Do the group operations + uint32_t uint_max_id = ir_builder.GetUintConstantId(0xFFFFFFFF); + uint32_t subgroup_scope = ir_builder.GetUintConstantId(SpvScopeSubgroup); + const auto* ballot_value_const = const_mgr->GetConstant( + type_mgr->GetUIntVectorType(4), + {uint_max_id, uint_max_id, uint_max_id, uint_max_id}); + Instruction* ballot_value = + const_mgr->GetDefiningInstruction(ballot_value_const); + Instruction* is_active = ir_builder.AddNaryOp( + type_mgr->GetBoolTypeId(), SpvOpGroupNonUniformBallotBitExtract, + {subgroup_scope, ballot_value->result_id(), target_inv->result_id()}); + Instruction* shuffle = + ir_builder.AddNaryOp(inst->type_id(), SpvOpGroupNonUniformShuffle, + {subgroup_scope, data_id, target_inv->result_id()}); - // Create the null constant to use in the select. - const auto* null = const_mgr->GetConstant( - type_mgr->GetType(inst->type_id()), std::vector()); - Instruction* null_inst = const_mgr->GetDefiningInstruction(null); + // Create the null constant to use in the select. 
+ const auto* null = const_mgr->GetConstant(type_mgr->GetType(inst->type_id()), + std::vector()); + Instruction* null_inst = const_mgr->GetDefiningInstruction(null); - // Build the select. - inst->SetOpcode(SpvOpSelect); - Instruction::OperandList new_operands; - new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_active->result_id()}}); - new_operands.push_back({SPV_OPERAND_TYPE_ID, {shuffle->result_id()}}); - new_operands.push_back({SPV_OPERAND_TYPE_ID, {null_inst->result_id()}}); + // Build the select. + inst->SetOpcode(SpvOpSelect); + Instruction::OperandList new_operands; + new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_active->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {shuffle->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {null_inst->result_id()}}); - inst->SetInOperands(std::move(new_operands)); - ctx->UpdateDefUse(inst); - return true; - }; + inst->SetInOperands(std::move(new_operands)); + ctx->UpdateDefUse(inst); + return true; } // Returns a folding rule that will replace the SwizzleInvocationsMaskedAMD @@ -225,89 +326,86 @@ FoldingRule ReplaceSwizzleInvocations() { // clang-format on // // Also adding the capabilities and builtins that are needed. 
-FoldingRule ReplaceSwizzleInvocationsMasked() { - return [](IRContext* ctx, Instruction* inst, - const std::vector&) { - analysis::TypeManager* type_mgr = ctx->get_type_mgr(); - analysis::DefUseManager* def_use_mgr = ctx->get_def_use_mgr(); - analysis::ConstantManager* const_mgr = ctx->get_constant_mgr(); +bool ReplaceSwizzleInvocationsMasked( + IRContext* ctx, Instruction* inst, + const std::vector&) { + analysis::TypeManager* type_mgr = ctx->get_type_mgr(); + analysis::DefUseManager* def_use_mgr = ctx->get_def_use_mgr(); + analysis::ConstantManager* const_mgr = ctx->get_constant_mgr(); - // ctx->AddCapability(SpvCapabilitySubgroupBallotKHR); - ctx->AddCapability(SpvCapabilityGroupNonUniformBallot); - ctx->AddCapability(SpvCapabilityGroupNonUniformShuffle); + ctx->AddCapability(SpvCapabilityGroupNonUniformBallot); + ctx->AddCapability(SpvCapabilityGroupNonUniformShuffle); - InstructionBuilder ir_builder( - ctx, inst, - IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + InstructionBuilder ir_builder( + ctx, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); - // Get the operands to inst, and the components of the mask - uint32_t data_id = inst->GetSingleWordInOperand(2); + // Get the operands to inst, and the components of the mask + uint32_t data_id = inst->GetSingleWordInOperand(2); - Instruction* mask_inst = - def_use_mgr->GetDef(inst->GetSingleWordInOperand(3)); - assert(mask_inst->opcode() == SpvOpConstantComposite && - "The mask is suppose to be a vector constant."); - assert(mask_inst->NumInOperands() == 3 && - "The mask is suppose to have 3 components."); + Instruction* mask_inst = def_use_mgr->GetDef(inst->GetSingleWordInOperand(3)); + assert(mask_inst->opcode() == SpvOpConstantComposite && + "The mask is suppose to be a vector constant."); + assert(mask_inst->NumInOperands() == 3 && + "The mask is suppose to have 3 components."); - uint32_t uint_x = mask_inst->GetSingleWordInOperand(0); - uint32_t uint_y 
= mask_inst->GetSingleWordInOperand(1); - uint32_t uint_z = mask_inst->GetSingleWordInOperand(2); + uint32_t uint_x = mask_inst->GetSingleWordInOperand(0); + uint32_t uint_y = mask_inst->GetSingleWordInOperand(1); + uint32_t uint_z = mask_inst->GetSingleWordInOperand(2); - // Get the subgroup invocation id. - uint32_t var_id = - ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId); - ctx->AddExtension("SPV_KHR_shader_ballot"); - assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable."); - Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id); - Instruction* var_ptr_type = - ctx->get_def_use_mgr()->GetDef(var_inst->type_id()); - uint32_t uint_type_id = var_ptr_type->GetSingleWordInOperand(1); + // Get the subgroup invocation id. + uint32_t var_id = + ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId); + ctx->AddExtension("SPV_KHR_shader_ballot"); + assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable."); + Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id); + Instruction* var_ptr_type = + ctx->get_def_use_mgr()->GetDef(var_inst->type_id()); + uint32_t uint_type_id = var_ptr_type->GetSingleWordInOperand(1); - Instruction* id = ir_builder.AddLoad(uint_type_id, var_id); + Instruction* id = ir_builder.AddLoad(uint_type_id, var_id); - // Do the bitwise operations. - uint32_t mask_extended = ir_builder.GetUintConstantId(0xFFFFFFE0); - Instruction* and_mask = ir_builder.AddBinaryOp(uint_type_id, SpvOpBitwiseOr, - uint_x, mask_extended); - Instruction* and_result = ir_builder.AddBinaryOp( - uint_type_id, SpvOpBitwiseAnd, id->result_id(), and_mask->result_id()); - Instruction* or_result = ir_builder.AddBinaryOp( - uint_type_id, SpvOpBitwiseOr, and_result->result_id(), uint_y); - Instruction* target_inv = ir_builder.AddBinaryOp( - uint_type_id, SpvOpBitwiseXor, or_result->result_id(), uint_z); + // Do the bitwise operations. 
+ uint32_t mask_extended = ir_builder.GetUintConstantId(0xFFFFFFE0); + Instruction* and_mask = ir_builder.AddBinaryOp(uint_type_id, SpvOpBitwiseOr, + uint_x, mask_extended); + Instruction* and_result = ir_builder.AddBinaryOp( + uint_type_id, SpvOpBitwiseAnd, id->result_id(), and_mask->result_id()); + Instruction* or_result = ir_builder.AddBinaryOp( + uint_type_id, SpvOpBitwiseOr, and_result->result_id(), uint_y); + Instruction* target_inv = ir_builder.AddBinaryOp( + uint_type_id, SpvOpBitwiseXor, or_result->result_id(), uint_z); - // Do the group operations - uint32_t uint_max_id = ir_builder.GetUintConstantId(0xFFFFFFFF); - uint32_t subgroup_scope = ir_builder.GetUintConstantId(SpvScopeSubgroup); - const auto* ballot_value_const = const_mgr->GetConstant( - type_mgr->GetUIntVectorType(4), - {uint_max_id, uint_max_id, uint_max_id, uint_max_id}); - Instruction* ballot_value = - const_mgr->GetDefiningInstruction(ballot_value_const); - Instruction* is_active = ir_builder.AddNaryOp( - type_mgr->GetBoolTypeId(), SpvOpGroupNonUniformBallotBitExtract, - {subgroup_scope, ballot_value->result_id(), target_inv->result_id()}); - Instruction* shuffle = ir_builder.AddNaryOp( - inst->type_id(), SpvOpGroupNonUniformShuffle, - {subgroup_scope, data_id, target_inv->result_id()}); + // Do the group operations + uint32_t uint_max_id = ir_builder.GetUintConstantId(0xFFFFFFFF); + uint32_t subgroup_scope = ir_builder.GetUintConstantId(SpvScopeSubgroup); + const auto* ballot_value_const = const_mgr->GetConstant( + type_mgr->GetUIntVectorType(4), + {uint_max_id, uint_max_id, uint_max_id, uint_max_id}); + Instruction* ballot_value = + const_mgr->GetDefiningInstruction(ballot_value_const); + Instruction* is_active = ir_builder.AddNaryOp( + type_mgr->GetBoolTypeId(), SpvOpGroupNonUniformBallotBitExtract, + {subgroup_scope, ballot_value->result_id(), target_inv->result_id()}); + Instruction* shuffle = + ir_builder.AddNaryOp(inst->type_id(), SpvOpGroupNonUniformShuffle, + {subgroup_scope, 
data_id, target_inv->result_id()}); - // Create the null constant to use in the select. - const auto* null = const_mgr->GetConstant( - type_mgr->GetType(inst->type_id()), std::vector()); - Instruction* null_inst = const_mgr->GetDefiningInstruction(null); + // Create the null constant to use in the select. + const auto* null = const_mgr->GetConstant(type_mgr->GetType(inst->type_id()), + std::vector()); + Instruction* null_inst = const_mgr->GetDefiningInstruction(null); - // Build the select. - inst->SetOpcode(SpvOpSelect); - Instruction::OperandList new_operands; - new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_active->result_id()}}); - new_operands.push_back({SPV_OPERAND_TYPE_ID, {shuffle->result_id()}}); - new_operands.push_back({SPV_OPERAND_TYPE_ID, {null_inst->result_id()}}); + // Build the select. + inst->SetOpcode(SpvOpSelect); + Instruction::OperandList new_operands; + new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_active->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {shuffle->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {null_inst->result_id()}}); - inst->SetInOperands(std::move(new_operands)); - ctx->UpdateDefUse(inst); - return true; - }; + inst->SetInOperands(std::move(new_operands)); + ctx->UpdateDefUse(inst); + return true; } // Returns a folding rule that will replace the WriteInvocationAMD extended @@ -326,40 +424,38 @@ FoldingRule ReplaceSwizzleInvocationsMasked() { // %result = OpSelect %type %cmp %write_value %input_value // // Also adding the capabilities and builtins that are needed. 
-FoldingRule ReplaceWriteInvocation() { - return [](IRContext* ctx, Instruction* inst, - const std::vector&) { - uint32_t var_id = - ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId); - ctx->AddCapability(SpvCapabilitySubgroupBallotKHR); - ctx->AddExtension("SPV_KHR_shader_ballot"); - assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable."); - Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id); - Instruction* var_ptr_type = - ctx->get_def_use_mgr()->GetDef(var_inst->type_id()); +bool ReplaceWriteInvocation(IRContext* ctx, Instruction* inst, + const std::vector&) { + uint32_t var_id = + ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId); + ctx->AddCapability(SpvCapabilitySubgroupBallotKHR); + ctx->AddExtension("SPV_KHR_shader_ballot"); + assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable."); + Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id); + Instruction* var_ptr_type = + ctx->get_def_use_mgr()->GetDef(var_inst->type_id()); - InstructionBuilder ir_builder( - ctx, inst, - IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); - Instruction* t = - ir_builder.AddLoad(var_ptr_type->GetSingleWordInOperand(1), var_id); - analysis::Bool bool_type; - uint32_t bool_type_id = ctx->get_type_mgr()->GetTypeInstruction(&bool_type); - Instruction* cmp = - ir_builder.AddBinaryOp(bool_type_id, SpvOpIEqual, t->result_id(), - inst->GetSingleWordInOperand(4)); + InstructionBuilder ir_builder( + ctx, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + Instruction* t = + ir_builder.AddLoad(var_ptr_type->GetSingleWordInOperand(1), var_id); + analysis::Bool bool_type; + uint32_t bool_type_id = ctx->get_type_mgr()->GetTypeInstruction(&bool_type); + Instruction* cmp = + ir_builder.AddBinaryOp(bool_type_id, SpvOpIEqual, t->result_id(), + inst->GetSingleWordInOperand(4)); - // Build a select. 
- inst->SetOpcode(SpvOpSelect); - Instruction::OperandList new_operands; - new_operands.push_back({SPV_OPERAND_TYPE_ID, {cmp->result_id()}}); - new_operands.push_back(inst->GetInOperand(3)); - new_operands.push_back(inst->GetInOperand(2)); + // Build a select. + inst->SetOpcode(SpvOpSelect); + Instruction::OperandList new_operands; + new_operands.push_back({SPV_OPERAND_TYPE_ID, {cmp->result_id()}}); + new_operands.push_back(inst->GetInOperand(3)); + new_operands.push_back(inst->GetInOperand(2)); - inst->SetInOperands(std::move(new_operands)); - ctx->UpdateDefUse(inst); - return true; - }; + inst->SetInOperands(std::move(new_operands)); + ctx->UpdateDefUse(inst); + return true; } // Returns a folding rule that will replace the MbcntAMD extended instruction in @@ -384,51 +480,324 @@ FoldingRule ReplaceWriteInvocation() { // %result = OpBitCount %uint %and // // Also adding the capabilities and builtins that are needed. -FoldingRule ReplaceMbcnt() { - return [](IRContext* context, Instruction* inst, - const std::vector&) { - analysis::TypeManager* type_mgr = context->get_type_mgr(); - analysis::DefUseManager* def_use_mgr = context->get_def_use_mgr(); +bool ReplaceMbcnt(IRContext* context, Instruction* inst, + const std::vector&) { + analysis::TypeManager* type_mgr = context->get_type_mgr(); + analysis::DefUseManager* def_use_mgr = context->get_def_use_mgr(); - uint32_t var_id = context->GetBuiltinInputVarId(SpvBuiltInSubgroupLtMask); - assert(var_id != 0 && "Could not get SubgroupLtMask variable."); - context->AddCapability(SpvCapabilityGroupNonUniformBallot); - Instruction* var_inst = def_use_mgr->GetDef(var_id); - Instruction* var_ptr_type = def_use_mgr->GetDef(var_inst->type_id()); - Instruction* var_type = - def_use_mgr->GetDef(var_ptr_type->GetSingleWordInOperand(1)); - assert(var_type->opcode() == SpvOpTypeVector && - "Variable is suppose to be a vector of 4 ints"); + uint32_t var_id = context->GetBuiltinInputVarId(SpvBuiltInSubgroupLtMask); + assert(var_id != 0 
&& "Could not get SubgroupLtMask variable."); + context->AddCapability(SpvCapabilityGroupNonUniformBallot); + Instruction* var_inst = def_use_mgr->GetDef(var_id); + Instruction* var_ptr_type = def_use_mgr->GetDef(var_inst->type_id()); + Instruction* var_type = + def_use_mgr->GetDef(var_ptr_type->GetSingleWordInOperand(1)); + assert(var_type->opcode() == SpvOpTypeVector && + "Variable is suppose to be a vector of 4 ints"); - // Get the type for the shuffle. - analysis::Vector temp_type(GetUIntType(context), 2); - const analysis::Type* shuffle_type = - context->get_type_mgr()->GetRegisteredType(&temp_type); - uint32_t shuffle_type_id = type_mgr->GetTypeInstruction(shuffle_type); + // Get the type for the shuffle. + analysis::Vector temp_type(GetUIntType(context), 2); + const analysis::Type* shuffle_type = + context->get_type_mgr()->GetRegisteredType(&temp_type); + uint32_t shuffle_type_id = type_mgr->GetTypeInstruction(shuffle_type); - uint32_t mask_id = inst->GetSingleWordInOperand(2); - Instruction* mask_inst = def_use_mgr->GetDef(mask_id); + uint32_t mask_id = inst->GetSingleWordInOperand(2); + Instruction* mask_inst = def_use_mgr->GetDef(mask_id); - // Testing with amd's shader compiler shows that a 64-bit mask is expected. - assert(type_mgr->GetType(mask_inst->type_id())->AsInteger() != nullptr); - assert(type_mgr->GetType(mask_inst->type_id())->AsInteger()->width() == 64); + // Testing with amd's shader compiler shows that a 64-bit mask is expected. 
+ assert(type_mgr->GetType(mask_inst->type_id())->AsInteger() != nullptr); + assert(type_mgr->GetType(mask_inst->type_id())->AsInteger()->width() == 64); - InstructionBuilder ir_builder( - context, inst, - IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); - Instruction* load = ir_builder.AddLoad(var_type->result_id(), var_id); - Instruction* shuffle = ir_builder.AddVectorShuffle( - shuffle_type_id, load->result_id(), load->result_id(), {0, 1}); - Instruction* bitcast = ir_builder.AddUnaryOp( - mask_inst->type_id(), SpvOpBitcast, shuffle->result_id()); - Instruction* t = ir_builder.AddBinaryOp( - mask_inst->type_id(), SpvOpBitwiseAnd, bitcast->result_id(), mask_id); + InstructionBuilder ir_builder( + context, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + Instruction* load = ir_builder.AddLoad(var_type->result_id(), var_id); + Instruction* shuffle = ir_builder.AddVectorShuffle( + shuffle_type_id, load->result_id(), load->result_id(), {0, 1}); + Instruction* bitcast = ir_builder.AddUnaryOp( + mask_inst->type_id(), SpvOpBitcast, shuffle->result_id()); + Instruction* t = ir_builder.AddBinaryOp(mask_inst->type_id(), SpvOpBitwiseAnd, + bitcast->result_id(), mask_id); - inst->SetOpcode(SpvOpBitCount); - inst->SetInOperands({{SPV_OPERAND_TYPE_ID, {t->result_id()}}}); - context->UpdateDefUse(inst); - return true; - }; + inst->SetOpcode(SpvOpBitCount); + inst->SetInOperands({{SPV_OPERAND_TYPE_ID, {t->result_id()}}}); + context->UpdateDefUse(inst); + return true; +} + +// A folding rule that will replace the CubeFaceCoordAMD extended +// instruction in the SPV_AMD_gcn_shader_ballot. Returns true if the folding is +// successful. 
+// +// The instruction +// +// %result = OpExtInst %v2float %1 CubeFaceCoordAMD %input +// +// with +// +// %x = OpCompositeExtract %float %input 0 +// %y = OpCompositeExtract %float %input 1 +// %z = OpCompositeExtract %float %input 2 +// %nx = OpFNegate %float %x +// %ny = OpFNegate %float %y +// %nz = OpFNegate %float %z +// %ax = OpExtInst %float %n_1 FAbs %x +// %ay = OpExtInst %float %n_1 FAbs %y +// %az = OpExtInst %float %n_1 FAbs %z +// %amax_x_y = OpExtInst %float %n_1 FMax %ay %ax +// %amax = OpExtInst %float %n_1 FMax %az %amax_x_y +// %cubema = OpFMul %float %float_2 %amax +// %is_z_max = OpFOrdGreaterThanEqual %bool %az %amax_x_y +// %not_is_z_max = OpLogicalNot %bool %is_z_max +// %y_gt_x = OpFOrdGreaterThanEqual %bool %ay %ax +// %is_y_max = OpLogicalAnd %bool %not_is_z_max %y_gt_x +// %is_z_neg = OpFOrdLessThan %bool %z %float_0 +// %cubesc_case_1 = OpSelect %float %is_z_neg %nx %x +// %is_x_neg = OpFOrdLessThan %bool %x %float_0 +// %cubesc_case_2 = OpSelect %float %is_x_neg %z %nz +// %sel = OpSelect %float %is_y_max %x %cubesc_case_2 +// %cubesc = OpSelect %float %is_z_max %cubesc_case_1 %sel +// %is_y_neg = OpFOrdLessThan %bool %y %float_0 +// %cubetc_case_1 = OpSelect %float %is_y_neg %nz %z +// %cubetc = OpSelect %float %is_y_max %cubetc_case_1 %ny +// %cube = OpCompositeConstruct %v2float %cubesc %cubetc +// %denom = OpCompositeConstruct %v2float %cubema %cubema +// %div = OpFDiv %v2float %cube %denom +// %result = OpFAdd %v2float %div %const +// +// Also adding the capabilities and builtins that are needed. 
+bool ReplaceCubeFaceCoord(IRContext* ctx, Instruction* inst, + const std::vector&) { + analysis::TypeManager* type_mgr = ctx->get_type_mgr(); + analysis::ConstantManager* const_mgr = ctx->get_constant_mgr(); + + uint32_t float_type_id = type_mgr->GetFloatTypeId(); + const analysis::Type* v2_float_type = type_mgr->GetFloatVectorType(2); + uint32_t v2_float_type_id = type_mgr->GetId(v2_float_type); + uint32_t bool_id = type_mgr->GetBoolTypeId(); + + InstructionBuilder ir_builder( + ctx, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + + uint32_t input_id = inst->GetSingleWordInOperand(2); + uint32_t glsl405_ext_inst_id = + ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450(); + if (glsl405_ext_inst_id == 0) { + ctx->AddExtInstImport("GLSL.std.450"); + glsl405_ext_inst_id = + ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450(); + } + + // Get the constants that will be used. + uint32_t f0_const_id = const_mgr->GetFloatConst(0.0); + uint32_t f2_const_id = const_mgr->GetFloatConst(2.0); + uint32_t f0_5_const_id = const_mgr->GetFloatConst(0.5); + const analysis::Constant* vec_const = + const_mgr->GetConstant(v2_float_type, {f0_5_const_id, f0_5_const_id}); + uint32_t vec_const_id = + const_mgr->GetDefiningInstruction(vec_const)->result_id(); + + // Extract the input values. + Instruction* x = ir_builder.AddCompositeExtract(float_type_id, input_id, {0}); + Instruction* y = ir_builder.AddCompositeExtract(float_type_id, input_id, {1}); + Instruction* z = ir_builder.AddCompositeExtract(float_type_id, input_id, {2}); + + // Negate the input values. + Instruction* nx = + ir_builder.AddUnaryOp(float_type_id, SpvOpFNegate, x->result_id()); + Instruction* ny = + ir_builder.AddUnaryOp(float_type_id, SpvOpFNegate, y->result_id()); + Instruction* nz = + ir_builder.AddUnaryOp(float_type_id, SpvOpFNegate, z->result_id()); + + // Get the abolsute values of the inputs. 
+ Instruction* ax = ir_builder.AddNaryExtendedInstruction( + float_type_id, glsl405_ext_inst_id, GLSLstd450FAbs, {x->result_id()}); + Instruction* ay = ir_builder.AddNaryExtendedInstruction( + float_type_id, glsl405_ext_inst_id, GLSLstd450FAbs, {y->result_id()}); + Instruction* az = ir_builder.AddNaryExtendedInstruction( + float_type_id, glsl405_ext_inst_id, GLSLstd450FAbs, {z->result_id()}); + + // Find which values are negative. Used in later computations. + Instruction* is_z_neg = ir_builder.AddBinaryOp(bool_id, SpvOpFOrdLessThan, + z->result_id(), f0_const_id); + Instruction* is_y_neg = ir_builder.AddBinaryOp(bool_id, SpvOpFOrdLessThan, + y->result_id(), f0_const_id); + Instruction* is_x_neg = ir_builder.AddBinaryOp(bool_id, SpvOpFOrdLessThan, + x->result_id(), f0_const_id); + + // Compute cubema + Instruction* amax_x_y = ir_builder.AddNaryExtendedInstruction( + float_type_id, glsl405_ext_inst_id, GLSLstd450FMax, + {ax->result_id(), ay->result_id()}); + Instruction* amax = ir_builder.AddNaryExtendedInstruction( + float_type_id, glsl405_ext_inst_id, GLSLstd450FMax, + {az->result_id(), amax_x_y->result_id()}); + Instruction* cubema = ir_builder.AddBinaryOp(float_type_id, SpvOpFMul, + f2_const_id, amax->result_id()); + + // Do the comparisons needed for computing cubesc and cubetc. + Instruction* is_z_max = + ir_builder.AddBinaryOp(bool_id, SpvOpFOrdGreaterThanEqual, + az->result_id(), amax_x_y->result_id()); + Instruction* not_is_z_max = + ir_builder.AddUnaryOp(bool_id, SpvOpLogicalNot, is_z_max->result_id()); + Instruction* y_gr_x = ir_builder.AddBinaryOp( + bool_id, SpvOpFOrdGreaterThanEqual, ay->result_id(), ax->result_id()); + Instruction* is_y_max = ir_builder.AddBinaryOp( + bool_id, SpvOpLogicalAnd, not_is_z_max->result_id(), y_gr_x->result_id()); + + // Select the correct value for cubesc. 
+ Instruction* cubesc_case_1 = ir_builder.AddSelect( + float_type_id, is_z_neg->result_id(), nx->result_id(), x->result_id()); + Instruction* cubesc_case_2 = ir_builder.AddSelect( + float_type_id, is_x_neg->result_id(), z->result_id(), nz->result_id()); + Instruction* sel = + ir_builder.AddSelect(float_type_id, is_y_max->result_id(), x->result_id(), + cubesc_case_2->result_id()); + Instruction* cubesc = + ir_builder.AddSelect(float_type_id, is_z_max->result_id(), + cubesc_case_1->result_id(), sel->result_id()); + + // Select the correct value for cubetc. + Instruction* cubetc_case_1 = ir_builder.AddSelect( + float_type_id, is_y_neg->result_id(), nz->result_id(), z->result_id()); + Instruction* cubetc = + ir_builder.AddSelect(float_type_id, is_y_max->result_id(), + cubetc_case_1->result_id(), ny->result_id()); + + // Do the division + Instruction* cube = ir_builder.AddCompositeConstruct( + v2_float_type_id, {cubesc->result_id(), cubetc->result_id()}); + Instruction* denom = ir_builder.AddCompositeConstruct( + v2_float_type_id, {cubema->result_id(), cubema->result_id()}); + Instruction* div = ir_builder.AddBinaryOp( + v2_float_type_id, SpvOpFDiv, cube->result_id(), denom->result_id()); + + // Get the final result by adding 0.5 to |div|. + inst->SetOpcode(SpvOpFAdd); + Instruction::OperandList new_operands; + new_operands.push_back({SPV_OPERAND_TYPE_ID, {div->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {vec_const_id}}); + + inst->SetInOperands(std::move(new_operands)); + ctx->UpdateDefUse(inst); + return true; +} + +// A folding rule that will replace the CubeFaceCoordAMD extended +// instruction in the SPV_AMD_gcn_shader_ballot. Returns true if the folding +// is successful. 
+// +// The instruction +// +// %result = OpExtInst %v2float %1 CubeFaceCoordAMD %input +// +// with +// +// %x = OpCompositeExtract %float %input 0 +// %y = OpCompositeExtract %float %input 1 +// %z = OpCompositeExtract %float %input 2 +// %ax = OpExtInst %float %n_1 FAbs %x +// %ay = OpExtInst %float %n_1 FAbs %y +// %az = OpExtInst %float %n_1 FAbs %z +// %is_z_neg = OpFOrdLessThan %bool %z %float_0 +// %is_y_neg = OpFOrdLessThan %bool %y %float_0 +// %is_x_neg = OpFOrdLessThan %bool %x %float_0 +// %amax_x_y = OpExtInst %float %n_1 FMax %ay %ax +// %is_z_max = OpFOrdGreaterThanEqual %bool %az %amax_x_y +// %y_gt_x = OpFOrdGreaterThanEqual %bool %ay %ax +// %case_z = OpSelect %float %is_z_neg %float_5 %float4 +// %case_y = OpSelect %float %is_y_neg %float_3 %float2 +// %case_x = OpSelect %float %is_x_neg %float_1 %float0 +// %sel = OpSelect %float %y_gt_x %case_y %case_x +// %result = OpSelect %float %is_z_max %case_z %sel +// +// Also adding the capabilities and builtins that are needed. +bool ReplaceCubeFaceIndex(IRContext* ctx, Instruction* inst, + const std::vector&) { + analysis::TypeManager* type_mgr = ctx->get_type_mgr(); + analysis::ConstantManager* const_mgr = ctx->get_constant_mgr(); + + uint32_t float_type_id = type_mgr->GetFloatTypeId(); + uint32_t bool_id = type_mgr->GetBoolTypeId(); + + InstructionBuilder ir_builder( + ctx, inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + + uint32_t input_id = inst->GetSingleWordInOperand(2); + uint32_t glsl405_ext_inst_id = + ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450(); + if (glsl405_ext_inst_id == 0) { + ctx->AddExtInstImport("GLSL.std.450"); + glsl405_ext_inst_id = + ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450(); + } + + // Get the constants that will be used. 
+ uint32_t f0_const_id = const_mgr->GetFloatConst(0.0); + uint32_t f1_const_id = const_mgr->GetFloatConst(1.0); + uint32_t f2_const_id = const_mgr->GetFloatConst(2.0); + uint32_t f3_const_id = const_mgr->GetFloatConst(3.0); + uint32_t f4_const_id = const_mgr->GetFloatConst(4.0); + uint32_t f5_const_id = const_mgr->GetFloatConst(5.0); + + // Extract the input values. + Instruction* x = ir_builder.AddCompositeExtract(float_type_id, input_id, {0}); + Instruction* y = ir_builder.AddCompositeExtract(float_type_id, input_id, {1}); + Instruction* z = ir_builder.AddCompositeExtract(float_type_id, input_id, {2}); + + // Get the absolute values of the inputs. + Instruction* ax = ir_builder.AddNaryExtendedInstruction( + float_type_id, glsl405_ext_inst_id, GLSLstd450FAbs, {x->result_id()}); + Instruction* ay = ir_builder.AddNaryExtendedInstruction( + float_type_id, glsl405_ext_inst_id, GLSLstd450FAbs, {y->result_id()}); + Instruction* az = ir_builder.AddNaryExtendedInstruction( + float_type_id, glsl405_ext_inst_id, GLSLstd450FAbs, {z->result_id()}); + + // Find which values are negative. Used in later computations. + Instruction* is_z_neg = ir_builder.AddBinaryOp(bool_id, SpvOpFOrdLessThan, + z->result_id(), f0_const_id); + Instruction* is_y_neg = ir_builder.AddBinaryOp(bool_id, SpvOpFOrdLessThan, + y->result_id(), f0_const_id); + Instruction* is_x_neg = ir_builder.AddBinaryOp(bool_id, SpvOpFOrdLessThan, + x->result_id(), f0_const_id); + + // Find the max value. + Instruction* amax_x_y = ir_builder.AddNaryExtendedInstruction( + float_type_id, glsl405_ext_inst_id, GLSLstd450FMax, + {ax->result_id(), ay->result_id()}); + Instruction* is_z_max = + ir_builder.AddBinaryOp(bool_id, SpvOpFOrdGreaterThanEqual, + az->result_id(), amax_x_y->result_id()); + Instruction* y_gr_x = ir_builder.AddBinaryOp( + bool_id, SpvOpFOrdGreaterThanEqual, ay->result_id(), ax->result_id()); + + // Get the value for each case. 
+ Instruction* case_z = ir_builder.AddSelect( + float_type_id, is_z_neg->result_id(), f5_const_id, f4_const_id); + Instruction* case_y = ir_builder.AddSelect( + float_type_id, is_y_neg->result_id(), f3_const_id, f2_const_id); + Instruction* case_x = ir_builder.AddSelect( + float_type_id, is_x_neg->result_id(), f1_const_id, f0_const_id); + + // Select the correct case. + Instruction* sel = + ir_builder.AddSelect(float_type_id, y_gr_x->result_id(), + case_y->result_id(), case_x->result_id()); + + // Get the final result by adding 0.5 to |div|. + inst->SetOpcode(SpvOpSelect); + Instruction::OperandList new_operands; + new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_z_max->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {case_z->result_id()}}); + new_operands.push_back({SPV_OPERAND_TYPE_ID, {sel->result_id()}}); + + inst->SetInOperands(std::move(new_operands)); + ctx->UpdateDefUse(inst); + return true; } class AmdExtFoldingRules : public FoldingRules { @@ -438,33 +807,70 @@ class AmdExtFoldingRules : public FoldingRules { protected: virtual void AddFoldingRules() override { rules_[SpvOpGroupIAddNonUniformAMD].push_back( - ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformIAdd)); + ReplaceGroupNonuniformOperationOpCode); rules_[SpvOpGroupFAddNonUniformAMD].push_back( - ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformFAdd)); + ReplaceGroupNonuniformOperationOpCode); rules_[SpvOpGroupUMinNonUniformAMD].push_back( - ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformUMin)); + ReplaceGroupNonuniformOperationOpCode); rules_[SpvOpGroupSMinNonUniformAMD].push_back( - ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformSMin)); + ReplaceGroupNonuniformOperationOpCode); rules_[SpvOpGroupFMinNonUniformAMD].push_back( - ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformFMin)); + ReplaceGroupNonuniformOperationOpCode); rules_[SpvOpGroupUMaxNonUniformAMD].push_back( - 
ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformUMax)); + ReplaceGroupNonuniformOperationOpCode); rules_[SpvOpGroupSMaxNonUniformAMD].push_back( - ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformSMax)); + ReplaceGroupNonuniformOperationOpCode); rules_[SpvOpGroupFMaxNonUniformAMD].push_back( - ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformFMax)); + ReplaceGroupNonuniformOperationOpCode); uint32_t extension_id = context()->module()->GetExtInstImportId("SPV_AMD_shader_ballot"); - ext_rules_[{extension_id, AmdShaderBallotSwizzleInvocationsAMD}].push_back( - ReplaceSwizzleInvocations()); - ext_rules_[{extension_id, AmdShaderBallotSwizzleInvocationsMaskedAMD}] - .push_back(ReplaceSwizzleInvocationsMasked()); - ext_rules_[{extension_id, AmdShaderBallotWriteInvocationAMD}].push_back( - ReplaceWriteInvocation()); - ext_rules_[{extension_id, AmdShaderBallotMbcntAMD}].push_back( - ReplaceMbcnt()); + if (extension_id != 0) { + ext_rules_[{extension_id, AmdShaderBallotSwizzleInvocationsAMD}] + .push_back(ReplaceSwizzleInvocations); + ext_rules_[{extension_id, AmdShaderBallotSwizzleInvocationsMaskedAMD}] + .push_back(ReplaceSwizzleInvocationsMasked); + ext_rules_[{extension_id, AmdShaderBallotWriteInvocationAMD}].push_back( + ReplaceWriteInvocation); + ext_rules_[{extension_id, AmdShaderBallotMbcntAMD}].push_back( + ReplaceMbcnt); + } + + extension_id = context()->module()->GetExtInstImportId( + "SPV_AMD_shader_trinary_minmax"); + + if (extension_id != 0) { + ext_rules_[{extension_id, FMin3AMD}].push_back( + ReplaceTrinaryMinMax); + ext_rules_[{extension_id, UMin3AMD}].push_back( + ReplaceTrinaryMinMax); + ext_rules_[{extension_id, SMin3AMD}].push_back( + ReplaceTrinaryMinMax); + ext_rules_[{extension_id, FMax3AMD}].push_back( + ReplaceTrinaryMinMax); + ext_rules_[{extension_id, UMax3AMD}].push_back( + ReplaceTrinaryMinMax); + ext_rules_[{extension_id, SMax3AMD}].push_back( + ReplaceTrinaryMinMax); + ext_rules_[{extension_id, FMid3AMD}].push_back( 
+ ReplaceTrinaryMid); + ext_rules_[{extension_id, UMid3AMD}].push_back( + ReplaceTrinaryMid); + ext_rules_[{extension_id, SMid3AMD}].push_back( + ReplaceTrinaryMid); + } + + extension_id = + context()->module()->GetExtInstImportId("SPV_AMD_gcn_shader"); + + if (extension_id != 0) { + ext_rules_[{extension_id, CubeFaceCoordAMD}].push_back( + ReplaceCubeFaceCoord); + ext_rules_[{extension_id, CubeFaceIndexAMD}].push_back( + ReplaceCubeFaceIndex); + ext_rules_[{extension_id, TimeAMD}].push_back(NotImplementedYet); + } } }; @@ -497,12 +903,15 @@ Pass::Status AmdExtensionToKhrPass::Process() { // Now that instruction that require the extensions have been removed, we can // remove the extension instructions. + std::set ext_to_remove = {"SPV_AMD_shader_ballot", + "SPV_AMD_shader_trinary_minmax", + "SPV_AMD_gcn_shader"}; + std::vector to_be_killed; for (Instruction& inst : context()->module()->extensions()) { if (inst.opcode() == SpvOpExtension) { - if (!strcmp("SPV_AMD_shader_ballot", - reinterpret_cast( - &(inst.GetInOperand(0).words[0])))) { + if (ext_to_remove.count(reinterpret_cast( + &(inst.GetInOperand(0).words[0]))) != 0) { to_be_killed.push_back(&inst); } } @@ -510,9 +919,8 @@ Pass::Status AmdExtensionToKhrPass::Process() { for (Instruction& inst : context()->ext_inst_imports()) { if (inst.opcode() == SpvOpExtInstImport) { - if (!strcmp("SPV_AMD_shader_ballot", - reinterpret_cast( - &(inst.GetInOperand(0).words[0])))) { + if (ext_to_remove.count(reinterpret_cast( + &(inst.GetInOperand(0).words[0]))) != 0) { to_be_killed.push_back(&inst); } } diff --git a/3rdparty/spirv-tools/source/opt/const_folding_rules.cpp b/3rdparty/spirv-tools/source/opt/const_folding_rules.cpp index 06a1a81e6..2a2493fd6 100644 --- a/3rdparty/spirv-tools/source/opt/const_folding_rules.cpp +++ b/3rdparty/spirv-tools/source/opt/const_folding_rules.cpp @@ -296,6 +296,51 @@ ConstantFoldingRule FoldFPUnaryOp(UnaryScalarFoldingRule scalar_rule) { }; } +// Returns the result of folding the constants 
in |constants| according the +// |scalar_rule|. If |result_type| is a vector, then |scalar_rule| is applied +// per component. +const analysis::Constant* FoldFPBinaryOp( + BinaryScalarFoldingRule scalar_rule, uint32_t result_type_id, + const std::vector& constants, + IRContext* context) { + analysis::ConstantManager* const_mgr = context->get_constant_mgr(); + analysis::TypeManager* type_mgr = context->get_type_mgr(); + const analysis::Type* result_type = type_mgr->GetType(result_type_id); + const analysis::Vector* vector_type = result_type->AsVector(); + + if (constants[0] == nullptr || constants[1] == nullptr) { + return nullptr; + } + + if (vector_type != nullptr) { + std::vector a_components; + std::vector b_components; + std::vector results_components; + + a_components = constants[0]->GetVectorComponents(const_mgr); + b_components = constants[1]->GetVectorComponents(const_mgr); + + // Fold each component of the vector. + for (uint32_t i = 0; i < a_components.size(); ++i) { + results_components.push_back(scalar_rule(vector_type->element_type(), + a_components[i], b_components[i], + const_mgr)); + if (results_components[i] == nullptr) { + return nullptr; + } + } + + // Build the constant object and return it. + std::vector ids; + for (const analysis::Constant* member : results_components) { + ids.push_back(const_mgr->GetDefiningInstruction(member)->result_id()); + } + return const_mgr->GetConstant(vector_type, ids); + } else { + return scalar_rule(result_type, constants[0], constants[1], const_mgr); + } +} + // Returns a |ConstantFoldingRule| that folds floating point scalars using // |scalar_rule| and vectors of floating point by applying |scalar_rule| to the // elements of the vector. 
The |ConstantFoldingRule| that is returned assumes @@ -305,46 +350,14 @@ ConstantFoldingRule FoldFPBinaryOp(BinaryScalarFoldingRule scalar_rule) { return [scalar_rule](IRContext* context, Instruction* inst, const std::vector& constants) -> const analysis::Constant* { - analysis::ConstantManager* const_mgr = context->get_constant_mgr(); - analysis::TypeManager* type_mgr = context->get_type_mgr(); - const analysis::Type* result_type = type_mgr->GetType(inst->type_id()); - const analysis::Vector* vector_type = result_type->AsVector(); - if (!inst->IsFloatingPointFoldingAllowed()) { return nullptr; } - - if (constants[0] == nullptr || constants[1] == nullptr) { - return nullptr; - } - - if (vector_type != nullptr) { - std::vector a_components; - std::vector b_components; - std::vector results_components; - - a_components = constants[0]->GetVectorComponents(const_mgr); - b_components = constants[1]->GetVectorComponents(const_mgr); - - // Fold each component of the vector. - for (uint32_t i = 0; i < a_components.size(); ++i) { - results_components.push_back(scalar_rule(vector_type->element_type(), - a_components[i], - b_components[i], const_mgr)); - if (results_components[i] == nullptr) { - return nullptr; - } - } - - // Build the constant object and return it. - std::vector ids; - for (const analysis::Constant* member : results_components) { - ids.push_back(const_mgr->GetDefiningInstruction(member)->result_id()); - } - return const_mgr->GetConstant(vector_type, ids); - } else { - return scalar_rule(result_type, constants[0], constants[1], const_mgr); + if (inst->opcode() == SpvOpExtInst) { + return FoldFPBinaryOp(scalar_rule, inst->type_id(), + {constants[1], constants[2]}, context); } + return FoldFPBinaryOp(scalar_rule, inst->type_id(), constants, context); }; } @@ -435,29 +448,33 @@ UnaryScalarFoldingRule FoldQuantizeToF16Scalar() { // This macro defines a |BinaryScalarFoldingRule| that applies |op|. 
The // operator |op| must work for both float and double, and use syntax "f1 op f2". -#define FOLD_FPARITH_OP(op) \ - [](const analysis::Type* result_type, const analysis::Constant* a, \ - const analysis::Constant* b, \ - analysis::ConstantManager* const_mgr_in_macro) \ - -> const analysis::Constant* { \ - assert(result_type != nullptr && a != nullptr && b != nullptr); \ - assert(result_type == a->type() && result_type == b->type()); \ - const analysis::Float* float_type_in_macro = result_type->AsFloat(); \ - assert(float_type_in_macro != nullptr); \ - if (float_type_in_macro->width() == 32) { \ - float fa = a->GetFloat(); \ - float fb = b->GetFloat(); \ - utils::FloatProxy result_in_macro(fa op fb); \ - std::vector words_in_macro = result_in_macro.GetWords(); \ - return const_mgr_in_macro->GetConstant(result_type, words_in_macro); \ - } else if (float_type_in_macro->width() == 64) { \ - double fa = a->GetDouble(); \ - double fb = b->GetDouble(); \ - utils::FloatProxy result_in_macro(fa op fb); \ - std::vector words_in_macro = result_in_macro.GetWords(); \ - return const_mgr_in_macro->GetConstant(result_type, words_in_macro); \ - } \ - return nullptr; \ +#define FOLD_FPARITH_OP(op) \ + [](const analysis::Type* result_type_in_macro, const analysis::Constant* a, \ + const analysis::Constant* b, \ + analysis::ConstantManager* const_mgr_in_macro) \ + -> const analysis::Constant* { \ + assert(result_type_in_macro != nullptr && a != nullptr && b != nullptr); \ + assert(result_type_in_macro == a->type() && \ + result_type_in_macro == b->type()); \ + const analysis::Float* float_type_in_macro = \ + result_type_in_macro->AsFloat(); \ + assert(float_type_in_macro != nullptr); \ + if (float_type_in_macro->width() == 32) { \ + float fa = a->GetFloat(); \ + float fb = b->GetFloat(); \ + utils::FloatProxy result_in_macro(fa op fb); \ + std::vector words_in_macro = result_in_macro.GetWords(); \ + return const_mgr_in_macro->GetConstant(result_type_in_macro, \ + words_in_macro); \ 
+ } else if (float_type_in_macro->width() == 64) { \ + double fa = a->GetDouble(); \ + double fb = b->GetDouble(); \ + utils::FloatProxy result_in_macro(fa op fb); \ + std::vector words_in_macro = result_in_macro.GetWords(); \ + return const_mgr_in_macro->GetConstant(result_type_in_macro, \ + words_in_macro); \ + } \ + return nullptr; \ } // Define the folding rule for conversion between floating point and integer @@ -834,34 +851,225 @@ ConstantFoldingRule FoldFMix() { } const analysis::Constant* one; - if (constants[1]->type()->AsFloat()->width() == 32) { - one = const_mgr->GetConstant(constants[1]->type(), + bool is_vector = false; + const analysis::Type* result_type = constants[1]->type(); + const analysis::Type* base_type = result_type; + if (base_type->AsVector()) { + is_vector = true; + base_type = base_type->AsVector()->element_type(); + } + assert(base_type->AsFloat() != nullptr && + "FMix is suppose to act on floats or vectors of floats."); + + if (base_type->AsFloat()->width() == 32) { + one = const_mgr->GetConstant(base_type, utils::FloatProxy(1.0f).GetWords()); } else { - one = const_mgr->GetConstant(constants[1]->type(), + one = const_mgr->GetConstant(base_type, utils::FloatProxy(1.0).GetWords()); } - const analysis::Constant* temp1 = - FOLD_FPARITH_OP(-)(constants[1]->type(), one, constants[3], const_mgr); + if (is_vector) { + uint32_t one_id = const_mgr->GetDefiningInstruction(one)->result_id(); + one = + const_mgr->GetConstant(result_type, std::vector(4, one_id)); + } + + const analysis::Constant* temp1 = FoldFPBinaryOp( + FOLD_FPARITH_OP(-), inst->type_id(), {one, constants[3]}, context); if (temp1 == nullptr) { return nullptr; } - const analysis::Constant* temp2 = FOLD_FPARITH_OP(*)( - constants[1]->type(), constants[1], temp1, const_mgr); + const analysis::Constant* temp2 = FoldFPBinaryOp( + FOLD_FPARITH_OP(*), inst->type_id(), {constants[1], temp1}, context); if (temp2 == nullptr) { return nullptr; } - const analysis::Constant* temp3 = 
FOLD_FPARITH_OP(*)( - constants[2]->type(), constants[2], constants[3], const_mgr); + const analysis::Constant* temp3 = + FoldFPBinaryOp(FOLD_FPARITH_OP(*), inst->type_id(), + {constants[2], constants[3]}, context); if (temp3 == nullptr) { return nullptr; } - return FOLD_FPARITH_OP(+)(temp2->type(), temp2, temp3, const_mgr); + return FoldFPBinaryOp(FOLD_FPARITH_OP(+), inst->type_id(), {temp2, temp3}, + context); }; } +template +IntType FoldIClamp(IntType x, IntType min_val, IntType max_val) { + if (x < min_val) { + x = min_val; + } + if (x > max_val) { + x = max_val; + } + return x; +} + +const analysis::Constant* FoldMin(const analysis::Type* result_type, + const analysis::Constant* a, + const analysis::Constant* b, + analysis::ConstantManager*) { + if (const analysis::Integer* int_type = result_type->AsInteger()) { + if (int_type->width() == 32) { + if (int_type->IsSigned()) { + int32_t va = a->GetS32(); + int32_t vb = b->GetS32(); + return (va < vb ? a : b); + } else { + uint32_t va = a->GetU32(); + uint32_t vb = b->GetU32(); + return (va < vb ? a : b); + } + } else if (int_type->width() == 64) { + if (int_type->IsSigned()) { + int64_t va = a->GetS64(); + int64_t vb = b->GetS64(); + return (va < vb ? a : b); + } else { + uint64_t va = a->GetU64(); + uint64_t vb = b->GetU64(); + return (va < vb ? a : b); + } + } + } else if (const analysis::Float* float_type = result_type->AsFloat()) { + if (float_type->width() == 32) { + float va = a->GetFloat(); + float vb = b->GetFloat(); + return (va < vb ? a : b); + } else if (float_type->width() == 64) { + double va = a->GetDouble(); + double vb = b->GetDouble(); + return (va < vb ? 
a : b); + } + } + return nullptr; +} + +const analysis::Constant* FoldMax(const analysis::Type* result_type, + const analysis::Constant* a, + const analysis::Constant* b, + analysis::ConstantManager*) { + if (const analysis::Integer* int_type = result_type->AsInteger()) { + if (int_type->width() == 32) { + if (int_type->IsSigned()) { + int32_t va = a->GetS32(); + int32_t vb = b->GetS32(); + return (va > vb ? a : b); + } else { + uint32_t va = a->GetU32(); + uint32_t vb = b->GetU32(); + return (va > vb ? a : b); + } + } else if (int_type->width() == 64) { + if (int_type->IsSigned()) { + int64_t va = a->GetS64(); + int64_t vb = b->GetS64(); + return (va > vb ? a : b); + } else { + uint64_t va = a->GetU64(); + uint64_t vb = b->GetU64(); + return (va > vb ? a : b); + } + } + } else if (const analysis::Float* float_type = result_type->AsFloat()) { + if (float_type->width() == 32) { + float va = a->GetFloat(); + float vb = b->GetFloat(); + return (va > vb ? a : b); + } else if (float_type->width() == 64) { + double va = a->GetDouble(); + double vb = b->GetDouble(); + return (va > vb ? a : b); + } + } + return nullptr; +} + +// Fold a clamp instruction when all three operands are constant. +const analysis::Constant* FoldClamp1( + IRContext* context, Instruction* inst, + const std::vector& constants) { + assert(inst->opcode() == SpvOpExtInst && + "Expecting an extended instruction."); + assert(inst->GetSingleWordInOperand(0) == + context->get_feature_mgr()->GetExtInstImportId_GLSLstd450() && + "Expecting a GLSLstd450 extended instruction."); + + // Make sure all Clamp operands are constants.
+ for (uint32_t i = 1; i < 4; i++) { + if (constants[i] == nullptr) { + return nullptr; + } + } + + const analysis::Constant* temp = FoldFPBinaryOp( + FoldMax, inst->type_id(), {constants[1], constants[2]}, context); + if (temp == nullptr) { + return nullptr; + } + return FoldFPBinaryOp(FoldMin, inst->type_id(), {temp, constants[3]}, + context); +} + +// Fold a clamp instruction when |x <= min_val|. +const analysis::Constant* FoldClamp2( + IRContext* context, Instruction* inst, + const std::vector& constants) { + assert(inst->opcode() == SpvOpExtInst && + "Expecting an extended instruction."); + assert(inst->GetSingleWordInOperand(0) == + context->get_feature_mgr()->GetExtInstImportId_GLSLstd450() && + "Expecting a GLSLstd450 extended instruction."); + + const analysis::Constant* x = constants[1]; + const analysis::Constant* min_val = constants[2]; + + if (x == nullptr || min_val == nullptr) { + return nullptr; + } + + const analysis::Constant* temp = + FoldFPBinaryOp(FoldMax, inst->type_id(), {x, min_val}, context); + if (temp == min_val) { + // We can assume that |min_val| is less than |max_val|. Therefore, if the + // result of the max operation is |min_val|, we know the result of the min + // operation, even if |max_val| is not a constant. + return min_val; + } + return nullptr; +} + +// Fold a clamp instruction when |x >= max_val|.
+const analysis::Constant* FoldClamp3( + IRContext* context, Instruction* inst, + const std::vector& constants) { + assert(inst->opcode() == SpvOpExtInst && + "Expecting an extended instruction."); + assert(inst->GetSingleWordInOperand(0) == + context->get_feature_mgr()->GetExtInstImportId_GLSLstd450() && + "Expecting a GLSLstd450 extended instruction."); + + const analysis::Constant* x = constants[1]; + const analysis::Constant* max_val = constants[3]; + + if (x == nullptr || max_val == nullptr) { + return nullptr; + } + + const analysis::Constant* temp = + FoldFPBinaryOp(FoldMin, inst->type_id(), {x, max_val}, context); + if (temp == max_val) { + // We can assume that |min_val| is less than |max_val|. Therefore, if the + // result of the min operation is |max_val|, we know the result of the clamp + // operation, even if |min_val| is not a constant. + return max_val; + } + return nullptr; +} + } // namespace void ConstantFoldingRules::AddFoldingRules() { @@ -937,6 +1145,36 @@ void ConstantFoldingRules::AddFoldingRules() { feature_manager->GetExtInstImportId_GLSLstd450(); if (ext_inst_glslstd450_id != 0) { ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FMix}].push_back(FoldFMix()); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450SMin}].push_back( + FoldFPBinaryOp(FoldMin)); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450UMin}].push_back( + FoldFPBinaryOp(FoldMin)); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FMin}].push_back( + FoldFPBinaryOp(FoldMin)); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450SMax}].push_back( + FoldFPBinaryOp(FoldMax)); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450UMax}].push_back( + FoldFPBinaryOp(FoldMax)); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FMax}].push_back( + FoldFPBinaryOp(FoldMax)); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450UClamp}].push_back( + FoldClamp1); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450UClamp}].push_back( + FoldClamp2); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450UClamp}].push_back( +
FoldClamp3); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450SClamp}].push_back( + FoldClamp1); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450SClamp}].push_back( + FoldClamp2); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450SClamp}].push_back( + FoldClamp3); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FClamp}].push_back( + FoldClamp1); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FClamp}].push_back( + FoldClamp2); + ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FClamp}].push_back( + FoldClamp3); } } } // namespace opt diff --git a/3rdparty/spirv-tools/source/opt/constants.cpp b/3rdparty/spirv-tools/source/opt/constants.cpp index 5c1468be5..0887ec2c6 100644 --- a/3rdparty/spirv-tools/source/opt/constants.cpp +++ b/3rdparty/spirv-tools/source/opt/constants.cpp @@ -389,6 +389,13 @@ const Constant* ConstantManager::GetConstant( return cst ? RegisterConstant(std::move(cst)) : nullptr; } +uint32_t ConstantManager::GetFloatConst(float val) { + Type* float_type = context()->get_type_mgr()->GetFloatType(); + utils::FloatProxy v(val); + const Constant* c = GetConstant(float_type, v.GetWords()); + return GetDefiningInstruction(c)->result_id(); +} + std::vector Constant::GetVectorComponents( analysis::ConstantManager* const_mgr) const { std::vector components; diff --git a/3rdparty/spirv-tools/source/opt/constants.h b/3rdparty/spirv-tools/source/opt/constants.h index 34855001c..d65d28d60 100644 --- a/3rdparty/spirv-tools/source/opt/constants.h +++ b/3rdparty/spirv-tools/source/opt/constants.h @@ -626,6 +626,9 @@ class ConstantManager { } } + // Returns the id of a 32-bit floating point constant with value |val|. + uint32_t GetFloatConst(float val); + private: // Creates a Constant instance with the given type and a vector of constant // defining words. 
Returns a unique pointer to the created Constant instance diff --git a/3rdparty/spirv-tools/source/opt/convert_to_half_pass.cpp b/3rdparty/spirv-tools/source/opt/convert_to_half_pass.cpp new file mode 100644 index 000000000..4c02c73e2 --- /dev/null +++ b/3rdparty/spirv-tools/source/opt/convert_to_half_pass.cpp @@ -0,0 +1,460 @@ +// Copyright (c) 2019 The Khronos Group Inc. +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "convert_to_half_pass.h" + +#include "source/opt/ir_builder.h" + +namespace { + +// Indices of operands in SPIR-V instructions +static const int kImageSampleDrefIdInIdx = 2; + +} // anonymous namespace + +namespace spvtools { +namespace opt { + +bool ConvertToHalfPass::IsArithmetic(Instruction* inst) { + return target_ops_core_.count(inst->opcode()) != 0 || + (inst->opcode() == SpvOpExtInst && + inst->GetSingleWordInOperand(0) == + context()->get_feature_mgr()->GetExtInstImportId_GLSLstd450() && + target_ops_450_.count(inst->GetSingleWordInOperand(1)) != 0); +} + +bool ConvertToHalfPass::IsFloat(Instruction* inst, uint32_t width) { + uint32_t ty_id = inst->type_id(); + if (ty_id == 0) return false; + return Pass::IsFloat(ty_id, width); +} + +bool ConvertToHalfPass::IsRelaxed(Instruction* inst) { + uint32_t r_id = inst->result_id(); + for (auto r_inst : get_decoration_mgr()->GetDecorationsFor(r_id, false)) + if (r_inst->opcode() == SpvOpDecorate && + r_inst->GetSingleWordInOperand(1) == SpvDecorationRelaxedPrecision) + return true; + return false; +} + +analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) { + analysis::Float float_ty(width); + return context()->get_type_mgr()->GetRegisteredType(&float_ty); +} + +analysis::Type* ConvertToHalfPass::FloatVectorType(uint32_t v_len, + uint32_t width) { + analysis::Type* reg_float_ty = FloatScalarType(width); + analysis::Vector vec_ty(reg_float_ty, v_len); + return context()->get_type_mgr()->GetRegisteredType(&vec_ty); +} + +analysis::Type* ConvertToHalfPass::FloatMatrixType(uint32_t v_cnt, + uint32_t vty_id, + uint32_t width) { + Instruction* vty_inst = get_def_use_mgr()->GetDef(vty_id); + uint32_t v_len = vty_inst->GetSingleWordInOperand(1); + analysis::Type* reg_vec_ty = FloatVectorType(v_len, width); + analysis::Matrix mat_ty(reg_vec_ty, v_cnt); + return context()->get_type_mgr()->GetRegisteredType(&mat_ty); +} + +uint32_t ConvertToHalfPass::EquivFloatTypeId(uint32_t ty_id, uint32_t width) { 
+ analysis::Type* reg_equiv_ty; + Instruction* ty_inst = get_def_use_mgr()->GetDef(ty_id); + if (ty_inst->opcode() == SpvOpTypeMatrix) + reg_equiv_ty = FloatMatrixType(ty_inst->GetSingleWordInOperand(1), + ty_inst->GetSingleWordInOperand(0), width); + else if (ty_inst->opcode() == SpvOpTypeVector) + reg_equiv_ty = FloatVectorType(ty_inst->GetSingleWordInOperand(1), width); + else // SpvOpTypeFloat + reg_equiv_ty = FloatScalarType(width); + return context()->get_type_mgr()->GetTypeInstruction(reg_equiv_ty); +} + +void ConvertToHalfPass::GenConvert(uint32_t* val_idp, uint32_t width, + InstructionBuilder* builder) { + Instruction* val_inst = get_def_use_mgr()->GetDef(*val_idp); + uint32_t ty_id = val_inst->type_id(); + uint32_t nty_id = EquivFloatTypeId(ty_id, width); + if (nty_id == ty_id) return; + Instruction* cvt_inst; + if (val_inst->opcode() == SpvOpUndef) + cvt_inst = builder->AddNullaryOp(nty_id, SpvOpUndef); + else + cvt_inst = builder->AddUnaryOp(nty_id, SpvOpFConvert, *val_idp); + *val_idp = cvt_inst->result_id(); +} + +bool ConvertToHalfPass::MatConvertCleanup(Instruction* inst) { + if (inst->opcode() != SpvOpFConvert) return false; + uint32_t mty_id = inst->type_id(); + Instruction* mty_inst = get_def_use_mgr()->GetDef(mty_id); + if (mty_inst->opcode() != SpvOpTypeMatrix) return false; + uint32_t vty_id = mty_inst->GetSingleWordInOperand(0); + uint32_t v_cnt = mty_inst->GetSingleWordInOperand(1); + Instruction* vty_inst = get_def_use_mgr()->GetDef(vty_id); + uint32_t cty_id = vty_inst->GetSingleWordInOperand(0); + Instruction* cty_inst = get_def_use_mgr()->GetDef(cty_id); + InstructionBuilder builder( + context(), inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + // Convert each component vector, combine them with OpCompositeConstruct + // and replace original instruction. + uint32_t orig_width = (cty_inst->GetSingleWordInOperand(0) == 16) ? 
32 : 16; + uint32_t orig_mat_id = inst->GetSingleWordInOperand(0); + uint32_t orig_vty_id = EquivFloatTypeId(vty_id, orig_width); + std::vector opnds = {}; + for (uint32_t vidx = 0; vidx < v_cnt; ++vidx) { + Instruction* ext_inst = builder.AddIdLiteralOp( + orig_vty_id, SpvOpCompositeExtract, orig_mat_id, vidx); + Instruction* cvt_inst = + builder.AddUnaryOp(vty_id, SpvOpFConvert, ext_inst->result_id()); + opnds.push_back({SPV_OPERAND_TYPE_ID, {cvt_inst->result_id()}}); + } + uint32_t mat_id = TakeNextId(); + std::unique_ptr mat_inst(new Instruction( + context(), SpvOpCompositeConstruct, mty_id, mat_id, opnds)); + (void)builder.AddInstruction(std::move(mat_inst)); + context()->ReplaceAllUsesWith(inst->result_id(), mat_id); + // Turn original instruction into copy so it is valid. + inst->SetOpcode(SpvOpCopyObject); + inst->SetResultType(EquivFloatTypeId(mty_id, orig_width)); + get_def_use_mgr()->AnalyzeInstUse(inst); + return true; +} + +void ConvertToHalfPass::RemoveRelaxedDecoration(uint32_t id) { + context()->get_decoration_mgr()->RemoveDecorationsFrom( + id, [](const Instruction& dec) { + if (dec.opcode() == SpvOpDecorate && + dec.GetSingleWordInOperand(1u) == SpvDecorationRelaxedPrecision) + return true; + else + return false; + }); +} + +bool ConvertToHalfPass::GenHalfArith(Instruction* inst) { + bool modified = false; + // Convert all float32 based operands to float16 equivalent and change + // instruction type to float16 equivalent. 
+ InstructionBuilder builder( + context(), inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + inst->ForEachInId([&builder, &modified, this](uint32_t* idp) { + Instruction* op_inst = get_def_use_mgr()->GetDef(*idp); + if (!IsFloat(op_inst, 32)) return; + GenConvert(idp, 16, &builder); + modified = true; + }); + if (IsFloat(inst, 32)) { + inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16)); + modified = true; + } + if (modified) get_def_use_mgr()->AnalyzeInstUse(inst); + return modified; +} + +bool ConvertToHalfPass::ProcessPhi(Instruction* inst) { + // Skip if not float32 + if (!IsFloat(inst, 32)) return false; + // Skip if no relaxed operands. + bool relaxed_found = false; + uint32_t ocnt = 0; + inst->ForEachInId([&ocnt, &relaxed_found, this](uint32_t* idp) { + if (ocnt % 2 == 0) { + Instruction* val_inst = get_def_use_mgr()->GetDef(*idp); + if (IsRelaxed(val_inst)) relaxed_found = true; + } + ++ocnt; + }); + if (!relaxed_found) return false; + // Add float16 converts of any float32 operands and change type + // of phi to float16 equivalent. Operand converts need to be added to + // preceding blocks.
+ ocnt = 0; + uint32_t* prev_idp; + inst->ForEachInId([&ocnt, &prev_idp, this](uint32_t* idp) { + if (ocnt % 2 == 0) { + prev_idp = idp; + } else { + Instruction* val_inst = get_def_use_mgr()->GetDef(*prev_idp); + if (IsFloat(val_inst, 32)) { + BasicBlock* bp = context()->get_instr_block(*idp); + auto insert_before = bp->tail(); + if (insert_before != bp->begin()) { + --insert_before; + if (insert_before->opcode() != SpvOpSelectionMerge && + insert_before->opcode() != SpvOpLoopMerge) + ++insert_before; + } + InstructionBuilder builder(context(), &*insert_before, + IRContext::kAnalysisDefUse | + IRContext::kAnalysisInstrToBlockMapping); + GenConvert(prev_idp, 16, &builder); + } + } + ++ocnt; + }); + inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16)); + get_def_use_mgr()->AnalyzeInstUse(inst); + return true; +} + +bool ConvertToHalfPass::ProcessExtract(Instruction* inst) { + bool modified = false; + uint32_t comp_id = inst->GetSingleWordInOperand(0); + Instruction* comp_inst = get_def_use_mgr()->GetDef(comp_id); + // If extract is relaxed float32 based type and the composite is a relaxed + // float32 based type, convert it to float16 equivalent. This is slightly + // aggressive and pushes any likely conversion to apply to the whole + // composite rather than apply to each extracted component later. This + // can be a win if the platform can convert the entire composite in the same + // time as one component. It risks converting components that may not be + // used, although empirical data on a large set of real-world shaders seems + // to suggest this is not common and the composite convert is the best choice. 
+ if (IsFloat(inst, 32) && IsRelaxed(inst) && IsFloat(comp_inst, 32) && + IsRelaxed(comp_inst)) { + InstructionBuilder builder( + context(), inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + GenConvert(&comp_id, 16, &builder); + inst->SetInOperand(0, {comp_id}); + comp_inst = get_def_use_mgr()->GetDef(comp_id); + modified = true; + } + // If the composite is a float16 based type, make sure the type of the + // extract agrees. + if (IsFloat(comp_inst, 16) && !IsFloat(inst, 16)) { + inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16)); + modified = true; + } + if (modified) get_def_use_mgr()->AnalyzeInstUse(inst); + return modified; +} + +bool ConvertToHalfPass::ProcessConvert(Instruction* inst) { + // If float32 and relaxed, change to float16 convert + if (IsFloat(inst, 32) && IsRelaxed(inst)) { + inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16)); + get_def_use_mgr()->AnalyzeInstUse(inst); + } + // If operand and result types are the same, replace result with operand + // and change convert to copy to keep validator happy; DCE will clean it up + uint32_t val_id = inst->GetSingleWordInOperand(0); + Instruction* val_inst = get_def_use_mgr()->GetDef(val_id); + if (inst->type_id() == val_inst->type_id()) { + context()->ReplaceAllUsesWith(inst->result_id(), val_id); + inst->SetOpcode(SpvOpCopyObject); + } + return true; // modified +} + +bool ConvertToHalfPass::ProcessImageRef(Instruction* inst) { + bool modified = false; + // If image reference, only need to convert dref args back to float32 + if (dref_image_ops_.count(inst->opcode()) != 0) { + uint32_t dref_id = inst->GetSingleWordInOperand(kImageSampleDrefIdInIdx); + Instruction* dref_inst = get_def_use_mgr()->GetDef(dref_id); + if (IsFloat(dref_inst, 16) && IsRelaxed(dref_inst)) { + InstructionBuilder builder( + context(), inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + GenConvert(&dref_id, 32, &builder); + 
inst->SetInOperand(kImageSampleDrefIdInIdx, {dref_id}); + get_def_use_mgr()->AnalyzeInstUse(inst); + modified = true; + } + } + return modified; +} + +bool ConvertToHalfPass::ProcessDefault(Instruction* inst) { + bool modified = false; + // If non-relaxed instruction has changed operands, need to convert + // them back to float32 + InstructionBuilder builder( + context(), inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + inst->ForEachInId([&builder, &modified, this](uint32_t* idp) { + Instruction* op_inst = get_def_use_mgr()->GetDef(*idp); + if (!IsFloat(op_inst, 16)) return; + if (!IsRelaxed(op_inst)) return; + uint32_t old_id = *idp; + GenConvert(idp, 32, &builder); + if (*idp != old_id) modified = true; + }); + if (modified) get_def_use_mgr()->AnalyzeInstUse(inst); + return modified; +} + +bool ConvertToHalfPass::GenHalfCode(Instruction* inst) { + bool modified = false; + // Remember id for later deletion of RelaxedPrecision decoration + bool inst_relaxed = IsRelaxed(inst); + if (inst_relaxed) relaxed_ids_.push_back(inst->result_id()); + if (IsArithmetic(inst) && inst_relaxed) + modified = GenHalfArith(inst); + else if (inst->opcode() == SpvOpPhi) + modified = ProcessPhi(inst); + else if (inst->opcode() == SpvOpCompositeExtract) + modified = ProcessExtract(inst); + else if (inst->opcode() == SpvOpFConvert) + modified = ProcessConvert(inst); + else if (image_ops_.count(inst->opcode()) != 0) + modified = ProcessImageRef(inst); + else + modified = ProcessDefault(inst); + return modified; +} + +bool ConvertToHalfPass::ProcessFunction(Function* func) { + bool modified = false; + cfg()->ForEachBlockInReversePostOrder( + func->entry().get(), [&modified, this](BasicBlock* bb) { + for (auto ii = bb->begin(); ii != bb->end(); ++ii) + modified |= GenHalfCode(&*ii); + }); + cfg()->ForEachBlockInReversePostOrder( + func->entry().get(), [&modified, this](BasicBlock* bb) { + for (auto ii = bb->begin(); ii != bb->end(); ++ii) + modified |= 
MatConvertCleanup(&*ii); + }); + return modified; +} + +Pass::Status ConvertToHalfPass::ProcessImpl() { + Pass::ProcessFunction pfn = [this](Function* fp) { + return ProcessFunction(fp); + }; + bool modified = context()->ProcessEntryPointCallTree(pfn); + // If modified, make sure module has Float16 capability + if (modified) context()->AddCapability(SpvCapabilityFloat16); + // Remove all RelaxedPrecision decorations from instructions and globals + for (auto c_id : relaxed_ids_) RemoveRelaxedDecoration(c_id); + for (auto& val : get_module()->types_values()) { + uint32_t v_id = val.result_id(); + if (v_id != 0) RemoveRelaxedDecoration(v_id); + } + return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; +} + +Pass::Status ConvertToHalfPass::Process() { + Initialize(); + return ProcessImpl(); +} + +void ConvertToHalfPass::Initialize() { + target_ops_core_ = { + SpvOpVectorExtractDynamic, + SpvOpVectorInsertDynamic, + SpvOpVectorShuffle, + SpvOpCompositeConstruct, + SpvOpCompositeInsert, + SpvOpCopyObject, + SpvOpTranspose, + SpvOpConvertSToF, + SpvOpConvertUToF, + // SpvOpFConvert, + // SpvOpQuantizeToF16, + SpvOpFNegate, + SpvOpFAdd, + SpvOpFSub, + SpvOpFMul, + SpvOpFDiv, + SpvOpFMod, + SpvOpVectorTimesScalar, + SpvOpMatrixTimesScalar, + SpvOpVectorTimesMatrix, + SpvOpMatrixTimesVector, + SpvOpMatrixTimesMatrix, + SpvOpOuterProduct, + SpvOpDot, + SpvOpSelect, + SpvOpFOrdEqual, + SpvOpFUnordEqual, + SpvOpFOrdNotEqual, + SpvOpFUnordNotEqual, + SpvOpFOrdLessThan, + SpvOpFUnordLessThan, + SpvOpFOrdGreaterThan, + SpvOpFUnordGreaterThan, + SpvOpFOrdLessThanEqual, + SpvOpFUnordLessThanEqual, + SpvOpFOrdGreaterThanEqual, + SpvOpFUnordGreaterThanEqual, + }; + target_ops_450_ = { + GLSLstd450Round, GLSLstd450RoundEven, GLSLstd450Trunc, GLSLstd450FAbs, + GLSLstd450FSign, GLSLstd450Floor, GLSLstd450Ceil, GLSLstd450Fract, + GLSLstd450Radians, GLSLstd450Degrees, GLSLstd450Sin, GLSLstd450Cos, + GLSLstd450Tan, GLSLstd450Asin, GLSLstd450Acos, GLSLstd450Atan, + 
GLSLstd450Sinh, GLSLstd450Cosh, GLSLstd450Tanh, GLSLstd450Asinh, + GLSLstd450Acosh, GLSLstd450Atanh, GLSLstd450Atan2, GLSLstd450Pow, + GLSLstd450Exp, GLSLstd450Log, GLSLstd450Exp2, GLSLstd450Log2, + GLSLstd450Sqrt, GLSLstd450InverseSqrt, GLSLstd450Determinant, + GLSLstd450MatrixInverse, + // TODO(greg-lunarg): GLSLstd450ModfStruct, + GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp, GLSLstd450FMix, + GLSLstd450Step, GLSLstd450SmoothStep, GLSLstd450Fma, + // TODO(greg-lunarg): GLSLstd450FrexpStruct, + GLSLstd450Ldexp, GLSLstd450Length, GLSLstd450Distance, GLSLstd450Cross, + GLSLstd450Normalize, GLSLstd450FaceForward, GLSLstd450Reflect, + GLSLstd450Refract, GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp}; + image_ops_ = {SpvOpImageSampleImplicitLod, + SpvOpImageSampleExplicitLod, + SpvOpImageSampleDrefImplicitLod, + SpvOpImageSampleDrefExplicitLod, + SpvOpImageSampleProjImplicitLod, + SpvOpImageSampleProjExplicitLod, + SpvOpImageSampleProjDrefImplicitLod, + SpvOpImageSampleProjDrefExplicitLod, + SpvOpImageFetch, + SpvOpImageGather, + SpvOpImageDrefGather, + SpvOpImageRead, + SpvOpImageSparseSampleImplicitLod, + SpvOpImageSparseSampleExplicitLod, + SpvOpImageSparseSampleDrefImplicitLod, + SpvOpImageSparseSampleDrefExplicitLod, + SpvOpImageSparseSampleProjImplicitLod, + SpvOpImageSparseSampleProjExplicitLod, + SpvOpImageSparseSampleProjDrefImplicitLod, + SpvOpImageSparseSampleProjDrefExplicitLod, + SpvOpImageSparseFetch, + SpvOpImageSparseGather, + SpvOpImageSparseDrefGather, + SpvOpImageSparseTexelsResident, + SpvOpImageSparseRead}; + dref_image_ops_ = { + SpvOpImageSampleDrefImplicitLod, + SpvOpImageSampleDrefExplicitLod, + SpvOpImageSampleProjDrefImplicitLod, + SpvOpImageSampleProjDrefExplicitLod, + SpvOpImageDrefGather, + SpvOpImageSparseSampleDrefImplicitLod, + SpvOpImageSparseSampleDrefExplicitLod, + SpvOpImageSparseSampleProjDrefImplicitLod, + SpvOpImageSparseSampleProjDrefExplicitLod, + SpvOpImageSparseDrefGather, + }; + relaxed_ids_.clear(); +} + +} // 
namespace opt +} // namespace spvtools diff --git a/3rdparty/spirv-tools/source/opt/convert_to_half_pass.h b/3rdparty/spirv-tools/source/opt/convert_to_half_pass.h new file mode 100644 index 000000000..522584861 --- /dev/null +++ b/3rdparty/spirv-tools/source/opt/convert_to_half_pass.h @@ -0,0 +1,134 @@ +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIBSPIRV_OPT_CONVERT_TO_HALF_PASS_H_ +#define LIBSPIRV_OPT_CONVERT_TO_HALF_PASS_H_ + +#include "source/opt/ir_builder.h" +#include "source/opt/pass.h" + +namespace spvtools { +namespace opt { + +class ConvertToHalfPass : public Pass { + public: + ConvertToHalfPass() : Pass() {} + + ~ConvertToHalfPass() override = default; + + IRContext::Analysis GetPreservedAnalyses() override { + return IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping; + } + + // See optimizer.hpp for pass user documentation. 
+ Status Process() override; + + const char* name() const override { return "convert-to-half-pass"; } + + private: + // Return true if |inst| is an arithmetic op that can be of type float16 + bool IsArithmetic(Instruction* inst); + + // Return true if |inst| returns scalar, vector or matrix type with base + // float and |width| + bool IsFloat(Instruction* inst, uint32_t width); + + // Return true if |inst| is decorated with RelaxedPrecision + bool IsRelaxed(Instruction* inst); + + // Return type id for float with |width| + analysis::Type* FloatScalarType(uint32_t width); + + // Return type id for vector of length |vlen| of float of |width| + analysis::Type* FloatVectorType(uint32_t v_len, uint32_t width); + + // Return type id for matrix of |v_cnt| vectors of length identical to + // |vty_id| of float of |width| + analysis::Type* FloatMatrixType(uint32_t v_cnt, uint32_t vty_id, + uint32_t width); + + // Return equivalent to float type |ty_id| with |width| + uint32_t EquivFloatTypeId(uint32_t ty_id, uint32_t width); + + // Append instructions to builder to convert value |*val_idp| to type + // |ty_id| but with |width|. Set |*val_idp| to the new id. + void GenConvert(uint32_t* val_idp, uint32_t width, + InstructionBuilder* builder); + + // Remove RelaxedPrecision decoration of |id|. + void RemoveRelaxedDecoration(uint32_t id); + + // If |inst| is an arithmetic, phi, extract or convert instruction of float32 + // base type and decorated with RelaxedPrecision, change it to the equivalent + // float16 based type instruction. Specifically, insert instructions to + // convert all operands to float16 (if needed) and change its type to the + // equivalent float16 type. Otherwise, insert instructions to convert its + // operands back to their original types, if needed. 
+ bool GenHalfCode(Instruction* inst); + + // Gen code for relaxed arithmetic |inst| + bool GenHalfArith(Instruction* inst); + + // Gen code for relaxed phi |inst| + bool ProcessPhi(Instruction* inst); + + // Gen code for relaxed extract |inst| + bool ProcessExtract(Instruction* inst); + + // Gen code for relaxed convert |inst| + bool ProcessConvert(Instruction* inst); + + // Gen code for image reference |inst| + bool ProcessImageRef(Instruction* inst); + + // Process default non-relaxed |inst| + bool ProcessDefault(Instruction* inst); + + // If |inst| is an FConvert of a matrix type, decompose it to a series + // of vector extracts and converts combined with OpCompositeConstruct. Matrix + // FConverts are generated by GenHalfCode because they are easier to + // manipulate, but are invalid so we need to clean them up. + bool MatConvertCleanup(Instruction* inst); + + // Call GenHalfCode on every instruction in |func|. + // If code is generated for an instruction, replace the instruction + // with the new instructions that are generated.
+ bool ProcessFunction(Function* func); + + Pass::Status ProcessImpl(); + + // Initialize state for converting to half + void Initialize(); + + // Set of core operations to be processed + std::unordered_set target_ops_core_; + + // Set of 450 extension operations to be processed + std::unordered_set target_ops_450_; + + // Set of sample operations + std::unordered_set image_ops_; + + // Set of dref sample operations + std::unordered_set dref_image_ops_; + + // Ids of all converted instructions + std::vector relaxed_ids_; +}; + +} // namespace opt +} // namespace spvtools + +#endif // LIBSPIRV_OPT_CONVERT_TO_HALF_PASS_H_ diff --git a/3rdparty/spirv-tools/source/opt/feature_manager.h b/3rdparty/spirv-tools/source/opt/feature_manager.h index 761a20888..2fe329108 100644 --- a/3rdparty/spirv-tools/source/opt/feature_manager.h +++ b/3rdparty/spirv-tools/source/opt/feature_manager.h @@ -57,6 +57,9 @@ class FeatureManager { // Add the extension |ext| to the feature manager. void AddExtension(Instruction* ext); + // Analyzes |module| and records imported external instruction sets. + void AddExtInstImportIds(Module* module); + private: // Analyzes |module| and records enabled extensions. void AddExtensions(Module* module); @@ -64,9 +67,6 @@ class FeatureManager { // Analyzes |module| and records enabled capabilities. void AddCapabilities(Module* module); - // Analyzes |module| and records imported external instruction sets. - void AddExtInstImportIds(Module* module); - // Auxiliary object for querying SPIR-V grammar facts. const AssemblyGrammar& grammar_; diff --git a/3rdparty/spirv-tools/source/opt/inst_bindless_check_pass.cpp b/3rdparty/spirv-tools/source/opt/inst_bindless_check_pass.cpp index b283354b0..4587343fd 100644 --- a/3rdparty/spirv-tools/source/opt/inst_bindless_check_pass.cpp +++ b/3rdparty/spirv-tools/source/opt/inst_bindless_check_pass.cpp @@ -296,7 +296,7 @@ void InstBindlessCheckPass::GenCheckCode( // reference. 
if (new_ref_id != 0) { Instruction* phi_inst = builder.AddPhi( - ref_type_id, {new_ref_id, valid_blk_id, builder.GetNullId(ref_type_id), + ref_type_id, {new_ref_id, valid_blk_id, GetNullId(ref_type_id), last_invalid_blk_id}); context()->ReplaceAllUsesWith(ref->ref_inst->result_id(), phi_inst->result_id()); diff --git a/3rdparty/spirv-tools/source/opt/inst_buff_addr_check_pass.cpp b/3rdparty/spirv-tools/source/opt/inst_buff_addr_check_pass.cpp index 03221ef48..ef29ce552 100644 --- a/3rdparty/spirv-tools/source/opt/inst_buff_addr_check_pass.cpp +++ b/3rdparty/spirv-tools/source/opt/inst_buff_addr_check_pass.cpp @@ -108,8 +108,8 @@ void InstBuffAddrCheckPass::GenCheckCode( // reference. if (new_ref_id != 0) { Instruction* phi_inst = builder.AddPhi( - ref_type_id, {new_ref_id, valid_blk_id, builder.GetNullId(ref_type_id), - invalid_blk_id}); + ref_type_id, + {new_ref_id, valid_blk_id, GetNullId(ref_type_id), invalid_blk_id}); context()->ReplaceAllUsesWith(ref_inst->result_id(), phi_inst->result_id()); } new_blocks->push_back(std::move(new_blk_ptr)); diff --git a/3rdparty/spirv-tools/source/opt/ir_builder.h b/3rdparty/spirv-tools/source/opt/ir_builder.h index a0ca40cee..6720e8926 100644 --- a/3rdparty/spirv-tools/source/opt/ir_builder.h +++ b/3rdparty/spirv-tools/source/opt/ir_builder.h @@ -109,13 +109,13 @@ class InstructionBuilder { return AddInstruction(std::move(newQuadOp)); } - Instruction* AddIdLiteralOp(uint32_t type_id, SpvOp opcode, uint32_t operand1, - uint32_t operand2) { + Instruction* AddIdLiteralOp(uint32_t type_id, SpvOp opcode, uint32_t id, + uint32_t uliteral) { // TODO(1841): Handle id overflow. 
std::unique_ptr newBinOp(new Instruction( GetContext(), opcode, type_id, GetContext()->TakeNextId(), - {{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {operand1}}, - {spv_operand_type_t::SPV_OPERAND_TYPE_LITERAL_INTEGER, {operand2}}})); + {{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {id}}, + {spv_operand_type_t::SPV_OPERAND_TYPE_LITERAL_INTEGER, {uliteral}}})); return AddInstruction(std::move(newBinOp)); } @@ -358,16 +358,6 @@ class InstructionBuilder { return uint_inst->result_id(); } - uint32_t GetNullId(uint32_t type_id) { - analysis::TypeManager* type_mgr = GetContext()->get_type_mgr(); - analysis::ConstantManager* const_mgr = GetContext()->get_constant_mgr(); - const analysis::Type* type = type_mgr->GetType(type_id); - const analysis::Constant* null_const = const_mgr->GetConstant(type, {}); - Instruction* null_inst = - const_mgr->GetDefiningInstruction(null_const, type_id); - return null_inst->result_id(); - } - // Adds either a signed or unsigned 32 bit integer constant to the binary // depedning on the |sign|. If |sign| is true then the value is added as a // signed constant otherwise as an unsigned constant. If |sign| is false the @@ -502,6 +492,27 @@ class InstructionBuilder { return AddInstruction(std::move(new_inst)); } + Instruction* AddNaryExtendedInstruction( + uint32_t result_type, uint32_t set, uint32_t instruction, + const std::vector& ext_operands) { + std::vector operands; + operands.push_back({SPV_OPERAND_TYPE_ID, {set}}); + operands.push_back( + {SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER, {instruction}}); + for (uint32_t id : ext_operands) { + operands.push_back({SPV_OPERAND_TYPE_ID, {id}}); + } + + uint32_t result_id = GetContext()->TakeNextId(); + if (result_id == 0) { + return nullptr; + } + + std::unique_ptr new_inst(new Instruction( + GetContext(), SpvOpExtInst, result_type, result_id, operands)); + return AddInstruction(std::move(new_inst)); + } + // Inserts the new instruction before the insertion point. 
Instruction* AddInstruction(std::unique_ptr&& insn) { Instruction* insn_ptr = &*insert_before_.InsertBefore(std::move(insn)); diff --git a/3rdparty/spirv-tools/source/opt/ir_context.h b/3rdparty/spirv-tools/source/opt/ir_context.h index e297fb1f5..3bbf18091 100644 --- a/3rdparty/spirv-tools/source/opt/ir_context.h +++ b/3rdparty/spirv-tools/source/opt/ir_context.h @@ -199,6 +199,7 @@ class IRContext { inline void AddExtension(const std::string& ext_name); inline void AddExtension(std::unique_ptr&& e); // Appends an extended instruction set instruction to this module. + inline void AddExtInstImport(const std::string& name); inline void AddExtInstImport(std::unique_ptr&& e); // Set the memory model for this module. inline void SetMemoryModel(std::unique_ptr&& m); @@ -971,9 +972,26 @@ void IRContext::AddExtension(std::unique_ptr&& e) { module()->AddExtension(std::move(e)); } +void IRContext::AddExtInstImport(const std::string& name) { + const auto num_chars = name.size(); + // Compute num words, accommodate the terminating null character. 
+ const auto num_words = (num_chars + 1 + 3) / 4; + std::vector ext_words(num_words, 0u); + std::memcpy(ext_words.data(), name.data(), num_chars); + AddExtInstImport(std::unique_ptr( + new Instruction(this, SpvOpExtInstImport, 0u, TakeNextId(), + {{SPV_OPERAND_TYPE_LITERAL_STRING, ext_words}}))); +} + void IRContext::AddExtInstImport(std::unique_ptr&& e) { AddCombinatorsForExtension(e.get()); + if (AreAnalysesValid(kAnalysisDefUse)) { + get_def_use_mgr()->AnalyzeInstDefUse(e.get()); + } module()->AddExtInstImport(std::move(e)); + if (feature_mgr_ != nullptr) { + feature_mgr_->AddExtInstImportIds(module()); + } } void IRContext::SetMemoryModel(std::unique_ptr&& m) { diff --git a/3rdparty/spirv-tools/source/opt/optimizer.cpp b/3rdparty/spirv-tools/source/opt/optimizer.cpp index 635b075ff..78b7646fd 100644 --- a/3rdparty/spirv-tools/source/opt/optimizer.cpp +++ b/3rdparty/spirv-tools/source/opt/optimizer.cpp @@ -415,6 +415,10 @@ bool Optimizer::RegisterPassFromFlag(const std::string& flag) { } else if (pass_name == "inst-buff-addr-check") { RegisterPass(CreateInstBuffAddrCheckPass(7, 23, 2)); RegisterPass(CreateAggressiveDCEPass()); + } else if (pass_name == "convert-relaxed-to-half") { + RegisterPass(CreateConvertRelaxedToHalfPass()); + } else if (pass_name == "relax-float-ops") { + RegisterPass(CreateRelaxFloatOpsPass()); } else if (pass_name == "simplify-instructions") { RegisterPass(CreateSimplificationPass()); } else if (pass_name == "ssa-rewrite") { @@ -877,6 +881,16 @@ Optimizer::PassToken CreateInstBuffAddrCheckPass(uint32_t desc_set, MakeUnique(desc_set, shader_id, version)); } +Optimizer::PassToken CreateConvertRelaxedToHalfPass() { + return MakeUnique( + MakeUnique()); +} + +Optimizer::PassToken CreateRelaxFloatOpsPass() { + return MakeUnique( + MakeUnique()); +} + Optimizer::PassToken CreateCodeSinkingPass() { return MakeUnique( MakeUnique()); diff --git a/3rdparty/spirv-tools/source/opt/pass.cpp b/3rdparty/spirv-tools/source/opt/pass.cpp index 
f9e4a5d47..09b78af93 100644 --- a/3rdparty/spirv-tools/source/opt/pass.cpp +++ b/3rdparty/spirv-tools/source/opt/pass.cpp @@ -54,6 +54,36 @@ uint32_t Pass::GetPointeeTypeId(const Instruction* ptrInst) const { return ptrTypeInst->GetSingleWordInOperand(kTypePointerTypeIdInIdx); } +Instruction* Pass::GetBaseType(uint32_t ty_id) { + Instruction* ty_inst = get_def_use_mgr()->GetDef(ty_id); + if (ty_inst->opcode() == SpvOpTypeMatrix) { + uint32_t vty_id = ty_inst->GetSingleWordInOperand(0); + ty_inst = get_def_use_mgr()->GetDef(vty_id); + } + if (ty_inst->opcode() == SpvOpTypeVector) { + uint32_t cty_id = ty_inst->GetSingleWordInOperand(0); + ty_inst = get_def_use_mgr()->GetDef(cty_id); + } + return ty_inst; +} + +bool Pass::IsFloat(uint32_t ty_id, uint32_t width) { + Instruction* ty_inst = GetBaseType(ty_id); + if (ty_inst->opcode() != SpvOpTypeFloat) return false; + return ty_inst->GetSingleWordInOperand(0) == width; +} + +uint32_t Pass::GetNullId(uint32_t type_id) { + if (IsFloat(type_id, 16)) context()->AddCapability(SpvCapabilityFloat16); + analysis::TypeManager* type_mgr = context()->get_type_mgr(); + analysis::ConstantManager* const_mgr = context()->get_constant_mgr(); + const analysis::Type* type = type_mgr->GetType(type_id); + const analysis::Constant* null_const = const_mgr->GetConstant(type, {}); + Instruction* null_inst = + const_mgr->GetDefiningInstruction(null_const, type_id); + return null_inst->result_id(); +} + uint32_t Pass::GenerateCopy(Instruction* object_to_copy, uint32_t new_type_id, Instruction* insertion_position) { analysis::TypeManager* type_mgr = context()->get_type_mgr(); diff --git a/3rdparty/spirv-tools/source/opt/pass.h b/3rdparty/spirv-tools/source/opt/pass.h index 686e9fc1d..a8c9c4b43 100644 --- a/3rdparty/spirv-tools/source/opt/pass.h +++ b/3rdparty/spirv-tools/source/opt/pass.h @@ -109,6 +109,16 @@ class Pass { // Return type id for |ptrInst|'s pointee uint32_t GetPointeeTypeId(const Instruction* ptrInst) const; + // Return base type 
of |ty_id| type + Instruction* GetBaseType(uint32_t ty_id); + + // Return true if |inst| returns scalar, vector or matrix type with base + // float and |width| + bool IsFloat(uint32_t ty_id, uint32_t width); + + // Return the id of OpConstantNull of type |type_id|. Create if necessary. + uint32_t GetNullId(uint32_t type_id); + protected: // Constructs a new pass. // diff --git a/3rdparty/spirv-tools/source/opt/passes.h b/3rdparty/spirv-tools/source/opt/passes.h index d53af8ff2..3d08f9085 100644 --- a/3rdparty/spirv-tools/source/opt/passes.h +++ b/3rdparty/spirv-tools/source/opt/passes.h @@ -25,6 +25,7 @@ #include "source/opt/code_sink.h" #include "source/opt/combine_access_chains.h" #include "source/opt/compact_ids_pass.h" +#include "source/opt/convert_to_half_pass.h" #include "source/opt/copy_prop_arrays.h" #include "source/opt/dead_branch_elim_pass.h" #include "source/opt/dead_insert_elim_pass.h" @@ -63,6 +64,7 @@ #include "source/opt/process_lines_pass.h" #include "source/opt/reduce_load_size.h" #include "source/opt/redundancy_elimination.h" +#include "source/opt/relax_float_ops_pass.h" #include "source/opt/remove_duplicates_pass.h" #include "source/opt/replace_invalid_opc.h" #include "source/opt/scalar_replacement_pass.h" diff --git a/3rdparty/spirv-tools/source/opt/relax_float_ops_pass.cpp b/3rdparty/spirv-tools/source/opt/relax_float_ops_pass.cpp new file mode 100644 index 000000000..73f16ddf3 --- /dev/null +++ b/3rdparty/spirv-tools/source/opt/relax_float_ops_pass.cpp @@ -0,0 +1,178 @@ +// Copyright (c) 2019 The Khronos Group Inc. +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "relax_float_ops_pass.h" + +#include "source/opt/ir_builder.h" + +namespace spvtools { +namespace opt { + +bool RelaxFloatOpsPass::IsRelaxable(Instruction* inst) { + return target_ops_core_f_rslt_.count(inst->opcode()) != 0 || + target_ops_core_f_opnd_.count(inst->opcode()) != 0 || + sample_ops_.count(inst->opcode()) != 0 || + (inst->opcode() == SpvOpExtInst && + inst->GetSingleWordInOperand(0) == + context()->get_feature_mgr()->GetExtInstImportId_GLSLstd450() && + target_ops_450_.count(inst->GetSingleWordInOperand(1)) != 0); +} + +bool RelaxFloatOpsPass::IsFloat32(Instruction* inst) { + uint32_t ty_id; + if (target_ops_core_f_opnd_.count(inst->opcode()) != 0) { + uint32_t opnd_id = inst->GetSingleWordInOperand(0); + Instruction* opnd_inst = get_def_use_mgr()->GetDef(opnd_id); + ty_id = opnd_inst->type_id(); + } else { + ty_id = inst->type_id(); + if (ty_id == 0) return false; + } + return IsFloat(ty_id, 32); +} + +bool RelaxFloatOpsPass::IsRelaxed(uint32_t r_id) { + for (auto r_inst : get_decoration_mgr()->GetDecorationsFor(r_id, false)) + if (r_inst->opcode() == SpvOpDecorate && + r_inst->GetSingleWordInOperand(1) == SpvDecorationRelaxedPrecision) + return true; + return false; +} + +bool RelaxFloatOpsPass::ProcessInst(Instruction* r_inst) { + uint32_t r_id = r_inst->result_id(); + if (r_id == 0) return false; + if (!IsFloat32(r_inst)) return false; + if (IsRelaxed(r_id)) return false; + if (!IsRelaxable(r_inst)) return false; + get_decoration_mgr()->AddDecoration(r_id, SpvDecorationRelaxedPrecision); + return true; +} + +bool 
RelaxFloatOpsPass::ProcessFunction(Function* func) { + bool modified = false; + cfg()->ForEachBlockInReversePostOrder( + func->entry().get(), [&modified, this](BasicBlock* bb) { + for (auto ii = bb->begin(); ii != bb->end(); ++ii) + modified |= ProcessInst(&*ii); + }); + return modified; +} + +Pass::Status RelaxFloatOpsPass::ProcessImpl() { + Pass::ProcessFunction pfn = [this](Function* fp) { + return ProcessFunction(fp); + }; + bool modified = context()->ProcessEntryPointCallTree(pfn); + return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; +} + +Pass::Status RelaxFloatOpsPass::Process() { + Initialize(); + return ProcessImpl(); +} + +void RelaxFloatOpsPass::Initialize() { + target_ops_core_f_rslt_ = { + SpvOpLoad, + SpvOpPhi, + SpvOpVectorExtractDynamic, + SpvOpVectorInsertDynamic, + SpvOpVectorShuffle, + SpvOpCompositeExtract, + SpvOpCompositeConstruct, + SpvOpCompositeInsert, + SpvOpCopyObject, + SpvOpTranspose, + SpvOpConvertSToF, + SpvOpConvertUToF, + SpvOpFConvert, + // SpvOpQuantizeToF16, + SpvOpFNegate, + SpvOpFAdd, + SpvOpFSub, + SpvOpFMul, + SpvOpFDiv, + SpvOpFMod, + SpvOpVectorTimesScalar, + SpvOpMatrixTimesScalar, + SpvOpVectorTimesMatrix, + SpvOpMatrixTimesVector, + SpvOpMatrixTimesMatrix, + SpvOpOuterProduct, + SpvOpDot, + SpvOpSelect, + }; + target_ops_core_f_opnd_ = { + SpvOpFOrdEqual, + SpvOpFUnordEqual, + SpvOpFOrdNotEqual, + SpvOpFUnordNotEqual, + SpvOpFOrdLessThan, + SpvOpFUnordLessThan, + SpvOpFOrdGreaterThan, + SpvOpFUnordGreaterThan, + SpvOpFOrdLessThanEqual, + SpvOpFUnordLessThanEqual, + SpvOpFOrdGreaterThanEqual, + SpvOpFUnordGreaterThanEqual, + }; + target_ops_450_ = { + GLSLstd450Round, GLSLstd450RoundEven, GLSLstd450Trunc, GLSLstd450FAbs, + GLSLstd450FSign, GLSLstd450Floor, GLSLstd450Ceil, GLSLstd450Fract, + GLSLstd450Radians, GLSLstd450Degrees, GLSLstd450Sin, GLSLstd450Cos, + GLSLstd450Tan, GLSLstd450Asin, GLSLstd450Acos, GLSLstd450Atan, + GLSLstd450Sinh, GLSLstd450Cosh, GLSLstd450Tanh, GLSLstd450Asinh, + 
GLSLstd450Acosh, GLSLstd450Atanh, GLSLstd450Atan2, GLSLstd450Pow, + GLSLstd450Exp, GLSLstd450Log, GLSLstd450Exp2, GLSLstd450Log2, + GLSLstd450Sqrt, GLSLstd450InverseSqrt, GLSLstd450Determinant, + GLSLstd450MatrixInverse, + // TODO(greg-lunarg): GLSLstd450ModfStruct, + GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp, GLSLstd450FMix, + GLSLstd450Step, GLSLstd450SmoothStep, GLSLstd450Fma, + // TODO(greg-lunarg): GLSLstd450FrexpStruct, + GLSLstd450Ldexp, GLSLstd450Length, GLSLstd450Distance, GLSLstd450Cross, + GLSLstd450Normalize, GLSLstd450FaceForward, GLSLstd450Reflect, + GLSLstd450Refract, GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp}; + sample_ops_ = {SpvOpImageSampleImplicitLod, + SpvOpImageSampleExplicitLod, + SpvOpImageSampleDrefImplicitLod, + SpvOpImageSampleDrefExplicitLod, + SpvOpImageSampleProjImplicitLod, + SpvOpImageSampleProjExplicitLod, + SpvOpImageSampleProjDrefImplicitLod, + SpvOpImageSampleProjDrefExplicitLod, + SpvOpImageFetch, + SpvOpImageGather, + SpvOpImageDrefGather, + SpvOpImageRead, + SpvOpImageSparseSampleImplicitLod, + SpvOpImageSparseSampleExplicitLod, + SpvOpImageSparseSampleDrefImplicitLod, + SpvOpImageSparseSampleDrefExplicitLod, + SpvOpImageSparseSampleProjImplicitLod, + SpvOpImageSparseSampleProjExplicitLod, + SpvOpImageSparseSampleProjDrefImplicitLod, + SpvOpImageSparseSampleProjDrefExplicitLod, + SpvOpImageSparseFetch, + SpvOpImageSparseGather, + SpvOpImageSparseDrefGather, + SpvOpImageSparseTexelsResident, + SpvOpImageSparseRead}; +} + +} // namespace opt +} // namespace spvtools diff --git a/3rdparty/spirv-tools/source/opt/relax_float_ops_pass.h b/3rdparty/spirv-tools/source/opt/relax_float_ops_pass.h new file mode 100644 index 000000000..5ee3d73c8 --- /dev/null +++ b/3rdparty/spirv-tools/source/opt/relax_float_ops_pass.h @@ -0,0 +1,80 @@ +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIBSPIRV_OPT_RELAX_FLOAT_OPS_PASS_H_ +#define LIBSPIRV_OPT_RELAX_FLOAT_OPS_PASS_H_ + +#include "source/opt/ir_builder.h" +#include "source/opt/pass.h" + +namespace spvtools { +namespace opt { + +class RelaxFloatOpsPass : public Pass { + public: + RelaxFloatOpsPass() : Pass() {} + + ~RelaxFloatOpsPass() override = default; + + IRContext::Analysis GetPreservedAnalyses() override { + return IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping; + } + + // See optimizer.hpp for pass user documentation. + Status Process() override; + + const char* name() const override { return "convert-to-half-pass"; } + + private: + // Return true if |inst| can have the RelaxedPrecision decoration applied + // to it. + bool IsRelaxable(Instruction* inst); + + // Return true if |inst| returns scalar, vector or matrix type with base + // float and width 32 + bool IsFloat32(Instruction* inst); + + // Return true if |r_id| is decorated with RelaxedPrecision + bool IsRelaxed(uint32_t r_id); + + // If |inst| is an instruction of float32-based type and is not decorated + // RelaxedPrecision, add such a decoration to the module. + bool ProcessInst(Instruction* inst); + + // Call ProcessInst on every instruction in |func|. 
+ bool ProcessFunction(Function* func); + + Pass::Status ProcessImpl(); + + // Initialize state for converting to half + void Initialize(); + + // Set of float result core operations to be processed + std::unordered_set target_ops_core_f_rslt_; + + // Set of float operand core operations to be processed + std::unordered_set target_ops_core_f_opnd_; + + // Set of 450 extension operations to be processed + std::unordered_set target_ops_450_; + + // Set of sample operations + std::unordered_set sample_ops_; +}; + +} // namespace opt +} // namespace spvtools + +#endif // LIBSPIRV_OPT_RELAX_FLOAT_OPS_PASS_H_ diff --git a/3rdparty/spirv-tools/source/reduce/CMakeLists.txt b/3rdparty/spirv-tools/source/reduce/CMakeLists.txt index 7651e8664..35acf3fd4 100644 --- a/3rdparty/spirv-tools/source/reduce/CMakeLists.txt +++ b/3rdparty/spirv-tools/source/reduce/CMakeLists.txt @@ -81,8 +81,12 @@ add_library(SPIRV-Tools-reduce ${SPIRV_TOOLS_REDUCE_SOURCES}) spvtools_default_compile_options(SPIRV-Tools-reduce) target_include_directories(SPIRV-Tools-reduce - PUBLIC ${spirv-tools_SOURCE_DIR}/include - PUBLIC ${SPIRV_HEADER_INCLUDE_DIR} + PUBLIC + $ + $ + PUBLIC + $ + $ PRIVATE ${spirv-tools_BINARY_DIR} ) # The reducer reuses a lot of functionality from the SPIRV-Tools library. 
@@ -94,8 +98,9 @@ set_property(TARGET SPIRV-Tools-reduce PROPERTY FOLDER "SPIRV-Tools libraries") spvtools_check_symbol_exports(SPIRV-Tools-reduce) if(ENABLE_SPIRV_TOOLS_INSTALL) - install(TARGETS SPIRV-Tools-reduce + install(TARGETS SPIRV-Tools-reduce EXPORT SPIRV-Tools-reduceTargets RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(EXPORT SPIRV-Tools-reduceTargets DESTINATION lib/cmake) endif(ENABLE_SPIRV_TOOLS_INSTALL) diff --git a/3rdparty/spirv-tools/test/fuzzers/spvtools_opt_vulkantowebgpu_fuzzer.cpp b/3rdparty/spirv-tools/test/fuzzers/spvtools_opt_vulkantowebgpu_fuzzer.cpp index 0b2ecc322..9371c0df0 100644 --- a/3rdparty/spirv-tools/test/fuzzers/spvtools_opt_vulkantowebgpu_fuzzer.cpp +++ b/3rdparty/spirv-tools/test/fuzzers/spvtools_opt_vulkantowebgpu_fuzzer.cpp @@ -18,7 +18,7 @@ #include "spirv-tools/optimizer.hpp" extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - spvtools::Optimizer optimizer(SPV_ENV_WEBGPU_0); + spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_1); optimizer.SetMessageConsumer([](spv_message_level_t, const char*, const spv_position_t&, const char*) {}); diff --git a/3rdparty/spirv-tools/test/fuzzers/spvtools_opt_webgputovulkan_fuzzer.cpp b/3rdparty/spirv-tools/test/fuzzers/spvtools_opt_webgputovulkan_fuzzer.cpp index 1e44857dc..78ddbb75c 100644 --- a/3rdparty/spirv-tools/test/fuzzers/spvtools_opt_webgputovulkan_fuzzer.cpp +++ b/3rdparty/spirv-tools/test/fuzzers/spvtools_opt_webgputovulkan_fuzzer.cpp @@ -18,7 +18,7 @@ #include "spirv-tools/optimizer.hpp" extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_1); + spvtools::Optimizer optimizer(SPV_ENV_WEBGPU_0); optimizer.SetMessageConsumer([](spv_message_level_t, const char*, const spv_position_t&, const char*) {}); diff --git a/3rdparty/spirv-tools/test/opt/CMakeLists.txt 
b/3rdparty/spirv-tools/test/opt/CMakeLists.txt index 47ce41f0c..327f26563 100644 --- a/3rdparty/spirv-tools/test/opt/CMakeLists.txt +++ b/3rdparty/spirv-tools/test/opt/CMakeLists.txt @@ -28,6 +28,7 @@ add_spvtools_unittest(TARGET opt compact_ids_test.cpp constants_test.cpp constant_manager_test.cpp + convert_relaxed_to_half_test.cpp copy_prop_array_test.cpp dead_branch_elim_test.cpp dead_insert_elim_test.cpp @@ -80,6 +81,7 @@ add_spvtools_unittest(TARGET opt reduce_load_size_test.cpp redundancy_elimination_test.cpp register_liveness.cpp + relax_float_ops_test.cpp replace_invalid_opc_test.cpp scalar_analysis.cpp scalar_replacement_test.cpp diff --git a/3rdparty/spirv-tools/test/opt/amd_ext_to_khr.cpp b/3rdparty/spirv-tools/test/opt/amd_ext_to_khr.cpp index 7a6d4b463..d943d3425 100644 --- a/3rdparty/spirv-tools/test/opt/amd_ext_to_khr.cpp +++ b/3rdparty/spirv-tools/test/opt/amd_ext_to_khr.cpp @@ -233,6 +233,7 @@ TEST_F(AmdExtToKhrTest, ReplaceSwizzleInvocationsMaskedAMD) { SinglePassRunAndMatch(text, true); } + TEST_F(AmdExtToKhrTest, ReplaceWriteInvocationAMD) { const std::string text = R"( ; CHECK: OpCapability Shader @@ -269,6 +270,585 @@ TEST_F(AmdExtToKhrTest, ReplaceWriteInvocationAMD) { SinglePassRunAndMatch(text, true); } +TEST_F(AmdExtToKhrTest, ReplaceFMin3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeFloat 32 +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] FMin [[x]] [[y]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] FMin [[temp]] [[z]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport 
"SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %float + %8 = OpUndef %float + %9 = OpUndef %float + %10 = OpExtInst %float %ext FMin3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceSMin3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeInt 32 1 +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] SMin [[x]] [[y]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] SMin [[temp]] [[z]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %int + %8 = OpUndef %int + %9 = OpUndef %int + %10 = OpExtInst %int %ext SMin3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceUMin3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; 
CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeInt 32 0 +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] UMin [[x]] [[y]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] UMin [[temp]] [[z]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %uint + %8 = OpUndef %uint + %9 = OpUndef %uint + %10 = OpExtInst %uint %ext UMin3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceFMax3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeFloat 32 +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] FMax [[x]] [[y]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] FMax [[temp]] [[z]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 
0 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %float + %8 = OpUndef %float + %9 = OpUndef %float + %10 = OpExtInst %float %ext FMax3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceSMax3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeInt 32 1 +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] SMax [[x]] [[y]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] SMax [[temp]] [[z]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %int + %8 = OpUndef %int + %9 = OpUndef %int + %10 = OpExtInst %int %ext SMax3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceUMax3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeInt 32 0 +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: 
[[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[x]] [[y]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[temp]] [[z]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %uint + %8 = OpUndef %uint + %9 = OpUndef %uint + %10 = OpExtInst %uint %ext UMax3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceVecUMax3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeVector +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[x]] [[y]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[temp]] [[z]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %vec = OpTypeVector %uint 4 + %int = OpTypeInt 32 1 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %vec + %8 = OpUndef 
%vec + %9 = OpUndef %vec + %10 = OpExtInst %vec %ext UMax3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceFMid3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeFloat 32 +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[min:%\w+]] = OpExtInst [[type]] [[ext]] FMin [[y]] [[z]] +; CHECK-NEXT: [[max:%\w+]] = OpExtInst [[type]] [[ext]] FMax [[y]] [[z]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] FClamp [[x]] [[min]] [[max]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %float + %8 = OpUndef %float + %9 = OpUndef %float + %10 = OpExtInst %float %ext FMid3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceSMid3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeInt 32 1 +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: 
[[min:%\w+]] = OpExtInst [[type]] [[ext]] SMin [[y]] [[z]] +; CHECK-NEXT: [[max:%\w+]] = OpExtInst [[type]] [[ext]] SMax [[y]] [[z]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] SClamp [[x]] [[min]] [[max]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %int + %8 = OpUndef %int + %9 = OpUndef %int + %10 = OpExtInst %int %ext SMid3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceUMid3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeInt 32 0 +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[min:%\w+]] = OpExtInst [[type]] [[ext]] UMin [[y]] [[z]] +; CHECK-NEXT: [[max:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[y]] [[z]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] UClamp [[x]] [[min]] [[max]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = 
OpLabel + %7 = OpUndef %uint + %8 = OpUndef %uint + %9 = OpUndef %uint + %10 = OpExtInst %uint %ext UMid3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceVecUMid3AMD) { + const std::string text = R"( +; CHECK: OpCapability Shader +; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax" +; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax" +; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450" +; CHECK: [[type:%\w+]] = OpTypeVector +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]] +; CHECK-NEXT: [[min:%\w+]] = OpExtInst [[type]] [[ext]] UMin [[y]] [[z]] +; CHECK-NEXT: [[max:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[y]] [[z]] +; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] UClamp [[x]] [[min]] [[max]] + OpCapability Shader + OpExtension "SPV_AMD_shader_trinary_minmax" + %ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "func" + OpExecutionMode %1 OriginUpperLeft + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %vec = OpTypeVector %uint 3 + %int = OpTypeInt 32 1 + %float = OpTypeFloat 32 + %uint_3 = OpConstant %uint 3 + %1 = OpFunction %void None %3 + %6 = OpLabel + %7 = OpUndef %vec + %8 = OpUndef %vec + %9 = OpUndef %vec + %10 = OpExtInst %vec %ext UMid3AMD %7 %8 %9 + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceCubeFaceCoordAMD) { + // Sorry for the Check test. The code sequence is so long, I do not think + // that a match test would be anymore legible. This tests the replacement of + // the CubeFaceCoordAMD instruction. 
+ const std::string before = R"( + OpCapability Shader + OpExtension "SPV_KHR_storage_buffer_storage_class" + OpExtension "SPV_AMD_gcn_shader" + %1 = OpExtInstImport "SPV_AMD_gcn_shader" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" + OpExecutionMode %2 LocalSize 1 1 1 + %void = OpTypeVoid + %4 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %2 = OpFunction %void None %4 + %8 = OpLabel + %9 = OpUndef %v3float + %10 = OpExtInst %v2float %1 CubeFaceCoordAMD %9 + OpReturn + OpFunctionEnd +)"; + + const std::string after = R"(OpCapability Shader +OpExtension "SPV_KHR_storage_buffer_storage_class" +%12 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %2 "main" +OpExecutionMode %2 LocalSize 1 1 1 +%void = OpTypeVoid +%4 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v2float = OpTypeVector %float 2 +%v3float = OpTypeVector %float 3 +%bool = OpTypeBool +%float_0 = OpConstant %float 0 +%float_2 = OpConstant %float 2 +%float_0_5 = OpConstant %float 0.5 +%16 = OpConstantComposite %v2float %float_0_5 %float_0_5 +%2 = OpFunction %void None %4 +%8 = OpLabel +%9 = OpUndef %v3float +%17 = OpCompositeExtract %float %9 0 +%18 = OpCompositeExtract %float %9 1 +%19 = OpCompositeExtract %float %9 2 +%20 = OpFNegate %float %17 +%21 = OpFNegate %float %18 +%22 = OpFNegate %float %19 +%23 = OpExtInst %float %12 FAbs %17 +%24 = OpExtInst %float %12 FAbs %18 +%25 = OpExtInst %float %12 FAbs %19 +%26 = OpFOrdLessThan %bool %19 %float_0 +%27 = OpFOrdLessThan %bool %18 %float_0 +%28 = OpFOrdLessThan %bool %17 %float_0 +%29 = OpExtInst %float %12 FMax %23 %24 +%30 = OpExtInst %float %12 FMax %25 %29 +%31 = OpFMul %float %float_2 %30 +%32 = OpFOrdGreaterThanEqual %bool %25 %29 +%33 = OpLogicalNot %bool %32 +%34 = OpFOrdGreaterThanEqual %bool %24 %23 +%35 = OpLogicalAnd %bool %33 %34 +%36 = OpSelect %float %26 %20 %17 +%37 = OpSelect %float %28 %19 %22 +%38 = 
OpSelect %float %35 %17 %37 +%39 = OpSelect %float %32 %36 %38 +%40 = OpSelect %float %27 %22 %19 +%41 = OpSelect %float %35 %40 %21 +%42 = OpCompositeConstruct %v2float %39 %41 +%43 = OpCompositeConstruct %v2float %31 %31 +%44 = OpFDiv %v2float %42 %43 +%10 = OpFAdd %v2float %44 %16 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck(before, after, true); +} + +TEST_F(AmdExtToKhrTest, ReplaceCubeFaceIndexAMD) { + // Sorry for the Check test. The code sequence is so long, I do not think + // that a match test would be anymore legible. This tests the replacement of + // the CubeFaceIndexAMD instruction. + const std::string before = R"(OpCapability Shader +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpExtension "SPV_AMD_gcn_shader" +%1 = OpExtInstImport "SPV_AMD_gcn_shader" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %2 "main" +OpExecutionMode %2 LocalSize 1 1 1 +%void = OpTypeVoid +%4 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v3float = OpTypeVector %float 3 +%2 = OpFunction %void None %4 +%7 = OpLabel +%8 = OpUndef %v3float +%9 = OpExtInst %float %1 CubeFaceIndexAMD %8 +OpReturn +OpFunctionEnd +)"; + + const std::string after = R"(OpCapability Shader +OpExtension "SPV_KHR_storage_buffer_storage_class" +%11 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %2 "main" +OpExecutionMode %2 LocalSize 1 1 1 +%void = OpTypeVoid +%4 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v3float = OpTypeVector %float 3 +%bool = OpTypeBool +%float_0 = OpConstant %float 0 +%float_1 = OpConstant %float 1 +%float_2 = OpConstant %float 2 +%float_3 = OpConstant %float 3 +%float_4 = OpConstant %float 4 +%float_5 = OpConstant %float 5 +%2 = OpFunction %void None %4 +%7 = OpLabel +%8 = OpUndef %v3float +%18 = OpCompositeExtract %float %8 0 +%19 = OpCompositeExtract %float %8 1 +%20 = OpCompositeExtract %float %8 2 +%21 = OpExtInst %float %11 FAbs %18 +%22 = OpExtInst %float %11 FAbs %19 +%23 = OpExtInst %float %11 FAbs 
%20 +%24 = OpFOrdLessThan %bool %20 %float_0 +%25 = OpFOrdLessThan %bool %19 %float_0 +%26 = OpFOrdLessThan %bool %18 %float_0 +%27 = OpExtInst %float %11 FMax %21 %22 +%28 = OpFOrdGreaterThanEqual %bool %23 %27 +%29 = OpFOrdGreaterThanEqual %bool %22 %21 +%30 = OpSelect %float %24 %float_5 %float_4 +%31 = OpSelect %float %25 %float_3 %float_2 +%32 = OpSelect %float %26 %float_1 %float_0 +%33 = OpSelect %float %29 %31 %32 +%9 = OpSelect %float %28 %30 %33 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck(before, after, true); +} + TEST_F(AmdExtToKhrTest, SetVersion) { const std::string text = R"( OpCapability Shader diff --git a/3rdparty/spirv-tools/test/opt/convert_relaxed_to_half_test.cpp b/3rdparty/spirv-tools/test/opt/convert_relaxed_to_half_test.cpp new file mode 100644 index 000000000..3ac80099f --- /dev/null +++ b/3rdparty/spirv-tools/test/opt/convert_relaxed_to_half_test.cpp @@ -0,0 +1,1227 @@ +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Convert Relaxed to Half tests + +#include +#include + +#include "test/opt/assembly_builder.h" +#include "test/opt/pass_fixture.h" +#include "test/opt/pass_utils.h" + +namespace spvtools { +namespace opt { +namespace { + +using ConvertToHalfTest = PassTest<::testing::Test>; + +TEST_F(ConvertToHalfTest, ConvertToHalfBasic) { + // The resulting SPIR-V was processed with --relax-float-ops. 
+ // + // clang-format off + // + // SamplerState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // struct PS_INPUT + // { + // float Tex0 : TEXCOORD0; + // }; + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // cbuffer cbuff{ + // float c; + // } + // + // PS_OUTPUT main(PS_INPUT i) + // { + // PS_OUTPUT psout; + // psout.Color = g_tTex1df4.Sample(g_sSamp, i.Tex0) * c; + // return psout; + // } + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %48 RelaxedPrecision +OpDecorate %63 RelaxedPrecision +OpDecorate %65 RelaxedPrecision +OpDecorate %66 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%19 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_19 = OpTypePointer UniformConstant %19 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_19 UniformConstant +%23 = OpTypeSampler +%_ptr_UniformConstant_23 = OpTypePointer UniformConstant %23 +%g_sSamp = OpVariable %_ptr_UniformConstant_23 UniformConstant +%27 = OpTypeSampledImage %19 +%cbuff = 
OpTypeStruct %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Sampled1D +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%19 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_19 = OpTypePointer UniformConstant %19 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_19 UniformConstant +%23 = OpTypeSampler +%_ptr_UniformConstant_23 = OpTypePointer UniformConstant %23 +%g_sSamp = OpVariable %_ptr_UniformConstant_23 UniformConstant +%27 = OpTypeSampledImage %19 +%cbuff = OpTypeStruct %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Input_float = OpTypePointer Input %float 
+%i_Tex0 = OpVariable %_ptr_Input_float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%half = OpTypeFloat 16 +%v4half = OpTypeVector %half 4 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%48 = OpLoad %float %i_Tex0 +%58 = OpLoad %19 %g_tTex1df4 +%59 = OpLoad %23 %g_sSamp +%60 = OpSampledImage %27 %58 %59 +%63 = OpImageSampleImplicitLod %v4float %60 %48 +%64 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%65 = OpLoad %float %64 +%66 = OpVectorTimesScalar %v4float %63 %65 +OpStore %_entryPointOutput_Color %66 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%48 = OpLoad %float %i_Tex0 +%58 = OpLoad %19 %g_tTex1df4 +%59 = OpLoad %23 %g_sSamp +%60 = OpSampledImage %27 %58 %59 +%63 = OpImageSampleImplicitLod %v4float %60 %48 +%64 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%65 = OpLoad %float %64 +%69 = OpFConvert %v4half %63 +%70 = OpFConvert %half %65 +%66 = OpVectorTimesScalar %v4half %69 %70 +%71 = OpFConvert %v4float %66 +OpStore %_entryPointOutput_Color %71 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck(defs_before + func_before, + defs_after + func_after, true, true); +} + +TEST_F(ConvertToHalfTest, ConvertToHalfWithDrefSample) { + // The resulting SPIR-V was processed with --relax-float-ops. 
+ // + // clang-format off + // + // SamplerComparisonState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // cbuffer cbuff{ + // float c1; + // float c2; + // }; + // + // struct PS_INPUT + // { + // float Tex0 : TEXCOORD0; + // float Tex1 : TEXCOORD1; + // }; + // + // struct PS_OUTPUT + // { + // float Color : SV_Target0; + // }; + // + // PS_OUTPUT main(PS_INPUT i) + // { + // PS_OUTPUT psout; + // float txval10 = g_tTex1df4.SampleCmp(g_sSamp, i.Tex0 * 0.1, c1 + 0.1); + // float txval11 = g_tTex1df4.SampleCmp(g_sSamp, i.Tex1 * 0.2, c2 + 0.2); + // float t = txval10 + txval11; + // float t2 = t / 2.0; + // psout.Color = t2; + // return psout; + // } + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %i_Tex1 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c1" +OpMemberName %cbuff 1 "c2" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %i_Tex1 "i.Tex1" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 1 Offset 4 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %i_Tex1 Location 1 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %100 RelaxedPrecision +OpDecorate %76 RelaxedPrecision +OpDecorate %79 RelaxedPrecision +OpDecorate %98 RelaxedPrecision +OpDecorate %101 RelaxedPrecision +OpDecorate %110 RelaxedPrecision +OpDecorate %102 RelaxedPrecision +OpDecorate %112 RelaxedPrecision +OpDecorate %104 RelaxedPrecision 
+OpDecorate %113 RelaxedPrecision +OpDecorate %114 RelaxedPrecision +OpDecorate %116 RelaxedPrecision +OpDecorate %119 RelaxedPrecision +OpDecorate %121 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%16 = OpTypeImage %float 1D 1 0 0 1 Unknown +%_ptr_UniformConstant_16 = OpTypePointer UniformConstant %16 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_16 UniformConstant +%20 = OpTypeSampler +%_ptr_UniformConstant_20 = OpTypePointer UniformConstant %20 +%g_sSamp = OpVariable %_ptr_UniformConstant_20 UniformConstant +%24 = OpTypeSampledImage %16 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%float_0_100000001 = OpConstant %float 0.100000001 +%cbuff = OpTypeStruct %float %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%v2float = OpTypeVector %float 2 +%int_1 = OpConstant %int 1 +%float_0_200000003 = OpConstant %float 0.200000003 +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%i_Tex1 = OpVariable %_ptr_Input_float Input +%_ptr_Output_float = OpTypePointer Output %float +%_entryPointOutput_Color = OpVariable %_ptr_Output_float Output +%float_0_5 = OpConstant %float 0.5 +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Sampled1D +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %i_Tex1 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c1" +OpMemberName %cbuff 1 "c2" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %i_Tex1 "i.Tex1" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 
+OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 1 Offset 4 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %i_Tex1 Location 1 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%25 = OpTypeFunction %void +%float = OpTypeFloat 32 +%27 = OpTypeImage %float 1D 1 0 0 1 Unknown +%_ptr_UniformConstant_27 = OpTypePointer UniformConstant %27 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_27 UniformConstant +%29 = OpTypeSampler +%_ptr_UniformConstant_29 = OpTypePointer UniformConstant %29 +%g_sSamp = OpVariable %_ptr_UniformConstant_29 UniformConstant +%31 = OpTypeSampledImage %27 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%float_0_100000001 = OpConstant %float 0.100000001 +%cbuff = OpTypeStruct %float %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%v2float = OpTypeVector %float 2 +%int_1 = OpConstant %int 1 +%float_0_200000003 = OpConstant %float 0.200000003 +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%i_Tex1 = OpVariable %_ptr_Input_float Input +%_ptr_Output_float = OpTypePointer Output %float +%_entryPointOutput_Color = OpVariable %_ptr_Output_float Output +%float_0_5 = OpConstant %float 0.5 +%half = OpTypeFloat 16 +%v2half = OpTypeVector %half 2 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%76 = OpLoad %float %i_Tex0 +%79 = OpLoad %float %i_Tex1 +%93 = OpLoad %16 %g_tTex1df4 +%94 = OpLoad %20 %g_sSamp +%95 = OpSampledImage %24 %93 %94 +%98 = OpFMul %float %76 %float_0_100000001 +%99 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%100 = OpLoad %float %99 +%101 = OpFAdd %float %100 %float_0_100000001 +%102 = OpCompositeConstruct %v2float %98 %101 +%104 = OpImageSampleDrefImplicitLod %float %95 %102 %101 +%105 = OpLoad 
%16 %g_tTex1df4 +%106 = OpLoad %20 %g_sSamp +%107 = OpSampledImage %24 %105 %106 +%110 = OpFMul %float %79 %float_0_200000003 +%111 = OpAccessChain %_ptr_Uniform_float %_ %int_1 +%112 = OpLoad %float %111 +%113 = OpFAdd %float %112 %float_0_200000003 +%114 = OpCompositeConstruct %v2float %110 %113 +%116 = OpImageSampleDrefImplicitLod %float %107 %114 %113 +%119 = OpFAdd %float %104 %116 +%121 = OpFMul %float %119 %float_0_5 +OpStore %_entryPointOutput_Color %121 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %25 +%43 = OpLabel +%11 = OpLoad %float %i_Tex0 +%12 = OpLoad %float %i_Tex1 +%44 = OpLoad %27 %g_tTex1df4 +%45 = OpLoad %29 %g_sSamp +%46 = OpSampledImage %31 %44 %45 +%53 = OpFConvert %half %11 +%54 = OpFConvert %half %float_0_100000001 +%13 = OpFMul %half %53 %54 +%47 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%10 = OpLoad %float %47 +%55 = OpFConvert %half %10 +%56 = OpFConvert %half %float_0_100000001 +%14 = OpFAdd %half %55 %56 +%16 = OpCompositeConstruct %v2half %13 %14 +%58 = OpFConvert %float %14 +%18 = OpImageSampleDrefImplicitLod %float %46 %16 %58 +%48 = OpLoad %27 %g_tTex1df4 +%49 = OpLoad %29 %g_sSamp +%50 = OpSampledImage %31 %48 %49 +%59 = OpFConvert %half %12 +%60 = OpFConvert %half %float_0_200000003 +%15 = OpFMul %half %59 %60 +%51 = OpAccessChain %_ptr_Uniform_float %_ %int_1 +%17 = OpLoad %float %51 +%61 = OpFConvert %half %17 +%62 = OpFConvert %half %float_0_200000003 +%19 = OpFAdd %half %61 %62 +%20 = OpCompositeConstruct %v2half %15 %19 +%63 = OpFConvert %float %19 +%21 = OpImageSampleDrefImplicitLod %float %50 %20 %63 +%64 = OpFConvert %half %18 +%65 = OpFConvert %half %21 +%22 = OpFAdd %half %64 %65 +%66 = OpFConvert %half %float_0_5 +%23 = OpFMul %half %22 %66 +%67 = OpFConvert %float %23 +OpStore %_entryPointOutput_Color %67 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck(defs_before + func_before, + defs_after + func_after, true, true); +} + +TEST_F(ConvertToHalfTest, 
ConvertToHalfWithVectorMatrixMult) { + // The resulting SPIR-V was processed with --relax-float-ops. + // + // clang-format off + // + // SamplerState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // cbuffer cbuff{ + // float4x4 M; + // } + // + // PS_OUTPUT main() + // { + // PS_OUTPUT psout; + // float4 txval10 = g_tTex1df4.Sample(g_sSamp, 0.1); + // float4 t = mul(txval10, M); + // psout.Color = t; + // return psout; + //} + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "M" +OpName %_ "" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 RowMajor +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 0 MatrixStride 16 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %56 RelaxedPrecision +OpDecorate %58 RelaxedPrecision +OpDecorate %60 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%14 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_14 = OpTypePointer UniformConstant %14 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_14 UniformConstant +%18 = OpTypeSampler +%_ptr_UniformConstant_18 = OpTypePointer UniformConstant %18 +%g_sSamp = OpVariable %_ptr_UniformConstant_18 UniformConstant +%22 = OpTypeSampledImage %14 
+%float_0_100000001 = OpConstant %float 0.100000001 +%mat4v4float = OpTypeMatrix %v4float 4 +%cbuff = OpTypeStruct %mat4v4float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Sampled1D +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "M" +OpName %_ "" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 RowMajor +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 0 MatrixStride 16 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%14 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_14 = OpTypePointer UniformConstant %14 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_14 UniformConstant +%18 = OpTypeSampler +%_ptr_UniformConstant_18 = OpTypePointer UniformConstant %18 +%g_sSamp = OpVariable %_ptr_UniformConstant_18 UniformConstant +%22 = OpTypeSampledImage %14 +%float_0_100000001 = OpConstant %float 0.100000001 +%mat4v4float = OpTypeMatrix %v4float 4 +%cbuff = OpTypeStruct %mat4v4float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = 
OpVariable %_ptr_Uniform_cbuff Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%half = OpTypeFloat 16 +%v4half = OpTypeVector %half 4 +%mat4v4half = OpTypeMatrix %v4half 4 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%53 = OpLoad %14 %g_tTex1df4 +%54 = OpLoad %18 %g_sSamp +%55 = OpSampledImage %22 %53 %54 +%56 = OpImageSampleImplicitLod %v4float %55 %float_0_100000001 +%57 = OpAccessChain %_ptr_Uniform_mat4v4float %_ %int_0 +%58 = OpLoad %mat4v4float %57 +%60 = OpMatrixTimesVector %v4float %58 %56 +OpStore %_entryPointOutput_Color %60 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%53 = OpLoad %14 %g_tTex1df4 +%54 = OpLoad %18 %g_sSamp +%55 = OpSampledImage %22 %53 %54 +%56 = OpImageSampleImplicitLod %v4float %55 %float_0_100000001 +%57 = OpAccessChain %_ptr_Uniform_mat4v4float %_ %int_0 +%58 = OpLoad %mat4v4float %57 +%67 = OpCompositeExtract %v4float %58 0 +%68 = OpFConvert %v4half %67 +%69 = OpCompositeExtract %v4float %58 1 +%70 = OpFConvert %v4half %69 +%71 = OpCompositeExtract %v4float %58 2 +%72 = OpFConvert %v4half %71 +%73 = OpCompositeExtract %v4float %58 3 +%74 = OpFConvert %v4half %73 +%75 = OpCompositeConstruct %mat4v4half %68 %70 %72 %74 +%64 = OpCopyObject %mat4v4float %58 +%65 = OpFConvert %v4half %56 +%60 = OpMatrixTimesVector %v4half %75 %65 +%66 = OpFConvert %v4float %60 +OpStore %_entryPointOutput_Color %66 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck(defs_before + func_before, + defs_after + func_after, true, true); +} + +TEST_F(ConvertToHalfTest, ConvertToHalfWithPhi) { + // The resulting SPIR-V was processed with --relax-float-ops. 
+ // + // clang-format off + // + // SamplerState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // cbuffer cbuff{ + // bool b; + // float4x4 M; + // } + // + // PS_OUTPUT main() + // { + // PS_OUTPUT psout; + // float4 t; + // + // if (b) + // t = g_tTex1df4.Sample(g_sSamp, 0.1); + // else + // t = float4(0.0, 0.0, 0.0, 0.0); + // + // float4 t2 = t * 2.0; + // psout.Color = t2; + // return psout; + // } + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "b" +OpMemberName %cbuff 1 "M" +OpName %_ "" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 1 RowMajor +OpMemberDecorate %cbuff 1 Offset 16 +OpMemberDecorate %cbuff 1 MatrixStride 16 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %72 RelaxedPrecision +OpDecorate %85 RelaxedPrecision +OpDecorate %74 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%uint = OpTypeInt 32 0 +%mat4v4float = OpTypeMatrix %v4float 4 +%cbuff = OpTypeStruct %uint %mat4v4float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%bool = OpTypeBool 
+%uint_0 = OpConstant %uint 0 +%29 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_29 = OpTypePointer UniformConstant %29 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_29 UniformConstant +%33 = OpTypeSampler +%_ptr_UniformConstant_33 = OpTypePointer UniformConstant %33 +%g_sSamp = OpVariable %_ptr_UniformConstant_33 UniformConstant +%37 = OpTypeSampledImage %29 +%float_0_100000001 = OpConstant %float 0.100000001 +%float_0 = OpConstant %float 0 +%43 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%float_2 = OpConstant %float 2 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Sampled1D +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "b" +OpMemberName %cbuff 1 "M" +OpName %_ "" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 1 RowMajor +OpMemberDecorate %cbuff 1 Offset 16 +OpMemberDecorate %cbuff 1 MatrixStride 16 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%uint = OpTypeInt 32 0 +%mat4v4float = OpTypeMatrix %v4float 4 +%cbuff = OpTypeStruct %uint %mat4v4float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 
0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%bool = OpTypeBool +%uint_0 = OpConstant %uint 0 +%29 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_29 = OpTypePointer UniformConstant %29 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_29 UniformConstant +%33 = OpTypeSampler +%_ptr_UniformConstant_33 = OpTypePointer UniformConstant %33 +%g_sSamp = OpVariable %_ptr_UniformConstant_33 UniformConstant +%37 = OpTypeSampledImage %29 +%float_0_100000001 = OpConstant %float 0.100000001 +%float_0 = OpConstant %float 0 +%43 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%float_2 = OpConstant %float 2 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%half = OpTypeFloat 16 +%v4half = OpTypeVector %half 4 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%63 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 +%64 = OpLoad %uint %63 +%65 = OpINotEqual %bool %64 %uint_0 +OpSelectionMerge %66 None +OpBranchConditional %65 %67 %68 +%67 = OpLabel +%69 = OpLoad %29 %g_tTex1df4 +%70 = OpLoad %33 %g_sSamp +%71 = OpSampledImage %37 %69 %70 +%72 = OpImageSampleImplicitLod %v4float %71 %float_0_100000001 +OpBranch %66 +%68 = OpLabel +OpBranch %66 +%66 = OpLabel +%85 = OpPhi %v4float %72 %67 %43 %68 +%74 = OpVectorTimesScalar %v4float %85 %float_2 +OpStore %_entryPointOutput_Color %74 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%63 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 +%64 = OpLoad %uint %63 +%65 = OpINotEqual %bool %64 %uint_0 +OpSelectionMerge %66 None +OpBranchConditional %65 %67 %68 +%67 = OpLabel +%69 = OpLoad %29 %g_tTex1df4 +%70 = OpLoad %33 %g_sSamp +%71 = OpSampledImage %37 %69 %70 +%72 = OpImageSampleImplicitLod %v4float %71 %float_0_100000001 +%88 = OpFConvert %v4half %72 +OpBranch %66 +%68 = OpLabel +%89 = OpFConvert %v4half %43 +OpBranch %66 
+%66 = OpLabel +%85 = OpPhi %v4half %88 %67 %89 %68 +%90 = OpFConvert %half %float_2 +%74 = OpVectorTimesScalar %v4half %85 %90 +%91 = OpFConvert %v4float %74 +OpStore %_entryPointOutput_Color %91 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck(defs_before + func_before, + defs_after + func_after, true, true); +} + +TEST_F(ConvertToHalfTest, ConvertToHalfWithLoopAndFConvert) { + // The resulting SPIR-V was processed with --relax-float-ops. + // + // The loop causes an FConvert to be generated at the bottom of the loop + // for the Phi. The FConvert is later processed and turned into a (dead) + // copy. + // + // clang-format off + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // cbuffer cbuff{ + // float4 a[10]; + // } + // + // PS_OUTPUT main() + // { + // PS_OUTPUT psout; + // float4 t = 0.0;; + // + // for (int i = 0; i<10; ++i) + // t = t + a[i]; + // + // float4 t2 = t / 10.0; + // psout.Color = t2; + // return psout; + // } + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "a" +OpName %_ "" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %_arr_v4float_uint_10 ArrayStride 16 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 0 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %96 RelaxedPrecision +OpDecorate %81 RelaxedPrecision +OpDecorate %75 RelaxedPrecision +OpDecorate %76 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%float_0 = OpConstant %float 0 +%15 = OpConstantComposite %v4float 
%float_0 %float_0 %float_0 %float_0 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%int_10 = OpConstant %int 10 +%bool = OpTypeBool +%uint = OpTypeInt 32 0 +%uint_10 = OpConstant %uint 10 +%_arr_v4float_uint_10 = OpTypeArray %v4float %uint_10 +%cbuff = OpTypeStruct %_arr_v4float_uint_10 +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%int_1 = OpConstant %int 1 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%float_0_100000001 = OpConstant %float 0.100000001 +%94 = OpConstantComposite %v4float %float_0_100000001 %float_0_100000001 %float_0_100000001 %float_0_100000001 +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "a" +OpName %_ "" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %_arr_v4float_uint_10 ArrayStride 16 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 0 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%float_0 = OpConstant %float 0 +%15 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%int_10 = OpConstant %int 10 +%bool = OpTypeBool +%uint = OpTypeInt 32 0 +%uint_10 = OpConstant %uint 10 +%_arr_v4float_uint_10 = OpTypeArray %v4float %uint_10 +%cbuff = OpTypeStruct %_arr_v4float_uint_10 +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_v4float = OpTypePointer 
Uniform %v4float +%int_1 = OpConstant %int 1 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%float_0_100000001 = OpConstant %float 0.100000001 +%94 = OpConstantComposite %v4float %float_0_100000001 %float_0_100000001 %float_0_100000001 %float_0_100000001 +%half = OpTypeFloat 16 +%v4half = OpTypeVector %half 4 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +OpBranch %65 +%65 = OpLabel +%96 = OpPhi %v4float %15 %5 %76 %71 +%95 = OpPhi %int %int_0 %5 %78 %71 +%70 = OpSLessThan %bool %95 %int_10 +OpLoopMerge %66 %71 None +OpBranchConditional %70 %71 %66 +%71 = OpLabel +%74 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %95 +%75 = OpLoad %v4float %74 +%76 = OpFAdd %v4float %96 %75 +%78 = OpIAdd %int %95 %int_1 +OpBranch %65 +%66 = OpLabel +%81 = OpFMul %v4float %96 %94 +OpStore %_entryPointOutput_Color %81 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%99 = OpFConvert %v4half %15 +OpBranch %65 +%65 = OpLabel +%96 = OpPhi %v4half %99 %5 %76 %71 +%95 = OpPhi %int %int_0 %5 %78 %71 +%70 = OpSLessThan %bool %95 %int_10 +OpLoopMerge %66 %71 None +OpBranchConditional %70 %71 %66 +%71 = OpLabel +%74 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %95 +%75 = OpLoad %v4float %74 +%103 = OpFConvert %v4half %75 +%76 = OpFAdd %v4half %96 %103 +%78 = OpIAdd %int %95 %int_1 +%100 = OpCopyObject %v4half %76 +OpBranch %65 +%66 = OpLabel +%101 = OpFConvert %v4half %94 +%81 = OpFMul %v4half %96 %101 +%102 = OpFConvert %v4float %81 +OpStore %_entryPointOutput_Color %102 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck(defs_before + func_before, + defs_after + func_after, true, true); +} + +TEST_F(ConvertToHalfTest, ConvertToHalfWithExtracts) { + // The resulting SPIR-V was processed with --relax-float-ops. 
+ // + // The extra converts in the func_after can be DCE'd. + // + // clang-format off + // + // SamplerState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // struct PS_INPUT + // { + // float Tex0 : TEXCOORD0; + // }; + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // cbuffer cbuff{ + // float c; + // } + // + // PS_OUTPUT main(PS_INPUT i) + // { + // PS_OUTPUT psout; + // float4 tx = g_tTex1df4.Sample(g_sSamp, i.Tex0); + // float4 t = float4(tx.y, tx.z, tx.x, tx.w) * c; + // psout.Color = t; + // return psout; + // } + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %65 RelaxedPrecision +OpDecorate %82 RelaxedPrecision +OpDecorate %84 RelaxedPrecision +OpDecorate %86 RelaxedPrecision +OpDecorate %88 RelaxedPrecision +OpDecorate %90 RelaxedPrecision +OpDecorate %91 RelaxedPrecision +OpDecorate %93 RelaxedPrecision +OpDecorate %94 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%17 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_17 = OpTypePointer UniformConstant %17 +%g_tTex1df4 = 
OpVariable %_ptr_UniformConstant_17 UniformConstant +%21 = OpTypeSampler +%_ptr_UniformConstant_21 = OpTypePointer UniformConstant %21 +%g_sSamp = OpVariable %_ptr_UniformConstant_21 UniformConstant +%25 = OpTypeSampledImage %17 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%cbuff = OpTypeStruct %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Sampled1D +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%17 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_17 = OpTypePointer UniformConstant %17 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_17 UniformConstant +%21 = OpTypeSampler +%_ptr_UniformConstant_21 = OpTypePointer UniformConstant %21 +%g_sSamp = OpVariable %_ptr_UniformConstant_21 UniformConstant +%25 = OpTypeSampledImage 
%17 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%cbuff = OpTypeStruct %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%half = OpTypeFloat 16 +%v4half = OpTypeVector %half 4 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%65 = OpLoad %float %i_Tex0 +%77 = OpLoad %17 %g_tTex1df4 +%78 = OpLoad %21 %g_sSamp +%79 = OpSampledImage %25 %77 %78 +%82 = OpImageSampleImplicitLod %v4float %79 %65 +%84 = OpCompositeExtract %float %82 1 +%86 = OpCompositeExtract %float %82 2 +%88 = OpCompositeExtract %float %82 0 +%90 = OpCompositeExtract %float %82 3 +%91 = OpCompositeConstruct %v4float %84 %86 %88 %90 +%92 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%93 = OpLoad %float %92 +%94 = OpVectorTimesScalar %v4float %91 %93 +OpStore %_entryPointOutput_Color %94 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%65 = OpLoad %float %i_Tex0 +%77 = OpLoad %17 %g_tTex1df4 +%78 = OpLoad %21 %g_sSamp +%79 = OpSampledImage %25 %77 %78 +%82 = OpImageSampleImplicitLod %v4float %79 %65 +%97 = OpFConvert %v4half %82 +%84 = OpCompositeExtract %half %97 1 +%98 = OpFConvert %v4half %82 +%86 = OpCompositeExtract %half %98 2 +%99 = OpFConvert %v4half %82 +%88 = OpCompositeExtract %half %99 0 +%100 = OpFConvert %v4half %82 +%90 = OpCompositeExtract %half %100 3 +%91 = OpCompositeConstruct %v4half %84 %86 %88 %90 +%92 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%93 = OpLoad %float %92 +%101 = OpFConvert %half %93 +%94 = OpVectorTimesScalar %v4half %91 %101 +%102 = OpFConvert %v4float %94 +OpStore %_entryPointOutput_Color %102 +OpReturn +OpFunctionEnd +)"; + + 
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck(defs_before + func_before, + defs_after + func_after, true, true); +} + +} // namespace +} // namespace opt +} // namespace spvtools diff --git a/3rdparty/spirv-tools/test/opt/fold_test.cpp b/3rdparty/spirv-tools/test/opt/fold_test.cpp index b5998c722..cb81f1c4a 100644 --- a/3rdparty/spirv-tools/test/opt/fold_test.cpp +++ b/3rdparty/spirv-tools/test/opt/fold_test.cpp @@ -222,6 +222,7 @@ OpName %main "main" %v2float_3_2 = OpConstantComposite %v2float %float_3 %float_2 %v2float_4_4 = OpConstantComposite %v2float %float_4 %float_4 %v2float_2_0p5 = OpConstantComposite %v2float %float_2 %float_0p5 +%v2float_0p2_0p5 = OpConstantComposite %v2float %float_0p2 %float_0p5 %v2float_null = OpConstantNull %v2float %double_n1 = OpConstant %double -1 %105 = OpConstant %double 0 ; Need a def with an numerical id to define id maps. @@ -231,7 +232,9 @@ OpName %main "main" %double_2 = OpConstant %double 2 %double_3 = OpConstant %double 3 %double_4 = OpConstant %double 4 +%double_5 = OpConstant %double 5 %double_0p5 = OpConstant %double 0.5 +%double_0p2 = OpConstant %double 0.2 %v2double_0_0 = OpConstantComposite %v2double %double_0 %double_0 %v2double_2_2 = OpConstantComposite %v2double %double_2 %double_2 %v2double_2_3 = OpConstantComposite %v2double %double_2 %double_3 @@ -557,7 +560,155 @@ INSTANTIATE_TEST_SUITE_P(TestCase, IntegerInstructionFoldingTest, "%2 = OpSNegate %int %int_min\n" + "OpReturn\n" + "OpFunctionEnd", - 2, std::numeric_limits::min()) + 2, std::numeric_limits::min()), + // Test case 30: fold UMin 3 4 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %uint %1 UMin %uint_3 %uint_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 3), + // Test case 31: fold UMin 4 2 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst 
%uint %1 UMin %uint_4 %uint_2\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 2), + // Test case 32: fold SMin 3 4 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %int %1 UMin %int_3 %int_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 3), + // Test case 33: fold SMin 4 2 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %int %1 SMin %int_4 %int_2\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 2), + // Test case 34: fold UMax 3 4 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %uint %1 UMax %uint_3 %uint_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 4), + // Test case 35: fold UMax 3 2 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %uint %1 UMax %uint_3 %uint_2\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 3), + // Test case 36: fold SMax 3 4 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %int %1 UMax %int_3 %int_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 4), + // Test case 37: fold SMax 3 2 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %int %1 SMax %int_3 %int_2\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 3), + // Test case 38: fold UClamp 2 3 4 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %uint %1 UClamp %uint_2 %uint_3 %uint_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 3), + // Test case 39: fold UClamp 2 0 4 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %uint %1 UClamp %uint_2 %uint_0 
%uint_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 2), + // Test case 40: fold UClamp 2 0 1 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %uint %1 UClamp %uint_2 %uint_0 %uint_1\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 1), + // Test case 41: fold SClamp 2 3 4 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %int %1 SClamp %int_2 %int_3 %int_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 3), + // Test case 42: fold SClamp 2 0 4 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %int %1 SClamp %int_2 %int_0 %int_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 2), + // Test case 43: fold SClamp 2 0 1 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %int %1 SClamp %int_2 %int_0 %int_1\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 1), + // Test case 44: SClamp 1 2 x + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%undef = OpUndef %int\n" + + "%2 = OpExtInst %int %1 SClamp %int_1 %int_2 %undef\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 2), + // Test case 45: SClamp 2 x 1 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%undef = OpUndef %int\n" + + "%2 = OpExtInst %int %1 SClamp %int_2 %undef %int_1\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 1), + // Test case 46: UClamp 1 2 x + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%undef = OpUndef %uint\n" + + "%2 = OpExtInst %uint %1 UClamp %uint_1 %uint_2 %undef\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 2), + // Test case 47: UClamp 2 x 1 + InstructionFoldingCase( + Header() +
"%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%undef = OpUndef %uint\n" + + "%2 = OpExtInst %uint %1 UClamp %uint_2 %undef %uint_1\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 1) )); // clang-format on @@ -643,6 +794,58 @@ INSTANTIATE_TEST_SUITE_P(TestCase, IntVectorInstructionFoldingTest, )); // clang-format on +using FloatVectorInstructionFoldingTest = + ::testing::TestWithParam>>; + +TEST_P(FloatVectorInstructionFoldingTest, Case) { + const auto& tc = GetParam(); + + // Build module. + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, tc.test_body, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ASSERT_NE(nullptr, context); + + // Fold the instruction to test. + analysis::DefUseManager* def_use_mgr = context->get_def_use_mgr(); + Instruction* inst = def_use_mgr->GetDef(tc.id_to_fold); + SpvOp original_opcode = inst->opcode(); + bool succeeded = context->get_instruction_folder().FoldInstruction(inst); + + // Make sure the instruction folded as expected. 
+ EXPECT_EQ(succeeded, inst == nullptr || inst->opcode() != original_opcode); + if (succeeded && inst != nullptr) { + EXPECT_EQ(inst->opcode(), SpvOpCopyObject); + inst = def_use_mgr->GetDef(inst->GetSingleWordInOperand(0)); + std::vector opcodes = {SpvOpConstantComposite}; + EXPECT_THAT(opcodes, Contains(inst->opcode())); + analysis::ConstantManager* const_mrg = context->get_constant_mgr(); + const analysis::Constant* result = const_mrg->GetConstantFromInst(inst); + EXPECT_NE(result, nullptr); + if (result != nullptr) { + const std::vector& componenets = + result->AsVectorConstant()->GetComponents(); + EXPECT_EQ(componenets.size(), tc.expected_result.size()); + for (size_t i = 0; i < componenets.size(); i++) { + EXPECT_EQ(tc.expected_result[i], componenets[i]->GetFloat()); + } + } + } +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P(TestCase, FloatVectorInstructionFoldingTest, +::testing::Values( + // Test case 0: FMix {2.0, 3.0}, {0.0, 0.0}, {0.2, 0.5} + InstructionFoldingCase>( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %v2float %1 FMix %v2float_2_3 %v2float_0_0 %v2float_0p2_0p5\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, {1.6f,1.5f}) +)); +// clang-format on using BooleanInstructionFoldingTest = ::testing::TestWithParam>; @@ -1473,7 +1676,81 @@ INSTANTIATE_TEST_SUITE_P(FloatConstantFoldingTest, FloatInstructionFoldingTest, "%2 = OpExtInst %float %1 FMix %float_1 %float_4 %float_0p2\n" + "OpReturn\n" + "OpFunctionEnd", - 2, 1.6f) + 2, 1.6f), + // Test case 21: FMin 1.0 4.0 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %float %1 FMin %float_1 %float_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 1.0f), + // Test case 22: FMin 4.0 0.2 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %float %1 FMin %float_4 %float_0p2\n" + +
"OpReturn\n" + + "OpFunctionEnd", + 2, 0.2f), + // Test case 23: FMax 1.0 4.0 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %float %1 FMax %float_1 %float_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 4.0f), + // Test case 24: FMax 1.0 0.2 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %float %1 FMax %float_1 %float_0p2\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 1.0f), + // Test case 25: FClamp 1.0 0.2 4.0 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %float %1 FClamp %float_1 %float_0p2 %float_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 1.0f), + // Test case 26: FClamp 0.2 2.0 4.0 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %float %1 FClamp %float_0p2 %float_2 %float_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 2.0f), + // Test case 27: FClamp 2049.0 2.0 4.0 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %float %1 FClamp %float_2049 %float_2 %float_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 4.0f), + // Test case 28: FClamp 1.0 2.0 x + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%undef = OpUndef %float\n" + + "%2 = OpExtInst %float %1 FClamp %float_1 %float_2 %undef\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 2.0), + // Test case 29: FClamp 1.0 x 0.5 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%undef = OpUndef %float\n" + + "%2 = OpExtInst %float %1 FClamp %float_1 %undef %float_0p5\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 0.5) )); // clang-format on @@ -1616,7 +1893,81 @@
INSTANTIATE_TEST_SUITE_P(DoubleConstantFoldingTest, DoubleInstructionFoldingTest "%2 = OpFNegate %double %double_2\n" + "OpReturn\n" + "OpFunctionEnd", - 2, -2) + 2, -2), + // Test case 12: FMin 1.0 4.0 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %double %1 FMin %double_1 %double_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 1.0), + // Test case 13: FMin 4.0 0.2 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %double %1 FMin %double_4 %double_0p2\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 0.2), + // Test case 14: FMax 1.0 4.0 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %double %1 FMax %double_1 %double_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 4.0), + // Test case 15: FMax 1.0 0.2 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %double %1 FMax %double_1 %double_0p2\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 1.0), + // Test case 16: FClamp 1.0 0.2 4.0 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %double %1 FClamp %double_1 %double_0p2 %double_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 1.0), + // Test case 17: FClamp 0.2 2.0 4.0 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %double %1 FClamp %double_0p2 %double_2 %double_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 2.0), + // Test case 18: FClamp 5.0 2.0 4.0 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%2 = OpExtInst %double %1 FClamp %double_5 %double_2 %double_4\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 4.0), + 
// Test case 19: FClamp 1.0 2.0 x + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%undef = OpUndef %double\n" + + "%2 = OpExtInst %double %1 FClamp %double_1 %double_2 %undef\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 2.0), + // Test case 20: FClamp 1.0 x 0.5 + InstructionFoldingCase( + Header() + "%main = OpFunction %void None %void_func\n" + + "%main_lab = OpLabel\n" + + "%undef = OpUndef %double\n" + + "%2 = OpExtInst %double %1 FClamp %double_1 %undef %double_0p5\n" + + "OpReturn\n" + + "OpFunctionEnd", + 2, 0.5) )); // clang-format on diff --git a/3rdparty/spirv-tools/test/opt/optimizer_test.cpp b/3rdparty/spirv-tools/test/opt/optimizer_test.cpp index ee6e949b4..2f7666ab0 100644 --- a/3rdparty/spirv-tools/test/opt/optimizer_test.cpp +++ b/3rdparty/spirv-tools/test/opt/optimizer_test.cpp @@ -222,7 +222,7 @@ TEST(Optimizer, CanRegisterPassesFromFlags) { } TEST(Optimizer, VulkanToWebGPUSetsCorrectPasses) { - Optimizer opt(SPV_ENV_WEBGPU_0); + Optimizer opt(SPV_ENV_VULKAN_1_1); opt.RegisterVulkanToWebGPUPasses(); std::vector pass_names = opt.GetPassNames(); @@ -267,7 +267,7 @@ TEST_P(VulkanToWebGPUPassTest, Ran) { tools.Assemble(GetParam().input, &binary); } - Optimizer opt(SPV_ENV_WEBGPU_0); + Optimizer opt(SPV_ENV_VULKAN_1_1); opt.RegisterVulkanToWebGPUPasses(); std::vector optimized; @@ -622,7 +622,7 @@ INSTANTIATE_TEST_SUITE_P( "compact-ids"}})); TEST(Optimizer, WebGPUToVulkanSetsCorrectPasses) { - Optimizer opt(SPV_ENV_VULKAN_1_1); + Optimizer opt(SPV_ENV_WEBGPU_0); opt.RegisterWebGPUToVulkanPasses(); std::vector pass_names = opt.GetPassNames(); @@ -659,7 +659,7 @@ TEST_P(WebGPUToVulkanPassTest, Ran) { tools.Assemble(GetParam().input, &binary); } - Optimizer opt(SPV_ENV_VULKAN_1_1); + Optimizer opt(SPV_ENV_WEBGPU_0); opt.RegisterWebGPUToVulkanPasses(); std::vector optimized; diff --git a/3rdparty/spirv-tools/test/opt/relax_float_ops_test.cpp 
b/3rdparty/spirv-tools/test/opt/relax_float_ops_test.cpp new file mode 100644 index 000000000..14cde0b94 --- /dev/null +++ b/3rdparty/spirv-tools/test/opt/relax_float_ops_test.cpp @@ -0,0 +1,142 @@ +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Relax float ops tests + +#include +#include + +#include "test/opt/assembly_builder.h" +#include "test/opt/pass_fixture.h" +#include "test/opt/pass_utils.h" + +namespace spvtools { +namespace opt { +namespace { + +using RelaxFloatOpsTest = PassTest<::testing::Test>; + +TEST_F(RelaxFloatOpsTest, RelaxFloatOpsBasic) { + // All float result instructions in functions should be relaxed + // clang-format off + // + // SamplerState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // struct PS_INPUT + // { + // float Tex0 : TEXCOORD0; + // float Tex1 : TEXCOORD1; + // }; + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // PS_OUTPUT main(PS_INPUT i) + // { + // PS_OUTPUT psout; + // float4 txval10 = g_tTex1df4.Sample(g_sSamp, i.Tex0); + // float4 txval11 = g_tTex1df4.Sample(g_sSamp, i.Tex1); + // float4 t = txval10 + txval11; + // float4 t2 = t / 2.0; + // psout.Color = t2; + // return psout; + // } + // clang-format on + + const std::string defs0 = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint 
Fragment %main "main" %i_Tex0 %i_Tex1 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %i_Tex0 "i.Tex0" +OpName %i_Tex1 "i.Tex1" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpDecorate %i_Tex0 Location 0 +OpDecorate %i_Tex1 Location 1 +OpDecorate %_entryPointOutput_Color Location 0 +)"; + + const std::string defs1 = + R"(%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%17 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_17 = OpTypePointer UniformConstant %17 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_17 UniformConstant +%21 = OpTypeSampler +%_ptr_UniformConstant_21 = OpTypePointer UniformConstant %21 +%g_sSamp = OpVariable %_ptr_UniformConstant_21 UniformConstant +%25 = OpTypeSampledImage %17 +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%i_Tex1 = OpVariable %_ptr_Input_float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%float_0_5 = OpConstant %float 0.5 +%116 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +)"; + + const std::string relax_decos = + R"(OpDecorate %60 RelaxedPrecision +OpDecorate %63 RelaxedPrecision +OpDecorate %82 RelaxedPrecision +OpDecorate %88 RelaxedPrecision +OpDecorate %91 RelaxedPrecision +OpDecorate %94 RelaxedPrecision +)"; + + const std::string func_orig = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%60 = OpLoad %float %i_Tex0 +%63 = OpLoad %float %i_Tex1 +%77 = OpLoad %17 %g_tTex1df4 +%78 = OpLoad %21 %g_sSamp +%79 = OpSampledImage %25 %77 %78 +%82 = OpImageSampleImplicitLod %v4float %79 %60 +%83 = OpLoad %17 %g_tTex1df4 
+%84 = OpLoad %21 %g_sSamp +%85 = OpSampledImage %25 %83 %84 +%88 = OpImageSampleImplicitLod %v4float %85 %63 +%91 = OpFAdd %v4float %82 %88 +%94 = OpFMul %v4float %91 %116 +OpStore %_entryPointOutput_Color %94 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck( + defs0 + defs1 + func_orig, defs0 + relax_decos + defs1 + func_orig, true, + true); +} + +} // namespace +} // namespace opt +} // namespace spvtools diff --git a/3rdparty/spirv-tools/tools/opt/opt.cpp b/3rdparty/spirv-tools/tools/opt/opt.cpp index b229c8413..667b0e310 100644 --- a/3rdparty/spirv-tools/tools/opt/opt.cpp +++ b/3rdparty/spirv-tools/tools/opt/opt.cpp @@ -80,13 +80,13 @@ std::string GetSizePasses() { } std::string GetVulkanToWebGPUPasses() { - spvtools::Optimizer optimizer(SPV_ENV_WEBGPU_0); + spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_1); optimizer.RegisterVulkanToWebGPUPasses(); return GetListOfPassesAsString(optimizer); } std::string GetWebGPUToVulkanPasses() { - spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_1); + spvtools::Optimizer optimizer(SPV_ENV_WEBGPU_0); optimizer.RegisterWebGPUToVulkanPasses(); return GetListOfPassesAsString(optimizer); } @@ -141,6 +141,16 @@ Options (in lexicographical order):)", and constant index access chains in entry point call tree functions.)"); printf(R"( + --convert-relaxed-to-half + Convert all RelaxedPrecision arithmetic operations to half + precision, inserting conversion operations where needed. + Run after function scope variable load and store elimination + for better results. Simplify-instructions, redundancy-elimination + and DCE should be run after this pass to eliminate excess + conversions. This conversion is useful when the target platform + does not support RelaxedPrecision or ignores it. This pass also + removes all RelaxedPrecision decorations.)"); + printf(R"( --copy-propagate-arrays Does propagation of memory references when an array is a copy of another. 
It will only propagate an array if the source is never @@ -393,6 +403,10 @@ Options (in lexicographical order):)", Looks for instructions in the same function that compute the same value, and deletes the redundant ones.)"); printf(R"( + --relax-float-ops + Decorate all float operations with RelaxedPrecision if not already + so decorated. This does not decorate types or variables.)"); + printf(R"( --relax-struct-store Allow store from one struct type to a different type with compatible layout and members. This option is forwarded to the @@ -778,7 +792,7 @@ OptStatus ParseFlags(int argc, const char** argv, return {OPT_STOP, 1}; } - optimizer->SetTargetEnv(SPV_ENV_WEBGPU_0); + optimizer->SetTargetEnv(SPV_ENV_VULKAN_1_1); optimizer->RegisterVulkanToWebGPUPasses(); } else if (0 == strcmp(cur_arg, "--webgpu-to-vulkan")) { webgpu_to_vulkan_set = true; @@ -796,7 +810,7 @@ OptStatus ParseFlags(int argc, const char** argv, return {OPT_STOP, 1}; } - optimizer->SetTargetEnv(SPV_ENV_VULKAN_1_1); + optimizer->SetTargetEnv(SPV_ENV_WEBGPU_0); optimizer->RegisterWebGPUToVulkanPasses(); } else if (0 == strcmp(cur_arg, "--validate-after-all")) { optimizer->SetValidateAfterAll(true);