Updated spirv-tools.

Бранимир Караџић
2019-09-07 08:05:36 -07:00
parent 4284db32f9
commit f3a0654c9a
46 changed files with 4364 additions and 379 deletions

View File

@@ -89,6 +89,7 @@ SPVTOOLS_OPT_SRC_FILES := \
source/opt/composite.cpp \
source/opt/const_folding_rules.cpp \
source/opt/constants.cpp \
source/opt/convert_to_half_pass.cpp \
source/opt/copy_prop_arrays.cpp \
source/opt/dead_branch_elim_pass.cpp \
source/opt/dead_insert_elim_pass.cpp \
@@ -153,6 +154,7 @@ SPVTOOLS_OPT_SRC_FILES := \
source/opt/reduce_load_size.cpp \
source/opt/redundancy_elimination.cpp \
source/opt/register_pressure.cpp \
source/opt/relax_float_ops_pass.cpp \
source/opt/remove_duplicates_pass.cpp \
source/opt/replace_invalid_opc.cpp \
source/opt/scalar_analysis.cpp \

View File

@@ -479,6 +479,8 @@ static_library("spvtools_opt") {
"source/opt/const_folding_rules.h",
"source/opt/constants.cpp",
"source/opt/constants.h",
"source/opt/convert_to_half_pass.cpp",
"source/opt/convert_to_half_pass.h",
"source/opt/copy_prop_arrays.cpp",
"source/opt/copy_prop_arrays.h",
"source/opt/dead_branch_elim_pass.cpp",
@@ -611,6 +613,8 @@ static_library("spvtools_opt") {
"source/opt/reflect.h",
"source/opt/register_pressure.cpp",
"source/opt/register_pressure.h",
"source/opt/relax_float_ops_pass.cpp",
"source/opt/relax_float_ops_pass.h",
"source/opt/remove_duplicates_pass.cpp",
"source/opt/remove_duplicates_pass.h",
"source/opt/replace_invalid_opc.cpp",
@@ -766,7 +770,7 @@ static_library("spvtools_reduce") {
}
group("SPIRV-Tools") {
deps = [
public_deps = [
":spvtools",
":spvtools_link",
":spvtools_opt",
@@ -846,6 +850,8 @@ if (build_with_chromium) {
"//testing/gmock",
"//testing/gtest",
"//testing/gtest:gtest_main",
"//third_party/googletest:gmock",
"//third_party/googletest:gtest",
]
if (is_clang) {

View File

@@ -1,7 +1,25 @@
Revision history for SPIRV-Tools
v2019.5-dev 2019-08-08
- Start v2019.5-dev
- General:
- Optimizer
- Add descriptor array scalar replacement (#2742)
- Add pass to wrap OpKill in a function call (#2790)
- Fold FMix during constant folding. (#2818)
- Add pass to replace AMD shader ballot extension (#2811)
- Add pass to relax the precision of Float32 operations (#2808)
- Add pass to convert relaxed precision operations to Float16 (#2808)
Fixes:
- Instrument: Fix version 2 output record write for tess eval shaders. (#2782)
- Instrument: Add support for Buffer Device Address extension (#2792)
- Fix check for changed binary in API call. (#2798)
- Validator
Fixes:
- Fix validation of constant matrices (#2794)
- Update "remquor" validation
- Reduce
- Remove relaxed precision decorations (#2797)
Fixes:
v2019.4 2019-08-08
- General:

View File

@@ -1 +1 @@
"v2019.5-dev", "SPIRV-Tools v2019.5-dev v2019.4-25-g65e362b7"
"v2019.5-dev", "SPIRV-Tools v2019.5-dev v2019.4-37-g76261e2a"

View File

@@ -68,6 +68,11 @@ class Optimizer {
// The constructed instance will have an empty message consumer, which just
// ignores all messages from the library. Use SetMessageConsumer() to supply
// one if messages are of concern.
//
// For collections of passes that are meant to transform the input into
// another execution environment, the source environment should be
// supplied, e.g. for VulkanToWebGPUPasses the environment should be
// SPV_ENV_VULKAN_1_1, not SPV_ENV_WEBGPU_0.
explicit Optimizer(spv_target_env env);
// Disables copy/move constructor/assignment operations.
@@ -674,6 +679,22 @@ Optimizer::PassToken CreateLoopUnrollPass(bool fully_unroll, int factor = 0);
// processed (see IsSSATargetVar for details).
Optimizer::PassToken CreateSSARewritePass();
// Create pass to convert relaxed precision instructions to half precision.
// This pass converts as many relaxed float32 arithmetic operations to half as
// possible. It converts any float32 operands to half if needed. It converts
// any resulting half precision values back to float32 as needed. No variables
// are changed. No image operations are changed.
//
// Best if run late, after unneeded function scope loads, stores, composite
// inserts and extracts have been removed, since it will then generate better
// code. Also best if followed by instruction simplification, redundancy
// elimination and DCE.
Optimizer::PassToken CreateConvertRelaxedToHalfPass();
// Create relax float ops pass.
// This pass decorates all float32 result instructions with RelaxedPrecision
// if not already so decorated.
Optimizer::PassToken CreateRelaxFloatOpsPass();
// Create copy propagate arrays pass.
// This pass looks to copy propagate memory references for arrays. It looks
// for specific code patterns to recognize array copies.
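The two new pass factories documented in this hunk, CreateRelaxFloatOpsPass and CreateConvertRelaxedToHalfPass, plug into the existing Optimizer API. Below is a minimal sketch of a client that follows the recommended ordering (relax first, convert second, then simplification, redundancy elimination and DCE); the helper name, the target environment and the exact pass selection are illustrative, not part of this commit.

#include <vector>
#include "spirv-tools/optimizer.hpp"

// Illustrative helper: relax float32 ops, convert them to float16, then clean up.
std::vector<uint32_t> RelaxAndConvertToHalf(const std::vector<uint32_t>& binary) {
  spvtools::Optimizer opt(SPV_ENV_VULKAN_1_1);
  opt.RegisterPass(spvtools::CreateRelaxFloatOpsPass())         // decorate float32 results
      .RegisterPass(spvtools::CreateConvertRelaxedToHalfPass()) // rewrite relaxed ops to half
      .RegisterPass(spvtools::CreateSimplificationPass())       // recommended follow-up passes
      .RegisterPass(spvtools::CreateRedundancyEliminationPass())
      .RegisterPass(spvtools::CreateAggressiveDCEPass());
  std::vector<uint32_t> optimized;
  if (!opt.Run(binary.data(), binary.size(), &optimized)) {
    return binary;  // leave the module unchanged on failure
  }
  return optimized;
}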

View File

@@ -339,7 +339,9 @@ spvtools_pch(SPIRV_SOURCES pch_source)
add_library(${SPIRV_TOOLS} ${SPIRV_SOURCES})
spvtools_default_compile_options(${SPIRV_TOOLS})
target_include_directories(${SPIRV_TOOLS}
PUBLIC ${spirv-tools_SOURCE_DIR}/include
PUBLIC
$<BUILD_INTERFACE:${spirv-tools_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/include>
PRIVATE ${spirv-tools_BINARY_DIR}
PRIVATE ${SPIRV_HEADER_INCLUDE_DIR}
)
@@ -350,7 +352,9 @@ add_dependencies( ${SPIRV_TOOLS} core_tables enum_string_mapping extinst_tables
add_library(${SPIRV_TOOLS}-shared SHARED ${SPIRV_SOURCES})
spvtools_default_compile_options(${SPIRV_TOOLS}-shared)
target_include_directories(${SPIRV_TOOLS}-shared
PUBLIC ${spirv-tools_SOURCE_DIR}/include
PUBLIC
$<BUILD_INTERFACE:${spirv-tools_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/include>
PRIVATE ${spirv-tools_BINARY_DIR}
PRIVATE ${SPIRV_HEADER_INCLUDE_DIR}
)
@@ -372,10 +376,11 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
endif()
if(ENABLE_SPIRV_TOOLS_INSTALL)
install(TARGETS ${SPIRV_TOOLS} ${SPIRV_TOOLS}-shared
install(TARGETS ${SPIRV_TOOLS} ${SPIRV_TOOLS}-shared EXPORT ${SPIRV_TOOLS}Targets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
install(EXPORT ${SPIRV_TOOLS}Targets DESTINATION lib/cmake)
endif(ENABLE_SPIRV_TOOLS_INSTALL)
if(MSVC)

View File

@@ -118,8 +118,12 @@ if(SPIRV_BUILD_FUZZER)
endif()
target_include_directories(SPIRV-Tools-fuzz
PUBLIC ${spirv-tools_SOURCE_DIR}/include
PUBLIC ${SPIRV_HEADER_INCLUDE_DIR}
PUBLIC
$<BUILD_INTERFACE:${spirv-tools_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/include>
PUBLIC
$<BUILD_INTERFACE:${SPIRV_HEADER_INCLUDE_DIR}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
PRIVATE ${spirv-tools_BINARY_DIR}
PRIVATE ${CMAKE_BINARY_DIR})
@@ -133,10 +137,11 @@ if(SPIRV_BUILD_FUZZER)
spvtools_check_symbol_exports(SPIRV-Tools-fuzz)
if(ENABLE_SPIRV_TOOLS_INSTALL)
install(TARGETS SPIRV-Tools-fuzz
install(TARGETS SPIRV-Tools-fuzz EXPORT SPIRV-Tools-fuzzTargets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
install(EXPORT SPIRV-Tools-fuzzTargets DESTINATION lib/cmake)
endif(ENABLE_SPIRV_TOOLS_INSTALL)
endif(SPIRV_BUILD_FUZZER)

View File

@@ -36,4 +36,4 @@ struct DataDescriptorEquals {
} // namespace fuzz
} // namespace spvtools
#endif // #define SOURCE_FUZZ_DATA_DESCRIPTOR_H_
#endif // SOURCE_FUZZ_DATA_DESCRIPTOR_H_

View File

@@ -128,4 +128,4 @@ class FactManager {
} // namespace fuzz
} // namespace spvtools
#endif // #define SOURCE_FUZZ_FACT_MANAGER_H_
#endif // SOURCE_FUZZ_FACT_MANAGER_H_

View File

@@ -58,4 +58,4 @@ class FuzzerPass {
} // namespace fuzz
} // namespace spvtools
#endif // #define SOURCE_FUZZ_FUZZER_PASS_H_
#endif // SOURCE_FUZZ_FUZZER_PASS_H_

View File

@@ -35,4 +35,4 @@ class FuzzerPassAddDeadBreaks : public FuzzerPass {
} // namespace fuzz
} // namespace spvtools
#endif // #define SOURCE_FUZZ_FUZZER_PASS_ADD_DEAD_BREAKS_H_
#endif // SOURCE_FUZZ_FUZZER_PASS_ADD_DEAD_BREAKS_H_

View File

@@ -36,4 +36,4 @@ class FuzzerPassAddDeadContinues : public FuzzerPass {
} // namespace fuzz
} // namespace spvtools
#endif // #define SOURCE_FUZZ_FUZZER_PASS_ADD_DEAD_CONTINUES_H_
#endif // SOURCE_FUZZ_FUZZER_PASS_ADD_DEAD_CONTINUES_H_

View File

@@ -43,4 +43,4 @@ class FuzzerPassAddUsefulConstructs : public FuzzerPass {
} // namespace fuzz
} // namespace spvtools
#endif // #define SOURCE_FUZZ_FUZZER_PASS_ADD_USEFUL_CONSTRUCTS_
#endif // SOURCE_FUZZ_FUZZER_PASS_ADD_USEFUL_CONSTRUCTS_

View File

@@ -104,4 +104,4 @@ class FuzzerPassObfuscateConstants : public FuzzerPass {
} // namespace fuzz
} // namespace spvtools
#endif // #define SOURCE_FUZZ_FUZZER_PASS_OBFUSCATE_CONSTANTS_
#endif // SOURCE_FUZZ_FUZZER_PASS_OBFUSCATE_CONSTANTS_

View File

@@ -36,4 +36,4 @@ class FuzzerPassPermuteBlocks : public FuzzerPass {
} // namespace fuzz
} // namespace spvtools
#endif // #define SOURCE_FUZZ_FUZZER_PASS_PERMUTE_BLOCKS_
#endif // SOURCE_FUZZ_FUZZER_PASS_PERMUTE_BLOCKS_

View File

@@ -36,4 +36,4 @@ class FuzzerPassSplitBlocks : public FuzzerPass {
} // namespace fuzz
} // namespace spvtools
#endif // #define SOURCE_FUZZ_FUZZER_PASS_SPLIT_BLOCKS_
#endif // SOURCE_FUZZ_FUZZER_PASS_SPLIT_BLOCKS_

View File

@@ -49,4 +49,4 @@ opt::Instruction* FindUniformVariable(
} // namespace fuzz
} // namespace spvtools
#endif // #define SOURCE_FUZZ_UNIFORM_BUFFER_ELEMENT_DESCRIPTOR_H_
#endif // SOURCE_FUZZ_UNIFORM_BUFFER_ELEMENT_DESCRIPTOR_H_

View File

@@ -17,8 +17,12 @@ add_library(SPIRV-Tools-link
spvtools_default_compile_options(SPIRV-Tools-link)
target_include_directories(SPIRV-Tools-link
PUBLIC ${spirv-tools_SOURCE_DIR}/include
PUBLIC ${SPIRV_HEADER_INCLUDE_DIR}
PUBLIC
$<BUILD_INTERFACE:${spirv-tools_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/include>
PUBLIC
$<BUILD_INTERFACE:${SPIRV_HEADER_INCLUDE_DIR}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
PRIVATE ${spirv-tools_BINARY_DIR}
)
# We need the IR functionalities from the optimizer
@@ -29,8 +33,9 @@ set_property(TARGET SPIRV-Tools-link PROPERTY FOLDER "SPIRV-Tools libraries")
spvtools_check_symbol_exports(SPIRV-Tools-link)
if(ENABLE_SPIRV_TOOLS_INSTALL)
install(TARGETS SPIRV-Tools-link
install(TARGETS SPIRV-Tools-link EXPORT SPIRV-Tools-linkTargets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
install(EXPORT SPIRV-Tools-linkTargets DESTINATION lib/cmake)
endif(ENABLE_SPIRV_TOOLS_INSTALL)

View File

@@ -27,6 +27,7 @@ set(SPIRV_TOOLS_OPT_SOURCES
composite.h
const_folding_rules.h
constants.h
convert_to_half_pass.h
copy_prop_arrays.h
dead_branch_elim_pass.h
dead_insert_elim_pass.h
@@ -93,6 +94,7 @@ set(SPIRV_TOOLS_OPT_SOURCES
redundancy_elimination.h
reflect.h
register_pressure.h
relax_float_ops_pass.h
remove_duplicates_pass.h
replace_invalid_opc.h
scalar_analysis.h
@@ -132,6 +134,7 @@ set(SPIRV_TOOLS_OPT_SOURCES
composite.cpp
const_folding_rules.cpp
constants.cpp
convert_to_half_pass.cpp
copy_prop_arrays.cpp
dead_branch_elim_pass.cpp
dead_insert_elim_pass.cpp
@@ -196,6 +199,7 @@ set(SPIRV_TOOLS_OPT_SOURCES
reduce_load_size.cpp
redundancy_elimination.cpp
register_pressure.cpp
relax_float_ops_pass.cpp
remove_duplicates_pass.cpp
replace_invalid_opc.cpp
scalar_analysis.cpp
@@ -231,8 +235,12 @@ add_library(SPIRV-Tools-opt ${SPIRV_TOOLS_OPT_SOURCES})
spvtools_default_compile_options(SPIRV-Tools-opt)
target_include_directories(SPIRV-Tools-opt
PUBLIC ${spirv-tools_SOURCE_DIR}/include
PUBLIC ${SPIRV_HEADER_INCLUDE_DIR}
PUBLIC
$<BUILD_INTERFACE:${spirv-tools_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/include>
PUBLIC
$<BUILD_INTERFACE:${SPIRV_HEADER_INCLUDE_DIR}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
PRIVATE ${spirv-tools_BINARY_DIR}
)
# We need the assembling and disassembling functionalities in the main library.
@@ -243,8 +251,9 @@ set_property(TARGET SPIRV-Tools-opt PROPERTY FOLDER "SPIRV-Tools libraries")
spvtools_check_symbol_exports(SPIRV-Tools-opt)
if(ENABLE_SPIRV_TOOLS_INSTALL)
install(TARGETS SPIRV-Tools-opt
install(TARGETS SPIRV-Tools-opt EXPORT SPIRV-Tools-optTargets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
install(EXPORT SPIRV-Tools-optTargets DESTINATION lib/cmake)
endif(ENABLE_SPIRV_TOOLS_INSTALL)

File diff suppressed because it is too large.

View File

@@ -296,6 +296,51 @@ ConstantFoldingRule FoldFPUnaryOp(UnaryScalarFoldingRule scalar_rule) {
};
}
// Returns the result of folding the constants in |constants| according to
// |scalar_rule|. If |result_type| is a vector, then |scalar_rule| is applied
// per component.
const analysis::Constant* FoldFPBinaryOp(
BinaryScalarFoldingRule scalar_rule, uint32_t result_type_id,
const std::vector<const analysis::Constant*>& constants,
IRContext* context) {
analysis::ConstantManager* const_mgr = context->get_constant_mgr();
analysis::TypeManager* type_mgr = context->get_type_mgr();
const analysis::Type* result_type = type_mgr->GetType(result_type_id);
const analysis::Vector* vector_type = result_type->AsVector();
if (constants[0] == nullptr || constants[1] == nullptr) {
return nullptr;
}
if (vector_type != nullptr) {
std::vector<const analysis::Constant*> a_components;
std::vector<const analysis::Constant*> b_components;
std::vector<const analysis::Constant*> results_components;
a_components = constants[0]->GetVectorComponents(const_mgr);
b_components = constants[1]->GetVectorComponents(const_mgr);
// Fold each component of the vector.
for (uint32_t i = 0; i < a_components.size(); ++i) {
results_components.push_back(scalar_rule(vector_type->element_type(),
a_components[i], b_components[i],
const_mgr));
if (results_components[i] == nullptr) {
return nullptr;
}
}
// Build the constant object and return it.
std::vector<uint32_t> ids;
for (const analysis::Constant* member : results_components) {
ids.push_back(const_mgr->GetDefiningInstruction(member)->result_id());
}
return const_mgr->GetConstant(vector_type, ids);
} else {
return scalar_rule(result_type, constants[0], constants[1], const_mgr);
}
}
// Returns a |ConstantFoldingRule| that folds floating point scalars using
// |scalar_rule| and vectors of floating point by applying |scalar_rule| to the
// elements of the vector. The |ConstantFoldingRule| that is returned assumes
@@ -305,46 +350,14 @@ ConstantFoldingRule FoldFPBinaryOp(BinaryScalarFoldingRule scalar_rule) {
return [scalar_rule](IRContext* context, Instruction* inst,
const std::vector<const analysis::Constant*>& constants)
-> const analysis::Constant* {
analysis::ConstantManager* const_mgr = context->get_constant_mgr();
analysis::TypeManager* type_mgr = context->get_type_mgr();
const analysis::Type* result_type = type_mgr->GetType(inst->type_id());
const analysis::Vector* vector_type = result_type->AsVector();
if (!inst->IsFloatingPointFoldingAllowed()) {
return nullptr;
}
if (constants[0] == nullptr || constants[1] == nullptr) {
return nullptr;
}
if (vector_type != nullptr) {
std::vector<const analysis::Constant*> a_components;
std::vector<const analysis::Constant*> b_components;
std::vector<const analysis::Constant*> results_components;
a_components = constants[0]->GetVectorComponents(const_mgr);
b_components = constants[1]->GetVectorComponents(const_mgr);
// Fold each component of the vector.
for (uint32_t i = 0; i < a_components.size(); ++i) {
results_components.push_back(scalar_rule(vector_type->element_type(),
a_components[i],
b_components[i], const_mgr));
if (results_components[i] == nullptr) {
return nullptr;
}
}
// Build the constant object and return it.
std::vector<uint32_t> ids;
for (const analysis::Constant* member : results_components) {
ids.push_back(const_mgr->GetDefiningInstruction(member)->result_id());
}
return const_mgr->GetConstant(vector_type, ids);
} else {
return scalar_rule(result_type, constants[0], constants[1], const_mgr);
if (inst->opcode() == SpvOpExtInst) {
return FoldFPBinaryOp(scalar_rule, inst->type_id(),
{constants[1], constants[2]}, context);
}
return FoldFPBinaryOp(scalar_rule, inst->type_id(), constants, context);
};
}
@@ -435,29 +448,33 @@ UnaryScalarFoldingRule FoldQuantizeToF16Scalar() {
// This macro defines a |BinaryScalarFoldingRule| that applies |op|. The
// operator |op| must work for both float and double, and use syntax "f1 op f2".
#define FOLD_FPARITH_OP(op) \
[](const analysis::Type* result_type, const analysis::Constant* a, \
const analysis::Constant* b, \
analysis::ConstantManager* const_mgr_in_macro) \
-> const analysis::Constant* { \
assert(result_type != nullptr && a != nullptr && b != nullptr); \
assert(result_type == a->type() && result_type == b->type()); \
const analysis::Float* float_type_in_macro = result_type->AsFloat(); \
assert(float_type_in_macro != nullptr); \
if (float_type_in_macro->width() == 32) { \
float fa = a->GetFloat(); \
float fb = b->GetFloat(); \
utils::FloatProxy<float> result_in_macro(fa op fb); \
std::vector<uint32_t> words_in_macro = result_in_macro.GetWords(); \
return const_mgr_in_macro->GetConstant(result_type, words_in_macro); \
} else if (float_type_in_macro->width() == 64) { \
double fa = a->GetDouble(); \
double fb = b->GetDouble(); \
utils::FloatProxy<double> result_in_macro(fa op fb); \
std::vector<uint32_t> words_in_macro = result_in_macro.GetWords(); \
return const_mgr_in_macro->GetConstant(result_type, words_in_macro); \
} \
return nullptr; \
#define FOLD_FPARITH_OP(op) \
[](const analysis::Type* result_type_in_macro, const analysis::Constant* a, \
const analysis::Constant* b, \
analysis::ConstantManager* const_mgr_in_macro) \
-> const analysis::Constant* { \
assert(result_type_in_macro != nullptr && a != nullptr && b != nullptr); \
assert(result_type_in_macro == a->type() && \
result_type_in_macro == b->type()); \
const analysis::Float* float_type_in_macro = \
result_type_in_macro->AsFloat(); \
assert(float_type_in_macro != nullptr); \
if (float_type_in_macro->width() == 32) { \
float fa = a->GetFloat(); \
float fb = b->GetFloat(); \
utils::FloatProxy<float> result_in_macro(fa op fb); \
std::vector<uint32_t> words_in_macro = result_in_macro.GetWords(); \
return const_mgr_in_macro->GetConstant(result_type_in_macro, \
words_in_macro); \
} else if (float_type_in_macro->width() == 64) { \
double fa = a->GetDouble(); \
double fb = b->GetDouble(); \
utils::FloatProxy<double> result_in_macro(fa op fb); \
std::vector<uint32_t> words_in_macro = result_in_macro.GetWords(); \
return const_mgr_in_macro->GetConstant(result_type_in_macro, \
words_in_macro); \
} \
return nullptr; \
}
// Define the folding rule for conversion between floating point and integer
@@ -834,34 +851,225 @@ ConstantFoldingRule FoldFMix() {
}
const analysis::Constant* one;
if (constants[1]->type()->AsFloat()->width() == 32) {
one = const_mgr->GetConstant(constants[1]->type(),
bool is_vector = false;
const analysis::Type* result_type = constants[1]->type();
const analysis::Type* base_type = result_type;
if (base_type->AsVector()) {
is_vector = true;
base_type = base_type->AsVector()->element_type();
}
assert(base_type->AsFloat() != nullptr &&
"FMix is suppose to act on floats or vectors of floats.");
if (base_type->AsFloat()->width() == 32) {
one = const_mgr->GetConstant(base_type,
utils::FloatProxy<float>(1.0f).GetWords());
} else {
one = const_mgr->GetConstant(constants[1]->type(),
one = const_mgr->GetConstant(base_type,
utils::FloatProxy<double>(1.0).GetWords());
}
const analysis::Constant* temp1 =
FOLD_FPARITH_OP(-)(constants[1]->type(), one, constants[3], const_mgr);
if (is_vector) {
uint32_t one_id = const_mgr->GetDefiningInstruction(one)->result_id();
one =
const_mgr->GetConstant(result_type, std::vector<uint32_t>(4, one_id));
}
const analysis::Constant* temp1 = FoldFPBinaryOp(
FOLD_FPARITH_OP(-), inst->type_id(), {one, constants[3]}, context);
if (temp1 == nullptr) {
return nullptr;
}
const analysis::Constant* temp2 = FOLD_FPARITH_OP(*)(
constants[1]->type(), constants[1], temp1, const_mgr);
const analysis::Constant* temp2 = FoldFPBinaryOp(
FOLD_FPARITH_OP(*), inst->type_id(), {constants[1], temp1}, context);
if (temp2 == nullptr) {
return nullptr;
}
const analysis::Constant* temp3 = FOLD_FPARITH_OP(*)(
constants[2]->type(), constants[2], constants[3], const_mgr);
const analysis::Constant* temp3 =
FoldFPBinaryOp(FOLD_FPARITH_OP(*), inst->type_id(),
{constants[2], constants[3]}, context);
if (temp3 == nullptr) {
return nullptr;
}
return FOLD_FPARITH_OP(+)(temp2->type(), temp2, temp3, const_mgr);
return FoldFPBinaryOp(FOLD_FPARITH_OP(+), inst->type_id(), {temp2, temp3},
context);
};
}
template <class IntType>
IntType FoldIClamp(IntType x, IntType min_val, IntType max_val) {
if (x < min_val) {
x = min_val;
}
if (x > max_val) {
x = max_val;
}
return x;
}
const analysis::Constant* FoldMin(const analysis::Type* result_type,
const analysis::Constant* a,
const analysis::Constant* b,
analysis::ConstantManager*) {
if (const analysis::Integer* int_type = result_type->AsInteger()) {
if (int_type->width() == 32) {
if (int_type->IsSigned()) {
int32_t va = a->GetS32();
int32_t vb = b->GetS32();
return (va < vb ? a : b);
} else {
uint32_t va = a->GetU32();
uint32_t vb = b->GetU32();
return (va < vb ? a : b);
}
} else if (int_type->width() == 64) {
if (int_type->IsSigned()) {
int64_t va = a->GetS64();
int64_t vb = b->GetS64();
return (va < vb ? a : b);
} else {
uint64_t va = a->GetU64();
uint64_t vb = b->GetU64();
return (va < vb ? a : b);
}
}
} else if (const analysis::Float* float_type = result_type->AsFloat()) {
if (float_type->width() == 32) {
float va = a->GetFloat();
float vb = b->GetFloat();
return (va < vb ? a : b);
} else if (float_type->width() == 64) {
double va = a->GetDouble();
double vb = b->GetDouble();
return (va < vb ? a : b);
}
}
return nullptr;
}
const analysis::Constant* FoldMax(const analysis::Type* result_type,
const analysis::Constant* a,
const analysis::Constant* b,
analysis::ConstantManager*) {
if (const analysis::Integer* int_type = result_type->AsInteger()) {
if (int_type->width() == 32) {
if (int_type->IsSigned()) {
int32_t va = a->GetS32();
int32_t vb = b->GetS32();
return (va > vb ? a : b);
} else {
uint32_t va = a->GetU32();
uint32_t vb = b->GetU32();
return (va > vb ? a : b);
}
} else if (int_type->width() == 64) {
if (int_type->IsSigned()) {
int64_t va = a->GetS64();
int64_t vb = b->GetS64();
return (va > vb ? a : b);
} else {
uint64_t va = a->GetU64();
uint64_t vb = b->GetU64();
return (va > vb ? a : b);
}
}
} else if (const analysis::Float* float_type = result_type->AsFloat()) {
if (float_type->width() == 32) {
float va = a->GetFloat();
float vb = b->GetFloat();
return (va > vb ? a : b);
} else if (float_type->width() == 64) {
double va = a->GetDouble();
double vb = b->GetDouble();
return (va > vb ? a : b);
}
}
return nullptr;
}
// Fold a clamp instruction when all three operands are constant.
const analysis::Constant* FoldClamp1(
IRContext* context, Instruction* inst,
const std::vector<const analysis::Constant*>& constants) {
assert(inst->opcode() == SpvOpExtInst &&
"Expecting an extended instruction.");
assert(inst->GetSingleWordInOperand(0) ==
context->get_feature_mgr()->GetExtInstImportId_GLSLstd450() &&
"Expecting a GLSLstd450 extended instruction.");
// Make sure all Clamp operands are constants.
for (uint32_t i = 1; i < 3; i++) {
if (constants[i] == nullptr) {
return nullptr;
}
}
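// Example: clamp(5.0, 0.0, 3.0) folds as min(max(5.0, 0.0), 3.0): the max
// yields 5.0 and the min then yields the final result 3.0.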
const analysis::Constant* temp = FoldFPBinaryOp(
FoldMax, inst->type_id(), {constants[1], constants[2]}, context);
if (temp == nullptr) {
return nullptr;
}
return FoldFPBinaryOp(FoldMin, inst->type_id(), {temp, constants[3]},
context);
}
// Fold a clamp instruction when |x <= min_val|.
const analysis::Constant* FoldClamp2(
IRContext* context, Instruction* inst,
const std::vector<const analysis::Constant*>& constants) {
assert(inst->opcode() == SpvOpExtInst &&
"Expecting an extended instruction.");
assert(inst->GetSingleWordInOperand(0) ==
context->get_feature_mgr()->GetExtInstImportId_GLSLstd450() &&
"Expecting a GLSLstd450 extended instruction.");
const analysis::Constant* x = constants[1];
const analysis::Constant* min_val = constants[2];
if (x == nullptr || min_val == nullptr) {
return nullptr;
}
const analysis::Constant* temp =
FoldFPBinaryOp(FoldMax, inst->type_id(), {x, min_val}, context);
if (temp == min_val) {
// We can assume that |min_val| is less than |max_val|. Therefore, if the
// result of the max operation is |min_val|, we know the result of the min
// operation, even if |max_val| is not a constant.
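// Example: for clamp(x, 2.0, max_val) with x == 1.5, max(1.5, 2.0) yields the
// |min_val| constant 2.0, so the clamp folds to 2.0 regardless of max_val.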
return min_val;
}
return nullptr;
}
// Fold a clamp instruction when |x >= max_val|.
const analysis::Constant* FoldClamp3(
IRContext* context, Instruction* inst,
const std::vector<const analysis::Constant*>& constants) {
assert(inst->opcode() == SpvOpExtInst &&
"Expecting an extended instruction.");
assert(inst->GetSingleWordInOperand(0) ==
context->get_feature_mgr()->GetExtInstImportId_GLSLstd450() &&
"Expecting a GLSLstd450 extended instruction.");
const analysis::Constant* x = constants[1];
const analysis::Constant* max_val = constants[3];
if (x == nullptr || max_val == nullptr) {
return nullptr;
}
const analysis::Constant* temp =
FoldFPBinaryOp(FoldMin, inst->type_id(), {x, max_val}, context);
if (temp == max_val) {
// We can assume that |min_val| is less than |max_val|. Therefore, if the
// result of the min operation is |max_val|, we know the result of the
// clamp, even if |min_val| is not a constant.
return max_val;
}
return nullptr;
}
} // namespace
void ConstantFoldingRules::AddFoldingRules() {
@@ -937,6 +1145,36 @@ void ConstantFoldingRules::AddFoldingRules() {
feature_manager->GetExtInstImportId_GLSLstd450();
if (ext_inst_glslstd450_id != 0) {
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FMix}].push_back(FoldFMix());
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450SMin}].push_back(
FoldFPBinaryOp(FoldMin));
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450UMin}].push_back(
FoldFPBinaryOp(FoldMin));
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FMin}].push_back(
FoldFPBinaryOp(FoldMin));
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450SMax}].push_back(
FoldFPBinaryOp(FoldMax));
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450UMax}].push_back(
FoldFPBinaryOp(FoldMax));
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FMax}].push_back(
FoldFPBinaryOp(FoldMax));
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450UClamp}].push_back(
FoldClamp1);
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450UClamp}].push_back(
FoldClamp2);
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450UClamp}].push_back(
FoldClamp3);
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450SClamp}].push_back(
FoldClamp1);
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450SClamp}].push_back(
FoldClamp2);
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450SClamp}].push_back(
FoldClamp3);
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FClamp}].push_back(
FoldClamp1);
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FClamp}].push_back(
FoldClamp2);
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FClamp}].push_back(
FoldClamp3);
}
}
} // namespace opt

View File

@@ -389,6 +389,13 @@ const Constant* ConstantManager::GetConstant(
return cst ? RegisterConstant(std::move(cst)) : nullptr;
}
uint32_t ConstantManager::GetFloatConst(float val) {
Type* float_type = context()->get_type_mgr()->GetFloatType();
utils::FloatProxy<float> v(val);
const Constant* c = GetConstant(float_type, v.GetWords());
return GetDefiningInstruction(c)->result_id();
}
std::vector<const analysis::Constant*> Constant::GetVectorComponents(
analysis::ConstantManager* const_mgr) const {
std::vector<const analysis::Constant*> components;

View File

@@ -626,6 +626,9 @@ class ConstantManager {
}
}
// Returns the id of a 32-bit floating point constant with value |val|.
uint32_t GetFloatConst(float val);
private:
// Creates a Constant instance with the given type and a vector of constant
// defining words. Returns a unique pointer to the created Constant instance
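As a usage sketch, the new ConstantManager::GetFloatConst helper lets a pass materialize a float32 constant id in one call; the wrapper function below is illustrative only.

#include "source/opt/constants.h"
#include "source/opt/ir_context.h"

// Illustrative: return the id of the float32 constant 0.5, creating the
// OpConstant (and its type) in the module if it does not already exist.
uint32_t GetHalfWeightId(spvtools::opt::IRContext* ctx) {
  return ctx->get_constant_mgr()->GetFloatConst(0.5f);
}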

View File

@@ -0,0 +1,460 @@
// Copyright (c) 2019 The Khronos Group Inc.
// Copyright (c) 2019 Valve Corporation
// Copyright (c) 2019 LunarG Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "convert_to_half_pass.h"
#include "source/opt/ir_builder.h"
namespace {
// Indices of operands in SPIR-V instructions
static const int kImageSampleDrefIdInIdx = 2;
} // anonymous namespace
namespace spvtools {
namespace opt {
bool ConvertToHalfPass::IsArithmetic(Instruction* inst) {
return target_ops_core_.count(inst->opcode()) != 0 ||
(inst->opcode() == SpvOpExtInst &&
inst->GetSingleWordInOperand(0) ==
context()->get_feature_mgr()->GetExtInstImportId_GLSLstd450() &&
target_ops_450_.count(inst->GetSingleWordInOperand(1)) != 0);
}
bool ConvertToHalfPass::IsFloat(Instruction* inst, uint32_t width) {
uint32_t ty_id = inst->type_id();
if (ty_id == 0) return false;
return Pass::IsFloat(ty_id, width);
}
bool ConvertToHalfPass::IsRelaxed(Instruction* inst) {
uint32_t r_id = inst->result_id();
for (auto r_inst : get_decoration_mgr()->GetDecorationsFor(r_id, false))
if (r_inst->opcode() == SpvOpDecorate &&
r_inst->GetSingleWordInOperand(1) == SpvDecorationRelaxedPrecision)
return true;
return false;
}
analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) {
analysis::Float float_ty(width);
return context()->get_type_mgr()->GetRegisteredType(&float_ty);
}
analysis::Type* ConvertToHalfPass::FloatVectorType(uint32_t v_len,
uint32_t width) {
analysis::Type* reg_float_ty = FloatScalarType(width);
analysis::Vector vec_ty(reg_float_ty, v_len);
return context()->get_type_mgr()->GetRegisteredType(&vec_ty);
}
analysis::Type* ConvertToHalfPass::FloatMatrixType(uint32_t v_cnt,
uint32_t vty_id,
uint32_t width) {
Instruction* vty_inst = get_def_use_mgr()->GetDef(vty_id);
uint32_t v_len = vty_inst->GetSingleWordInOperand(1);
analysis::Type* reg_vec_ty = FloatVectorType(v_len, width);
analysis::Matrix mat_ty(reg_vec_ty, v_cnt);
return context()->get_type_mgr()->GetRegisteredType(&mat_ty);
}
uint32_t ConvertToHalfPass::EquivFloatTypeId(uint32_t ty_id, uint32_t width) {
analysis::Type* reg_equiv_ty;
Instruction* ty_inst = get_def_use_mgr()->GetDef(ty_id);
if (ty_inst->opcode() == SpvOpTypeMatrix)
reg_equiv_ty = FloatMatrixType(ty_inst->GetSingleWordInOperand(1),
ty_inst->GetSingleWordInOperand(0), width);
else if (ty_inst->opcode() == SpvOpTypeVector)
reg_equiv_ty = FloatVectorType(ty_inst->GetSingleWordInOperand(1), width);
else // SpvOpTypeFloat
reg_equiv_ty = FloatScalarType(width);
return context()->get_type_mgr()->GetTypeInstruction(reg_equiv_ty);
}
void ConvertToHalfPass::GenConvert(uint32_t* val_idp, uint32_t width,
InstructionBuilder* builder) {
Instruction* val_inst = get_def_use_mgr()->GetDef(*val_idp);
uint32_t ty_id = val_inst->type_id();
uint32_t nty_id = EquivFloatTypeId(ty_id, width);
if (nty_id == ty_id) return;
Instruction* cvt_inst;
if (val_inst->opcode() == SpvOpUndef)
cvt_inst = builder->AddNullaryOp(nty_id, SpvOpUndef);
else
cvt_inst = builder->AddUnaryOp(nty_id, SpvOpFConvert, *val_idp);
*val_idp = cvt_inst->result_id();
}
bool ConvertToHalfPass::MatConvertCleanup(Instruction* inst) {
if (inst->opcode() != SpvOpFConvert) return false;
uint32_t mty_id = inst->type_id();
Instruction* mty_inst = get_def_use_mgr()->GetDef(mty_id);
if (mty_inst->opcode() != SpvOpTypeMatrix) return false;
uint32_t vty_id = mty_inst->GetSingleWordInOperand(0);
uint32_t v_cnt = mty_inst->GetSingleWordInOperand(1);
Instruction* vty_inst = get_def_use_mgr()->GetDef(vty_id);
uint32_t cty_id = vty_inst->GetSingleWordInOperand(0);
Instruction* cty_inst = get_def_use_mgr()->GetDef(cty_id);
InstructionBuilder builder(
context(), inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
// Convert each component vector, combine them with OpCompositeConstruct
// and replace original instruction.
uint32_t orig_width = (cty_inst->GetSingleWordInOperand(0) == 16) ? 32 : 16;
uint32_t orig_mat_id = inst->GetSingleWordInOperand(0);
uint32_t orig_vty_id = EquivFloatTypeId(vty_id, orig_width);
std::vector<Operand> opnds = {};
for (uint32_t vidx = 0; vidx < v_cnt; ++vidx) {
Instruction* ext_inst = builder.AddIdLiteralOp(
orig_vty_id, SpvOpCompositeExtract, orig_mat_id, vidx);
Instruction* cvt_inst =
builder.AddUnaryOp(vty_id, SpvOpFConvert, ext_inst->result_id());
opnds.push_back({SPV_OPERAND_TYPE_ID, {cvt_inst->result_id()}});
}
uint32_t mat_id = TakeNextId();
std::unique_ptr<Instruction> mat_inst(new Instruction(
context(), SpvOpCompositeConstruct, mty_id, mat_id, opnds));
(void)builder.AddInstruction(std::move(mat_inst));
context()->ReplaceAllUsesWith(inst->result_id(), mat_id);
// Turn original instruction into copy so it is valid.
inst->SetOpcode(SpvOpCopyObject);
inst->SetResultType(EquivFloatTypeId(mty_id, orig_width));
get_def_use_mgr()->AnalyzeInstUse(inst);
return true;
}
void ConvertToHalfPass::RemoveRelaxedDecoration(uint32_t id) {
context()->get_decoration_mgr()->RemoveDecorationsFrom(
id, [](const Instruction& dec) {
if (dec.opcode() == SpvOpDecorate &&
dec.GetSingleWordInOperand(1u) == SpvDecorationRelaxedPrecision)
return true;
else
return false;
});
}
bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
bool modified = false;
// Convert all float32 based operands to float16 equivalent and change
// instruction type to float16 equivalent.
InstructionBuilder builder(
context(), inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
inst->ForEachInId([&builder, &modified, this](uint32_t* idp) {
Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
if (!IsFloat(op_inst, 32)) return;
GenConvert(idp, 16, &builder);
modified = true;
});
if (IsFloat(inst, 32)) {
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
modified = true;
}
if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
return modified;
}
bool ConvertToHalfPass::ProcessPhi(Instruction* inst) {
// Skip if not float32
if (!IsFloat(inst, 32)) return false;
// Skip if no relaxed operands.
bool relaxed_found = false;
uint32_t ocnt = 0;
inst->ForEachInId([&ocnt, &relaxed_found, this](uint32_t* idp) {
if (ocnt % 2 == 0) {
Instruction* val_inst = get_def_use_mgr()->GetDef(*idp);
if (IsRelaxed(val_inst)) relaxed_found = true;
}
++ocnt;
});
if (!relaxed_found) return false;
// Add float16 converts of any float32 operands and change type
// of phi to float16 equivalent. Operand converts need to be added to
// preceding blocks.
ocnt = 0;
uint32_t* prev_idp;
inst->ForEachInId([&ocnt, &prev_idp, this](uint32_t* idp) {
if (ocnt % 2 == 0) {
prev_idp = idp;
} else {
Instruction* val_inst = get_def_use_mgr()->GetDef(*prev_idp);
if (IsFloat(val_inst, 32)) {
BasicBlock* bp = context()->get_instr_block(*idp);
auto insert_before = bp->tail();
if (insert_before != bp->begin()) {
--insert_before;
if (insert_before->opcode() != SpvOpSelectionMerge &&
insert_before->opcode() != SpvOpLoopMerge)
++insert_before;
}
InstructionBuilder builder(context(), &*insert_before,
IRContext::kAnalysisDefUse |
IRContext::kAnalysisInstrToBlockMapping);
GenConvert(prev_idp, 16, &builder);
}
}
++ocnt;
});
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
get_def_use_mgr()->AnalyzeInstUse(inst);
return true;
}
bool ConvertToHalfPass::ProcessExtract(Instruction* inst) {
bool modified = false;
uint32_t comp_id = inst->GetSingleWordInOperand(0);
Instruction* comp_inst = get_def_use_mgr()->GetDef(comp_id);
// If extract is relaxed float32 based type and the composite is a relaxed
// float32 based type, convert it to float16 equivalent. This is slightly
// aggressive and pushes any likely conversion to apply to the whole
// composite rather than apply to each extracted component later. This
// can be a win if the platform can convert the entire composite in the same
// time as one component. It risks converting components that may not be
// used, although empirical data on a large set of real-world shaders seems
// to suggest this is not common and the composite convert is the best choice.
if (IsFloat(inst, 32) && IsRelaxed(inst) && IsFloat(comp_inst, 32) &&
IsRelaxed(comp_inst)) {
InstructionBuilder builder(
context(), inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
GenConvert(&comp_id, 16, &builder);
inst->SetInOperand(0, {comp_id});
comp_inst = get_def_use_mgr()->GetDef(comp_id);
modified = true;
}
// If the composite is a float16 based type, make sure the type of the
// extract agrees.
if (IsFloat(comp_inst, 16) && !IsFloat(inst, 16)) {
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
modified = true;
}
if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
return modified;
}
bool ConvertToHalfPass::ProcessConvert(Instruction* inst) {
// If float32 and relaxed, change to float16 convert
if (IsFloat(inst, 32) && IsRelaxed(inst)) {
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
get_def_use_mgr()->AnalyzeInstUse(inst);
}
// If operand and result types are the same, replace result with operand
// and change convert to copy to keep validator happy; DCE will clean it up
uint32_t val_id = inst->GetSingleWordInOperand(0);
Instruction* val_inst = get_def_use_mgr()->GetDef(val_id);
if (inst->type_id() == val_inst->type_id()) {
context()->ReplaceAllUsesWith(inst->result_id(), val_id);
inst->SetOpcode(SpvOpCopyObject);
}
return true; // modified
}
bool ConvertToHalfPass::ProcessImageRef(Instruction* inst) {
bool modified = false;
// If image reference, only need to convert dref args back to float32
if (dref_image_ops_.count(inst->opcode()) != 0) {
uint32_t dref_id = inst->GetSingleWordInOperand(kImageSampleDrefIdInIdx);
Instruction* dref_inst = get_def_use_mgr()->GetDef(dref_id);
if (IsFloat(dref_inst, 16) && IsRelaxed(dref_inst)) {
InstructionBuilder builder(
context(), inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
GenConvert(&dref_id, 32, &builder);
inst->SetInOperand(kImageSampleDrefIdInIdx, {dref_id});
get_def_use_mgr()->AnalyzeInstUse(inst);
modified = true;
}
}
return modified;
}
bool ConvertToHalfPass::ProcessDefault(Instruction* inst) {
bool modified = false;
// If non-relaxed instruction has changed operands, need to convert
// them back to float32
InstructionBuilder builder(
context(), inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
inst->ForEachInId([&builder, &modified, this](uint32_t* idp) {
Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
if (!IsFloat(op_inst, 16)) return;
if (!IsRelaxed(op_inst)) return;
uint32_t old_id = *idp;
GenConvert(idp, 32, &builder);
if (*idp != old_id) modified = true;
});
if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
return modified;
}
bool ConvertToHalfPass::GenHalfCode(Instruction* inst) {
bool modified = false;
// Remember id for later deletion of RelaxedPrecision decoration
bool inst_relaxed = IsRelaxed(inst);
if (inst_relaxed) relaxed_ids_.push_back(inst->result_id());
if (IsArithmetic(inst) && inst_relaxed)
modified = GenHalfArith(inst);
else if (inst->opcode() == SpvOpPhi)
modified = ProcessPhi(inst);
else if (inst->opcode() == SpvOpCompositeExtract)
modified = ProcessExtract(inst);
else if (inst->opcode() == SpvOpFConvert)
modified = ProcessConvert(inst);
else if (image_ops_.count(inst->opcode()) != 0)
modified = ProcessImageRef(inst);
else
modified = ProcessDefault(inst);
return modified;
}
bool ConvertToHalfPass::ProcessFunction(Function* func) {
bool modified = false;
cfg()->ForEachBlockInReversePostOrder(
func->entry().get(), [&modified, this](BasicBlock* bb) {
for (auto ii = bb->begin(); ii != bb->end(); ++ii)
modified |= GenHalfCode(&*ii);
});
cfg()->ForEachBlockInReversePostOrder(
func->entry().get(), [&modified, this](BasicBlock* bb) {
for (auto ii = bb->begin(); ii != bb->end(); ++ii)
modified |= MatConvertCleanup(&*ii);
});
return modified;
}
Pass::Status ConvertToHalfPass::ProcessImpl() {
Pass::ProcessFunction pfn = [this](Function* fp) {
return ProcessFunction(fp);
};
bool modified = context()->ProcessEntryPointCallTree(pfn);
// If modified, make sure module has Float16 capability
if (modified) context()->AddCapability(SpvCapabilityFloat16);
// Remove all RelaxedPrecision decorations from instructions and globals
for (auto c_id : relaxed_ids_) RemoveRelaxedDecoration(c_id);
for (auto& val : get_module()->types_values()) {
uint32_t v_id = val.result_id();
if (v_id != 0) RemoveRelaxedDecoration(v_id);
}
return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
}
Pass::Status ConvertToHalfPass::Process() {
Initialize();
return ProcessImpl();
}
void ConvertToHalfPass::Initialize() {
target_ops_core_ = {
SpvOpVectorExtractDynamic,
SpvOpVectorInsertDynamic,
SpvOpVectorShuffle,
SpvOpCompositeConstruct,
SpvOpCompositeInsert,
SpvOpCopyObject,
SpvOpTranspose,
SpvOpConvertSToF,
SpvOpConvertUToF,
// SpvOpFConvert,
// SpvOpQuantizeToF16,
SpvOpFNegate,
SpvOpFAdd,
SpvOpFSub,
SpvOpFMul,
SpvOpFDiv,
SpvOpFMod,
SpvOpVectorTimesScalar,
SpvOpMatrixTimesScalar,
SpvOpVectorTimesMatrix,
SpvOpMatrixTimesVector,
SpvOpMatrixTimesMatrix,
SpvOpOuterProduct,
SpvOpDot,
SpvOpSelect,
SpvOpFOrdEqual,
SpvOpFUnordEqual,
SpvOpFOrdNotEqual,
SpvOpFUnordNotEqual,
SpvOpFOrdLessThan,
SpvOpFUnordLessThan,
SpvOpFOrdGreaterThan,
SpvOpFUnordGreaterThan,
SpvOpFOrdLessThanEqual,
SpvOpFUnordLessThanEqual,
SpvOpFOrdGreaterThanEqual,
SpvOpFUnordGreaterThanEqual,
};
target_ops_450_ = {
GLSLstd450Round, GLSLstd450RoundEven, GLSLstd450Trunc, GLSLstd450FAbs,
GLSLstd450FSign, GLSLstd450Floor, GLSLstd450Ceil, GLSLstd450Fract,
GLSLstd450Radians, GLSLstd450Degrees, GLSLstd450Sin, GLSLstd450Cos,
GLSLstd450Tan, GLSLstd450Asin, GLSLstd450Acos, GLSLstd450Atan,
GLSLstd450Sinh, GLSLstd450Cosh, GLSLstd450Tanh, GLSLstd450Asinh,
GLSLstd450Acosh, GLSLstd450Atanh, GLSLstd450Atan2, GLSLstd450Pow,
GLSLstd450Exp, GLSLstd450Log, GLSLstd450Exp2, GLSLstd450Log2,
GLSLstd450Sqrt, GLSLstd450InverseSqrt, GLSLstd450Determinant,
GLSLstd450MatrixInverse,
// TODO(greg-lunarg): GLSLstd450ModfStruct,
GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp, GLSLstd450FMix,
GLSLstd450Step, GLSLstd450SmoothStep, GLSLstd450Fma,
// TODO(greg-lunarg): GLSLstd450FrexpStruct,
GLSLstd450Ldexp, GLSLstd450Length, GLSLstd450Distance, GLSLstd450Cross,
GLSLstd450Normalize, GLSLstd450FaceForward, GLSLstd450Reflect,
GLSLstd450Refract, GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp};
image_ops_ = {SpvOpImageSampleImplicitLod,
SpvOpImageSampleExplicitLod,
SpvOpImageSampleDrefImplicitLod,
SpvOpImageSampleDrefExplicitLod,
SpvOpImageSampleProjImplicitLod,
SpvOpImageSampleProjExplicitLod,
SpvOpImageSampleProjDrefImplicitLod,
SpvOpImageSampleProjDrefExplicitLod,
SpvOpImageFetch,
SpvOpImageGather,
SpvOpImageDrefGather,
SpvOpImageRead,
SpvOpImageSparseSampleImplicitLod,
SpvOpImageSparseSampleExplicitLod,
SpvOpImageSparseSampleDrefImplicitLod,
SpvOpImageSparseSampleDrefExplicitLod,
SpvOpImageSparseSampleProjImplicitLod,
SpvOpImageSparseSampleProjExplicitLod,
SpvOpImageSparseSampleProjDrefImplicitLod,
SpvOpImageSparseSampleProjDrefExplicitLod,
SpvOpImageSparseFetch,
SpvOpImageSparseGather,
SpvOpImageSparseDrefGather,
SpvOpImageSparseTexelsResident,
SpvOpImageSparseRead};
dref_image_ops_ = {
SpvOpImageSampleDrefImplicitLod,
SpvOpImageSampleDrefExplicitLod,
SpvOpImageSampleProjDrefImplicitLod,
SpvOpImageSampleProjDrefExplicitLod,
SpvOpImageDrefGather,
SpvOpImageSparseSampleDrefImplicitLod,
SpvOpImageSparseSampleDrefExplicitLod,
SpvOpImageSparseSampleProjDrefImplicitLod,
SpvOpImageSparseSampleProjDrefExplicitLod,
SpvOpImageSparseDrefGather,
};
relaxed_ids_.clear();
}
} // namespace opt
} // namespace spvtools

View File

@@ -0,0 +1,134 @@
// Copyright (c) 2019 Valve Corporation
// Copyright (c) 2019 LunarG Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef LIBSPIRV_OPT_CONVERT_TO_HALF_PASS_H_
#define LIBSPIRV_OPT_CONVERT_TO_HALF_PASS_H_
#include "source/opt/ir_builder.h"
#include "source/opt/pass.h"
namespace spvtools {
namespace opt {
class ConvertToHalfPass : public Pass {
public:
ConvertToHalfPass() : Pass() {}
~ConvertToHalfPass() override = default;
IRContext::Analysis GetPreservedAnalyses() override {
return IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping;
}
// See optimizer.hpp for pass user documentation.
Status Process() override;
const char* name() const override { return "convert-to-half-pass"; }
private:
// Return true if |inst| is an arithmetic op that can be of type float16
bool IsArithmetic(Instruction* inst);
// Return true if |inst| returns scalar, vector or matrix type with base
// float and |width|
bool IsFloat(Instruction* inst, uint32_t width);
// Return true if |inst| is decorated with RelaxedPrecision
bool IsRelaxed(Instruction* inst);
// Return float scalar type of |width|
analysis::Type* FloatScalarType(uint32_t width);
// Return vector type of length |v_len| with float components of |width|
analysis::Type* FloatVectorType(uint32_t v_len, uint32_t width);
// Return matrix type of |v_cnt| columns whose length matches that of vector
// type |vty_id|, with float components of |width|
analysis::Type* FloatMatrixType(uint32_t v_cnt, uint32_t vty_id,
uint32_t width);
// Return equivalent to float type |ty_id| with |width|
uint32_t EquivFloatTypeId(uint32_t ty_id, uint32_t width);
// Append instructions to |builder| that convert the value |*val_idp| to the
// equivalent type of float width |width|. Set |*val_idp| to the new id.
void GenConvert(uint32_t* val_idp, uint32_t width,
InstructionBuilder* builder);
// Remove RelaxedPrecision decoration of |id|.
void RemoveRelaxedDecoration(uint32_t id);
// If |inst| is an arithmetic, phi, extract or convert instruction of float32
// base type and decorated with RelaxedPrecision, change it to the equivalent
// float16 based type instruction. Specifically, insert instructions to
// convert all operands to float16 (if needed) and change its type to the
// equivalent float16 type. Otherwise, insert instructions to convert its
// operands back to their original types, if needed.
bool GenHalfCode(Instruction* inst);
// Gen code for relaxed arithmetic |inst|
bool GenHalfArith(Instruction* inst);
// Gen code for relaxed phi |inst|
bool ProcessPhi(Instruction* inst);
// Gen code for relaxed extract |inst|
bool ProcessExtract(Instruction* inst);
// Gen code for relaxed convert |inst|
bool ProcessConvert(Instruction* inst);
// Gen code for image reference |inst|
bool ProcessImageRef(Instruction* inst);
// Process default non-relaxed |inst|
bool ProcessDefault(Instruction* inst);
// If |inst| is an FConvert of a matrix type, decompose it into per-column
// vector extracts and converts combined with an OpCompositeConstruct. Matrix
// FConverts are generated by GenHalfCode because they are easier to
// manipulate, but they are invalid SPIR-V, so we need to clean them up.
bool MatConvertCleanup(Instruction* inst);
// Call GenHalfCode on every instruction in |func|.
// If code is generated for an instruction, replace the instruction
// with the new instructions that are generated.
bool ProcessFunction(Function* func);
Pass::Status ProcessImpl();
// Initialize state for converting to half
void Initialize();
// Set of core operations to be processed
std::unordered_set<uint32_t> target_ops_core_;
// Set of 450 extension operations to be processed
std::unordered_set<uint32_t> target_ops_450_;
// Set of sample operations
std::unordered_set<uint32_t> image_ops_;
// Set of dref sample operations
std::unordered_set<uint32_t> dref_image_ops_;
// Result ids of relaxed instructions; their RelaxedPrecision decorations are
// removed at the end of the pass
std::vector<uint32_t> relaxed_ids_;
};
} // namespace opt
} // namespace spvtools
#endif // LIBSPIRV_OPT_CONVERT_TO_HALF_PASS_H_

View File

@@ -57,6 +57,9 @@ class FeatureManager {
// Add the extension |ext| to the feature manager.
void AddExtension(Instruction* ext);
// Analyzes |module| and records imported external instruction sets.
void AddExtInstImportIds(Module* module);
private:
// Analyzes |module| and records enabled extensions.
void AddExtensions(Module* module);
@@ -64,9 +67,6 @@ class FeatureManager {
// Analyzes |module| and records enabled capabilities.
void AddCapabilities(Module* module);
// Analyzes |module| and records imported external instruction sets.
void AddExtInstImportIds(Module* module);
// Auxiliary object for querying SPIR-V grammar facts.
const AssemblyGrammar& grammar_;

View File

@@ -296,7 +296,7 @@ void InstBindlessCheckPass::GenCheckCode(
// reference.
if (new_ref_id != 0) {
Instruction* phi_inst = builder.AddPhi(
ref_type_id, {new_ref_id, valid_blk_id, builder.GetNullId(ref_type_id),
ref_type_id, {new_ref_id, valid_blk_id, GetNullId(ref_type_id),
last_invalid_blk_id});
context()->ReplaceAllUsesWith(ref->ref_inst->result_id(),
phi_inst->result_id());

View File

@@ -108,8 +108,8 @@ void InstBuffAddrCheckPass::GenCheckCode(
// reference.
if (new_ref_id != 0) {
Instruction* phi_inst = builder.AddPhi(
ref_type_id, {new_ref_id, valid_blk_id, builder.GetNullId(ref_type_id),
invalid_blk_id});
ref_type_id,
{new_ref_id, valid_blk_id, GetNullId(ref_type_id), invalid_blk_id});
context()->ReplaceAllUsesWith(ref_inst->result_id(), phi_inst->result_id());
}
new_blocks->push_back(std::move(new_blk_ptr));

View File

@@ -109,13 +109,13 @@ class InstructionBuilder {
return AddInstruction(std::move(newQuadOp));
}
Instruction* AddIdLiteralOp(uint32_t type_id, SpvOp opcode, uint32_t operand1,
uint32_t operand2) {
Instruction* AddIdLiteralOp(uint32_t type_id, SpvOp opcode, uint32_t id,
uint32_t uliteral) {
// TODO(1841): Handle id overflow.
std::unique_ptr<Instruction> newBinOp(new Instruction(
GetContext(), opcode, type_id, GetContext()->TakeNextId(),
{{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {operand1}},
{spv_operand_type_t::SPV_OPERAND_TYPE_LITERAL_INTEGER, {operand2}}}));
{{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {id}},
{spv_operand_type_t::SPV_OPERAND_TYPE_LITERAL_INTEGER, {uliteral}}}));
return AddInstruction(std::move(newBinOp));
}
@@ -358,16 +358,6 @@ class InstructionBuilder {
return uint_inst->result_id();
}
uint32_t GetNullId(uint32_t type_id) {
analysis::TypeManager* type_mgr = GetContext()->get_type_mgr();
analysis::ConstantManager* const_mgr = GetContext()->get_constant_mgr();
const analysis::Type* type = type_mgr->GetType(type_id);
const analysis::Constant* null_const = const_mgr->GetConstant(type, {});
Instruction* null_inst =
const_mgr->GetDefiningInstruction(null_const, type_id);
return null_inst->result_id();
}
// Adds either a signed or unsigned 32 bit integer constant to the binary
// depending on the |sign|. If |sign| is true then the value is added as a
// signed constant otherwise as an unsigned constant. If |sign| is false the
@@ -502,6 +492,27 @@ class InstructionBuilder {
return AddInstruction(std::move(new_inst));
}
Instruction* AddNaryExtendedInstruction(
uint32_t result_type, uint32_t set, uint32_t instruction,
const std::vector<uint32_t>& ext_operands) {
std::vector<Operand> operands;
operands.push_back({SPV_OPERAND_TYPE_ID, {set}});
operands.push_back(
{SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER, {instruction}});
for (uint32_t id : ext_operands) {
operands.push_back({SPV_OPERAND_TYPE_ID, {id}});
}
uint32_t result_id = GetContext()->TakeNextId();
if (result_id == 0) {
return nullptr;
}
std::unique_ptr<Instruction> new_inst(new Instruction(
GetContext(), SpvOpExtInst, result_type, result_id, operands));
return AddInstruction(std::move(new_inst));
}
// Inserts the new instruction before the insertion point.
Instruction* AddInstruction(std::unique_ptr<Instruction>&& insn) {
Instruction* insn_ptr = &*insert_before_.InsertBefore(std::move(insn));
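Following this hunk, a hedged sketch of how a pass might use the new AddNaryExtendedInstruction helper to emit a GLSL.std.450 FMax; the wrapper function, parameter names and include choice are illustrative.

#include "GLSL.std.450.h"            // GLSLstd450FMax enum (from SPIRV-Headers)
#include "source/opt/ir_builder.h"

// Illustrative: emit "result = FMax(a, b)" through the extended instruction
// set. |float_ty_id| is the result type id, |glsl_set_id| the result id of
// the OpExtInstImport "GLSL.std.450" instruction.
spvtools::opt::Instruction* EmitFMax(spvtools::opt::InstructionBuilder* builder,
                                     uint32_t float_ty_id, uint32_t glsl_set_id,
                                     uint32_t a_id, uint32_t b_id) {
  return builder->AddNaryExtendedInstruction(float_ty_id, glsl_set_id,
                                             GLSLstd450FMax, {a_id, b_id});
}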

View File

@@ -199,6 +199,7 @@ class IRContext {
inline void AddExtension(const std::string& ext_name);
inline void AddExtension(std::unique_ptr<Instruction>&& e);
// Appends an extended instruction set instruction to this module.
inline void AddExtInstImport(const std::string& name);
inline void AddExtInstImport(std::unique_ptr<Instruction>&& e);
// Set the memory model for this module.
inline void SetMemoryModel(std::unique_ptr<Instruction>&& m);
@@ -971,9 +972,26 @@ void IRContext::AddExtension(std::unique_ptr<Instruction>&& e) {
module()->AddExtension(std::move(e));
}
void IRContext::AddExtInstImport(const std::string& name) {
const auto num_chars = name.size();
// Compute num words, accommodate the terminating null character.
const auto num_words = (num_chars + 1 + 3) / 4;
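// e.g. "GLSL.std.450" has 12 characters; with the terminating null that is
// 13 bytes, which pads to (12 + 1 + 3) / 4 = 4 words.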
std::vector<uint32_t> ext_words(num_words, 0u);
std::memcpy(ext_words.data(), name.data(), num_chars);
AddExtInstImport(std::unique_ptr<Instruction>(
new Instruction(this, SpvOpExtInstImport, 0u, TakeNextId(),
{{SPV_OPERAND_TYPE_LITERAL_STRING, ext_words}})));
}
void IRContext::AddExtInstImport(std::unique_ptr<Instruction>&& e) {
AddCombinatorsForExtension(e.get());
if (AreAnalysesValid(kAnalysisDefUse)) {
get_def_use_mgr()->AnalyzeInstDefUse(e.get());
}
module()->AddExtInstImport(std::move(e));
if (feature_mgr_ != nullptr) {
feature_mgr_->AddExtInstImportIds(module());
}
}
void IRContext::SetMemoryModel(std::unique_ptr<Instruction>&& m) {
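A small illustrative helper showing the intended use of the new string overload: make sure the GLSL.std.450 import exists, then query its id from the feature manager, which the overload above keeps up to date. Function and variable names are assumptions, not part of this commit.

#include "source/opt/ir_context.h"

// Illustrative: return the id of the OpExtInstImport "GLSL.std.450"
// instruction, adding the import to the module if it is missing.
uint32_t EnsureGlslStd450Import(spvtools::opt::IRContext* ctx) {
  uint32_t set_id = ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
  if (set_id == 0) {
    ctx->AddExtInstImport("GLSL.std.450");
    set_id = ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
  }
  return set_id;
}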

View File

@@ -415,6 +415,10 @@ bool Optimizer::RegisterPassFromFlag(const std::string& flag) {
} else if (pass_name == "inst-buff-addr-check") {
RegisterPass(CreateInstBuffAddrCheckPass(7, 23, 2));
RegisterPass(CreateAggressiveDCEPass());
} else if (pass_name == "convert-relaxed-to-half") {
RegisterPass(CreateConvertRelaxedToHalfPass());
} else if (pass_name == "relax-float-ops") {
RegisterPass(CreateRelaxFloatOpsPass());
} else if (pass_name == "simplify-instructions") {
RegisterPass(CreateSimplificationPass());
} else if (pass_name == "ssa-rewrite") {
@@ -877,6 +881,16 @@ Optimizer::PassToken CreateInstBuffAddrCheckPass(uint32_t desc_set,
MakeUnique<opt::InstBuffAddrCheckPass>(desc_set, shader_id, version));
}
Optimizer::PassToken CreateConvertRelaxedToHalfPass() {
return MakeUnique<Optimizer::PassToken::Impl>(
MakeUnique<opt::ConvertToHalfPass>());
}
Optimizer::PassToken CreateRelaxFloatOpsPass() {
return MakeUnique<Optimizer::PassToken::Impl>(
MakeUnique<opt::RelaxFloatOpsPass>());
}
Optimizer::PassToken CreateCodeSinkingPass() {
return MakeUnique<Optimizer::PassToken::Impl>(
MakeUnique<opt::CodeSinkingPass>());
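The flag names registered above also work through Optimizer::RegisterPassFromFlag, so a tool that forwards command-line style flags can enable the new passes without calling the factory functions directly. A small sketch (flags must include the leading "--"; the helper name is illustrative):

#include "spirv-tools/optimizer.hpp"

// Illustrative: enable the new passes by flag name, as a flag-forwarding tool
// such as spirv-opt would. Returns false if either flag is not recognized.
bool EnableRelaxToHalfByFlag(spvtools::Optimizer* opt) {
  return opt->RegisterPassFromFlag("--relax-float-ops") &&
         opt->RegisterPassFromFlag("--convert-relaxed-to-half");
}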

View File

@@ -54,6 +54,36 @@ uint32_t Pass::GetPointeeTypeId(const Instruction* ptrInst) const {
return ptrTypeInst->GetSingleWordInOperand(kTypePointerTypeIdInIdx);
}
Instruction* Pass::GetBaseType(uint32_t ty_id) {
Instruction* ty_inst = get_def_use_mgr()->GetDef(ty_id);
if (ty_inst->opcode() == SpvOpTypeMatrix) {
uint32_t vty_id = ty_inst->GetSingleWordInOperand(0);
ty_inst = get_def_use_mgr()->GetDef(vty_id);
}
if (ty_inst->opcode() == SpvOpTypeVector) {
uint32_t cty_id = ty_inst->GetSingleWordInOperand(0);
ty_inst = get_def_use_mgr()->GetDef(cty_id);
}
return ty_inst;
}
bool Pass::IsFloat(uint32_t ty_id, uint32_t width) {
Instruction* ty_inst = GetBaseType(ty_id);
if (ty_inst->opcode() != SpvOpTypeFloat) return false;
return ty_inst->GetSingleWordInOperand(0) == width;
}
uint32_t Pass::GetNullId(uint32_t type_id) {
if (IsFloat(type_id, 16)) context()->AddCapability(SpvCapabilityFloat16);
analysis::TypeManager* type_mgr = context()->get_type_mgr();
analysis::ConstantManager* const_mgr = context()->get_constant_mgr();
const analysis::Type* type = type_mgr->GetType(type_id);
const analysis::Constant* null_const = const_mgr->GetConstant(type, {});
Instruction* null_inst =
const_mgr->GetDefiningInstruction(null_const, type_id);
return null_inst->result_id();
}
uint32_t Pass::GenerateCopy(Instruction* object_to_copy, uint32_t new_type_id,
Instruction* insertion_position) {
analysis::TypeManager* type_mgr = context()->get_type_mgr();

View File

@@ -109,6 +109,16 @@ class Pass {
// Return type id for |ptrInst|'s pointee
uint32_t GetPointeeTypeId(const Instruction* ptrInst) const;
// Return base type of |ty_id| type
Instruction* GetBaseType(uint32_t ty_id);
// Return true if |ty_id| is a scalar, vector or matrix type with float
// components of |width|
bool IsFloat(uint32_t ty_id, uint32_t width);
// Return the id of OpConstantNull of type |type_id|. Create if necessary.
uint32_t GetNullId(uint32_t type_id);
protected:
// Constructs a new pass.
//
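An illustrative sketch of the helpers hoisted into Pass by this change, written as a free function standing in for code that would normally live in a Pass subclass; the function name is an assumption.

#include "source/opt/pass.h"

// Illustrative: fetch (creating if necessary) an OpConstantNull of |type_id|
// to use as a default operand, e.g. for a new OpPhi. GetNullId also adds the
// Float16 capability when |type_id| is a 16-bit float type (see pass.cpp).
uint32_t NullOperandFor(spvtools::opt::Pass* pass, uint32_t type_id) {
  return pass->GetNullId(type_id);
}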

View File

@@ -25,6 +25,7 @@
#include "source/opt/code_sink.h"
#include "source/opt/combine_access_chains.h"
#include "source/opt/compact_ids_pass.h"
#include "source/opt/convert_to_half_pass.h"
#include "source/opt/copy_prop_arrays.h"
#include "source/opt/dead_branch_elim_pass.h"
#include "source/opt/dead_insert_elim_pass.h"
@@ -63,6 +64,7 @@
#include "source/opt/process_lines_pass.h"
#include "source/opt/reduce_load_size.h"
#include "source/opt/redundancy_elimination.h"
#include "source/opt/relax_float_ops_pass.h"
#include "source/opt/remove_duplicates_pass.h"
#include "source/opt/replace_invalid_opc.h"
#include "source/opt/scalar_replacement_pass.h"

View File

@@ -0,0 +1,178 @@
// Copyright (c) 2019 The Khronos Group Inc.
// Copyright (c) 2019 Valve Corporation
// Copyright (c) 2019 LunarG Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "relax_float_ops_pass.h"
#include "source/opt/ir_builder.h"
namespace spvtools {
namespace opt {
bool RelaxFloatOpsPass::IsRelaxable(Instruction* inst) {
return target_ops_core_f_rslt_.count(inst->opcode()) != 0 ||
target_ops_core_f_opnd_.count(inst->opcode()) != 0 ||
sample_ops_.count(inst->opcode()) != 0 ||
(inst->opcode() == SpvOpExtInst &&
inst->GetSingleWordInOperand(0) ==
context()->get_feature_mgr()->GetExtInstImportId_GLSLstd450() &&
target_ops_450_.count(inst->GetSingleWordInOperand(1)) != 0);
}
bool RelaxFloatOpsPass::IsFloat32(Instruction* inst) {
uint32_t ty_id;
if (target_ops_core_f_opnd_.count(inst->opcode()) != 0) {
uint32_t opnd_id = inst->GetSingleWordInOperand(0);
Instruction* opnd_inst = get_def_use_mgr()->GetDef(opnd_id);
ty_id = opnd_inst->type_id();
} else {
ty_id = inst->type_id();
if (ty_id == 0) return false;
}
return IsFloat(ty_id, 32);
}
bool RelaxFloatOpsPass::IsRelaxed(uint32_t r_id) {
for (auto r_inst : get_decoration_mgr()->GetDecorationsFor(r_id, false))
if (r_inst->opcode() == SpvOpDecorate &&
r_inst->GetSingleWordInOperand(1) == SpvDecorationRelaxedPrecision)
return true;
return false;
}
bool RelaxFloatOpsPass::ProcessInst(Instruction* r_inst) {
uint32_t r_id = r_inst->result_id();
if (r_id == 0) return false;
if (!IsFloat32(r_inst)) return false;
if (IsRelaxed(r_id)) return false;
if (!IsRelaxable(r_inst)) return false;
get_decoration_mgr()->AddDecoration(r_id, SpvDecorationRelaxedPrecision);
return true;
}
bool RelaxFloatOpsPass::ProcessFunction(Function* func) {
bool modified = false;
cfg()->ForEachBlockInReversePostOrder(
func->entry().get(), [&modified, this](BasicBlock* bb) {
for (auto ii = bb->begin(); ii != bb->end(); ++ii)
modified |= ProcessInst(&*ii);
});
return modified;
}
Pass::Status RelaxFloatOpsPass::ProcessImpl() {
Pass::ProcessFunction pfn = [this](Function* fp) {
return ProcessFunction(fp);
};
bool modified = context()->ProcessEntryPointCallTree(pfn);
return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
}
Pass::Status RelaxFloatOpsPass::Process() {
Initialize();
return ProcessImpl();
}
void RelaxFloatOpsPass::Initialize() {
target_ops_core_f_rslt_ = {
SpvOpLoad,
SpvOpPhi,
SpvOpVectorExtractDynamic,
SpvOpVectorInsertDynamic,
SpvOpVectorShuffle,
SpvOpCompositeExtract,
SpvOpCompositeConstruct,
SpvOpCompositeInsert,
SpvOpCopyObject,
SpvOpTranspose,
SpvOpConvertSToF,
SpvOpConvertUToF,
SpvOpFConvert,
// SpvOpQuantizeToF16,
SpvOpFNegate,
SpvOpFAdd,
SpvOpFSub,
SpvOpFMul,
SpvOpFDiv,
SpvOpFMod,
SpvOpVectorTimesScalar,
SpvOpMatrixTimesScalar,
SpvOpVectorTimesMatrix,
SpvOpMatrixTimesVector,
SpvOpMatrixTimesMatrix,
SpvOpOuterProduct,
SpvOpDot,
SpvOpSelect,
};
target_ops_core_f_opnd_ = {
SpvOpFOrdEqual,
SpvOpFUnordEqual,
SpvOpFOrdNotEqual,
SpvOpFUnordNotEqual,
SpvOpFOrdLessThan,
SpvOpFUnordLessThan,
SpvOpFOrdGreaterThan,
SpvOpFUnordGreaterThan,
SpvOpFOrdLessThanEqual,
SpvOpFUnordLessThanEqual,
SpvOpFOrdGreaterThanEqual,
SpvOpFUnordGreaterThanEqual,
};
target_ops_450_ = {
GLSLstd450Round, GLSLstd450RoundEven, GLSLstd450Trunc, GLSLstd450FAbs,
GLSLstd450FSign, GLSLstd450Floor, GLSLstd450Ceil, GLSLstd450Fract,
GLSLstd450Radians, GLSLstd450Degrees, GLSLstd450Sin, GLSLstd450Cos,
GLSLstd450Tan, GLSLstd450Asin, GLSLstd450Acos, GLSLstd450Atan,
GLSLstd450Sinh, GLSLstd450Cosh, GLSLstd450Tanh, GLSLstd450Asinh,
GLSLstd450Acosh, GLSLstd450Atanh, GLSLstd450Atan2, GLSLstd450Pow,
GLSLstd450Exp, GLSLstd450Log, GLSLstd450Exp2, GLSLstd450Log2,
GLSLstd450Sqrt, GLSLstd450InverseSqrt, GLSLstd450Determinant,
GLSLstd450MatrixInverse,
// TODO(greg-lunarg): GLSLstd450ModfStruct,
GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp, GLSLstd450FMix,
GLSLstd450Step, GLSLstd450SmoothStep, GLSLstd450Fma,
// TODO(greg-lunarg): GLSLstd450FrexpStruct,
GLSLstd450Ldexp, GLSLstd450Length, GLSLstd450Distance, GLSLstd450Cross,
GLSLstd450Normalize, GLSLstd450FaceForward, GLSLstd450Reflect,
GLSLstd450Refract, GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp};
sample_ops_ = {SpvOpImageSampleImplicitLod,
SpvOpImageSampleExplicitLod,
SpvOpImageSampleDrefImplicitLod,
SpvOpImageSampleDrefExplicitLod,
SpvOpImageSampleProjImplicitLod,
SpvOpImageSampleProjExplicitLod,
SpvOpImageSampleProjDrefImplicitLod,
SpvOpImageSampleProjDrefExplicitLod,
SpvOpImageFetch,
SpvOpImageGather,
SpvOpImageDrefGather,
SpvOpImageRead,
SpvOpImageSparseSampleImplicitLod,
SpvOpImageSparseSampleExplicitLod,
SpvOpImageSparseSampleDrefImplicitLod,
SpvOpImageSparseSampleDrefExplicitLod,
SpvOpImageSparseSampleProjImplicitLod,
SpvOpImageSparseSampleProjExplicitLod,
SpvOpImageSparseSampleProjDrefImplicitLod,
SpvOpImageSparseSampleProjDrefExplicitLod,
SpvOpImageSparseFetch,
SpvOpImageSparseGather,
SpvOpImageSparseDrefGather,
SpvOpImageSparseTexelsResident,
SpvOpImageSparseRead};
}
} // namespace opt
} // namespace spvtools

View File

@@ -0,0 +1,80 @@
// Copyright (c) 2019 Valve Corporation
// Copyright (c) 2019 LunarG Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef LIBSPIRV_OPT_RELAX_FLOAT_OPS_PASS_H_
#define LIBSPIRV_OPT_RELAX_FLOAT_OPS_PASS_H_
#include "source/opt/ir_builder.h"
#include "source/opt/pass.h"
namespace spvtools {
namespace opt {
class RelaxFloatOpsPass : public Pass {
public:
RelaxFloatOpsPass() : Pass() {}
~RelaxFloatOpsPass() override = default;
IRContext::Analysis GetPreservedAnalyses() override {
return IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping;
}
// See optimizer.hpp for pass user documentation.
Status Process() override;
const char* name() const override { return "relax-float-ops"; }
private:
// Return true if |inst| can have the RelaxedPrecision decoration applied
// to it.
bool IsRelaxable(Instruction* inst);
// Return true if |inst| returns scalar, vector or matrix type with base
// float and width 32
bool IsFloat32(Instruction* inst);
// Return true if |r_id| is decorated with RelaxedPrecision
bool IsRelaxed(uint32_t r_id);
// If |inst| is an instruction of float32-based type and is not decorated
// RelaxedPrecision, add such a decoration to the module.
bool ProcessInst(Instruction* inst);
// Call ProcessInst on every instruction in |func|.
bool ProcessFunction(Function* func);
Pass::Status ProcessImpl();
// Initialize state for relaxing float operations
void Initialize();
// Set of float result core operations to be processed
std::unordered_set<uint32_t> target_ops_core_f_rslt_;
// Set of float operand core operations to be processed
std::unordered_set<uint32_t> target_ops_core_f_opnd_;
// Set of GLSL.std.450 extension operations to be processed
std::unordered_set<uint32_t> target_ops_450_;
// Set of sample operations
std::unordered_set<uint32_t> sample_ops_;
};
} // namespace opt
} // namespace spvtools
#endif // LIBSPIRV_OPT_RELAX_FLOAT_OPS_PASS_H_

View File

@@ -81,8 +81,12 @@ add_library(SPIRV-Tools-reduce ${SPIRV_TOOLS_REDUCE_SOURCES})
spvtools_default_compile_options(SPIRV-Tools-reduce)
target_include_directories(SPIRV-Tools-reduce
PUBLIC ${spirv-tools_SOURCE_DIR}/include
PUBLIC ${SPIRV_HEADER_INCLUDE_DIR}
PUBLIC
$<BUILD_INTERFACE:${spirv-tools_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/include>
PUBLIC
$<BUILD_INTERFACE:${SPIRV_HEADER_INCLUDE_DIR}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
PRIVATE ${spirv-tools_BINARY_DIR}
)
# The reducer reuses a lot of functionality from the SPIRV-Tools library.
@@ -94,8 +98,9 @@ set_property(TARGET SPIRV-Tools-reduce PROPERTY FOLDER "SPIRV-Tools libraries")
spvtools_check_symbol_exports(SPIRV-Tools-reduce)
if(ENABLE_SPIRV_TOOLS_INSTALL)
install(TARGETS SPIRV-Tools-reduce
install(TARGETS SPIRV-Tools-reduce EXPORT SPIRV-Tools-reduceTargets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
install(EXPORT SPIRV-Tools-reduceTargets DESTINATION lib/cmake)
endif(ENABLE_SPIRV_TOOLS_INSTALL)

View File

@@ -18,7 +18,7 @@
#include "spirv-tools/optimizer.hpp"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
spvtools::Optimizer optimizer(SPV_ENV_WEBGPU_0);
spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_1);
optimizer.SetMessageConsumer([](spv_message_level_t, const char*,
const spv_position_t&, const char*) {});

View File

@@ -18,7 +18,7 @@
#include "spirv-tools/optimizer.hpp"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_1);
spvtools::Optimizer optimizer(SPV_ENV_WEBGPU_0);
optimizer.SetMessageConsumer([](spv_message_level_t, const char*,
const spv_position_t&, const char*) {});

View File

@@ -28,6 +28,7 @@ add_spvtools_unittest(TARGET opt
compact_ids_test.cpp
constants_test.cpp
constant_manager_test.cpp
convert_relaxed_to_half_test.cpp
copy_prop_array_test.cpp
dead_branch_elim_test.cpp
dead_insert_elim_test.cpp
@@ -80,6 +81,7 @@ add_spvtools_unittest(TARGET opt
reduce_load_size_test.cpp
redundancy_elimination_test.cpp
register_liveness.cpp
relax_float_ops_test.cpp
replace_invalid_opc_test.cpp
scalar_analysis.cpp
scalar_replacement_test.cpp

View File

@@ -233,6 +233,7 @@ TEST_F(AmdExtToKhrTest, ReplaceSwizzleInvocationsMaskedAMD) {
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceWriteInvocationAMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
@@ -269,6 +270,585 @@ TEST_F(AmdExtToKhrTest, ReplaceWriteInvocationAMD) {
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceFMin3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeFloat 32
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] FMin [[x]] [[y]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] FMin [[temp]] [[z]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %float
%8 = OpUndef %float
%9 = OpUndef %float
%10 = OpExtInst %float %ext FMin3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceSMin3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeInt 32 1
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] SMin [[x]] [[y]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] SMin [[temp]] [[z]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%int = OpTypeInt 32 1
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %int
%8 = OpUndef %int
%9 = OpUndef %int
%10 = OpExtInst %int %ext SMin3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceUMin3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeInt 32 0
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] UMin [[x]] [[y]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] UMin [[temp]] [[z]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%int = OpTypeInt 32 1
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %uint
%8 = OpUndef %uint
%9 = OpUndef %uint
%10 = OpExtInst %uint %ext UMin3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceFMax3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeFloat 32
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] FMax [[x]] [[y]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] FMax [[temp]] [[z]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %float
%8 = OpUndef %float
%9 = OpUndef %float
%10 = OpExtInst %float %ext FMax3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceSMax3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeInt 32 1
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] SMax [[x]] [[y]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] SMax [[temp]] [[z]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%int = OpTypeInt 32 1
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %int
%8 = OpUndef %int
%9 = OpUndef %int
%10 = OpExtInst %int %ext SMax3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceUMax3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeInt 32 0
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[x]] [[y]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[temp]] [[z]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%int = OpTypeInt 32 1
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %uint
%8 = OpUndef %uint
%9 = OpUndef %uint
%10 = OpExtInst %uint %ext UMax3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceVecUMax3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeVector
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[temp:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[x]] [[y]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[temp]] [[z]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%vec = OpTypeVector %uint 4
%int = OpTypeInt 32 1
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %vec
%8 = OpUndef %vec
%9 = OpUndef %vec
%10 = OpExtInst %vec %ext UMax3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceFMid3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeFloat 32
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[min:%\w+]] = OpExtInst [[type]] [[ext]] FMin [[y]] [[z]]
; CHECK-NEXT: [[max:%\w+]] = OpExtInst [[type]] [[ext]] FMax [[y]] [[z]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] FClamp [[x]] [[min]] [[max]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %float
%8 = OpUndef %float
%9 = OpUndef %float
%10 = OpExtInst %float %ext FMid3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceSMid3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeInt 32 1
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[min:%\w+]] = OpExtInst [[type]] [[ext]] SMin [[y]] [[z]]
; CHECK-NEXT: [[max:%\w+]] = OpExtInst [[type]] [[ext]] SMax [[y]] [[z]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] SClamp [[x]] [[min]] [[max]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%int = OpTypeInt 32 1
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %int
%8 = OpUndef %int
%9 = OpUndef %int
%10 = OpExtInst %int %ext SMid3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceUMid3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeInt 32 0
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[min:%\w+]] = OpExtInst [[type]] [[ext]] UMin [[y]] [[z]]
; CHECK-NEXT: [[max:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[y]] [[z]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] UClamp [[x]] [[min]] [[max]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%int = OpTypeInt 32 1
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %uint
%8 = OpUndef %uint
%9 = OpUndef %uint
%10 = OpExtInst %uint %ext UMid3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceVecUMid3AMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_trinary_minmax"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_trinary_minmax"
; CHECK: [[ext:%\w+]] = OpExtInstImport "GLSL.std.450"
; CHECK: [[type:%\w+]] = OpTypeVector
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[x:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[y:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[z:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[min:%\w+]] = OpExtInst [[type]] [[ext]] UMin [[y]] [[z]]
; CHECK-NEXT: [[max:%\w+]] = OpExtInst [[type]] [[ext]] UMax [[y]] [[z]]
; CHECK-NEXT: [[result:%\w+]] = OpExtInst [[type]] [[ext]] UClamp [[x]] [[min]] [[max]]
OpCapability Shader
OpExtension "SPV_AMD_shader_trinary_minmax"
%ext = OpExtInstImport "SPV_AMD_shader_trinary_minmax"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%vec = OpTypeVector %uint 3
%int = OpTypeInt 32 1
%float = OpTypeFloat 32
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %vec
%8 = OpUndef %vec
%9 = OpUndef %vec
%10 = OpExtInst %vec %ext UMid3AMD %7 %8 %9
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceCubeFaceCoordAMD) {
// Sorry for the Check test. The code sequence is so long that I do not think
// a match test would be any more legible. This tests the replacement of
// the CubeFaceCoordAMD instruction.
const std::string before = R"(
OpCapability Shader
OpExtension "SPV_KHR_storage_buffer_storage_class"
OpExtension "SPV_AMD_gcn_shader"
%1 = OpExtInstImport "SPV_AMD_gcn_shader"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %2 "main"
OpExecutionMode %2 LocalSize 1 1 1
%void = OpTypeVoid
%4 = OpTypeFunction %void
%float = OpTypeFloat 32
%v2float = OpTypeVector %float 2
%v3float = OpTypeVector %float 3
%2 = OpFunction %void None %4
%8 = OpLabel
%9 = OpUndef %v3float
%10 = OpExtInst %v2float %1 CubeFaceCoordAMD %9
OpReturn
OpFunctionEnd
)";
const std::string after = R"(OpCapability Shader
OpExtension "SPV_KHR_storage_buffer_storage_class"
%12 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %2 "main"
OpExecutionMode %2 LocalSize 1 1 1
%void = OpTypeVoid
%4 = OpTypeFunction %void
%float = OpTypeFloat 32
%v2float = OpTypeVector %float 2
%v3float = OpTypeVector %float 3
%bool = OpTypeBool
%float_0 = OpConstant %float 0
%float_2 = OpConstant %float 2
%float_0_5 = OpConstant %float 0.5
%16 = OpConstantComposite %v2float %float_0_5 %float_0_5
%2 = OpFunction %void None %4
%8 = OpLabel
%9 = OpUndef %v3float
%17 = OpCompositeExtract %float %9 0
%18 = OpCompositeExtract %float %9 1
%19 = OpCompositeExtract %float %9 2
%20 = OpFNegate %float %17
%21 = OpFNegate %float %18
%22 = OpFNegate %float %19
%23 = OpExtInst %float %12 FAbs %17
%24 = OpExtInst %float %12 FAbs %18
%25 = OpExtInst %float %12 FAbs %19
%26 = OpFOrdLessThan %bool %19 %float_0
%27 = OpFOrdLessThan %bool %18 %float_0
%28 = OpFOrdLessThan %bool %17 %float_0
%29 = OpExtInst %float %12 FMax %23 %24
%30 = OpExtInst %float %12 FMax %25 %29
%31 = OpFMul %float %float_2 %30
%32 = OpFOrdGreaterThanEqual %bool %25 %29
%33 = OpLogicalNot %bool %32
%34 = OpFOrdGreaterThanEqual %bool %24 %23
%35 = OpLogicalAnd %bool %33 %34
%36 = OpSelect %float %26 %20 %17
%37 = OpSelect %float %28 %19 %22
%38 = OpSelect %float %35 %17 %37
%39 = OpSelect %float %32 %36 %38
%40 = OpSelect %float %27 %22 %19
%41 = OpSelect %float %35 %40 %21
%42 = OpCompositeConstruct %v2float %39 %41
%43 = OpCompositeConstruct %v2float %31 %31
%44 = OpFDiv %v2float %42 %43
%10 = OpFAdd %v2float %44 %16
OpReturn
OpFunctionEnd
)";
SinglePassRunAndCheck<AmdExtensionToKhrPass>(before, after, true);
}
TEST_F(AmdExtToKhrTest, ReplaceCubeFaceIndexAMD) {
// Sorry for the Check test. The code sequence is so long that I do not think
// a match test would be any more legible. This tests the replacement of
// the CubeFaceIndexAMD instruction.
const std::string before = R"(OpCapability Shader
OpExtension "SPV_KHR_storage_buffer_storage_class"
OpExtension "SPV_AMD_gcn_shader"
%1 = OpExtInstImport "SPV_AMD_gcn_shader"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %2 "main"
OpExecutionMode %2 LocalSize 1 1 1
%void = OpTypeVoid
%4 = OpTypeFunction %void
%float = OpTypeFloat 32
%v3float = OpTypeVector %float 3
%2 = OpFunction %void None %4
%7 = OpLabel
%8 = OpUndef %v3float
%9 = OpExtInst %float %1 CubeFaceIndexAMD %8
OpReturn
OpFunctionEnd
)";
const std::string after = R"(OpCapability Shader
OpExtension "SPV_KHR_storage_buffer_storage_class"
%11 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %2 "main"
OpExecutionMode %2 LocalSize 1 1 1
%void = OpTypeVoid
%4 = OpTypeFunction %void
%float = OpTypeFloat 32
%v3float = OpTypeVector %float 3
%bool = OpTypeBool
%float_0 = OpConstant %float 0
%float_1 = OpConstant %float 1
%float_2 = OpConstant %float 2
%float_3 = OpConstant %float 3
%float_4 = OpConstant %float 4
%float_5 = OpConstant %float 5
%2 = OpFunction %void None %4
%7 = OpLabel
%8 = OpUndef %v3float
%18 = OpCompositeExtract %float %8 0
%19 = OpCompositeExtract %float %8 1
%20 = OpCompositeExtract %float %8 2
%21 = OpExtInst %float %11 FAbs %18
%22 = OpExtInst %float %11 FAbs %19
%23 = OpExtInst %float %11 FAbs %20
%24 = OpFOrdLessThan %bool %20 %float_0
%25 = OpFOrdLessThan %bool %19 %float_0
%26 = OpFOrdLessThan %bool %18 %float_0
%27 = OpExtInst %float %11 FMax %21 %22
%28 = OpFOrdGreaterThanEqual %bool %23 %27
%29 = OpFOrdGreaterThanEqual %bool %22 %21
%30 = OpSelect %float %24 %float_5 %float_4
%31 = OpSelect %float %25 %float_3 %float_2
%32 = OpSelect %float %26 %float_1 %float_0
%33 = OpSelect %float %29 %31 %32
%9 = OpSelect %float %28 %30 %33
OpReturn
OpFunctionEnd
)";
SinglePassRunAndCheck<AmdExtensionToKhrPass>(before, after, true);
}
TEST_F(AmdExtToKhrTest, SetVersion) {
const std::string text = R"(
OpCapability Shader

File diff suppressed because it is too large

View File

@@ -222,6 +222,7 @@ OpName %main "main"
%v2float_3_2 = OpConstantComposite %v2float %float_3 %float_2
%v2float_4_4 = OpConstantComposite %v2float %float_4 %float_4
%v2float_2_0p5 = OpConstantComposite %v2float %float_2 %float_0p5
%v2float_0p2_0p5 = OpConstantComposite %v2float %float_0p2 %float_0p5
%v2float_null = OpConstantNull %v2float
%double_n1 = OpConstant %double -1
%105 = OpConstant %double 0 ; Need a def with a numerical id to define id maps.
@@ -231,7 +232,9 @@ OpName %main "main"
%double_2 = OpConstant %double 2
%double_3 = OpConstant %double 3
%double_4 = OpConstant %double 4
%double_5 = OpConstant %double 5
%double_0p5 = OpConstant %double 0.5
%double_0p2 = OpConstant %double 0.2
%v2double_0_0 = OpConstantComposite %v2double %double_0 %double_0
%v2double_2_2 = OpConstantComposite %v2double %double_2 %double_2
%v2double_2_3 = OpConstantComposite %v2double %double_2 %double_3
@@ -557,7 +560,155 @@ INSTANTIATE_TEST_SUITE_P(TestCase, IntegerInstructionFoldingTest,
"%2 = OpSNegate %int %int_min\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, std::numeric_limits<int32_t>::min())
2, std::numeric_limits<int32_t>::min()),
// Test case 30: fold UMin 3 4
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %uint %1 UMin %uint_3 %uint_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 3),
// Test case 31: fold UMin 4 2
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %uint %1 UMin %uint_4 %uint_2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 2),
// Test case 32: fold SMin 3 4
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %int %1 UMin %int_3 %int_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 3),
// Test case 33: fold SMin 4 2
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %int %1 SMin %int_4 %int_2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 2),
// Test case 34: fold UMax 3 4
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %uint %1 UMax %uint_3 %uint_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 4),
// Test case 35: fold UMax 3 2
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %uint %1 UMax %uint_3 %uint_2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 3),
// Test case 36: fold SMax 3 4
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %int %1 UMax %int_3 %int_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 4),
// Test case 37: fold SMax 3 2
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %int %1 SMax %int_3 %int_2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 3),
// Test case 38: fold UClamp 2 3 4
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %uint %1 UClamp %uint_2 %uint_3 %uint_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 3),
// Test case 39: fold UClamp 2 0 4
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %uint %1 UClamp %uint_2 %uint_0 %uint_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 2),
// Test case 40: fold UClamp 2 0 1
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %uint %1 UClamp %uint_2 %uint_0 %uint_1\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1),
// Test case 41: fold SClamp 2 3 4
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %int %1 SClamp %int_2 %int_3 %int_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 3),
// Test case 42: fold SClamp 2 0 4
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %int %1 SClamp %int_2 %int_0 %int_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 2),
// Test case 43: fold SClamp 2 0 1
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %int %1 SClamp %int_2 %int_0 %int_1\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1),
// Test case 44: SClamp 1 2 x
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%undef = OpUndef %int\n" +
"%2 = OpExtInst %int %1 SClamp %int_1 %int_2 %undef\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 2),
// Test case 45: SClamp 2 x 1
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%undef = OpUndef %int\n" +
"%2 = OpExtInst %int %1 SClamp %int_2 %undef %int_1\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1),
// Test case 46: UClamp 1 2 x
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%undef = OpUndef %uint\n" +
"%2 = OpExtInst %uint %1 UClamp %uint_1 %uint_2 %undef\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 2),
// Test case 47: UClamp 2 x 1
InstructionFoldingCase<uint32_t>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%undef = OpUndef %uint\n" +
"%2 = OpExtInst %uint %1 UClamp %uint_2 %undef %uint_1\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1)
));
// clang-format on
@@ -643,6 +794,58 @@ INSTANTIATE_TEST_SUITE_P(TestCase, IntVectorInstructionFoldingTest,
));
// clang-format on
using FloatVectorInstructionFoldingTest =
::testing::TestWithParam<InstructionFoldingCase<std::vector<float>>>;
TEST_P(FloatVectorInstructionFoldingTest, Case) {
const auto& tc = GetParam();
// Build module.
std::unique_ptr<IRContext> context =
BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, tc.test_body,
SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
ASSERT_NE(nullptr, context);
// Fold the instruction to test.
analysis::DefUseManager* def_use_mgr = context->get_def_use_mgr();
Instruction* inst = def_use_mgr->GetDef(tc.id_to_fold);
SpvOp original_opcode = inst->opcode();
bool succeeded = context->get_instruction_folder().FoldInstruction(inst);
// Make sure the instruction folded as expected.
EXPECT_EQ(succeeded, inst == nullptr || inst->opcode() != original_opcode);
if (succeeded && inst != nullptr) {
EXPECT_EQ(inst->opcode(), SpvOpCopyObject);
inst = def_use_mgr->GetDef(inst->GetSingleWordInOperand(0));
std::vector<SpvOp> opcodes = {SpvOpConstantComposite};
EXPECT_THAT(opcodes, Contains(inst->opcode()));
analysis::ConstantManager* const_mrg = context->get_constant_mgr();
const analysis::Constant* result = const_mrg->GetConstantFromInst(inst);
EXPECT_NE(result, nullptr);
if (result != nullptr) {
const std::vector<const analysis::Constant*>& components =
result->AsVectorConstant()->GetComponents();
EXPECT_EQ(components.size(), tc.expected_result.size());
for (size_t i = 0; i < components.size(); i++) {
EXPECT_EQ(tc.expected_result[i], components[i]->GetFloat());
}
}
}
}
// clang-format off
INSTANTIATE_TEST_SUITE_P(TestCase, FloatVectorInstructionFoldingTest,
::testing::Values(
// Test case 0: FMix {2.0, 3.0}, {0.0, 0.0}, {0.2, 0.5}
InstructionFoldingCase<std::vector<float>>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %v2float %1 FMix %v2float_2_3 %v2float_0_0 %v2float_0p2_0p5\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, {1.6f,1.5f})
));
// clang-format on
using BooleanInstructionFoldingTest =
::testing::TestWithParam<InstructionFoldingCase<bool>>;
@@ -1473,7 +1676,81 @@ INSTANTIATE_TEST_SUITE_P(FloatConstantFoldingTest, FloatInstructionFoldingTest,
"%2 = OpExtInst %float %1 FMix %float_1 %float_4 %float_0p2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1.6f)
2, 1.6f),
// Test case 21: FMin 1.0 4.0
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %float %1 FMin %float_1 %float_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1.0f),
// Test case 22: FMin 4.0 0.2
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %float %1 FMin %float_4 %float_0p2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 0.2f),
// Test case 23: FMax 1.0 4.0
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %float %1 FMax %float_1 %float_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 4.0f),
// Test case 24: FMax 1.0 0.2
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %float %1 FMax %float_1 %float_0p2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1.0f),
// Test case 25: FClamp 1.0 0.2 4.0
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %float %1 FClamp %float_1 %float_0p2 %float_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1.0f),
// Test case 26: FClamp 0.2 2.0 4.0
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %float %1 FClamp %float_0p2 %float_2 %float_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 2.0f),
// Test case 27: FClamp 2049.0 2.0 4.0
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %float %1 FClamp %float_2049 %float_2 %float_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 4.0f),
// Test case 28: FClamp 1.0 2.0 x
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%undef = OpUndef %float\n" +
"%2 = OpExtInst %float %1 FClamp %float_1 %float_2 %undef\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 2.0),
// Test case 29: FClamp 1.0 x 0.5
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%undef = OpUndef %float\n" +
"%2 = OpExtInst %float %1 FClamp %float_1 %undef %float_0p5\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 0.5)
));
// clang-format on
@@ -1616,7 +1893,81 @@ INSTANTIATE_TEST_SUITE_P(DoubleConstantFoldingTest, DoubleInstructionFoldingTest
"%2 = OpFNegate %double %double_2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, -2)
2, -2),
// Test case 12: FMin 1.0 4.0
InstructionFoldingCase<double>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %double %1 FMin %double_1 %double_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1.0),
// Test case 13: FMin 4.0 0.2
InstructionFoldingCase<double>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %double %1 FMin %double_4 %double_0p2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 0.2),
// Test case 14: FMax 1.0 4.0
InstructionFoldingCase<double>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %double %1 FMax %double_1 %double_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 4.0),
// Test case 15: FMax 1.0 0.2
InstructionFoldingCase<double>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %double %1 FMax %double_1 %double_0p2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1.0),
// Test case 16: FClamp 1.0 0.2 4.0
InstructionFoldingCase<double>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %double %1 FClamp %double_1 %double_0p2 %double_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 1.0),
// Test case 17: FClamp 0.2 2.0 4.0
InstructionFoldingCase<double>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %double %1 FClamp %double_0p2 %double_2 %double_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 2.0),
// Test case 18: FClamp 5.0 2.0 4.0
InstructionFoldingCase<double>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpExtInst %double %1 FClamp %double_5 %double_2 %double_4\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 4.0),
// Test case 19: FClamp 1.0 2.0 x
InstructionFoldingCase<double>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%undef = OpUndef %double\n" +
"%2 = OpExtInst %double %1 FClamp %double_1 %double_2 %undef\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 2.0),
// Test case 20: FClamp 1.0 x 0.5
InstructionFoldingCase<double>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%undef = OpUndef %double\n" +
"%2 = OpExtInst %double %1 FClamp %double_1 %undef %double_0p5\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, 0.5)
));
// clang-format on

View File

@@ -222,7 +222,7 @@ TEST(Optimizer, CanRegisterPassesFromFlags) {
}
TEST(Optimizer, VulkanToWebGPUSetsCorrectPasses) {
Optimizer opt(SPV_ENV_WEBGPU_0);
Optimizer opt(SPV_ENV_VULKAN_1_1);
opt.RegisterVulkanToWebGPUPasses();
std::vector<const char*> pass_names = opt.GetPassNames();
@@ -267,7 +267,7 @@ TEST_P(VulkanToWebGPUPassTest, Ran) {
tools.Assemble(GetParam().input, &binary);
}
Optimizer opt(SPV_ENV_WEBGPU_0);
Optimizer opt(SPV_ENV_VULKAN_1_1);
opt.RegisterVulkanToWebGPUPasses();
std::vector<uint32_t> optimized;
@@ -622,7 +622,7 @@ INSTANTIATE_TEST_SUITE_P(
"compact-ids"}}));
TEST(Optimizer, WebGPUToVulkanSetsCorrectPasses) {
Optimizer opt(SPV_ENV_VULKAN_1_1);
Optimizer opt(SPV_ENV_WEBGPU_0);
opt.RegisterWebGPUToVulkanPasses();
std::vector<const char*> pass_names = opt.GetPassNames();
@@ -659,7 +659,7 @@ TEST_P(WebGPUToVulkanPassTest, Ran) {
tools.Assemble(GetParam().input, &binary);
}
Optimizer opt(SPV_ENV_VULKAN_1_1);
Optimizer opt(SPV_ENV_WEBGPU_0);
opt.RegisterWebGPUToVulkanPasses();
std::vector<uint32_t> optimized;

View File

@@ -0,0 +1,142 @@
// Copyright (c) 2019 Valve Corporation
// Copyright (c) 2019 LunarG Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Relax float ops tests
#include <string>
#include <vector>
#include "test/opt/assembly_builder.h"
#include "test/opt/pass_fixture.h"
#include "test/opt/pass_utils.h"
namespace spvtools {
namespace opt {
namespace {
using RelaxFloatOpsTest = PassTest<::testing::Test>;
TEST_F(RelaxFloatOpsTest, RelaxFloatOpsBasic) {
// All float result instructions in functions should be relaxed
// clang-format off
//
// SamplerState g_sSamp : register(s0);
// uniform Texture1D <float4> g_tTex1df4 : register(t0);
//
// struct PS_INPUT
// {
// float Tex0 : TEXCOORD0;
// float Tex1 : TEXCOORD1;
// };
//
// struct PS_OUTPUT
// {
// float4 Color : SV_Target0;
// };
//
// PS_OUTPUT main(PS_INPUT i)
// {
// PS_OUTPUT psout;
// float4 txval10 = g_tTex1df4.Sample(g_sSamp, i.Tex0);
// float4 txval11 = g_tTex1df4.Sample(g_sSamp, i.Tex1);
// float4 t = txval10 + txval11;
// float4 t2 = t / 2.0;
// psout.Color = t2;
// return psout;
// }
// clang-format on
const std::string defs0 =
R"(OpCapability Shader
OpCapability Sampled1D
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %i_Tex0 %i_Tex1 %_entryPointOutput_Color
OpExecutionMode %main OriginUpperLeft
OpSource HLSL 500
OpName %main "main"
OpName %g_tTex1df4 "g_tTex1df4"
OpName %g_sSamp "g_sSamp"
OpName %i_Tex0 "i.Tex0"
OpName %i_Tex1 "i.Tex1"
OpName %_entryPointOutput_Color "@entryPointOutput.Color"
OpDecorate %g_tTex1df4 DescriptorSet 0
OpDecorate %g_tTex1df4 Binding 0
OpDecorate %g_sSamp DescriptorSet 0
OpDecorate %g_sSamp Binding 0
OpDecorate %i_Tex0 Location 0
OpDecorate %i_Tex1 Location 1
OpDecorate %_entryPointOutput_Color Location 0
)";
const std::string defs1 =
R"(%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%17 = OpTypeImage %float 1D 0 0 0 1 Unknown
%_ptr_UniformConstant_17 = OpTypePointer UniformConstant %17
%g_tTex1df4 = OpVariable %_ptr_UniformConstant_17 UniformConstant
%21 = OpTypeSampler
%_ptr_UniformConstant_21 = OpTypePointer UniformConstant %21
%g_sSamp = OpVariable %_ptr_UniformConstant_21 UniformConstant
%25 = OpTypeSampledImage %17
%_ptr_Input_float = OpTypePointer Input %float
%i_Tex0 = OpVariable %_ptr_Input_float Input
%i_Tex1 = OpVariable %_ptr_Input_float Input
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output
%float_0_5 = OpConstant %float 0.5
%116 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5
)";
const std::string relax_decos =
R"(OpDecorate %60 RelaxedPrecision
OpDecorate %63 RelaxedPrecision
OpDecorate %82 RelaxedPrecision
OpDecorate %88 RelaxedPrecision
OpDecorate %91 RelaxedPrecision
OpDecorate %94 RelaxedPrecision
)";
const std::string func_orig =
R"(%main = OpFunction %void None %3
%5 = OpLabel
%60 = OpLoad %float %i_Tex0
%63 = OpLoad %float %i_Tex1
%77 = OpLoad %17 %g_tTex1df4
%78 = OpLoad %21 %g_sSamp
%79 = OpSampledImage %25 %77 %78
%82 = OpImageSampleImplicitLod %v4float %79 %60
%83 = OpLoad %17 %g_tTex1df4
%84 = OpLoad %21 %g_sSamp
%85 = OpSampledImage %25 %83 %84
%88 = OpImageSampleImplicitLod %v4float %85 %63
%91 = OpFAdd %v4float %82 %88
%94 = OpFMul %v4float %91 %116
OpStore %_entryPointOutput_Color %94
OpReturn
OpFunctionEnd
)";
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
SinglePassRunAndCheck<RelaxFloatOpsPass>(
defs0 + defs1 + func_orig, defs0 + relax_decos + defs1 + func_orig, true,
true);
}
} // namespace
} // namespace opt
} // namespace spvtools

View File

@@ -80,13 +80,13 @@ std::string GetSizePasses() {
}
std::string GetVulkanToWebGPUPasses() {
spvtools::Optimizer optimizer(SPV_ENV_WEBGPU_0);
spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_1);
optimizer.RegisterVulkanToWebGPUPasses();
return GetListOfPassesAsString(optimizer);
}
std::string GetWebGPUToVulkanPasses() {
spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_1);
spvtools::Optimizer optimizer(SPV_ENV_WEBGPU_0);
optimizer.RegisterWebGPUToVulkanPasses();
return GetListOfPassesAsString(optimizer);
}
@@ -141,6 +141,16 @@ Options (in lexicographical order):)",
and constant index access chains in entry point call tree
functions.)");
printf(R"(
--convert-relaxed-to-half
Convert all RelaxedPrecision arithmetic operations to half
precision, inserting conversion operations where needed.
Run after function scope variable load and store elimination
for better results. Simplify-instructions, redundancy-elimination
and DCE should be run after this pass to eliminate excess
conversions. This conversion is useful when the target platform
does not support RelaxedPrecision or ignores it. This pass also
removes all RelaxedPrecision decorations.)");
printf(R"(
--copy-propagate-arrays
Does propagation of memory references when an array is a copy of
another. It will only propagate an array if the source is never
@@ -393,6 +403,10 @@ Options (in lexicographical order):)",
Looks for instructions in the same function that compute the
same value, and deletes the redundant ones.)");
printf(R"(
--relax-float-ops
Decorate all float operations with RelaxedPrecision if not already
so decorated. This does not decorate types or variables.)");
printf(R"(
--relax-struct-store
Allow store from one struct type to a different type with
compatible layout and members. This option is forwarded to the
@@ -778,7 +792,7 @@ OptStatus ParseFlags(int argc, const char** argv,
return {OPT_STOP, 1};
}
optimizer->SetTargetEnv(SPV_ENV_WEBGPU_0);
optimizer->SetTargetEnv(SPV_ENV_VULKAN_1_1);
optimizer->RegisterVulkanToWebGPUPasses();
} else if (0 == strcmp(cur_arg, "--webgpu-to-vulkan")) {
webgpu_to_vulkan_set = true;
@@ -796,7 +810,7 @@ OptStatus ParseFlags(int argc, const char** argv,
return {OPT_STOP, 1};
}
optimizer->SetTargetEnv(SPV_ENV_VULKAN_1_1);
optimizer->SetTargetEnv(SPV_ENV_WEBGPU_0);
optimizer->RegisterWebGPUToVulkanPasses();
} else if (0 == strcmp(cur_arg, "--validate-after-all")) {
optimizer->SetValidateAfterAll(true);