Updated spirv-tools.

This commit is contained in:
Бранимир Караџић
2019-09-01 21:42:18 -07:00
parent fe3a29eb44
commit 9a31ca80fc
37 changed files with 1413 additions and 110 deletions

View File

@@ -75,6 +75,7 @@ SPVTOOLS_SRC_FILES := \
SPVTOOLS_OPT_SRC_FILES := \
source/opt/aggressive_dead_code_elim_pass.cpp \
source/opt/amd_ext_to_khr.cpp \
source/opt/basic_block.cpp \
source/opt/block_merge_pass.cpp \
source/opt/block_merge_util.cpp \

View File

@@ -451,6 +451,8 @@ static_library("spvtools_opt") {
sources = [
"source/opt/aggressive_dead_code_elim_pass.cpp",
"source/opt/aggressive_dead_code_elim_pass.h",
"source/opt/amd_ext_to_khr.cpp",
"source/opt/amd_ext_to_khr.h",
"source/opt/basic_block.cpp",
"source/opt/basic_block.h",
"source/opt/block_merge_pass.cpp",
@@ -658,6 +660,7 @@ static_library("spvtools_opt") {
deps = [
":spvtools",
":spvtools_vendor_tables_spv-amd-shader-ballot",
]
public_deps = [
":spvtools_headers",
@@ -729,6 +732,8 @@ static_library("spvtools_reduce") {
"source/reduce/remove_instruction_reduction_opportunity.h",
"source/reduce/remove_opname_instruction_reduction_opportunity_finder.cpp",
"source/reduce/remove_opname_instruction_reduction_opportunity_finder.h",
"source/reduce/remove_relaxed_precision_decoration_opportunity_finder.cpp",
"source/reduce/remove_relaxed_precision_decoration_opportunity_finder.h",
"source/reduce/remove_selection_reduction_opportunity.cpp",
"source/reduce/remove_selection_reduction_opportunity.h",
"source/reduce/remove_selection_reduction_opportunity_finder.cpp",

View File

@@ -1 +1 @@
"v2019.5-dev", "SPIRV-Tools v2019.5-dev v2019.4-16-g8336d192"
"v2019.5-dev", "SPIRV-Tools v2019.5-dev v2019.4-25-g65e362b7"

View File

@@ -425,12 +425,12 @@ static const spv_opcode_desc_t kOpcodeTableEntries[] = {
{"ImageSampleFootprintNV", SpvOpImageSampleFootprintNV, 1, pygen_variable_caps_ImageFootprintNV, 7, {SPV_OPERAND_TYPE_TYPE_ID, SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_OPTIONAL_IMAGE}, 1, 1, 1, pygen_variable_exts_SPV_NV_shader_image_footprint, 0xffffffffu, 0xffffffffu},
{"GroupNonUniformPartitionNV", SpvOpGroupNonUniformPartitionNV, 1, pygen_variable_caps_GroupNonUniformPartitionedNV, 3, {SPV_OPERAND_TYPE_TYPE_ID, SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID}, 1, 1, 1, pygen_variable_exts_SPV_NV_shader_subgroup_partitioned, 0xffffffffu, 0xffffffffu},
{"WritePackedPrimitiveIndices4x8NV", SpvOpWritePackedPrimitiveIndices4x8NV, 1, pygen_variable_caps_MeshShadingNV, 2, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 0, 0, 1, pygen_variable_exts_SPV_NV_mesh_shader, 0xffffffffu, 0xffffffffu},
{"ReportIntersectionNV", SpvOpReportIntersectionNV, 1, pygen_variable_caps_RayTracingNV, 4, {SPV_OPERAND_TYPE_TYPE_ID, SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 1, 1, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu},
{"IgnoreIntersectionNV", SpvOpIgnoreIntersectionNV, 1, pygen_variable_caps_RayTracingNV, 0, {}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu},
{"TerminateRayNV", SpvOpTerminateRayNV, 1, pygen_variable_caps_RayTracingNV, 0, {}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu},
{"TraceNV", SpvOpTraceNV, 1, pygen_variable_caps_RayTracingNV, 11, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu},
{"TypeAccelerationStructureNV", SpvOpTypeAccelerationStructureNV, 1, pygen_variable_caps_RayTracingNV, 1, {SPV_OPERAND_TYPE_RESULT_ID}, 1, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu},
{"ExecuteCallableNV", SpvOpExecuteCallableNV, 1, pygen_variable_caps_RayTracingNV, 2, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu},
{"ReportIntersectionNV", SpvOpReportIntersectionNV, 1, pygen_variable_caps_RayTracingNV, 4, {SPV_OPERAND_TYPE_TYPE_ID, SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 1, 1, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu},
{"IgnoreIntersectionNV", SpvOpIgnoreIntersectionNV, 1, pygen_variable_caps_RayTracingNV, 0, {}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu},
{"TerminateRayNV", SpvOpTerminateRayNV, 1, pygen_variable_caps_RayTracingNV, 0, {}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu},
{"TraceNV", SpvOpTraceNV, 1, pygen_variable_caps_RayTracingNV, 11, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu},
{"TypeAccelerationStructureNV", SpvOpTypeAccelerationStructureNV, 1, pygen_variable_caps_RayTracingNV, 1, {SPV_OPERAND_TYPE_RESULT_ID}, 1, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu},
{"ExecuteCallableNV", SpvOpExecuteCallableNV, 1, pygen_variable_caps_RayTracingNV, 2, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 0, 0, 1, pygen_variable_exts_SPV_NV_ray_tracing, 0xffffffffu, 0xffffffffu},
{"TypeCooperativeMatrixNV", SpvOpTypeCooperativeMatrixNV, 1, pygen_variable_caps_CooperativeMatrixNV, 5, {SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_SCOPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID}, 1, 0, 1, pygen_variable_exts_SPV_NV_cooperative_matrix, 0xffffffffu, 0xffffffffu},
{"CooperativeMatrixLoadNV", SpvOpCooperativeMatrixLoadNV, 1, pygen_variable_caps_CooperativeMatrixNV, 6, {SPV_OPERAND_TYPE_TYPE_ID, SPV_OPERAND_TYPE_RESULT_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS}, 1, 1, 1, pygen_variable_exts_SPV_NV_cooperative_matrix, 0xffffffffu, 0xffffffffu},
{"CooperativeMatrixStoreNV", SpvOpCooperativeMatrixStoreNV, 1, pygen_variable_caps_CooperativeMatrixNV, 5, {SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_ID, SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS}, 0, 0, 1, pygen_variable_exts_SPV_NV_cooperative_matrix, 0xffffffffu, 0xffffffffu},

View File

@@ -546,7 +546,7 @@ static const spv_operand_desc_t pygen_variable_DecorationEntries[] = {
{"PerViewNV", 5272, 1, pygen_variable_caps_MeshShadingNV, 1, pygen_variable_exts_SPV_NV_mesh_shader, {}, 0xffffffffu, 0xffffffffu},
{"PerTaskNV", 5273, 1, pygen_variable_caps_MeshShadingNV, 1, pygen_variable_exts_SPV_NV_mesh_shader, {}, 0xffffffffu, 0xffffffffu},
{"PerVertexNV", 5285, 1, pygen_variable_caps_FragmentBarycentricNV, 1, pygen_variable_exts_SPV_NV_fragment_shader_barycentric, {}, 0xffffffffu, 0xffffffffu},
{"NonUniformEXT", 5300, 1, pygen_variable_caps_ShaderNonUniformEXT, 0, nullptr, {}, SPV_SPIRV_VERSION_WORD(1, 0), 0xffffffffu},
{"NonUniformEXT", 5300, 1, pygen_variable_caps_ShaderNonUniformEXT, 0, nullptr, {}, 0xffffffffu, 0xffffffffu},
{"RestrictPointerEXT", 5355, 1, pygen_variable_caps_PhysicalStorageBufferAddressesEXT, 1, pygen_variable_exts_SPV_EXT_physical_storage_buffer, {}, 0xffffffffu, 0xffffffffu},
{"AliasedPointerEXT", 5356, 1, pygen_variable_caps_PhysicalStorageBufferAddressesEXT, 1, pygen_variable_exts_SPV_EXT_physical_storage_buffer, {}, 0xffffffffu, 0xffffffffu},
{"CounterBuffer", 5634, 0, nullptr, 1, pygen_variable_exts_SPV_GOOGLE_hlsl_functionality1, {SPV_OPERAND_TYPE_ID}, SPV_SPIRV_VERSION_WORD(1,4), 0xffffffffu},

View File

@@ -823,6 +823,11 @@ Optimizer::PassToken CreateDescriptorScalarReplacementPass();
// function that has a single OpKill. This allows more code to be inlined.
Optimizer::PassToken CreateWrapOpKillPass();
// Replaces the extensions VK_AMD_shader_ballot, VK_AMD_gcn_shader, and
// VK_AMD_shader_trinary_minmax with equivalent code using core instructions and
// capabilities.
Optimizer::PassToken CreateAmdExtToKhrPass();
} // namespace spvtools
#endif // INCLUDE_SPIRV_TOOLS_OPTIMIZER_HPP_

View File

@@ -69,6 +69,26 @@ class EnumSet {
return *this;
}
friend bool operator==(const EnumSet& a, const EnumSet& b) {
if (a.mask_ != b.mask_) {
return false;
}
if (a.overflow_ == nullptr && b.overflow_ == nullptr) {
return true;
}
if (a.overflow_ == nullptr || b.overflow_ == nullptr) {
return false;
}
return *a.overflow_ == *b.overflow_;
}
friend bool operator!=(const EnumSet& a, const EnumSet& b) {
return !(a == b);
}
// Adds the given enum value to the set. This has no effect if the
// enum value is already in the set.
void Add(EnumType c) { AddWord(ToWord(c)); }

View File

@@ -13,6 +13,7 @@
# limitations under the License.
set(SPIRV_TOOLS_OPT_SOURCES
aggressive_dead_code_elim_pass.h
amd_ext_to_khr.h
basic_block.h
block_merge_pass.h
block_merge_util.h
@@ -117,6 +118,7 @@ set(SPIRV_TOOLS_OPT_SOURCES
wrap_opkill.h
aggressive_dead_code_elim_pass.cpp
amd_ext_to_khr.cpp
basic_block.cpp
block_merge_pass.cpp
block_merge_util.cpp

View File

@@ -664,6 +664,9 @@ Pass::Status AggressiveDCEPass::ProcessImpl() {
// been marked, it is safe to remove dead global values.
modified |= ProcessGlobalValues();
// Sanity check.
assert(to_kill_.size() == 0 || modified);
// Kill all dead instructions.
for (auto inst : to_kill_) {
context()->KillInst(inst);
@@ -846,6 +849,7 @@ bool AggressiveDCEPass::ProcessGlobalValues() {
if (!IsDead(ptr_ty_inst)) continue;
}
to_kill_.push_back(&val);
modified = true;
}
}

View File

@@ -0,0 +1,539 @@
// Copyright (c) 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "source/opt/amd_ext_to_khr.h"

#include <cstring>
#include <memory>
#include <vector>

#include "ir_builder.h"
#include "source/opt/ir_context.h"
#include "spv-amd-shader-ballot.insts.inc"
#include "type_manager.h"
namespace spvtools {
namespace opt {
namespace {
enum ExtOpcodes {
AmdShaderBallotSwizzleInvocationsAMD = 1,
AmdShaderBallotSwizzleInvocationsMaskedAMD = 2,
AmdShaderBallotWriteInvocationAMD = 3,
AmdShaderBallotMbcntAMD = 4
};
analysis::Type* GetUIntType(IRContext* ctx) {
analysis::Integer int_type(32, false);
return ctx->get_type_mgr()->GetRegisteredType(&int_type);
}
// Returns a folding rule that will replace the opcode with |opcode| and add
// the capabilities required. The folding rule assumes it is folding an
// OpGroup*NonUniformAMD instruction from the SPV_AMD_shader_ballot extension.
FoldingRule ReplaceGroupNonuniformOperationOpCode(SpvOp new_opcode) {
switch (new_opcode) {
case SpvOpGroupNonUniformIAdd:
case SpvOpGroupNonUniformFAdd:
case SpvOpGroupNonUniformUMin:
case SpvOpGroupNonUniformSMin:
case SpvOpGroupNonUniformFMin:
case SpvOpGroupNonUniformUMax:
case SpvOpGroupNonUniformSMax:
case SpvOpGroupNonUniformFMax:
break;
default:
assert(
false &&
"Should be replacing with a group non uniform arithmetic operation.");
}
return [new_opcode](IRContext* ctx, Instruction* inst,
const std::vector<const analysis::Constant*>&) {
switch (inst->opcode()) {
case SpvOpGroupIAddNonUniformAMD:
case SpvOpGroupFAddNonUniformAMD:
case SpvOpGroupUMinNonUniformAMD:
case SpvOpGroupSMinNonUniformAMD:
case SpvOpGroupFMinNonUniformAMD:
case SpvOpGroupUMaxNonUniformAMD:
case SpvOpGroupSMaxNonUniformAMD:
case SpvOpGroupFMaxNonUniformAMD:
break;
default:
assert(false &&
"Should be replacing a group non uniform arithmetic operation.");
}
ctx->AddCapability(SpvCapabilityGroupNonUniformArithmetic);
inst->SetOpcode(new_opcode);
return true;
};
}
// Returns a folding rule that will replace the SwizzleInvocationsAMD extended
// instruction in the SPV_AMD_shader_ballot extension.
//
// The instruction
//
// %offset = OpConstantComposite %v3uint %x %y %z %w
// %result = OpExtInst %type %1 SwizzleInvocationsAMD %data %offset
//
// is replaced with
//
// potentially new constants and types
//
// clang-format off
// %uint_max = OpConstant %uint 0xFFFFFFFF
// %v4uint = OpTypeVector %uint 4
// %ballot_value = OpConstantComposite %v4uint %uint_max %uint_max %uint_max %uint_max
// %null = OpConstantNull %type
// clang-format on
//
// and the following code in the function body
//
// clang-format off
// %id = OpLoad %uint %SubgroupLocalInvocationId
// %quad_idx = OpBitwiseAnd %uint %id %uint_3
// %quad_ldr = OpBitwiseXor %uint %id %quad_idx
// %my_offset = OpVectorExtractDynamic %uint %offset %quad_idx
// %target_inv = OpIAdd %uint %quad_ldr %my_offset
// %is_active = OpGroupNonUniformBallotBitExtract %bool %uint_3 %ballot_value %target_inv
// %shuffle = OpGroupNonUniformShuffle %type %uint_3 %data %target_inv
// %result = OpSelect %type %is_active %shuffle %null
// clang-format on
//
// Also adding the capabilities and builtins that are needed.
FoldingRule ReplaceSwizzleInvocations() {
return [](IRContext* ctx, Instruction* inst,
const std::vector<const analysis::Constant*>&) {
analysis::TypeManager* type_mgr = ctx->get_type_mgr();
analysis::ConstantManager* const_mgr = ctx->get_constant_mgr();
ctx->AddExtension("SPV_KHR_shader_ballot");
ctx->AddCapability(SpvCapabilityGroupNonUniformBallot);
ctx->AddCapability(SpvCapabilityGroupNonUniformShuffle);
InstructionBuilder ir_builder(
ctx, inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
uint32_t data_id = inst->GetSingleWordInOperand(2);
uint32_t offset_id = inst->GetSingleWordInOperand(3);
// Get the subgroup invocation id.
uint32_t var_id =
ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId);
assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable.");
Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id);
Instruction* var_ptr_type =
ctx->get_def_use_mgr()->GetDef(var_inst->type_id());
uint32_t uint_type_id = var_ptr_type->GetSingleWordInOperand(1);
Instruction* id = ir_builder.AddLoad(uint_type_id, var_id);
uint32_t quad_mask = ir_builder.GetUintConstantId(3);
// This gives the offset in the group of 4 of this invocation.
Instruction* quad_idx = ir_builder.AddBinaryOp(
uint_type_id, SpvOpBitwiseAnd, id->result_id(), quad_mask);
// Get the invocation id of the first invocation in the group of 4.
Instruction* quad_ldr = ir_builder.AddBinaryOp(
uint_type_id, SpvOpBitwiseXor, id->result_id(), quad_idx->result_id());
// Get the offset of the target invocation from the offset vector.
Instruction* my_offset =
ir_builder.AddBinaryOp(uint_type_id, SpvOpVectorExtractDynamic,
offset_id, quad_idx->result_id());
// Determine the index of the invocation to read from.
Instruction* target_inv = ir_builder.AddBinaryOp(
uint_type_id, SpvOpIAdd, quad_ldr->result_id(), my_offset->result_id());
// Do the group operations
uint32_t uint_max_id = ir_builder.GetUintConstantId(0xFFFFFFFF);
uint32_t subgroup_scope = ir_builder.GetUintConstantId(SpvScopeSubgroup);
const auto* ballot_value_const = const_mgr->GetConstant(
type_mgr->GetUIntVectorType(4),
{uint_max_id, uint_max_id, uint_max_id, uint_max_id});
Instruction* ballot_value =
const_mgr->GetDefiningInstruction(ballot_value_const);
Instruction* is_active = ir_builder.AddNaryOp(
type_mgr->GetBoolTypeId(), SpvOpGroupNonUniformBallotBitExtract,
{subgroup_scope, ballot_value->result_id(), target_inv->result_id()});
Instruction* shuffle = ir_builder.AddNaryOp(
inst->type_id(), SpvOpGroupNonUniformShuffle,
{subgroup_scope, data_id, target_inv->result_id()});
// Create the null constant to use in the select.
const auto* null = const_mgr->GetConstant(
type_mgr->GetType(inst->type_id()), std::vector<uint32_t>());
Instruction* null_inst = const_mgr->GetDefiningInstruction(null);
// Build the select.
inst->SetOpcode(SpvOpSelect);
Instruction::OperandList new_operands;
new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_active->result_id()}});
new_operands.push_back({SPV_OPERAND_TYPE_ID, {shuffle->result_id()}});
new_operands.push_back({SPV_OPERAND_TYPE_ID, {null_inst->result_id()}});
inst->SetInOperands(std::move(new_operands));
ctx->UpdateDefUse(inst);
return true;
};
}
// Returns a folding rule that will replace the SwizzleInvocationsMaskedAMD
// extended instruction in the SPV_AMD_shader_ballot extension.
//
// The instruction
//
// %mask = OpConstantComposite %v3uint %uint_x %uint_y %uint_z
// %result = OpExtInst %uint %1 SwizzleInvocationsMaskedAMD %data %mask
//
// is replaced with
//
// potentially new constants and types
//
// clang-format off
// %uint_mask_extend = OpConstant %uint 0xFFFFFFE0
// %uint_max = OpConstant %uint 0xFFFFFFFF
// %v4uint = OpTypeVector %uint 4
// %ballot_value = OpConstantComposite %v4uint %uint_max %uint_max %uint_max %uint_max
// %null = OpConstantNull %type
// clang-format on
//
// and the following code in the function body
//
// clang-format off
// %id = OpLoad %uint %SubgroupLocalInvocationId
// %and_mask = OpBitwiseOr %uint %uint_x %uint_mask_extend
// %and = OpBitwiseAnd %uint %id %and_mask
// %or = OpBitwiseOr %uint %and %uint_y
// %target_inv = OpBitwiseXor %uint %or %uint_z
// %is_active = OpGroupNonUniformBallotBitExtract %bool %uint_3 %ballot_value %target_inv
// %shuffle = OpGroupNonUniformShuffle %type %uint_3 %data %target_inv
// %result = OpSelect %type %is_active %shuffle %null
// clang-format on
//
// where %type is the result type of the original instruction and %null is the
// null constant of that type.
//
// Also adding the capabilities and builtins that are needed.
//
// The rule rewrites |inst| in place into the final OpSelect, ignores the
// constants argument, and always returns true.
FoldingRule ReplaceSwizzleInvocationsMasked() {
return [](IRContext* ctx, Instruction* inst,
const std::vector<const analysis::Constant*>&) {
analysis::TypeManager* type_mgr = ctx->get_type_mgr();
analysis::DefUseManager* def_use_mgr = ctx->get_def_use_mgr();
analysis::ConstantManager* const_mgr = ctx->get_constant_mgr();
// NOTE(review): the next line is disabled code kept from the original;
// confirm whether the SubgroupBallotKHR capability is intentionally omitted.
// ctx->AddCapability(SpvCapabilitySubgroupBallotKHR);
ctx->AddCapability(SpvCapabilityGroupNonUniformBallot);
ctx->AddCapability(SpvCapabilityGroupNonUniformShuffle);
// New instructions are inserted through this builder, positioned at |inst|,
// with the def-use and instruction-to-block analyses requested.
InstructionBuilder ir_builder(
ctx, inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
// Get the operands to inst, and the components of the mask
// (in-operands 0 and 1 of an OpExtInst are the instruction set id and the
// instruction number, so the arguments start at index 2).
uint32_t data_id = inst->GetSingleWordInOperand(2);
Instruction* mask_inst =
def_use_mgr->GetDef(inst->GetSingleWordInOperand(3));
assert(mask_inst->opcode() == SpvOpConstantComposite &&
"The mask is suppose to be a vector constant.");
assert(mask_inst->NumInOperands() == 3 &&
"The mask is suppose to have 3 components.");
// These are the ids of the three component constants, used directly as
// operands of the bitwise instructions below.
uint32_t uint_x = mask_inst->GetSingleWordInOperand(0);
uint32_t uint_y = mask_inst->GetSingleWordInOperand(1);
uint32_t uint_z = mask_inst->GetSingleWordInOperand(2);
// Get the subgroup invocation id.
uint32_t var_id =
ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId);
ctx->AddExtension("SPV_KHR_shader_ballot");
assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable.");
Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id);
Instruction* var_ptr_type =
ctx->get_def_use_mgr()->GetDef(var_inst->type_id());
// NOTE(review): presumably in-operand 1 of the variable's pointer type is
// the pointee type id; it is used below as the uint type -- confirm.
uint32_t uint_type_id = var_ptr_type->GetSingleWordInOperand(1);
Instruction* id = ir_builder.AddLoad(uint_type_id, var_id);
// Do the bitwise operations.
// target_inv = ((id & (x | 0xFFFFFFE0)) | y) ^ z
uint32_t mask_extended = ir_builder.GetUintConstantId(0xFFFFFFE0);
Instruction* and_mask = ir_builder.AddBinaryOp(uint_type_id, SpvOpBitwiseOr,
uint_x, mask_extended);
Instruction* and_result = ir_builder.AddBinaryOp(
uint_type_id, SpvOpBitwiseAnd, id->result_id(), and_mask->result_id());
Instruction* or_result = ir_builder.AddBinaryOp(
uint_type_id, SpvOpBitwiseOr, and_result->result_id(), uint_y);
Instruction* target_inv = ir_builder.AddBinaryOp(
uint_type_id, SpvOpBitwiseXor, or_result->result_id(), uint_z);
// Do the group operations
uint32_t uint_max_id = ir_builder.GetUintConstantId(0xFFFFFFFF);
uint32_t subgroup_scope = ir_builder.GetUintConstantId(SpvScopeSubgroup);
// Ballot value with every component set to 0xFFFFFFFF; the bit selected by
// |target_inv| is extracted from it below.
const auto* ballot_value_const = const_mgr->GetConstant(
type_mgr->GetUIntVectorType(4),
{uint_max_id, uint_max_id, uint_max_id, uint_max_id});
Instruction* ballot_value =
const_mgr->GetDefiningInstruction(ballot_value_const);
Instruction* is_active = ir_builder.AddNaryOp(
type_mgr->GetBoolTypeId(), SpvOpGroupNonUniformBallotBitExtract,
{subgroup_scope, ballot_value->result_id(), target_inv->result_id()});
Instruction* shuffle = ir_builder.AddNaryOp(
inst->type_id(), SpvOpGroupNonUniformShuffle,
{subgroup_scope, data_id, target_inv->result_id()});
// Create the null constant to use in the select.
const auto* null = const_mgr->GetConstant(
type_mgr->GetType(inst->type_id()), std::vector<uint32_t>());
Instruction* null_inst = const_mgr->GetDefiningInstruction(null);
// Build the select.
// |inst| keeps its result id and type; only its opcode and in-operands are
// replaced, so existing uses of the result stay valid.
inst->SetOpcode(SpvOpSelect);
Instruction::OperandList new_operands;
new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_active->result_id()}});
new_operands.push_back({SPV_OPERAND_TYPE_ID, {shuffle->result_id()}});
new_operands.push_back({SPV_OPERAND_TYPE_ID, {null_inst->result_id()}});
inst->SetInOperands(std::move(new_operands));
ctx->UpdateDefUse(inst);
return true;
};
}
// Builds the folding rule that rewrites the WriteInvocationAMD extended
// instruction of the SPV_AMD_shader_ballot extension.
//
// The instruction
//
// clang-format off
//    %result = OpExtInst %type %1 WriteInvocationAMD %input_value %write_value %invocation_index
// clang-format on
//
// becomes
//
//    %id = OpLoad %uint %SubgroupLocalInvocationId
//    %cmp = OpIEqual %bool %id %invocation_index
//    %result = OpSelect %type %cmp %write_value %input_value
//
// The SubgroupBallotKHR capability, the SPV_KHR_shader_ballot extension and
// the SubgroupLocalInvocationId builtin variable are added to the module as
// needed. The original instruction is rewritten in place into the OpSelect and
// the rule always reports a change.
FoldingRule ReplaceWriteInvocation() {
  return [](IRContext* ctx, Instruction* inst,
            const std::vector<const analysis::Constant*>&) {
    uint32_t var_id =
        ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId);
    ctx->AddCapability(SpvCapabilitySubgroupBallotKHR);
    ctx->AddExtension("SPV_KHR_shader_ballot");
    assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable.");

    // Walk from the builtin variable to its pointer type so the pointee type
    // can be used as the result type of the load.
    analysis::DefUseManager* def_use_mgr = ctx->get_def_use_mgr();
    Instruction* builtin_var = def_use_mgr->GetDef(var_id);
    Instruction* pointer_type = def_use_mgr->GetDef(builtin_var->type_id());

    InstructionBuilder builder(
        ctx, inst,
        IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
    const uint32_t pointee_type_id = pointer_type->GetSingleWordInOperand(1);
    Instruction* invocation_id = builder.AddLoad(pointee_type_id, var_id);

    // Compare the current invocation id against the requested index, which is
    // in-operand 4 of the original OpExtInst.
    analysis::Bool bool_type;
    const uint32_t bool_type_id =
        ctx->get_type_mgr()->GetTypeInstruction(&bool_type);
    const uint32_t invocation_index_id = inst->GetSingleWordInOperand(4);
    Instruction* is_target =
        builder.AddBinaryOp(bool_type_id, SpvOpIEqual,
                            invocation_id->result_id(), invocation_index_id);

    // Turn the original instruction into the select: the write value
    // (in-operand 3) when the comparison holds, the input value (in-operand 2)
    // otherwise.
    inst->SetOpcode(SpvOpSelect);
    Instruction::OperandList select_operands;
    select_operands.push_back({SPV_OPERAND_TYPE_ID, {is_target->result_id()}});
    select_operands.push_back(inst->GetInOperand(3));
    select_operands.push_back(inst->GetInOperand(2));
    inst->SetInOperands(std::move(select_operands));
    ctx->UpdateDefUse(inst);
    return true;
  };
}
// Builds the folding rule that rewrites the MbcntAMD extended instruction of
// the SPV_AMD_shader_ballot extension.
//
// The instruction
//
//    %result = OpExtInst %uint %1 MbcntAMD %mask
//
// becomes the sequence below. The first two words of SubgroupLtMask are
// reinterpreted as a single 64-bit integer because AMD's shader compiler
// expects a 64-bit integer mask:
//
//    %var = OpLoad %v4uint %SubgroupLtMaskKHR
//    %shuffle = OpVectorShuffle %v2uint %var %var 0 1
//    %cast = OpBitcast %ulong %shuffle
//
// followed by the masking and the bit count:
//
//    %and = OpBitwiseAnd %ulong %cast %mask
//    %result = OpBitCount %uint %and
//
// The GroupNonUniformBallot capability and the SubgroupLtMask builtin variable
// are added to the module as needed. The original instruction is rewritten in
// place into the OpBitCount and the rule always reports a change.
FoldingRule ReplaceMbcnt() {
  return [](IRContext* context, Instruction* inst,
            const std::vector<const analysis::Constant*>&) {
    analysis::TypeManager* type_mgr = context->get_type_mgr();
    analysis::DefUseManager* def_use_mgr = context->get_def_use_mgr();

    uint32_t var_id = context->GetBuiltinInputVarId(SpvBuiltInSubgroupLtMask);
    assert(var_id != 0 && "Could not get SubgroupLtMask variable.");
    context->AddCapability(SpvCapabilityGroupNonUniformBallot);

    // Walk variable -> pointer type -> pointee type to find the type to load.
    Instruction* var_decl = def_use_mgr->GetDef(var_id);
    Instruction* var_pointer_type = def_use_mgr->GetDef(var_decl->type_id());
    Instruction* var_type =
        def_use_mgr->GetDef(var_pointer_type->GetSingleWordInOperand(1));
    assert(var_type->opcode() == SpvOpTypeVector &&
           "Variable is suppose to be a vector of 4 ints");

    // Two-component uint vector type that receives the shuffled words.
    analysis::Vector v2uint_desc(GetUIntType(context), 2);
    const analysis::Type* v2uint_type =
        type_mgr->GetRegisteredType(&v2uint_desc);
    const uint32_t v2uint_type_id = type_mgr->GetTypeInstruction(v2uint_type);

    const uint32_t mask_id = inst->GetSingleWordInOperand(2);
    Instruction* mask_inst = def_use_mgr->GetDef(mask_id);

    // Testing with amd's shader compiler shows that a 64-bit mask is expected.
    assert(type_mgr->GetType(mask_inst->type_id())->AsInteger() != nullptr);
    assert(type_mgr->GetType(mask_inst->type_id())->AsInteger()->width() == 64);
    const uint32_t mask_type_id = mask_inst->type_id();

    InstructionBuilder builder(
        context, inst,
        IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
    Instruction* lt_mask = builder.AddLoad(var_type->result_id(), var_id);
    const uint32_t lt_mask_id = lt_mask->result_id();
    Instruction* low_words = builder.AddVectorShuffle(
        v2uint_type_id, lt_mask_id, lt_mask_id, {0, 1});
    Instruction* lt_mask64 = builder.AddUnaryOp(mask_type_id, SpvOpBitcast,
                                                low_words->result_id());
    Instruction* masked = builder.AddBinaryOp(
        mask_type_id, SpvOpBitwiseAnd, lt_mask64->result_id(), mask_id);

    // Rewrite the original instruction into the bit count of the masked value.
    inst->SetOpcode(SpvOpBitCount);
    inst->SetInOperands({{SPV_OPERAND_TYPE_ID, {masked->result_id()}}});
    context->UpdateDefUse(inst);
    return true;
  };
}
class AmdExtFoldingRules : public FoldingRules {
public:
explicit AmdExtFoldingRules(IRContext* ctx) : FoldingRules(ctx) {}
protected:
virtual void AddFoldingRules() override {
rules_[SpvOpGroupIAddNonUniformAMD].push_back(
ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformIAdd));
rules_[SpvOpGroupFAddNonUniformAMD].push_back(
ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformFAdd));
rules_[SpvOpGroupUMinNonUniformAMD].push_back(
ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformUMin));
rules_[SpvOpGroupSMinNonUniformAMD].push_back(
ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformSMin));
rules_[SpvOpGroupFMinNonUniformAMD].push_back(
ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformFMin));
rules_[SpvOpGroupUMaxNonUniformAMD].push_back(
ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformUMax));
rules_[SpvOpGroupSMaxNonUniformAMD].push_back(
ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformSMax));
rules_[SpvOpGroupFMaxNonUniformAMD].push_back(
ReplaceGroupNonuniformOperationOpCode(SpvOpGroupNonUniformFMax));
uint32_t extension_id =
context()->module()->GetExtInstImportId("SPV_AMD_shader_ballot");
ext_rules_[{extension_id, AmdShaderBallotSwizzleInvocationsAMD}].push_back(
ReplaceSwizzleInvocations());
ext_rules_[{extension_id, AmdShaderBallotSwizzleInvocationsMaskedAMD}]
.push_back(ReplaceSwizzleInvocationsMasked());
ext_rules_[{extension_id, AmdShaderBallotWriteInvocationAMD}].push_back(
ReplaceWriteInvocation());
ext_rules_[{extension_id, AmdShaderBallotMbcntAMD}].push_back(
ReplaceMbcnt());
}
};
class AmdExtConstFoldingRules : public ConstantFoldingRules {
public:
AmdExtConstFoldingRules(IRContext* ctx) : ConstantFoldingRules(ctx) {}
protected:
virtual void AddFoldingRules() override {}
};
} // namespace
// Runs the pass over the whole module:
//   1. folds every instruction in every function with the AMD replacement
//      rules,
//   2. kills the OpExtension and OpExtInstImport instructions that name
//      "SPV_AMD_shader_ballot",
//   3. raises the module version to SPIR-V 1.3 if anything changed and the
//      module was older.
// Returns SuccessWithChange if any instruction was folded or killed, and
// SuccessWithoutChange otherwise.
Pass::Status AmdExtensionToKhrPass::Process() {
  constexpr uint32_t kSpirvVersion1_3 = 0x00010300;
  bool changed = false;

  // Traverse the body of the functions to replace instructions that require
  // the extensions.
  InstructionFolder folder(
      context(),
      std::unique_ptr<AmdExtFoldingRules>(new AmdExtFoldingRules(context())),
      MakeUnique<AmdExtConstFoldingRules>(context()));
  for (Function& func : *get_module()) {
    func.ForEachInst([&changed, &folder](Instruction* inst) {
      if (folder.FoldInstruction(inst)) {
        changed = true;
      }
    });
  }

  // True when in-operand 0 of |inst|, read as a C string, is exactly
  // "SPV_AMD_shader_ballot".
  const auto names_amd_shader_ballot = [](Instruction& inst) {
    const char* name =
        reinterpret_cast<const char*>(&(inst.GetInOperand(0).words[0]));
    return strcmp("SPV_AMD_shader_ballot", name) == 0;
  };

  // Now that the instructions that require the extensions have been replaced,
  // the extension instructions themselves can be removed. They are collected
  // first and killed afterwards so the lists are not modified while being
  // iterated.
  std::vector<Instruction*> to_be_killed;
  for (Instruction& inst : context()->module()->extensions()) {
    if (inst.opcode() == SpvOpExtension && names_amd_shader_ballot(inst)) {
      to_be_killed.push_back(&inst);
    }
  }
  for (Instruction& inst : context()->ext_inst_imports()) {
    if (inst.opcode() == SpvOpExtInstImport && names_amd_shader_ballot(inst)) {
      to_be_killed.push_back(&inst);
    }
  }
  for (Instruction* inst : to_be_killed) {
    context()->KillInst(inst);
    changed = true;
  }

  // The replacements that take place use instructions that are missing before
  // SPIR-V 1.3. If we changed something, we will have to make sure the version
  // is at least SPIR-V 1.3 to make sure those instructions can be used.
  if (changed && get_module()->version() < kSpirvVersion1_3) {
    get_module()->set_version(kSpirvVersion1_3);
  }
  return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
}
} // namespace opt
} // namespace spvtools

View File

@@ -0,0 +1,51 @@
// Copyright (c) 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SOURCE_OPT_AMD_EXT_TO_KHR_H_
#define SOURCE_OPT_AMD_EXT_TO_KHR_H_
#include "source/opt/ir_context.h"
#include "source/opt/module.h"
#include "source/opt/pass.h"
namespace spvtools {
namespace opt {
// Replaces the extensions VK_AMD_shader_ballot, VK_AMD_gcn_shader, and
// VK_AMD_shader_trinary_minmax with equivalent code using core instructions
// and capabilities.
class AmdExtensionToKhrPass : public Pass {
 public:
  // Name under which the pass is identified.
  const char* name() const override { return "amd-ext-to-khr"; }

  // Performs the replacement on the module; defined in the .cpp file.
  Status Process() override;

  // Set of analyses this pass reports as still valid after it has run.
  IRContext::Analysis GetPreservedAnalyses() override {
    return IRContext::kAnalysisDefUse | IRContext::kAnalysisConstants |
           IRContext::kAnalysisTypes | IRContext::kAnalysisIdToFuncMapping |
           IRContext::kAnalysisBuiltinVarId |
           IRContext::kAnalysisStructuredCFG |
           IRContext::kAnalysisValueNumberTable |
           IRContext::kAnalysisRegisterPressure |
           IRContext::kAnalysisScalarEvolution | IRContext::kAnalysisNameMap |
           IRContext::kAnalysisLoopAnalysis |
           IRContext::kAnalysisDominatorAnalysis | IRContext::kAnalysisCFG |
           IRContext::kAnalysisCombinators | IRContext::kAnalysisDecorations |
           IRContext::kAnalysisInstrToBlockMapping;
  }
};
} // namespace opt
} // namespace spvtools
#endif // SOURCE_OPT_AMD_EXT_TO_KHR_H_

View File

@@ -809,9 +809,62 @@ ConstantFoldingRule FoldFClampFeedingCompare(uint32_t cmp_opcode) {
};
}
// Returns a constant folding rule for the GLSLstd450 FMix extended
// instruction. With x = constants[1], y = constants[2] and a = constants[3],
// the rule computes
//
//    x * (1 - a) + y * a
//
// and returns the resulting constant. It returns nullptr (no folding) when
// floating point folding is not allowed for |inst|, when any of the three
// operands is not a constant, when the operand type is not a scalar 32- or
// 64-bit float, or when any intermediate fold fails.
ConstantFoldingRule FoldFMix() {
  return [](IRContext* context, Instruction* inst,
            const std::vector<const analysis::Constant*>& constants)
             -> const analysis::Constant* {
    analysis::ConstantManager* const_mgr = context->get_constant_mgr();
    assert(inst->opcode() == SpvOpExtInst &&
           "Expecting an extended instruction.");
    assert(inst->GetSingleWordInOperand(0) ==
               context->get_feature_mgr()->GetExtInstImportId_GLSLstd450() &&
           "Expecting a GLSLstd450 extended instruction.");
    assert(inst->GetSingleWordInOperand(1) == GLSLstd450FMix &&
           "Expecting and FMix instruction.");

    if (!inst->IsFloatingPointFoldingAllowed()) {
      return nullptr;
    }

    // Make sure all FMix operands are constants.
    // NOTE(review): index 0 is skipped; presumably it corresponds to the
    // instruction set id operand -- confirm against the caller.
    for (uint32_t i = 1; i < 4; i++) {
      if (constants[i] == nullptr) {
        return nullptr;
      }
    }

    // Only scalar floats are handled here. AsFloat() yields nullptr for any
    // other type (e.g. a vector of floats), and dereferencing it would be
    // undefined behavior, so decline to fold instead.
    const analysis::Type* operand_type = constants[1]->type();
    const auto* float_type = operand_type->AsFloat();
    if (float_type == nullptr) {
      return nullptr;
    }

    // Build the constant 1 with the same width as the operands. Widths other
    // than 32 and 64 are not folded, because the word encoding of 1.0 below
    // would not match the type.
    const analysis::Constant* one = nullptr;
    if (float_type->width() == 32) {
      one = const_mgr->GetConstant(operand_type,
                                   utils::FloatProxy<float>(1.0f).GetWords());
    } else if (float_type->width() == 64) {
      one = const_mgr->GetConstant(operand_type,
                                   utils::FloatProxy<double>(1.0).GetWords());
    } else {
      return nullptr;
    }

    // temp1 = 1 - a
    const analysis::Constant* temp1 =
        FOLD_FPARITH_OP(-)(constants[1]->type(), one, constants[3], const_mgr);
    if (temp1 == nullptr) {
      return nullptr;
    }

    // temp2 = x * (1 - a)
    const analysis::Constant* temp2 = FOLD_FPARITH_OP(*)(
        constants[1]->type(), constants[1], temp1, const_mgr);
    if (temp2 == nullptr) {
      return nullptr;
    }

    // temp3 = y * a
    const analysis::Constant* temp3 = FOLD_FPARITH_OP(*)(
        constants[2]->type(), constants[2], constants[3], const_mgr);
    if (temp3 == nullptr) {
      return nullptr;
    }

    return FOLD_FPARITH_OP(+)(temp2->type(), temp2, temp3, const_mgr);
  };
}
} // namespace
ConstantFoldingRules::ConstantFoldingRules() {
void ConstantFoldingRules::AddFoldingRules() {
// Add all folding rules to the list for the opcodes to which they apply.
// Note that the order in which rules are added to the list matters. If a rule
// applies to the instruction, the rest of the rules will not be attempted.
@@ -877,6 +930,14 @@ ConstantFoldingRules::ConstantFoldingRules() {
rules_[SpvOpFNegate].push_back(FoldFNegate());
rules_[SpvOpQuantizeToF16].push_back(FoldQuantizeToF16());
// Add rules for GLSLstd450
FeatureManager* feature_manager = context_->get_feature_mgr();
uint32_t ext_inst_glslstd450_id =
feature_manager->GetExtInstImportId_GLSLstd450();
if (ext_inst_glslstd450_id != 0) {
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FMix}].push_back(FoldFMix());
}
}
} // namespace opt
} // namespace spvtools

View File

@@ -53,24 +53,74 @@ using ConstantFoldingRule = std::function<const analysis::Constant*(
const std::vector<const analysis::Constant*>& constants)>;
class ConstantFoldingRules {
protected:
// The |Key| and |Value| structs are used to by-pass a "decorated name length
// exceeded, name was truncated" warning on VS2013 and VS2015.

// Key of |ext_rules_|: the pair of values looked up for an extended
// instruction.
struct Key {
  uint32_t instruction_set;
  uint32_t opcode;
};

// Orders keys by |instruction_set| first and by |opcode| when the instruction
// sets are equal.
friend bool operator<(const Key& a, const Key& b) {
  if (a.instruction_set != b.instruction_set) {
    return a.instruction_set < b.instruction_set;
  }
  return a.opcode < b.opcode;
}

// Wrapper around the list of rules stored for one map entry.
struct Value {
  std::vector<ConstantFoldingRule> value;
  void push_back(ConstantFoldingRule rule) { value.push_back(rule); }
};
public:
ConstantFoldingRules();
ConstantFoldingRules(IRContext* ctx) : context_(ctx) {}
virtual ~ConstantFoldingRules() = default;
// Returns true if there is at least 1 folding rule for |opcode|.
bool HasFoldingRule(SpvOp opcode) const { return rules_.count(opcode); }
// Returns true if GetRulesForInstruction() yields at least one folding rule
// for |inst|.
bool HasFoldingRule(const Instruction* inst) const {
return !GetRulesForInstruction(inst).empty();
}
// Returns an vector of constant folding rules for |opcode|.
const std::vector<ConstantFoldingRule>& GetRulesForOpcode(
SpvOp opcode) const {
auto it = rules_.find(opcode);
if (it != rules_.end()) {
return it->second;
// Returns the constant folding rules registered for |inst|.  For an
// OpExtInst the rules are looked up in |ext_rules_| using in-operand 0 and
// in-operand 1 of |inst| as the key; for every other opcode they are looked up
// in |rules_| by opcode.  Returns an empty vector when nothing is registered.
const std::vector<ConstantFoldingRule>& GetRulesForInstruction(
    const Instruction* inst) const {
  if (inst->opcode() == SpvOpExtInst) {
    const uint32_t set_id = inst->GetSingleWordInOperand(0);
    const uint32_t ext_opcode = inst->GetSingleWordInOperand(1);
    const auto ext_entry = ext_rules_.find({set_id, ext_opcode});
    if (ext_entry == ext_rules_.end()) {
      return empty_vector_;
    }
    return ext_entry->second.value;
  }

  const auto core_entry = rules_.find(inst->opcode());
  if (core_entry == rules_.end()) {
    return empty_vector_;
  }
  return core_entry->second.value;
}
// Add the folding rules.
virtual void AddFoldingRules();
protected:
// |rules[opcode]| is the set of rules that can be applied to instructions
// with |opcode| as the opcode.
std::unordered_map<uint32_t, Value> rules_;
// The folding rules for extended instructions.
std::map<Key, Value> ext_rules_;
private:
std::unordered_map<uint32_t, std::vector<ConstantFoldingRule>> rules_;
// The context that the instruction to be folded will be a part of.
IRContext* context_;
// The empty set of rules to be used as the default return value in
// |GetRulesForInstruction|.
std::vector<ConstantFoldingRule> empty_vector_;
};

View File

@@ -31,12 +31,19 @@ void FeatureManager::Analyze(Module* module) {
void FeatureManager::AddExtensions(Module* module) {
for (auto ext : module->extensions()) {
const std::string name =
reinterpret_cast<const char*>(ext.GetInOperand(0u).words.data());
Extension extension;
if (GetExtensionFromString(name.c_str(), &extension)) {
extensions_.Add(extension);
}
AddExtension(&ext);
}
}
// Adds the extension named by the OpExtension instruction |ext| to
// |extensions_|.  Names that GetExtensionFromString() does not recognize are
// ignored.
void FeatureManager::AddExtension(Instruction* ext) {
  assert(ext->opcode() == SpvOpExtension &&
         "Expecting an extension instruction.");

  // In-operand 0 holds the extension name as a string packed into words.
  const auto& name_words = ext->GetInOperand(0u).words;
  const std::string name = reinterpret_cast<const char*>(name_words.data());

  Extension parsed;
  if (!GetExtensionFromString(name.c_str(), &parsed)) {
    return;
  }
  extensions_.Add(parsed);
}
@@ -63,5 +70,27 @@ void FeatureManager::AddExtInstImportIds(Module* module) {
extinst_importid_GLSLstd450_ = module->GetExtInstImportId("GLSL.std.450");
}
// Returns true when |a| and |b| refer to the same grammar object and hold equal
// capabilities, extensions and GLSLstd450 import id.
bool operator==(const FeatureManager& a, const FeatureManager& b) {
  // We check that the addresses of the grammars are the same because they
  // are large objects, and this is faster.  It can be changed if needed at a
  // later time.
  const bool same_grammar = (&a.grammar_ == &b.grammar_);
  if (!same_grammar) {
    return false;
  }

  const bool capabilities_differ = (a.capabilities_ != b.capabilities_);
  const bool extensions_differ =
      !capabilities_differ && (a.extensions_ != b.extensions_);
  if (capabilities_differ || extensions_differ) {
    return false;
  }

  return !(a.extinst_importid_GLSLstd450_ != b.extinst_importid_GLSLstd450_);
}
} // namespace opt
} // namespace spvtools

View File

@@ -45,14 +45,22 @@ class FeatureManager {
return extinst_importid_GLSLstd450_;
}
private:
// Analyzes |module| and records enabled extensions.
void AddExtensions(Module* module);
friend bool operator==(const FeatureManager& a, const FeatureManager& b);
friend bool operator!=(const FeatureManager& a, const FeatureManager& b) {
return !(a == b);
}
// Adds the given |capability| and all implied capabilities into the current
// FeatureManager.
void AddCapability(SpvCapability capability);
// Add the extension |ext| to the feature manager.
void AddExtension(Instruction* ext);
private:
// Analyzes |module| and records enabled extensions.
void AddExtensions(Module* module);
// Analyzes |module| and records enabled capabilities.
void AddCapabilities(Module* module);

View File

@@ -234,13 +234,12 @@ bool InstructionFolder::FoldInstructionInternal(Instruction* inst) const {
return true;
}
SpvOp opcode = inst->opcode();
analysis::ConstantManager* const_manager = context_->get_constant_mgr();
std::vector<const analysis::Constant*> constants =
const_manager->GetOperandConstants(inst);
for (const FoldingRule& rule : GetFoldingRules().GetRulesForOpcode(opcode)) {
for (const FoldingRule& rule :
GetFoldingRules().GetRulesForInstruction(inst)) {
if (rule(context_, inst, constants)) {
return true;
}
@@ -623,7 +622,7 @@ Instruction* InstructionFolder::FoldInstructionToConstant(
analysis::ConstantManager* const_mgr = context_->get_constant_mgr();
if (!inst->IsFoldableByFoldScalar() &&
!GetConstantFoldingRules().HasFoldingRule(inst->opcode())) {
!GetConstantFoldingRules().HasFoldingRule(inst)) {
return nullptr;
}
// Collect the values of the constant parameters.
@@ -641,19 +640,16 @@ Instruction* InstructionFolder::FoldInstructionToConstant(
}
});
if (GetConstantFoldingRules().HasFoldingRule(inst->opcode())) {
const analysis::Constant* folded_const = nullptr;
for (auto rule :
GetConstantFoldingRules().GetRulesForOpcode(inst->opcode())) {
folded_const = rule(context_, inst, constants);
if (folded_const != nullptr) {
Instruction* const_inst =
const_mgr->GetDefiningInstruction(folded_const, inst->type_id());
assert(const_inst->type_id() == inst->type_id());
// May be a new instruction that needs to be analysed.
context_->UpdateDefUse(const_inst);
return const_inst;
}
const analysis::Constant* folded_const = nullptr;
for (auto rule : GetConstantFoldingRules().GetRulesForInstruction(inst)) {
folded_const = rule(context_, inst, constants);
if (folded_const != nullptr) {
Instruction* const_inst =
const_mgr->GetDefiningInstruction(folded_const, inst->type_id());
assert(const_inst->type_id() == inst->type_id());
// May be a new instruction that needs to be analysed.
context_->UpdateDefUse(const_inst);
return const_inst;
}
}

View File

@@ -28,7 +28,23 @@ namespace opt {
class InstructionFolder {
public:
explicit InstructionFolder(IRContext* context) : context_(context) {}
// Creates a folder for |context| that owns a newly allocated
// ConstantFoldingRules and FoldingRules, both constructed with |context|, and
// populates both by calling AddFoldingRules() on them.
explicit InstructionFolder(IRContext* context)
: context_(context),
const_folding_rules_(new ConstantFoldingRules(context)),
folding_rules_(new FoldingRules(context)) {
folding_rules_->AddFoldingRules();
const_folding_rules_->AddFoldingRules();
}
// Creates a folder for |context| that takes ownership of the caller-supplied
// |folding_rules| and |constant_folding_rules|.  AddFoldingRules() is called on
// both objects here.
// NOTE(review): presumably callers pass rule objects on which
// AddFoldingRules() has not been called yet - confirm against callers.
explicit InstructionFolder(
IRContext* context, std::unique_ptr<FoldingRules>&& folding_rules,
std::unique_ptr<ConstantFoldingRules>&& constant_folding_rules)
: context_(context),
const_folding_rules_(std::move(constant_folding_rules)),
folding_rules_(std::move(folding_rules)) {
folding_rules_->AddFoldingRules();
const_folding_rules_->AddFoldingRules();
}
// Returns the result of folding a scalar instruction with the given |opcode|
// and |operands|. Each entry in |operands| is a pointer to an
@@ -95,18 +111,18 @@ class InstructionFolder {
bool FoldInstruction(Instruction* inst) const;
// Return true if this opcode has a const folding rule associated with it.
bool HasConstFoldingRule(SpvOp opcode) const {
return GetConstantFoldingRules().HasFoldingRule(opcode);
bool HasConstFoldingRule(const Instruction* inst) const {
return GetConstantFoldingRules().HasFoldingRule(inst);
}
private:
// Returns a reference to the ConstantFoldingRules instance.
const ConstantFoldingRules& GetConstantFoldingRules() const {
return const_folding_rules;
return *const_folding_rules_;
}
// Returns a reference to the FoldingRules instance.
const FoldingRules& GetFoldingRules() const { return folding_rules; }
const FoldingRules& GetFoldingRules() const { return *folding_rules_; }
// Returns the single-word result from performing the given unary operation on
// the operand value which is passed in as a 32-bit word.
@@ -159,10 +175,10 @@ class InstructionFolder {
IRContext* context_;
// Folding rules used by |FoldInstructionToConstant| and |FoldInstruction|.
ConstantFoldingRules const_folding_rules;
std::unique_ptr<ConstantFoldingRules> const_folding_rules_;
// Folding rules used by |FoldInstruction|.
FoldingRules folding_rules;
std::unique_ptr<FoldingRules> folding_rules_;
};
} // namespace opt

View File

@@ -2200,7 +2200,7 @@ FoldingRule RemoveRedundantOperands() {
} // namespace
FoldingRules::FoldingRules() {
void FoldingRules::AddFoldingRules() {
// Add all folding rules to the list for the opcodes to which they apply.
// Note that the order in which rules are added to the list matters. If a rule
// applies to the instruction, the rest of the rules will not be attempted.
@@ -2216,8 +2216,6 @@ FoldingRules::FoldingRules() {
rules_[SpvOpEntryPoint].push_back(RemoveRedundantOperands());
rules_[SpvOpExtInst].push_back(RedundantFMix());
rules_[SpvOpFAdd].push_back(RedundantFAdd());
rules_[SpvOpFAdd].push_back(MergeAddNegateArithmetic());
rules_[SpvOpFAdd].push_back(MergeAddAddArithmetic());
@@ -2271,6 +2269,15 @@ FoldingRules::FoldingRules() {
rules_[SpvOpUDiv].push_back(MergeDivNegateArithmetic());
rules_[SpvOpVectorShuffle].push_back(VectorShuffleFeedingShuffle());
FeatureManager* feature_manager = context_->get_feature_mgr();
// Add rules for GLSLstd450
uint32_t ext_inst_glslstd450_id =
feature_manager->GetExtInstImportId_GLSLstd450();
if (ext_inst_glslstd450_id != 0) {
ext_rules_[{ext_inst_glslstd450_id, GLSLstd450FMix}].push_back(
RedundantFMix());
}
}
} // namespace opt
} // namespace spvtools

View File

@@ -58,19 +58,58 @@ using FoldingRule = std::function<bool(
class FoldingRules {
public:
FoldingRules();
using FoldingRuleSet = std::vector<FoldingRule>;
const std::vector<FoldingRule>& GetRulesForOpcode(SpvOp opcode) const {
auto it = rules_.find(opcode);
if (it != rules_.end()) {
return it->second;
explicit FoldingRules(IRContext* ctx) : context_(ctx) {}
virtual ~FoldingRules() = default;
// Returns the folding rules registered for |inst|.  For an OpExtInst the rules
// are looked up in |ext_rules_| using in-operand 0 and in-operand 1 of |inst|
// as the key; for every other opcode they are looked up in |rules_| by opcode.
// Returns an empty rule set when nothing is registered.
const FoldingRuleSet& GetRulesForInstruction(Instruction* inst) const {
  if (inst->opcode() == SpvOpExtInst) {
    const uint32_t set_id = inst->GetSingleWordInOperand(0);
    const uint32_t ext_opcode = inst->GetSingleWordInOperand(1);
    const auto ext_entry = ext_rules_.find({set_id, ext_opcode});
    if (ext_entry == ext_rules_.end()) {
      return empty_vector_;
    }
    return ext_entry->second;
  }

  const auto core_entry = rules_.find(inst->opcode());
  if (core_entry == rules_.end()) {
    return empty_vector_;
  }
  return core_entry->second;
}
IRContext* context() { return context_; }
// Adds the folding rules for the object.
virtual void AddFoldingRules();
protected:
// The folding rules for core instructions.
std::unordered_map<uint32_t, FoldingRuleSet> rules_;
// The folding rules for extended instructions.
struct Key {
uint32_t instruction_set;
uint32_t opcode;
};
// Orders keys by |instruction_set| first and by |opcode| when the instruction
// sets are equal.
friend bool operator<(const Key& a, const Key& b) {
  if (a.instruction_set != b.instruction_set) {
    return a.instruction_set < b.instruction_set;
  }
  return a.opcode < b.opcode;
}
std::map<Key, FoldingRuleSet> ext_rules_;
private:
std::unordered_map<uint32_t, std::vector<FoldingRule>> rules_;
std::vector<FoldingRule> empty_vector_;
IRContext* context_;
FoldingRuleSet empty_vector_;
};
} // namespace opt

View File

@@ -341,7 +341,7 @@ uint32_t InstBuffAddrCheckPass::GenSearchAndTest(Instruction* ref_inst,
std::initializer_list<Operand>{
{SPV_OPERAND_TYPE_CAPABILITY, {SpvCapabilityInt64}}}));
get_def_use_mgr()->AnalyzeInstDefUse(&*cap_int64_inst);
get_module()->AddCapability(std::move(cap_int64_inst));
context()->AddCapability(std::move(cap_int64_inst));
}
// Convert reference pointer to uint64
uint32_t ref_ptr_id = ref_inst->GetSingleWordInOperand(0);

View File

@@ -469,7 +469,7 @@ bool Instruction::IsOpaqueType() const {
bool Instruction::IsFoldable() const {
return IsFoldableByFoldScalar() ||
context()->get_instruction_folder().HasConstFoldingRule(opcode());
context()->get_instruction_folder().HasConstFoldingRule(this);
}
bool Instruction::IsFoldableByFoldScalar() const {

View File

@@ -451,15 +451,7 @@ analysis::Type* InstrumentPass::GetUintRuntimeArrayType(uint32_t width) {
void InstrumentPass::AddStorageBufferExt() {
if (storage_buffer_ext_defined_) return;
if (!get_feature_mgr()->HasExtension(kSPV_KHR_storage_buffer_storage_class)) {
const std::string ext_name("SPV_KHR_storage_buffer_storage_class");
const auto num_chars = ext_name.size();
// Compute num words, accommodate the terminating null character.
const auto num_words = (num_chars + 1 + 3) / 4;
std::vector<uint32_t> ext_words(num_words, 0u);
std::memcpy(ext_words.data(), ext_name.data(), num_chars);
context()->AddExtension(std::unique_ptr<Instruction>(
new Instruction(context(), SpvOpExtension, 0u, 0u,
{{SPV_OPERAND_TYPE_LITERAL_STRING, ext_words}})));
context()->AddExtension("SPV_KHR_storage_buffer_storage_class");
}
storage_buffer_ext_defined_ = true;
}

View File

@@ -482,6 +482,26 @@ class InstructionBuilder {
return AddInstruction(std::move(new_inst));
}
// Creates an OpVectorShuffle of |vec1| and |vec2| with result type
// |result_type| and adds it through AddInstruction().  Each entry of
// |components| becomes one literal integer operand, in order.  Returns nullptr
// if the context cannot supply a new result id.
Instruction* AddVectorShuffle(uint32_t result_type, uint32_t vec1,
                              uint32_t vec2,
                              const std::vector<uint32_t>& components) {
  std::vector<Operand> shuffle_operands;
  shuffle_operands.push_back({SPV_OPERAND_TYPE_ID, {vec1}});
  shuffle_operands.push_back({SPV_OPERAND_TYPE_ID, {vec2}});
  for (size_t i = 0; i < components.size(); ++i) {
    shuffle_operands.push_back(
        {SPV_OPERAND_TYPE_LITERAL_INTEGER, {components[i]}});
  }

  // An id of 0 means no id could be taken.
  const uint32_t new_id = GetContext()->TakeNextId();
  if (new_id == 0) {
    return nullptr;
  }

  std::unique_ptr<Instruction> shuffle(
      new Instruction(GetContext(), SpvOpVectorShuffle, result_type, new_id,
                      shuffle_operands));
  return AddInstruction(std::move(shuffle));
}
// Inserts the new instruction before the insertion point.
Instruction* AddInstruction(std::unique_ptr<Instruction>&& insn) {
Instruction* insn_ptr = &*insert_before_.InsertBefore(std::move(insn));

View File

@@ -156,14 +156,20 @@ Instruction* IRContext::KillInst(Instruction* inst) {
decoration_mgr_->RemoveDecoration(inst);
}
}
if (type_mgr_ && IsTypeInst(inst->opcode())) {
type_mgr_->RemoveId(inst->result_id());
}
if (constant_mgr_ && IsConstantInst(inst->opcode())) {
constant_mgr_->RemoveId(inst->result_id());
}
if (inst->opcode() == SpvOpCapability || inst->opcode() == SpvOpExtension) {
// We reset the feature manager, instead of updating it, because it is just
// as much work. We would have to remove all capabilities implied by this
// capability that are not also implied by the remaining OpCapability
// instructions. We could update extensions, but we will see if it is
// needed.
ResetFeatureManager();
}
RemoveFromIdToName(inst);
@@ -252,7 +258,6 @@ bool IRContext::IsConsistent() {
#ifndef SPIRV_CHECK_CONTEXT
return true;
#endif
if (AreAnalysesValid(kAnalysisDefUse)) {
analysis::DefUseManager new_def_use(module());
if (*get_def_use_mgr() != new_def_use) {
@@ -286,6 +291,15 @@ bool IRContext::IsConsistent() {
return false;
}
}
if (feature_mgr_ != nullptr) {
FeatureManager current(grammar_);
current.Analyze(module());
if (current != *feature_mgr_) {
return false;
}
}
return true;
}
@@ -687,7 +701,8 @@ uint32_t IRContext::GetBuiltinInputVarId(uint32_t builtin) {
case SpvBuiltInVertexIndex:
case SpvBuiltInInstanceIndex:
case SpvBuiltInPrimitiveId:
case SpvBuiltInInvocationId: {
case SpvBuiltInInvocationId:
case SpvBuiltInSubgroupLocalInvocationId: {
analysis::Integer uint_ty(32, false);
reg_type = type_mgr->GetRegisteredType(&uint_ty);
break;
@@ -707,6 +722,13 @@ uint32_t IRContext::GetBuiltinInputVarId(uint32_t builtin) {
reg_type = type_mgr->GetRegisteredType(&v3float_ty);
break;
}
case SpvBuiltInSubgroupLtMask: {
analysis::Integer uint_ty(32, false);
analysis::Type* reg_uint_ty = type_mgr->GetRegisteredType(&uint_ty);
analysis::Vector v4uint_ty(reg_uint_ty, 4);
reg_type = type_mgr->GetRegisteredType(&v4uint_ty);
break;
}
default: {
assert(false && "unhandled builtin");
return 0;

View File

@@ -190,9 +190,13 @@ class IRContext {
// Clears all debug instructions (excluding OpLine & OpNoLine).
inline void debug_clear();
// Add |capability| to the module, if it is not already enabled.
inline void AddCapability(SpvCapability capability);
// Appends a capability instruction to this module.
inline void AddCapability(std::unique_ptr<Instruction>&& c);
// Appends an extension instruction to this module.
inline void AddExtension(const std::string& ext_name);
inline void AddExtension(std::unique_ptr<Instruction>&& e);
// Appends an extended instruction set instruction to this module.
inline void AddExtInstImport(std::unique_ptr<Instruction>&& e);
@@ -487,6 +491,8 @@ class IRContext {
return feature_mgr_.get();
}
// Discards the current feature manager by resetting |feature_mgr_| to null.
void ResetFeatureManager() { feature_mgr_.reset(nullptr); }
// Returns the grammar for this context.
const AssemblyGrammar& grammar() const { return grammar_; }
@@ -923,15 +929,45 @@ IteratorRange<Module::const_inst_iterator> IRContext::debugs3() const {
void IRContext::debug_clear() { module_->debug_clear(); }
// Adds an OpCapability instruction for |capability| unless the feature manager
// already reports that capability.
void IRContext::AddCapability(SpvCapability capability) {
  if (get_feature_mgr()->HasCapability(capability)) {
    return;
  }

  std::unique_ptr<Instruction> new_capability(new Instruction(
      this, SpvOpCapability, 0, 0,
      {{SPV_OPERAND_TYPE_CAPABILITY, {static_cast<uint32_t>(capability)}}}));
  AddCapability(std::move(new_capability));
}
// Appends the capability instruction |c| to the module.  Before handing it
// over, adds the combinators for the capability, records the capability in the
// feature manager if one exists, and analyzes |c| in the def-use manager if the
// def-use analysis is valid.
void IRContext::AddCapability(std::unique_ptr<Instruction>&& c) {
  // In-operand 0 of the instruction is the capability value.
  const uint32_t capability_word = c->GetSingleWordInOperand(0);
  AddCombinatorsForCapability(capability_word);

  if (feature_mgr_ != nullptr) {
    feature_mgr_->AddCapability(static_cast<SpvCapability>(capability_word));
  }

  if (AreAnalysesValid(kAnalysisDefUse)) {
    get_def_use_mgr()->AnalyzeInstDefUse(c.get());
  }

  module()->AddCapability(std::move(c));
}
void IRContext::AddExtension(const std::string& ext_name) {
const auto num_chars = ext_name.size();
// Compute num words, accommodate the terminating null character.
const auto num_words = (num_chars + 1 + 3) / 4;
std::vector<uint32_t> ext_words(num_words, 0u);
std::memcpy(ext_words.data(), ext_name.data(), num_chars);
AddExtension(std::unique_ptr<Instruction>(
new Instruction(this, SpvOpExtension, 0u, 0u,
{{SPV_OPERAND_TYPE_LITERAL_STRING, ext_words}})));
}
void IRContext::AddExtension(std::unique_ptr<Instruction>&& e) {
if (AreAnalysesValid(kAnalysisDefUse)) {
get_def_use_mgr()->AnalyzeInstDefUse(e.get());
}
if (feature_mgr_ != nullptr) {
feature_mgr_->AddExtension(&*e);
}
module()->AddExtension(std::move(e));
}

View File

@@ -133,6 +133,8 @@ class Module {
inline uint32_t version() const { return header_.version; }
inline void set_version(uint32_t v) { header_.version = v; }
// Iterators for capabilities instructions contained in this module.
inline inst_iterator capability_begin();
inline inst_iterator capability_end();

View File

@@ -14,6 +14,7 @@
#include "spirv-tools/optimizer.hpp"
#include <cassert>
#include <memory>
#include <string>
#include <unordered_map>
@@ -492,6 +493,8 @@ bool Optimizer::RegisterPassFromFlag(const std::string& flag) {
RegisterPass(CreateGraphicsRobustAccessPass());
} else if (pass_name == "wrap-opkill") {
RegisterPass(CreateWrapOpKillPass());
} else if (pass_name == "amd-ext-to-khr") {
RegisterPass(CreateAmdExtToKhrPass());
} else {
Errorf(consumer(), nullptr, {},
"Unknown flag '--%s'. Use --help for a list of valid flags",
@@ -549,26 +552,25 @@ bool Optimizer::Run(const uint32_t* original_binary,
impl_->pass_manager.SetTargetEnv(impl_->target_env);
auto status = impl_->pass_manager.Run(context.get());
bool binary_changed = false;
if (status == opt::Pass::Status::SuccessWithChange) {
binary_changed = true;
} else if (status == opt::Pass::Status::SuccessWithoutChange) {
if (optimized_binary->size() != original_binary_size ||
(memcmp(optimized_binary->data(), original_binary,
original_binary_size) != 0)) {
binary_changed = true;
Log(consumer(), SPV_MSG_WARNING, nullptr, {},
"Binary unexpectedly changed despite optimizer saying there was no "
"change");
}
if (status == opt::Pass::Status::Failure) {
return false;
}
if (binary_changed) {
optimized_binary->clear();
context->module()->ToBinary(optimized_binary, /* skip_nop = */ true);
}
optimized_binary->clear();
context->module()->ToBinary(optimized_binary, /* skip_nop = */ true);
return status != opt::Pass::Status::Failure;
#ifndef NDEBUG
if (status == opt::Pass::Status::SuccessWithoutChange) {
auto changed = optimized_binary->size() != original_binary_size ||
memcmp(optimized_binary->data(), original_binary,
original_binary_size) != 0;
assert(!changed &&
"Binary unexpectedly changed despite optimizer saying there was no "
"change");
}
#endif // !NDEBUG
return true;
}
Optimizer& Optimizer::SetPrintAll(std::ostream* out) {
@@ -919,4 +921,9 @@ Optimizer::PassToken CreateWrapOpKillPass() {
return MakeUnique<Optimizer::PassToken::Impl>(MakeUnique<opt::WrapOpKill>());
}
// Returns a pass token wrapping a newly created opt::AmdExtensionToKhrPass.
Optimizer::PassToken CreateAmdExtToKhrPass() {
return MakeUnique<Optimizer::PassToken::Impl>(
MakeUnique<opt::AmdExtensionToKhrPass>());
}
} // namespace spvtools

View File

@@ -26,6 +26,7 @@
#include "source/opt/ir_context.h"
#include "source/opt/module.h"
#include "spirv-tools/libspirv.hpp"
#include "types.h"
namespace spvtools {
namespace opt {

View File

@@ -18,6 +18,7 @@
// A single header to include all passes.
#include "source/opt/aggressive_dead_code_elim_pass.h"
#include "source/opt/amd_ext_to_khr.h"
#include "source/opt/block_merge_pass.h"
#include "source/opt/ccp_pass.h"
#include "source/opt/cfg_cleanup_pass.h"

View File

@@ -49,7 +49,7 @@ bool SimplificationPass::SimplifyFunction(Function* function) {
cfg()->ForEachBlockInReversePostOrder(
function->entry().get(),
[&modified, &process_phis, &work_list, &in_work_list, &inst_to_kill,
folder, this](BasicBlock* bb) {
&folder, this](BasicBlock* bb) {
for (Instruction* inst = &*bb->begin(); inst; inst = inst->NextNode()) {
if (inst->opcode() == SpvOpPhi) {
process_phis.insert(inst);

View File

@@ -139,6 +139,61 @@ class TypeManager {
const Type* GetMemberType(const Type* parent_type,
const std::vector<uint32_t>& access_chain);
// Convenience accessors for common types.  Each Get*Type() helper builds a
// temporary type description and returns what GetRegisteredType() yields for
// it; each Get*TypeId() helper passes that type to GetTypeInstruction() and
// returns the result.

// 32-bit integer type constructed with the signedness flag set to false.
Type* GetUIntType() {
Integer int_type(32, false);
return GetRegisteredType(&int_type);
}
uint32_t GetUIntTypeId() { return GetTypeInstruction(GetUIntType()); }
// 32-bit integer type constructed with the signedness flag set to true.
Type* GetSIntType() {
Integer int_type(32, true);
return GetRegisteredType(&int_type);
}
uint32_t GetSIntTypeId() { return GetTypeInstruction(GetSIntType()); }
// 32-bit floating point type.
Type* GetFloatType() {
Float float_type(32);
return GetRegisteredType(&float_type);
}
uint32_t GetFloatTypeId() { return GetTypeInstruction(GetFloatType()); }
// Vector of |size| elements whose element type is GetUIntType().
Type* GetUIntVectorType(uint32_t size) {
Vector vec_type(GetUIntType(), size);
return GetRegisteredType(&vec_type);
}
uint32_t GetUIntVectorTypeId(uint32_t size) {
return GetTypeInstruction(GetUIntVectorType(size));
}
// Vector of |size| elements whose element type is GetSIntType().
Type* GetSIntVectorType(uint32_t size) {
Vector vec_type(GetSIntType(), size);
return GetRegisteredType(&vec_type);
}
uint32_t GetSIntVectorTypeId(uint32_t size) {
return GetTypeInstruction(GetSIntVectorType(size));
}
// Vector of |size| elements whose element type is GetFloatType().
Type* GetFloatVectorType(uint32_t size) {
Vector vec_type(GetFloatType(), size);
return GetRegisteredType(&vec_type);
}
uint32_t GetFloatVectorTypeId(uint32_t size) {
return GetTypeInstruction(GetFloatVectorType(size));
}
// Boolean type.
Type* GetBoolType() {
Bool bool_type;
return GetRegisteredType(&bool_type);
}
uint32_t GetBoolTypeId() { return GetTypeInstruction(GetBoolType()); }
private:
using TypeToIdMap = std::unordered_map<const Type*, uint32_t, HashTypePointer,
CompareTypePointers>;

View File

@@ -53,7 +53,7 @@ void UpgradeMemoryModel::UpgradeMemoryModelInstruction() {
// 2. Add the OpCapability.
// 3. Modify the memory model.
Instruction* memory_model = get_module()->GetMemoryModel();
get_module()->AddCapability(MakeUnique<Instruction>(
context()->AddCapability(MakeUnique<Instruction>(
context(), SpvOpCapability, 0, 0,
std::initializer_list<Operand>{
{SPV_OPERAND_TYPE_CAPABILITY, {SpvCapabilityVulkanMemoryModelKHR}}}));
@@ -61,7 +61,7 @@ void UpgradeMemoryModel::UpgradeMemoryModelInstruction() {
std::vector<uint32_t> words(extension.size() / 4 + 1, 0);
char* dst = reinterpret_cast<char*>(words.data());
strncpy(dst, extension.c_str(), extension.size());
get_module()->AddExtension(
context()->AddExtension(
MakeUnique<Instruction>(context(), SpvOpExtension, 0, 0,
std::initializer_list<Operand>{
{SPV_OPERAND_TYPE_LITERAL_STRING, words}}));

View File

@@ -17,6 +17,7 @@ add_subdirectory(loop_optimizations)
add_spvtools_unittest(TARGET opt
SRCS aggressive_dead_code_elim_test.cpp
amd_ext_to_khr.cpp
assembly_builder_test.cpp
block_merge_test.cpp
ccp_test.cpp

View File

@@ -0,0 +1,338 @@
// Copyright (c) 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "gmock/gmock.h"
#include "test/opt/pass_fixture.h"
#include "test/opt/pass_utils.h"
namespace spvtools {
namespace opt {
namespace {
using AmdExtToKhrTest = PassTest<::testing::Test>;
using ::testing::HasSubstr;
// Builds the text of a test module together with its CHECK directives.
// |op_code| is the opcode placed in the module body (instruction %8) and
// |new_op_code| is the opcode the CHECK-NEXT directive expects in its place.
// Both are followed by the same "%uint %uint_3 Reduce" operands.
std::string GetTest(std::string op_code, std::string new_op_code) {
  // CHECK directives up to the position of the expected opcode.
  const std::string checks_head = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_ballot"
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[undef:%\w+]] = OpUndef %uint
; CHECK-NEXT: )";

  // Rest of the last CHECK line, then the module up to the opcode under test.
  const std::string module_head = R"( %uint %uint_3 Reduce [[undef]]
OpCapability Shader
OpCapability Groups
OpExtension "SPV_AMD_shader_ballot"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %uint
%8 = )";

  // Operands of the instruction under test and the end of the function.
  const std::string module_tail = R"( %uint %uint_3 Reduce %7
OpReturn
OpFunctionEnd
)";

  return checks_head + new_op_code + module_head + op_code + module_tail;
}
// Each test below builds a module with GetTest(), passing the AMD group opcode
// to place in the module and the opcode the CHECK directives expect after the
// pass, then runs AmdExtensionToKhrPass and matches the output against those
// directives.
TEST_F(AmdExtToKhrTest, ReplaceGroupIAddNonUniformAMD) {
std::string text =
GetTest("OpGroupIAddNonUniformAMD", "OpGroupNonUniformIAdd");
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceGroupFAddNonUniformAMD) {
std::string text =
GetTest("OpGroupFAddNonUniformAMD", "OpGroupNonUniformFAdd");
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceGroupUMinNonUniformAMD) {
std::string text =
GetTest("OpGroupUMinNonUniformAMD", "OpGroupNonUniformUMin");
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceGroupSMinNonUniformAMD) {
std::string text =
GetTest("OpGroupSMinNonUniformAMD", "OpGroupNonUniformSMin");
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceGroupFMinNonUniformAMD) {
std::string text =
GetTest("OpGroupFMinNonUniformAMD", "OpGroupNonUniformFMin");
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceGroupUMaxNonUniformAMD) {
std::string text =
GetTest("OpGroupUMaxNonUniformAMD", "OpGroupNonUniformUMax");
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceGroupSMaxNonUniformAMD) {
std::string text =
GetTest("OpGroupSMaxNonUniformAMD", "OpGroupNonUniformSMax");
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
TEST_F(AmdExtToKhrTest, ReplaceGroupFMaxNonUniformAMD) {
std::string text =
GetTest("OpGroupFMaxNonUniformAMD", "OpGroupNonUniformFMax");
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
// Runs AmdExtensionToKhrPass on a module that calls the MbcntAMD extended
// instruction with a 64-bit mask.  The CHECK directives expect the extension
// and its import to be gone and the call to be replaced by: a load of a
// variable decorated BuiltIn SubgroupLtMask, a shuffle of components 0 and 1
// into a v2uint, a bitcast to the 64-bit integer type, a bitwise AND with the
// original mask operand, and an OpBitCount.
TEST_F(AmdExtToKhrTest, ReplaceMbcntAMD) {
const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_ballot"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_ballot"
; CHECK: OpDecorate [[var:%\w+]] BuiltIn SubgroupLtMask
; CHECK: [[var]] = OpVariable %_ptr_Input_v4uint Input
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[ld:%\w+]] = OpLoad %v4uint [[var]]
; CHECK-NEXT: [[shuffle:%\w+]] = OpVectorShuffle %v2uint [[ld]] [[ld]] 0 1
; CHECK-NEXT: [[bitcast:%\w+]] = OpBitcast %ulong [[shuffle]]
; CHECK-NEXT: [[and:%\w+]] = OpBitwiseAnd %ulong [[bitcast]] %ulong_0
; CHECK-NEXT: [[result:%\w+]] = OpBitCount %uint [[and]]
OpCapability Shader
OpCapability Int64
OpExtension "SPV_AMD_shader_ballot"
%1 = OpExtInstImport "SPV_AMD_shader_ballot"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %2 "func"
OpExecutionMode %2 OriginUpperLeft
%void = OpTypeVoid
%4 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%ulong = OpTypeInt 64 0
%ulong_0 = OpConstant %ulong 0
%2 = OpFunction %void None %4
%8 = OpLabel
%9 = OpExtInst %uint %1 MbcntAMD %ulong_0
OpReturn
OpFunctionEnd
)";
SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
// SwizzleInvocationsAMD replacement.
//
// The embedded checks require that the SPV_AMD_shader_ballot extension and
// its import are gone, that a SubgroupLocalInvocationId input variable has
// been added, and that the function body starts with this exact sequence:
//   id         = load SubgroupLocalInvocationId
//   quad_idx   = id & 3
//   quad_ldr   = id ^ quad_idx
//   my_offset  = offset[quad_idx]            (OpVectorExtractDynamic)
//   target_inv = quad_ldr + my_offset
//   is_active  = OpGroupNonUniformBallotBitExtract(all-ones ballot, target_inv)
//   shuffle    = OpGroupNonUniformShuffle(data, target_inv)
//   result     = is_active ? shuffle : null
//
// The input offset vector is (x, y, z, w) = (1, 2, 3, 0), so each of the four
// declared constants is consumed exactly once.
TEST_F(AmdExtToKhrTest, ReplaceSwizzleInvocationsAMD) {
  const std::string text = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_ballot"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_ballot"
; CHECK: OpDecorate [[var:%\w+]] BuiltIn SubgroupLocalInvocationId
; CHECK: [[subgroup:%\w+]] = OpConstant %uint 3
; CHECK: [[offset:%\w+]] = OpConstantComposite %v4uint
; CHECK: [[var]] = OpVariable %_ptr_Input_uint Input
; CHECK: [[uint_max:%\w+]] = OpConstant %uint 4294967295
; CHECK: [[ballot_value:%\w+]] = OpConstantComposite %v4uint [[uint_max]] [[uint_max]] [[uint_max]] [[uint_max]]
; CHECK: [[null:%\w+]] = OpConstantNull [[type:%\w+]]
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[data:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[id:%\w+]] = OpLoad %uint [[var]]
; CHECK-NEXT: [[quad_idx:%\w+]] = OpBitwiseAnd %uint [[id]] %uint_3
; CHECK-NEXT: [[quad_ldr:%\w+]] = OpBitwiseXor %uint [[id]] [[quad_idx]]
; CHECK-NEXT: [[my_offset:%\w+]] = OpVectorExtractDynamic %uint [[offset]] [[quad_idx]]
; CHECK-NEXT: [[target_inv:%\w+]] = OpIAdd %uint [[quad_ldr]] [[my_offset]]
; CHECK-NEXT: [[is_active:%\w+]] = OpGroupNonUniformBallotBitExtract %bool [[subgroup]] [[ballot_value]] [[target_inv]]
; CHECK-NEXT: [[shuffle:%\w+]] = OpGroupNonUniformShuffle [[type]] [[subgroup]] [[data]] [[target_inv]]
; CHECK-NEXT: [[result:%\w+]] = OpSelect [[type]] [[is_active]] [[shuffle]] [[null]]
OpCapability Shader
OpExtension "SPV_AMD_shader_ballot"
%ext = OpExtInstImport "SPV_AMD_shader_ballot"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uint_x = OpConstant %uint 1
%uint_y = OpConstant %uint 2
%uint_z = OpConstant %uint 3
%uint_w = OpConstant %uint 0
%v4uint = OpTypeVector %uint 4
%offset = OpConstantComposite %v4uint %uint_x %uint_y %uint_z %uint_w
%1 = OpFunction %void None %3
%6 = OpLabel
%data = OpUndef %uint
%9 = OpExtInst %uint %ext SwizzleInvocationsAMD %data %offset
OpReturn
OpFunctionEnd
)";

  SinglePassRunAndMatch<AmdExtensionToKhrPass>(text, true);
}
// SwizzleInvocationsMaskedAMD replacement.
//
// The input mask is the v3uint constant (19, 12, 16). The embedded checks
// require that the SPV_AMD_shader_ballot extension and its import are gone,
// that a SubgroupLocalInvocationId input variable has been added, and that
// the function body starts with this exact sequence:
//   id         = load SubgroupLocalInvocationId
//   and_mask   = x | 4294967264
//   target_inv = ((id & and_mask) | y) ^ z
//   is_active  = OpGroupNonUniformBallotBitExtract(all-ones ballot, target_inv)
//   shuffle    = OpGroupNonUniformShuffle(data, target_inv)
//   result     = is_active ? shuffle : null
TEST_F(AmdExtToKhrTest, ReplaceSwizzleInvocationsMaskedAMD) {
  const std::string shader = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_ballot"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_ballot"
; CHECK: OpDecorate [[var:%\w+]] BuiltIn SubgroupLocalInvocationId
; CHECK: [[x:%\w+]] = OpConstant %uint 19
; CHECK: [[y:%\w+]] = OpConstant %uint 12
; CHECK: [[z:%\w+]] = OpConstant %uint 16
; CHECK: [[var]] = OpVariable %_ptr_Input_uint Input
; CHECK: [[mask_extend:%\w+]] = OpConstant %uint 4294967264
; CHECK: [[uint_max:%\w+]] = OpConstant %uint 4294967295
; CHECK: [[subgroup:%\w+]] = OpConstant %uint 3
; CHECK: [[ballot_value:%\w+]] = OpConstantComposite %v4uint [[uint_max]] [[uint_max]] [[uint_max]] [[uint_max]]
; CHECK: [[null:%\w+]] = OpConstantNull [[type:%\w+]]
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[data:%\w+]] = OpUndef [[type]]
; CHECK-NEXT: [[id:%\w+]] = OpLoad %uint [[var]]
; CHECK-NEXT: [[and_mask:%\w+]] = OpBitwiseOr %uint [[x]] [[mask_extend]]
; CHECK-NEXT: [[and:%\w+]] = OpBitwiseAnd %uint [[id]] [[and_mask]]
; CHECK-NEXT: [[or:%\w+]] = OpBitwiseOr %uint [[and]] [[y]]
; CHECK-NEXT: [[target_inv:%\w+]] = OpBitwiseXor %uint [[or]] [[z]]
; CHECK-NEXT: [[is_active:%\w+]] = OpGroupNonUniformBallotBitExtract %bool [[subgroup]] [[ballot_value]] [[target_inv]]
; CHECK-NEXT: [[shuffle:%\w+]] = OpGroupNonUniformShuffle [[type]] [[subgroup]] [[data]] [[target_inv]]
; CHECK-NEXT: [[result:%\w+]] = OpSelect [[type]] [[is_active]] [[shuffle]] [[null]]
OpCapability Shader
OpExtension "SPV_AMD_shader_ballot"
%ext = OpExtInstImport "SPV_AMD_shader_ballot"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uint_x = OpConstant %uint 19
%uint_y = OpConstant %uint 12
%uint_z = OpConstant %uint 16
%v3uint = OpTypeVector %uint 3
%mask = OpConstantComposite %v3uint %uint_x %uint_y %uint_z
%1 = OpFunction %void None %3
%6 = OpLabel
%data = OpUndef %uint
%9 = OpExtInst %uint %ext SwizzleInvocationsMaskedAMD %data %mask
OpReturn
OpFunctionEnd
)";

  SinglePassRunAndMatch<AmdExtensionToKhrPass>(shader, true);
}
// WriteInvocationAMD replacement.
//
// The embedded checks require that the SPV_AMD_shader_ballot extension and
// its import are gone, that a SubgroupLocalInvocationId input variable has
// been added, and that the function body starts with this exact sequence:
//   two OpUndef %uint values (input value, then write value)
//   ld     = load SubgroupLocalInvocationId
//   cmp    = (ld == %uint_3)
//   result = cmp ? write value : input value
TEST_F(AmdExtToKhrTest, ReplaceWriteInvocationAMD) {
  const std::string shader = R"(
; CHECK: OpCapability Shader
; CHECK-NOT: OpExtension "SPV_AMD_shader_ballot"
; CHECK-NOT: OpExtInstImport "SPV_AMD_shader_ballot"
; CHECK: OpDecorate [[var:%\w+]] BuiltIn SubgroupLocalInvocationId
; CHECK: [[var]] = OpVariable %_ptr_Input_uint Input
; CHECK: OpFunction
; CHECK-NEXT: OpLabel
; CHECK-NEXT: [[input_val:%\w+]] = OpUndef %uint
; CHECK-NEXT: [[write_val:%\w+]] = OpUndef %uint
; CHECK-NEXT: [[ld:%\w+]] = OpLoad %uint [[var]]
; CHECK-NEXT: [[cmp:%\w+]] = OpIEqual %bool [[ld]] %uint_3
; CHECK-NEXT: [[result:%\w+]] = OpSelect %uint [[cmp]] [[write_val]] [[input_val]]
OpCapability Shader
OpExtension "SPV_AMD_shader_ballot"
%ext = OpExtInstImport "SPV_AMD_shader_ballot"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %1 "func"
OpExecutionMode %1 OriginUpperLeft
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uint_3 = OpConstant %uint 3
%1 = OpFunction %void None %3
%6 = OpLabel
%7 = OpUndef %uint
%8 = OpUndef %uint
%9 = OpExtInst %uint %ext WriteInvocationAMD %7 %8 %uint_3
OpReturn
OpFunctionEnd
)";

  SinglePassRunAndMatch<AmdExtensionToKhrPass>(shader, true);
}
// Version bump: with the target environment set to SPIR-V 1.1, running the
// pass on a module that uses MbcntAMD must report a change and the
// disassembly must contain "Version: 1.3".
TEST_F(AmdExtToKhrTest, SetVersion) {
  const std::string shader = R"(
OpCapability Shader
OpCapability Int64
OpExtension "SPV_AMD_shader_ballot"
%1 = OpExtInstImport "SPV_AMD_shader_ballot"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %2 "func"
OpExecutionMode %2 OriginUpperLeft
%void = OpTypeVoid
%4 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%ulong = OpTypeInt 64 0
%ulong_0 = OpConstant %ulong 0
%2 = OpFunction %void None %4
%8 = OpLabel
%9 = OpExtInst %uint %1 MbcntAMD %ulong_0
OpReturn
OpFunctionEnd
)";

  // Start from version 1.1; the pass is expected to raise it to 1.3.
  SetTargetEnv(SPV_ENV_UNIVERSAL_1_1);
  SetDisassembleOptions(0);

  const auto run = SinglePassRunAndDisassemble<AmdExtensionToKhrPass>(
      shader, /* skip_nop = */ true, /* skip_validation = */ false);

  // Element 1 is the pass status, element 0 the disassembled module.
  EXPECT_EQ(Pass::Status::SuccessWithChange, std::get<1>(run));
  EXPECT_THAT(std::get<0>(run), HasSubstr("Version: 1.3"));
}
// Version preserved: with the target environment set to SPIR-V 1.4, running
// the pass on a module that uses MbcntAMD must report a change and the
// disassembly must still contain "Version: 1.4".
TEST_F(AmdExtToKhrTest, SetVersion1) {
  const std::string shader = R"(
OpCapability Shader
OpCapability Int64
OpExtension "SPV_AMD_shader_ballot"
%1 = OpExtInstImport "SPV_AMD_shader_ballot"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %2 "func"
OpExecutionMode %2 OriginUpperLeft
%void = OpTypeVoid
%4 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%ulong = OpTypeInt 64 0
%ulong_0 = OpConstant %ulong 0
%2 = OpFunction %void None %4
%8 = OpLabel
%9 = OpExtInst %uint %1 MbcntAMD %ulong_0
OpReturn
OpFunctionEnd
)";

  // Start from version 1.4 and make sure it stays the same.
  SetTargetEnv(SPV_ENV_UNIVERSAL_1_4);
  SetDisassembleOptions(0);

  const auto run = SinglePassRunAndDisassemble<AmdExtensionToKhrPass>(
      shader, /* skip_nop = */ true, /* skip_validation = */ false);

  // Element 1 is the pass status, element 0 the disassembled module.
  EXPECT_EQ(Pass::Status::SuccessWithChange, std::get<1>(run));
  EXPECT_THAT(std::get<0>(run), HasSubstr("Version: 1.4"));
}
} // namespace
} // namespace opt
} // namespace spvtools

View File

@@ -210,6 +210,7 @@ OpName %main "main"
%float_2049 = OpConstant %float 2049
%float_n2049 = OpConstant %float -2049
%float_0p5 = OpConstant %float 0.5
%float_0p2 = OpConstant %float 0.2
%float_pi = OpConstant %float 1.5555
%float_1e16 = OpConstant %float 1e16
%float_n1e16 = OpConstant %float -1e16
@@ -1465,24 +1466,14 @@ INSTANTIATE_TEST_SUITE_P(FloatConstantFoldingTest, FloatInstructionFoldingTest,
"OpReturn\n" +
"OpFunctionEnd",
2, std::numeric_limits<float>::quiet_NaN()),
// Test case 20: QuantizeToF16 inf
// Test case 20: FMix 1.0 4.0 0.2
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpFDiv %float %float_1 %float_0\n" +
"%3 = OpQuantizeToF16 %float %3\n" +
"%2 = OpExtInst %float %1 FMix %float_1 %float_4 %float_0p2\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, std::numeric_limits<float>::infinity()),
// Test case 21: QuantizeToF16 -inf
InstructionFoldingCase<float>(
Header() + "%main = OpFunction %void None %void_func\n" +
"%main_lab = OpLabel\n" +
"%2 = OpFDiv %float %float_n1 %float_0\n" +
"%3 = OpQuantizeToF16 %float %3\n" +
"OpReturn\n" +
"OpFunctionEnd",
2, -std::numeric_limits<float>::infinity())
2, 1.6f)
));
// clang-format on

View File

@@ -383,7 +383,6 @@ OpTypeForwardPointer %_ptr_PhysicalStorageBufferEXT_blockType PhysicalStorageBuf
R"(OpCapability Shader
OpCapability PhysicalStorageBufferAddressesEXT
OpCapability Int64
OpCapability Int64
OpExtension "SPV_EXT_physical_storage_buffer"
OpExtension "SPV_KHR_storage_buffer_storage_class"
%1 = OpExtInstImport "GLSL.std.450"

View File

@@ -109,6 +109,11 @@ NOTE: The optimizer is a work in progress.
Options (in lexicographical order):)",
program, program);
printf(R"(
--amd-ext-to-khr
Replaces the extensions VK_AMD_shader_ballot, VK_AMD_gcn_shader,
and VK_AMD_shader_trinary_minmax with equivalant code using core
instructions and capabilities.)");
printf(R"(
--ccp
Apply the conditional constant propagation transform. This will
propagate constant values throughout the program, and simplify