Updated spirv-tools.

This commit is contained in:
Бранимир Караџић
2025-06-07 10:25:58 -07:00
parent c198dc278d
commit 88e74f02bf
41 changed files with 9276 additions and 8575 deletions

View File

@@ -1,26 +1,10 @@
// Copyright (c) 2017-2024 The Khronos Group Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and/or associated documentation files (the "Materials"),
// to deal in the Materials without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Materials, and to permit persons to whom the
// Materials are furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Materials.
// Copyright: 2017-2024 The Khronos Group Inc.
// License: MIT
//
// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
//
// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
// IN THE MATERIALS.
#ifndef SPIRV_EXTINST_DebugInfo_H_
#define SPIRV_EXTINST_DebugInfo_H_

View File

@@ -1,26 +1,10 @@
// Copyright (c) 2018-2024 The Khronos Group Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and/or associated documentation files (the "Materials"),
// to deal in the Materials without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Materials, and to permit persons to whom the
// Materials are furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Materials.
// Copyright: 2018-2024 The Khronos Group Inc.
// License: MIT
//
// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
//
// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
// IN THE MATERIALS.
#ifndef SPIRV_EXTINST_NonSemanticShaderDebugInfo100_H_
#define SPIRV_EXTINST_NonSemanticShaderDebugInfo100_H_

View File

@@ -1,26 +1,10 @@
// Copyright (c) 2018-2024 The Khronos Group Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and/or associated documentation files (the "Materials"),
// to deal in the Materials without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Materials, and to permit persons to whom the
// Materials are furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Materials.
// Copyright: 2018-2024 The Khronos Group Inc.
// License: MIT
//
// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
//
// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
// IN THE MATERIALS.
#ifndef SPIRV_EXTINST_OpenCLDebugInfo100_H_
#define SPIRV_EXTINST_OpenCLDebugInfo100_H_

View File

@@ -1 +1 @@
"v2025.2", "SPIRV-Tools v2025.2 v2025.2.rc2-38-g4b1de6d5"
"v2025.2", "SPIRV-Tools v2025.2 v2025.2.rc2-58-g007a1f89"

File diff suppressed because it is too large Load Diff

View File

@@ -47,6 +47,7 @@ enum Extension : uint32_t {
kSPV_EXT_arithmetic_fence,
kSPV_EXT_demote_to_helper_invocation,
kSPV_EXT_descriptor_indexing,
kSPV_EXT_float8,
kSPV_EXT_fragment_fully_covered,
kSPV_EXT_fragment_invocation_density,
kSPV_EXT_fragment_shader_interlock,
@@ -71,6 +72,7 @@ enum Extension : uint32_t {
kSPV_INTEL_arbitrary_precision_floating_point,
kSPV_INTEL_arbitrary_precision_integers,
kSPV_INTEL_bfloat16_conversion,
kSPV_INTEL_bindless_images,
kSPV_INTEL_blocking_pipes,
kSPV_INTEL_cache_controls,
kSPV_INTEL_debug_module,

View File

@@ -391,6 +391,18 @@ typedef enum spv_number_kind_t {
SPV_NUMBER_FLOATING,
} spv_number_kind_t;
// Represents the encoding of a floating point value. This is reported
// alongside the number kind and bit width of a parsed literal operand so that
// consumers can tell apart formats that share the same bit width.
typedef enum spv_fp_encoding_t {
// The encoding is not specified. Has to be deduced from bitwidth
SPV_FP_ENCODING_UNKNOWN =
0, // The encoding is not specified. Has to be deduced from bitwidth
SPV_FP_ENCODING_IEEE754_BINARY16, // half float
SPV_FP_ENCODING_IEEE754_BINARY32, // single float
SPV_FP_ENCODING_IEEE754_BINARY64, // double float
SPV_FP_ENCODING_BFLOAT16, // 16-bit bfloat16 (spv::FPEncoding::BFloat16KHR)
SPV_FP_ENCODING_FLOAT8_E4M3, // 8-bit float (spv::FPEncoding::Float8E4M3EXT)
SPV_FP_ENCODING_FLOAT8_E5M2, // 8-bit float (spv::FPEncoding::Float8E5M2EXT)
} spv_fp_encoding_t;
typedef enum spv_text_to_binary_options_t {
SPV_TEXT_TO_BINARY_OPTION_NONE = SPV_BIT(0),
// Numeric IDs in the binary will have the same values as in the source.
@@ -446,6 +458,8 @@ typedef struct spv_parsed_operand_t {
spv_number_kind_t number_kind;
// The number of bits for a literal number type.
uint32_t number_bit_width;
// The encoding used for floating point values
spv_fp_encoding_t fp_encoding;
} spv_parsed_operand_t;
// An instruction parsed from a binary SPIR-V module.

View File

@@ -190,6 +190,7 @@ class Parser {
// Numeric interpretation recorded for a type-generating instruction, keyed by
// the type's result Id. It is later copied into the parsed operand of a
// literal that has that type.
struct NumberType {
// Kind of number (e.g. SPV_NUMBER_FLOATING for OpTypeFloat).
spv_number_kind_t type;
// Width of the number in bits.
uint32_t bit_width;
// Floating point encoding. For OpTypeFloat it is derived from the optional
// encoding operand when the instruction has at least 4 words.
// NOTE(review): confirm that every NumberType is created with this member
// set to SPV_FP_ENCODING_UNKNOWN, since it is not assigned on other paths.
spv_fp_encoding_t encoding;
};
// The state used to parse a single SPIR-V binary module.
@@ -385,8 +386,6 @@ spv_result_t Parser::parseInstruction() {
assert(_.requires_endian_conversion ||
(_.endian_converted_words.size() == 1));
recordNumberType(inst_offset, &inst);
if (_.requires_endian_conversion) {
// We must wait until here to set this pointer, because the vector might
// have been resized while we accumulated its elements.
@@ -398,6 +397,8 @@ spv_result_t Parser::parseInstruction() {
}
inst.num_words = inst_word_count;
recordNumberType(inst_offset, &inst);
// We must wait until here to set this pointer, because the vector might
// have been resized while we accumulated its elements.
inst.operands = _.operands.data();
@@ -833,6 +834,7 @@ spv_result_t Parser::setNumericTypeInfoForType(
parsed_operand->number_kind = info.type;
parsed_operand->number_bit_width = info.bit_width;
parsed_operand->fp_encoding = info.encoding;
// Round up the word count.
parsed_operand->num_words = static_cast<uint16_t>((info.bit_width + 31) / 32);
return SPV_SUCCESS;
@@ -850,6 +852,17 @@ void Parser::recordNumberType(size_t inst_offset,
} else if (spv::Op::OpTypeFloat == opcode) {
info.type = SPV_NUMBER_FLOATING;
info.bit_width = peekAt(inst_offset + 2);
if (inst->num_words >= 4) {
const spvtools::OperandDesc* desc;
spv_result_t status = spvtools::LookupOperand(
SPV_OPERAND_TYPE_FPENCODING, peekAt(inst_offset + 3), &desc);
if (status == SPV_SUCCESS) {
info.encoding = spvFPEncodingFromOperandFPEncoding(
static_cast<spv::FPEncoding>(desc->value));
} else {
info.encoding = SPV_FP_ENCODING_UNKNOWN;
}
}
}
// The *result* Id of a type generating instruction is the type Id.
_.type_id_to_number_type_info[inst->result_id] = info;

View File

@@ -768,7 +768,7 @@ void InstructionDisassembler::EmitInstructionImpl(
{line_length + 2, last_instruction_comment_alignment_, kCommentColumn});
// Round up the alignment to a multiple of 4 for more niceness.
align = (align + 3) & ~0x3u;
last_instruction_comment_alignment_ = align;
last_instruction_comment_alignment_ = std::min({align, 256u});
stream_ << std::string(align - line_length, ' ') << "; " << comments.str();
} else {

View File

@@ -217,6 +217,14 @@ spv_result_t FriendlyNameMapper::ParseInstruction(
SaveName(result_id, "bfloat16");
break;
}
if (spv::FPEncoding(inst.words[3]) == spv::FPEncoding::Float8E4M3EXT) {
SaveName(result_id, "fp8e4m3");
break;
}
if (spv::FPEncoding(inst.words[3]) == spv::FPEncoding::Float8E5M2EXT) {
SaveName(result_id, "fp8e5m2");
break;
}
}
switch (bit_width) {
case 16:

View File

@@ -263,6 +263,7 @@ int32_t spvOpcodeGeneratesType(spv::Op op) {
case spv::Op::OpTypeTensorLayoutNV:
case spv::Op::OpTypeTensorViewNV:
case spv::Op::OpTypeTensorARM:
case spv::Op::OpTypeTaskSequenceINTEL:
return true;
default:
// In particular, OpTypeForwardPointer does not generate a type,

View File

@@ -619,3 +619,17 @@ std::function<bool(unsigned)> spvDbgInfoExtOperandCanBeForwardDeclaredFunction(
}
return out;
}
// Translates a SPIR-V FPEncoding operand value into the matching
// spv_fp_encoding_t. Any value without a dedicated mapping (including the Max
// sentinel) yields SPV_FP_ENCODING_UNKNOWN.
spv_fp_encoding_t spvFPEncodingFromOperandFPEncoding(spv::FPEncoding encoding) {
  // Start from the fallback so unmapped values need no extra handling.
  spv_fp_encoding_t result = SPV_FP_ENCODING_UNKNOWN;
  switch (encoding) {
    case spv::FPEncoding::Float8E5M2EXT:
      result = SPV_FP_ENCODING_FLOAT8_E5M2;
      break;
    case spv::FPEncoding::Float8E4M3EXT:
      result = SPV_FP_ENCODING_FLOAT8_E4M3;
      break;
    case spv::FPEncoding::BFloat16KHR:
      result = SPV_FP_ENCODING_BFLOAT16;
      break;
    case spv::FPEncoding::Max:
      // Sentinel, not a real encoding: keep the fallback.
      break;
  }
  return result;
}

View File

@@ -122,4 +122,7 @@ std::function<bool(unsigned)> spvOperandCanBeForwardDeclaredFunction(
std::function<bool(unsigned)> spvDbgInfoExtOperandCanBeForwardDeclaredFunction(
spv::Op opcode, spv_ext_inst_type_t ext_type, uint32_t key);
// Converts an spv::FPEncoding to spv_fp_encoding_t
spv_fp_encoding_t spvFPEncodingFromOperandFPEncoding(spv::FPEncoding encoding);
#endif // SOURCE_OPERAND_H_

View File

@@ -914,6 +914,10 @@ bool AggressiveDCEPass::ProcessGlobalValues() {
context()->AnalyzeUses(&dbg);
continue;
}
// Save debug build identifier even if no other instructions refer to it.
if (dbg.GetShader100DebugOpcode() ==
NonSemanticShaderDebugInfo100DebugBuildIdentifier)
continue;
to_kill_.push_back(&dbg);
modified = true;
}
@@ -971,7 +975,6 @@ Pass::Status AggressiveDCEPass::Process() {
void AggressiveDCEPass::InitExtensions() {
extensions_allowlist_.clear();
// clang-format off
extensions_allowlist_.insert({
"SPV_AMD_shader_explicit_vertex_parameter",
"SPV_AMD_shader_trinary_minmax",
@@ -1039,9 +1042,10 @@ void AggressiveDCEPass::InitExtensions() {
"SPV_KHR_ray_tracing_position_fetch",
"SPV_KHR_fragment_shading_rate",
"SPV_KHR_quad_control",
"SPV_NV_shader_invocation_reorder"
"SPV_NV_shader_invocation_reorder",
"SPV_NV_cluster_acceleration_structure",
"SPV_NV_linear_swept_spheres",
});
// clang-format on
}
Instruction* AggressiveDCEPass::GetHeaderBranch(BasicBlock* blk) {

View File

@@ -670,6 +670,8 @@ void CopyPropagateArrays::UpdateUses(Instruction* original_ptr_inst,
if (!dominator_analysis->Dominates(new_ptr_inst, use)) {
assert(dominator_analysis->Dominates(use, new_ptr_inst));
use->InsertAfter(new_ptr_inst);
context()->set_instr_block(use,
context()->get_instr_block(new_ptr_inst));
}
}

View File

@@ -250,8 +250,7 @@ Instruction* Instruction::GetBaseAddress() const {
case spv::Op::OpInBoundsPtrAccessChain:
case spv::Op::OpImageTexelPointer:
case spv::Op::OpCopyObject:
// All of these instructions have the base pointer use a base pointer
// in in-operand 0.
// All of these instructions have their base pointer in in-operand 0.
base = base_inst->GetSingleWordInOperand(0);
base_inst = context()->get_def_use_mgr()->GetDef(base);
break;

View File

@@ -399,42 +399,75 @@ Pass::Status LocalAccessChainConvertPass::Process() {
void LocalAccessChainConvertPass::InitExtensions() {
extensions_allowlist_.clear();
extensions_allowlist_.insert(
{"SPV_AMD_shader_explicit_vertex_parameter",
"SPV_AMD_shader_trinary_minmax", "SPV_AMD_gcn_shader",
"SPV_KHR_shader_ballot", "SPV_AMD_shader_ballot",
"SPV_AMD_gpu_shader_half_float", "SPV_KHR_shader_draw_parameters",
"SPV_KHR_subgroup_vote", "SPV_KHR_8bit_storage", "SPV_KHR_16bit_storage",
"SPV_KHR_device_group", "SPV_KHR_multiview",
"SPV_NVX_multiview_per_view_attributes", "SPV_NV_viewport_array2",
"SPV_NV_stereo_view_rendering", "SPV_NV_sample_mask_override_coverage",
"SPV_NV_geometry_shader_passthrough", "SPV_AMD_texture_gather_bias_lod",
"SPV_KHR_storage_buffer_storage_class",
// SPV_KHR_variable_pointers
// Currently do not support extended pointer expressions
"SPV_AMD_gpu_shader_int16", "SPV_KHR_post_depth_coverage",
"SPV_KHR_shader_atomic_counter_ops", "SPV_EXT_shader_stencil_export",
"SPV_EXT_shader_viewport_index_layer",
"SPV_AMD_shader_image_load_store_lod", "SPV_AMD_shader_fragment_mask",
"SPV_EXT_fragment_fully_covered", "SPV_AMD_gpu_shader_half_float_fetch",
"SPV_GOOGLE_decorate_string", "SPV_GOOGLE_hlsl_functionality1",
"SPV_GOOGLE_user_type", "SPV_NV_shader_subgroup_partitioned",
"SPV_EXT_demote_to_helper_invocation", "SPV_EXT_descriptor_indexing",
"SPV_NV_fragment_shader_barycentric",
"SPV_NV_compute_shader_derivatives", "SPV_NV_shader_image_footprint",
"SPV_NV_shading_rate", "SPV_NV_mesh_shader", "SPV_EXT_mesh_shader",
"SPV_NV_ray_tracing", "SPV_KHR_ray_tracing", "SPV_KHR_ray_query",
"SPV_EXT_fragment_invocation_density", "SPV_KHR_terminate_invocation",
"SPV_KHR_subgroup_uniform_control_flow", "SPV_KHR_integer_dot_product",
"SPV_EXT_shader_image_int64", "SPV_KHR_non_semantic_info",
"SPV_KHR_uniform_group_instructions",
"SPV_KHR_fragment_shader_barycentric", "SPV_KHR_vulkan_memory_model",
"SPV_NV_bindless_texture", "SPV_EXT_shader_atomic_float_add",
"SPV_EXT_fragment_shader_interlock",
"SPV_KHR_compute_shader_derivatives", "SPV_NV_cooperative_matrix",
"SPV_KHR_cooperative_matrix", "SPV_KHR_ray_tracing_position_fetch",
"SPV_AMDX_shader_enqueue", "SPV_KHR_fragment_shading_rate",
"SPV_KHR_quad_control", "SPV_NV_shader_invocation_reorder"});
extensions_allowlist_.insert({
"SPV_AMD_shader_explicit_vertex_parameter",
"SPV_AMD_shader_trinary_minmax",
"SPV_AMD_gcn_shader",
"SPV_KHR_shader_ballot",
"SPV_AMD_shader_ballot",
"SPV_AMD_gpu_shader_half_float",
"SPV_KHR_shader_draw_parameters",
"SPV_KHR_subgroup_vote",
"SPV_KHR_8bit_storage",
"SPV_KHR_16bit_storage",
"SPV_KHR_device_group",
"SPV_KHR_multiview",
"SPV_NVX_multiview_per_view_attributes",
"SPV_NV_viewport_array2",
"SPV_NV_stereo_view_rendering",
"SPV_NV_sample_mask_override_coverage",
"SPV_NV_geometry_shader_passthrough",
"SPV_AMD_texture_gather_bias_lod",
"SPV_KHR_storage_buffer_storage_class",
// SPV_KHR_variable_pointers
// Currently do not support extended pointer expressions
"SPV_AMD_gpu_shader_int16",
"SPV_KHR_post_depth_coverage",
"SPV_KHR_shader_atomic_counter_ops",
"SPV_EXT_shader_stencil_export",
"SPV_EXT_shader_viewport_index_layer",
"SPV_AMD_shader_image_load_store_lod",
"SPV_AMD_shader_fragment_mask",
"SPV_EXT_fragment_fully_covered",
"SPV_AMD_gpu_shader_half_float_fetch",
"SPV_GOOGLE_decorate_string",
"SPV_GOOGLE_hlsl_functionality1",
"SPV_GOOGLE_user_type",
"SPV_NV_shader_subgroup_partitioned",
"SPV_EXT_demote_to_helper_invocation",
"SPV_EXT_descriptor_indexing",
"SPV_NV_fragment_shader_barycentric",
"SPV_NV_compute_shader_derivatives",
"SPV_NV_shader_image_footprint",
"SPV_NV_shading_rate",
"SPV_NV_mesh_shader",
"SPV_EXT_mesh_shader",
"SPV_NV_ray_tracing",
"SPV_KHR_ray_tracing",
"SPV_KHR_ray_query",
"SPV_EXT_fragment_invocation_density",
"SPV_KHR_terminate_invocation",
"SPV_KHR_subgroup_uniform_control_flow",
"SPV_KHR_integer_dot_product",
"SPV_EXT_shader_image_int64",
"SPV_KHR_non_semantic_info",
"SPV_KHR_uniform_group_instructions",
"SPV_KHR_fragment_shader_barycentric",
"SPV_KHR_vulkan_memory_model",
"SPV_NV_bindless_texture",
"SPV_EXT_shader_atomic_float_add",
"SPV_EXT_fragment_shader_interlock",
"SPV_KHR_compute_shader_derivatives",
"SPV_NV_cooperative_matrix",
"SPV_KHR_cooperative_matrix",
"SPV_KHR_ray_tracing_position_fetch",
"SPV_AMDX_shader_enqueue",
"SPV_KHR_fragment_shading_rate",
"SPV_KHR_quad_control",
"SPV_NV_shader_invocation_reorder",
"SPV_NV_cluster_acceleration_structure",
"SPV_NV_linear_swept_spheres",
});
}
bool LocalAccessChainConvertPass::AnyIndexIsOutOfBounds(

View File

@@ -235,72 +235,76 @@ Pass::Status LocalSingleBlockLoadStoreElimPass::Process() {
void LocalSingleBlockLoadStoreElimPass::InitExtensions() {
extensions_allowlist_.clear();
extensions_allowlist_.insert({"SPV_AMD_shader_explicit_vertex_parameter",
"SPV_AMD_shader_trinary_minmax",
"SPV_AMD_gcn_shader",
"SPV_KHR_shader_ballot",
"SPV_AMD_shader_ballot",
"SPV_AMDX_shader_enqueue",
"SPV_AMD_gpu_shader_half_float",
"SPV_KHR_shader_draw_parameters",
"SPV_KHR_subgroup_vote",
"SPV_KHR_8bit_storage",
"SPV_KHR_16bit_storage",
"SPV_KHR_device_group",
"SPV_KHR_multiview",
"SPV_NVX_multiview_per_view_attributes",
"SPV_NV_viewport_array2",
"SPV_NV_stereo_view_rendering",
"SPV_NV_sample_mask_override_coverage",
"SPV_NV_geometry_shader_passthrough",
"SPV_AMD_texture_gather_bias_lod",
"SPV_KHR_storage_buffer_storage_class",
"SPV_KHR_variable_pointers",
"SPV_AMD_gpu_shader_int16",
"SPV_KHR_post_depth_coverage",
"SPV_KHR_shader_atomic_counter_ops",
"SPV_EXT_shader_stencil_export",
"SPV_EXT_shader_viewport_index_layer",
"SPV_AMD_shader_image_load_store_lod",
"SPV_AMD_shader_fragment_mask",
"SPV_EXT_fragment_fully_covered",
"SPV_AMD_gpu_shader_half_float_fetch",
"SPV_GOOGLE_decorate_string",
"SPV_GOOGLE_hlsl_functionality1",
"SPV_GOOGLE_user_type",
"SPV_NV_shader_subgroup_partitioned",
"SPV_EXT_demote_to_helper_invocation",
"SPV_EXT_descriptor_indexing",
"SPV_NV_fragment_shader_barycentric",
"SPV_NV_compute_shader_derivatives",
"SPV_NV_shader_image_footprint",
"SPV_NV_shading_rate",
"SPV_NV_mesh_shader",
"SPV_EXT_mesh_shader",
"SPV_NV_ray_tracing",
"SPV_KHR_ray_tracing",
"SPV_KHR_ray_query",
"SPV_EXT_fragment_invocation_density",
"SPV_EXT_physical_storage_buffer",
"SPV_KHR_physical_storage_buffer",
"SPV_KHR_terminate_invocation",
"SPV_KHR_subgroup_uniform_control_flow",
"SPV_KHR_integer_dot_product",
"SPV_EXT_shader_image_int64",
"SPV_KHR_non_semantic_info",
"SPV_KHR_uniform_group_instructions",
"SPV_KHR_fragment_shader_barycentric",
"SPV_KHR_vulkan_memory_model",
"SPV_NV_bindless_texture",
"SPV_EXT_shader_atomic_float_add",
"SPV_EXT_fragment_shader_interlock",
"SPV_KHR_compute_shader_derivatives",
"SPV_NV_cooperative_matrix",
"SPV_KHR_cooperative_matrix",
"SPV_KHR_ray_tracing_position_fetch",
"SPV_KHR_fragment_shading_rate",
"SPV_KHR_quad_control",
"SPV_NV_shader_invocation_reorder"});
extensions_allowlist_.insert({
"SPV_AMD_shader_explicit_vertex_parameter",
"SPV_AMD_shader_trinary_minmax",
"SPV_AMD_gcn_shader",
"SPV_KHR_shader_ballot",
"SPV_AMD_shader_ballot",
"SPV_AMDX_shader_enqueue",
"SPV_AMD_gpu_shader_half_float",
"SPV_KHR_shader_draw_parameters",
"SPV_KHR_subgroup_vote",
"SPV_KHR_8bit_storage",
"SPV_KHR_16bit_storage",
"SPV_KHR_device_group",
"SPV_KHR_multiview",
"SPV_NVX_multiview_per_view_attributes",
"SPV_NV_viewport_array2",
"SPV_NV_stereo_view_rendering",
"SPV_NV_sample_mask_override_coverage",
"SPV_NV_geometry_shader_passthrough",
"SPV_AMD_texture_gather_bias_lod",
"SPV_KHR_storage_buffer_storage_class",
"SPV_KHR_variable_pointers",
"SPV_AMD_gpu_shader_int16",
"SPV_KHR_post_depth_coverage",
"SPV_KHR_shader_atomic_counter_ops",
"SPV_EXT_shader_stencil_export",
"SPV_EXT_shader_viewport_index_layer",
"SPV_AMD_shader_image_load_store_lod",
"SPV_AMD_shader_fragment_mask",
"SPV_EXT_fragment_fully_covered",
"SPV_AMD_gpu_shader_half_float_fetch",
"SPV_GOOGLE_decorate_string",
"SPV_GOOGLE_hlsl_functionality1",
"SPV_GOOGLE_user_type",
"SPV_NV_shader_subgroup_partitioned",
"SPV_EXT_demote_to_helper_invocation",
"SPV_EXT_descriptor_indexing",
"SPV_NV_fragment_shader_barycentric",
"SPV_NV_compute_shader_derivatives",
"SPV_NV_shader_image_footprint",
"SPV_NV_shading_rate",
"SPV_NV_mesh_shader",
"SPV_EXT_mesh_shader",
"SPV_NV_ray_tracing",
"SPV_KHR_ray_tracing",
"SPV_KHR_ray_query",
"SPV_EXT_fragment_invocation_density",
"SPV_EXT_physical_storage_buffer",
"SPV_KHR_physical_storage_buffer",
"SPV_KHR_terminate_invocation",
"SPV_KHR_subgroup_uniform_control_flow",
"SPV_KHR_integer_dot_product",
"SPV_EXT_shader_image_int64",
"SPV_KHR_non_semantic_info",
"SPV_KHR_uniform_group_instructions",
"SPV_KHR_fragment_shader_barycentric",
"SPV_KHR_vulkan_memory_model",
"SPV_NV_bindless_texture",
"SPV_EXT_shader_atomic_float_add",
"SPV_EXT_fragment_shader_interlock",
"SPV_KHR_compute_shader_derivatives",
"SPV_NV_cooperative_matrix",
"SPV_KHR_cooperative_matrix",
"SPV_KHR_ray_tracing_position_fetch",
"SPV_KHR_fragment_shading_rate",
"SPV_KHR_quad_control",
"SPV_NV_shader_invocation_reorder",
"SPV_NV_cluster_acceleration_structure",
"SPV_NV_linear_swept_spheres",
});
}
} // namespace opt

View File

@@ -88,71 +88,75 @@ Pass::Status LocalSingleStoreElimPass::Process() {
}
void LocalSingleStoreElimPass::InitExtensionAllowList() {
extensions_allowlist_.insert({"SPV_AMD_shader_explicit_vertex_parameter",
"SPV_AMD_shader_trinary_minmax",
"SPV_AMD_gcn_shader",
"SPV_KHR_shader_ballot",
"SPV_AMD_shader_ballot",
"SPV_AMD_gpu_shader_half_float",
"SPV_KHR_shader_draw_parameters",
"SPV_KHR_subgroup_vote",
"SPV_KHR_8bit_storage",
"SPV_KHR_16bit_storage",
"SPV_KHR_device_group",
"SPV_KHR_multiview",
"SPV_NVX_multiview_per_view_attributes",
"SPV_NV_viewport_array2",
"SPV_NV_stereo_view_rendering",
"SPV_NV_sample_mask_override_coverage",
"SPV_NV_geometry_shader_passthrough",
"SPV_AMD_texture_gather_bias_lod",
"SPV_KHR_storage_buffer_storage_class",
"SPV_KHR_variable_pointers",
"SPV_AMD_gpu_shader_int16",
"SPV_KHR_post_depth_coverage",
"SPV_KHR_shader_atomic_counter_ops",
"SPV_EXT_shader_stencil_export",
"SPV_EXT_shader_viewport_index_layer",
"SPV_AMD_shader_image_load_store_lod",
"SPV_AMD_shader_fragment_mask",
"SPV_EXT_fragment_fully_covered",
"SPV_AMD_gpu_shader_half_float_fetch",
"SPV_GOOGLE_decorate_string",
"SPV_GOOGLE_hlsl_functionality1",
"SPV_NV_shader_subgroup_partitioned",
"SPV_EXT_descriptor_indexing",
"SPV_NV_fragment_shader_barycentric",
"SPV_NV_compute_shader_derivatives",
"SPV_NV_shader_image_footprint",
"SPV_NV_shading_rate",
"SPV_NV_mesh_shader",
"SPV_EXT_mesh_shader",
"SPV_NV_ray_tracing",
"SPV_KHR_ray_query",
"SPV_EXT_fragment_invocation_density",
"SPV_EXT_physical_storage_buffer",
"SPV_KHR_physical_storage_buffer",
"SPV_KHR_terminate_invocation",
"SPV_KHR_subgroup_uniform_control_flow",
"SPV_KHR_integer_dot_product",
"SPV_EXT_shader_image_int64",
"SPV_KHR_non_semantic_info",
"SPV_KHR_uniform_group_instructions",
"SPV_KHR_fragment_shader_barycentric",
"SPV_KHR_vulkan_memory_model",
"SPV_NV_bindless_texture",
"SPV_EXT_shader_atomic_float_add",
"SPV_EXT_fragment_shader_interlock",
"SPV_KHR_compute_shader_derivatives",
"SPV_NV_cooperative_matrix",
"SPV_KHR_cooperative_matrix",
"SPV_KHR_ray_tracing_position_fetch",
"SPV_AMDX_shader_enqueue",
"SPV_KHR_fragment_shading_rate",
"SPV_KHR_ray_tracing",
"SPV_KHR_quad_control",
"SPV_GOOGLE_user_type",
"SPV_NV_shader_invocation_reorder"});
extensions_allowlist_.insert({
"SPV_AMD_shader_explicit_vertex_parameter",
"SPV_AMD_shader_trinary_minmax",
"SPV_AMD_gcn_shader",
"SPV_KHR_shader_ballot",
"SPV_AMD_shader_ballot",
"SPV_AMD_gpu_shader_half_float",
"SPV_KHR_shader_draw_parameters",
"SPV_KHR_subgroup_vote",
"SPV_KHR_8bit_storage",
"SPV_KHR_16bit_storage",
"SPV_KHR_device_group",
"SPV_KHR_multiview",
"SPV_NVX_multiview_per_view_attributes",
"SPV_NV_viewport_array2",
"SPV_NV_stereo_view_rendering",
"SPV_NV_sample_mask_override_coverage",
"SPV_NV_geometry_shader_passthrough",
"SPV_AMD_texture_gather_bias_lod",
"SPV_KHR_storage_buffer_storage_class",
"SPV_KHR_variable_pointers",
"SPV_AMD_gpu_shader_int16",
"SPV_KHR_post_depth_coverage",
"SPV_KHR_shader_atomic_counter_ops",
"SPV_EXT_shader_stencil_export",
"SPV_EXT_shader_viewport_index_layer",
"SPV_AMD_shader_image_load_store_lod",
"SPV_AMD_shader_fragment_mask",
"SPV_EXT_fragment_fully_covered",
"SPV_AMD_gpu_shader_half_float_fetch",
"SPV_GOOGLE_decorate_string",
"SPV_GOOGLE_hlsl_functionality1",
"SPV_NV_shader_subgroup_partitioned",
"SPV_EXT_descriptor_indexing",
"SPV_NV_fragment_shader_barycentric",
"SPV_NV_compute_shader_derivatives",
"SPV_NV_shader_image_footprint",
"SPV_NV_shading_rate",
"SPV_NV_mesh_shader",
"SPV_EXT_mesh_shader",
"SPV_NV_ray_tracing",
"SPV_KHR_ray_query",
"SPV_EXT_fragment_invocation_density",
"SPV_EXT_physical_storage_buffer",
"SPV_KHR_physical_storage_buffer",
"SPV_KHR_terminate_invocation",
"SPV_KHR_subgroup_uniform_control_flow",
"SPV_KHR_integer_dot_product",
"SPV_EXT_shader_image_int64",
"SPV_KHR_non_semantic_info",
"SPV_KHR_uniform_group_instructions",
"SPV_KHR_fragment_shader_barycentric",
"SPV_KHR_vulkan_memory_model",
"SPV_NV_bindless_texture",
"SPV_EXT_shader_atomic_float_add",
"SPV_EXT_fragment_shader_interlock",
"SPV_KHR_compute_shader_derivatives",
"SPV_NV_cooperative_matrix",
"SPV_KHR_cooperative_matrix",
"SPV_KHR_ray_tracing_position_fetch",
"SPV_AMDX_shader_enqueue",
"SPV_KHR_fragment_shading_rate",
"SPV_KHR_ray_tracing",
"SPV_KHR_quad_control",
"SPV_GOOGLE_user_type",
"SPV_NV_shader_invocation_reorder",
"SPV_NV_cluster_acceleration_structure",
"SPV_NV_linear_swept_spheres",
});
}
bool LocalSingleStoreElimPass::ProcessVariable(Instruction* var_inst) {
std::vector<Instruction*> users;

View File

@@ -100,17 +100,25 @@ Instruction* MemPass::GetPtr(uint32_t ptrId, uint32_t* varId) {
Instruction* ptrInst = get_def_use_mgr()->GetDef(*varId);
Instruction* varInst;
if (ptrInst->opcode() == spv::Op::OpConstantNull) {
*varId = 0;
return ptrInst;
switch (ptrInst->opcode()) {
case spv::Op::OpVariable:
case spv::Op::OpFunctionParameter:
varInst = ptrInst;
break;
case spv::Op::OpAccessChain:
case spv::Op::OpInBoundsAccessChain:
case spv::Op::OpPtrAccessChain:
case spv::Op::OpInBoundsPtrAccessChain:
case spv::Op::OpImageTexelPointer:
case spv::Op::OpCopyObject:
varInst = ptrInst->GetBaseAddress();
break;
default:
*varId = 0;
return ptrInst;
break;
}
if (ptrInst->opcode() != spv::Op::OpVariable &&
ptrInst->opcode() != spv::Op::OpFunctionParameter) {
varInst = ptrInst->GetBaseAddress();
} else {
varInst = ptrInst;
}
if (varInst->opcode() == spv::Op::OpVariable) {
*varId = varInst->result_id();
} else {

View File

@@ -90,13 +90,13 @@ Function* PrivateToLocalPass::FindLocalFunction(const Instruction& inst) const {
Function* target_function = nullptr;
context()->get_def_use_mgr()->ForEachUser(
inst.result_id(),
[&target_function, &found_first_use, this](Instruction* use) {
[&target_function, &found_first_use, inst, this](Instruction* use) {
BasicBlock* current_block = context()->get_instr_block(use);
if (current_block == nullptr) {
return;
}
if (!IsValidUse(use)) {
if (!IsValidUse(use, inst.result_id())) {
found_first_use = true;
target_function = nullptr;
return;
@@ -153,7 +153,8 @@ uint32_t PrivateToLocalPass::GetNewType(uint32_t old_type_id) {
return new_type_id;
}
bool PrivateToLocalPass::IsValidUse(const Instruction* inst) const {
bool PrivateToLocalPass::IsValidUse(const Instruction* inst,
uint32_t private_variable_id) const {
// The cases in this switch have to match the cases in |UpdateUse|.
// If we don't know how to update it, it is not valid.
if (inst->GetCommonDebugOpcode() == CommonDebugInfoDebugGlobalVariable) {
@@ -161,13 +162,14 @@ bool PrivateToLocalPass::IsValidUse(const Instruction* inst) const {
}
switch (inst->opcode()) {
case spv::Op::OpLoad:
case spv::Op::OpStore:
case spv::Op::OpImageTexelPointer: // Treat like a load
return true;
case spv::Op::OpStore:
return inst->GetOperand(1).AsId() != private_variable_id;
case spv::Op::OpAccessChain:
return context()->get_def_use_mgr()->WhileEachUser(
inst, [this](const Instruction* user) {
if (!IsValidUse(user)) return false;
inst, [this, inst](const Instruction* user) {
if (!IsValidUse(user, inst->result_id())) return false;
return true;
});
case spv::Op::OpName:

View File

@@ -53,7 +53,7 @@ class PrivateToLocalPass : public Pass {
// Returns true if |inst| is a valid use of a pointer. In this case, a
// valid use is one where the transformation is able to rewrite the type to
// match a change in storage class of the original variable.
bool IsValidUse(const Instruction* inst) const;
bool IsValidUse(const Instruction* inst, uint32_t private_variable_id) const;
// Given the result id of a pointer type, |old_type_id|, this function
// returns the id of the same pointer type except the storage class has

View File

@@ -320,6 +320,14 @@ std::string Float::str() const {
assert(width_ == 16);
oss << "bfloat16";
break;
case spv::FPEncoding::Float8E4M3EXT:
assert(width_ == 8);
oss << "fp8e4m3";
break;
case spv::FPEncoding::Float8E5M2EXT:
assert(width_ == 8);
oss << "fp8e5m2";
break;
default:
oss << "float" << width_;
break;

View File

@@ -43,12 +43,35 @@ void EmitNumericLiteral(std::ostream* out, const spv_parsed_instruction_t& inst,
*out << word;
break;
case SPV_NUMBER_FLOATING:
if (operand.number_bit_width == 16) {
*out << spvtools::utils::FloatProxy<spvtools::utils::Float16>(
uint16_t(word & 0xFFFF));
} else {
// Assume 32-bit floats.
*out << spvtools::utils::FloatProxy<float>(word);
switch (operand.fp_encoding) {
case SPV_FP_ENCODING_IEEE754_BINARY16:
*out << spvtools::utils::FloatProxy<spvtools::utils::Float16>(
uint16_t(word & 0xFFFF));
break;
case SPV_FP_ENCODING_IEEE754_BINARY32:
*out << spvtools::utils::FloatProxy<float>(word);
break;
case SPV_FP_ENCODING_FLOAT8_E4M3:
*out << spvtools::utils::FloatProxy<spvtools::utils::Float8_E4M3>(
uint8_t(word & 0xFF));
break;
case SPV_FP_ENCODING_FLOAT8_E5M2:
*out << spvtools::utils::FloatProxy<spvtools::utils::Float8_E5M2>(
uint8_t(word & 0xFF));
break;
// TODO Bfloat16
case SPV_FP_ENCODING_UNKNOWN:
switch (operand.number_bit_width) {
case 16:
*out << spvtools::utils::FloatProxy<spvtools::utils::Float16>(
uint16_t(word & 0xFFFF));
break;
case 32:
*out << spvtools::utils::FloatProxy<float>(word);
break;
}
default:
break;
}
break;
default:

View File

@@ -254,13 +254,13 @@ spv_result_t AssemblyContext::binaryEncodeNumericLiteral(
<< "Unexpected numeric literal type";
case IdTypeClass::kScalarIntegerType:
if (type.isSigned) {
number_type = {type.bitwidth, SPV_NUMBER_SIGNED_INT};
number_type = {type.bitwidth, SPV_NUMBER_SIGNED_INT, type.encoding};
} else {
number_type = {type.bitwidth, SPV_NUMBER_UNSIGNED_INT};
number_type = {type.bitwidth, SPV_NUMBER_UNSIGNED_INT, type.encoding};
}
break;
case IdTypeClass::kScalarFloatType:
number_type = {type.bitwidth, SPV_NUMBER_FLOATING};
number_type = {type.bitwidth, SPV_NUMBER_FLOATING, type.encoding};
break;
case IdTypeClass::kBottom:
// kBottom means the type is unknown and we need to infer the type before
@@ -270,11 +270,11 @@ spv_result_t AssemblyContext::binaryEncodeNumericLiteral(
// signed integer, otherwise an unsigned integer.
uint32_t bitwidth = static_cast<uint32_t>(assumedBitWidth(type));
if (strchr(val, '.')) {
number_type = {bitwidth, SPV_NUMBER_FLOATING};
number_type = {bitwidth, SPV_NUMBER_FLOATING, type.encoding};
} else if (type.isSigned || val[0] == '-') {
number_type = {bitwidth, SPV_NUMBER_SIGNED_INT};
number_type = {bitwidth, SPV_NUMBER_SIGNED_INT, type.encoding};
} else {
number_type = {bitwidth, SPV_NUMBER_UNSIGNED_INT};
number_type = {bitwidth, SPV_NUMBER_UNSIGNED_INT, type.encoding};
}
break;
}
@@ -330,14 +330,27 @@ spv_result_t AssemblyContext::recordTypeDefinition(
if (pInst->words.size() != 4)
return diagnostic() << "Invalid OpTypeInt instruction";
types_[value] = {pInst->words[2], pInst->words[3] != 0,
IdTypeClass::kScalarIntegerType};
IdTypeClass::kScalarIntegerType, SPV_FP_ENCODING_UNKNOWN};
} else if (pInst->opcode == spv::Op::OpTypeFloat) {
if ((pInst->words.size() != 3) && (pInst->words.size() != 4))
return diagnostic() << "Invalid OpTypeFloat instruction";
// TODO(kpet) Do we need to record the FP Encoding here?
types_[value] = {pInst->words[2], false, IdTypeClass::kScalarFloatType};
spv_fp_encoding_t enc = SPV_FP_ENCODING_UNKNOWN;
if (pInst->words.size() >= 4) {
const spvtools::OperandDesc* desc;
spv_result_t status = spvtools::LookupOperand(SPV_OPERAND_TYPE_FPENCODING,
pInst->words[3], &desc);
if (status == SPV_SUCCESS) {
enc = spvFPEncodingFromOperandFPEncoding(
static_cast<spv::FPEncoding>(desc->value));
} else {
return diagnostic() << "Invalid OpTypeFloat encoding";
}
}
types_[value] = {pInst->words[2], false, IdTypeClass::kScalarFloatType,
enc};
} else {
types_[value] = {0, false, IdTypeClass::kOtherType};
types_[value] = {0, false, IdTypeClass::kOtherType,
SPV_FP_ENCODING_UNKNOWN};
}
return SPV_SUCCESS;
}

View File

@@ -47,6 +47,7 @@ struct IdType {
uint32_t bitwidth; // Safe to assume that we will not have > 2^32 bits.
bool isSigned; // This is only significant if type_class is integral.
IdTypeClass type_class;
spv_fp_encoding_t encoding;
};
// Default equality operator for IdType. Tests if all members are the same.

View File

@@ -36,6 +36,50 @@
namespace spvtools {
namespace utils {
// Raw storage for an 8-bit float in the E4M3 layout: 1 sign bit (0x80),
// 4 exponent bits and 3 fraction bits. The class only carries the bit
// pattern; it performs no arithmetic.
class Float8_E4M3 {
 public:
  // Wraps the given raw bit pattern.
  Float8_E4M3(uint8_t v) : val(v) {}
  Float8_E4M3() = default;
  // Returns true if the given value is a NaN, i.e. every exponent and
  // fraction bit is set. The sign bit is ignored.
  static bool isNan(const Float8_E4M3& f) { return (f.val & 0x7f) == 0x7f; }
  // Returns true if the given value is any kind of infinity.
  static bool isInfinity(const Float8_E4M3&) {
    return false;  // E4M3 has no infinity representation
  }
  Float8_E4M3(const Float8_E4M3& other) { val = other.val; }
  // Returns the raw bit pattern.
  uint8_t get_value() const { return val; }

  // Returns the maximum normal value (bit pattern 0x7e).
  static Float8_E4M3 max() { return Float8_E4M3(0x7e); }
  // Returns the lowest value, i.e. the most negative finite one: max() with
  // the sign bit set. (Previously this returned 0x8, which is the smallest
  // *positive* normal value and is not a valid clamp for negative overflow.)
  static Float8_E4M3 lowest() { return Float8_E4M3(0xfe); }

 private:
  uint8_t val;
};
// Raw storage for an 8-bit float in the E5M2 layout: 1 sign bit (0x80),
// 5 exponent bits (0x7c) and 2 fraction bits (0x03). The class only carries
// the bit pattern; it performs no arithmetic.
class Float8_E5M2 {
 public:
  // Wraps the given raw bit pattern.
  Float8_E5M2(uint8_t v) : val(v) {}
  Float8_E5M2() = default;
  // Returns true if the given value is a NaN: all exponent bits set and a
  // non-zero fraction. The sign bit is ignored.
  static bool isNan(const Float8_E5M2& f) {
    return ((f.val & 0x7c) == 0x7c) && ((f.val & 0x3) != 0);
  }
  // Returns true if the given value is any kind of infinity: all exponent
  // bits set and a zero fraction, with either sign.
  static bool isInfinity(const Float8_E5M2& f) {
    return (f.val & 0x7f) == 0x7c;
  }
  Float8_E5M2(const Float8_E5M2& other) { val = other.val; }
  // Returns the raw bit pattern.
  uint8_t get_value() const { return val; }

  // Returns the maximum normal value (bit pattern 0x7b).
  static Float8_E5M2 max() { return Float8_E5M2(0x7b); }
  // Returns the lowest value, i.e. the most negative finite one: max() with
  // the sign bit set. (Previously this returned 0x4, which is the smallest
  // *positive* normal value, so clamping a negative overflow to lowest()
  // produced a tiny positive number.)
  static Float8_E5M2 lowest() { return Float8_E5M2(0xfb); }

 private:
  uint8_t val;
};
class Float16 {
public:
Float16(uint16_t v) : val(v) {}
@@ -110,6 +154,46 @@ struct FloatProxyTraits<double> {
static uint32_t width() { return 64u; }
};
// FloatProxyTraits specialization for the 8-bit E4M3 format. Every query is
// forwarded to the matching static member of Float8_E4M3.
template <>
struct FloatProxyTraits<Float8_E4M3> {
// Unsigned integer type that holds the raw bits of the value.
using uint_type = uint8_t;
// Returns true if the given value is a NaN.
static bool isNan(Float8_E4M3 f) { return Float8_E4M3::isNan(f); }
// Returns true if the given value is any kind of infinity.
static bool isInfinity(Float8_E4M3 f) { return Float8_E4M3::isInfinity(f); }
// Returns the maximum normal value.
static Float8_E4M3 max() { return Float8_E4M3::max(); }
// Returns whatever Float8_E4M3::lowest() reports as the lowest value.
static Float8_E4M3 lowest() { return Float8_E4M3::lowest(); }
// Returns the value as the native floating point format, built directly
// from the raw bits.
static Float8_E4M3 getAsFloat(const uint_type& t) { return Float8_E4M3(t); }
// Returns the bits from the given floating point number.
static uint_type getBitsFromFloat(const Float8_E4M3& t) {
return t.get_value();
}
// Returns the bitwidth.
static uint32_t width() { return 8u; }
};
// FloatProxyTraits specialization for the 8-bit E5M2 format. Every query is
// forwarded to the matching static member of Float8_E5M2.
template <>
struct FloatProxyTraits<Float8_E5M2> {
// Unsigned integer type that holds the raw bits of the value.
using uint_type = uint8_t;
// Returns true if the given value is a NaN.
static bool isNan(Float8_E5M2 f) { return Float8_E5M2::isNan(f); }
// Returns true if the given value is any kind of infinity.
static bool isInfinity(Float8_E5M2 f) { return Float8_E5M2::isInfinity(f); }
// Returns the maximum normal value.
static Float8_E5M2 max() { return Float8_E5M2::max(); }
// Returns whatever Float8_E5M2::lowest() reports as the lowest value.
static Float8_E5M2 lowest() { return Float8_E5M2::lowest(); }
// Returns the value as the native floating point format, built directly
// from the raw bits.
static Float8_E5M2 getAsFloat(const uint_type& t) { return Float8_E5M2(t); }
// Returns the bits from the given floating point number.
static uint_type getBitsFromFloat(const Float8_E5M2& t) {
return t.get_value();
}
// Returns the bitwidth.
static uint32_t width() { return 8u; }
};
template <>
struct FloatProxyTraits<Float16> {
using uint_type = uint16_t;
@@ -216,6 +300,7 @@ struct HexFloatTraits {
using int_type = void;
// The numerical type that this HexFloat represents.
using underlying_type = void;
using underlying_typetraits = void;
// The type needed to construct the underlying type.
using native_type = void;
// The number of bits that are actually relevant in the uint_type.
@@ -229,6 +314,8 @@ struct HexFloatTraits {
// The bias of the exponent. (How much we need to subtract from the stored
// value to get the correct value.)
static const uint32_t exponent_bias = 0;
static const bool has_infinity = true;
static const uint32_t NaN_pattern = 0;
};
// Traits for IEEE float.
@@ -238,11 +325,14 @@ struct HexFloatTraits<FloatProxy<float>> {
using uint_type = uint32_t;
using int_type = int32_t;
using underlying_type = FloatProxy<float>;
using underlying_typetraits = FloatProxyTraits<float>;
using native_type = float;
static const uint_type num_used_bits = 32;
static const uint_type num_exponent_bits = 8;
static const uint_type num_fraction_bits = 23;
static const uint_type exponent_bias = 127;
static const bool has_infinity = true;
static const uint_type NaN_pattern = 0x7f80000;
};
// Traits for IEEE double.
@@ -252,11 +342,48 @@ struct HexFloatTraits<FloatProxy<double>> {
using uint_type = uint64_t;
using int_type = int64_t;
using underlying_type = FloatProxy<double>;
using underlying_typetraits = FloatProxyTraits<double>;
using native_type = double;
static const uint_type num_used_bits = 64;
static const uint_type num_exponent_bits = 11;
static const uint_type num_fraction_bits = 52;
static const uint_type exponent_bias = 1023;
static const bool has_infinity = true;
static const uint_type NaN_pattern = 0x7FF0000000000000;
};
// Traits for FP8 E4M3.
// 1 sign bit, 4 exponent bits, 3 fractional bits.
// The format has no infinity encoding; the all-ones exponent/fraction
// pattern is its NaN.
template <>
struct HexFloatTraits<FloatProxy<Float8_E4M3>> {
// Integer types wide enough to hold the raw bits.
using uint_type = uint8_t;
using int_type = int8_t;
// The numerical type that this HexFloat represents, and its traits.
using underlying_type = FloatProxy<Float8_E4M3>;
using underlying_typetraits = FloatProxyTraits<Float8_E4M3>;
// The type needed to construct the underlying type.
using native_type = uint8_t;
// The number of bits that are actually relevant in the uint_type.
static const uint_type num_used_bits = 8;
static const uint_type num_exponent_bits = 4;
static const uint_type num_fraction_bits = 3;
// The bias of the exponent.
static const uint_type exponent_bias = 7;
// E4M3 cannot represent infinity.
static const bool has_infinity = false;
// NaN has every exponent and fraction bit set.
static const uint_type NaN_pattern = 0x7F;
};
// Traits for FP8 E5M2.
// 1 sign bit, 5 exponent bits, 2 fractional bits.
template <>
struct HexFloatTraits<FloatProxy<Float8_E5M2>> {
// Integer types wide enough to hold the raw bits.
using uint_type = uint8_t;
using int_type = int8_t;
// The numerical type that this HexFloat represents, and its traits.
using underlying_type = FloatProxy<Float8_E5M2>;
using underlying_typetraits = FloatProxyTraits<Float8_E5M2>;
// The type needed to construct the underlying type.
using native_type = uint8_t;
// The number of bits that are actually relevant in the uint_type.
static const uint_type num_used_bits = 8;
static const uint_type num_exponent_bits = 5;
static const uint_type num_fraction_bits = 2;
// The bias of the exponent.
static const uint_type exponent_bias = 15;
// E5M2 has an infinity encoding.
static const bool has_infinity = true;
// Bits shared by every NaN encoding: all exponent bits set. A NaN
// additionally needs a non-zero fraction, which this pattern does not supply.
static const uint_type NaN_pattern = 0x7c;
};
// Traits for IEEE half.
@@ -265,12 +392,15 @@ template <>
struct HexFloatTraits<FloatProxy<Float16>> {
using uint_type = uint16_t;
using int_type = int16_t;
using underlying_type = uint16_t;
using underlying_type = FloatProxy<Float16>;
using underlying_typetraits = FloatProxyTraits<Float16>;
using native_type = uint16_t;
static const uint_type num_used_bits = 16;
static const uint_type num_exponent_bits = 5;
static const uint_type num_fraction_bits = 10;
static const uint_type exponent_bias = 15;
static const bool has_infinity = true;
static const uint_type NaN_pattern = 0x7c00;
};
enum class round_direction {
@@ -291,6 +421,7 @@ class HexFloat {
using int_type = typename Traits::int_type;
using underlying_type = typename Traits::underlying_type;
using native_type = typename Traits::native_type;
using traits = Traits;
explicit HexFloat(T f) : value_(f) {}
@@ -493,9 +624,9 @@ class HexFloat {
struct negatable_left_shift {
static uint_type val(uint_type val) {
if (N > 0) {
return static_cast<uint_type>(val << N);
return static_cast<uint_type>(static_cast<uint64_t>(val) << N);
} else {
return static_cast<uint_type>(val >> N);
return static_cast<uint_type>(static_cast<uint64_t>(val) >> N);
}
}
};
@@ -519,28 +650,28 @@ class HexFloat {
template <int_type N, typename enable = void>
struct negatable_left_shift {
static uint_type val(uint_type val) {
return static_cast<uint_type>(val >> -N);
return static_cast<uint_type>(static_cast<uint64_t>(val) >> -N);
}
};
template <int_type N>
struct negatable_left_shift<N, typename std::enable_if<N >= 0>::type> {
static uint_type val(uint_type val) {
return static_cast<uint_type>(val << N);
return static_cast<uint_type>(static_cast<uint64_t>(val) << N);
}
};
template <int_type N, typename enable = void>
struct negatable_right_shift {
static uint_type val(uint_type val) {
return static_cast<uint_type>(val << -N);
return static_cast<uint_type>(static_cast<uint64_t>(val) << -N);
}
};
template <int_type N>
struct negatable_right_shift<N, typename std::enable_if<N >= 0>::type> {
static uint_type val(uint_type val) {
return static_cast<uint_type>(val >> N);
return static_cast<uint_type>(static_cast<uint64_t>(val) >> N);
}
};
#endif
@@ -639,6 +770,9 @@ class HexFloat {
// underflow to (0 or min depending on rounding) if the number underflows.
template <typename other_T>
void castTo(other_T& other, round_direction round_dir) {
using other_traits = typename other_T::traits;
using other_underlyingtraits = typename other_traits::underlying_typetraits;
other = other_T(static_cast<typename other_T::native_type>(0));
bool negate = isNegative();
if (getUnsignedBits() == 0) {
@@ -664,18 +798,24 @@ class HexFloat {
}
}
bool is_nan =
(getBits() & exponent_mask) == exponent_mask && significand != 0;
bool is_nan = T(getBits()).isNan();
bool is_inf =
!is_nan &&
((exponent + carried) > static_cast<int_type>(other_T::exponent_bias) ||
(significand == 0 && (getBits() & exponent_mask) == exponent_mask));
T(getBits()).isInfinity());
// If we are Nan or Inf we should pass that through.
if (is_inf) {
other.set_value(typename other_T::underlying_type(
static_cast<typename other_T::uint_type>(
(negate ? other_T::sign_mask : 0) | other_T::exponent_mask)));
if (other_traits::has_infinity)
other.set_value(typename other_T::underlying_type(
static_cast<typename other_T::uint_type>(
(negate ? other_T::sign_mask : 0) | other_T::exponent_mask)));
else // if the type doesnt use infinity, set it to max value (E4M3)
other.set_value(typename other_T::underlying_type(
static_cast<typename other_T::uint_type>(
(negate ? other_T::sign_mask : 0) |
other_underlyingtraits::getBitsFromFloat(
other_underlyingtraits::max()))));
return;
}
if (is_nan) {
@@ -690,7 +830,8 @@ class HexFloat {
// just set the last bit.
other.set_value(typename other_T::underlying_type(
static_cast<typename other_T::uint_type>(
(negate ? other_T::sign_mask : 0) | other_T::exponent_mask |
other_traits::NaN_pattern | (negate ? other_T::sign_mask : 0) |
other_T::exponent_mask |
(shifted_significand == 0 ? 0x1 : shifted_significand))));
return;
}
@@ -738,8 +879,8 @@ inline uint8_t get_nibble_from_character(int character) {
template <typename T, typename Traits>
std::ostream& operator<<(std::ostream& os, const HexFloat<T, Traits>& value) {
using HF = HexFloat<T, Traits>;
using uint_type = typename HF::uint_type;
using int_type = typename HF::int_type;
using uint_type = uint64_t;
using int_type = int64_t;
static_assert(HF::num_used_bits != 0,
"num_used_bits must be non-zero for a valid float");
@@ -889,12 +1030,86 @@ ParseNormalFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>(
// Overflow on 16-bit behaves the same as for 32- and 64-bit: set the
// fail bit and set the lowest or highest value.
// /!\ We get an error if there is no overflow but the value is infinity.
// Is it what we want?
if (Float16::isInfinity(value.value().getAsFloat())) {
value.set_value(value.isNegative() ? Float16::lowest() : Float16::max());
is.setstate(std::ios_base::failbit);
}
return is;
}
// Specialization of ParseNormalFloat for FloatProxy<Float8_E4M3> values.
// The text is first parsed as a 32-bit float and the result is then narrowed
// to Float8_E4M3, rounding towards zero.
// If negate_value is true then the number may not have a leading minus or
// plus, and if it successfully parses, then the number is negated before
// being stored into the value parameter.
// E4M3 has no infinity, so a parsed value above 448 or below -448 sets the
// fail bit on the stream and stores the maximum value with the matching sign.
// TODO(dneto): Promise C++11 standard behavior in how the value is set in
// the error case, but only after all target platforms implement it correctly.
// In particular, the Microsoft C++ runtime appears to be out of spec.
template <>
inline std::istream& ParseNormalFloat<FloatProxy<Float8_E4M3>,
                                      HexFloatTraits<FloatProxy<Float8_E4M3>>>(
    std::istream& is, bool negate_value,
    HexFloat<FloatProxy<Float8_E4M3>, HexFloatTraits<FloatProxy<Float8_E4M3>>>&
        value) {
  // Parse at 32-bit precision first.
  HexFloat<FloatProxy<float>> parsed(0.0f);
  ParseNormalFloat(is, negate_value, parsed);

  const float as_float = parsed.value().getAsFloat();
  const bool above_range = as_float > 448.0f;
  const bool below_range = as_float < -448.0f;
  if (above_range || below_range) {
    // Out of range: report failure and clamp to the signed maximum.
    is.setstate(std::ios_base::failbit);
    if (above_range) {
      value.set_value(Float8_E4M3::max());
    } else {
      value.set_value(0x80 | Float8_E4M3::max().get_value());
    }
    return is;
  }

  // In range (or NaN): narrow to E4M3, rounding toward zero.
  parsed.castTo(value, round_direction::kToZero);
  return is;
}
// Specialization of ParseNormalFloat for FloatProxy<Float8_E5M2> values.
// The text is first parsed as a 32-bit float and the result is then narrowed
// to Float8_E5M2, rounding towards zero.
// If negate_value is true then the number may not have a leading minus or
// plus, and if it successfully parses, then the number is negated before
// being stored into the value parameter.
// If the narrowed value is an infinity, the fail bit is set on the stream and
// the value is replaced by Float8_E5M2::lowest() when negative or
// Float8_E5M2::max() otherwise.
// TODO(dneto): Promise C++11 standard behavior in how the value is set in
// the error case, but only after all target platforms implement it correctly.
// In particular, the Microsoft C++ runtime appears to be out of spec.
template <>
inline std::istream& ParseNormalFloat<FloatProxy<Float8_E5M2>,
                                      HexFloatTraits<FloatProxy<Float8_E5M2>>>(
    std::istream& is, bool negate_value,
    HexFloat<FloatProxy<Float8_E5M2>, HexFloatTraits<FloatProxy<Float8_E5M2>>>&
        value) {
  // Parse at 32-bit precision first.
  HexFloat<FloatProxy<float>> parsed(0.0f);
  ParseNormalFloat(is, negate_value, parsed);

  // Narrow to E5M2, rounding toward zero; overflow shows up as infinity.
  parsed.castTo(value, round_direction::kToZero);

  if (!Float8_E5M2::isInfinity(value.value().getAsFloat())) {
    return is;
  }

  // Same policy as for 32- and 64-bit: replace the infinity with a finite
  // extreme and flag the stream as failed.
  value.set_value(value.isNegative() ? Float8_E5M2::lowest()
                                     : Float8_E5M2::max());
  is.setstate(std::ios_base::failbit);
  return is;
}
namespace detail {
@@ -1253,6 +1468,20 @@ inline std::ostream& operator<<<Float16>(std::ostream& os,
return os;
}
// Streams a Float8_E4M3 proxy by wrapping it in a HexFloat and delegating to
// the HexFloat stream inserter.
template <>
inline std::ostream& operator<< <Float8_E4M3>(
    std::ostream& os, const FloatProxy<Float8_E4M3>& value) {
  const HexFloat<FloatProxy<Float8_E4M3>> as_hex_float(value);
  os << as_hex_float;
  return os;
}
// Streams a Float8_E5M2 proxy by wrapping it in a HexFloat and delegating to
// the HexFloat stream inserter.
template <>
inline std::ostream& operator<< <Float8_E5M2>(
    std::ostream& os, const FloatProxy<Float8_E5M2>& value) {
  const HexFloat<FloatProxy<Float8_E5M2>> as_hex_float(value);
  os << as_hex_float;
  return os;
}
} // namespace utils
} // namespace spvtools

View File

@@ -131,6 +131,19 @@ EncodeNumberStatus ParseAndEncodeIntegerNumber(
return EncodeNumberStatus::kSuccess;
}
// Picks the floating point encoding to use for |type|. An explicitly recorded
// encoding always wins; otherwise bit widths 16, 32 and 64 map to the
// corresponding IEEE 754 binary encoding, and any other width yields
// SPV_FP_ENCODING_UNKNOWN.
spv_fp_encoding_t DeduceEncoding(const NumberType& type) {
  const spv_fp_encoding_t recorded = type.encoding;
  if (recorded != SPV_FP_ENCODING_UNKNOWN) {
    return recorded;
  }

  if (type.bitwidth == 16) {
    return SPV_FP_ENCODING_IEEE754_BINARY16;
  }
  if (type.bitwidth == 32) {
    return SPV_FP_ENCODING_IEEE754_BINARY32;
  }
  if (type.bitwidth == 64) {
    return SPV_FP_ENCODING_IEEE754_BINARY64;
  }
  return SPV_FP_ENCODING_UNKNOWN;
}
EncodeNumberStatus ParseAndEncodeFloatingPointNumber(
const char* text, const NumberType& type,
std::function<void(uint32_t)> emit, std::string* error_msg) {
@@ -145,8 +158,35 @@ EncodeNumberStatus ParseAndEncodeFloatingPointNumber(
}
const auto bit_width = AssumedBitWidth(type);
switch (bit_width) {
case 16: {
switch (DeduceEncoding(type)) {
case SPV_FP_ENCODING_FLOAT8_E4M3: {
HexFloat<FloatProxy<Float8_E4M3>> hVal(0);
if (!ParseNumber(text, &hVal)) {
ErrorMsgStream(error_msg) << "Invalid E4M3 float literal: " << text;
return EncodeNumberStatus::kInvalidText;
}
// getAsFloat will return the Float8_E4M3 value, and get_value
// will return a uint8_t representing the bits of the float.
// The encoding is therefore correct from the perspective of the SPIR-V
// spec since the top 24 bits will be 0.
emit(static_cast<uint32_t>(hVal.value().getAsFloat().get_value()));
return EncodeNumberStatus::kSuccess;
} break;
case SPV_FP_ENCODING_FLOAT8_E5M2: {
HexFloat<FloatProxy<Float8_E5M2>> hVal(0);
if (!ParseNumber(text, &hVal)) {
ErrorMsgStream(error_msg) << "Invalid E5M2 float literal: " << text;
return EncodeNumberStatus::kInvalidText;
}
// getAsFloat will return the Float8_E5M2 value, and get_value
// will return a uint8_t representing the bits of the float.
// The encoding is therefore correct from the perspective of the SPIR-V
// spec since the top 24 bits will be 0.
emit(static_cast<uint32_t>(hVal.value().getAsFloat().get_value()));
return EncodeNumberStatus::kSuccess;
} break;
case SPV_FP_ENCODING_BFLOAT16: // FIXME this likely needs separate handling
case SPV_FP_ENCODING_IEEE754_BINARY16: {
HexFloat<FloatProxy<Float16>> hVal(0);
if (!ParseNumber(text, &hVal)) {
ErrorMsgStream(error_msg) << "Invalid 16-bit float literal: " << text;
@@ -159,7 +199,7 @@ EncodeNumberStatus ParseAndEncodeFloatingPointNumber(
emit(static_cast<uint32_t>(hVal.value().getAsFloat().get_value()));
return EncodeNumberStatus::kSuccess;
} break;
case 32: {
case SPV_FP_ENCODING_IEEE754_BINARY32: {
HexFloat<FloatProxy<float>> fVal(0.0f);
if (!ParseNumber(text, &fVal)) {
ErrorMsgStream(error_msg) << "Invalid 32-bit float literal: " << text;
@@ -168,7 +208,7 @@ EncodeNumberStatus ParseAndEncodeFloatingPointNumber(
emit(BitwiseCast<uint32_t>(fVal));
return EncodeNumberStatus::kSuccess;
} break;
case 64: {
case SPV_FP_ENCODING_IEEE754_BINARY64: {
HexFloat<FloatProxy<double>> dVal(0.0);
if (!ParseNumber(text, &dVal)) {
ErrorMsgStream(error_msg) << "Invalid 64-bit float literal: " << text;

View File

@@ -32,6 +32,7 @@ struct NumberType {
// SPV_NUMBER_NONE means the type is unknown and is invalid to be used with
// ParseAndEncode{|Integer|Floating}Number().
spv_number_kind_t kind;
spv_fp_encoding_t encoding;
};
// Returns true if the type is a scalar integer type.
@@ -160,6 +161,14 @@ bool CheckRangeAndIfHexThenSignExtend(T value, const NumberType& type,
return true;
}
// Compile-time predicate: |value| is true only when T is a HexFloat<U>
// instantiation (with its default traits argument) and false for every other
// type.
template <typename T>
struct IsHexFloat {
  static constexpr bool value = false;
};

template <typename T>
struct IsHexFloat<HexFloat<T>> {
  static constexpr bool value = true;
};
// Parses a numeric value of a given type from the given text. The number
// should take up the entire string, and should be within bounds for the target
// type. On success, returns true and populates the object referenced by
@@ -169,8 +178,10 @@ bool ParseNumber(const char* text, T* value_pointer) {
// C++11 doesn't define std::istringstream(int8_t&), so calling this method
// with a single-byte type leads to implementation-defined behaviour.
// Similarly for uint8_t.
static_assert(sizeof(T) > 1,
"Single-byte types are not supported in this parse method");
// HexFloat<T> overloads the operator
static_assert(sizeof(T) > 1 || IsHexFloat<T>::value,
"Single-byte types other than HexFloat<> are not supported in "
"this parse method");
if (!text) return false;
std::istringstream text_stream(text);

View File

@@ -233,7 +233,8 @@ spv_result_t ValidateDecorationTarget(ValidationState_t& _, spv::Decoration dec,
case spv::Decoration::DescriptorSet:
if (sc != spv::StorageClass::StorageBuffer &&
sc != spv::StorageClass::Uniform &&
sc != spv::StorageClass::UniformConstant) {
sc != spv::StorageClass::UniformConstant &&
sc != spv::StorageClass::TileAttachmentQCOM) {
return fail(6491) << "must be in the StorageBuffer, Uniform, or "
"UniformConstant storage class";
}

View File

@@ -590,6 +590,9 @@ class BuiltInsValidator {
spv_result_t ValidateBool(
const Decoration& decoration, const Instruction& inst,
const std::function<spv_result_t(const std::string& message)>& diag);
spv_result_t ValidateBlockBoolOrArrayedBool(
const Decoration& decoration, const Instruction& inst,
const std::function<spv_result_t(const std::string& message)>& diag);
spv_result_t ValidateI(
const Decoration& decoration, const Instruction& inst,
const std::function<spv_result_t(const std::string& message)>& diag);
@@ -820,6 +823,30 @@ spv_result_t BuiltInsValidator::ValidateBool(
return SPV_SUCCESS;
}
// Checks that the built-in described by |decoration| on |inst| is a scalar
// bool, optionally wrapped in a single array.
// When the underlying type is an OpTypeArray, the type taken from word 2 of
// the array definition is checked instead. When it is not an array, |inst|
// must carry the Block decoration. Every failure is reported through |diag|
// and its result returned; an error from GetUnderlyingType is returned as is.
spv_result_t BuiltInsValidator::ValidateBlockBoolOrArrayedBool(
    const Decoration& decoration, const Instruction& inst,
    const std::function<spv_result_t(const std::string& message)>& diag) {
  uint32_t type_id = 0;
  const spv_result_t lookup_status =
      GetUnderlyingType(_, decoration, inst, &type_id);
  if (lookup_status != SPV_SUCCESS) {
    return lookup_status;
  }

  const bool is_array = _.GetIdOpcode(type_id) == spv::Op::OpTypeArray;
  if (is_array) {
    // Look through the array at the type it wraps.
    type_id = _.FindDef(type_id)->word(2u);
  } else if (!_.HasDecoration(inst.id(), spv::Decoration::Block)) {
    // A non-arrayed bool is only accepted as part of a Block.
    return diag(GetDefinitionDesc(decoration, inst) +
                " Scalar boolean must be in a Block.");
  }

  if (_.IsBoolScalarType(type_id)) {
    return SPV_SUCCESS;
  }
  return diag(GetDefinitionDesc(decoration, inst) + " is not a bool scalar.");
}
spv_result_t BuiltInsValidator::ValidateI(
const Decoration& decoration, const Instruction& inst,
const std::function<spv_result_t(const std::string& message)>& diag) {
@@ -4339,7 +4366,7 @@ spv_result_t BuiltInsValidator::ValidateMeshShadingEXTBuiltinsAtDefinition(
}
break;
case spv::BuiltIn::CullPrimitiveEXT:
if (spv_result_t error = ValidateBool(
if (spv_result_t error = ValidateBlockBoolOrArrayedBool(
decoration, inst,
[this, &inst, &decoration,
&vuid](const std::string& message) -> spv_result_t {
@@ -4350,8 +4377,8 @@ spv_result_t BuiltInsValidator::ValidateMeshShadingEXTBuiltinsAtDefinition(
<< _.grammar().lookupOperandName(
SPV_OPERAND_TYPE_BUILT_IN,
(uint32_t)decoration.builtin())
<< " variable needs to be a boolean value "
"array."
<< " variable needs to be a either a boolean or an "
"array of booleans."
<< message;
})) {
return error;

View File

@@ -146,6 +146,7 @@ bool IsSupportOptionalVulkan_1_0(uint32_t capability) {
case spv::Capability::Float16:
case spv::Capability::Int8:
case spv::Capability::BFloat16TypeKHR:
case spv::Capability::Float8EXT:
return true;
default:
break;

View File

@@ -267,15 +267,12 @@ spv_result_t ConversionPass(ValidationState_t& _, const Instruction* inst) {
// Scalar type
const uint32_t resScalarType = _.GetComponentType(result_type);
const uint32_t inputScalartype = _.GetComponentType(input_type);
if (_.GetBitWidth(resScalarType) == _.GetBitWidth(inputScalartype))
if ((_.IsBfloat16ScalarType(resScalarType) &&
_.IsBfloat16ScalarType(inputScalartype)) ||
(!_.IsBfloat16ScalarType(inputScalartype) &&
!_.IsBfloat16ScalarType(resScalarType)))
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "Expected input to have different bit width from Result "
"Type: "
<< spvOpcodeString(opcode);
if (resScalarType == inputScalartype) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "Expected component type of Value to be different from "
"component type of Result Type: "
<< spvOpcodeString(opcode);
}
break;
}

View File

@@ -404,8 +404,7 @@ bool IsAlignedTo(uint32_t offset, uint32_t alignment) {
// or row major-ness.
spv_result_t checkLayout(uint32_t struct_id, const char* storage_class_str,
const char* decoration_str, bool blockRules,
bool scalar_block_layout,
uint32_t incoming_offset,
bool scalar_block_layout, uint32_t incoming_offset,
MemberConstraints& constraints,
ValidationState_t& vstate) {
if (vstate.options()->skip_block_layout) return SPV_SUCCESS;
@@ -1023,7 +1022,7 @@ spv_result_t CheckDecorationsOfEntryPoints(ValidationState_t& vstate) {
}
if (num_workgroup_variables_with_block > 1 &&
num_workgroup_variables_with_block !=
num_workgroup_variables_with_aliased) {
num_workgroup_variables_with_aliased) {
return vstate.diag(SPV_ERROR_INVALID_BINARY,
vstate.FindDef(entry_point))
<< "When declaring WorkgroupMemoryExplicitLayoutKHR, "
@@ -1246,10 +1245,10 @@ spv_result_t CheckDecorationsOfBuffers(ValidationState_t& vstate) {
}
// Prepare for messages
const char* sc_str =
uniform ? "Uniform"
: (push_constant ? "PushConstant"
: (workgroup ? "Workgroup"
: "StorageBuffer"));
uniform
? "Uniform"
: (push_constant ? "PushConstant"
: (workgroup ? "Workgroup" : "StorageBuffer"));
if (spvIsVulkanEnv(vstate.context()->target_env)) {
const bool block = hasDecoration(id, spv::Decoration::Block, vstate);
@@ -1765,6 +1764,7 @@ spv_result_t CheckNonWritableDecoration(ValidationState_t& vstate,
var_storage_class == spv::StorageClass::Private) &&
vstate.features().nonwritable_var_in_function_or_private) {
// New permitted feature in SPIR-V 1.4.
} else if (var_storage_class == spv::StorageClass::TileAttachmentQCOM) {
} else if (
// It may point to a UBO, SSBO, storage image, or raw access chain.
vstate.IsPointerToUniformBlock(type_id) ||
@@ -2030,7 +2030,8 @@ spv_result_t CheckRelaxPrecisionDecoration(ValidationState_t& vstate,
{ \
spv_result_t e##LINE = (X); \
if (e##LINE != SPV_SUCCESS) return e##LINE; \
} static_assert(true, "require extra semicolon")
} \
static_assert(true, "require extra semicolon")
#define PASS_OR_BAIL(X) PASS_OR_BAIL_AT_LINE(X, __LINE__)
// Check rules for decorations where we start from the decoration rather
@@ -2269,7 +2270,7 @@ spv_result_t CheckInvalidVulkanExplicitLayout(ValidationState_t& vstate) {
// For untyped pointers, check the type of the data operand for an
// invalid layout.
const auto sc = ptr_type->GetOperandAs<spv::StorageClass>(1);
const auto data_type_id = vstate.GetOperandTypeId(&inst, 2);
const auto data_type_id = vstate.GetOperandTypeId(&inst, 1);
if (!AllowsLayout(vstate, sc) &&
UsesExplicitLayout(vstate, data_type_id, cache)) {
fail_id = inst.GetOperandAs<uint32_t>(2);

View File

@@ -663,6 +663,25 @@ spv_result_t ValidateStorageClass(ValidationState_t& _,
<< _.getIdName(interface_var->id()) << " must not be declared "
<< "with a Storage Class of Input or Output.";
}
if (_.ContainsType(
result_type->GetOperandAs<uint32_t>(2),
[](const Instruction* inst) {
if (inst && inst->opcode() == spv::Op::OpTypeFloat) {
if (inst->words().size() > 3) {
auto encoding = inst->GetOperandAs<spv::FPEncoding>(2);
if ((encoding == spv::FPEncoding::Float8E4M3EXT) ||
(encoding == spv::FPEncoding::Float8E5M2EXT)) {
return true;
}
}
}
return false;
})) {
return _.diag(SPV_ERROR_INVALID_ID, interface_var)
<< "FP8 E4M3/E5M2 OpVariable <id> " // TODO VUID
<< _.getIdName(interface_var->id()) << " must not be declared "
<< "with a Storage Class of Input or Output.";
}
}
default:
break;

View File

@@ -74,6 +74,11 @@ spv_result_t InvalidTypePass(ValidationState_t& _, const Instruction* inst) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< spvOpcodeString(opcode) << " doesn't support BFloat16 type.";
}
if (_.IsFP8ScalarOrVectorType(result_type)) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< spvOpcodeString(opcode)
<< " doesn't support FP8 E4M3/E5M2 types.";
}
break;
}
@@ -84,6 +89,11 @@ spv_result_t InvalidTypePass(ValidationState_t& _, const Instruction* inst) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< spvOpcodeString(opcode) << " doesn't support BFloat16 type.";
}
if (_.IsFP8VectorType(data_type)) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< spvOpcodeString(opcode)
<< " doesn't support FP8 E4M3/E5M2 types.";
}
break;
}
// Relational and Logical Instructions
@@ -98,6 +108,11 @@ spv_result_t InvalidTypePass(ValidationState_t& _, const Instruction* inst) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< spvOpcodeString(opcode) << " doesn't support BFloat16 type.";
}
if (_.IsFP8ScalarOrVectorType(operand_type)) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< spvOpcodeString(opcode)
<< " doesn't support FP8 E4M3/E5M2 types.";
}
break;
}
@@ -108,6 +123,12 @@ spv_result_t InvalidTypePass(ValidationState_t& _, const Instruction* inst) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< spvOpcodeString(opcode) << " doesn't support BFloat16 type.";
}
if (_.IsFP8ScalarOrVectorType(value_type)) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< spvOpcodeString(opcode)
<< " doesn't support FP8 E4M3/E5M2 types.";
}
break;
}
@@ -124,6 +145,11 @@ spv_result_t InvalidTypePass(ValidationState_t& _, const Instruction* inst) {
<< spvOpcodeString(opcode)
<< " doesn't support BFloat16 type.";
}
if (_.IsFP8ScalarOrVectorType(res_component_type)) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< spvOpcodeString(opcode)
<< " doesn't support FP8 E4M3/E5M2 types.";
}
}
break;
}

View File

@@ -937,6 +937,65 @@ spv_result_t ValidateVariable(ValidationState_t& _, const Instruction* inst) {
}
}
if (_.HasCapability(spv::Capability::TileShadingQCOM) &&
storage_class == spv::StorageClass::TileAttachmentQCOM) {
if (result_type->opcode() == spv::Op::OpTypePointer) {
const auto pointee_type =
_.FindDef(result_type->GetOperandAs<uint32_t>(2));
if (pointee_type && pointee_type->opcode() == spv::Op::OpTypeImage) {
spv::Dim dim = static_cast<spv::Dim>(pointee_type->word(3));
if (dim != spv::Dim::Dim2D) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "Any OpTypeImage variable in the TileAttachmentQCOM "
"Storage Class must "
"have 2D as its dimension";
}
unsigned sampled = pointee_type->word(7);
if (sampled != 1 && sampled != 2) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "Any OpyTpeImage variable in the TileAttachmentQCOM "
"Storage Class must "
"have 1 or 2 as Image 'Sampled' parameter";
}
for (const auto& pair_o : inst->uses()) {
const auto* use_inst_o = pair_o.first;
if (use_inst_o->opcode() == spv::Op::OpLoad) {
for (const auto& pair_i : use_inst_o->uses()) {
const auto* use_inst_i = pair_i.first;
switch (use_inst_i->opcode()) {
case spv::Op::OpImageQueryFormat:
case spv::Op::OpImageQueryOrder:
case spv::Op::OpImageQuerySizeLod:
case spv::Op::OpImageQuerySize:
case spv::Op::OpImageQueryLod:
case spv::Op::OpImageQueryLevels:
case spv::Op::OpImageQuerySamples:
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "Any variable in the TileAttachmentQCOM Storage "
"Class must "
"not be consumed by an OpImageQuery* instruction";
default:
break;
}
}
}
}
}
}
if (!(_.HasDecoration(inst->id(), spv::Decoration::DescriptorSet) &&
_.HasDecoration(inst->id(), spv::Decoration::Binding))) {
return _.diag(SPV_ERROR_INVALID_ID, inst)
<< "Any variable in the TileAttachmentQCOM Storage Class must "
"be decorated with DescriptorSet and Binding";
}
if (_.HasDecoration(inst->id(), spv::Decoration::Component)) {
return _.diag(SPV_ERROR_INVALID_ID, inst)
<< "Any variable in the TileAttachmentQCOM Storage Class must "
"not be decorated with Component decoration";
}
}
return SPV_SUCCESS;
}

View File

@@ -311,17 +311,84 @@ spv_result_t ValidateEntryPoint(ValidationState_t& _, const Instruction* inst) {
}
}
}
if (!ok && _.HasCapability(spv::Capability::TileShadingQCOM)) {
ok =
execution_modes &&
execution_modes->count(spv::ExecutionMode::TileShadingRateQCOM);
}
if (!ok) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< _.VkErrorID(6426)
<< (_.HasCapability(spv::Capability::TileShadingQCOM)
? _.VkErrorID(10685)
: _.VkErrorID(6426))
<< "In the Vulkan environment, GLCompute execution model "
"entry points require either the LocalSize or "
"LocalSizeId execution mode or an object decorated with "
"WorkgroupSize must be specified.";
"entry points require either the "
<< (_.HasCapability(spv::Capability::TileShadingQCOM)
? "TileShadingRateQCOM, "
: "")
<< "LocalSize or LocalSizeId execution mode or an object "
"decorated with WorkgroupSize must be specified.";
}
}
if (_.HasCapability(spv::Capability::TileShadingQCOM)) {
if (execution_modes) {
if (execution_modes->count(
spv::ExecutionMode::TileShadingRateQCOM) &&
(execution_modes->count(spv::ExecutionMode::LocalSize) ||
execution_modes->count(spv::ExecutionMode::LocalSizeId))) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "If the TileShadingRateQCOM execution mode is used, "
<< "LocalSize and LocalSizeId must not be specified.";
}
if (execution_modes->count(
spv::ExecutionMode::NonCoherentTileAttachmentReadQCOM)) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "The NonCoherentTileAttachmentQCOM execution mode must "
"not be used in any stage other than fragment.";
}
}
} else {
if (execution_modes &&
execution_modes->count(spv::ExecutionMode::TileShadingRateQCOM)) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "If the TileShadingRateQCOM execution mode is used, the "
"TileShadingQCOM capability must be enabled.";
}
}
break;
default:
if (execution_modes &&
execution_modes->count(spv::ExecutionMode::TileShadingRateQCOM)) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "The TileShadingRateQCOM execution mode must not be used "
"in any stage other than compute.";
}
if (execution_model != spv::ExecutionModel::Fragment) {
if (execution_modes &&
execution_modes->count(
spv::ExecutionMode::NonCoherentTileAttachmentReadQCOM)) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "The NonCoherentTileAttachmentQCOM execution mode must "
"not be used in any stage other than fragment.";
}
if (_.HasCapability(spv::Capability::TileShadingQCOM)) {
return _.diag(SPV_ERROR_INVALID_CAPABILITY, inst)
<< "The TileShadingQCOM capability must not be enabled in "
"any stage other than compute or fragment.";
}
} else {
if (execution_modes &&
execution_modes->count(
spv::ExecutionMode::NonCoherentTileAttachmentReadQCOM)) {
if (!_.HasCapability(spv::Capability::TileShadingQCOM)) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "If the NonCoherentTileAttachmentReadQCOM execution "
"mode is used, the TileShadingQCOM capability must be "
"enabled.";
}
}
}
break;
}
}
@@ -758,6 +825,14 @@ spv_result_t ValidateExecutionMode(ValidationState_t& _,
<< "In the Vulkan environment, the PixelCenterInteger execution "
"mode must not be used.";
}
if (mode == spv::ExecutionMode::TileShadingRateQCOM) {
const auto rateX = inst->GetOperandAs<int>(2);
const auto rateY = inst->GetOperandAs<int>(3);
if ((rateX & (rateX - 1)) != 0 || (rateY & (rateY - 1)) != 0)
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "The TileShadingRateQCOM execution mode's x and y values "
"must be powers of 2.";
}
}
return SPV_SUCCESS;

View File

@@ -117,13 +117,6 @@ spv_result_t ValidateTypeFloat(ValidationState_t& _, const Instruction* inst) {
return SPV_SUCCESS;
}
auto operands = inst->words();
if (operands.size() > 3) {
if (operands[3] != 0) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "Current FPEncoding only supports BFloat16KHR.";
}
return SPV_SUCCESS;
}
if (num_bits == 16) {
// An absence of FP encoding implies IEEE 754. The Float16 and Float16Buffer
@@ -136,6 +129,32 @@ spv_result_t ValidateTypeFloat(ValidationState_t& _, const Instruction* inst) {
<< "type requires the Float16 or Float16Buffer capability,"
" or an extension that explicitly enables 16-bit floating point.";
}
if (num_bits == 8) {
if (!_.features().declare_float8_type) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "Using a 8-bit floating point "
<< "type requires the Float8EXT capability.";
}
if (!has_encoding) {
// we don't support fp8 without encoding
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "8-bit floating point type requires an encoding.";
}
const spvtools::OperandDesc* desc;
const std::set<spv::FPEncoding> known_encodings{
spv::FPEncoding::Float8E4M3EXT, spv::FPEncoding::Float8E5M2EXT};
spv_result_t status = spvtools::LookupOperand(SPV_OPERAND_TYPE_FPENCODING,
inst->words()[3], &desc);
if ((status != SPV_SUCCESS) ||
(known_encodings.find(static_cast<spv::FPEncoding>(desc->value)) ==
known_encodings.end())) {
return _.diag(SPV_ERROR_INVALID_DATA, inst)
<< "Unsupported 8-bit floating point encoding ("
<< desc->name().data() << ").";
}
return SPV_SUCCESS;
}
if (num_bits == 64) {
if (_.HasCapability(spv::Capability::Float64)) {
return SPV_SUCCESS;
@@ -664,6 +683,15 @@ spv_result_t ValidateTypeCooperativeMatrix(ValidationState_t& _,
}
}
if (_.IsFP8ScalarType(component_type_id)) {
if (!_.HasCapability(spv::Capability::Float8CooperativeMatrixEXT)) {
return _.diag(SPV_ERROR_INVALID_ID, inst)
<< "OpTypeCooperativeMatrix Component Type <id> "
<< _.getIdName(component_type_id)
<< "require Float8CooperativeMatrixEXT be declared.";
}
}
const auto scope_index = 2;
const auto scope_id = inst->GetOperandAs<uint32_t>(scope_index);
const auto scope = _.FindDef(scope_id);

View File

@@ -74,6 +74,7 @@ ModuleLayoutSection InstructionLayoutSection(
case spv::Op::OpMemberDecorateStringGOOGLE:
return kLayoutAnnotations;
case spv::Op::OpTypeForwardPointer:
case spv::Op::OpTypeTaskSequenceINTEL:
return kLayoutTypes;
case spv::Op::OpVariable:
case spv::Op::OpUntypedVariableKHR:
@@ -398,6 +399,9 @@ void ValidationState_t::RegisterCapability(spv::Capability cap) {
case spv::Capability::Float16Buffer:
features_.declare_float16_type = true;
break;
case spv::Capability::Float8EXT:
features_.declare_float8_type = true;
break;
case spv::Capability::StorageUniformBufferBlock16:
case spv::Capability::StorageUniform16:
case spv::Capability::StoragePushConstant16:
@@ -980,6 +984,37 @@ bool ValidationState_t::IsBfloat16VectorType(uint32_t id) const {
return false;
}
bool ValidationState_t::IsFP8ScalarType(uint32_t id) const {
const Instruction* inst = FindDef(id);
if (inst && inst->opcode() == spv::Op::OpTypeFloat) {
if (inst->words().size() > 3) {
auto encoding = inst->GetOperandAs<spv::FPEncoding>(2);
if ((encoding == spv::FPEncoding::Float8E4M3EXT) ||
(encoding == spv::FPEncoding::Float8E5M2EXT)) {
return true;
}
}
}
return false;
}
bool ValidationState_t::IsFP8VectorType(uint32_t id) const {
const Instruction* inst = FindDef(id);
if (!inst) {
return false;
}
if (inst->opcode() == spv::Op::OpTypeVector) {
return IsFP8ScalarType(GetComponentType(id));
}
return false;
}
// Returns true when id satisfies IsFP8ScalarType or, failing that,
// IsFP8VectorType.
bool ValidationState_t::IsFP8ScalarOrVectorType(uint32_t id) const {
  if (IsFP8ScalarType(id)) {
    return true;
  }
  return IsFP8VectorType(id);
}
bool ValidationState_t::IsFloatScalarType(uint32_t id) const {
const Instruction* inst = FindDef(id);
return inst && inst->opcode() == spv::Op::OpTypeFloat;
@@ -1904,6 +1939,7 @@ bool ValidationState_t::IsValidStorageClass(
case spv::StorageClass::HitObjectAttributeNV:
case spv::StorageClass::TileImageEXT:
case spv::StorageClass::NodePayloadAMDX:
case spv::StorageClass::TileAttachmentQCOM:
return true;
default:
return false;
@@ -2594,6 +2630,8 @@ std::string ValidationState_t::VkErrorID(uint32_t id,
return VUID_WRAP(VUID-StandaloneSpirv-Component-10583);
case 10684:
return VUID_WRAP(VUID-StandaloneSpirv-None-10684);
case 10685:
return VUID_WRAP(VUID-StandaloneSpirv-None-10685);
default:
return ""; // unknown id
}

View File

@@ -68,6 +68,7 @@ class ValidationState_t {
struct Feature {
bool declare_int16_type = false; // Allow OpTypeInt with 16 bit width?
bool declare_float16_type = false; // Allow OpTypeFloat with 16 bit width?
bool declare_float8_type = false; // Allow OpTypeFloat with 8 bit width?
bool free_fp_rounding_mode = false; // Allow the FPRoundingMode decoration
// and its values to be used without
// requiring any capability
@@ -637,6 +638,9 @@ class ValidationState_t {
bool IsScalarType(uint32_t id) const;
bool IsBfloat16ScalarType(uint32_t id) const;
bool IsBfloat16VectorType(uint32_t id) const;
// Returns true if id is an OpTypeFloat whose FP encoding operand is
// Float8E4M3EXT or Float8E5M2EXT.
bool IsFP8ScalarType(uint32_t id) const;
// Returns true if id is an OpTypeVector whose component type is an FP8 scalar
// type as reported by IsFP8ScalarType.
bool IsFP8VectorType(uint32_t id) const;
// Returns true if id is an FP8 scalar type or an FP8 vector type.
bool IsFP8ScalarOrVectorType(uint32_t id) const;
bool IsFloatScalarType(uint32_t id) const;
bool IsFloatArrayType(uint32_t id) const;
bool IsFloatVectorType(uint32_t id) const;