diff --git a/Android.mk b/Android.mk index 1e436d04..a16ec144 100644 --- a/Android.mk +++ b/Android.mk @@ -122,6 +122,7 @@ SPVTOOLS_OPT_SRC_FILES := \ source/opt/pass_manager.cpp \ source/opt/private_to_local_pass.cpp \ source/opt/propagator.cpp \ + source/opt/reduce_load_size.cpp \ source/opt/redundancy_elimination.cpp \ source/opt/register_pressure.cpp \ source/opt/remove_duplicates_pass.cpp \ diff --git a/include/spirv-tools/optimizer.hpp b/include/spirv-tools/optimizer.hpp index 9c26965c..5630e36e 100644 --- a/include/spirv-tools/optimizer.hpp +++ b/include/spirv-tools/optimizer.hpp @@ -578,6 +578,12 @@ Optimizer::PassToken CreateCopyPropagateArraysPass(); // a pass of ADCE will be able to remove. Optimizer::PassToken CreateVectorDCEPass(); +// Create a pass to reduce the size of loads. +// This pass looks for loads of structures or arrays where only a few of their +// members are used. It replaces each OpCompositeExtract fed by such a load +// with an OpAccessChain and a load of just the extracted element. +Optimizer::PassToken CreateReduceLoadSizePass(); + } // namespace spvtools #endif // SPIRV_TOOLS_OPTIMIZER_HPP_ diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt index c1648c57..2825f309 100644 --- a/source/opt/CMakeLists.txt +++ b/source/opt/CMakeLists.txt @@ -77,6 +77,7 @@ add_library(SPIRV-Tools-opt pass_manager.h private_to_local_pass.h propagator.h + reduce_load_size.h redundancy_elimination.h reflect.h register_pressure.h @@ -161,6 +162,7 @@ add_library(SPIRV-Tools-opt pass_manager.cpp private_to_local_pass.cpp propagator.cpp + reduce_load_size.cpp redundancy_elimination.cpp register_pressure.cpp remove_duplicates_pass.cpp diff --git a/source/opt/copy_prop_arrays.cpp b/source/opt/copy_prop_arrays.cpp index 1d1e80c9..88f56466 100644 --- a/source/opt/copy_prop_arrays.cpp +++ b/source/opt/copy_prop_arrays.cpp @@ -269,7 +269,7 @@ CopyPropagateArrays::BuildMemoryObjectFromExtract( // Convert the indices in the extract instruction to a series of ids that // can be used
by the |OpAccessChain| instruction.
   for (uint32_t i = 1; i < extract_inst->NumInOperands(); ++i) {
-    uint32_t index = extract_inst->GetSingleWordInOperand(1);
+    uint32_t index = extract_inst->GetSingleWordInOperand(i);
     const analysis::Constant* index_const =
         const_mgr->GetConstant(uint32_type, {index});
     components.push_back(
diff --git a/source/opt/ir_builder.h b/source/opt/ir_builder.h
index aba6ef36..afc55dcb 100644
--- a/source/opt/ir_builder.h
+++ b/source/opt/ir_builder.h
@@ -331,6 +331,16 @@ class InstructionBuilder {
     return AddInstruction(std::move(new_inst));
   }
 
+  // Creates an OpLoad with result type |type_id| that reads through the
+  // pointer |base_ptr_id|.  The load gets a fresh result id and is added with
+  // AddInstruction().  Returns the new instruction.
+  ir::Instruction* AddLoad(uint32_t type_id, uint32_t base_ptr_id) {
+    std::vector<ir::Operand> operands;
+    operands.push_back({SPV_OPERAND_TYPE_ID, {base_ptr_id}});
+
+    std::unique_ptr<ir::Instruction> new_inst(
+        new ir::Instruction(GetContext(), SpvOpLoad, type_id,
+                            GetContext()->TakeNextId(), operands));
+    return AddInstruction(std::move(new_inst));
+  }
+
   // Inserts the new instruction before the insertion point.
   ir::Instruction* AddInstruction(std::unique_ptr&& insn) {
     ir::Instruction* insn_ptr = &*insert_before_.InsertBefore(std::move(insn));
diff --git a/source/opt/optimizer.cpp b/source/opt/optimizer.cpp
index 51a77594..7c5da10c 100644
--- a/source/opt/optimizer.cpp
+++ b/source/opt/optimizer.cpp
@@ -18,6 +18,7 @@
 #include "make_unique.h"
 #include "pass_manager.h"
 #include "passes.h"
+#include "reduce_load_size.h"
 #include "simplification_pass.h"
 
 namespace spvtools {
@@ -128,6 +129,7 @@ Optimizer& Optimizer::RegisterLegalizationPasses() {
           // or unused references to unbound external objects
           .RegisterPass(CreateVectorDCEPass())
           .RegisterPass(CreateDeadInsertElimPass())
+          .RegisterPass(CreateReduceLoadSizePass())
           .RegisterPass(CreateAggressiveDCEPass());
 }
 
@@ -156,6 +158,7 @@ Optimizer& Optimizer::RegisterPerformancePasses() {
       .RegisterPass(CreateSimplificationPass())
       .RegisterPass(CreateIfConversionPass())
       .RegisterPass(CreateCopyPropagateArraysPass())
+      .RegisterPass(CreateReduceLoadSizePass())
.RegisterPass(CreateAggressiveDCEPass())
       .RegisterPass(CreateBlockMergePass())
       .RegisterPass(CreateRedundancyEliminationPass())
@@ -466,4 +469,8 @@ Optimizer::PassToken CreateVectorDCEPass() {
   return MakeUnique(MakeUnique());
 }
 
+// Wraps a newly created opt::ReduceLoadSize pass in a pass token.
+Optimizer::PassToken CreateReduceLoadSizePass() {
+  return MakeUnique<Optimizer::PassToken::Impl>(
+      MakeUnique<opt::ReduceLoadSize>());
+}
 }  // namespace spvtools
diff --git a/source/opt/passes.h b/source/opt/passes.h
index 3a26220c..831fc86d 100644
--- a/source/opt/passes.h
+++ b/source/opt/passes.h
@@ -50,6 +50,7 @@
 #include "merge_return_pass.h"
 #include "null_pass.h"
 #include "private_to_local_pass.h"
+#include "reduce_load_size.h"
 #include "redundancy_elimination.h"
 #include "remove_duplicates_pass.h"
 #include "replace_invalid_opc.h"
diff --git a/source/opt/reduce_load_size.cpp b/source/opt/reduce_load_size.cpp
new file mode 100644
index 00000000..252db1ca
--- /dev/null
+++ b/source/opt/reduce_load_size.cpp
@@ -0,0 +1,176 @@
+// Copyright (c) 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "reduce_load_size.h"
+
+#include <cassert>
+#include <limits>
+#include <set>
+#include <vector>
+
+#include "instruction.h"
+#include "ir_builder.h"
+#include "ir_context.h"
+
+namespace {
+// In-operand index of the composite id in an OpCompositeExtract.
+const uint32_t kExtractCompositeIdInIdx = 0;
+// In-operand index of the storage class in an OpVariable.
+const uint32_t kVariableStorageClassInIdx = 0;
+// In-operand index of the pointer in an OpLoad.
+const uint32_t kLoadPointerInIdx = 0;
+// Extracts are rewritten only when the fraction of the loaded composite's
+// top-level elements that are used is below this value.
+const double kThreshold = 0.9;
+}  // namespace
+
+namespace spvtools {
+namespace opt {
+
+// Visits every instruction of every function and rewrites each
+// OpCompositeExtract accepted by ShouldReplaceExtract().  Returns
+// SuccessWithChange if at least one extract was rewritten.
+Pass::Status ReduceLoadSize::Process(ir::IRContext* ctx) {
+  InitializeProcessing(ctx);
+  // Cached decisions are keyed by result id, so they must not survive from an
+  // earlier run.
+  should_replace_cache_.clear();
+  bool modified = false;
+
+  for (auto& func : *get_module()) {
+    func.ForEachInst([&modified, this](ir::Instruction* inst) {
+      if (inst->opcode() == SpvOpCompositeExtract &&
+          ShouldReplaceExtract(inst)) {
+        modified |= ReplaceExtract(inst);
+      }
+    });
+  }
+
+  return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
+}
+
+// Rewrites the OpCompositeExtract |inst| as an OpAccessChain followed by an
+// OpLoad of just the extracted element, then kills |inst|.  Returns false,
+// leaving the module untouched, when |inst| has no indices, is not fed by an
+// OpLoad, the loaded value is a vector or matrix, or the load's base address
+// is not an OpVariable in the Uniform, UniformConstant or Input storage class.
+bool ReduceLoadSize::ReplaceExtract(ir::Instruction* inst) {
+  assert(inst->opcode() == SpvOpCompositeExtract &&
+         "Wrong opcode. Should be OpCompositeExtract.");
+  // An extract without indices names the whole composite; there is nothing to
+  // narrow.
+  if (inst->NumInOperands() < 2) {
+    return false;
+  }
+
+  analysis::DefUseManager* def_use_mgr = inst->context()->get_def_use_mgr();
+  analysis::TypeManager* type_mgr = inst->context()->get_type_mgr();
+  analysis::ConstantManager* const_mgr = inst->context()->get_constant_mgr();
+
+  uint32_t composite_id =
+      inst->GetSingleWordInOperand(kExtractCompositeIdInIdx);
+  ir::Instruction* composite_inst = def_use_mgr->GetDef(composite_id);
+
+  if (composite_inst->opcode() != SpvOpLoad) {
+    return false;
+  }
+
+  analysis::Type* composite_type = type_mgr->GetType(composite_inst->type_id());
+  if (composite_type->kind() == analysis::Type::kVector ||
+      composite_type->kind() == analysis::Type::kMatrix) {
+    return false;
+  }
+
+  ir::Instruction* var = composite_inst->GetBaseAddress();
+  if (var == nullptr || var->opcode() != SpvOpVariable) {
+    return false;
+  }
+
+  SpvStorageClass storage_class = static_cast<SpvStorageClass>(
+      var->GetSingleWordInOperand(kVariableStorageClassInIdx));
+  switch (storage_class) {
+    case SpvStorageClassUniform:
+    case SpvStorageClassUniformConstant:
+    case SpvStorageClassInput:
+      break;
+    default:
+      return false;
+  }
+
+  // Create the new access chain and load at the position of the old load.
+  // We cannot create them at the position of the extract because the storage
+  // may have been written to in between.
+  InstructionBuilder ir_builder(inst->context(), composite_inst,
+                                ir::IRContext::kAnalysisInstrToBlockMapping |
+                                    ir::IRContext::kAnalysisDefUse);
+
+  uint32_t pointer_to_result_type_id =
+      type_mgr->FindPointerToType(inst->type_id(), storage_class);
+  assert(pointer_to_result_type_id != 0 &&
+         "We did not find the pointer type that we need.");
+
+  // OpCompositeExtract takes literal indices while OpAccessChain takes ids, so
+  // each literal is turned into a 32-bit unsigned integer constant.
+  analysis::Integer int_type(32, false);
+  const analysis::Type* uint32_type = type_mgr->GetRegisteredType(&int_type);
+  std::vector<uint32_t> ids;
+  for (uint32_t i = 1; i < inst->NumInOperands(); ++i) {
+    uint32_t index = inst->GetSingleWordInOperand(i);
+    const analysis::Constant* index_const =
+        const_mgr->GetConstant(uint32_type, {index});
+    ids.push_back(const_mgr->GetDefiningInstruction(index_const)->result_id());
+  }
+
+  ir::Instruction* new_access_chain = ir_builder.AddAccessChain(
+      pointer_to_result_type_id,
+      composite_inst->GetSingleWordInOperand(kLoadPointerInIdx), ids);
+  ir::Instruction* new_load =
+      ir_builder.AddLoad(inst->type_id(), new_access_chain->result_id());
+
+  context()->ReplaceAllUsesWith(inst->result_id(), new_load->result_id());
+  context()->KillInst(inst);
+  return true;
+}
+
+// Decides whether the OpCompositeExtract |inst| should be rewritten.  The
+// decision is made once per feeding OpLoad and cached in
+// |should_replace_cache_|: the answer is true only when every user of the load
+// is an indexed OpCompositeExtract and the fraction of distinct top-level
+// elements they read is below |kThreshold|.
+bool ReduceLoadSize::ShouldReplaceExtract(ir::Instruction* inst) {
+  analysis::DefUseManager* def_use_mgr = context()->get_def_use_mgr();
+  ir::Instruction* op_inst = def_use_mgr->GetDef(
+      inst->GetSingleWordInOperand(kExtractCompositeIdInIdx));
+
+  if (op_inst->opcode() != SpvOpLoad) {
+    return false;
+  }
+
+  auto cached_result = should_replace_cache_.find(op_inst->result_id());
+  if (cached_result != should_replace_cache_.end()) {
+    return cached_result->second;
+  }
+
+  bool all_elements_used = false;
+  std::set<uint32_t> elements_used;
+
+  def_use_mgr->ForEachUser(
+      op_inst, [&elements_used, &all_elements_used](ir::Instruction* use) {
+        // Any user other than an indexed extract is treated as using the whole
+        // value.  Return right away: such a user need not have an in-operand 1
+        // to read.
+        if (use->opcode() != SpvOpCompositeExtract ||
+            use->NumInOperands() < 2) {
+          all_elements_used = true;
+          return;
+        }
+        elements_used.insert(use->GetSingleWordInOperand(1));
+      });
+
+  bool should_replace = false;
+  if (!all_elements_used) {
+    analysis::ConstantManager* const_mgr = context()->get_constant_mgr();
+    analysis::TypeManager* type_mgr = context()->get_type_mgr();
+    analysis::Type* load_type = type_mgr->GetType(op_inst->type_id());
+    uint32_t total_size = 1;
+    switch (load_type->kind()) {
+      case analysis::Type::kArray: {
+        const analysis::Constant* size_const =
+            const_mgr->FindDeclaredConstant(load_type->AsArray()->LengthId());
+        if (size_const != nullptr) {
+          assert(size_const->AsIntConstant());
+          total_size = size_const->GetU32();
+        } else {
+          // No constant value is recorded for the length (presumably it is a
+          // specialization constant).  Assume the array is very large.
+          total_size = std::numeric_limits<uint32_t>::max();
+        }
+      } break;
+      case analysis::Type::kStruct:
+        total_size = static_cast<uint32_t>(
+            load_type->AsStruct()->element_types().size());
+        break;
+      default:
+        break;
+    }
+    // A composite without elements has nothing to narrow; this also avoids
+    // dividing by zero.
+    if (total_size != 0) {
+      double percent_used = static_cast<double>(elements_used.size()) /
+                            static_cast<double>(total_size);
+      should_replace = (percent_used < kThreshold);
+    }
+  }
+
+  should_replace_cache_[op_inst->result_id()] = should_replace;
+  return should_replace;
+}
+
+}  // namespace opt
+}  // namespace spvtools
diff --git a/source/opt/reduce_load_size.h b/source/opt/reduce_load_size.h
new file mode 100644
index 00000000..ae3112ec
--- /dev/null
+++ b/source/opt/reduce_load_size.h
@@ -0,0 +1,62 @@
+// Copyright (c) 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_OPT_REDUCE_LOAD_SIZE_H_
+#define LIBSPIRV_OPT_REDUCE_LOAD_SIZE_H_
+
+#include <unordered_map>
+
+#include "ir_context.h"
+#include "module.h"
+#include "pass.h"
+
+namespace spvtools {
+namespace opt {
+
+// See optimizer.hpp for documentation.
+class ReduceLoadSize : public Pass {
+ public:
+  const char* name() const override { return "reduce-load-size"; }
+  Status Process(ir::IRContext* irContext) override;
+
+  // Returns the mask of preserved analyses.
+  ir::IRContext::Analysis GetPreservedAnalyses() override {
+    return ir::IRContext::kAnalysisInstrToBlockMapping |
+           ir::IRContext::kAnalysisCombinators | ir::IRContext::kAnalysisCFG |
+           ir::IRContext::kAnalysisDominatorAnalysis |
+           ir::IRContext::kAnalysisLoopAnalysis |
+           ir::IRContext::kAnalysisNameMap;
+  }
+
+ private:
+  // Replaces |inst|, which must be an OpCompositeExtract instruction, with
+  // an OpAccessChain and a load if possible.  This happens only if |inst| is
+  // fed by a load.  Returns true if the substitution happened.  The position
+  // of the new instructions will be in the same place as the load feeding the
+  // extract.
+  bool ReplaceExtract(ir::Instruction* inst);
+
+  // Returns true if the OpCompositeExtract instruction |inst| should be
+  // replaced.  This is determined by looking at the instruction that feeds
+  // |inst|, provided it is a load.  |should_replace_cache_| is used to cache
+  // the results based on the load feeding |inst|.
+  bool ShouldReplaceExtract(ir::Instruction* inst);
+
+  // Maps the result id of an OpLoad instruction to the result of whether or
+  // not the OpCompositeExtract instructions that use the id should be
+  // replaced.
+  std::unordered_map<uint32_t, bool> should_replace_cache_;
+};
+
+}  // namespace opt
+}  // namespace spvtools
+
+#endif  // LIBSPIRV_OPT_REDUCE_LOAD_SIZE_H_
diff --git a/test/opt/CMakeLists.txt b/test/opt/CMakeLists.txt
index d26fca3b..0db44eb9 100644
--- a/test/opt/CMakeLists.txt
+++ b/test/opt/CMakeLists.txt
@@ -322,3 +322,7 @@ add_spvtools_unittest(TARGET vector_dce
   LIBS SPIRV-Tools-opt
 )
 
+add_spvtools_unittest(TARGET reduce_load_size
+  SRCS reduce_load_size_test.cpp pass_utils.cpp
+  LIBS SPIRV-Tools-opt
+)
diff --git a/test/opt/copy_prop_array_test.cpp b/test/opt/copy_prop_array_test.cpp
index a58acdfd..83eac78d 100644
--- a/test/opt/copy_prop_array_test.cpp
+++ b/test/opt/copy_prop_array_test.cpp
@@ -188,7 +188,7 @@ OpFunctionEnd
   SinglePassRunAndMatch(before, false);
 }
 
-// Propagate 2d array. This test identifing a copy through multiple levels.
+// Propagate 2d array. This test identifying a copy through multiple levels.
 // Also has to traverse multiple OpAccessChains.
 TEST_F(CopyPropArrayPassTest, Propagate2DArray) {
   const std::string text =
@@ -277,6 +277,93 @@ OpFunctionEnd
   SinglePassRunAndMatch(text, false);
 }
 
+// Propagate 2d array.  This test identifies a copy whose elements are read
+// with multi-index OpCompositeExtract instructions (one index per array
+// level).  It also has to traverse multiple OpAccessChains.
+TEST_F(CopyPropArrayPassTest, Propagate2DArrayWithMultiLevelExtract) {
+  const std::string text =
+      R"(OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %in_var_INDEX %out_var_SV_Target
+OpExecutionMode %main OriginUpperLeft
+OpSource HLSL 600
+OpName %type_MyCBuffer "type.MyCBuffer"
+OpMemberName %type_MyCBuffer 0 "Data"
+OpName %MyCBuffer "MyCBuffer"
+OpName %main "main"
+OpName %in_var_INDEX "in.var.INDEX"
+OpName %out_var_SV_Target "out.var.SV_Target"
+OpDecorate %_arr_v4float_uint_2 ArrayStride 16
+OpDecorate %_arr__arr_v4float_uint_2_uint_2 ArrayStride 32
+OpMemberDecorate %type_MyCBuffer 0 Offset 0
+OpDecorate %type_MyCBuffer Block
+OpDecorate %in_var_INDEX Flat
+OpDecorate %in_var_INDEX Location 0
+OpDecorate %out_var_SV_Target Location 0
+OpDecorate %MyCBuffer DescriptorSet 0
+OpDecorate %MyCBuffer Binding 0
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%uint = OpTypeInt 32 0
+%uint_2 = OpConstant %uint 2
+%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2
+%_arr__arr_v4float_uint_2_uint_2 = OpTypeArray %_arr_v4float_uint_2 %uint_2
+%type_MyCBuffer = OpTypeStruct %_arr__arr_v4float_uint_2_uint_2
+%_ptr_Uniform_type_MyCBuffer = OpTypePointer Uniform %type_MyCBuffer
+%void = OpTypeVoid
+%14 = OpTypeFunction %void
+%int = OpTypeInt 32 1
+%_ptr_Input_int = OpTypePointer Input %int
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%_arr_v4float_uint_2_0 = OpTypeArray %v4float %uint_2
+%_arr__arr_v4float_uint_2_0_uint_2 = OpTypeArray %_arr_v4float_uint_2_0 %uint_2
+%_ptr_Function__arr__arr_v4float_uint_2_0_uint_2 = OpTypePointer Function %_arr__arr_v4float_uint_2_0_uint_2
+%int_0 = OpConstant %int 0
+%_ptr_Uniform__arr__arr_v4float_uint_2_uint_2 = OpTypePointer Uniform %_arr__arr_v4float_uint_2_uint_2
+%_ptr_Function__arr_v4float_uint_2_0 = OpTypePointer Function %_arr_v4float_uint_2_0
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%MyCBuffer = OpVariable %_ptr_Uniform_type_MyCBuffer Uniform
+%in_var_INDEX = OpVariable %_ptr_Input_int Input
+%out_var_SV_Target = OpVariable %_ptr_Output_v4float Output
+; CHECK: OpFunction
+; CHECK: OpLabel
+; CHECK: OpVariable
+; CHECK: OpVariable
+; CHECK: OpAccessChain
+; CHECK: [[new_address:%\w+]] = OpAccessChain %_ptr_Uniform__arr__arr_v4float_uint_2_uint_2 %MyCBuffer %int_0
+%main = OpFunction %void None %14
+%25 = OpLabel
+%26 = OpVariable %_ptr_Function__arr_v4float_uint_2_0 Function
+%27 = OpVariable %_ptr_Function__arr__arr_v4float_uint_2_0_uint_2 Function
+%28 = OpLoad %int %in_var_INDEX
+%29 = OpAccessChain %_ptr_Uniform__arr__arr_v4float_uint_2_uint_2 %MyCBuffer %int_0
+%30 = OpLoad %_arr__arr_v4float_uint_2_uint_2 %29
+%32 = OpCompositeExtract %v4float %30 0 0
+%33 = OpCompositeExtract %v4float %30 0 1
+%34 = OpCompositeConstruct %_arr_v4float_uint_2_0 %32 %33
+%36 = OpCompositeExtract %v4float %30 1 0
+%37 = OpCompositeExtract %v4float %30 1 1
+%38 = OpCompositeConstruct %_arr_v4float_uint_2_0 %36 %37
+%39 = OpCompositeConstruct %_arr__arr_v4float_uint_2_0_uint_2 %34 %38
+; CHECK: OpStore
+OpStore %27 %39
+%40 = OpAccessChain %_ptr_Function__arr_v4float_uint_2_0 %27 %28
+%42 = OpAccessChain %_ptr_Function_v4float %40 %28
+%43 = OpLoad %v4float %42
+; CHECK: [[ac1:%\w+]] = OpAccessChain %_ptr_Uniform__arr_v4float_uint_2 [[new_address]] %28
+; CHECK: [[ac2:%\w+]] = OpAccessChain %_ptr_Uniform_v4float [[ac1]] %28
+; CHECK: [[load:%\w+]] = OpLoad %v4float [[ac2]]
+; CHECK: OpStore %out_var_SV_Target [[load]]
+OpStore %out_var_SV_Target %43
+OpReturn
+OpFunctionEnd
+)";
+
+  // Numeric ids are preserved because the CHECK lines refer to %28 directly.
+  SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
+                        SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
+  SinglePassRunAndMatch<opt::CopyPropagateArrays>(text, false);
+}
+
 // Test decomposing an object when we need to "rewrite" a store.
TEST_F(CopyPropArrayPassTest, DecomposeObjectForArrayStore) { const std::string text = diff --git a/test/opt/fold_test.cpp b/test/opt/fold_test.cpp index 1a272004..7d6183bc 100644 --- a/test/opt/fold_test.cpp +++ b/test/opt/fold_test.cpp @@ -5518,4 +5518,4 @@ INSTANTIATE_TEST_CASE_P(DotProductMatchingTest, MatchingInstructionFoldingTest, 3, true) )); #endif -} // anonymous namespace \ No newline at end of file +} // anonymous namespace diff --git a/test/opt/reduce_load_size_test.cpp b/test/opt/reduce_load_size_test.cpp new file mode 100644 index 00000000..5b8ea75c --- /dev/null +++ b/test/opt/reduce_load_size_test.cpp @@ -0,0 +1,259 @@ +// Copyright (c) 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "pass_fixture.h"
+#include "pass_utils.h"
+
+namespace {
+
+using namespace spvtools;
+
+using ReduceLoadSizeTest = PassTest<::testing::Test>;
+
+#ifdef SPIRV_EFFCEE
+// Only one element of a loaded 32-element array is extracted, so the extract
+// must become an access chain plus a load of that single element.
+TEST_F(ReduceLoadSizeTest, cbuffer_load_extract) {
+  // Originally from the following HLSL:
+  //   struct S {
+  //     uint f;
+  //   };
+  //
+  //
+  //   cbuffer gBuffer { uint a[32]; };
+  //
+  //   RWStructuredBuffer<S> gRWSBuffer;
+  //
+  //   uint foo(uint p[32]) {
+  //     return p[1];
+  //   }
+  //
+  //   [numthreads(1,1,1)]
+  //   void main() {
+  //      gRWSBuffer[0].f = foo(a);
+  //   }
+  const std::string test =
+      R"(
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource HLSL 600
+               OpName %type_gBuffer "type.gBuffer"
+               OpMemberName %type_gBuffer 0 "a"
+               OpName %gBuffer "gBuffer"
+               OpName %S "S"
+               OpMemberName %S 0 "f"
+               OpName %type_RWStructuredBuffer_S "type.RWStructuredBuffer.S"
+               OpName %gRWSBuffer "gRWSBuffer"
+               OpName %main "main"
+               OpDecorate %_arr_uint_uint_32 ArrayStride 16
+               OpMemberDecorate %type_gBuffer 0 Offset 0
+               OpDecorate %type_gBuffer Block
+               OpMemberDecorate %S 0 Offset 0
+               OpDecorate %_runtimearr_S ArrayStride 4
+               OpMemberDecorate %type_RWStructuredBuffer_S 0 Offset 0
+               OpDecorate %type_RWStructuredBuffer_S BufferBlock
+               OpDecorate %gBuffer DescriptorSet 0
+               OpDecorate %gBuffer Binding 0
+               OpDecorate %gRWSBuffer DescriptorSet 0
+               OpDecorate %gRWSBuffer Binding 1
+       %uint = OpTypeInt 32 0
+    %uint_32 = OpConstant %uint 32
+%_arr_uint_uint_32 = OpTypeArray %uint %uint_32
+%type_gBuffer = OpTypeStruct %_arr_uint_uint_32
+%_ptr_Uniform_type_gBuffer = OpTypePointer Uniform %type_gBuffer
+          %S = OpTypeStruct %uint
+%_runtimearr_S = OpTypeRuntimeArray %S
+%type_RWStructuredBuffer_S = OpTypeStruct %_runtimearr_S
+%_ptr_Uniform_type_RWStructuredBuffer_S = OpTypePointer Uniform %type_RWStructuredBuffer_S
+        %int = OpTypeInt 32 1
+       %void = OpTypeVoid
+         %15 = OpTypeFunction %void
+      %int_0 = OpConstant %int 0
+%_ptr_Uniform__arr_uint_uint_32 = OpTypePointer Uniform %_arr_uint_uint_32
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+    %gBuffer = OpVariable %_ptr_Uniform_type_gBuffer Uniform
+ %gRWSBuffer = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_S Uniform
+       %main = OpFunction %void None %15
+         %20 = OpLabel
+; CHECK: [[ac1:%\w+]] = OpAccessChain {{%\w+}} %gBuffer %int_0
+; CHECK: [[ac2:%\w+]] = OpAccessChain {{%\w+}} [[ac1]] %uint_1
+; CHECK: [[ld:%\w+]] = OpLoad {{%\w+}} [[ac2]]
+; CHECK: OpStore {{%\w+}} [[ld]]
+         %21 = OpAccessChain %_ptr_Uniform__arr_uint_uint_32 %gBuffer %int_0
+         %22 = OpLoad %_arr_uint_uint_32 %21 ; Load of 32-element array.
+         %23 = OpCompositeExtract %uint %22 1
+         %24 = OpAccessChain %_ptr_Uniform_uint %gRWSBuffer %int_0 %uint_0 %int_0
+               OpStore %24 %23
+               OpReturn
+               OpFunctionEnd
+  )";
+
+  SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
+                        SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
+  SinglePassRunAndMatch<opt::ReduceLoadSize>(test, false);
+}
+#endif
+
+TEST_F(ReduceLoadSizeTest, cbuffer_load_extract_vector) {
+  // Same module layout as cbuffer_load_extract, except that the member of
+  // %type_gBuffer is a %v4uint instead of an array.  The value loaded is a
+  // vector, and extracts from loaded vectors are not rewritten, so the module
+  // is expected to come back unchanged.
+  const std::string test =
+      R"(OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %main "main"
+OpExecutionMode %main LocalSize 1 1 1
+OpSource HLSL 600
+OpName %type_gBuffer "type.gBuffer"
+OpMemberName %type_gBuffer 0 "a"
+OpName %gBuffer "gBuffer"
+OpName %S "S"
+OpMemberName %S 0 "f"
+OpName %type_RWStructuredBuffer_S "type.RWStructuredBuffer.S"
+OpName %gRWSBuffer "gRWSBuffer"
+OpName %main "main"
+OpMemberDecorate %type_gBuffer 0 Offset 0
+OpDecorate %type_gBuffer Block
+OpMemberDecorate %S 0 Offset 0
+OpDecorate %_runtimearr_S ArrayStride 4
+OpMemberDecorate %type_RWStructuredBuffer_S 0 Offset 0
+OpDecorate %type_RWStructuredBuffer_S BufferBlock
+OpDecorate %gBuffer DescriptorSet 0
+OpDecorate %gBuffer Binding 0
+OpDecorate %gRWSBuffer DescriptorSet 0
+OpDecorate %gRWSBuffer Binding 1
+%uint = OpTypeInt 32 0
+%uint_32 = OpConstant %uint 32
+%v4uint = OpTypeVector %uint 4
+%type_gBuffer = OpTypeStruct %v4uint
+%_ptr_Uniform_type_gBuffer = OpTypePointer Uniform %type_gBuffer
+%S = OpTypeStruct %uint
+%_runtimearr_S = OpTypeRuntimeArray %S
+%type_RWStructuredBuffer_S = OpTypeStruct %_runtimearr_S
+%_ptr_Uniform_type_RWStructuredBuffer_S = OpTypePointer Uniform %type_RWStructuredBuffer_S
+%int = OpTypeInt 32 1
+%void = OpTypeVoid
+%15 = OpTypeFunction %void
+%int_0 = OpConstant %int 0
+%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint
+%uint_0 = OpConstant %uint 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%gBuffer = OpVariable %_ptr_Uniform_type_gBuffer Uniform
+%gRWSBuffer = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_S Uniform
+%main = OpFunction %void None %15
+%20 = OpLabel
+%21 = OpAccessChain %_ptr_Uniform_v4uint %gBuffer %int_0
+%22 = OpLoad %v4uint %21
+%23 = OpCompositeExtract %uint %22 1
+%24 = OpAccessChain %_ptr_Uniform_uint %gRWSBuffer %int_0 %uint_0 %int_0
+OpStore %24 %23
+OpReturn
+OpFunctionEnd
+)";
+
+  SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
+                        SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
+  SinglePassRunAndCheck<opt::ReduceLoadSize>(test, test, true, false);
+}
+
+TEST_F(ReduceLoadSizeTest, cbuffer_load_5_extract) {
+  // All of the elements of the value loaded are used, so we should not
+  // change the load.
+  const std::string test =
+      R"(OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %main "main"
+OpExecutionMode %main LocalSize 1 1 1
+OpSource HLSL 600
+OpName %type_gBuffer "type.gBuffer"
+OpMemberName %type_gBuffer 0 "a"
+OpName %gBuffer "gBuffer"
+OpName %S "S"
+OpMemberName %S 0 "f"
+OpName %type_RWStructuredBuffer_S "type.RWStructuredBuffer.S"
+OpName %gRWSBuffer "gRWSBuffer"
+OpName %main "main"
+OpDecorate %_arr_uint_uint_5 ArrayStride 16
+OpMemberDecorate %type_gBuffer 0 Offset 0
+OpDecorate %type_gBuffer Block
+OpMemberDecorate %S 0 Offset 0
+OpDecorate %_runtimearr_S ArrayStride 4
+OpMemberDecorate %type_RWStructuredBuffer_S 0 Offset 0
+OpDecorate %type_RWStructuredBuffer_S BufferBlock
+OpDecorate %gBuffer DescriptorSet 0
+OpDecorate %gBuffer Binding 0
+OpDecorate %gRWSBuffer DescriptorSet 0
+OpDecorate %gRWSBuffer Binding 1
+%uint = OpTypeInt 32 0
+%uint_5 = OpConstant %uint 5
+%_arr_uint_uint_5 = OpTypeArray %uint %uint_5
+%type_gBuffer = OpTypeStruct %_arr_uint_uint_5
+%_ptr_Uniform_type_gBuffer = OpTypePointer Uniform %type_gBuffer
+%S = OpTypeStruct %uint
+%_runtimearr_S = OpTypeRuntimeArray %S
+%type_RWStructuredBuffer_S = OpTypeStruct %_runtimearr_S
+%_ptr_Uniform_type_RWStructuredBuffer_S = OpTypePointer Uniform %type_RWStructuredBuffer_S
+%int = OpTypeInt 32 1
+%void = OpTypeVoid
+%15 = OpTypeFunction %void
+%int_0 = OpConstant %int 0
+%_ptr_Uniform__arr_uint_uint_5 = OpTypePointer Uniform %_arr_uint_uint_5
+%uint_0 = OpConstant %uint 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%gBuffer = OpVariable %_ptr_Uniform_type_gBuffer Uniform
+%gRWSBuffer = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_S Uniform
+%main = OpFunction %void None %15
+%20 = OpLabel
+%21 = OpAccessChain %_ptr_Uniform__arr_uint_uint_5 %gBuffer %int_0
+%22 = OpLoad %_arr_uint_uint_5 %21
+%23 = OpCompositeExtract %uint %22 0
+%24 = OpCompositeExtract %uint %22 1
+%25 = OpCompositeExtract %uint %22 2
+%26 = OpCompositeExtract %uint %22 3
+%27 = OpCompositeExtract %uint %22 4
+%28 = OpIAdd %uint %23 %24
+%29 = OpIAdd %uint %28 %25
+%30 = OpIAdd %uint %29 %26
+%31 = OpIAdd %uint %30 %27
+%32 = OpAccessChain %_ptr_Uniform_uint %gRWSBuffer %int_0 %uint_0 %int_0
+OpStore %32 %31
+OpReturn
+OpFunctionEnd
+)";
+
+  SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
+                        SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
+  SinglePassRunAndCheck<opt::ReduceLoadSize>(test, test, true, false);
+}
+
+}  // anonymous namespace
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index dbdc7834..bf04ab6d 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -580,6 +580,8 @@ OptStatus ParseFlags(int argc, const char** argv, Optimizer* optimizer,
         optimizer->RegisterPass(CreateLocalRedundancyEliminationPass());
       } else if (0 == strcmp(cur_arg, "--loop-invariant-code-motion")) {
         optimizer->RegisterPass(CreateLoopInvariantCodeMotionPass());
+      } else if (0 == strcmp(cur_arg, "--reduce-load-size")) {
+        optimizer->RegisterPass(CreateReduceLoadSizePass());
       } else if (0 == strcmp(cur_arg, "--redundancy-elimination")) {
         optimizer->RegisterPass(CreateRedundancyEliminationPass());
       } else if (0 == strcmp(cur_arg, "--private-to-local")) {