Add pass to fold a load feeding an extract.

We have already disabled common uniform elimination because it created
sequences that load an entire uniform object and then extract just a
single element.  This caused problems in some drivers, and is just
generally slow because it loads more memory than needed.

However, there are other ways to get into this situation, so I've added
a pass that looks specifically for this pattern and removes it when only
a portion of the load is used.

Fixes #1547.
This commit is contained in:
Steven Perron 2018-05-07 12:31:03 -04:00
parent 804e8884c4
commit af430ec822
14 changed files with 620 additions and 3 deletions

View File

@ -122,6 +122,7 @@ SPVTOOLS_OPT_SRC_FILES := \
source/opt/pass_manager.cpp \
source/opt/private_to_local_pass.cpp \
source/opt/propagator.cpp \
source/opt/reduce_load_size.cpp \
source/opt/redundancy_elimination.cpp \
source/opt/register_pressure.cpp \
source/opt/remove_duplicates_pass.cpp \

View File

@ -578,6 +578,12 @@ Optimizer::PassToken CreateCopyPropagateArraysPass();
// a pass of ADCE will be able to remove.
Optimizer::PassToken CreateVectorDCEPass();
// Create a pass to reduce the size of loads.
// This pass looks for loads of composite objects (structures or arrays) where
// only some of the elements are used.  It replaces a load feeding an
// OpCompositeExtract with an OpAccessChain and a load of just the extracted
// element.
Optimizer::PassToken CreateReduceLoadSizePass();
} // namespace spvtools
#endif // SPIRV_TOOLS_OPTIMIZER_HPP_

View File

@ -77,6 +77,7 @@ add_library(SPIRV-Tools-opt
pass_manager.h
private_to_local_pass.h
propagator.h
reduce_load_size.h
redundancy_elimination.h
reflect.h
register_pressure.h
@ -161,6 +162,7 @@ add_library(SPIRV-Tools-opt
pass_manager.cpp
private_to_local_pass.cpp
propagator.cpp
reduce_load_size.cpp
redundancy_elimination.cpp
register_pressure.cpp
remove_duplicates_pass.cpp

View File

@ -269,7 +269,7 @@ CopyPropagateArrays::BuildMemoryObjectFromExtract(
// Convert the indices in the extract instruction to a series of ids that
// can be used by the |OpAccessChain| instruction.
for (uint32_t i = 1; i < extract_inst->NumInOperands(); ++i) {
uint32_t index = extract_inst->GetSingleWordInOperand(1);
uint32_t index = extract_inst->GetSingleWordInOperand(i);
const analysis::Constant* index_const =
const_mgr->GetConstant(uint32_type, {index});
components.push_back(

View File

@ -331,6 +331,16 @@ class InstructionBuilder {
return AddInstruction(std::move(new_inst));
}
// Creates an OpLoad whose result type is |type_id| and whose pointer operand
// is |base_ptr_id|, giving it a fresh result id taken from the context.  The
// instruction is handed to AddInstruction and the inserted instruction is
// returned.
ir::Instruction* AddLoad(uint32_t type_id, uint32_t base_ptr_id) {
  const uint32_t result_id = GetContext()->TakeNextId();
  const std::vector<ir::Operand> pointer_operand = {
      {SPV_OPERAND_TYPE_ID, {base_ptr_id}}};
  std::unique_ptr<ir::Instruction> load(new ir::Instruction(
      GetContext(), SpvOpLoad, type_id, result_id, pointer_operand));
  return AddInstruction(std::move(load));
}
// Inserts the new instruction before the insertion point.
ir::Instruction* AddInstruction(std::unique_ptr<ir::Instruction>&& insn) {
ir::Instruction* insn_ptr = &*insert_before_.InsertBefore(std::move(insn));

View File

@ -18,6 +18,7 @@
#include "make_unique.h"
#include "pass_manager.h"
#include "passes.h"
#include "reduce_load_size.h"
#include "simplification_pass.h"
namespace spvtools {
@ -128,6 +129,7 @@ Optimizer& Optimizer::RegisterLegalizationPasses() {
// or unused references to unbound external objects
.RegisterPass(CreateVectorDCEPass())
.RegisterPass(CreateDeadInsertElimPass())
.RegisterPass(CreateReduceLoadSizePass())
.RegisterPass(CreateAggressiveDCEPass());
}
@ -156,6 +158,7 @@ Optimizer& Optimizer::RegisterPerformancePasses() {
.RegisterPass(CreateSimplificationPass())
.RegisterPass(CreateIfConversionPass())
.RegisterPass(CreateCopyPropagateArraysPass())
.RegisterPass(CreateReduceLoadSizePass())
.RegisterPass(CreateAggressiveDCEPass())
.RegisterPass(CreateBlockMergePass())
.RegisterPass(CreateRedundancyEliminationPass())
@ -466,4 +469,8 @@ Optimizer::PassToken CreateVectorDCEPass() {
return MakeUnique<Optimizer::PassToken::Impl>(MakeUnique<opt::VectorDCE>());
}
// Wraps a new opt::ReduceLoadSize pass in a pass token.
Optimizer::PassToken CreateReduceLoadSizePass() {
  using TokenImpl = Optimizer::PassToken::Impl;
  return MakeUnique<TokenImpl>(MakeUnique<opt::ReduceLoadSize>());
}
} // namespace spvtools

View File

@ -50,6 +50,7 @@
#include "merge_return_pass.h"
#include "null_pass.h"
#include "private_to_local_pass.h"
#include "reduce_load_size.h"
#include "redundancy_elimination.h"
#include "remove_duplicates_pass.h"
#include "replace_invalid_opc.h"

View File

@ -0,0 +1,176 @@
// Copyright (c) 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "reduce_load_size.h"
#include <util/bit_vector.h>
#include "instruction.h"
#include "ir_builder.h"
#include "ir_context.h"
namespace {
// In-operand index of the composite id in an OpCompositeExtract instruction.
const uint32_t kExtractCompositeIdInIdx = 0;
// In-operand index of the storage class in an OpVariable instruction.
const uint32_t kVariableStorageClassInIdx = 0;
// In-operand index of the pointer in an OpLoad instruction.
const uint32_t kLoadPointerInIdx = 0;
// Extracts are only rewritten when the fraction of the loaded composite's
// elements that are extracted is strictly below this value.
const double kThreshold = 0.9;
} // namespace
namespace spvtools {
namespace opt {
// Visits every instruction of every function in the module and rewrites each
// OpCompositeExtract that ShouldReplaceExtract accepts.  Reports whether at
// least one extract was actually replaced.
Pass::Status ReduceLoadSize::Process(ir::IRContext* ctx) {
  InitializeProcessing(ctx);

  bool changed = false;
  auto visit = [&changed, this](ir::Instruction* candidate) {
    if (candidate->opcode() != SpvOpCompositeExtract) return;
    if (!ShouldReplaceExtract(candidate)) return;
    if (ReplaceExtract(candidate)) changed = true;
  };
  for (auto& function : *get_module()) {
    function.ForEachInst(visit);
  }

  if (changed) return Status::SuccessWithChange;
  return Status::SuccessWithoutChange;
}
// Replaces the OpCompositeExtract |inst| with an OpAccessChain + OpLoad of
// just the extracted element, provided that |inst| extracts from the result
// of an OpLoad of a non-vector, non-matrix value whose base address is an
// OpVariable in the Uniform, UniformConstant or Input storage class.
// Returns true if the replacement was made, false if |inst| was left alone.
bool ReduceLoadSize::ReplaceExtract(ir::Instruction* inst) {
  assert(inst->opcode() == SpvOpCompositeExtract &&
         "Wrong opcode. Should be OpCompositeExtract.");

  ir::IRContext* ctx = inst->context();
  analysis::DefUseManager* def_use_mgr = ctx->get_def_use_mgr();
  analysis::TypeManager* type_mgr = ctx->get_type_mgr();
  analysis::ConstantManager* const_mgr = ctx->get_constant_mgr();

  // The composite being indexed has to come straight from a load.
  ir::Instruction* load = def_use_mgr->GetDef(
      inst->GetSingleWordInOperand(kExtractCompositeIdInIdx));
  if (load->opcode() != SpvOpLoad) return false;

  // Loads of vectors and matrices are never rewritten.
  switch (type_mgr->GetType(load->type_id())->kind()) {
    case analysis::Type::kVector:
    case analysis::Type::kMatrix:
      return false;
    default:
      break;
  }

  // The loaded pointer must be rooted at a variable.
  ir::Instruction* base = load->GetBaseAddress();
  if (base == nullptr || base->opcode() != SpvOpVariable) return false;

  const SpvStorageClass storage_class = static_cast<SpvStorageClass>(
      base->GetSingleWordInOperand(kVariableStorageClassInIdx));
  const bool supported_class = storage_class == SpvStorageClassUniform ||
                               storage_class == SpvStorageClassUniformConstant ||
                               storage_class == SpvStorageClassInput;
  if (!supported_class) return false;

  // The new access chain and load are created at the position of the original
  // load rather than at the extract, because the storage may have been
  // written to between the two.
  InstructionBuilder builder(ctx, load,
                             ir::IRContext::kAnalysisInstrToBlockMapping |
                                 ir::IRContext::kAnalysisDefUse);

  const uint32_t element_ptr_type_id =
      type_mgr->FindPointerToType(inst->type_id(), storage_class);
  assert(element_ptr_type_id != 0 &&
         "We did not find the pointer type that we need.");

  // Turn each literal index of the extract into the id of a 32-bit unsigned
  // constant so that it can be used as an access chain index.
  analysis::Integer uint32_desc(32, false);
  const analysis::Type* uint32_type = type_mgr->GetRegisteredType(&uint32_desc);
  std::vector<uint32_t> index_ids;
  const uint32_t operand_count = inst->NumInOperands();
  for (uint32_t operand = 1; operand < operand_count; ++operand) {
    const uint32_t literal = inst->GetSingleWordInOperand(operand);
    const analysis::Constant* literal_const =
        const_mgr->GetConstant(uint32_type, {literal});
    ir::Instruction* literal_def =
        const_mgr->GetDefiningInstruction(literal_const);
    index_ids.push_back(literal_def->result_id());
  }

  ir::Instruction* element_ptr = builder.AddAccessChain(
      element_ptr_type_id, load->GetSingleWordInOperand(kLoadPointerInIdx),
      index_ids);
  ir::Instruction* element_load =
      builder.AddLoad(inst->type_id(), element_ptr->result_id());

  // Redirect every user of the extract to the narrow load, then drop it.
  context()->ReplaceAllUsesWith(inst->result_id(), element_load->result_id());
  context()->KillInst(inst);
  return true;
}
// Returns true if the OpCompositeExtract |inst| should be replaced by an
// access chain and a narrower load.
//
// The decision is made per load: if the composite operand of |inst| is not the
// result of an OpLoad the answer is false.  Otherwise every user of the load
// is inspected.  If any user is not an OpCompositeExtract the whole value is
// considered used and the answer is false.  If all users are extracts, the
// number of distinct first-level indices that are extracted is compared with
// the number of elements of the loaded array or struct, and the answer is true
// only if that fraction is below |kThreshold|.  The answer is cached in
// |should_replace_cache_| keyed by the result id of the load.
bool ReduceLoadSize::ShouldReplaceExtract(ir::Instruction* inst) {
  analysis::DefUseManager* def_use_mgr = context()->get_def_use_mgr();
  ir::Instruction* op_inst = def_use_mgr->GetDef(
      inst->GetSingleWordInOperand(kExtractCompositeIdInIdx));

  if (op_inst->opcode() != SpvOpLoad) {
    return false;
  }

  // All extracts fed by the same load share one decision.
  auto cached_result = should_replace_cache_.find(op_inst->result_id());
  if (cached_result != should_replace_cache_.end()) {
    return cached_result->second;
  }

  bool all_elements_used = false;
  std::set<uint32_t> elements_used;

  def_use_mgr->ForEachUser(
      op_inst, [&elements_used, &all_elements_used](ir::Instruction* use) {
        if (use->opcode() != SpvOpCompositeExtract) {
          // Any other kind of user consumes the loaded value as a whole.  Its
          // in-operand 1 is not an element index (and may not exist at all),
          // so it must not be read.
          all_elements_used = true;
          return;
        }
        // In-operand 1 of an extract is its first (outermost) literal index.
        elements_used.insert(use->GetSingleWordInOperand(1));
      });

  bool should_replace = false;
  if (all_elements_used) {
    should_replace = false;
  } else {
    analysis::ConstantManager* const_mgr = context()->get_constant_mgr();
    analysis::TypeManager* type_mgr = context()->get_type_mgr();
    analysis::Type* load_type = type_mgr->GetType(op_inst->type_id());

    // Number of top-level elements in the loaded value.  Types other than
    // arrays and structs are counted as a single element.
    uint32_t total_size = 1;
    switch (load_type->kind()) {
      case analysis::Type::kArray: {
        const analysis::Constant* size_const =
            const_mgr->FindDeclaredConstant(load_type->AsArray()->LengthId());
        assert(size_const->AsIntConstant());
        total_size = size_const->GetU32();
      } break;
      case analysis::Type::kStruct:
        total_size = static_cast<uint32_t>(
            load_type->AsStruct()->element_types().size());
        break;
      default:
        break;
    }
    double percent_used = static_cast<double>(elements_used.size()) /
                          static_cast<double>(total_size);
    should_replace = (percent_used < kThreshold);
  }

  should_replace_cache_[op_inst->result_id()] = should_replace;
  return should_replace;
}
} // namespace opt
} // namespace spvtools

View File

@ -0,0 +1,62 @@
// Copyright (c) 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef LIBSPIRV_OPT_REDUCE_LOAD_SIZE_H_
#define LIBSPIRV_OPT_REDUCE_LOAD_SIZE_H_
#include "ir_context.h"
#include "module.h"
#include "pass.h"
namespace spvtools {
namespace opt {
// See optimizer.hpp for documentation.
// See optimizer.hpp for documentation.
class ReduceLoadSize : public Pass {
 public:
  const char* name() const override { return "reduce-load-size"; }
  Status Process(ir::IRContext* irContext) override;

  // Return the mask of preserved Analyses.
  //
  // The def-use analysis is included: the pass creates its new instructions
  // through an InstructionBuilder that is asked to keep def-use (and the
  // instruction-to-block mapping) up to date, and otherwise only modifies the
  // module through IRContext::ReplaceAllUsesWith and IRContext::KillInst.
  ir::IRContext::Analysis GetPreservedAnalyses() override {
    return ir::IRContext::kAnalysisDefUse |
           ir::IRContext::kAnalysisInstrToBlockMapping |
           ir::IRContext::kAnalysisCombinators | ir::IRContext::kAnalysisCFG |
           ir::IRContext::kAnalysisDominatorAnalysis |
           ir::IRContext::kAnalysisLoopAnalysis |
           ir::IRContext::kAnalysisNameMap;
  }

 private:
  // Replaces |inst|, which must be an OpCompositeExtract instruction, with
  // an OpAccessChain and a load if possible.  This happens only if it is a
  // load feeding |inst|.  Returns true if the substitution happened.  The
  // position of the new instructions will be in the same place as the load
  // feeding the extract.
  bool ReplaceExtract(ir::Instruction* inst);

  // Returns true if the OpCompositeExtract instruction |inst| should be
  // replaced.  This is determined by looking at the load that feeds |inst|,
  // if it is a load.  |should_replace_cache_| is used to cache the results
  // based on the load feeding |inst|.
  bool ShouldReplaceExtract(ir::Instruction* inst);

  // Maps the result id of an OpLoad instruction to the decision of whether or
  // not the OpCompositeExtract instructions that use the id should be
  // replaced.
  std::unordered_map<uint32_t, bool> should_replace_cache_;
};
} // namespace opt
} // namespace spvtools
#endif // LIBSPIRV_OPT_REDUCE_LOAD_SIZE_H_

View File

@ -322,3 +322,7 @@ add_spvtools_unittest(TARGET vector_dce
LIBS SPIRV-Tools-opt
)
add_spvtools_unittest(TARGET reduce_load_size
SRCS reduce_load_size_test.cpp pass_utils.cpp
LIBS SPIRV-Tools-opt
)

View File

@ -188,7 +188,7 @@ OpFunctionEnd
SinglePassRunAndMatch<opt::CopyPropagateArrays>(before, false);
}
// Propagate 2d array. This test identifing a copy through multiple levels.
// Propagate 2d array. This test identifying a copy through multiple levels.
// Also has to traverse multiple OpAccessChains.
TEST_F(CopyPropArrayPassTest, Propagate2DArray) {
const std::string text =
@ -277,6 +277,93 @@ OpFunctionEnd
SinglePassRunAndMatch<opt::CopyPropagateArrays>(text, false);
}
// Propagate 2d array. This test identifies a copy through multiple levels,
// where the elements are read with multi-index OpCompositeExtract instructions.
// Also has to traverse multiple OpAccessChains.
TEST_F(CopyPropArrayPassTest, Propagate2DArrayWithMultiLevelExtract) {
const std::string text =
R"(OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %in_var_INDEX %out_var_SV_Target
OpExecutionMode %main OriginUpperLeft
OpSource HLSL 600
OpName %type_MyCBuffer "type.MyCBuffer"
OpMemberName %type_MyCBuffer 0 "Data"
OpName %MyCBuffer "MyCBuffer"
OpName %main "main"
OpName %in_var_INDEX "in.var.INDEX"
OpName %out_var_SV_Target "out.var.SV_Target"
OpDecorate %_arr_v4float_uint_2 ArrayStride 16
OpDecorate %_arr__arr_v4float_uint_2_uint_2 ArrayStride 32
OpMemberDecorate %type_MyCBuffer 0 Offset 0
OpDecorate %type_MyCBuffer Block
OpDecorate %in_var_INDEX Flat
OpDecorate %in_var_INDEX Location 0
OpDecorate %out_var_SV_Target Location 0
OpDecorate %MyCBuffer DescriptorSet 0
OpDecorate %MyCBuffer Binding 0
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%uint = OpTypeInt 32 0
%uint_2 = OpConstant %uint 2
%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2
%_arr__arr_v4float_uint_2_uint_2 = OpTypeArray %_arr_v4float_uint_2 %uint_2
%type_MyCBuffer = OpTypeStruct %_arr__arr_v4float_uint_2_uint_2
%_ptr_Uniform_type_MyCBuffer = OpTypePointer Uniform %type_MyCBuffer
%void = OpTypeVoid
%14 = OpTypeFunction %void
%int = OpTypeInt 32 1
%_ptr_Input_int = OpTypePointer Input %int
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_arr_v4float_uint_2_0 = OpTypeArray %v4float %uint_2
%_arr__arr_v4float_uint_2_0_uint_2 = OpTypeArray %_arr_v4float_uint_2_0 %uint_2
%_ptr_Function__arr__arr_v4float_uint_2_0_uint_2 = OpTypePointer Function %_arr__arr_v4float_uint_2_0_uint_2
%int_0 = OpConstant %int 0
%_ptr_Uniform__arr__arr_v4float_uint_2_uint_2 = OpTypePointer Uniform %_arr__arr_v4float_uint_2_uint_2
%_ptr_Function__arr_v4float_uint_2_0 = OpTypePointer Function %_arr_v4float_uint_2_0
%_ptr_Function_v4float = OpTypePointer Function %v4float
%MyCBuffer = OpVariable %_ptr_Uniform_type_MyCBuffer Uniform
%in_var_INDEX = OpVariable %_ptr_Input_int Input
%out_var_SV_Target = OpVariable %_ptr_Output_v4float Output
; CHECK: OpFunction
; CHECK: OpLabel
; CHECK: OpVariable
; CHECK: OpVariable
; CHECK: OpAccessChain
; CHECK: [[new_address:%\w+]] = OpAccessChain %_ptr_Uniform__arr__arr_v4float_uint_2_uint_2 %MyCBuffer %int_0
%main = OpFunction %void None %14
%25 = OpLabel
%26 = OpVariable %_ptr_Function__arr_v4float_uint_2_0 Function
%27 = OpVariable %_ptr_Function__arr__arr_v4float_uint_2_0_uint_2 Function
%28 = OpLoad %int %in_var_INDEX
%29 = OpAccessChain %_ptr_Uniform__arr__arr_v4float_uint_2_uint_2 %MyCBuffer %int_0
%30 = OpLoad %_arr__arr_v4float_uint_2_uint_2 %29
%32 = OpCompositeExtract %v4float %30 0 0
%33 = OpCompositeExtract %v4float %30 0 1
%34 = OpCompositeConstruct %_arr_v4float_uint_2_0 %32 %33
%36 = OpCompositeExtract %v4float %30 1 0
%37 = OpCompositeExtract %v4float %30 1 1
%38 = OpCompositeConstruct %_arr_v4float_uint_2_0 %36 %37
%39 = OpCompositeConstruct %_arr__arr_v4float_uint_2_0_uint_2 %34 %38
; CHECK: OpStore
OpStore %27 %39
%40 = OpAccessChain %_ptr_Function__arr_v4float_uint_2_0 %27 %28
%42 = OpAccessChain %_ptr_Function_v4float %40 %28
%43 = OpLoad %v4float %42
; CHECK: [[ac1:%\w+]] = OpAccessChain %_ptr_Uniform__arr_v4float_uint_2 [[new_address]] %28
; CHECK: [[ac2:%\w+]] = OpAccessChain %_ptr_Uniform_v4float [[ac1]] %28
; CHECK: [[load:%\w+]] = OpLoad %v4float [[ac2]]
; CHECK: OpStore %out_var_SV_Target [[load]]
OpStore %out_var_SV_Target %43
OpReturn
OpFunctionEnd
)";
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
SinglePassRunAndMatch<opt::CopyPropagateArrays>(text, false);
}
// Test decomposing an object when we need to "rewrite" a store.
TEST_F(CopyPropArrayPassTest, DecomposeObjectForArrayStore) {
const std::string text =

View File

@ -5518,4 +5518,4 @@ INSTANTIATE_TEST_CASE_P(DotProductMatchingTest, MatchingInstructionFoldingTest,
3, true)
));
#endif
} // anonymous namespace
} // anonymous namespace

View File

@ -0,0 +1,259 @@
// Copyright (c) 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "pass_fixture.h"
#include "pass_utils.h"
namespace {
using namespace spvtools;
using ReduceLoadSizeTest = PassTest<::testing::Test>;
#ifdef SPIRV_EFFCEE
// Only element 1 of the 32-element array loaded from the cbuffer is extracted.
// The CHECK lines expect the extract to be served by an access chain to that
// element followed by a load, whose result is what gets stored.
TEST_F(ReduceLoadSizeTest, cbuffer_load_extract) {
// Originally from the following HLSL:
// struct S {
// uint f;
// };
//
//
// cbuffer gBuffer { uint a[32]; };
//
// RWStructuredBuffer<S> gRWSBuffer;
//
// uint foo(uint p[32]) {
// return p[1];
// }
//
// [numthreads(1,1,1)]
// void main() {
// gRWSBuffer[0].f = foo(a);
// }
const std::string test =
R"(
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource HLSL 600
OpName %type_gBuffer "type.gBuffer"
OpMemberName %type_gBuffer 0 "a"
OpName %gBuffer "gBuffer"
OpName %S "S"
OpMemberName %S 0 "f"
OpName %type_RWStructuredBuffer_S "type.RWStructuredBuffer.S"
OpName %gRWSBuffer "gRWSBuffer"
OpName %main "main"
OpDecorate %_arr_uint_uint_32 ArrayStride 16
OpMemberDecorate %type_gBuffer 0 Offset 0
OpDecorate %type_gBuffer Block
OpMemberDecorate %S 0 Offset 0
OpDecorate %_runtimearr_S ArrayStride 4
OpMemberDecorate %type_RWStructuredBuffer_S 0 Offset 0
OpDecorate %type_RWStructuredBuffer_S BufferBlock
OpDecorate %gBuffer DescriptorSet 0
OpDecorate %gBuffer Binding 0
OpDecorate %gRWSBuffer DescriptorSet 0
OpDecorate %gRWSBuffer Binding 1
%uint = OpTypeInt 32 0
%uint_32 = OpConstant %uint 32
%_arr_uint_uint_32 = OpTypeArray %uint %uint_32
%type_gBuffer = OpTypeStruct %_arr_uint_uint_32
%_ptr_Uniform_type_gBuffer = OpTypePointer Uniform %type_gBuffer
%S = OpTypeStruct %uint
%_runtimearr_S = OpTypeRuntimeArray %S
%type_RWStructuredBuffer_S = OpTypeStruct %_runtimearr_S
%_ptr_Uniform_type_RWStructuredBuffer_S = OpTypePointer Uniform %type_RWStructuredBuffer_S
%int = OpTypeInt 32 1
%void = OpTypeVoid
%15 = OpTypeFunction %void
%int_0 = OpConstant %int 0
%_ptr_Uniform__arr_uint_uint_32 = OpTypePointer Uniform %_arr_uint_uint_32
%uint_0 = OpConstant %uint 0
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%gBuffer = OpVariable %_ptr_Uniform_type_gBuffer Uniform
%gRWSBuffer = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_S Uniform
%main = OpFunction %void None %15
%20 = OpLabel
; CHECK: [[ac1:%\w+]] = OpAccessChain {{%\w+}} %gBuffer %int_0
; CHECK: [[ac2:%\w+]] = OpAccessChain {{%\w+}} [[ac1]] %uint_1
; CHECK: [[ld:%\w+]] = OpLoad {{%\w+}} [[ac2]]
; CHECK: OpStore {{%\w+}} [[ld]]
%21 = OpAccessChain %_ptr_Uniform__arr_uint_uint_32 %gBuffer %int_0
%22 = OpLoad %_arr_uint_uint_32 %21 ; Load of 32-element array.
%23 = OpCompositeExtract %uint %22 1
%24 = OpAccessChain %_ptr_Uniform_uint %gRWSBuffer %int_0 %uint_0 %int_0
OpStore %24 %23
OpReturn
OpFunctionEnd
)";
// Numeric ids are kept so that the ids in the CHECK lines stay meaningful.
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
SinglePassRunAndMatch<opt::ReduceLoadSize>(test, false);
}
#endif
// Same shape as cbuffer_load_extract, except that the member loaded from the
// cbuffer is a 4-component uint vector instead of a 32-element array.  The
// pass does not rewrite extracts from loaded vectors, so the output is
// expected to be identical to the input.
TEST_F(ReduceLoadSizeTest, cbuffer_load_extract_vector) {
// NOTE(review): the SPIR-V below appears to be hand-adapted from the HLSL used
// by cbuffer_load_extract (which declared `uint a[32]`); here member "a" has
// type %v4uint, and %uint_32 is declared but not used.
const std::string test =
R"(OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource HLSL 600
OpName %type_gBuffer "type.gBuffer"
OpMemberName %type_gBuffer 0 "a"
OpName %gBuffer "gBuffer"
OpName %S "S"
OpMemberName %S 0 "f"
OpName %type_RWStructuredBuffer_S "type.RWStructuredBuffer.S"
OpName %gRWSBuffer "gRWSBuffer"
OpName %main "main"
OpMemberDecorate %type_gBuffer 0 Offset 0
OpDecorate %type_gBuffer Block
OpMemberDecorate %S 0 Offset 0
OpDecorate %_runtimearr_S ArrayStride 4
OpMemberDecorate %type_RWStructuredBuffer_S 0 Offset 0
OpDecorate %type_RWStructuredBuffer_S BufferBlock
OpDecorate %gBuffer DescriptorSet 0
OpDecorate %gBuffer Binding 0
OpDecorate %gRWSBuffer DescriptorSet 0
OpDecorate %gRWSBuffer Binding 1
%uint = OpTypeInt 32 0
%uint_32 = OpConstant %uint 32
%v4uint = OpTypeVector %uint 4
%type_gBuffer = OpTypeStruct %v4uint
%_ptr_Uniform_type_gBuffer = OpTypePointer Uniform %type_gBuffer
%S = OpTypeStruct %uint
%_runtimearr_S = OpTypeRuntimeArray %S
%type_RWStructuredBuffer_S = OpTypeStruct %_runtimearr_S
%_ptr_Uniform_type_RWStructuredBuffer_S = OpTypePointer Uniform %type_RWStructuredBuffer_S
%int = OpTypeInt 32 1
%void = OpTypeVoid
%15 = OpTypeFunction %void
%int_0 = OpConstant %int 0
%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint
%uint_0 = OpConstant %uint 0
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%gBuffer = OpVariable %_ptr_Uniform_type_gBuffer Uniform
%gRWSBuffer = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_S Uniform
%main = OpFunction %void None %15
%20 = OpLabel
%21 = OpAccessChain %_ptr_Uniform_v4uint %gBuffer %int_0
%22 = OpLoad %v4uint %21
%23 = OpCompositeExtract %uint %22 1
%24 = OpAccessChain %_ptr_Uniform_uint %gRWSBuffer %int_0 %uint_0 %int_0
OpStore %24 %23
OpReturn
OpFunctionEnd
)";
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
// The expected output is the input itself: the pass must change nothing.
SinglePassRunAndCheck<opt::ReduceLoadSize>(test, test, true, false);
}
// All five elements of the loaded array are extracted and summed, so the pass
// must leave the load alone: the expected output is the input itself.
TEST_F(ReduceLoadSizeTest, cbuffer_load_5_extract) {
  // All of the elements of the value loaded are used, so we should not
  // change the load.
  //
  // The final OpIAdd adds the running sum %30 to the last element %27.  (It
  // previously named %20, which is the id of the block's OpLabel and not a
  // value.)
  const std::string test =
      R"(OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource HLSL 600
OpName %type_gBuffer "type.gBuffer"
OpMemberName %type_gBuffer 0 "a"
OpName %gBuffer "gBuffer"
OpName %S "S"
OpMemberName %S 0 "f"
OpName %type_RWStructuredBuffer_S "type.RWStructuredBuffer.S"
OpName %gRWSBuffer "gRWSBuffer"
OpName %main "main"
OpDecorate %_arr_uint_uint_5 ArrayStride 16
OpMemberDecorate %type_gBuffer 0 Offset 0
OpDecorate %type_gBuffer Block
OpMemberDecorate %S 0 Offset 0
OpDecorate %_runtimearr_S ArrayStride 4
OpMemberDecorate %type_RWStructuredBuffer_S 0 Offset 0
OpDecorate %type_RWStructuredBuffer_S BufferBlock
OpDecorate %gBuffer DescriptorSet 0
OpDecorate %gBuffer Binding 0
OpDecorate %gRWSBuffer DescriptorSet 0
OpDecorate %gRWSBuffer Binding 1
%uint = OpTypeInt 32 0
%uint_5 = OpConstant %uint 5
%_arr_uint_uint_5 = OpTypeArray %uint %uint_5
%type_gBuffer = OpTypeStruct %_arr_uint_uint_5
%_ptr_Uniform_type_gBuffer = OpTypePointer Uniform %type_gBuffer
%S = OpTypeStruct %uint
%_runtimearr_S = OpTypeRuntimeArray %S
%type_RWStructuredBuffer_S = OpTypeStruct %_runtimearr_S
%_ptr_Uniform_type_RWStructuredBuffer_S = OpTypePointer Uniform %type_RWStructuredBuffer_S
%int = OpTypeInt 32 1
%void = OpTypeVoid
%15 = OpTypeFunction %void
%int_0 = OpConstant %int 0
%_ptr_Uniform__arr_uint_uint_5 = OpTypePointer Uniform %_arr_uint_uint_5
%uint_0 = OpConstant %uint 0
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%gBuffer = OpVariable %_ptr_Uniform_type_gBuffer Uniform
%gRWSBuffer = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_S Uniform
%main = OpFunction %void None %15
%20 = OpLabel
%21 = OpAccessChain %_ptr_Uniform__arr_uint_uint_5 %gBuffer %int_0
%22 = OpLoad %_arr_uint_uint_5 %21
%23 = OpCompositeExtract %uint %22 0
%24 = OpCompositeExtract %uint %22 1
%25 = OpCompositeExtract %uint %22 2
%26 = OpCompositeExtract %uint %22 3
%27 = OpCompositeExtract %uint %22 4
%28 = OpIAdd %uint %23 %24
%29 = OpIAdd %uint %28 %25
%30 = OpIAdd %uint %29 %26
%31 = OpIAdd %uint %30 %27
%32 = OpAccessChain %_ptr_Uniform_uint %gRWSBuffer %int_0 %uint_0 %int_0
OpStore %32 %31
OpReturn
OpFunctionEnd
)";

  SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
                        SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
  SinglePassRunAndCheck<opt::ReduceLoadSize>(test, test, true, false);
}
} // anonymous namespace

View File

@ -580,6 +580,8 @@ OptStatus ParseFlags(int argc, const char** argv, Optimizer* optimizer,
optimizer->RegisterPass(CreateLocalRedundancyEliminationPass());
} else if (0 == strcmp(cur_arg, "--loop-invariant-code-motion")) {
optimizer->RegisterPass(CreateLoopInvariantCodeMotionPass());
} else if (0 == strcmp(cur_arg, "--reduce-load-size")) {
optimizer->RegisterPass(CreateReduceLoadSizePass());
} else if (0 == strcmp(cur_arg, "--redundancy-elimination")) {
optimizer->RegisterPass(CreateRedundancyEliminationPass());
} else if (0 == strcmp(cur_arg, "--private-to-local")) {