mirror of
https://gitee.com/openharmony/third_party_spirv-tools
synced 2024-11-23 07:20:28 +00:00
Add pass to fold a load feeding an extract.
We have already disabled common uniform elimination because it created sequences that load an entire uniform object and then extract just a single element. This caused problems in some drivers, and is generally slow because it loads more memory than needed. However, there are other ways to get into this situation, so I've added a pass that looks specifically for this pattern and removes it when only a portion of the load is used. Fixes #1547.
This commit is contained in:
parent
804e8884c4
commit
af430ec822
@ -122,6 +122,7 @@ SPVTOOLS_OPT_SRC_FILES := \
|
||||
source/opt/pass_manager.cpp \
|
||||
source/opt/private_to_local_pass.cpp \
|
||||
source/opt/propagator.cpp \
|
||||
source/opt/reduce_load_size.cpp \
|
||||
source/opt/redundancy_elimination.cpp \
|
||||
source/opt/register_pressure.cpp \
|
||||
source/opt/remove_duplicates_pass.cpp \
|
||||
|
@ -578,6 +578,12 @@ Optimizer::PassToken CreateCopyPropagateArraysPass();
|
||||
// a pass of ADCE will be able to remove.
|
||||
Optimizer::PassToken CreateVectorDCEPass();
|
||||
|
||||
// Create a pass to reduce the size of loads.
|
||||
// This pass looks for loads of structures where only a few of its members are
|
||||
// used. It replaces the loads feeding an OpExtract with an OpAccessChain and
|
||||
// a load of the specific elements.
|
||||
Optimizer::PassToken CreateReduceLoadSizePass();
|
||||
|
||||
} // namespace spvtools
|
||||
|
||||
#endif // SPIRV_TOOLS_OPTIMIZER_HPP_
|
||||
|
@ -77,6 +77,7 @@ add_library(SPIRV-Tools-opt
|
||||
pass_manager.h
|
||||
private_to_local_pass.h
|
||||
propagator.h
|
||||
reduce_load_size.h
|
||||
redundancy_elimination.h
|
||||
reflect.h
|
||||
register_pressure.h
|
||||
@ -161,6 +162,7 @@ add_library(SPIRV-Tools-opt
|
||||
pass_manager.cpp
|
||||
private_to_local_pass.cpp
|
||||
propagator.cpp
|
||||
reduce_load_size.cpp
|
||||
redundancy_elimination.cpp
|
||||
register_pressure.cpp
|
||||
remove_duplicates_pass.cpp
|
||||
|
@ -269,7 +269,7 @@ CopyPropagateArrays::BuildMemoryObjectFromExtract(
|
||||
// Convert the indices in the extract instruction to a series of ids that
|
||||
// can be used by the |OpAccessChain| instruction.
|
||||
for (uint32_t i = 1; i < extract_inst->NumInOperands(); ++i) {
|
||||
uint32_t index = extract_inst->GetSingleWordInOperand(1);
|
||||
uint32_t index = extract_inst->GetSingleWordInOperand(i);
|
||||
const analysis::Constant* index_const =
|
||||
const_mgr->GetConstant(uint32_type, {index});
|
||||
components.push_back(
|
||||
|
@ -331,6 +331,16 @@ class InstructionBuilder {
|
||||
return AddInstruction(std::move(new_inst));
|
||||
}
|
||||
|
||||
// Builds an OpLoad whose result type is |type_id| and whose single in-operand
// is the pointer id |base_ptr_id|, gives it a freshly allocated result id, and
// hands it to AddInstruction().  Returns whatever AddInstruction() returns.
ir::Instruction* AddLoad(uint32_t type_id, uint32_t base_ptr_id) {
  // The pointer being loaded from is the only operand supplied.
  std::vector<ir::Operand> load_operands;
  load_operands.push_back({SPV_OPERAND_TYPE_ID, {base_ptr_id}});

  const uint32_t load_result_id = GetContext()->TakeNextId();
  std::unique_ptr<ir::Instruction> load_inst(new ir::Instruction(
      GetContext(), SpvOpLoad, type_id, load_result_id, load_operands));
  return AddInstruction(std::move(load_inst));
}
|
||||
|
||||
// Inserts the new instruction before the insertion point.
|
||||
ir::Instruction* AddInstruction(std::unique_ptr<ir::Instruction>&& insn) {
|
||||
ir::Instruction* insn_ptr = &*insert_before_.InsertBefore(std::move(insn));
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "make_unique.h"
|
||||
#include "pass_manager.h"
|
||||
#include "passes.h"
|
||||
#include "reduce_load_size.h"
|
||||
#include "simplification_pass.h"
|
||||
|
||||
namespace spvtools {
|
||||
@ -128,6 +129,7 @@ Optimizer& Optimizer::RegisterLegalizationPasses() {
|
||||
// or unused references to unbound external objects
|
||||
.RegisterPass(CreateVectorDCEPass())
|
||||
.RegisterPass(CreateDeadInsertElimPass())
|
||||
.RegisterPass(CreateReduceLoadSizePass())
|
||||
.RegisterPass(CreateAggressiveDCEPass());
|
||||
}
|
||||
|
||||
@ -156,6 +158,7 @@ Optimizer& Optimizer::RegisterPerformancePasses() {
|
||||
.RegisterPass(CreateSimplificationPass())
|
||||
.RegisterPass(CreateIfConversionPass())
|
||||
.RegisterPass(CreateCopyPropagateArraysPass())
|
||||
.RegisterPass(CreateReduceLoadSizePass())
|
||||
.RegisterPass(CreateAggressiveDCEPass())
|
||||
.RegisterPass(CreateBlockMergePass())
|
||||
.RegisterPass(CreateRedundancyEliminationPass())
|
||||
@ -466,4 +469,8 @@ Optimizer::PassToken CreateVectorDCEPass() {
|
||||
return MakeUnique<Optimizer::PassToken::Impl>(MakeUnique<opt::VectorDCE>());
|
||||
}
|
||||
|
||||
// Factory for the reduce-load-size pass: wraps a new opt::ReduceLoadSize in a
// pass token implementation.
Optimizer::PassToken CreateReduceLoadSizePass() {
  using TokenImpl = Optimizer::PassToken::Impl;
  return MakeUnique<TokenImpl>(MakeUnique<opt::ReduceLoadSize>());
}
|
||||
} // namespace spvtools
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include "merge_return_pass.h"
|
||||
#include "null_pass.h"
|
||||
#include "private_to_local_pass.h"
|
||||
#include "reduce_load_size.h"
|
||||
#include "redundancy_elimination.h"
|
||||
#include "remove_duplicates_pass.h"
|
||||
#include "replace_invalid_opc.h"
|
||||
|
176
source/opt/reduce_load_size.cpp
Normal file
176
source/opt/reduce_load_size.cpp
Normal file
@ -0,0 +1,176 @@
|
||||
// Copyright (c) 2018 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "reduce_load_size.h"

#include <util/bit_vector.h>

#include <cstdint>
#include <set>

#include "instruction.h"
#include "ir_builder.h"
#include "ir_context.h"
|
||||
|
||||
namespace {

// In-operand positions used when inspecting instructions in this file.
// Index of the composite id in an OpCompositeExtract.
const uint32_t kExtractCompositeIdInIdx = 0u;
// Index of the storage class in an OpVariable.
const uint32_t kVariableStorageClassInIdx = 0u;
// Index of the pointer in an OpLoad.
const uint32_t kLoadPointerInIdx = 0u;

// A load is only reduced when the fraction of its elements that are used is
// strictly below this value.
const double kThreshold = 0.9;

}  // namespace
|
||||
|
||||
namespace spvtools {
|
||||
namespace opt {
|
||||
|
||||
// Visits every instruction of every function in the module and, for each
// OpCompositeExtract that ShouldReplaceExtract() approves, attempts the
// rewrite via ReplaceExtract().  Reports SuccessWithChange if at least one
// rewrite happened, SuccessWithoutChange otherwise.
Pass::Status ReduceLoadSize::Process(ir::IRContext* ctx) {
  InitializeProcessing(ctx);

  bool changed = false;
  auto try_reduce = [&changed, this](ir::Instruction* candidate) {
    if (candidate->opcode() != SpvOpCompositeExtract) return;
    if (!ShouldReplaceExtract(candidate)) return;
    changed |= ReplaceExtract(candidate);
  };

  for (auto& function : *get_module()) {
    function.ForEachInst(try_reduce);
  }

  if (changed) return Status::SuccessWithChange;
  return Status::SuccessWithoutChange;
}
|
||||
|
||||
// Rewrites |inst|, which must be an OpCompositeExtract, into an OpAccessChain
// followed by an OpLoad of only the extracted element.  The rewrite is done
// only when the composite is produced by an OpLoad of a value that is neither
// a vector nor a matrix, and the load's base address is an OpVariable in the
// Uniform, UniformConstant or Input storage class.  Returns true if the
// rewrite was performed and false if |inst| was left alone.
bool ReduceLoadSize::ReplaceExtract(ir::Instruction* inst) {
  assert(inst->opcode() == SpvOpCompositeExtract &&
         "Wrong opcode. Should be OpCompositeExtract.");
  auto* ctx = inst->context();
  analysis::DefUseManager* def_use_mgr = ctx->get_def_use_mgr();
  analysis::TypeManager* type_mgr = ctx->get_type_mgr();
  analysis::ConstantManager* const_mgr = ctx->get_constant_mgr();

  // The value being extracted from must come straight out of a load.
  ir::Instruction* loaded_value = def_use_mgr->GetDef(
      inst->GetSingleWordInOperand(kExtractCompositeIdInIdx));
  if (loaded_value->opcode() != SpvOpLoad) return false;

  // Vector and matrix loads are never split up.
  const auto loaded_kind = type_mgr->GetType(loaded_value->type_id())->kind();
  if (loaded_kind == analysis::Type::kVector ||
      loaded_kind == analysis::Type::kMatrix) {
    return false;
  }

  // The load has to be traceable back to a variable.
  ir::Instruction* base = loaded_value->GetBaseAddress();
  if (base == nullptr) return false;
  if (base->opcode() != SpvOpVariable) return false;

  // Only these three storage classes are handled.
  const SpvStorageClass storage_class = static_cast<SpvStorageClass>(
      base->GetSingleWordInOperand(kVariableStorageClassInIdx));
  const bool handled_storage = storage_class == SpvStorageClassUniform ||
                               storage_class == SpvStorageClassUniformConstant ||
                               storage_class == SpvStorageClassInput;
  if (!handled_storage) return false;

  // The new access chain and load are emitted at the old load (the builder's
  // insertion point) rather than at the extract, because the storage may have
  // been written to between the load and the extract.
  InstructionBuilder ir_builder(ctx, loaded_value,
                                ir::IRContext::kAnalysisInstrToBlockMapping |
                                    ir::IRContext::kAnalysisDefUse);

  const uint32_t element_ptr_type_id =
      type_mgr->FindPointerToType(inst->type_id(), storage_class);
  assert(element_ptr_type_id != 0 &&
         "We did not find the pointer type that we need.");

  // Turn each literal index of the extract into the id of a 32-bit unsigned
  // integer constant, as needed by OpAccessChain.
  analysis::Integer int_type(32, false);
  const analysis::Type* uint32_type = type_mgr->GetRegisteredType(&int_type);
  std::vector<uint32_t> index_ids;
  const uint32_t operand_count = inst->NumInOperands();
  for (uint32_t operand = 1; operand < operand_count; ++operand) {
    const uint32_t literal_index = inst->GetSingleWordInOperand(operand);
    const analysis::Constant* index_const =
        const_mgr->GetConstant(uint32_type, {literal_index});
    ir::Instruction* index_def = const_mgr->GetDefiningInstruction(index_const);
    index_ids.push_back(index_def->result_id());
  }

  const uint32_t loaded_ptr_id =
      loaded_value->GetSingleWordInOperand(kLoadPointerInIdx);
  ir::Instruction* new_access_chain =
      ir_builder.AddAccessChain(element_ptr_type_id, loaded_ptr_id, index_ids);
  ir::Instruction* new_load =
      ir_builder.AddLoad(inst->type_id(), new_access_chain->result_id());

  // Redirect every user of the extract to the narrower load, then drop the
  // extract itself.
  context()->ReplaceAllUsesWith(inst->result_id(), new_load->result_id());
  context()->KillInst(inst);
  return true;
}
|
||||
|
||||
// Decides whether the OpCompositeExtract |inst| is worth rewriting.
//
// The decision is made per feeding OpLoad and memoized in
// |should_replace_cache_| under the load's result id:
//  * If the composite operand of |inst| is not produced by an OpLoad, the
//    answer is false (and nothing is cached).
//  * If any user of the load is not an OpCompositeExtract, or is an extract
//    without any index operand, the whole loaded value is considered used and
//    the answer is false.
//  * Otherwise the number of distinct first-level indices extracted is compared
//    with the number of elements in the loaded array or struct; the answer is
//    true when that fraction is strictly below |kThreshold|.
bool ReduceLoadSize::ShouldReplaceExtract(ir::Instruction* inst) {
  analysis::DefUseManager* def_use_mgr = context()->get_def_use_mgr();
  ir::Instruction* op_inst = def_use_mgr->GetDef(
      inst->GetSingleWordInOperand(kExtractCompositeIdInIdx));

  if (op_inst->opcode() != SpvOpLoad) {
    return false;
  }

  auto cached_result = should_replace_cache_.find(op_inst->result_id());
  if (cached_result != should_replace_cache_.end()) {
    return cached_result->second;
  }

  bool all_elements_used = false;
  std::set<uint32_t> elements_used;

  def_use_mgr->ForEachUser(
      op_inst, [&elements_used, &all_elements_used](ir::Instruction* use) {
        // Only index an operand that is known to exist: in-operand 1 is the
        // first extract index, which other opcodes (or an index-less extract)
        // do not have.
        if (use->opcode() != SpvOpCompositeExtract ||
            use->NumInOperands() <= 1) {
          all_elements_used = true;
          return;
        }
        elements_used.insert(use->GetSingleWordInOperand(1));
      });

  bool should_replace = false;
  if (!all_elements_used) {
    analysis::ConstantManager* const_mgr = context()->get_constant_mgr();
    analysis::TypeManager* type_mgr = context()->get_type_mgr();
    analysis::Type* load_type = type_mgr->GetType(op_inst->type_id());
    uint32_t total_size = 1;
    switch (load_type->kind()) {
      case analysis::Type::kArray: {
        const analysis::Constant* size_const =
            const_mgr->FindDeclaredConstant(load_type->AsArray()->LengthId());
        if (size_const != nullptr) {
          assert(size_const->AsIntConstant());
          total_size = size_const->GetU32();
        } else {
          // No declared constant was found for the length (presumably a
          // specialization constant -- TODO confirm), so the size is unknown
          // here.  Assume it is very large.
          total_size = UINT32_MAX;
        }
      } break;
      case analysis::Type::kStruct:
        total_size = static_cast<uint32_t>(
            load_type->AsStruct()->element_types().size());
        break;
      default:
        break;
    }
    double percent_used = static_cast<double>(elements_used.size()) /
                          static_cast<double>(total_size);
    should_replace = (percent_used < kThreshold);
  }

  should_replace_cache_[op_inst->result_id()] = should_replace;
  return should_replace;
}
|
||||
|
||||
} // namespace opt
|
||||
} // namespace spvtools
|
62
source/opt/reduce_load_size.h
Normal file
62
source/opt/reduce_load_size.h
Normal file
@ -0,0 +1,62 @@
|
||||
// Copyright (c) 2018 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef LIBSPIRV_OPT_REDUCE_LOAD_SIZE_H_
|
||||
#define LIBSPIRV_OPT_REDUCE_LOAD_SIZE_H_
|
||||
|
||||
#include "ir_context.h"
|
||||
#include "module.h"
|
||||
#include "pass.h"
|
||||
|
||||
namespace spvtools {
|
||||
namespace opt {
|
||||
|
||||
// See optimizer.hpp for documentation.
|
||||
class ReduceLoadSize : public Pass {
|
||||
public:
|
||||
const char* name() const override { return "reduce-load-size"; }
|
||||
Status Process(ir::IRContext* irContext) override;
|
||||
|
||||
// Return the mask of preserved Analyses.
|
||||
ir::IRContext::Analysis GetPreservedAnalyses() override {
|
||||
return ir::IRContext::kAnalysisInstrToBlockMapping |
|
||||
ir::IRContext::kAnalysisCombinators | ir::IRContext::kAnalysisCFG |
|
||||
ir::IRContext::kAnalysisDominatorAnalysis |
|
||||
ir::IRContext::kAnalysisLoopAnalysis |
|
||||
ir::IRContext::kAnalysisNameMap;
|
||||
}
|
||||
|
||||
private:
|
||||
// Replaces |inst|, which must be an OpCompositeExtract instruction, with
|
||||
// an OpAccessChain and a load if possible. This happens only if it is a load
|
||||
// feeding |inst|. Returns true if the substitution happened. The position
|
||||
// of the new instructions will be in the same place as the load feeding the
|
||||
// extract.
|
||||
bool ReplaceExtract(ir::Instruction* inst);
|
||||
|
||||
// Returns true if the OpCompositeExtract instruction |inst| should be replace
|
||||
// or not. This is determined by looking at the load that feeds |inst| if
|
||||
// it is a load. |should_replace_cache_| is used to cache the results based
|
||||
// on the load feeding |inst|.
|
||||
bool ShouldReplaceExtract(ir::Instruction* inst);
|
||||
|
||||
// Maps the result id of an OpLoad instruction to the result of whether or
|
||||
// not the OpCompositeExtract that use the id should be replaced.
|
||||
std::unordered_map<uint32_t, bool> should_replace_cache_;
|
||||
};
|
||||
|
||||
} // namespace opt
|
||||
} // namespace spvtools
|
||||
|
||||
#endif // LIBSPIRV_OPT_REDUCE_LOAD_SIZE_H_
|
@ -322,3 +322,7 @@ add_spvtools_unittest(TARGET vector_dce
|
||||
LIBS SPIRV-Tools-opt
|
||||
)
|
||||
|
||||
add_spvtools_unittest(TARGET reduce_load_size
|
||||
SRCS reduce_load_size_test.cpp pass_utils.cpp
|
||||
LIBS SPIRV-Tools-opt
|
||||
)
|
||||
|
@ -188,7 +188,7 @@ OpFunctionEnd
|
||||
SinglePassRunAndMatch<opt::CopyPropagateArrays>(before, false);
|
||||
}
|
||||
|
||||
// Propagate 2d array. This test identifing a copy through multiple levels.
|
||||
// Propagate 2d array. This test identifying a copy through multiple levels.
|
||||
// Also has to traverse multiple OpAccessChains.
|
||||
TEST_F(CopyPropArrayPassTest, Propagate2DArray) {
|
||||
const std::string text =
|
||||
@ -277,6 +277,93 @@ OpFunctionEnd
|
||||
SinglePassRunAndMatch<opt::CopyPropagateArrays>(text, false);
|
||||
}
|
||||
|
||||
// Propagate 2d array. This test identifying a copy through multiple levels.
|
||||
// Also has to traverse multiple OpAccessChains.
|
||||
TEST_F(CopyPropArrayPassTest, Propagate2DArrayWithMultiLevelExtract) {
|
||||
const std::string text =
|
||||
R"(OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %main "main" %in_var_INDEX %out_var_SV_Target
|
||||
OpExecutionMode %main OriginUpperLeft
|
||||
OpSource HLSL 600
|
||||
OpName %type_MyCBuffer "type.MyCBuffer"
|
||||
OpMemberName %type_MyCBuffer 0 "Data"
|
||||
OpName %MyCBuffer "MyCBuffer"
|
||||
OpName %main "main"
|
||||
OpName %in_var_INDEX "in.var.INDEX"
|
||||
OpName %out_var_SV_Target "out.var.SV_Target"
|
||||
OpDecorate %_arr_v4float_uint_2 ArrayStride 16
|
||||
OpDecorate %_arr__arr_v4float_uint_2_uint_2 ArrayStride 32
|
||||
OpMemberDecorate %type_MyCBuffer 0 Offset 0
|
||||
OpDecorate %type_MyCBuffer Block
|
||||
OpDecorate %in_var_INDEX Flat
|
||||
OpDecorate %in_var_INDEX Location 0
|
||||
OpDecorate %out_var_SV_Target Location 0
|
||||
OpDecorate %MyCBuffer DescriptorSet 0
|
||||
OpDecorate %MyCBuffer Binding 0
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2
|
||||
%_arr__arr_v4float_uint_2_uint_2 = OpTypeArray %_arr_v4float_uint_2 %uint_2
|
||||
%type_MyCBuffer = OpTypeStruct %_arr__arr_v4float_uint_2_uint_2
|
||||
%_ptr_Uniform_type_MyCBuffer = OpTypePointer Uniform %type_MyCBuffer
|
||||
%void = OpTypeVoid
|
||||
%14 = OpTypeFunction %void
|
||||
%int = OpTypeInt 32 1
|
||||
%_ptr_Input_int = OpTypePointer Input %int
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%_arr_v4float_uint_2_0 = OpTypeArray %v4float %uint_2
|
||||
%_arr__arr_v4float_uint_2_0_uint_2 = OpTypeArray %_arr_v4float_uint_2_0 %uint_2
|
||||
%_ptr_Function__arr__arr_v4float_uint_2_0_uint_2 = OpTypePointer Function %_arr__arr_v4float_uint_2_0_uint_2
|
||||
%int_0 = OpConstant %int 0
|
||||
%_ptr_Uniform__arr__arr_v4float_uint_2_uint_2 = OpTypePointer Uniform %_arr__arr_v4float_uint_2_uint_2
|
||||
%_ptr_Function__arr_v4float_uint_2_0 = OpTypePointer Function %_arr_v4float_uint_2_0
|
||||
%_ptr_Function_v4float = OpTypePointer Function %v4float
|
||||
%MyCBuffer = OpVariable %_ptr_Uniform_type_MyCBuffer Uniform
|
||||
%in_var_INDEX = OpVariable %_ptr_Input_int Input
|
||||
%out_var_SV_Target = OpVariable %_ptr_Output_v4float Output
|
||||
; CHECK: OpFunction
|
||||
; CHECK: OpLabel
|
||||
; CHECK: OpVariable
|
||||
; CHECK: OpVariable
|
||||
; CHECK: OpAccessChain
|
||||
; CHECK: [[new_address:%\w+]] = OpAccessChain %_ptr_Uniform__arr__arr_v4float_uint_2_uint_2 %MyCBuffer %int_0
|
||||
%main = OpFunction %void None %14
|
||||
%25 = OpLabel
|
||||
%26 = OpVariable %_ptr_Function__arr_v4float_uint_2_0 Function
|
||||
%27 = OpVariable %_ptr_Function__arr__arr_v4float_uint_2_0_uint_2 Function
|
||||
%28 = OpLoad %int %in_var_INDEX
|
||||
%29 = OpAccessChain %_ptr_Uniform__arr__arr_v4float_uint_2_uint_2 %MyCBuffer %int_0
|
||||
%30 = OpLoad %_arr__arr_v4float_uint_2_uint_2 %29
|
||||
%32 = OpCompositeExtract %v4float %30 0 0
|
||||
%33 = OpCompositeExtract %v4float %30 0 1
|
||||
%34 = OpCompositeConstruct %_arr_v4float_uint_2_0 %32 %33
|
||||
%36 = OpCompositeExtract %v4float %30 1 0
|
||||
%37 = OpCompositeExtract %v4float %30 1 1
|
||||
%38 = OpCompositeConstruct %_arr_v4float_uint_2_0 %36 %37
|
||||
%39 = OpCompositeConstruct %_arr__arr_v4float_uint_2_0_uint_2 %34 %38
|
||||
; CHECK: OpStore
|
||||
OpStore %27 %39
|
||||
%40 = OpAccessChain %_ptr_Function__arr_v4float_uint_2_0 %27 %28
|
||||
%42 = OpAccessChain %_ptr_Function_v4float %40 %28
|
||||
%43 = OpLoad %v4float %42
|
||||
; CHECK: [[ac1:%\w+]] = OpAccessChain %_ptr_Uniform__arr_v4float_uint_2 [[new_address]] %28
|
||||
; CHECK: [[ac2:%\w+]] = OpAccessChain %_ptr_Uniform_v4float [[ac1]] %28
|
||||
; CHECK: [[load:%\w+]] = OpLoad %v4float [[ac2]]
|
||||
; CHECK: OpStore %out_var_SV_Target [[load]]
|
||||
OpStore %out_var_SV_Target %43
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
)";
|
||||
|
||||
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
|
||||
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
|
||||
SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
|
||||
SinglePassRunAndMatch<opt::CopyPropagateArrays>(text, false);
|
||||
}
|
||||
|
||||
// Test decomposing an object when we need to "rewrite" a store.
|
||||
TEST_F(CopyPropArrayPassTest, DecomposeObjectForArrayStore) {
|
||||
const std::string text =
|
||||
|
@ -5518,4 +5518,4 @@ INSTANTIATE_TEST_CASE_P(DotProductMatchingTest, MatchingInstructionFoldingTest,
|
||||
3, true)
|
||||
));
|
||||
#endif
|
||||
} // anonymous namespace
|
||||
} // anonymous namespace
|
||||
|
259
test/opt/reduce_load_size_test.cpp
Normal file
259
test/opt/reduce_load_size_test.cpp
Normal file
@ -0,0 +1,259 @@
|
||||
// Copyright (c) 2018 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "pass_fixture.h"
|
||||
#include "pass_utils.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace spvtools;
|
||||
|
||||
using ReduceLoadSizeTest = PassTest<::testing::Test>;
|
||||
|
||||
#ifdef SPIRV_EFFCEE
|
||||
// A 32-element array is loaded from a uniform buffer but only element 1 is
// extracted.  The CHECK lines expect an access chain to that element followed
// by a load of it, with the loaded value feeding the store.
TEST_F(ReduceLoadSizeTest, cbuffer_load_extract) {
  // Originally from the following HLSL:
  //   struct S {
  //     uint f;
  //   };
  //
  //
  //   cbuffer gBuffer { uint a[32]; };
  //
  //   RWStructuredBuffer<S> gRWSBuffer;
  //
  //   uint foo(uint p[32]) {
  //     return p[1];
  //   }
  //
  //   [numthreads(1,1,1)]
  //   void main() {
  //      gRWSBuffer[0].f = foo(a);
  //   }
  const std::string module_text =
      R"(
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource HLSL 600
OpName %type_gBuffer "type.gBuffer"
OpMemberName %type_gBuffer 0 "a"
OpName %gBuffer "gBuffer"
OpName %S "S"
OpMemberName %S 0 "f"
OpName %type_RWStructuredBuffer_S "type.RWStructuredBuffer.S"
OpName %gRWSBuffer "gRWSBuffer"
OpName %main "main"
OpDecorate %_arr_uint_uint_32 ArrayStride 16
OpMemberDecorate %type_gBuffer 0 Offset 0
OpDecorate %type_gBuffer Block
OpMemberDecorate %S 0 Offset 0
OpDecorate %_runtimearr_S ArrayStride 4
OpMemberDecorate %type_RWStructuredBuffer_S 0 Offset 0
OpDecorate %type_RWStructuredBuffer_S BufferBlock
OpDecorate %gBuffer DescriptorSet 0
OpDecorate %gBuffer Binding 0
OpDecorate %gRWSBuffer DescriptorSet 0
OpDecorate %gRWSBuffer Binding 1
%uint = OpTypeInt 32 0
%uint_32 = OpConstant %uint 32
%_arr_uint_uint_32 = OpTypeArray %uint %uint_32
%type_gBuffer = OpTypeStruct %_arr_uint_uint_32
%_ptr_Uniform_type_gBuffer = OpTypePointer Uniform %type_gBuffer
%S = OpTypeStruct %uint
%_runtimearr_S = OpTypeRuntimeArray %S
%type_RWStructuredBuffer_S = OpTypeStruct %_runtimearr_S
%_ptr_Uniform_type_RWStructuredBuffer_S = OpTypePointer Uniform %type_RWStructuredBuffer_S
%int = OpTypeInt 32 1
%void = OpTypeVoid
%15 = OpTypeFunction %void
%int_0 = OpConstant %int 0
%_ptr_Uniform__arr_uint_uint_32 = OpTypePointer Uniform %_arr_uint_uint_32
%uint_0 = OpConstant %uint 0
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%gBuffer = OpVariable %_ptr_Uniform_type_gBuffer Uniform
%gRWSBuffer = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_S Uniform
%main = OpFunction %void None %15
%20 = OpLabel
; CHECK: [[ac1:%\w+]] = OpAccessChain {{%\w+}} %gBuffer %int_0
; CHECK: [[ac2:%\w+]] = OpAccessChain {{%\w+}} [[ac1]] %uint_1
; CHECK: [[ld:%\w+]] = OpLoad {{%\w+}} [[ac2]]
; CHECK: OpStore {{%\w+}} [[ld]]
%21 = OpAccessChain %_ptr_Uniform__arr_uint_uint_32 %gBuffer %int_0
%22 = OpLoad %_arr_uint_uint_32 %21 ; Load of 32-element array.
%23 = OpCompositeExtract %uint %22 1
%24 = OpAccessChain %_ptr_Uniform_uint %gRWSBuffer %int_0 %uint_0 %int_0
OpStore %24 %23
OpReturn
OpFunctionEnd
)";

  // Keep the numeric ids from the text and print friendly names so the CHECK
  // lines above can refer to them.
  SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
                        SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
  SinglePassRunAndMatch<opt::ReduceLoadSize>(module_text, false);
}
|
||||
#endif
|
||||
|
||||
// The value loaded here is a 4-component vector, and a single component of it
// is extracted.  The module is used as both input and expected output, i.e.
// the pass is expected to leave a vector load untouched.
TEST_F(ReduceLoadSizeTest, cbuffer_load_extract_vector) {
  // Derived from the HLSL below; note that in the SPIR-V text member 0 of
  // gBuffer is a v4uint rather than the uint[32] array of the HLSL.
  //   struct S {
  //     uint f;
  //   };
  //
  //
  //   cbuffer gBuffer { uint a[32]; };
  //
  //   RWStructuredBuffer<S> gRWSBuffer;
  //
  //   uint foo(uint p[32]) {
  //     return p[1];
  //   }
  //
  //   [numthreads(1,1,1)]
  //   void main() {
  //      gRWSBuffer[0].f = foo(a);
  //   }
  const std::string module_text =
      R"(OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource HLSL 600
OpName %type_gBuffer "type.gBuffer"
OpMemberName %type_gBuffer 0 "a"
OpName %gBuffer "gBuffer"
OpName %S "S"
OpMemberName %S 0 "f"
OpName %type_RWStructuredBuffer_S "type.RWStructuredBuffer.S"
OpName %gRWSBuffer "gRWSBuffer"
OpName %main "main"
OpMemberDecorate %type_gBuffer 0 Offset 0
OpDecorate %type_gBuffer Block
OpMemberDecorate %S 0 Offset 0
OpDecorate %_runtimearr_S ArrayStride 4
OpMemberDecorate %type_RWStructuredBuffer_S 0 Offset 0
OpDecorate %type_RWStructuredBuffer_S BufferBlock
OpDecorate %gBuffer DescriptorSet 0
OpDecorate %gBuffer Binding 0
OpDecorate %gRWSBuffer DescriptorSet 0
OpDecorate %gRWSBuffer Binding 1
%uint = OpTypeInt 32 0
%uint_32 = OpConstant %uint 32
%v4uint = OpTypeVector %uint 4
%type_gBuffer = OpTypeStruct %v4uint
%_ptr_Uniform_type_gBuffer = OpTypePointer Uniform %type_gBuffer
%S = OpTypeStruct %uint
%_runtimearr_S = OpTypeRuntimeArray %S
%type_RWStructuredBuffer_S = OpTypeStruct %_runtimearr_S
%_ptr_Uniform_type_RWStructuredBuffer_S = OpTypePointer Uniform %type_RWStructuredBuffer_S
%int = OpTypeInt 32 1
%void = OpTypeVoid
%15 = OpTypeFunction %void
%int_0 = OpConstant %int 0
%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint
%uint_0 = OpConstant %uint 0
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%gBuffer = OpVariable %_ptr_Uniform_type_gBuffer Uniform
%gRWSBuffer = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_S Uniform
%main = OpFunction %void None %15
%20 = OpLabel
%21 = OpAccessChain %_ptr_Uniform_v4uint %gBuffer %int_0
%22 = OpLoad %v4uint %21
%23 = OpCompositeExtract %uint %22 1
%24 = OpAccessChain %_ptr_Uniform_uint %gRWSBuffer %int_0 %uint_0 %int_0
OpStore %24 %23
OpReturn
OpFunctionEnd
)";

  // Keep the numeric ids from the text and print friendly names so that the
  // disassembly can be compared against the input text.
  SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
                        SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
  SinglePassRunAndCheck<opt::ReduceLoadSize>(module_text, module_text, true,
                                             false);
}
|
||||
|
||||
// A 5-element array is loaded and every one of its elements is extracted and
// summed.  Since all of the loaded value is used, the pass should not change
// the load: the module is used as both the input and the expected output.
TEST_F(ReduceLoadSizeTest, cbuffer_load_5_extract) {
  const std::string test =
      R"(OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource HLSL 600
OpName %type_gBuffer "type.gBuffer"
OpMemberName %type_gBuffer 0 "a"
OpName %gBuffer "gBuffer"
OpName %S "S"
OpMemberName %S 0 "f"
OpName %type_RWStructuredBuffer_S "type.RWStructuredBuffer.S"
OpName %gRWSBuffer "gRWSBuffer"
OpName %main "main"
OpDecorate %_arr_uint_uint_5 ArrayStride 16
OpMemberDecorate %type_gBuffer 0 Offset 0
OpDecorate %type_gBuffer Block
OpMemberDecorate %S 0 Offset 0
OpDecorate %_runtimearr_S ArrayStride 4
OpMemberDecorate %type_RWStructuredBuffer_S 0 Offset 0
OpDecorate %type_RWStructuredBuffer_S BufferBlock
OpDecorate %gBuffer DescriptorSet 0
OpDecorate %gBuffer Binding 0
OpDecorate %gRWSBuffer DescriptorSet 0
OpDecorate %gRWSBuffer Binding 1
%uint = OpTypeInt 32 0
%uint_5 = OpConstant %uint 5
%_arr_uint_uint_5 = OpTypeArray %uint %uint_5
%type_gBuffer = OpTypeStruct %_arr_uint_uint_5
%_ptr_Uniform_type_gBuffer = OpTypePointer Uniform %type_gBuffer
%S = OpTypeStruct %uint
%_runtimearr_S = OpTypeRuntimeArray %S
%type_RWStructuredBuffer_S = OpTypeStruct %_runtimearr_S
%_ptr_Uniform_type_RWStructuredBuffer_S = OpTypePointer Uniform %type_RWStructuredBuffer_S
%int = OpTypeInt 32 1
%void = OpTypeVoid
%15 = OpTypeFunction %void
%int_0 = OpConstant %int 0
%_ptr_Uniform__arr_uint_uint_5 = OpTypePointer Uniform %_arr_uint_uint_5
%uint_0 = OpConstant %uint 0
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%gBuffer = OpVariable %_ptr_Uniform_type_gBuffer Uniform
%gRWSBuffer = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_S Uniform
%main = OpFunction %void None %15
%20 = OpLabel
%21 = OpAccessChain %_ptr_Uniform__arr_uint_uint_5 %gBuffer %int_0
%22 = OpLoad %_arr_uint_uint_5 %21
%23 = OpCompositeExtract %uint %22 0
%24 = OpCompositeExtract %uint %22 1
%25 = OpCompositeExtract %uint %22 2
%26 = OpCompositeExtract %uint %22 3
%27 = OpCompositeExtract %uint %22 4
%28 = OpIAdd %uint %23 %24
%29 = OpIAdd %uint %28 %25
%30 = OpIAdd %uint %29 %26
%31 = OpIAdd %uint %30 %27
%32 = OpAccessChain %_ptr_Uniform_uint %gRWSBuffer %int_0 %uint_0 %int_0
OpStore %32 %31
OpReturn
OpFunctionEnd
)";

  // Keep the numeric ids from the text and print friendly names so that the
  // disassembly can be compared against the input text.
  SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER |
                        SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
  SinglePassRunAndCheck<opt::ReduceLoadSize>(test, test, true, false);
}
|
||||
|
||||
} // anonymous namespace
|
@ -580,6 +580,8 @@ OptStatus ParseFlags(int argc, const char** argv, Optimizer* optimizer,
|
||||
optimizer->RegisterPass(CreateLocalRedundancyEliminationPass());
|
||||
} else if (0 == strcmp(cur_arg, "--loop-invariant-code-motion")) {
|
||||
optimizer->RegisterPass(CreateLoopInvariantCodeMotionPass());
|
||||
} else if (0 == strcmp(cur_arg, "--reduce-load-size")) {
|
||||
optimizer->RegisterPass(CreateReduceLoadSizePass());
|
||||
} else if (0 == strcmp(cur_arg, "--redundancy-elimination")) {
|
||||
optimizer->RegisterPass(CreateRedundancyEliminationPass());
|
||||
} else if (0 == strcmp(cur_arg, "--private-to-local")) {
|
||||
|
Loading…
Reference in New Issue
Block a user