mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-13 13:45:16 +00:00
[NVPTX] Disable performance optimizations when OptLevel==None
Reviewers: jholewinski, tra, eliben
Subscribers: jholewinski, llvm-commits
Differential Revision: http://reviews.llvm.org/D16874
llvm-svn: 259749
This commit is contained in:
parent
d5c0e4d69b
commit
f650441b04
@ -143,14 +143,20 @@ public:
|
||||
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
|
||||
|
||||
private:
|
||||
// if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
|
||||
// If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
|
||||
// function is only called in opt mode.
|
||||
void addEarlyCSEOrGVNPass();
|
||||
|
||||
// Add passes that propagate special memory spaces.
|
||||
void addMemorySpaceInferencePasses();
|
||||
|
||||
// Add passes that perform straight-line scalar optimizations.
|
||||
void addStraightLineScalarOptimizationPasses();
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
|
||||
NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
|
||||
return PassConfig;
|
||||
return new NVPTXPassConfig(this, PM);
|
||||
}
|
||||
|
||||
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
|
||||
@ -166,22 +172,7 @@ void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
|
||||
addPass(createEarlyCSEPass());
|
||||
}
|
||||
|
||||
void NVPTXPassConfig::addIRPasses() {
|
||||
// The following passes are known to not play well with virtual regs hanging
|
||||
// around after register allocation (which in our case, is *all* registers).
|
||||
// We explicitly disable them here. We do, however, need some functionality
|
||||
// of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
|
||||
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
|
||||
disablePass(&PrologEpilogCodeInserterID);
|
||||
disablePass(&MachineCopyPropagationID);
|
||||
disablePass(&TailDuplicateID);
|
||||
|
||||
addPass(createNVVMReflectPass());
|
||||
addPass(createNVPTXImageOptimizerPass());
|
||||
addPass(createNVPTXAssignValidGlobalNamesPass());
|
||||
addPass(createGenericToNVVMPass());
|
||||
|
||||
// === Propagate special address spaces ===
|
||||
void NVPTXPassConfig::addMemorySpaceInferencePasses() {
|
||||
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
|
||||
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
|
||||
// be eliminated by SROA.
|
||||
@ -192,8 +183,9 @@ void NVPTXPassConfig::addIRPasses() {
|
||||
// them unused. We could remove dead code in an ad-hoc manner, but that
|
||||
// requires manual work and might be error-prone.
|
||||
addPass(createDeadCodeEliminationPass());
|
||||
}
|
||||
|
||||
// === Straight-line scalar optimizations ===
|
||||
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
|
||||
addPass(createSeparateConstOffsetFromGEPPass());
|
||||
addPass(createSpeculativeExecutionPass());
|
||||
// ReassociateGEPs exposes more opportunities for SLSR. See
|
||||
@ -208,6 +200,28 @@ void NVPTXPassConfig::addIRPasses() {
|
||||
// NaryReassociate on GEPs creates redundant common expressions, so run
|
||||
// EarlyCSE after it.
|
||||
addPass(createEarlyCSEPass());
|
||||
}
|
||||
|
||||
void NVPTXPassConfig::addIRPasses() {
|
||||
// The following passes are known to not play well with virtual regs hanging
|
||||
// around after register allocation (which in our case, is *all* registers).
|
||||
// We explicitly disable them here. We do, however, need some functionality
|
||||
// of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
|
||||
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
|
||||
disablePass(&PrologEpilogCodeInserterID);
|
||||
disablePass(&MachineCopyPropagationID);
|
||||
disablePass(&TailDuplicateID);
|
||||
|
||||
addPass(createNVVMReflectPass());
|
||||
if (getOptLevel() != CodeGenOpt::None)
|
||||
addPass(createNVPTXImageOptimizerPass());
|
||||
addPass(createNVPTXAssignValidGlobalNamesPass());
|
||||
addPass(createGenericToNVVMPass());
|
||||
|
||||
if (getOptLevel() != CodeGenOpt::None) {
|
||||
addMemorySpaceInferencePasses();
|
||||
addStraightLineScalarOptimizationPasses();
|
||||
}
|
||||
|
||||
// === LSR and other generic IR passes ===
|
||||
TargetPassConfig::addIRPasses();
|
||||
@ -223,7 +237,8 @@ void NVPTXPassConfig::addIRPasses() {
|
||||
// %1 = shl %a, 2
|
||||
//
|
||||
// but EarlyCSE can do neither of them.
|
||||
addEarlyCSEOrGVNPass();
|
||||
if (getOptLevel() != CodeGenOpt::None)
|
||||
addEarlyCSEOrGVNPass();
|
||||
}
|
||||
|
||||
bool NVPTXPassConfig::addInstSelector() {
|
||||
|
12
llvm/test/CodeGen/NVPTX/disable-opt.ll
Normal file
12
llvm/test/CodeGen/NVPTX/disable-opt.ll
Normal file
@ -0,0 +1,12 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O0 | FileCheck %s
|
||||
|
||||
; Test fixture: writes the constant 1 to a stack slot, reads it back, and
; stores the loaded value through the %output pointer argument.
; The FileCheck directives embedded below first anchor on the emitted function
; header for foo and then require the string __local_depot to appear after it.
; NOTE(review): __local_depot is presumably the symbol the NVPTX backend emits
; for a function's local stack storage, so its presence would show that the
; alloca was not optimized away when llc runs at -O0 -- confirm against the
; backend.
define void @foo(i32* %output) {
|
||||
; CHECK-LABEL: .visible .func foo(
|
||||
entry:
|
||||
; Stack slot that the test expects to survive into the generated output.
%local = alloca i32
|
||||
; CHECK: __local_depot
|
||||
; Round-trip the constant through the stack slot (store, then reload).
store i32 1, i32* %local
|
||||
%0 = load i32, i32* %local
|
||||
; Write the reloaded value to the caller-provided pointer.
store i32 %0, i32* %output
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user