AMDGPU: Move subtarget feature checks into passes

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273937 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2016-06-27 20:32:13 +00:00
parent 87796a3096
commit dca409d5ad
11 changed files with 46 additions and 38 deletions

View File

@ -241,12 +241,6 @@ def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
"Force using DS instruction immediate offsets on SI" "Force using DS instruction immediate offsets on SI"
>; >;
def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
"EnableIfCvt",
"false",
"Disable the if conversion pass"
>;
def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
"EnableSIScheduler", "EnableSIScheduler",
"true", "true",

View File

@ -124,6 +124,10 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
if (!TM || skipFunction(F)) if (!TM || skipFunction(F))
return false; return false;
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
if (!ST.isPromoteAllocaEnabled())
return false;
FunctionType *FTy = F.getFunctionType(); FunctionType *FTy = F.getFunctionType();
// If the function has any arguments in the local address space, then it's // If the function has any arguments in the local address space, then it's
@ -139,8 +143,6 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
} }
} }
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
LocalMemLimit = ST.getLocalMemorySize(); LocalMemLimit = ST.getLocalMemorySize();
if (LocalMemLimit == 0) if (LocalMemLimit == 0)
return false; return false;

View File

@ -105,7 +105,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
EnableVGPRSpilling(false), EnableVGPRSpilling(false),
EnablePromoteAlloca(false), EnablePromoteAlloca(false),
EnableIfCvt(true),
EnableLoadStoreOpt(false), EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false), EnableUnsafeDSOffsetFolding(false),
EnableSIScheduler(false), EnableSIScheduler(false),

View File

@ -82,7 +82,6 @@ protected:
// Used as options. // Used as options.
bool EnableVGPRSpilling; bool EnableVGPRSpilling;
bool EnablePromoteAlloca; bool EnablePromoteAlloca;
bool EnableIfCvt;
bool EnableLoadStoreOpt; bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding; bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler; bool EnableSIScheduler;
@ -222,10 +221,6 @@ public:
return EnablePromoteAlloca; return EnablePromoteAlloca;
} }
bool isIfCvtEnabled() const {
return EnableIfCvt;
}
bool unsafeDSOffsetFoldingEnabled() const { bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding; return EnableUnsafeDSOffsetFolding;
} }

View File

@ -45,6 +45,18 @@ static cl::opt<bool> EnableR600StructurizeCFG(
cl::desc("Use StructurizeCFG IR pass"), cl::desc("Use StructurizeCFG IR pass"),
cl::init(true)); cl::init(true));
static cl::opt<bool> EnableSROA(
"amdgpu-sroa",
cl::desc("Run SROA after promote alloca pass"),
cl::ReallyHidden,
cl::init(true));
static cl::opt<bool> EnableR600IfConvert(
"r600-if-convert",
cl::desc("Use if conversion pass"),
cl::ReallyHidden,
cl::init(true));
extern "C" void LLVMInitializeAMDGPUTarget() { extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target // Register the target
RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget); RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
@ -212,12 +224,7 @@ public:
} }
ScheduleDAGInstrs * ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override { createMachineScheduler(MachineSchedContext *C) const override;
const SISubtarget *ST = getGCNTargetMachine().getSubtargetImpl();
if (ST->enableSIScheduler())
return createSIMachineScheduler(C);
return nullptr;
}
bool addPreISel() override; bool addPreISel() override;
void addMachineSSAOptimization() override; void addMachineSSAOptimization() override;
@ -285,10 +292,11 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createAMDGPUOpenCLImageTypeLoweringPass()); addPass(createAMDGPUOpenCLImageTypeLoweringPass());
const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine(); const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
const AMDGPUSubtarget &ST = *TM.getSubtargetImpl(); if (TM.getOptLevel() > CodeGenOpt::None) {
if (TM.getOptLevel() > CodeGenOpt::None && ST.isPromoteAllocaEnabled()) {
addPass(createAMDGPUPromoteAlloca(&TM)); addPass(createAMDGPUPromoteAlloca(&TM));
addPass(createSROAPass());
if (EnableSROA)
addPass(createSROAPass());
} }
addStraightLineScalarOptimizationPasses(); addStraightLineScalarOptimizationPasses();
@ -344,9 +352,8 @@ void R600PassConfig::addPreRegAlloc() {
} }
void R600PassConfig::addPreSched2() { void R600PassConfig::addPreSched2() {
const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createR600EmitClauseMarkers(), false); addPass(createR600EmitClauseMarkers(), false);
if (ST.isIfCvtEnabled()) if (EnableR600IfConvert)
addPass(&IfConverterID, false); addPass(&IfConverterID, false);
addPass(createR600ClauseMergePass(*TM), false); addPass(createR600ClauseMergePass(*TM), false);
} }
@ -367,6 +374,14 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
// GCN Pass Setup // GCN Pass Setup
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
MachineSchedContext *C) const {
const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
if (ST.enableSIScheduler())
return createSIMachineScheduler(C);
return nullptr;
}
bool GCNPassConfig::addPreISel() { bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel(); AMDGPUPassConfig::addPreISel();
@ -415,8 +430,6 @@ bool GCNPassConfig::addRegBankSelect() {
#endif #endif
void GCNPassConfig::addPreRegAlloc() { void GCNPassConfig::addPreRegAlloc() {
const SISubtarget &ST = *getGCNTargetMachine().getSubtargetImpl();
// This needs to be run directly before register allocation because // This needs to be run directly before register allocation because
// earlier passes might recompute live intervals. // earlier passes might recompute live intervals.
// TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
@ -424,15 +437,18 @@ void GCNPassConfig::addPreRegAlloc() {
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
} }
if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) { if (getOptLevel() > CodeGenOpt::None) {
// Don't do this with no optimizations since it throws away debug info by // Don't do this with no optimizations since it throws away debug info by
// merging nonadjacent loads. // merging nonadjacent loads.
// This should be run after scheduling, but before register allocation. It // This should be run after scheduling, but before register allocation. It
// also needs extra copies to the address operand to be eliminated. // also needs extra copies to the address operand to be eliminated.
// FIXME: Move pre-RA and remove extra reg coalescer run.
insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
insertPass(&MachineSchedulerID, &RegisterCoalescerID); insertPass(&MachineSchedulerID, &RegisterCoalescerID);
} }
addPass(createSIShrinkInstructionsPass()); addPass(createSIShrinkInstructionsPass());
addPass(createSIWholeQuadModePass()); addPass(createSIWholeQuadModePass());
} }

View File

@ -412,6 +412,9 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
return false; return false;
const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
if (!STM.loadStoreOptEnabled())
return false;
TII = STM.getInstrInfo(); TII = STM.getInstrInfo();
TRI = &TII->getRegisterInfo(); TRI = &TII->getRegisterInfo();

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}stored_fi_to_lds: ; GCN-LABEL: {{^}}stored_fi_to_lds:
; GCN: s_load_dword [[LDSPTR:s[0-9]+]] ; GCN: s_load_dword [[LDSPTR:s[0-9]+]]

View File

@ -1,9 +1,9 @@
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; OPT-LABEL: @test_sink_global_small_offset_i32( ; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in ; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}load_i8_sext_private: ; FUNC-LABEL: {{^}}load_i8_sext_private:
; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
@ -39,7 +39,7 @@ entry:
define void @load_i16_zext_private(i32 addrspace(1)* %out) { define void @load_i16_zext_private(i32 addrspace(1)* %out) {
entry: entry:
%tmp0 = alloca i16 %tmp0 = alloca i16
%tmp1 = load i16, i16* %tmp0 %tmp1 = load volatile i16, i16* %tmp0
%tmp2 = zext i16 %tmp1 to i32 %tmp2 = zext i16 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out store i32 %tmp2, i32 addrspace(1)* %out
ret void ret void

View File

@ -1,5 +1,4 @@
; Function Attrs: nounwind ; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck %s
; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca < %s | FileCheck %s
; ;
; CFG flattening should use parallel-and mode to generate branch conditions and ; CFG flattening should use parallel-and mode to generate branch conditions and
; then merge if-regions with the same bodies. ; then merge if-regions with the same bodies.

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=r600 -mattr=disable-ifcvt -mcpu=redwood | FileCheck %s ; RUN: llc -march=r600 -mcpu=redwood -r600-if-convert=0 < %s | FileCheck %s
; This tests for a bug where the AMDILCFGStructurizer was crashing on loops ; This tests for a bug where the AMDILCFGStructurizer was crashing on loops
; like this: ; like this: