mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 21:50:40 +00:00
AMDGPU: Move subtarget feature checks into passes
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273937 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
87796a3096
commit
dca409d5ad
@ -241,12 +241,6 @@ def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
|
|||||||
"Force using DS instruction immediate offsets on SI"
|
"Force using DS instruction immediate offsets on SI"
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
|
|
||||||
"EnableIfCvt",
|
|
||||||
"false",
|
|
||||||
"Disable the if conversion pass"
|
|
||||||
>;
|
|
||||||
|
|
||||||
def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
|
def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
|
||||||
"EnableSIScheduler",
|
"EnableSIScheduler",
|
||||||
"true",
|
"true",
|
||||||
|
@ -124,6 +124,10 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
|
|||||||
if (!TM || skipFunction(F))
|
if (!TM || skipFunction(F))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
|
||||||
|
if (!ST.isPromoteAllocaEnabled())
|
||||||
|
return false;
|
||||||
|
|
||||||
FunctionType *FTy = F.getFunctionType();
|
FunctionType *FTy = F.getFunctionType();
|
||||||
|
|
||||||
// If the function has any arguments in the local address space, then it's
|
// If the function has any arguments in the local address space, then it's
|
||||||
@ -139,8 +143,6 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
|
|
||||||
|
|
||||||
LocalMemLimit = ST.getLocalMemorySize();
|
LocalMemLimit = ST.getLocalMemorySize();
|
||||||
if (LocalMemLimit == 0)
|
if (LocalMemLimit == 0)
|
||||||
return false;
|
return false;
|
||||||
|
@ -105,7 +105,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||||||
|
|
||||||
EnableVGPRSpilling(false),
|
EnableVGPRSpilling(false),
|
||||||
EnablePromoteAlloca(false),
|
EnablePromoteAlloca(false),
|
||||||
EnableIfCvt(true),
|
|
||||||
EnableLoadStoreOpt(false),
|
EnableLoadStoreOpt(false),
|
||||||
EnableUnsafeDSOffsetFolding(false),
|
EnableUnsafeDSOffsetFolding(false),
|
||||||
EnableSIScheduler(false),
|
EnableSIScheduler(false),
|
||||||
|
@ -82,7 +82,6 @@ protected:
|
|||||||
// Used as options.
|
// Used as options.
|
||||||
bool EnableVGPRSpilling;
|
bool EnableVGPRSpilling;
|
||||||
bool EnablePromoteAlloca;
|
bool EnablePromoteAlloca;
|
||||||
bool EnableIfCvt;
|
|
||||||
bool EnableLoadStoreOpt;
|
bool EnableLoadStoreOpt;
|
||||||
bool EnableUnsafeDSOffsetFolding;
|
bool EnableUnsafeDSOffsetFolding;
|
||||||
bool EnableSIScheduler;
|
bool EnableSIScheduler;
|
||||||
@ -222,10 +221,6 @@ public:
|
|||||||
return EnablePromoteAlloca;
|
return EnablePromoteAlloca;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isIfCvtEnabled() const {
|
|
||||||
return EnableIfCvt;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool unsafeDSOffsetFoldingEnabled() const {
|
bool unsafeDSOffsetFoldingEnabled() const {
|
||||||
return EnableUnsafeDSOffsetFolding;
|
return EnableUnsafeDSOffsetFolding;
|
||||||
}
|
}
|
||||||
|
@ -45,6 +45,18 @@ static cl::opt<bool> EnableR600StructurizeCFG(
|
|||||||
cl::desc("Use StructurizeCFG IR pass"),
|
cl::desc("Use StructurizeCFG IR pass"),
|
||||||
cl::init(true));
|
cl::init(true));
|
||||||
|
|
||||||
|
static cl::opt<bool> EnableSROA(
|
||||||
|
"amdgpu-sroa",
|
||||||
|
cl::desc("Run SROA after promote alloca pass"),
|
||||||
|
cl::ReallyHidden,
|
||||||
|
cl::init(true));
|
||||||
|
|
||||||
|
static cl::opt<bool> EnableR600IfConvert(
|
||||||
|
"r600-if-convert",
|
||||||
|
cl::desc("Use if conversion pass"),
|
||||||
|
cl::ReallyHidden,
|
||||||
|
cl::init(true));
|
||||||
|
|
||||||
extern "C" void LLVMInitializeAMDGPUTarget() {
|
extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||||
// Register the target
|
// Register the target
|
||||||
RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
|
RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
|
||||||
@ -212,12 +224,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
ScheduleDAGInstrs *
|
ScheduleDAGInstrs *
|
||||||
createMachineScheduler(MachineSchedContext *C) const override {
|
createMachineScheduler(MachineSchedContext *C) const override;
|
||||||
const SISubtarget *ST = getGCNTargetMachine().getSubtargetImpl();
|
|
||||||
if (ST->enableSIScheduler())
|
|
||||||
return createSIMachineScheduler(C);
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool addPreISel() override;
|
bool addPreISel() override;
|
||||||
void addMachineSSAOptimization() override;
|
void addMachineSSAOptimization() override;
|
||||||
@ -285,10 +292,11 @@ void AMDGPUPassConfig::addIRPasses() {
|
|||||||
addPass(createAMDGPUOpenCLImageTypeLoweringPass());
|
addPass(createAMDGPUOpenCLImageTypeLoweringPass());
|
||||||
|
|
||||||
const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
|
const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
|
||||||
const AMDGPUSubtarget &ST = *TM.getSubtargetImpl();
|
if (TM.getOptLevel() > CodeGenOpt::None) {
|
||||||
if (TM.getOptLevel() > CodeGenOpt::None && ST.isPromoteAllocaEnabled()) {
|
|
||||||
addPass(createAMDGPUPromoteAlloca(&TM));
|
addPass(createAMDGPUPromoteAlloca(&TM));
|
||||||
addPass(createSROAPass());
|
|
||||||
|
if (EnableSROA)
|
||||||
|
addPass(createSROAPass());
|
||||||
}
|
}
|
||||||
|
|
||||||
addStraightLineScalarOptimizationPasses();
|
addStraightLineScalarOptimizationPasses();
|
||||||
@ -344,9 +352,8 @@ void R600PassConfig::addPreRegAlloc() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void R600PassConfig::addPreSched2() {
|
void R600PassConfig::addPreSched2() {
|
||||||
const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
|
|
||||||
addPass(createR600EmitClauseMarkers(), false);
|
addPass(createR600EmitClauseMarkers(), false);
|
||||||
if (ST.isIfCvtEnabled())
|
if (EnableR600IfConvert)
|
||||||
addPass(&IfConverterID, false);
|
addPass(&IfConverterID, false);
|
||||||
addPass(createR600ClauseMergePass(*TM), false);
|
addPass(createR600ClauseMergePass(*TM), false);
|
||||||
}
|
}
|
||||||
@ -367,6 +374,14 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
|
|||||||
// GCN Pass Setup
|
// GCN Pass Setup
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
|
||||||
|
MachineSchedContext *C) const {
|
||||||
|
const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
|
||||||
|
if (ST.enableSIScheduler())
|
||||||
|
return createSIMachineScheduler(C);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
bool GCNPassConfig::addPreISel() {
|
bool GCNPassConfig::addPreISel() {
|
||||||
AMDGPUPassConfig::addPreISel();
|
AMDGPUPassConfig::addPreISel();
|
||||||
|
|
||||||
@ -415,8 +430,6 @@ bool GCNPassConfig::addRegBankSelect() {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GCNPassConfig::addPreRegAlloc() {
|
void GCNPassConfig::addPreRegAlloc() {
|
||||||
const SISubtarget &ST = *getGCNTargetMachine().getSubtargetImpl();
|
|
||||||
|
|
||||||
// This needs to be run directly before register allocation because
|
// This needs to be run directly before register allocation because
|
||||||
// earlier passes might recompute live intervals.
|
// earlier passes might recompute live intervals.
|
||||||
// TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
|
// TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
|
||||||
@ -424,15 +437,18 @@ void GCNPassConfig::addPreRegAlloc() {
|
|||||||
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
|
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
|
if (getOptLevel() > CodeGenOpt::None) {
|
||||||
// Don't do this with no optimizations since it throws away debug info by
|
// Don't do this with no optimizations since it throws away debug info by
|
||||||
// merging nonadjacent loads.
|
// merging nonadjacent loads.
|
||||||
|
|
||||||
// This should be run after scheduling, but before register allocation. It
|
// This should be run after scheduling, but before register allocation. It
|
||||||
// also needs extra copies to the address operand to be eliminated.
|
// also needs extra copies to the address operand to be eliminated.
|
||||||
|
|
||||||
|
// FIXME: Move pre-RA and remove extra reg coalescer run.
|
||||||
insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
|
insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
|
||||||
insertPass(&MachineSchedulerID, &RegisterCoalescerID);
|
insertPass(&MachineSchedulerID, &RegisterCoalescerID);
|
||||||
}
|
}
|
||||||
|
|
||||||
addPass(createSIShrinkInstructionsPass());
|
addPass(createSIShrinkInstructionsPass());
|
||||||
addPass(createSIWholeQuadModePass());
|
addPass(createSIWholeQuadModePass());
|
||||||
}
|
}
|
||||||
|
@ -412,6 +412,9 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
|
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
|
||||||
|
if (!STM.loadStoreOptEnabled())
|
||||||
|
return false;
|
||||||
|
|
||||||
TII = STM.getInstrInfo();
|
TII = STM.getInstrInfo();
|
||||||
TRI = &TII->getRegisterInfo();
|
TRI = &TII->getRegisterInfo();
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc -march=amdgcn -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}stored_fi_to_lds:
|
; GCN-LABEL: {{^}}stored_fi_to_lds:
|
||||||
; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
|
; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
|
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
|
||||||
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
|
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
|
||||||
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
|
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
|
||||||
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||||
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
|
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
|
||||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||||
|
|
||||||
; OPT-LABEL: @test_sink_global_small_offset_i32(
|
; OPT-LABEL: @test_sink_global_small_offset_i32(
|
||||||
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
|
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
; RUN: llc < %s -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||||
|
|
||||||
; FUNC-LABEL: {{^}}load_i8_sext_private:
|
; FUNC-LABEL: {{^}}load_i8_sext_private:
|
||||||
; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
|
; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
|
||||||
@ -39,7 +39,7 @@ entry:
|
|||||||
define void @load_i16_zext_private(i32 addrspace(1)* %out) {
|
define void @load_i16_zext_private(i32 addrspace(1)* %out) {
|
||||||
entry:
|
entry:
|
||||||
%tmp0 = alloca i16
|
%tmp0 = alloca i16
|
||||||
%tmp1 = load i16, i16* %tmp0
|
%tmp1 = load volatile i16, i16* %tmp0
|
||||||
%tmp2 = zext i16 %tmp1 to i32
|
%tmp2 = zext i16 %tmp1 to i32
|
||||||
store i32 %tmp2, i32 addrspace(1)* %out
|
store i32 %tmp2, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
; Function Attrs: nounwind
|
; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck %s
|
||||||
; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca < %s | FileCheck %s
|
|
||||||
;
|
;
|
||||||
; CFG flattening should use parallel-and mode to generate branch conditions and
|
; CFG flattening should use parallel-and mode to generate branch conditions and
|
||||||
; then merge if-regions with the same bodies.
|
; then merge if-regions with the same bodies.
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -march=r600 -mattr=disable-ifcvt -mcpu=redwood | FileCheck %s
|
; RUN: llc -march=r600 -mcpu=redwood -r600-if-convert=0 < %s | FileCheck %s
|
||||||
|
|
||||||
; This tests for a bug where the AMDILCFGStructurizer was crashing on loops
|
; This tests for a bug where the AMDILCFGStructurizer was crashing on loops
|
||||||
; like this:
|
; like this:
|
||||||
|
Loading…
Reference in New Issue
Block a user