AMDGPU: Fix return of non-void-returning shaders

Summary:
Since "AMDGPU: Fix verifier errors in SILowerControlFlow", the logic that
ensures that a non-void-returning shader falls off the end of the last
basic block was effectively disabled, since SI_RETURN is now used.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96731

Reviewers: arsenm, tstellarAMD

Subscribers: arsenm, kzhuravl, llvm-commits

Differential Revision: http://reviews.llvm.org/D21975

llvm-svn: 274612
commit 3d52c69ef5 (parent a4df6590ca)
Nicolai Haehnle, 2016-07-06 08:35:17 +00:00
2 changed files with 6 additions and 7 deletions
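In outline: the pass must guarantee that a non-void-returning graphics shader ends by falling off its last basic block, because external bytecode is appended there. The old guard was keyed on S_ENDPGM, but such shaders now terminate in SI_RETURN, so the rewrite silently stopped firing. A minimal standalone C++ sketch of that failure mode (toy opcodes and a hypothetical oldGuardFires helper, not the real LLVM types):

#include <cassert>

// Toy opcodes standing in for the real AMDGPU machine opcodes.
enum Opcode { S_ENDPGM, SI_RETURN };

// Shape of the old guard: it only fired on S_ENDPGM. Once non-void
// shaders started terminating in SI_RETURN, this case was never entered
// and the fall-off-the-end rewrite was effectively disabled.
bool oldGuardFires(Opcode Op, bool ReturnsVoid) {
  switch (Op) {
  case S_ENDPGM:
    return !ReturnsVoid; // only non-void S_ENDPGM shaders were rewritten
  default:
    return false; // SI_RETURN fell through here
  }
}

int main() {
  // A non-void shader now ends in SI_RETURN, so the old check never fired.
  assert(!oldGuardFires(SI_RETURN, /*ReturnsVoid=*/false));
  // The fix keys the case on SI_RETURN and asserts !returnsVoid() instead.
  return 0;
}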

lib/Target/AMDGPU/SILowerControlFlow.cpp

@@ -729,14 +729,13 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
         break;
 
-      case AMDGPU::S_ENDPGM: {
-        if (MF.getInfo<SIMachineFunctionInfo>()->returnsVoid())
-          break;
+      case AMDGPU::SI_RETURN: {
+        assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
 
         // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
         // because external bytecode will be appended at the end.
         if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
-          // S_ENDPGM is not the last instruction. Add an empty block at
+          // SI_RETURN is not the last instruction. Add an empty block at
           // the end and jump there.
           if (!EmptyMBBAtEnd) {
             EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
@@ -746,9 +745,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
           MBB.addSuccessor(EmptyMBBAtEnd);
           BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
             .addMBB(EmptyMBBAtEnd);
-        }
-
-        I->eraseFromParent();
+          I->eraseFromParent();
+        }
         break;
       }
     }
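For intuition, here is a minimal, self-contained C++ sketch of the rewrite itself (a toy model with string "instructions"; redirectReturns and the block representation are illustrative, not the LLVM API): any return that is not the final instruction of the final block is turned into a branch to a single shared empty block appended at the end, mirroring EmptyMBBAtEnd above.

#include <iostream>
#include <string>
#include <vector>

// Toy model: a function is a list of blocks, a block a list of strings.
using Block = std::vector<std::string>;

// Rewrite every "ret" that is not the last instruction of the last block
// into a branch to one shared empty block appended at the end, echoing
// the pass's `BI != --MF.end() || I != MBB.getFirstTerminator()` check.
void redirectReturns(std::vector<Block> &Func) {
  int EmptyAtEnd = -1; // index of the shared trailing block, if created
  for (size_t B = 0; B < Func.size(); ++B) {
    for (size_t I = 0; I < Func[B].size(); ++I) {
      if (Func[B][I] != "ret")
        continue;
      if (B + 1 == Func.size() && I + 1 == Func[B].size())
        continue; // already the last instruction: falls off the end
      if (EmptyAtEnd < 0) {
        Func.push_back({}); // create the empty block exactly once
        EmptyAtEnd = static_cast<int>(Func.size()) - 1;
      }
      // Erase the return and branch to the trailing block instead.
      Func[B][I] = "branch bb" + std::to_string(EmptyAtEnd);
    }
  }
}

int main() {
  std::vector<Block> Func = {{"v_add", "ret"}, {"v_mul", "ret"}};
  redirectReturns(Func);
  for (size_t B = 0; B < Func.size(); ++B) {
    std::cout << "bb" << B << ":\n";
    for (const std::string &Inst : Func[B])
      std::cout << "  " << Inst << '\n';
  }
}

As in the pass, the check happens at visit time, so once the trailing block exists, a return in the formerly-last block is redirected to it as well.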

test/CodeGen/AMDGPU/ret_jump.ll

@@ -12,10 +12,11 @@
 ; GCN-NEXT: ; mask branch [[UNREACHABLE_BB:BB[0-9]+_[0-9]+]]
 
 ; GCN: [[RET_BB]]:
-; GCN-NEXT: ; return
+; GCN-NEXT: s_branch [[FINAL_BB:BB[0-9]+_[0-9]+]]
 
 ; GCN-NEXT: [[UNREACHABLE_BB]]:
 ; GCN-NEXT: s_or_b64 exec, exec, [[XOR_EXEC]]
+; GCN-NEXT: [[FINAL_BB]]:
 ; GCN-NEXT: .Lfunc_end0
 define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, i32 addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
 main_body: