mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-24 20:30:06 +00:00
[PowerPC] Manually schedule the prologue and epilogue
This patch makes the following changes to the schedule of instructions in the prologue and epilogue. The stack pointer update is moved down in the prologue so that the callee saves do not have to wait for the update to happen. Saving the lr is moved down in the prologue to hide the latency of the mflr. The stack pointer is moved up in the epilogue so that restoring of the lr can happen sooner. The mtlr is moved up in the epilogue so that it is away from the blr at the end of the epilogue. The latency of the mtlr can now be hidden by the loads of the callee saved registers. This commit is almost identical to this one: r322036 except that two warnings that broke build bots have been fixed. The revision number is D41737 as before. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@322124 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8b2b7db396
commit
a630e3e00c
@ -823,6 +823,39 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
assert((isPPC64 || !MustSaveCR) &&
|
||||
"Prologue CR saving supported only in 64-bit mode");
|
||||
|
||||
// Check if we can move the stack update instruction (stdu) down the prologue
|
||||
// past the callee saves. Hopefully this will avoid the situation where the
|
||||
// saves are waiting for the update on the store with update to complete.
|
||||
MachineBasicBlock::iterator StackUpdateLoc = MBBI;
|
||||
bool MovingStackUpdateDown = false;
|
||||
// This optimization has a number of guards. At this point we are being very
|
||||
// cautious and we do not try to do this when we have a fast call or
|
||||
// we are using PIC base or we are using a frame pointer or a base pointer.
|
||||
// It would be possible to turn on this optimization under these conditions
|
||||
// as well but it would require further modifications to the prologue and
|
||||
// epilogue. For example, if we want to turn on this optimization for
|
||||
// functions that use frame pointers we would have to take into consideration
|
||||
// the fact that spills to the stack may be using r30 instead of r1.
|
||||
// Aside from that we need to have a non-zero frame and we need to have a
|
||||
// non-large frame size. Notice that we did not use !isLargeFrame but we used
|
||||
// isInt<16>(FrameSize) instead. This is important because this guard has to
|
||||
// be identical to the one in the epilogue and in the epilogue the variable
|
||||
// is defined as bool isLargeFrame = !isInt<16>(FrameSize);
|
||||
if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP &&
|
||||
!HasBP && isInt<16>(FrameSize)) {
|
||||
const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
|
||||
for (unsigned i=0; i<Info.size(); i++) {
|
||||
int FrIdx = Info[i].getFrameIdx();
|
||||
if (FrIdx < 0) {
|
||||
if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
|
||||
MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
|
||||
StackUpdateLoc++;
|
||||
MovingStackUpdateDown = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we need to spill the CR and the LR but we don't have two separate
|
||||
// registers available, we must spill them one at a time
|
||||
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
|
||||
@ -886,7 +919,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
}
|
||||
|
||||
if (MustSaveLR)
|
||||
BuildMI(MBB, MBBI, dl, StoreInst)
|
||||
BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
|
||||
.addReg(ScratchReg, getKillRegState(true))
|
||||
.addImm(LROffset)
|
||||
.addReg(SPReg);
|
||||
@ -954,7 +987,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
HasSTUX = true;
|
||||
|
||||
} else if (!isLargeFrame) {
|
||||
BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
|
||||
BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(NegFrameSize)
|
||||
.addReg(SPReg);
|
||||
@ -1194,6 +1227,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
}
|
||||
|
||||
int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
|
||||
// We have changed the object offset above but we do not want to change
|
||||
// the actual offsets in the CFI instruction so we have to undo the
|
||||
// offset change here.
|
||||
if (MovingStackUpdateDown)
|
||||
Offset -= NegFrameSize;
|
||||
|
||||
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
|
||||
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
|
||||
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
|
||||
@ -1339,6 +1378,23 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
unsigned RBReg = SPReg;
|
||||
unsigned SPAdd = 0;
|
||||
|
||||
// Check if we can move the stack update instruction up the epilogue
|
||||
// past the callee saves. This will allow the move to LR instruction
|
||||
// to be executed before the restores of the callee saves which means
|
||||
// that the callee saves can hide the latency from the MTLR instruction.
|
||||
MachineBasicBlock::iterator StackUpdateLoc = MBBI;
|
||||
if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP &&
|
||||
!HasBP && !isLargeFrame) {
|
||||
const std::vector< CalleeSavedInfo > & Info = MFI.getCalleeSavedInfo();
|
||||
for (unsigned i=0; i<Info.size(); i++) {
|
||||
int FrIdx = Info[i].getFrameIdx();
|
||||
if (FrIdx < 0) {
|
||||
if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
|
||||
StackUpdateLoc--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (FrameSize) {
|
||||
// In the prologue, the loaded (or persistent) stack pointer value is
|
||||
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
|
||||
@ -1368,7 +1424,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
}
|
||||
} else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
|
||||
if (HasRedZone) {
|
||||
BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
|
||||
BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(FrameSize);
|
||||
} else {
|
||||
@ -1392,7 +1448,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
.addReg(FPReg);
|
||||
RBReg = FPReg;
|
||||
}
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
|
||||
BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
|
||||
.addImm(0)
|
||||
.addReg(SPReg);
|
||||
}
|
||||
@ -1425,7 +1481,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
// a base register anyway, because it may happen to be R0.
|
||||
bool LoadedLR = false;
|
||||
if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
|
||||
BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
|
||||
.addImm(LROffset+SPAdd)
|
||||
.addReg(RBReg);
|
||||
LoadedLR = true;
|
||||
@ -1497,7 +1553,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
.addReg(TempReg, getKillRegState(i == e-1));
|
||||
|
||||
if (MustSaveLR)
|
||||
BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
|
||||
BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
|
||||
|
||||
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
|
||||
// call optimization
|
||||
|
@ -15,12 +15,12 @@
|
||||
define noalias i8* @_ZN2CC3funEv(%class.CC* %this) {
|
||||
; CHECK-LABEL: _ZN2CC3funEv:
|
||||
; CHECK: mflr 0
|
||||
; CHECK-NEXT: std 0, 16(1)
|
||||
; CHECK-NEXT: stdu 1, -48(1)
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
||||
; CHECK-NEXT: .cfi_offset lr, 16
|
||||
; CHECK-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-NEXT: std 30, 32(1)
|
||||
; CHECK-NEXT: std 30, -16(1)
|
||||
; CHECK-NEXT: std 0, 16(1)
|
||||
; CHECK-NEXT: stdu 1, -48(1)
|
||||
; CHECK-NEXT: mr 30, 3
|
||||
; CHECK-NEXT: ld 12, 0(30)
|
||||
; CHECK-NEXT: std 2, 24(1)
|
||||
@ -38,11 +38,11 @@ define noalias i8* @_ZN2CC3funEv(%class.CC* %this) {
|
||||
; CHECK-NEXT: mr 3, 30
|
||||
; CHECK-NEXT: bl _ZN2CC3barEPi
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK: ld 30, 32(1)
|
||||
; CHECK-NEXT: li 3, 0
|
||||
; CHECK: li 3, 0
|
||||
; CHECK-NEXT: addi 1, 1, 48
|
||||
; CHECK-NEXT: ld 0, 16(1)
|
||||
; CHECK-NEXT: mtlr 0
|
||||
; CHECK: ld 30, -16(1)
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%foo = getelementptr inbounds %class.CC, %class.CC* %this, i64 0, i32 0, i32 0
|
||||
|
@ -16,12 +16,12 @@ entry:
|
||||
; stfd 14, 416(1)
|
||||
|
||||
; After the fix by patch D34337:
|
||||
; CHECK-LE:std 15, -280(1)
|
||||
; CHECK-LE:stfd 14, -144(1)
|
||||
; CHECK-LE: stdu 1, -528(1)
|
||||
; CHECK-LE:std 15, 248(1)
|
||||
; CHECK-LE:stfd 14, 384(1)
|
||||
; CHECK-BE:std 15, -280(1)
|
||||
; CHECK-BE:stfd 14, -144(1)
|
||||
; CHECK-BE: stdu 1, -544(1)
|
||||
; CHECK-BE:std 15, 264(1)
|
||||
; CHECK-BE:stfd 14, 400(1)
|
||||
}
|
||||
|
||||
define signext i32 @foo() {
|
||||
|
@ -110,7 +110,7 @@ declare i32 @doSomething(i32, i32*)
|
||||
;
|
||||
; Epilogue code.
|
||||
; CHECK: mtlr {{[0-9]+}}
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK: blr
|
||||
;
|
||||
; ENABLE: .[[ELSE_LABEL]]: # %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
@ -171,7 +171,7 @@ declare i32 @something(...)
|
||||
; Next BB
|
||||
; CHECK: %for.end
|
||||
; CHECK: mtlr {{[0-9]+}}
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK: blr
|
||||
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
|
||||
entry:
|
||||
br label %for.preheader
|
||||
@ -209,9 +209,9 @@ for.end: ; preds = %for.body
|
||||
; Make sure we save the link register
|
||||
; CHECK: mflr {{[0-9]+}}
|
||||
;
|
||||
; DISABLE: std
|
||||
; DISABLE-NEXT: std
|
||||
; DISABLE: cmplwi 0, 3, 0
|
||||
; DISABLE-NEXT: std
|
||||
; DISABLE-NEXT: std
|
||||
; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; Loop preheader
|
||||
@ -240,7 +240,7 @@ for.end: ; preds = %for.body
|
||||
; DISABLE: .[[EPILOG_BB]]: # %if.end
|
||||
; Epilog code
|
||||
; CHECK: mtlr {{[0-9]+}}
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK: blr
|
||||
;
|
||||
; ENABLE: .[[ELSE_LABEL]]: # %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
@ -291,9 +291,9 @@ declare void @somethingElse(...)
|
||||
; Make sure we save the link register
|
||||
; CHECK: mflr {{[0-9]+}}
|
||||
;
|
||||
; DISABLE: std
|
||||
; DISABLE-NEXT: std
|
||||
; DISABLE: cmplwi 0, 3, 0
|
||||
; DISABLE-NEXT: std
|
||||
; DISABLE-NEXT: std
|
||||
; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: bl somethingElse
|
||||
@ -322,7 +322,7 @@ declare void @somethingElse(...)
|
||||
;
|
||||
; Epilogue code.
|
||||
; CHECK: mtlr {{[0-9]+}}
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK: blr
|
||||
;
|
||||
; ENABLE: .[[ELSE_LABEL]]: # %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
|
@ -6,7 +6,7 @@ define void @test_foo(i32* nocapture %x01, i32* nocapture %x02, i32* nocapture %
|
||||
entry:
|
||||
|
||||
; CHECK-LABEL: test_foo:
|
||||
; CHECK: stdu 1, {{-?[0-9]+}}(1)
|
||||
; CHECK-DAG: stdu 1, {{-?[0-9]+}}(1)
|
||||
; CHECK-DAG: mr [[BACKUP_3:[0-9]+]], 3
|
||||
; CHECK-DAG: mr [[BACKUP_4:[0-9]+]], 4
|
||||
; CHECK-DAG: mr [[BACKUP_5:[0-9]+]], 5
|
||||
@ -15,14 +15,14 @@ entry:
|
||||
; CHECK-DAG: mr [[BACKUP_8:[0-9]+]], 8
|
||||
; CHECK-DAG: mr [[BACKUP_9:[0-9]+]], 9
|
||||
; CHECK-DAG: mr [[BACKUP_10:[0-9]+]], 10
|
||||
; CHECK-DAG: std [[BACKUP_3]], {{[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_4]], {{[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_5]], {{[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_6]], {{[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_7]], {{[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_8]], {{[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_9]], {{[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_10]], {{[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_3]], {{-?[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_4]], {{-?[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_5]], {{-?[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_6]], {{-?[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_7]], {{-?[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_8]], {{-?[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_9]], {{-?[0-9]+}}(1)
|
||||
; CHECK-DAG: std [[BACKUP_10]], {{-?[0-9]+}}(1)
|
||||
; CHECK: bl __tls_get_addr
|
||||
; CHECK-DAG: stw 3, 0([[BACKUP_3]])
|
||||
; CHECK-DAG: stw 3, 0([[BACKUP_4]])
|
||||
|
@ -4,35 +4,37 @@
|
||||
define <4 x i32> @testSpill(<4 x i32> %a, <4 x i32> %b) {
|
||||
|
||||
; CHECK-LABEL: testSpill:
|
||||
; CHECK: li 11, 80
|
||||
; CHECK: li 12, 96
|
||||
; CHECK: li 3, 48
|
||||
; CHECK: li 10, 64
|
||||
; CHECK: stxvd2x 62, 1, 11 # 16-byte Folded Spill
|
||||
; CHECK: stxvd2x 63, 1, 12 # 16-byte Folded Spill
|
||||
; CHECK: stxvd2x 60, 1, 3 # 16-byte Folded Spill
|
||||
; CHECK: stxvd2x 61, 1, 10 # 16-byte Folded Spill
|
||||
; CHECK: li 9, 96
|
||||
; CHECK: li 10, 80
|
||||
; CHECK: li 11, 64
|
||||
; CHECK: li 12, 48
|
||||
; CHECK: lxvd2x 63, 1, 9 # 16-byte Folded Reload
|
||||
; CHECK: lxvd2x 62, 1, 10 # 16-byte Folded Reload
|
||||
; CHECK: lxvd2x 61, 1, 11 # 16-byte Folded Reload
|
||||
; CHECK: lxvd2x 60, 1, 12 # 16-byte Folded Reload
|
||||
; CHECK-DAG: li [[REG64:[0-9]+]], -64
|
||||
; CHECK-DAG: li [[REG48:[0-9]+]], -48
|
||||
; CHECK-DAG: li [[REG32:[0-9]+]], -32
|
||||
; CHECK-DAG: li [[REG16:[0-9]+]], -16
|
||||
; CHECK-NOT: li
|
||||
; CHECK-DAG: stxvd2x 60, 1, [[REG64]] # 16-byte Folded Spill
|
||||
; CHECK-DAG: stxvd2x 61, 1, [[REG48]] # 16-byte Folded Spill
|
||||
; CHECK-DAG: stxvd2x 62, 1, [[REG32]] # 16-byte Folded Spill
|
||||
; CHECK-DAG: stxvd2x 63, 1, [[REG16]] # 16-byte Folded Spill
|
||||
; CHECK: std 0, 16(1)
|
||||
; CHECK-DAG: li [[REG16:[0-9]+]], -16
|
||||
; CHECK-DAG: li [[REG32:[0-9]+]], -32
|
||||
; CHECK-DAG: li [[REG48:[0-9]+]], -48
|
||||
; CHECK-DAG: li [[REG64:[0-9]+]], -64
|
||||
; CHECK: mtlr 0
|
||||
; CHECK-DAG: lxvd2x 63, 1, [[REG16]] # 16-byte Folded Reload
|
||||
; CHECK-DAG: lxvd2x 62, 1, [[REG32]] # 16-byte Folded Reload
|
||||
; CHECK-DAG: lxvd2x 61, 1, [[REG48]] # 16-byte Folded Reload
|
||||
; CHECK-DAG: lxvd2x 60, 1, [[REG64]] # 16-byte Folded Reload
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-PWR9-LABEL: testSpill:
|
||||
; CHECK-PWR9: stxv 62, 80(1) # 16-byte Folded Spill
|
||||
; CHECK-PWR9: stxv 63, 96(1) # 16-byte Folded Spill
|
||||
; CHECK-PWR9: stxv 60, 48(1) # 16-byte Folded Spill
|
||||
; CHECK-PWR9: stxv 61, 64(1) # 16-byte Folded Spill
|
||||
; CHECK-PWR9: lxv 63, 96(1) # 16-byte Folded Reload
|
||||
; CHECK-PWR9: lxv 62, 80(1) # 16-byte Folded Reload
|
||||
; CHECK-PWR9: lxv 61, 64(1) # 16-byte Folded Reload
|
||||
; CHECK-PWR9: lxv 60, 48(1) # 16-byte Folded Reload
|
||||
; CHECK-PWR9-DAG: stxv 60, -64(1) # 16-byte Folded Spill
|
||||
; CHECK-PWR9-DAG: stxv 61, -48(1) # 16-byte Folded Spill
|
||||
; CHECK-PWR9-DAG: stxv 62, -32(1) # 16-byte Folded Spill
|
||||
; CHECK-PWR9-DAG: stxv 63, -16(1) # 16-byte Folded Spill
|
||||
; CHECK-PWR9: mtlr 0
|
||||
; CHECK-PWR9-DAG: lxv 63, -16(1) # 16-byte Folded Reload
|
||||
; CHECK-PWR9-DAG: lxv 62, -32(1) # 16-byte Folded Reload
|
||||
; CHECK-PWR9-DAG: lxv 61, -48(1) # 16-byte Folded Reload
|
||||
; CHECK-PWR9-DAG: lxv 60, -64(1) # 16-byte Folded Reload
|
||||
; CHECK-PWR9-NEXT: blr
|
||||
|
||||
entry:
|
||||
|
Loading…
Reference in New Issue
Block a user