mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-24 12:55:45 +00:00
[AArch64] Fold more spilled/refilled COPYs.
Summary: Make AArch64InstrInfo::foldMemoryOperandImpl more general by folding all full COPYs between register classes of the same size that are either spilled or refilled.
Reviewers: MatzeB, qcolombet
Subscribers: aemerson, rengolin, mcrosier, llvm-commits
Differential Revision: https://reviews.llvm.org/D27271
llvm-svn: 288439
This commit is contained in:
parent
078794f61e
commit
993081c749
@ -2598,8 +2598,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the case where a WZR/XZR copy is being spilled but the destination
|
||||
// register class doesn't contain WZR/XZR. For example:
|
||||
// Handle the case where a copy is being spilled or refilled but the source
|
||||
// and destination register class don't match. For example:
|
||||
//
|
||||
// %vreg0<def> = COPY %XZR; GPR64common:%vreg0
|
||||
//
|
||||
@ -2608,17 +2608,43 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
|
||||
//
|
||||
// STRXui %XZR, <fi#0>
|
||||
//
|
||||
if (MI.isFullCopy() && Ops.size() == 1 && Ops[0] == 0) {
|
||||
// This also eliminates spilled cross register class COPYs (e.g. between x and
|
||||
// d regs) of the same size. For example:
|
||||
//
|
||||
// %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
|
||||
//
|
||||
// will be refilled as
|
||||
//
|
||||
// LDRXui %vreg0, fi<#0>
|
||||
//
|
||||
// instead of
|
||||
//
|
||||
// LDRDui %vregTemp, fi<#0>
|
||||
// %vreg0 = FMOV %vregTemp
|
||||
//
|
||||
if (MI.isFullCopy() && Ops.size() == 1 &&
|
||||
// Make sure we're only folding the explicit COPY defs/uses.
|
||||
(Ops[0] == 0 || Ops[0] == 1)) {
|
||||
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
|
||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
const MachineOperand &DstMO = MI.getOperand(0);
|
||||
const MachineOperand &SrcMO = MI.getOperand(1);
|
||||
unsigned DstReg = DstMO.getReg();
|
||||
unsigned SrcReg = SrcMO.getReg();
|
||||
if (SrcReg == AArch64::WZR || SrcReg == AArch64::XZR) {
|
||||
const TargetRegisterInfo &TRI = getRegisterInfo();
|
||||
const TargetRegisterClass &RC = SrcReg == AArch64::WZR
|
||||
? AArch64::GPR32RegClass
|
||||
: AArch64::GPR64RegClass;
|
||||
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
|
||||
&RC, &TRI);
|
||||
auto getRegClass = [&](unsigned Reg) {
|
||||
return TargetRegisterInfo::isVirtualRegister(Reg)
|
||||
? MRI.getRegClass(Reg)
|
||||
: TRI.getMinimalPhysRegClass(Reg);
|
||||
};
|
||||
const TargetRegisterClass &DstRC = *getRegClass(DstReg);
|
||||
const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
|
||||
if (DstRC.getSize() == SrcRC.getSize()) {
|
||||
if (Ops[0] == 0)
|
||||
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
|
||||
&SrcRC, &TRI);
|
||||
else
|
||||
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
|
||||
return &*--InsertPt;
|
||||
}
|
||||
}
|
||||
|
78
test/CodeGen/AArch64/spill-fold.ll
Normal file
78
test/CodeGen/AArch64/spill-fold.ll
Normal file
@ -0,0 +1,78 @@
|
||||
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
|
||||
|
||||
declare i32 @bar()
|
||||
declare i32 @baz()
|
||||
|
||||
; Check that the spill of the zero value gets stored directly instead
|
||||
; of being copied from wzr and then stored.
|
||||
define i32 @test_zr_spill_fold1(i1 %c) {
|
||||
; CHECK-LABEL: test_zr_spill_fold1:
|
||||
entry:
|
||||
br i1 %c, label %if.else, label %if.then
|
||||
|
||||
if.else:
|
||||
; CHECK: bl bar
|
||||
; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
|
||||
%call1 = tail call i32 @bar()
|
||||
br label %if.end
|
||||
|
||||
if.then:
|
||||
; CHECK: bl baz
|
||||
; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
|
||||
%call2 = tail call i32 @baz()
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
%x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
|
||||
call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
|
||||
ret i32 %x.0
|
||||
}
|
||||
|
||||
; Similar to test_zr_spill_fold1, but with mis-matched register
|
||||
; class between %x.0 and the 0 from %if.then.
|
||||
define i32 @test_zr_spill_fold2(i1 %c) {
|
||||
; CHECK-LABEL: test_zr_spill_fold2:
|
||||
entry:
|
||||
br i1 %c, label %if.else, label %if.then
|
||||
|
||||
if.else:
|
||||
; CHECK: bl bar
|
||||
; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
|
||||
%call1 = tail call i32 @bar()
|
||||
br label %if.end
|
||||
|
||||
if.then:
|
||||
; CHECK: bl baz
|
||||
; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
|
||||
%call2 = tail call i32 @baz()
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
%x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
|
||||
call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
|
||||
%x.1 = add i32 %x.0, 1
|
||||
ret i32 %x.1
|
||||
}
|
||||
|
||||
; Similar to test_zr_spill_fold1, but with a cross register-class copy feeding a spill store.
|
||||
define float @test_cross_spill_fold(i32 %v) {
|
||||
; CHECK-LABEL: test_cross_spill_fold:
|
||||
entry:
|
||||
; CHECK: str w0, [sp, #[[SLOT:[0-9]+]]]
|
||||
%v.f = bitcast i32 %v to float
|
||||
call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind
|
||||
; CHECK: ldr s0, [sp, #[[SLOT]]]
|
||||
ret float %v.f
|
||||
}
|
||||
|
||||
; Similar to test_cross_spill_fold, but with a cross register-class copy fed by a refill load.
|
||||
define float @test_cross_spill_fold2(i32 %v) {
|
||||
; CHECK-LABEL: test_cross_spill_fold2:
|
||||
entry:
|
||||
; CHECK: str w0, [sp, #[[SLOT:[0-9]+]]]
|
||||
call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind
|
||||
; CHECK: ldr s0, [sp, #[[SLOT]]]
|
||||
%v.f = bitcast i32 %v to float
|
||||
ret float %v.f
|
||||
}
|
||||
|
@ -28,57 +28,3 @@ define void @test_sp(i32 %val) {
|
||||
ret void
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
declare i32 @bar()
|
||||
declare i32 @baz()
|
||||
|
||||
; Check that the spill of the zero value gets stored directly instead
|
||||
; of being copied from wzr and then stored.
|
||||
define i32 @test_zr_spill_copyprop1(i1 %c) {
|
||||
; CHECK-LABEL: test_zr_spill_copyprop1:
|
||||
entry:
|
||||
br i1 %c, label %if.else, label %if.then
|
||||
|
||||
if.else:
|
||||
; CHECK: bl bar
|
||||
; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
|
||||
%call1 = tail call i32 @bar()
|
||||
br label %if.end
|
||||
|
||||
if.then:
|
||||
; CHECK: bl baz
|
||||
; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
|
||||
%call2 = tail call i32 @baz()
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
%x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
|
||||
call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
|
||||
ret i32 %x.0
|
||||
}
|
||||
|
||||
; Similar to test_zr_spill_copyprop1, but with mis-matched register
|
||||
; class between %x.0 and the 0 from %if.then.
|
||||
define i32 @test_zr_spill_copyprop2(i1 %c) {
|
||||
; CHECK-LABEL: test_zr_spill_copyprop2:
|
||||
entry:
|
||||
br i1 %c, label %if.else, label %if.then
|
||||
|
||||
if.else:
|
||||
; CHECK: bl bar
|
||||
; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
|
||||
%call1 = tail call i32 @bar()
|
||||
br label %if.end
|
||||
|
||||
if.then:
|
||||
; CHECK: bl baz
|
||||
; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
|
||||
%call2 = tail call i32 @baz()
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
%x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
|
||||
call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
|
||||
%x.1 = add i32 %x.0, 1
|
||||
ret i32 %x.1
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user