mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-24 05:09:34 +00:00
Codegen: [PPC] Fix PPCVSXFMAMutate to handle duplicates.
The purpose of PPCVSXFMAMutate is to elide copies by changing FMA forms on PPC.

  %vreg6<def> = COPY %vreg96
  %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg5<kill>, %vreg7
  ;v6 = v6 + v5 * v7

is replaced by

  %vreg5<def,tied1> = XSMADDMSP %vreg5<tied0>, %vreg7, %vreg96
  ;v5 = v5 * v7 + v96

This was broken in the case where the target register was also used as a multiplicand. Fix this case by checking for it and replacing both uses with the copied register.

  %vreg6<def> = COPY %vreg96
  %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg5<kill>, %vreg6
  ;v6 = v6 + v5 * v6

is replaced by

  %vreg5<def,tied1> = XSMADDMSP %vreg5<tied0>, %vreg96, %vreg96
  ;v5 = v5 * v96 + v96

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259617 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6a4c1bd3e6
commit
ee53ec02b6
@ -168,21 +168,32 @@ protected:
|
||||
if (OtherUsers || KillsAddendSrc)
|
||||
continue;
|
||||
|
||||
// Find one of the product operands that is killed by this instruction.
|
||||
|
||||
// The transformation doesn't work well with things like:
|
||||
// %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
|
||||
// unless vreg11 is also a kill, so skip when it is not,
|
||||
// and check operand 3 to see it is also a kill to handle the case:
|
||||
// %vreg5 = A-form-op %vreg5, %vreg5, %vreg11;
|
||||
// where vreg5 and vreg11 are both kills. This case would be skipped
|
||||
// otherwise.
|
||||
unsigned OldFMAReg = MI->getOperand(0).getReg();
|
||||
|
||||
// Find one of the product operands that is killed by this instruction.
|
||||
unsigned KilledProdOp = 0, OtherProdOp = 0;
|
||||
if (LIS->getInterval(MI->getOperand(2).getReg())
|
||||
.Query(FMAIdx).isKill()) {
|
||||
unsigned Reg2 = MI->getOperand(2).getReg();
|
||||
unsigned Reg3 = MI->getOperand(3).getReg();
|
||||
if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
|
||||
&& Reg2 != OldFMAReg) {
|
||||
KilledProdOp = 2;
|
||||
OtherProdOp = 3;
|
||||
} else if (LIS->getInterval(MI->getOperand(3).getReg())
|
||||
.Query(FMAIdx).isKill()) {
|
||||
} else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill()
|
||||
&& Reg3 != OldFMAReg) {
|
||||
KilledProdOp = 3;
|
||||
OtherProdOp = 2;
|
||||
}
|
||||
|
||||
// If there are no killed product operands, then this transformation is
|
||||
// likely not profitable.
|
||||
// If there are no usable killed product operands, then this
|
||||
// transformation is likely not profitable.
|
||||
if (!KilledProdOp)
|
||||
continue;
|
||||
|
||||
@ -212,14 +223,6 @@ protected:
|
||||
bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
|
||||
bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef();
|
||||
|
||||
unsigned OldFMAReg = MI->getOperand(0).getReg();
|
||||
|
||||
// The transformation doesn't work well with things like:
|
||||
// %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
|
||||
// so leave such things alone.
|
||||
if (OldFMAReg == KilledProdReg)
|
||||
continue;
|
||||
|
||||
// If there isn't a class that fits, we can't perform the transform.
|
||||
// This is needed for correctness with a mixture of VSX and Altivec
|
||||
// instructions to make sure that a low VSX register is not assigned to
|
||||
@ -236,23 +239,33 @@ protected:
|
||||
MI->getOperand(0).setReg(KilledProdReg);
|
||||
MI->getOperand(1).setReg(KilledProdReg);
|
||||
MI->getOperand(3).setReg(AddendSrcReg);
|
||||
MI->getOperand(2).setReg(OtherProdReg);
|
||||
|
||||
MI->getOperand(0).setSubReg(KilledProdSubReg);
|
||||
MI->getOperand(1).setSubReg(KilledProdSubReg);
|
||||
MI->getOperand(3).setSubReg(AddSubReg);
|
||||
MI->getOperand(2).setSubReg(OtherProdSubReg);
|
||||
|
||||
MI->getOperand(1).setIsKill(KilledProdRegKill);
|
||||
MI->getOperand(3).setIsKill(AddRegKill);
|
||||
MI->getOperand(2).setIsKill(OtherProdRegKill);
|
||||
|
||||
MI->getOperand(1).setIsUndef(KilledProdRegUndef);
|
||||
MI->getOperand(3).setIsUndef(AddRegUndef);
|
||||
MI->getOperand(2).setIsUndef(OtherProdRegUndef);
|
||||
|
||||
MI->setDesc(TII->get(AltOpc));
|
||||
|
||||
// If the addend is also a multiplicand, replace it with the addend
|
||||
// source in both places.
|
||||
if (OtherProdReg == AddendMI->getOperand(0).getReg()) {
|
||||
MI->getOperand(2).setReg(AddendSrcReg);
|
||||
MI->getOperand(2).setSubReg(AddSubReg);
|
||||
MI->getOperand(2).setIsKill(AddRegKill);
|
||||
MI->getOperand(2).setIsUndef(AddRegUndef);
|
||||
} else {
|
||||
MI->getOperand(2).setReg(OtherProdReg);
|
||||
MI->getOperand(2).setSubReg(OtherProdSubReg);
|
||||
MI->getOperand(2).setIsKill(OtherProdRegKill);
|
||||
MI->getOperand(2).setIsUndef(OtherProdRegUndef);
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << " -> " << *MI);
|
||||
|
||||
// The killed product operand was killed here, so we can reuse it now
|
||||
|
36
test/CodeGen/PowerPC/fma-mutate-duplicate-vreg.ll
Normal file
36
test/CodeGen/PowerPC/fma-mutate-duplicate-vreg.ll
Normal file
@ -0,0 +1,36 @@
|
||||
; RUN: llc -fp-contract=fast -O2 < %s | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64le-grtev4-linux-gnu"
|
||||
|
||||
; CHECK-LABEL: f
|
||||
; CHECK-NOT: xsmaddmsp [[REG:[0-9]+]], [[REG]], {{[0-9]+}}
|
||||
define float @f(float %xf) #0 {
|
||||
%1 = fmul float %xf, %xf
|
||||
%2 = fmul float %1, 0x3F43FB0140000000
|
||||
%3 = fsub float 1.000000e+00, %2
|
||||
%4 = fmul float %1, %3
|
||||
%5 = fmul float %4, 0x3F461C5440000000
|
||||
%6 = fsub float 1.000000e+00, %5
|
||||
%7 = fmul float %1, %6
|
||||
%8 = fmul float %7, 0x3F4899C100000000
|
||||
%9 = fsub float 1.000000e+00, %8
|
||||
%10 = fmul float %1, %9
|
||||
%11 = fmul float %10, 0x3F4B894020000000
|
||||
%12 = fsub float 1.000000e+00, %11
|
||||
%13 = fmul float %1, %12
|
||||
%14 = fmul float %13, 0x3F4F07C200000000
|
||||
%15 = fsub float 1.000000e+00, %14
|
||||
%16 = fmul float %1, %15
|
||||
%17 = fmul float %16, 0x3F519E0120000000
|
||||
%18 = fsub float 1.000000e+00, %17
|
||||
%19 = fmul float %1, %18
|
||||
%20 = fmul float %19, 0x3F542D6620000000
|
||||
%21 = fsub float 1.000000e+00, %20
|
||||
%22 = fmul float %1, %21
|
||||
%23 = fmul float %22, 0x3F5756CAC0000000
|
||||
%24 = fsub float 1.000000e+00, %23
|
||||
%25 = fmul float %1, %24
|
||||
ret float %25
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
Loading…
x
Reference in New Issue
Block a user