[PowerPC] Use the MachineCombiner to reassociate fadd/fmul
This is a direct port to the PowerPC backend of the code from the X86 backend (r239486/r240361) that uses the MachineCombiner to reassociate (floating-point) adds/muls to increase ILP. The rationale is the same.

There is a lot of copy-and-paste here between the X86 code and the PowerPC code, and we should extract at least some of this into CodeGen somewhere. However, I don't want to do that until this code is enhanced to handle FMAs as well. After that, we'll be in a better position to extract the common parts.

llvm-svn: 242279
Commit db4f85d64e (parent d668bc2386)
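As a minimal sketch of the effect (mirroring the scalar cases in the new test file below; the rewrite actually happens on machine instructions, not on the IR itself):

  %t0 = fadd float %x0, %x1   ; depth 1
  %t1 = fadd float %t0, %x2   ; depth 2
  %t2 = fadd float %t1, %x3   ; depth 3

is emitted as if it had been written as

  %t0 = fadd float %x0, %x1   ; depth 1
  %s  = fadd float %x2, %x3   ; depth 1, independent of %t0 (%s is a name used here for illustration)
  %t2 = fadd float %t0, %s    ; depth 2

so the two leading adds can execute in parallel.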
lib/Target/PowerPC/PPCInstrInfo.cpp:

@@ -144,6 +144,9 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
  int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
                                                   UseMI, UseIdx);

  if (!DefMI->getParent())
    return Latency;

  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
  unsigned Reg = DefMO.getReg();
@@ -186,6 +189,265 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
  return Latency;
}

static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst,
                                          const MachineBasicBlock *MBB) {
  const MachineOperand &Op1 = Inst.getOperand(1);
  const MachineOperand &Op2 = Inst.getOperand(2);
  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  // We need virtual register definitions.
  MachineInstr *MI1 = nullptr;
  MachineInstr *MI2 = nullptr;
  if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
    MI1 = MRI.getUniqueVRegDef(Op1.getReg());
  if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
    MI2 = MRI.getUniqueVRegDef(Op2.getReg());

  // And they need to be in the trace (otherwise, they won't have a depth).
  if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB)
    return true;

  return false;
}

static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
  const MachineBasicBlock *MBB = Inst.getParent();
  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
  MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
  unsigned AssocOpcode = Inst.getOpcode();

  // If only one operand has the same opcode and it's the second source operand,
  // the operands must be commuted.
  Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
  if (Commuted)
    std::swap(MI1, MI2);

  // 1. The previous instruction must be the same type as Inst.
  // 2. The previous instruction must have virtual register definitions for its
  //    operands in the same basic block as Inst.
  // 3. The previous instruction's result must only be used by Inst.
  if (MI1->getOpcode() == AssocOpcode &&
      hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
      MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
    return true;

  return false;
}

// This function does not list all associative and commutative operations, but
// only those worth feeding through the machine combiner in an attempt to
// reduce the critical path. Mostly, this means floating-point operations,
// because they have high latencies (compared to other operations, such as
// and/or, which are also associative and commutative, but have low latencies).
//
// The concept is that these operations can benefit from this kind of
// transformation:
//
// A = ? op ?
// B = A op X
// C = B op Y
// -->
// A = ? op ?
// B = X op Y
// C = A op B
//
// breaking the dependency between A and B, allowing them to be executed in
// parallel (or back-to-back in a pipeline) instead of depending on each other.
static bool isAssociativeAndCommutative(unsigned Opcode) {
  switch (Opcode) {
  // FP Add:
  case PPC::FADD:
  case PPC::FADDS:
  // FP Multiply:
  case PPC::FMUL:
  case PPC::FMULS:
  // Altivec Add:
  case PPC::VADDFP:
  // VSX Add:
  case PPC::XSADDDP:
  case PPC::XVADDDP:
  case PPC::XVADDSP:
  case PPC::XSADDSP:
  // VSX Multiply:
  case PPC::XSMULDP:
  case PPC::XVMULDP:
  case PPC::XVMULSP:
  case PPC::XSMULSP:
  // QPX Add:
  case PPC::QVFADD:
  case PPC::QVFADDS:
  case PPC::QVFADDSs:
  // QPX Multiply:
  case PPC::QVFMUL:
  case PPC::QVFMULS:
  case PPC::QVFMULSs:
    return true;
  default:
    return false;
  }
}
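// Illustration of the payoff from the transformation above (assuming a
// hypothetical 6-cycle FP add latency for the example, not any particular
// PPC core): in the original chain, C waits on B and B waits on A, so the
// two matched ops contribute 2 x 6 = 12 cycles to the critical path after A
// completes. After reassociation, (X op Y) issues in parallel with A,
// leaving a single dependent op (6 cycles) on the chain.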
/// Return true if the input instruction is part of a chain of dependent ops
/// that are suitable for reassociation, otherwise return false.
/// If the instruction's operands must be commuted to have a previous
/// instruction of the same type define the first source operand, Commuted
/// will be set to true.
static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) {
  // 1. The operation must be associative and commutative.
  // 2. The instruction must have virtual register definitions for its
  //    operands in the same basic block.
  // 3. The instruction must have a reassociable sibling.
  if (isAssociativeAndCommutative(Inst.getOpcode()) &&
      hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
      hasReassocSibling(Inst, Commuted))
    return true;

  return false;
}

bool PPCInstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
  // Using the machine combiner in this way is potentially expensive, so
  // restrict to when aggressive optimizations are desired.
  if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
    return false;

  // FP reassociation is only legal when we don't need strict IEEE semantics.
  if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
    return false;

  // Look for this reassociation pattern:
  //   B = A op X (Prev)
  //   C = B op Y (Root)

  // FIXME: We should also match FMA operations here, where we consider the
  // 'part' of the FMA, either the addition or the multiplication, paired with
  // an actual addition or multiplication.

  bool Commute;
  if (isReassocCandidate(Root, Commute)) {
    // We found a sequence of instructions that may be suitable for a
    // reassociation of operands to increase ILP. Specify each commutation
    // possibility for the Prev instruction in the sequence and let the
    // machine combiner decide if changing the operands is worthwhile.
    if (Commute) {
      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB);
      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB);
    } else {
      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY);
      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY);
    }
    return true;
  }

  return false;
}
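// Note: the *_YB variants are pushed when Prev's result feeds the second
// source of Root, the *_BY variants when it feeds the first; each pair also
// tries both orders of Prev's own sources (AX_ and XA_). The MachineCombiner
// pass itself prices each candidate against trace depth/latency and only
// commits a rewrite that shortens the critical path.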
/// Attempt the following reassociation to reduce critical path length:
///   B = A op X (Prev)
///   C = B op Y (Root)
/// ===>
///   B = X op Y
///   C = A op B
static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
                           MachineCombinerPattern::MC_PATTERN Pattern,
                           SmallVectorImpl<MachineInstr *> &InsInstrs,
                           SmallVectorImpl<MachineInstr *> &DelInstrs,
                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
  MachineFunction *MF = Root.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);

  // This array encodes the operand index for each parameter because the
  // operands may be commuted. Each row corresponds to a pattern value,
  // and each column specifies the index of A, B, X, Y.
  unsigned OpIdx[4][4] = {
    { 1, 1, 2, 2 },
    { 1, 2, 2, 1 },
    { 2, 1, 1, 2 },
    { 2, 2, 1, 1 }
  };

  MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]);
  MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]);
  MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
  MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
  MachineOperand &OpC = Root.getOperand(0);

  unsigned RegA = OpA.getReg();
  unsigned RegB = OpB.getReg();
  unsigned RegX = OpX.getReg();
  unsigned RegY = OpY.getReg();
  unsigned RegC = OpC.getReg();

  if (TargetRegisterInfo::isVirtualRegister(RegA))
    MRI.constrainRegClass(RegA, RC);
  if (TargetRegisterInfo::isVirtualRegister(RegB))
    MRI.constrainRegClass(RegB, RC);
  if (TargetRegisterInfo::isVirtualRegister(RegX))
    MRI.constrainRegClass(RegX, RC);
  if (TargetRegisterInfo::isVirtualRegister(RegY))
    MRI.constrainRegClass(RegY, RC);
  if (TargetRegisterInfo::isVirtualRegister(RegC))
    MRI.constrainRegClass(RegC, RC);

  // Create a new virtual register for the result of (X op Y) instead of
  // recycling RegB because the MachineCombiner's computation of the critical
  // path requires a new register definition rather than an existing one.
  unsigned NewVR = MRI.createVirtualRegister(RC);
  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));

  unsigned Opcode = Root.getOpcode();
  bool KillA = OpA.isKill();
  bool KillX = OpX.isKill();
  bool KillY = OpY.isKill();

  // Create new instructions for insertion.
  MachineInstrBuilder MIB1 =
    BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
      .addReg(RegX, getKillRegState(KillX))
      .addReg(RegY, getKillRegState(KillY));
  InsInstrs.push_back(MIB1);

  MachineInstrBuilder MIB2 =
    BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
      .addReg(RegA, getKillRegState(KillA))
      .addReg(NewVR, getKillRegState(true));
  InsInstrs.push_back(MIB2);

  // Record old instructions for deletion.
  DelInstrs.push_back(&Prev);
  DelInstrs.push_back(&Root);
}
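// For reference, combining the pattern values with the OpIdx rows above
// (row 0 = MC_REASSOC_AX_BY, ..., row 3 = MC_REASSOC_XA_YB) gives the four
// operand shapes being matched:
//   MC_REASSOC_AX_BY:  Prev: B = A op X   Root: C = B op Y
//   MC_REASSOC_AX_YB:  Prev: B = A op X   Root: C = Y op B
//   MC_REASSOC_XA_BY:  Prev: B = X op A   Root: C = B op Y
//   MC_REASSOC_XA_YB:  Prev: B = X op A   Root: C = Y op B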
void PPCInstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root,
    MachineCombinerPattern::MC_PATTERN Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
  MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();

  // Select the previous instruction in the sequence based on the input
  // pattern.
  MachineInstr *Prev = nullptr;
  switch (Pattern) {
  case MachineCombinerPattern::MC_REASSOC_AX_BY:
  case MachineCombinerPattern::MC_REASSOC_XA_BY:
    Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
    break;
  case MachineCombinerPattern::MC_REASSOC_AX_YB:
  case MachineCombinerPattern::MC_REASSOC_XA_YB:
    Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
  }
  assert(Prev && "Unknown pattern for machine combiner");

  reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
  return;
}

// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                         unsigned &SrcReg, unsigned &DstReg,
lib/Target/PowerPC/PPCInstrInfo.h:

@@ -63,6 +63,19 @@ enum PPC970_Unit {
};
} // end namespace PPCII

namespace MachineCombinerPattern {
enum MC_PATTERN : int {
  // These are commutative variants for reassociating a computation chain
  // of the form:
  //   B = A op X (Prev)
  //   C = B op Y (Root)
  MC_REASSOC_AX_BY = 0,
  MC_REASSOC_AX_YB = 1,
  MC_REASSOC_XA_BY = 2,
  MC_REASSOC_XA_YB = 3,
};
} // end namespace MachineCombinerPattern

class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
  PPCSubtarget &Subtarget;
@@ -119,6 +132,25 @@ public:
    return false;
  }

  bool useMachineCombiner() const override {
    return true;
  }

  /// Return true when there is potentially a faster code sequence
  /// for an instruction chain ending in <Root>. All potential patterns are
  /// output in the <Pattern> array.
  bool getMachineCombinerPatterns(
      MachineInstr &Root,
      SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &P) const override;

  /// When getMachineCombinerPatterns() finds a pattern, this function
  /// generates the instructions that could replace the original code
  /// sequence.
  void genAlternativeCodeSequence(
      MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
      SmallVectorImpl<MachineInstr *> &InsInstrs,
      SmallVectorImpl<MachineInstr *> &DelInstrs,
      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;

  bool isCoalescableExtInstr(const MachineInstr &MI,
                             unsigned &SrcReg, unsigned &DstReg,
                             unsigned &SubIdx) const override;
lib/Target/PowerPC/PPCTargetMachine.cpp:

@@ -57,6 +57,11 @@
EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
                      cl::desc("Add extra TOC register dependencies"),
                      cl::init(true), cl::Hidden);

static cl::opt<bool>
EnableMachineCombinerPass("ppc-machine-combiner",
                          cl::desc("Enable the machine combiner pass"),
                          cl::init(true), cl::Hidden);

extern "C" void LLVMInitializePowerPCTarget() {
  // Register the targets
  RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);

@@ -316,6 +321,10 @@ bool PPCPassConfig::addPreISel() {

bool PPCPassConfig::addILPOpts() {
  addPass(&EarlyIfConverterID);

  if (EnableMachineCombinerPass)
    addPass(&MachineCombinerID);

  return true;
}
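To exercise the transformation, an invocation along the lines of the test's RUN lines works; both -O3 (CodeGenOpt::Aggressive) and unsafe FP math are required by the guards in getMachineCombinerPatterns, and -ppc-machine-combiner=0 turns the new pass back off:

  llc -O3 -mcpu=pwr7 -enable-unsafe-fp-math machine-combiner.ll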
test/CodeGen/PowerPC/machine-combiner.ll (new file, 188 lines):

@@ -0,0 +1,188 @@
; RUN: llc -O3 -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
; RUN: llc -O3 -mcpu=a2q -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; Verify that the first two adds are independent regardless of how the inputs
; are commuted. The destination registers are used as source registers for the
; third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds1:
; CHECK:       # BB#0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds2:
; CHECK:       # BB#0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds3:
; CHECK:       # BB#0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds4:
; CHECK:       # BB#0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}
; Verify that we reassociate some of these ops. The optimal balanced tree of
; adds is not produced because that would cost more compile time.

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; CHECK-LABEL: reassociate_adds5:
; CHECK:       # BB#0:
; CHECK:       fadds [[REG12:[0-9]+]], 5, 6
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG11:[0-9]+]], 3, 4
; CHECK:       fadds [[REG13:[0-9]+]], [[REG12]], 7
; CHECK:       fadds [[REG1:[0-9]+]], [[REG0]], [[REG11]]
; CHECK:       fadds [[REG2:[0-9]+]], [[REG1]], [[REG13]]
; CHECK:       fadds 1, [[REG2]], 8
; CHECK-NEXT:  blr

  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we reassociate vector instructions too.

define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds1:
; CHECK:       # BB#0:
; CHECK-QPX:   qvfadds [[REG0:[0-9]+]], 1, 2
; CHECK-QPX:   qvfadds [[REG1:[0-9]+]], 3, 4
; CHECK-QPX:   qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR:   xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %t0, %x2
  %t2 = fadd <4 x float> %t1, %x3
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds2:
; CHECK:       # BB#0:
; CHECK-QPX:   qvfadds [[REG0:[0-9]+]], 1, 2
; CHECK-QPX:   qvfadds [[REG1:[0-9]+]], 3, 4
; CHECK-QPX:   qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR:   xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %t1, %x3
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds3:
; CHECK:       # BB#0:
; CHECK-QPX:   qvfadds [[REG0:[0-9]+]], 1, 2
; CHECK-QPX:   qvfadds [[REG1:[0-9]+]], 3, 4
; CHECK-QPX:   qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR:   xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %t0, %x2
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds4:
; CHECK:       # BB#0:
; CHECK-QPX:   qvfadds [[REG0:[0-9]+]], 1, 2
; CHECK-QPX:   qvfadds [[REG1:[0-9]+]], 3, 4
; CHECK-QPX:   qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR:   xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}
define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}