MachineCombiner Pass for selecting faster instruction

sequence on AArch64

Re-commit of r214669 without changes to test cases
LLVM::CodeGen/AArch64/arm64-neon-mul-div.ll and
LLVM::CodeGen/AArch64/dp-3source.ll
This resolves the reported compile failures of the original commit.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214832 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Gerolf Hoflehner 2014-08-05 01:16:13 +00:00
parent e2f9c8d663
commit c2328d552c
7 changed files with 561 additions and 20 deletions

View File

@ -1351,14 +1351,15 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
}
multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
// MADD/MSUB generation is decided by MachineCombiner.cpp
def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
[(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
[/*(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))*/]>,
Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 0;
}
def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
[(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
[/*(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))*/]>,
Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}

View File

@ -14,6 +14,7 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@ -697,17 +698,12 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
return true;
}
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register.
bool AArch64InstrInfo::optimizeCompareInstr(
MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
int CmpValue, const MachineRegisterInfo *MRI) const {
// Replace SUBSWrr with SUBWrr if NZCV is not used.
int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
if (Cmp_NZCV != -1) {
/// convertFlagSettingOpcode - return opcode that does not
/// set flags when possible. The caller is responsible to do
/// the actual substitution and legality checking.
static unsigned convertFlagSettingOpcode(MachineInstr *MI) {
unsigned NewOpc;
switch (CmpInstr->getOpcode()) {
switch (MI->getOpcode()) {
default:
return false;
case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break;
@ -727,7 +723,22 @@ bool AArch64InstrInfo::optimizeCompareInstr(
case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break;
case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break;
}
return NewOpc;
}
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register.
bool AArch64InstrInfo::optimizeCompareInstr(
MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
int CmpValue, const MachineRegisterInfo *MRI) const {
// Replace SUBSWrr with SUBWrr if NZCV is not used.
int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
if (Cmp_NZCV != -1) {
unsigned Opc = CmpInstr->getOpcode();
unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
if (NewOpc == Opc)
return false;
const MCInstrDesc &MCID = get(NewOpc);
CmpInstr->setDesc(MCID);
CmpInstr->RemoveOperand(Cmp_NZCV);
@ -2185,3 +2196,448 @@ void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(AArch64::HINT);
NopInst.addOperand(MCOperand::CreateImm(0));
}
/// useMachineCombiner - AArch64 supports the MachineCombiner, so this always
/// answers true.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }
/// Returns true when \p Opc is one of the flag-setting ADDS/SUBS opcodes
/// (register-register or register-immediate, 32-bit or 64-bit) that the
/// combiner is willing to look at.
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  // Flag-setting additions.
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  // Flag-setting subtractions.
  // Note: MSUB Wd,Wn,Wm,Wi computes Wd = Wi - Wn*Wm, not Wd = Wn*Wm - Wi.
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return true;
  default:
    return false;
  }
}
/// Returns true when \p Opc is a 32-bit ADD/SUB opcode (plain or flag-setting,
/// register or immediate form) that can be combined with a MUL.
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  // Additions.
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  // Subtractions.
  // Note: MSUB Wd,Wn,Wm,Wi computes Wd = Wi - Wn*Wm, not Wd = Wn*Wm - Wi.
  case AArch64::SUBWrr:
  case AArch64::SUBWri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
    return true;
  default:
    return false;
  }
}
/// Returns true when \p Opc is a 64-bit ADD/SUB opcode (plain or flag-setting,
/// register or immediate form) that can be combined with a MUL.
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  // Additions.
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  // Subtractions.
  // Note: MSUB Xd,Xn,Xm,Xi computes Xd = Xi - Xn*Xm, not Xd = Xn*Xm - Xi.
  case AArch64::SUBXrr:
  case AArch64::SUBXri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return true;
  default:
    return false;
  }
}
/// Returns true when \p Opc, of either register width, can be combined with a
/// MUL.
static bool isCombineInstrCandidate(unsigned Opc) {
  if (isCombineInstrCandidate32(Opc))
    return true;
  return isCombineInstrCandidate64(Opc);
}
/// Returns true when operand \p MO is produced by a multiply that may be
/// folded into its user.
///
/// \param MBB block that must contain the multiply
/// \param MO operand of the candidate ADD/SUB to inspect
/// \param MulOpc opcode the defining instruction must have (a MADD form)
/// \param ZeroReg zero register the multiply's accumulator operand must be
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  // Only virtual registers have a unique definition we can look up.
  if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    return false;

  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *Def = MRI.getUniqueVRegDef(MO.getReg());
  if (!Def)
    return false;

  // The definition has to be in the trace (otherwise, it won't have a depth).
  if (Def->getParent() != &MBB)
    return false;

  if ((unsigned)Def->getOpcode() != MulOpc)
    return false;

  assert(Def->getNumOperands() >= 4 && Def->getOperand(0).isReg() &&
         Def->getOperand(1).isReg() && Def->getOperand(2).isReg() &&
         Def->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");

  // A MADD is only a plain multiply when its addend is the zero register.
  if (Def->getOperand(3).getReg() != ZeroReg)
    return false;

  // The product must have no other (non-debug) user than the one we fold into.
  return MRI.hasOneNonDBGUse(Def->getOperand(0).getReg());
}
/// hasPattern - return true when there is potentially a faster code sequence
/// for an instruction chain ending in \p Root. Every candidate pattern is
/// appended to \p Pattern. Entries should be in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::hasPattern(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
  unsigned Opc = Root.getOpcode();
  if (!isCombineInstrCandidate(Opc))
    return false;

  if (isCombineInstrSettingFlag(Opc)) {
    // Only proceed when the NZCV definition is dead; with live flags bail out.
    if (Root.findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return false;
    // Continue matching on the non-flag-setting twin of the opcode.
    // CHECKME: do we miss any cases for opcode conversion?
    unsigned PlainOpc = convertFlagSettingOpcode(&Root);
    if (PlainOpc == Opc)
      return false;
    Opc = PlainOpc;
  }

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  // Records Kind when operand OpIdx of Root is fed by a foldable multiply.
  auto Match = [&](unsigned OpIdx, unsigned MulOpc, unsigned ZeroReg,
                   MachineCombinerPattern::MC_PATTERN Kind) {
    if (!canCombineWithMUL(MBB, Root.getOperand(OpIdx), MulOpc, ZeroReg))
      return;
    Pattern.push_back(Kind);
    Found = true;
  };

  switch (Opc) {
  default:
    break;

  // Register-register forms: the multiply may feed either source operand.
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    Match(1, AArch64::MADDWrrr, AArch64::WZR,
          MachineCombinerPattern::MC_MULADDW_OP1);
    Match(2, AArch64::MADDWrrr, AArch64::WZR,
          MachineCombinerPattern::MC_MULADDW_OP2);
    break;
  case AArch64::ADDXrr:
    Match(1, AArch64::MADDXrrr, AArch64::XZR,
          MachineCombinerPattern::MC_MULADDX_OP1);
    Match(2, AArch64::MADDXrrr, AArch64::XZR,
          MachineCombinerPattern::MC_MULADDX_OP2);
    break;
  case AArch64::SUBWrr:
    Match(1, AArch64::MADDWrrr, AArch64::WZR,
          MachineCombinerPattern::MC_MULSUBW_OP1);
    Match(2, AArch64::MADDWrrr, AArch64::WZR,
          MachineCombinerPattern::MC_MULSUBW_OP2);
    break;
  case AArch64::SUBXrr:
    Match(1, AArch64::MADDXrrr, AArch64::XZR,
          MachineCombinerPattern::MC_MULSUBX_OP1);
    Match(2, AArch64::MADDXrrr, AArch64::XZR,
          MachineCombinerPattern::MC_MULSUBX_OP2);
    break;

  // Register-immediate forms: only operand 1 can be the multiply result.
  case AArch64::ADDWri:
    Match(1, AArch64::MADDWrrr, AArch64::WZR,
          MachineCombinerPattern::MC_MULADDWI_OP1);
    break;
  case AArch64::ADDXri:
    Match(1, AArch64::MADDXrrr, AArch64::XZR,
          MachineCombinerPattern::MC_MULADDXI_OP1);
    break;
  case AArch64::SUBWri:
    Match(1, AArch64::MADDWrrr, AArch64::WZR,
          MachineCombinerPattern::MC_MULSUBWI_OP1);
    break;
  case AArch64::SUBXri:
    Match(1, AArch64::MADDXrrr, AArch64::XZR,
          MachineCombinerPattern::MC_MULSUBXI_OP1);
    break;
  }
  return Found;
}
/// genMadd - Build the multiply-accumulate that replaces a MUL feeding an
/// ADD/SUB.
/// Example:
///  MUL I=A,B,0
///  ADD R,I,C
///  ==> MADD R,A,B,C
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \returns the MUL instruction that defines operand \p IdxMulOpd of \p Root
static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
                             const TargetInstrInfo *TII, MachineInstr &Root,
                             SmallVectorImpl<MachineInstr *> &InsInstrs,
                             unsigned IdxMulOpd, unsigned MaddOpc) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  // The addend is whichever source operand of Root is not the product.
  const unsigned IdxAddend = (IdxMulOpd == 1) ? 2 : 1;
  MachineInstr *MulDef =
      MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());

  // Operands in order: destination, both multiplicands, addend.
  MachineInstrBuilder Madd =
      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc));
  Madd.addOperand(Root.getOperand(0));
  Madd.addOperand(MulDef->getOperand(1));
  Madd.addOperand(MulDef->getOperand(2));
  Madd.addOperand(Root.getOperand(IdxAddend));

  InsInstrs.push_back(Madd);
  return MulDef;
}
/// genMaddR - Build the multiply-accumulate that replaces a MUL feeding an
/// ADD/SUB when the addend lives in an extra virtual register.
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
/// \returns the MUL instruction that defines operand \p IdxMulOpd of \p Root
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc,
                              unsigned VR) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MulDef =
      MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());

  // Operands in order: destination, both multiplicands, addend register.
  MachineInstrBuilder Madd =
      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc));
  Madd.addOperand(Root.getOperand(0));
  Madd.addOperand(MulDef->getOperand(1));
  Madd.addOperand(MulDef->getOperand(2));
  Madd.addReg(VR);

  InsInstrs.push_back(Madd);
  return MulDef;
}
/// genAlternativeCodeSequence - when hasPattern() finds a pattern
/// this function generates the instructions that could replace the
/// original code sequence.
///
/// \param Root is the ADD/SUB instruction that ends the matched chain
/// \param Pattern selects which multiply-accumulate rewrite to emit
/// \param [out] InsInstrs receives the newly built instructions, in order
/// \param [out] DelInstrs receives the MUL and \p Root once a replacement
/// has been generated
/// \param [out] InstrIdxForVirtReg gets an entry for every virtual register
/// created here; the value 0 is the position in \p InsInstrs of the defining
/// instruction (this presumes \p InsInstrs is empty on entry)
///
/// When no replacement can be built (unhandled pattern, or an immediate that
/// has no logical-immediate encoding) none of the output containers is
/// modified.
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getTarget().getSubtargetImpl()->getInstrInfo();

  // Stays null until a replacement sequence has actually been generated.
  MachineInstr *MUL = nullptr;

  switch (Pattern) {
  default:
    // Unknown pattern: nothing is generated.
    break;

  case MachineCombinerPattern::MC_MULADDW_OP1:
  case MachineCombinerPattern::MC_MULADDX_OP1: {
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    unsigned Opc = Pattern == MachineCombinerPattern::MC_MULADDW_OP1
                       ? AArch64::MADDWrrr
                       : AArch64::MADDXrrr;
    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc);
    break;
  }

  case MachineCombinerPattern::MC_MULADDW_OP2:
  case MachineCombinerPattern::MC_MULADDX_OP2: {
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    unsigned Opc = Pattern == MachineCombinerPattern::MC_MULADDW_OP2
                       ? AArch64::MADDWrrr
                       : AArch64::MADDXrrr;
    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc);
    break;
  }

  case MachineCombinerPattern::MC_MULADDWI_OP1:
  case MachineCombinerPattern::MC_MULADDXI_OP1:
  case MachineCombinerPattern::MC_MULSUBWI_OP1:
  case MachineCombinerPattern::MC_MULSUBXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm            (or SUB R,I,Imm)
    // ==> ORR  V, ZR, Imm    (or ORR V, ZR, -Imm)
    // ==> MADD R,A,B,V
    const bool Is32Bit =
        Pattern == MachineCombinerPattern::MC_MULADDWI_OP1 ||
        Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1;
    const bool IsSub = Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1 ||
                       Pattern == MachineCombinerPattern::MC_MULSUBXI_OP1;
    const unsigned BitSize = Is32Bit ? 32 : 64;
    const unsigned OrrOpc = Is32Bit ? AArch64::ORRWri : AArch64::ORRXri;
    const unsigned ZeroReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    const unsigned Opc = Is32Bit ? AArch64::MADDWrrr : AArch64::MADDXrrr;

    // Do all immediate arithmetic in uint64_t: negation and shifts are then
    // well defined (modulo 2^64) for both register widths. Doing this on a
    // plain int would shift a 32-bit value by 32 and left-shift a negative
    // number, both of which are undefined.
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Shift = Root.getOperand(3).getImm();
      Imm = Imm << Shift;
    }
    if (IsSub)
      Imm = 0 - Imm;
    // Keep only the low BitSize bits.
    const uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);

    uint64_t Encoding;
    if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
      break; // Not encodable as a logical immediate: generate nothing.

    // Create the temporary only once we know it will be defined.
    const TargetRegisterClass *RC =
        MRI.getRegClass(Root.getOperand(1).getReg());
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc))
            .addOperand(MachineOperand::CreateReg(NewVR, RegState::Define))
            .addReg(ZeroReg)
            .addImm(Encoding);
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR);
    break;
  }

  case MachineCombinerPattern::MC_MULSUBW_OP1:
  case MachineCombinerPattern::MC_MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    const bool Is32Bit = Pattern == MachineCombinerPattern::MC_MULSUBW_OP1;
    const unsigned SubOpc = Is32Bit ? AArch64::SUBWrr : AArch64::SUBXrr;
    const unsigned ZeroReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    const unsigned Opc = Is32Bit ? AArch64::MADDWrrr : AArch64::MADDXrrr;

    const TargetRegisterClass *RC =
        MRI.getRegClass(Root.getOperand(1).getReg());
    unsigned NewVR = MRI.createVirtualRegister(RC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc))
            .addOperand(MachineOperand::CreateReg(NewVR, RegState::Define))
            .addReg(ZeroReg)
            .addOperand(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR);
    break;
  }

  case MachineCombinerPattern::MC_MULSUBW_OP2:
  case MachineCombinerPattern::MC_MULSUBX_OP2: {
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    unsigned Opc = Pattern == MachineCombinerPattern::MC_MULSUBW_OP2
                       ? AArch64::MSUBWrrr
                       : AArch64::MSUBXrrr;
    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc);
    break;
  }
  }

  // Nothing was generated, so there is nothing to delete. In particular do
  // not hand an indeterminate MUL pointer to the caller.
  if (!MUL)
    return;

  // Record MUL and ADD/SUB for deletion
  DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}

View File

@ -17,6 +17,7 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"
@ -156,9 +157,26 @@ public:
bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const override;
/// hasPattern - return true when there is potentially a faster code sequence
/// for an instruction chain ending in \p Root. All potential patterns are
/// appended to the \p Pattern vector.
/// \param Root last instruction of the chain to inspect
/// \param [out] Pattern receives every pattern that may apply to \p Root
virtual bool hasPattern(
MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const;
/// genAlternativeCodeSequence - when hasPattern() finds a pattern
/// this function generates the instructions that could replace the
/// original code sequence
/// \param Root last instruction of the matched chain
/// \param P the pattern, as reported by hasPattern(), to generate code for
/// \param [out] InsInstrs receives the newly created instructions
/// \param [out] DelInstrs receives the instructions the new sequence replaces
/// \param [out] InstrIdxForVirtReg receives an entry for each virtual register
/// created while building the new sequence
virtual void genAlternativeCodeSequence(
MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
/// useMachineCombiner - return true when the target supports the
/// MachineCombiner; AArch64 does.
virtual bool useMachineCombiner(void) const;
bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
private:
void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
MachineBasicBlock *TBB,

View File

@ -0,0 +1,42 @@
//===- AArch64MachineCombinerPattern.h -===//
//===- AArch64 instruction pattern supported by combiner -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the instruction patterns supported by the machine combiner.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_AArch64MACHINECOMBINERPATTERN_H
#define LLVM_TARGET_AArch64MACHINECOMBINERPATTERN_H
namespace llvm {

/// Instruction patterns the machine combiner knows how to rewrite.
///
/// Naming scheme: MC_MUL{ADD,SUB}{W,X}[I]_OP{1,2}
///   ADD/SUB - the instruction consuming the multiply result
///   W/X     - 32-bit / 64-bit register width
///   I       - the consumer is the register-immediate form
///   OP1/OP2 - which source operand of the consumer is the multiply result
namespace MachineCombinerPattern {
enum MC_PATTERN : int {
  MC_NONE = 0,     // 0

  // 32-bit patterns.
  MC_MULADDW_OP1,  // 1: ADD Wd, (MUL), Wc
  MC_MULADDW_OP2,  // 2: ADD Wd, Wc, (MUL)
  MC_MULSUBW_OP1,  // 3: SUB Wd, (MUL), Wc
  MC_MULSUBW_OP2,  // 4: SUB Wd, Wc, (MUL)
  MC_MULADDWI_OP1, // 5: ADD Wd, (MUL), #imm
  MC_MULSUBWI_OP1, // 6: SUB Wd, (MUL), #imm

  // 64-bit patterns.
  MC_MULADDX_OP1,  // 7: ADD Xd, (MUL), Xc
  MC_MULADDX_OP2,  // 8: ADD Xd, Xc, (MUL)
  MC_MULSUBX_OP1,  // 9: SUB Xd, (MUL), Xc
  MC_MULSUBX_OP2,  // 10: SUB Xd, Xc, (MUL)
  MC_MULADDXI_OP1, // 11: ADD Xd, (MUL), #imm
  MC_MULSUBXI_OP1  // 12: SUB Xd, (MUL), #imm
};
} // end namespace MachineCombinerPattern
} // end namespace llvm
#endif

View File

@ -24,6 +24,10 @@ static cl::opt<bool>
EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"),
cl::init(true), cl::Hidden);
// Hidden command-line switch "aarch64-mcr" (on by default). When set,
// addILPOpts() adds the machine combiner pass to the pipeline.
static cl::opt<bool> EnableMCR("aarch64-mcr",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
static cl::opt<bool>
EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"),
cl::init(true), cl::Hidden);
@ -174,6 +178,8 @@ bool AArch64PassConfig::addInstSelector() {
bool AArch64PassConfig::addILPOpts() {
if (EnableCCMP)
addPass(createAArch64ConditionalCompares());
if (EnableMCR)
addPass(&MachineCombinerID);
addPass(&EarlyIfConverterID);
if (EnableStPairSuppress)
addPass(createAArch64StorePairSuppressPass());

View File

@ -0,0 +1,19 @@
; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
; Multiplies two i128 values. For both triples in the RUN lines the checks
; expect one umulh, two madd instructions that accumulate the cross products,
; and a final mul for the low half.
define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: test_128bitmul:
; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]]
; CHECK: madd x1, x1, x2, [[PART1]]
; CHECK: mul x0, x0, x2
; CHECK-BE-LABEL: test_128bitmul:
; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]]
; CHECK-BE: madd x0, x0, x3, [[PART1]]
; CHECK-BE: mul x1, x1, x3
%prod = mul i128 %lhs, %rhs
ret i128 %prod
}

View File

@ -1,17 +1,16 @@
; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
; RUN: llc -mtriple=arm64-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64_be-linux-gnu -mcpu=cyclone %s -o - | FileCheck --check-prefix=CHECK-BE %s
define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: test_128bitmul:
; CHECK-DAG: mul [[PART1:x[0-9]+]], x0, x3
; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]]
; CHECK: madd x1, x1, x2, [[PART1]]
; CHECK: mul [[PART2:x[0-9]+]], x1, x2
; CHECK: mul x0, x0, x2
; CHECK-BE-LABEL: test_128bitmul:
; CHECK-BE-DAG: mul [[PART1:x[0-9]+]], x1, x2
; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]]
; CHECK-BE: madd x0, x0, x3, [[PART1]]
; CHECK-BE: mul [[PART2:x[0-9]+]], x0, x3
; CHECK-BE: mul x1, x1, x3
%prod = mul i128 %lhs, %rhs