[FastISel][X86] Add MachineMemOperand to load/store instructions.

This commit adds MachineMemOperands to load and store instructions. This allows
the peephole optimizer to fold load instructions. Unfortunatelly the peephole
optimizer currently doesn't run at -O0.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210858 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Juergen Ributzka 2014-06-12 23:27:57 +00:00
parent cccc317ee0
commit 4eddf94a14
4 changed files with 127 additions and 39 deletions

View File

@ -377,6 +377,9 @@ protected:
/// Test whether the given value has exactly one use.
bool hasTrivialKill(const Value *V) const;
/// \brief Create a machine mem operand from the given instruction.
MachineMemOperand *createMachineMemOperandFor(const Instruction *I) const;
private:
bool SelectBinaryOp(const User *I, unsigned ISDOpcode);

View File

@ -1737,3 +1737,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
}
MachineMemOperand *
FastISel::createMachineMemOperandFor(const Instruction *I) const {
const Value *Ptr;
Type *ValTy;
unsigned Alignment;
unsigned Flags;
bool IsVolatile;
if (const auto *LI = dyn_cast<LoadInst>(I)) {
Alignment = LI->getAlignment();
IsVolatile = LI->isVolatile();
Flags = MachineMemOperand::MOLoad;
Ptr = LI->getPointerOperand();
ValTy = LI->getType();
} else if (const auto *SI = dyn_cast<StoreInst>(I)) {
Alignment = SI->getAlignment();
IsVolatile = SI->isVolatile();
Flags = MachineMemOperand::MOStore;
Ptr = SI->getPointerOperand();
ValTy = SI->getValueOperand()->getType();
} else {
return nullptr;
}
bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr;
bool IsInvariant = I->getMetadata("invariant.load") != nullptr;
const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa);
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
if (Alignment == 0) // Ensure that codegen never sees alignment 0.
Alignment = DL.getABITypeAlignment(ValTy);
unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy);
if (IsVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (IsNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
if (IsInvariant)
Flags |= MachineMemOperand::MOInvariant;
return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
Alignment, TBAAInfo, Ranges);
}

View File

@ -78,12 +78,14 @@ public:
private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
unsigned &ResultReg);
bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
bool Aligned = false);
bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
bool Aligned = false);
MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
const X86AddressMode &AM,
MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
@ -180,7 +182,7 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
unsigned &ResultReg) {
MachineMemOperand *MMO, unsigned &ResultReg) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
@ -228,8 +230,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
}
ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DbgLoc, TII.get(Opc), ResultReg), AM);
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
addFullAddress(MIB, AM);
if (MMO)
MIB->addMemOperand(*FuncInfo.MF, MMO);
return true;
}
@ -237,9 +242,9 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
const X86AddressMode &AM, bool Aligned) {
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
const X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@ -249,7 +254,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
TII.get(X86::AND8ri), AndResult)
.addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
ValReg = AndResult;
}
// FALLTHROUGH, handling i1 as i8.
@ -288,13 +294,18 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
break;
}
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DbgLoc, TII.get(Opc)), AM).addReg(ValReg);
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
if (MMO)
MIB->addMemOperand(*FuncInfo.MF, MMO);
return true;
}
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
const X86AddressMode &AM, bool Aligned) {
const X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
@ -317,10 +328,12 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
}
if (Opc) {
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DbgLoc, TII.get(Opc)), AM)
.addImm(Signed ? (uint64_t) CI->getSExtValue() :
CI->getZExtValue());
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
: CI->getZExtValue());
if (MMO)
MIB->addMemOperand(*FuncInfo.MF, MMO);
return true;
}
}
@ -329,7 +342,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
if (ValReg == 0)
return false;
return X86FastEmitStore(VT, ValReg, AM, Aligned);
bool ValKill = hasTrivialKill(Val);
return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
}
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
@ -740,19 +754,24 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
if (S->isAtomic())
return false;
unsigned SABIAlignment =
DL.getABITypeAlignment(S->getValueOperand()->getType());
bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
const Value *Val = S->getValueOperand();
const Value *Ptr = S->getPointerOperand();
MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
return false;
unsigned Alignment = S->getAlignment();
unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = ABIAlignment;
bool Aligned = Alignment >= ABIAlignment;
X86AddressMode AM;
if (!X86SelectAddress(I->getOperand(1), AM))
if (!X86SelectAddress(Ptr, AM))
return false;
return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
}
/// X86SelectRet - Select and emit code to implement ret instructions.
@ -887,25 +906,29 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
bool X86FastISel::X86SelectLoad(const Instruction *I) {
const LoadInst *LI = cast<LoadInst>(I);
// Atomic loads need special handling.
if (cast<LoadInst>(I)->isAtomic())
if (LI->isAtomic())
return false;
MVT VT;
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
return false;
const Value *Ptr = LI->getPointerOperand();
X86AddressMode AM;
if (!X86SelectAddress(I->getOperand(0), AM))
if (!X86SelectAddress(Ptr, AM))
return false;
unsigned ResultReg = 0;
if (X86FastEmitLoad(VT, AM, ResultReg)) {
UpdateValueMap(I, ResultReg);
return true;
}
return false;
if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
return false;
UpdateValueMap(I, ResultReg);
return true;
}
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
@ -1624,8 +1647,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
}
unsigned Reg;
bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
RV &= X86FastEmitStore(VT, Reg, DestAM);
bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
assert(RV && "Failed to emit load or store??");
unsigned Size = VT.getSizeInBits()/8;
@ -2322,7 +2345,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (!X86FastEmitStore(ArgVT, ArgVal, AM))
return false;
} else {
if (!X86FastEmitStore(ArgVT, Arg, AM))
if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM))
return false;
}
}
@ -2719,8 +2742,9 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) {
const Value *Ptr = LI->getPointerOperand();
X86AddressMode AM;
if (!X86SelectAddress(LI->getOperand(0), AM))
if (!X86SelectAddress(Ptr, AM))
return false;
const X86InstrInfo &XII = (const X86InstrInfo&)TII;
@ -2728,13 +2752,18 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned Size = DL.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = DL.getABITypeAlignment(LI->getType());
SmallVector<MachineOperand, 8> AddrOps;
AM.getFullAddress(AddrOps);
MachineInstr *Result =
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
if (!Result) return false;
if (!Result)
return false;
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
MI->eraseFromParent();
return true;

View File

@ -0,0 +1,12 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin | FileCheck %s
define i64 @fold_load(i64* %a, i64 %b) {
; CHECK-LABEL: fold_load
; CHECK: addq (%rdi), %rsi
; CHECK-NEXT: movq %rsi, %rax
%1 = load i64* %a, align 8
%2 = add i64 %1, %b
ret i64 %2
}