[PowerPC] Make LDtocL and friends invariant loads

LDtocL, and other loads that roughly correspond to the TOC_ENTRY SDAG node,
represent loads from the TOC, which is invariant. As a result, these loads can
be hoisted out of loops, etc. In order to do this, we need to generate
GOT-style MMOs for TOC_ENTRY, which requires treating it as a legitimate memory
intrinsic node type. Once this is done, the MMO transfer is automatically
handled for TableGen-driven instruction selection, and for nodes generated
directly in PPCISelDAGToDAG, we need to transfer the MMOs manually.

Also, we were not transferring MMOs associated with pre-increment loads, so do
that too.

Lastly, this fixes an exposed bug where R30 was not added as a defined operand of
UpdateGBR.

This problem was highlighted by an example (used to generate the test case)
posted to llvmdev by Francois Pichet.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230553 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2015-02-25 21:36:59 +00:00
parent e9e16aa4a5
commit 7840990de8
8 changed files with 133 additions and 71 deletions

View File

@ -223,6 +223,8 @@ private:
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
SDNode *transferMemOperands(SDNode *N, SDNode *Result);
};
}
@ -315,7 +317,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(FirstMBB, MBBI, dl,
TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg)
TII.get(PPC::UpdateGBR), GlobalBaseReg)
.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
}
@ -2342,6 +2344,14 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
}
SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
return Result;
}
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
@ -2460,9 +2470,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering->getPointerTy(),
MVT::Other, Ops);
return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
LD->getValueType(0),
PPCLowering->getPointerTy(),
MVT::Other, Ops));
} else {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
@ -2497,9 +2508,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Base, Offset, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering->getPointerTy(),
MVT::Other, Ops);
return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
LD->getValueType(0),
PPCLowering->getPointerTy(),
MVT::Other, Ops));
}
}
@ -2851,8 +2863,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
"Only supported for 64-bit ABI and 32-bit SVR4");
if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
SDValue GA = N->getOperand(0);
return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
N->getOperand(1));
return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl,
MVT::i32, GA, N->getOperand(1)));
}
// For medium and large code model, we generate two instructions as
@ -2872,12 +2884,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue GA = N->getOperand(0);
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
TOCbase, GA);
if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
CModel == CodeModel::Large)
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
MVT::i64, GA, SDValue(Tmp, 0)));
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
@ -2885,8 +2897,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
(GValue->isDeclaration() || GValue->isWeakForLinker())) ||
GValue->isDeclaration() || GValue->hasCommonLinkage() ||
GValue->hasAvailableExternallyLinkage())
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
MVT::i64, GA, SDValue(Tmp, 0)));
}
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,

View File

@ -1821,6 +1821,19 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) {
setUsesTOCBasePtr(DAG.getMachineFunction());
}
static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
SDValue GA) {
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
SDValue Ops[] = { GA, Reg };
return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
DAG.getVTList(VT, MVT::Other), Ops, VT,
MachinePointerInfo::getGOT(), 0, false, true,
false, 0);
}
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
@ -1832,8 +1845,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
return getTOCEntry(DAG, SDLoc(CP), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
@ -1843,9 +1855,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
PPCII::MO_PIC_FLAG);
SDLoc DL(CP);
return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
return getTOCEntry(DAG, SDLoc(CP), false, GA);
}
SDValue CPIHi =
@ -1864,8 +1874,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
return getTOCEntry(DAG, SDLoc(JT), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
@ -1875,9 +1884,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
PPCII::MO_PIC_FLAG);
SDLoc DL(GA);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
return getTOCEntry(DAG, SDLoc(GA), false, GA);
}
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
@ -1896,8 +1903,7 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
@ -2007,8 +2013,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
return getTOCEntry(DAG, DL, true, GA);
}
unsigned MOHiFlag, MOLoFlag;
@ -2019,8 +2024,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
GSDN->getOffset(),
PPCII::MO_PIC_FLAG);
return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
return getTOCEntry(DAG, DL, false, GA);
}
SDValue GAHi =

View File

@ -71,8 +71,6 @@ namespace llvm {
/// though these are usually folded into other nodes.
Hi, Lo,
TOC_ENTRY,
/// The following two target-specific nodes are used for calls through
/// function pointers in the 64-bit SVR4 ABI.
@ -337,7 +335,12 @@ namespace llvm {
/// QBRC, CHAIN = QVLFSb CHAIN, Ptr
/// The 4xf32 load used for v4i1 constants.
QVLFSb
QVLFSb,
/// GPRC = TOC_ENTRY GA, TOC
/// Loads the entry for GA from the TOC, where the TOC base is given by
/// the last operand.
TOC_ENTRY
};
}

View File

@ -119,7 +119,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL",
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
[SDNPMayLoad, SDNPMemOperand]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;

View File

@ -0,0 +1,39 @@
; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@phasor = external constant [4096 x i32]
; Function Attrs: nounwind
define void @test(i32* nocapture %out, i32 zeroext %step_size) #0 {
entry:
%shl = shl i32 %step_size, 2
%idxprom = zext i32 %shl to i64
br label %for.body
; Make sure that the TOC load has been hoisted out of the loop.
; CHECK-LABEL: @test
; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc@l
; CHECK: %for.body
; CHECK: blr
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = trunc i64 %indvars.iv to i32
%shl1 = shl i32 %0, %step_size
%idxprom2 = sext i32 %shl1 to i64
%arrayidx.sum = add nsw i64 %idxprom2, %idxprom
%arrayidx3 = getelementptr inbounds [4096 x i32]* @phasor, i64 0, i64 %arrayidx.sum
%1 = load i32* %arrayidx3, align 4
%arrayidx5 = getelementptr inbounds i32* %out, i64 %indvars.iv
store i32 %1, i32* %arrayidx5, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
%cmp = icmp slt i64 %indvars.iv.next, 1020
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
attributes #0 = { nounwind }

View File

@ -264,26 +264,26 @@ entry:
ret void
}
; CHECK-LABEL: @caller2
; CHECK: ld [[REG:[0-9]+]], .LC
; CHECK-DAG: lfs 1, 0([[REG]])
; CHECK-DAG: lfs 2, 4([[REG]])
; CHECK-DAG: lfs 3, 8([[REG]])
; CHECK-DAG: lfs 4, 12([[REG]])
; CHECK-DAG: lfs 5, 16([[REG]])
; CHECK-DAG: lfs 6, 20([[REG]])
; CHECK-DAG: lfs 7, 24([[REG]])
; CHECK-DAG: lfs 8, 28([[REG]])
; CHECK: ld [[REG:[0-9]+]], .LC
; CHECK-DAG: lfs 9, 0([[REG]])
; CHECK-DAG: lfs 10, 4([[REG]])
; CHECK-DAG: lfs 11, 8([[REG]])
; CHECK-DAG: lfs 12, 12([[REG]])
; CHECK-DAG: lfs 13, 16([[REG]])
; CHECK: ld [[REG:[0-9]+]], .LC
; CHECK-DAG: lwz [[REG0:[0-9]+]], 0([[REG]])
; CHECK-DAG: lwz [[REG1:[0-9]+]], 4([[REG]])
; CHECK-DAG: sldi [[REG1]], [[REG1]], 32
; CHECK-DAG: or 10, [[REG0]], [[REG1]]
; CHECK: ld {{[0-9]+}}, .LC
; CHECK-DAG: lfs 1, 0({{[0-9]+}})
; CHECK-DAG: lfs 2, 4({{[0-9]+}})
; CHECK-DAG: lfs 3, 8({{[0-9]+}})
; CHECK-DAG: lfs 4, 12({{[0-9]+}})
; CHECK-DAG: lfs 5, 16({{[0-9]+}})
; CHECK-DAG: lfs 6, 20({{[0-9]+}})
; CHECK-DAG: lfs 7, 24({{[0-9]+}})
; CHECK-DAG: lfs 8, 28({{[0-9]+}})
; CHECK-DAG: lfs 9, 0({{[0-9]+}})
; CHECK-DAG: lfs 10, 4({{[0-9]+}})
; CHECK-DAG: lfs 11, 8({{[0-9]+}})
; CHECK-DAG: lfs 12, 12({{[0-9]+}})
; CHECK-DAG: lfs 13, 16({{[0-9]+}})
; CHECK-DAG: lwz [[REG0:[0-9]+]], 0({{[0-9]+}})
; CHECK-DAG: lwz [[REG1:[0-9]+]], 4({{[0-9]+}})
; CHECK-DAG: sldi [[REG2:[0-9]+]], [[REG1]], 32
; CHECK-DAG: or 10, [[REG0]], [[REG2]]
; CHECK: bl test2
declare void @test2([8 x float], [5 x float], [2 x float])

View File

@ -22,7 +22,10 @@ entry:
; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
; CHECK-NEXT: nop
; CHECK: std {{[0-9]+}}, 0(3)
; FIXME: We could check here for 'std {{[0-9]+}}, 0(3)', but that no longer
; works because, with new scheduling freedom, we create a copy of R3 based on the
; initial scheduling, but don't coalesce it again after we move the instructions
; so that the copy is no longer necessary.
; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_call@tlsgd)
; CHECK-NEXT: nop

View File

@ -35,17 +35,17 @@ entry:
ret void
; CHECK-LABEL: @test2
; CHECK: ld {{[0-9]+}}, 112(1)
; CHECK: li [[REG16:[0-9]+]], 16
; CHECK: addi [[REGB:[0-9]+]], 1, 112
; CHECK: lvx 2, [[REGB]], [[REG16]]
; CHECK-DAG: ld {{[0-9]+}}, 112(1)
; CHECK-DAG: li [[REG16:[0-9]+]], 16
; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 112
; CHECK-DAG: lvx 2, [[REGB]], [[REG16]]
; CHECK: blr
; CHECK-VSX-LABEL: @test2
; CHECK-VSX: ld {{[0-9]+}}, 112(1)
; CHECK-VSX: li [[REG16:[0-9]+]], 16
; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 112
; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
; CHECK-VSX-DAG: ld {{[0-9]+}}, 112(1)
; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16
; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 112
; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
; CHECK-VSX: blr
}
@ -61,17 +61,17 @@ entry:
ret void
; CHECK-LABEL: @test3
; CHECK: ld {{[0-9]+}}, 128(1)
; CHECK: li [[REG16:[0-9]+]], 16
; CHECK: addi [[REGB:[0-9]+]], 1, 128
; CHECK: lvx 2, [[REGB]], [[REG16]]
; CHECK-DAG: ld {{[0-9]+}}, 128(1)
; CHECK-DAG: li [[REG16:[0-9]+]], 16
; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 128
; CHECK-DAG: lvx 2, [[REGB]], [[REG16]]
; CHECK: blr
; CHECK-VSX-LABEL: @test3
; CHECK-VSX: ld {{[0-9]+}}, 128(1)
; CHECK-VSX: li [[REG16:[0-9]+]], 16
; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 128
; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
; CHECK-VSX-DAG: ld {{[0-9]+}}, 128(1)
; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16
; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 128
; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
; CHECK-VSX: blr
}