[DAGCombiner] Split up an indexed load if only the base pointer value is live

Right now the load may not get DCE'd because of the side-effect of updating
the base pointer.

This can happen if we lower a read-modify-write of an illegal larger type
(e.g. i48) such that the modification only affects one of the subparts (the
lower i32 part but not the higher i16 part).  See the testcase.

In order to spot the dead load we need to revisit it when SimplifyDemandedBits
decided that the value of the load is masked off.  This is the
CommitTargetLoweringOpt piece.

I checked compile time with ARM64 by sending SPEC bitcode files through llc.
No measurable change.

Fixes <rdar://problem/16031651>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208640 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Adam Nemet 2014-05-12 23:00:03 +00:00
parent 1633f752f8
commit 73282018a1
2 changed files with 59 additions and 7 deletions

View File

@ -167,6 +167,7 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
@ -761,10 +762,14 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// If the operands of this node are only used by the node, they will now
// be dead. Make sure to visit them first to delete dead nodes early.
for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) {
SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode();
// For an operand generating multiple values, one of the values may
// become dead allowing further simplification (e.g. split index
// arithmetic from an indexed load).
if (Op->hasOneUse() || Op->getNumValues() > 1)
AddToWorkList(Op);
}
DAG.DeleteNode(TLO.Old.getNode());
}
}
@ -7844,6 +7849,17 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
return false;
}
/// \brief Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
ISD::MemIndexedMode AM = LD->getAddressingMode();
assert(AM != ISD::UNINDEXED);
SDValue BP = LD->getOperand(1);
SDValue Inc = LD->getOperand(2);
unsigned Opc =
(AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
@ -7880,8 +7896,16 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
if (!N->hasAnyUseOfValue(0)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
SDValue Index;
if (N->hasAnyUseOfValue(1)) {
Index = SplitIndexingFromLoad(LD);
// Try to fold the base pointer arithmetic into subsequent loads and
// stores.
AddUsersToWorkList(N);
} else
Index = DAG.getUNDEF(N->getValueType(1));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
@ -7889,8 +7913,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
DAG.getUNDEF(N->getValueType(1)));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
removeFromWorkList(N);
DAG.DeleteNode(N);

View File

@ -0,0 +1,29 @@
; RUN: llc -mcpu=cyclone < %s | FileCheck %s
target datalayout = "e-i64:64-n32:64-S128"
target triple = "arm64-apple-ios"
%"struct.SU" = type { i32, %"struct.SU"*, i32*, i32, i32, %"struct.BO", i32, [5 x i8] }
%"struct.BO" = type { %"struct.RE" }
%"struct.RE" = type { i32, i32, i32, i32 }
; This is a read-modify-write of some bifields combined into an i48. It gets
; legalized into i32 and i16 accesses. Only a single store of zero to the low
; i32 part should be live.
; CHECK-LABEL: test:
; CHECK-NOT: ldr
; CHECK: str wzr
; CHECK-NOT: str
define void @test(%"struct.SU"* nocapture %su) {
entry:
%r1 = getelementptr inbounds %"struct.SU"* %su, i64 1, i32 5
%r2 = bitcast %"struct.BO"* %r1 to i48*
%r3 = load i48* %r2, align 8
%r4 = and i48 %r3, -4294967296
%r5 = or i48 0, %r4
store i48 %r5, i48* %r2, align 8
ret void
}