[LIR] Add support for creating memcpys from loops with a negative stride.

This allows us to transform the loop below into a memcpy.

void test(unsigned *__restrict__ a, unsigned *__restrict__ b) {
  for (int i = 2047; i >= 0; --i) {
    a[i] = b[i];
  }
}

This is the memcpy counterpart of r251518, which added support for memset in
loops with a negative stride.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253091 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chad Rosier 2015-11-13 21:51:02 +00:00
parent 3b47b1f0e2
commit 2588aa9425
2 changed files with 50 additions and 14 deletions

View File

@ -129,7 +129,7 @@ private:
bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
const SCEVAddRecExpr *StoreEv,
const SCEVAddRecExpr *LoadEv,
const SCEV *BECount);
const SCEV *BECount, bool NegStride);
/// @}
/// \name Noncountable Loop Idiom Handling
@ -362,10 +362,6 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
StoredVal, SI, StoreEv, BECount, NegStride))
return true;
// TODO: We don't handle negative stride memcpys.
if (NegStride)
return false;
// If the stored value is a strided load in the same loop with the same stride
// this may be transformable into a memcpy. This kicks in for stuff like
// for (i) A[i] = B[i];
@ -374,7 +370,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount,
NegStride))
return true;
}
// errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
@ -626,7 +623,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
/// same-strided load.
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {
const SCEVAddRecExpr *LoadEv, const SCEV *BECount, bool NegStride) {
// If we're not allowed to form memcpy, we fail.
if (!TLI->has(LibFunc::memcpy))
return false;
@ -640,6 +637,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
const SCEV *StrStart = StoreEv->getStart();
unsigned StrAS = SI->getPointerAddressSpace();
Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
// Handle negative strided loops.
if (NegStride)
StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
@ -647,8 +652,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// feeds the stores. Check for an alias by generating the base address and
// checking everything.
Value *StoreBasePtr = Expander.expandCodeFor(
StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
Preheader->getTerminator());
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
StoreSize, *AA, SI)) {
@ -658,11 +662,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
return false;
}
const SCEV *LdStart = LoadEv->getStart();
unsigned LdAS = LI->getPointerAddressSpace();
// Handle negative strided loops.
if (NegStride)
LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
Value *LoadBasePtr = Expander.expandCodeFor(
LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
Preheader->getTerminator());
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
*AA, SI)) {
@ -677,7 +687,6 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
const SCEV *NumBytesS =

View File

@ -469,7 +469,7 @@ for.cond.cleanup: ; preds = %for.body
; CHECK: ret void
}
; We don't handle memcpy-able loops with negative stride.
; Handle memcpy-able loops with negative stride.
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
entry:
%conv = sext i32 %c to i64
@ -499,8 +499,35 @@ while.end.loopexit: ; preds = %while.body
while.end: ; preds = %while.end.loopexit, %entry
ret i32* %0
; CHECK-LABEL: @test17(
; CHECK-NOT: call void @llvm.memcpy
; CHECK: call void @llvm.memcpy
; CHECK: ret i32*
}
declare noalias i8* @malloc(i64)
; Handle memcpy-able loops with negative stride.
; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
; for (int i = 2047; i >= 0; --i) {
; a[i] = b[i];
; }
; }
; IR-level view of the loop: a single-block loop whose index counts down from
; 2047 by one per iteration, copying one i32 from %b to %a at that index. Both
; pointer arguments carry noalias. The FileCheck directives at the end of the
; function require a call to @llvm.memcpy to appear in the output.
define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
; Loop index: 2047 when entered from %entry, otherwise the decremented value.
%indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
; Load the i32 element of %b at the current index.
%arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
; Store the loaded value to the same index of %a.
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %0, i32* %arrayidx2, align 4
; Step the index by -1. The exit test compares the pre-decrement index against
; 0, so the iteration for index 0 still runs before the loop exits.
%indvars.iv.next = add nsw i64 %indvars.iv, -1
%cmp = icmp sgt i64 %indvars.iv, 0
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.body
ret void
; CHECK-LABEL: @test18(
; CHECK: call void @llvm.memcpy
; CHECK: ret
}