mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-15 07:59:57 +00:00
[LIR] Add support for creating memcpys from loops with a negative stride.
This allows us to transform the loop below into a memcpy:

    void test(unsigned *__restrict__ a, unsigned *__restrict__ b) {
      for (int i = 2047; i >= 0; --i) {
        a[i] = b[i];
      }
    }

This is the memcpy version of r251518, which added support for memset with negative-strided loops.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253091 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3b47b1f0e2
commit
2588aa9425
@ -129,7 +129,7 @@ private:
|
||||
bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
|
||||
const SCEVAddRecExpr *StoreEv,
|
||||
const SCEVAddRecExpr *LoadEv,
|
||||
const SCEV *BECount);
|
||||
const SCEV *BECount, bool NegStride);
|
||||
|
||||
/// @}
|
||||
/// \name Noncountable Loop Idiom Handling
|
||||
@ -362,10 +362,6 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
|
||||
StoredVal, SI, StoreEv, BECount, NegStride))
|
||||
return true;
|
||||
|
||||
// TODO: We don't handle negative stride memcpys.
|
||||
if (NegStride)
|
||||
return false;
|
||||
|
||||
// If the stored value is a strided load in the same loop with the same stride
|
||||
// this may be transformable into a memcpy. This kicks in for stuff like
|
||||
// for (i) A[i] = B[i];
|
||||
@ -374,7 +370,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
|
||||
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
|
||||
if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
|
||||
StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
|
||||
if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
|
||||
if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount,
|
||||
NegStride))
|
||||
return true;
|
||||
}
|
||||
// errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
|
||||
@ -626,7 +623,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
|
||||
/// same-strided load.
|
||||
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
|
||||
StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
|
||||
const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {
|
||||
const SCEVAddRecExpr *LoadEv, const SCEV *BECount, bool NegStride) {
|
||||
// If we're not allowed to form memcpy, we fail.
|
||||
if (!TLI->has(LibFunc::memcpy))
|
||||
return false;
|
||||
@ -640,6 +637,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
|
||||
IRBuilder<> Builder(Preheader->getTerminator());
|
||||
SCEVExpander Expander(*SE, *DL, "loop-idiom");
|
||||
|
||||
const SCEV *StrStart = StoreEv->getStart();
|
||||
unsigned StrAS = SI->getPointerAddressSpace();
|
||||
Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
|
||||
|
||||
// Handle negative strided loops.
|
||||
if (NegStride)
|
||||
StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
|
||||
|
||||
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
|
||||
// this into a memcpy in the loop preheader now if we want. However, this
|
||||
// would be unsafe to do if there is anything else in the loop that may read
|
||||
@ -647,8 +652,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
|
||||
// feeds the stores. Check for an alias by generating the base address and
|
||||
// checking everything.
|
||||
Value *StoreBasePtr = Expander.expandCodeFor(
|
||||
StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
|
||||
Preheader->getTerminator());
|
||||
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
|
||||
|
||||
if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
|
||||
StoreSize, *AA, SI)) {
|
||||
@ -658,11 +662,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
|
||||
return false;
|
||||
}
|
||||
|
||||
const SCEV *LdStart = LoadEv->getStart();
|
||||
unsigned LdAS = LI->getPointerAddressSpace();
|
||||
|
||||
// Handle negative strided loops.
|
||||
if (NegStride)
|
||||
LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
|
||||
|
||||
// For a memcpy, we have to make sure that the input array is not being
|
||||
// mutated by the loop.
|
||||
Value *LoadBasePtr = Expander.expandCodeFor(
|
||||
LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
|
||||
Preheader->getTerminator());
|
||||
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
|
||||
|
||||
if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
|
||||
*AA, SI)) {
|
||||
@ -677,7 +687,6 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
|
||||
|
||||
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
|
||||
// pointer size if it isn't already.
|
||||
Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
|
||||
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
|
||||
|
||||
const SCEV *NumBytesS =
|
||||
|
@ -469,7 +469,7 @@ for.cond.cleanup: ; preds = %for.body
|
||||
; CHECK: ret void
|
||||
}
|
||||
|
||||
; We don't handle memcpy-able loops with negative stride.
|
||||
; Handle memcpy-able loops with negative stride.
|
||||
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
|
||||
entry:
|
||||
%conv = sext i32 %c to i64
|
||||
@ -499,8 +499,35 @@ while.end.loopexit: ; preds = %while.body
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
ret i32* %0
|
||||
; CHECK-LABEL: @test17(
|
||||
; CHECK-NOT: call void @llvm.memcpy
|
||||
; CHECK: call void @llvm.memcpy
|
||||
; CHECK: ret i32*
|
||||
}
|
||||
|
||||
declare noalias i8* @malloc(i64)
|
||||
|
||||
; Handle memcpy-able loops with negative stride.
|
||||
; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
|
||||
; for (int i = 2047; i >= 0; --i) {
|
||||
; a[i] = b[i];
|
||||
; }
|
||||
; }
|
||||
define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
||||
store i32 %0, i32* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nsw i64 %indvars.iv, -1
|
||||
%cmp = icmp sgt i64 %indvars.iv, 0
|
||||
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
ret void
|
||||
; CHECK-LABEL: @test18(
|
||||
; CHECK: call void @llvm.memcpy
|
||||
; CHECK: ret
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user