mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-29 14:40:25 +00:00
LoopIdiom: Recognize memmove loops.
This turns loops like `for (unsigned i = 0; i != n; ++i) p[i] = p[i+1];` into memmove, which has a highly optimized implementation in most libcs. This was really easy with the new DependenceAnalysis :)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166875 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
96c8735e28
commit
d11c5d08a5
@ -16,7 +16,7 @@
|
||||
// TODO List:
|
||||
//
|
||||
// Future loop memory idioms to recognize:
|
||||
// memcmp, memmove, strlen, etc.
|
||||
// memcmp, strlen, etc.
|
||||
// Future floating point idioms to recognize in -ffast-math mode:
|
||||
// fpowi
|
||||
// Future integer operation idioms to recognize:
|
||||
@ -60,8 +60,9 @@
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
using namespace llvm;
|
||||
|
||||
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
|
||||
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
|
||||
STATISTIC(NumMemSet, "Number of memsets formed from loop stores");
|
||||
STATISTIC(NumMemCpy, "Number of memcpys formed from loop load+stores");
|
||||
STATISTIC(NumMemMove, "Number of memmoves formed from loop load+stores");
|
||||
|
||||
namespace {
|
||||
class LoopIdiomRecognize : public LoopPass {
|
||||
@ -532,6 +533,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
|
||||
// stores) in the loop. We ignore the direct dependency between SI and LI here
|
||||
// and check it later.
|
||||
DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();
|
||||
bool isMemcpySafe = true;
|
||||
for (Loop::block_iterator BI = CurLoop->block_begin(),
|
||||
BE = CurLoop->block_end(); BI != BE; ++BI)
|
||||
for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
|
||||
@ -552,8 +554,14 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
|
||||
// Now check the dependency between SI and LI. If there is no dependency we
|
||||
// can safely emit a memcpy.
|
||||
OwningPtr<Dependence> Dep(DA.depends(SI, LI, true));
|
||||
if (Dep)
|
||||
return false;
|
||||
if (Dep) {
|
||||
// If there is a dependence but the direction is positive we can still
|
||||
// safely turn this into memmove.
|
||||
if (Dep->getLevels() != 1 ||
|
||||
Dep->getDirection(1) != Dependence::DVEntry::GT)
|
||||
return false;
|
||||
isMemcpySafe = false;
|
||||
}
|
||||
|
||||
// The trip count of the loop and the base pointer of the addrec SCEV is
|
||||
// guaranteed to be loop invariant, which means that it should dominate the
|
||||
@ -590,12 +598,19 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
|
||||
Value *NumBytes =
|
||||
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
|
||||
|
||||
CallInst *NewCall =
|
||||
Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
|
||||
std::min(SI->getAlignment(), LI->getAlignment()));
|
||||
CallInst *NewCall;
|
||||
unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
|
||||
if (isMemcpySafe) {
|
||||
NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
|
||||
++NumMemCpy;
|
||||
} else {
|
||||
NewCall = Builder.CreateMemMove(StoreBasePtr, LoadBasePtr, NumBytes, Align);
|
||||
++NumMemMove;
|
||||
}
|
||||
NewCall->setDebugLoc(SI->getDebugLoc());
|
||||
|
||||
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
|
||||
DEBUG(dbgs() << " Formed " << (isMemcpySafe ? "memcpy: " : "memmove: ")
|
||||
<< *NewCall << "\n"
|
||||
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
|
||||
<< " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
|
||||
|
||||
@ -603,6 +618,5 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
|
||||
// Okay, the memcpy or memmove has been formed. Zap the original store and anything that
|
||||
// feeds into it.
|
||||
deleteDeadInstruction(SI, *SE, TLI);
|
||||
++NumMemCpy;
|
||||
return true;
|
||||
}
|
||||
|
@ -383,4 +383,26 @@ for.end: ; preds = %for.inc
|
||||
|
||||
}
|
||||
|
||||
@p = common global [1024 x i8] zeroinitializer, align 16
|
||||
|
||||
define void @test15(i32 %n) nounwind {
|
||||
entry:
|
||||
%cmp6 = icmp eq i32 %n, 0
|
||||
br i1 %cmp6, label %for.end, label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%arrayidx = getelementptr inbounds [1024 x i8]* @p, i64 0, i64 %indvars.iv.next
|
||||
%0 = load i8* %arrayidx, align 1
|
||||
%arrayidx2 = getelementptr inbounds [1024 x i8]* @p, i64 0, i64 %indvars.iv
|
||||
store i8 %0, i8* %arrayidx2, align 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
; CHECK: @test15
|
||||
; CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([1024 x i8]* @p, i32 0, i32 0), i8* getelementptr inbounds ([1024 x i8]* @p, i64 0, i64 1),
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user