mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-16 08:08:01 +00:00
LoopVectorizer: Enable unrolling of conditional stores and the load/store
unrolling heuristic by default. Benchmarking on x86_64 (thanks Chandler!) and ARM has shown that these options speed up some benchmarks while not causing any interesting regressions. llvm-svn: 200621
This commit is contained in:
parent
f89553a645
commit
8a0e82c2bc
@ -180,16 +180,16 @@ static cl::opt<bool> LoopVectorizeWithBlockFrequency(
|
||||
|
||||
// Runtime unroll loops for load/store throughput.
|
||||
static cl::opt<bool> EnableLoadStoreRuntimeUnroll(
|
||||
"enable-loadstore-runtime-unroll", cl::init(false), cl::Hidden,
|
||||
"enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden,
|
||||
cl::desc("Enable runtime unrolling until load/store ports are saturated"));
|
||||
|
||||
/// The number of stores in a loop that are allowed to need predication.
|
||||
static cl::opt<unsigned> NumberOfStoresToPredicate(
|
||||
"vectorize-num-stores-pred", cl::init(0), cl::Hidden,
|
||||
"vectorize-num-stores-pred", cl::init(1), cl::Hidden,
|
||||
cl::desc("Max number of stores to be predicated behind an if."));
|
||||
|
||||
static cl::opt<bool> EnableIndVarRegisterHeur(
|
||||
"enable-ind-var-reg-heur", cl::init(false), cl::Hidden,
|
||||
"enable-ind-var-reg-heur", cl::init(true), cl::Hidden,
|
||||
cl::desc("Count the induction variable only once when unrolling"));
|
||||
|
||||
static cl::opt<bool> EnableCondStoresVectorization(
|
||||
|
@ -47,6 +47,7 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) {
|
||||
%sum.03 = phi i32 [ %7, %.lr.ph ], [ 0, %0 ]
|
||||
%sum.04 = phi i32 [ %8, %.lr.ph ], [ 0, %0 ]
|
||||
%sum.05 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
|
||||
%sum.06 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
|
||||
%2 = getelementptr inbounds i32* %A, i32 %i.02
|
||||
%3 = load i32* %2, align 4
|
||||
%4 = add nsw i32 %3, %sum.01
|
||||
@ -55,6 +56,7 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) {
|
||||
%7 = add nsw i32 %3, %sum.03
|
||||
%8 = add nsw i32 %3, %sum.04
|
||||
%9 = add nsw i32 %3, %sum.05
|
||||
%10 = add nsw i32 %3, %sum.05
|
||||
%exitcond = icmp eq i32 %5, %n
|
||||
br i1 %exitcond, label %._crit_edge, label %.lr.ph
|
||||
|
||||
@ -64,5 +66,6 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) {
|
||||
%sum.2.lcssa = phi i32 [ 0, %0 ], [ %7, %.lr.ph ]
|
||||
%sum.4.lcssa = phi i32 [ 0, %0 ], [ %8, %.lr.ph ]
|
||||
%sum.5.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
|
||||
%sum.6.lcssa = phi i32 [ 0, %0 ], [ %10, %.lr.ph ]
|
||||
ret i32 %sum.0.lcssa
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user