[LoopVectorizer] Predicate instructions in blocks with several incoming edges

We don't need to limit predication to blocks that have a single incoming
edge; we just need to use the right mask (the block-in mask rather than a
single edge mask).
This fixes PR30172.

Differential Revision: https://reviews.llvm.org/D24009

llvm-svn: 280148
This commit is contained in:
Michael Kuperstein 2016-08-30 20:22:21 +00:00
parent 7622ad4008
commit 39c8e8b0c7
3 changed files with 66 additions and 16 deletions

View File

@ -2930,12 +2930,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
ScalarParts Entry(UF);
VectorParts Cond;
if (IfPredicateInstr) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
Instr->getParent());
}
if (IfPredicateInstr)
Cond = createBlockInMask(Instr->getParent());
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
@ -6697,12 +6693,8 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
ScalarParts Entry(UF);
VectorParts Cond;
if (IfPredicateInstr) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
Instr->getParent());
}
if (IfPredicateInstr)
Cond = createBlockInMask(Instr->getParent());
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {

View File

@ -153,3 +153,58 @@ if.end: ; preds = %if.then, %for.body
%exitcond = icmp eq i64 %indvars.iv.next, 128
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; Test for PR30172: the two sdiv instructions live in %if.then, which is
; reached from both %for.body and %check, i.e. a block with two incoming edges.
; The FileCheck directives below expect the condition guarding the
; conditionally executed sdiv to be the OR of the masks of both incoming edges.
define void @pr30172(i32* nocapture %asd, i32* nocapture %bsd) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %if.end
ret void
; Expected mask construction in the vector body:
;   CMP1 / CMP2     - vector forms of %cmp1 (< 100) and %cmp2 (>= 200).
;   XOR, AND1, OR1  - negated CMP1, combined into the mask for entering %check.
;   AND2, OR2       - CMP2 under the %check mask: the %check -> %if.then edge.
;   AND3            - CMP1: the %for.body -> %if.then edge.
;   OR3             - union of both edges; lane 0 of it drives the branch
;                     around the scalar sdiv.
; CHECK-LABEL: pr30172
; CHECK: vector.body:
; CHECK: %[[CMP1:.+]] = icmp slt <2 x i32> %[[VAL:.+]], <i32 100, i32 100>
; CHECK: %[[CMP2:.+]] = icmp sge <2 x i32> %[[VAL]], <i32 200, i32 200>
; CHECK: %[[XOR:.+]] = xor <2 x i1> %[[CMP1]], <i1 true, i1 true>
; CHECK: %[[AND1:.+]] = and <2 x i1> %[[XOR]], <i1 true, i1 true>
; CHECK: %[[OR1:.+]] = or <2 x i1> zeroinitializer, %[[AND1]]
; CHECK: %[[AND2:.+]] = and <2 x i1> %[[CMP2]], %[[OR1]]
; CHECK: %[[OR2:.+]] = or <2 x i1> zeroinitializer, %[[AND2]]
; CHECK: %[[AND3:.+]] = and <2 x i1> %[[CMP1]], <i1 true, i1 true>
; CHECK: %[[OR3:.+]] = or <2 x i1> %[[OR2]], %[[AND3]]
; CHECK: %[[EXTRACT:.+]] = extractelement <2 x i1> %[[OR3]], i32 0
; CHECK: %[[MASK:.+]] = icmp eq i1 %[[EXTRACT]], true
; CHECK: br i1 %[[MASK]], label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]]
; CHECK: [[THEN]]:
; CHECK: %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}
; CHECK: br label %[[FI]]
; CHECK: [[FI]]:
; CHECK: %{{.*}} = phi i32 [ undef, %vector.body ], [ %[[PD]], %[[THEN]] ]
; Loop header: loads one element from each array. %psd is both the dividend
; used in %if.then and the value stored when the division path is skipped.
for.body: ; preds = %if.end, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
%isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
%lsd = load i32, i32* %isd, align 4
%isd.b = getelementptr inbounds i32, i32* %bsd, i64 %indvars.iv
%lsd.b = load i32, i32* %isd.b, align 4
%psd = add nsw i32 %lsd, 23
%cmp1 = icmp slt i32 %lsd, 100
br i1 %cmp1, label %if.then, label %check
; Reached only when %lsd is not below 100; a value of 200 or more also takes
; the division path.
check: ; preds = %for.body
%cmp2 = icmp sge i32 %lsd, 200
br i1 %cmp2, label %if.then, label %if.end
; The block under test: it has two predecessors (%check and %for.body).
if.then: ; preds = %check, %for.body
%sd1 = sdiv i32 %psd, %lsd
%rsd = sdiv i32 %lsd.b, %sd1
br label %if.end
; Stores either the division result or %psd back through %isd, then advances
; the induction variable; the loop exits once it reaches 128.
if.end: ; preds = %if.then, %check
%ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %check ]
store i32 %ysd.0, i32* %isd, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 128
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

View File

@ -18,7 +18,8 @@ entry:
; VEC: %[[v8:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
; VEC: %[[v9:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20>
; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true>
; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0
; VEC: %[[o1:.+]] = or <2 x i1> zeroinitializer, %[[v10]]
; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[o1]], i32 0
; VEC: %[[v12:.+]] = icmp eq i1 %[[v11]], true
; VEC: br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]]
;
@ -28,7 +29,7 @@ entry:
; VEC: br label %[[else:.+]]
;
; VEC: [[else]]:
; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1
; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[o1]], i32 1
; VEC: %[[v16:.+]] = icmp eq i1 %[[v15]], true
; VEC: br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]]
;
@ -51,7 +52,9 @@ entry:
; UNROLL: %[[v5:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v3]], 100
; UNROLL: %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20
; UNROLL: %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20
; UNROLL: %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[v4]], true
; UNROLL: %[[o1:[a-zA-Z0-9]+]] = or i1 false, %[[v4]]
; UNROLL: %[[o2:[a-zA-Z0-9]+]] = or i1 false, %[[v5]]
; UNROLL: %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[o1]], true
; UNROLL: br i1 %[[v8]], label %[[cond:[a-zA-Z0-9.]+]], label %[[else:[a-zA-Z0-9.]+]]
;
; UNROLL: [[cond]]:
@ -59,7 +62,7 @@ entry:
; UNROLL: br label %[[else]]
;
; UNROLL: [[else]]:
; UNROLL: %[[v9:[a-zA-Z0-9]+]] = icmp eq i1 %[[v5]], true
; UNROLL: %[[v9:[a-zA-Z0-9]+]] = icmp eq i1 %[[o2]], true
; UNROLL: br i1 %[[v9]], label %[[cond2:[a-zA-Z0-9.]+]], label %[[else2:[a-zA-Z0-9.]+]]
;
; UNROLL: [[cond2]]: