LoopVectorizer: Truncate i64 trip counts of i32 phis if necessary

With signed arithmetic we can end up with an i64 trip count for an i32 phi.
Because the arithmetic is signed, the trip count is only defined if the i32
induction variable does not wrap. It is therefore safe to truncate the i64
trip count to an i32 value.

Fixes PR18049.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195787 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Arnold Schwaighofer 2013-11-26 22:11:23 +00:00
parent 2bd48f03ba
commit b40f14eb89
2 changed files with 48 additions and 0 deletions

View File

@ -1537,6 +1537,15 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
// The exit count might have type i64 while the phi is i32. This can happen
// if the induction variable is sign extended before the compare. We can only
// have obtained a backedge-taken count in that case if the induction variable
// is signed and therefore does not overflow, so truncating the count is
// legal.
if (ExitCount->getType()->getPrimitiveSizeInBits() >
IdxTy->getPrimitiveSizeInBits())
ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);
ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
// Get the total trip count from the count by adding 1.
ExitCount = SE->getAddExpr(ExitCount,

View File

@ -0,0 +1,39 @@
; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-unknown-freebsd11.0"
@big = external global [0 x i32]
; PR18049
; We need to truncate the exit count to i32. This is legal because the
; arithmetic is signed (%inc is nsw).
; CHECK-LABEL: tripcount
; CHECK: trunc i64 %count to i32
; Reproducer for PR18049: a loop that bitwise-inverts elements of @big, with an
; i32 induction variable that is sign extended to i64 before being compared
; against the i64 argument %count.
define void @tripcount(i64 %count) {
entry:
; Guard: the loop body is only reached when %count is (signed) greater than 0.
%cmp6 = icmp sgt i64 %count, 0
br i1 %cmp6, label %for.body.preheader, label %for.end
for.body.preheader:
br label %for.body
for.body:
; i32 induction variable: 0 on entry from the preheader, %inc on the backedge.
%i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds [0 x i32]* @big, i32 0, i32 %i.07
%0 = load i32* %arrayidx, align 4
; xor with -1 flips every bit of the loaded element.
%neg = xor i32 %0, -1
store i32 %neg, i32* %arrayidx, align 4
; The increment is marked nsw; the test relies on this signed no-wrap flag to
; make truncating the exit count legal.
%inc = add nsw i32 %i.07, 1
; The exit compare is performed in i64 on the sign-extended i32 value, so the
; bound (%count) is i64 while the phi is i32.
%conv = sext i32 %inc to i64
%cmp = icmp slt i64 %conv, %count
br i1 %cmp, label %for.body, label %for.end.loopexit
for.end.loopexit:
br label %for.end
for.end:
ret void
}