mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-05 10:39:21 +00:00
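Loop Vectorize: optimize the vectorization of trunc(induction_var). The truncation is now performed once on the scalar induction variable; the scalar result is then broadcast and turned into a consecutive vector, instead of truncating a whole vector of induction values.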
llvm-svn: 169904
This commit is contained in:
parent
97c09cbfb6
commit
fb45c4d6b4
@ -1204,8 +1204,20 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
|
||||
case Instruction::Trunc:
|
||||
case Instruction::FPTrunc:
|
||||
case Instruction::BitCast: {
|
||||
/// Vectorize bitcasts.
|
||||
CastInst *CI = dyn_cast<CastInst>(it);
|
||||
/// Optimize the special case where the source is the induction
|
||||
/// variable. Notice that we can only optimize the 'trunc' case
|
||||
/// because: a. FP conversions lose precision, b. sext/zext may wrap,
|
||||
/// c. other casts depend on pointer size.
|
||||
if (CI->getOperand(0) == OldInduction &&
|
||||
it->getOpcode() == Instruction::Trunc) {
|
||||
Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
|
||||
CI->getType());
|
||||
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
|
||||
WidenMap[it] = getConsecutiveVector(Broadcasted);
|
||||
break;
|
||||
}
|
||||
/// Vectorize casts.
|
||||
Value *A = getVectorValue(it->getOperand(0));
|
||||
Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
|
||||
WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
|
||||
|
30
test/Transforms/LoopVectorize/cast-induction.ll
Normal file
30
test/Transforms/LoopVectorize/cast-induction.ll
Normal file
@ -0,0 +1,30 @@
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
|
||||
|
||||
; rdar://problem/12848162
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
@a = common global [2048 x i32] zeroinitializer, align 16
|
||||
|
||||
;CHECK: @example12
|
||||
;CHECK: trunc i64
|
||||
;CHECK: store <4 x i32>
|
||||
;CHECK: ret void
|
||||
; Test input: a single-block loop that stores the i32 truncation of its i64
; induction variable into @a at the index given by that induction variable,
; for indices 0 through 1023. The CHECK lines for @example12 expect the output
; to contain a scalar `trunc i64` and a `store <4 x i32>`.
define void @example12() nounwind uwtable ssp {
|
||||
br label %1
|
||||
|
||||
; <label>:1 ; preds = %1, %0
|
||||
; 64-bit induction variable: 0 on entry, %indvars.iv.next on the back edge.
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
|
||||
; Address of element %indvars.iv of @a.
%2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
|
||||
; Truncation of the induction variable itself; this is the value that is stored.
%3 = trunc i64 %indvars.iv to i32
|
||||
store i32 %3, i32* %2, align 4
|
||||
; Advance the induction variable by one.
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
; Exit test: compare the i32 truncation of the next index against 1024.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
; Leave the loop when the comparison is true; otherwise branch back to %1.
br i1 %exitcond, label %4, label %1
|
||||
|
||||
; <label>:4 ; preds = %1
|
||||
ret void
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
;CHECK: @cpp_new_arrays
|
||||
;CHECK: insertelement <4 x i32>
|
||||
;CHECK: sext i32
|
||||
;CHECK: load <4 x float>
|
||||
;CHECK: fadd <4 x float>
|
||||
;CHECK: ret i32
|
||||
|
@ -329,7 +329,7 @@ define void @example11() nounwind uwtable ssp {
|
||||
}
|
||||
|
||||
;CHECK: @example12
|
||||
;CHECK: trunc <4 x i64>
|
||||
;CHECK: trunc i64
|
||||
;CHECK: store <4 x i32>
|
||||
;CHECK: ret void
|
||||
define void @example12() nounwind uwtable ssp {
|
||||
|
@ -6,8 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
|
||||
@array = common global [1024 x i32] zeroinitializer, align 16
|
||||
|
||||
;CHECK: @array_at_plus_one
|
||||
;CHECK: add <4 x i64>
|
||||
;CHECK: trunc <4 x i64>
|
||||
;CHECK: trunc i64
|
||||
;CHECK: add i64 %index, 12
|
||||
;CHECK: ret i32
|
||||
define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
|
||||
|
Loading…
Reference in New Issue
Block a user