mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-04 18:06:49 +00:00
Reapply "[LV] Extend trunc optimization to all IVs with constant integer steps"
This reapplies commit r294967 with a fix for the execution time regressions caught by the clang-cmake-aarch64-quick bot. We now extend the truncate optimization to non-primary induction variables only if the truncate isn't already free. Differential Revision: https://reviews.llvm.org/D29847 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295063 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2fce16a04e
commit
06f1a4b52b
@ -2014,6 +2014,42 @@ public:
|
||||
return WideningDecisions[InstOnVF].second;
|
||||
}
|
||||
|
||||
/// Return True if instruction \p I is an optimizable truncate whose operand
|
||||
/// is an induction variable. Such a truncate will be removed by adding a new
|
||||
/// induction variable with the destination type.
|
||||
bool isOptimizableIVTruncate(Instruction *I, unsigned VF) {
|
||||
|
||||
// If the instruction is not a truncate, return false.
|
||||
auto *Trunc = dyn_cast<TruncInst>(I);
|
||||
if (!Trunc)
|
||||
return false;
|
||||
|
||||
// Get the source and destination types of the truncate.
|
||||
Type *SrcTy = ToVectorTy(cast<CastInst>(I)->getSrcTy(), VF);
|
||||
Type *DestTy = ToVectorTy(cast<CastInst>(I)->getDestTy(), VF);
|
||||
|
||||
// If the truncate is free for the given types, return false. Replacing a
|
||||
// free truncate with an induction variable would add an induction variable
|
||||
// update instruction to each iteration of the loop. We exclude from this
|
||||
// check the primary induction variable since it will need an update
|
||||
// instruction regardless.
|
||||
Value *Op = Trunc->getOperand(0);
|
||||
if (Op != Legal->getInduction() && TTI.isTruncateFree(SrcTy, DestTy))
|
||||
return false;
|
||||
|
||||
// If the truncated value is not an induction variable, return false.
|
||||
if (!Legal->isInductionVariable(Op))
|
||||
return false;
|
||||
|
||||
// Lastly, we only consider an induction variable truncate to be
|
||||
// optimizable if it has a constant step.
|
||||
//
|
||||
// TODO: Expand optimizable truncates to include truncations of induction
|
||||
// variables having loop-invariant steps.
|
||||
auto ID = Legal->getInductionVars()->lookup(cast<PHINode>(Op));
|
||||
return ID.getConstIntStepValue();
|
||||
}
|
||||
|
||||
private:
|
||||
/// The vectorization cost is a combination of the cost itself and a boolean
|
||||
/// indicating whether any of the contributing operations will actually
|
||||
@ -4879,10 +4915,9 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
|
||||
// induction variable. Notice that we can only optimize the 'trunc' case
|
||||
// because (a) FP conversions lose precision, (b) sext/zext may wrap, and
|
||||
// (c) other casts depend on pointer size.
|
||||
auto ID = Legal->getInductionVars()->lookup(OldInduction);
|
||||
if (isa<TruncInst>(CI) && CI->getOperand(0) == OldInduction &&
|
||||
ID.getConstIntStepValue()) {
|
||||
widenIntInduction(OldInduction, cast<TruncInst>(CI));
|
||||
if (Cost->isOptimizableIVTruncate(CI, VF)) {
|
||||
widenIntInduction(cast<PHINode>(CI->getOperand(0)),
|
||||
cast<TruncInst>(CI));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -7224,12 +7259,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
case Instruction::Trunc:
|
||||
case Instruction::FPTrunc:
|
||||
case Instruction::BitCast: {
|
||||
// We optimize the truncation of induction variable.
|
||||
// The cost of these is the same as the scalar operation.
|
||||
if (I->getOpcode() == Instruction::Trunc &&
|
||||
Legal->isInductionVariable(I->getOperand(0)))
|
||||
return TTI.getCastInstrCost(I->getOpcode(), I->getType(),
|
||||
I->getOperand(0)->getType());
|
||||
// We optimize the truncation of induction variables having constant
|
||||
// integer steps. The cost of these truncations is the same as the scalar
|
||||
// operation.
|
||||
if (isOptimizableIVTruncate(I, VF)) {
|
||||
auto *Trunc = cast<TruncInst>(I);
|
||||
return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(),
|
||||
Trunc->getSrcTy());
|
||||
}
|
||||
|
||||
Type *SrcScalarTy = I->getOperand(0)->getType();
|
||||
Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
|
||||
|
30
test/Transforms/LoopVectorize/AArch64/induction-trunc.ll
Normal file
30
test/Transforms/LoopVectorize/AArch64/induction-trunc.ll
Normal file
@ -0,0 +1,30 @@
|
||||
; RUN: opt < %s -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64--linux-gnu"
|
||||
|
||||
; CHECK-LABEL: @non_primary_iv_trunc_free(
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 5
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDUCTION]] to i32
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDUCTION1]] to i32
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
|
||||
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
|
||||
;
|
||||
; Test input: a single loop whose only phi, %i, starts at 0 and is advanced by
; 5 on every iteration. %i is truncated from i64 to i32 (%tmp0), and that
; truncated value has no users inside this function. The loop repeats while
; %i.next is signed-less-than %n.
define void @non_primary_iv_trunc_free(i64 %n) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
|
||||
%tmp0 = trunc i64 %i to i32
|
||||
%i.next = add nuw nsw i64 %i, 5
|
||||
%cond = icmp slt i64 %i.next, %n
|
||||
br i1 %cond, label %for.body, label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
@ -773,3 +773,34 @@ for.body:
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @non_primary_iv_trunc(
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
|
||||
; CHECK: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
|
||||
; CHECK: [[TMP3:%.*]] = add i64 %index, 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* %a, i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
|
||||
; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP6]], align 4
|
||||
; CHECK-NEXT: %index.next = add i64 %index, 2
|
||||
; CHECK: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
|
||||
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
|
||||
; Test input: a loop with two phis that both start at 0. %i advances by 1 and
; is used to index %a and to compute the exit condition; %j advances by 2 and
; is truncated from i64 to i32 (%tmp1) before being stored to %a[%i]. The loop
; repeats while %i.next is signed-less-than %n.
define void @non_primary_iv_trunc(i32* %a, i64 %n) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
|
||||
%j = phi i64 [ %j.next, %for.body ], [ 0, %entry ]
|
||||
%tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
|
||||
%tmp1 = trunc i64 %j to i32
|
||||
store i32 %tmp1, i32* %tmp0, align 4
|
||||
%i.next = add nuw nsw i64 %i, 1
|
||||
%j.next = add nuw nsw i64 %j, 2
|
||||
%cond = icmp slt i64 %i.next, %n
|
||||
br i1 %cond, label %for.body, label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
@ -2,7 +2,8 @@
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
; Make sure that the reverse iterators are calculated using 64bit arithmetic, not 32.
|
||||
; PR15882: This test ensures that we do not produce wrapping arithmetic when
|
||||
; creating constant reverse step vectors.
|
||||
;
|
||||
; int foo(int n, int *A) {
|
||||
; int sum;
|
||||
@ -13,7 +14,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
|
||||
;
|
||||
|
||||
;CHECK-LABEL: @foo(
|
||||
;CHECK: <i64 0, i64 -1, i64 -2, i64 -3>
|
||||
;CHECK: <i32 0, i32 -1, i32 -2, i32 -3>
|
||||
;CHECK: ret
|
||||
define i32 @foo(i32 %n, i32* nocapture %A) {
|
||||
%1 = icmp sgt i32 %n, 0
|
||||
|
Loading…
Reference in New Issue
Block a user