[LoopVectorize] Add Support for Small Size Reductions.

Unlike scalar operations, we can perform vector operations on element types that
are smaller than the native integer types. We type-promote scalar operations if
they are smaller than a native type (e.g., i8 arithmetic is promoted to i32
arithmetic on Arm targets). This patch detects and removes type-promotions
within the reduction detection framework, enabling the vectorization of small
size reductions.
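
As a concrete illustration, the AArch64 test added by this patch contains the
promoted form of an i8 accumulation: the accumulator lives in i32 and is masked
back down to 8 bits on every iteration, while the i8 loads are zero-extended
before the adds (value names below are taken from that test):

  %conv4 = and i32 %sum.013, 255        ; re-narrow the promoted i8 accumulator
  %conv = zext i8 %0 to i32             ; promoted load of a[i]
  %conv3 = zext i8 %1 to i32            ; promoted load of b[i]
  %add = add nuw nsw i32 %conv, %conv4
  %add5 = add nuw nsw i32 %add, %conv3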

In the legality phase, we look through the ANDs and extensions that InstCombine
creates during promotion, keeping track of the smaller type. In the
profitability phase, we use the smaller type and ignore the ANDs and extensions
in the cost model. Finally, in the code generation phase, we truncate the result
of the reduction to allow InstCombine to rewrite the entire expression in the
smaller type.
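
For the i8 example above, the intended end result (mirroring the CHECK lines in
the new test; %bin.rdx and %res are illustrative names) is that the vector loop
and the horizontal reduction stay in <16 x i8>, and only the final scalar is
widened back to the type of the original phi:

  ; middle.block: the <16 x i8> partial results are reduced with
  ; shufflevector/add pairs, then only the final scalar is widened.
  %rdx = extractelement <16 x i8> %bin.rdx, i32 0
  %res = zext i8 %rdx to i32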

This fixes PR21369.
http://reviews.llvm.org/D12202

Patch by Matt Simpson <mssimpso@codeaurora.org>!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246149 91177308-0d34-0410-b5e6-96231b3b80d8
Chad Rosier 2015-08-27 14:12:17 +00:00
parent 18671a3a8f
commit ed15c79fb6
4 changed files with 393 additions and 36 deletions


@@ -85,13 +85,16 @@ public:
   RecurrenceDescriptor()
       : StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoRecurrence),
-        MinMaxKind(MRK_Invalid), UnsafeAlgebraInst(nullptr) {}
+        MinMaxKind(MRK_Invalid), UnsafeAlgebraInst(nullptr),
+        RecurrenceType(nullptr), IsSigned(false) {}
 
   RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
-                       MinMaxRecurrenceKind MK,
-                       Instruction *UAI /*Unsafe Algebra Inst*/)
+                       MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,
+                       bool Signed, SmallPtrSetImpl<Instruction *> &CI)
       : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),
-        UnsafeAlgebraInst(UAI) {}
+        UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
+    CastInsts.insert(CI.begin(), CI.end());
+  }
 
   /// This POD struct holds information about a potential recurrence operation.
   class InstDesc {
@@ -184,6 +187,44 @@ public:
   /// Returns first unsafe algebra instruction in the PHI node's use-chain.
   Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
 
+  /// Returns true if the recurrence kind is an integer kind.
+  static bool isIntegerRecurrenceKind(RecurrenceKind Kind);
+
+  /// Returns true if the recurrence kind is a floating point kind.
+  static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind);
+
+  /// Returns true if the recurrence kind is an arithmetic kind.
+  static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
+
+  /// Determines if Phi may have been type-promoted. If Phi has a single user
+  /// that ANDs the Phi with a type mask, return the user. RT is updated to
+  /// account for the narrower bit width represented by the mask, and the AND
+  /// instruction is added to CI.
+  static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
+                                     SmallPtrSetImpl<Instruction *> &Visited,
+                                     SmallPtrSetImpl<Instruction *> &CI);
+
+  /// Returns true if all the source operands of a recurrence are either
+  /// SExtInsts or ZExtInsts. This function is intended to be used with
+  /// lookThroughAnd to determine if the recurrence has been type-promoted. The
+  /// source operands are added to CI, and IsSigned is updated to indicate if
+  /// all source operands are SExtInsts.
+  static bool getSourceExtensionKind(Instruction *Start, Instruction *Exit,
+                                     Type *RT, bool &IsSigned,
+                                     SmallPtrSetImpl<Instruction *> &Visited,
+                                     SmallPtrSetImpl<Instruction *> &CI);
+
+  /// Returns the type of the recurrence. This type can be narrower than the
+  /// actual type of the Phi if the recurrence has been type-promoted.
+  Type *getRecurrenceType() { return RecurrenceType; }
+
+  /// Returns a reference to the instructions used for type-promoting the
+  /// recurrence.
+  SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; }
+
+  /// Returns true if all source operands of the recurrence are SExtInsts.
+  bool isSigned() { return IsSigned; }
+
 private:
   // The starting value of the recurrence.
   // It does not have to be zero!
@@ -196,6 +237,12 @@ private:
   MinMaxRecurrenceKind MinMaxKind;
   // First occurance of unasfe algebra in the PHI's use-chain.
   Instruction *UnsafeAlgebraInst;
+  // The type of the recurrence.
+  Type *RecurrenceType;
+  // True if all source operands of the recurrence are SExtInsts.
+  bool IsSigned;
+  // Instructions used for type-promoting the recurrence.
+  SmallPtrSet<Instruction *, 8> CastInsts;
 };
 
 /// A struct for saving information about induction variables.


@@ -34,6 +34,116 @@ bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
   return true;
 }
 
+bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
+  switch (Kind) {
+  default:
+    break;
+  case RK_IntegerAdd:
+  case RK_IntegerMult:
+  case RK_IntegerOr:
+  case RK_IntegerAnd:
+  case RK_IntegerXor:
+  case RK_IntegerMinMax:
+    return true;
+  }
+  return false;
+}
+
+bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
+  return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
+}
+
+bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
+  switch (Kind) {
+  default:
+    break;
+  case RK_IntegerAdd:
+  case RK_IntegerMult:
+  case RK_FloatAdd:
+  case RK_FloatMult:
+    return true;
+  }
+  return false;
+}
+
+Instruction *
+RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
+                                     SmallPtrSetImpl<Instruction *> &Visited,
+                                     SmallPtrSetImpl<Instruction *> &CI) {
+  if (!Phi->hasOneUse())
+    return Phi;
+
+  const APInt *M = nullptr;
+  Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
+
+  // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
+  // with a new integer type of the corresponding bit width.
+  if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)),
+                           m_And(m_APInt(M), m_Instruction(I))))) {
+    int32_t Bits = (*M + 1).exactLogBase2();
+    if (Bits > 0) {
+      RT = IntegerType::get(Phi->getContext(), Bits);
+      Visited.insert(Phi);
+      CI.insert(J);
+      return J;
+    }
+  }
+  return Phi;
+}
+
+bool RecurrenceDescriptor::getSourceExtensionKind(
+    Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
+    SmallPtrSetImpl<Instruction *> &Visited,
+    SmallPtrSetImpl<Instruction *> &CI) {
+
+  SmallVector<Instruction *, 8> Worklist;
+  bool FoundOneOperand = false;
+  Worklist.push_back(Exit);
+
+  // Traverse the instructions in the reduction expression, beginning with the
+  // exit value.
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+    for (Use &U : I->operands()) {
+
+      // Terminate the traversal if the operand is not an instruction, or we
+      // reach the starting value.
+      Instruction *J = dyn_cast<Instruction>(U.get());
+      if (!J || J == Start)
+        continue;
+
+      // Otherwise, investigate the operation if it is also in the expression.
+      if (Visited.count(J)) {
+        Worklist.push_back(J);
+        continue;
+      }
+
+      // If the operand is not in Visited, it is not a reduction operation, but
+      // it does feed into one. Make sure it is either a single-use sign- or
+      // zero-extend of the recurrence type.
+      CastInst *Cast = dyn_cast<CastInst>(J);
+      bool IsSExtInst = isa<SExtInst>(J);
+      if (!Cast || !Cast->hasOneUse() || Cast->getSrcTy() != RT ||
+          !(isa<ZExtInst>(J) || IsSExtInst))
+        return false;
+
+      // Furthermore, ensure that all such extends are of the same kind.
+      if (FoundOneOperand) {
+        if (IsSigned != IsSExtInst)
+          return false;
+      } else {
+        FoundOneOperand = true;
+        IsSigned = IsSExtInst;
+      }
+
+      // Lastly, add the sign- or zero-extend to CI so that we can avoid
+      // accounting for it in the cost model.
+      CI.insert(Cast);
+    }
+  }
+  return true;
+}
+
 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
                                            Loop *TheLoop, bool HasFunNoNaNAttr,
                                            RecurrenceDescriptor &RedDes) {
@@ -68,10 +178,32 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
   unsigned NumCmpSelectPatternInst = 0;
   InstDesc ReduxDesc(false, nullptr);
 
+  // Data used for determining if the recurrence has been type-promoted.
+  Type *RecurrenceType = Phi->getType();
+  SmallPtrSet<Instruction *, 4> CastInsts;
+  Instruction *Start = Phi;
+  bool IsSigned = false;
+
   SmallPtrSet<Instruction *, 8> VisitedInsts;
   SmallVector<Instruction *, 8> Worklist;
-  Worklist.push_back(Phi);
-  VisitedInsts.insert(Phi);
+
+  // Return early if the recurrence kind does not match the type of Phi. If the
+  // recurrence kind is arithmetic, we attempt to look through AND operations
+  // resulting from the type promotion performed by InstCombine. Vector
+  // operations are not limited to the legal integer widths, so we may be able
+  // to evaluate the reduction in the narrower width.
+  if (RecurrenceType->isFloatingPointTy()) {
+    if (!isFloatingPointRecurrenceKind(Kind))
+      return false;
+  } else {
+    if (!isIntegerRecurrenceKind(Kind))
+      return false;
+    if (isArithmeticRecurrenceKind(Kind))
+      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
+  }
+
+  Worklist.push_back(Start);
+  VisitedInsts.insert(Start);
 
   // A value in the reduction can be used:
   //  - By the reduction:
@@ -110,10 +242,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
         !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
       return false;
 
-    // Any reduction instruction must be of one of the allowed kinds.
-    ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
-    if (!ReduxDesc.isRecurrence())
-      return false;
+    // Any reduction instruction must be of one of the allowed kinds. We ignore
+    // the starting value (the Phi or an AND instruction if the Phi has been
+    // type-promoted).
+    if (Cur != Start) {
+      ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
+      if (!ReduxDesc.isRecurrence())
+        return false;
+    }
 
     // A reduction operation must only have one use of the reduction value.
     if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
@@ -131,7 +267,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
       ++NumCmpSelectPatternInst;
 
     // Check whether we found a reduction operator.
-    FoundReduxOp |= !IsAPhi;
+    FoundReduxOp |= !IsAPhi && Cur != Start;
 
     // Process users of current instruction. Push non-PHI nodes after PHI nodes
     // onto the stack. This way we are going to have seen all inputs to PHI
@@ -193,6 +329,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
   if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
     return false;
 
+  // If we think Phi may have been type-promoted, we also need to ensure that
+  // all source operands of the reduction are either SExtInsts or ZEstInsts. If
+  // so, we will be able to evaluate the reduction in the narrower bit width.
+  if (Start != Phi)
+    if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
+                                IsSigned, VisitedInsts, CastInsts))
+      return false;
+
   // We found a reduction var if we have reached the original phi node and we
   // only have a single instruction with out-of-loop users.
@@ -200,10 +344,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
   // is saved as part of the RecurrenceDescriptor.
 
   // Save the description of this reduction variable.
-  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind,
-                          ReduxDesc.getMinMaxKind(),
-                          ReduxDesc.getUnsafeAlgebraInst());
+  RecurrenceDescriptor RD(
+      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
 
   RedDes = RD;
 
   return true;
@@ -272,9 +415,6 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
   default:
     return InstDesc(false, I);
   case Instruction::PHI:
-    if (FP &&
-        (Kind != RK_FloatMult && Kind != RK_FloatAdd && Kind != RK_FloatMinMax))
-      return InstDesc(false, I);
     return InstDesc(I, Prev.getMinMaxKind());
   case Instruction::Sub:
   case Instruction::Add:


@@ -1308,11 +1308,10 @@ public:
                              LoopVectorizationLegality *Legal,
                              const TargetTransformInfo &TTI,
                              const TargetLibraryInfo *TLI, AssumptionCache *AC,
-                             const Function *F, const LoopVectorizeHints *Hints)
+                             const Function *F, const LoopVectorizeHints *Hints,
+                             SmallPtrSetImpl<const Value *> &ValuesToIgnore)
       : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
-        TheFunction(F), Hints(Hints) {
-    CodeMetrics::collectEphemeralValues(L, AC, EphValues);
-  }
+        TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {}
 
   /// Information about vectorization costs
   struct VectorizationFactor {
@@ -1381,9 +1380,6 @@ private:
     emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message);
   }
 
-  /// Values used only by @llvm.assume calls.
-  SmallPtrSet<const Value *, 32> EphValues;
-
   /// The loop that we evaluate.
   Loop *TheLoop;
   /// Scev analysis.
@@ -1399,6 +1395,8 @@ private:
   const Function *TheFunction;
   // Loop Vectorize Hint.
   const LoopVectorizeHints *Hints;
+  // Values to ignore in the cost model.
+  const SmallPtrSetImpl<const Value *> &ValuesToIgnore;
 };
 
 /// \brief This holds vectorization requirements that must be verified late in
@@ -1643,8 +1641,19 @@ struct LoopVectorize : public FunctionPass {
       return false;
     }
 
+    // Collect values we want to ignore in the cost model. This includes
+    // type-promoting instructions we identified during reduction detection.
+    SmallPtrSet<const Value *, 32> ValuesToIgnore;
+    CodeMetrics::collectEphemeralValues(L, AC, ValuesToIgnore);
+    for (auto &Reduction : *LVL.getReductionVars()) {
+      RecurrenceDescriptor &RedDes = Reduction.second;
+      SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
+      ValuesToIgnore.insert(Casts.begin(), Casts.end());
+    }
+
     // Use the cost model.
-    LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints);
+    LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints,
+                                  ValuesToIgnore);
 
     // Check the function attributes to find out if this function should be
     // optimized for size.
@@ -3234,12 +3243,11 @@ void InnerLoopVectorizer::vectorizeLoop() {
     // instructions.
     Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
 
-    VectorParts RdxParts;
+    VectorParts RdxParts, &RdxExitVal = getVectorValue(LoopExitInst);
     setDebugLocFromInst(Builder, LoopExitInst);
     for (unsigned part = 0; part < UF; ++part) {
       // This PHINode contains the vectorized reduction variable, or
       // the initial value vector, if we bypass the vector loop.
-      VectorParts &RdxExitVal = getVectorValue(LoopExitInst);
       PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
       Value *StartVal = (part == 0) ? VectorStart : Identity;
       for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
@@ -3249,6 +3257,28 @@ void InnerLoopVectorizer::vectorizeLoop() {
       RdxParts.push_back(NewPhi);
     }
 
+    // If the vector reduction can be performed in a smaller type, we truncate
+    // then extend the loop exit value to enable InstCombine to evaluate the
+    // entire expression in the smaller type.
+    if (VF > 1 && RdxPhi->getType() != RdxDesc.getRecurrenceType()) {
+      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
+      Builder.SetInsertPoint(LoopVectorBody.back()->getTerminator());
+      for (unsigned part = 0; part < UF; ++part) {
+        Value *Trunc = Builder.CreateTrunc(RdxExitVal[part], RdxVecTy);
+        Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
+                                          : Builder.CreateZExt(Trunc, VecTy);
+        for (Value::user_iterator UI = RdxExitVal[part]->user_begin();
+             UI != RdxExitVal[part]->user_end();)
+          if (*UI != Trunc)
+            (*UI++)->replaceUsesOfWith(RdxExitVal[part], Extnd);
+          else
+            ++UI;
+      }
+      Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
+      for (unsigned part = 0; part < UF; ++part)
+        RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
+    }
+
     // Reduce all of the unrolled parts into a single vector.
     Value *ReducedPartRdx = RdxParts[0];
     unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
@@ -3299,6 +3329,14 @@ void InnerLoopVectorizer::vectorizeLoop() {
       // The result is in the first element of the vector.
       ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
                                                     Builder.getInt32(0));
+
+      // If the reduction can be performed in a smaller type, we need to extend
+      // the reduction to the wider type before we branch to the original loop.
+      if (RdxPhi->getType() != RdxDesc.getRecurrenceType())
+        ReducedPartRdx =
+            RdxDesc.isSigned()
+                ? Builder.CreateSExt(ReducedPartRdx, RdxPhi->getType())
+                : Builder.CreateZExt(ReducedPartRdx, RdxPhi->getType());
     }
 
     // Create a phi node that merges control-flow from the backedge-taken check
@@ -4652,18 +4690,22 @@ unsigned LoopVectorizationCostModel::getWidestType() {
     for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
       Type *T = it->getType();
 
-      // Ignore ephemeral values.
-      if (EphValues.count(it))
+      // Skip ignored values.
+      if (ValuesToIgnore.count(it))
         continue;
 
       // Only examine Loads, Stores and PHINodes.
       if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
        continue;
 
-      // Examine PHI nodes that are reduction variables.
-      if (PHINode *PN = dyn_cast<PHINode>(it))
+      // Examine PHI nodes that are reduction variables. Update the type to
+      // account for the recurrence type.
+      if (PHINode *PN = dyn_cast<PHINode>(it)) {
         if (!Legal->getReductionVars()->count(PN))
           continue;
+        RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[PN];
+        T = RdxDesc.getRecurrenceType();
+      }
 
       // Examine the stored values.
       if (StoreInst *ST = dyn_cast<StoreInst>(it))
@@ -4924,8 +4966,8 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
     // Ignore instructions that are never used within the loop.
     if (!Ends.count(I)) continue;
 
-    // Ignore ephemeral values.
-    if (EphValues.count(I))
+    // Skip ignored values.
+    if (ValuesToIgnore.count(I))
       continue;
 
     // Remove all of the instructions that end at this location.
@@ -4968,8 +5010,8 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
       if (isa<DbgInfoIntrinsic>(it))
         continue;
 
-      // Ignore ephemeral values.
-      if (EphValues.count(it))
+      // Skip ignored values.
+      if (ValuesToIgnore.count(it))
         continue;
 
       unsigned C = getInstructionCost(it, VF);


@@ -0,0 +1,128 @@
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
; CHECK-LABEL: @reduction_i8
;
; char reduction_i8(char *a, char *b, int n) {
; char sum = 0;
; for (int i = 0; i < n; ++i)
; sum += (a[i] + b[i]);
; return sum;
; }
;
; CHECK: vector.body:
; CHECK: phi <16 x i8>
; CHECK: load <16 x i8>
; CHECK: load <16 x i8>
; CHECK: add <16 x i8>
; CHECK: add <16 x i8>
;
; CHECK: middle.block:
; CHECK: shufflevector <16 x i8>
; CHECK: add <16 x i8>
; CHECK: shufflevector <16 x i8>
; CHECK: add <16 x i8>
; CHECK: shufflevector <16 x i8>
; CHECK: add <16 x i8>
; CHECK: shufflevector <16 x i8>
; CHECK: add <16 x i8>
; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <16 x i8>
; CHECK: zext i8 [[Rdx]] to i32
;
define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
entry:
%cmp.12 = icmp sgt i32 %n, 0
br i1 %cmp.12, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader:
br label %for.body
for.cond.for.cond.cleanup_crit_edge:
%add5.lcssa = phi i32 [ %add5, %for.body ]
%conv6 = trunc i32 %add5.lcssa to i8
br label %for.cond.cleanup
for.cond.cleanup:
%sum.0.lcssa = phi i8 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
ret i8 %sum.0.lcssa
for.body:
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
%sum.013 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
%1 = load i8, i8* %arrayidx2, align 1
%conv3 = zext i8 %1 to i32
%conv4 = and i32 %sum.013, 255
%add = add nuw nsw i32 %conv, %conv4
%add5 = add nuw nsw i32 %add, %conv3
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
}
; CHECK-LABEL: @reduction_i16
;
; short reduction_i16(short *a, short *b, int n) {
; short sum = 0;
; for (int i = 0; i < n; ++i)
; sum += (a[i] + b[i]);
; return sum;
; }
;
; CHECK: vector.body:
; CHECK: phi <8 x i16>
; CHECK: load <8 x i16>
; CHECK: load <8 x i16>
; CHECK: add <8 x i16>
; CHECK: add <8 x i16>
;
; CHECK: middle.block:
; CHECK: shufflevector <8 x i16>
; CHECK: add <8 x i16>
; CHECK: shufflevector <8 x i16>
; CHECK: add <8 x i16>
; CHECK: shufflevector <8 x i16>
; CHECK: add <8 x i16>
; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
; CHECK: zext i16 [[Rdx]] to i32
;
define i16 @reduction_i16(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) {
entry:
%cmp.16 = icmp sgt i32 %n, 0
br i1 %cmp.16, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader:
br label %for.body
for.cond.for.cond.cleanup_crit_edge:
%add5.lcssa = phi i32 [ %add5, %for.body ]
%conv6 = trunc i32 %add5.lcssa to i16
br label %for.cond.cleanup
for.cond.cleanup:
%sum.0.lcssa = phi i16 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
ret i16 %sum.0.lcssa
for.body:
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
%sum.017 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i16, i16* %a, i64 %indvars.iv
%0 = load i16, i16* %arrayidx, align 2
%conv.14 = zext i16 %0 to i32
%arrayidx2 = getelementptr inbounds i16, i16* %b, i64 %indvars.iv
%1 = load i16, i16* %arrayidx2, align 2
%conv3.15 = zext i16 %1 to i32
%conv4.13 = and i32 %sum.017, 65535
%add = add nuw nsw i32 %conv.14, %conv4.13
%add5 = add nuw nsw i32 %add, %conv3.15
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
}