mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-20 13:04:00 +00:00
SLPVectorizer: vectorize compares and selects.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184282 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
69466952ed
commit
eb30e5115e
@ -384,6 +384,9 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||||||
case Instruction::Trunc:
|
case Instruction::Trunc:
|
||||||
case Instruction::FPTrunc:
|
case Instruction::FPTrunc:
|
||||||
case Instruction::BitCast:
|
case Instruction::BitCast:
|
||||||
|
case Instruction::Select:
|
||||||
|
case Instruction::ICmp:
|
||||||
|
case Instruction::FCmp:
|
||||||
case Instruction::Add:
|
case Instruction::Add:
|
||||||
case Instruction::FAdd:
|
case Instruction::FAdd:
|
||||||
case Instruction::Sub:
|
case Instruction::Sub:
|
||||||
@ -541,6 +544,18 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||||||
Cost += (VecCost - ScalarCost);
|
Cost += (VecCost - ScalarCost);
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
case Instruction::FCmp:
|
||||||
|
case Instruction::ICmp: {
|
||||||
|
// Check that all of the compares have the same predicate.
|
||||||
|
CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
|
||||||
|
for (unsigned i = 1, e = VL.size(); i < e; ++i) {
|
||||||
|
CmpInst *Cmp = cast<CmpInst>(VL[i]);
|
||||||
|
if (Cmp->getPredicate() != P0)
|
||||||
|
return getScalarizationCost(VecTy);
|
||||||
|
}
|
||||||
|
// Fall through.
|
||||||
|
}
|
||||||
|
case Instruction::Select:
|
||||||
case Instruction::Add:
|
case Instruction::Add:
|
||||||
case Instruction::FAdd:
|
case Instruction::FAdd:
|
||||||
case Instruction::Sub:
|
case Instruction::Sub:
|
||||||
@ -572,10 +587,19 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calculate the cost of this instruction.
|
// Calculate the cost of this instruction.
|
||||||
int ScalarCost = VecTy->getNumElements() *
|
int ScalarCost = 0;
|
||||||
|
int VecCost = 0;
|
||||||
|
if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||
|
||||||
|
Opcode == Instruction::Select) {
|
||||||
|
VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
|
||||||
|
ScalarCost = VecTy->getNumElements() *
|
||||||
|
TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
|
||||||
|
VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
|
||||||
|
} else {
|
||||||
|
ScalarCost = VecTy->getNumElements() *
|
||||||
TTI->getArithmeticInstrCost(Opcode, ScalarTy);
|
TTI->getArithmeticInstrCost(Opcode, ScalarTy);
|
||||||
|
VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
|
||||||
int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
|
}
|
||||||
Cost += (VecCost - ScalarCost);
|
Cost += (VecCost - ScalarCost);
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
@ -772,6 +796,54 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
|||||||
|
|
||||||
return V;
|
return V;
|
||||||
}
|
}
|
||||||
|
case Instruction::FCmp:
|
||||||
|
case Instruction::ICmp: {
|
||||||
|
// Check that all of the compares have the same predicate.
|
||||||
|
CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
|
||||||
|
for (unsigned i = 1, e = VF; i < e; ++i) {
|
||||||
|
CmpInst *Cmp = cast<CmpInst>(VL[i]);
|
||||||
|
if (Cmp->getPredicate() != P0)
|
||||||
|
return Scalarize(VL, VecTy);
|
||||||
|
}
|
||||||
|
|
||||||
|
ValueList LHSV, RHSV;
|
||||||
|
for (int i = 0; i < VF; ++i) {
|
||||||
|
LHSV.push_back(cast<Instruction>(VL[i])->getOperand(0));
|
||||||
|
RHSV.push_back(cast<Instruction>(VL[i])->getOperand(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
Value *L = vectorizeTree_rec(LHSV, VF);
|
||||||
|
Value *R = vectorizeTree_rec(RHSV, VF);
|
||||||
|
Value *V;
|
||||||
|
if (VL0->getOpcode() == Instruction::FCmp)
|
||||||
|
V = Builder.CreateFCmp(P0, L, R);
|
||||||
|
else
|
||||||
|
V = Builder.CreateICmp(P0, L, R);
|
||||||
|
|
||||||
|
for (int i = 0; i < VF; ++i)
|
||||||
|
VectorizedValues[VL[i]] = V;
|
||||||
|
|
||||||
|
return V;
|
||||||
|
|
||||||
|
}
|
||||||
|
case Instruction::Select: {
|
||||||
|
ValueList TrueVec, FalseVec, CondVec;
|
||||||
|
for (int i = 0; i < VF; ++i) {
|
||||||
|
CondVec.push_back(cast<Instruction>(VL[i])->getOperand(0));
|
||||||
|
TrueVec.push_back(cast<Instruction>(VL[i])->getOperand(1));
|
||||||
|
FalseVec.push_back(cast<Instruction>(VL[i])->getOperand(2));
|
||||||
|
}
|
||||||
|
|
||||||
|
Value *True = vectorizeTree_rec(TrueVec, VF);
|
||||||
|
Value *False = vectorizeTree_rec(FalseVec, VF);
|
||||||
|
Value *Cond = vectorizeTree_rec(CondVec, VF);
|
||||||
|
Value *V = Builder.CreateSelect(Cond, True, False);
|
||||||
|
|
||||||
|
for (int i = 0; i < VF; ++i)
|
||||||
|
VectorizedValues[VL[i]] = V;
|
||||||
|
|
||||||
|
return V;
|
||||||
|
}
|
||||||
case Instruction::Add:
|
case Instruction::Add:
|
||||||
case Instruction::FAdd:
|
case Instruction::FAdd:
|
||||||
case Instruction::Sub:
|
case Instruction::Sub:
|
||||||
|
32
test/Transforms/SLPVectorizer/X86/cmp_sel.ll
Normal file
32
test/Transforms/SLPVectorizer/X86/cmp_sel.ll
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-apple-macosx10.8.0"
|
||||||
|
|
||||||
|
; int foo(double * restrict A, double * restrict B, double G) {
|
||||||
|
; A[0] = (B[10] ? G : 1);
|
||||||
|
; A[1] = (B[11] ? G : 1);
|
||||||
|
; }
|
||||||
|
|
||||||
|
;CHECK: @foo
|
||||||
|
;CHECK: load <2 x double>
|
||||||
|
;CHECK: fcmp une <2 x double>
|
||||||
|
;CHECK: select <2 x i1>
|
||||||
|
;CHECK: store <2 x double>
|
||||||
|
;CHECK: ret i32 undef
|
||||||
|
define i32 @foo(double* noalias nocapture %A, double* noalias nocapture %B, double %G) {
|
||||||
|
entry:
|
||||||
|
%arrayidx = getelementptr inbounds double* %B, i64 10
|
||||||
|
%0 = load double* %arrayidx, align 8
|
||||||
|
%tobool = fcmp une double %0, 0.000000e+00
|
||||||
|
%cond = select i1 %tobool, double %G, double 1.000000e+00
|
||||||
|
store double %cond, double* %A, align 8
|
||||||
|
%arrayidx2 = getelementptr inbounds double* %B, i64 11
|
||||||
|
%1 = load double* %arrayidx2, align 8
|
||||||
|
%tobool3 = fcmp une double %1, 0.000000e+00
|
||||||
|
%cond7 = select i1 %tobool3, double %G, double 1.000000e+00
|
||||||
|
%arrayidx8 = getelementptr inbounds double* %A, i64 1
|
||||||
|
store double %cond7, double* %arrayidx8, align 8
|
||||||
|
ret i32 undef
|
||||||
|
}
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user