Generate an extract for in-tree uses if the use is a scalar operand in a vectorized instruction. radar://18144665
llvm-svn: 216946
parent 233c9d8109
commit 94e9de2c27
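For orientation (not part of the commit itself), here is a hypothetical C++ reduction of the new fn1 test added at the end of this patch. The two pointer values a + 11 and a + 56 get bundled into a <2 x i64*> SLP tree, but a + 11 is also still needed as the scalar address operand of the first store; before this change that in-tree scalar use produced no extract, and the patch records such uses in ExternalUses so an extractelement is generated for them.

// Hypothetical reduction of the fn1 test below; names and types are
// illustrative, not taken from the commit.
long long *a;

int fn1() {
  // Both stored values are pointers derived from 'a' and are vectorized
  // together, yet 'a + 11' is also the scalar address of the first store,
  // which is exactly the in-tree scalar use that now needs an extract.
  a[11] = (long long)(a + 11);
  a[12] = (long long)(a + 56);
  return 0;
}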
@@ -342,6 +342,33 @@ static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
   }
 }
 
+/// \returns True if in-tree use also needs extract. This refers to
+/// possible scalar operand in vectorized instruction.
+static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
+                                    TargetLibraryInfo *TLI) {
+
+  unsigned Opcode = UserInst->getOpcode();
+  switch (Opcode) {
+  case Instruction::Load: {
+    LoadInst *LI = cast<LoadInst>(UserInst);
+    return (LI->getPointerOperand() == Scalar);
+  }
+  case Instruction::Store: {
+    StoreInst *SI = cast<StoreInst>(UserInst);
+    return (SI->getPointerOperand() == Scalar);
+  }
+  case Instruction::Call: {
+    CallInst *CI = cast<CallInst>(UserInst);
+    Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+    if (hasVectorInstrinsicScalarOpd(ID, 1)) {
+      return (CI->getArgOperand(1) == Scalar);
+    }
+  }
+  default:
+    return false;
+  }
+}
+
 /// Bottom Up SLP Vectorizer.
 class BoUpSLP {
 public:
@@ -864,18 +891,27 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
       for (User *U : Scalar->users()) {
         DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
 
-        // Skip in-tree scalars that become vectors.
-        if (ScalarToTreeEntry.count(U)) {
-          DEBUG(dbgs() << "SLP: \tInternal user will be removed:" <<
-                *U << ".\n");
-          int Idx = ScalarToTreeEntry[U]; (void) Idx;
-          assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
-          continue;
-        }
         Instruction *UserInst = dyn_cast<Instruction>(U);
         if (!UserInst)
           continue;
 
+        // Skip in-tree scalars that become vectors
+        if (ScalarToTreeEntry.count(U)) {
+          int Idx = ScalarToTreeEntry[U];
+          TreeEntry *UseEntry = &VectorizableTree[Idx];
+          Value *UseScalar = UseEntry->Scalars[0];
+          // Some in-tree scalars will remain as scalar in vectorized
+          // instructions. If that is the case, the one in Lane 0 will
+          // be used.
+          if (UseScalar != U ||
+              !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
+            DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
+                         << ".\n");
+            assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
+            continue;
+          }
+        }
+
         // Ignore users in the user ignore list.
         if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
             UserIgnoreList.end())
@@ -1190,16 +1226,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
         }
       }
 
-      // We combine only GEPs with a single use.
-      for (unsigned j = 0; j < VL.size(); ++j) {
-        if (cast<Instruction>(VL[j])->getNumUses() > 1) {
-          DEBUG(dbgs() << "SLP: not-vectorizable GEP (multiple uses).\n");
-          BS.cancelScheduling(VL);
-          newTreeEntry(VL, false);
-          return;
-        }
-      }
-
       // We can't combine several GEPs into one vector if they operate on
       // different types.
       Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
@@ -2023,6 +2049,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
 
       Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
                                             VecTy->getPointerTo(AS));
+
+      // The pointer operand uses an in-tree scalar so we add the new BitCast to
+      // ExternalUses list to make sure that an extract will be generated in the
+      // future.
+      if (ScalarToTreeEntry.count(LI->getPointerOperand()))
+        ExternalUses.push_back(
+            ExternalUser(LI->getPointerOperand(), cast<User>(VecPtr), 0));
+
       unsigned Alignment = LI->getAlignment();
       LI = Builder.CreateLoad(VecPtr);
       if (!Alignment)
@@ -2047,6 +2081,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                             VecTy->getPointerTo(AS));
       StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
+
+      // The pointer operand uses an in-tree scalar so we add the new BitCast to
+      // ExternalUses list to make sure that an extract will be generated in the
+      // future.
+      if (ScalarToTreeEntry.count(SI->getPointerOperand()))
+        ExternalUses.push_back(
+            ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
+
       if (!Alignment)
         Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
       S->setAlignment(Alignment);
@@ -2088,6 +2130,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       setInsertPointAfterBundle(E->Scalars);
       Function *FI;
       Intrinsic::ID IID = Intrinsic::not_intrinsic;
+      Value *ScalarArg = nullptr;
       if (CI && (FI = CI->getCalledFunction())) {
         IID = (Intrinsic::ID) FI->getIntrinsicID();
       }
@@ -2098,6 +2141,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         // a scalar. This argument should not be vectorized.
         if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
           CallInst *CEI = cast<CallInst>(E->Scalars[0]);
+          ScalarArg = CEI->getArgOperand(j);
           OpVecs.push_back(CEI->getArgOperand(j));
           continue;
         }
@@ -2116,6 +2160,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
       Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
       Value *V = Builder.CreateCall(CF, OpVecs);
+
+      // The scalar argument uses an in-tree scalar so we add the new vectorized
+      // call to ExternalUses list to make sure that an extract will be
+      // generated in the future.
+      if (ScalarArg && ScalarToTreeEntry.count(ScalarArg))
+        ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
+
       E->VectorizedValue = V;
       ++NumVectorInstructions;
       return V;
test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll (new file, 70 lines)
@@ -0,0 +1,70 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.9.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@a = common global i64* null, align 8
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @fn1() {
+entry:
+  %0 = load i64** @a, align 8
+  %add.ptr = getelementptr inbounds i64* %0, i64 11
+  %1 = ptrtoint i64* %add.ptr to i64
+  store i64 %1, i64* %add.ptr, align 8
+  %add.ptr1 = getelementptr inbounds i64* %0, i64 56
+  %2 = ptrtoint i64* %add.ptr1 to i64
+  %arrayidx2 = getelementptr inbounds i64* %0, i64 12
+  store i64 %2, i64* %arrayidx2, align 8
+  ret i32 undef
+; CHECK-LABEL: @fn1(
+; CHECK: extractelement <2 x i64*>
+; CHECK: ret
+}
+
+
+declare float @llvm.powi.f32(float, i32)
+define void @fn2(i32* %a, i32* %b, float* %c) {
+entry:
+  %i0 = load i32* %a, align 4
+  %i1 = load i32* %b, align 4
+  %add1 = add i32 %i0, %i1
+  %fp1 = sitofp i32 %add1 to float
+  %call1 = tail call float @llvm.powi.f32(float %fp1,i32 %add1) nounwind readnone
+
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+  %i2 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+  %i3 = load i32* %arrayidx3, align 4
+  %add2 = add i32 %i2, %i3
+  %fp2 = sitofp i32 %add2 to float
+  %call2 = tail call float @llvm.powi.f32(float %fp2,i32 %add1) nounwind readnone
+
+  %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+  %i4 = load i32* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+  %i5 = load i32* %arrayidx5, align 4
+  %add3 = add i32 %i4, %i5
+  %fp3 = sitofp i32 %add3 to float
+  %call3 = tail call float @llvm.powi.f32(float %fp3,i32 %add1) nounwind readnone
+
+  %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+  %i6 = load i32* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+  %i7 = load i32* %arrayidx7, align 4
+  %add4 = add i32 %i6, %i7
+  %fp4 = sitofp i32 %add4 to float
+  %call4 = tail call float @llvm.powi.f32(float %fp4,i32 %add1) nounwind readnone
+
+  store float %call1, float* %c, align 4
+  %arrayidx8 = getelementptr inbounds float* %c, i32 1
+  store float %call2, float* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds float* %c, i32 2
+  store float %call3, float* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds float* %c, i32 3
+  store float %call4, float* %arrayidx10, align 4
+  ret void
+
+; CHECK-LABEL: @fn2(
+; CHECK: extractelement <4 x i32>
+; CHECK: ret
+}