From 66e793d0deff39e64ed89ecddbf8961c9c7073f6 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Fri, 2 Dec 2016 16:56:26 +0000 Subject: [PATCH] Revert "[SLP] Fix for PR6246: vectorization for scalar ops on vector elements." This reverts commit r288497, as it broke the AArch64 build of Compiler-RT's builtins (twice: once in r288412 and once in r288497). We should investigate this offline. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288508 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 132 ++- test/Transforms/SLPVectorizer/X86/arith-fp.ll | 964 ++++++++++++------ .../X86/insert-element-build-vector.ll | 176 ++-- 3 files changed, 836 insertions(+), 436 deletions(-) diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index c3409897862..d1b569d4cd3 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3870,11 +3870,10 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, unsigned Opcode0 = I0->getOpcode(); + // FIXME: Register size should be a parameter to this function, so we can + // try different vectorization factors. unsigned Sz = R.getVectorElementSize(I0); - unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz); - unsigned MaxVF = std::max(PowerOf2Floor(VL.size()), MinVF); - if (MaxVF < 2) - return false; + unsigned VF = R.getMinVecRegSize() / Sz; for (Value *V : VL) { Type *Ty = V->getType(); @@ -3890,83 +3889,76 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, // Keep track of values that were deleted by vectorizing in the loop below. SmallVector TrackValues(VL.begin(), VL.end()); - unsigned NextInst = 0, MaxInst = VL.size(); - for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; - VF /= 2) { - for (unsigned I = NextInst; I < MaxInst; ++I) { - unsigned OpsWidth = 0; + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + unsigned OpsWidth = 0; - if (I + VF > MaxInst) - OpsWidth = MaxInst - I; - else - OpsWidth = VF; + if (i + VF > e) + OpsWidth = e - i; + else + OpsWidth = VF; - if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) - break; + if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) + break; - // Check that a previous iteration of this loop did not delete the Value. - if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth)) - continue; + // Check that a previous iteration of this loop did not delete the Value. + if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth)) + continue; - DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " - << "\n"); - ArrayRef Ops = VL.slice(I, OpsWidth); + DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " + << "\n"); + ArrayRef Ops = VL.slice(i, OpsWidth); - ArrayRef BuildVectorSlice; - if (!BuildVector.empty()) - BuildVectorSlice = BuildVector.slice(I, OpsWidth); + ArrayRef BuildVectorSlice; + if (!BuildVector.empty()) + BuildVectorSlice = BuildVector.slice(i, OpsWidth); - R.buildTree(Ops, BuildVectorSlice); - // TODO: check if we can allow reordering for more cases. - if (AllowReorder && R.shouldReorder()) { - // Conceptually, there is nothing actually preventing us from trying to - // reorder a larger list. In fact, we do exactly this when vectorizing - // reductions. However, at this point, we only expect to get here from - // tryToVectorizePair(). - assert(Ops.size() == 2); - assert(BuildVectorSlice.empty()); - Value *ReorderedOps[] = {Ops[1], Ops[0]}; - R.buildTree(ReorderedOps, None); - } - if (R.isTreeTinyAndNotFullyVectorizable()) - continue; + R.buildTree(Ops, BuildVectorSlice); + // TODO: check if we can allow reordering for more cases. + if (AllowReorder && R.shouldReorder()) { + // Conceptually, there is nothing actually preventing us from trying to + // reorder a larger list. In fact, we do exactly this when vectorizing + // reductions. However, at this point, we only expect to get here from + // tryToVectorizePair(). + assert(Ops.size() == 2); + assert(BuildVectorSlice.empty()); + Value *ReorderedOps[] = { Ops[1], Ops[0] }; + R.buildTree(ReorderedOps, None); + } + if (R.isTreeTinyAndNotFullyVectorizable()) + continue; - R.computeMinimumValueSizes(); - int Cost = R.getTreeCost(); + R.computeMinimumValueSizes(); + int Cost = R.getTreeCost(); - if (Cost < -SLPCostThreshold) { - DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); - Value *VectorizedRoot = R.vectorizeTree(); + if (Cost < -SLPCostThreshold) { + DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); + Value *VectorizedRoot = R.vectorizeTree(); - // Reconstruct the build vector by extracting the vectorized root. This - // way we handle the case where some elements of the vector are - // undefined. - // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) - if (!BuildVectorSlice.empty()) { - // The insert point is the last build vector instruction. The - // vectorized root will precede it. This guarantees that we get an - // instruction. The vectorized tree could have been constant folded. - Instruction *InsertAfter = cast(BuildVectorSlice.back()); - unsigned VecIdx = 0; - for (auto &V : BuildVectorSlice) { - IRBuilder Builder(InsertAfter->getParent(), - ++BasicBlock::iterator(InsertAfter)); - Instruction *I = cast(V); - assert(isa(I) || isa(I)); - Instruction *Extract = - cast(Builder.CreateExtractElement( - VectorizedRoot, Builder.getInt32(VecIdx++))); - I->setOperand(1, Extract); - I->removeFromParent(); - I->insertAfter(Extract); - InsertAfter = I; - } + // Reconstruct the build vector by extracting the vectorized root. This + // way we handle the case where some elements of the vector are undefined. + // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) + if (!BuildVectorSlice.empty()) { + // The insert point is the last build vector instruction. The vectorized + // root will precede it. This guarantees that we get an instruction. The + // vectorized tree could have been constant folded. + Instruction *InsertAfter = cast(BuildVectorSlice.back()); + unsigned VecIdx = 0; + for (auto &V : BuildVectorSlice) { + IRBuilder Builder(InsertAfter->getParent(), + ++BasicBlock::iterator(InsertAfter)); + Instruction *I = cast(V); + assert(isa(I) || isa(I)); + Instruction *Extract = cast(Builder.CreateExtractElement( + VectorizedRoot, Builder.getInt32(VecIdx++))); + I->setOperand(1, Extract); + I->removeFromParent(); + I->insertAfter(Extract); + InsertAfter = I; } - // Move to the next bundle. - I += VF - 1; - NextInst = I + 1; - Changed = true; } + // Move to the next bundle. + i += VF - 1; + Changed = true; } } diff --git a/test/Transforms/SLPVectorizer/X86/arith-fp.ll b/test/Transforms/SLPVectorizer/X86/arith-fp.ll index 7eec13e535d..cdbba35f45d 100644 --- a/test/Transforms/SLPVectorizer/X86/arith-fp.ll +++ b/test/Transforms/SLPVectorizer/X86/arith-fp.ll @@ -222,15 +222,22 @@ define <4 x float> @buildvector_div_4f32(<4 x float> %a, <4 x float> %b) { define <4 x double> @buildvector_add_4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @buildvector_add_4f64( -; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x double> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x double> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x double> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x double> %a, i32 3 +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x double> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x double> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x double> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x double> %b, i32 3 +; CHECK-NEXT: [[C0:%.*]] = fadd double [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fadd double [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fadd double [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fadd double [[A3]], [[B3]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3 ; CHECK-NEXT: ret <4 x double> [[R3]] ; %a0 = extractelement <4 x double> %a, i32 0 @@ -254,15 +261,22 @@ define <4 x double> @buildvector_add_4f64(<4 x double> %a, <4 x double> %b) { define <4 x double> @buildvector_sub_4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @buildvector_sub_4f64( -; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x double> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x double> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x double> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x double> %a, i32 3 +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x double> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x double> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x double> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x double> %b, i32 3 +; CHECK-NEXT: [[C0:%.*]] = fsub double [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fsub double [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fsub double [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fsub double [[A3]], [[B3]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3 ; CHECK-NEXT: ret <4 x double> [[R3]] ; %a0 = extractelement <4 x double> %a, i32 0 @@ -286,15 +300,22 @@ define <4 x double> @buildvector_sub_4f64(<4 x double> %a, <4 x double> %b) { define <4 x double> @buildvector_mul_4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @buildvector_mul_4f64( -; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x double> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x double> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x double> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x double> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x double> %a, i32 3 +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x double> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x double> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x double> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x double> %b, i32 3 +; CHECK-NEXT: [[C0:%.*]] = fmul double [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fmul double [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fmul double [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fmul double [[A3]], [[B3]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3 ; CHECK-NEXT: ret <4 x double> [[R3]] ; %a0 = extractelement <4 x double> %a, i32 0 @@ -318,15 +339,32 @@ define <4 x double> @buildvector_mul_4f64(<4 x double> %a, <4 x double> %b) { define <4 x double> @buildvector_div_4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @buildvector_div_4f64( -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x double> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x double> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x double> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x double> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x double> %a, i32 3 +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x double> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x double> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x double> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x double> %b, i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[A2]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A3]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B3]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 +; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP10]], i32 0 +; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP13]], i32 2 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP10]], i32 1 +; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP14]], i32 3 ; CHECK-NEXT: ret <4 x double> [[R3]] ; %a0 = extractelement <4 x double> %a, i32 0 @@ -350,23 +388,38 @@ define <4 x double> @buildvector_div_4f64(<4 x double> %a, <4 x double> %b) { define <8 x float> @buildvector_add_8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: @buildvector_add_8f32( -; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7 +; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x float> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x float> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x float> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x float> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x float> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x float> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x float> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x float> %a, i32 7 +; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x float> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x float> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x float> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x float> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x float> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x float> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x float> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x float> %b, i32 7 +; CHECK-NEXT: [[C0:%.*]] = fadd float [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fadd float [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fadd float [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fadd float [[A3]], [[B3]] +; CHECK-NEXT: [[C4:%.*]] = fadd float [[A4]], [[B4]] +; CHECK-NEXT: [[C5:%.*]] = fadd float [[A5]], [[B5]] +; CHECK-NEXT: [[C6:%.*]] = fadd float [[A6]], [[B6]] +; CHECK-NEXT: [[C7:%.*]] = fadd float [[A7]], [[B7]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7 ; CHECK-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <8 x float> %a, i32 0 @@ -406,23 +459,38 @@ define <8 x float> @buildvector_add_8f32(<8 x float> %a, <8 x float> %b) { define <8 x float> @buildvector_sub_8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: @buildvector_sub_8f32( -; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7 +; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x float> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x float> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x float> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x float> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x float> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x float> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x float> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x float> %a, i32 7 +; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x float> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x float> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x float> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x float> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x float> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x float> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x float> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x float> %b, i32 7 +; CHECK-NEXT: [[C0:%.*]] = fsub float [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fsub float [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fsub float [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fsub float [[A3]], [[B3]] +; CHECK-NEXT: [[C4:%.*]] = fsub float [[A4]], [[B4]] +; CHECK-NEXT: [[C5:%.*]] = fsub float [[A5]], [[B5]] +; CHECK-NEXT: [[C6:%.*]] = fsub float [[A6]], [[B6]] +; CHECK-NEXT: [[C7:%.*]] = fsub float [[A7]], [[B7]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7 ; CHECK-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <8 x float> %a, i32 0 @@ -462,23 +530,38 @@ define <8 x float> @buildvector_sub_8f32(<8 x float> %a, <8 x float> %b) { define <8 x float> @buildvector_mul_8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: @buildvector_mul_8f32( -; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x float> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7 +; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x float> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x float> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x float> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x float> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x float> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x float> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x float> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x float> %a, i32 7 +; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x float> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x float> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x float> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x float> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x float> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x float> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x float> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x float> %b, i32 7 +; CHECK-NEXT: [[C0:%.*]] = fmul float [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fmul float [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fmul float [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fmul float [[A3]], [[B3]] +; CHECK-NEXT: [[C4:%.*]] = fmul float [[A4]], [[B4]] +; CHECK-NEXT: [[C5:%.*]] = fmul float [[A5]], [[B5]] +; CHECK-NEXT: [[C6:%.*]] = fmul float [[A6]], [[B6]] +; CHECK-NEXT: [[C7:%.*]] = fmul float [[A7]], [[B7]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7 ; CHECK-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <8 x float> %a, i32 0 @@ -518,23 +601,56 @@ define <8 x float> @buildvector_mul_8f32(<8 x float> %a, <8 x float> %b) { define <8 x float> @buildvector_div_8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: @buildvector_div_8f32( -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x float> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7 +; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x float> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x float> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x float> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x float> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x float> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x float> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x float> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x float> %a, i32 7 +; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x float> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x float> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x float> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x float> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x float> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x float> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x float> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x float> %b, i32 7 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[A1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[A2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[A3]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float [[B0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[B1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[B2]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[B3]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = fdiv <4 x float> [[TMP4]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> undef, float [[A4]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[A5]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[A6]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[A7]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> undef, float [[B4]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[B5]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[B6]], i32 2 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP16]], float [[B7]], i32 3 +; CHECK-NEXT: [[TMP18:%.*]] = fdiv <4 x float> [[TMP13]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP19]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP20]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP21]], i32 2 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP22]], i32 3 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP18]], i32 0 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[TMP23]], i32 4 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP18]], i32 1 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[TMP24]], i32 5 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x float> [[TMP18]], i32 2 +; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[TMP25]], i32 6 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x float> [[TMP18]], i32 3 +; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP26]], i32 7 ; CHECK-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <8 x float> %a, i32 0 @@ -578,23 +694,38 @@ define <8 x float> @buildvector_div_8f32(<8 x float> %a, <8 x float> %b) { define <8 x double> @buildvector_add_8f64(<8 x double> %a, <8 x double> %b) { ; CHECK-LABEL: @buildvector_add_8f64( -; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7 +; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x double> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x double> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x double> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x double> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x double> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x double> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x double> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x double> %a, i32 7 +; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x double> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x double> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x double> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x double> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x double> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x double> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x double> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x double> %b, i32 7 +; CHECK-NEXT: [[C0:%.*]] = fadd double [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fadd double [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fadd double [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fadd double [[A3]], [[B3]] +; CHECK-NEXT: [[C4:%.*]] = fadd double [[A4]], [[B4]] +; CHECK-NEXT: [[C5:%.*]] = fadd double [[A5]], [[B5]] +; CHECK-NEXT: [[C6:%.*]] = fadd double [[A6]], [[B6]] +; CHECK-NEXT: [[C7:%.*]] = fadd double [[A7]], [[B7]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7 ; CHECK-NEXT: ret <8 x double> [[R7]] ; %a0 = extractelement <8 x double> %a, i32 0 @@ -634,23 +765,38 @@ define <8 x double> @buildvector_add_8f64(<8 x double> %a, <8 x double> %b) { define <8 x double> @buildvector_sub_8f64(<8 x double> %a, <8 x double> %b) { ; CHECK-LABEL: @buildvector_sub_8f64( -; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7 +; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x double> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x double> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x double> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x double> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x double> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x double> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x double> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x double> %a, i32 7 +; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x double> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x double> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x double> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x double> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x double> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x double> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x double> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x double> %b, i32 7 +; CHECK-NEXT: [[C0:%.*]] = fsub double [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fsub double [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fsub double [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fsub double [[A3]], [[B3]] +; CHECK-NEXT: [[C4:%.*]] = fsub double [[A4]], [[B4]] +; CHECK-NEXT: [[C5:%.*]] = fsub double [[A5]], [[B5]] +; CHECK-NEXT: [[C6:%.*]] = fsub double [[A6]], [[B6]] +; CHECK-NEXT: [[C7:%.*]] = fsub double [[A7]], [[B7]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7 ; CHECK-NEXT: ret <8 x double> [[R7]] ; %a0 = extractelement <8 x double> %a, i32 0 @@ -690,23 +836,38 @@ define <8 x double> @buildvector_sub_8f64(<8 x double> %a, <8 x double> %b) { define <8 x double> @buildvector_mul_8f64(<8 x double> %a, <8 x double> %b) { ; CHECK-LABEL: @buildvector_mul_8f64( -; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x double> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7 +; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x double> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x double> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x double> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x double> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x double> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x double> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x double> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x double> %a, i32 7 +; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x double> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x double> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x double> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x double> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x double> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x double> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x double> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x double> %b, i32 7 +; CHECK-NEXT: [[C0:%.*]] = fmul double [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fmul double [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fmul double [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fmul double [[A3]], [[B3]] +; CHECK-NEXT: [[C4:%.*]] = fmul double [[A4]], [[B4]] +; CHECK-NEXT: [[C5:%.*]] = fmul double [[A5]], [[B5]] +; CHECK-NEXT: [[C6:%.*]] = fmul double [[A6]], [[B6]] +; CHECK-NEXT: [[C7:%.*]] = fmul double [[A7]], [[B7]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7 ; CHECK-NEXT: ret <8 x double> [[R7]] ; %a0 = extractelement <8 x double> %a, i32 0 @@ -746,23 +907,58 @@ define <8 x double> @buildvector_mul_8f64(<8 x double> %a, <8 x double> %b) { define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) { ; CHECK-LABEL: @buildvector_div_8f64( -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7 +; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x double> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x double> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x double> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x double> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x double> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x double> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x double> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x double> %a, i32 7 +; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x double> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x double> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x double> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x double> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x double> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x double> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x double> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x double> %b, i32 7 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[A2]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A3]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B3]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> undef, double [[A4]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[A5]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> undef, double [[B4]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[B5]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = fdiv <2 x double> [[TMP12]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x double> undef, double [[A6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[A7]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> undef, double [[B6]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[B7]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = fdiv <2 x double> [[TMP17]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP21]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP22]], i32 1 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[TMP10]], i32 0 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x double> [[TMP10]], i32 1 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP24]], i32 3 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP15]], i32 0 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP25]], i32 4 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[TMP15]], i32 1 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP26]], i32 5 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[TMP20]], i32 0 +; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP27]], i32 6 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[TMP20]], i32 1 +; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP28]], i32 7 ; CHECK-NEXT: ret <8 x double> [[R7]] ; %a0 = extractelement <8 x double> %a, i32 0 @@ -802,39 +998,70 @@ define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) { define <16 x float> @buildvector_add_16f32(<16 x float> %a, <16 x float> %b) { ; CHECK-LABEL: @buildvector_add_16f32( -; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x float> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x float> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x float> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[TMP9]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP1]], i32 8 -; CHECK-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[TMP10]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x float> [[TMP1]], i32 9 -; CHECK-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[TMP11]], i32 9 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x float> [[TMP1]], i32 10 -; CHECK-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[TMP12]], i32 10 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP1]], i32 11 -; CHECK-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[TMP13]], i32 11 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x float> [[TMP1]], i32 12 -; CHECK-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[TMP14]], i32 12 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x float> [[TMP1]], i32 13 -; CHECK-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[TMP15]], i32 13 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP1]], i32 14 -; CHECK-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[TMP16]], i32 14 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15 -; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15 +; CHECK-NEXT: [[A0:%.*]] = extractelement <16 x float> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <16 x float> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <16 x float> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <16 x float> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <16 x float> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <16 x float> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <16 x float> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <16 x float> %a, i32 7 +; CHECK-NEXT: [[A8:%.*]] = extractelement <16 x float> %a, i32 8 +; CHECK-NEXT: [[A9:%.*]] = extractelement <16 x float> %a, i32 9 +; CHECK-NEXT: [[A10:%.*]] = extractelement <16 x float> %a, i32 10 +; CHECK-NEXT: [[A11:%.*]] = extractelement <16 x float> %a, i32 11 +; CHECK-NEXT: [[A12:%.*]] = extractelement <16 x float> %a, i32 12 +; CHECK-NEXT: [[A13:%.*]] = extractelement <16 x float> %a, i32 13 +; CHECK-NEXT: [[A14:%.*]] = extractelement <16 x float> %a, i32 14 +; CHECK-NEXT: [[A15:%.*]] = extractelement <16 x float> %a, i32 15 +; CHECK-NEXT: [[B0:%.*]] = extractelement <16 x float> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <16 x float> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <16 x float> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <16 x float> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <16 x float> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <16 x float> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <16 x float> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <16 x float> %b, i32 7 +; CHECK-NEXT: [[B8:%.*]] = extractelement <16 x float> %b, i32 8 +; CHECK-NEXT: [[B9:%.*]] = extractelement <16 x float> %b, i32 9 +; CHECK-NEXT: [[B10:%.*]] = extractelement <16 x float> %b, i32 10 +; CHECK-NEXT: [[B11:%.*]] = extractelement <16 x float> %b, i32 11 +; CHECK-NEXT: [[B12:%.*]] = extractelement <16 x float> %b, i32 12 +; CHECK-NEXT: [[B13:%.*]] = extractelement <16 x float> %b, i32 13 +; CHECK-NEXT: [[B14:%.*]] = extractelement <16 x float> %b, i32 14 +; CHECK-NEXT: [[B15:%.*]] = extractelement <16 x float> %b, i32 15 +; CHECK-NEXT: [[C0:%.*]] = fadd float [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fadd float [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fadd float [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fadd float [[A3]], [[B3]] +; CHECK-NEXT: [[C4:%.*]] = fadd float [[A4]], [[B4]] +; CHECK-NEXT: [[C5:%.*]] = fadd float [[A5]], [[B5]] +; CHECK-NEXT: [[C6:%.*]] = fadd float [[A6]], [[B6]] +; CHECK-NEXT: [[C7:%.*]] = fadd float [[A7]], [[B7]] +; CHECK-NEXT: [[C8:%.*]] = fadd float [[A8]], [[B8]] +; CHECK-NEXT: [[C9:%.*]] = fadd float [[A9]], [[B9]] +; CHECK-NEXT: [[C10:%.*]] = fadd float [[A10]], [[B10]] +; CHECK-NEXT: [[C11:%.*]] = fadd float [[A11]], [[B11]] +; CHECK-NEXT: [[C12:%.*]] = fadd float [[A12]], [[B12]] +; CHECK-NEXT: [[C13:%.*]] = fadd float [[A13]], [[B13]] +; CHECK-NEXT: [[C14:%.*]] = fadd float [[A14]], [[B14]] +; CHECK-NEXT: [[C15:%.*]] = fadd float [[A15]], [[B15]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7 +; CHECK-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8 +; CHECK-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9 +; CHECK-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10 +; CHECK-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11 +; CHECK-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12 +; CHECK-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13 +; CHECK-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14 +; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[C15]], i32 15 ; CHECK-NEXT: ret <16 x float> [[R15]] ; %a0 = extractelement <16 x float> %a, i32 0 @@ -906,39 +1133,70 @@ define <16 x float> @buildvector_add_16f32(<16 x float> %a, <16 x float> %b) { define <16 x float> @buildvector_sub_16f32(<16 x float> %a, <16 x float> %b) { ; CHECK-LABEL: @buildvector_sub_16f32( -; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x float> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x float> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x float> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[TMP9]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP1]], i32 8 -; CHECK-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[TMP10]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x float> [[TMP1]], i32 9 -; CHECK-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[TMP11]], i32 9 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x float> [[TMP1]], i32 10 -; CHECK-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[TMP12]], i32 10 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP1]], i32 11 -; CHECK-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[TMP13]], i32 11 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x float> [[TMP1]], i32 12 -; CHECK-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[TMP14]], i32 12 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x float> [[TMP1]], i32 13 -; CHECK-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[TMP15]], i32 13 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP1]], i32 14 -; CHECK-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[TMP16]], i32 14 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15 -; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15 +; CHECK-NEXT: [[A0:%.*]] = extractelement <16 x float> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <16 x float> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <16 x float> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <16 x float> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <16 x float> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <16 x float> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <16 x float> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <16 x float> %a, i32 7 +; CHECK-NEXT: [[A8:%.*]] = extractelement <16 x float> %a, i32 8 +; CHECK-NEXT: [[A9:%.*]] = extractelement <16 x float> %a, i32 9 +; CHECK-NEXT: [[A10:%.*]] = extractelement <16 x float> %a, i32 10 +; CHECK-NEXT: [[A11:%.*]] = extractelement <16 x float> %a, i32 11 +; CHECK-NEXT: [[A12:%.*]] = extractelement <16 x float> %a, i32 12 +; CHECK-NEXT: [[A13:%.*]] = extractelement <16 x float> %a, i32 13 +; CHECK-NEXT: [[A14:%.*]] = extractelement <16 x float> %a, i32 14 +; CHECK-NEXT: [[A15:%.*]] = extractelement <16 x float> %a, i32 15 +; CHECK-NEXT: [[B0:%.*]] = extractelement <16 x float> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <16 x float> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <16 x float> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <16 x float> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <16 x float> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <16 x float> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <16 x float> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <16 x float> %b, i32 7 +; CHECK-NEXT: [[B8:%.*]] = extractelement <16 x float> %b, i32 8 +; CHECK-NEXT: [[B9:%.*]] = extractelement <16 x float> %b, i32 9 +; CHECK-NEXT: [[B10:%.*]] = extractelement <16 x float> %b, i32 10 +; CHECK-NEXT: [[B11:%.*]] = extractelement <16 x float> %b, i32 11 +; CHECK-NEXT: [[B12:%.*]] = extractelement <16 x float> %b, i32 12 +; CHECK-NEXT: [[B13:%.*]] = extractelement <16 x float> %b, i32 13 +; CHECK-NEXT: [[B14:%.*]] = extractelement <16 x float> %b, i32 14 +; CHECK-NEXT: [[B15:%.*]] = extractelement <16 x float> %b, i32 15 +; CHECK-NEXT: [[C0:%.*]] = fsub float [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fsub float [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fsub float [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fsub float [[A3]], [[B3]] +; CHECK-NEXT: [[C4:%.*]] = fsub float [[A4]], [[B4]] +; CHECK-NEXT: [[C5:%.*]] = fsub float [[A5]], [[B5]] +; CHECK-NEXT: [[C6:%.*]] = fsub float [[A6]], [[B6]] +; CHECK-NEXT: [[C7:%.*]] = fsub float [[A7]], [[B7]] +; CHECK-NEXT: [[C8:%.*]] = fsub float [[A8]], [[B8]] +; CHECK-NEXT: [[C9:%.*]] = fsub float [[A9]], [[B9]] +; CHECK-NEXT: [[C10:%.*]] = fsub float [[A10]], [[B10]] +; CHECK-NEXT: [[C11:%.*]] = fsub float [[A11]], [[B11]] +; CHECK-NEXT: [[C12:%.*]] = fsub float [[A12]], [[B12]] +; CHECK-NEXT: [[C13:%.*]] = fsub float [[A13]], [[B13]] +; CHECK-NEXT: [[C14:%.*]] = fsub float [[A14]], [[B14]] +; CHECK-NEXT: [[C15:%.*]] = fsub float [[A15]], [[B15]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7 +; CHECK-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8 +; CHECK-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9 +; CHECK-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10 +; CHECK-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11 +; CHECK-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12 +; CHECK-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13 +; CHECK-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14 +; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[C15]], i32 15 ; CHECK-NEXT: ret <16 x float> [[R15]] ; %a0 = extractelement <16 x float> %a, i32 0 @@ -1010,39 +1268,70 @@ define <16 x float> @buildvector_sub_16f32(<16 x float> %a, <16 x float> %b) { define <16 x float> @buildvector_mul_16f32(<16 x float> %a, <16 x float> %b) { ; CHECK-LABEL: @buildvector_mul_16f32( -; CHECK-NEXT: [[TMP1:%.*]] = fmul <16 x float> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x float> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x float> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x float> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[TMP9]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP1]], i32 8 -; CHECK-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[TMP10]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x float> [[TMP1]], i32 9 -; CHECK-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[TMP11]], i32 9 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x float> [[TMP1]], i32 10 -; CHECK-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[TMP12]], i32 10 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP1]], i32 11 -; CHECK-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[TMP13]], i32 11 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x float> [[TMP1]], i32 12 -; CHECK-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[TMP14]], i32 12 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x float> [[TMP1]], i32 13 -; CHECK-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[TMP15]], i32 13 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP1]], i32 14 -; CHECK-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[TMP16]], i32 14 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15 -; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15 +; CHECK-NEXT: [[A0:%.*]] = extractelement <16 x float> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <16 x float> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <16 x float> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <16 x float> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <16 x float> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <16 x float> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <16 x float> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <16 x float> %a, i32 7 +; CHECK-NEXT: [[A8:%.*]] = extractelement <16 x float> %a, i32 8 +; CHECK-NEXT: [[A9:%.*]] = extractelement <16 x float> %a, i32 9 +; CHECK-NEXT: [[A10:%.*]] = extractelement <16 x float> %a, i32 10 +; CHECK-NEXT: [[A11:%.*]] = extractelement <16 x float> %a, i32 11 +; CHECK-NEXT: [[A12:%.*]] = extractelement <16 x float> %a, i32 12 +; CHECK-NEXT: [[A13:%.*]] = extractelement <16 x float> %a, i32 13 +; CHECK-NEXT: [[A14:%.*]] = extractelement <16 x float> %a, i32 14 +; CHECK-NEXT: [[A15:%.*]] = extractelement <16 x float> %a, i32 15 +; CHECK-NEXT: [[B0:%.*]] = extractelement <16 x float> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <16 x float> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <16 x float> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <16 x float> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <16 x float> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <16 x float> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <16 x float> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <16 x float> %b, i32 7 +; CHECK-NEXT: [[B8:%.*]] = extractelement <16 x float> %b, i32 8 +; CHECK-NEXT: [[B9:%.*]] = extractelement <16 x float> %b, i32 9 +; CHECK-NEXT: [[B10:%.*]] = extractelement <16 x float> %b, i32 10 +; CHECK-NEXT: [[B11:%.*]] = extractelement <16 x float> %b, i32 11 +; CHECK-NEXT: [[B12:%.*]] = extractelement <16 x float> %b, i32 12 +; CHECK-NEXT: [[B13:%.*]] = extractelement <16 x float> %b, i32 13 +; CHECK-NEXT: [[B14:%.*]] = extractelement <16 x float> %b, i32 14 +; CHECK-NEXT: [[B15:%.*]] = extractelement <16 x float> %b, i32 15 +; CHECK-NEXT: [[C0:%.*]] = fmul float [[A0]], [[B0]] +; CHECK-NEXT: [[C1:%.*]] = fmul float [[A1]], [[B1]] +; CHECK-NEXT: [[C2:%.*]] = fmul float [[A2]], [[B2]] +; CHECK-NEXT: [[C3:%.*]] = fmul float [[A3]], [[B3]] +; CHECK-NEXT: [[C4:%.*]] = fmul float [[A4]], [[B4]] +; CHECK-NEXT: [[C5:%.*]] = fmul float [[A5]], [[B5]] +; CHECK-NEXT: [[C6:%.*]] = fmul float [[A6]], [[B6]] +; CHECK-NEXT: [[C7:%.*]] = fmul float [[A7]], [[B7]] +; CHECK-NEXT: [[C8:%.*]] = fmul float [[A8]], [[B8]] +; CHECK-NEXT: [[C9:%.*]] = fmul float [[A9]], [[B9]] +; CHECK-NEXT: [[C10:%.*]] = fmul float [[A10]], [[B10]] +; CHECK-NEXT: [[C11:%.*]] = fmul float [[A11]], [[B11]] +; CHECK-NEXT: [[C12:%.*]] = fmul float [[A12]], [[B12]] +; CHECK-NEXT: [[C13:%.*]] = fmul float [[A13]], [[B13]] +; CHECK-NEXT: [[C14:%.*]] = fmul float [[A14]], [[B14]] +; CHECK-NEXT: [[C15:%.*]] = fmul float [[A15]], [[B15]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[C0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7 +; CHECK-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8 +; CHECK-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9 +; CHECK-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10 +; CHECK-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11 +; CHECK-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12 +; CHECK-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13 +; CHECK-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14 +; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[C15]], i32 15 ; CHECK-NEXT: ret <16 x float> [[R15]] ; %a0 = extractelement <16 x float> %a, i32 0 @@ -1114,39 +1403,106 @@ define <16 x float> @buildvector_mul_16f32(<16 x float> %a, <16 x float> %b) { define <16 x float> @buildvector_div_16f32(<16 x float> %a, <16 x float> %b) { ; CHECK-LABEL: @buildvector_div_16f32( -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <16 x float> %a, %b -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x float> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x float> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x float> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[TMP9]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP1]], i32 8 -; CHECK-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[TMP10]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x float> [[TMP1]], i32 9 -; CHECK-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[TMP11]], i32 9 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x float> [[TMP1]], i32 10 -; CHECK-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[TMP12]], i32 10 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP1]], i32 11 -; CHECK-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[TMP13]], i32 11 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x float> [[TMP1]], i32 12 -; CHECK-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[TMP14]], i32 12 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x float> [[TMP1]], i32 13 -; CHECK-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[TMP15]], i32 13 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP1]], i32 14 -; CHECK-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[TMP16]], i32 14 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15 -; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15 +; CHECK-NEXT: [[A0:%.*]] = extractelement <16 x float> %a, i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <16 x float> %a, i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <16 x float> %a, i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <16 x float> %a, i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <16 x float> %a, i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <16 x float> %a, i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <16 x float> %a, i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <16 x float> %a, i32 7 +; CHECK-NEXT: [[A8:%.*]] = extractelement <16 x float> %a, i32 8 +; CHECK-NEXT: [[A9:%.*]] = extractelement <16 x float> %a, i32 9 +; CHECK-NEXT: [[A10:%.*]] = extractelement <16 x float> %a, i32 10 +; CHECK-NEXT: [[A11:%.*]] = extractelement <16 x float> %a, i32 11 +; CHECK-NEXT: [[A12:%.*]] = extractelement <16 x float> %a, i32 12 +; CHECK-NEXT: [[A13:%.*]] = extractelement <16 x float> %a, i32 13 +; CHECK-NEXT: [[A14:%.*]] = extractelement <16 x float> %a, i32 14 +; CHECK-NEXT: [[A15:%.*]] = extractelement <16 x float> %a, i32 15 +; CHECK-NEXT: [[B0:%.*]] = extractelement <16 x float> %b, i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <16 x float> %b, i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <16 x float> %b, i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <16 x float> %b, i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <16 x float> %b, i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <16 x float> %b, i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <16 x float> %b, i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <16 x float> %b, i32 7 +; CHECK-NEXT: [[B8:%.*]] = extractelement <16 x float> %b, i32 8 +; CHECK-NEXT: [[B9:%.*]] = extractelement <16 x float> %b, i32 9 +; CHECK-NEXT: [[B10:%.*]] = extractelement <16 x float> %b, i32 10 +; CHECK-NEXT: [[B11:%.*]] = extractelement <16 x float> %b, i32 11 +; CHECK-NEXT: [[B12:%.*]] = extractelement <16 x float> %b, i32 12 +; CHECK-NEXT: [[B13:%.*]] = extractelement <16 x float> %b, i32 13 +; CHECK-NEXT: [[B14:%.*]] = extractelement <16 x float> %b, i32 14 +; CHECK-NEXT: [[B15:%.*]] = extractelement <16 x float> %b, i32 15 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[A1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[A2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[A3]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float [[B0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[B1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[B2]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[B3]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = fdiv <4 x float> [[TMP4]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> undef, float [[A4]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[A5]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[A6]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[A7]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> undef, float [[B4]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[B5]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[B6]], i32 2 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP16]], float [[B7]], i32 3 +; CHECK-NEXT: [[TMP18:%.*]] = fdiv <4 x float> [[TMP13]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> undef, float [[A8]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float [[A9]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x float> [[TMP20]], float [[A10]], i32 2 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[A11]], i32 3 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> undef, float [[B8]], i32 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[B9]], i32 1 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[B10]], i32 2 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[TMP25]], float [[B11]], i32 3 +; CHECK-NEXT: [[TMP27:%.*]] = fdiv <4 x float> [[TMP22]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x float> undef, float [[A12]], i32 0 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[A13]], i32 1 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[A14]], i32 2 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x float> [[TMP30]], float [[A15]], i32 3 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x float> undef, float [[B12]], i32 0 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[B13]], i32 1 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[B14]], i32 2 +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[B15]], i32 3 +; CHECK-NEXT: [[TMP36:%.*]] = fdiv <4 x float> [[TMP31]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP37]], i32 0 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 +; CHECK-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[TMP38]], i32 1 +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 +; CHECK-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[TMP39]], i32 2 +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 +; CHECK-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[TMP40]], i32 3 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[TMP18]], i32 0 +; CHECK-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[TMP41]], i32 4 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x float> [[TMP18]], i32 1 +; CHECK-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[TMP42]], i32 5 +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x float> [[TMP18]], i32 2 +; CHECK-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[TMP43]], i32 6 +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x float> [[TMP18]], i32 3 +; CHECK-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[TMP44]], i32 7 +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x float> [[TMP27]], i32 0 +; CHECK-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[TMP45]], i32 8 +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x float> [[TMP27]], i32 1 +; CHECK-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[TMP46]], i32 9 +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x float> [[TMP27]], i32 2 +; CHECK-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[TMP47]], i32 10 +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x float> [[TMP27]], i32 3 +; CHECK-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[TMP48]], i32 11 +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x float> [[TMP36]], i32 0 +; CHECK-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[TMP49]], i32 12 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <4 x float> [[TMP36]], i32 1 +; CHECK-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[TMP50]], i32 13 +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <4 x float> [[TMP36]], i32 2 +; CHECK-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[TMP51]], i32 14 +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <4 x float> [[TMP36]], i32 3 +; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP52]], i32 15 ; CHECK-NEXT: ret <16 x float> [[R15]] ; %a0 = extractelement <16 x float> %a, i32 0 diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index 9e4f503155e..06587cd6c83 100644 --- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -616,38 +616,42 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; CHECK-LABEL: @multi_tree( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 -; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1 -; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP5]], i32 2 -; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP5]], i32 3 -; CHECK-NEXT: [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double %w, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double %x, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %y, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double %z, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> , [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP7]], i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> , [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 +; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 +; CHECK-NEXT: [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP11]], i32 0 ; CHECK-NEXT: ret <4 x double> [[I4]] ; ; ZEROTHRESH-LABEL: @multi_tree( ; ZEROTHRESH-NEXT: entry: -; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0 -; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1 -; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2 -; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3 -; ZEROTHRESH-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], -; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]] -; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 -; ZEROTHRESH-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3 -; ZEROTHRESH-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1 -; ZEROTHRESH-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP7]], i32 2 -; ZEROTHRESH-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP5]], i32 2 -; ZEROTHRESH-NEXT: [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP8]], i32 1 -; ZEROTHRESH-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP5]], i32 3 -; ZEROTHRESH-NEXT: [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP9]], i32 0 +; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double %w, i32 0 +; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double %x, i32 1 +; ZEROTHRESH-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], +; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %y, i32 0 +; ZEROTHRESH-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double %z, i32 1 +; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], +; ZEROTHRESH-NEXT: [[TMP6:%.*]] = fmul <2 x double> , [[TMP2]] +; ZEROTHRESH-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; ZEROTHRESH-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP7]], i32 3 +; ZEROTHRESH-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; ZEROTHRESH-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP8]], i32 2 +; ZEROTHRESH-NEXT: [[TMP9:%.*]] = fmul <2 x double> , [[TMP5]] +; ZEROTHRESH-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 +; ZEROTHRESH-NEXT: [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP10]], i32 1 +; ZEROTHRESH-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 +; ZEROTHRESH-NEXT: [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP11]], i32 0 ; ZEROTHRESH-NEXT: ret <4 x double> [[I4]] ; entry: @@ -669,44 +673,92 @@ entry: define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 { ; CHECK-LABEL: @_vadd256( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1 -; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP0]], i32 4 -; CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP5]], i32 4 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP0]], i32 5 -; CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP6]], i32 5 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP0]], i32 6 -; CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP7]], i32 6 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[TMP0]], i32 7 -; CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP8]], i32 7 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> %a, i32 0 +; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <8 x float> %b, i32 0 +; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <8 x float> %a, i32 1 +; CHECK-NEXT: [[VECEXT3:%.*]] = extractelement <8 x float> %b, i32 1 +; CHECK-NEXT: [[VECEXT5:%.*]] = extractelement <8 x float> %a, i32 2 +; CHECK-NEXT: [[VECEXT6:%.*]] = extractelement <8 x float> %b, i32 2 +; CHECK-NEXT: [[VECEXT8:%.*]] = extractelement <8 x float> %a, i32 3 +; CHECK-NEXT: [[VECEXT9:%.*]] = extractelement <8 x float> %b, i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> undef, float [[VECEXT]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[VECEXT2]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[VECEXT5]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[VECEXT8]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> undef, float [[VECEXT1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[VECEXT3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[VECEXT6]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[VECEXT9]], i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP3]], [[TMP7]] +; CHECK-NEXT: [[VECEXT11:%.*]] = extractelement <8 x float> %a, i32 4 +; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <8 x float> %b, i32 4 +; CHECK-NEXT: [[VECEXT14:%.*]] = extractelement <8 x float> %a, i32 5 +; CHECK-NEXT: [[VECEXT15:%.*]] = extractelement <8 x float> %b, i32 5 +; CHECK-NEXT: [[VECEXT17:%.*]] = extractelement <8 x float> %a, i32 6 +; CHECK-NEXT: [[VECEXT18:%.*]] = extractelement <8 x float> %b, i32 6 +; CHECK-NEXT: [[VECEXT20:%.*]] = extractelement <8 x float> %a, i32 7 +; CHECK-NEXT: [[VECEXT21:%.*]] = extractelement <8 x float> %b, i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> undef, float [[VECEXT11]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[VECEXT14]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[VECEXT17]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[VECEXT20]], i32 3 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> undef, float [[VECEXT12]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[VECEXT15]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[VECEXT18]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[VECEXT21]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[TMP12]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP8]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP8]], i32 1 +; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP19]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP8]], i32 2 +; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP20]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP8]], i32 3 +; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP17]], i32 0 +; CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP22]], i32 4 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP17]], i32 1 +; CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP23]], i32 5 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP17]], i32 2 +; CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP24]], i32 6 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x float> [[TMP17]], i32 3 +; CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP25]], i32 7 ; CHECK-NEXT: ret <8 x float> [[VECINIT7_I]] ; ; ZEROTHRESH-LABEL: @_vadd256( ; ZEROTHRESH-NEXT: entry: -; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b -; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0 -; ZEROTHRESH-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0 -; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1 -; ZEROTHRESH-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP2]], i32 1 -; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP0]], i32 2 -; ZEROTHRESH-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP3]], i32 2 -; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP0]], i32 3 -; ZEROTHRESH-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP4]], i32 3 -; ZEROTHRESH-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP0]], i32 4 -; ZEROTHRESH-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP5]], i32 4 -; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP0]], i32 5 -; ZEROTHRESH-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP6]], i32 5 -; ZEROTHRESH-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP0]], i32 6 -; ZEROTHRESH-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP7]], i32 6 -; ZEROTHRESH-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[TMP0]], i32 7 -; ZEROTHRESH-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP8]], i32 7 +; ZEROTHRESH-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> %a, i32 0 +; ZEROTHRESH-NEXT: [[VECEXT1:%.*]] = extractelement <8 x float> %b, i32 0 +; ZEROTHRESH-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT1]] +; ZEROTHRESH-NEXT: [[VECEXT2:%.*]] = extractelement <8 x float> %a, i32 1 +; ZEROTHRESH-NEXT: [[VECEXT3:%.*]] = extractelement <8 x float> %b, i32 1 +; ZEROTHRESH-NEXT: [[ADD4:%.*]] = fadd float [[VECEXT2]], [[VECEXT3]] +; ZEROTHRESH-NEXT: [[VECEXT5:%.*]] = extractelement <8 x float> %a, i32 2 +; ZEROTHRESH-NEXT: [[VECEXT6:%.*]] = extractelement <8 x float> %b, i32 2 +; ZEROTHRESH-NEXT: [[ADD7:%.*]] = fadd float [[VECEXT5]], [[VECEXT6]] +; ZEROTHRESH-NEXT: [[VECEXT8:%.*]] = extractelement <8 x float> %a, i32 3 +; ZEROTHRESH-NEXT: [[VECEXT9:%.*]] = extractelement <8 x float> %b, i32 3 +; ZEROTHRESH-NEXT: [[ADD10:%.*]] = fadd float [[VECEXT8]], [[VECEXT9]] +; ZEROTHRESH-NEXT: [[VECEXT11:%.*]] = extractelement <8 x float> %a, i32 4 +; ZEROTHRESH-NEXT: [[VECEXT12:%.*]] = extractelement <8 x float> %b, i32 4 +; ZEROTHRESH-NEXT: [[ADD13:%.*]] = fadd float [[VECEXT11]], [[VECEXT12]] +; ZEROTHRESH-NEXT: [[VECEXT14:%.*]] = extractelement <8 x float> %a, i32 5 +; ZEROTHRESH-NEXT: [[VECEXT15:%.*]] = extractelement <8 x float> %b, i32 5 +; ZEROTHRESH-NEXT: [[ADD16:%.*]] = fadd float [[VECEXT14]], [[VECEXT15]] +; ZEROTHRESH-NEXT: [[VECEXT17:%.*]] = extractelement <8 x float> %a, i32 6 +; ZEROTHRESH-NEXT: [[VECEXT18:%.*]] = extractelement <8 x float> %b, i32 6 +; ZEROTHRESH-NEXT: [[ADD19:%.*]] = fadd float [[VECEXT17]], [[VECEXT18]] +; ZEROTHRESH-NEXT: [[VECEXT20:%.*]] = extractelement <8 x float> %a, i32 7 +; ZEROTHRESH-NEXT: [[VECEXT21:%.*]] = extractelement <8 x float> %b, i32 7 +; ZEROTHRESH-NEXT: [[ADD22:%.*]] = fadd float [[VECEXT20]], [[VECEXT21]] +; ZEROTHRESH-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[ADD]], i32 0 +; ZEROTHRESH-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[ADD4]], i32 1 +; ZEROTHRESH-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[ADD7]], i32 2 +; ZEROTHRESH-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[ADD10]], i32 3 +; ZEROTHRESH-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[ADD13]], i32 4 +; ZEROTHRESH-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[ADD16]], i32 5 +; ZEROTHRESH-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[ADD19]], i32 6 +; ZEROTHRESH-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[ADD22]], i32 7 ; ZEROTHRESH-NEXT: ret <8 x float> [[VECINIT7_I]] ; entry: