From d83dee827066dc34a0d79665213e02a91810eb8e Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Mon, 27 Jan 2014 02:53:41 +0000 Subject: [PATCH] Revert r199791. It's an old version which has some bugs. I'll commit the latest patch soon. llvm-svn: 200179 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 113 +++------ lib/Target/AArch64/AArch64ISelLowering.h | 6 +- test/CodeGen/AArch64/neon-copy.ll | 270 --------------------- 3 files changed, 29 insertions(+), 360 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index a794fcdc1f3..27277c47f39 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4154,70 +4154,21 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } -// Check whether a shuffle_vector could be presented as concat_vector. -bool AArch64TargetLowering::isConcatVector(SDValue Op,SelectionDAG &DAG, - SDValue V0, SDValue V1, - const int* Mask, - SDValue &Res) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned V0NumElts = V0.getValueType().getVectorNumElements(); - bool isContactVector = true; - bool splitV0 = false; - int offset = 0; - for (int I = 0, E = NumElts; I != E; I++){ - if (Mask[I] != I + offset) { - if(I && !splitV0 && Mask[I] == I + (int)V0NumElts / 2) { - splitV0 = true; - offset = V0NumElts / 2; - } else { - isContactVector = false; - break; - } - } - } - if (isContactVector) { - EVT CastVT = EVT::getVectorVT(*DAG.getContext(), - VT.getVectorElementType(), NumElts / 2); - if(CastVT.getSizeInBits() < 64) - return false; - - if (splitV0) { - assert(V0NumElts >= NumElts / 2 && - "invalid operand for extract_subvector!"); - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, - DAG.getConstant(0, MVT::i64)); - } - if (NumElts != V1.getValueType().getVectorNumElements() * 2) { - assert(V1.getValueType().getVectorNumElements() >= NumElts / 2 && 
"invalid operand for extract_subvector!"); - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1, - DAG.getConstant(0, MVT::i64)); - } - Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1); - return true; - } - return false; -} - -// Check whether a Build Vector could be presented as Shuffle Vector. -// This Shuffle Vector maybe not legalized, so the length of its operand and -// the length of result may not equal. +// Check whether a Build Vector could be presented as Shuffle Vector. If yes, +// try to call LowerVECTOR_SHUFFLE to lower it. bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, - SDValue &V0, SDValue &V1, - int *Mask) const { + SDValue &Res) const { SDLoc DL(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); unsigned V0NumElts = 0; + int Mask[16]; + SDValue V0, V1; // Check if all elements are extracted from less than 3 vectors. for (unsigned i = 0; i < NumElts; ++i) { SDValue Elt = Op.getOperand(i); - if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - Elt.getOperand(0).getValueType().getVectorElementType() != - VT.getVectorElementType()) + if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return false; if (V0.getNode() == 0) { @@ -4238,7 +4189,25 @@ bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, return false; } } - return true; + + if (!V1.getNode() && V0NumElts == NumElts * 2) { + V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, + DAG.getConstant(NumElts, MVT::i64)); + V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, + DAG.getConstant(0, MVT::i64)); + V0NumElts = V0.getValueType().getVectorNumElements(); + } + + if (V1.getNode() && NumElts == V0NumElts && + V0NumElts == V1.getValueType().getVectorNumElements()) { + SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); + if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) + Res = Shuffle; + else + Res = LowerVECTOR_SHUFFLE(Shuffle, DAG); + return true; + } else + return false; } // If this is a 
case we can't handle, return null and let the default @@ -4444,31 +4413,9 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return SDValue(); // Try to lower this in lowering ShuffleVector way. - SDValue V0, V1; - int Mask[16]; - if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) { - unsigned V0NumElts = V0.getValueType().getVectorNumElements(); - if (!V1.getNode() && V0NumElts == NumElts * 2) { - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, - DAG.getConstant(NumElts, MVT::i64)); - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, - DAG.getConstant(0, MVT::i64)); - V0NumElts = V0.getValueType().getVectorNumElements(); - } - - if (V1.getNode() && NumElts == V0NumElts && - V0NumElts == V1.getValueType().getVectorNumElements()) { - SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); - if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) - return Shuffle; - else - return LowerVECTOR_SHUFFLE(Shuffle, DAG); - } else { - SDValue Res; - if(isConcatVector(Op, DAG, V0, V1, Mask, Res)) - return Res; - } - } + SDValue Shuf; + if (isKnownShuffleVector(Op, DAG, Shuf)) + return Shuf; // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we // know the default expansion would otherwise fall back on something even @@ -4654,10 +4601,6 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISDNo, dl, VT, V1, V2); } - SDValue Res; - if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res)) - return Res; - // If the element of shuffle mask are all the same constant, we can // transform it into either NEON_VDUP or NEON_VDUPLANE if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 8961d9c1415..3879663e570 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -232,11 +232,7 @@ public: SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; - bool 
isConcatVector(SDValue Op,SelectionDAG &DAG, SDValue V0, SDValue V1, - const int* Mask, SDValue &Res) const; - - bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0, - SDValue &V1, int *Mask) const; + bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const AArch64Subtarget *ST) const; diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll index bda56564449..0799eb3b2d6 100644 --- a/test/CodeGen/AArch64/neon-copy.ll +++ b/test/CodeGen/AArch64/neon-copy.ll @@ -975,14 +975,6 @@ entry: declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>) -define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> - ret <16 x i8> %vecinit30 -} - define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) { ; CHECK-LABEL: test_concat_undef_v1i32: ; CHECK: ins v{{[0-9]+}}.s[1], v{{[0-9]+}}.s[0] @@ -1029,268 +1021,6 @@ entry: ret <2 x i32> %h } -define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <8 x i8> %x, i32 0 - %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <8 x i8> %x, i32 1 - %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <8 x i8> %x, i32 2 - %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <8 x i8> %x, i32 3 - %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <8 x i8> %x, i32 4 - %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <8 x i8> %x, i32 5 - %vecinit10 = insertelement <16 x i8> 
%vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <8 x i8> %x, i32 6 - %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <8 x i8> %x, i32 7 - %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 - %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> - ret <16 x i8> %vecinit30 -} - -define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <16 x i8> %x, i32 0 - %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <16 x i8> %x, i32 1 - %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <16 x i8> %x, i32 2 - %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <16 x i8> %x, i32 3 - %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <16 x i8> %x, i32 4 - %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <16 x i8> %x, i32 5 - %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <16 x i8> %x, i32 6 - %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <16 x i8> %x, i32 7 - %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 - %vecext15 = extractelement <8 x i8> %y, i32 0 - %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 - %vecext17 = extractelement <8 x i8> %y, i32 1 - %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 - %vecext19 = extractelement <8 x i8> %y, i32 2 - %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 - %vecext21 = extractelement <8 x i8> %y, i32 3 - %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 - %vecext23 = extractelement <8 x 
i8> %y, i32 4 - %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 - %vecext25 = extractelement <8 x i8> %y, i32 5 - %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 - %vecext27 = extractelement <8 x i8> %y, i32 6 - %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 - %vecext29 = extractelement <8 x i8> %y, i32 7 - %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 - ret <16 x i8> %vecinit30 -} - -define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <8 x i8> %x, i32 0 - %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <8 x i8> %x, i32 1 - %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <8 x i8> %x, i32 2 - %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <8 x i8> %x, i32 3 - %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <8 x i8> %x, i32 4 - %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <8 x i8> %x, i32 5 - %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <8 x i8> %x, i32 6 - %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <8 x i8> %x, i32 7 - %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 - %vecext15 = extractelement <8 x i8> %y, i32 0 - %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 - %vecext17 = extractelement <8 x i8> %y, i32 1 - %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 - %vecext19 = extractelement <8 x i8> %y, i32 2 - %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 - %vecext21 = extractelement <8 x i8> %y, i32 3 - %vecinit22 = 
insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 - %vecext23 = extractelement <8 x i8> %y, i32 4 - %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 - %vecext25 = extractelement <8 x i8> %y, i32 5 - %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 - %vecext27 = extractelement <8 x i8> %y, i32 6 - %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 - %vecext29 = extractelement <8 x i8> %y, i32 7 - %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 - ret <16 x i8> %vecinit30 -} - -define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> - ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <4 x i16> %x, i32 0 - %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 - %vecext1 = extractelement <4 x i16> %x, i32 1 - %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 - %vecext3 = extractelement <4 x i16> %x, i32 2 - %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 - %vecext5 = extractelement <4 x i16> %x, i32 3 - %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 - %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> - ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <8 x i16> %x, i32 0 - %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 - %vecext1 = extractelement <8 x i16> %x, i32 1 - %vecinit2 = insertelement <8 x i16> %vecinit, 
i16 %vecext1, i32 1 - %vecext3 = extractelement <8 x i16> %x, i32 2 - %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 - %vecext5 = extractelement <8 x i16> %x, i32 3 - %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 - %vecext7 = extractelement <4 x i16> %y, i32 0 - %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 - %vecext9 = extractelement <4 x i16> %y, i32 1 - %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 - %vecext11 = extractelement <4 x i16> %y, i32 2 - %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 - %vecext13 = extractelement <4 x i16> %y, i32 3 - %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 - ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <4 x i16> %x, i32 0 - %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 - %vecext1 = extractelement <4 x i16> %x, i32 1 - %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 - %vecext3 = extractelement <4 x i16> %x, i32 2 - %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 - %vecext5 = extractelement <4 x i16> %x, i32 3 - %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 - %vecext7 = extractelement <4 x i16> %y, i32 0 - %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 - %vecext9 = extractelement <4 x i16> %y, i32 1 - %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 - %vecext11 = extractelement <4 x i16> %y, i32 2 - %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 - %vecext13 = extractelement <4 x i16> %y, i32 3 - %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 - ret <8 x i16> %vecinit14 -} - -define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { 
-; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> - ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <2 x i32> %x, i32 0 - %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 - %vecext1 = extractelement <2 x i32> %x, i32 1 - %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 - %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> - ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <4 x i32> %x, i32 0 - %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 - %vecext1 = extractelement <4 x i32> %x, i32 1 - %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 - %vecext3 = extractelement <2 x i32> %y, i32 0 - %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 - %vecext5 = extractelement <2 x i32> %y, i32 1 - %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 - ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <2 x i32> %x, i32 0 - %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 - %vecext1 = extractelement <2 x i32> %x, i32 1 - %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 - %vecext3 = extractelement <2 x i32> %y, i32 0 - %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 - %vecext5 = extractelement <2 x i32> %y, i32 1 - %vecinit6 = insertelement 
<4 x i32> %vecinit4, i32 %vecext5, i32 3 - ret <4 x i32> %vecinit6 -} - -define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> - ret <2 x i64> %vecinit2 -} - -define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <1 x i64> %x, i32 0 - %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 - %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> - ret <2 x i64> %vecinit2 -} - -define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <2 x i64> %x, i32 0 - %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 - %vecext1 = extractelement <1 x i64> %y, i32 0 - %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 - ret <2 x i64> %vecinit2 -} - -define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <1 x i64> %x, i32 0 - %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 - %vecext1 = extractelement <1 x i64> %y, i32 0 - %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 - ret <2 x i64> %vecinit2 -} - declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) {