diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0f43b324367..737007aa4cd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -8254,7 +8254,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Get widened type and narrowed type. MVT widenType; unsigned numElem = VT.getVectorNumElements(); - switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { + + EVT inputLaneType = Vec.getValueType().getVectorElementType(); + switch (inputLaneType.getSimpleVT().SimpleTy) { case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; @@ -8264,7 +8266,8 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, &Ops[0], Ops.size()); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp); + unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; + return DAG.getNode(ExtOp, SDLoc(N), VT, tmp); } static SDValue findMUL_LOHI(SDValue V) { diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll index f84721f996c..ecaabd3b9f7 100644 --- a/test/CodeGen/ARM/vpadd.ll +++ b/test/CodeGen/ARM/vpadd.ll @@ -152,6 +152,17 @@ define void @addCombineToVPADDL() nounwind ssp { ret void } +; Legalization produces a EXTRACT_VECTOR_ELT DAG node which performs an extend from +; i16 to i32. In this case the input for the formed VPADDL needs to be a vector of i16s. 
+define <2 x i16> @fromExtendingExtractVectorElt(<4 x i16> %in) {
+;CHECK-LABEL: fromExtendingExtractVectorElt:
+;CHECK: vpaddl.s16
+  %tmp1 = shufflevector <4 x i16> %in, <4 x i16> undef, <2 x i32> <i32 1, i32 3> ; odd lanes of %in
+  %tmp2 = shufflevector <4 x i16> %in, <4 x i16> undef, <2 x i32> <i32 0, i32 2> ; even lanes of %in
+  %x = add <2 x i16> %tmp2, %tmp1 ; even + odd lanes: sum of each adjacent pair, expected to select as a pairwise add-long
+  ret <2 x i16> %x
+}
+
 declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
 declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone