diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c839fc65606..915b309a30d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2528,6 +2528,25 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
   return true;
 }
 
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i < NumElts; i += 2) {
+    if ((unsigned) M[i] != i + WhichResult ||
+        (unsigned) M[i+1] != i + WhichResult)
+      return false;
+  }
+  return true;
+}
+
 static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
                        unsigned &WhichResult) {
   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ -2548,6 +2567,33 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
   return true;
 }
 
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
+static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned Half = VT.getVectorNumElements() / 2;
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned j = 0; j != 2; ++j) {
+    unsigned Idx = WhichResult;
+    for (unsigned i = 0; i != Half; ++i) {
+      if ((unsigned) M[i + j * Half] != Idx)
+        return false;
+      Idx += 2;
+    }
+  }
+
+  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && EltSz == 32)
+    return false;
+
+  return true;
+}
+
 static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
                        unsigned &WhichResult) {
   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ -2571,6 +2617,33 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
   return true;
 }
 
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  unsigned Idx = WhichResult * NumElts / 2;
+  for (unsigned i = 0; i != NumElts; i += 2) {
+    if ((unsigned) M[i] != Idx ||
+        (unsigned) M[i+1] != Idx)
+      return false;
+    Idx += 1;
+  }
+
+  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && EltSz == 32)
+    return false;
+
+  return true;
+}
+
+
 static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   // Canonicalize all-zeros and all-ones vectors.
   ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
@@ -2683,7 +2756,10 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
           isVTRNMask(M, VT, WhichResult) ||
           isVUZPMask(M, VT, WhichResult) ||
-          isVZIPMask(M, VT, WhichResult));
+          isVZIPMask(M, VT, WhichResult) ||
+          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+          isVZIP_v_undef_Mask(M, VT, WhichResult));
 }
 
 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -2815,6 +2891,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
                        V1, V2).getValue(WhichResult);
 
+  if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+                       V1, V1).getValue(WhichResult);
+  if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+                       V1, V1).getValue(WhichResult);
+  if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+                       V1, V1).getValue(WhichResult);
+
   // If the shuffle is not directly supported and it has 4 elements, use
   // the PerfectShuffle-generated table to synthesize it from other shuffles.
   if (VT.getVectorNumElements() == 4 &&
diff --git a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
new file mode 100644
index 00000000000..a737591bd9f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "armv7-apple-darwin10"
+
+%struct.int16x8_t = type { <8 x i16> }
+%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] }
+
+define arm_apcscc void @t(%struct.int16x8x2_t* noalias nocapture sret %agg.result, <8 x i16> %tmp.0, %struct.int16x8x2_t* nocapture %dst) nounwind {
+entry:
+;CHECK: vtrn.16
+  %0 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  %1 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+  %agg.result1218.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0, i32 0 ; <<8 x i16>*>
+  store <8 x i16> %0, <8 x i16>* %agg.result1218.0, align 16
+  %agg.result12.1.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1, i32 0 ; <<8 x i16>*>
+  store <8 x i16> %1, <8 x i16>* %agg.result12.1.0, align 16
+  ret void
+}
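
For reference, a minimal standalone sketch of the single-source mask check that
isVTRN_v_undef_Mask performs. The name isTrnVUndefMask, the std::vector<int>
mask representation, and the test driver below are illustrative only; the
patch itself operates on LLVM's SmallVectorImpl<int> masks and additionally
rejects 64-bit element types via EVT.

// Standalone sketch (not part of the patch) of the "vector_shuffle v, undef"
// VTRN mask check. It mirrors the loop in isVTRN_v_undef_Mask above.
#include <cassert>
#include <vector>

// Returns true if Mask is the canonical single-source VTRN form:
// <0, 0, 2, 2, ...> selects the first VTRN result (WhichResult = 0),
// <1, 1, 3, 3, ...> selects the second (WhichResult = 1).
static bool isTrnVUndefMask(const std::vector<int> &Mask,
                            unsigned &WhichResult) {
  unsigned NumElts = Mask.size();
  WhichResult = (Mask[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i < NumElts; i += 2)
    if ((unsigned)Mask[i] != i + WhichResult ||
        (unsigned)Mask[i + 1] != i + WhichResult)
      return false;
  return true;
}

int main() {
  unsigned WhichResult;
  // Mask from the test above: even lanes duplicated -> first VTRN result.
  assert(isTrnVUndefMask({0, 0, 2, 2, 4, 4, 6, 6}, WhichResult));
  assert(WhichResult == 0);
  // Odd lanes duplicated -> second VTRN result.
  assert(isTrnVUndefMask({1, 1, 3, 3, 5, 5, 7, 7}, WhichResult));
  assert(WhichResult == 1);
  // A two-source VTRN mask such as <0, 8, 2, 10, ...> is not this form;
  // it is handled by the preexisting isVTRNMask instead.
  assert(!isTrnVUndefMask({0, 8, 2, 10, 4, 12, 6, 14}, WhichResult));
  return 0;
}

These single-source forms arise because DAG canonicalization rewrites
"vector_shuffle v, v" as "vector_shuffle v, undef" with remapped indices, so
the existing two-source VTRN/VUZP/VZIP matchers never fire on them; the
_v_undef variants recover those shuffles and lower them with both operands
set to V1.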