diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ff8491d2e62..320d7ccfbc2 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -5901,7 +5901,10 @@ static bool isVUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; ++j) { if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) return false; @@ -5932,7 +5935,10 @@ static bool isVUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned Half = NumElts / 2; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += Half) { unsigned Idx = WhichResult; for (unsigned k = 0; k < Half; ++k) { @@ -5972,7 +5978,10 @@ static bool isVZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -6005,7 +6014,10 @@ static bool isVZIP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll index 771bf5f0521..06b49ab9405 100644 --- a/test/CodeGen/ARM/vzip.ll +++ b/test/CodeGen/ARM/vzip.ll @@ -282,6 +282,25 @@ entry: ret <8 x i16> %0 } +; NOTE: The mask here looks like something that could be done with a vzip, +; but which the current handling of two-result vzip can't do - thus ending up +; as a vtrn. +define <8 x i16> @vzip_lower_shufflemask_undef_rev(<4 x i16>* %A, <4 x i16>* %B) { +; CHECK-LABEL: vzip_lower_shufflemask_undef_rev: +; CHECK: @ BB#0: @ %entry +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d19, [r0] +; CHECK-NEXT: vtrn.16 d19, d16 +; CHECK-NEXT: vmov r0, r1, d18 +; CHECK-NEXT: vmov r2, r3, d19 +; CHECK-NEXT: mov pc, lr +entry: + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> + ret <8 x i16> %0 +} + define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) { ; CHECK-LABEL: vzip_lower_shufflemask_zeroed: ; CHECK: @ BB#0: @ %entry