[ARM] Check the right order for halves of VZIP/VUZP if both parts are used

This is the exact same fix as in SVN r247254. In that commit, the fix was
applied only for isVTRNMask and isVTRN_v_undef_Mask, but the same issue
is present for VZIP/VUZP as well.

This fixes PR33921.

Differential Revision: https://reviews.llvm.org/D36899

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311258 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Martin Storsjo 2017-08-19 19:47:48 +00:00
parent 3329070a6e
commit daff186997
2 changed files with 35 additions and 4 deletions

View File

@ -5901,6 +5901,9 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; ++j) {
if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
@ -5932,6 +5935,9 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned Half = NumElts / 2;
for (unsigned i = 0; i < M.size(); i += NumElts) {
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; j += Half) {
unsigned Idx = WhichResult;
@ -5972,6 +5978,9 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
@ -6005,6 +6014,9 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {

View File

@ -282,6 +282,25 @@ entry:
ret <8 x i16> %0
}
; NOTE: The mask here looks like something that could be done with a vzip,
; but which the current handling of two-result vzip can't do - thus ending up
; as a vtrn.
define <8 x i16> @vzip_lower_shufflemask_undef_rev(<4 x i16>* %A, <4 x i16>* %B) {
; CHECK-LABEL: vzip_lower_shufflemask_undef_rev:
; CHECK: @ BB#0: @ %entry
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d19, [r0]
; CHECK-NEXT: vtrn.16 d19, d16
; CHECK-NEXT: vmov r0, r1, d18
; CHECK-NEXT: vmov r2, r3, d19
; CHECK-NEXT: mov pc, lr
entry:
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 4, i32 undef, i32 undef>
ret <8 x i16> %0
}
define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
; CHECK-LABEL: vzip_lower_shufflemask_zeroed:
; CHECK: @ BB#0: @ %entry