diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b05c5dd2597..c2bd471faa1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3148,6 +3148,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
                        bool &ReverseVEXT, unsigned &Imm) {
   unsigned NumElts = VT.getVectorNumElements();
   ReverseVEXT = false;
+
+  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
+  if (M[0] < 0)
+    return false;
+
   Imm = M[0];
 
   // If this is a VEXT shuffle, the immediate value is the index of the first
@@ -3163,6 +3168,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
       ReverseVEXT = true;
     }
 
+    if (M[i] < 0) continue; // ignore UNDEF indices
     if (ExpectedElt != static_cast<unsigned>(M[i]))
       return false;
   }
@@ -3188,13 +3194,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
 
   unsigned NumElts = VT.getVectorNumElements();
   unsigned BlockElts = M[0] + 1;
+  // If the first shuffle index is UNDEF, be optimistic.
+  if (M[0] < 0)
+    BlockElts = BlockSize / EltSz;
 
   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
     return false;
 
   for (unsigned i = 0; i < NumElts; ++i) {
-    if ((unsigned) M[i] !=
-        (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+    if (M[i] < 0) continue; // ignore UNDEF indices
+    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
       return false;
   }
 
@@ -3210,8 +3219,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
   unsigned NumElts = VT.getVectorNumElements();
   WhichResult = (M[0] == 0 ? 0 : 1);
   for (unsigned i = 0; i < NumElts; i += 2) {
-    if ((unsigned) M[i] != i + WhichResult ||
-        (unsigned) M[i+1] != i + NumElts + WhichResult)
+    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
       return false;
   }
   return true;
@@ -3229,9 +3238,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
   unsigned NumElts = VT.getVectorNumElements();
   WhichResult = (M[0] == 0 ? 0 : 1);
   for (unsigned i = 0; i < NumElts; i += 2) {
-    if (M[i] < 0) continue;
-    if ((unsigned) M[i] != i + WhichResult ||
-        (unsigned) M[i+1] != i + WhichResult)
+    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
       return false;
   }
   return true;
@@ -3246,6 +3254,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
   unsigned NumElts = VT.getVectorNumElements();
   WhichResult = (M[0] == 0 ? 0 : 1);
   for (unsigned i = 0; i != NumElts; ++i) {
+    if (M[i] < 0) continue; // ignore UNDEF indices
    if ((unsigned) M[i] != 2 * i + WhichResult)
       return false;
   }
@@ -3271,7 +3280,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
   for (unsigned j = 0; j != 2; ++j) {
     unsigned Idx = WhichResult;
     for (unsigned i = 0; i != Half; ++i) {
-      if ((unsigned) M[i + j * Half] != Idx)
+      int MIdx = M[i + j * Half];
+      if (MIdx >= 0 && (unsigned) MIdx != Idx)
         return false;
       Idx += 2;
     }
@@ -3294,8 +3304,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
   WhichResult = (M[0] == 0 ? 0 : 1);
   unsigned Idx = WhichResult * NumElts / 2;
   for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((unsigned) M[i] != Idx ||
-        (unsigned) M[i+1] != Idx + NumElts)
+    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
       return false;
     Idx += 1;
   }
@@ -3320,8 +3330,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
   WhichResult = (M[0] == 0 ? 0 : 1);
   unsigned Idx = WhichResult * NumElts / 2;
   for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((unsigned) M[i] != Idx ||
-        (unsigned) M[i+1] != Idx)
+    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
      return false;
     Idx += 1;
   }
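Every hunk above applies the same rule: a negative shuffle index denotes an UNDEF lane, and an UNDEF lane should match any expected value rather than reject the whole mask. A minimal standalone C++ sketch of that rule, using the VREV block-reversal check as the example (names here are illustrative, not the LLVM code itself):

// Standalone sketch of the "negative index matches anything" convention.
// matchesBlockReverse is an illustrative name, not an LLVM function.
#include <cassert>
#include <vector>

// VREV-style check: within each block of BlockElts lanes, indices must be
// reversed; UNDEF (negative) indices are skipped instead of failing the match.
static bool matchesBlockReverse(const std::vector<int> &M, unsigned BlockElts) {
  for (unsigned i = 0; i < M.size(); ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    unsigned Expected = (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts);
    if (static_cast<unsigned>(M[i]) != Expected)
      return false;
  }
  return true;
}

int main() {
  // <7,6,5,4,3,2,1,0> with the first two lanes undefined still matches.
  std::vector<int> M = {-1, -1, 5, 4, 3, 2, 1, 0};
  assert(matchesBlockReverse(M, 8));
  // A lane that disagrees with the reversal still rejects the mask.
  M[2] = 3;
  assert(!matchesBlockReverse(M, 8));
}

The one exception in the patch is isVEXTMask, which must still fail on an UNDEF first index because the VEXT immediate is derived from M[0].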
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index c11a67c6c43..e460a84f626 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -54,3 +54,23 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 	ret <4 x i32> %tmp3
 }
 
+; Undef shuffle indices should not prevent matching to VEXT:
+
+define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd_undef:
+;CHECK: vext
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
+	ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq_undef:
+;CHECK: vext
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+	ret <16 x i8> %tmp3
+}
+
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index deed554d842..e1fe64b02d9 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -111,3 +111,21 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 	ret <16 x i8> %tmp2
 }
+
+; Undef shuffle indices should not prevent matching to VREV:
+
+define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8_undef:
+;CHECK: vrev64.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 undef, i32 undef, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+	ret <8 x i8> %tmp2
+}
+
+define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16_undef:
+;CHECK: vrev32.16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
+	ret <8 x i16> %tmp2
+}
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index 10bb10ac24a..b1c2f93b47c 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -95,3 +95,30 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
 	%tmp5 = fadd <4 x float> %tmp3, %tmp4
 	ret <4 x float> %tmp5
 }
+
+; Undef shuffle indices should not prevent matching to VTRN:
+
+define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8_undef:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
+	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
+	%tmp5 = add <8 x i8> %tmp3, %tmp4
+	ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16_undef:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
+	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
+	%tmp5 = add <8 x i16> %tmp3, %tmp4
+	ret <8 x i16> %tmp5
+}
+
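The paired matchers (VTRN, VZIP) check two lanes per loop iteration, and the wildcard rule applies to each lane independently. A hedged C++ sketch of the VTRN-style predicate, exercised with the vtrni8_undef mask above (illustrative names, not the LLVM code):

// Standalone sketch of the pairwise VTRN-style check with UNDEF wildcards.
#include <cassert>
#include <vector>

static bool matchesTransposePairs(const std::vector<int> &M,
                                  unsigned WhichResult) {
  unsigned NumElts = M.size();
  for (unsigned i = 0; i < NumElts; i += 2) {
    // Each even/odd pair must be (i + WhichResult, i + NumElts + WhichResult),
    // except that UNDEF (negative) lanes are allowed to be anything.
    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
        (M[i+1] >= 0 &&
         static_cast<unsigned>(M[i+1]) != i + NumElts + WhichResult))
      return false;
  }
  return true;
}

int main() {
  // The mask from vtrni8_undef above, with undefs in lanes 1 and 4.
  std::vector<int> M = {0, -1, 2, 10, -1, 12, 6, 14};
  assert(matchesTransposePairs(M, 0));
}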
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index 6cef188d76d..9130f628919 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -73,3 +73,30 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
 	%tmp5 = fadd <4 x float> %tmp3, %tmp4
 	ret <4 x float> %tmp5
 }
+
+; Undef shuffle indices should not prevent matching to VUZP:
+
+define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8_undef:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
+	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
+	%tmp5 = add <8 x i8> %tmp3, %tmp4
+	ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16_undef:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
+	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
+	%tmp5 = add <8 x i16> %tmp3, %tmp4
+	ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
index a9ecdcab42d..926970aeb29 100644
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -73,3 +73,30 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
 	%tmp5 = fadd <4 x float> %tmp3, %tmp4
 	ret <4 x float> %tmp5
 }
+
+; Undef shuffle indices should not prevent matching to VZIP:
+
+define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
+	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
+	%tmp5 = add <8 x i8> %tmp3, %tmp4
+	ret <8 x i8> %tmp5
+}
+
+define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 15, i32 31>
+	%tmp5 = add <16 x i8> %tmp3, %tmp4
+	ret <16 x i8> %tmp5
+}
+
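A closing note: isVEXTMask is the one matcher in this patch that cannot be fully optimistic about UNDEF, since the VEXT immediate is derived from M[0]. A simplified C++ sketch of why, with no wrap-around or reversed-operand handling (illustrative names, not the LLVM code):

// Standalone sketch: deriving a VEXT-style extract offset from a mask.
#include <cassert>
#include <vector>

static bool matchesVEXT(const std::vector<int> &M, unsigned &Imm) {
  // The extract offset comes from the first index, so an UNDEF there
  // leaves the immediate unknown and the match must fail.
  if (M[0] < 0)
    return false;
  Imm = M[0];
  for (unsigned i = 1; i < M.size(); ++i) {
    if (M[i] < 0)
      continue; // later UNDEF lanes are fine: they match any value
    if (static_cast<unsigned>(M[i]) != Imm + i)
      return false;
  }
  return true;
}

int main() {
  unsigned Imm = 0;
  // The mask from test_vextd_undef above: starts at 3, two undef lanes.
  std::vector<int> M = {3, -1, -1, 6, 7, 8, 9, 10};
  assert(matchesVEXT(M, Imm) && Imm == 3);
  // An UNDEF first lane cannot produce an immediate, so the match fails.
  std::vector<int> U = {-1, 4, 5, 6, 7, 8, 9, 10};
  assert(!matchesVEXT(U, Imm));
}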