Mirror of https://github.com/RPCS3/llvm.git (synced 2025-02-14 15:57:47 +00:00)
[X86][SSE] Improve i16 splatting shuffles
Better handling of the annoying pshuflw/pshufhw ops, which only shuffle the lower/upper halves of a vector.

Added vXi16 unary shuffle support for cases where i16 elements (from the same half of the source) are being splatted to the whole of one of the halves. This avoids the general lowering case, which must shuffle the 32-bit elements first - meaning that we used to end up with unnecessary duplicate pshuflw/pshufhw shuffles.

Note: this has the side effect that a lot of SSSE3 test cases no longer need to use PSHUFB, as the sequence falls below the 3-op combine threshold at which PSHUFB is typically worth it. I've raised PR26183 to discuss whether the threshold should be changed and whether it needs to be made more specific to the target CPU.

Differential Revision: http://reviews.llvm.org/D14901

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258440 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent ad24fed421
commit 180abd8ba0
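To make the effect of the new lowering concrete before the diff, here is a small standalone sketch (illustration only, not part of the patch) that reproduces the two-instruction splat sequences with SSE2 intrinsics and checks them against a scalar splat. The immediates mirror the pshuflw/pshufhw + pshufd pairs that appear in the updated test checks below (word 0 as in shuffle_v8i16_00000000, word 5 as in funcB).

// Standalone demo (not from the patch): the two-op i16 splat sequences
// that the new lowering emits, written with SSE2 intrinsics.
#include <emmintrin.h>
#include <cassert>
#include <cstdint>

int main() {
  alignas(16) int16_t in[8] = {10, 11, 12, 13, 14, 15, 16, 17};
  __m128i v = _mm_load_si128(reinterpret_cast<const __m128i *>(in));

  // Splat word 0: pshuflw $0x00 broadcasts it across the low four words,
  // then pshufd $0x50 (dwords [0,0,1,1]) copies those dwords everywhere.
  __m128i lo = _mm_shufflelo_epi16(v, _MM_SHUFFLE(0, 0, 0, 0));
  __m128i s0 = _mm_shuffle_epi32(lo, _MM_SHUFFLE(1, 1, 0, 0));

  // Splat word 5: pshufhw $0x55 broadcasts it across the high four words,
  // then pshufd $0xFA (dwords [2,2,3,3]) - matching the new funcB codegen.
  __m128i hi = _mm_shufflehi_epi16(v, _MM_SHUFFLE(1, 1, 1, 1));
  __m128i s5 = _mm_shuffle_epi32(hi, _MM_SHUFFLE(3, 3, 2, 2));

  alignas(16) int16_t out0[8], out5[8];
  _mm_store_si128(reinterpret_cast<__m128i *>(out0), s0);
  _mm_store_si128(reinterpret_cast<__m128i *>(out5), s5);
  for (int i = 0; i < 8; ++i) {
    assert(out0[i] == in[0]); // every lane equals word 0
    assert(out5[i] == in[5]); // every lane equals word 5
  }
  return 0;
}

Either sequence is two shuffles with no mask constant to load, which is why it now beats the pshufb lowering on SSSE3 targets in the test diffs below.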
@@ -9052,6 +9052,26 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
  MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
  MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);

  // If we are splatting two values from one half - one to each half, then
  // we can shuffle that half so each is splatted to a dword, then splat those
  // to their respective halves.
  auto SplatHalfs = [&](int LoInput, int HiInput, unsigned ShufWOp,
                        int DOffset) {
    int PSHUFHalfMask[] = {LoInput % 4, LoInput % 4, HiInput % 4, HiInput % 4};
    int PSHUFDMask[] = {DOffset + 0, DOffset + 0, DOffset + 1, DOffset + 1};
    V = DAG.getNode(ShufWOp, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
    V = DAG.getBitcast(PSHUFDVT, V);
    V = DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, V,
                    getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
    return DAG.getBitcast(VT, V);
  };

  if (NumLToL == 1 && NumLToH == 1 && (NumHToL + NumHToH) == 0)
    return SplatHalfs(LToLInputs[0], LToHInputs[0], X86ISD::PSHUFLW, 0);
  if (NumHToL == 1 && NumHToH == 1 && (NumLToL + NumLToH) == 0)
    return SplatHalfs(HToLInputs[0], HToHInputs[0], X86ISD::PSHUFHW, 2);

  // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
  // such inputs we can swap two of the dwords across the half mark and end up
  // with <=2 inputs to each half in each half. Once there, we can fall through
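For readers who want to trace the lambda above without building a DAG, the following standalone sketch models what SplatHalfs computes on a plain lane-index array. The imm8 helper reflects the 2-bit-per-lane immediate encoding that getV4X86ShuffleImm8ForMask is assumed to produce; everything else is hypothetical scaffolding for illustration, not LLVM code.

// Hypothetical model of the SplatHalfs lambda: apply the same two masks
// to an array of lane values and print the immediates that would be used.
#include <array>
#include <cstdio>

// Assumed 2-bit-per-lane encoding, as used by pshuflw/pshufhw/pshufd.
static unsigned imm8(const std::array<int, 4> &M) {
  return (M[3] << 6) | (M[2] << 4) | (M[1] << 2) | (M[0] << 0);
}

// LoInput/HiInput are word indices (0-7) of the values destined for the
// low and high halves; UseHigh selects PSHUFHW (words 4-7) over PSHUFLW.
static std::array<int, 8> splatHalfs(std::array<int, 8> V, int LoInput,
                                     int HiInput, bool UseHigh) {
  std::array<int, 4> HalfMask = {LoInput % 4, LoInput % 4,
                                 HiInput % 4, HiInput % 4};
  int Base = UseHigh ? 4 : 0;
  std::array<int, 8> T = V;
  for (int i = 0; i < 4; ++i)      // pshuflw/pshufhw step: one half only
    T[Base + i] = V[Base + HalfMask[i]];

  int DOffset = UseHigh ? 2 : 0;   // matches the 0/2 passed in the calls
  std::array<int, 4> DMask = {DOffset + 0, DOffset + 0,
                              DOffset + 1, DOffset + 1};
  std::array<int, 8> R;
  for (int i = 0; i < 4; ++i) {    // pshufd step: dword granularity
    R[2 * i] = T[2 * DMask[i]];
    R[2 * i + 1] = T[2 * DMask[i] + 1];
  }
  std::printf("pshufw imm=0x%02x pshufd imm=0x%02x\n", imm8(HalfMask),
              imm8(DMask));
  return R;
}

int main() {
  std::array<int, 8> V = {0, 1, 2, 3, 4, 5, 6, 7};
  // Splat word 0 to the low half and word 2 to the high half:
  std::array<int, 8> R = splatHalfs(V, 0, 2, /*UseHigh=*/false);
  for (int Lane : R)
    std::printf("%d ", Lane);      // expect: 0 0 0 0 2 2 2 2
  std::printf("\n");
  return 0;
}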
@@ -15,7 +15,8 @@ entry:
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
@@ -54,30 +54,21 @@ vector.ph:
}

define void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
; SSE2-LABEL: test3:
; SSE2: ## BB#0: ## %vector.ph
; SSE2-NEXT: movd %esi, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: movdqu (%rdi), %xmm1
; SSE2-NEXT: psubusw %xmm0, %xmm1
; SSE2-NEXT: movdqu %xmm1, (%rdi)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test3:
; SSSE3: ## BB#0: ## %vector.ph
; SSSE3-NEXT: movd %esi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: movdqu (%rdi), %xmm1
; SSSE3-NEXT: psubusw %xmm0, %xmm1
; SSSE3-NEXT: movdqu %xmm1, (%rdi)
; SSSE3-NEXT: retq
; SSE-LABEL: test3:
; SSE: ## BB#0: ## %vector.ph
; SSE-NEXT: movd %esi, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT: movdqu (%rdi), %xmm1
; SSE-NEXT: psubusw %xmm0, %xmm1
; SSE-NEXT: movdqu %xmm1, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: test3:
; AVX1: ## BB#0: ## %vector.ph
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vmovdqu (%rdi), %xmm1
; AVX1-NEXT: vpsubusw %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rdi)

@@ -159,9 +150,8 @@ define void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
; SSE2: ## BB#0: ## %vector.ph
; SSE2-NEXT: movd %esi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqu (%rdi), %xmm1
; SSE2-NEXT: psubusb %xmm0, %xmm1
; SSE2-NEXT: movdqu %xmm1, (%rdi)

@@ -304,46 +294,34 @@ vector.ph:
}

define void @test9(i16* nocapture %head, i16 zeroext %w) nounwind {
; SSE2-LABEL: test9:
; SSE2: ## BB#0: ## %vector.ph
; SSE2-NEXT: movd %esi, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: movdqu (%rdi), %xmm1
; SSE2-NEXT: movdqu 16(%rdi), %xmm2
; SSE2-NEXT: psubusw %xmm0, %xmm1
; SSE2-NEXT: psubusw %xmm0, %xmm2
; SSE2-NEXT: movdqu %xmm2, 16(%rdi)
; SSE2-NEXT: movdqu %xmm1, (%rdi)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test9:
; SSSE3: ## BB#0: ## %vector.ph
; SSSE3-NEXT: movd %esi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: movdqu (%rdi), %xmm1
; SSSE3-NEXT: movdqu 16(%rdi), %xmm2
; SSSE3-NEXT: psubusw %xmm0, %xmm1
; SSSE3-NEXT: psubusw %xmm0, %xmm2
; SSSE3-NEXT: movdqu %xmm2, 16(%rdi)
; SSSE3-NEXT: movdqu %xmm1, (%rdi)
; SSSE3-NEXT: retq
; SSE-LABEL: test9:
; SSE: ## BB#0: ## %vector.ph
; SSE-NEXT: movd %esi, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT: movdqu (%rdi), %xmm1
; SSE-NEXT: movdqu 16(%rdi), %xmm2
; SSE-NEXT: psubusw %xmm0, %xmm1
; SSE-NEXT: psubusw %xmm0, %xmm2
; SSE-NEXT: movdqu %xmm2, 16(%rdi)
; SSE-NEXT: movdqu %xmm1, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: test9:
; AVX1: ## BB#0: ## %vector.ph
; AVX1-NEXT: vmovups (%rdi), %ymm0
; AVX1-NEXT: vmovd %esi, %xmm1
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovd %esi, %xmm2
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT: vpsubw %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
; AVX1-NEXT: vpmaxuw %xmm1, %xmm2, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper

@@ -471,9 +449,8 @@ define void @test12(i8* nocapture %head, i8 zeroext %w) nounwind {
; SSE2: ## BB#0: ## %vector.ph
; SSE2-NEXT: movd %esi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqu (%rdi), %xmm1
; SSE2-NEXT: movdqu 16(%rdi), %xmm2
; SSE2-NEXT: psubusb %xmm0, %xmm1
@@ -745,9 +745,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,1,1]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; SSE2-NEXT: psllw $5, %xmm3
; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]

@@ -949,9 +948,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; X32-SSE-LABEL: splatvar_shift_v16i8:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; X32-SSE-NEXT: pshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,4,4]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,1,1]
; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; X32-SSE-NEXT: psllw $5, %xmm3
; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]

@@ -597,9 +597,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
; SSE2-NEXT: psllw $5, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm3, %xmm3

@@ -727,9 +726,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; X32-SSE-LABEL: splatvar_shift_v16i8:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; X32-SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,4,4,4]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
; X32-SSE-NEXT: psllw $5, %xmm2
; X32-SSE-NEXT: pxor %xmm1, %xmm1
; X32-SSE-NEXT: pxor %xmm3, %xmm3

@@ -545,9 +545,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
; SSE2-NEXT: psllw $5, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm3, %xmm3

@@ -667,9 +666,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; X32-SSE-LABEL: splatvar_shift_v16i8:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; X32-SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,4,4,4]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
; X32-SSE-NEXT: psllw $5, %xmm2
; X32-SSE-NEXT: pxor %xmm1, %xmm1
; X32-SSE-NEXT: pxor %xmm3, %xmm3
@@ -9,20 +9,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
; FIXME: SSE2 should look like the following:
; FIXME-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; FIXME: # BB#0:
; FIXME-NEXT: punpcklbw %xmm0, %xmm0
; FIXME-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; FIXME-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
; FIXME-NEXT: retq
;
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:

@@ -55,9 +46,8 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:

@@ -82,10 +72,10 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,2,4,5,6,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:

@@ -182,33 +172,16 @@ define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(
}

define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
; FIXME: SSE2 should be the following:
; FIXME-LABEL: @shuffle_v16i8_0101010101010101
; FIXME: # BB#0:
; FIXME-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; FIXME-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
; FIXME-NEXT: retq
;
; SSE2-LABEL: shuffle_v16i8_0101010101010101:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_0101010101010101:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_0101010101010101:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
; SSE-LABEL: shuffle_v16i8_0101010101010101:
; SSE: # BB#0:
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v16i8_0101010101010101:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i8_0101010101010101:

@@ -252,9 +225,8 @@ define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2

@@ -1132,9 +1104,8 @@ define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(
; SSE2-NEXT: packuswb %xmm5, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,3,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,3,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-NEXT: pandn %xmm1, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq

@@ -1377,9 +1348,8 @@ define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) {
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v16i8_i32:

@@ -1420,9 +1390,8 @@ define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:

@@ -1466,9 +1435,8 @@ define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(i32* %ptr) {
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_i32:

@@ -1505,9 +1473,8 @@ define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(i32* %ptr) {
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_i32:

@@ -1545,9 +1512,8 @@ define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:

@@ -1592,9 +1558,8 @@ define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
@@ -61,26 +61,16 @@ define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
}

define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_00000000:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_00000000:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_00000000:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
; SSE-LABEL: shuffle_v8i16_00000000:
; SSE: # BB#0:
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_00000000:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i16_00000000:

@@ -2151,9 +2141,8 @@ define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_mem_v8i16_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v8i16_i32:

@@ -2190,9 +2179,8 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2: # BB#0:
; SSE2-NEXT: movswl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16:

@@ -2234,9 +2222,8 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32:

@@ -2272,9 +2259,9 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:

@@ -2311,9 +2298,8 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2: # BB#0:
; SSE2-NEXT: movswl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:

@@ -2357,9 +2343,9 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2: # BB#0:
; SSE2-NEXT: movswl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
@@ -7,7 +7,8 @@ target triple = "x86_64-unknown-unknown"
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;

@@ -22,10 +23,9 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,4]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;

@@ -42,7 +42,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_0
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq

@@ -60,7 +61,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_0
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq

@@ -78,7 +80,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_0
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq

@@ -96,7 +99,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_0
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq

@@ -114,7 +118,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_0
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq

@@ -132,7 +137,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_0
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq

@@ -153,7 +159,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;

@@ -161,7 +168,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq

@@ -175,7 +183,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;

@@ -196,7 +205,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;

@@ -216,7 +226,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;

@@ -236,7 +247,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;

@@ -256,7 +268,8 @@ define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;

@@ -276,7 +289,8 @@ define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;

@@ -296,7 +310,8 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;

@@ -313,16 +328,18 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle

@@ -331,16 +348,18 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_0
define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,3,3,6,6,7,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle

@@ -721,8 +740,9 @@ define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_2
;
; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1,16,17,16,17,20,21,16,17,16,17,16,17,28,29,16,17]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,1,1,4,4,5,5]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>

@@ -1162,7 +1182,8 @@ define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_1
define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0

@@ -1654,7 +1675,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;

@@ -3269,13 +3291,15 @@ define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19(<16 x i16>
define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>

@@ -3286,7 +3310,8 @@ define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a,
; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;

@@ -3311,7 +3336,8 @@ define <16 x i16> @shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u(<16 x i16>
define <16 x i16> @shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
; ALL-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
; ALL: # BB#0:
; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %shuffle

@@ -3321,13 +3347,15 @@ define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a,
; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %shuffle

@@ -3378,7 +3406,8 @@ define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
; AVX1-LABEL: insert_dup_mem_v16i16_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;

@@ -3398,7 +3427,8 @@ define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
; AVX1: # BB#0:
; AVX1-NEXT: movswl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;

@@ -3420,7 +3450,8 @@ define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -970,7 +970,8 @@ define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_
; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
; AVX1: # BB#0:
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;

@@ -988,21 +989,23 @@ define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_
define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq

@@ -1465,7 +1468,7 @@ define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,8,8,9,9,8,8,8,8,8,8,8,8]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;