Mirror of https://github.com/RPCS3/llvm.git (synced 2025-02-14 15:57:47 +00:00)
[X86][SSE41] Avoid variable blend for constant v8i16 shifts
The SSE41 v8i16 shift lowering using (v)pblendvb is great for non-constant shift amounts, but if the shift amount is constant we can efficiently reduce the VSELECT to shuffles with the pre-SSE41 lowering.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@263383 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 031194de74
commit b419ca4ee0
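For illustration, here is a rough SSE4.1 intrinsics sketch (not the LLVM lowering itself) of what the constant path boils down to for per-lane logical right shifts by {0,1,2,3,4,5,6,7}, the amounts the constant_shift_v8i16 tests below appear to exercise: each step shifts every lane by a power of two and keeps the shifted lanes with an immediate PBLENDW mask, so no variable PBLENDVB select mask ever has to be materialised. The helper name is invented for the example.

#include <smmintrin.h>  // SSE4.1

// Logical right shift of each i16 lane of v by {0,1,2,3,4,5,6,7}.
static __m128i lshr_v8i16_by_0_to_7(__m128i v) {
  // Amount bit 2 is set for lanes 4..7: take the >>4 result there.
  v = _mm_blend_epi16(v, _mm_srli_epi16(v, 4), 0xF0);
  // Amount bit 1 is set for lanes 2,3,6,7: take the >>2 result there.
  v = _mm_blend_epi16(v, _mm_srli_epi16(v, 2), 0xCC);
  // Amount bit 0 is set for the odd lanes: take the >>1 result there.
  v = _mm_blend_epi16(v, _mm_srli_epi16(v, 1), 0xAA);
  return v;
}

This is the shape of the new SSE41/AVX1 output in the checks below: one psrlw/pblendw (or vpsrlw/vpblendw) pair per shift-amount bit, with no mask loads.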
@@ -19785,10 +19785,15 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
   if (VT == MVT::v8i16) {
     unsigned ShiftOpcode = Op->getOpcode();
 
+    // If we have a constant shift amount, the non-SSE41 path is best as
+    // avoiding bitcasts make it easier to constant fold and reduce to PBLENDW.
+    bool UseSSE41 = Subtarget.hasSSE41() &&
+                    !ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
+
     auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
       // On SSE41 targets we make use of the fact that VSELECT lowers
       // to PBLENDVB which selects bytes based just on the sign bit.
-      if (Subtarget.hasSSE41()) {
+      if (UseSSE41) {
         MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
         V0 = DAG.getBitcast(ExtVT, V0);
         V1 = DAG.getBitcast(ExtVT, V1);
@@ -19805,7 +19810,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
     };
 
     // Turn 'a' into a mask suitable for VSELECT: a = a << 12;
-    if (Subtarget.hasSSE41()) {
+    if (UseSSE41) {
       // On SSE41 targets we need to replicate the shift mask in both
       // bytes for PBLENDVB.
       Amt = DAG.getNode(
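For contrast, a rough intrinsics sketch (again, not the LLVM code itself) of the variable-amount SSE4.1 path that the hunks above now reserve for non-constant shifts: (amt << 4) | (amt << 12) places the amount bit currently being tested in the sign bit of both bytes of each lane, PBLENDVB then selects per byte on that sign bit, and adding the selector to itself advances to the next lower amount bit. The helper name is invented for the example.

#include <smmintrin.h>  // SSE4.1

// v >> amt per i16 lane (amt in 0..15), using sign-bit byte selects.
static __m128i lshr_v8i16_variable(__m128i v, __m128i amt) {
  // Replicate amount bit 3 into the sign bit of both bytes of each lane.
  __m128i sel = _mm_or_si128(_mm_slli_epi16(amt, 4), _mm_slli_epi16(amt, 12));
  __m128i r = _mm_blendv_epi8(v, _mm_srli_epi16(v, 8), sel);  // amount bit 3
  sel = _mm_add_epi16(sel, sel);                              // next bit
  r = _mm_blendv_epi8(r, _mm_srli_epi16(r, 4), sel);          // amount bit 2
  sel = _mm_add_epi16(sel, sel);
  r = _mm_blendv_epi8(r, _mm_srli_epi16(r, 2), sel);          // amount bit 1
  sel = _mm_add_epi16(sel, sel);
  r = _mm_blendv_epi8(r, _mm_srli_epi16(r, 1), sel);          // amount bit 0
  return r;
}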
@@ -955,44 +955,34 @@ define <8 x i16> @constant_rotate_v8i16(<8 x i16> %a) nounwind {
;
; SSE41-LABEL: constant_rotate_v8i16:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE41-NEXT: pmullw %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: psrlw $8, %xmm3
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [256,61680,57568,53456,49344,45232,41120,37008]
; SSE41-NEXT: pblendvb %xmm3, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: psrlw $4, %xmm3
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [512,57824,49600,41376,33152,24928,16704,8480]
; SSE41-NEXT: pblendvb %xmm3, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: psrlw $2, %xmm3
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [1024,50112,33664,17216,768,49856,33408,16960]
; SSE41-NEXT: pblendvb %xmm3, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: psrlw $1, %xmm3
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [2048,34688,1792,34432,1536,34176,1280,33920]
; SSE41-NEXT: pblendvb %xmm3, %xmm1
; SSE41-NEXT: por %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE41-NEXT: pmullw %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4],xmm2[5,6,7]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2],xmm0[3,4],xmm2[5,6],xmm0[7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: psrlw $1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3],xmm2[4],xmm0[5],xmm2[6],xmm0[7]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_rotate_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [256,61680,57568,53456,49344,45232,41120,37008]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [512,57824,49600,41376,33152,24928,16704,8480]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4],xmm0[5,6,7]
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1024,50112,33664,17216,768,49856,33408,16960]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2],xmm0[3,4],xmm2[5,6],xmm0[7]
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2048,34688,1792,34432,1536,34176,1280,33920]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
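As a reading aid for the checks above, a rough intrinsics sketch (not the generated code itself) of what the new constant_rotate_v8i16 sequence computes, assuming per-lane rotate amounts {0..7}: the left-shift half is a single PMULLW by per-lane powers of two, the right-shift half reuses the psrlw/pblendw chain, and POR combines the two. The helper name is invented.

#include <smmintrin.h>  // SSE4.1

// Rotate each i16 lane of v left by {0,1,2,3,4,5,6,7}.
static __m128i rotl_v8i16_by_0_to_7(__m128i v) {
  // Left-shift half: multiply lane i by 2^i (pmullw by [1,2,4,...,128]).
  __m128i lo = _mm_mullo_epi16(v, _mm_set_epi16(128, 64, 32, 16, 8, 4, 2, 1));
  // Right-shift half: v >> (16 - i); lane 0 simply keeps v, which the final
  // OR makes harmless since rotate-by-0 is the identity.
  __m128i hi = _mm_blend_epi16(v, _mm_srli_epi16(v, 8), 0xFE);
  hi = _mm_blend_epi16(hi, _mm_srli_epi16(hi, 4), 0x1E);
  hi = _mm_blend_epi16(hi, _mm_srli_epi16(hi, 2), 0x66);
  hi = _mm_blend_epi16(hi, _mm_srli_epi16(hi, 1), 0xAA);
  return _mm_or_si128(lo, hi);
}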
@@ -506,29 +506,21 @@ define <16 x i16> @constant_rotate_v16i16(<16 x i16> %a) nounwind {
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [32896,28784,24672,20560,16448,12336,8224,4112]
; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,57568,49344,41120,32896,24672,16448,8224]
; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4],xmm2[5,6,7]
; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [512,49600,33152,16704,256,49344,32896,16448]
; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3,4],xmm3[5,6],xmm2[7]
; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1024,33664,768,33408,512,33152,256,32896]
; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,61680,57568,53456,49344,45232,41120,37008]
; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [512,57824,49600,41376,33152,24928,16704,8480]
; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3,4],xmm0[5,6,7]
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1024,50112,33664,17216,768,49856,33408,16960]
; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2],xmm0[3,4],xmm3[5,6],xmm0[7]
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [2048,34688,1792,34432,1536,34176,1280,33920]
; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3],xmm0[4],xmm3[5],xmm0[6],xmm3[7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -1192,39 +1192,24 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE41-LABEL: constant_shift_v8i16:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psraw $8, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,4112,8224,12336,16448,20560,24672,28784]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psraw $4, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,8224,16448,24672,32896,41120,49344,57568]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: psraw $4, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psraw $2, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,16448,32896,49344,256,16704,33152,49600]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psraw $1, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,32896,256,33152,512,33408,768,33664]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: psraw $1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3],xmm2[4],xmm0[5],xmm2[6],xmm0[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_shift_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,4112,8224,12336,16448,20560,24672,28784]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $4, %xmm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8224,16448,24672,32896,41120,49344,57568]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,16448,32896,49344,256,16704,33152,49600]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,32896,256,33152,512,33408,768,33664]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v8i16:
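The arithmetic-shift checks above follow the same pattern; a rough sketch assuming per-lane amounts {0..7}, with only the shift intrinsic changing from logical to arithmetic (helper name invented):

#include <smmintrin.h>  // SSE4.1

// Arithmetic right shift of each i16 lane of v by {0,1,2,3,4,5,6,7}.
static __m128i ashr_v8i16_by_0_to_7(__m128i v) {
  v = _mm_blend_epi16(v, _mm_srai_epi16(v, 4), 0xF0);  // amount bit 2
  v = _mm_blend_epi16(v, _mm_srai_epi16(v, 2), 0xCC);  // amount bit 1
  v = _mm_blend_epi16(v, _mm_srai_epi16(v, 1), 0xAA);  // amount bit 0
  return v;
}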
@@ -763,30 +763,19 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: constant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsraw $8, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32896,37008,41120,45232,49344,53456,57568,61680]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsraw $8, %xmm1, %xmm1
; AVX1-NEXT: vpsraw $4, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [256,8480,16704,24928,33152,41376,49600,57824]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsraw $2, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [512,16960,33408,49856,768,17216,33664,50112]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpsraw $1, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1024,33920,1280,34176,1536,34432,1792,34688]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,4112,8224,12336,16448,20560,24672,28784]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX1-NEXT: vpsraw $4, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,8224,16448,24672,32896,41120,49344,57568]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,16448,32896,49344,256,16704,33152,49600]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,32896,256,33152,512,33408,768,33664]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -926,39 +926,24 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE41-LABEL: constant_shift_v8i16:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,4112,8224,12336,16448,20560,24672,28784]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrlw $4, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,8224,16448,24672,32896,41120,49344,57568]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: psrlw $4, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrlw $2, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,16448,32896,49344,256,16704,33152,49600]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrlw $1, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,32896,256,33152,512,33408,768,33664]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: psrlw $1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3],xmm2[4],xmm0[5],xmm2[6],xmm0[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_shift_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,4112,8224,12336,16448,20560,24672,28784]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8224,16448,24672,32896,41120,49344,57568]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,16448,32896,49344,256,16704,33152,49600]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,32896,256,33152,512,33408,768,33664]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v8i16:
@@ -618,30 +618,19 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: constant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32896,37008,41120,45232,49344,53456,57568,61680]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [256,8480,16704,24928,33152,41376,49600,57824]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [512,16960,33408,49856,768,17216,33664,50112]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1024,33920,1280,34176,1536,34432,1792,34688]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,4112,8224,12336,16448,20560,24672,28784]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,8224,16448,24672,32896,41120,49344,57568]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,16448,32896,49344,256,16704,33152,49600]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,32896,256,33152,512,33408,768,33664]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;