mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-13 14:46:15 +00:00
[X86][SSE] pslldq/psrldq shuffle mask decodes
Patch to provide shuffle decodes and asm comments for the sse pslldq/psrldq SSE2/AVX2 byte shift instructions. Differential Revision: http://reviews.llvm.org/D5598 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219738 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b315be205c
commit
84a3feea38
@ -199,6 +199,44 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
||||
DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
|
||||
break;
|
||||
|
||||
case X86::PSLLDQri:
|
||||
case X86::VPSLLDQri:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
if(MI->getOperand(MI->getNumOperands()-1).isImm())
|
||||
DecodePSLLDQMask(MVT::v16i8,
|
||||
MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
break;
|
||||
|
||||
case X86::VPSLLDQYri:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
if(MI->getOperand(MI->getNumOperands()-1).isImm())
|
||||
DecodePSLLDQMask(MVT::v32i8,
|
||||
MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
break;
|
||||
|
||||
case X86::PSRLDQri:
|
||||
case X86::VPSRLDQri:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
if(MI->getOperand(MI->getNumOperands()-1).isImm())
|
||||
DecodePSRLDQMask(MVT::v16i8,
|
||||
MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
break;
|
||||
|
||||
case X86::VPSRLDQYri:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
if(MI->getOperand(MI->getNumOperands()-1).isImm())
|
||||
DecodePSRLDQMask(MVT::v32i8,
|
||||
MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
break;
|
||||
|
||||
case X86::PALIGNR128rr:
|
||||
case X86::VPALIGNR128rr:
|
||||
Src1Name = getRegName(MI->getOperand(2).getReg());
|
||||
|
@ -79,6 +79,35 @@ void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Decode a PSLLDQ/VPSLLDQ byte-shift-left immediate into a shuffle mask.
/// Each mask element corresponds to one byte of the vector; the low Imm
/// bytes of every 128-bit lane become zero and the remaining bytes are
/// taken from the source, shifted up by Imm within that lane.
void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
  unsigned SizeInBits = VT.getSizeInBits();
  unsigned TotalElts = SizeInBits / 8;    // one mask entry per byte
  unsigned LaneCount = SizeInBits / 128;  // shift operates per 128-bit lane
  unsigned EltsPerLane = TotalElts / LaneCount;

  for (unsigned LaneBase = 0; LaneBase != TotalElts; LaneBase += EltsPerLane) {
    for (unsigned Elt = 0; Elt != EltsPerLane; ++Elt) {
      // Positions below the shift amount are zero-filled; everything else
      // maps back to the lane-relative source byte (Elt - Imm).
      ShuffleMask.push_back(Elt < Imm ? SM_SentinelZero
                                      : (int)(Elt - Imm + LaneBase));
    }
  }
}
|
||||
|
||||
/// Decode a PSRLDQ/VPSRLDQ byte-shift-right immediate into a shuffle mask.
/// Each mask element corresponds to one byte of the vector; within every
/// 128-bit lane the bytes are pulled down by Imm, and positions whose
/// source would fall past the end of the lane are zero-filled.
void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
  unsigned SizeInBits = VT.getSizeInBits();
  unsigned TotalElts = SizeInBits / 8;    // one mask entry per byte
  unsigned LaneCount = SizeInBits / 128;  // shift operates per 128-bit lane
  unsigned EltsPerLane = TotalElts / LaneCount;

  for (unsigned LaneBase = 0; LaneBase != TotalElts; LaneBase += EltsPerLane) {
    for (unsigned Elt = 0; Elt != EltsPerLane; ++Elt) {
      // Source byte Elt + Imm; if that would read past the lane, the
      // result byte is zero.
      unsigned SrcIdx = Elt + Imm;
      ShuffleMask.push_back(SrcIdx >= EltsPerLane
                                ? SM_SentinelZero
                                : (int)(SrcIdx + LaneBase));
    }
  }
}
|
||||
|
||||
void DecodePALIGNRMask(MVT VT, unsigned Imm,
|
||||
SmallVectorImpl<int> &ShuffleMask) {
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
@ -40,6 +40,10 @@ void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
|
||||
|
@ -455,21 +455,21 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
|
||||
; CHECK: vpslldq
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
|
||||
; CHECK: vpslldq
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
|
||||
; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
|
||||
; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
@ -551,21 +551,21 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
|
||||
; CHECK: vpsrldq
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
|
||||
; CHECK: vpsrldq
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
|
||||
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
|
||||
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
|
@ -158,21 +158,21 @@ define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
|
||||
; CHECK: vpslldq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
|
||||
; CHECK: vpslldq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
|
||||
; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
|
||||
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
|
||||
; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
|
||||
%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
@ -254,21 +254,21 @@ define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
|
||||
; CHECK: vpsrldq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
|
||||
; CHECK: vpsrldq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
|
||||
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
|
||||
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
|
||||
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
|
||||
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
|
@ -408,21 +408,21 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
|
||||
; CHECK: pslldq
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
|
||||
; CHECK: pslldq
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
|
||||
; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
|
||||
; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
@ -504,21 +504,21 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
|
||||
; CHECK: psrldq
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
|
||||
; CHECK: psrldq
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
|
||||
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
|
||||
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
|
||||
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
|
@ -1397,77 +1397,77 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
|
||||
|
||||
define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
|
||||
; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq $2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq $2, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 0
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 0
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
|
||||
ret <8 x i16> %shuffle
|
||||
}
|
||||
|
||||
define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
|
||||
; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq $10, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq $10, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 0
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 0
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
|
||||
ret <8 x i16> %shuffle
|
||||
}
|
||||
|
||||
define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
|
||||
; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq $14, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq $14, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 0
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 0
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
|
||||
ret <8 x i16> %shuffle
|
||||
}
|
||||
|
||||
define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
|
||||
; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq $4, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq $4, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 3
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 3
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
ret <8 x i16> %shuffle
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user