mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-20 23:30:54 +00:00
[X86] Allow combineTruncateWithSat to use pack instructions for i16->i8 without AVX512BW.
We need AVX512BW to be able to truncate an i16 vector. Without it, we have to extend i16->i32 and then truncate i32->i8, but if we do that we won't be able to remove the min/max, at least not without more special handling. llvm-svn: 368623
This commit is contained in:
parent
acc8079f8e
commit
e07e593782
@ -39024,7 +39024,8 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
|
||||
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
|
||||
}
|
||||
if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) &&
|
||||
!Subtarget.hasAVX512() &&
|
||||
!(Subtarget.hasAVX512() && InSVT == MVT::i32) &&
|
||||
!(Subtarget.hasBWI() && InSVT == MVT::i16) &&
|
||||
(SVT == MVT::i8 || SVT == MVT::i16) &&
|
||||
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
|
||||
if (auto USatVal = detectSSatPattern(In, VT, true)) {
|
||||
|
@ -753,11 +753,9 @@ define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) {
|
||||
define void @smax_usat_trunc_wb_256_mem1(<16 x i16> %i, <16 x i8>* %res) {
|
||||
; KNL-LABEL: smax_usat_trunc_wb_256_mem1:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; KNL-NEXT: vpmovdb %zmm0, (%rdi)
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; KNL-NEXT: vmovdqu %xmm0, (%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
@ -781,11 +779,9 @@ define void @smax_usat_trunc_wb_256_mem1(<16 x i16> %i, <16 x i8>* %res) {
|
||||
define void @smax_usat_trunc_wb_256_mem2(<16 x i16> %i, <16 x i8>* %res) {
|
||||
; KNL-LABEL: smax_usat_trunc_wb_256_mem2:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; KNL-NEXT: vpmovdb %zmm0, (%rdi)
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; KNL-NEXT: vmovdqu %xmm0, (%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
@ -808,11 +804,8 @@ define void @smax_usat_trunc_wb_256_mem2(<16 x i16> %i, <16 x i8>* %res) {
|
||||
define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) {
|
||||
; KNL-LABEL: smax_usat_trunc_wb_256:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
@ -834,9 +827,6 @@ define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) {
|
||||
define void @smax_usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
|
||||
; KNL-LABEL: smax_usat_trunc_wb_128_mem:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||
; KNL-NEXT: vmovq %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
|
@ -6452,17 +6452,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
|
||||
; AVX512F-NEXT: vpminsw %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpminsw %ymm3, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408]
|
||||
; AVX512F-NEXT: vpmaxsw %ymm3, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpmaxsw %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX512F-NEXT: vpmovmskb %ymm2, %eax
|
||||
; AVX512F-NEXT: notl %eax
|
||||
; AVX512F-NEXT: testb $1, %al
|
||||
@ -7200,10 +7191,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512F-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovmskb %xmm1, %eax
|
||||
; AVX512F-NEXT: xorl $65535, %eax # imm = 0xFFFF
|
||||
; AVX512F-NEXT: testb $1, %al
|
||||
@ -7558,8 +7547,6 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
|
||||
; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
|
||||
; AVX512F-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: testb $1, %al
|
||||
|
@ -2961,21 +2961,15 @@ define <16 x i8> @trunc_packus_v16i16_v16i8(<16 x i16> %a0) {
|
||||
;
|
||||
; AVX512F-LABEL: trunc_packus_v16i16_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: trunc_packus_v16i16_v16i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
@ -3029,32 +3023,14 @@ define <32 x i8> @trunc_packus_v32i16_v32i8(<32 x i16> %a0) {
|
||||
;
|
||||
; AVX512F-LABEL: trunc_packus_v32i16_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512F-NEXT: vpminsw %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpminsw %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: trunc_packus_v32i16_v32i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512VL-NEXT: vpminsw %ymm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpminsw %ymm2, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: trunc_packus_v32i16_v32i8:
|
||||
|
@ -2941,19 +2941,15 @@ define <16 x i8> @trunc_ssat_v16i16_v16i8(<16 x i16> %a0) {
|
||||
;
|
||||
; AVX512F-LABEL: trunc_ssat_v16i16_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: trunc_ssat_v16i16_v16i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512VL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
@ -3004,32 +3000,14 @@ define <32 x i8> @trunc_ssat_v32i16_v32i8(<32 x i16> %a0) {
|
||||
;
|
||||
; AVX512F-LABEL: trunc_ssat_v32i16_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
|
||||
; AVX512F-NEXT: vpminsw %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpminsw %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408]
|
||||
; AVX512F-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: trunc_ssat_v32i16_v32i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
|
||||
; AVX512VL-NEXT: vpminsw %ymm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpminsw %ymm2, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408]
|
||||
; AVX512VL-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: trunc_ssat_v32i16_v32i8:
|
||||
|
Loading…
x
Reference in New Issue
Block a user