[X86][SSE] Propagate undef upper elements from scalar_to_vector during shuffle combining

Only do this for integer types currently - floats types (in particular insertps) load folding often fails with this.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295208 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Simon Pilgrim 2017-02-15 17:41:33 +00:00
parent aae13371be
commit be2cd40ad4
8 changed files with 37 additions and 47 deletions

View File

@ -5618,10 +5618,16 @@ static bool setTargetShuffleZeroElements(SDValue N,
}
// SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF.
// TODO: We currently only set UNDEF for integer types - floats use the same
// registers as vectors and many of the scalar folded loads rely on the
// SCALAR_TO_VECTOR pattern.
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
(Size % V.getValueType().getVectorNumElements()) == 0) {
int Scale = Size / V.getValueType().getVectorNumElements();
if (((M / Scale) == 0) && X86::isZeroNode(V.getOperand(0)))
int Idx = M / Scale;
if (Idx != 0 && !VT.isFloatingPoint())
Mask[i] = SM_SentinelUndef;
else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
Mask[i] = SM_SentinelZero;
continue;
}

View File

@ -449,7 +449,7 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq {{.*}}(%rip), %rax
; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; CHECK-NEXT: psrad $16, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
@ -835,7 +835,7 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq {{.*}}(%rip), %rax
; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; CHECK-NEXT: psrad $16, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; CHECK-NEXT: movl $32768, %ecx # imm = 0x8000

View File

@ -150,7 +150,7 @@ define i32 @test4(x86_mmx %a) nounwind {
; X32-NEXT: subl $8, %esp
; X32-NEXT: movq %mm0, (%esp)
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,0,1]
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,0,1]
; X32-NEXT: movd %xmm0, %eax
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp

View File

@ -2661,7 +2661,7 @@ define <2 x double> @sitofp_load_2i16_to_2f64(<2 x i16> *%a) {
; SSE-LABEL: sitofp_load_2i16_to_2f64:
; SSE: # BB#0:
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq

View File

@ -2822,7 +2822,7 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind {
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: cvt_4f32_to_8i16_undef:
@ -2847,7 +2847,7 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind {
; AVX2-NEXT: shlq $32, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: cvt_4f32_to_8i16_undef:
@ -2873,7 +2873,7 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind {
; AVX512F-NEXT: shlq $32, %rdx
; AVX512F-NEXT: orq %rcx, %rdx
; AVX512F-NEXT: vmovq %rdx, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: cvt_4f32_to_8i16_undef:
@ -2899,7 +2899,6 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind {
; AVX512VL-NEXT: orq %rcx, %rdx
; AVX512VL-NEXT: vmovq %rdx, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT: retq
%1 = fptrunc <4 x float> %a0 to <4 x half>
@ -2931,7 +2930,7 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind {
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: retq
;
; AVX2-LABEL: cvt_4f32_to_8i16_zero:
@ -2956,7 +2955,7 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind {
; AVX2-NEXT: shlq $32, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512F-LABEL: cvt_4f32_to_8i16_zero:
@ -2982,7 +2981,7 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind {
; AVX512F-NEXT: shlq $32, %rdx
; AVX512F-NEXT: orq %rcx, %rdx
; AVX512F-NEXT: vmovq %rdx, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: cvt_4f32_to_8i16_zero:
@ -3008,7 +3007,6 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind {
; AVX512VL-NEXT: orq %rcx, %rdx
; AVX512VL-NEXT: vmovq %rdx, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
@ -3631,7 +3629,7 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: vmovdqa %xmm0, (%rdi)
; AVX1-NEXT: retq
;
@ -3657,7 +3655,7 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw
; AVX2-NEXT: shlq $32, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-NEXT: vmovdqa %xmm0, (%rdi)
; AVX2-NEXT: retq
;
@ -3684,7 +3682,7 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw
; AVX512F-NEXT: shlq $32, %rdx
; AVX512F-NEXT: orq %rcx, %rdx
; AVX512F-NEXT: vmovq %rdx, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512F-NEXT: vmovdqa %xmm0, (%rdi)
; AVX512F-NEXT: retq
;
@ -3711,7 +3709,6 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw
; AVX512VL-NEXT: orq %rcx, %rdx
; AVX512VL-NEXT: vmovq %rdx, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi)
; AVX512VL-NEXT: retq
@ -3745,7 +3742,7 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vmovdqa %xmm0, (%rdi)
; AVX1-NEXT: retq
;
@ -3771,7 +3768,7 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi
; AVX2-NEXT: shlq $32, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vmovdqa %xmm0, (%rdi)
; AVX2-NEXT: retq
;
@ -3798,7 +3795,7 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi
; AVX512F-NEXT: shlq $32, %rdx
; AVX512F-NEXT: orq %rcx, %rdx
; AVX512F-NEXT: vmovq %rdx, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vmovdqa %xmm0, (%rdi)
; AVX512F-NEXT: retq
;
@ -3825,7 +3822,6 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi
; AVX512VL-NEXT: orq %rcx, %rdx
; AVX512VL-NEXT: vmovq %rdx, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
@ -4477,7 +4473,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX1-NEXT: shlq $32, %rax
; AVX1-NEXT: orq %r14, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: addq $40, %rsp
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r14
@ -4515,7 +4511,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX2-NEXT: shlq $32, %rax
; AVX2-NEXT: orq %r14, %rax
; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-NEXT: addq $40, %rsp
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r14
@ -4550,7 +4546,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %r14, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512F-NEXT: addq $40, %rsp
; AVX512F-NEXT: popq %rbx
; AVX512F-NEXT: popq %r14
@ -4586,7 +4582,6 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX512VL-NEXT: orq %r14, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT: addq $40, %rsp
; AVX512VL-NEXT: popq %rbx
@ -4631,7 +4626,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX1-NEXT: shlq $32, %rax
; AVX1-NEXT: orq %r14, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: addq $40, %rsp
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r14
@ -4669,7 +4664,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX2-NEXT: shlq $32, %rax
; AVX2-NEXT: orq %r14, %rax
; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: addq $40, %rsp
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r14
@ -4704,7 +4699,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %r14, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: addq $40, %rsp
; AVX512F-NEXT: popq %rbx
; AVX512F-NEXT: popq %r14
@ -4740,7 +4735,6 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX512VL-NEXT: orq %r14, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
@ -5250,7 +5244,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun
; AVX1-NEXT: shlq $32, %rax
; AVX1-NEXT: orq %rbx, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: vmovdqa %xmm0, (%r14)
; AVX1-NEXT: addq $32, %rsp
; AVX1-NEXT: popq %rbx
@ -5292,7 +5286,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun
; AVX2-NEXT: shlq $32, %rax
; AVX2-NEXT: orq %rbx, %rax
; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-NEXT: vmovdqa %xmm0, (%r14)
; AVX2-NEXT: addq $32, %rsp
; AVX2-NEXT: popq %rbx
@ -5331,7 +5325,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rbx, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512F-NEXT: vmovdqa %xmm0, (%r14)
; AVX512F-NEXT: addq $32, %rsp
; AVX512F-NEXT: popq %rbx
@ -5371,7 +5365,6 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun
; AVX512VL-NEXT: orq %rbx, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT: vmovdqa %xmm0, (%r14)
; AVX512VL-NEXT: addq $32, %rsp
@ -5421,7 +5414,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw
; AVX1-NEXT: shlq $32, %rax
; AVX1-NEXT: orq %rbx, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vmovdqa %xmm0, (%r14)
; AVX1-NEXT: addq $32, %rsp
; AVX1-NEXT: popq %rbx
@ -5463,7 +5456,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw
; AVX2-NEXT: shlq $32, %rax
; AVX2-NEXT: orq %rbx, %rax
; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vmovdqa %xmm0, (%r14)
; AVX2-NEXT: addq $32, %rsp
; AVX2-NEXT: popq %rbx
@ -5502,7 +5495,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rbx, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vmovdqa %xmm0, (%r14)
; AVX512F-NEXT: addq $32, %rsp
; AVX512F-NEXT: popq %rbx
@ -5542,7 +5535,6 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw
; AVX512VL-NEXT: orq %rbx, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]

View File

@ -4435,7 +4435,7 @@ define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) {
; SSE2-LABEL: load_sext_2i16_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $16, %xmm0
@ -4445,7 +4445,7 @@ define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) {
; SSSE3-LABEL: load_sext_2i16_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: psrad $16, %xmm0

View File

@ -106,8 +106,6 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X64-SSE2: # BB#0: # %entry
; X64-SSE2-NEXT: movzwl (%rsi), %eax
; X64-SSE2-NEXT: movd %rax, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
@ -132,8 +130,6 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X64-SSE42-NEXT: movzbl 2(%rsi), %eax
; X64-SSE42-NEXT: movzwl (%rsi), %ecx
; X64-SSE42-NEXT: movd %rcx, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X64-SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-SSE42-NEXT: pinsrd $2, %eax, %xmm0
; X64-SSE42-NEXT: pslld $24, %xmm0

View File

@ -131,8 +131,6 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X64-SSE2: # BB#0: # %entry
; X64-SSE2-NEXT: movzwl (%rsi), %eax
; X64-SSE2-NEXT: movd %rax, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
@ -157,8 +155,6 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X64-SSE42-NEXT: movzbl 2(%rsi), %eax
; X64-SSE42-NEXT: movzwl (%rsi), %ecx
; X64-SSE42-NEXT: movd %rcx, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X64-SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-SSE42-NEXT: pinsrd $2, %eax, %xmm0
; X64-SSE42-NEXT: pand {{.*}}(%rip), %xmm0