llvm/test/CodeGen/X86/reduce-trunc-shl.ll
Craig Topper 53eca87929 [X86] In LowerTRUNCATE, create an ISD::VECTOR_SHUFFLE instead of explicitly creating a PSHUFB. This will be lowered by regular shuffle lowering to a PSHUFB later.
Similar was already done for several other shuffles in this function.

The test changes are because the old code used explicity zeroing for elements that could have been undef.

While I was here I also changed other shuffle vectors in the same function to use the same input twice instead of creating UNDEF nodes. getVectorShuffle can create the UNDEF for us.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294130 91177308-0d34-0410-b5e6-96231b3b80d8
2017-02-05 18:33:14 +00:00

167 lines
4.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
define void @trunc_shl_7_v4i32_v4i64(<4 x i32> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
; SSE2-LABEL: trunc_shl_7_v4i32_v4i64:
; SSE2: # BB#0:
; SSE2-NEXT: movaps (%rsi), %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
; SSE2-NEXT: pslld $7, %xmm0
; SSE2-NEXT: movdqa %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_7_v4i32_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = mem[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpslld $7, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%val = load <4 x i64>, <4 x i64> addrspace(1)* %in
%shl = shl <4 x i64> %val, <i64 7, i64 7, i64 7, i64 7>
%trunc = trunc <4 x i64> %shl to <4 x i32>
store <4 x i32> %trunc, <4 x i32> addrspace(1)* %out
ret void
}
define <8 x i16> @trunc_shl_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_v8i16_v8i32:
; SSE2: # BB#0:
; SSE2-NEXT: pslld $17, %xmm0
; SSE2-NEXT: pslld $17, %xmm1
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_v8i16_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpslld $17, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
%conv = trunc <8 x i32> %shl to <8 x i16>
ret <8 x i16> %conv
}
define void @trunc_shl_31_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_31_i32_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movl (%rsi), %eax
; SSE2-NEXT: shll $31, %eax
; SSE2-NEXT: movl %eax, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_31_i32_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movl (%rsi), %eax
; AVX2-NEXT: shll $31, %eax
; AVX2-NEXT: movl %eax, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 31
%trunc = trunc i64 %shl to i32
store i32 %trunc, i32* %out
ret void
}
define void @trunc_shl_32_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_32_i32_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movl $0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_32_i32_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movl $0, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 32
%trunc = trunc i64 %shl to i32
store i32 %trunc, i32* %out
ret void
}
define void @trunc_shl_15_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_15_i16_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movzwl (%rsi), %eax
; SSE2-NEXT: shlw $15, %ax
; SSE2-NEXT: movw %ax, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_15_i16_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movzwl (%rsi), %eax
; AVX2-NEXT: shlw $15, %ax
; AVX2-NEXT: movw %ax, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 15
%trunc = trunc i64 %shl to i16
store i16 %trunc, i16* %out
ret void
}
define void @trunc_shl_16_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_16_i16_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movw $0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_16_i16_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movw $0, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 16
%trunc = trunc i64 %shl to i16
store i16 %trunc, i16* %out
ret void
}
define void @trunc_shl_7_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_7_i8_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movb (%rsi), %al
; SSE2-NEXT: shlb $7, %al
; SSE2-NEXT: movb %al, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_7_i8_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movb (%rsi), %al
; AVX2-NEXT: shlb $7, %al
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 7
%trunc = trunc i64 %shl to i8
store i8 %trunc, i8* %out
ret void
}
define void @trunc_shl_8_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_8_i8_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movb $0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_8_i8_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movb $0, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 8
%trunc = trunc i64 %shl to i8
store i8 %trunc, i8* %out
ret void
}