mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-12 23:40:54 +00:00
cdfe078a42
When lowering two blended PACKUS, we used to disregard the types of the PACKUS inputs, indiscriminately generating a v16i8 PACKUS. This leads to non-selectable things like: (v16i8 (PACKUS (v4i32 v0), (v4i32 v1))) Instead, check that the PACKUSes have the same type, and use that as the final result type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274138 91177308-0d34-0410-b5e6-96231b3b80d8
60 lines
2.6 KiB
LLVM
60 lines
2.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX

; Blend of two packusdw results of the same type. The shuffle mask selects
; lanes 0-3 of %p0 and lanes 0-3 of %p1 (indices 8-11 address the second
; operand). The checks below expect the whole sequence to fold into a single
; (v)packusdw on %xmm0 and %xmm2 for both the SSE4.1 and the AVX run; no
; instruction touching %xmm1 or %xmm3 is expected.
define <8 x i16> @blend_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; SSE41-LABEL: blend_packusdw:
; SSE41: # BB#0:
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_packusdw:
; AVX: # BB#0:
; AVX-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
  %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  %p1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a2, <4 x i32> %a3)
  %s0 = shufflevector <8 x i16> %p0, <8 x i16> %p1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %s0
}

; Blend of two packuswb results of the same type. The shuffle mask selects
; bytes 0-7 of %p0 and bytes 0-7 of %p1 (indices 16-23 address the second
; operand). The checks below expect the whole sequence to fold into a single
; (v)packuswb on %xmm0 and %xmm2 for both the SSE4.1 and the AVX run; no
; instruction touching %xmm1 or %xmm3 is expected.
define <16 x i8> @blend_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE41-LABEL: blend_packuswb:
; SSE41: # BB#0:
; SSE41-NEXT: packuswb %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_packuswb:
; AVX: # BB#0:
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
  %p0 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  %p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
  %s0 = shufflevector <16 x i8> %p0, <16 x i8> %p1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s0
}

; Blend of two packs with DIFFERENT input types: %p0 is a packusdw of
; <4 x i32> operands, while %p1 is a packuswb of <8 x i16> operands that is
; bitcast to <8 x i16> before the shuffle. The mask selects lanes 0-3 of each
; shuffle operand. Unlike the same-type tests, the checks below expect both
; pack instructions to be emitted as written and their low 64-bit halves to be
; joined with (v)punpcklqdq, i.e. the two packs are not merged into one.
define <8 x i16> @blend_packusdw_packuswb(<4 x i32> %a0, <4 x i32> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE41-LABEL: blend_packusdw_packuswb:
; SSE41: # BB#0:
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm3, %xmm2
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_packusdw_packuswb:
; AVX: # BB#0:
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm3, %xmm2, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  %p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
  %b1 = bitcast <16 x i8> %p1 to <8 x i16>
  %s0 = shufflevector <8 x i16> %p0, <8 x i16> %b1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %s0
}

; Declarations of the x86 pack intrinsics called by the test functions in this
; file.
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)