mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-06 11:19:09 +00:00
f6c2838f36
possible before resorting to pextrw and pinsrw. - Better codegen for v4i32 shuffles masquerading as v8i16 or v16i8 shuffles. - Improves (i16 extract_vector_element 0) codegen by recognizing (i32 extract_vector_element 0) does not require a pextrw. llvm-svn: 44836
29 lines
1.2 KiB
LLVM
29 lines
1.2 KiB
LLVM
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 4
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 6
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 3
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 2
|
|
|
|
; t1: load one <8 x i16> vector through each pointer argument, then build the
; result from the first vector with lane 0 taken from the second vector.
; Shuffle mask indices 0-7 select from the first operand, 8-15 from the
; second, so index 8 is lane 0 of the vector loaded from %B.
define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) {
        %lhs = load <8 x i16>* %A
        %rhs = load <8 x i16>* %B
        %merged = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
        ret <8 x i16> %merged
}
|
|
|
|
; t2: two-input shuffle on by-value vectors. Result lanes 0 and 3 both take
; mask index 9 (lane 1 of %B); every other lane keeps the %A element already
; in that position.
define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) {
        %shuf = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7>
        ret <8 x i16> %shuf
}
|
|
|
|
; t3: shuffle whose two operands are both %A, so mask indices 8 and 13 refer
; to lanes 0 and 5 of %A. The resulting lane order, in terms of %A, is
; 0, 3, 2, 5, 7, 6, 5, 4. %B is accepted but never read.
define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) {
        %shuf = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> <i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4>
        ret <8 x i16> %shuf
}
|
|
|
|
; t4: shuffle with %A and %B as operands, but every mask index is below 8, so
; only lanes of %A are selected. The resulting lane order is
; 0, 7, 2, 3, 1, 5, 6, 5 (lanes cross between the low and high halves).
define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) {
        %shuf = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5>
        ret <8 x i16> %shuf
}
|