mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-24 20:30:06 +00:00
Enable element promotion type legalization by default.
Changed tests which assumed that vectors are legalized by widening them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142152 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c4a90c5271
commit
8fb06b3e8f
@ -36,7 +36,7 @@ using namespace llvm;
|
||||
/// - the promotion of vector elements. This feature is disabled by default
|
||||
/// and only enabled using this flag.
|
||||
static cl::opt<bool>
|
||||
AllowPromoteIntElem("promote-elements", cl::Hidden,
|
||||
AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true),
|
||||
cl::desc("Allow promotion of integer vector element types"));
|
||||
|
||||
namespace llvm {
|
||||
|
@ -150,9 +150,6 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
|
||||
|
||||
; vrev <4 x i16> should use VREV32 and not VREV64
|
||||
define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
|
||||
; CHECK: test_vrev64:
|
||||
; CHECK: vext.16
|
||||
; CHECK: vrev32.16
|
||||
entry:
|
||||
%0 = bitcast <4 x i16>* %source to <8 x i16>*
|
||||
%tmp2 = load <8 x i16>* %0, align 4
|
||||
|
@ -1,12 +1,12 @@
|
||||
; RUN: llc < %s -march=cellspu > %t1.s
|
||||
; RUN: grep {shlh } %t1.s | count 10
|
||||
; RUN: grep {shlhi } %t1.s | count 3
|
||||
; RUN: grep {shl } %t1.s | count 11
|
||||
; RUN: grep {shl } %t1.s | count 10
|
||||
; RUN: grep {shli } %t1.s | count 3
|
||||
; RUN: grep {xshw } %t1.s | count 5
|
||||
; RUN: grep {and } %t1.s | count 14
|
||||
; RUN: grep {andi } %t1.s | count 2
|
||||
; RUN: grep {rotmi } %t1.s | count 2
|
||||
; RUN: grep {and } %t1.s | count 15
|
||||
; RUN: grep {andi } %t1.s | count 4
|
||||
; RUN: grep {rotmi } %t1.s | count 4
|
||||
; RUN: grep {rotqmbyi } %t1.s | count 1
|
||||
; RUN: grep {rotqmbii } %t1.s | count 2
|
||||
; RUN: grep {rotqmby } %t1.s | count 1
|
||||
|
@ -1,12 +1,14 @@
|
||||
; RUN: llc -O1 --march=cellspu < %s | FileCheck %s
|
||||
|
||||
;CHECK: shuffle
|
||||
define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) {
|
||||
; CHECK: cwd {{\$.}}, 0($sp)
|
||||
; CHECK: shufb {{\$., \$4, \$3, \$.}}
|
||||
%val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> <i32 4,i32 1,i32 2,i32 3>
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
|
||||
;CHECK: splat
|
||||
define <4 x float> @splat(float %param1) {
|
||||
; CHECK: lqa
|
||||
; CHECK: shufb $3
|
||||
@ -16,6 +18,7 @@ define <4 x float> @splat(float %param1) {
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
;CHECK: test_insert
|
||||
define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
|
||||
%sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0
|
||||
;CHECK: lqa $6,
|
||||
@ -31,6 +34,7 @@ define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK: test_insert_1
|
||||
define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) {
|
||||
;CHECK: cwd $5, 4($sp)
|
||||
;CHECK: shufb $3, $4, $3, $5
|
||||
@ -39,6 +43,7 @@ define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) {
|
||||
ret <4 x float> %rv
|
||||
}
|
||||
|
||||
;CHECK: test_v2i32
|
||||
define <2 x i32> @test_v2i32(<4 x i32>%vec)
|
||||
{
|
||||
;CHECK: rotqbyi $3, $3, 4
|
||||
@ -49,17 +54,14 @@ define <2 x i32> @test_v2i32(<4 x i32>%vec)
|
||||
|
||||
define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec)
|
||||
{
|
||||
;CHECK: rotqbyi $3, $3, 8
|
||||
;CHECK: bi $lr
|
||||
%rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
|
||||
<4 x i32> <i32 2,i32 3,i32 0, i32 1>
|
||||
ret <4 x i32> %rv
|
||||
}
|
||||
|
||||
;CHECK: test_v4i32_rot4
|
||||
define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec)
|
||||
{
|
||||
;CHECK: rotqbyi $3, $3, 4
|
||||
;CHECK: bi $lr
|
||||
%rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
|
||||
<4 x i32> <i32 1,i32 2,i32 3, i32 0>
|
||||
ret <4 x i32> %rv
|
||||
|
@ -9,7 +9,8 @@ define %vec @test_ret(%vec %param)
|
||||
|
||||
define %vec @test_add(%vec %param)
|
||||
{
|
||||
;CHECK: a {{\$.}}, $3, $3
|
||||
;CHECK: shufb
|
||||
;CHECK: addx
|
||||
%1 = add %vec %param, %param
|
||||
;CHECK: bi $lr
|
||||
ret %vec %1
|
||||
@ -17,21 +18,14 @@ define %vec @test_add(%vec %param)
|
||||
|
||||
define %vec @test_sub(%vec %param)
|
||||
{
|
||||
;CHECK: sf {{\$.}}, $4, $3
|
||||
%1 = sub %vec %param, <i32 1, i32 1>
|
||||
|
||||
;CHECK: bi $lr
|
||||
ret %vec %1
|
||||
}
|
||||
|
||||
define %vec @test_mul(%vec %param)
|
||||
{
|
||||
;CHECK: mpyu
|
||||
;CHECK: mpyh
|
||||
;CHECK: a {{\$., \$., \$.}}
|
||||
;CHECK: a {{\$., \$., \$.}}
|
||||
%1 = mul %vec %param, %param
|
||||
|
||||
;CHECK: bi $lr
|
||||
ret %vec %1
|
||||
}
|
||||
@ -56,22 +50,12 @@ define i32 @test_extract() {
|
||||
|
||||
define void @test_store( %vec %val, %vec* %ptr)
|
||||
{
|
||||
;CHECK: stqd $3, 0(${{.}})
|
||||
;CHECK: bi $lr
|
||||
store %vec %val, %vec* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;Alignment of <2 x i32> is not *directly* defined in the ABI
|
||||
;It probably is safe to interpret it as an array, thus having 8 byte
|
||||
;alignment (according to ABI). This tests that the size of
|
||||
;[2 x <2 x i32>] is 16 bytes, i.e. there is no padding between the
|
||||
;two arrays
|
||||
define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr)
|
||||
{
|
||||
; CHECK-NOT: ai $3, $3, 16
|
||||
; CHECK: ai $3, $3, 8
|
||||
; CHECK: bi $lr
|
||||
%rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1
|
||||
ret <2 x i32>* %rv
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpcklpd
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpckhpd
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
|
||||
; originally from PR2687, but things don't work that way any more.
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
|
||||
; RUN: grep movzwl %t1 | count 2
|
||||
; RUN: grep movzbl %t1 | count 2
|
||||
; RUN: grep movzbl %t1 | count 1
|
||||
; RUN: grep movd %t1 | count 4
|
||||
|
||||
define <4 x i16> @a(i32* %x1) nounwind {
|
||||
|
@ -1,32 +1,35 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
|
||||
; There are no MMX operations here, so we use XMM or i64.
|
||||
|
||||
; CHECK: ti8
|
||||
define void @ti8(double %a, double %b) nounwind {
|
||||
entry:
|
||||
%tmp1 = bitcast double %a to <8 x i8>
|
||||
%tmp2 = bitcast double %b to <8 x i8>
|
||||
%tmp3 = add <8 x i8> %tmp1, %tmp2
|
||||
; CHECK: paddb %xmm1, %xmm0
|
||||
; CHECK: paddw
|
||||
store <8 x i8> %tmp3, <8 x i8>* null
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: ti16
|
||||
define void @ti16(double %a, double %b) nounwind {
|
||||
entry:
|
||||
%tmp1 = bitcast double %a to <4 x i16>
|
||||
%tmp2 = bitcast double %b to <4 x i16>
|
||||
%tmp3 = add <4 x i16> %tmp1, %tmp2
|
||||
; CHECK: paddw %xmm1, %xmm0
|
||||
; CHECK: paddd
|
||||
store <4 x i16> %tmp3, <4 x i16>* null
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: ti32
|
||||
define void @ti32(double %a, double %b) nounwind {
|
||||
entry:
|
||||
%tmp1 = bitcast double %a to <2 x i32>
|
||||
%tmp2 = bitcast double %b to <2 x i32>
|
||||
%tmp3 = add <2 x i32> %tmp1, %tmp2
|
||||
; CHECK: paddd %xmm1, %xmm0
|
||||
; CHECK: paddq
|
||||
store <2 x i32> %tmp3, <2 x i32>* null
|
||||
ret void
|
||||
}
|
||||
@ -55,6 +58,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: ti16a
|
||||
define void @ti16a(double %a, double %b) nounwind {
|
||||
entry:
|
||||
%tmp1 = bitcast double %a to x86_mmx
|
||||
@ -66,6 +70,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: ti32a
|
||||
define void @ti32a(double %a, double %b) nounwind {
|
||||
entry:
|
||||
%tmp1 = bitcast double %a to x86_mmx
|
||||
@ -77,6 +82,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: ti64a
|
||||
define void @ti64a(double %a, double %b) nounwind {
|
||||
entry:
|
||||
%tmp1 = bitcast double %a to x86_mmx
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1
|
||||
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsr
|
||||
; PR2562
|
||||
|
||||
external global i16 ; <i16*>:0 [#uses=1]
|
||||
|
@ -1,6 +1,5 @@
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq
|
||||
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor | count 1
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpcklqdq | count 1
|
||||
%struct.vS1024 = type { [8 x <4 x i32>] }
|
||||
%struct.vS512 = type { [4 x <4 x i32>] }
|
||||
|
||||
|
@ -3,9 +3,10 @@
|
||||
; Verify when widening a divide/remainder operation, we only generate a
|
||||
; divide/rem per element since divide/remainder can trap.
|
||||
|
||||
; CHECK: vectorDiv
|
||||
define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
|
||||
; CHECK: idivl
|
||||
; CHECK: idivl
|
||||
; CHECK: idivq
|
||||
; CHECK: idivq
|
||||
; CHECK-NOT: idivl
|
||||
; CHECK: ret
|
||||
entry:
|
||||
@ -32,6 +33,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_char_div
|
||||
define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
|
||||
; CHECK: idivb
|
||||
; CHECK: idivb
|
||||
@ -42,6 +44,7 @@ define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
|
||||
ret <3 x i8> %div.r
|
||||
}
|
||||
|
||||
; CHECK: test_char_div
|
||||
define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
|
||||
; CHECK: divb
|
||||
; CHECK: divb
|
||||
@ -52,6 +55,7 @@ define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
|
||||
ret <3 x i8> %div.r
|
||||
}
|
||||
|
||||
; CHECK: test_short_div
|
||||
define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
|
||||
; CHECK: idivw
|
||||
; CHECK: idivw
|
||||
@ -64,17 +68,19 @@ define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
|
||||
ret <5 x i16> %div.r
|
||||
}
|
||||
|
||||
; CHECK: test_ushort_div
|
||||
define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
|
||||
; CHECK: divw
|
||||
; CHECK: divw
|
||||
; CHECK: divw
|
||||
; CHECK: divw
|
||||
; CHECK-NOT: divw
|
||||
; CHECK: divl
|
||||
; CHECK: divl
|
||||
; CHECK: divl
|
||||
; CHECK: divl
|
||||
; CHECK-NOT: divl
|
||||
; CHECK: ret
|
||||
%div.r = udiv <4 x i16> %num, %div
|
||||
ret <4 x i16> %div.r
|
||||
}
|
||||
|
||||
; CHECK: test_uint_div
|
||||
define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
|
||||
; CHECK: divl
|
||||
; CHECK: divl
|
||||
@ -85,6 +91,7 @@ define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
|
||||
ret <3 x i32> %div.r
|
||||
}
|
||||
|
||||
; CHECK: test_long_div
|
||||
define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
|
||||
; CHECK: idivq
|
||||
; CHECK: idivq
|
||||
@ -95,6 +102,7 @@ define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
|
||||
ret <3 x i64> %div.r
|
||||
}
|
||||
|
||||
; CHECK: test_ulong_div
|
||||
define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
|
||||
; CHECK: divq
|
||||
; CHECK: divq
|
||||
@ -105,18 +113,19 @@ define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
|
||||
ret <3 x i64> %div.r
|
||||
}
|
||||
|
||||
|
||||
; CHECK: test_char_rem
|
||||
define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
|
||||
; CHECK: idivb
|
||||
; CHECK: idivb
|
||||
; CHECK: idivb
|
||||
; CHECK: idivb
|
||||
; CHECK-NOT: idivb
|
||||
; CHECK: idivl
|
||||
; CHECK: idivl
|
||||
; CHECK: idivl
|
||||
; CHECK: idivl
|
||||
; CHECK-NOT: idivl
|
||||
; CHECK: ret
|
||||
%rem.r = srem <4 x i8> %num, %rem
|
||||
ret <4 x i8> %rem.r
|
||||
}
|
||||
|
||||
; CHECK: test_short_rem
|
||||
define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
|
||||
; CHECK: idivw
|
||||
; CHECK: idivw
|
||||
@ -129,6 +138,7 @@ define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
|
||||
ret <5 x i16> %rem.r
|
||||
}
|
||||
|
||||
; CHECK: test_uint_rem
|
||||
define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
|
||||
; CHECK: idivl
|
||||
; CHECK: idivl
|
||||
@ -141,6 +151,7 @@ define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
|
||||
}
|
||||
|
||||
|
||||
; CHECK: test_ulong_rem
|
||||
define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
|
||||
; CHECK: divq
|
||||
; CHECK: divq
|
||||
@ -153,6 +164,7 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
|
||||
ret <5 x i64> %rem.r
|
||||
}
|
||||
|
||||
; CHECK: test_int_div
|
||||
define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
|
||||
; CHECK: idivl
|
||||
; CHECK: idivl
|
||||
|
@ -26,10 +26,10 @@ entry:
|
||||
|
||||
define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
|
||||
entry:
|
||||
; CHECK: movaps 32({{%rdi|%rcx}}), %xmm0
|
||||
; CHECK-NEXT: movaps 48({{%rdi|%rcx}}), %xmm1
|
||||
; CHECK-NEXT: movss %xmm1, %xmm0
|
||||
; CHECK-NEXT: movq %xmm0, ({{%rsi|%rdx}})
|
||||
; CHECK: movl 36({{%rdi|%rcx}})
|
||||
; CHECK-NEXT: movl 48({{%rdi|%rcx}})
|
||||
; CHECK: punpcklqdq
|
||||
; CHECK: movq %xmm0, ({{%rsi|%rdx}})
|
||||
%0 = bitcast <8 x i32>* %source to <4 x i32>*
|
||||
%arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
|
||||
%tmp2 = load <4 x i32>* %arrayidx, align 16
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
|
||||
define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
|
||||
; CHECK: andb
|
||||
; CHECK: pandn
|
||||
%cmp1 = icmp ne <2 x i64> %src1, zeroinitializer
|
||||
%cmp2 = icmp ne <2 x i64> %src2, zeroinitializer
|
||||
%t1 = and <2 x i1> %cmp1, %cmp2
|
||||
@ -12,7 +12,7 @@ define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind reado
|
||||
}
|
||||
|
||||
define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
|
||||
; CHECK: andb
|
||||
; CHECK-NOT: pandn
|
||||
%cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
|
||||
%cmp2 = icmp ne <3 x i64> %src2, zeroinitializer
|
||||
%t1 = and <3 x i1> %cmp1, %cmp2
|
||||
|
@ -1,12 +1,10 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
|
||||
|
||||
; Widen a v3i8 to v16i8 to use a vector add
|
||||
|
||||
define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
|
||||
entry:
|
||||
; CHECK-NOT: pextrw
|
||||
; CHECK: paddb
|
||||
; CHECK: pextrb
|
||||
; CHECK: add
|
||||
|
||||
%dst.addr = alloca <3 x i8>* ; <<3 x i8>**> [#uses=2]
|
||||
%src.addr = alloca <3 x i8>* ; <<3 x i8>**> [#uses=2]
|
||||
%n.addr = alloca i32 ; <i32*> [#uses=2]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
|
||||
; CHECK: paddb
|
||||
; CHECK: padd
|
||||
; CHECK: pand
|
||||
|
||||
; widen v8i8 to v16i8 (checks even power of 2 widening with add & and)
|
||||
|
@ -1,7 +1,8 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
|
||||
; CHECK: paddw
|
||||
; CHECK: pextrw
|
||||
; CHECK: movd
|
||||
; CHECK: incw
|
||||
; CHECK: incl
|
||||
; CHECK: incl
|
||||
; CHECK: addl
|
||||
|
||||
; Widen a v3i16 to v8i16 to do a vector add
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
|
||||
; CHECK: paddw
|
||||
; CHECK: paddd
|
||||
; CHECK: pextrd
|
||||
; CHECK: movd
|
||||
|
||||
|
@ -1,16 +1,6 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
|
||||
; CHECK: sarb
|
||||
; CHECK: sarb
|
||||
; CHECK: sarb
|
||||
; CHECK: sarb
|
||||
; CHECK: sarb
|
||||
; CHECK: sarb
|
||||
; CHECK: sarb
|
||||
; CHECK: sarb
|
||||
|
||||
; v8i8 that is widen to v16i8 then split
|
||||
; FIXME: This is widen to v16i8 and split to 16 and we then rebuild the vector.
|
||||
; Unfortunately, we don't split the store so we don't get the code we want.
|
||||
; CHECK: psraw
|
||||
; CHECK: psraw
|
||||
|
||||
define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
|
||||
entry:
|
||||
|
@ -1,6 +1,5 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
|
||||
; CHECK: pshufd
|
||||
; CHECK: paddd
|
||||
; CHECK: paddq
|
||||
|
||||
; truncate v2i64 to v2i32
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
|
||||
; CHECK: cvtsi2ss
|
||||
; CHECK-NOT: cvtsi2ss
|
||||
|
||||
; unsigned to float v7i16 to v7f32
|
||||
|
||||
|
@ -4,15 +4,15 @@
|
||||
|
||||
; Both loads should happen before either store.
|
||||
|
||||
; CHECK: movl (%rdi), %[[R1:...]]
|
||||
; CHECK: movl (%rsi), %[[R2:...]]
|
||||
; CHECK: movl %[[R2]], (%rdi)
|
||||
; CHECK: movl %[[R1]], (%rsi)
|
||||
; CHECK: movd (%rsi), {{.*}}
|
||||
; CHECK: movd (%rdi), {{.*}}
|
||||
; CHECK: movd {{.*}}, (%rdi)
|
||||
; CHECK: movd {{.*}}, (%rsi)
|
||||
|
||||
; WIN64: movl (%rcx), %[[R1:...]]
|
||||
; WIN64: movl (%rdx), %[[R2:...]]
|
||||
; WIN64: movl %[[R2]], (%rcx)
|
||||
; WIN64: movl %[[R1]], (%rdx)
|
||||
; WIN64: movd (%rdx), {{.*}}
|
||||
; WIN64: movd (%rcx), {{.*}}
|
||||
; WIN64: movd {{.*}}, (%rcx)
|
||||
; WIN64: movd {{.*}}, (%rdx)
|
||||
|
||||
define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
|
||||
entry:
|
||||
|
@ -4,6 +4,7 @@
|
||||
;
|
||||
|
||||
%i32vec3 = type <3 x i32>
|
||||
; CHECK: add3i32
|
||||
define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddd
|
||||
@ -16,6 +17,7 @@ define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: add3i32_2
|
||||
define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
|
||||
; CHECK: movq
|
||||
; CHECK: pinsrd
|
||||
@ -32,6 +34,7 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
|
||||
}
|
||||
|
||||
%i32vec7 = type <7 x i32>
|
||||
; CHECK: add7i32
|
||||
define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
@ -47,6 +50,7 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: add12i32
|
||||
%i32vec12 = type <12 x i32>
|
||||
define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
|
||||
; CHECK: movdqa
|
||||
@ -66,12 +70,14 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
|
||||
}
|
||||
|
||||
|
||||
; CHECK: add3i16
|
||||
%i16vec3 = type <3 x i16>
|
||||
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddw
|
||||
; CHECK: movd
|
||||
; CHECK: pextrw
|
||||
; CHECK: add3i16
|
||||
; CHECK: addl
|
||||
; CHECK: addl
|
||||
; CHECK: addl
|
||||
; CHECK: ret
|
||||
%a = load %i16vec3* %ap, align 16
|
||||
%b = load %i16vec3* %bp, align 16
|
||||
%x = add %i16vec3 %a, %b
|
||||
@ -79,10 +85,11 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: add4i16
|
||||
%i16vec4 = type <4 x i16>
|
||||
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddw
|
||||
; CHECK: add4i16
|
||||
; CHECK: paddd
|
||||
; CHECK: movq
|
||||
%a = load %i16vec4* %ap, align 16
|
||||
%b = load %i16vec4* %bp, align 16
|
||||
@ -91,6 +98,7 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: add12i16
|
||||
%i16vec12 = type <12 x i16>
|
||||
define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
|
||||
; CHECK: movdqa
|
||||
@ -106,6 +114,7 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: add18i16
|
||||
%i16vec18 = type <18 x i16>
|
||||
define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
|
||||
; CHECK: movdqa
|
||||
@ -125,12 +134,13 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
|
||||
}
|
||||
|
||||
|
||||
; CHECK: add3i8
|
||||
%i8vec3 = type <3 x i8>
|
||||
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddb
|
||||
; CHECK: pextrb
|
||||
; CHECK: movb
|
||||
; CHECK: addb
|
||||
; CHECK: addb
|
||||
; CHECK: addb
|
||||
; CHECK: ret
|
||||
%a = load %i8vec3* %ap, align 16
|
||||
%b = load %i8vec3* %bp, align 16
|
||||
%x = add %i8vec3 %a, %b
|
||||
@ -138,6 +148,7 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: add31i8:
|
||||
%i8vec31 = type <31 x i8>
|
||||
define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
|
||||
; CHECK: movdqa
|
||||
@ -147,6 +158,7 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
|
||||
; CHECK: movq
|
||||
; CHECK: pextrb
|
||||
; CHECK: pextrw
|
||||
; CHECK: ret
|
||||
%a = load %i8vec31* %ap, align 16
|
||||
%b = load %i8vec31* %bp, align 16
|
||||
%x = add %i8vec31 %a, %b
|
||||
@ -155,9 +167,10 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
|
||||
}
|
||||
|
||||
|
||||
; CHECK: rot
|
||||
%i8vec3pack = type { <3 x i8>, i8 }
|
||||
define %i8vec3pack @rot() nounwind {
|
||||
; CHECK: shrb
|
||||
; CHECK: shrl
|
||||
entry:
|
||||
%X = alloca %i8vec3pack, align 4
|
||||
%rot = alloca %i8vec3pack, align 4
|
||||
|
@ -50,7 +50,7 @@ entry:
|
||||
; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
|
||||
define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
|
||||
; CHECK: shuf4:
|
||||
; CHECK: punpckldq
|
||||
; CHECK-NOT: punpckldq
|
||||
%vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
ret <8 x i8> %vshuf
|
||||
}
|
||||
|
@ -124,7 +124,7 @@ entry:
|
||||
define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
|
||||
entry:
|
||||
; CHECK: shl2_other
|
||||
; CHECK-not: psllq
|
||||
; CHECK: psllq
|
||||
%B = shl <2 x i32> %A, < i32 2, i32 2>
|
||||
%C = shl <2 x i32> %A, < i32 9, i32 9>
|
||||
%K = xor <2 x i32> %B, %C
|
||||
@ -134,7 +134,7 @@ entry:
|
||||
define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
|
||||
entry:
|
||||
; CHECK: shr2_other
|
||||
; CHECK-NOT: psrlq
|
||||
; CHECK: psrlq
|
||||
%B = lshr <2 x i32> %A, < i32 8, i32 8>
|
||||
%C = lshr <2 x i32> %A, < i32 1, i32 1>
|
||||
%K = xor <2 x i32> %B, %C
|
||||
|
Loading…
Reference in New Issue
Block a user