diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 907d8d9da1a..57cc398f406 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -36,7 +36,7 @@ using namespace llvm; /// - the promotion of vector elements. This feature is disabled by default /// and only enabled using this flag. static cl::opt -AllowPromoteIntElem("promote-elements", cl::Hidden, +AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true), cl::desc("Allow promotion of integer vector element types")); namespace llvm { diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll index 34acd1678ae..5c3c0fca10d 100644 --- a/test/CodeGen/ARM/vrev.ll +++ b/test/CodeGen/ARM/vrev.ll @@ -150,9 +150,6 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind { ; vrev <4 x i16> should use VREV32 and not VREV64 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp { -; CHECK: test_vrev64: -; CHECK: vext.16 -; CHECK: vrev32.16 entry: %0 = bitcast <4 x i16>* %source to <8 x i16>* %tmp2 = load <8 x i16>* %0, align 4 diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll index 8ecf15432d5..f4aad44ed65 100644 --- a/test/CodeGen/CellSPU/shift_ops.ll +++ b/test/CodeGen/CellSPU/shift_ops.ll @@ -1,12 +1,12 @@ ; RUN: llc < %s -march=cellspu > %t1.s ; RUN: grep {shlh } %t1.s | count 10 ; RUN: grep {shlhi } %t1.s | count 3 -; RUN: grep {shl } %t1.s | count 11 +; RUN: grep {shl } %t1.s | count 10 ; RUN: grep {shli } %t1.s | count 3 ; RUN: grep {xshw } %t1.s | count 5 -; RUN: grep {and } %t1.s | count 14 -; RUN: grep {andi } %t1.s | count 2 -; RUN: grep {rotmi } %t1.s | count 2 +; RUN: grep {and } %t1.s | count 15 +; RUN: grep {andi } %t1.s | count 4 +; RUN: grep {rotmi } %t1.s | count 4 ; RUN: grep {rotqmbyi } %t1.s | count 1 ; RUN: grep {rotqmbii } %t1.s | count 2 ; RUN: grep {rotqmby } %t1.s | count 1 diff --git 
a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll index c88a258c26c..973586bf6cf 100644 --- a/test/CodeGen/CellSPU/shuffles.ll +++ b/test/CodeGen/CellSPU/shuffles.ll @@ -1,12 +1,14 @@ ; RUN: llc -O1 --march=cellspu < %s | FileCheck %s +;CHECK: shuffle define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) { ; CHECK: cwd {{\$.}}, 0($sp) ; CHECK: shufb {{\$., \$4, \$3, \$.}} %val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> ret <4 x float> %val } - + +;CHECK: splat define <4 x float> @splat(float %param1) { ; CHECK: lqa ; CHECK: shufb $3 @@ -16,6 +18,7 @@ define <4 x float> @splat(float %param1) { ret <4 x float> %val } +;CHECK: test_insert define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) { %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0 ;CHECK: lqa $6, @@ -31,6 +34,7 @@ define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) { ret void } +;CHECK: test_insert_1 define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) { ;CHECK: cwd $5, 4($sp) ;CHECK: shufb $3, $4, $3, $5 @@ -39,6 +43,7 @@ define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) { ret <4 x float> %rv } +;CHECK: test_v2i32 define <2 x i32> @test_v2i32(<4 x i32>%vec) { ;CHECK: rotqbyi $3, $3, 4 @@ -49,17 +54,14 @@ define <2 x i32> @test_v2i32(<4 x i32>%vec) define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec) { -;CHECK: rotqbyi $3, $3, 8 -;CHECK: bi $lr %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %rv } +;CHECK: test_v4i32_rot4 define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec) { -;CHECK: rotqbyi $3, $3, 4 -;CHECK: bi $lr %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %rv diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll index 71d4aba6333..9c5b89613df 100644 --- a/test/CodeGen/CellSPU/v2i32.ll +++ b/test/CodeGen/CellSPU/v2i32.ll @@ -9,7 +9,8 @@ define %vec @test_ret(%vec 
%param) define %vec @test_add(%vec %param) { -;CHECK: a {{\$.}}, $3, $3 +;CHECK: shufb +;CHECK: addx %1 = add %vec %param, %param ;CHECK: bi $lr ret %vec %1 @@ -17,21 +18,14 @@ define %vec @test_add(%vec %param) define %vec @test_sub(%vec %param) { -;CHECK: sf {{\$.}}, $4, $3 %1 = sub %vec %param, - ;CHECK: bi $lr ret %vec %1 } define %vec @test_mul(%vec %param) { -;CHECK: mpyu -;CHECK: mpyh -;CHECK: a {{\$., \$., \$.}} -;CHECK: a {{\$., \$., \$.}} %1 = mul %vec %param, %param - ;CHECK: bi $lr ret %vec %1 } @@ -56,22 +50,12 @@ define i32 @test_extract() { define void @test_store( %vec %val, %vec* %ptr) { -;CHECK: stqd $3, 0(${{.}}) -;CHECK: bi $lr store %vec %val, %vec* %ptr ret void } -;Alignment of <2 x i32> is not *directly* defined in the ABI -;It probably is safe to interpret it as an array, thus having 8 byte -;alignment (according to ABI). This tests that the size of -;[2 x <2 x i32>] is 16 bytes, i.e. there is no padding between the -;two arrays define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr) { -; CHECK-NOT: ai $3, $3, 16 -; CHECK: ai $3, $3, 8 -; CHECK: bi $lr %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1 ret <2 x i32>* %rv } diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll index 2dc1deaf173..757f1ff6825 100644 --- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll +++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd -; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd +; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpcklpd +; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpckhpd ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1 ; originally from PR2687, but things don't work that way any more. 
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll index 5c514805e48..5f5d5cccf71 100644 --- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll +++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1 ; RUN: grep movzwl %t1 | count 2 -; RUN: grep movzbl %t1 | count 2 +; RUN: grep movzbl %t1 | count 1 ; RUN: grep movd %t1 | count 4 define <4 x i16> @a(i32* %x1) nounwind { diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll index 69787c78cfd..5372bc52278 100644 --- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll +++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll @@ -1,32 +1,35 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s ; There are no MMX operations here, so we use XMM or i64. +; CHECK: ti8 define void @ti8(double %a, double %b) nounwind { entry: %tmp1 = bitcast double %a to <8 x i8> %tmp2 = bitcast double %b to <8 x i8> %tmp3 = add <8 x i8> %tmp1, %tmp2 -; CHECK: paddb %xmm1, %xmm0 +; CHECK: paddw store <8 x i8> %tmp3, <8 x i8>* null ret void } +; CHECK: ti16 define void @ti16(double %a, double %b) nounwind { entry: %tmp1 = bitcast double %a to <4 x i16> %tmp2 = bitcast double %b to <4 x i16> %tmp3 = add <4 x i16> %tmp1, %tmp2 -; CHECK: paddw %xmm1, %xmm0 +; CHECK: paddd store <4 x i16> %tmp3, <4 x i16>* null ret void } +; CHECK: ti32 define void @ti32(double %a, double %b) nounwind { entry: %tmp1 = bitcast double %a to <2 x i32> %tmp2 = bitcast double %b to <2 x i32> %tmp3 = add <2 x i32> %tmp1, %tmp2 -; CHECK: paddd %xmm1, %xmm0 +; CHECK: paddq store <2 x i32> %tmp3, <2 x i32>* null ret void } @@ -55,6 +58,7 @@ entry: ret void } +; CHECK: ti16a define void @ti16a(double %a, double %b) nounwind { entry: %tmp1 = bitcast double %a to x86_mmx @@ -66,6 +70,7 @@ entry: ret void } +; CHECK: ti32a define void @ti32a(double %a, double %b) nounwind { entry: %tmp1 = bitcast double 
%a to x86_mmx @@ -77,6 +82,7 @@ entry: ret void } +; CHECK: ti64a define void @ti64a(double %a, double %b) nounwind { entry: %tmp1 = bitcast double %a to x86_mmx diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll index 6062b505a56..cc31a119bd6 100644 --- a/test/CodeGen/X86/mmx-pinsrw.ll +++ b/test/CodeGen/X86/mmx-pinsrw.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1 +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsr ; PR2562 external global i16 ; :0 [#uses=1] diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll index a7ce7d93920..6ceffed12ac 100644 --- a/test/CodeGen/X86/mmx-vzmovl-2.ll +++ b/test/CodeGen/X86/mmx-vzmovl-2.ll @@ -1,6 +1,5 @@ -; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor -; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq - +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor | count 1 +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpcklqdq | count 1 %struct.vS1024 = type { [8 x <4 x i32>] } %struct.vS512 = type { [4 x <4 x i32>] } diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll index adc58ac34b9..816f6deac7e 100644 --- a/test/CodeGen/X86/scalar_widen_div.ll +++ b/test/CodeGen/X86/scalar_widen_div.ll @@ -3,9 +3,10 @@ ; Verify when widening a divide/remainder operation, we only generate a ; divide/rem per element since divide/remainder can trap. 
+; CHECK: vectorDiv define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind { -; CHECK: idivl -; CHECK: idivl +; CHECK: idivq +; CHECK: idivq ; CHECK-NOT: idivl ; CHECK: ret entry: @@ -32,6 +33,7 @@ entry: ret void } +; CHECK: test_char_div define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { ; CHECK: idivb ; CHECK: idivb @@ -42,6 +44,7 @@ define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { ret <3 x i8> %div.r } +; CHECK: test_uchar_div define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) { ; CHECK: divb ; CHECK: divb @@ -52,6 +55,7 @@ define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) { ret <3 x i8> %div.r } +; CHECK: test_short_div define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) { ; CHECK: idivw ; CHECK: idivw @@ -64,17 +68,19 @@ define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) { ret <5 x i16> %div.r } +; CHECK: test_ushort_div define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) { -; CHECK: divw -; CHECK: divw -; CHECK: divw -; CHECK: divw -; CHECK-NOT: divw +; CHECK: divl +; CHECK: divl +; CHECK: divl +; CHECK: divl +; CHECK-NOT: divl ; CHECK: ret %div.r = udiv <4 x i16> %num, %div ret <4 x i16> %div.r } +; CHECK: test_uint_div define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) { ; CHECK: divl ; CHECK: divl @@ -85,6 +91,7 @@ define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) { ret <3 x i32> %div.r } +; CHECK: test_long_div define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) { ; CHECK: idivq ; CHECK: idivq @@ -95,6 +102,7 @@ define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) { ret <3 x i64> %div.r } +; CHECK: test_ulong_div define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) { ; CHECK: divq ; CHECK: divq @@ -105,18 +113,19 @@ define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) { ret <3 x i64> %div.r } - +; CHECK: test_char_rem 
define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) { -; CHECK: idivb -; CHECK: idivb -; CHECK: idivb -; CHECK: idivb -; CHECK-NOT: idivb +; CHECK: idivl +; CHECK: idivl +; CHECK: idivl +; CHECK: idivl +; CHECK-NOT: idivl ; CHECK: ret %rem.r = srem <4 x i8> %num, %rem ret <4 x i8> %rem.r } +; CHECK: test_short_rem define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) { ; CHECK: idivw ; CHECK: idivw @@ -129,6 +138,7 @@ define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) { ret <5 x i16> %rem.r } +; CHECK: test_uint_rem define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) { ; CHECK: idivl ; CHECK: idivl @@ -141,6 +151,7 @@ define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) { } +; CHECK: test_ulong_rem define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { ; CHECK: divq ; CHECK: divq @@ -153,6 +164,7 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { ret <5 x i64> %rem.r } +; CHECK: test_int_div define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) { ; CHECK: idivl ; CHECK: idivl diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll index 950040a124a..e91a7347cca 100644 --- a/test/CodeGen/X86/vec_shuffle-37.ll +++ b/test/CodeGen/X86/vec_shuffle-37.ll @@ -26,10 +26,10 @@ entry: define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline { entry: -; CHECK: movaps 32({{%rdi|%rcx}}), %xmm0 -; CHECK-NEXT: movaps 48({{%rdi|%rcx}}), %xmm1 -; CHECK-NEXT: movss %xmm1, %xmm0 -; CHECK-NEXT: movq %xmm0, ({{%rsi|%rdx}}) +; CHECK: movl 36({{%rdi|%rcx}}) +; CHECK-NEXT: movl 48({{%rdi|%rcx}}) +; CHECK: punpcklqdq +; CHECK: movq %xmm0, ({{%rsi|%rdx}}) %0 = bitcast <8 x i32>* %source to <4 x i32>* %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3 %tmp2 = load <4 x i32>* %arrayidx, align 16 diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll index 97dacfdf09e..f9944ce2cb7 100644 --- 
a/test/CodeGen/X86/vsplit-and.ll +++ b/test/CodeGen/X86/vsplit-and.ll @@ -2,7 +2,7 @@ define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly { -; CHECK: andb +; CHECK: pandn %cmp1 = icmp ne <2 x i64> %src1, zeroinitializer %cmp2 = icmp ne <2 x i64> %src2, zeroinitializer %t1 = and <2 x i1> %cmp1, %cmp2 @@ -12,7 +12,7 @@ define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind reado } define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly { -; CHECK: andb +; CHECK-NOT: pandn %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer %t1 = and <3 x i1> %cmp1, %cmp2 diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll index 4b8016dc713..85367e85f4f 100644 --- a/test/CodeGen/X86/widen_arith-1.ll +++ b/test/CodeGen/X86/widen_arith-1.ll @@ -1,12 +1,10 @@ ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -; Widen a v3i8 to v16i8 to use a vector add - define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind { entry: ; CHECK-NOT: pextrw -; CHECK: paddb -; CHECK: pextrb +; CHECK: add + %dst.addr = alloca <3 x i8>* ; <<3 x i8>**> [#uses=2] %src.addr = alloca <3 x i8>* ; <<3 x i8>**> [#uses=2] %n.addr = alloca i32 ; [#uses=2] diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll index 03b3fea01f6..d35abc30817 100644 --- a/test/CodeGen/X86/widen_arith-2.ll +++ b/test/CodeGen/X86/widen_arith-2.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -; CHECK: paddb +; CHECK: padd ; CHECK: pand ; widen v8i8 to v16i8 (checks even power of 2 widening with add & and) diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll index 057492377a2..11d56f57864 100644 --- a/test/CodeGen/X86/widen_arith-3.ll +++ b/test/CodeGen/X86/widen_arith-3.ll @@ -1,7 +1,8 @@ ; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s -; CHECK: paddw 
-; CHECK: pextrw -; CHECK: movd +; CHECK: incw +; CHECK: incl +; CHECK: incl +; CHECK: addl ; Widen a v3i16 to v8i16 to do a vector add diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll index 1eace9e024e..4330aae8ec8 100644 --- a/test/CodeGen/X86/widen_cast-1.ll +++ b/test/CodeGen/X86/widen_cast-1.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s -; CHECK: paddw +; CHECK: paddd ; CHECK: pextrd ; CHECK: movd diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll index 8e1adf58f86..5ea54267692 100644 --- a/test/CodeGen/X86/widen_cast-4.ll +++ b/test/CodeGen/X86/widen_cast-4.ll @@ -1,16 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -; CHECK: sarb -; CHECK: sarb -; CHECK: sarb -; CHECK: sarb -; CHECK: sarb -; CHECK: sarb -; CHECK: sarb -; CHECK: sarb - -; v8i8 that is widen to v16i8 then split -; FIXME: This is widen to v16i8 and split to 16 and we then rebuild the vector. -; Unfortunately, we don't split the store so we don't get the code we want. 
+; CHECK: psraw +; CHECK: psraw define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { entry: diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll index f6810cda9e3..51f1c887b00 100644 --- a/test/CodeGen/X86/widen_conv-1.ll +++ b/test/CodeGen/X86/widen_conv-1.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -; CHECK: pshufd -; CHECK: paddd +; CHECK: paddq ; truncate v2i64 to v2i32 diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll index 80f3a492c49..affd796ffc3 100644 --- a/test/CodeGen/X86/widen_conv-4.ll +++ b/test/CodeGen/X86/widen_conv-4.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -; CHECK: cvtsi2ss +; CHECK-NOT: cvtsi2ss ; unsigned to float v7i16 to v7f32 diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll index c91627cd27a..0d21b49ad77 100644 --- a/test/CodeGen/X86/widen_load-0.ll +++ b/test/CodeGen/X86/widen_load-0.ll @@ -4,15 +4,15 @@ ; Both loads should happen before either store. 
-; CHECK: movl (%rdi), %[[R1:...]] -; CHECK: movl (%rsi), %[[R2:...]] -; CHECK: movl %[[R2]], (%rdi) -; CHECK: movl %[[R1]], (%rsi) +; CHECK: movd (%rsi), {{.*}} +; CHECK: movd (%rdi), {{.*}} +; CHECK: movd {{.*}}, (%rdi) +; CHECK: movd {{.*}}, (%rsi) -; WIN64: movl (%rcx), %[[R1:...]] -; WIN64: movl (%rdx), %[[R2:...]] -; WIN64: movl %[[R2]], (%rcx) -; WIN64: movl %[[R1]], (%rdx) +; WIN64: movd (%rdx), {{.*}} +; WIN64: movd (%rcx), {{.*}} +; WIN64: movd {{.*}}, (%rcx) +; WIN64: movd {{.*}}, (%rdx) define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind { entry: diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll index 642206316c6..71699b8361d 100644 --- a/test/CodeGen/X86/widen_load-2.ll +++ b/test/CodeGen/X86/widen_load-2.ll @@ -4,6 +4,7 @@ ; %i32vec3 = type <3 x i32> +; CHECK: add3i32 define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { ; CHECK: movdqa ; CHECK: paddd @@ -16,6 +17,7 @@ define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { ret void } +; CHECK: add3i32_2 define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { ; CHECK: movq ; CHECK: pinsrd @@ -32,6 +34,7 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { } %i32vec7 = type <7 x i32> +; CHECK: add7i32 define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) { ; CHECK: movdqa ; CHECK: movdqa @@ -47,6 +50,7 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) { ret void } +; CHECK: add12i32 %i32vec12 = type <12 x i32> define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) { ; CHECK: movdqa @@ -66,12 +70,14 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) { } +; CHECK: add3i16 %i16vec3 = type <3 x i16> define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind { -; CHECK: movdqa -; CHECK: paddw -; CHECK: movd -; CHECK: pextrw +; CHECK: add3i16 
+; CHECK: addl +; CHECK: addl +; CHECK: addl +; CHECK: ret %a = load %i16vec3* %ap, align 16 %b = load %i16vec3* %bp, align 16 %x = add %i16vec3 %a, %b @@ -79,10 +85,11 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp ret void } +; CHECK: add4i16 %i16vec4 = type <4 x i16> define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind { -; CHECK: movdqa -; CHECK: paddw +; CHECK: add4i16 +; CHECK: paddd ; CHECK: movq %a = load %i16vec4* %ap, align 16 %b = load %i16vec4* %bp, align 16 @@ -91,6 +98,7 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp ret void } +; CHECK: add12i16 %i16vec12 = type <12 x i16> define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind { ; CHECK: movdqa @@ -106,6 +114,7 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* ret void } +; CHECK: add18i16 %i16vec18 = type <18 x i16> define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind { ; CHECK: movdqa @@ -125,12 +134,13 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* } +; CHECK: add3i8 %i8vec3 = type <3 x i8> define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind { -; CHECK: movdqa -; CHECK: paddb -; CHECK: pextrb -; CHECK: movb +; CHECK: addb +; CHECK: addb +; CHECK: addb +; CHECK: ret %a = load %i8vec3* %ap, align 16 %b = load %i8vec3* %bp, align 16 %x = add %i8vec3 %a, %b @@ -138,6 +148,7 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no ret void } +; CHECK: add31i8: %i8vec31 = type <31 x i8> define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind { ; CHECK: movdqa @@ -147,6 +158,7 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp ; CHECK: movq ; CHECK: pextrb ; CHECK: pextrw +; CHECK: ret %a = load %i8vec31* %ap, align 
16 %b = load %i8vec31* %bp, align 16 %x = add %i8vec31 %a, %b @@ -155,9 +167,10 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp } +; CHECK: rot %i8vec3pack = type { <3 x i8>, i8 } define %i8vec3pack @rot() nounwind { -; CHECK: shrb +; CHECK: shrl entry: %X = alloca %i8vec3pack, align 4 %rot = alloca %i8vec3pack, align 4 diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index 8e951b77ca6..2df3b6a30cc 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -50,7 +50,7 @@ entry: ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone { ; CHECK: shuf4: -; CHECK: punpckldq +; CHECK-NOT: punpckldq %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> ret <8 x i8> %vshuf } diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll index fdf68f92a92..1cb07aa0824 100644 --- a/test/CodeGen/X86/x86-shifts.ll +++ b/test/CodeGen/X86/x86-shifts.ll @@ -124,7 +124,7 @@ entry: define <2 x i32> @shl2_other(<2 x i32> %A) nounwind { entry: ; CHECK: shl2_other -; CHECK-not: psllq +; CHECK: psllq %B = shl <2 x i32> %A, < i32 2, i32 2> %C = shl <2 x i32> %A, < i32 9, i32 9> %K = xor <2 x i32> %B, %C @@ -134,7 +134,7 @@ entry: define <2 x i32> @shr2_other(<2 x i32> %A) nounwind { entry: ; CHECK: shr2_other -; CHECK-NOT: psrlq +; CHECK: psrlq %B = lshr <2 x i32> %A, < i32 8, i32 8> %C = lshr <2 x i32> %A, < i32 1, i32 1> %K = xor <2 x i32> %B, %C