[X86][MMX] Cleanup shuffle, bitcast and insert element tests

- Merge MMX arg passing test files
- Merge MMX bitcast, insert elt and shuffle tests

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227867 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2015-02-02 21:56:11 +00:00
parent 8ea8f377aa
commit 12c944ba10
16 changed files with 228 additions and 175 deletions

View File

@ -1,19 +0,0 @@
; RUN: llc < %s -march=x86-64 -mattr=+mmx | FileCheck %s
; CHECK: paddusw
@R = external global x86_mmx ; <x86_mmx*> [#uses=1]
; Adds the two <1 x i64> arguments with the MMX paddus.w intrinsic and stores
; the result to the external global @R.
define void @foo(<1 x i64> %A, <1 x i64> %B) {
entry:
; Reinterpret both vector arguments as x86_mmx so they can feed the intrinsic.
%tmp2 = bitcast <1 x i64> %A to x86_mmx
%tmp3 = bitcast <1 x i64> %B to x86_mmx
%tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp2, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=1]
store x86_mmx %tmp7, x86_mmx* @R
; The emms intrinsic is called after the store, right before returning.
tail call void @llvm.x86.mmx.emms( )
ret void
}
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()

View File

@ -1,12 +0,0 @@
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2
; PR2850
@tmp_V2i = common global <2 x i32> zeroinitializer ; <<2 x i32>*> [#uses=2]
; Loads the <2 x i32> global @tmp_V2i, shuffles it with an all-zero mask and
; stores the result back to the same global.
define void @f0() nounwind {
entry:
%0 = load <2 x i32>* @tmp_V2i, align 8 ; <<2 x i32>> [#uses=1]
; A zeroinitializer mask selects element 0 of the first operand for both lanes.
%1 = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer ; <<2 x i32>> [#uses=1]
store <2 x i32> %1, <2 x i32>* @tmp_V2i, align 8
ret void
}

View File

@ -1,9 +0,0 @@
; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+mmx,+sse2 | FileCheck %s
; CHECK-NOT: movl
; Returns an <8 x i8> whose lane 0 is the zero-extended argument %x; the other
; lanes come from undef.
define <8 x i8> @a(i8 zeroext %x) nounwind {
%r = insertelement <8 x i8> undef, i8 %x, i32 0
ret <8 x i8> %r
}

View File

@ -1,10 +0,0 @@
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 -mattr=+mmx | grep movd | count 2
; Builds a <2 x i32> from %a (lane 0) and %b (lane 1) and returns it
; reinterpreted as a single i64.
define i64 @a(i32 %a, i32 %b) nounwind readnone {
entry:
%0 = insertelement <2 x i32> undef, i32 %a, i32 0 ; <<2 x i32>> [#uses=1]
%1 = insertelement <2 x i32> %0, i32 %b, i32 1 ; <<2 x i32>> [#uses=1]
%conv = bitcast <2 x i32> %1 to i64 ; <i64> [#uses=1]
ret i64 %conv
}

View File

@ -1,12 +0,0 @@
; RUN: llc < %s -march=x86-64
; PR4669
declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
; Wraps the scalar %t in a <1 x i64>, passes it through the MMX pslli.q
; intrinsic with a count of 48, and returns the result as <1 x i64>.
define <1 x i64> @test(i64 %t) {
entry:
%t1 = insertelement <1 x i64> undef, i64 %t, i32 0
; Bitcast to x86_mmx because the intrinsic takes and returns that type.
%t0 = bitcast <1 x i64> %t1 to x86_mmx
%t2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %t0, i32 48)
%t3 = bitcast x86_mmx %t2 to <1 x i64>
ret <1 x i64> %t3
}

View File

@ -36,3 +36,38 @@ define void @t2(<1 x i64> %v1) nounwind {
; X86-64: movq %rdi
}
@g_v8qi = external global <8 x i8>
; Loads the <8 x i8> global @g_v8qi, bitcasts it to x86_mmx and passes it to
; the variadic function @pass_v8qi. The directives below require that no
; movdq2q appears before the punpcklbw match.
define void @t3() nounwind {
; X86-64-LABEL: t3:
; X86-64-NOT: movdq2q
; X86-64: punpcklbw
%tmp3 = load <8 x i8>* @g_v8qi, align 8
%tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
%tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
; Takes two x86_mmx arguments, adds them as generic <8 x i8> vectors (not via
; an MMX intrinsic), and passes the sum to @pass_v8qi as x86_mmx. The
; directives below match two movdq2q instructions and forbid any further one.
define void @t4(x86_mmx %v1, x86_mmx %v2) nounwind {
; X86-64-LABEL: t4:
; X86-64: movdq2q
; X86-64: movdq2q
; X86-64-NOT: movdq2q
%v1a = bitcast x86_mmx %v1 to <8 x i8>
%v2b = bitcast x86_mmx %v2 to <8 x i8>
%tmp3 = add <8 x i8> %v1a, %v2b
%tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
%tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
; Calls @pass_v1di with an all-zero <1 x i64>. The directives below require
; that no movdq2q precedes the "xorl %edi, %edi" match.
define void @t5() nounwind {
; X86-64-LABEL: t5:
; X86-64-NOT: movdq2q
; X86-64: xorl %edi, %edi
call void @pass_v1di( <1 x i64> zeroinitializer )
ret void
}
declare i32 @pass_v8qi(...)
declare void @pass_v1di(<1 x i64>)

View File

@ -1,28 +0,0 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
; Since the add is not an MMX add, we don't have a movq2dq any more.
@g_v8qi = external global <8 x i8>
; Loads the <8 x i8> global @g_v8qi, bitcasts it to x86_mmx and passes it to
; the variadic function @pass_v8qi.
define void @t1() nounwind {
%tmp3 = load <8 x i8>* @g_v8qi, align 8
%tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
%tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
; Takes two x86_mmx arguments, adds them as generic <8 x i8> vectors (the add
; is a plain IR add, not an MMX intrinsic) and passes the sum to @pass_v8qi.
define void @t2(x86_mmx %v1, x86_mmx %v2) nounwind {
%v1a = bitcast x86_mmx %v1 to <8 x i8>
%v2b = bitcast x86_mmx %v2 to <8 x i8>
%tmp3 = add <8 x i8> %v1a, %v2b
%tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
%tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
; Calls @pass_v1di with an all-zero <1 x i64> argument.
define void @t3() nounwind {
call void @pass_v1di( <1 x i64> zeroinitializer )
ret void
}
declare i32 @pass_v8qi(...)
declare void @pass_v1di(<1 x i64>)

View File

@ -1,31 +0,0 @@
; RUN: llc < %s -march=x86-64 | grep movd | count 4
; Loads an x86_mmx value from %p, adds it to itself with the padd.q intrinsic
; and returns the result bitcast to i64.
define i64 @foo(x86_mmx* %p) {
%t = load x86_mmx* %p
%u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
%s = bitcast x86_mmx %u to i64
ret i64 %s
}
; Loads an x86_mmx value from %p, adds it to itself with the padd.d intrinsic
; and returns the result bitcast to i64.
define i64 @goo(x86_mmx* %p) {
%t = load x86_mmx* %p
%u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
%s = bitcast x86_mmx %u to i64
ret i64 %s
}
; Loads an x86_mmx value from %p, adds it to itself with the padd.w intrinsic
; and returns the result bitcast to i64.
define i64 @hoo(x86_mmx* %p) {
%t = load x86_mmx* %p
%u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
%s = bitcast x86_mmx %u to i64
ret i64 %s
}
; Loads an x86_mmx value from %p, adds it to itself with the padd.b intrinsic
; and returns the result bitcast to i64.
define i64 @ioo(x86_mmx* %p) {
%t = load x86_mmx* %p
%u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
%s = bitcast x86_mmx %u to i64
ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)

View File

@ -0,0 +1,109 @@
; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse2 | FileCheck %s
; Loads an x86_mmx value from %p, adds it to itself with the padd.q intrinsic
; and returns the result bitcast to i64. The directives pin the full
; instruction sequence of the function body, line by line.
define i64 @t0(x86_mmx* %p) {
; CHECK-LABEL: t0:
; CHECK: ## BB#0:
; CHECK-NEXT: movq
; CHECK-NEXT: paddq %mm0, %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
%t = load x86_mmx* %p
%u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
%s = bitcast x86_mmx %u to i64
ret i64 %s
}
; Loads an x86_mmx value from %p, adds it to itself with the padd.d intrinsic
; and returns the result bitcast to i64. The directives pin the full
; instruction sequence of the function body, line by line.
define i64 @t1(x86_mmx* %p) {
; CHECK-LABEL: t1:
; CHECK: ## BB#0:
; CHECK-NEXT: movq
; CHECK-NEXT: paddd %mm0, %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
%t = load x86_mmx* %p
%u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
%s = bitcast x86_mmx %u to i64
ret i64 %s
}
; Loads an x86_mmx value from %p, adds it to itself with the padd.w intrinsic
; and returns the result bitcast to i64. The directives pin the full
; instruction sequence of the function body, line by line.
define i64 @t2(x86_mmx* %p) {
; CHECK-LABEL: t2:
; CHECK: ## BB#0:
; CHECK-NEXT: movq
; CHECK-NEXT: paddw %mm0, %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
%t = load x86_mmx* %p
%u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
%s = bitcast x86_mmx %u to i64
ret i64 %s
}
; Loads an x86_mmx value from %p, adds it to itself with the padd.b intrinsic
; and returns the result bitcast to i64. The directives pin the full
; instruction sequence of the function body, line by line.
define i64 @t3(x86_mmx* %p) {
; CHECK-LABEL: t3:
; CHECK: ## BB#0:
; CHECK-NEXT: movq
; CHECK-NEXT: paddb %mm0, %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
%t = load x86_mmx* %p
%u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
%s = bitcast x86_mmx %u to i64
ret i64 %s
}
@R = external global x86_mmx
; Adds the two <1 x i64> arguments with the MMX paddus.w intrinsic and stores
; the result to the external global @R. The paddusw match below restores the
; one assertion the standalone predecessor of this test made; without it the
; function would pass even if the saturating add were not selected.
define void @t4(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: t4:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: movd
; CHECK-NEXT: movd
; CHECK: paddusw
; CHECK: retq
entry:
; Reinterpret both vector arguments as x86_mmx so they can feed the intrinsic.
%tmp2 = bitcast <1 x i64> %A to x86_mmx
%tmp3 = bitcast <1 x i64> %B to x86_mmx
%tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %tmp2, x86_mmx %tmp3)
store x86_mmx %tmp7, x86_mmx* @R
; The emms intrinsic is called after the store, right before returning.
tail call void @llvm.x86.mmx.emms()
ret void
}
; Builds a <2 x i32> from %a (lane 0) and %b (lane 1) and returns it
; reinterpreted as a single i64. No x86_mmx type is involved; the expected
; sequence below uses only XMM registers.
define i64 @t5(i32 %a, i32 %b) nounwind readnone {
; CHECK-LABEL: t5:
; CHECK: ## BB#0:
; CHECK-NEXT: movd
; CHECK-NEXT: movd
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
; CHECK-NEXT: movd %xmm0, %rax
; CHECK-NEXT: retq
%v0 = insertelement <2 x i32> undef, i32 %a, i32 0
%v1 = insertelement <2 x i32> %v0, i32 %b, i32 1
%conv = bitcast <2 x i32> %v1 to i64
ret i64 %conv
}
declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
; Wraps the scalar %t in a <1 x i64>, passes it through the MMX pslli.q
; intrinsic with a count of 48, and returns the result as <1 x i64>.
define <1 x i64> @t6(i64 %t) {
; CHECK-LABEL: t6:
; CHECK: ## BB#0:
; CHECK-NEXT: movd
; CHECK-NEXT: psllq $48, %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
%t1 = insertelement <1 x i64> undef, i64 %t, i32 0
; Bitcast to x86_mmx because the intrinsic takes and returns that type.
%t0 = bitcast <1 x i64> %t1 to x86_mmx
%t2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %t0, i32 48)
%t3 = bitcast x86_mmx %t2 to <1 x i64>
ret <1 x i64> %t3
}
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()

View File

@ -1,11 +0,0 @@
; RUN: llc < %s -march=x86 -mattr=+mmx | grep emms
; Calls the emms intrinsic in the entry block, then branches to a separate
; return block.
define void @foo() {
entry:
call void @llvm.x86.mmx.emms( )
br label %return
return: ; preds = %entry
ret void
}
declare void @llvm.x86.mmx.emms()

View File

@ -1,9 +0,0 @@
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep movq
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pshufd
; This is not an MMX operation; promoted to XMM.
; Inserts %A into lane 1 of the constant <i32 0, i32 undef> and returns the
; vector bitcast to x86_mmx. The insert itself is done on <2 x i32>; only the
; return value has the MMX type.
define x86_mmx @qux(i32 %A) nounwind {
%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1 ; <<2 x i32>> [#uses=1]
%tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
ret x86_mmx %tmp4
}

View File

@ -1347,3 +1347,12 @@ define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
}
declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
; CHECK-LABEL: test90
; Calls the emms intrinsic and returns; the directive below expects an emms
; instruction in the output.
define void @test90() {
; CHECK: emms
call void @llvm.x86.mmx.emms()
ret void
}
declare void @llvm.x86.mmx.emms()

View File

@ -1,17 +0,0 @@
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
; PR2562
; CHECK: pinsr
external global i16 ; <i16*>:0 [#uses=1]
external global <4 x i16> ; <<4 x i16>*>:1 [#uses=2]
declare void @abort()
; Function with an empty name. Loads the i16 global @0 and the <4 x i16>
; global @1, inserts the scalar into lane 0 of the vector, and stores the
; vector back to @1. All values are unnamed, so %1, %2 and %3 are the
; implicit numbers given in the trailing annotations.
define void @""() {
load i16* @0 ; <i16>:1 [#uses=1]
load <4 x i16>* @1 ; <<4 x i16>>:2 [#uses=1]
insertelement <4 x i16> %2, i16 %1, i32 0 ; <<4 x i16>>:3 [#uses=1]
store <4 x i16> %3, <4 x i16>* @1
ret void
}

View File

@ -1,15 +0,0 @@
; RUN: llc < %s -march=x86 -mattr=+mmx
; PR2574
; Branches on a constant-true condition into %bb.nph, a block that loops back
; to itself. Each iteration overwrites both lanes of a <2 x float> with 0.0;
; the result (%2) feeds the phi on the back edge. None of the three i32
; arguments is used in the body.
define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) {; <label>:0
br i1 true, label %bb.nph, label %._crit_edge
bb.nph: ; preds = %bb.nph, %0
; The phi takes undef on entry from block %0 and %2 on the back edge.
%t2206f2.0 = phi <2 x float> [ %2, %bb.nph ], [ undef, %0 ] ; <<2 x float>> [#uses=1]
insertelement <2 x float> %t2206f2.0, float 0.000000e+00, i32 0 ; <<2 x float>>:1 [#uses=1]
insertelement <2 x float> %1, float 0.000000e+00, i32 1 ; <<2 x float>>:2 [#uses=1]
br label %bb.nph
._crit_edge: ; preds = %0
ret void
}

View File

@ -0,0 +1,58 @@
; RUN: llc < %s -mtriple=i686-darwin -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-32
; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse4.1 | FileCheck %s -check-prefix=X86-64
; This is not an MMX operation; promoted to XMM.
; Inserts %A into lane 1 of the constant <i32 0, i32 undef> and returns the
; vector bitcast to x86_mmx. Only directives with the X86-32 prefix are given
; for this function.
define x86_mmx @t0(i32 %A) nounwind {
; X86-32-LABEL: t0:
; X86-32: ## BB#0:
; X86-32: movd {{[0-9]+}}(%esp), %xmm0
; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
; X86-32-NEXT: movlpd %xmm0, (%esp)
; X86-32-NEXT: movq (%esp), %mm0
; X86-32-NEXT: addl $12, %esp
; X86-32-NEXT: retl
%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1
%tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
ret x86_mmx %tmp4
}
; Returns an <8 x i8> whose lane 0 is the zero-extended argument %x; the other
; lanes come from undef. Only directives with the X86-32 prefix are given for
; this function.
define <8 x i8> @t1(i8 zeroext %x) nounwind {
; X86-32-LABEL: t1:
; X86-32: ## BB#0:
; X86-32-NOT: movl
; X86-32-NEXT: movd {{[0-9]+}}(%esp), %xmm0
; X86-32-NEXT: retl
%r = insertelement <8 x i8> undef, i8 %x, i32 0
ret <8 x i8> %r
}
; PR2574
; Overwrites both lanes of the <2 x float> argument with 0.0 and returns it,
; so nothing of %a0 survives; the expected output is a single xorps before
; the return.
define <2 x float> @t2(<2 x float> %a0) {
; X86-32-LABEL: t2:
; X86-32: ## BB#0:
; X86-32-NEXT: xorps %xmm0, %xmm0
; X86-32-NEXT: retl
%v1 = insertelement <2 x float> %a0, float 0.000000e+00, i32 0
%v2 = insertelement <2 x float> %v1, float 0.000000e+00, i32 1
ret <2 x float> %v2
}
@g0 = external global i16
@g1 = external global <4 x i16>
; PR2562
; Loads the i16 global @g0 and the <4 x i16> global @g1, inserts the scalar
; into lane 0 of the vector, and stores the vector back to @g1. Only
; directives with the X86-64 prefix are given for this function.
define void @t3() {
; X86-64-LABEL: t3:
; X86-64: ## BB#0:
; X86-64: pmovzxwd (%rcx)
; X86-64-NEXT: movzwl
; X86-64-NEXT: pinsrd $0
; X86-64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; X86-64-NEXT: movq %xmm0
; X86-64-NEXT: retq
; The instructions are unnamed: the i16 load is %1, the vector load is %2 and
; the insertelement result is %3, as the operand types below show.
load i16* @g0
load <4 x i16>* @g1
insertelement <4 x i16> %2, i16 %1, i32 0
store <4 x i16> %3, <4 x i16>* @g1
ret void
}

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | FileCheck -check-prefix=X32 %s
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck -check-prefix=X64 %s
; RUN: llc < %s -mtriple=i686-darwin -mattr=+mmx,+sse2 | FileCheck -check-prefix=X32 %s
; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse2 | FileCheck -check-prefix=X64 %s
; If there is no explicit MMX type usage, always promote to XMM.
@ -37,4 +37,19 @@ entry:
ret void
}
@tmp_V2i = common global <2 x i32> zeroinitializer
; Loads the <2 x i32> global @tmp_V2i, shuffles it with an all-zero mask and
; stores the result back to the same global. Only directives with the X32
; prefix are given for this function.
define void @test2() nounwind {
; X32-LABEL: test2:
; X32: movsd
; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-NEXT: movlpd %xmm0, (%eax)
entry:
%0 = load <2 x i32>* @tmp_V2i, align 8
; A zeroinitializer mask selects element 0 of the first operand for both lanes.
%1 = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
store <2 x i32> %1, <2 x i32>* @tmp_V2i, align 8
ret void
}
declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)