[x86] Regenerate a number of FileCheck assertions with my script for

test cases that will change with the new vector shuffle lowering. This
gives us a nice baseline to compute deltas against. I've checked and removed
the cases where weird register usage was being pinned down, and
all of these are narrowly targeted tests, so fully checking them
seems very appropriate.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218941 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2014-10-03 01:06:32 +00:00
parent f71a17d0c6
commit ea3d31f580
6 changed files with 426 additions and 214 deletions

View File

@ -5,277 +5,296 @@
; instruction which performs a blend operation.
; or(<a0, 0>, <0, b1>) = <a0, b1>: the zero-filled lanes of the two shuffles
; are disjoint, so the expected code is a single movsd blend (plus a copy).
define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
%or = or <2 x i64> %shuf1, %shuf2
ret <2 x i64> %or
}
; or(<0, 0, a2, a3>, <b0, b1, 0, 0>) = <b0, b1, a2, a3>: disjoint live lanes,
; expected to become one movsd that inserts the low half of %b into %a.
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
; or(<0, a1>, <b0, 0>) = <b0, a1>: disjoint live lanes, expected to become a
; single movsd.
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%or = or <2 x i64> %shuf1, %shuf2
ret <2 x i64> %or
}
; or(<a0, 0, 0, 0>, <0, b1, b2, b3>) = <a0, b1, b2, b3>: disjoint live lanes,
; expected to become a movss blend (plus a copy).
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test4:
; CHECK: # BB#0:
; CHECK-NEXT: movss %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
; or(<0, a1, a2, a3>, <b0, 0, 0, 0>) = <b0, a1, a2, a3>: disjoint live lanes,
; expected to become a single movss.
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test5:
; CHECK: # BB#0:
; CHECK-NEXT: movss %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
; or(<a0, a1, 0, 0>, <0, 0, b2, b3>) = <a0, a1, b2, b3>: disjoint live lanes,
; expected to become a single blendps.
define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test6:
; CHECK: # BB#0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
; Same <a0, a1, b2, b3> result as test6, but the lanes are cleared with 'and'
; masks instead of shuffles; a single blendps is still expected.
define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test7:
; CHECK: # BB#0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
%and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
%or = or <4 x i32> %and1, %and2
ret <4 x i32> %or
}
; 'and'-mask form of <a0, b1>: complementary masks, expected to become a
; movsd blend (plus a copy).
define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test8:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%and1 = and <2 x i64> %a, <i64 -1, i64 0>
%and2 = and <2 x i64> %b, <i64 0, i64 -1>
%or = or <2 x i64> %and1, %and2
ret <2 x i64> %or
}
; 'and'-mask form of <b0, b1, a2, a3>: complementary masks, expected to become
; a single movsd.
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test9:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm1, %xmm0
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
%and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
%or = or <4 x i32> %and1, %and2
ret <4 x i32> %or
}
; 'and'-mask form of <b0, a1>: complementary masks, expected to become a
; single movsd.
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test10:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm1, %xmm0
; CHECK-NEXT: retq
%and1 = and <2 x i64> %a, <i64 0, i64 -1>
%and2 = and <2 x i64> %b, <i64 -1, i64 0>
%or = or <2 x i64> %and1, %and2
ret <2 x i64> %or
}
; 'and'-mask form of <a0, b1, b2, b3>: complementary masks, expected to become
; a movss blend (plus a copy).
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test11:
; CHECK: # BB#0:
; CHECK-NEXT: movss %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
%and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
%or = or <4 x i32> %and1, %and2
ret <4 x i32> %or
}
; 'and'-mask form of <b0, a1, a2, a3>: complementary masks, expected to become
; a single movss.
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test12:
; CHECK: # BB#0:
; CHECK-NEXT: movss %xmm1, %xmm0
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
%and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
%or = or <4 x i32> %and1, %and2
ret <4 x i32> %or
}
; Verify that the following test cases are folded into single shuffles.
; or(<a1, a1, 0, 0>, <0, 0, b2, b3>) = <a1, a1, b2, b3>: expected to fold into
; one shufps.
define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test13:
; CHECK: # BB#0:
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
; or(<a0, 0>, <0, b0>) = <a0, b0>: expected to fold into one punpcklqdq.
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test14:
; CHECK: # BB#0:
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
%or = or <2 x i64> %shuf1, %shuf2
ret <2 x i64> %or
}
; or(<0, 0, a2, a1>, <b2, b1, 0, 0>) = <b2, b1, a2, a1>: expected to fold into
; one shufps (plus a copy into the return register).
define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test15:
; CHECK: # BB#0:
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
; or(<0, a0>, <b0, 0>) = <b0, a0>: expected to fold into one punpcklqdq (plus
; a copy into the return register).
define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test16:
; CHECK: # BB#0:
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%or = or <2 x i64> %shuf1, %shuf2
ret <2 x i64> %or
}
; Verify that the dag-combiner does not fold an OR of two shuffles into a single
; shuffle instruction when the shuffle indexes are not compatible.
; or(<0, a0, 0, a2>, <b0, b1, 0, 0>): lane 1 is live in both operands, so the
; or cannot become a shuffle; the expected code keeps the por.
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test17:
; CHECK: # BB#0:
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,0]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0]
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
; or(<0, a0, 0, 0>, <b0, 0, 0, 0>) = <b0, a0, 0, 0>: the result needs zero
; lanes as well as elements of both inputs; the expected code keeps the orps.
define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test18:
; CHECK: # BB#0:
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: xorps %xmm3, %xmm3
; CHECK-NEXT: movss %xmm0, %xmm3
; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[0,0]
; CHECK-NEXT: movss %xmm1, %xmm2
; CHECK-NEXT: orps %xmm3, %xmm2
; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
; or(<0, a0, 0, a3>, <b0, 0, b2, b2>): lane 3 is live in both operands, so the
; or cannot become a shuffle; the expected code keeps the por.
define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK: # BB#0:
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,0]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: pslldq $8, %xmm2
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,2]
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
; or(<a0, 0>, <b0, 0>): both operands use the same mask, so the expected code
; ors the inputs first and then clears the upper lane with one movq.
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test20:
; CHECK: # BB#0:
; CHECK-NEXT: orps %xmm1, %xmm0
; CHECK-NEXT: movq %xmm0, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%or = or <2 x i64> %shuf1, %shuf2
ret <2 x i64> %or
}
; or(<0, a0>, <0, b0>): both operands use the same mask, so the expected code
; ors the inputs first and then applies one pslldq.
define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test21:
; CHECK: # BB#0:
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pslldq $8, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
%or = or <2 x i64> %shuf1, %shuf2
ret <2 x i64> %or
}
; Verify that the DAGCombiner doesn't crash in the attempt to check if a shuffle
; with illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows how to
; handle legal vector value types.
; Uses the <b0, b1, a2, a3> lane pattern on <4 x i8>; the expected code is a
; single movsd.
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
%or = or <4 x i8> %shuf1, %shuf2
ret <4 x i8> %or
}

View File

@ -2,57 +2,87 @@
; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
; Loads a <1 x double> from %p and truncates it to <1 x float>; the expected
; code is a scalar cvtsd2ss (vcvtsd2ss under the AVX-prefixed run).
define <1 x float> @test1(<1 x double>* %p) nounwind {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movsd (%eax), %xmm0
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: movss %xmm0, (%esp)
; CHECK-NEXT: flds (%esp)
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
;
; AVX-LABEL: test1:
; AVX: # BB#0:
; AVX-NEXT: pushl %eax
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-NEXT: vmovsd (%eax), %xmm0
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovss %xmm0, (%esp)
; AVX-NEXT: flds (%esp)
; AVX-NEXT: popl %eax
; AVX-NEXT: retl
%x = load <1 x double>* %p
%y = fptrunc <1 x double> %x to <1 x float>
ret <1 x float> %y
}
; Loads a <2 x double> from %p and truncates it to <2 x float>; the conversion
; is expected to take the load as its memory operand.
define <2 x float> @test2(<2 x double>* %p) nounwind {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: cvtpd2ps (%eax), %xmm0
; CHECK-NEXT: retl
;
; AVX-LABEL: test2:
; AVX: # BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-NEXT: vcvtpd2psx (%eax), %xmm0
; AVX-NEXT: retl
%x = load <2 x double>* %p
%y = fptrunc <2 x double> %x to <2 x float>
ret <2 x float> %y
}
; Loads a <4 x double> from %p and truncates it to <4 x float>: two cvtpd2ps
; joined by movlhps under the default prefix, and a single vcvtpd2psy under the
; AVX-prefixed run.
define <4 x float> @test3(<4 x double>* %p) nounwind {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: cvtpd2ps 16(%eax), %xmm1
; CHECK-NEXT: cvtpd2ps (%eax), %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retl
;
; AVX-LABEL: test3:
; AVX: # BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-NEXT: vcvtpd2psy (%eax), %xmm0
; AVX-NEXT: retl
%x = load <4 x double>* %p
%y = fptrunc <4 x double> %x to <4 x float>
ret <4 x float> %y
}
define <8 x float> @test4(<8 x double>* %p) nounwind {
; CHECK: test4
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
; CHECK: movlhps
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
; CHECK: movlhps
; CHECK: ret
; AVX: test4
; AVX: vcvtpd2psy
; AVX: vcvtpd2psy
; AVX: vinsertf128
; AVX: ret
; CHECK-LABEL: test4:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: cvtpd2ps 16(%eax), %xmm1
; CHECK-NEXT: cvtpd2ps (%eax), %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: cvtpd2ps 48(%eax), %xmm2
; CHECK-NEXT: cvtpd2ps 32(%eax), %xmm1
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: retl
;
; AVX-LABEL: test4:
; AVX: # BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-NEXT: vcvtpd2psy (%eax), %xmm0
; AVX-NEXT: vcvtpd2psy 32(%eax), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retl
%x = load <8 x double>* %p
%y = fptrunc <8 x double> %x to <8 x float>
ret <8 x float> %y

View File

@ -2,55 +2,77 @@
; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
; Truncates a <1 x double> argument to <1 x float>; the expected code is a
; scalar cvtsd2ss (vcvtsd2ss under the AVX-prefixed run).
define <1 x float> @test1(<1 x double> %x) nounwind {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movsd {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: movss %xmm0, (%esp)
; CHECK-NEXT: flds (%esp)
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
;
; AVX-LABEL: test1:
; AVX: # BB#0:
; AVX-NEXT: pushl %eax
; AVX-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovss %xmm0, (%esp)
; AVX-NEXT: flds (%esp)
; AVX-NEXT: popl %eax
; AVX-NEXT: retl
%y = fptrunc <1 x double> %x to <1 x float>
ret <1 x float> %y
}
; Truncates a <2 x double> argument to <2 x float>; one 128-bit (v)cvtpd2ps is
; expected, and the exact AVX sequence leaves no room for the 256-bit
; vcvtpd2psy form.
define <2 x float> @test2(<2 x double> %x) nounwind {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: retl
;
; AVX-LABEL: test2:
; AVX: # BB#0:
; AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
; AVX-NEXT: retl
%y = fptrunc <2 x double> %x to <2 x float>
ret <2 x float> %y
}
; Truncates a <4 x double> argument to <4 x float>: two cvtpd2ps joined by
; movlhps under the default prefix, and one vcvtpd2psy followed by vzeroupper
; under the AVX-prefixed run.
define <4 x float> @test3(<4 x double> %x) nounwind {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm1
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retl
;
; AVX-LABEL: test3:
; AVX: # BB#0:
; AVX-NEXT: vcvtpd2psy %ymm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retl
%y = fptrunc <4 x double> %x to <4 x float>
ret <4 x float> %y
}
; Truncates a <8 x double> argument to <8 x float>: four cvtpd2ps paired up by
; two movlhps under the default prefix, and two vcvtpd2psy joined by
; vinsertf128 under the AVX-prefixed run.
define <8 x float> @test4(<8 x double> %x) nounwind {
; CHECK-LABEL: test4:
; CHECK: # BB#0:
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm1
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: cvtpd2ps %xmm3, %xmm3
; CHECK-NEXT: cvtpd2ps %xmm2, %xmm1
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; CHECK-NEXT: retl
;
; AVX-LABEL: test4:
; AVX: # BB#0:
; AVX-NEXT: vcvtpd2psy %ymm0, %xmm0
; AVX-NEXT: vcvtpd2psy %ymm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retl
%y = fptrunc <8 x double> %x to <8 x float>
ret <8 x float> %y
}

View File

@ -3,40 +3,58 @@
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64
; Loads an i32* from a constant address in address space 256 (element 31 of a
; pointer array based at 72) and dereferences it; the first load is expected
; to be %gs-relative (offset 196 with 4-byte pointers, 320 with 8-byte ones).
define i32 @test1() nounwind readonly {
; X32-LABEL: test1:
; X32: # BB#0: # %entry
; X32-NEXT: movl %gs:196, %eax
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # BB#0: # %entry
; X64-NEXT: movq %gs:320, %rax
; X64-NEXT: movl (%rax), %eax
; X64-NEXT: retq
entry:
%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
; Loads a function pointer through the addrspace(256) pointer argument and
; calls it; the expected code is an indirect %gs-relative call, after which
; the function returns 0.
define i64 @test2(void (i8*)* addrspace(256)* %tmp8) nounwind {
; X32-LABEL: test2:
; X32: # BB#0: # %entry
; X32-NEXT: subl $12, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: calll *%gs:(%eax)
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: addl $12, %esp
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # BB#0: # %entry
; X64-NEXT: {{(subq.*%rsp|pushq)}}
; X64-NEXT: callq *%gs:(%{{(rcx|rdi)}})
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: {{(addq.*%rsp|popq)}}
; X64-NEXT: retq
entry:
%tmp9 = load void (i8*)* addrspace(256)* %tmp8, align 8
tail call void %tmp9(i8* undef) nounwind optsize
ret i64 0
}
; rdar://8453210
; X32-LABEL: test2:
; X32: movl {{.*}}(%esp), %eax
; X32: calll *%gs:(%eax)
; X64-LABEL: test2:
; X64: callq *%gs:([[A0:%rdi|%rcx]])
define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly {
; X32-LABEL: pmovsxwd_1:
; X32: # BB#0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pmovsxwd %gs:(%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: pmovsxwd_1:
; X64: # BB#0: # %entry
; X64-NEXT: pmovsxwd %gs:(%{{(rcx|rdi)}}), %xmm0
; X64-NEXT: retq
entry:
%0 = load i64 addrspace(256)* %p
%tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
@ -44,20 +62,26 @@ entry:
%2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
%3 = bitcast <4 x i32> %2 to <2 x i64>
ret <2 x i64> %3
; X32-LABEL: pmovsxwd_1:
; X32: movl 4(%esp), %eax
; X32: pmovsxwd %gs:(%eax), %xmm0
; X32: ret
; X64-LABEL: pmovsxwd_1:
; X64: pmovsxwd %gs:([[A0]]), %xmm0
; X64: ret
}
; The two loads here both look identical to selection DAG, except for their
; address spaces. Make sure they aren't CSE'd.
define i32 @test_no_cse() nounwind readonly {
; X32-LABEL: test_no_cse:
; X32: # BB#0: # %entry
; X32-NEXT: movl %gs:196, %eax
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: movl %fs:196, %ecx
; X32-NEXT: addl (%ecx), %eax
; X32-NEXT: retl
;
; X64-LABEL: test_no_cse:
; X64: # BB#0: # %entry
; X64-NEXT: movq %gs:320, %rax
; X64-NEXT: movl (%rax), %eax
; X64-NEXT: movq %fs:320, %rcx
; X64-NEXT: addl (%rcx), %eax
; X64-NEXT: retq
entry:
%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
@ -66,9 +90,5 @@ entry:
%tmp4 = add i32 %tmp1, %tmp3
ret i32 %tmp4
}
; X32-LABEL: test_no_cse:
; X32: movl %gs:196
; X32: movl %fs:196
; X32: ret
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone

View File

@ -3,58 +3,162 @@
; Single-input rotate <1,2,3,0>: both check prefixes expect one pshufd.
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test1:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-YONAH-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
ret <4 x i32> %C
}
; Two-input mask <1,2,3,4> (elements 1-3 of %A, then element 0 of %B): one
; palignr under the default prefix, a pair of shufps under the -YONAH prefix.
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test2:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; CHECK-YONAH-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >
ret <4 x i32> %C
}
; Mask <1,2,undef,4>: the undef lane still allows one palignr under the
; default prefix; the -YONAH prefix expects a single shufps.
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test3:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[0,0]
; CHECK-YONAH-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
ret <4 x i32> %C
}
; Mask <6,7,undef,1> (upper half of %B, then into %A): one palignr under the
; default prefix, shufps plus a copy under the -YONAH prefix.
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test4:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test4:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; CHECK-YONAH-NEXT: movaps %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
ret <4 x i32> %C
}
; Same mask as test4 (<6,7,undef,1>) on <4 x float>; the expected code is
; identical to the integer case under both prefixes.
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-LABEL: test5:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test5:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; CHECK-YONAH-NEXT: movaps %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
%C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
ret <4 x float> %C
}
; <8 x i16> mask <3,4,undef,6,7,8,9,10>: one palignr under the default prefix;
; the -YONAH prefix pins a longer shuffle/extract/insert sequence.
define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: test6:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test6:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: movapd %xmm0, %xmm2
; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,2,4,5,6,7]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,5,6]
; CHECK-YONAH-NEXT: pextrw $3, %xmm0, %eax
; CHECK-YONAH-NEXT: pinsrw $0, %eax, %xmm1
; CHECK-YONAH-NEXT: pextrw $7, %xmm0, %eax
; CHECK-YONAH-NEXT: pinsrw $4, %eax, %xmm1
; CHECK-YONAH-NEXT: movdqa %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
ret <8 x i16> %C
}
; <8 x i16> mask <undef,6,undef,8,9,10,11,12>: one palignr under the default
; prefix; the -YONAH prefix pins a shuffle/extract/insert sequence.
define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: test7:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test7:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,0,0,4,5,6,7]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
; CHECK-YONAH-NEXT: movd %xmm1, %eax
; CHECK-YONAH-NEXT: pinsrw $3, %eax, %xmm0
; CHECK-YONAH-NEXT: pextrw $4, %xmm1, %eax
; CHECK-YONAH-NEXT: pinsrw $7, %eax, %xmm0
; CHECK-YONAH-NEXT: retl
%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
ret <8 x i16> %C
}
; <16 x i8> mask starting at byte 5 of %A and running into %B (one undef
; lane): one palignr under the default prefix; the -YONAH prefix pins a long
; word-wise extract/shift/insert sequence.
define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK-LABEL: test8:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test8:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: pushl %esi
; CHECK-YONAH-NEXT: movdqa %xmm0, %xmm2
; CHECK-YONAH-NEXT: pextrw $4, %xmm2, %eax
; CHECK-YONAH-NEXT: pextrw $5, %xmm2, %ecx
; CHECK-YONAH-NEXT: shrdw $8, %cx, %ax
; CHECK-YONAH-NEXT: pextrw $2, %xmm2, %edx
; CHECK-YONAH-NEXT: pextrw $3, %xmm2, %esi
; CHECK-YONAH-NEXT: shrdw $8, %si, %dx
; CHECK-YONAH-NEXT: # kill: XMM0<def> XMM2<kill>
; CHECK-YONAH-NEXT: pinsrw $0, %edx, %xmm0
; CHECK-YONAH-NEXT: shrl $8, %esi
; CHECK-YONAH-NEXT: pinsrw $1, %esi, %xmm0
; CHECK-YONAH-NEXT: pinsrw $2, %eax, %xmm0
; CHECK-YONAH-NEXT: pextrw $6, %xmm2, %eax
; CHECK-YONAH-NEXT: shrdw $8, %ax, %cx
; CHECK-YONAH-NEXT: pinsrw $3, %ecx, %xmm0
; CHECK-YONAH-NEXT: pextrw $7, %xmm2, %ecx
; CHECK-YONAH-NEXT: shrdw $8, %cx, %ax
; CHECK-YONAH-NEXT: pinsrw $4, %eax, %xmm0
; CHECK-YONAH-NEXT: pextrw $8, %xmm1, %eax
; CHECK-YONAH-NEXT: shrdw $8, %ax, %cx
; CHECK-YONAH-NEXT: pinsrw $5, %ecx, %xmm0
; CHECK-YONAH-NEXT: pextrw $9, %xmm1, %ecx
; CHECK-YONAH-NEXT: shrdw $8, %cx, %ax
; CHECK-YONAH-NEXT: pinsrw $6, %eax, %xmm0
; CHECK-YONAH-NEXT: pextrw $10, %xmm1, %eax
; CHECK-YONAH-NEXT: shldw $8, %cx, %ax
; CHECK-YONAH-NEXT: pinsrw $7, %eax, %xmm0
; CHECK-YONAH-NEXT: popl %esi
; CHECK-YONAH-NEXT: retl
%C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
ret <16 x i8> %C
}
@ -65,8 +169,20 @@ define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
; was an UNDEF.)
; Operands are swapped (%B first) with mask <undef,2,3,4,5,6,7,0>. The exact
; default-prefix sequence is a pshufb plus a copy, which leaves no room for a
; palignr; the -YONAH prefix pins a shuffle/extract/insert sequence.
define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: test9:
; CHECK: # BB#0:
; CHECK-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,xmm1[4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test9:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: pextrw $4, %xmm1, %eax
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,3,0,4,5,6,7]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
; CHECK-YONAH-NEXT: pinsrw $3, %eax, %xmm0
; CHECK-YONAH-NEXT: movd %xmm1, %eax
; CHECK-YONAH-NEXT: pinsrw $7, %eax, %xmm0
; CHECK-YONAH-NEXT: retl
%C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
ret <8 x i16> %C
}

View File

@ -3,6 +3,12 @@
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>)
define <2 x i16> @good(<4 x i32>*, <4 x i8>*) {
; CHECK-LABEL: good:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movdqa (%rdi), %xmm0
; CHECK-NEXT: pminud {{.*}}(%rip), %xmm0
; CHECK-NEXT: pmovzxwq %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
%2 = load <4 x i32>* %0, align 16
%3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
@ -13,13 +19,17 @@ entry:
%8 = bitcast i32 %4 to <2 x i16>
%9 = bitcast i32 %5 to <2 x i16>
ret <2 x i16> %8
; CHECK: good
; CHECK: pminud
; CHECK-NEXT: pmovzxwq
; CHECK: ret
}
define <2 x i16> @bad(<4 x i32>*, <4 x i8>*) {
; CHECK-LABEL: bad:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movdqa (%rdi), %xmm0
; CHECK-NEXT: pminud {{.*}}(%rip), %xmm0
; CHECK-NEXT: pextrd $1, %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: pmovzxwq %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
%2 = load <4 x i32>* %0, align 16
%3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
@ -30,9 +40,4 @@ entry:
%8 = bitcast i32 %4 to <2 x i16>
%9 = bitcast i32 %5 to <2 x i16>
ret <2 x i16> %9
; CHECK: bad
; CHECK: pminud
; CHECK: pextrd
; CHECK: pmovzxwq
; CHECK: ret
}