[x86] Remove some windows line endings that snuck into the tests here.

Folks on Windows, remember to set up your subversion to strip these when
submitting...

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225593 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2015-01-11 01:36:20 +00:00
parent 6c92f6eb6a
commit 561088eb5d
8 changed files with 463 additions and 463 deletions

View File

@ -455,21 +455,21 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
@ -551,21 +551,21 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

View File

@ -158,21 +158,21 @@ define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
@ -254,21 +254,21 @@ define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone

View File

@ -1,84 +1,84 @@
; RUN: llc -verify-machineinstrs -mtriple=i386--netbsd < %s | FileCheck %s
; Regression test for http://reviews.llvm.org/D5701
; ModuleID = 'xxhash.i'
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386--netbsd"
; CHECK-LABEL: fn1
; CHECK: shldl {{.*#+}} 4-byte Folded Spill
; CHECK: orl {{.*#+}} 4-byte Folded Reload
; CHECK: shldl {{.*#+}} 4-byte Folded Spill
; CHECK: orl {{.*#+}} 4-byte Folded Reload
; CHECK: addl {{.*#+}} 4-byte Folded Reload
; CHECK: imull {{.*#+}} 4-byte Folded Reload
; CHECK: orl {{.*#+}} 4-byte Folded Reload
; CHECK: retl
%struct.XXH_state64_t = type { i32, i32, i64, i64, i64 }
@a = common global i32 0, align 4
@b = common global i64 0, align 8
; Function Attrs: nounwind uwtable
define i64 @fn1() #0 {
entry:
%0 = load i32* @a, align 4, !tbaa !1
%1 = inttoptr i32 %0 to %struct.XXH_state64_t*
%total_len = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 0
%2 = load i32* %total_len, align 4, !tbaa !5
%tobool = icmp eq i32 %2, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%v3 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 3
%3 = load i64* %v3, align 4, !tbaa !8
%v4 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 4
%4 = load i64* %v4, align 4, !tbaa !9
%v2 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 2
%5 = load i64* %v2, align 4, !tbaa !10
%shl = shl i64 %5, 1
%or = or i64 %shl, %5
%shl2 = shl i64 %3, 2
%shr = lshr i64 %3, 1
%or3 = or i64 %shl2, %shr
%add = add i64 %or, %or3
%mul = mul i64 %4, -4417276706812531889
%shl4 = mul i64 %4, -8834553413625063778
%shr5 = ashr i64 %mul, 3
%or6 = or i64 %shr5, %shl4
%mul7 = mul nsw i64 %or6, 1400714785074694791
%xor = xor i64 %add, %mul7
store i64 %xor, i64* @b, align 8, !tbaa !11
%mul8 = mul nsw i64 %xor, 1400714785074694791
br label %if.end
if.else: ; preds = %entry
%6 = load i64* @b, align 8, !tbaa !11
%xor10 = xor i64 %6, -4417276706812531889
%mul11 = mul nsw i64 %xor10, 400714785074694791
br label %if.end
if.end: ; preds = %if.else, %if.then
%storemerge.in = phi i64 [ %mul11, %if.else ], [ %mul8, %if.then ]
%storemerge = add i64 %storemerge.in, -8796714831421723037
store i64 %storemerge, i64* @b, align 8, !tbaa !11
ret i64 undef
}
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.6 (trunk 219587)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!6, !2, i64 0}
!6 = !{!"XXH_state64_t", !2, i64 0, !2, i64 4, !7, i64 8, !7, i64 16, !7, i64 24}
!7 = !{!"long long", !3, i64 0}
!8 = !{!6, !7, i64 16}
!9 = !{!6, !7, i64 24}
!10 = !{!6, !7, i64 8}
!11 = !{!7, !7, i64 0}
; RUN: llc -verify-machineinstrs -mtriple=i386--netbsd < %s | FileCheck %s
; Regression test for http://reviews.llvm.org/D5701
; ModuleID = 'xxhash.i'
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386--netbsd"
; CHECK-LABEL: fn1
; CHECK: shldl {{.*#+}} 4-byte Folded Spill
; CHECK: orl {{.*#+}} 4-byte Folded Reload
; CHECK: shldl {{.*#+}} 4-byte Folded Spill
; CHECK: orl {{.*#+}} 4-byte Folded Reload
; CHECK: addl {{.*#+}} 4-byte Folded Reload
; CHECK: imull {{.*#+}} 4-byte Folded Reload
; CHECK: orl {{.*#+}} 4-byte Folded Reload
; CHECK: retl
%struct.XXH_state64_t = type { i32, i32, i64, i64, i64 }
@a = common global i32 0, align 4
@b = common global i64 0, align 8
; Function Attrs: nounwind uwtable
define i64 @fn1() #0 {
entry:
%0 = load i32* @a, align 4, !tbaa !1
%1 = inttoptr i32 %0 to %struct.XXH_state64_t*
%total_len = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 0
%2 = load i32* %total_len, align 4, !tbaa !5
%tobool = icmp eq i32 %2, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%v3 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 3
%3 = load i64* %v3, align 4, !tbaa !8
%v4 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 4
%4 = load i64* %v4, align 4, !tbaa !9
%v2 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 2
%5 = load i64* %v2, align 4, !tbaa !10
%shl = shl i64 %5, 1
%or = or i64 %shl, %5
%shl2 = shl i64 %3, 2
%shr = lshr i64 %3, 1
%or3 = or i64 %shl2, %shr
%add = add i64 %or, %or3
%mul = mul i64 %4, -4417276706812531889
%shl4 = mul i64 %4, -8834553413625063778
%shr5 = ashr i64 %mul, 3
%or6 = or i64 %shr5, %shl4
%mul7 = mul nsw i64 %or6, 1400714785074694791
%xor = xor i64 %add, %mul7
store i64 %xor, i64* @b, align 8, !tbaa !11
%mul8 = mul nsw i64 %xor, 1400714785074694791
br label %if.end
if.else: ; preds = %entry
%6 = load i64* @b, align 8, !tbaa !11
%xor10 = xor i64 %6, -4417276706812531889
%mul11 = mul nsw i64 %xor10, 400714785074694791
br label %if.end
if.end: ; preds = %if.else, %if.then
%storemerge.in = phi i64 [ %mul11, %if.else ], [ %mul8, %if.then ]
%storemerge = add i64 %storemerge.in, -8796714831421723037
store i64 %storemerge, i64* @b, align 8, !tbaa !11
ret i64 undef
}
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.6 (trunk 219587)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!6, !2, i64 0}
!6 = !{!"XXH_state64_t", !2, i64 0, !2, i64 4, !7, i64 8, !7, i64 16, !7, i64 24}
!7 = !{!"long long", !3, i64 0}
!8 = !{!6, !7, i64 16}
!9 = !{!6, !7, i64 24}
!10 = !{!6, !7, i64 8}
!11 = !{!7, !7, i64 0}

View File

@ -1,112 +1,112 @@
; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
; Here, we should have a reserved frame, so we don't expect pushes
; NORMAL-LABEL: test1
; NORMAL: subl $16, %esp
; NORMAL-NEXT: movl $4, 12(%esp)
; NORMAL-NEXT: movl $3, 8(%esp)
; NORMAL-NEXT: movl $2, 4(%esp)
; NORMAL-NEXT: movl $1, (%esp)
; NORMAL-NEXT: call
define void @test1() {
entry:
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Here, we expect a sequence of 4 immediate pushes
; NORMAL-LABEL: test2
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
define void @test2(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Again, we expect a sequence of 4 immediate pushes
; Checks that we generate the right pushes for >8bit immediates
; NORMAL-LABEL: test2b
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4096
; NORMAL-NEXT: pushl $3072
; NORMAL-NEXT: pushl $2048
; NORMAL-NEXT: pushl $1024
; NORMAL-NEXT: call
define void @test2b(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
ret void
}
; The first push should push a register
; NORMAL-LABEL: test3
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl %e{{..}}
; NORMAL-NEXT: call
define void @test3(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 %k, i32 2, i32 3, i32 4)
ret void
}
; We don't support weird calling conventions
; NORMAL-LABEL: test4
; NORMAL: subl $12, %esp
; NORMAL-NEXT: movl $4, 8(%esp)
; NORMAL-NEXT: movl $3, 4(%esp)
; NORMAL-NEXT: movl $1, (%esp)
; NORMAL-NEXT: movl $2, %eax
; NORMAL-NEXT: call
define void @test4(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @inreg(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Check that additional alignment is added when the pushes
; don't add up to the required alignment.
; ALIGNED-LABEL: test5
; ALIGNED: subl $16, %esp
; ALIGNED-NEXT: pushl $4
; ALIGNED-NEXT: pushl $3
; ALIGNED-NEXT: pushl $2
; ALIGNED-NEXT: pushl $1
; ALIGNED-NEXT: call
define void @test5(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Check that pushing the addresses of globals (Or generally, things that
; aren't exactly immediates) isn't broken.
; Fixes PR21878.
; NORMAL-LABEL: test6
; NORMAL: pushl $_ext
; NORMAL-NEXT: call
declare void @f(i8*)
@ext = external constant i8
define void @test6() {
call void @f(i8* @ext)
br label %bb
bb:
alloca i32
ret void
}
; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
; Here, we should have a reserved frame, so we don't expect pushes
; NORMAL-LABEL: test1
; NORMAL: subl $16, %esp
; NORMAL-NEXT: movl $4, 12(%esp)
; NORMAL-NEXT: movl $3, 8(%esp)
; NORMAL-NEXT: movl $2, 4(%esp)
; NORMAL-NEXT: movl $1, (%esp)
; NORMAL-NEXT: call
define void @test1() {
entry:
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Here, we expect a sequence of 4 immediate pushes
; NORMAL-LABEL: test2
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
define void @test2(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Again, we expect a sequence of 4 immediate pushes
; Checks that we generate the right pushes for >8bit immediates
; NORMAL-LABEL: test2b
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4096
; NORMAL-NEXT: pushl $3072
; NORMAL-NEXT: pushl $2048
; NORMAL-NEXT: pushl $1024
; NORMAL-NEXT: call
define void @test2b(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
ret void
}
; The first push should push a register
; NORMAL-LABEL: test3
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl %e{{..}}
; NORMAL-NEXT: call
define void @test3(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 %k, i32 2, i32 3, i32 4)
ret void
}
; We don't support weird calling conventions
; NORMAL-LABEL: test4
; NORMAL: subl $12, %esp
; NORMAL-NEXT: movl $4, 8(%esp)
; NORMAL-NEXT: movl $3, 4(%esp)
; NORMAL-NEXT: movl $1, (%esp)
; NORMAL-NEXT: movl $2, %eax
; NORMAL-NEXT: call
define void @test4(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @inreg(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Check that additional alignment is added when the pushes
; don't add up to the required alignment.
; ALIGNED-LABEL: test5
; ALIGNED: subl $16, %esp
; ALIGNED-NEXT: pushl $4
; ALIGNED-NEXT: pushl $3
; ALIGNED-NEXT: pushl $2
; ALIGNED-NEXT: pushl $1
; ALIGNED-NEXT: call
define void @test5(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Check that pushing the addresses of globals (Or generally, things that
; aren't exactly immediates) isn't broken.
; Fixes PR21878.
; NORMAL-LABEL: test6
; NORMAL: pushl $_ext
; NORMAL-NEXT: call
declare void @f(i8*)
@ext = external constant i8
define void @test6() {
call void @f(i8* @ext)
br label %bb
bb:
alloca i32
ret void
}

View File

@ -1,19 +1,19 @@
; RUN: llc < %s | FileCheck %s
; Don't try to emit a direct call through a TLS global.
; This fixes PR22103
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@a = external thread_local global i64
; Function Attrs: nounwind
define void @_Z1fv() {
; CHECK-NOT: callq *$a
; CHECK: movq %fs:0, [[RAX:%r..]]
; CHECK-NEXT: addq a@GOTTPOFF(%rip), [[RAX]]
; CHECK-NEXT: callq *[[RAX]]
entry:
call void bitcast (i64* @a to void ()*)()
ret void
}
; RUN: llc < %s | FileCheck %s
; Don't try to emit a direct call through a TLS global.
; This fixes PR22103
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@a = external thread_local global i64
; Function Attrs: nounwind
define void @_Z1fv() {
; CHECK-NOT: callq *$a
; CHECK: movq %fs:0, [[RAX:%r..]]
; CHECK-NEXT: addq a@GOTTPOFF(%rip), [[RAX]]
; CHECK-NEXT: callq *[[RAX]]
entry:
call void bitcast (i64* @a to void ()*)()
ret void
}

View File

@ -408,21 +408,21 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
@ -504,21 +504,21 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

View File

@ -1,82 +1,82 @@
target triple = "x86_64-unknown-unknown"
; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
; When extracting multiple consecutive elements from a larger
; vector into a smaller one, do it efficiently. We should use
; an EXTRACT_SUBVECTOR node internally rather than a bunch of
; single element extractions.
; Extracting the low elements only requires using the right kind of store.
define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
%ext0 = extractelement <8 x float> %v, i32 0
%ext1 = extractelement <8 x float> %v, i32 1
%ext2 = extractelement <8 x float> %v, i32 2
%ext3 = extractelement <8 x float> %v, i32 3
%ins0 = insertelement <4 x float> undef, float %ext0, i32 0
%ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
%ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
%ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
store <4 x float> %ins3, <4 x float>* %ptr, align 16
ret void
; CHECK-LABEL: low_v8f32_to_v4f32
; CHECK: vmovaps
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}
; Extracting the high elements requires just one AVX instruction.
define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
%ext0 = extractelement <8 x float> %v, i32 4
%ext1 = extractelement <8 x float> %v, i32 5
%ext2 = extractelement <8 x float> %v, i32 6
%ext3 = extractelement <8 x float> %v, i32 7
%ins0 = insertelement <4 x float> undef, float %ext0, i32 0
%ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
%ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
%ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
store <4 x float> %ins3, <4 x float>* %ptr, align 16
ret void
; CHECK-LABEL: high_v8f32_to_v4f32
; CHECK: vextractf128
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}
; Make sure element type doesn't alter the codegen. Note that
; if we were actually using the vector in this function and
; have AVX2, we should generate vextracti128 (the int version).
define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
%ext0 = extractelement <8 x i32> %v, i32 4
%ext1 = extractelement <8 x i32> %v, i32 5
%ext2 = extractelement <8 x i32> %v, i32 6
%ext3 = extractelement <8 x i32> %v, i32 7
%ins0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
%ins1 = insertelement <4 x i32> %ins0, i32 %ext1, i32 1
%ins2 = insertelement <4 x i32> %ins1, i32 %ext2, i32 2
%ins3 = insertelement <4 x i32> %ins2, i32 %ext3, i32 3
store <4 x i32> %ins3, <4 x i32>* %ptr, align 16
ret void
; CHECK-LABEL: high_v8i32_to_v4i32
; CHECK: vextractf128
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}
; Make sure that element size doesn't alter the codegen.
define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
%ext0 = extractelement <4 x double> %v, i32 2
%ext1 = extractelement <4 x double> %v, i32 3
%ins0 = insertelement <2 x double> undef, double %ext0, i32 0
%ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
store <2 x double> %ins1, <2 x double>* %ptr, align 16
ret void
; CHECK-LABEL: high_v4f64_to_v2f64
; CHECK: vextractf128
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}
target triple = "x86_64-unknown-unknown"
; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
; When extracting multiple consecutive elements from a larger
; vector into a smaller one, do it efficiently. We should use
; an EXTRACT_SUBVECTOR node internally rather than a bunch of
; single element extractions.
; Extracting the low elements only requires using the right kind of store.
define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
%ext0 = extractelement <8 x float> %v, i32 0
%ext1 = extractelement <8 x float> %v, i32 1
%ext2 = extractelement <8 x float> %v, i32 2
%ext3 = extractelement <8 x float> %v, i32 3
%ins0 = insertelement <4 x float> undef, float %ext0, i32 0
%ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
%ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
%ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
store <4 x float> %ins3, <4 x float>* %ptr, align 16
ret void
; CHECK-LABEL: low_v8f32_to_v4f32
; CHECK: vmovaps
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}
; Extracting the high elements requires just one AVX instruction.
define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
%ext0 = extractelement <8 x float> %v, i32 4
%ext1 = extractelement <8 x float> %v, i32 5
%ext2 = extractelement <8 x float> %v, i32 6
%ext3 = extractelement <8 x float> %v, i32 7
%ins0 = insertelement <4 x float> undef, float %ext0, i32 0
%ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
%ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
%ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
store <4 x float> %ins3, <4 x float>* %ptr, align 16
ret void
; CHECK-LABEL: high_v8f32_to_v4f32
; CHECK: vextractf128
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}
; Make sure element type doesn't alter the codegen. Note that
; if we were actually using the vector in this function and
; have AVX2, we should generate vextracti128 (the int version).
define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
%ext0 = extractelement <8 x i32> %v, i32 4
%ext1 = extractelement <8 x i32> %v, i32 5
%ext2 = extractelement <8 x i32> %v, i32 6
%ext3 = extractelement <8 x i32> %v, i32 7
%ins0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
%ins1 = insertelement <4 x i32> %ins0, i32 %ext1, i32 1
%ins2 = insertelement <4 x i32> %ins1, i32 %ext2, i32 2
%ins3 = insertelement <4 x i32> %ins2, i32 %ext3, i32 3
store <4 x i32> %ins3, <4 x i32>* %ptr, align 16
ret void
; CHECK-LABEL: high_v8i32_to_v4i32
; CHECK: vextractf128
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}
; Make sure that element size doesn't alter the codegen.
define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
%ext0 = extractelement <4 x double> %v, i32 2
%ext1 = extractelement <4 x double> %v, i32 3
%ins0 = insertelement <2 x double> undef, double %ext0, i32 0
%ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
store <2 x double> %ins1, <2 x double>* %ptr, align 16
ret void
; CHECK-LABEL: high_v4f64_to_v2f64
; CHECK: vextractf128
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}

View File

@ -467,23 +467,23 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR20540:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR20540:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: PR20540:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i8> %shuffle
;
; SSSE3-LABEL: PR20540:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR20540:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: PR20540:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
@ -493,25 +493,25 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %shuffle
}
@ -523,25 +523,25 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i8> %shuffle
}
@ -571,27 +571,27 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 3
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 3
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %shuffle
}