mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-07 21:09:21 +00:00
[DAGCombiner] Combine shuffles of BUILD_VECTOR and SCALAR_TO_VECTOR
This patch attempts to fold the shuffling of 'scalar source' inputs - BUILD_VECTOR and SCALAR_TO_VECTOR nodes - if the shuffle node is the only user. This folds away a lot of unnecessary shuffle nodes, and allows quite a bit of constant folding that was being missed.

Differential Revision: http://reviews.llvm.org/D8516

llvm-svn: 234004
This commit is contained in:
parent
b2266efe11
commit
335a565d46
@ -11980,6 +11980,43 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
return V;
|
||||
}
|
||||
|
||||
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
|
||||
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
|
||||
if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
for (int M : SVN->getMask()) {
|
||||
SDValue Op = DAG.getUNDEF(VT.getScalarType());
|
||||
if (M >= 0) {
|
||||
int Idx = M % NumElts;
|
||||
SDValue &S = (M < (int)NumElts ? N0 : N1);
|
||||
if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
|
||||
Op = S.getOperand(Idx);
|
||||
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
|
||||
if (Idx == 0)
|
||||
Op = S.getOperand(0);
|
||||
} else {
|
||||
// Operand can't be combined - bail out.
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ops.push_back(Op);
|
||||
}
|
||||
if (Ops.size() == VT.getVectorNumElements()) {
|
||||
// BUILD_VECTOR requires all inputs to be of the same type, find the
|
||||
// maximum type and extend them all.
|
||||
EVT SVT = VT.getScalarType();
|
||||
if (SVT.isInteger())
|
||||
for (SDValue &Op : Ops)
|
||||
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
|
||||
if (SVT != VT.getScalarType())
|
||||
for (SDValue &Op : Ops)
|
||||
Op = TLI.isZExtFree(Op.getValueType(), SVT)
|
||||
? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
|
||||
: DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
|
||||
}
|
||||
}
|
||||
|
||||
// If this shuffle only has a single input that is a bitcasted shuffle,
|
||||
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
|
||||
// back to their original types.
|
||||
|
@ -1086,7 +1086,7 @@ define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
|
||||
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
|
||||
entry:
|
||||
%c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
|
||||
%d = insertelement <2 x i32> undef, i32 %c, i32 0
|
||||
|
@ -1,22 +1,8 @@
|
||||
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s
|
||||
|
||||
|
||||
; The mask:
|
||||
; CHECK: lCPI0_0:
|
||||
; CHECK: .byte 2 ; 0x2
|
||||
; CHECK: .byte 255 ; 0xff
|
||||
; CHECK: .byte 6 ; 0x6
|
||||
; CHECK: .byte 255 ; 0xff
|
||||
; The second vector is legalized to undef and the elements of the first vector
|
||||
; are used instead.
|
||||
; CHECK: .byte 2 ; 0x2
|
||||
; CHECK: .byte 4 ; 0x4
|
||||
; CHECK: .byte 6 ; 0x6
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: test1
|
||||
; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI0_0
|
||||
; CHECK: movi.8h v[[REG1:[0-9]+]], #0x1, lsl #8
|
||||
; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
|
||||
; CHECK: movi d[[REG0:[0-9]+]], #0000000000000000
|
||||
define <8 x i1> @test1() {
|
||||
entry:
|
||||
%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
|
||||
@ -30,18 +16,16 @@ entry:
|
||||
|
||||
; CHECK: lCPI1_0:
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 255 ; 0xff
|
||||
; CHECK: .byte 2 ; 0x2
|
||||
; CHECK: .byte 255 ; 0xff
|
||||
; CHECK: .byte 10 ; 0xa
|
||||
; CHECK: .byte 12 ; 0xc
|
||||
; CHECK: .byte 14 ; 0xe
|
||||
; CHECK: .byte 7 ; 0x7
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 1 ; 0x1
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: test2
|
||||
; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI1_0@PAGEOFF]
|
||||
; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_1@PAGE
|
||||
; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG2]], lCPI1_1@PAGEOFF]
|
||||
; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
|
||||
; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_0@PAGE
|
||||
; CHECK: ldr d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0@PAGEOFF]
|
||||
define <8 x i1>@test2() {
|
||||
bb:
|
||||
%Shuff = shufflevector <8 x i1> zeroinitializer,
|
||||
@ -51,28 +35,8 @@ bb:
|
||||
ret <8 x i1> %Shuff
|
||||
}
|
||||
|
||||
; CHECK: lCPI2_0:
|
||||
; CHECK: .byte 2 ; 0x2
|
||||
; CHECK: .byte 255 ; 0xff
|
||||
; CHECK: .byte 6 ; 0x6
|
||||
; CHECK: .byte 255 ; 0xff
|
||||
; CHECK: .byte 10 ; 0xa
|
||||
; CHECK: .byte 12 ; 0xc
|
||||
; CHECK: .byte 14 ; 0xe
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 2 ; 0x2
|
||||
; CHECK: .byte 255 ; 0xff
|
||||
; CHECK: .byte 6 ; 0x6
|
||||
; CHECK: .byte 255 ; 0xff
|
||||
; CHECK: .byte 10 ; 0xa
|
||||
; CHECK: .byte 12 ; 0xc
|
||||
; CHECK: .byte 14 ; 0xe
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: test3
|
||||
; CHECK: adrp x[[REG3:[0-9]+]], lCPI2_0@PAGE
|
||||
; CHECK: ldr q[[REG0:[0-9]+]], [x[[REG3]], lCPI2_0@PAGEOFF]
|
||||
; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG3]], lCPI2_1@PAGEOFF]
|
||||
; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
|
||||
; CHECK: movi.4s v{{[0-9]+}}, #0x1
|
||||
define <16 x i1> @test3(i1* %ptr, i32 %v) {
|
||||
bb:
|
||||
%Shuff = shufflevector <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <16 x i1> undef,
|
||||
@ -81,29 +45,26 @@ bb:
|
||||
i32 14, i32 0>
|
||||
ret <16 x i1> %Shuff
|
||||
}
|
||||
; CHECK: lCPI3_1:
|
||||
; CHECK: lCPI3_0:
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 1 ; 0x1
|
||||
; CHECK: .byte 2 ; 0x2
|
||||
; CHECK: .byte 18 ; 0x12
|
||||
; CHECK: .byte 4 ; 0x4
|
||||
; CHECK: .byte 5 ; 0x5
|
||||
; CHECK: .byte 6 ; 0x6
|
||||
; CHECK: .byte 7 ; 0x7
|
||||
; CHECK: .byte 8 ; 0x8
|
||||
; CHECK: .byte 31 ; 0x1f
|
||||
; CHECK: .byte 10 ; 0xa
|
||||
; CHECK: .byte 30 ; 0x1e
|
||||
; CHECK: .byte 12 ; 0xc
|
||||
; CHECK: .byte 13 ; 0xd
|
||||
; CHECK: .byte 14 ; 0xe
|
||||
; CHECK: .byte 15 ; 0xf
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: .byte 0 ; 0x0
|
||||
; CHECK: _test4:
|
||||
; CHECK: ldr q[[REG1:[0-9]+]]
|
||||
; CHECK: movi.2d v[[REG0:[0-9]+]], #0000000000000000
|
||||
; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_1@PAGE
|
||||
; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_1@PAGEOFF]
|
||||
; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG0]], v[[REG1]] }, v[[REG2]]
|
||||
; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_0@PAGE
|
||||
; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0@PAGEOFF]
|
||||
define <16 x i1> @test4(i1* %ptr, i32 %v) {
|
||||
bb:
|
||||
%Shuff = shufflevector <16 x i1> zeroinitializer,
|
||||
|
@ -9,58 +9,23 @@ define <16 x i8> @foo() nounwind ssp {
|
||||
}
|
||||
|
||||
; CHECK: .LCPI0_0:
|
||||
; CHECK: .byte 31
|
||||
; CHECK: .byte 26
|
||||
; CHECK: .byte 21
|
||||
; CHECK: .byte 16
|
||||
; CHECK: .byte 11
|
||||
; CHECK: .byte 6
|
||||
; CHECK: .byte 1
|
||||
; CHECK: .byte 28
|
||||
; CHECK: .byte 23
|
||||
; CHECK: .byte 18
|
||||
; CHECK: .byte 13
|
||||
; CHECK: .byte 8
|
||||
; CHECK: .byte 3
|
||||
; CHECK: .byte 30
|
||||
; CHECK: .byte 25
|
||||
; CHECK: .byte 20
|
||||
; CHECK: .LCPI0_1:
|
||||
; CHECK: .byte 0
|
||||
; CHECK: .byte 1
|
||||
; CHECK: .byte 2
|
||||
; CHECK: .byte 3
|
||||
; CHECK: .byte 4
|
||||
; CHECK: .byte 5
|
||||
; CHECK: .byte 6
|
||||
; CHECK: .byte 7
|
||||
; CHECK: .byte 8
|
||||
; CHECK: .byte 9
|
||||
; CHECK: .byte 10
|
||||
; CHECK: .byte 11
|
||||
; CHECK: .byte 12
|
||||
; CHECK: .byte 13
|
||||
; CHECK: .byte 14
|
||||
; CHECK: .byte 15
|
||||
; CHECK: .LCPI0_2:
|
||||
; CHECK: .byte 16
|
||||
; CHECK: .byte 17
|
||||
; CHECK: .byte 18
|
||||
; CHECK: .byte 19
|
||||
; CHECK: .byte 20
|
||||
; CHECK: .byte 21
|
||||
; CHECK: .byte 22
|
||||
; CHECK: .byte 23
|
||||
; CHECK: .byte 24
|
||||
; CHECK: .byte 25
|
||||
; CHECK: .byte 26
|
||||
; CHECK: .byte 27
|
||||
; CHECK: .byte 28
|
||||
; CHECK: .byte 29
|
||||
; CHECK: .byte 30
|
||||
; CHECK: .byte 31
|
||||
; CHECK: .byte 3
|
||||
; CHECK: .byte 8
|
||||
; CHECK: .byte 13
|
||||
; CHECK: .byte 18
|
||||
; CHECK: .byte 23
|
||||
; CHECK: .byte 28
|
||||
; CHECK: .byte 1
|
||||
; CHECK: .byte 6
|
||||
; CHECK: .byte 11
|
||||
; CHECK: foo:
|
||||
; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_2@toc@ha
|
||||
; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_2@toc@l
|
||||
; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_0@toc@ha
|
||||
; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_0@toc@l
|
||||
; CHECK: lvx [[REG3:[0-9]+]], 0, [[REG2]]
|
||||
; CHECK: vperm {{[0-9]+}}, [[REG3]], {{[0-9]+}}, {{[0-9]+}}
|
||||
|
@ -75,8 +75,7 @@ define i64 @t5(i32 %a, i32 %b) nounwind readnone {
|
||||
; CHECK-NEXT: movd
|
||||
; CHECK-NEXT: movd
|
||||
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
|
||||
; CHECK-NEXT: movd %xmm0, %rax
|
||||
; CHECK-NEXT: movd %xmm1, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%v0 = insertelement <2 x i32> undef, i32 %a, i32 0
|
||||
%v1 = insertelement <2 x i32> %v0, i32 %b, i32 1
|
||||
|
@ -1026,29 +1026,24 @@ define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) {
|
||||
}
|
||||
|
||||
; Edge case for insertps where we end up with a shuffle with mask=<0, 7, -1, -1>
|
||||
define void @insertps_pr20411(i32* noalias nocapture %RET) #1 {
|
||||
define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32* noalias nocapture %RET) #1 {
|
||||
; X32-LABEL: insertps_pr20411:
|
||||
; X32: ## BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm1 = mem[3,1,2,3]
|
||||
; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
|
||||
; X32-NEXT: movdqu %xmm1, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: insertps_pr20411:
|
||||
; X64: ## BB#0:
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = mem[3,1,2,3]
|
||||
; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
|
||||
; X64-NEXT: movdqu %xmm1, (%rdi)
|
||||
; X64-LABEL: insertps_pr20411:
|
||||
; X64: ## BB#0:
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
|
||||
; X64-NEXT: movdqu %xmm1, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
%gather_load = shufflevector <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%shuffle109 = shufflevector <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; 4 5 6 7
|
||||
%shuffle116 = shufflevector <8 x i32> %gather_load, <8 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef> ; 3 x x x
|
||||
%shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 4, i32 3, i32 undef, i32 undef> ; 3 7 x x
|
||||
%ptrcast = bitcast i32* %RET to <4 x i32>*
|
||||
store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
|
||||
%shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 0, i32 7, i32 undef, i32 undef>
|
||||
%ptrcast = bitcast i32* %RET to <4 x i32>*
|
||||
store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,7 @@ define void @t1(i32 %a, x86_mmx* %P) nounwind {
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: shll $12, %ecx
|
||||
; CHECK-NEXT: movd %ecx, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
|
||||
; CHECK-NEXT: movlpd %xmm0, (%eax)
|
||||
; CHECK-NEXT: retl
|
||||
%tmp12 = shl i32 %a, 12
|
||||
|
@ -6,7 +6,7 @@ define x86_mmx @t0(i32 %A) nounwind {
|
||||
; X86-32-LABEL: t0:
|
||||
; X86-32: ## BB#0:
|
||||
; X86-32: movd {{[0-9]+}}(%esp), %xmm0
|
||||
; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
|
||||
; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
|
||||
; X86-32-NEXT: movlpd %xmm0, (%esp)
|
||||
; X86-32-NEXT: movq (%esp), %mm0
|
||||
; X86-32-NEXT: addl $12, %esp
|
||||
|
@ -17,7 +17,7 @@ define void @test1() {
|
||||
|
||||
define void @test2() {
|
||||
;CHECK-LABEL: @test2
|
||||
;CHECK: pshufd
|
||||
;CHECK: pcmpeqd
|
||||
store <1 x i64> < i64 -1 >, <1 x i64>* @M1
|
||||
store <2 x i32> < i32 -1, i32 -1 >, <2 x i32>* @M2
|
||||
ret void
|
||||
|
@ -634,28 +634,16 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
|
||||
}
|
||||
|
||||
define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
|
||||
; SSE2-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movzbl %dil, %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd %edi, %xmm0
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movd %edi, %xmm0
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: retq
|
||||
; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzbl %dil, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd %edi, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: movzbl %dil, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <16 x i8> undef, i8 %i, i32 0
|
||||
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
@ -663,29 +651,18 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
|
||||
}
|
||||
|
||||
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
|
||||
; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movzbl %dil, %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd %edi, %xmm0
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movd %edi, %xmm0
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: shll $8, %edi
|
||||
; SSE-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE-NEXT: pinsrw $2, %edi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
|
||||
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd %edi, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: shll $8, %edi
|
||||
; AVX-NEXT: vpxor %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrw $2, %edi, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <16 x i8> undef, i8 %i, i32 0
|
||||
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -695,14 +672,16 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
|
||||
define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
|
||||
; SSE-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movd %edi, %xmm0
|
||||
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
|
||||
; SSE-NEXT: shll $8, %edi
|
||||
; SSE-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE-NEXT: pinsrw $7, %edi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd %edi, %xmm0
|
||||
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
|
||||
; AVX-NEXT: shll $8, %edi
|
||||
; AVX-NEXT: vpxor %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrw $7, %edi, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <16 x i8> undef, i8 %i, i32 0
|
||||
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
|
||||
@ -710,32 +689,18 @@ define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(
|
||||
}
|
||||
|
||||
define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
|
||||
; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movzbl %dil, %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd %edi, %xmm0
|
||||
; SSSE3-NEXT: pslld $24, %xmm0
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movd %edi, %xmm0
|
||||
; SSE41-NEXT: pslld $24, %xmm0
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: retq
|
||||
; SSE-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzbl %dil, %eax
|
||||
; SSE-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE-NEXT: pinsrw $1, %eax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd %edi, %xmm0
|
||||
; AVX-NEXT: vpslld $24, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: movzbl %dil, %eax
|
||||
; AVX-NEXT: vpxor %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrw $1, %eax, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <16 x i8> undef, i8 %i, i32 3
|
||||
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
|
@ -1384,16 +1384,14 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
|
||||
define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
|
||||
; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
|
||||
; SSE-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE-NEXT: pinsrw $1, %edi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
|
||||
; AVX-NEXT: vpxor %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrw $1, %edi, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 0
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
|
||||
@ -1403,16 +1401,14 @@ define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
|
||||
define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
|
||||
; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
|
||||
; SSE-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE-NEXT: pinsrw $5, %edi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
|
||||
; AVX-NEXT: vpxor %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrw $5, %edi, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 0
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
|
||||
@ -1422,14 +1418,14 @@ define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
|
||||
define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
|
||||
; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movd %edi, %xmm0
|
||||
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
|
||||
; SSE-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE-NEXT: pinsrw $7, %edi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd %edi, %xmm0
|
||||
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
|
||||
; AVX-NEXT: vpxor %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrw $7, %edi, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 0
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
|
||||
@ -1439,16 +1435,14 @@ define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
|
||||
define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
|
||||
; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
|
||||
; SSE-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE-NEXT: pinsrw $2, %edi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
|
||||
; AVX-NEXT: vpxor %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrw $2, %edi, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <8 x i16> undef, i16 %i, i32 3
|
||||
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
|
Loading…
x
Reference in New Issue
Block a user